Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ("ButlerURI",) 

25 

26import contextlib 

27import os 

28import os.path 

29import shutil 

30import urllib 

31import pkg_resources 

32import posixpath 

33from pathlib import Path, PurePath, PurePosixPath 

34import requests 

35import tempfile 

36import copy 

37import logging 

38import re 

39 

40from typing import ( 

41 TYPE_CHECKING, 

42 Any, 

43 Callable, 

44 cast, 

45 Iterator, 

46 Optional, 

47 Tuple, 

48 Type, 

49 Union, 

50) 

51 

52from .utils import safeMakeDir 

53 

54if TYPE_CHECKING: 54 ↛ 55line 54 didn't jump to line 55, because the condition on line 54 was never true

55 try: 

56 import boto3 

57 except ImportError: 

58 pass 

59 from .datastore import DatastoreTransaction 

60 

61 

# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# Determine if the path separator for the OS looks like POSIX.
# True when the local separator is identical to the POSIX separator.
IS_POSIX = os.sep == posixpath.sep

# Root path for this operating system, taken from the resolved current
# working directory at import time.
OS_ROOT_PATH = Path().resolve().root

# Regex for looking for URI escapes: a "%" followed by two hex digits.
# NOTE(review): only upper-case hex digits are matched, so an escape
# written in lower case (e.g. "%2f") is not detected -- confirm that
# this is intended.
ESCAPES_RE = re.compile(r"%[A-F0-9]{2}")

72 

73 

def os2posix(ospath: str) -> str:
    """Translate a path written with the local separator into POSIX form.

    Parameters
    ----------
    ospath : `str`
        Path using the local path separator.

    Returns
    -------
    posix : `str`
        The same path expressed with the POSIX path separator. On a
        POSIX-like system the input is returned unchanged.
    """
    if not IS_POSIX:
        converted = PurePath(ospath).as_posix()

        # PurePath drops a trailing separator, which would lose the hint
        # that the path refers to a directory, so put it back if needed.
        lostTrailingSep = ospath.endswith(os.sep) and not converted.endswith(posixpath.sep)
        return converted + posixpath.sep if lostTrailingSep else converted

    return ospath

99 

100 

def posix2os(posix: Union[PurePath, str]) -> str:
    """Translate a POSIX-style path into the local OS path convention.

    Parameters
    ----------
    posix : `str`, `PurePath`
        Path using the POSIX path separator.

    Returns
    -------
    ospath : `str`
        The same path expressed with the OS path separator. On a
        POSIX-like system this is simply the string form of the input.
    """
    if IS_POSIX:
        return str(posix)

    pure = PurePosixPath(posix)
    components = list(pure.parts)

    # The POSIX root only makes sense once the path has been split up,
    # at which point it can be swapped for the root of this OS.
    leading = components[0]
    if leading == pure.root:
        components[0] = OS_ROOT_PATH

    # Splitting discards a trailing "/"; an empty final component makes
    # os.path.join put a trailing separator back.
    if str(posix).endswith(posixpath.sep):
        components.append("")

    return os.path.join(*components)

130 

131 

class NoTransaction:
    """Stand-in for the `DatastoreTransaction` class that records nothing.

    It lets calling code use the same ``with transaction.undoWith(...)``
    pattern whether or not a real transaction was supplied.
    """

    def __init__(self) -> None:
        # There is no state to initialise.
        pass

    @contextlib.contextmanager
    def undoWith(self, name: str, undoFunc: Callable, *args: Any, **kwargs: Any) -> Iterator[None]:
        """Context manager that does nothing.

        All arguments are accepted for signature compatibility and then
        ignored; in particular ``undoFunc`` is never called.
        """
        yield

146 

147 

class ButlerURI:
    """Convenience wrapper around URI parsers.

    Provides access to URI components and can convert file
    paths into absolute path URIs. Scheme-less URIs are treated as if
    they are local file system paths and are converted to absolute URIs.

    A specialist subclass is created for each supported URI scheme.

    Parameters
    ----------
    uri : `str`, `urllib.parse.ParseResult` or `ButlerURI`
        URI in string form. Can be scheme-less if referring to a local
        filesystem path. An existing `ButlerURI` is copied as is.
    root : `str`, optional
        When fixing up a relative path in a ``file`` scheme or if scheme-less,
        use this as the root. Must be absolute. If `None` the current
        working directory will be used.
    forceAbsolute : `bool`, optional
        If `True`, scheme-less relative URI will be converted to an absolute
        path using a ``file`` scheme. If `False` scheme-less URI will remain
        scheme-less and will not be updated to ``file`` or absolute path.
    forceDirectory: `bool`, optional
        If `True` forces the URI to end with a separator, otherwise given URI
        is interpreted as is.

    Raises
    ------
    ValueError
        Raised if ``uri`` is not a `str`, `urllib.parse.ParseResult` or
        `ButlerURI`.
    NotImplementedError
        Raised if the URI scheme is not supported.
    """

    _pathLib: Type[PurePath] = PurePosixPath
    """Path library to use for this scheme."""

    _pathModule = posixpath
    """Path module to use for this scheme."""

    transferModes: Tuple[str, ...] = ("copy", "auto", "move")
    """Transfer modes supported by this implementation.

    Move is special in that it is generally a copy followed by an unlink.
    Whether that unlink works depends critically on whether the source URI
    implements unlink. If it does not the move will be reported as a failure.
    """

    transferDefault: str = "copy"
    """Default mode to use for transferring if ``auto`` is specified."""

    quotePaths = True
    """True if path-like elements modifying a URI should be quoted.

    All non-schemeless URIs have to internally use quoted paths. Therefore
    if a new file name is given (e.g. to updateFile or join) a decision must
    be made whether to quote it to be consistent.
    """

    # This is not an ABC with abstract methods because the __new__ being
    # a factory confuses mypy such that it assumes that every constructor
    # returns a ButlerURI and then determines that all the abstract methods
    # are still abstract. If they are not marked abstract but just raise
    # mypy is fine with it.

    # mypy is confused without this
    _uri: urllib.parse.ParseResult

    def __new__(cls, uri: Union[str, urllib.parse.ParseResult, ButlerURI],
                root: Optional[str] = None, forceAbsolute: bool = True,
                forceDirectory: bool = False) -> ButlerURI:
        parsed: urllib.parse.ParseResult
        dirLike: bool
        subclass: Optional[Type] = None

        # Record if we need to post process the URI components
        # or if the instance is already fully configured
        if isinstance(uri, str):
            # Since local file names can have special characters in them
            # we need to quote them for the parser but we can unquote
            # later. Assume that all other URI schemes are quoted.
            # Since sometimes people write file:/a/b and not file:///a/b
            # we should not quote in the explicit case of file:
            if "://" not in uri and not uri.startswith("file:"):
                if ESCAPES_RE.search(uri):
                    log.warning("Possible double encoding of %s", uri)
                else:
                    uri = urllib.parse.quote(uri)
            parsed = urllib.parse.urlparse(uri)
        elif isinstance(uri, urllib.parse.ParseResult):
            parsed = copy.copy(uri)
        elif isinstance(uri, ButlerURI):
            parsed = copy.copy(uri._uri)
            dirLike = uri.dirLike
            # No further parsing required and we know the subclass
            subclass = type(uri)
        else:
            raise ValueError(f"Supplied URI must be string, ButlerURI, or ParseResult but got '{uri!r}'")

        if subclass is None:
            # Work out the subclass from the URI scheme
            if not parsed.scheme:
                subclass = ButlerSchemelessURI
            elif parsed.scheme == "file":
                subclass = ButlerFileURI
            elif parsed.scheme == "s3":
                subclass = ButlerS3URI
            elif parsed.scheme.startswith("http"):
                subclass = ButlerHttpURI
            elif parsed.scheme == "resource":
                # Rules for scheme names disallow pkg_resource
                subclass = ButlerPackageResourceURI
            elif parsed.scheme == "mem":
                # in-memory datastore object
                subclass = ButlerInMemoryURI
            else:
                # Both halves must be f-strings so that the offending URI
                # is actually reported in the message.
                raise NotImplementedError(f"No URI support for scheme: '{parsed.scheme}'"
                                          f" in {parsed.geturl()}")

            parsed, dirLike = subclass._fixupPathUri(parsed, root=root,
                                                     forceAbsolute=forceAbsolute,
                                                     forceDirectory=forceDirectory)

            # It is possible for the class to change from schemeless
            # to file so handle that
            if parsed.scheme == "file":
                subclass = ButlerFileURI

        # Now create an instance of the correct subclass and set the
        # attributes directly
        self = object.__new__(subclass)
        self._uri = parsed
        self.dirLike = dirLike
        return self

    @property
    def scheme(self) -> str:
        """The URI scheme (``://`` is not part of the scheme)."""
        return self._uri.scheme

    @property
    def netloc(self) -> str:
        """The URI network location."""
        return self._uri.netloc

    @property
    def path(self) -> str:
        """The path component of the URI."""
        return self._uri.path

    @property
    def unquoted_path(self) -> str:
        """The path component of the URI with any URI quoting reversed."""
        return urllib.parse.unquote(self._uri.path)

    @property
    def ospath(self) -> str:
        """Path component of the URI localized to current OS.

        The base implementation always raises `AttributeError`.
        """
        raise AttributeError(f"Non-file URI ({self}) has no local OS path.")

    @property
    def relativeToPathRoot(self) -> str:
        """Returns path relative to network location.

        Effectively, this is the path property with posix separator stripped
        from the left hand side of the path.

        Always unquotes.
        """
        p = self._pathLib(self.path)
        relToRoot = str(p.relative_to(p.root))
        # The path library strips a trailing separator so restore it for
        # directory-like URIs.
        if self.dirLike and not relToRoot.endswith("/"):
            relToRoot += "/"
        return urllib.parse.unquote(relToRoot)

    @property
    def fragment(self) -> str:
        """The fragment component of the URI."""
        return self._uri.fragment

    @property
    def params(self) -> str:
        """Any parameters included in the URI."""
        return self._uri.params

    @property
    def query(self) -> str:
        """Any query strings included in the URI."""
        return self._uri.query

    def geturl(self) -> str:
        """Return the URI in string form.

        Returns
        -------
        url : `str`
            String form of URI.
        """
        return self._uri.geturl()

    def split(self) -> Tuple[ButlerURI, str]:
        """Splits URI into head and tail. Equivalent to os.path.split where
        head preserves the URI components.

        Returns
        -------
        head: `ButlerURI`
            Everything leading up to tail, expanded and normalized as per
            ButlerURI rules.
        tail : `str`
            Last `self.path` component. Tail will be empty if path ends on a
            separator. Tail will never contain separators. It will be
            unquoted.
        """
        head, tail = self._pathModule.split(self.path)
        headuri = self._uri._replace(path=head)

        # The file part should never include quoted metacharacters
        tail = urllib.parse.unquote(tail)

        # Schemeless is special in that it can be a relative path
        # We need to ensure that it stays that way. All other URIs will
        # be absolute already.
        forceAbsolute = self._pathModule.isabs(self.path)
        return ButlerURI(headuri, forceDirectory=True, forceAbsolute=forceAbsolute), tail

    def basename(self) -> str:
        """Returns the base name, last element of path, of the URI. If URI ends
        on a slash returns an empty string. This is the second element returned
        by split().

        Equivalent of os.path.basename().

        Returns
        -------
        tail : `str`
            Last part of the path attribute. Tail will be empty if path ends
            on a separator.
        """
        return self.split()[1]

    def dirname(self) -> ButlerURI:
        """Returns a ButlerURI containing all the directories of the path
        attribute.

        Equivalent of os.path.dirname()

        Returns
        -------
        head : `ButlerURI`
            Everything except the tail of path attribute, expanded and
            normalized as per ButlerURI rules.
        """
        return self.split()[0]

    def replace(self, **kwargs: Any) -> ButlerURI:
        """Replace components in a URI with new values and return a new
        instance.

        Parameters
        ----------
        **kwargs
            Components to replace, forwarded to the ``_replace`` method of
            the underlying `urllib.parse.ParseResult`.

        Returns
        -------
        new : `ButlerURI`
            New `ButlerURI` object with updated values.
        """
        return self.__class__(self._uri._replace(**kwargs))

    def updateFile(self, newfile: str) -> None:
        """Update in place the final component of the path with the supplied
        file name.

        Parameters
        ----------
        newfile : `str`
            File name with no path component.

        Notes
        -----
        Updates the URI in place.
        Updates the ButlerURI.dirLike attribute. The new file path will
        be quoted if necessary.
        """
        if self.quotePaths:
            newfile = urllib.parse.quote(newfile)
        dirname, _ = self._pathModule.split(self.path)
        newpath = self._pathModule.join(dirname, newfile)

        self.dirLike = False
        self._uri = self._uri._replace(path=newpath)

    def getExtension(self) -> str:
        """Return the file extension(s) associated with this URI path.

        Returns
        -------
        ext : `str`
            The file extension (including the ``.``). Can be empty string
            if there is no file extension. Will return all file extensions
            as a single extension such that ``file.fits.gz`` will return
            a value of ``.fits.gz``.
        """
        extensions = self._pathLib(self.path).suffixes
        return "".join(extensions)

    def join(self, path: str) -> ButlerURI:
        """Create a new `ButlerURI` with additional path components including
        a file.

        Parameters
        ----------
        path : `str`
            Additional file components to append to the current URI. Assumed
            to include a file at the end. Will be quoted depending on the
            associated URI scheme.

        Returns
        -------
        new : `ButlerURI`
            New URI with any file at the end replaced with the new path
            components.

        Notes
        -----
        Schemeless URIs assume local path separator but all other URIs assume
        POSIX separator if the supplied path has directory structure. It
        may be this never becomes a problem but datastore templates assume
        POSIX separator is being used.
        """
        new = self.dirname()  # By definition a directory URI

        # new should be asked about quoting, not self, since dirname can
        # change the URI scheme for schemeless -> file
        if new.quotePaths:
            path = urllib.parse.quote(path)

        sep = self._pathModule.sep
        newpath = self._pathModule.normpath(self._pathModule.join(new.path, path))
        if path.endswith(sep):
            # The result stays dirLike; normpath strips the trailing
            # separator so restore it to keep the path and the dirLike
            # flag consistent with each other.
            if not newpath.endswith(sep):
                newpath += sep
        else:
            # Declare the new URI not be dirLike unless path ended in /
            new.dirLike = False
        new._uri = new._uri._replace(path=newpath)
        return new

    def relative_to(self, other: ButlerURI) -> Optional[str]:
        """Return the relative path from this URI to the other URI.

        Parameters
        ----------
        other : `ButlerURI`
            URI to use to calculate the relative path. Must be a parent
            of this URI.

        Returns
        -------
        subpath : `str`
            The sub path of this URI relative to the supplied other URI.
            Returns `None` if there is no parent child relationship.
            Scheme and netloc must match. The returned path is unquoted.
        """
        if self.scheme != other.scheme or self.netloc != other.netloc:
            return None

        # relativeToPathRoot has already unquoted both paths so the result
        # must not be unquoted a second time (that would corrupt names
        # containing a literal "%xx" sequence).
        enclosed_path = self._pathLib(self.relativeToPathRoot)
        parent_path = other.relativeToPathRoot
        try:
            return str(enclosed_path.relative_to(parent_path))
        except ValueError:
            return None

    def exists(self) -> bool:
        """Indicate that the resource is available.

        Returns
        -------
        exists : `bool`
            `True` if the resource exists.
        """
        raise NotImplementedError()

    def remove(self) -> None:
        """Remove the resource."""
        raise NotImplementedError()

    def isabs(self) -> bool:
        """Indicate that the resource is fully specified.

        For non-schemeless URIs this is always true.

        Returns
        -------
        isabs : `bool`
            `True` in all cases except schemeless URI.
        """
        return True

    def as_local(self) -> Tuple[str, bool]:
        """Return the location of the (possibly remote) resource in the
        local file system.

        Returns
        -------
        path : `str`
            If this is a remote resource, it will be a copy of the resource
            on the local file system, probably in a temporary directory.
            For a local resource this should be the actual path to the
            resource.
        is_temporary : `bool`
            Indicates if the local path is a temporary file or not.
        """
        raise NotImplementedError()

    def read(self, size: int = -1) -> bytes:
        """Open the resource and return the contents in bytes.

        Parameters
        ----------
        size : `int`, optional
            The number of bytes to read. Negative or omitted indicates
            that all data should be read.
        """
        raise NotImplementedError()

    def write(self, data: bytes, overwrite: bool = True) -> None:
        """Write the supplied bytes to the new resource.

        Parameters
        ----------
        data : `bytes`
            The bytes to write to the resource. The entire contents of the
            resource will be replaced.
        overwrite : `bool`, optional
            If `True` the resource will be overwritten if it exists. Otherwise
            the write will fail.
        """
        raise NotImplementedError()

    def mkdir(self) -> None:
        """For a dir-like URI, create the directory resource if it does not
        already exist.
        """
        raise NotImplementedError()

    def __str__(self) -> str:
        return self.geturl()

    def __repr__(self) -> str:
        return f'ButlerURI("{self.geturl()}")'

    def __eq__(self, other: Any) -> bool:
        if not isinstance(other, ButlerURI):
            return False
        return self.geturl() == other.geturl()

    def __copy__(self) -> ButlerURI:
        # Implement here because the __new__ method confuses things.
        # Passing the instance itself (rather than its string form) makes
        # __new__ copy the parsed components, dirLike flag and subclass
        # directly, so a relative scheme-less URI is not re-parsed and
        # silently converted to an absolute one.
        return type(self)(self)

    def __deepcopy__(self, memo: Any) -> ButlerURI:
        # Implement here because the __new__ method confuses things
        return self.__copy__()

    def __getnewargs__(self) -> Tuple:
        return (str(self),)

    @staticmethod
    def _fixupPathUri(parsed: urllib.parse.ParseResult, root: Optional[str] = None,
                      forceAbsolute: bool = False,
                      forceDirectory: bool = False) -> Tuple[urllib.parse.ParseResult, bool]:
        """Correct any issues with the supplied URI.

        Parameters
        ----------
        parsed : `~urllib.parse.ParseResult`
            The result from parsing a URI using `urllib.parse`.
        root : `str`, ignored
            Not used by this implementation since all URIs are
            absolute except for those representing the local file system.
        forceAbsolute : `bool`, ignored.
            Not used by this implementation. URIs are generally always
            absolute.
        forceDirectory : `bool`, optional
            If `True` forces the URI to end with a separator, otherwise given
            URI is interpreted as is. Specifying that the URI is conceptually
            equivalent to a directory can break some ambiguities when
            interpreting the last element of a path.

        Returns
        -------
        modified : `~urllib.parse.ParseResult`
            Update result if a URI is being handled.
        dirLike : `bool`
            `True` if given parsed URI has a trailing separator or
            forceDirectory is True. Otherwise `False`.

        Notes
        -----
        Relative paths are explicitly not supported by RFC8089 but `urllib`
        does accept URIs of the form ``file:relative/path.ext``. They need
        to be turned into absolute paths before they can be used. This is
        always done regardless of the ``forceAbsolute`` parameter.

        AWS S3 differentiates between keys with trailing POSIX separators (i.e
        `/dir` and `/dir/`) whereas POSIX does not necessarily.

        Scheme-less paths are normalized.
        """
        # assume we are not dealing with a directory like URI
        dirLike = False

        # URI is dir-like if explicitly stated or if it ends on a separator
        endsOnSep = parsed.path.endswith(posixpath.sep)
        if forceDirectory or endsOnSep:
            dirLike = True
            # only add the separator if it's not already there
            if not endsOnSep:
                parsed = parsed._replace(path=parsed.path+posixpath.sep)

        return parsed, dirLike

    def transfer_from(self, src: ButlerURI, transfer: str,
                      overwrite: bool = False,
                      transaction: Optional[Union[DatastoreTransaction, NoTransaction]] = None) -> None:
        """Transfer the current resource to a new location.

        Parameters
        ----------
        src : `ButlerURI`
            Source URI.
        transfer : `str`
            Mode to use for transferring the resource. Generically there are
            many standard options: copy, link, symlink, hardlink, relsymlink.
            Not all URIs support all modes.
        overwrite : `bool`, optional
            Allow an existing file to be overwritten. Defaults to `False`.
        transaction : `DatastoreTransaction`, optional
            A transaction object that can (depending on implementation)
            rollback transfers on error. Not guaranteed to be implemented.

        Raises
        ------
        NotImplementedError
            Always raised by this base implementation.

        Notes
        -----
        Conceptually this is hard to scale as the number of URI schemes
        grow. The destination URI is more important than the source URI
        since that is where all the transfer modes are relevant (with the
        complication that "move" deletes the source).

        Local file to local file is the fundamental use case but every
        other scheme has to support "copy" to local file (with implicit
        support for "move") and copy from local file.
        All the "link" options tend to be specific to local file systems.

        "move" is a "copy" where the remote resource is deleted at the end.
        Whether this works depends on the source URI rather than the
        destination URI. Reverting a move on transaction rollback is
        expected to be problematic if a remote resource was involved.
        """
        raise NotImplementedError(f"No transfer modes supported by URI scheme {self.scheme}")

699 

700 

701class ButlerFileURI(ButlerURI): 

702 """URI for explicit ``file`` scheme.""" 

703 

704 transferModes = ("copy", "link", "symlink", "hardlink", "relsymlink", "auto", "move") 

705 transferDefault: str = "link" 

706 

707 @property 

708 def ospath(self) -> str: 

709 """Path component of the URI localized to current OS. 

710 

711 Will unquote URI path since a formal URI must include the quoting. 

712 """ 

713 return urllib.parse.unquote(posix2os(self._uri.path)) 

714 

715 def exists(self) -> bool: 

716 # Uses os.path.exists so if there is a soft link that points 

717 # to a file that no longer exists this will return False 

718 return os.path.exists(self.ospath) 

719 

720 def remove(self) -> None: 

721 """Remove the resource.""" 

722 os.remove(self.ospath) 

723 

724 def as_local(self) -> Tuple[str, bool]: 

725 """Return the local path of the file. 

726 

727 Returns 

728 ------- 

729 path : `str` 

730 The local path to this file. 

731 temporary : `bool` 

732 Always returns `False` (this is not a temporary file). 

733 """ 

734 return self.ospath, False 

735 

736 def _force_to_file(self) -> ButlerFileURI: 

737 """Force a schemeless URI to a file URI and returns a new URI. 

738 

739 Returns 

740 ------- 

741 file : `ButlerFileURI` 

742 A copy of the URI using file scheme. If already a file scheme 

743 the copy will be identical. 

744 

745 Raises 

746 ------ 

747 ValueError 

748 Raised if this URI is schemeless and relative path and so can 

749 not be forced to file absolute path without context. 

750 """ 

751 # This is always a file scheme so always return copy 

752 return copy.copy(self) 

753 

754 def relative_to(self, other: ButlerURI) -> Optional[str]: 

755 """Return the relative path from this URI to the other URI. 

756 

757 Parameters 

758 ---------- 

759 other : `ButlerURI` 

760 URI to use to calculate the relative path. Must be a parent 

761 of this URI. 

762 

763 Returns 

764 ------- 

765 subpath : `str` 

766 The sub path of this URI relative to the supplied other URI. 

767 Returns `None` if there is no parent child relationship. 

768 Scheme and netloc must match but for file URIs schemeless 

769 is also used. If this URI is a relative URI but the other is 

770 absolute, it is assumed to be in the parent completely unless it 

771 starts with ".." (in which case the path is combined and tested). 

772 If both URIs are relative, the relative paths are compared 

773 for commonality. 

774 

775 Notes 

776 ----- 

777 By definition a relative path will be relative to the enclosing 

778 absolute parent URI. It will be returned unchanged if it does not 

779 use a parent directory specification. 

780 """ 

781 # We know self is a file so check the other. Anything other than 

782 # file or schemeless means by definition these have no paths in common 

783 if other.scheme and other.scheme != "file": 

784 return None 

785 

786 # for case where both URIs are relative use the normal logic 

787 # where a/b/c.txt and a/b/ returns c.txt. 

788 if not self.isabs() and not other.isabs(): 

789 return super().relative_to(other) 

790 

791 # if we have a relative path convert it to absolute 

792 # relative to the supplied parent. This is solely to handle 

793 # the case where the relative path includes ".." but somehow 

794 # then goes back inside the directory of the parent 

795 if not self.isabs(): 

796 childUri = other.join(self.path) 

797 return childUri.relative_to(other) 

798 

799 # By this point if the schemes are identical we can use the 

800 # base class implementation. 

801 if self.scheme == other.scheme: 

802 return super().relative_to(other) 

803 

804 # if one is schemeless and the other is not the base implementation 

805 # will fail so we need to fix that -- they are both absolute so 

806 # forcing to file is fine. 

807 # Use a cast to convince mypy that other has to be a ButlerFileURI 

808 # in order to get to this part of the code. 

809 return self._force_to_file().relative_to(cast(ButlerFileURI, other)._force_to_file()) 

810 

811 def read(self, size: int = -1) -> bytes: 

812 # Docstring inherits 

813 with open(self.ospath, "rb") as fh: 

814 return fh.read(size) 

815 

816 def write(self, data: bytes, overwrite: bool = True) -> None: 

817 dir = os.path.dirname(self.ospath) 

818 if not os.path.exists(dir): 

819 safeMakeDir(dir) 

820 if overwrite: 

821 mode = "wb" 

822 else: 

823 mode = "xb" 

824 with open(self.ospath, mode) as f: 

825 f.write(data) 

826 

827 def mkdir(self) -> None: 

828 if not os.path.exists(self.ospath): 

829 safeMakeDir(self.ospath) 

830 elif not os.path.isdir(self.ospath): 

831 raise FileExistsError(f"URI {self} exists but is not a directory!") 

832 

833 def transfer_from(self, src: ButlerURI, transfer: str, 

834 overwrite: bool = False, 

835 transaction: Optional[Union[DatastoreTransaction, NoTransaction]] = None) -> None: 

836 """Transfer the current resource to a local file. 

837 

838 Parameters 

839 ---------- 

840 src : `ButlerURI` 

841 Source URI. 

842 transfer : `str` 

843 Mode to use for transferring the resource. Supports the following 

844 options: copy, link, symlink, hardlink, relsymlink. 

845 overwrite : `bool`, optional 

846 Allow an existing file to be overwritten. Defaults to `False`. 

847 transaction : `DatastoreTransaction`, optional 

848 If a transaction is provided, undo actions will be registered. 

849 """ 

850 # Fail early to prevent delays if remote resources are requested 

851 if transfer not in self.transferModes: 

852 raise ValueError(f"Transfer mode '{transfer}' not supported by URI scheme {self.scheme}") 

853 

854 log.debug(f"Transferring {src} [exists: {src.exists()}] -> " 

855 f"{self} [exists: {self.exists()}] (transfer={transfer})") 

856 

857 # We do not have to special case ButlerFileURI here because 

858 # as_local handles that. 

859 local_src, is_temporary = src.as_local() 

860 

861 # Default transfer mode depends on whether we have a temporary 

862 # file or not. 

863 if transfer == "auto": 

864 transfer = self.transferDefault if not is_temporary else "copy" 

865 

866 # Follow soft links 

867 local_src = os.path.realpath(os.path.normpath(local_src)) 

868 

869 if not os.path.exists(local_src): 

870 raise FileNotFoundError(f"Source URI {src} does not exist") 

871 

872 # All the modes involving linking use "link" somewhere 

873 if "link" in transfer and is_temporary: 

874 raise RuntimeError("Can not use local file system transfer mode" 

875 f" {transfer} for remote resource ({src})") 

876 

877 # For temporary files we can own them 

878 requested_transfer = transfer 

879 if is_temporary and transfer == "copy": 

880 transfer = "move" 

881 

882 # The output location should not exist 

883 dest_exists = self.exists() 

884 if not overwrite and dest_exists: 

885 raise FileExistsError(f"Destination path '{self}' already exists. Transfer " 

886 f"from {src} cannot be completed.") 

887 

888 # Make the path absolute (but don't follow links since that 

889 # would possibly cause us to end up in the wrong place if the 

890 # file existed already as a soft link) 

891 newFullPath = os.path.abspath(self.ospath) 

892 outputDir = os.path.dirname(newFullPath) 

893 if not os.path.isdir(outputDir): 

894 # Must create the directory -- this can not be rolled back 

895 # since another transfer running concurrently may 

896 # be relying on this existing. 

897 safeMakeDir(outputDir) 

898 

899 if transaction is None: 

900 # Use a no-op transaction to reduce code duplication 

901 transaction = NoTransaction() 

902 

903 # For links the OS doesn't let us overwrite so if something does 

904 # exist we have to remove it before we do the actual "transfer" below 

905 if "link" in transfer and overwrite and dest_exists: 

906 try: 

907 self.remove() 

908 except Exception: 

909 # If this fails we ignore it since it's a problem 

910 # that will manifest immediately below with a more relevant 

911 # error message 

912 pass 

913 

914 if transfer == "move": 

915 with transaction.undoWith(f"move from {local_src}", shutil.move, newFullPath, local_src): 

916 shutil.move(local_src, newFullPath) 

917 elif transfer == "copy": 

918 with transaction.undoWith(f"copy from {local_src}", os.remove, newFullPath): 

919 shutil.copy(local_src, newFullPath) 

920 elif transfer == "link": 

921 # Try hard link and if that fails use a symlink 

922 with transaction.undoWith(f"link to {local_src}", os.remove, newFullPath): 

923 try: 

924 os.link(local_src, newFullPath) 

925 except OSError: 

926 # Read through existing symlinks 

927 os.symlink(local_src, newFullPath) 

928 elif transfer == "hardlink": 

929 with transaction.undoWith(f"hardlink to {local_src}", os.remove, newFullPath): 

930 os.link(local_src, newFullPath) 

931 elif transfer == "symlink": 

932 # Read through existing symlinks 

933 with transaction.undoWith(f"symlink to {local_src}", os.remove, newFullPath): 

934 os.symlink(local_src, newFullPath) 

935 elif transfer == "relsymlink": 

936 # This is a standard symlink but using a relative path 

937 # Need the directory name to give to relative root 

938 # A full file path confuses it into an extra ../ 

939 newFullPathRoot = os.path.dirname(newFullPath) 

940 relPath = os.path.relpath(local_src, newFullPathRoot) 

941 with transaction.undoWith(f"relsymlink to {local_src}", os.remove, newFullPath): 

942 os.symlink(relPath, newFullPath) 

943 else: 

944 raise NotImplementedError("Transfer type '{}' not supported.".format(transfer)) 

945 

946 # This was an explicit move requested from a remote resource 

947 # try to remove that resource. We check is_temporary because 

948 # the local file would have been moved by shutil.move already. 

949 if requested_transfer == "move" and is_temporary: 

950 # Transactions do not work here 

951 src.remove() 

952 

953 if is_temporary and os.path.exists(local_src): 

954 # This should never happen since we have moved it above 

955 os.remove(local_src) 

956 

@staticmethod
def _fixupPathUri(parsed: urllib.parse.ParseResult, root: Optional[str] = None,
                  forceAbsolute: bool = False,
                  forceDirectory: bool = False) -> Tuple[urllib.parse.ParseResult, bool]:
    """Fix up relative paths in URI instances.

    Parameters
    ----------
    parsed : `~urllib.parse.ParseResult`
        The result from parsing a URI using `urllib.parse`.
    root : `str`, optional
        Path to use as root when converting relative to absolute.
        If `None`, it will be the current working directory. This
        is a local file system path, not a URI. It is only used if
        a file-scheme is used incorrectly with a relative path.
    forceAbsolute : `bool`, ignored
        Has no effect for this subclass. ``file`` URIs are always
        absolute.
    forceDirectory : `bool`, optional
        If `True` forces the URI to end with a separator, otherwise given
        URI is interpreted as is.

    Returns
    -------
    modified : `~urllib.parse.ParseResult`
        Update result if a URI is being handled.
    dirLike : `bool`
        `True` if given parsed URI has a trailing separator or
        forceDirectory is True. Otherwise `False`.

    Notes
    -----
    Relative paths are explicitly not supported by RFC8089 but `urllib`
    does accept URIs of the form ``file:relative/path.ext``. They need
    to be turned into absolute paths before they can be used. This is
    always done regardless of the ``forceAbsolute`` parameter.

    Absolute paths are returned without being normalized; only relative
    paths go through `posixpath.normpath`.
    """
    # Assume we are not dealing with a directory-like URI
    dirLike = False

    # file URI implies POSIX path separators so split as POSIX,
    # then join as os, and convert to abspath. Do not handle
    # home directories since "file" scheme is explicitly documented
    # to not do tilde expansion.
    sep = posixpath.sep

    # For local file system we can explicitly check to see if this
    # really is a directory. The URI might point to a location that
    # does not exist yet but all that matters is that if it is a
    # directory then we make use of that fact. No need to do the check
    # if we are already being told.
    if not forceDirectory and posixpath.isdir(parsed.path):
        forceDirectory = True

    # For an absolute path all we need to do is check if we need
    # to force the directory separator
    if posixpath.isabs(parsed.path):
        if forceDirectory:
            if not parsed.path.endswith(sep):
                parsed = parsed._replace(path=parsed.path+sep)
            dirLike = True
        return copy.copy(parsed), dirLike

    # Relative path so must fix it to be compliant with the standard

    # Replacement values for the URI
    replacements = {}

    if root is None:
        root = os.path.abspath(os.path.curdir)

    replacements["path"] = posixpath.normpath(posixpath.join(os2posix(root), parsed.path))

    # normpath strips the trailing "/" (except when the result is the
    # root itself) so put it back if necessary, taking care never to
    # double it up. A trailing separator on the input or an explicit
    # forceDirectory both make this directory-like.
    if forceDirectory or parsed.path.endswith(sep):
        dirLike = True
        if not replacements["path"].endswith(sep):
            replacements["path"] += sep

    # ParseResult is a NamedTuple so _replace is standard API
    parsed = parsed._replace(**replacements)

    if parsed.params or parsed.query:
        log.warning("Additional items unexpectedly encountered in file URI: %s", parsed.geturl())

    return parsed, dirLike

1043 

1044 

class ButlerS3URI(ButlerURI):
    """S3 URI"""

    @property
    def client(self) -> boto3.client:
        """Client object to address remote resource."""
        # Defer import for circular dependencies
        from .s3utils import getS3Client
        return getS3Client()

    def exists(self) -> bool:
        """Check whether the S3 resource exists.

        Returns
        -------
        exists : `bool`
            The existence flag reported by `s3CheckFileExists`.
        """
        # s3utils itself imports ButlerURI so defer this import
        from .s3utils import s3CheckFileExists
        exists, _ = s3CheckFileExists(self, client=self.client)
        return exists

    def remove(self) -> None:
        """Remove the resource."""

        # https://github.com/boto/boto3/issues/507 - there is no
        # way of knowing if the file was actually deleted except
        # for checking all the keys again, response is HTTP 204 OK
        # response all the time
        self.client.delete_object(Bucket=self.netloc, Key=self.relativeToPathRoot)

    def read(self, size: int = -1) -> bytes:
        """Read the contents of the S3 object.

        Parameters
        ----------
        size : `int`, optional
            Number of bytes to request from the start of the object.
            A value that is not positive requests the whole object.

        Returns
        -------
        data : `bytes`
            The bytes returned by S3.

        Raises
        ------
        FileNotFoundError
            Raised if the key or the bucket does not exist.
        """
        args = {}
        if size > 0:
            args["Range"] = f"bytes=0-{size-1}"
        try:
            response = self.client.get_object(Bucket=self.netloc,
                                              Key=self.relativeToPathRoot,
                                              **args)
        except (self.client.exceptions.NoSuchKey, self.client.exceptions.NoSuchBucket) as err:
            raise FileNotFoundError(f"No such resource: {self}") from err
        # Always release the streaming body, even if the read fails
        try:
            body = response["Body"].read()
        finally:
            response["Body"].close()
        return body

    def write(self, data: bytes, overwrite: bool = True) -> None:
        """Write the supplied bytes to the S3 object.

        Parameters
        ----------
        data : `bytes`
            Bytes to store.
        overwrite : `bool`, optional
            If `False` an existing resource is not replaced and an
            exception is raised instead.

        Raises
        ------
        FileExistsError
            Raised if the resource exists and ``overwrite`` is `False`.
        """
        if not overwrite:
            if self.exists():
                raise FileExistsError(f"Remote resource {self} exists and overwrite has been disabled")
        self.client.put_object(Bucket=self.netloc, Key=self.relativeToPathRoot,
                               Body=data)

    def mkdir(self) -> None:
        """Create the "directory" key for a directory-like URI.

        Raises
        ------
        ValueError
            Raised if the bucket does not exist or if this URI is not
            directory-like.
        """
        # Defer import for circular dependencies
        from .s3utils import bucketExists
        if not bucketExists(self.netloc):
            raise ValueError(f"Bucket {self.netloc} does not exist for {self}!")

        if not self.dirLike:
            raise ValueError(f"Can not create a 'directory' for file-like URI {self}")

        # don't create S3 key when root is at the top-level of a Bucket
        if not self.path == "/":
            self.client.put_object(Bucket=self.netloc, Key=self.relativeToPathRoot)

    def as_local(self) -> Tuple[str, bool]:
        """Download object from S3 and place in temporary directory.

        Returns
        -------
        path : `str`
            Path to local temporary file.
        temporary : `bool`
            Always returns `True`. This is always a temporary file.
        """
        with tempfile.NamedTemporaryFile(suffix=self.getExtension(), delete=False) as tmpFile:
            self.client.download_fileobj(self.netloc, self.relativeToPathRoot, tmpFile)
        return tmpFile.name, True

    def transfer_from(self, src: ButlerURI, transfer: str = "copy",
                      overwrite: bool = False,
                      transaction: Optional[Union[DatastoreTransaction, NoTransaction]] = None) -> None:
        """Transfer the current resource to an S3 bucket.

        Parameters
        ----------
        src : `ButlerURI`
            Source URI.
        transfer : `str`
            Mode to use for transferring the resource. Must be one of the
            modes listed in ``transferModes``. ``auto`` is replaced by
            ``transferDefault`` and ``move`` removes the source once the
            data have been transferred.
        overwrite : `bool`, optional
            Allow an existing file to be overwritten. Defaults to `False`.
        transaction : `DatastoreTransaction`, optional
            Currently unused.

        Raises
        ------
        ValueError
            Raised if the transfer mode is not supported.
        FileExistsError
            Raised if the destination exists and ``overwrite`` is `False`.
        """
        # Fail early to prevent delays if remote resources are requested
        if transfer not in self.transferModes:
            raise ValueError(f"Transfer mode '{transfer}' not supported by URI scheme {self.scheme}")

        # The existence checks can be remote calls so only make them
        # when the message is actually going to be emitted.
        if log.isEnabledFor(logging.DEBUG):
            log.debug("Transferring %s [exists: %s] -> %s [exists: %s] (transfer=%s)",
                      src, src.exists(), self, self.exists(), transfer)

        if not overwrite and self.exists():
            raise FileExistsError(f"Destination path '{self}' already exists.")

        if transfer == "auto":
            transfer = self.transferDefault

        if isinstance(src, type(self)):
            # Looks like an S3 remote uri so we can use direct copy
            # note that boto3.resource.meta.copy is cleverer than the low
            # level copy_object
            copy_source = {
                "Bucket": src.netloc,
                "Key": src.relativeToPathRoot,
            }
            self.client.copy_object(CopySource=copy_source, Bucket=self.netloc, Key=self.relativeToPathRoot)
        else:
            # Use local file and upload it
            local_src, is_temporary = src.as_local()

            # resource.meta.upload_file seems like the right thing
            # but we have a low level client
            try:
                with open(local_src, "rb") as fh:
                    self.client.put_object(Bucket=self.netloc,
                                           Key=self.relativeToPathRoot, Body=fh)
            finally:
                # Never leave the temporary download behind, even if
                # the upload failed.
                if is_temporary:
                    os.remove(local_src)

        # This was an explicit move requested from a remote resource
        # try to remove that resource
        if transfer == "move":
            # Transactions do not work here
            src.remove()

1174 

1175 

class ButlerPackageResourceURI(ButlerURI):
    """URI referring to a Python package resource.

    These URIs look like: ``resource://lsst.daf.butler/configs/file.yaml``
    where the network location is the Python package and the path is the
    resource name.
    """

    def exists(self) -> bool:
        """Check that the python resource exists."""
        package = self.netloc
        resource = self.relativeToPathRoot
        return pkg_resources.resource_exists(package, resource)

    def read(self, size: int = -1) -> bytes:
        """Read bytes from the package resource.

        Parameters
        ----------
        size : `int`, optional
            Passed straight to the ``read`` method of the resource
            stream.

        Returns
        -------
        data : `bytes`
            Whatever the resource stream returned.
        """
        stream = pkg_resources.resource_stream(self.netloc, self.relativeToPathRoot)
        with stream as fh:
            data = fh.read(size)
        return data

1191 

1192 

class ButlerHttpURI(ButlerURI):
    """General HTTP(S) resource."""

    def exists(self) -> bool:
        """Check that a remote HTTP resource exists.

        Returns
        -------
        exists : `bool`
            `True` if a HEAD request for the URL returns status 200.
        """
        header = requests.head(self.geturl())
        return header.status_code == 200

    def as_local(self) -> Tuple[str, bool]:
        """Download object over HTTP and place in temporary directory.

        Returns
        -------
        path : `str`
            Path to local temporary file.
        temporary : `bool`
            Always returns `True`. This is always a temporary file.

        Raises
        ------
        FileNotFoundError
            Raised if the GET request does not return status 200.
        """
        r = requests.get(self.geturl(), stream=True)
        try:
            if r.status_code != 200:
                raise FileNotFoundError(f"Unable to download resource {self}; status code: {r.status_code}")
            with tempfile.NamedTemporaryFile(suffix=self.getExtension(), delete=False) as tmpFile:
                # iter_content defaults to one byte per chunk so ask for
                # something larger to avoid a write call per byte.
                for chunk in r.iter_content(chunk_size=65536):
                    tmpFile.write(chunk)
        finally:
            # A streamed response holds on to its connection until it
            # is closed or fully consumed.
            r.close()
        return tmpFile.name, True

    def read(self, size: int = -1) -> bytes:
        """Read the contents of the remote resource.

        Parameters
        ----------
        size : `int`, optional
            If positive, the response is streamed and only the first
            chunk (requested with this chunk size) is returned.
            Otherwise the entire body is returned.

        Returns
        -------
        data : `bytes`
            The bytes read. Empty if a streamed response has no content.
        """
        if size <= 0:
            return requests.get(self.geturl(), stream=False).content

        r = requests.get(self.geturl(), stream=True)
        try:
            # An empty body yields no chunks at all; return empty bytes
            # rather than letting StopIteration escape.
            return next(r.iter_content(chunk_size=size), b"")
        finally:
            r.close()

1227 

1228 

class ButlerInMemoryURI(ButlerURI):
    """Internal in-memory datastore URI (`mem://`).

    Not used for any real purpose other than indicating that the dataset
    is in memory.
    """

    def exists(self) -> bool:
        """Test for existence and always return True."""
        return True

    def as_local(self) -> Tuple[str, bool]:
        """Refuse to provide a local file for an in-memory URI.

        Raises
        ------
        RuntimeError
            Always raised.
        """
        raise RuntimeError(f"Do not know how to retrieve data for URI '{self}'")

1242 

1243 

class ButlerSchemelessURI(ButlerFileURI):
    """Scheme-less URI referring to the local file system"""

    _pathLib = PurePath
    _pathModule = os.path
    quotePaths = False

    @property
    def ospath(self) -> str:
        """Path component of the URI localized to current OS."""
        return self.path

    def isabs(self) -> bool:
        """Indicate that the resource is fully specified.

        For non-schemeless URIs this is always true.

        Returns
        -------
        isabs : `bool`
            `True` if the file is absolute, `False` otherwise.
        """
        return os.path.isabs(self.ospath)

    def _force_to_file(self) -> ButlerFileURI:
        """Force a schemeless URI to a file URI and returns a new URI.

        This will include URI quoting of the path.

        Returns
        -------
        file : `ButlerFileURI`
            A copy of the URI using file scheme. If already a file scheme
            the copy will be identical.

        Raises
        ------
        RuntimeError
            Raised if this URI is schemeless and relative path and so can
            not be forced to file absolute path without context.
        """
        if not self.isabs():
            raise RuntimeError(f"Internal error: Can not force {self} to absolute file URI")
        uri = self._uri._replace(scheme="file", path=urllib.parse.quote(os2posix(self.path)))
        # mypy really wants a ButlerFileURI to be returned here
        return ButlerURI(uri, forceDirectory=self.dirLike)  # type: ignore

    @staticmethod
    def _fixupPathUri(parsed: urllib.parse.ParseResult, root: Optional[str] = None,
                      forceAbsolute: bool = False,
                      forceDirectory: bool = False) -> Tuple[urllib.parse.ParseResult, bool]:
        """Fix up relative paths for local file system.

        Parameters
        ----------
        parsed : `~urllib.parse.ParseResult`
            The result from parsing a URI using `urllib.parse`.
        root : `str`, optional
            Path to use as root when converting relative to absolute.
            If `None`, it will be the current working directory. This
            is a local file system path, not a URI.
        forceAbsolute : `bool`, optional
            If `True`, a scheme-less relative path is joined to ``root``
            to make it absolute; this method leaves the scheme of such a
            path untouched. If `False` a relative path stays relative.
        forceDirectory : `bool`, optional
            If `True` forces the URI to end with a separator, otherwise given
            URI is interpreted as is.

        Returns
        -------
        modified : `~urllib.parse.ParseResult`
            Update result if a URI is being handled.
        dirLike : `bool`
            `True` if given parsed URI has a trailing separator or
            forceDirectory is True. Otherwise `False`.

        Notes
        -----
        The path is unquoted and has any leading tilde expanded before
        anything else is done.

        A path that is absolute after expansion is given the ``file``
        scheme and is converted to a quoted POSIX path.

        Scheme-less paths are normalized.
        """
        # assume we are not dealing with a directory URI
        dirLike = False

        # Replacement values for the URI
        replacements = {}

        if root is None:
            root = os.path.abspath(os.path.curdir)

        # this is a local OS file path which can support tilde expansion.
        # we quoted it in the constructor so unquote here
        expandedPath = os.path.expanduser(urllib.parse.unquote(parsed.path))

        # Ensure that this becomes a file URI if it is already absolute
        if os.path.isabs(expandedPath):
            replacements["scheme"] = "file"
            # Keep in OS form for now to simplify later logic
            replacements["path"] = os.path.normpath(expandedPath)
        elif forceAbsolute:
            # This can stay in OS path form, do not change to file
            # scheme.
            replacements["path"] = os.path.normpath(os.path.join(root, expandedPath))
        else:
            # No change needed for relative local path staying relative
            # except normalization
            replacements["path"] = os.path.normpath(expandedPath)
            # normalization of empty path returns "." so we are dirLike
            if expandedPath == "":
                dirLike = True

        # normpath strips trailing "/" which makes it hard to keep
        # track of directory vs file when calling replaceFile

        # For local file system we can explicitly check to see if this
        # really is a directory. The URI might point to a location that
        # does not exist yet but all that matters is that if it is a
        # directory then we make use of that fact. No need to do the
        # check if we are already being told.
        if not forceDirectory and os.path.isdir(replacements["path"]):
            forceDirectory = True

        # add the trailing separator only if explicitly required or
        # if it was stripped by normpath. Acknowledge that trailing
        # separator exists.
        endsOnSep = expandedPath.endswith(os.sep) and not replacements["path"].endswith(os.sep)
        if (forceDirectory or endsOnSep or dirLike):
            dirLike = True
            if not replacements["path"].endswith(os.sep):
                replacements["path"] += os.sep

        if "scheme" in replacements:
            # This is now meant to be a URI path so force to posix
            # and quote
            replacements["path"] = urllib.parse.quote(os2posix(replacements["path"]))

        # ParseResult is a NamedTuple so _replace is standard API
        parsed = parsed._replace(**replacements)

        if parsed.params or parsed.fragment or parsed.query:
            log.warning("Additional items unexpectedly encountered in schemeless URI: %s", parsed.geturl())

        return parsed, dirLike