Coverage for python/lsst/resources/_resourcePath.py: 23%

Shortcuts on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

316 statements  

1# This file is part of lsst-resources. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# Use of this source code is governed by a 3-clause BSD-style 

10# license that can be found in the LICENSE file. 

11 

12from __future__ import annotations 

13 

14import concurrent.futures 

15import contextlib 

16import copy 

17import logging 

18import os 

19import posixpath 

20import re 

21import shutil 

22import tempfile 

23import urllib.parse 

24from pathlib import Path, PurePath, PurePosixPath 

25from random import Random 

26 

27__all__ = ("ResourcePath",) 

28 

29from typing import TYPE_CHECKING, Any, Dict, Iterable, Iterator, List, Optional, Tuple, Type, Union 

30 

31if TYPE_CHECKING: 31 ↛ 32line 31 didn't jump to line 32, because the condition on line 31 was never true

32 from .utils import TransactionProtocol 

33 

34 

# Logger for this module, named after the module itself.
log = logging.getLogger(__name__)

# Pattern matching a percent-escape written with upper-case hex digits.
ESCAPES_RE = re.compile(r"%[A-F0-9]{2}")

# The percent-encoded form of "#", computed once at import time.
ESCAPED_HASH = urllib.parse.quote("#")

# Upper bound on worker threads for parallelized operations.
# If raised above 10, keep it consistent with connection pool sizing
# (for example in urllib3).
MAX_WORKERS = 10

47 

48 

49class ResourcePath: 

50 """Convenience wrapper around URI parsers. 

51 

52 Provides access to URI components and can convert file 

53 paths into absolute path URIs. Scheme-less URIs are treated as if 

54 they are local file system paths and are converted to absolute URIs. 

55 

56 A specialist subclass is created for each supported URI scheme. 

57 

58 Parameters 

59 ---------- 

60 uri : `str` or `urllib.parse.ParseResult` 

61 URI in string form. Can be scheme-less if referring to a local 

62 filesystem path. 

63 root : `str` or `ResourcePath`, optional 

64 When fixing up a relative path in a ``file`` scheme or if scheme-less, 

65 use this as the root. Must be absolute. If `None` the current 

66 working directory will be used. Can be a file URI. 

67 forceAbsolute : `bool`, optional 

68 If `True`, scheme-less relative URI will be converted to an absolute 

69 path using a ``file`` scheme. If `False` scheme-less URI will remain 

70 scheme-less and will not be updated to ``file`` or absolute path. 

71 forceDirectory: `bool`, optional 

72 If `True` forces the URI to end with a separator, otherwise given URI 

73 is interpreted as is. 

74 isTemporary : `bool`, optional 

75 If `True` indicates that this URI points to a temporary resource. 

76 """ 

77 

78 _pathLib: Type[PurePath] = PurePosixPath 

79 """Path library to use for this scheme.""" 

80 

81 _pathModule = posixpath 

82 """Path module to use for this scheme.""" 

83 

84 transferModes: Tuple[str, ...] = ("copy", "auto", "move") 

85 """Transfer modes supported by this implementation. 

86 

87 Move is special in that it is generally a copy followed by an unlink. 

88 Whether that unlink works depends critically on whether the source URI 

89 implements unlink. If it does not the move will be reported as a failure. 

90 """ 

91 

92 transferDefault: str = "copy" 

93 """Default mode to use for transferring if ``auto`` is specified.""" 

94 

95 quotePaths = True 

96 """True if path-like elements modifying a URI should be quoted. 

97 

98 All non-schemeless URIs have to internally use quoted paths. Therefore 

99 if a new file name is given (e.g. to updatedFile or join) a decision must 

100 be made whether to quote it to be consistent. 

101 """ 

102 

103 isLocal = False 

104 """If `True` this URI refers to a local file.""" 

105 

106 # This is not an ABC with abstract methods because the __new__ being 

107 # a factory confuses mypy such that it assumes that every constructor 

108 # returns a ResourcePath and then determines that all the abstract methods 

109 # are still abstract. If they are not marked abstract but just raise 

110 # mypy is fine with it. 

111 

112 # mypy is confused without these 

113 _uri: urllib.parse.ParseResult 

114 isTemporary: bool 

115 dirLike: bool 

116 

def __new__(
    cls,
    uri: Union[str, urllib.parse.ParseResult, ResourcePath, Path],
    root: Optional[Union[str, ResourcePath]] = None,
    forceAbsolute: bool = True,
    forceDirectory: bool = False,
    isTemporary: bool = False,
) -> ResourcePath:
    """Build an instance of the scheme-specific ResourcePath subclass.

    Parameters
    ----------
    uri : `str`, `urllib.parse.ParseResult`, `ResourcePath` or path-like
        The URI to wrap. A `ResourcePath` is returned unchanged.
    root : `str` or `ResourcePath`, optional
        Forwarded to the subclass ``_fixupPathUri``.
    forceAbsolute : `bool`, optional
        Forwarded to the subclass ``_fixupPathUri``.
    forceDirectory : `bool`, optional
        Forwarded to the subclass fix-up so the URI is treated as dir-like.
    isTemporary : `bool`, optional
        Stored on the new instance as ``isTemporary``.

    Returns
    -------
    new : `ResourcePath`
        Instance of the subclass selected from the URI scheme, or ``uri``
        itself if it was already a `ResourcePath`.

    Raises
    ------
    ValueError
        Raised if ``uri`` is not one of the supported types.
    NotImplementedError
        Raised if the URI scheme has no matching subclass.
    """
    components: urllib.parse.ParseResult
    is_dir_like: bool = False
    impl: Optional[Type[ResourcePath]] = None

    # Path-like objects are processed through their string form.
    if isinstance(uri, os.PathLike):
        uri = str(uri)

    if isinstance(uri, str):
        # Local file names may contain special characters, so they are
        # quoted for the parser; anything that carries "://" is assumed
        # to be quoted already. People sometimes write file:/a/b rather
        # than file:///a/b, so an explicit "file:" prefix is also left
        # untouched.
        looks_local = "://" not in uri and not uri.startswith("file:")
        if looks_local:
            if ESCAPES_RE.search(uri):
                log.warning("Possible double encoding of %s", uri)
            else:
                uri = urllib.parse.quote(uri)
                # Fragments must work even for schemeless URIs, so put
                # "#" back -- but only in the part after the final "/"
                # (the file part), never in the directory part.
                if ESCAPED_HASH in uri:
                    file_start = uri.rfind("/") + 1
                    uri = uri[:file_start] + uri[file_start:].replace(ESCAPED_HASH, "#")

        components = urllib.parse.urlparse(uri)
    elif isinstance(uri, urllib.parse.ParseResult):
        components = copy.copy(uri)
        # When invoked on a subclass rather than on ResourcePath, use
        # that subclass directly. This stops a schemeless absolute URI
        # from unexpectedly becoming a file URI in updatedFile or
        # updatedExtension. It can be inconsistent if used externally
        # outside of replace(), e.g.
        # S3ResourcePath(urllib.parse.urlparse("file://a/b.txt")).
        if cls is not ResourcePath:
            components, is_dir_like = cls._fixDirectorySep(components, forceDirectory)
            impl = cls

    elif isinstance(uri, ResourcePath):
        # The argument is handed back as-is (instances are treated as
        # immutable).
        return uri
    else:
        raise ValueError(
            f"Supplied URI must be string, Path, ResourcePath, or ParseResult but got '{uri!r}'"
        )

    if impl is None:
        # Choose the implementation from the scheme. Each import is
        # deferred so only the module that is needed gets loaded.
        scheme = components.scheme
        if not scheme:
            from .schemeless import SchemelessResourcePath

            impl = SchemelessResourcePath
        elif scheme == "file":
            from .file import FileResourcePath

            impl = FileResourcePath
        elif scheme == "s3":
            from .s3 import S3ResourcePath

            impl = S3ResourcePath
        elif scheme.startswith("http"):
            from .http import HttpResourcePath

            impl = HttpResourcePath
        elif scheme == "resource":
            # Rules for scheme names disallow pkg_resource
            from .packageresource import PackageResourcePath

            impl = PackageResourcePath
        elif scheme == "mem":
            # in-memory datastore object
            from .mem import InMemoryResourcePath

            impl = InMemoryResourcePath
        else:
            raise NotImplementedError(
                f"No URI support for scheme: '{components.scheme}' in {components.geturl()}"
            )

        components, is_dir_like = impl._fixupPathUri(
            components, root=root, forceAbsolute=forceAbsolute, forceDirectory=forceDirectory
        )

        # The fix-up can turn a schemeless URI into a file URI, in which
        # case the implementation class has to follow.
        if components.scheme == "file":
            from .file import FileResourcePath

            impl = FileResourcePath

    # Bypass any subclass __new__ and populate the attributes directly.
    instance = object.__new__(impl)
    instance._uri = components
    instance.dirLike = is_dir_like
    instance.isTemporary = isTemporary
    return instance

230 

@property
def scheme(self) -> str:
    """Scheme component of the wrapped URI.

    Notes
    -----
    The ``://`` separator is not included.
    """
    parsed = self._uri
    return parsed.scheme

240 

@property
def netloc(self) -> str:
    """Network location component of the wrapped URI."""
    parsed = self._uri
    return parsed.netloc

245 

@property
def path(self) -> str:
    """Path component of the wrapped URI, exactly as stored."""
    parsed = self._uri
    return parsed.path

250 

@property
def unquoted_path(self) -> str:
    """Path component of the wrapped URI with URI quoting reversed."""
    quoted = self._uri.path
    return urllib.parse.unquote(quoted)

255 

@property
def ospath(self) -> str:
    """Path component of the URI localized to the current OS.

    Raises
    ------
    AttributeError
        Always raised by this implementation.
    """
    message = f"Non-file URI ({self}) has no local OS path."
    raise AttributeError(message)

260 

@property
def relativeToPathRoot(self) -> str:
    """Path with its root removed, always unquoted.

    The path is expressed relative to its own root, and a trailing ``/``
    is added when the URI is dir-like and does not already end with one.
    """
    pure = self._pathLib(self.path)
    relative = str(pure.relative_to(pure.root))
    if self.dirLike and not relative.endswith("/"):
        relative = f"{relative}/"
    return urllib.parse.unquote(relative)

275 

@property
def is_root(self) -> bool:
    """Whether this URI points at the root of the network location.

    True exactly when ``relativeToPathRoot`` is ``"./"``.
    """
    return self.relativeToPathRoot == "./"

286 

@property
def fragment(self) -> str:
    """Fragment component of the wrapped URI."""
    parsed = self._uri
    return parsed.fragment

291 

@property
def params(self) -> str:
    """Parameters component of the wrapped URI."""
    parsed = self._uri
    return parsed.params

296 

@property
def query(self) -> str:
    """Query-string component of the wrapped URI."""
    parsed = self._uri
    return parsed.query

301 

def geturl(self) -> str:
    """Render the wrapped URI as a string.

    Returns
    -------
    url : `str`
        String form of the URI.
    """
    parsed = self._uri
    return parsed.geturl()

311 

def root_uri(self) -> ResourcePath:
    """Return the base root URI.

    Returns
    -------
    uri : `ResourcePath`
        Result of replacing the path with an empty string while forcing
        the URI to be dir-like.
    """
    root = self.replace(path="", forceDirectory=True)
    return root

321 

def split(self) -> Tuple[ResourcePath, str]:
    """Split the URI into a head URI and a tail string.

    Returns
    -------
    head: `ResourcePath`
        Everything leading up to tail, rebuilt as a dir-like
        `ResourcePath` that keeps the other URI components.
    tail : `str`
        Final component of the path, unquoted. Empty if the path ends on
        a separator; never contains separators.

    Notes
    -----
    Equivalent to `os.path.split()` where head preserves the URI
    components.
    """
    path_module = self._pathModule
    head_path, quoted_tail = path_module.split(self.path)

    # Only schemeless URIs can be relative; pass the current absoluteness
    # along so a relative path is not silently made absolute.
    keep_absolute = path_module.isabs(self.path)

    head = ResourcePath(
        self._uri._replace(path=head_path),
        forceDirectory=True,
        forceAbsolute=keep_absolute,
    )

    # The file part should never include quoted metacharacters.
    return head, urllib.parse.unquote(quoted_tail)

351 

def basename(self) -> str:
    """Return the last element of the URI path.

    Returns
    -------
    tail : `str`
        Last part of the path attribute. Tail will be empty if the path
        ends on a separator.

    Notes
    -----
    This is the second element returned by `split()`.

    Equivalent of `os.path.basename()`.
    """
    _, tail = self.split()
    return tail

369 

def dirname(self) -> ResourcePath:
    """Return the directory part of the path as a new `ResourcePath`.

    Returns
    -------
    head : `ResourcePath`
        Everything except the tail of the path attribute; this is the
        first element returned by `split()`.

    Notes
    -----
    Equivalent of `os.path.dirname()`.
    """
    head, _ = self.split()
    return head

384 

def parent(self) -> ResourcePath:
    """Return a `ResourcePath` of the parent directory.

    Returns
    -------
    head : `ResourcePath`
        For a file-like URI, the result of `dirname()`. For a dir-like
        URI, a dir-like URI whose path is the parent of the current path.

    Notes
    -----
    For a file-like URI this will be the same as calling `dirname()`.
    """
    if self.dirLike:
        # The pure-path parent ignores any trailing separator, so the
        # result really is one level up.
        enclosing = self._pathLib(self.path).parent
        return self.replace(path=str(enclosing), forceDirectory=True)
    return self.dirname()

406 

def replace(self, forceDirectory: bool = False, isTemporary: bool = False, **kwargs: Any) -> ResourcePath:
    """Return a new `ResourcePath` with the given components replaced.

    Parameters
    ----------
    forceDirectory : `bool`, optional
        Passed to the constructor to force the new URI to be dir-like.
    isTemporary : `bool`, optional
        Passed to the constructor to flag the new URI as temporary.
    **kwargs
        Components of a `urllib.parse.ParseResult` to change in the
        newly-created `ResourcePath`.

    Returns
    -------
    new : `ResourcePath`
        New object of the same class as ``self`` with updated values.

    Raises
    ------
    ValueError
        Raised if ``scheme`` is among the components to replace.

    Notes
    -----
    Does not, for now, allow a change in URI scheme.
    """
    if "scheme" in kwargs:
        raise ValueError(f"Can not use replace() method to change URI scheme for {self}")

    modified = self._uri._replace(**kwargs)
    constructor = self.__class__
    return constructor(modified, forceDirectory=forceDirectory, isTemporary=isTemporary)

436 

def updatedFile(self, newfile: str) -> ResourcePath:
    """Return a new URI whose final path component is ``newfile``.

    Parameters
    ----------
    newfile : `str`
        File name with no path component.

    Returns
    -------
    updated : `ResourcePath`
        URI with the last path component replaced.

    Notes
    -----
    Forces the ``dirLike`` attribute of the result to be false. The new
    file name is quoted when ``quotePaths`` is set.
    """
    name = urllib.parse.quote(newfile) if self.quotePaths else newfile
    directory = self._pathModule.split(self.path)[0]

    result = self.replace(path=self._pathModule.join(directory, name))
    result.dirLike = False
    return result

462 

def updatedExtension(self, ext: Optional[str]) -> ResourcePath:
    """Return a new `ResourcePath` with a different file extension.

    The whole extension reported by `getExtension()` is replaced.

    Parameters
    ----------
    ext : `str` or `None`
        New extension. An empty string removes any extension. `None`
        means no change.

    Returns
    -------
    updated : `ResourcePath`
        URI with the specified extension. ``self`` is returned when
        ``ext`` is `None` or already equals the current extension.
    """
    if ext is None:
        return self

    existing = self.getExtension()
    if existing == ext:
        return self

    # Strip by length rather than os.path.splitext so that a compound
    # extension such as .fits.gz is removed as a unit.
    stem = self.path[: -len(existing)] if existing else self.path

    # Supply the leading "." unless the extension is being removed.
    if ext and not ext.startswith("."):
        ext = "." + ext

    return self.replace(path=stem + ext)

502 

def getExtension(self) -> str:
    """Return the file extension(s) of this URI path.

    Returns
    -------
    ext : `str`
        The file extension (including the ``.``), or an empty string if
        there is none. Only the last extension is returned, unless it is
        one of the compression-style modifiers ``.gz``, ``.bz2``, ``.xz``
        or ``.fz`` and another extension precedes it, in which case the
        final two are combined (e.g. ``.fits.gz``).
    """
    # Work from the basename so that "." in directory names is ignored.
    suffixes = self._pathLib(self.basename()).suffixes
    if not suffixes:
        return ""

    *leading, last = suffixes
    if leading and last in {".gz", ".bz2", ".xz", ".fz"}:
        return f"{leading[-1]}{last}"
    return last

532 

def join(self, path: Union[str, ResourcePath], isTemporary: bool = False) -> ResourcePath:
    """Return a new `ResourcePath` with extra path components appended.

    Parameters
    ----------
    path : `str`, `ResourcePath`
        Additional file components to append to the current URI. Assumed
        to include a file at the end. Will be quoted depending on the
        associated URI scheme. If the path looks like a URI with a scheme
        referring to an absolute location, it will be returned
        directly (matching the behavior of `os.path.join()`). It can
        also be a `ResourcePath`.
    isTemporary : `bool`, optional
        Indicate that the resulting URI represents a temporary resource.

    Returns
    -------
    new : `ResourcePath`
        New URI with any file at the end replaced with the new path
        components.

    Notes
    -----
    Schemeless URIs assume local path separator but all other URIs assume
    POSIX separator if the supplied path has directory structure. It
    may be this never becomes a problem but datastore templates assume
    POSIX separator is being used.

    If an absolute `ResourcePath` is given for ``path`` it is assumed that
    this should be returned directly. Giving a ``path`` of an absolute
    scheme-less URI is not allowed for safety reasons as it may indicate
    a mistake in the calling code.

    Raises
    ------
    ValueError
        Raised if the ``path`` is an absolute scheme-less URI. In that
        situation it is unclear whether the intent is to return a
        ``file`` URI or it was a mistake and a relative scheme-less URI
        was meant.
    """
    # Parse the argument without forcing it absolute so that the expected
    # relative case can be told apart from the others.
    candidate = ResourcePath(path, forceAbsolute=False)

    # A scheme marks an explicit URI, which is used as-is.
    if candidate.scheme:
        return candidate

    # No scheme but absolute: refuse rather than guess the intent.
    if candidate.isabs():
        raise ValueError(f"Can not join absolute scheme-less {candidate!r} to another URI.")

    # A plain string is used exactly as given so "#" may appear in the
    # file name; anything else contributes its unquoted path.
    if not isinstance(path, str):
        path = candidate.unquoted_path

    base = self.dirname()  # By definition a directory URI

    # Ask the directory URI about quoting, not self: dirname can change
    # the URI scheme for schemeless -> file.
    if base.quotePaths:
        path = urllib.parse.quote(path)

    path_module = self._pathModule
    combined = path_module.normpath(path_module.join(base.path, path))

    # normpath can strip a trailing separator, so force a directory when
    # the supplied path ended with one.
    wants_directory = path.endswith(path_module.sep)
    return base.replace(path=combined, forceDirectory=wants_directory, isTemporary=isTemporary)

607 

def relative_to(self, other: ResourcePath) -> Optional[str]:
    """Return the path of this URI relative to the other URI.

    Parameters
    ----------
    other : `ResourcePath`
        URI to use to calculate the relative path. Must be a parent
        of this URI.

    Returns
    -------
    subpath : `str`
        The sub path of this URI relative to the supplied other URI,
        unquoted. Returns `None` if there is no parent child
        relationship. Scheme and netloc must match.

    Notes
    -----
    Both paths are taken from ``relativeToPathRoot``, which already
    reverses URI quoting. The result is therefore returned as computed:
    unquoting it a second time would corrupt names that contain a literal
    percent sequence (for example a file really called ``100%41.txt``).
    """
    # Scheme-less absolute other is treated as if it's a file scheme.
    # Scheme-less relative other can only return non-None if self
    # is also scheme-less relative and that is handled specifically
    # in a subclass.
    if not other.scheme and other.isabs():
        other = other.abspath()

    # Scheme-less self is handled elsewhere.
    if self.scheme != other.scheme or self.netloc != other.netloc:
        return None

    enclosed_path = self._pathLib(self.relativeToPathRoot)
    parent_path = other.relativeToPathRoot
    try:
        return str(enclosed_path.relative_to(parent_path))
    except ValueError:
        # Pure-path relative_to raises when other is not a parent.
        return None

645 

def exists(self) -> bool:
    """Report whether the resource is available.

    Returns
    -------
    exists : `bool`
        `True` if the resource exists.

    Raises
    ------
    NotImplementedError
        Always raised by this base implementation.
    """
    raise NotImplementedError()

655 

@classmethod
def mexists(cls, uris: Iterable[ResourcePath]) -> Dict[ResourcePath, bool]:
    """Check for existence of multiple URIs at once.

    Each URI's ``exists()`` is submitted to a thread pool of at most
    ``MAX_WORKERS`` threads. The pool is used as a context manager so
    that it is shut down before this method returns.

    Parameters
    ----------
    uris : iterable of `ResourcePath`
        The URIs to test.

    Returns
    -------
    existence : `dict` of [`ResourcePath`, `bool`]
        Mapping of original URI to boolean indicating existence. A URI
        whose ``exists()`` call raised an exception is reported as
        `False`.
    """
    results: Dict[ResourcePath, bool] = {}
    with concurrent.futures.ThreadPoolExecutor(max_workers=MAX_WORKERS) as exists_executor:
        future_exists = {exists_executor.submit(uri.exists): uri for uri in uris}

        for future in concurrent.futures.as_completed(future_exists):
            uri = future_exists[future]
            try:
                exists = future.result()
            except Exception:
                # Deliberate best-effort: a failed check counts as
                # "does not exist".
                exists = False
            results[uri] = exists
    return results

682 

def remove(self) -> None:
    """Remove the resource.

    Raises
    ------
    NotImplementedError
        Always raised by this base implementation.
    """
    raise NotImplementedError()

686 

def isabs(self) -> bool:
    """Report whether the resource is fully specified.

    This base implementation always answers `True`.

    Returns
    -------
    isabs : `bool`
        `True` in all cases except schemeless URI.
    """
    return True

698 

def abspath(self) -> ResourcePath:
    """Return the URI using an absolute path.

    Returns
    -------
    abs : `ResourcePath`
        Absolute URI. This base implementation returns ``self``.
        Schemeless URIs are upgraded to file URIs.
    """
    return self

709 

def _as_local(self) -> Tuple[str, bool]:
    """Locate the (possibly remote) resource as a local file.

    Helper for the `as_local` context manager.

    Returns
    -------
    path : `str`
        If this is a remote resource, it will be a copy of the resource
        on the local file system, probably in a temporary directory.
        For a local resource this should be the actual path to the
        resource.
    is_temporary : `bool`
        Indicates if the local path is a temporary file or not.

    Raises
    ------
    NotImplementedError
        Always raised by this base implementation.
    """
    raise NotImplementedError()

726 

@contextlib.contextmanager
def as_local(self) -> Iterator[ResourcePath]:
    """Provide the (possibly remote) resource as a local file.

    Yields
    ------
    local : `ResourcePath`
        URI built from the path returned by ``_as_local()``, flagged as
        temporary when ``_as_local()`` says so.

    Notes
    -----
    On exit, a temporary local file is removed if it still exists.

    Examples
    --------
    Should be used as a context manager:

    .. code-block:: py

       with uri.as_local() as local:
           ospath = local.ospath
    """
    location, temporary = self._as_local()
    local = ResourcePath(location, isTemporary=temporary)

    try:
        yield local
    finally:
        # The caller might have relocated the temporary file, so only
        # remove it when it is still there.
        if temporary and local.exists():
            local.remove()

762 

@classmethod
@contextlib.contextmanager
def temporary_uri(
    cls, prefix: Optional[ResourcePath] = None, suffix: Optional[str] = None
) -> Iterator[ResourcePath]:
    """Create a temporary URI.

    Parameters
    ----------
    prefix : `ResourcePath`, optional
        Prefix to use. Without this the path will be formed as a local
        file URI in a temporary directory. Ensuring that the prefix
        location exists is the responsibility of the caller.
    suffix : `str`, optional
        A file suffix to be used. The ``.`` should be included in this
        suffix.

    Yields
    ------
    uri : `ResourcePath`
        The temporary URI. Will be removed when the context is completed.
    """
    # Tracked separately from isTemporary because an externally supplied
    # prefix may carry that flag too, and only a directory made here
    # should be deleted on exit.
    made_tempdir = prefix is None
    if made_tempdir:
        prefix = ResourcePath(tempfile.mkdtemp(), forceDirectory=True, isTemporary=True)

    # Use a randomized 16-character name for local and remote alike
    # rather than mkstemp. The file itself is not created here.
    alphabet = "abcdefghijklmnopqrstuvwxyz0123456789_"
    generator = Random()
    name = "".join(generator.choice(alphabet) for _ in range(16))
    if suffix:
        name += suffix
    temporary = prefix.join(name, isTemporary=True)

    try:
        yield temporary
    finally:
        if made_tempdir:
            shutil.rmtree(prefix.ospath, ignore_errors=True)
        else:
            # It's okay if the file is already gone because the user
            # removed it.
            with contextlib.suppress(FileNotFoundError):
                temporary.remove()

815 

def read(self, size: int = -1) -> bytes:
    """Open the resource and return its contents as bytes.

    Parameters
    ----------
    size : `int`, optional
        The number of bytes to read. Negative or omitted indicates
        that all data should be read.

    Raises
    ------
    NotImplementedError
        Always raised by this base implementation.
    """
    raise NotImplementedError()

826 

def write(self, data: bytes, overwrite: bool = True) -> None:
    """Write the supplied bytes to the new resource.

    Parameters
    ----------
    data : `bytes`
        The bytes to write to the resource. The entire contents of the
        resource will be replaced.
    overwrite : `bool`, optional
        If `True` the resource will be overwritten if it exists. Otherwise
        the write will fail.

    Raises
    ------
    NotImplementedError
        Always raised by this base implementation.
    """
    raise NotImplementedError()

840 

def mkdir(self) -> None:
    """For a dir-like URI, create the directory resource if needed.

    Raises
    ------
    NotImplementedError
        Always raised by this base implementation.
    """
    raise NotImplementedError()

844 

def isdir(self) -> bool:
    """Return the ``dirLike`` flag: True if this URI looks like a directory."""
    looks_like_directory = self.dirLike
    return looks_like_directory

848 

def size(self) -> int:
    """For non-dir-like URI, return the size of the resource.

    Returns
    -------
    sz : `int`
        The size in bytes of the resource associated with this URI.
        Returns 0 if dir-like.

    Raises
    ------
    NotImplementedError
        Always raised by this base implementation.
    """
    raise NotImplementedError()

859 

def __str__(self) -> str:
    """Return the URI in string form, as produced by ``geturl()``."""
    text = self.geturl()
    return text

863 

def __repr__(self) -> str:
    """Return a ``ResourcePath("...")`` string wrapping ``geturl()``."""
    url = self.geturl()
    return f'ResourcePath("{url}")'

867 

def __eq__(self, other: Any) -> bool:
    """Compare with another `ResourcePath` by string form.

    Returns `NotImplemented` for anything that is not a `ResourcePath`.
    """
    if isinstance(other, ResourcePath):
        return self.geturl() == other.geturl()
    return NotImplemented

873 

def __hash__(self) -> int:
    """Return the hash of this object's string form."""
    text = str(self)
    return hash(text)

877 

def __copy__(self) -> ResourcePath:
    """Return ``self`` as its own shallow copy.

    The object is treated as immutable, so no new object is needed.
    """
    # Defined explicitly because the factory __new__ confuses things.
    return self

885 

def __deepcopy__(self, memo: Any) -> ResourcePath:
    """Return ``self`` as its own deep copy; ``memo`` is not used.

    The object is treated as immutable, so no new object is needed.
    """
    # Defined explicitly because the factory __new__ confuses things.
    return self

893 

def __getnewargs__(self) -> Tuple:
    """Support pickling by supplying the string form as the sole argument."""
    text = str(self)
    return (text,)

897 

@classmethod
def _fixDirectorySep(
    cls, parsed: urllib.parse.ParseResult, forceDirectory: bool = False
) -> Tuple[urllib.parse.ParseResult, bool]:
    """Ensure that a path separator is present on directory paths.

    Parameters
    ----------
    parsed : `~urllib.parse.ParseResult`
        The result from parsing a URI using `urllib.parse`.
    forceDirectory : `bool`, optional
        If `True` forces the URI to end with a separator, otherwise given
        URI is interpreted as is.

    Returns
    -------
    modified : `~urllib.parse.ParseResult`
        The parsed URI, with a separator appended to the path when a
        directory was forced and none was present.
    dirLike : `bool`
        `True` if given parsed URI has a trailing separator or
        forceDirectory is True. Otherwise `False`.
    """
    separator = cls._pathModule.sep
    has_trailing_sep = parsed.path.endswith(separator)

    # Not dir-like unless explicitly requested or already ending on a
    # separator.
    if not (forceDirectory or has_trailing_sep):
        return parsed, False

    # Only add the separator if it's not already there.
    if not has_trailing_sep:
        parsed = parsed._replace(path=parsed.path + separator)
    return parsed, True

937 

@classmethod
def _fixupPathUri(
    cls,
    parsed: urllib.parse.ParseResult,
    root: Optional[Union[str, ResourcePath]] = None,
    forceAbsolute: bool = False,
    forceDirectory: bool = False,
) -> Tuple[urllib.parse.ParseResult, bool]:
    """Correct any issues with the supplied URI.

    This implementation only delegates to ``_fixDirectorySep``.

    Parameters
    ----------
    parsed : `~urllib.parse.ParseResult`
        The result from parsing a URI using `urllib.parse`.
    root : `str` or `ResourcePath`, ignored
        Not used by this implementation since all URIs are
        absolute except for those representing the local file system.
    forceAbsolute : `bool`, ignored.
        Not used by this implementation. URIs are generally always
        absolute.
    forceDirectory : `bool`, optional
        If `True` forces the URI to end with a separator, otherwise given
        URI is interpreted as is. Specifying that the URI is conceptually
        equivalent to a directory can break some ambiguities when
        interpreting the last element of a path.

    Returns
    -------
    modified : `~urllib.parse.ParseResult`
        Update result if a URI is being handled.
    dirLike : `bool`
        `True` if given parsed URI has a trailing separator or
        forceDirectory is True. Otherwise `False`.

    Notes
    -----
    Relative paths are explicitly not supported by RFC8089 but `urllib`
    does accept URIs of the form ``file:relative/path.ext``. They need
    to be turned into absolute paths before they can be used. This is
    always done regardless of the ``forceAbsolute`` parameter.

    AWS S3 differentiates between keys with trailing POSIX separators (i.e
    `/dir` and `/dir/`) whereas POSIX does not necessarily.

    Scheme-less paths are normalized.
    """
    fixed = cls._fixDirectorySep(parsed, forceDirectory)
    return fixed

985 

def transfer_from(
    self,
    src: ResourcePath,
    transfer: str,
    overwrite: bool = False,
    transaction: Optional[TransactionProtocol] = None,
) -> None:
    """Transfer the resource at ``src`` to the location of this URI.

    This implementation supports no transfer modes and always raises.

    Parameters
    ----------
    src : `ResourcePath`
        Source URI.
    transfer : `str`
        Mode to use for transferring the resource. Generically there are
        many standard options: copy, link, symlink, hardlink, relsymlink.
        Not all URIs support all modes.
    overwrite : `bool`, optional
        Allow an existing file to be overwritten. Defaults to `False`.
    transaction : `~lsst.resources.utils.TransactionProtocol`, optional
        A transaction object that can (depending on implementation)
        rollback transfers on error. Not guaranteed to be implemented.

    Raises
    ------
    NotImplementedError
        Always raised by this implementation.

    Notes
    -----
    Conceptually this is hard to scale as the number of URI schemes
    grows. The destination URI is more important than the source URI
    since that is where all the transfer modes are relevant (with the
    complication that "move" deletes the source).

    Local file to local file is the fundamental use case but every
    other scheme has to support "copy" to local file (with implicit
    support for "move") and copy from local file.
    All the "link" options tend to be specific to local file systems.

    "move" is a "copy" where the remote resource is deleted at the end.
    Whether this works depends on the source URI rather than the
    destination URI. Reverting a move on transaction rollback is
    expected to be problematic if a remote resource was involved.
    """
    message = f"No transfer modes supported by URI scheme {self.scheme}"
    raise NotImplementedError(message)

1027 

def walk(
    self, file_filter: Optional[Union[str, re.Pattern]] = None
) -> Iterator[Union[List, Tuple[ResourcePath, List[str], List[str]]]]:
    """Walk the directory tree returning matching files and directories.

    This implementation provides no walking support and always raises.

    Parameters
    ----------
    file_filter : `str` or `re.Pattern`, optional
        Regex to filter out files from the list before it is returned.

    Yields
    ------
    dirpath : `ResourcePath`
        Current directory being examined.
    dirnames : `list` of `str`
        Names of subdirectories within dirpath.
    filenames : `list` of `str`
        Names of all the files within dirpath.

    Raises
    ------
    NotImplementedError
        Always raised by this implementation.

    Notes
    -----
    The return annotation also allows a bare `list` to be yielded;
    ``findFileResources`` treats a falsy item as meaning the URI does
    not exist and skips it.
    """
    raise NotImplementedError

1048 

@classmethod
def findFileResources(
    cls,
    candidates: Iterable[Union[str, ResourcePath]],
    file_filter: Optional[str] = None,
    grouped: bool = False,
) -> Iterator[Union[ResourcePath, Iterator[ResourcePath]]]:
    """Get all the files from a list of values.

    Parameters
    ----------
    candidates : iterable [`str` or `ResourcePath`]
        The files to return and directories in which to look for files to
        return.
    file_filter : `str`, optional
        The regex to use when searching for files within directories.
        By default returns all the found files.
    grouped : `bool`, optional
        If `True` the results will be grouped by directory and each
        yielded value will be an iterator over URIs. If `False` each
        URI will be returned separately.

    Yields
    ------
    found_file: `ResourcePath`
        The passed-in URIs and URIs found in passed-in directories.
        If grouping is enabled, each of the yielded values will be an
        iterator yielding members of the group. Files given explicitly
        will be returned as a single group at the end.

    Notes
    -----
    If a value is a file it is yielded immediately without checking that it
    exists. If a value is a directory, all the files in the directory
    (recursively) that match the regex will be yielded in turn.

    In grouped mode each yielded iterator is bound to its own directory
    when it is created, so the groups may be collected first and iterated
    later without being affected by directories walked afterwards.
    """
    fileRegex = None if file_filter is None else re.compile(file_filter)

    # Explicitly given files. Only filled in grouped mode, where they are
    # held back and emitted together after all directories are processed.
    singles = []

    # Find all the files of interest
    for location in candidates:
        uri = ResourcePath(location)

        if not uri.isdir():
            if grouped:
                singles.append(uri)
            else:
                yield uri
            continue

        for found in uri.walk(fileRegex):
            if not found:
                # This means the uri does not exist and by
                # convention we ignore it
                continue
            root, _, files = found
            if not files:
                continue
            if grouped:
                # Bind ``root.join`` now. A generator expression would
                # look up ``root`` only when consumed, by which time this
                # loop may have rebound it to a different directory.
                yield map(root.join, files)
            else:
                yield from map(root.join, files)

    # Finally, return any explicitly given files in one group
    if grouped and singles:
        yield iter(singles)