Coverage for python/lsst/resources/_resourcePath.py: 20%

Shortcuts on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

371 statements  

1# This file is part of lsst-resources. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# Use of this source code is governed by a 3-clause BSD-style 

10# license that can be found in the LICENSE file. 

11 

12from __future__ import annotations 

13 

14import concurrent.futures 

15import contextlib 

16import copy 

17import io 

18import locale 

19import logging 

20import os 

21import posixpath 

22import re 

23import shutil 

24import tempfile 

25import urllib.parse 

26from pathlib import Path, PurePath, PurePosixPath 

27from random import Random 

28 

29__all__ = ("ResourcePath", "ResourcePathExpression") 

30 

31from typing import IO, TYPE_CHECKING, Any, Dict, Iterable, Iterator, List, Optional, Tuple, Type, Union 

32 

33if TYPE_CHECKING: 33 ↛ 34line 33 didn't jump to line 34, because the condition on line 33 was never true

34 from .utils import TransactionProtocol 

35 

36 

# Module-level logger named after this module.
log = logging.getLogger(__name__)

# Regex for looking for URI escapes. Note that only upper-case hex digits
# are matched, so a lower-case escape such as ``%2f`` is not detected.
ESCAPES_RE = re.compile(r"%[A-F0-9]{2}")

# Precomputed escaped hash (the percent-encoded form of "#").
ESCAPED_HASH = urllib.parse.quote("#")

# Maximum number of worker threads for parallelized operations.
# If greater than 10, be aware that this number has to be consistent
# with connection pool sizing (for example in urllib3).
MAX_WORKERS = 10


ResourcePathExpression = Union[str, urllib.parse.ParseResult, "ResourcePath", Path]
"""Type-annotation alias for objects that can be coerced to ResourcePath.
"""

54 

55 

56class ResourcePath: 

57 """Convenience wrapper around URI parsers. 

58 

59 Provides access to URI components and can convert file 

60 paths into absolute path URIs. Scheme-less URIs are treated as if 

61 they are local file system paths and are converted to absolute URIs. 

62 

63 A specialist subclass is created for each supported URI scheme. 

64 

65 Parameters 

66 ---------- 

67 uri : `str`, `Path`, `urllib.parse.ParseResult`, or `ResourcePath`. 

68 URI in string form. Can be scheme-less if referring to a local 

69 filesystem path. 

70 root : `str` or `ResourcePath`, optional 

71 When fixing up a relative path in a ``file`` scheme or if scheme-less, 

72 use this as the root. Must be absolute. If `None` the current 

73 working directory will be used. Can be a file URI. 

74 forceAbsolute : `bool`, optional 

75 If `True`, scheme-less relative URI will be converted to an absolute 

76 path using a ``file`` scheme. If `False` scheme-less URI will remain 

77 scheme-less and will not be updated to ``file`` or absolute path. 

78 forceDirectory: `bool`, optional 

79 If `True` forces the URI to end with a separator, otherwise given URI 

80 is interpreted as is. 

81 isTemporary : `bool`, optional 

82 If `True` indicates that this URI points to a temporary resource. 

83 The default is `False`, unless ``uri`` is already a `ResourcePath` 

84 instance and ``uri.isTemporary is True``. 

85 """ 

86 

87 _pathLib: Type[PurePath] = PurePosixPath 

88 """Path library to use for this scheme.""" 

89 

90 _pathModule = posixpath 

91 """Path module to use for this scheme.""" 

92 

93 transferModes: Tuple[str, ...] = ("copy", "auto", "move") 

94 """Transfer modes supported by this implementation. 

95 

96 Move is special in that it is generally a copy followed by an unlink. 

97 Whether that unlink works depends critically on whether the source URI 

98 implements unlink. If it does not the move will be reported as a failure. 

99 """ 

100 

101 transferDefault: str = "copy" 

102 """Default mode to use for transferring if ``auto`` is specified.""" 

103 

104 quotePaths = True 

105 """True if path-like elements modifying a URI should be quoted. 

106 

107 All non-schemeless URIs have to internally use quoted paths. Therefore 

108 if a new file name is given (e.g. to updatedFile or join) a decision must 

109 be made whether to quote it to be consistent. 

110 """ 

111 

112 isLocal = False 

113 """If `True` this URI refers to a local file.""" 

114 

115 # This is not an ABC with abstract methods because the __new__ being 

116 # a factory confuses mypy such that it assumes that every constructor 

117 # returns a ResourcePath and then determines that all the abstract methods 

118 # are still abstract. If they are not marked abstract but just raise 

119 # mypy is fine with it. 

120 

121 # mypy is confused without these 

122 _uri: urllib.parse.ParseResult 

123 isTemporary: bool 

124 dirLike: bool 

125 

def __new__(
    cls,
    uri: ResourcePathExpression,
    root: Optional[Union[str, ResourcePath]] = None,
    forceAbsolute: bool = True,
    forceDirectory: bool = False,
    isTemporary: Optional[bool] = None,
) -> ResourcePath:
    """Create and return new specialist ResourcePath subclass.

    Parameters
    ----------
    uri : `str`, `os.PathLike`, `urllib.parse.ParseResult`, or `ResourcePath`
        The URI to wrap. Path-like objects are converted with `str`.
    root : `str` or `ResourcePath`, optional
        Forwarded to the subclass ``_fixupPathUri`` hook.
    forceAbsolute : `bool`, optional
        Forwarded to the subclass ``_fixupPathUri`` hook. Also causes a
        scheme-less `ResourcePath` argument to be re-created from its
        string form.
    forceDirectory : `bool`, optional
        Request that the result be treated as directory-like.
    isTemporary : `bool`, optional
        Value for the ``isTemporary`` attribute of the new instance.
        `None` is stored as `False`.

    Returns
    -------
    new : `ResourcePath`
        Instance of the subclass selected from the URI scheme, or the
        ``uri`` argument itself if it is already a `ResourcePath` that is
        consistent with the requested flags.

    Raises
    ------
    RuntimeError
        Raised if ``uri`` is a `ResourcePath` that is not dir-like but
        ``forceDirectory`` is `True`, or whose ``isTemporary`` flag
        differs from an explicitly supplied ``isTemporary``.
    ValueError
        Raised if ``uri`` is not one of the supported types.
    NotImplementedError
        Raised if the URI scheme is not one handled here.
    """
    parsed: urllib.parse.ParseResult
    dirLike: bool = False
    subclass: Optional[Type[ResourcePath]] = None

    # Path-like objects are handled through their string form.
    if isinstance(uri, os.PathLike):
        uri = str(uri)

    # Record if we need to post process the URI components
    # or if the instance is already fully configured
    if isinstance(uri, str):
        # Since local file names can have special characters in them
        # we need to quote them for the parser but we can unquote
        # later. Assume that all other URI schemes are quoted.
        # Since sometimes people write file:/a/b and not file:///a/b
        # we should not quote in the explicit case of file:
        if "://" not in uri and not uri.startswith("file:"):
            if ESCAPES_RE.search(uri):
                # The string already looks escaped so it is left as is.
                log.warning("Possible double encoding of %s", uri)
            else:
                uri = urllib.parse.quote(uri)
                # Special case hash since we must support fragments
                # even in schemeless URIs -- although try to only replace
                # them in file part and not directory part
                if ESCAPED_HASH in uri:
                    dirpos = uri.rfind("/")
                    # Do replacement after this /
                    uri = uri[: dirpos + 1] + uri[dirpos + 1 :].replace(ESCAPED_HASH, "#")

        parsed = urllib.parse.urlparse(uri)
    elif isinstance(uri, urllib.parse.ParseResult):
        parsed = copy.copy(uri)
        # If we are being instantiated with a subclass, rather than
        # ResourcePath, ensure that that subclass is used directly.
        # This could lead to inconsistencies if this constructor
        # is used externally outside of the ResourcePath.replace() method.
        #   S3ResourcePath(urllib.parse.urlparse("file://a/b.txt"))
        # will be a problem.
        # This is needed to prevent a schemeless absolute URI become
        # a file URI unexpectedly when calling updatedFile or
        # updatedExtension
        if cls is not ResourcePath:
            parsed, dirLike = cls._fixDirectorySep(parsed, forceDirectory)
            subclass = cls

    elif isinstance(uri, ResourcePath):
        # Since ResourcePath is immutable we can return the argument
        # unchanged if it already agrees with forceDirectory, isTemporary,
        # and forceAbsolute.
        # We invoke __new__ again with str(self) to add a scheme for
        # forceAbsolute, but for the others that seems more likely to paper
        # over logic errors than do something useful, so we just raise.
        if forceDirectory and not uri.dirLike:
            raise RuntimeError(
                f"{uri} is already a file-like ResourcePath; cannot force it to directory."
            )
        if isTemporary is not None and isTemporary is not uri.isTemporary:
            raise RuntimeError(
                f"{uri} is already a {'temporary' if uri.isTemporary else 'permanent'} "
                f"ResourcePath; cannot make it {'temporary' if isTemporary else 'permanent'}."
            )
        if forceAbsolute and not uri.scheme:
            return ResourcePath(
                str(uri),
                root=root,
                forceAbsolute=True,
                forceDirectory=uri.dirLike,
                isTemporary=uri.isTemporary,
            )
        return uri
    else:
        raise ValueError(
            f"Supplied URI must be string, Path, ResourcePath, or ParseResult but got '{uri!r}'"
        )

    if subclass is None:
        # Work out the subclass from the URI scheme. The implementation
        # modules are imported here, inside the branch that needs them.
        if not parsed.scheme:
            from .schemeless import SchemelessResourcePath

            subclass = SchemelessResourcePath
        elif parsed.scheme == "file":
            from .file import FileResourcePath

            subclass = FileResourcePath
        elif parsed.scheme == "s3":
            from .s3 import S3ResourcePath

            subclass = S3ResourcePath
        elif parsed.scheme.startswith("http"):
            # Any scheme beginning with "http" is routed here.
            from .http import HttpResourcePath

            subclass = HttpResourcePath
        elif parsed.scheme == "resource":
            # Rules for scheme names disallow pkg_resource
            from .packageresource import PackageResourcePath

            subclass = PackageResourcePath
        elif parsed.scheme == "mem":
            # in-memory datastore object
            from .mem import InMemoryResourcePath

            subclass = InMemoryResourcePath
        else:
            raise NotImplementedError(
                f"No URI support for scheme: '{parsed.scheme}' in {parsed.geturl()}"
            )

        parsed, dirLike = subclass._fixupPathUri(
            parsed, root=root, forceAbsolute=forceAbsolute, forceDirectory=forceDirectory
        )

        # It is possible for the class to change from schemeless
        # to file so handle that
        if parsed.scheme == "file":
            from .file import FileResourcePath

            subclass = FileResourcePath

    # Now create an instance of the correct subclass and set the
    # attributes directly
    self = object.__new__(subclass)
    self._uri = parsed
    self.dirLike = dirLike
    if isTemporary is None:
        isTemporary = False
    self.isTemporary = isTemporary
    return self

262 

@property
def scheme(self) -> str:
    """Scheme component of the parsed URI.

    Notes
    -----
    The ``://`` separator is not included in the returned value.
    """
    parsed = self._uri
    return parsed.scheme

272 

@property
def netloc(self) -> str:
    """Network location component of the parsed URI."""
    parsed = self._uri
    return parsed.netloc

277 

@property
def path(self) -> str:
    """Path component of the parsed URI, exactly as stored (no unquoting)."""
    parsed = self._uri
    return parsed.path

282 

@property
def unquoted_path(self) -> str:
    """Path component of the URI with percent-escapes decoded."""
    quoted = self._uri.path
    return urllib.parse.unquote(quoted)

287 

@property
def ospath(self) -> str:
    """Path component of the URI localized to the current OS.

    Raises
    ------
    AttributeError
        Always raised by this implementation, which has no local OS path.
    """
    message = f"Non-file URI ({self}) has no local OS path."
    raise AttributeError(message)

292 

@property
def relativeToPathRoot(self) -> str:
    """Path of this URI relative to the root of its network location.

    This is the ``path`` property with the root separator removed from
    the left hand side. A trailing ``/`` is appended for dir-like URIs
    that do not already end with one.

    Always unquotes.
    """
    pure = self._pathLib(self.path)
    relative = str(pure.relative_to(pure.root))
    needs_separator = self.dirLike and not relative.endswith("/")
    return urllib.parse.unquote((relative + "/") if needs_separator else relative)

307 

@property
def is_root(self) -> bool:
    """Whether this URI points to the root of the network location.

    This is the case when the path relative to the root is ``./``.
    """
    return self.relativeToPathRoot == "./"

318 

@property
def fragment(self) -> str:
    """Fragment component of the parsed URI."""
    parsed = self._uri
    return parsed.fragment

323 

@property
def params(self) -> str:
    """Parameters component of the parsed URI."""
    parsed = self._uri
    return parsed.params

328 

@property
def query(self) -> str:
    """Query string component of the parsed URI."""
    parsed = self._uri
    return parsed.query

333 

def geturl(self) -> str:
    """Reassemble the parsed URI into a string.

    Returns
    -------
    url : `str`
        String form of URI.
    """
    parsed = self._uri
    return parsed.geturl()

343 

def root_uri(self) -> ResourcePath:
    """Return the base root URI.

    Returns
    -------
    uri : `ResourcePath`
        Copy of this URI with an empty path, forced to be dir-like.
    """
    root = self.replace(forceDirectory=True, path="")
    return root

353 

def split(self) -> Tuple[ResourcePath, str]:
    """Split URI into head and tail.

    Returns
    -------
    head: `ResourcePath`
        Everything leading up to tail, built as a dir-like
        `ResourcePath` that keeps the other URI components.
    tail : `str`
        Last `self.path` component. Tail will be empty if path ends on a
        separator. Tail will never contain separators. It will be
        unquoted.

    Notes
    -----
    Equivalent to `os.path.split()` where head preserves the URI
    components.
    """
    path_module = self._pathModule
    current_path = self.path
    directory, filename = path_module.split(current_path)

    # Schemeless is special in that it can be a relative path and must stay
    # that way, so only force absolute when the path already is absolute.
    keep_absolute = path_module.isabs(current_path)
    head_uri = ResourcePath(
        self._uri._replace(path=directory), forceDirectory=True, forceAbsolute=keep_absolute
    )

    # The file part should never include quoted metacharacters
    return head_uri, urllib.parse.unquote(filename)

383 

def basename(self) -> str:
    """Return the base name, last element of path, of the URI.

    Returns
    -------
    tail : `str`
        Last part of the path attribute. Tail will be empty if path ends
        on a separator.

    Notes
    -----
    If URI ends on a slash returns an empty string. This is the second
    element returned by `split()`.

    Equivalent of `os.path.basename()`.
    """
    _, tail = self.split()
    return tail

401 

def dirname(self) -> ResourcePath:
    """Return the directory component of the path as a new `ResourcePath`.

    Returns
    -------
    head : `ResourcePath`
        Everything except the tail of path attribute. This is the first
        element returned by `split()`.

    Notes
    -----
    Equivalent of `os.path.dirname()`.
    """
    head, _ = self.split()
    return head

416 

def parent(self) -> ResourcePath:
    """Return a `ResourcePath` of the parent directory.

    Returns
    -------
    head : `ResourcePath`
        For a file-like URI, the result of `dirname()`. For a dir-like
        URI, a dir-like URI for the directory one level up.

    Notes
    -----
    For a file-like URI this will be the same as calling `dirname()`.
    """
    if self.dirLike:
        # The path library drops any trailing separator, so ``.parent``
        # really is one directory up.
        parent_path = self._pathLib(self.path).parent
        return self.replace(path=str(parent_path), forceDirectory=True)
    return self.dirname()

438 

def replace(self, forceDirectory: bool = False, isTemporary: bool = False, **kwargs: Any) -> ResourcePath:
    """Return new `ResourcePath` with specified components replaced.

    Parameters
    ----------
    forceDirectory : `bool`, optional
        Passed to the constructor to force the new URI to be dir-like.
    isTemporary : `bool`, optional
        Indicate that the resulting URI is temporary resource.
    **kwargs
        Components of a `urllib.parse.ParseResult` that should be
        modified for the newly-created `ResourcePath`.

    Returns
    -------
    new : `ResourcePath`
        New object of the same class as this one with updated values.

    Raises
    ------
    ValueError
        Raised if a change of ``scheme`` is requested.

    Notes
    -----
    Does not, for now, allow a change in URI scheme.
    """
    # Disallow a change in scheme
    if "scheme" in kwargs:
        raise ValueError(f"Can not use replace() method to change URI scheme for {self}")

    factory = self.__class__
    modified = self._uri._replace(**kwargs)
    return factory(modified, forceDirectory=forceDirectory, isTemporary=isTemporary)

468 

def updatedFile(self, newfile: str) -> ResourcePath:
    """Return new URI with an updated final component of the path.

    Parameters
    ----------
    newfile : `str`
        File name with no path component.

    Returns
    -------
    updated : `ResourcePath`
        URI with the last path component replaced by ``newfile``.

    Notes
    -----
    Forces the ResourcePath.dirLike attribute to be false. The new file
    path will be quoted if ``quotePaths`` is set.
    """
    filename = urllib.parse.quote(newfile) if self.quotePaths else newfile
    path_module = self._pathModule
    directory = path_module.split(self.path)[0]

    result = self.replace(path=path_module.join(directory, filename))
    # The result names a file regardless of what the constructor inferred.
    result.dirLike = False
    return result

494 

def updatedExtension(self, ext: Optional[str]) -> ResourcePath:
    """Return a new `ResourcePath` with updated file extension.

    The whole extension reported by `getExtension()` is replaced.

    Parameters
    ----------
    ext : `str` or `None`
        New extension. If an empty string is given any extension will
        be removed. If `None` is given there will be no change.

    Returns
    -------
    updated : `ResourcePath`
        URI with the specified extension. Returns itself if ``ext`` is
        `None` or already matches the current extension.
    """
    if ext is None:
        return self

    existing = self.getExtension()
    if ext == existing:
        # Nothing to do if the extension already matches
        return self

    # Strip the current extension by length; .fits.gz counts as one
    # extension so os.path.splitext is not suitable.
    stem = self.path[: -len(existing)] if existing else self.path

    # A non-empty extension always gets a leading "."; the empty string
    # is left untouched.
    suffix = ext if (not ext or ext.startswith(".")) else "." + ext
    return self.replace(path=stem + suffix)

534 

def getExtension(self) -> str:
    """Return the file extension(s) associated with this URI path.

    Returns
    -------
    ext : `str`
        The file extension (including the ``.``). Can be empty string
        if there is no file extension. Only the last extension is
        returned unless it is one of the compression modifiers
        (``.gz``, ``.bz2``, ``.xz``, ``.fz``), in which case the
        preceding extension is included too (e.g. ``.fits.gz``).
    """
    compression_suffixes = {".gz", ".bz2", ".xz", ".fz"}

    # Only look at the file part so that "." in directory names is ignored.
    suffixes = self._pathLib(self.basename()).suffixes
    if not suffixes:
        return ""

    *leading, last = suffixes
    if leading and last in compression_suffixes:
        return leading[-1] + last
    return last

564 

def join(
    self, path: Union[str, ResourcePath], isTemporary: Optional[bool] = None, forceDirectory: bool = False
) -> ResourcePath:
    """Return new `ResourcePath` with additional path components.

    Parameters
    ----------
    path : `str`, `ResourcePath`
        Additional file components to append to the current URI. Assumed
        to include a file at the end. Will be quoted depending on the
        associated URI scheme. If the path looks like a URI with a scheme
        referring to an absolute location, it will be returned
        directly (matching the behavior of `os.path.join()`). It can
        also be a `ResourcePath`.
    isTemporary : `bool`, optional
        Indicate that the resulting URI represents a temporary resource.
        Default is ``self.isTemporary``.
    forceDirectory : `bool`, optional
        If `True` forces the URI to end with a separator, otherwise given
        URI is interpreted as is.

    Returns
    -------
    new : `ResourcePath`
        New URI with any file at the end replaced with the new path
        components.

    Notes
    -----
    Schemeless URIs assume local path separator but all other URIs assume
    POSIX separator if the supplied path has directory structure. It
    may be this never becomes a problem but datastore templates assume
    POSIX separator is being used.

    If an absolute `ResourcePath` is given for ``path`` it is assumed that
    this should be returned directly. Giving a ``path`` of an absolute
    scheme-less URI is not allowed for safety reasons as it may indicate
    a mistake in the calling code.

    Raises
    ------
    ValueError
        Raised if the ``path`` is an absolute scheme-less URI. In that
        situation it is unclear whether the intent is to return a
        ``file`` URI or it was a mistake and a relative scheme-less URI
        was meant.
    RuntimeError
        Raised if this URI is temporary but ``isTemporary`` is explicitly
        `False`.
    """
    if isTemporary is None:
        isTemporary = self.isTemporary
    elif self.isTemporary and not isTemporary:
        raise RuntimeError("Cannot join temporary URI to non-temporary URI.")

    # Parse the supplied path without forcing it to absolute so that the
    # expected case of a relative path can be told apart from a full URI.
    candidate = ResourcePath(
        path, forceAbsolute=False, forceDirectory=forceDirectory, isTemporary=isTemporary
    )
    if candidate.scheme:
        # An explicit URI (as opposed to an absolute scheme-less path)
        # is returned directly.
        return candidate
    if candidate.isabs():
        # Absolute scheme-less path.
        raise ValueError(f"Can not join absolute scheme-less {candidate!r} to another URI.")

    # A plain string is used as given so that "#" can appear in the file
    # name; a ResourcePath contributes its unquoted path.
    relative = path if isinstance(path, str) else candidate.unquoted_path

    base = self.dirname()  # By definition a directory URI

    # Ask the directory URI about quoting, not self, since dirname can
    # change the URI scheme for schemeless -> file
    if base.quotePaths:
        relative = urllib.parse.quote(relative)

    path_module = self._pathModule
    joined = path_module.normpath(path_module.join(base.path, relative))

    # normpath can strip trailing / so force directory if the supplied
    # path ended with a /
    ends_on_separator = relative.endswith(path_module.sep)
    return base.replace(
        path=joined,
        forceDirectory=(forceDirectory or ends_on_separator),
        isTemporary=isTemporary,
    )

656 

def relative_to(self, other: ResourcePath) -> Optional[str]:
    """Return the relative path from this URI to the other URI.

    Parameters
    ----------
    other : `ResourcePath`
        URI to use to calculate the relative path. Must be a parent
        of this URI.

    Returns
    -------
    subpath : `str`
        The sub path of this URI relative to the supplied other URI,
        with URI quoting reversed. Returns `None` if there is no parent
        child relationship. Scheme and netloc must match.
    """
    # Scheme-less absolute other is treated as if it's a file scheme.
    # Scheme-less relative other can only return non-None if self
    # is also scheme-less relative and that is handled specifically
    # in a subclass.
    if not other.scheme and other.isabs():
        other = other.abspath()

    # Scheme-less self is handled elsewhere.
    if self.scheme != other.scheme or self.netloc != other.netloc:
        return None

    # relativeToPathRoot has already reversed the URI quoting on both
    # sides, so the computed sub path must not be unquoted a second time:
    # doing so would corrupt names containing a literal "%XX" sequence.
    enclosed_path = self._pathLib(self.relativeToPathRoot)
    parent_path = other.relativeToPathRoot
    try:
        return str(enclosed_path.relative_to(parent_path))
    except ValueError:
        # Not a child of ``other``.
        return None

694 

def exists(self) -> bool:
    """Indicate that the resource is available.

    Returns
    -------
    exists : `bool`
        `True` if the resource exists.

    Raises
    ------
    NotImplementedError
        Always raised by this base implementation; scheme-specific
        subclasses are expected to provide the real check.
    """
    raise NotImplementedError()

704 

@classmethod
def mexists(cls, uris: Iterable[ResourcePath]) -> Dict[ResourcePath, bool]:
    """Check for existence of multiple URIs at once.

    Each URI's ``exists()`` method is called from a pool of at most
    ``MAX_WORKERS`` threads.

    Parameters
    ----------
    uris : iterable of `ResourcePath`
        The URIs to test.

    Returns
    -------
    existence : `dict` of [`ResourcePath`, `bool`]
        Mapping of original URI to boolean indicating existence. A URI
        whose check raised an exception is reported as not existing.
    """
    results: Dict[ResourcePath, bool] = {}
    # Use the executor as a context manager so that its worker threads are
    # always shut down rather than being left for garbage collection.
    with concurrent.futures.ThreadPoolExecutor(max_workers=MAX_WORKERS) as exists_executor:
        future_exists = {exists_executor.submit(uri.exists): uri for uri in uris}

        for future in concurrent.futures.as_completed(future_exists):
            uri = future_exists[future]
            try:
                exists = future.result()
            except Exception:
                # Deliberate best effort: any failure counts as missing.
                exists = False
            results[uri] = exists
    return results

731 

def remove(self) -> None:
    """Remove the resource.

    Raises
    ------
    NotImplementedError
        Always raised by this base implementation; scheme-specific
        subclasses are expected to provide the removal.
    """
    raise NotImplementedError()

735 

def isabs(self) -> bool:
    """Indicate that the resource is fully specified.

    For non-schemeless URIs this is always true.

    Returns
    -------
    isabs : `bool`
        `True` in all cases except schemeless URI. This base
        implementation unconditionally returns `True`.
    """
    return True

747 

def abspath(self) -> ResourcePath:
    """Return URI using an absolute path.

    Returns
    -------
    abs : `ResourcePath`
        Absolute URI. For non-schemeless URIs this always returns itself.
        Schemeless URIs are upgraded to file URIs. This base
        implementation returns ``self`` unchanged.
    """
    return self

758 

def _as_local(self) -> Tuple[str, bool]:
    """Return the location of the (possibly remote) resource as local file.

    This is a helper function for `as_local` context manager.

    Returns
    -------
    path : `str`
        If this is a remote resource, it will be a copy of the resource
        on the local file system, probably in a temporary directory.
        For a local resource this should be the actual path to the
        resource.
    is_temporary : `bool`
        Indicates if the local path is a temporary file or not.

    Raises
    ------
    NotImplementedError
        Always raised by this base implementation; scheme-specific
        subclasses are expected to provide the implementation.
    """
    raise NotImplementedError()

775 

@contextlib.contextmanager
def as_local(self) -> Iterator[ResourcePath]:
    """Return the location of the (possibly remote) resource as local file.

    Yields
    ------
    local : `ResourcePath`
        If this is a remote resource, it will be a copy of the resource
        on the local file system, probably in a temporary directory.
        For a local resource this should be the actual path to the
        resource.

    Raises
    ------
    IsADirectoryError
        Raised if this URI is dir-like.

    Notes
    -----
    The context manager will automatically delete any local temporary
    file.

    Examples
    --------
    Should be used as a context manager:

    .. code-block:: py

       with uri.as_local() as local:
           ospath = local.ospath
    """
    if self.dirLike:
        raise IsADirectoryError(f"Directory-like URI {self} cannot be fetched as local.")

    local_path, made_copy = self._as_local()
    local = ResourcePath(local_path, isTemporary=made_copy)

    try:
        yield local
    finally:
        # Never delete if the local URI matches self: a temporary file
        # that was already local must be left alone.
        if self != local and made_copy:
            # The caller might have relocated the temporary file.
            if local.exists():
                local.remove()

816 

@classmethod
@contextlib.contextmanager
def temporary_uri(
    cls, prefix: Optional[ResourcePath] = None, suffix: Optional[str] = None
) -> Iterator[ResourcePath]:
    """Create a temporary file-like URI.

    Parameters
    ----------
    prefix : `ResourcePath`, optional
        Prefix to use. Without this the path will be formed as a local
        file URI in a temporary directory. Ensuring that the prefix
        location exists is the responsibility of the caller.
    suffix : `str`, optional
        A file suffix to be used. The ``.`` should be included in this
        suffix.

    Yields
    ------
    uri : `ResourcePath`
        The temporary URI. Will be removed when the context is completed.

    Raises
    ------
    NotImplementedError
        Raised if the generated URI turns out to be dir-like.
    """
    use_tempdir = False
    if prefix is None:
        prefix = ResourcePath(tempfile.mkdtemp(), forceDirectory=True, isTemporary=True)
        # Record that we need to delete this directory. Can not rely
        # on isTemporary flag since an external prefix may have that
        # set as well.
        use_tempdir = True

    # From here on a directory created above must always be cleaned up,
    # including when building the temporary name fails before the caller
    # ever receives the URI.
    try:
        # Need to create a randomized file name. For consistency do not
        # use mkstemp for local and something else for remote. Additionally
        # this method does not create the file to prevent name clashes.
        characters = "abcdefghijklmnopqrstuvwxyz0123456789_"
        rng = Random()
        tempname = "".join(rng.choice(characters) for _ in range(16))
        if suffix:
            tempname += suffix
        temporary_uri = prefix.join(tempname, isTemporary=True)
        if temporary_uri.dirLike:
            # If we had a safe way to clean up a remote temporary directory, we
            # could support this.
            raise NotImplementedError("temporary_uri cannot be used to create a temporary directory.")
        try:
            yield temporary_uri
        finally:
            if not use_tempdir:
                try:
                    # It's okay if this does not work because the user removed
                    # the file.
                    temporary_uri.remove()
                except FileNotFoundError:
                    pass
    finally:
        if use_tempdir:
            # Removing the directory also removes the temporary file.
            shutil.rmtree(prefix.ospath, ignore_errors=True)

872 

def read(self, size: int = -1) -> bytes:
    """Open the resource and return the contents in bytes.

    Parameters
    ----------
    size : `int`, optional
        The number of bytes to read. Negative or omitted indicates
        that all data should be read.

    Raises
    ------
    NotImplementedError
        Always raised by this base implementation; scheme-specific
        subclasses are expected to provide the implementation.
    """
    raise NotImplementedError()

883 

def write(self, data: bytes, overwrite: bool = True) -> None:
    """Write the supplied bytes to the new resource.

    Parameters
    ----------
    data : `bytes`
        The bytes to write to the resource. The entire contents of the
        resource will be replaced.
    overwrite : `bool`, optional
        If `True` the resource will be overwritten if it exists. Otherwise
        the write will fail.

    Raises
    ------
    NotImplementedError
        Always raised by this base implementation; scheme-specific
        subclasses are expected to provide the implementation.
    """
    raise NotImplementedError()

897 

def mkdir(self) -> None:
    """For a dir-like URI, create the directory resource if needed.

    Raises
    ------
    NotImplementedError
        Always raised by this base implementation; scheme-specific
        subclasses are expected to provide the implementation.
    """
    raise NotImplementedError()

901 

def isdir(self) -> bool:
    """Report whether this URI is flagged as directory-like.

    Returns
    -------
    isdir : `bool`
        The value of the ``dirLike`` attribute.
    """
    looks_like_directory = self.dirLike
    return looks_like_directory

905 

def size(self) -> int:
    """For non-dir-like URI, return the size of the resource.

    Returns
    -------
    sz : `int`
        The size in bytes of the resource associated with this URI.
        Returns 0 if dir-like.

    Raises
    ------
    NotImplementedError
        Always raised by this base implementation; scheme-specific
        subclasses are expected to provide the implementation.
    """
    raise NotImplementedError()

916 

def __str__(self) -> str:
    """Return the URI in its native string form (same as `geturl`)."""
    url = self.geturl()
    return url

920 

def __repr__(self) -> str:
    """Return a ``ResourcePath("...")`` representation of the URI string."""
    url = self.geturl()
    return f'ResourcePath("{url}")'

924 

def __eq__(self, other: Any) -> bool:
    """Compare supplied object with this `ResourcePath`.

    Two `ResourcePath` objects are equal when their string forms match.
    Any other type yields `NotImplemented`.
    """
    if isinstance(other, ResourcePath):
        return self.geturl() == other.geturl()
    return NotImplemented

930 

def __hash__(self) -> int:
    """Return a hash computed from the string form of this URI."""
    text = str(self)
    return hash(text)

934 

def __copy__(self) -> ResourcePath:
    """Support `copy.copy`.

    Instances are immutable, so the same object is handed back rather
    than a duplicate.
    """
    # Defined explicitly because the class's __new__ method interferes
    # with the default copy machinery.
    return self

942 

def __deepcopy__(self, memo: Any) -> ResourcePath:
    """Support `copy.deepcopy`.

    Instances are immutable, so the same object is handed back rather
    than a duplicate. The ``memo`` argument is not used.
    """
    # Defined explicitly because the class's __new__ method interferes
    # with the default copy machinery.
    return self

950 

def __getnewargs__(self) -> Tuple:
    """Return the arguments used to recreate this object when unpickling.

    The result is a one-element tuple holding the string form of the URI.
    """
    uri_string = str(self)
    return (uri_string,)

954 

@classmethod
def _fixDirectorySep(
    cls, parsed: urllib.parse.ParseResult, forceDirectory: bool = False
) -> Tuple[urllib.parse.ParseResult, bool]:
    """Make sure directory-like paths end with a path separator.

    Parameters
    ----------
    parsed : `~urllib.parse.ParseResult`
        A URI that has already been parsed with `urllib.parse`.
    forceDirectory : `bool`, optional
        If `True` the URI is treated as a directory and a trailing
        separator is appended when missing. If `False` the URI is taken
        as given.

    Returns
    -------
    modified : `~urllib.parse.ParseResult`
        The parsed URI, with a separator appended to the path when one
        was required and missing; otherwise the input unchanged.
    dirLike : `bool`
        `True` if the path ends with the separator or ``forceDirectory``
        is `True`, else `False`.
    """
    # The separator comes from the path module configured on the class.
    separator = cls._pathModule.sep
    has_trailing_sep = parsed.path.endswith(separator)

    # Neither forced nor already terminated: not a directory-like URI.
    if not (forceDirectory or has_trailing_sep):
        return parsed, False

    # Directory-like; append the separator only when it is missing.
    if not has_trailing_sep:
        parsed = parsed._replace(path=parsed.path + separator)
    return parsed, True

994 

@classmethod
def _fixupPathUri(
    cls,
    parsed: urllib.parse.ParseResult,
    root: Optional[Union[str, ResourcePath]] = None,
    forceAbsolute: bool = False,
    forceDirectory: bool = False,
) -> Tuple[urllib.parse.ParseResult, bool]:
    """Fix up the supplied parsed URI.

    This implementation only normalizes the trailing directory separator
    by delegating to ``_fixDirectorySep``.

    Parameters
    ----------
    parsed : `~urllib.parse.ParseResult`
        A URI that has already been parsed with `urllib.parse`.
    root : `str` or `ResourcePath`, ignored
        Not used by this implementation, since all URIs are absolute
        except for those representing the local file system.
    forceAbsolute : `bool`, ignored
        Not used by this implementation. URIs are generally always
        absolute.
    forceDirectory : `bool`, optional
        If `True` the URI is forced to end with a separator; otherwise
        the URI is interpreted as given. Declaring that the URI is
        conceptually a directory resolves ambiguity about the meaning of
        the final path element.

    Returns
    -------
    modified : `~urllib.parse.ParseResult`
        The parsed URI, updated if a separator had to be added.
    dirLike : `bool`
        `True` if the parsed URI has a trailing separator or
        ``forceDirectory`` is `True`, else `False`.

    Notes
    -----
    Relative paths are explicitly not supported by RFC8089 but `urllib`
    does accept URIs of the form ``file:relative/path.ext``. They need
    to be turned into absolute paths before they can be used. This is
    always done regardless of the ``forceAbsolute`` parameter.

    AWS S3 differentiates between keys with trailing POSIX separators
    (i.e. ``/dir`` and ``/dir/``) whereas POSIX does not necessarily.

    Scheme-less paths are normalized.
    """
    fixed = cls._fixDirectorySep(parsed, forceDirectory)
    return fixed

1042 

def transfer_from(
    self,
    src: ResourcePath,
    transfer: str,
    overwrite: bool = False,
    transaction: Optional[TransactionProtocol] = None,
) -> None:
    """Transfer a resource from ``src`` to the location of this URI.

    The base class supports no transfer modes and always raises.

    Parameters
    ----------
    src : `ResourcePath`
        Source URI.
    transfer : `str`
        Transfer mode. Commonly available options are: copy, link,
        symlink, hardlink, relsymlink. Not every URI supports every mode.
    overwrite : `bool`, optional
        Whether an existing file may be overwritten. Defaults to `False`.
    transaction : `~lsst.resources.utils.TransactionProtocol`, optional
        A transaction object that can (depending on implementation)
        roll back transfers on error. Not guaranteed to be implemented.

    Raises
    ------
    NotImplementedError
        Always raised by this base implementation; the message names the
        scheme of this URI.

    Notes
    -----
    Conceptually this is hard to scale as the number of URI schemes
    grows. The destination URI matters more than the source URI because
    that is where the transfer modes are relevant (with the complication
    that "move" deletes the source).

    Local file to local file is the fundamental use case, but every
    other scheme has to support "copy" to a local file (with implicit
    support for "move") and copy from a local file. The "link" options
    tend to be specific to local file systems.

    "move" is a "copy" where the remote resource is deleted at the end.
    Whether this works depends on the source URI rather than the
    destination URI. Reverting a move on transaction rollback is
    expected to be problematic if a remote resource was involved.
    """
    message = f"No transfer modes supported by URI scheme {self.scheme}"
    raise NotImplementedError(message)

1084 

def walk(
    self, file_filter: Optional[Union[str, re.Pattern]] = None
) -> Iterator[Union[List, Tuple[ResourcePath, List[str], List[str]]]]:
    """Traverse the directory tree, reporting matching files and directories.

    The base class provides no implementation and raises as soon as the
    method is called.

    Parameters
    ----------
    file_filter : `str` or `re.Pattern`, optional
        Regular expression used to filter the file names before they are
        returned.

    Yields
    ------
    dirpath : `ResourcePath`
        The directory currently being examined.
    dirnames : `list` of `str`
        Names of the subdirectories inside ``dirpath``.
    filenames : `list` of `str`
        Names of the files inside ``dirpath``.

    Raises
    ------
    NotImplementedError
        Always raised by this base implementation.
    """
    raise NotImplementedError

1105 

@classmethod
def findFileResources(
    cls,
    candidates: Iterable[Union[str, ResourcePath]],
    file_filter: Optional[str] = None,
    grouped: bool = False,
) -> Iterator[Union[ResourcePath, Iterator[ResourcePath]]]:
    """Get all the files from a list of values.

    Parameters
    ----------
    candidates : iterable [`str` or `ResourcePath`]
        The files to return and directories in which to look for files to
        return.
    file_filter : `str`, optional
        The regex to use when searching for files within directories.
        By default returns all the found files.
    grouped : `bool`, optional
        If `True` the results will be grouped by directory and each
        yielded value will be an iterator over URIs. If `False` each
        URI will be returned separately.

    Yields
    ------
    found_file: `ResourcePath`
        The passed-in URIs and URIs found in passed-in directories.
        If grouping is enabled, each of the yielded values will be an
        iterator yielding members of the group. Files given explicitly
        will be returned as a single group at the end.

    Notes
    -----
    If a value is a file it is yielded immediately without checking that it
    exists. If a value is a directory, all the files in the directory
    (recursively) that match the regex will be yielded in turn.

    Each group iterator is bound to its own directory, so groups may be
    collected first and consumed later.
    """
    fileRegex = None if file_filter is None else re.compile(file_filter)

    # Explicitly given files, reported as one final group in grouped mode.
    singles = []

    # Find all the files of interest
    for location in candidates:
        uri = ResourcePath(location)
        if not uri.isdir():
            if grouped:
                singles.append(uri)
            else:
                yield uri
            continue

        for found in uri.walk(fileRegex):
            if not found:
                # This means the uri does not exist and by
                # convention we ignore it
                continue
            root, dirs, files = found
            if not files:
                continue
            if grouped:
                # Bind ``root.join`` now. A generator expression would
                # look up ``root`` lazily and, if consumed after this
                # loop advanced, join against a later directory.
                yield map(root.join, files)
            else:
                for name in files:
                    yield root.join(name)

    # Finally, return any explicitly given files in one group
    if grouped and singles:
        yield iter(singles)

1172 

@contextlib.contextmanager
def open(
    self,
    mode: str = "r",
    *,
    encoding: Optional[str] = None,
    prefer_file_temporary: bool = False,
) -> Iterator[IO]:
    """Return a context manager that wraps an object that behaves like an
    open file at the location of the URI.

    Parameters
    ----------
    mode : `str`
        String indicating the mode in which to open the file. Values are
        the same as those accepted by `builtins.open`, though intrinsically
        read-only URI types may only support read modes, and
        `io.IOBase.seekable` is not guaranteed to be `True` on the returned
        object.
    encoding : `str`, optional
        Unicode encoding for text IO; ignored for binary IO. Defaults to
        ``locale.getpreferredencoding(False)``, just as `builtins.open`
        does.
    prefer_file_temporary : `bool`, optional
        If `True`, for implementations that require transfers from a remote
        system to temporary local storage and/or back, use a temporary file
        instead of an in-memory buffer; this is generally slower, but it
        may be necessary to avoid excessive memory usage by large files.
        Ignored by implementations that do not require a temporary.

    Returns
    -------
    cm : `contextlib.ContextManager`
        A context manager that wraps a file-like object.

    Raises
    ------
    IsADirectoryError
        Raised if this URI is dir-like.
    FileExistsError
        Raised if ``mode`` contains ``"x"`` and the resource already exists.

    Notes
    -----
    The default implementation of this method uses a local temporary buffer
    (in-memory or file, depending on ``prefer_file_temporary``) with calls
    to `read`, `write`, `as_local`, and `transfer_from` as necessary to
    read and write from/to remote systems. Remote writes thus occur only
    when the context manager is exited. `ResourcePath` implementations
    that can return a more efficient native buffer should do so whenever
    possible (as is guaranteed for local files). `ResourcePath`
    implementations for which `as_local` does not return a temporary are
    required to reimplement `open`, though they may delegate to `super`
    when `prefer_file_temporary` is `False`.
    """
    if self.dirLike:
        raise IsADirectoryError(f"Directory-like URI {self} cannot be opened.")
    if "x" in mode and self.exists():
        raise FileExistsError(f"File at {self} already exists.")
    if prefer_file_temporary:
        # Existing content is needed for read and append modes; otherwise
        # start from a fresh temporary with a matching extension.
        if "r" in mode or "a" in mode:
            local_cm = self.as_local()
        else:
            local_cm = self.temporary_uri(suffix=self.getExtension())
        with local_cm as local_uri:
            assert local_uri.isTemporary, (
                "ResourcePath implementations for which as_local is not "
                "a temporary must reimplement `open`."
            )
            # builtins.open raises ValueError when an encoding is given
            # together with a binary mode, so drop it to honor the
            # documented "ignored for binary IO" contract.
            file_encoding = None if "b" in mode else encoding
            with open(local_uri.ospath, mode=mode, encoding=file_encoding) as file_buffer:
                if "a" in mode:
                    file_buffer.seek(0, io.SEEK_END)
                yield file_buffer
            # Push the local file back whenever the mode allowed writing.
            if "r" not in mode or "+" in mode:
                self.transfer_from(local_uri, transfer="copy", overwrite=("x" not in mode))
    else:
        # In-memory buffer, pre-populated for read and append modes.
        if "r" in mode or "a" in mode:
            in_bytes = self.read()
        else:
            in_bytes = b""
        if "b" in mode:
            bytes_buffer = io.BytesIO(in_bytes)
            if "a" in mode:
                bytes_buffer.seek(0, io.SEEK_END)
            yield bytes_buffer
            out_bytes = bytes_buffer.getvalue()
        else:
            if encoding is None:
                encoding = locale.getpreferredencoding(False)
            str_buffer = io.StringIO(in_bytes.decode(encoding))
            if "a" in mode:
                str_buffer.seek(0, io.SEEK_END)
            yield str_buffer
            out_bytes = str_buffer.getvalue().encode(encoding)
        # Write the buffer back whenever the mode allowed writing.
        if "r" not in mode or "+" in mode:
            self.write(out_bytes, overwrite=("x" not in mode))

1261 self.write(out_bytes, overwrite=("x" not in mode))