Coverage for python/lsst/resources/_resourcePath.py: 22%

401 statements  

« prev     ^ index     » next       coverage.py v6.4.1, created at 2022-07-03 01:04 -0700

1# This file is part of lsst-resources. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# Use of this source code is governed by a 3-clause BSD-style 

10# license that can be found in the LICENSE file. 

11 

12from __future__ import annotations 

13 

14import concurrent.futures 

15import contextlib 

16import copy 

17import io 

18import locale 

19import logging 

20import os 

21import posixpath 

22import re 

23import shutil 

24import tempfile 

25import urllib.parse 

26from pathlib import Path, PurePath, PurePosixPath 

27from random import Random 

28 

29__all__ = ("ResourcePath", "ResourcePathExpression") 

30 

31from typing import ( 

32 IO, 

33 TYPE_CHECKING, 

34 Any, 

35 Dict, 

36 Iterable, 

37 Iterator, 

38 List, 

39 Literal, 

40 Optional, 

41 Tuple, 

42 Type, 

43 Union, 

44 overload, 

45) 

46 

47if TYPE_CHECKING: 47 ↛ 48line 47 didn't jump to line 48, because the condition on line 47 was never true

48 from .utils import TransactionProtocol 

49 

50 

# Logger shared by everything in this module.
log = logging.getLogger(__name__)

# Matches a percent escape written with upper-case hex digits (``%XX``).
ESCAPES_RE = re.compile(r"%[A-F0-9]{2}")

# Percent-escaped form of "#", computed once at import time.
ESCAPED_HASH = urllib.parse.quote("#")

# Upper bound on the number of worker threads for parallelized operations.
# Values above 10 must be kept consistent with connection pool sizing
# (for example in urllib3).
MAX_WORKERS = 10


ResourcePathExpression = Union[str, urllib.parse.ParseResult, "ResourcePath", Path]
"""Type-annotation alias for objects that can be coerced to ResourcePath.
"""

68 

69 

70class ResourcePath: 

71 """Convenience wrapper around URI parsers. 

72 

73 Provides access to URI components and can convert file 

74 paths into absolute path URIs. Scheme-less URIs are treated as if 

75 they are local file system paths and are converted to absolute URIs. 

76 

77 A specialist subclass is created for each supported URI scheme. 

78 

79 Parameters 

80 ---------- 

81 uri : `str`, `Path`, `urllib.parse.ParseResult`, or `ResourcePath`. 

82 URI in string form. Can be scheme-less if referring to a local 

83 filesystem path. 

84 root : `str` or `ResourcePath`, optional 

85 When fixing up a relative path in a ``file`` scheme or if scheme-less, 

86 use this as the root. Must be absolute. If `None` the current 

87 working directory will be used. Can be a file URI. 

88 forceAbsolute : `bool`, optional 

89 If `True`, scheme-less relative URI will be converted to an absolute 

90 path using a ``file`` scheme. If `False` scheme-less URI will remain 

91 scheme-less and will not be updated to ``file`` or absolute path. 

92 forceDirectory: `bool`, optional 

93 If `True` forces the URI to end with a separator, otherwise given URI 

94 is interpreted as is. 

95 isTemporary : `bool`, optional 

96 If `True` indicates that this URI points to a temporary resource. 

97 The default is `False`, unless ``uri`` is already a `ResourcePath` 

98 instance and ``uri.isTemporary is True``. 

99 """ 

100 

101 _pathLib: Type[PurePath] = PurePosixPath 

102 """Path library to use for this scheme.""" 

103 

104 _pathModule = posixpath 

105 """Path module to use for this scheme.""" 

106 

107 transferModes: Tuple[str, ...] = ("copy", "auto", "move") 

108 """Transfer modes supported by this implementation. 

109 

110 Move is special in that it is generally a copy followed by an unlink. 

111 Whether that unlink works depends critically on whether the source URI 

112 implements unlink. If it does not the move will be reported as a failure. 

113 """ 

114 

115 transferDefault: str = "copy" 

116 """Default mode to use for transferring if ``auto`` is specified.""" 

117 

118 quotePaths = True 

119 """True if path-like elements modifying a URI should be quoted. 

120 

121 All non-schemeless URIs have to internally use quoted paths. Therefore 

122 if a new file name is given (e.g. to updatedFile or join) a decision must 

123 be made whether to quote it to be consistent. 

124 """ 

125 

126 isLocal = False 

127 """If `True` this URI refers to a local file.""" 

128 

129 # This is not an ABC with abstract methods because the __new__ being 

130 # a factory confuses mypy such that it assumes that every constructor 

131 # returns a ResourcePath and then determines that all the abstract methods 

132 # are still abstract. If they are not marked abstract but just raise 

133 # mypy is fine with it. 

134 

135 # mypy is confused without these 

136 _uri: urllib.parse.ParseResult 

137 isTemporary: bool 

138 dirLike: bool 

139 

def __new__(
    cls,
    uri: ResourcePathExpression,
    root: Optional[Union[str, ResourcePath]] = None,
    forceAbsolute: bool = True,
    forceDirectory: bool = False,
    isTemporary: Optional[bool] = None,
) -> ResourcePath:
    """Build an instance of the scheme-specific `ResourcePath` subclass.

    Parameters
    ----------
    uri : `str`, `os.PathLike`, `urllib.parse.ParseResult`, `ResourcePath`
        The resource to wrap.
    root : `str` or `ResourcePath`, optional
        Forwarded to the subclass ``_fixupPathUri`` hook.
    forceAbsolute : `bool`, optional
        Forwarded to the subclass ``_fixupPathUri`` hook. Also triggers
        re-parsing of a scheme-less `ResourcePath` argument.
    forceDirectory : `bool`, optional
        Request a directory-like URI.
    isTemporary : `bool`, optional
        Temporary flag stored on the instance. `None` is stored as
        `False`.

    Returns
    -------
    new : `ResourcePath`
        Instance of the subclass matching the URI scheme. When ``uri``
        is already a compatible `ResourcePath` it is returned as is.

    Raises
    ------
    RuntimeError
        Raised if ``uri`` is a `ResourcePath` that conflicts with
        ``forceDirectory`` or ``isTemporary``.
    ValueError
        Raised if ``uri`` has an unsupported type.
    NotImplementedError
        Raised if the URI scheme is not recognized.
    """
    parsed: urllib.parse.ParseResult
    dirLike: bool = False
    subclass: Optional[Type[ResourcePath]] = None

    # Path-like objects (e.g. pathlib.Path) are handled via their string
    # form.
    if isinstance(uri, os.PathLike):
        uri = str(uri)

    if isinstance(uri, ResourcePath):
        # Instances are treated as immutable, so an existing instance is
        # handed back unchanged whenever it is compatible with the
        # request. Conflicting forceDirectory / isTemporary requests are
        # reported as errors rather than silently adjusted.
        if forceDirectory and not uri.dirLike:
            raise RuntimeError(
                f"{uri} is already a file-like ResourcePath; cannot force it to directory."
            )
        if isTemporary is not None and isTemporary is not uri.isTemporary:
            raise RuntimeError(
                f"{uri} is already a {'temporary' if uri.isTemporary else 'permanent'} "
                f"ResourcePath; cannot make it {'temporary' if isTemporary else 'permanent'}."
            )
        if forceAbsolute and not uri.scheme:
            # Re-parse the string form so that a scheme can be added.
            return ResourcePath(
                str(uri),
                root=root,
                forceAbsolute=True,
                forceDirectory=uri.dirLike,
                isTemporary=uri.isTemporary,
            )
        return uri

    if isinstance(uri, str):
        # Local file names can contain special characters, so quote them
        # for the parser; all other URI schemes are assumed to be quoted
        # already. People sometimes write file:/a/b instead of
        # file:///a/b, so an explicit "file:" prefix is never quoted.
        looks_local = "://" not in uri and not uri.startswith("file:")
        if looks_local:
            if ESCAPES_RE.search(uri):
                log.warning("Possible double encoding of %s", uri)
            else:
                uri = urllib.parse.quote(uri)
                # Fragments must be supported even in schemeless URIs, so
                # restore "#" -- but only in the part after the last "/".
                if ESCAPED_HASH in uri:
                    head, sep, tail = uri.rpartition("/")
                    uri = head + sep + tail.replace(ESCAPED_HASH, "#")

        parsed = urllib.parse.urlparse(uri)
    elif isinstance(uri, urllib.parse.ParseResult):
        parsed = copy.copy(uri)
        # When called on a subclass rather than on ResourcePath itself,
        # use that subclass directly. This stops a schemeless absolute
        # URI from unexpectedly becoming a file URI in updatedFile or
        # updatedExtension. It can give inconsistent results if used
        # outside of replace(), e.g.
        # S3ResourcePath(urllib.parse.urlparse("file://a/b.txt")).
        if cls is not ResourcePath:
            parsed, dirLike = cls._fixDirectorySep(parsed, forceDirectory)
            subclass = cls
    else:
        raise ValueError(
            f"Supplied URI must be string, Path, ResourcePath, or ParseResult but got '{uri!r}'"
        )

    if subclass is None:
        # Select the implementation from the URI scheme. Imports are
        # deferred until the scheme is actually needed.
        scheme = parsed.scheme
        if not scheme:
            from .schemeless import SchemelessResourcePath

            subclass = SchemelessResourcePath
        elif scheme == "file":
            from .file import FileResourcePath

            subclass = FileResourcePath
        elif scheme == "s3":
            from .s3 import S3ResourcePath

            subclass = S3ResourcePath
        elif scheme.startswith("http"):
            from .http import HttpResourcePath

            subclass = HttpResourcePath
        elif scheme == "gs":
            from .gs import GSResourcePath

            subclass = GSResourcePath
        elif scheme == "resource":
            # Rules for scheme names disallow pkg_resource
            from .packageresource import PackageResourcePath

            subclass = PackageResourcePath
        elif scheme == "mem":
            # in-memory datastore object
            from .mem import InMemoryResourcePath

            subclass = InMemoryResourcePath
        else:
            raise NotImplementedError(
                f"No URI support for scheme: '{parsed.scheme}' in {parsed.geturl()}"
            )

        parsed, dirLike = subclass._fixupPathUri(
            parsed, root=root, forceAbsolute=forceAbsolute, forceDirectory=forceDirectory
        )

        # The fix-up step can turn a schemeless URI into a file URI, in
        # which case the class has to follow.
        if parsed.scheme == "file":
            from .file import FileResourcePath

            subclass = FileResourcePath

    # Bypass the factory and populate the attributes directly.
    self = object.__new__(subclass)
    self._uri = parsed
    self.dirLike = dirLike
    self.isTemporary = False if isTemporary is None else isTemporary
    return self

280 

@property
def scheme(self) -> str:
    """Scheme component of the URI.

    Notes
    -----
    The ``://`` separator is not included.
    """
    parsed = self._uri
    return parsed.scheme

290 

@property
def netloc(self) -> str:
    """Network location component of the URI."""
    parsed = self._uri
    return parsed.netloc

295 

@property
def path(self) -> str:
    """Path component of the URI, exactly as stored (not unquoted)."""
    parsed = self._uri
    return parsed.path

300 

@property
def unquoted_path(self) -> str:
    """Path component of the URI with percent escapes decoded."""
    quoted = self._uri.path
    return urllib.parse.unquote(quoted)

305 

@property
def ospath(self) -> str:
    """Path component of the URI localized to the current OS.

    Raises
    ------
    AttributeError
        Always raised by this implementation.
    """
    message = f"Non-file URI ({self}) has no local OS path."
    raise AttributeError(message)

310 

@property
def relativeToPathRoot(self) -> str:
    """Path relative to the root of the network location.

    This is the ``path`` property with the root separator removed from
    the left hand side. A directory-like URI always ends with ``/``.
    The result is always unquoted.
    """
    pure = self._pathLib(self.path)
    relative = str(pure.relative_to(pure.root))
    needs_separator = self.dirLike and not relative.endswith("/")
    if needs_separator:
        relative = relative + "/"
    return urllib.parse.unquote(relative)

325 

@property
def is_root(self) -> bool:
    """Whether this URI points at the root of the network location.

    True exactly when ``relativeToPathRoot`` is ``"./"``.
    """
    return self.relativeToPathRoot == "./"

336 

@property
def fragment(self) -> str:
    """Fragment component of the URI."""
    parsed = self._uri
    return parsed.fragment

341 

@property
def params(self) -> str:
    """Parameters component of the URI."""
    parsed = self._uri
    return parsed.params

346 

@property
def query(self) -> str:
    """Query string component of the URI."""
    parsed = self._uri
    return parsed.query

351 

def geturl(self) -> str:
    """Return the URI as a string.

    Returns
    -------
    url : `str`
        String form of the URI.
    """
    parsed = self._uri
    return parsed.geturl()

361 

def root_uri(self) -> ResourcePath:
    """Return the base root URI.

    Returns
    -------
    uri : `ResourcePath`
        This URI with an empty path, forced to be directory-like.
    """
    return self.replace(forceDirectory=True, path="")

371 

def split(self) -> Tuple[ResourcePath, str]:
    """Split the URI into a directory part and a final component.

    Returns
    -------
    head : `ResourcePath`
        Everything leading up to the tail, as a directory-like
        `ResourcePath` that keeps the other URI components.
    tail : `str`
        Last component of ``self.path``, unquoted. Empty if the path
        ends on a separator. Never contains separators.

    Notes
    -----
    Equivalent to `os.path.split()` where head preserves the URI
    components.
    """
    directory, filename = self._pathModule.split(self.path)

    # Only a schemeless URI can be relative and it has to stay relative,
    # so absoluteness is only forced when the path is already absolute.
    already_absolute = self._pathModule.isabs(self.path)

    head_uri = ResourcePath(
        self._uri._replace(path=directory),
        forceDirectory=True,
        forceAbsolute=already_absolute,
    )

    # The file part is handed back without quoted metacharacters.
    return head_uri, urllib.parse.unquote(filename)

401 

def basename(self) -> str:
    """Return the last element of the URI path.

    Returns
    -------
    tail : `str`
        Last part of the path attribute. Tail will be empty if the path
        ends on a separator.

    Notes
    -----
    This is the second element returned by `split()`.

    Equivalent of `os.path.basename()`.
    """
    parts = self.split()
    return parts[1]

419 

def dirname(self) -> ResourcePath:
    """Return the directory component of the path as a `ResourcePath`.

    Returns
    -------
    head : `ResourcePath`
        Everything except the tail of the path attribute. This is the
        first element returned by `split()`.

    Notes
    -----
    Equivalent of `os.path.dirname()`.
    """
    parts = self.split()
    return parts[0]

434 

def parent(self) -> ResourcePath:
    """Return a `ResourcePath` of the parent directory.

    Returns
    -------
    head : `ResourcePath`
        For a file-like URI, the result of `dirname()`. For a
        directory-like URI, the directory one level up.

    Notes
    -----
    For a file-like URI this will be the same as calling `dirname()`.
    """
    if self.dirLike:
        # Go up one level, whether or not the stored path carries a
        # trailing separator.
        one_up = self._pathLib(self.path).parent
        return self.replace(path=str(one_up), forceDirectory=True)
    return self.dirname()

456 

def replace(self, forceDirectory: bool = False, isTemporary: bool = False, **kwargs: Any) -> ResourcePath:
    """Return a new `ResourcePath` with the given components replaced.

    Parameters
    ----------
    forceDirectory : `bool`, optional
        Passed to the constructor to force the new URI to be dir-like.
    isTemporary : `bool`, optional
        Passed to the constructor to mark the new URI as temporary.
    **kwargs
        Components of a `urllib.parse.ParseResult` that should be
        modified for the newly-created `ResourcePath`.

    Returns
    -------
    new : `ResourcePath`
        New object of the same class as ``self`` with updated values.

    Raises
    ------
    ValueError
        Raised if an attempt is made to change the URI scheme.
    """
    if "scheme" in kwargs:
        raise ValueError(f"Can not use replace() method to change URI scheme for {self}")
    modified = self._uri._replace(**kwargs)
    constructor = self.__class__
    return constructor(modified, forceDirectory=forceDirectory, isTemporary=isTemporary)

486 

def updatedFile(self, newfile: str) -> ResourcePath:
    """Return a new URI with the final path component replaced.

    Parameters
    ----------
    newfile : `str`
        File name with no path component.

    Returns
    -------
    updated : `ResourcePath`
        New URI whose last path component is ``newfile``.

    Notes
    -----
    Forces the ``dirLike`` attribute of the result to be false. The new
    file name is quoted when ``quotePaths`` is true.
    """
    filename = urllib.parse.quote(newfile) if self.quotePaths else newfile
    directory = self._pathModule.split(self.path)[0]
    result = self.replace(path=self._pathModule.join(directory, filename))
    result.dirLike = False
    return result

512 

def updatedExtension(self, ext: Optional[str]) -> ResourcePath:
    """Return a new `ResourcePath` with a different file extension.

    The whole extension reported by `getExtension()` is replaced.

    Parameters
    ----------
    ext : `str` or `None`
        New extension. An empty string removes any extension. `None`
        means no change.

    Returns
    -------
    updated : `ResourcePath`
        URI with the specified extension. ``self`` is returned when
        ``ext`` is `None` or already matches the current extension.
    """
    if ext is None:
        return self

    existing = self.getExtension()
    if existing == ext:
        return self

    # Strip the existing extension. A compound extension such as .fits.gz
    # counts as one, which is why os.path.splitext is not used.
    stem = self.path[: -len(existing)] if existing else self.path

    # Add the leading "." if the caller left it off (but never turn an
    # empty extension into ".").
    if ext and not ext.startswith("."):
        ext = "." + ext

    return self.replace(path=stem + ext)

552 

def getExtension(self) -> str:
    """Return the file extension(s) of this URI path.

    Returns
    -------
    ext : `str`
        The file extension including the leading ``.``, or an empty
        string if there is none. Normally only the last extension is
        returned. If the last extension is one of ``.gz``, ``.bz2``,
        ``.xz`` or ``.fz`` and another extension precedes it, the two
        are returned combined (e.g. ``.fits.gz``).
    """
    compression = {".gz", ".bz2", ".xz", ".fz"}

    # Look only at the file name so that a "." in a directory name is
    # not mistaken for an extension.
    suffixes = self._pathLib(self.basename()).suffixes
    if not suffixes:
        return ""

    last = suffixes[-1]
    if len(suffixes) > 1 and last in compression:
        return f"{suffixes[-2]}{last}"
    return last

582 

def join(
    self, path: Union[str, ResourcePath], isTemporary: Optional[bool] = None, forceDirectory: bool = False
) -> ResourcePath:
    """Return a new `ResourcePath` with additional path components.

    Parameters
    ----------
    path : `str`, `ResourcePath`
        Additional file components to append to the current URI. Assumed
        to include a file at the end. Will be quoted depending on the
        associated URI scheme. If the path has a scheme it is returned
        directly (matching the behavior of `os.path.join()`).
    isTemporary : `bool`, optional
        Indicate that the resulting URI represents a temporary resource.
        Default is ``self.isTemporary``.
    forceDirectory : `bool`, optional
        If `True` forces the URI to end with a separator, otherwise the
        given URI is interpreted as is.

    Returns
    -------
    new : `ResourcePath`
        New URI with any file at the end replaced with the new path
        components.

    Raises
    ------
    ValueError
        Raised if ``path`` is an absolute scheme-less URI. In that
        situation it is unclear whether the intent is to return a
        ``file`` URI or whether a relative scheme-less URI was meant.
    RuntimeError
        Raised if this attempts to join a temporary URI to a
        non-temporary URI.

    Notes
    -----
    Schemeless URIs assume the local path separator but all other URIs
    assume the POSIX separator if the supplied path has directory
    structure.

    If an absolute `ResourcePath` is given for ``path`` it is assumed
    that this should be returned directly.
    """
    if isTemporary is None:
        isTemporary = self.isTemporary
    elif self.isTemporary and not isTemporary:
        raise RuntimeError("Cannot join temporary URI to non-temporary URI.")

    # Parse the argument without forcing it absolute, so that the
    # expected relative case can be told apart from a full URI.
    path_uri = ResourcePath(
        path, forceAbsolute=False, forceDirectory=forceDirectory, isTemporary=isTemporary
    )

    # A scheme marks an explicit URI, which replaces this one outright.
    if path_uri.scheme:
        return path_uri

    # No scheme but absolute: refuse.
    if path_uri.isabs():
        raise ValueError(f"Can not join absolute scheme-less {path_uri!r} to another URI.")

    # A ResourcePath argument contributes its unquoted path. A plain
    # string is used as given so that "#" can appear in the file name.
    if not isinstance(path, str):
        path = path_uri.unquoted_path

    base = self.dirname()  # By definition a directory URI

    # Quoting is decided by the directory URI, not by self, because
    # dirname() can change the class from schemeless to file.
    if base.quotePaths:
        path = urllib.parse.quote(path)

    pathmod = self._pathModule
    joined = pathmod.normpath(pathmod.join(base.path, path))

    # normpath can drop a trailing separator, so request a directory
    # whenever the supplied path ended with one.
    wants_directory = forceDirectory or path.endswith(pathmod.sep)
    return base.replace(path=joined, forceDirectory=wants_directory, isTemporary=isTemporary)

674 

def relative_to(self, other: ResourcePath) -> Optional[str]:
    """Return the path of this URI relative to the other URI.

    Parameters
    ----------
    other : `ResourcePath`
        URI to use to calculate the relative path. Must be a parent
        of this URI.

    Returns
    -------
    subpath : `str` or `None`
        The unquoted sub path of this URI relative to ``other``.
        `None` if scheme or netloc differ, or if ``other`` is not a
        parent of this URI.
    """
    # A scheme-less absolute other is compared via its abspath() form.
    if not other.scheme and other.isabs():
        other = other.abspath()

    if self.scheme != other.scheme or self.netloc != other.netloc:
        return None

    # relativeToPathRoot is already unquoted, so the result below must
    # not be unquoted a second time: doing so would corrupt names that
    # contain a literal percent sequence (e.g. "100%25.txt").
    enclosed_path = self._pathLib(self.relativeToPathRoot)
    parent_path = other.relativeToPathRoot
    try:
        return str(enclosed_path.relative_to(parent_path))
    except ValueError:
        # No parent/child relationship.
        return None

712 

def exists(self) -> bool:
    """Report whether the resource is available.

    Returns
    -------
    exists : `bool`
        `True` if the resource exists.

    Raises
    ------
    NotImplementedError
        Always raised by this implementation.
    """
    raise NotImplementedError

722 

@classmethod
def mexists(cls, uris: Iterable[ResourcePath]) -> Dict[ResourcePath, bool]:
    """Check for existence of multiple URIs at once.

    Parameters
    ----------
    uris : iterable of `ResourcePath`
        The URIs to test.

    Returns
    -------
    existence : `dict` of [`ResourcePath`, `bool`]
        Mapping of original URI to boolean indicating existence. A URI
        whose ``exists()`` call raises is reported as `False`.
    """
    results: Dict[ResourcePath, bool] = {}
    # The executor is a context manager so that its worker threads are
    # released when the checks have finished.
    with concurrent.futures.ThreadPoolExecutor(max_workers=MAX_WORKERS) as exists_executor:
        future_exists = {exists_executor.submit(uri.exists): uri for uri in uris}

        for future in concurrent.futures.as_completed(future_exists):
            uri = future_exists[future]
            try:
                exists = future.result()
            except Exception:
                # Best effort: a failing check counts as "does not exist".
                exists = False
            results[uri] = exists
    return results

749 

def remove(self) -> None:
    """Remove the resource.

    Raises
    ------
    NotImplementedError
        Always raised by this implementation.
    """
    raise NotImplementedError

753 

def isabs(self) -> bool:
    """Report whether the resource is fully specified.

    Returns
    -------
    isabs : `bool`
        Always `True` in this implementation. Only a schemeless URI is
        expected to answer differently.
    """
    return True

765 

def abspath(self) -> ResourcePath:
    """Return the URI using an absolute path.

    Returns
    -------
    abs : `ResourcePath`
        This implementation returns ``self`` unchanged. Schemeless URIs
        are expected to be upgraded to file URIs instead.
    """
    return self

776 

def _as_local(self) -> Tuple[str, bool]:
    """Return the location of the (possibly remote) resource as local file.

    Helper for the `as_local` context manager.

    Returns
    -------
    path : `str`
        For a remote resource, a copy of the resource on the local file
        system, probably in a temporary directory. For a local resource
        this should be the actual path to the resource.
    is_temporary : `bool`
        Whether the local path is a temporary file.

    Raises
    ------
    NotImplementedError
        Always raised by this implementation.
    """
    raise NotImplementedError

793 

@contextlib.contextmanager
def as_local(self) -> Iterator[ResourcePath]:
    """Provide the (possibly remote) resource as a local file.

    Yields
    ------
    local : `ResourcePath`
        For a remote resource, a copy of the resource on the local file
        system, probably in a temporary directory. For a local resource
        this should be the actual path to the resource.

    Raises
    ------
    IsADirectoryError
        Raised if this URI is directory-like.

    Notes
    -----
    Any local temporary file is deleted automatically when the context
    exits.

    Examples
    --------
    Should be used as a context manager:

    .. code-block:: py

       with uri.as_local() as local:
           ospath = local.ospath
    """
    if self.dirLike:
        raise IsADirectoryError(f"Directory-like URI {self} cannot be fetched as local.")

    local_path, made_temporary = self._as_local()
    local_uri = ResourcePath(local_path, isTemporary=made_temporary)

    try:
        yield local_uri
    finally:
        # Never delete when the local URI is this URI (a temporary file
        # that was already local). The caller may also have relocated the
        # temporary file, hence the existence check.
        if self != local_uri:
            if made_temporary:
                if local_uri.exists():
                    local_uri.remove()

834 

@classmethod
@contextlib.contextmanager
def temporary_uri(
    cls, prefix: Optional[ResourcePath] = None, suffix: Optional[str] = None
) -> Iterator[ResourcePath]:
    """Create a temporary file-like URI.

    Parameters
    ----------
    prefix : `ResourcePath`, optional
        Prefix to use. Without this the path will be formed as a local
        file URI in a temporary directory. Ensuring that the prefix
        location exists is the responsibility of the caller.
    suffix : `str`, optional
        A file suffix to be used. The ``.`` should be included in this
        suffix.

    Yields
    ------
    uri : `ResourcePath`
        The temporary URI. Will be removed when the context is completed.

    Raises
    ------
    NotImplementedError
        Raised if the resulting URI would be directory-like.
    """
    use_tempdir = False
    if prefix is None:
        prefix = ResourcePath(tempfile.mkdtemp(), forceDirectory=True, isTemporary=True)
        # Record that this directory has to be deleted. The isTemporary
        # flag cannot be used for that since an external prefix may have
        # it set as well.
        use_tempdir = True

    try:
        # A randomized file name is generated here, rather than using
        # mkstemp, so that local and remote prefixes are treated the same
        # way. The file itself is not created.
        characters = "abcdefghijklmnopqrstuvwxyz0123456789_"
        rng = Random()
        tempname = "".join(rng.choice(characters) for _ in range(16))
        if suffix:
            tempname += suffix
        temporary_uri = prefix.join(tempname, isTemporary=True)
        if temporary_uri.dirLike:
            # If we had a safe way to clean up a remote temporary
            # directory, we could support this.
            raise NotImplementedError("temporary_uri cannot be used to create a temporary directory.")
    except BaseException:
        # Do not leave the freshly created directory behind when the
        # URI could not be set up.
        if use_tempdir:
            shutil.rmtree(prefix.ospath, ignore_errors=True)
        raise

    try:
        yield temporary_uri
    finally:
        if use_tempdir:
            shutil.rmtree(prefix.ospath, ignore_errors=True)
        else:
            try:
                # It's okay if this does not work because the user
                # removed the file.
                temporary_uri.remove()
            except FileNotFoundError:
                pass

890 

def read(self, size: int = -1) -> bytes:
    """Open the resource and return its contents as bytes.

    Parameters
    ----------
    size : `int`, optional
        The number of bytes to read. Negative or omitted indicates
        that all data should be read.

    Raises
    ------
    NotImplementedError
        Always raised by this implementation.
    """
    raise NotImplementedError

901 

def write(self, data: bytes, overwrite: bool = True) -> None:
    """Write the supplied bytes to the new resource.

    Parameters
    ----------
    data : `bytes`
        The bytes to write to the resource. The entire contents of the
        resource will be replaced.
    overwrite : `bool`, optional
        If `True` the resource will be overwritten if it exists.
        Otherwise the write will fail.

    Raises
    ------
    NotImplementedError
        Always raised by this implementation.
    """
    raise NotImplementedError

915 

def mkdir(self) -> None:
    """For a dir-like URI, create the directory resource if needed.

    Raises
    ------
    NotImplementedError
        Always raised by this implementation.
    """
    raise NotImplementedError

919 

def isdir(self) -> bool:
    """Return True if this URI looks like a directory, else False."""
    looks_like_directory = self.dirLike
    return looks_like_directory

923 

def size(self) -> int:
    """For a non-dir-like URI, return the size of the resource.

    Returns
    -------
    sz : `int`
        The size in bytes of the resource associated with this URI.
        Returns 0 if dir-like.

    Raises
    ------
    NotImplementedError
        Always raised by this implementation.
    """
    raise NotImplementedError

934 

def __str__(self) -> str:
    """Convert the URI to its native string form."""
    url = self.geturl()
    return url

938 

def __repr__(self) -> str:
    """Return string representation suitable for evaluation."""
    url = self.geturl()
    return f'ResourcePath("{url}")'

942 

def __eq__(self, other: Any) -> bool:
    """Test equality with another object.

    Two `ResourcePath` instances are equal when `geturl` returns the same
    string for both. For any other type `NotImplemented` is returned so
    that Python can fall back to its default handling.
    """
    if isinstance(other, ResourcePath):
        return self.geturl() == other.geturl()
    return NotImplemented

948 

def __hash__(self) -> int:
    """Return a hash computed from the string form of this object."""
    as_string = str(self)
    return hash(as_string)

952 

def __lt__(self, other: ResourcePath) -> bool:
    """Return `True` if this URI string sorts before that of ``other``.

    The comparison is a plain string comparison of the `geturl` values.
    `NotImplemented` is returned when ``other`` is not a `ResourcePath`,
    matching `__eq__`, so that Python raises `TypeError` for unsupported
    operands instead of an `AttributeError` leaking from this method.
    """
    if not isinstance(other, ResourcePath):
        return NotImplemented
    return self.geturl() < other.geturl()

955 

def __le__(self, other: ResourcePath) -> bool:
    """Return `True` if this URI string sorts before or equal to ``other``.

    The comparison is a plain string comparison of the `geturl` values.
    `NotImplemented` is returned when ``other`` is not a `ResourcePath`,
    matching `__eq__`, so that Python raises `TypeError` for unsupported
    operands instead of an `AttributeError` leaking from this method.
    """
    if not isinstance(other, ResourcePath):
        return NotImplemented
    return self.geturl() <= other.geturl()

958 

def __gt__(self, other: ResourcePath) -> bool:
    """Return `True` if this URI string sorts after that of ``other``.

    The comparison is a plain string comparison of the `geturl` values.
    `NotImplemented` is returned when ``other`` is not a `ResourcePath`,
    matching `__eq__`, so that Python raises `TypeError` for unsupported
    operands instead of an `AttributeError` leaking from this method.
    """
    if not isinstance(other, ResourcePath):
        return NotImplemented
    return self.geturl() > other.geturl()

961 

def __ge__(self, other: ResourcePath) -> bool:
    """Return `True` if this URI string sorts after or equal to ``other``.

    The comparison is a plain string comparison of the `geturl` values.
    `NotImplemented` is returned when ``other`` is not a `ResourcePath`,
    matching `__eq__`, so that Python raises `TypeError` for unsupported
    operands instead of an `AttributeError` leaking from this method.
    """
    if not isinstance(other, ResourcePath):
        return NotImplemented
    return self.geturl() >= other.geturl()

964 

def __copy__(self) -> ResourcePath:
    """Return a shallow copy of this object.

    The object is immutable, so the same instance is handed back rather
    than a new one.
    """
    # Defined explicitly because the __new__ method confuses the default
    # copy machinery.
    return self

972 

def __deepcopy__(self, memo: Any) -> ResourcePath:
    """Return a deep copy of this object.

    The object is immutable, so the same instance is handed back rather
    than a new one. The ``memo`` argument is not used.
    """
    # Defined explicitly because the __new__ method confuses the default
    # copy machinery.
    return self

980 

def __getnewargs__(self) -> Tuple:
    """Return the constructor arguments used to support pickling.

    Returns
    -------
    args : `tuple`
        One-element tuple holding the string form of this object.
    """
    as_string = str(self)
    return (as_string,)

984 

@classmethod
def _fixDirectorySep(
    cls, parsed: urllib.parse.ParseResult, forceDirectory: bool = False
) -> Tuple[urllib.parse.ParseResult, bool]:
    """Make sure a directory path ends with the path separator.

    Parameters
    ----------
    parsed : `~urllib.parse.ParseResult`
        The result from parsing a URI using `urllib.parse`.
    forceDirectory : `bool`, optional
        If `True` the URI is treated as a directory and is made to end
        with a separator. If `False` the URI is interpreted as given.
        Declaring that the URI is conceptually a directory can resolve
        ambiguities when interpreting the last element of a path.

    Returns
    -------
    modified : `~urllib.parse.ParseResult`
        The parsed URI, with a separator appended to the path when one
        was required and missing.
    dirLike : `bool`
        `True` if the path of the given URI ends with a separator or
        ``forceDirectory`` is `True`. Otherwise `False`.
    """
    # The separator comes from the path module configured on the class.
    separator = cls._pathModule.sep
    has_trailing_sep = parsed.path.endswith(separator)

    # Not dir-like unless explicitly requested or already ending on a
    # separator; in that case the URI is returned untouched.
    if not (forceDirectory or has_trailing_sep):
        return parsed, False

    # Dir-like: append the separator only when it is missing.
    if not has_trailing_sep:
        parsed = parsed._replace(path=parsed.path + separator)
    return parsed, True

1024 

@classmethod
def _fixupPathUri(
    cls,
    parsed: urllib.parse.ParseResult,
    root: Optional[Union[str, ResourcePath]] = None,
    forceAbsolute: bool = False,
    forceDirectory: bool = False,
) -> Tuple[urllib.parse.ParseResult, bool]:
    """Fix up the supplied parsed URI.

    Parameters
    ----------
    parsed : `~urllib.parse.ParseResult`
        The result from parsing a URI using `urllib.parse`.
    root : `str` or `ResourcePath`, ignored
        Not used by this implementation since all URIs are absolute
        except for those representing the local file system.
    forceAbsolute : `bool`, ignored
        Not used by this implementation. URIs are generally always
        absolute.
    forceDirectory : `bool`, optional
        If `True` the URI is made to end with a separator. If `False`
        the URI is interpreted as given. Declaring that the URI is
        conceptually a directory can resolve ambiguities when
        interpreting the last element of a path.

    Returns
    -------
    modified : `~urllib.parse.ParseResult`
        The parsed URI, updated if a separator had to be added.
    dirLike : `bool`
        `True` if the path of the given URI ends with a separator or
        ``forceDirectory`` is `True`. Otherwise `False`.

    Notes
    -----
    This implementation only applies `_fixDirectorySep`; it does not
    itself resolve relative paths or normalize scheme-less paths.

    Relative paths are explicitly not supported by RFC8089 but `urllib`
    does accept URIs of the form ``file:relative/path.ext``, and such
    paths have to be made absolute before they can be used. Presumably
    that is handled by the implementation for local files, which is not
    part of this method.

    AWS S3 differentiates between keys with trailing POSIX separators
    (i.e. ``/dir`` and ``/dir/``) whereas POSIX does not necessarily.
    """
    fixed = cls._fixDirectorySep(parsed, forceDirectory)
    return fixed

1072 

def transfer_from(
    self,
    src: ResourcePath,
    transfer: str,
    overwrite: bool = False,
    transaction: Optional[TransactionProtocol] = None,
) -> None:
    """Transfer a resource from another URI to this one.

    Parameters
    ----------
    src : `ResourcePath`
        Source URI.
    transfer : `str`
        Mode to use when transferring the resource. Generically there
        are many standard options: copy, link, symlink, hardlink,
        relsymlink. Not every URI supports every mode.
    overwrite : `bool`, optional
        Whether an existing file may be overwritten. Defaults to `False`.
    transaction : `~lsst.resources.utils.TransactionProtocol`, optional
        A transaction object that can (depending on implementation)
        roll back transfers on error. Not guaranteed to be implemented.

    Raises
    ------
    NotImplementedError
        Always raised by this implementation, which supports no transfer
        modes.

    Notes
    -----
    Conceptually this is hard to scale as the number of URI schemes
    grows. The destination URI matters more than the source URI because
    that is where all the transfer modes are relevant (with the
    complication that "move" deletes the source).

    Local file to local file is the fundamental use case, but every
    other scheme has to support "copy" to a local file (with implicit
    support for "move") and copy from a local file. The "link" options
    tend to be specific to local file systems.

    "move" is a "copy" where the remote resource is deleted at the end.
    Whether this works depends on the source URI rather than the
    destination URI. Reverting a move on transaction rollback is
    expected to be problematic if a remote resource was involved.
    """
    message = f"No transfer modes supported by URI scheme {self.scheme}"
    raise NotImplementedError(message)

1114 

def walk(
    self, file_filter: Optional[Union[str, re.Pattern]] = None
) -> Iterator[Union[List, Tuple[ResourcePath, List[str], List[str]]]]:
    """Walk the directory tree, reporting matching files and directories.

    Parameters
    ----------
    file_filter : `str` or `re.Pattern`, optional
        Regex used to filter the list of files before it is returned.

    Yields
    ------
    dirpath : `ResourcePath`
        Current directory being examined.
    dirnames : `list` of `str`
        Names of subdirectories within dirpath.
    filenames : `list` of `str`
        Names of all the files within dirpath.

    Raises
    ------
    NotImplementedError
        Always raised by this implementation, and raised when the method
        is called rather than on first iteration.
    """
    raise NotImplementedError()

1135 

@overload
@classmethod
def findFileResources(
    cls,
    candidates: Iterable[ResourcePathExpression],
    file_filter: Optional[Union[str, re.Pattern]],
    grouped: Literal[True],
) -> Iterator[Iterator[ResourcePath]]:
    ...

@overload
@classmethod
def findFileResources(
    cls,
    candidates: Iterable[ResourcePathExpression],
    *,
    grouped: Literal[True],
) -> Iterator[Iterator[ResourcePath]]:
    ...

@overload
@classmethod
def findFileResources(
    cls,
    candidates: Iterable[ResourcePathExpression],
    file_filter: Optional[Union[str, re.Pattern]] = None,
    grouped: Literal[False] = False,
) -> Iterator[ResourcePath]:
    ...

@classmethod
def findFileResources(
    cls,
    candidates: Iterable[ResourcePathExpression],
    file_filter: Optional[Union[str, re.Pattern]] = None,
    grouped: bool = False,
) -> Iterator[Union[ResourcePath, Iterator[ResourcePath]]]:
    """Get all the files from a list of values.

    Parameters
    ----------
    candidates : iterable [`str` or `ResourcePath`]
        The files to return and directories in which to look for files to
        return.
    file_filter : `str` or `re.Pattern`, optional
        The regex to use when searching for files within directories.
        By default returns all the found files.
    grouped : `bool`, optional
        If `True` the results will be grouped by directory and each
        yielded value will be an iterator over URIs. If `False` each
        URI will be returned separately.

    Yields
    ------
    found_file: `ResourcePath`
        The passed-in URIs and URIs found in passed-in directories.
        If grouping is enabled, each of the yielded values will be an
        iterator yielding members of the group. Files given explicitly
        will be returned as a single group at the end.

    Notes
    -----
    If a value is a file it is yielded immediately without checking that it
    exists. If a value is a directory, all the files in the directory
    (recursively) that match the regex will be yielded in turn.

    Each group iterator is tied to its own directory when it is created,
    so groups may be collected first and consumed later, in any order.
    """
    fileRegex = None if file_filter is None else re.compile(file_filter)

    # Explicitly given files, collected only when grouping is requested.
    singles = []

    # Find all the files of interest
    for location in candidates:
        uri = ResourcePath(location)

        if not uri.isdir():
            if grouped:
                singles.append(uri)
            else:
                yield uri
            continue

        for found in uri.walk(fileRegex):
            if not found:
                # This means the uri does not exist and by
                # convention we ignore it
                continue
            root, _dirs, files = found
            if not files:
                continue
            if grouped:
                # map() captures this directory's bound ``join`` right
                # away. A generator expression would look ``root`` up
                # lazily and silently use a later directory if the caller
                # advanced this iterator before consuming the group.
                yield map(root.join, files)
            else:
                for name in files:
                    yield root.join(name)

    # Finally, return any explicitly given files in one group
    if grouped and singles:
        yield iter(singles)

1232 

@contextlib.contextmanager
def open(
    self,
    mode: str = "r",
    *,
    encoding: Optional[str] = None,
    prefer_file_temporary: bool = False,
) -> Iterator[IO]:
    """Return a context manager wrapping a file-like object for this URI.

    Parameters
    ----------
    mode : `str`
        String indicating the mode in which to open the file. Values are
        the same as those accepted by `builtins.open`, though intrinsically
        read-only URI types may only support read modes, and
        `io.IOBase.seekable` is not guaranteed to be `True` on the returned
        object.
    encoding : `str`, optional
        Unicode encoding for text IO; ignored for binary IO. Defaults to
        ``locale.getpreferredencoding(False)``, just as `builtins.open`
        does.
    prefer_file_temporary : `bool`, optional
        If `True`, for implementations that require transfers from a remote
        system to temporary local storage and/or back, use a temporary file
        instead of an in-memory buffer; this is generally slower, but it
        may be necessary to avoid excessive memory usage by large files.
        Ignored by implementations that do not require a temporary.

    Returns
    -------
    cm : `contextlib.ContextManager`
        A context manager that wraps a file-like object.

    Raises
    ------
    IsADirectoryError
        Raised if this URI is dir-like.
    FileExistsError
        Raised if ``mode`` contains ``"x"`` and the resource already
        exists.

    Notes
    -----
    The default implementation of this method uses a local temporary buffer
    (in-memory or file, depending on ``prefer_file_temporary``) with calls
    to `read`, `write`, `as_local`, and `transfer_from` as necessary to
    read and write from/to remote systems. Remote writes thus occur only
    when the context manager is exited. `ResourcePath` implementations
    that can return a more efficient native buffer should do so whenever
    possible (as is guaranteed for local files). `ResourcePath`
    implementations for which `as_local` does not return a temporary are
    required to reimplement `open`, though they may delegate to `super`
    when `prefer_file_temporary` is `False`.
    """
    if self.dirLike:
        raise IsADirectoryError(f"Directory-like URI {self} cannot be opened.")
    if "x" in mode and self.exists():
        raise FileExistsError(f"File at {self} already exists.")

    # Work out once what the mode string asks for.
    appending = "a" in mode
    needs_existing_content = "r" in mode or appending
    writes_back = "r" not in mode or "+" in mode
    may_overwrite = "x" not in mode

    if prefer_file_temporary:
        # Start from a local copy when existing content is needed,
        # otherwise from a fresh temporary with the same extension.
        local_cm = (
            self.as_local()
            if needs_existing_content
            else self.temporary_uri(suffix=self.getExtension())
        )
        with local_cm as local_uri:
            assert local_uri.isTemporary, (
                "ResourcePath implementations for which as_local is not "
                "a temporary must reimplement `open`."
            )
            with open(local_uri.ospath, mode=mode, encoding=encoding) as file_buffer:
                if appending:
                    file_buffer.seek(0, io.SEEK_END)
                yield file_buffer
            # The local file has been closed; push it back if it may have
            # been modified.
            if writes_back:
                self.transfer_from(local_uri, transfer="copy", overwrite=may_overwrite)
        return

    # In-memory buffer, preloaded with the current content if required.
    in_bytes = self.read() if needs_existing_content else b""
    if "b" in mode:
        bytes_buffer = io.BytesIO(in_bytes)
        if appending:
            bytes_buffer.seek(0, io.SEEK_END)
        yield bytes_buffer
        out_bytes = bytes_buffer.getvalue()
    else:
        if encoding is None:
            encoding = locale.getpreferredencoding(False)
        str_buffer = io.StringIO(in_bytes.decode(encoding))
        if appending:
            str_buffer.seek(0, io.SEEK_END)
        yield str_buffer
        out_bytes = str_buffer.getvalue().encode(encoding)

    # Reached only when the caller's block finished without raising.
    if writes_back:
        self.write(out_bytes, overwrite=may_overwrite)