Coverage for python/lsst/resources/_resourcePath.py: 22%

406 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2023-03-30 09:30 +0000

1# This file is part of lsst-resources. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# Use of this source code is governed by a 3-clause BSD-style 

10# license that can be found in the LICENSE file. 

11 

12from __future__ import annotations 

13 

14import concurrent.futures 

15import contextlib 

16import copy 

17import io 

18import locale 

19import logging 

20import os 

21import posixpath 

22import re 

23import shutil 

24import tempfile 

25import urllib.parse 

26from pathlib import Path, PurePath, PurePosixPath 

27from random import Random 

28 

29__all__ = ("ResourcePath", "ResourcePathExpression") 

30 

31from typing import ( 

32 TYPE_CHECKING, 

33 Any, 

34 Dict, 

35 Iterable, 

36 Iterator, 

37 List, 

38 Literal, 

39 Optional, 

40 Tuple, 

41 Type, 

42 Union, 

43 overload, 

44) 

45 

46from ._resourceHandles._baseResourceHandle import ResourceHandleProtocol 

47 

48if TYPE_CHECKING: 48 ↛ 49line 48 didn't jump to line 49, because the condition on line 48 was never true

49 from .utils import TransactionProtocol 

50 

51 

# Module-level logger named after this module.
log = logging.getLogger(__name__)

# Pattern matching a percent-encoded octet written with upper-case hex
# digits; used to spot strings that may already be URI-quoted.
ESCAPES_RE = re.compile(r"%[A-F0-9]{2}")

# The "#" character in percent-encoded form, computed once at import time.
ESCAPED_HASH = urllib.parse.quote("#")

# Upper bound on the number of worker threads used by parallelized
# operations. Values above 10 must be kept consistent with connection
# pool sizing (for example in urllib3).
MAX_WORKERS = 10


ResourcePathExpression = Union[str, urllib.parse.ParseResult, "ResourcePath", Path]
"""Type-annotation alias for objects that can be coerced to ResourcePath.
"""

69 

70 

71class ResourcePath: 

72 """Convenience wrapper around URI parsers. 

73 

74 Provides access to URI components and can convert file 

75 paths into absolute path URIs. Scheme-less URIs are treated as if 

76 they are local file system paths and are converted to absolute URIs. 

77 

78 A specialist subclass is created for each supported URI scheme. 

79 

80 Parameters 

81 ---------- 

82 uri : `str`, `Path`, `urllib.parse.ParseResult`, or `ResourcePath`. 

83 URI in string form. Can be scheme-less if referring to a local 

84 filesystem path. 

85 root : `str` or `ResourcePath`, optional 

86 When fixing up a relative path in a ``file`` scheme or if scheme-less, 

87 use this as the root. Must be absolute. If `None` the current 

88 working directory will be used. Can be a file URI. 

89 forceAbsolute : `bool`, optional 

90 If `True`, scheme-less relative URI will be converted to an absolute 

91 path using a ``file`` scheme. If `False` scheme-less URI will remain 

92 scheme-less and will not be updated to ``file`` or absolute path. 

93 forceDirectory : `bool`, optional 

94 If `True` forces the URI to end with a separator, otherwise given URI 

95 is interpreted as is. 

96 isTemporary : `bool`, optional 

97 If `True` indicates that this URI points to a temporary resource. 

98 The default is `False`, unless ``uri`` is already a `ResourcePath` 

99 instance and ``uri.isTemporary is True``. 

100 """ 

101 

102 _pathLib: Type[PurePath] = PurePosixPath 

103 """Path library to use for this scheme.""" 

104 

105 _pathModule = posixpath 

106 """Path module to use for this scheme.""" 

107 

108 transferModes: Tuple[str, ...] = ("copy", "auto", "move") 

109 """Transfer modes supported by this implementation. 

110 

111 Move is special in that it is generally a copy followed by an unlink. 

112 Whether that unlink works depends critically on whether the source URI 

113 implements unlink. If it does not the move will be reported as a failure. 

114 """ 

115 

116 transferDefault: str = "copy" 

117 """Default mode to use for transferring if ``auto`` is specified.""" 

118 

119 quotePaths = True 

120 """True if path-like elements modifying a URI should be quoted. 

121 

122 All non-schemeless URIs have to internally use quoted paths. Therefore 

123 if a new file name is given (e.g. to updatedFile or join) a decision must 

124 be made whether to quote it to be consistent. 

125 """ 

126 

127 isLocal = False 

128 """If `True` this URI refers to a local file.""" 

129 

130 # This is not an ABC with abstract methods because the __new__ being 

131 # a factory confuses mypy such that it assumes that every constructor 

132 # returns a ResourcePath and then determines that all the abstract methods 

133 # are still abstract. If they are not marked abstract but just raise 

134 # mypy is fine with it. 

135 

136 # mypy is confused without these 

137 _uri: urllib.parse.ParseResult 

138 isTemporary: bool 

139 dirLike: bool 

140 

def __new__(
    cls,
    uri: ResourcePathExpression,
    root: Optional[Union[str, ResourcePath]] = None,
    forceAbsolute: bool = True,
    forceDirectory: bool = False,
    isTemporary: Optional[bool] = None,
) -> ResourcePath:
    """Build an instance of the subclass matching the URI scheme.

    Acts as a factory: the class of the returned object is selected from
    the scheme of the parsed URI, unless a subclass is instantiated
    directly with a `urllib.parse.ParseResult`.

    Raises
    ------
    RuntimeError
        Raised if ``uri`` is already a `ResourcePath` and the requested
        ``forceDirectory`` or ``isTemporary`` disagrees with it.
    ValueError
        Raised if ``uri`` is of an unsupported type.
    NotImplementedError
        Raised if the URI scheme is not supported.
    """
    parsed: urllib.parse.ParseResult
    dirLike: bool = False
    target_cls: Optional[Type[ResourcePath]] = None

    # Path-like objects are processed through their string form.
    if isinstance(uri, os.PathLike):
        uri = str(uri)

    if isinstance(uri, str):
        # Local file names may contain special characters, so quote them
        # for the parser (they can be unquoted later). Everything with an
        # explicit scheme is assumed to be quoted already. "file:/a/b" is
        # sometimes written instead of "file:///a/b", so an explicit
        # "file:" prefix is never quoted.
        looks_schemeless = "://" not in uri and not uri.startswith("file:")
        if looks_schemeless:
            if ESCAPES_RE.search(uri):
                log.warning("Possible double encoding of %s", uri)
            else:
                uri = urllib.parse.quote(uri)
                # Fragments must be supported even for scheme-less URIs,
                # so restore "#" -- but only in the file part, i.e. after
                # the final "/".
                if ESCAPED_HASH in uri:
                    head, sep, tail = uri.rpartition("/")
                    uri = head + sep + tail.replace(ESCAPED_HASH, "#")

        parsed = urllib.parse.urlparse(uri)
    elif isinstance(uri, urllib.parse.ParseResult):
        parsed = copy.copy(uri)
        # When a subclass (rather than ResourcePath itself) is being
        # instantiated, use that subclass directly. This can be
        # inconsistent if used outside ResourcePath.replace(), e.g.
        # S3ResourcePath(urllib.parse.urlparse("file://a/b.txt")), but it
        # prevents a scheme-less absolute URI from unexpectedly becoming
        # a file URI in updatedFile or updatedExtension.
        if cls is not ResourcePath:
            parsed, dirLike = cls._fixDirectorySep(parsed, forceDirectory)
            target_cls = cls

    elif isinstance(uri, ResourcePath):
        # Instances are immutable, so the argument can be handed back
        # unchanged when it already agrees with the requested options.
        # A missing scheme is fixed by re-parsing the string form; the
        # other disagreements are treated as errors rather than papered
        # over.
        if forceDirectory and not uri.dirLike:
            raise RuntimeError(
                f"{uri} is already a file-like ResourcePath; cannot force it to directory."
            )
        if isTemporary is not None and isTemporary is not uri.isTemporary:
            raise RuntimeError(
                f"{uri} is already a {'temporary' if uri.isTemporary else 'permanent'} "
                f"ResourcePath; cannot make it {'temporary' if isTemporary else 'permanent'}."
            )
        if uri.scheme or not forceAbsolute:
            return uri
        return ResourcePath(
            str(uri),
            root=root,
            forceAbsolute=True,
            forceDirectory=uri.dirLike,
            isTemporary=uri.isTemporary,
        )
    else:
        raise ValueError(
            f"Supplied URI must be string, Path, ResourcePath, or ParseResult but got '{uri!r}'"
        )

    if target_cls is None:
        # Select the implementation from the URI scheme. Imports are
        # deferred so only the needed implementation module is loaded.
        scheme = parsed.scheme
        if not scheme:
            from .schemeless import SchemelessResourcePath

            target_cls = SchemelessResourcePath
        elif scheme == "file":
            from .file import FileResourcePath

            target_cls = FileResourcePath
        elif scheme == "s3":
            from .s3 import S3ResourcePath

            target_cls = S3ResourcePath
        elif scheme.startswith("http"):
            from .http import HttpResourcePath

            target_cls = HttpResourcePath
        elif scheme == "gs":
            from .gs import GSResourcePath

            target_cls = GSResourcePath
        elif scheme == "resource":
            # Rules for scheme names disallow pkg_resource
            from .packageresource import PackageResourcePath

            target_cls = PackageResourcePath
        elif scheme == "mem":
            # in-memory datastore object
            from .mem import InMemoryResourcePath

            target_cls = InMemoryResourcePath
        else:
            raise NotImplementedError(
                f"No URI support for scheme: '{parsed.scheme}' in {parsed.geturl()}"
            )

        parsed, dirLike = target_cls._fixupPathUri(
            parsed, root=root, forceAbsolute=forceAbsolute, forceDirectory=forceDirectory
        )

        # The fix-up step can turn a scheme-less URI into a file URI, in
        # which case the class has to follow.
        if parsed.scheme == "file":
            from .file import FileResourcePath

            target_cls = FileResourcePath

    # Create the instance of the chosen class and set attributes directly.
    instance = object.__new__(target_cls)
    instance._uri = parsed
    instance.dirLike = dirLike
    instance.isTemporary = False if isTemporary is None else isTemporary
    return instance

281 

282 @property 

283 def scheme(self) -> str: 

284 """Return the URI scheme. 

285 

286 Notes 

287 ----- 

288 (``://`` is not part of the scheme). 

289 """ 

290 return self._uri.scheme 

291 

292 @property 

293 def netloc(self) -> str: 

294 """Return the URI network location.""" 

295 return self._uri.netloc 

296 

297 @property 

298 def path(self) -> str: 

299 """Return the path component of the URI.""" 

300 return self._uri.path 

301 

302 @property 

303 def unquoted_path(self) -> str: 

304 """Return path component of the URI with any URI quoting reversed.""" 

305 return urllib.parse.unquote(self._uri.path) 

306 

307 @property 

308 def ospath(self) -> str: 

309 """Return the path component of the URI localized to current OS.""" 

310 raise AttributeError(f"Non-file URI ({self}) has no local OS path.") 

311 

312 @property 

313 def relativeToPathRoot(self) -> str: 

314 """Return path relative to network location. 

315 

316 Effectively, this is the path property with posix separator stripped 

317 from the left hand side of the path. 

318 

319 Always unquotes. 

320 """ 

321 p = self._pathLib(self.path) 

322 relToRoot = str(p.relative_to(p.root)) 

323 if self.dirLike and not relToRoot.endswith("/"): 

324 relToRoot += "/" 

325 return urllib.parse.unquote(relToRoot) 

326 

327 @property 

328 def is_root(self) -> bool: 

329 """Return whether this URI points to the root of the network location. 

330 

331 This means that the path components refers to the top level. 

332 """ 

333 relpath = self.relativeToPathRoot 

334 if relpath == "./": 

335 return True 

336 return False 

337 

338 @property 

339 def fragment(self) -> str: 

340 """Return the fragment component of the URI.""" 

341 return self._uri.fragment 

342 

343 @property 

344 def params(self) -> str: 

345 """Return any parameters included in the URI.""" 

346 return self._uri.params 

347 

348 @property 

349 def query(self) -> str: 

350 """Return any query strings included in the URI.""" 

351 return self._uri.query 

352 

353 def geturl(self) -> str: 

354 """Return the URI in string form. 

355 

356 Returns 

357 ------- 

358 url : `str` 

359 String form of URI. 

360 """ 

361 return self._uri.geturl() 

362 

363 def root_uri(self) -> ResourcePath: 

364 """Return the base root URI. 

365 

366 Returns 

367 ------- 

368 uri : `ResourcePath` 

369 root URI. 

370 """ 

371 return self.replace(path="", forceDirectory=True) 

372 

def split(self) -> Tuple[ResourcePath, str]:
    """Split the URI into a directory URI and a final path component.

    Returns
    -------
    head: `ResourcePath`
        Everything leading up to tail, expanded and normalized as per
        ResourcePath rules.
    tail : `str`
        Last `self.path` component, unquoted. Empty if the path ends on
        a separator; never contains separators.

    Notes
    -----
    Equivalent to `os.path.split()` where head preserves the URI
    components.
    """
    path_module = self._pathModule
    head_path, quoted_tail = path_module.split(self.path)

    # Only a scheme-less URI can be relative, and it has to stay that way;
    # every other URI is absolute already.
    keep_absolute = path_module.isabs(self.path)
    head_uri = ResourcePath(
        self._uri._replace(path=head_path),
        forceDirectory=True,
        forceAbsolute=keep_absolute,
    )

    # The file part is never reported with quoted metacharacters.
    return head_uri, urllib.parse.unquote(quoted_tail)

402 

403 def basename(self) -> str: 

404 """Return the base name, last element of path, of the URI. 

405 

406 Returns 

407 ------- 

408 tail : `str` 

409 Last part of the path attribute. Trail will be empty if path ends 

410 on a separator. 

411 

412 Notes 

413 ----- 

414 If URI ends on a slash returns an empty string. This is the second 

415 element returned by `split()`. 

416 

417 Equivalent of `os.path.basename()``. 

418 """ 

419 return self.split()[1] 

420 

421 def dirname(self) -> ResourcePath: 

422 """Return the directory component of the path as a new `ResourcePath`. 

423 

424 Returns 

425 ------- 

426 head : `ResourcePath` 

427 Everything except the tail of path attribute, expanded and 

428 normalized as per ResourcePath rules. 

429 

430 Notes 

431 ----- 

432 Equivalent of `os.path.dirname()`. 

433 """ 

434 return self.split()[0] 

435 

436 def parent(self) -> ResourcePath: 

437 """Return a `ResourcePath` of the parent directory. 

438 

439 Returns 

440 ------- 

441 head : `ResourcePath` 

442 Everything except the tail of path attribute, expanded and 

443 normalized as per `ResourcePath` rules. 

444 

445 Notes 

446 ----- 

447 For a file-like URI this will be the same as calling `dirname()`. 

448 """ 

449 # When self is file-like, return self.dirname() 

450 if not self.dirLike: 

451 return self.dirname() 

452 # When self is dir-like, return its parent directory, 

453 # regardless of the presence of a trailing separator 

454 originalPath = self._pathLib(self.path) 

455 parentPath = originalPath.parent 

456 return self.replace(path=str(parentPath), forceDirectory=True) 

457 

def replace(self, forceDirectory: bool = False, isTemporary: bool = False, **kwargs: Any) -> ResourcePath:
    """Return a new `ResourcePath` with the specified components replaced.

    Parameters
    ----------
    forceDirectory : `bool`, optional
        Passed to the constructor to force the new URI to be dir-like.
    isTemporary : `bool`, optional
        Indicate that the resulting URI is a temporary resource.
    **kwargs
        Components of a `urllib.parse.ParseResult` that should be
        modified for the newly-created `ResourcePath`.

    Returns
    -------
    new : `ResourcePath`
        New object, of the same class as this one, with updated values.

    Raises
    ------
    ValueError
        Raised if an attempt is made to change the URI scheme.
    """
    # Changing scheme is not supported for now.
    if "scheme" in kwargs:
        raise ValueError(f"Can not use replace() method to change URI scheme for {self}")
    modified = self._uri._replace(**kwargs)
    klass = self.__class__
    return klass(modified, forceDirectory=forceDirectory, isTemporary=isTemporary)

487 

488 def updatedFile(self, newfile: str) -> ResourcePath: 

489 """Return new URI with an updated final component of the path. 

490 

491 Parameters 

492 ---------- 

493 newfile : `str` 

494 File name with no path component. 

495 

496 Returns 

497 ------- 

498 updated : `ResourcePath` 

499 

500 Notes 

501 ----- 

502 Forces the ResourcePath.dirLike attribute to be false. The new file 

503 path will be quoted if necessary. 

504 """ 

505 if self.quotePaths: 

506 newfile = urllib.parse.quote(newfile) 

507 dir, _ = self._pathModule.split(self.path) 

508 newpath = self._pathModule.join(dir, newfile) 

509 

510 updated = self.replace(path=newpath) 

511 updated.dirLike = False 

512 return updated 

513 

514 def updatedExtension(self, ext: Optional[str]) -> ResourcePath: 

515 """Return a new `ResourcePath` with updated file extension. 

516 

517 All file extensions are replaced. 

518 

519 Parameters 

520 ---------- 

521 ext : `str` or `None` 

522 New extension. If an empty string is given any extension will 

523 be removed. If `None` is given there will be no change. 

524 

525 Returns 

526 ------- 

527 updated : `ResourcePath` 

528 URI with the specified extension. Can return itself if 

529 no extension was specified. 

530 """ 

531 if ext is None: 

532 return self 

533 

534 # Get the extension 

535 current = self.getExtension() 

536 

537 # Nothing to do if the extension already matches 

538 if current == ext: 

539 return self 

540 

541 # Remove the current extension from the path 

542 # .fits.gz counts as one extension do not use os.path.splitext 

543 path = self.path 

544 if current: 

545 path = path[: -len(current)] 

546 

547 # Ensure that we have a leading "." on file extension (and we do not 

548 # try to modify the empty string) 

549 if ext and not ext.startswith("."): 

550 ext = "." + ext 

551 

552 return self.replace(path=path + ext) 

553 

554 def getExtension(self) -> str: 

555 """Return the file extension(s) associated with this URI path. 

556 

557 Returns 

558 ------- 

559 ext : `str` 

560 The file extension (including the ``.``). Can be empty string 

561 if there is no file extension. Usually returns only the last 

562 file extension unless there is a special extension modifier 

563 indicating file compression, in which case the combined 

564 extension (e.g. ``.fits.gz``) will be returned. 

565 """ 

566 special = {".gz", ".bz2", ".xz", ".fz"} 

567 

568 # Get the file part of the path so as not to be confused by 

569 # "." in directory names. 

570 basename = self.basename() 

571 extensions = self._pathLib(basename).suffixes 

572 

573 if not extensions: 

574 return "" 

575 

576 ext = extensions.pop() 

577 

578 # Multiple extensions, decide whether to include the final two 

579 if extensions and ext in special: 

580 ext = f"{extensions[-1]}{ext}" 

581 

582 return ext 

583 

def join(
    self, path: Union[str, ResourcePath], isTemporary: Optional[bool] = None, forceDirectory: bool = False
) -> ResourcePath:
    """Return a new `ResourcePath` with additional path components.

    Parameters
    ----------
    path : `str`, `ResourcePath`
        Additional file components to append to the current URI. Assumed
        to include a file at the end. Will be quoted depending on the
        associated URI scheme. If the path is a URI with a scheme, that
        URI is returned directly (matching the behavior of
        `os.path.join()`).
    isTemporary : `bool`, optional
        Indicate that the resulting URI represents a temporary resource.
        Default is ``self.isTemporary``.
    forceDirectory : `bool`, optional
        If `True` forces the URI to end with a separator, otherwise given
        URI is interpreted as is.

    Returns
    -------
    new : `ResourcePath`
        New URI with any file at the end replaced with the new path
        components.

    Notes
    -----
    Schemeless URIs assume local path separator but all other URIs assume
    POSIX separator if the supplied path has directory structure.

    Raises
    ------
    ValueError
        Raised if ``path`` is an absolute scheme-less URI, since it is
        unclear whether a ``file`` URI or a relative path was intended.
    RuntimeError
        Raised if this attempts to join a temporary URI to a non-temporary
        URI.
    """
    if isTemporary is None:
        isTemporary = self.isTemporary
    elif self.isTemporary and not isTemporary:
        raise RuntimeError("Cannot join temporary URI to non-temporary URI.")

    # Parse without forcing to absolute so that the expected case of a
    # relative path can be recognized.
    candidate = ResourcePath(
        path, forceAbsolute=False, forceDirectory=forceDirectory, isTemporary=isTemporary
    )

    # A scheme marks an explicit URI, which is used as is.
    if candidate.scheme:
        return candidate

    # No scheme but absolute: refuse.
    if candidate.isabs():
        raise ValueError(f"Can not join absolute scheme-less {candidate!r} to another URI.")

    # A plain string is used as given so that "#" may appear in the file
    # name; anything else contributes its unquoted path.
    if not isinstance(path, str):
        path = candidate.unquoted_path

    base = self.dirname()  # By definition a directory URI

    # Ask the directory URI about quoting, not self, since dirname can
    # change the URI scheme for schemeless -> file.
    if base.quotePaths:
        path = urllib.parse.quote(path)

    path_module = self._pathModule
    combined = path_module.normpath(path_module.join(base.path, path))

    # normpath can strip a trailing separator, so a path that ended with
    # one is forced back to a directory.
    as_directory = forceDirectory or path.endswith(path_module.sep)
    return base.replace(path=combined, forceDirectory=as_directory, isTemporary=isTemporary)

675 

676 def relative_to(self, other: ResourcePath) -> Optional[str]: 

677 """Return the relative path from this URI to the other URI. 

678 

679 Parameters 

680 ---------- 

681 other : `ResourcePath` 

682 URI to use to calculate the relative path. Must be a parent 

683 of this URI. 

684 

685 Returns 

686 ------- 

687 subpath : `str` 

688 The sub path of this URI relative to the supplied other URI. 

689 Returns `None` if there is no parent child relationship. 

690 Scheme and netloc must match. 

691 """ 

692 # Scheme-less absolute other is treated as if it's a file scheme. 

693 # Scheme-less relative other can only return non-None if self 

694 # is also scheme-less relative and that is handled specifically 

695 # in a subclass. 

696 if not other.scheme and other.isabs(): 

697 other = other.abspath() 

698 

699 # Scheme-less self is handled elsewhere. 

700 if self.scheme != other.scheme or self.netloc != other.netloc: 

701 return None 

702 

703 enclosed_path = self._pathLib(self.relativeToPathRoot) 

704 parent_path = other.relativeToPathRoot 

705 subpath: Optional[str] 

706 try: 

707 subpath = str(enclosed_path.relative_to(parent_path)) 

708 except ValueError: 

709 subpath = None 

710 else: 

711 subpath = urllib.parse.unquote(subpath) 

712 return subpath 

713 

714 def exists(self) -> bool: 

715 """Indicate that the resource is available. 

716 

717 Returns 

718 ------- 

719 exists : `bool` 

720 `True` if the resource exists. 

721 """ 

722 raise NotImplementedError() 

723 

@classmethod
def mexists(cls, uris: Iterable[ResourcePath]) -> Dict[ResourcePath, bool]:
    """Check for existence of multiple URIs at once.

    Each URI's ``exists`` method is run on a thread pool of at most
    ``MAX_WORKERS`` threads.

    Parameters
    ----------
    uris : iterable of `ResourcePath`
        The URIs to test.

    Returns
    -------
    existence : `dict` of [`ResourcePath`, `bool`]
        Mapping of original URI to boolean indicating existence. A URI
        whose check raised an exception is reported as not existing.
    """
    results: Dict[ResourcePath, bool] = {}
    # Use the executor as a context manager so its worker threads are
    # released when the checks are done instead of lingering until the
    # executor object is garbage collected.
    with concurrent.futures.ThreadPoolExecutor(max_workers=MAX_WORKERS) as exists_executor:
        future_exists = {exists_executor.submit(uri.exists): uri for uri in uris}
        for future in concurrent.futures.as_completed(future_exists):
            uri = future_exists[future]
            try:
                exists = future.result()
            except Exception:
                # Deliberate best effort: any failure counts as "missing".
                exists = False
            results[uri] = exists
    return results

750 

751 def remove(self) -> None: 

752 """Remove the resource.""" 

753 raise NotImplementedError() 

754 

755 def isabs(self) -> bool: 

756 """Indicate that the resource is fully specified. 

757 

758 For non-schemeless URIs this is always true. 

759 

760 Returns 

761 ------- 

762 isabs : `bool` 

763 `True` in all cases except schemeless URI. 

764 """ 

765 return True 

766 

767 def abspath(self) -> ResourcePath: 

768 """Return URI using an absolute path. 

769 

770 Returns 

771 ------- 

772 abs : `ResourcePath` 

773 Absolute URI. For non-schemeless URIs this always returns itself. 

774 Schemeless URIs are upgraded to file URIs. 

775 """ 

776 return self 

777 

778 def _as_local(self) -> Tuple[str, bool]: 

779 """Return the location of the (possibly remote) resource as local file. 

780 

781 This is a helper function for `as_local` context manager. 

782 

783 Returns 

784 ------- 

785 path : `str` 

786 If this is a remote resource, it will be a copy of the resource 

787 on the local file system, probably in a temporary directory. 

788 For a local resource this should be the actual path to the 

789 resource. 

790 is_temporary : `bool` 

791 Indicates if the local path is a temporary file or not. 

792 """ 

793 raise NotImplementedError() 

794 

@contextlib.contextmanager
def as_local(self) -> Iterator[ResourcePath]:
    """Return the location of the (possibly remote) resource as local file.

    Yields
    ------
    local : `ResourcePath`
        For a remote resource, a copy of the resource on the local file
        system, probably in a temporary directory. For a local resource,
        the actual path to the resource.

    Raises
    ------
    IsADirectoryError
        Raised if this URI is dir-like.

    Notes
    -----
    The context manager will automatically delete any local temporary
    file.

    Examples
    --------
    Should be used as a context manager:

    .. code-block:: py

       with uri.as_local() as local:
           ospath = local.ospath
    """
    if self.dirLike:
        raise IsADirectoryError(f"Directory-like URI {self} cannot be fetched as local.")

    local_path, made_temporary = self._as_local()
    local_uri = ResourcePath(local_path, isTemporary=made_temporary)

    try:
        yield local_uri
    finally:
        # Never delete when the local URI is this very URI (a temporary
        # file that was already local). The caller might also have
        # relocated the temporary file, hence the existence check.
        if self != local_uri and made_temporary:
            if local_uri.exists():
                local_uri.remove()

835 

@classmethod
@contextlib.contextmanager
def temporary_uri(
    cls, prefix: Optional[ResourcePath] = None, suffix: Optional[str] = None
) -> Iterator[ResourcePath]:
    """Create a temporary file-like URI.

    Parameters
    ----------
    prefix : `ResourcePath`, optional
        Prefix to use. Without this the path will be formed as a local
        file URI in a temporary directory. Ensuring that the prefix
        location exists is the responsibility of the caller.
    suffix : `str`, optional
        A file suffix to be used. The ``.`` should be included in this
        suffix.

    Yields
    ------
    uri : `ResourcePath`
        The temporary URI. Will be removed when the context is completed.

    Raises
    ------
    NotImplementedError
        Raised if the resulting URI would be dir-like.
    """
    use_tempdir = False
    if prefix is None:
        prefix = ResourcePath(tempfile.mkdtemp(), forceDirectory=True, isTemporary=True)
        # Record that we need to delete this directory. Can not rely
        # on isTemporary flag since an external prefix may have that
        # set as well.
        use_tempdir = True

    try:
        # Need to create a randomized file name. For consistency do not
        # use mkstemp for local and something else for remote.
        # Additionally this method does not create the file to prevent
        # name clashes.
        characters = "abcdefghijklmnopqrstuvwxyz0123456789_"
        rng = Random()
        tempname = "".join(rng.choice(characters) for _ in range(16))
        if suffix:
            tempname += suffix
        temporary_uri = prefix.join(tempname, isTemporary=True)
        if temporary_uri.dirLike:
            # If we had a safe way to clean up a remote temporary
            # directory, we could support this.
            raise NotImplementedError("temporary_uri cannot be used to create a temporary directory.")
    except BaseException:
        # The directory created above would otherwise be leaked, since
        # the cleanup below is never reached when set-up fails.
        if use_tempdir:
            shutil.rmtree(prefix.ospath, ignore_errors=True)
        raise

    try:
        yield temporary_uri
    finally:
        if use_tempdir:
            shutil.rmtree(prefix.ospath, ignore_errors=True)
        else:
            try:
                # It's okay if this does not work because the user removed
                # the file.
                temporary_uri.remove()
            except FileNotFoundError:
                pass

891 

892 def read(self, size: int = -1) -> bytes: 

893 """Open the resource and return the contents in bytes. 

894 

895 Parameters 

896 ---------- 

897 size : `int`, optional 

898 The number of bytes to read. Negative or omitted indicates 

899 that all data should be read. 

900 """ 

901 raise NotImplementedError() 

902 

903 def write(self, data: bytes, overwrite: bool = True) -> None: 

904 """Write the supplied bytes to the new resource. 

905 

906 Parameters 

907 ---------- 

908 data : `bytes` 

909 The bytes to write to the resource. The entire contents of the 

910 resource will be replaced. 

911 overwrite : `bool`, optional 

912 If `True` the resource will be overwritten if it exists. Otherwise 

913 the write will fail. 

914 """ 

915 raise NotImplementedError() 

916 

917 def mkdir(self) -> None: 

918 """For a dir-like URI, create the directory resource if needed.""" 

919 raise NotImplementedError() 

920 

921 def isdir(self) -> bool: 

922 """Return True if this URI looks like a directory, else False.""" 

923 return self.dirLike 

924 

925 def size(self) -> int: 

926 """For non-dir-like URI, return the size of the resource. 

927 

928 Returns 

929 ------- 

930 sz : `int` 

931 The size in bytes of the resource associated with this URI. 

932 Returns 0 if dir-like. 

933 """ 

934 raise NotImplementedError() 

935 

936 def __str__(self) -> str: 

937 """Convert the URI to its native string form.""" 

938 return self.geturl() 

939 

940 def __repr__(self) -> str: 

941 """Return string representation suitable for evaluation.""" 

942 return f'ResourcePath("{self.geturl()}")' 

943 

def __eq__(self, other: Any) -> bool:
    """Test equality with another object.

    Two `ResourcePath` instances are equal when their ``geturl()``
    strings match. For any other type `NotImplemented` is returned so
    that Python can fall back to its default handling.
    """
    if isinstance(other, ResourcePath):
        return self.geturl() == other.geturl()
    return NotImplemented

949 

950 def __hash__(self) -> int: 

951 """Return hash of this object.""" 

952 return hash(str(self)) 

953 

def __lt__(self, other: ResourcePath) -> bool:
    """Return `True` if this URI orders before ``other``.

    Ordering is the string ordering of the two ``geturl()`` values.
    `NotImplemented` is returned for objects that are not `ResourcePath`
    so that Python can try the reflected operation or raise `TypeError`,
    matching how ``__eq__`` treats foreign types.
    """
    if not isinstance(other, ResourcePath):
        return NotImplemented
    return self.geturl() < other.geturl()

956 

def __le__(self, other: ResourcePath) -> bool:
    """Return `True` if this URI orders before or equal to ``other``.

    Ordering is the string ordering of the two ``geturl()`` values.
    `NotImplemented` is returned for objects that are not `ResourcePath`
    so that Python can try the reflected operation or raise `TypeError`,
    matching how ``__eq__`` treats foreign types.
    """
    if not isinstance(other, ResourcePath):
        return NotImplemented
    return self.geturl() <= other.geturl()

959 

def __gt__(self, other: ResourcePath) -> bool:
    """Return `True` if this URI orders after ``other``.

    Ordering is the string ordering of the two ``geturl()`` values.
    `NotImplemented` is returned for objects that are not `ResourcePath`
    so that Python can try the reflected operation or raise `TypeError`,
    matching how ``__eq__`` treats foreign types.
    """
    if not isinstance(other, ResourcePath):
        return NotImplemented
    return self.geturl() > other.geturl()

962 

def __ge__(self, other: ResourcePath) -> bool:
    """Return `True` if this URI orders after or equal to ``other``.

    Ordering is the string ordering of the two ``geturl()`` values.
    `NotImplemented` is returned for objects that are not `ResourcePath`
    so that Python can try the reflected operation or raise `TypeError`,
    matching how ``__eq__`` treats foreign types.
    """
    if not isinstance(other, ResourcePath):
        return NotImplemented
    return self.geturl() >= other.geturl()

965 

966 def __copy__(self) -> ResourcePath: 

967 """Copy constructor. 

968 

969 Object is immutable so copy can return itself. 

970 """ 

971 # Implement here because the __new__ method confuses things 

972 return self 

973 

974 def __deepcopy__(self, memo: Any) -> ResourcePath: 

975 """Deepcopy the object. 

976 

977 Object is immutable so copy can return itself. 

978 """ 

979 # Implement here because the __new__ method confuses things 

980 return self 

981 

982 def __getnewargs__(self) -> Tuple: 

983 """Support pickling.""" 

984 return (str(self),) 

985 

986 @classmethod 

987 def _fixDirectorySep( 

988 cls, parsed: urllib.parse.ParseResult, forceDirectory: bool = False 

989 ) -> Tuple[urllib.parse.ParseResult, bool]: 

990 """Ensure that a path separator is present on directory paths. 

991 

992 Parameters 

993 ---------- 

994 parsed : `~urllib.parse.ParseResult` 

995 The result from parsing a URI using `urllib.parse`. 

996 forceDirectory : `bool`, optional 

997 If `True` forces the URI to end with a separator, otherwise given 

998 URI is interpreted as is. Specifying that the URI is conceptually 

999 equivalent to a directory can break some ambiguities when 

1000 interpreting the last element of a path. 

1001 

1002 Returns 

1003 ------- 

1004 modified : `~urllib.parse.ParseResult` 

1005 Update result if a URI is being handled. 

1006 dirLike : `bool` 

1007 `True` if given parsed URI has a trailing separator or 

1008 forceDirectory is True. Otherwise `False`. 

1009 """ 

1010 # assume we are not dealing with a directory like URI 

1011 dirLike = False 

1012 

1013 # Directory separator 

1014 sep = cls._pathModule.sep 

1015 

1016 # URI is dir-like if explicitly stated or if it ends on a separator 

1017 endsOnSep = parsed.path.endswith(sep) 

1018 if forceDirectory or endsOnSep: 

1019 dirLike = True 

1020 # only add the separator if it's not already there 

1021 if not endsOnSep: 

1022 parsed = parsed._replace(path=parsed.path + sep) 

1023 

1024 return parsed, dirLike 

1025 

1026 @classmethod 

1027 def _fixupPathUri( 

1028 cls, 

1029 parsed: urllib.parse.ParseResult, 

1030 root: Optional[Union[str, ResourcePath]] = None, 

1031 forceAbsolute: bool = False, 

1032 forceDirectory: bool = False, 

1033 ) -> Tuple[urllib.parse.ParseResult, bool]: 

1034 """Correct any issues with the supplied URI. 

1035 

1036 Parameters 

1037 ---------- 

1038 parsed : `~urllib.parse.ParseResult` 

1039 The result from parsing a URI using `urllib.parse`. 

1040 root : `str` or `ResourcePath`, ignored 

1041 Not used by the this implementation since all URIs are 

1042 absolute except for those representing the local file system. 

1043 forceAbsolute : `bool`, ignored. 

1044 Not used by this implementation. URIs are generally always 

1045 absolute. 

1046 forceDirectory : `bool`, optional 

1047 If `True` forces the URI to end with a separator, otherwise given 

1048 URI is interpreted as is. Specifying that the URI is conceptually 

1049 equivalent to a directory can break some ambiguities when 

1050 interpreting the last element of a path. 

1051 

1052 Returns 

1053 ------- 

1054 modified : `~urllib.parse.ParseResult` 

1055 Update result if a URI is being handled. 

1056 dirLike : `bool` 

1057 `True` if given parsed URI has a trailing separator or 

1058 forceDirectory is True. Otherwise `False`. 

1059 

1060 Notes 

1061 ----- 

1062 Relative paths are explicitly not supported by RFC8089 but `urllib` 

1063 does accept URIs of the form ``file:relative/path.ext``. They need 

1064 to be turned into absolute paths before they can be used. This is 

1065 always done regardless of the ``forceAbsolute`` parameter. 

1066 

1067 AWS S3 differentiates between keys with trailing POSIX separators (i.e 

1068 `/dir` and `/dir/`) whereas POSIX does not neccessarily. 

1069 

1070 Scheme-less paths are normalized. 

1071 """ 

1072 return cls._fixDirectorySep(parsed, forceDirectory) 

1073 

def transfer_from(
    self,
    src: ResourcePath,
    transfer: str,
    overwrite: bool = False,
    transaction: Optional[TransactionProtocol] = None,
) -> None:
    """Transfer a resource from another URI to this one.

    Parameters
    ----------
    src : `ResourcePath`
        The URI to transfer from.
    transfer : `str`
        Transfer mode. Common options are copy, link, symlink, hardlink
        and relsymlink, but a given URI type need not support them all.
    overwrite : `bool`, optional
        Whether an existing file may be overwritten. Defaults to `False`.
    transaction : `~lsst.resources.utils.TransactionProtocol`, optional
        Transaction object that may (depending on the implementation) be
        able to roll back transfers on error. Support is not guaranteed.

    Raises
    ------
    NotImplementedError
        Always raised by this implementation, which supports no
        transfer modes.

    Notes
    -----
    This does not scale easily with the number of URI schemes. The
    destination matters more than the source because that is where the
    transfer modes apply, except that "move" also deletes the source.

    Local file to local file is the fundamental case. Every other scheme
    has to support "copy" to a local file (with implicit "move" support)
    and copy from a local file. The "link" modes tend to be specific to
    local file systems.

    A "move" is a "copy" followed by removal of the remote resource.
    Whether that works depends on the source URI, not the destination.
    Undoing a move on transaction rollback is expected to be problematic
    when a remote resource was involved.
    """
    reason = f"No transfer modes supported by URI scheme {self.scheme}"
    raise NotImplementedError(reason)

1115 

def walk(
    self, file_filter: Optional[Union[str, re.Pattern]] = None
) -> Iterator[Union[List, Tuple[ResourcePath, List[str], List[str]]]]:
    """Walk a directory tree, reporting matching files and directories.

    Parameters
    ----------
    file_filter : `str` or `re.Pattern`, optional
        Regular expression used to filter the file names that are
        returned.

    Yields
    ------
    dirpath : `ResourcePath`
        The directory currently being examined.
    dirnames : `list` of `str`
        Names of the subdirectories found in ``dirpath``.
    filenames : `list` of `str`
        Names of the files found in ``dirpath``.

    Raises
    ------
    NotImplementedError
        Always raised by this implementation.
    """
    raise NotImplementedError

1136 

@overload
@classmethod
def findFileResources(
    cls,
    candidates: Iterable[ResourcePathExpression],
    file_filter: Optional[Union[str, re.Pattern]],
    grouped: Literal[True],
) -> Iterator[Iterator[ResourcePath]]:
    ...

@overload
@classmethod
def findFileResources(
    cls,
    candidates: Iterable[ResourcePathExpression],
    *,
    grouped: Literal[True],
) -> Iterator[Iterator[ResourcePath]]:
    ...

@overload
@classmethod
def findFileResources(
    cls,
    candidates: Iterable[ResourcePathExpression],
    file_filter: Optional[Union[str, re.Pattern]] = None,
    grouped: Literal[False] = False,
) -> Iterator[ResourcePath]:
    ...

@classmethod
def findFileResources(
    cls,
    candidates: Iterable[ResourcePathExpression],
    file_filter: Optional[Union[str, re.Pattern]] = None,
    grouped: bool = False,
) -> Iterator[Union[ResourcePath, Iterator[ResourcePath]]]:
    """Get all the files from a list of values.

    Parameters
    ----------
    candidates : iterable [`str` or `ResourcePath`]
        The files to return and directories in which to look for files to
        return.
    file_filter : `str` or `re.Pattern`, optional
        The regex to use when searching for files within directories.
        By default returns all the found files.
    grouped : `bool`, optional
        If `True` the results will be grouped by directory and each
        yielded value will be an iterator over URIs. If `False` each
        URI will be returned separately.

    Yields
    ------
    found_file: `ResourcePath`
        The passed-in URIs and URIs found in passed-in directories.
        If grouping is enabled, each of the yielded values will be an
        iterator yielding members of the group. Files given explicitly
        will be returned as a single group at the end.

    Notes
    -----
    If a value is a file it is yielded immediately without checking that it
    exists. If a value is a directory, all the files in the directory
    (recursively) that match the regex will be yielded in turn.

    Each group iterator is bound to its own directory when it is
    created, so groups may be collected first and consumed later.
    """
    fileRegex = None if file_filter is None else re.compile(file_filter)

    # Explicitly given files are gathered here when grouping is requested.
    singles = []

    # Find all the files of interest
    for location in candidates:
        uri = ResourcePath(location)
        if not uri.isdir():
            if grouped:
                singles.append(uri)
            else:
                yield uri
            continue

        for found in uri.walk(fileRegex):
            if not found:
                # This means the uri does not exist and by
                # convention we ignore it
                continue
            root, _, files = found
            if not files:
                continue
            if grouped:
                # Bind ``root.join`` now. A generator expression would
                # look ``root`` up lazily and see the value from a later
                # loop iteration if the caller advanced this generator
                # before consuming the group.
                yield map(root.join, files)
            else:
                for name in files:
                    yield root.join(name)

    # Finally, return any explicitly given files in one group
    if grouped and singles:
        yield iter(singles)

1233 

@contextlib.contextmanager
def open(
    self,
    mode: str = "r",
    *,
    encoding: Optional[str] = None,
    prefer_file_temporary: bool = False,
) -> Iterator[ResourceHandleProtocol]:
    """Return a context manager wrapping a file-like object for this URI.

    Parameters
    ----------
    mode : `str`
        Mode string, with the same values as accepted by
        `builtins.open`. URI types that are intrinsically read-only may
        only support read modes, and `io.IOBase.seekable` is not
        guaranteed to be `True` for the returned object.
    encoding : `str`, optional
        Unicode encoding for text IO; ignored for binary IO. Defaults to
        ``locale.getpreferredencoding(False)``, just as `builtins.open`
        does.
    prefer_file_temporary : `bool`, optional
        If `True`, implementations that must stage data locally use a
        temporary file instead of an in-memory buffer. This is generally
        slower but may be needed to limit memory use for large files.
        Ignored by implementations that do not require a temporary.

    Returns
    -------
    cm : `contextlib.ContextManager`
        A context manager that wraps a file-like object.

    Raises
    ------
    IsADirectoryError
        Raised if this URI is directory-like.
    FileExistsError
        Raised if ``mode`` contains ``"x"`` and the resource exists.

    Notes
    -----
    The default implementation stages content in a local temporary
    (in memory or on disk, depending on ``prefer_file_temporary``) and
    uses `read`, `write`, `as_local`, and `transfer_from` as needed to
    move data from/to remote systems. Remote writes therefore happen
    only when the context manager exits. `ResourcePath` implementations
    that can return a more efficient native buffer should do so whenever
    possible (as is guaranteed for local files). Implementations for
    which `as_local` does not return a temporary are required to
    reimplement `open`, though they may delegate to `super` when
    ``prefer_file_temporary`` is `False`.
    """
    if self.dirLike:
        raise IsADirectoryError(f"Directory-like URI {self} cannot be opened.")
    if "x" in mode and self.exists():
        raise FileExistsError(f"File at {self} already exists.")

    if not prefer_file_temporary:
        # In-memory path: defer entirely to the handle implementation.
        with self._openImpl(mode, encoding=encoding) as handle:
            yield handle
        return

    # File-backed path. Modes that need the existing content start from a
    # local copy; all other modes start from a fresh temporary.
    needs_existing_content = "r" in mode or "a" in mode
    if needs_existing_content:
        staging_cm = self.as_local()
    else:
        staging_cm = self.temporary_uri(suffix=self.getExtension())

    with staging_cm as local_uri:
        assert local_uri.isTemporary, (
            "ResourcePath implementations for which as_local is not "
            "a temporary must reimplement `open`."
        )
        with open(local_uri.ospath, mode=mode, encoding=encoding) as file_buffer:
            if "a" in mode:
                file_buffer.seek(0, io.SEEK_END)
            yield file_buffer
        # Pure read mode ("r" without "+") leaves nothing to send back.
        read_only = "r" in mode and "+" not in mode
        if not read_only:
            self.transfer_from(local_uri, transfer="copy", overwrite=("x" not in mode))

1305 

1306 @contextlib.contextmanager 

1307 def _openImpl( 

1308 self, mode: str = "r", *, encoding: Optional[str] = None 

1309 ) -> Iterator[ResourceHandleProtocol]: 

1310 """Implement opening of a resource handle. 

1311 

1312 This private method may be overridden by specific `ResourcePath` 

1313 implementations to provide a customized handle like interface. 

1314 

1315 Parameters 

1316 ---------- 

1317 mode : `str` 

1318 The mode the handle should be opened with 

1319 encoding : `str`, optional 

1320 The byte encoding of any binary text 

1321 

1322 Yields 

1323 ------ 

1324 handle : `BaseResourceHandle` 

1325 A handle that conforms to the `BaseResourcehandle interface 

1326 

1327 Notes 

1328 ----- 

1329 The base implementation of a file handle reads in a files entire 

1330 contents into a buffer for manipulation, and then writes it back out 

1331 upon close. Subclasses of this class may offer more fine grained 

1332 control. 

1333 """ 

1334 if "r" in mode or "a" in mode: 

1335 in_bytes = self.read() 

1336 else: 

1337 in_bytes = b"" 

1338 if "b" in mode: 

1339 bytes_buffer = io.BytesIO(in_bytes) 

1340 if "a" in mode: 

1341 bytes_buffer.seek(0, io.SEEK_END) 

1342 yield bytes_buffer 

1343 out_bytes = bytes_buffer.getvalue() 

1344 else: 

1345 if encoding is None: 

1346 encoding = locale.getpreferredencoding(False) 

1347 str_buffer = io.StringIO(in_bytes.decode(encoding)) 

1348 if "a" in mode: 

1349 str_buffer.seek(0, io.SEEK_END) 

1350 yield str_buffer 

1351 out_bytes = str_buffer.getvalue().encode(encoding) 

1352 if "r" not in mode or "+" in mode: 

1353 self.write(out_bytes, overwrite=("x" not in mode))