Coverage for python/lsst/resources/_resourcePath.py: 21%

Shortcuts on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

391 statements  

1# This file is part of lsst-resources. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# Use of this source code is governed by a 3-clause BSD-style 

10# license that can be found in the LICENSE file. 

11 

12from __future__ import annotations 

13 

14import concurrent.futures 

15import contextlib 

16import copy 

17import io 

18import locale 

19import logging 

20import os 

21import posixpath 

22import re 

23import shutil 

24import tempfile 

25import urllib.parse 

26from pathlib import Path, PurePath, PurePosixPath 

27from random import Random 

28 

29__all__ = ("ResourcePath", "ResourcePathExpression") 

30 

31from typing import ( 

32 IO, 

33 TYPE_CHECKING, 

34 Any, 

35 Dict, 

36 Iterable, 

37 Iterator, 

38 List, 

39 Literal, 

40 Optional, 

41 Tuple, 

42 Type, 

43 Union, 

44 overload, 

45) 

46 

47if TYPE_CHECKING: 47 ↛ 48line 47 didn't jump to line 48, because the condition on line 47 was never true

48 from .utils import TransactionProtocol 

49 

50 

# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# Regex for looking for URI escapes
# (a percent sign followed by two upper-case hexadecimal characters).
ESCAPES_RE = re.compile(r"%[A-F0-9]{2}")

# Precomputed escaped hash
# (whatever urllib.parse.quote produces for the "#" character).
ESCAPED_HASH = urllib.parse.quote("#")

# Maximum number of worker threads for parallelized operations.
# If greater than 10, be aware that this number has to be consistent
# with connection pool sizing (for example in urllib3).
MAX_WORKERS = 10


ResourcePathExpression = Union[str, urllib.parse.ParseResult, "ResourcePath", Path]
"""Type-annotation alias for objects that can be coerced to ResourcePath.
"""

68 

69 

70class ResourcePath: 

71 """Convenience wrapper around URI parsers. 

72 

73 Provides access to URI components and can convert file 

74 paths into absolute path URIs. Scheme-less URIs are treated as if 

75 they are local file system paths and are converted to absolute URIs. 

76 

77 A specialist subclass is created for each supported URI scheme. 

78 

79 Parameters 

80 ---------- 

81 uri : `str`, `Path`, `urllib.parse.ParseResult`, or `ResourcePath`. 

82 URI in string form. Can be scheme-less if referring to a local 

83 filesystem path. 

84 root : `str` or `ResourcePath`, optional 

85 When fixing up a relative path in a ``file`` scheme or if scheme-less, 

86 use this as the root. Must be absolute. If `None` the current 

87 working directory will be used. Can be a file URI. 

88 forceAbsolute : `bool`, optional 

89 If `True`, scheme-less relative URI will be converted to an absolute 

90 path using a ``file`` scheme. If `False` scheme-less URI will remain 

91 scheme-less and will not be updated to ``file`` or absolute path. 

92 forceDirectory: `bool`, optional 

93 If `True` forces the URI to end with a separator, otherwise given URI 

94 is interpreted as is. 

95 isTemporary : `bool`, optional 

96 If `True` indicates that this URI points to a temporary resource. 

97 The default is `False`, unless ``uri`` is already a `ResourcePath` 

98 instance and ``uri.isTemporary is True``. 

99 """ 

100 

101 _pathLib: Type[PurePath] = PurePosixPath 

102 """Path library to use for this scheme.""" 

103 

104 _pathModule = posixpath 

105 """Path module to use for this scheme.""" 

106 

107 transferModes: Tuple[str, ...] = ("copy", "auto", "move") 

108 """Transfer modes supported by this implementation. 

109 

110 Move is special in that it is generally a copy followed by an unlink. 

111 Whether that unlink works depends critically on whether the source URI 

112 implements unlink. If it does not the move will be reported as a failure. 

113 """ 

114 

115 transferDefault: str = "copy" 

116 """Default mode to use for transferring if ``auto`` is specified.""" 

117 

118 quotePaths = True 

119 """True if path-like elements modifying a URI should be quoted. 

120 

121 All non-schemeless URIs have to internally use quoted paths. Therefore 

122 if a new file name is given (e.g. to updatedFile or join) a decision must 

123 be made whether to quote it to be consistent. 

124 """ 

125 

126 isLocal = False 

127 """If `True` this URI refers to a local file.""" 

128 

129 # This is not an ABC with abstract methods because the __new__ being 

130 # a factory confuses mypy such that it assumes that every constructor 

131 # returns a ResourcePath and then determines that all the abstract methods 

132 # are still abstract. If they are not marked abstract but just raise 

133 # mypy is fine with it. 

134 

135 # mypy is confused without these 

136 _uri: urllib.parse.ParseResult 

137 isTemporary: bool 

138 dirLike: bool 

139 

def __new__(
    cls,
    uri: ResourcePathExpression,
    root: Optional[Union[str, ResourcePath]] = None,
    forceAbsolute: bool = True,
    forceDirectory: bool = False,
    isTemporary: Optional[bool] = None,
) -> ResourcePath:
    """Create and return new specialist ResourcePath subclass.

    The concrete class is normally chosen from the URI scheme
    (scheme-less, ``file``, ``s3``, ``http*``, ``resource`` or ``mem``).
    If a `urllib.parse.ParseResult` is handed to a subclass constructor
    that subclass is used directly and no scheme lookup is done.

    Parameters
    ----------
    uri : `str`, `os.PathLike`, `urllib.parse.ParseResult`, `ResourcePath`
        The URI to wrap. Path-like objects are converted with `str`.
    root : `str` or `ResourcePath`, optional
        Forwarded to the subclass ``_fixupPathUri`` implementation.
    forceAbsolute : `bool`, optional
        Forwarded to the subclass ``_fixupPathUri`` implementation. Also
        causes a scheme-less `ResourcePath` argument to be re-created
        from its string form.
    forceDirectory : `bool`, optional
        Forwarded to ``_fixDirectorySep`` or ``_fixupPathUri``.
    isTemporary : `bool`, optional
        Value stored on the new instance; `None` is stored as `False`.

    Returns
    -------
    new : `ResourcePath`
        Instance of the selected subclass, or ``uri`` itself if it is
        already a compatible `ResourcePath`.

    Raises
    ------
    RuntimeError
        Raised if ``uri`` is a `ResourcePath` that is not dir-like but
        ``forceDirectory`` is `True`, or whose ``isTemporary`` differs
        from an explicitly supplied ``isTemporary``.
    ValueError
        Raised if ``uri`` is of an unsupported type.
    NotImplementedError
        Raised if the URI scheme has no associated subclass.
    """
    parsed: urllib.parse.ParseResult
    dirLike: bool = False
    subclass: Optional[Type[ResourcePath]] = None

    if isinstance(uri, os.PathLike):
        uri = str(uri)

    # Record if we need to post process the URI components
    # or if the instance is already fully configured
    if isinstance(uri, str):
        # Since local file names can have special characters in them
        # we need to quote them for the parser but we can unquote
        # later. Assume that all other URI schemes are quoted.
        # Since sometimes people write file:/a/b and not file:///a/b
        # we should not quote in the explicit case of file:
        if "://" not in uri and not uri.startswith("file:"):
            if ESCAPES_RE.search(uri):
                # Looks quoted already; leave untouched and only warn.
                log.warning("Possible double encoding of %s", uri)
            else:
                uri = urllib.parse.quote(uri)
                # Special case hash since we must support fragments
                # even in schemeless URIs -- although try to only replace
                # them in file part and not directory part
                if ESCAPED_HASH in uri:
                    dirpos = uri.rfind("/")
                    # Do replacement after this /
                    uri = uri[: dirpos + 1] + uri[dirpos + 1 :].replace(ESCAPED_HASH, "#")

        parsed = urllib.parse.urlparse(uri)
    elif isinstance(uri, urllib.parse.ParseResult):
        parsed = copy.copy(uri)
        # If we are being instantiated with a subclass, rather than
        # ResourcePath, ensure that that subclass is used directly.
        # This could lead to inconsistencies if this constructor
        # is used externally outside of the ResourcePath.replace() method.
        #   S3ResourcePath(urllib.parse.urlparse("file://a/b.txt"))
        # will be a problem.
        # This is needed to prevent a schemeless absolute URI become
        # a file URI unexpectedly when calling updatedFile or
        # updatedExtension
        if cls is not ResourcePath:
            parsed, dirLike = cls._fixDirectorySep(parsed, forceDirectory)
            subclass = cls

    elif isinstance(uri, ResourcePath):
        # Since ResourcePath is immutable we can return the argument
        # unchanged if it already agrees with forceDirectory, isTemporary,
        # and forceAbsolute.
        # We invoke __new__ again with str(self) to add a scheme for
        # forceAbsolute, but for the others that seems more likely to paper
        # over logic errors than do something useful, so we just raise.
        if forceDirectory and not uri.dirLike:
            raise RuntimeError(
                f"{uri} is already a file-like ResourcePath; cannot force it to directory."
            )
        if isTemporary is not None and isTemporary is not uri.isTemporary:
            raise RuntimeError(
                f"{uri} is already a {'temporary' if uri.isTemporary else 'permanent'} "
                f"ResourcePath; cannot make it {'temporary' if isTemporary else 'permanent'}."
            )
        if forceAbsolute and not uri.scheme:
            return ResourcePath(
                str(uri),
                root=root,
                forceAbsolute=True,
                forceDirectory=uri.dirLike,
                isTemporary=uri.isTemporary,
            )
        return uri
    else:
        raise ValueError(
            f"Supplied URI must be string, Path, ResourcePath, or ParseResult but got '{uri!r}'"
        )

    if subclass is None:
        # Work out the subclass from the URI scheme
        # (implementations are imported lazily, only when needed).
        if not parsed.scheme:
            from .schemeless import SchemelessResourcePath

            subclass = SchemelessResourcePath
        elif parsed.scheme == "file":
            from .file import FileResourcePath

            subclass = FileResourcePath
        elif parsed.scheme == "s3":
            from .s3 import S3ResourcePath

            subclass = S3ResourcePath
        elif parsed.scheme.startswith("http"):
            from .http import HttpResourcePath

            subclass = HttpResourcePath
        elif parsed.scheme == "resource":
            # Rules for scheme names disallow pkg_resource
            from .packageresource import PackageResourcePath

            subclass = PackageResourcePath
        elif parsed.scheme == "mem":
            # in-memory datastore object
            from .mem import InMemoryResourcePath

            subclass = InMemoryResourcePath
        else:
            raise NotImplementedError(
                f"No URI support for scheme: '{parsed.scheme}' in {parsed.geturl()}"
            )

        parsed, dirLike = subclass._fixupPathUri(
            parsed, root=root, forceAbsolute=forceAbsolute, forceDirectory=forceDirectory
        )

        # It is possible for the class to change from schemeless
        # to file so handle that
        if parsed.scheme == "file":
            from .file import FileResourcePath

            subclass = FileResourcePath

    # Now create an instance of the correct subclass and set the
    # attributes directly
    self = object.__new__(subclass)
    self._uri = parsed
    self.dirLike = dirLike
    if isTemporary is None:
        isTemporary = False
    self.isTemporary = isTemporary
    return self

276 

277 @property 

278 def scheme(self) -> str: 

279 """Return the URI scheme. 

280 

281 Notes 

282 ----- 

283 (``://`` is not part of the scheme). 

284 """ 

285 return self._uri.scheme 

286 

287 @property 

288 def netloc(self) -> str: 

289 """Return the URI network location.""" 

290 return self._uri.netloc 

291 

292 @property 

293 def path(self) -> str: 

294 """Return the path component of the URI.""" 

295 return self._uri.path 

296 

297 @property 

298 def unquoted_path(self) -> str: 

299 """Return path component of the URI with any URI quoting reversed.""" 

300 return urllib.parse.unquote(self._uri.path) 

301 

302 @property 

303 def ospath(self) -> str: 

304 """Return the path component of the URI localized to current OS.""" 

305 raise AttributeError(f"Non-file URI ({self}) has no local OS path.") 

306 

307 @property 

308 def relativeToPathRoot(self) -> str: 

309 """Return path relative to network location. 

310 

311 Effectively, this is the path property with posix separator stripped 

312 from the left hand side of the path. 

313 

314 Always unquotes. 

315 """ 

316 p = self._pathLib(self.path) 

317 relToRoot = str(p.relative_to(p.root)) 

318 if self.dirLike and not relToRoot.endswith("/"): 

319 relToRoot += "/" 

320 return urllib.parse.unquote(relToRoot) 

321 

322 @property 

323 def is_root(self) -> bool: 

324 """Return whether this URI points to the root of the network location. 

325 

326 This means that the path components refers to the top level. 

327 """ 

328 relpath = self.relativeToPathRoot 

329 if relpath == "./": 

330 return True 

331 return False 

332 

333 @property 

334 def fragment(self) -> str: 

335 """Return the fragment component of the URI.""" 

336 return self._uri.fragment 

337 

338 @property 

339 def params(self) -> str: 

340 """Return any parameters included in the URI.""" 

341 return self._uri.params 

342 

343 @property 

344 def query(self) -> str: 

345 """Return any query strings included in the URI.""" 

346 return self._uri.query 

347 

348 def geturl(self) -> str: 

349 """Return the URI in string form. 

350 

351 Returns 

352 ------- 

353 url : `str` 

354 String form of URI. 

355 """ 

356 return self._uri.geturl() 

357 

def root_uri(self) -> ResourcePath:
    """Return the base root URI.

    Returns
    -------
    uri : `ResourcePath`
        Dir-like copy of this URI with the path component emptied.
    """
    root = self.replace(forceDirectory=True, path="")
    return root

367 

def split(self) -> Tuple[ResourcePath, str]:
    """Split URI into head and tail.

    Returns
    -------
    head : `ResourcePath`
        Everything leading up to tail, expanded and normalized as per
        ResourcePath rules. Always created as a dir-like URI.
    tail : `str`
        Last `self.path` component, unquoted. Empty if the path ends on
        a separator. Never contains separators.

    Notes
    -----
    Equivalent to `os.path.split()` where head preserves the URI
    components.
    """
    directory, filename = self._pathModule.split(self.path)
    parsed_head = self._uri._replace(path=directory)

    # The file part should never include quoted metacharacters.
    filename = urllib.parse.unquote(filename)

    # A schemeless URI may hold a relative path and has to stay relative;
    # every other kind of URI is absolute already.
    keep_absolute = self._pathModule.isabs(self.path)
    head_uri = ResourcePath(parsed_head, forceDirectory=True, forceAbsolute=keep_absolute)
    return head_uri, filename

397 

def basename(self) -> str:
    """Return the base name, last element of path, of the URI.

    Returns
    -------
    tail : `str`
        Last part of the path attribute. Empty if the path ends on a
        separator.

    Notes
    -----
    This is the second element returned by `split()`.

    Equivalent of `os.path.basename()`.
    """
    parts = self.split()
    return parts[1]

415 

def dirname(self) -> ResourcePath:
    """Return the directory component of the path as a new `ResourcePath`.

    Returns
    -------
    head : `ResourcePath`
        Everything except the tail of path attribute, expanded and
        normalized as per ResourcePath rules.

    Notes
    -----
    This is the first element returned by `split()`.

    Equivalent of `os.path.dirname()`.
    """
    parts = self.split()
    return parts[0]

430 

def parent(self) -> ResourcePath:
    """Return a `ResourcePath` of the parent directory.

    Returns
    -------
    head : `ResourcePath`
        Everything except the tail of path attribute, expanded and
        normalized as per `ResourcePath` rules.

    Notes
    -----
    For a file-like URI this will be the same as calling `dirname()`.
    """
    if self.dirLike:
        # Dir-like: step up one level whether or not the path carries a
        # trailing separator.
        parent_path = self._pathLib(self.path).parent
        return self.replace(path=str(parent_path), forceDirectory=True)
    # File-like: the parent is simply the containing directory.
    return self.dirname()

452 

453 def replace(self, forceDirectory: bool = False, isTemporary: bool = False, **kwargs: Any) -> ResourcePath: 

454 """Return new `ResourcePath` with specified components replaced. 

455 

456 Parameters 

457 ---------- 

458 forceDirectory : `bool`, optional 

459 Parameter passed to ResourcePath constructor to force this 

460 new URI to be dir-like. 

461 isTemporary : `bool`, optional 

462 Indicate that the resulting URI is temporary resource. 

463 **kwargs 

464 Components of a `urllib.parse.ParseResult` that should be 

465 modified for the newly-created `ResourcePath`. 

466 

467 Returns 

468 ------- 

469 new : `ResourcePath` 

470 New `ResourcePath` object with updated values. 

471 

472 Notes 

473 ----- 

474 Does not, for now, allow a change in URI scheme. 

475 """ 

476 # Disallow a change in scheme 

477 if "scheme" in kwargs: 

478 raise ValueError(f"Can not use replace() method to change URI scheme for {self}") 

479 return self.__class__( 

480 self._uri._replace(**kwargs), forceDirectory=forceDirectory, isTemporary=isTemporary 

481 ) 

482 

def updatedFile(self, newfile: str) -> ResourcePath:
    """Return new URI with an updated final component of the path.

    Parameters
    ----------
    newfile : `str`
        File name with no path component.

    Returns
    -------
    updated : `ResourcePath`
        New URI whose last path component is ``newfile``.

    Notes
    -----
    The ``dirLike`` attribute of the result is forced to `False`.
    ``newfile`` is quoted when ``quotePaths`` is true for this class.
    """
    name = urllib.parse.quote(newfile) if self.quotePaths else newfile
    directory = self._pathModule.split(self.path)[0]
    combined = self._pathModule.join(directory, name)

    result = self.replace(path=combined)
    result.dirLike = False
    return result

508 

509 def updatedExtension(self, ext: Optional[str]) -> ResourcePath: 

510 """Return a new `ResourcePath` with updated file extension. 

511 

512 All file extensions are replaced. 

513 

514 Parameters 

515 ---------- 

516 ext : `str` or `None` 

517 New extension. If an empty string is given any extension will 

518 be removed. If `None` is given there will be no change. 

519 

520 Returns 

521 ------- 

522 updated : `ResourcePath` 

523 URI with the specified extension. Can return itself if 

524 no extension was specified. 

525 """ 

526 if ext is None: 

527 return self 

528 

529 # Get the extension 

530 current = self.getExtension() 

531 

532 # Nothing to do if the extension already matches 

533 if current == ext: 

534 return self 

535 

536 # Remove the current extension from the path 

537 # .fits.gz counts as one extension do not use os.path.splitext 

538 path = self.path 

539 if current: 

540 path = path[: -len(current)] 

541 

542 # Ensure that we have a leading "." on file extension (and we do not 

543 # try to modify the empty string) 

544 if ext and not ext.startswith("."): 

545 ext = "." + ext 

546 

547 return self.replace(path=path + ext) 

548 

549 def getExtension(self) -> str: 

550 """Return the file extension(s) associated with this URI path. 

551 

552 Returns 

553 ------- 

554 ext : `str` 

555 The file extension (including the ``.``). Can be empty string 

556 if there is no file extension. Usually returns only the last 

557 file extension unless there is a special extension modifier 

558 indicating file compression, in which case the combined 

559 extension (e.g. ``.fits.gz``) will be returned. 

560 """ 

561 special = {".gz", ".bz2", ".xz", ".fz"} 

562 

563 # Get the file part of the path so as not to be confused by 

564 # "." in directory names. 

565 basename = self.basename() 

566 extensions = self._pathLib(basename).suffixes 

567 

568 if not extensions: 

569 return "" 

570 

571 ext = extensions.pop() 

572 

573 # Multiple extensions, decide whether to include the final two 

574 if extensions and ext in special: 

575 ext = f"{extensions[-1]}{ext}" 

576 

577 return ext 

578 

579 def join( 

580 self, path: Union[str, ResourcePath], isTemporary: Optional[bool] = None, forceDirectory: bool = False 

581 ) -> ResourcePath: 

582 """Return new `ResourcePath` with additional path components. 

583 

584 Parameters 

585 ---------- 

586 path : `str`, `ResourcePath` 

587 Additional file components to append to the current URI. Assumed 

588 to include a file at the end. Will be quoted depending on the 

589 associated URI scheme. If the path looks like a URI with a scheme 

590 referring to an absolute location, it will be returned 

591 directly (matching the behavior of `os.path.join()`). It can 

592 also be a `ResourcePath`. 

593 isTemporary : `bool`, optional 

594 Indicate that the resulting URI represents a temporary resource. 

595 Default is ``self.isTemporary``. 

596 forceDirectory : `bool`, optional 

597 If `True` forces the URI to end with a separator, otherwise given 

598 URI is interpreted as is. 

599 

600 Returns 

601 ------- 

602 new : `ResourcePath` 

603 New URI with any file at the end replaced with the new path 

604 components. 

605 

606 Notes 

607 ----- 

608 Schemeless URIs assume local path separator but all other URIs assume 

609 POSIX separator if the supplied path has directory structure. It 

610 may be this never becomes a problem but datastore templates assume 

611 POSIX separator is being used. 

612 

613 If an absolute `ResourcePath` is given for ``path`` is is assumed that 

614 this should be returned directly. Giving a ``path`` of an absolute 

615 scheme-less URI is not allowed for safety reasons as it may indicate 

616 a mistake in the calling code. 

617 

618 Raises 

619 ------ 

620 ValueError 

621 Raised if the ``path`` is an absolute scheme-less URI. In that 

622 situation it is unclear whether the intent is to return a 

623 ``file`` URI or it was a mistake and a relative scheme-less URI 

624 was meant. 

625 RuntimeError 

626 Raised if this attempts to join a temporary URI to a non-temporary 

627 URI. 

628 """ 

629 if isTemporary is None: 

630 isTemporary = self.isTemporary 

631 elif not isTemporary and self.isTemporary: 

632 raise RuntimeError("Cannot join temporary URI to non-temporary URI.") 

633 # If we have a full URI in path we will use it directly 

634 # but without forcing to absolute so that we can trap the 

635 # expected option of relative path. 

636 path_uri = ResourcePath( 

637 path, forceAbsolute=False, forceDirectory=forceDirectory, isTemporary=isTemporary 

638 ) 

639 if path_uri.scheme: 

640 # Check for scheme so can distinguish explicit URIs from 

641 # absolute scheme-less URIs. 

642 return path_uri 

643 

644 if path_uri.isabs(): 

645 # Absolute scheme-less path. 

646 raise ValueError(f"Can not join absolute scheme-less {path_uri!r} to another URI.") 

647 

648 # If this was originally a ResourcePath extract the unquoted path from 

649 # it. Otherwise we use the string we were given to allow "#" to appear 

650 # in the filename if given as a plain string. 

651 if not isinstance(path, str): 

652 path = path_uri.unquoted_path 

653 

654 new = self.dirname() # By definition a directory URI 

655 

656 # new should be asked about quoting, not self, since dirname can 

657 # change the URI scheme for schemeless -> file 

658 if new.quotePaths: 

659 path = urllib.parse.quote(path) 

660 

661 newpath = self._pathModule.normpath(self._pathModule.join(new.path, path)) 

662 

663 # normpath can strip trailing / so we force directory if the supplied 

664 # path ended with a / 

665 return new.replace( 

666 path=newpath, 

667 forceDirectory=(forceDirectory or path.endswith(self._pathModule.sep)), 

668 isTemporary=isTemporary, 

669 ) 

670 

671 def relative_to(self, other: ResourcePath) -> Optional[str]: 

672 """Return the relative path from this URI to the other URI. 

673 

674 Parameters 

675 ---------- 

676 other : `ResourcePath` 

677 URI to use to calculate the relative path. Must be a parent 

678 of this URI. 

679 

680 Returns 

681 ------- 

682 subpath : `str` 

683 The sub path of this URI relative to the supplied other URI. 

684 Returns `None` if there is no parent child relationship. 

685 Scheme and netloc must match. 

686 """ 

687 # Scheme-less absolute other is treated as if it's a file scheme. 

688 # Scheme-less relative other can only return non-None if self 

689 # is also scheme-less relative and that is handled specifically 

690 # in a subclass. 

691 if not other.scheme and other.isabs(): 

692 other = other.abspath() 

693 

694 # Scheme-less self is handled elsewhere. 

695 if self.scheme != other.scheme or self.netloc != other.netloc: 

696 return None 

697 

698 enclosed_path = self._pathLib(self.relativeToPathRoot) 

699 parent_path = other.relativeToPathRoot 

700 subpath: Optional[str] 

701 try: 

702 subpath = str(enclosed_path.relative_to(parent_path)) 

703 except ValueError: 

704 subpath = None 

705 else: 

706 subpath = urllib.parse.unquote(subpath) 

707 return subpath 

708 

709 def exists(self) -> bool: 

710 """Indicate that the resource is available. 

711 

712 Returns 

713 ------- 

714 exists : `bool` 

715 `True` if the resource exists. 

716 """ 

717 raise NotImplementedError() 

718 

@classmethod
def mexists(cls, uris: Iterable[ResourcePath]) -> Dict[ResourcePath, bool]:
    """Check for existence of multiple URIs at once.

    Each ``exists()`` call is submitted to a thread pool of at most
    ``MAX_WORKERS`` threads. The pool is shut down before returning.

    Parameters
    ----------
    uris : iterable of `ResourcePath`
        The URIs to test.

    Returns
    -------
    existence : `dict` of [`ResourcePath`, `bool`]
        Mapping of original URI to boolean indicating existence. A URI
        whose ``exists()`` call raised an exception is reported as
        `False`.
    """
    results: Dict[ResourcePath, bool] = {}
    # Use the executor as a context manager so that its worker threads
    # are always released, even if something below raises.
    with concurrent.futures.ThreadPoolExecutor(max_workers=MAX_WORKERS) as exists_executor:
        future_exists = {exists_executor.submit(uri.exists): uri for uri in uris}

        for future in concurrent.futures.as_completed(future_exists):
            uri = future_exists[future]
            try:
                exists = future.result()
            except Exception:
                # Deliberate best effort: a failed check counts as absent.
                exists = False
            results[uri] = exists
    return results

745 

746 def remove(self) -> None: 

747 """Remove the resource.""" 

748 raise NotImplementedError() 

749 

750 def isabs(self) -> bool: 

751 """Indicate that the resource is fully specified. 

752 

753 For non-schemeless URIs this is always true. 

754 

755 Returns 

756 ------- 

757 isabs : `bool` 

758 `True` in all cases except schemeless URI. 

759 """ 

760 return True 

761 

762 def abspath(self) -> ResourcePath: 

763 """Return URI using an absolute path. 

764 

765 Returns 

766 ------- 

767 abs : `ResourcePath` 

768 Absolute URI. For non-schemeless URIs this always returns itself. 

769 Schemeless URIs are upgraded to file URIs. 

770 """ 

771 return self 

772 

773 def _as_local(self) -> Tuple[str, bool]: 

774 """Return the location of the (possibly remote) resource as local file. 

775 

776 This is a helper function for `as_local` context manager. 

777 

778 Returns 

779 ------- 

780 path : `str` 

781 If this is a remote resource, it will be a copy of the resource 

782 on the local file system, probably in a temporary directory. 

783 For a local resource this should be the actual path to the 

784 resource. 

785 is_temporary : `bool` 

786 Indicates if the local path is a temporary file or not. 

787 """ 

788 raise NotImplementedError() 

789 

790 @contextlib.contextmanager 

791 def as_local(self) -> Iterator[ResourcePath]: 

792 """Return the location of the (possibly remote) resource as local file. 

793 

794 Yields 

795 ------ 

796 local : `ResourcePath` 

797 If this is a remote resource, it will be a copy of the resource 

798 on the local file system, probably in a temporary directory. 

799 For a local resource this should be the actual path to the 

800 resource. 

801 

802 Notes 

803 ----- 

804 The context manager will automatically delete any local temporary 

805 file. 

806 

807 Examples 

808 -------- 

809 Should be used as a context manager: 

810 

811 .. code-block:: py 

812 

813 with uri.as_local() as local: 

814 ospath = local.ospath 

815 """ 

816 if self.dirLike: 

817 raise IsADirectoryError(f"Directory-like URI {self} cannot be fetched as local.") 

818 local_src, is_temporary = self._as_local() 

819 local_uri = ResourcePath(local_src, isTemporary=is_temporary) 

820 

821 try: 

822 yield local_uri 

823 finally: 

824 # The caller might have relocated the temporary file. 

825 # Do not ever delete if the temporary matches self 

826 # (since it may have been that a temporary file was made local 

827 # but already was local). 

828 if self != local_uri and is_temporary and local_uri.exists(): 

829 local_uri.remove() 

830 

@classmethod
@contextlib.contextmanager
def temporary_uri(
    cls, prefix: Optional[ResourcePath] = None, suffix: Optional[str] = None
) -> Iterator[ResourcePath]:
    """Create a temporary file-like URI.

    Parameters
    ----------
    prefix : `ResourcePath`, optional
        Prefix to use. Without this the path will be formed as a local
        file URI in a temporary directory. Ensuring that the prefix
        location exists is the responsibility of the caller.
    suffix : `str`, optional
        A file suffix to be used. The ``.`` should be included in this
        suffix.

    Yields
    ------
    uri : `ResourcePath`
        The temporary URI. Will be removed when the context is completed.

    Raises
    ------
    NotImplementedError
        Raised if the generated URI is dir-like.
    """
    use_tempdir = False
    if prefix is None:
        prefix = ResourcePath(tempfile.mkdtemp(), forceDirectory=True, isTemporary=True)
        # Record that we need to delete this directory. Can not rely
        # on isTemporary flag since an external prefix may have that
        # set as well.
        use_tempdir = True

    try:
        # Need to create a randomized file name. For consistency do not
        # use mkstemp for local and something else for remote.
        # Additionally this method does not create the file to prevent
        # name clashes.
        characters = "abcdefghijklmnopqrstuvwxyz0123456789_"
        rng = Random()
        tempname = "".join(rng.choice(characters) for _ in range(16))
        if suffix:
            tempname += suffix
        temporary_uri = prefix.join(tempname, isTemporary=True)
        if temporary_uri.dirLike:
            # If we had a safe way to clean up a remote temporary
            # directory, we could support this.
            raise NotImplementedError("temporary_uri cannot be used to create a temporary directory.")
    except BaseException:
        # Setup failed before anything was yielded; do not leave the
        # directory created by mkdtemp() behind.
        if use_tempdir:
            shutil.rmtree(prefix.ospath, ignore_errors=True)
        raise

    try:
        yield temporary_uri
    finally:
        if use_tempdir:
            shutil.rmtree(prefix.ospath, ignore_errors=True)
        else:
            # It's okay if this does not work because the user removed
            # the file.
            with contextlib.suppress(FileNotFoundError):
                temporary_uri.remove()

886 

887 def read(self, size: int = -1) -> bytes: 

888 """Open the resource and return the contents in bytes. 

889 

890 Parameters 

891 ---------- 

892 size : `int`, optional 

893 The number of bytes to read. Negative or omitted indicates 

894 that all data should be read. 

895 """ 

896 raise NotImplementedError() 

897 

898 def write(self, data: bytes, overwrite: bool = True) -> None: 

899 """Write the supplied bytes to the new resource. 

900 

901 Parameters 

902 ---------- 

903 data : `bytes` 

904 The bytes to write to the resource. The entire contents of the 

905 resource will be replaced. 

906 overwrite : `bool`, optional 

907 If `True` the resource will be overwritten if it exists. Otherwise 

908 the write will fail. 

909 """ 

910 raise NotImplementedError() 

911 

912 def mkdir(self) -> None: 

913 """For a dir-like URI, create the directory resource if needed.""" 

914 raise NotImplementedError() 

915 

916 def isdir(self) -> bool: 

917 """Return True if this URI looks like a directory, else False.""" 

918 return self.dirLike 

919 

920 def size(self) -> int: 

921 """For non-dir-like URI, return the size of the resource. 

922 

923 Returns 

924 ------- 

925 sz : `int` 

926 The size in bytes of the resource associated with this URI. 

927 Returns 0 if dir-like. 

928 """ 

929 raise NotImplementedError() 

930 

931 def __str__(self) -> str: 

932 """Convert the URI to its native string form.""" 

933 return self.geturl() 

934 

935 def __repr__(self) -> str: 

936 """Return string representation suitable for evaluation.""" 

937 return f'ResourcePath("{self.geturl()}")' 

938 

def __eq__(self, other: Any) -> bool:
    """Test equality with another object.

    Two `ResourcePath` instances are equal when ``geturl()`` returns the
    same string for both. For any other type `NotImplemented` is
    returned so that Python can fall back to its default handling.
    """
    if isinstance(other, ResourcePath):
        return self.geturl() == other.geturl()
    return NotImplemented

944 

def __hash__(self) -> int:
    """Return a hash derived from the string form of this URI."""
    as_text = str(self)
    return hash(as_text)

948 

def __lt__(self, other: ResourcePath) -> bool:
    """Return `True` if this URI sorts before ``other``.

    Ordering compares the strings returned by ``geturl()``. When
    ``other`` is not a `ResourcePath`, `NotImplemented` is returned so
    that Python can try the reflected operation or raise `TypeError`,
    instead of failing with `AttributeError` on ``other.geturl()``.
    """
    if not isinstance(other, ResourcePath):
        return NotImplemented
    return self.geturl() < other.geturl()

951 

def __le__(self, other: ResourcePath) -> bool:
    """Return `True` if this URI sorts before or equal to ``other``.

    Ordering compares the strings returned by ``geturl()``. When
    ``other`` is not a `ResourcePath`, `NotImplemented` is returned so
    that Python can try the reflected operation or raise `TypeError`,
    instead of failing with `AttributeError` on ``other.geturl()``.
    """
    if not isinstance(other, ResourcePath):
        return NotImplemented
    return self.geturl() <= other.geturl()

954 

def __gt__(self, other: ResourcePath) -> bool:
    """Return `True` if this URI sorts after ``other``.

    Ordering compares the strings returned by ``geturl()``. When
    ``other`` is not a `ResourcePath`, `NotImplemented` is returned so
    that Python can try the reflected operation or raise `TypeError`,
    instead of failing with `AttributeError` on ``other.geturl()``.
    """
    if not isinstance(other, ResourcePath):
        return NotImplemented
    return self.geturl() > other.geturl()

957 

def __ge__(self, other: ResourcePath) -> bool:
    """Return `True` if this URI sorts after or equal to ``other``.

    Ordering compares the strings returned by ``geturl()``. When
    ``other`` is not a `ResourcePath`, `NotImplemented` is returned so
    that Python can try the reflected operation or raise `TypeError`,
    instead of failing with `AttributeError` on ``other.geturl()``.
    """
    if not isinstance(other, ResourcePath):
        return NotImplemented
    return self.geturl() >= other.geturl()

960 

def __copy__(self) -> ResourcePath:
    """Support `copy.copy`.

    The object is treated as immutable, so the same instance is handed
    back instead of a duplicate.
    """
    # Defined explicitly because the class's __new__ gets in the way of
    # the default copy machinery.
    return self

968 

def __deepcopy__(self, memo: Any) -> ResourcePath:
    """Support `copy.deepcopy`.

    The object is treated as immutable, so the same instance is handed
    back instead of a duplicate. ``memo`` is not consulted.
    """
    # Defined explicitly because the class's __new__ gets in the way of
    # the default copy machinery.
    return self

976 

def __getnewargs__(self) -> Tuple:
    """Return the arguments for ``__new__`` when unpickling.

    The only argument is the string form of this URI.
    """
    uri_text = str(self)
    return (uri_text,)

980 

@classmethod
def _fixDirectorySep(
    cls, parsed: urllib.parse.ParseResult, forceDirectory: bool = False
) -> Tuple[urllib.parse.ParseResult, bool]:
    """Make sure a directory-like path ends with the path separator.

    Parameters
    ----------
    parsed : `~urllib.parse.ParseResult`
        A URI that has already been parsed with `urllib.parse`.
    forceDirectory : `bool`, optional
        If `True` the URI is treated as a directory and a trailing
        separator is appended when missing. If `False` the URI is
        taken as given, which means it is dir-like only when its path
        already ends with the separator.

    Returns
    -------
    modified : `~urllib.parse.ParseResult`
        The parsed URI, with a separator appended to its path if one
        had to be added. Otherwise the object that was passed in.
    dirLike : `bool`
        `True` if the path ended with a separator or ``forceDirectory``
        was set. `False` otherwise.
    """
    # The separator comes from the path module configured on the class.
    separator = cls._pathModule.sep
    has_trailing_sep = parsed.path.endswith(separator)

    # Not flagged as a directory and no trailing separator: leave as is.
    if not (forceDirectory or has_trailing_sep):
        return parsed, False

    # Dir-like from here on; append the separator only when it is missing.
    if not has_trailing_sep:
        parsed = parsed._replace(path=parsed.path + separator)
    return parsed, True

1020 

@classmethod
def _fixupPathUri(
    cls,
    parsed: urllib.parse.ParseResult,
    root: Optional[Union[str, ResourcePath]] = None,
    forceAbsolute: bool = False,
    forceDirectory: bool = False,
) -> Tuple[urllib.parse.ParseResult, bool]:
    """Apply corrections to a parsed URI.

    Parameters
    ----------
    parsed : `~urllib.parse.ParseResult`
        A URI that has already been parsed with `urllib.parse`.
    root : `str` or `ResourcePath`, ignored
        Accepted but not used by this implementation.
    forceAbsolute : `bool`, ignored
        Accepted but not used by this implementation.
    forceDirectory : `bool`, optional
        If `True` the URI is forced to end with a separator. If `False`
        the URI is interpreted as given. Declaring a URI to be a
        directory removes the ambiguity in how the final path element
        should be interpreted.

    Returns
    -------
    modified : `~urllib.parse.ParseResult`
        The parsed URI, updated if a separator had to be added.
    dirLike : `bool`
        `True` if the parsed URI has a trailing separator or
        ``forceDirectory`` is `True`. `False` otherwise.

    Notes
    -----
    This implementation does nothing beyond delegating to
    `_fixDirectorySep`. Scheme-specific subclasses presumably override
    it for further clean-up (for example making local paths absolute);
    that is not visible from this method.

    AWS S3 distinguishes keys with and without a trailing POSIX
    separator (``/dir`` versus ``/dir/``) whereas POSIX does not
    necessarily do so.
    """
    fixed = cls._fixDirectorySep(parsed, forceDirectory)
    return fixed

1068 

def transfer_from(
    self,
    src: ResourcePath,
    transfer: str,
    overwrite: bool = False,
    transaction: Optional[TransactionProtocol] = None,
) -> None:
    """Transfer a resource to this URI from another location.

    Parameters
    ----------
    src : `ResourcePath`
        Source URI.
    transfer : `str`
        Name of the transfer mode. The generic set of standard modes is
        copy, link, symlink, hardlink and relsymlink, but a given URI
        scheme need not support all of them.
    overwrite : `bool`, optional
        Whether an existing file may be replaced. Defaults to `False`.
    transaction : `~lsst.resources.utils.TransactionProtocol`, optional
        Transaction object that, depending on the implementation, can
        roll back transfers on error. Support is not guaranteed.

    Raises
    ------
    NotImplementedError
        Always raised by this implementation, with a message naming the
        scheme of this URI.

    Notes
    -----
    Conceptually this is hard to scale as the number of URI schemes
    grows. The destination URI matters more than the source URI because
    the transfer modes are relevant at the destination (with the
    complication that "move" deletes the source).

    Local file to local file is the fundamental use case, but every
    other scheme has to support "copy" to a local file (with implicit
    support for "move") and copy from a local file. The "link" modes
    tend to be specific to local file systems.

    "move" is a "copy" in which the remote resource is deleted at the
    end. Whether that works depends on the source URI, not on the
    destination URI. Reverting a move on transaction rollback is
    expected to be problematic when a remote resource is involved.
    """
    scheme = self.scheme
    raise NotImplementedError(f"No transfer modes supported by URI scheme {scheme}")

1110 

def walk(
    self, file_filter: Optional[Union[str, re.Pattern]] = None
) -> Iterator[Union[List, Tuple[ResourcePath, List[str], List[str]]]]:
    """Traverse the directory tree, reporting directories and matching files.

    Parameters
    ----------
    file_filter : `str` or `re.Pattern`, optional
        Regular expression used to filter the file names before they
        are returned.

    Yields
    ------
    dirpath : `ResourcePath`
        The directory currently being examined.
    dirnames : `list` of `str`
        Names of the subdirectories inside ``dirpath``.
    filenames : `list` of `str`
        Names of the files inside ``dirpath``.

    Raises
    ------
    NotImplementedError
        Always raised by this implementation.
    """
    raise NotImplementedError()

1131 

@overload
@classmethod
def findFileResources(
    cls,
    candidates: Iterable[ResourcePathExpression],
    file_filter: Optional[Union[str, re.Pattern]],
    grouped: Literal[True],
) -> Iterator[Iterator[ResourcePath]]:
    ...

@overload
@classmethod
def findFileResources(
    cls,
    candidates: Iterable[ResourcePathExpression],
    *,
    grouped: Literal[True],
) -> Iterator[Iterator[ResourcePath]]:
    ...

@overload
@classmethod
def findFileResources(
    cls,
    candidates: Iterable[ResourcePathExpression],
    file_filter: Optional[Union[str, re.Pattern]] = None,
    grouped: Literal[False] = False,
) -> Iterator[ResourcePath]:
    ...

@classmethod
def findFileResources(
    cls,
    candidates: Iterable[ResourcePathExpression],
    file_filter: Optional[Union[str, re.Pattern]] = None,
    grouped: bool = False,
) -> Iterator[Union[ResourcePath, Iterator[ResourcePath]]]:
    """Get all the files from a list of values.

    Parameters
    ----------
    candidates : iterable [`str` or `ResourcePath`]
        The files to return and directories in which to look for files to
        return.
    file_filter : `str` or `re.Pattern`, optional
        The regex to use when searching for files within directories.
        By default returns all the found files.
    grouped : `bool`, optional
        If `True` the results will be grouped by directory and each
        yielded value will be an iterator over URIs. If `False` each
        URI will be returned separately.

    Yields
    ------
    found_file: `ResourcePath`
        The passed-in URIs and URIs found in passed-in directories.
        If grouping is enabled, each of the yielded values will be an
        iterator yielding members of the group. Files given explicitly
        will be returned as a single group at the end.

    Notes
    -----
    If a value is a file it is yielded immediately without checking that it
    exists. If a value is a directory, all the files in the directory
    (recursively) that match the regex will be yielded in turn.

    Each group iterator is bound to its own directory when it is
    created, so groups may be collected first and consumed later.
    """
    fileRegex = None if file_filter is None else re.compile(file_filter)

    # Explicitly named files; only filled in when grouping is requested.
    singles = []

    # Find all the files of interest
    for location in candidates:
        uri = ResourcePath(location)

        if not uri.isdir():
            if grouped:
                singles.append(uri)
            else:
                yield uri
            continue

        for found in uri.walk(fileRegex):
            if not found:
                # This means the uri does not exist and by
                # convention we ignore it
                continue
            root, _, files = found
            if not files:
                continue
            if grouped:
                # Bind ``root.join`` now. A generator expression would
                # look ``root`` up lazily and see whatever directory
                # the loop had reached by the time the group is
                # consumed.
                yield map(root.join, files)
            else:
                for name in files:
                    yield root.join(name)

    # Finally, return any explicitly given files in one group
    if grouped and singles:
        yield iter(singles)

1228 

@contextlib.contextmanager
def open(
    self,
    mode: str = "r",
    *,
    encoding: Optional[str] = None,
    prefer_file_temporary: bool = False,
) -> Iterator[IO]:
    """Return a context manager that wraps an object that behaves like an
    open file at the location of the URI.

    Parameters
    ----------
    mode : `str`
        String indicating the mode in which to open the file.  Values are
        the same as those accepted by `builtins.open`, though intrinsically
        read-only URI types may only support read modes, and
        `io.IOBase.seekable` is not guaranteed to be `True` on the returned
        object.
    encoding : `str`, optional
        Unicode encoding for text IO; ignored for binary IO.  Defaults to
        ``locale.getpreferredencoding(False)``, just as `builtins.open`
        does.
    prefer_file_temporary : `bool`, optional
        If `True`, for implementations that require transfers from a remote
        system to temporary local storage and/or back, use a temporary file
        instead of an in-memory buffer; this is generally slower, but it
        may be necessary to avoid excessive memory usage by large files.
        Ignored by implementations that do not require a temporary.

    Returns
    -------
    cm : `contextlib.ContextManager`
        A context manager that wraps a file-like object.

    Raises
    ------
    IsADirectoryError
        Raised if this URI is directory-like.
    FileExistsError
        Raised if ``mode`` contains ``"x"`` and the resource already
        exists.

    Notes
    -----
    The default implementation of this method uses a local temporary buffer
    (in-memory or file, depending on ``prefer_file_temporary``) with calls
    to `read`, `write`, `as_local`, and `transfer_from` as necessary to
    read and write from/to remote systems.  Remote writes thus occur only
    when the context manager is exited.  `ResourcePath` implementations
    that can return a more efficient native buffer should do so whenever
    possible (as is guaranteed for local files).  `ResourcePath`
    implementations for which `as_local` does not return a temporary are
    required to reimplement `open`, though they may delegate to `super`
    when `prefer_file_temporary` is `False`.
    """
    if self.dirLike:
        raise IsADirectoryError(f"Directory-like URI {self} cannot be opened.")
    if "x" in mode and self.exists():
        raise FileExistsError(f"File at {self} already exists.")

    binary = "b" in mode
    # Read and append modes start from the existing contents.
    needs_existing = "r" in mode or "a" in mode
    # Everything except a pure read is written back on a clean exit.
    writes_back = "r" not in mode or "+" in mode
    overwrite = "x" not in mode

    if prefer_file_temporary:
        if needs_existing:
            local_cm = self.as_local()
        else:
            local_cm = self.temporary_uri(suffix=self.getExtension())
        with local_cm as local_uri:
            assert local_uri.isTemporary, (
                "ResourcePath implementations for which as_local is not "
                "a temporary must reimplement `open`."
            )
            # The builtin open rejects an encoding in binary mode, but
            # this method documents encoding as ignored for binary IO.
            # ``io.open`` is the builtin; spelled this way so it cannot
            # be confused with this method.
            file_encoding = None if binary else encoding
            with io.open(local_uri.ospath, mode=mode, encoding=file_encoding) as file_buffer:
                if "a" in mode:
                    file_buffer.seek(0, io.SEEK_END)
                yield file_buffer
            # Transfer only after the local file has been closed.
            if writes_back:
                self.transfer_from(local_uri, transfer="copy", overwrite=overwrite)
    else:
        in_bytes = self.read() if needs_existing else b""
        if binary:
            bytes_buffer = io.BytesIO(in_bytes)
            if "a" in mode:
                bytes_buffer.seek(0, io.SEEK_END)
            yield bytes_buffer
            out_bytes = bytes_buffer.getvalue()
        else:
            if encoding is None:
                encoding = locale.getpreferredencoding(False)
            str_buffer = io.StringIO(in_bytes.decode(encoding))
            if "a" in mode:
                str_buffer.seek(0, io.SEEK_END)
            yield str_buffer
            out_bytes = str_buffer.getvalue().encode(encoding)
        if writes_back:
            self.write(out_bytes, overwrite=overwrite)