Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24import contextlib 

25import urllib.parse 

26import posixpath 

27import copy 

28import logging 

29import re 

30 

31from pathlib import Path, PurePath, PurePosixPath 

32 

33__all__ = ('ButlerURI',) 

34 

35from typing import ( 

36 TYPE_CHECKING, 

37 Any, 

38 Iterable, 

39 Iterator, 

40 List, 

41 Optional, 

42 Tuple, 

43 Type, 

44 Union, 

45) 

46 

47from .utils import NoTransaction 

48 

49if TYPE_CHECKING: 49 ↛ 50line 49 didn't jump to line 50, because the condition on line 49 was never true

50 from ..datastore import DatastoreTransaction 

51 

52 

# Logger shared by everything in this module.
log = logging.getLogger(__name__)

# Matches a percent-escape made of two upper-case hexadecimal digits;
# used to spot strings that may already have been URI-quoted.
ESCAPES_RE = re.compile(r"%[A-F0-9]{2}")

# URI-quoted form of "#", computed once at import time.
ESCAPED_HASH = urllib.parse.quote("#")

60 

61 

62class ButlerURI: 

63 """Convenience wrapper around URI parsers. 

64 

65 Provides access to URI components and can convert file 

66 paths into absolute path URIs. Scheme-less URIs are treated as if 

67 they are local file system paths and are converted to absolute URIs. 

68 

69 A specialist subclass is created for each supported URI scheme. 

70 

71 Parameters 

72 ---------- 

73 uri : `str` or `urllib.parse.ParseResult` 

74 URI in string form. Can be scheme-less if referring to a local 

75 filesystem path. 

76 root : `str` or `ButlerURI`, optional 

77 When fixing up a relative path in a ``file`` scheme or if scheme-less, 

78 use this as the root. Must be absolute. If `None` the current 

79 working directory will be used. Can be a file URI. 

80 forceAbsolute : `bool`, optional 

81 If `True`, scheme-less relative URI will be converted to an absolute 

82 path using a ``file`` scheme. If `False` scheme-less URI will remain 

83 scheme-less and will not be updated to ``file`` or absolute path. 

84 forceDirectory: `bool`, optional 

85 If `True` forces the URI to end with a separator, otherwise given URI 

86 is interpreted as is. 

87 isTemporary : `bool`, optional 

88 If `True` indicates that this URI points to a temporary resource. 

89 """ 

90 

91 _pathLib: Type[PurePath] = PurePosixPath 

92 """Path library to use for this scheme.""" 

93 

94 _pathModule = posixpath 

95 """Path module to use for this scheme.""" 

96 

97 transferModes: Tuple[str, ...] = ("copy", "auto", "move") 

98 """Transfer modes supported by this implementation. 

99 

100 Move is special in that it is generally a copy followed by an unlink. 

101 Whether that unlink works depends critically on whether the source URI 

102 implements unlink. If it does not the move will be reported as a failure. 

103 """ 

104 

105 transferDefault: str = "copy" 

106 """Default mode to use for transferring if ``auto`` is specified.""" 

107 

108 quotePaths = True 

109 """True if path-like elements modifying a URI should be quoted. 

110 

111 All non-schemeless URIs have to internally use quoted paths. Therefore 

112 if a new file name is given (e.g. to updatedFile or join) a decision must 

113 be made whether to quote it to be consistent. 

114 """ 

115 

116 isLocal = False 

117 """If `True` this URI refers to a local file.""" 

118 

119 # This is not an ABC with abstract methods because the __new__ being 

120 # a factory confuses mypy such that it assumes that every constructor 

121 # returns a ButlerURI and then determines that all the abstract methods 

122 # are still abstract. If they are not marked abstract but just raise 

123 # mypy is fine with it. 

124 

125 # mypy is confused without these 

126 _uri: urllib.parse.ParseResult 

127 isTemporary: bool 

128 dirLike: bool 

129 

def __new__(cls, uri: Union[str, urllib.parse.ParseResult, ButlerURI, Path],
            root: Optional[Union[str, ButlerURI]] = None, forceAbsolute: bool = True,
            forceDirectory: bool = False, isTemporary: bool = False) -> ButlerURI:
    """Create and return new specialist ButlerURI subclass.

    Parameters
    ----------
    uri : `str`, `urllib.parse.ParseResult`, `ButlerURI` or `pathlib.Path`
        The URI to wrap. A `ButlerURI` is returned unchanged.
    root : `str` or `ButlerURI`, optional
        Forwarded to the subclass ``_fixupPathUri`` implementation.
    forceAbsolute : `bool`, optional
        Forwarded to the subclass ``_fixupPathUri`` implementation.
    forceDirectory : `bool`, optional
        If `True` the resulting URI is treated as dir-like.
    isTemporary : `bool`, optional
        Stored on the new instance as ``isTemporary``.

    Returns
    -------
    new : `ButlerURI`
        Instance of the subclass selected from the URI scheme, or of
        ``cls`` itself when a subclass is called with a parse result.

    Raises
    ------
    ValueError
        Raised if ``uri`` is not one of the supported types.
    NotImplementedError
        Raised if the URI scheme has no associated subclass.
    """
    parsed: urllib.parse.ParseResult
    dirLike: bool = False
    subclass: Optional[Type[ButlerURI]] = None

    if isinstance(uri, Path):
        uri = str(uri)

    # Record if we need to post process the URI components
    # or if the instance is already fully configured
    if isinstance(uri, str):
        # Since local file names can have special characters in them
        # we need to quote them for the parser but we can unquote
        # later. Assume that all other URI schemes are quoted.
        # Since sometimes people write file:/a/b and not file:///a/b
        # we should not quote in the explicit case of file:
        if "://" not in uri and not uri.startswith("file:"):
            if ESCAPES_RE.search(uri):
                log.warning("Possible double encoding of %s", uri)
            else:
                uri = urllib.parse.quote(uri)
                # Special case hash since we must support fragments
                # even in schemeless URIs -- although try to only replace
                # them in file part and not directory part
                if ESCAPED_HASH in uri:
                    dirpos = uri.rfind("/")
                    # Do replacement after this /
                    uri = uri[:dirpos+1] + uri[dirpos+1:].replace(ESCAPED_HASH, "#")

        parsed = urllib.parse.urlparse(uri)
    elif isinstance(uri, urllib.parse.ParseResult):
        parsed = copy.copy(uri)
        # If we are being instantiated with a subclass, rather than
        # ButlerURI, ensure that that subclass is used directly.
        # This could lead to inconsistencies if this constructor
        # is used externally outside of the ButlerURI.replace() method.
        #   ButlerS3URI(urllib.parse.urlparse("file://a/b.txt"))
        # will be a problem.
        # This is needed to prevent a schemeless absolute URI become
        # a file URI unexpectedly when calling updatedFile or
        # updatedExtension
        if cls is not ButlerURI:
            parsed, dirLike = cls._fixDirectorySep(parsed, forceDirectory)
            subclass = cls

    elif isinstance(uri, ButlerURI):
        # Since ButlerURI is immutable we can return the argument
        # unchanged.
        return uri
    else:
        raise ValueError("Supplied URI must be string, Path, "
                         f"ButlerURI, or ParseResult but got '{uri!r}'")

    if subclass is None:
        # Work out the subclass from the URI scheme
        if not parsed.scheme:
            from .schemeless import ButlerSchemelessURI
            subclass = ButlerSchemelessURI
        elif parsed.scheme == "file":
            from .file import ButlerFileURI
            subclass = ButlerFileURI
        elif parsed.scheme == "s3":
            from .s3 import ButlerS3URI
            subclass = ButlerS3URI
        elif parsed.scheme.startswith("http"):
            from .http import ButlerHttpURI
            subclass = ButlerHttpURI
        elif parsed.scheme == "resource":
            # Rules for scheme names disallow pkg_resource
            from .packageresource import ButlerPackageResourceURI
            subclass = ButlerPackageResourceURI
        elif parsed.scheme == "mem":
            # in-memory datastore object
            from .mem import ButlerInMemoryURI
            subclass = ButlerInMemoryURI
        else:
            # Both fragments must be f-strings; otherwise the URL
            # placeholder is emitted literally instead of interpolated.
            raise NotImplementedError(f"No URI support for scheme: '{parsed.scheme}'"
                                      f" in {parsed.geturl()}")

        parsed, dirLike = subclass._fixupPathUri(parsed, root=root,
                                                 forceAbsolute=forceAbsolute,
                                                 forceDirectory=forceDirectory)

        # It is possible for the class to change from schemeless
        # to file so handle that
        if parsed.scheme == "file":
            from .file import ButlerFileURI
            subclass = ButlerFileURI

    # Now create an instance of the correct subclass and set the
    # attributes directly
    self = object.__new__(subclass)
    self._uri = parsed
    self.dirLike = dirLike
    self.isTemporary = isTemporary
    return self

229 

@property
def scheme(self) -> str:
    """URI scheme of this object.

    Notes
    -----
    The ``://`` separator is not part of the returned value.
    """
    parsed = self._uri
    return parsed.scheme

239 

@property
def netloc(self) -> str:
    """Network location component of the URI."""
    parsed = self._uri
    return parsed.netloc

244 

@property
def path(self) -> str:
    """Path component of the URI, exactly as stored (no unquoting)."""
    parsed = self._uri
    return parsed.path

249 

@property
def unquoted_path(self) -> str:
    """Path component of the URI with percent-escapes decoded."""
    quoted = self._uri.path
    return urllib.parse.unquote(quoted)

254 

@property
def ospath(self) -> str:
    """Path component of the URI localized to the current OS.

    Raises
    ------
    AttributeError
        Always raised by this implementation.
    """
    message = f"Non-file URI ({self}) has no local OS path."
    raise AttributeError(message)

259 

@property
def relativeToPathRoot(self) -> str:
    """Path of this URI relative to the root of its path component.

    The result is always unquoted. A dir-like URI always yields a value
    ending in ``/``.
    """
    fullPath = self._pathLib(self.path)
    relative = str(fullPath.relative_to(fullPath.root))
    # The path library drops trailing separators, so restore one for
    # dir-like URIs.
    needsSep = self.dirLike and not relative.endswith("/")
    if needsSep:
        relative = relative + "/"
    return urllib.parse.unquote(relative)

274 

@property
def is_root(self) -> bool:
    """Whether this URI points to the root of the network location.

    True exactly when ``relativeToPathRoot`` is ``"./"``.
    """
    return self.relativeToPathRoot == "./"

285 

@property
def fragment(self) -> str:
    """Fragment component of the URI."""
    parsed = self._uri
    return parsed.fragment

290 

@property
def params(self) -> str:
    """Parameters component of the URI."""
    parsed = self._uri
    return parsed.params

295 

@property
def query(self) -> str:
    """Query-string component of the URI."""
    parsed = self._uri
    return parsed.query

300 

def geturl(self) -> str:
    """Return the URI as a string.

    Returns
    -------
    url : `str`
        String form of the URI.
    """
    parsed = self._uri
    return parsed.geturl()

310 

def root_uri(self) -> ButlerURI:
    """Return the base root URI.

    Returns
    -------
    uri : `ButlerURI`
        This URI with its path replaced by an empty path, forced to be
        dir-like.
    """
    emptyPath = ""
    return self.replace(path=emptyPath, forceDirectory=True)

320 

def split(self) -> Tuple[ButlerURI, str]:
    """Split the URI into a directory URI and the final path component.

    Returns
    -------
    head : `ButlerURI`
        Everything leading up to the tail, as a dir-like `ButlerURI`
        that keeps the other URI components.
    tail : `str`
        Last component of ``self.path``, unquoted. Empty if the path
        ends on a separator; never contains a separator.

    Notes
    -----
    Equivalent to `os.path.split()` where head preserves the URI
    components.
    """
    pathModule = self._pathModule
    dirPart, filePart = pathModule.split(self.path)

    # A relative (scheme-less) path must stay relative, so absoluteness
    # is only forced when the path is already absolute.
    keepAbsolute = pathModule.isabs(self.path)

    headURI = ButlerURI(self._uri._replace(path=dirPart), forceDirectory=True,
                        forceAbsolute=keepAbsolute)

    # The file part is handed back without URI quoting.
    return headURI, urllib.parse.unquote(filePart)

350 

def basename(self) -> str:
    """Return the last element of the URI path.

    Returns
    -------
    tail : `str`
        Last part of the path attribute. Empty if the path ends on a
        separator.

    Notes
    -----
    This is the second element returned by `split()`.

    Equivalent of `os.path.basename()`.
    """
    _, tail = self.split()
    return tail

368 

def dirname(self) -> ButlerURI:
    """Return the directory part of the path as a new `ButlerURI`.

    Returns
    -------
    head : `ButlerURI`
        The first element returned by `split()`.

    Notes
    -----
    Equivalent of `os.path.dirname()`.
    """
    head, _ = self.split()
    return head

383 

def parent(self) -> ButlerURI:
    """Return a `ButlerURI` of the parent directory.

    Returns
    -------
    head : `ButlerURI`
        For a file-like URI, the result of `dirname()`. For a dir-like
        URI, a dir-like URI whose path is the parent of this path.
    """
    if self.dirLike:
        # The path library ignores any trailing separator, so this is
        # the real parent directory.
        parentPath = self._pathLib(self.path).parent
        return self.replace(path=str(parentPath), forceDirectory=True)
    return self.dirname()

405 

def replace(self, forceDirectory: bool = False, **kwargs: Any) -> ButlerURI:
    """Return a new `ButlerURI` with the given components replaced.

    Parameters
    ----------
    forceDirectory : `bool`
        Passed to the constructor to force the new URI to be dir-like.
    **kwargs
        Components of a `urllib.parse.ParseResult` to change in the
        new object.

    Returns
    -------
    new : `ButlerURI`
        New object, of the same class as this one, with updated values.

    Raises
    ------
    ValueError
        Raised if a change of ``scheme`` is requested.
    """
    if "scheme" in kwargs:
        raise ValueError(f"Can not use replace() method to change URI scheme for {self}")
    updatedParts = self._uri._replace(**kwargs)
    constructor = self.__class__
    return constructor(updatedParts, forceDirectory=forceDirectory)

431 

def updatedFile(self, newfile: str) -> ButlerURI:
    """Return a new URI with the final path component replaced.

    Parameters
    ----------
    newfile : `str`
        File name with no path component.

    Returns
    -------
    updated : `ButlerURI`
        New URI whose ``dirLike`` attribute is `False`.

    Notes
    -----
    The new file name is quoted when ``quotePaths`` is true.
    """
    fileName = urllib.parse.quote(newfile) if self.quotePaths else newfile
    parentDir = self._pathModule.split(self.path)[0]
    result = self.replace(path=self._pathModule.join(parentDir, fileName))
    # The result names a file, whatever the original URI was.
    result.dirLike = False
    return result

457 

def updatedExtension(self, ext: Optional[str]) -> ButlerURI:
    """Return a new `ButlerURI` with an updated file extension.

    The whole current extension, as reported by `getExtension()`, is
    replaced.

    Parameters
    ----------
    ext : `str` or `None`
        New extension. An empty string removes any extension. `None`
        means no change.

    Returns
    -------
    updated : `ButlerURI`
        URI with the requested extension. This object itself is
        returned if ``ext`` is `None` or already matches.
    """
    if ext is None:
        return self

    existing = self.getExtension()
    if existing == ext:
        return self

    # Strip by length rather than os.path.splitext because a combined
    # extension such as .fits.gz counts as a single extension.
    stem = self.path[:-len(existing)] if existing else self.path

    # A non-empty extension always starts with "."; the empty string is
    # left alone.
    suffix = "." + ext if ext and not ext.startswith(".") else ext

    return self.replace(path=stem + suffix)

497 

def getExtension(self) -> str:
    """Return the file extension(s) of this URI path.

    Returns
    -------
    ext : `str`
        The file extension, including the leading ``.``, or an empty
        string if there is none. Only the last extension is returned,
        unless it is one of the compression suffixes handled here and
        another extension precedes it, in which case the last two are
        combined (e.g. ``.fits.gz``).
    """
    compression = {".gz", ".bz2", ".xz", ".fz"}

    # Only look at the file part so that "." in directory names is
    # ignored.
    suffixes = self._pathLib(self.basename()).suffixes
    if not suffixes:
        return ""

    last = suffixes[-1]
    if len(suffixes) > 1 and last in compression:
        return f"{suffixes[-2]}{last}"
    return last

527 

def join(self, path: Union[str, ButlerURI]) -> ButlerURI:
    """Return a new `ButlerURI` with extra path components appended.

    Parameters
    ----------
    path : `str`, `ButlerURI`
        Additional components to append to the current URI. Assumed to
        include a file at the end. Quoted depending on the URI scheme.
        If it is a URI with a scheme it is returned directly (matching
        the behavior of `os.path.join()`).

    Returns
    -------
    new : `ButlerURI`
        New URI with any file at the end replaced with the new path
        components.

    Raises
    ------
    ValueError
        Raised if ``path`` is an absolute scheme-less URI. In that case
        it is unclear whether a ``file`` URI was intended or a relative
        scheme-less path was meant.

    Notes
    -----
    Schemeless URIs assume the local path separator but all other URIs
    assume the POSIX separator if the supplied path has directory
    structure.
    """
    # Parse without forcing to absolute so that the expected case of a
    # relative path can be told apart from the others.
    path_uri = ButlerURI(path, forceAbsolute=False)

    # An explicit scheme means a complete URI: hand it straight back.
    if path_uri.scheme:
        return path_uri

    if path_uri.isabs():
        raise ValueError(f"Can not join absolute scheme-less {path_uri!r} to another URI.")

    # A plain string is used as given so that "#" can appear in the
    # file name; a ButlerURI contributes its unquoted path.
    relative = path if isinstance(path, str) else path_uri.unquoted_path

    base = self.dirname()

    # Ask the directory URI, not self, about quoting since dirname()
    # can turn a schemeless URI into a file URI.
    if base.quotePaths:
        relative = urllib.parse.quote(relative)

    pathModule = self._pathModule
    joined = pathModule.normpath(pathModule.join(base.path, relative))

    # normpath can drop a trailing separator, so dir-likeness is taken
    # from the supplied path.
    endsOnSep = relative.endswith(pathModule.sep)
    return base.replace(path=joined, forceDirectory=endsOnSep)

598 

def relative_to(self, other: ButlerURI) -> Optional[str]:
    """Return the path of this URI relative to another URI.

    Parameters
    ----------
    other : `ButlerURI`
        URI to use to calculate the relative path. Must be a parent of
        this URI.

    Returns
    -------
    subpath : `str`
        Unquoted sub path of this URI relative to ``other``. `None` if
        the scheme or netloc differ or ``other`` is not a parent.
    """
    # An absolute scheme-less other is compared via its abspath() form.
    if not other.scheme and other.isabs():
        other = other.abspath()

    sameLocation = self.scheme == other.scheme and self.netloc == other.netloc
    if not sameLocation:
        return None

    child = self._pathLib(self.relativeToPathRoot)
    ancestor = other.relativeToPathRoot
    try:
        relative = str(child.relative_to(ancestor))
    except ValueError:
        # Not a parent/child pair.
        return None
    return urllib.parse.unquote(relative)

636 

def exists(self) -> bool:
    """Indicate whether the resource is available.

    Returns
    -------
    exists : `bool`
        `True` if the resource exists.

    Raises
    ------
    NotImplementedError
        Always raised by this implementation.
    """
    raise NotImplementedError()

646 

def remove(self) -> None:
    """Remove the resource.

    Raises
    ------
    NotImplementedError
        Always raised by this implementation.
    """
    raise NotImplementedError()

650 

def isabs(self) -> bool:
    """Indicate that the resource is fully specified.

    Returns
    -------
    isabs : `bool`
        Always `True` in this implementation.
    """
    return True

662 

def abspath(self) -> ButlerURI:
    """Return the URI using an absolute path.

    Returns
    -------
    abs : `ButlerURI`
        This implementation returns the object itself.
    """
    return self

673 

674 def _as_local(self) -> Tuple[str, bool]: 

675 """Return the location of the (possibly remote) resource as local file. 

676 

677 This is a helper function for `as_local` context manager. 

678 

679 Returns 

680 ------- 

681 path : `str` 

682 If this is a remote resource, it will be a copy of the resource 

683 on the local file system, probably in a temporary directory. 

684 For a local resource this should be the actual path to the 

685 resource. 

686 is_temporary : `bool` 

687 Indicates if the local path is a temporary file or not. 

688 """ 

689 raise NotImplementedError() 

690 

@contextlib.contextmanager
def as_local(self) -> Iterator[ButlerURI]:
    """Provide the (possibly remote) resource as a local file.

    Yields
    ------
    local : `ButlerURI`
        URI built from the path returned by ``_as_local()``.

    Notes
    -----
    If ``_as_local()`` reports the file as temporary, it is removed on
    exit from the context provided it still exists.

    Examples
    --------
    Should be used as a context manager:

    .. code-block:: py

       with uri.as_local() as local:
           ospath = local.ospath
    """
    localPath, temporary = self._as_local()
    localURI = ButlerURI(localPath, isTemporary=temporary)

    try:
        yield localURI
    finally:
        # The caller may have moved the temporary file elsewhere, so
        # only remove it if it is still there.
        if temporary and localURI.exists():
            localURI.remove()

726 

def read(self, size: int = -1) -> bytes:
    """Open the resource and return its contents as bytes.

    Parameters
    ----------
    size : `int`, optional
        Number of bytes to read. Negative or omitted means read
        everything.

    Raises
    ------
    NotImplementedError
        Always raised by this implementation.
    """
    raise NotImplementedError()

737 

def write(self, data: bytes, overwrite: bool = True) -> None:
    """Write the supplied bytes to the resource.

    Parameters
    ----------
    data : `bytes`
        Bytes to write. The entire contents of the resource are
        replaced.
    overwrite : `bool`, optional
        If `True` an existing resource is overwritten; otherwise the
        write fails.

    Raises
    ------
    NotImplementedError
        Always raised by this implementation.
    """
    raise NotImplementedError()

751 

def mkdir(self) -> None:
    """Create the directory resource for a dir-like URI if needed.

    Raises
    ------
    NotImplementedError
        Always raised by this implementation.
    """
    raise NotImplementedError()

755 

def isdir(self) -> bool:
    """Return the ``dirLike`` flag: True if the URI looks like a directory."""
    looksLikeDir = self.dirLike
    return looksLikeDir

759 

def size(self) -> int:
    """Return the size of the resource for a non-dir-like URI.

    Returns
    -------
    sz : `int`
        Size in bytes of the resource associated with this URI.
        Documented to be 0 for a dir-like URI.

    Raises
    ------
    NotImplementedError
        Always raised by this implementation.
    """
    raise NotImplementedError()

770 

771 def __str__(self) -> str: 

772 """Convert the URI to its native string form.""" 

773 return self.geturl() 

774 

775 def __repr__(self) -> str: 

776 """Return string representation suitable for evaluation.""" 

777 return f'ButlerURI("{self.geturl()}")' 

778 

779 def __eq__(self, other: Any) -> bool: 

780 """Compare supplied object with this `ButlerURI`.""" 

781 if not isinstance(other, ButlerURI): 

782 return NotImplemented 

783 return self.geturl() == other.geturl() 

784 

785 def __hash__(self) -> int: 

786 """Return hash of this object.""" 

787 return hash(str(self)) 

788 

789 def __copy__(self) -> ButlerURI: 

790 """Copy constructor. 

791 

792 Object is immutable so copy can return itself. 

793 """ 

794 # Implement here because the __new__ method confuses things 

795 return self 

796 

797 def __deepcopy__(self, memo: Any) -> ButlerURI: 

798 """Deepcopy the object. 

799 

800 Object is immutable so copy can return itself. 

801 """ 

802 # Implement here because the __new__ method confuses things 

803 return self 

804 

805 def __getnewargs__(self) -> Tuple: 

806 """Support pickling.""" 

807 return (str(self),) 

808 

809 @classmethod 

810 def _fixDirectorySep(cls, parsed: urllib.parse.ParseResult, 

811 forceDirectory: bool = False) -> Tuple[urllib.parse.ParseResult, bool]: 

812 """Ensure that a path separator is present on directory paths. 

813 

814 Parameters 

815 ---------- 

816 parsed : `~urllib.parse.ParseResult` 

817 The result from parsing a URI using `urllib.parse`. 

818 forceDirectory : `bool`, optional 

819 If `True` forces the URI to end with a separator, otherwise given 

820 URI is interpreted as is. Specifying that the URI is conceptually 

821 equivalent to a directory can break some ambiguities when 

822 interpreting the last element of a path. 

823 

824 Returns 

825 ------- 

826 modified : `~urllib.parse.ParseResult` 

827 Update result if a URI is being handled. 

828 dirLike : `bool` 

829 `True` if given parsed URI has a trailing separator or 

830 forceDirectory is True. Otherwise `False`. 

831 """ 

832 # assume we are not dealing with a directory like URI 

833 dirLike = False 

834 

835 # Directory separator 

836 sep = cls._pathModule.sep 

837 

838 # URI is dir-like if explicitly stated or if it ends on a separator 

839 endsOnSep = parsed.path.endswith(sep) 

840 if forceDirectory or endsOnSep: 

841 dirLike = True 

842 # only add the separator if it's not already there 

843 if not endsOnSep: 843 ↛ 846line 843 didn't jump to line 846, because the condition on line 843 was never false

844 parsed = parsed._replace(path=parsed.path+sep) 

845 

846 return parsed, dirLike 

847 

@classmethod
def _fixupPathUri(
    cls,
    parsed: urllib.parse.ParseResult,
    root: Optional[Union[str, ButlerURI]] = None,
    forceAbsolute: bool = False,
    forceDirectory: bool = False,
) -> Tuple[urllib.parse.ParseResult, bool]:
    """Apply the generic corrections to a parsed URI.

    This implementation delegates entirely to ``_fixDirectorySep``; the
    only adjustment made is the handling of the trailing path separator.

    Parameters
    ----------
    parsed : `~urllib.parse.ParseResult`
        The result from parsing a URI using `urllib.parse`.
    root : `str` or `ButlerURI`, ignored
        Not used by this implementation. It is accepted so that the
        signature stays uniform.
    forceAbsolute : `bool`, ignored
        Not used by this implementation.
    forceDirectory : `bool`, optional
        If `True` the URI is forced to end with a separator, otherwise
        the given URI is interpreted as is. Declaring that the URI is
        conceptually a directory removes the ambiguity in how the last
        element of the path should be interpreted.

    Returns
    -------
    modified : `~urllib.parse.ParseResult`
        The parse result as returned by ``_fixDirectorySep``.
    dirLike : `bool`
        The directory flag reported by ``_fixDirectorySep``: `True` if
        the parsed URI has a trailing separator or ``forceDirectory`` is
        `True`, otherwise `False`.

    Notes
    -----
    No attempt is made here to convert relative paths into absolute
    ones; ``root`` and ``forceAbsolute`` are never consulted.

    The trailing separator can matter: AWS S3 differentiates between
    keys with and without a trailing POSIX separator (i.e. ``/dir`` and
    ``/dir/``) whereas POSIX does not necessarily do so.
    """
    # The separator handling is the sole correction applied here.
    fixed = cls._fixDirectorySep(parsed, forceDirectory)
    return fixed

891 

def transfer_from(
    self,
    src: ButlerURI,
    transfer: str,
    overwrite: bool = False,
    transaction: Optional[Union[DatastoreTransaction, NoTransaction]] = None,
) -> None:
    """Transfer a resource from ``src`` to the location of this URI.

    Parameters
    ----------
    src : `ButlerURI`
        Source URI.
    transfer : `str`
        Mode to use for transferring the resource. Generically there are
        many standard options: copy, link, symlink, hardlink, relsymlink.
        Not all URIs support all modes.
    overwrite : `bool`, optional
        Allow an existing file to be overwritten. Defaults to `False`.
    transaction : `DatastoreTransaction`, optional
        A transaction object that can (depending on implementation)
        rollback transfers on error. Not guaranteed to be implemented.

    Raises
    ------
    NotImplementedError
        Always raised by this implementation, which supports no
        transfer modes.

    Notes
    -----
    Conceptually this is hard to scale as the number of URI schemes
    grows. The destination URI matters more than the source URI because
    the transfer modes are only relevant at the destination (with the
    complication that "move" deletes the source).

    Local file to local file is the fundamental use case, but every
    other scheme has to support "copy" to a local file (with implicit
    support for "move") and copy from a local file. The "link" options
    tend to be specific to local file systems.

    "move" is a "copy" where the remote resource is deleted at the end.
    Whether this works depends on the source URI rather than the
    destination URI. Reverting a move on transaction rollback is
    expected to be problematic if a remote resource was involved.
    """
    message = f"No transfer modes supported by URI scheme {self.scheme}"
    raise NotImplementedError(message)

929 

def walk(
    self, file_filter: Optional[Union[str, re.Pattern]] = None
) -> Iterator[Union[List, Tuple[ButlerURI, List[str], List[str]]]]:
    """Walk the directory tree, reporting the files and directories found.

    Parameters
    ----------
    file_filter : `str` or `re.Pattern`, optional
        Regex used to filter the list of files before it is returned.

    Yields
    ------
    dirpath : `ButlerURI`
        Current directory being examined.
    dirnames : `list` of `str`
        Names of subdirectories within dirpath.
    filenames : `list` of `str`
        Names of all the files within dirpath.

    Raises
    ------
    NotImplementedError
        Always raised by this implementation.

    Notes
    -----
    The return annotation also allows a bare `list` to be yielded in
    place of the three-element tuple.
    """
    raise NotImplementedError()

951 

@classmethod
def findFileResources(cls, candidates: Iterable[Union[str, ButlerURI]],
                      file_filter: Optional[str] = None,
                      grouped: bool = False) -> Iterator[Union[ButlerURI, Iterator[ButlerURI]]]:
    """Get all the files from a list of values.

    Parameters
    ----------
    candidates : iterable [`str` or `ButlerURI`]
        The files to return and directories in which to look for files to
        return.
    file_filter : `str`, optional
        The regex to use when searching for files within directories.
        By default returns all the found files.
    grouped : `bool`, optional
        If `True` the results will be grouped by directory and each
        yielded value will be an iterator over URIs. If `False` each
        URI will be returned separately.

    Yields
    ------
    found_file : `ButlerURI`
        The passed-in URIs and URIs found in passed-in directories.
        If grouping is enabled, each of the yielded values will be an
        iterator yielding members of the group. Files given explicitly
        will be returned as a single group at the end.

    Notes
    -----
    Without grouping, a value that is a file is yielded as soon as it is
    encountered. If a value is a directory, it is walked and the files
    reported by ``walk`` (called with the compiled ``file_filter``) are
    yielded in turn.

    With grouping, each directory group is a lazy iterator that is bound
    to its own directory at the time it is yielded, so groups can be
    collected first and consumed later, in any order.
    """
    fileRegex = None if file_filter is None else re.compile(file_filter)

    # Explicitly given files; only used when grouping is requested.
    singles: List[ButlerURI] = []

    # Find all the files of interest
    for location in candidates:
        uri = ButlerURI(location)

        if not uri.isdir():
            if grouped:
                singles.append(uri)
            else:
                yield uri
            continue

        for found in uri.walk(fileRegex):
            if not found:
                # This means the uri does not exist and by
                # convention we ignore it
                continue
            root, _, files = found
            if not files:
                continue
            if grouped:
                # Bind ``root.join`` now rather than closing over the
                # loop variable: a generator expression would look up
                # ``root`` lazily and use a later directory if the
                # caller delays consuming this group.
                yield map(root.join, files)
            else:
                for name in files:
                    yield root.join(name)

    # Finally, return any explicitly given files in one group
    if grouped and singles:
        yield iter(singles)