Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24import contextlib 

25import urllib.parse 

26import posixpath 

27import copy 

28import logging 

29import re 

30import shutil 

31import tempfile 

32 

33from random import Random 

34from pathlib import Path, PurePath, PurePosixPath 

35 

36__all__ = ('ButlerURI',) 

37 

38from typing import ( 

39 TYPE_CHECKING, 

40 Any, 

41 Iterable, 

42 Iterator, 

43 List, 

44 Optional, 

45 Tuple, 

46 Type, 

47 Union, 

48) 

49 

50from .utils import NoTransaction 

51 

52if TYPE_CHECKING: 52 ↛ 53line 52 didn't jump to line 53, because the condition on line 52 was never true

53 from ..datastore import DatastoreTransaction 

54 

55 

# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# Regex for looking for URI escapes: a "%" followed by exactly two
# characters drawn from 0-9 and upper-case A-F. Lower-case hex digits
# do not match this pattern.
ESCAPES_RE = re.compile(r"%[A-F0-9]{2}")

# Precomputed escaped hash: the quoted form of "#" as produced by
# urllib.parse.quote, computed once at import time.
ESCAPED_HASH = urllib.parse.quote("#")

63 

64 

65class ButlerURI: 

66 """Convenience wrapper around URI parsers. 

67 

68 Provides access to URI components and can convert file 

69 paths into absolute path URIs. Scheme-less URIs are treated as if 

70 they are local file system paths and are converted to absolute URIs. 

71 

72 A specialist subclass is created for each supported URI scheme. 

73 

74 Parameters 

75 ---------- 

76 uri : `str` or `urllib.parse.ParseResult` 

77 URI in string form. Can be scheme-less if referring to a local 

78 filesystem path. 

79 root : `str` or `ButlerURI`, optional 

80 When fixing up a relative path in a ``file`` scheme or if scheme-less, 

81 use this as the root. Must be absolute. If `None` the current 

82 working directory will be used. Can be a file URI. 

83 forceAbsolute : `bool`, optional 

84 If `True`, scheme-less relative URI will be converted to an absolute 

85 path using a ``file`` scheme. If `False` scheme-less URI will remain 

86 scheme-less and will not be updated to ``file`` or absolute path. 

87 forceDirectory: `bool`, optional 

88 If `True` forces the URI to end with a separator, otherwise given URI 

89 is interpreted as is. 

90 isTemporary : `bool`, optional 

91 If `True` indicates that this URI points to a temporary resource. 

92 """ 

93 

94 _pathLib: Type[PurePath] = PurePosixPath 

95 """Path library to use for this scheme.""" 

96 

97 _pathModule = posixpath 

98 """Path module to use for this scheme.""" 

99 

100 transferModes: Tuple[str, ...] = ("copy", "auto", "move") 

101 """Transfer modes supported by this implementation. 

102 

103 Move is special in that it is generally a copy followed by an unlink. 

104 Whether that unlink works depends critically on whether the source URI 

105 implements unlink. If it does not the move will be reported as a failure. 

106 """ 

107 

108 transferDefault: str = "copy" 

109 """Default mode to use for transferring if ``auto`` is specified.""" 

110 

111 quotePaths = True 

112 """True if path-like elements modifying a URI should be quoted. 

113 

114 All non-schemeless URIs have to internally use quoted paths. Therefore 

115 if a new file name is given (e.g. to updatedFile or join) a decision must 

116 be made whether to quote it to be consistent. 

117 """ 

118 

119 isLocal = False 

120 """If `True` this URI refers to a local file.""" 

121 

122 # This is not an ABC with abstract methods because the __new__ being 

123 # a factory confuses mypy such that it assumes that every constructor 

124 # returns a ButlerURI and then determines that all the abstract methods 

125 # are still abstract. If they are not marked abstract but just raise 

126 # mypy is fine with it. 

127 

128 # mypy is confused without these 

129 _uri: urllib.parse.ParseResult 

130 isTemporary: bool 

131 dirLike: bool 

132 

133 def __new__(cls, uri: Union[str, urllib.parse.ParseResult, ButlerURI, Path], 

134 root: Optional[Union[str, ButlerURI]] = None, forceAbsolute: bool = True, 

135 forceDirectory: bool = False, isTemporary: bool = False) -> ButlerURI: 

136 """Create and return new specialist ButlerURI subclass.""" 

137 parsed: urllib.parse.ParseResult 

138 dirLike: bool = False 

139 subclass: Optional[Type[ButlerURI]] = None 

140 

141 if isinstance(uri, Path): 141 ↛ 142line 141 didn't jump to line 142, because the condition on line 141 was never true

142 uri = str(uri) 

143 

144 # Record if we need to post process the URI components 

145 # or if the instance is already fully configured 

146 if isinstance(uri, str): 

147 # Since local file names can have special characters in them 

148 # we need to quote them for the parser but we can unquote 

149 # later. Assume that all other URI schemes are quoted. 

150 # Since sometimes people write file:/a/b and not file:///a/b 

151 # we should not quote in the explicit case of file: 

152 if "://" not in uri and not uri.startswith("file:"): 

153 if ESCAPES_RE.search(uri): 153 ↛ 154line 153 didn't jump to line 154, because the condition on line 153 was never true

154 log.warning("Possible double encoding of %s", uri) 

155 else: 

156 uri = urllib.parse.quote(uri) 

157 # Special case hash since we must support fragments 

158 # even in schemeless URIs -- although try to only replace 

159 # them in file part and not directory part 

160 if ESCAPED_HASH in uri: 160 ↛ 161line 160 didn't jump to line 161, because the condition on line 160 was never true

161 dirpos = uri.rfind("/") 

162 # Do replacement after this / 

163 uri = uri[:dirpos+1] + uri[dirpos+1:].replace(ESCAPED_HASH, "#") 

164 

165 parsed = urllib.parse.urlparse(uri) 

166 elif isinstance(uri, urllib.parse.ParseResult): 

167 parsed = copy.copy(uri) 

168 # If we are being instantiated with a subclass, rather than 

169 # ButlerURI, ensure that that subclass is used directly. 

170 # This could lead to inconsistencies if this constructor 

171 # is used externally outside of the ButlerURI.replace() method. 

172 # ButlerS3URI(urllib.parse.urlparse("file://a/b.txt")) 

173 # will be a problem. 

174 # This is needed to prevent a schemeless absolute URI become 

175 # a file URI unexpectedly when calling updatedFile or 

176 # updatedExtension 

177 if cls is not ButlerURI: 

178 parsed, dirLike = cls._fixDirectorySep(parsed, forceDirectory) 

179 subclass = cls 

180 

181 elif isinstance(uri, ButlerURI): 181 ↛ 186line 181 didn't jump to line 186, because the condition on line 181 was never false

182 # Since ButlerURI is immutable we can return the argument 

183 # unchanged. 

184 return uri 

185 else: 

186 raise ValueError("Supplied URI must be string, Path, " 

187 f"ButlerURI, or ParseResult but got '{uri!r}'") 

188 

189 if subclass is None: 

190 # Work out the subclass from the URI scheme 

191 if not parsed.scheme: 

192 from .schemeless import ButlerSchemelessURI 

193 subclass = ButlerSchemelessURI 

194 elif parsed.scheme == "file": 194 ↛ 195line 194 didn't jump to line 195, because the condition on line 194 was never true

195 from .file import ButlerFileURI 

196 subclass = ButlerFileURI 

197 elif parsed.scheme == "s3": 197 ↛ 198line 197 didn't jump to line 198, because the condition on line 197 was never true

198 from .s3 import ButlerS3URI 

199 subclass = ButlerS3URI 

200 elif parsed.scheme.startswith("http"): 200 ↛ 201line 200 didn't jump to line 201, because the condition on line 200 was never true

201 from .http import ButlerHttpURI 

202 subclass = ButlerHttpURI 

203 elif parsed.scheme == "resource": 203 ↛ 207line 203 didn't jump to line 207, because the condition on line 203 was never false

204 # Rules for scheme names disallow pkg_resource 

205 from .packageresource import ButlerPackageResourceURI 

206 subclass = ButlerPackageResourceURI 

207 elif parsed.scheme == "mem": 

208 # in-memory datastore object 

209 from .mem import ButlerInMemoryURI 

210 subclass = ButlerInMemoryURI 

211 else: 

212 raise NotImplementedError(f"No URI support for scheme: '{parsed.scheme}'" 

213 " in {parsed.geturl()}") 

214 

215 parsed, dirLike = subclass._fixupPathUri(parsed, root=root, 

216 forceAbsolute=forceAbsolute, 

217 forceDirectory=forceDirectory) 

218 

219 # It is possible for the class to change from schemeless 

220 # to file so handle that 

221 if parsed.scheme == "file": 221 ↛ 222line 221 didn't jump to line 222, because the condition on line 221 was never true

222 from .file import ButlerFileURI 

223 subclass = ButlerFileURI 

224 

225 # Now create an instance of the correct subclass and set the 

226 # attributes directly 

227 self = object.__new__(subclass) 

228 self._uri = parsed 

229 self.dirLike = dirLike 

230 self.isTemporary = isTemporary 

231 return self 

232 

233 @property 

234 def scheme(self) -> str: 

235 """Return the URI scheme. 

236 

237 Notes 

238 ----- 

239 (``://`` is not part of the scheme). 

240 """ 

241 return self._uri.scheme 

242 

243 @property 

244 def netloc(self) -> str: 

245 """Return the URI network location.""" 

246 return self._uri.netloc 

247 

248 @property 

249 def path(self) -> str: 

250 """Return the path component of the URI.""" 

251 return self._uri.path 

252 

253 @property 

254 def unquoted_path(self) -> str: 

255 """Return path component of the URI with any URI quoting reversed.""" 

256 return urllib.parse.unquote(self._uri.path) 

257 

258 @property 

259 def ospath(self) -> str: 

260 """Return the path component of the URI localized to current OS.""" 

261 raise AttributeError(f"Non-file URI ({self}) has no local OS path.") 

262 

263 @property 

264 def relativeToPathRoot(self) -> str: 

265 """Return path relative to network location. 

266 

267 Effectively, this is the path property with posix separator stripped 

268 from the left hand side of the path. 

269 

270 Always unquotes. 

271 """ 

272 p = self._pathLib(self.path) 

273 relToRoot = str(p.relative_to(p.root)) 

274 if self.dirLike and not relToRoot.endswith("/"): 274 ↛ 275line 274 didn't jump to line 275, because the condition on line 274 was never true

275 relToRoot += "/" 

276 return urllib.parse.unquote(relToRoot) 

277 

278 @property 

279 def is_root(self) -> bool: 

280 """Return whether this URI points to the root of the network location. 

281 

282 This means that the path components refers to the top level. 

283 """ 

284 relpath = self.relativeToPathRoot 

285 if relpath == "./": 

286 return True 

287 return False 

288 

289 @property 

290 def fragment(self) -> str: 

291 """Return the fragment component of the URI.""" 

292 return self._uri.fragment 

293 

294 @property 

295 def params(self) -> str: 

296 """Return any parameters included in the URI.""" 

297 return self._uri.params 

298 

299 @property 

300 def query(self) -> str: 

301 """Return any query strings included in the URI.""" 

302 return self._uri.query 

303 

304 def geturl(self) -> str: 

305 """Return the URI in string form. 

306 

307 Returns 

308 ------- 

309 url : `str` 

310 String form of URI. 

311 """ 

312 return self._uri.geturl() 

313 

314 def root_uri(self) -> ButlerURI: 

315 """Return the base root URI. 

316 

317 Returns 

318 ------- 

319 uri : `ButlerURI` 

320 root URI. 

321 """ 

322 return self.replace(path="", forceDirectory=True) 

323 

def split(self) -> Tuple[ButlerURI, str]:
    """Split URI into head and tail.

    Returns
    -------
    head: `ButlerURI`
        Everything leading up to tail, built as a dir-like `ButlerURI`
        that keeps the other URI components.
    tail : `str`
        Last `self.path` component, unquoted. Empty if the path ends on
        a separator. Never contains separators.

    Notes
    -----
    Equivalent to `os.path.split()` where head preserves the URI
    components.
    """
    pathmod = self._pathModule
    directory, leaf = pathmod.split(self.path)

    # Schemeless is special in that it can be a relative path and must
    # stay that way; only force absolute if the path already is.
    keep_absolute = pathmod.isabs(self.path)

    head_uri = ButlerURI(self._uri._replace(path=directory), forceDirectory=True,
                         forceAbsolute=keep_absolute)

    # The file part should never include quoted metacharacters
    return head_uri, urllib.parse.unquote(leaf)

353 

354 def basename(self) -> str: 

355 """Return the base name, last element of path, of the URI. 

356 

357 Returns 

358 ------- 

359 tail : `str` 

360 Last part of the path attribute. Trail will be empty if path ends 

361 on a separator. 

362 

363 Notes 

364 ----- 

365 If URI ends on a slash returns an empty string. This is the second 

366 element returned by `split()`. 

367 

368 Equivalent of `os.path.basename()``. 

369 """ 

370 return self.split()[1] 

371 

372 def dirname(self) -> ButlerURI: 

373 """Return the directory component of the path as a new `ButlerURI`. 

374 

375 Returns 

376 ------- 

377 head : `ButlerURI` 

378 Everything except the tail of path attribute, expanded and 

379 normalized as per ButlerURI rules. 

380 

381 Notes 

382 ----- 

383 Equivalent of `os.path.dirname()`. 

384 """ 

385 return self.split()[0] 

386 

387 def parent(self) -> ButlerURI: 

388 """Return a `ButlerURI` of the parent directory. 

389 

390 Returns 

391 ------- 

392 head : `ButlerURI` 

393 Everything except the tail of path attribute, expanded and 

394 normalized as per `ButlerURI` rules. 

395 

396 Notes 

397 ----- 

398 For a file-like URI this will be the same as calling `dirname()`. 

399 """ 

400 # When self is file-like, return self.dirname() 

401 if not self.dirLike: 

402 return self.dirname() 

403 # When self is dir-like, return its parent directory, 

404 # regardless of the presence of a trailing separator 

405 originalPath = self._pathLib(self.path) 

406 parentPath = originalPath.parent 

407 return self.replace(path=str(parentPath), forceDirectory=True) 

408 

409 def replace(self, forceDirectory: bool = False, isTemporary: bool = False, **kwargs: Any) -> ButlerURI: 

410 """Return new `ButlerURI` with specified components replaced. 

411 

412 Parameters 

413 ---------- 

414 forceDirectory : `bool`, optional 

415 Parameter passed to ButlerURI constructor to force this 

416 new URI to be dir-like. 

417 isTemporary : `bool`, optional 

418 Indicate that the resulting URI is temporary resource. 

419 **kwargs 

420 Components of a `urllib.parse.ParseResult` that should be 

421 modified for the newly-created `ButlerURI`. 

422 

423 Returns 

424 ------- 

425 new : `ButlerURI` 

426 New `ButlerURI` object with updated values. 

427 

428 Notes 

429 ----- 

430 Does not, for now, allow a change in URI scheme. 

431 """ 

432 # Disallow a change in scheme 

433 if "scheme" in kwargs: 433 ↛ 434line 433 didn't jump to line 434, because the condition on line 433 was never true

434 raise ValueError(f"Can not use replace() method to change URI scheme for {self}") 

435 return self.__class__(self._uri._replace(**kwargs), forceDirectory=forceDirectory, 

436 isTemporary=isTemporary) 

437 

438 def updatedFile(self, newfile: str) -> ButlerURI: 

439 """Return new URI with an updated final component of the path. 

440 

441 Parameters 

442 ---------- 

443 newfile : `str` 

444 File name with no path component. 

445 

446 Returns 

447 ------- 

448 updated : `ButlerURI` 

449 

450 Notes 

451 ----- 

452 Forces the ButlerURI.dirLike attribute to be false. The new file path 

453 will be quoted if necessary. 

454 """ 

455 if self.quotePaths: 

456 newfile = urllib.parse.quote(newfile) 

457 dir, _ = self._pathModule.split(self.path) 

458 newpath = self._pathModule.join(dir, newfile) 

459 

460 updated = self.replace(path=newpath) 

461 updated.dirLike = False 

462 return updated 

463 

464 def updatedExtension(self, ext: Optional[str]) -> ButlerURI: 

465 """Return a new `ButlerURI` with updated file extension. 

466 

467 All file extensions are replaced. 

468 

469 Parameters 

470 ---------- 

471 ext : `str` or `None` 

472 New extension. If an empty string is given any extension will 

473 be removed. If `None` is given there will be no change. 

474 

475 Returns 

476 ------- 

477 updated : `ButlerURI` 

478 URI with the specified extension. Can return itself if 

479 no extension was specified. 

480 """ 

481 if ext is None: 

482 return self 

483 

484 # Get the extension 

485 current = self.getExtension() 

486 

487 # Nothing to do if the extension already matches 

488 if current == ext: 

489 return self 

490 

491 # Remove the current extension from the path 

492 # .fits.gz counts as one extension do not use os.path.splitext 

493 path = self.path 

494 if current: 

495 path = path[:-len(current)] 

496 

497 # Ensure that we have a leading "." on file extension (and we do not 

498 # try to modify the empty string) 

499 if ext and not ext.startswith("."): 

500 ext = "." + ext 

501 

502 return self.replace(path=path + ext) 

503 

504 def getExtension(self) -> str: 

505 """Return the file extension(s) associated with this URI path. 

506 

507 Returns 

508 ------- 

509 ext : `str` 

510 The file extension (including the ``.``). Can be empty string 

511 if there is no file extension. Usually returns only the last 

512 file extension unless there is a special extension modifier 

513 indicating file compression, in which case the combined 

514 extension (e.g. ``.fits.gz``) will be returned. 

515 """ 

516 special = {".gz", ".bz2", ".xz", ".fz"} 

517 

518 # Get the file part of the path so as not to be confused by 

519 # "." in directory names. 

520 basename = self.basename() 

521 extensions = self._pathLib(basename).suffixes 

522 

523 if not extensions: 523 ↛ 524line 523 didn't jump to line 524, because the condition on line 523 was never true

524 return "" 

525 

526 ext = extensions.pop() 

527 

528 # Multiple extensions, decide whether to include the final two 

529 if extensions and ext in special: 529 ↛ 530line 529 didn't jump to line 530, because the condition on line 529 was never true

530 ext = f"{extensions[-1]}{ext}" 

531 

532 return ext 

533 

def join(self, path: Union[str, ButlerURI], isTemporary: bool = False) -> ButlerURI:
    """Return new `ButlerURI` with additional path components.

    Parameters
    ----------
    path : `str`, `ButlerURI`
        Additional file components to append to the current URI. Assumed
        to include a file at the end. Will be quoted depending on the
        associated URI scheme. If the path is a URI with a scheme it
        is returned directly (matching the behavior of
        `os.path.join()`). It can also be a `ButlerURI`.
    isTemporary : `bool`, optional
        Indicate that the resulting URI represents a temporary resource.

    Returns
    -------
    new : `ButlerURI`
        New URI with any file at the end replaced with the new path
        components.

    Raises
    ------
    ValueError
        Raised if the ``path`` is an absolute scheme-less URI. In that
        situation it is unclear whether the intent is to return a
        ``file`` URI or it was a mistake and a relative scheme-less URI
        was meant.

    Notes
    -----
    Schemeless URIs assume local path separator but all other URIs assume
    POSIX separator if the supplied path has directory structure. It
    may be this never becomes a problem but datastore templates assume
    POSIX separator is being used.

    If an absolute `ButlerURI` is given for ``path`` it is assumed that
    this should be returned directly. Giving a ``path`` of an absolute
    scheme-less URI is not allowed for safety reasons as it may indicate
    a mistake in the calling code.
    """
    # Parse without forcing to absolute so that the expected case of a
    # relative path can be told apart from a full URI.
    candidate = ButlerURI(path, forceAbsolute=False)

    # A scheme distinguishes explicit URIs from absolute scheme-less
    # ones; explicit URIs are used as-is.
    if candidate.scheme:
        return candidate

    if candidate.isabs():
        # Absolute scheme-less path.
        raise ValueError(f"Can not join absolute scheme-less {candidate!r} to another URI.")

    # For a ButlerURI argument use its unquoted path. A plain string is
    # used exactly as given so that "#" can appear in the file name.
    extra = path if isinstance(path, str) else candidate.unquoted_path

    base = self.dirname()  # By definition a directory URI

    # Ask the new URI about quoting, not self, since dirname can change
    # the URI scheme for schemeless -> file
    if base.quotePaths:
        extra = urllib.parse.quote(extra)

    pathmod = self._pathModule
    joined = pathmod.normpath(pathmod.join(base.path, extra))

    # normpath can strip a trailing separator, so force a directory if
    # the supplied path ended with one.
    wants_directory = extra.endswith(pathmod.sep)
    return base.replace(path=joined, forceDirectory=wants_directory,
                        isTemporary=isTemporary)

607 

608 def relative_to(self, other: ButlerURI) -> Optional[str]: 

609 """Return the relative path from this URI to the other URI. 

610 

611 Parameters 

612 ---------- 

613 other : `ButlerURI` 

614 URI to use to calculate the relative path. Must be a parent 

615 of this URI. 

616 

617 Returns 

618 ------- 

619 subpath : `str` 

620 The sub path of this URI relative to the supplied other URI. 

621 Returns `None` if there is no parent child relationship. 

622 Scheme and netloc must match. 

623 """ 

624 # Scheme-less absolute other is treated as if it's a file scheme. 

625 # Scheme-less relative other can only return non-None if self 

626 # is also scheme-less relative and that is handled specifically 

627 # in a subclass. 

628 if not other.scheme and other.isabs(): 

629 other = other.abspath() 

630 

631 # Scheme-less self is handled elsewhere. 

632 if self.scheme != other.scheme or self.netloc != other.netloc: 

633 return None 

634 

635 enclosed_path = self._pathLib(self.relativeToPathRoot) 

636 parent_path = other.relativeToPathRoot 

637 subpath: Optional[str] 

638 try: 

639 subpath = str(enclosed_path.relative_to(parent_path)) 

640 except ValueError: 

641 subpath = None 

642 else: 

643 subpath = urllib.parse.unquote(subpath) 

644 return subpath 

645 

def exists(self) -> bool:
    """Indicate that the resource is available.

    Returns
    -------
    exists : `bool`
        `True` if the resource exists.

    Raises
    ------
    NotImplementedError
        Always raised by this base implementation.
    """
    # Deliberately not an abstract method; the base class simply raises.
    raise NotImplementedError()

655 

def remove(self) -> None:
    """Remove the resource.

    Raises
    ------
    NotImplementedError
        Always raised by this base implementation.
    """
    # Deliberately not an abstract method; the base class simply raises.
    raise NotImplementedError()

659 

def isabs(self) -> bool:
    """Indicate that the resource is fully specified.

    For non-schemeless URIs this is always true.

    Returns
    -------
    isabs : `bool`
        `True` in all cases except schemeless URI. This base
        implementation unconditionally returns `True`.
    """
    return True

671 

def abspath(self) -> ButlerURI:
    """Return URI using an absolute path.

    Returns
    -------
    abs : `ButlerURI`
        Absolute URI. For non-schemeless URIs this always returns itself.
        Schemeless URIs are upgraded to file URIs.
    """
    # This base implementation returns the same object, not a copy.
    return self

682 

def _as_local(self) -> Tuple[str, bool]:
    """Return the location of the (possibly remote) resource as local file.

    This is a helper function for `as_local` context manager.

    Returns
    -------
    path : `str`
        If this is a remote resource, it will be a copy of the resource
        on the local file system, probably in a temporary directory.
        For a local resource this should be the actual path to the
        resource.
    is_temporary : `bool`
        Indicates if the local path is a temporary file or not.

    Raises
    ------
    NotImplementedError
        Always raised by this base implementation.
    """
    # Deliberately not an abstract method; the base class simply raises.
    raise NotImplementedError()

699 

@contextlib.contextmanager
def as_local(self) -> Iterator[ButlerURI]:
    """Return the location of the (possibly remote) resource as local file.

    Yields
    ------
    local : `ButlerURI`
        URI built from the path reported by `_as_local()`, flagged as
        temporary if `_as_local()` says so.

    Notes
    -----
    On exit, a local file that was reported as temporary is removed if
    it still exists.

    Examples
    --------
    Should be used as a context manager:

    .. code-block:: py

       with uri.as_local() as local:
           ospath = local.ospath
    """
    local_path, temporary = self._as_local()
    local = ButlerURI(local_path, isTemporary=temporary)

    try:
        yield local
    finally:
        # The caller might have relocated the temporary file, so only
        # remove it if it is still there.
        if temporary and local.exists():
            local.remove()

735 

@classmethod
@contextlib.contextmanager
def temporary_uri(cls, prefix: Optional[ButlerURI] = None,
                  suffix: Optional[str] = None) -> Iterator[ButlerURI]:
    """Create a temporary URI.

    Parameters
    ----------
    prefix : `ButlerURI`, optional
        Prefix to use. Without this the path will be formed as a local
        file URI in a temporary directory. Ensuring that the prefix
        location exists is the responsibility of the caller.
    suffix : `str`, optional
        A file suffix to be used. The ``.`` should be included in this
        suffix.

    Yields
    ------
    uri : `ButlerURI`
        The temporary URI. Will be removed when the context is completed.
    """
    # Track ownership of the directory explicitly: the isTemporary flag
    # can not be relied on since an external prefix may have it set too.
    owns_tempdir = prefix is None
    if owns_tempdir:
        prefix = ButlerURI(tempfile.mkdtemp(), forceDirectory=True, isTemporary=True)

    # Need to create a randomized file name. For consistency do not
    # use mkstemp for local and something else for remote. Additionally
    # this method does not create the file to prevent name clashes.
    alphabet = "abcdefghijklmnopqrstuvwxyz0123456789_"
    rng = Random()
    stem = "".join(rng.choice(alphabet) for _ in range(16))
    tempname = stem + suffix if suffix else stem
    temporary_uri = prefix.join(tempname, isTemporary=True)

    try:
        yield temporary_uri
    finally:
        if owns_tempdir:
            shutil.rmtree(prefix.ospath, ignore_errors=True)
        else:
            # It's okay if this does not work because the user removed
            # the file.
            with contextlib.suppress(FileNotFoundError):
                temporary_uri.remove()

787 

def read(self, size: int = -1) -> bytes:
    """Open the resource and return the contents in bytes.

    Parameters
    ----------
    size : `int`, optional
        The number of bytes to read. Negative or omitted indicates
        that all data should be read.

    Raises
    ------
    NotImplementedError
        Always raised by this base implementation.
    """
    # Deliberately not an abstract method; the base class simply raises.
    raise NotImplementedError()

798 

def write(self, data: bytes, overwrite: bool = True) -> None:
    """Write the supplied bytes to the new resource.

    Parameters
    ----------
    data : `bytes`
        The bytes to write to the resource. The entire contents of the
        resource will be replaced.
    overwrite : `bool`, optional
        If `True` the resource will be overwritten if it exists. Otherwise
        the write will fail.

    Raises
    ------
    NotImplementedError
        Always raised by this base implementation.
    """
    # Deliberately not an abstract method; the base class simply raises.
    raise NotImplementedError()

812 

def mkdir(self) -> None:
    """For a dir-like URI, create the directory resource if needed.

    Raises
    ------
    NotImplementedError
        Always raised by this base implementation.
    """
    # Deliberately not an abstract method; the base class simply raises.
    raise NotImplementedError()

816 

def isdir(self) -> bool:
    """Return True if this URI looks like a directory, else False.

    Returns
    -------
    isdir : `bool`
        The value of the ``dirLike`` attribute; this base implementation
        does not consult the resource itself.
    """
    return self.dirLike

820 

def size(self) -> int:
    """For non-dir-like URI, return the size of the resource.

    Returns
    -------
    sz : `int`
        The size in bytes of the resource associated with this URI.
        Returns 0 if dir-like.

    Raises
    ------
    NotImplementedError
        Always raised by this base implementation.
    """
    # Deliberately not an abstract method; the base class simply raises.
    raise NotImplementedError()

831 

def __str__(self) -> str:
    """Convert the URI to its native string form.

    This is the same string as returned by `geturl()`.
    """
    return self.geturl()

835 

836 def __repr__(self) -> str: 

837 """Return string representation suitable for evaluation.""" 

838 return f'ButlerURI("{self.geturl()}")' 

839 

840 def __eq__(self, other: Any) -> bool: 

841 """Compare supplied object with this `ButlerURI`.""" 

842 if not isinstance(other, ButlerURI): 

843 return NotImplemented 

844 return self.geturl() == other.geturl() 

845 

def __hash__(self) -> int:
    """Return hash of this object.

    The hash is computed from the string form of the URI.
    """
    return hash(str(self))

849 

def __copy__(self) -> ButlerURI:
    """Copy constructor.

    Object is immutable so copy can return itself.

    Returns
    -------
    copy : `ButlerURI`
        This same object; no new instance is created.
    """
    # Implement here because the __new__ method confuses things
    return self

857 

def __deepcopy__(self, memo: Any) -> ButlerURI:
    """Deepcopy the object.

    Object is immutable so copy can return itself.

    Parameters
    ----------
    memo : `Any`
        Memo dictionary supplied by `copy.deepcopy`; not used here.

    Returns
    -------
    copy : `ButlerURI`
        This same object; no new instance is created.
    """
    # Implement here because the __new__ method confuses things
    return self

865 

def __getnewargs__(self) -> Tuple:
    """Support pickling.

    Returns
    -------
    args : `tuple`
        One-element tuple holding the string form of this URI. Only the
        string form is included; no other attribute is part of the
        returned arguments.
    """
    return (str(self),)

869 

870 @classmethod 

871 def _fixDirectorySep(cls, parsed: urllib.parse.ParseResult, 

872 forceDirectory: bool = False) -> Tuple[urllib.parse.ParseResult, bool]: 

873 """Ensure that a path separator is present on directory paths. 

874 

875 Parameters 

876 ---------- 

877 parsed : `~urllib.parse.ParseResult` 

878 The result from parsing a URI using `urllib.parse`. 

879 forceDirectory : `bool`, optional 

880 If `True` forces the URI to end with a separator, otherwise given 

881 URI is interpreted as is. Specifying that the URI is conceptually 

882 equivalent to a directory can break some ambiguities when 

883 interpreting the last element of a path. 

884 

885 Returns 

886 ------- 

887 modified : `~urllib.parse.ParseResult` 

888 Update result if a URI is being handled. 

889 dirLike : `bool` 

890 `True` if given parsed URI has a trailing separator or 

891 forceDirectory is True. Otherwise `False`. 

892 """ 

893 # assume we are not dealing with a directory like URI 

894 dirLike = False 

895 

896 # Directory separator 

897 sep = cls._pathModule.sep 

898 

899 # URI is dir-like if explicitly stated or if it ends on a separator 

900 endsOnSep = parsed.path.endswith(sep) 

901 if forceDirectory or endsOnSep: 

902 dirLike = True 

903 # only add the separator if it's not already there 

904 if not endsOnSep: 904 ↛ 907line 904 didn't jump to line 907, because the condition on line 904 was never false

905 parsed = parsed._replace(path=parsed.path+sep) 

906 

907 return parsed, dirLike 

908 

909 @classmethod 

910 def _fixupPathUri(cls, parsed: urllib.parse.ParseResult, root: Optional[Union[str, ButlerURI]] = None, 

911 forceAbsolute: bool = False, 

912 forceDirectory: bool = False) -> Tuple[urllib.parse.ParseResult, bool]: 

913 """Correct any issues with the supplied URI. 

914 

915 Parameters 

916 ---------- 

917 parsed : `~urllib.parse.ParseResult` 

918 The result from parsing a URI using `urllib.parse`. 

919 root : `str` or `ButlerURI`, ignored 

920 Not used by the this implementation since all URIs are 

921 absolute except for those representing the local file system. 

922 forceAbsolute : `bool`, ignored. 

923 Not used by this implementation. URIs are generally always 

924 absolute. 

925 forceDirectory : `bool`, optional 

926 If `True` forces the URI to end with a separator, otherwise given 

927 URI is interpreted as is. Specifying that the URI is conceptually 

928 equivalent to a directory can break some ambiguities when 

929 interpreting the last element of a path. 

930 

931 Returns 

932 ------- 

933 modified : `~urllib.parse.ParseResult` 

934 Update result if a URI is being handled. 

935 dirLike : `bool` 

936 `True` if given parsed URI has a trailing separator or 

937 forceDirectory is True. Otherwise `False`. 

938 

939 Notes 

940 ----- 

941 Relative paths are explicitly not supported by RFC8089 but `urllib` 

942 does accept URIs of the form ``file:relative/path.ext``. They need 

943 to be turned into absolute paths before they can be used. This is 

944 always done regardless of the ``forceAbsolute`` parameter. 

945 

946 AWS S3 differentiates between keys with trailing POSIX separators (i.e 

947 `/dir` and `/dir/`) whereas POSIX does not neccessarily. 

948 

949 Scheme-less paths are normalized. 

950 """ 

951 return cls._fixDirectorySep(parsed, forceDirectory) 

952 

953 def transfer_from(self, src: ButlerURI, transfer: str, 

954 overwrite: bool = False, 

955 transaction: Optional[Union[DatastoreTransaction, NoTransaction]] = None) -> None: 

956 """Transfer the current resource to a new location. 

957 

958 Parameters 

959 ---------- 

960 src : `ButlerURI` 

961 Source URI. 

962 transfer : `str` 

963 Mode to use for transferring the resource. Generically there are 

964 many standard options: copy, link, symlink, hardlink, relsymlink. 

965 Not all URIs support all modes. 

966 overwrite : `bool`, optional 

967 Allow an existing file to be overwritten. Defaults to `False`. 

968 transaction : `DatastoreTransaction`, optional 

969 A transaction object that can (depending on implementation) 

970 rollback transfers on error. Not guaranteed to be implemented. 

971 

972 Notes 

973 ----- 

974 Conceptually this is hard to scale as the number of URI schemes 

975 grow. The destination URI is more important than the source URI 

976 since that is where all the transfer modes are relevant (with the 

977 complication that "move" deletes the source). 

978 

979 Local file to local file is the fundamental use case but every 

980 other scheme has to support "copy" to local file (with implicit 

981 support for "move") and copy from local file. 

982 All the "link" options tend to be specific to local file systems. 

983 

984 "move" is a "copy" where the remote resource is deleted at the end. 

985 Whether this works depends on the source URI rather than the 

986 destination URI. Reverting a move on transaction rollback is 

987 expected to be problematic if a remote resource was involved. 

988 """ 

989 raise NotImplementedError(f"No transfer modes supported by URI scheme {self.scheme}") 

990 

991 def walk(self, file_filter: Optional[Union[str, re.Pattern]] = None) -> Iterator[Union[List, 

992 Tuple[ButlerURI, 

993 List[str], 

994 List[str]]]]: 

995 """Walk the directory tree returning matching files and directories. 

996 

997 Parameters 

998 ---------- 

999 file_filter : `str` or `re.Pattern`, optional 

1000 Regex to filter out files from the list before it is returned. 

1001 

1002 Yields 

1003 ------ 

1004 dirpath : `ButlerURI` 

1005 Current directory being examined. 

1006 dirnames : `list` of `str` 

1007 Names of subdirectories within dirpath. 

1008 filenames : `list` of `str` 

1009 Names of all the files within dirpath. 

1010 """ 

1011 raise NotImplementedError() 

1012 

1013 @classmethod 

1014 def findFileResources(cls, candidates: Iterable[Union[str, ButlerURI]], 

1015 file_filter: Optional[str] = None, 

1016 grouped: bool = False) -> Iterator[Union[ButlerURI, Iterator[ButlerURI]]]: 

1017 """Get all the files from a list of values. 

1018 

1019 Parameters 

1020 ---------- 

1021 candidates : iterable [`str` or `ButlerURI`] 

1022 The files to return and directories in which to look for files to 

1023 return. 

1024 file_filter : `str`, optional 

1025 The regex to use when searching for files within directories. 

1026 By default returns all the found files. 

1027 grouped : `bool`, optional 

1028 If `True` the results will be grouped by directory and each 

1029 yielded value will be an iterator over URIs. If `False` each 

1030 URI will be returned separately. 

1031 

1032 Yields 

1033 ------ 

1034 found_file: `ButlerURI` 

1035 The passed-in URIs and URIs found in passed-in directories. 

1036 If grouping is enabled, each of the yielded values will be an 

1037 iterator yielding members of the group. Files given explicitly 

1038 will be returned as a single group at the end. 

1039 

1040 Notes 

1041 ----- 

1042 If a value is a file it is yielded immediately. If a value is a 

1043 directory, all the files in the directory (recursively) that match 

1044 the regex will be yielded in turn. 

1045 """ 

1046 fileRegex = None if file_filter is None else re.compile(file_filter) 

1047 

1048 singles = [] 

1049 

1050 # Find all the files of interest 

1051 for location in candidates: 

1052 uri = ButlerURI(location) 

1053 if uri.isdir(): 

1054 for found in uri.walk(fileRegex): 

1055 if not found: 

1056 # This means the uri does not exist and by 

1057 # convention we ignore it 

1058 continue 

1059 root, dirs, files = found 

1060 if not files: 

1061 continue 

1062 if grouped: 

1063 yield (root.join(name) for name in files) 

1064 else: 

1065 for name in files: 

1066 yield root.join(name) 

1067 else: 

1068 if grouped: 

1069 singles.append(uri) 

1070 else: 

1071 yield uri 

1072 

1073 # Finally, return any explicitly given files in one group 

1074 if grouped and singles: 

1075 yield iter(singles)