Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24import contextlib 

25import urllib.parse 

26import posixpath 

27import copy 

28import logging 

29import re 

30 

31from pathlib import Path, PurePath, PurePosixPath 

32 

33__all__ = ('ButlerURI',) 

34 

35from typing import ( 

36 TYPE_CHECKING, 

37 Any, 

38 Iterable, 

39 Iterator, 

40 List, 

41 Optional, 

42 Tuple, 

43 Type, 

44 Union, 

45) 

46 

47from .utils import NoTransaction 

48 

49if TYPE_CHECKING: 49 ↛ 50line 49 didn't jump to line 50, because the condition on line 49 was never true

50 from ..datastore import DatastoreTransaction 

51 

52 

# Logger shared by everything in this module.
log = logging.getLogger(__name__)

# Pattern matching a percent-escape sequence ("%" followed by two
# upper-case hexadecimal digits); used to spot input that already looks
# URI-quoted.
ESCAPES_RE = re.compile(r"%[A-F0-9]{2}")

# The URI-quoted form of "#", computed once at import time so it can be
# searched for (and reverted) after quoting a scheme-less path.
ESCAPED_HASH = urllib.parse.quote("#")

60 

61 

class ButlerURI:
    """Convenience wrapper around URI parsers.

    Provides access to URI components and can convert file
    paths into absolute path URIs. Scheme-less URIs are treated as if
    they are local file system paths and are converted to absolute URIs.

    A specialist subclass is created for each supported URI scheme.

    Parameters
    ----------
    uri : `str`, `pathlib.Path`, `urllib.parse.ParseResult` or `ButlerURI`
        URI in string form. Can be scheme-less if referring to a local
        filesystem path. If a `ButlerURI` is given it is returned
        unchanged and all other parameters are ignored.
    root : `str` or `ButlerURI`, optional
        When fixing up a relative path in a ``file`` scheme or if scheme-less,
        use this as the root. Must be absolute. If `None` the current
        working directory will be used. Can be a file URI.
    forceAbsolute : `bool`, optional
        If `True`, scheme-less relative URI will be converted to an absolute
        path using a ``file`` scheme. If `False` scheme-less URI will remain
        scheme-less and will not be updated to ``file`` or absolute path.
    forceDirectory : `bool`, optional
        If `True` forces the URI to end with a separator, otherwise given URI
        is interpreted as is.
    isTemporary : `bool`, optional
        If `True` indicates that this URI points to a temporary resource.
    """

    _pathLib: Type[PurePath] = PurePosixPath
    """Path library to use for this scheme."""

    _pathModule = posixpath
    """Path module to use for this scheme."""

    transferModes: Tuple[str, ...] = ("copy", "auto", "move")
    """Transfer modes supported by this implementation.

    Move is special in that it is generally a copy followed by an unlink.
    Whether that unlink works depends critically on whether the source URI
    implements unlink. If it does not the move will be reported as a failure.
    """

    transferDefault: str = "copy"
    """Default mode to use for transferring if ``auto`` is specified."""

    quotePaths = True
    """True if path-like elements modifying a URI should be quoted.

    All non-schemeless URIs have to internally use quoted paths. Therefore
    if a new file name is given (e.g. to updatedFile or join) a decision must
    be made whether to quote it to be consistent.
    """

    isLocal = False
    """If `True` this URI refers to a local file."""

    # This is not an ABC with abstract methods because the __new__ being
    # a factory confuses mypy such that it assumes that every constructor
    # returns a ButlerURI and then determines that all the abstract methods
    # are still abstract. If they are not marked abstract but just raise
    # mypy is fine with it.

    # mypy is confused without these
    _uri: urllib.parse.ParseResult
    isTemporary: bool
    dirLike: bool

    def __new__(cls, uri: Union[str, urllib.parse.ParseResult, ButlerURI, Path],
                root: Optional[Union[str, ButlerURI]] = None, forceAbsolute: bool = True,
                forceDirectory: bool = False, isTemporary: bool = False) -> ButlerURI:
        """Create and return new specialist ButlerURI subclass.

        Raises
        ------
        ValueError
            Raised if ``uri`` is not a string, `~pathlib.Path`,
            `ButlerURI` or `~urllib.parse.ParseResult`.
        NotImplementedError
            Raised if the URI scheme has no associated subclass.
        """
        parsed: urllib.parse.ParseResult
        dirLike: bool = False
        subclass: Optional[Type[ButlerURI]] = None

        if isinstance(uri, Path):
            uri = str(uri)

        # Record if we need to post process the URI components
        # or if the instance is already fully configured
        if isinstance(uri, str):
            # Since local file names can have special characters in them
            # we need to quote them for the parser but we can unquote
            # later. Assume that all other URI schemes are quoted.
            # Since sometimes people write file:/a/b and not file:///a/b
            # we should not quote in the explicit case of file:
            if "://" not in uri and not uri.startswith("file:"):
                if ESCAPES_RE.search(uri):
                    log.warning("Possible double encoding of %s", uri)
                else:
                    uri = urllib.parse.quote(uri)
                    # Special case hash since we must support fragments
                    # even in schemeless URIs -- although try to only replace
                    # them in file part and not directory part
                    if ESCAPED_HASH in uri:
                        dirpos = uri.rfind("/")
                        # Do replacement after this /
                        uri = uri[:dirpos+1] + uri[dirpos+1:].replace(ESCAPED_HASH, "#")

            parsed = urllib.parse.urlparse(uri)
        elif isinstance(uri, urllib.parse.ParseResult):
            parsed = copy.copy(uri)
            # If we are being instantiated with a subclass, rather than
            # ButlerURI, ensure that that subclass is used directly.
            # This could lead to inconsistencies if this constructor
            # is used externally outside of the ButlerURI.replace() method.
            #   ButlerS3URI(urllib.parse.urlparse("file://a/b.txt"))
            # will be a problem.
            # This is needed to prevent a schemeless absolute URI become
            # a file URI unexpectedly when calling updatedFile or
            # updatedExtension
            if cls is not ButlerURI:
                parsed, dirLike = cls._fixDirectorySep(parsed, forceDirectory)
                subclass = cls

        elif isinstance(uri, ButlerURI):
            # Since ButlerURI is immutable we can return the argument
            # unchanged.
            return uri
        else:
            raise ValueError("Supplied URI must be string, Path, "
                             f"ButlerURI, or ParseResult but got '{uri!r}'")

        if subclass is None:
            # Work out the subclass from the URI scheme. The scheme-specific
            # modules are imported lazily, only when they are needed.
            if not parsed.scheme:
                from .schemeless import ButlerSchemelessURI
                subclass = ButlerSchemelessURI
            elif parsed.scheme == "file":
                from .file import ButlerFileURI
                subclass = ButlerFileURI
            elif parsed.scheme == "s3":
                from .s3 import ButlerS3URI
                subclass = ButlerS3URI
            elif parsed.scheme.startswith("http"):
                from .http import ButlerHttpURI
                subclass = ButlerHttpURI
            elif parsed.scheme == "resource":
                # Rules for scheme names disallow pkg_resource
                from .packageresource import ButlerPackageResourceURI
                subclass = ButlerPackageResourceURI
            elif parsed.scheme == "mem":
                # in-memory datastore object
                from .mem import ButlerInMemoryURI
                subclass = ButlerInMemoryURI
            else:
                # Both halves must be f-strings so that the offending URL
                # is actually interpolated into the message.
                raise NotImplementedError(f"No URI support for scheme: '{parsed.scheme}'"
                                          f" in {parsed.geturl()}")

            parsed, dirLike = subclass._fixupPathUri(parsed, root=root,
                                                     forceAbsolute=forceAbsolute,
                                                     forceDirectory=forceDirectory)

            # It is possible for the class to change from schemeless
            # to file so handle that
            if parsed.scheme == "file":
                from .file import ButlerFileURI
                subclass = ButlerFileURI

        # Now create an instance of the correct subclass and set the
        # attributes directly
        self = object.__new__(subclass)
        self._uri = parsed
        self.dirLike = dirLike
        self.isTemporary = isTemporary
        return self

    @property
    def scheme(self) -> str:
        """Return the URI scheme.

        Notes
        -----
        (``://`` is not part of the scheme).
        """
        return self._uri.scheme

    @property
    def netloc(self) -> str:
        """Return the URI network location."""
        return self._uri.netloc

    @property
    def path(self) -> str:
        """Return the path component of the URI."""
        return self._uri.path

    @property
    def unquoted_path(self) -> str:
        """Return path component of the URI with any URI quoting reversed."""
        return urllib.parse.unquote(self._uri.path)

    @property
    def ospath(self) -> str:
        """Return the path component of the URI localized to current OS.

        This base implementation always raises `AttributeError`.
        """
        raise AttributeError(f"Non-file URI ({self}) has no local OS path.")

    @property
    def relativeToPathRoot(self) -> str:
        """Return path relative to network location.

        Effectively, this is the path property with posix separator stripped
        from the left hand side of the path.

        Always unquotes.
        """
        p = self._pathLib(self.path)
        relToRoot = str(p.relative_to(p.root))
        # The path library drops any trailing separator so restore it for
        # directory-like URIs.
        if self.dirLike and not relToRoot.endswith("/"):
            relToRoot += "/"
        return urllib.parse.unquote(relToRoot)

    @property
    def is_root(self) -> bool:
        """Return whether this URI points to the root of the network location.

        This means that the path components refers to the top level.
        """
        return self.relativeToPathRoot == "./"

    @property
    def fragment(self) -> str:
        """Return the fragment component of the URI."""
        return self._uri.fragment

    @property
    def params(self) -> str:
        """Return any parameters included in the URI."""
        return self._uri.params

    @property
    def query(self) -> str:
        """Return any query strings included in the URI."""
        return self._uri.query

    def geturl(self) -> str:
        """Return the URI in string form.

        Returns
        -------
        url : `str`
            String form of URI.
        """
        return self._uri.geturl()

    def split(self) -> Tuple[ButlerURI, str]:
        """Split URI into head and tail.

        Returns
        -------
        head: `ButlerURI`
            Everything leading up to tail, expanded and normalized as per
            ButlerURI rules.
        tail : `str`
            Last `self.path` component. Tail will be empty if path ends on a
            separator. Tail will never contain separators. It will be
            unquoted.

        Notes
        -----
        Equivalent to `os.path.split()` where head preserves the URI
        components.
        """
        head, tail = self._pathModule.split(self.path)
        headuri = self._uri._replace(path=head)

        # The file part should never include quoted metacharacters
        tail = urllib.parse.unquote(tail)

        # Schemeless is special in that it can be a relative path
        # We need to ensure that it stays that way. All other URIs will
        # be absolute already.
        forceAbsolute = self._pathModule.isabs(self.path)
        return ButlerURI(headuri, forceDirectory=True, forceAbsolute=forceAbsolute), tail

    def basename(self) -> str:
        """Return the base name, last element of path, of the URI.

        Returns
        -------
        tail : `str`
            Last part of the path attribute. Tail will be empty if path ends
            on a separator.

        Notes
        -----
        If URI ends on a slash returns an empty string. This is the second
        element returned by `split()`.

        Equivalent of `os.path.basename()`.
        """
        return self.split()[1]

    def dirname(self) -> ButlerURI:
        """Return the directory component of the path as a new `ButlerURI`.

        Returns
        -------
        head : `ButlerURI`
            Everything except the tail of path attribute, expanded and
            normalized as per ButlerURI rules.

        Notes
        -----
        Equivalent of `os.path.dirname()`. This is the first element
        returned by `split()`.
        """
        return self.split()[0]

    def parent(self) -> ButlerURI:
        """Return a `ButlerURI` of the parent directory.

        Returns
        -------
        head : `ButlerURI`
            Everything except the tail of path attribute, expanded and
            normalized as per `ButlerURI` rules.

        Notes
        -----
        For a file-like URI this will be the same as calling `dirname()`.
        """
        # When self is file-like, return self.dirname()
        if not self.dirLike:
            return self.dirname()
        # When self is dir-like, return its parent directory,
        # regardless of the presence of a trailing separator
        originalPath = self._pathLib(self.path)
        parentPath = originalPath.parent
        return self.replace(path=str(parentPath), forceDirectory=True)

    def replace(self, forceDirectory: bool = False, **kwargs: Any) -> ButlerURI:
        """Return new `ButlerURI` with specified components replaced.

        Parameters
        ----------
        forceDirectory : `bool`
            Parameter passed to ButlerURI constructor to force this
            new URI to be dir-like.
        kwargs : `dict`
            Components of a `urllib.parse.ParseResult` that should be
            modified for the newly-created `ButlerURI`.

        Returns
        -------
        new : `ButlerURI`
            New `ButlerURI` object with updated values.

        Raises
        ------
        ValueError
            Raised if a change of ``scheme`` is requested.

        Notes
        -----
        Does not, for now, allow a change in URI scheme.
        """
        # Disallow a change in scheme
        if "scheme" in kwargs:
            raise ValueError(f"Can not use replace() method to change URI scheme for {self}")
        # Construct via the concrete class so that the scheme-specific
        # subclass is retained.
        return self.__class__(self._uri._replace(**kwargs), forceDirectory=forceDirectory)

    def updatedFile(self, newfile: str) -> ButlerURI:
        """Return new URI with an updated final component of the path.

        Parameters
        ----------
        newfile : `str`
            File name with no path component.

        Returns
        -------
        updated : `ButlerURI`
            New URI in which the last path component is ``newfile``.

        Notes
        -----
        Forces the ButlerURI.dirLike attribute to be false. The new file path
        will be quoted if necessary.
        """
        if self.quotePaths:
            newfile = urllib.parse.quote(newfile)
        directory, _ = self._pathModule.split(self.path)
        newpath = self._pathModule.join(directory, newfile)

        updated = self.replace(path=newpath)
        updated.dirLike = False
        return updated

    def updatedExtension(self, ext: Optional[str]) -> ButlerURI:
        """Return a new `ButlerURI` with updated file extension.

        All file extensions are replaced.

        Parameters
        ----------
        ext : `str` or `None`
            New extension. If an empty string is given any extension will
            be removed. If `None` is given there will be no change.

        Returns
        -------
        updated : `ButlerURI`
            URI with the specified extension. Can return itself if
            no extension was specified.
        """
        if ext is None:
            return self

        # Get the extension
        current = self.getExtension()

        # Nothing to do if the extension already matches
        if current == ext:
            return self

        # Remove the current extension from the path
        # .fits.gz counts as one extension do not use os.path.splitext
        path = self.path
        if current:
            path = path[:-len(current)]

        # Ensure that we have a leading "." on file extension (and we do not
        # try to modify the empty string)
        if ext and not ext.startswith("."):
            ext = "." + ext

        return self.replace(path=path + ext)

    def getExtension(self) -> str:
        """Return the file extension(s) associated with this URI path.

        Returns
        -------
        ext : `str`
            The file extension (including the ``.``). Can be empty string
            if there is no file extension. Usually returns only the last
            file extension unless there is a special extension modifier
            indicating file compression, in which case the combined
            extension (e.g. ``.fits.gz``) will be returned.
        """
        special = {".gz", ".bz2", ".xz", ".fz"}

        extensions = self._pathLib(self.path).suffixes

        if not extensions:
            return ""

        ext = extensions.pop()

        # Multiple extensions, decide whether to include the final two
        if extensions and ext in special:
            ext = f"{extensions[-1]}{ext}"

        return ext

    def join(self, path: Union[str, ButlerURI]) -> ButlerURI:
        """Return new `ButlerURI` with additional path components.

        Parameters
        ----------
        path : `str`, `ButlerURI`
            Additional file components to append to the current URI. Assumed
            to include a file at the end. Will be quoted depending on the
            associated URI scheme. If the path looks like a URI with a scheme
            referring to an absolute location, it will be returned
            directly (matching the behavior of `os.path.join()`). It can
            also be a `ButlerURI`.

        Returns
        -------
        new : `ButlerURI`
            New URI with any file at the end replaced with the new path
            components.

        Notes
        -----
        Schemeless URIs assume local path separator but all other URIs assume
        POSIX separator if the supplied path has directory structure. It
        may be this never becomes a problem but datastore templates assume
        POSIX separator is being used.

        Currently, if the join path is given as an absolute scheme-less
        URI it will be returned as an absolute ``file:`` URI even if the
        URI it is being joined to is non-file.
        """
        # If we have a full URI in path we will use it directly
        # but without forcing to absolute so that we can trap the
        # expected option of relative path.
        path_uri = ButlerURI(path, forceAbsolute=False)
        if path_uri.scheme:
            return path_uri

        # Force back to string
        path = path_uri.path

        new = self.dirname()  # By definition a directory URI

        # new should be asked about quoting, not self, since dirname can
        # change the URI scheme for schemeless -> file
        if new.quotePaths:
            path = urllib.parse.quote(path)

        newpath = self._pathModule.normpath(self._pathModule.join(new.path, path))

        # normpath can strip trailing / so we force directory if the supplied
        # path ended with a /
        return new.replace(path=newpath, forceDirectory=path.endswith(self._pathModule.sep))

    def relative_to(self, other: ButlerURI) -> Optional[str]:
        """Return the path of this URI relative to the other URI.

        Parameters
        ----------
        other : `ButlerURI`
            URI to use to calculate the relative path. Must be a parent
            of this URI.

        Returns
        -------
        subpath : `str`
            The sub path of this URI relative to the supplied other URI.
            Returns `None` if there is no parent child relationship.
            Scheme and netloc must match. The returned path is unquoted.
        """
        if self.scheme != other.scheme or self.netloc != other.netloc:
            return None

        # relativeToPathRoot has already reversed the URI quoting for both
        # paths, so the result must not be unquoted a second time: doing so
        # would corrupt names that contain a literal "%XX" sequence.
        enclosed_path = self._pathLib(self.relativeToPathRoot)
        parent_path = other.relativeToPathRoot
        try:
            return str(enclosed_path.relative_to(parent_path))
        except ValueError:
            # Not a sub path of other.
            return None

    def exists(self) -> bool:
        """Indicate that the resource is available.

        Returns
        -------
        exists : `bool`
            `True` if the resource exists.

        Notes
        -----
        This base implementation always raises `NotImplementedError`.
        """
        raise NotImplementedError()

    def remove(self) -> None:
        """Remove the resource.

        This base implementation always raises `NotImplementedError`.
        """
        raise NotImplementedError()

    def isabs(self) -> bool:
        """Indicate that the resource is fully specified.

        For non-schemeless URIs this is always true.

        Returns
        -------
        isabs : `bool`
            `True` in all cases except schemeless URI.
        """
        return True

    def abspath(self) -> ButlerURI:
        """Return URI using an absolute path.

        Returns
        -------
        abs : `ButlerURI`
            Absolute URI. For non-schemeless URIs this always returns itself.
            Schemeless URIs are upgraded to file URIs.
        """
        return self

    def _as_local(self) -> Tuple[str, bool]:
        """Return the location of the (possibly remote) resource as local file.

        This is a helper function for `as_local` context manager.

        Returns
        -------
        path : `str`
            If this is a remote resource, it will be a copy of the resource
            on the local file system, probably in a temporary directory.
            For a local resource this should be the actual path to the
            resource.
        is_temporary : `bool`
            Indicates if the local path is a temporary file or not.

        Notes
        -----
        This base implementation always raises `NotImplementedError`.
        """
        raise NotImplementedError()

    @contextlib.contextmanager
    def as_local(self) -> Iterator[ButlerURI]:
        """Return the location of the (possibly remote) resource as local file.

        Yields
        ------
        local : `ButlerURI`
            If this is a remote resource, it will be a copy of the resource
            on the local file system, probably in a temporary directory.
            For a local resource this should be the actual path to the
            resource.

        Notes
        -----
        The context manager will automatically delete any local temporary
        file.

        Examples
        --------
        Should be used as a context manager:

        .. code-block:: py

           with uri.as_local() as local:
               ospath = local.ospath
        """
        local_src, is_temporary = self._as_local()
        local_uri = ButlerURI(local_src, isTemporary=is_temporary)

        try:
            yield local_uri
        finally:
            # The caller might have relocated the temporary file
            if is_temporary and local_uri.exists():
                local_uri.remove()

    def read(self, size: int = -1) -> bytes:
        """Open the resource and return the contents in bytes.

        Parameters
        ----------
        size : `int`, optional
            The number of bytes to read. Negative or omitted indicates
            that all data should be read.

        Notes
        -----
        This base implementation always raises `NotImplementedError`.
        """
        raise NotImplementedError()

    def write(self, data: bytes, overwrite: bool = True) -> None:
        """Write the supplied bytes to the new resource.

        Parameters
        ----------
        data : `bytes`
            The bytes to write to the resource. The entire contents of the
            resource will be replaced.
        overwrite : `bool`, optional
            If `True` the resource will be overwritten if it exists. Otherwise
            the write will fail.

        Notes
        -----
        This base implementation always raises `NotImplementedError`.
        """
        raise NotImplementedError()

    def mkdir(self) -> None:
        """For a dir-like URI, create the directory resource if needed.

        This base implementation always raises `NotImplementedError`.
        """
        raise NotImplementedError()

    def isdir(self) -> bool:
        """Return True if this URI looks like a directory, else False."""
        return self.dirLike

    def size(self) -> int:
        """For non-dir-like URI, return the size of the resource.

        Returns
        -------
        sz : `int`
            The size in bytes of the resource associated with this URI.
            Returns 0 if dir-like.

        Notes
        -----
        This base implementation always raises `NotImplementedError`.
        """
        raise NotImplementedError()

    def __str__(self) -> str:
        """Convert the URI to its native string form."""
        return self.geturl()

    def __repr__(self) -> str:
        """Return string representation suitable for evaluation."""
        return f'ButlerURI("{self.geturl()}")'

    def __eq__(self, other: Any) -> bool:
        """Compare supplied object with this `ButlerURI`.

        Two URIs are equal if their string forms are identical.
        """
        if not isinstance(other, ButlerURI):
            return NotImplemented
        return self.geturl() == other.geturl()

    def __hash__(self) -> int:
        """Return hash of this object, derived from its string form."""
        return hash(str(self))

    def __copy__(self) -> ButlerURI:
        """Copy constructor.

        Object is immutable so copy can return itself.
        """
        # Implement here because the __new__ method confuses things
        return self

    def __deepcopy__(self, memo: Any) -> ButlerURI:
        """Deepcopy the object.

        Object is immutable so copy can return itself.
        """
        # Implement here because the __new__ method confuses things
        return self

    def __getnewargs__(self) -> Tuple:
        """Support pickling by supplying the string form to ``__new__``."""
        return (str(self),)

    @classmethod
    def _fixDirectorySep(cls, parsed: urllib.parse.ParseResult,
                         forceDirectory: bool = False) -> Tuple[urllib.parse.ParseResult, bool]:
        """Ensure that a path separator is present on directory paths.

        Parameters
        ----------
        parsed : `~urllib.parse.ParseResult`
            The result from parsing a URI using `urllib.parse`.
        forceDirectory : `bool`, optional
            If `True` forces the URI to end with a separator, otherwise given
            URI is interpreted as is. Specifying that the URI is conceptually
            equivalent to a directory can break some ambiguities when
            interpreting the last element of a path.

        Returns
        -------
        modified : `~urllib.parse.ParseResult`
            Update result if a URI is being handled.
        dirLike : `bool`
            `True` if given parsed URI has a trailing separator or
            forceDirectory is True. Otherwise `False`.
        """
        # assume we are not dealing with a directory like URI
        dirLike = False

        # Directory separator
        sep = cls._pathModule.sep

        # URI is dir-like if explicitly stated or if it ends on a separator
        endsOnSep = parsed.path.endswith(sep)
        if forceDirectory or endsOnSep:
            dirLike = True
            # only add the separator if it's not already there
            if not endsOnSep:
                parsed = parsed._replace(path=parsed.path+sep)

        return parsed, dirLike

    @classmethod
    def _fixupPathUri(cls, parsed: urllib.parse.ParseResult, root: Optional[Union[str, ButlerURI]] = None,
                      forceAbsolute: bool = False,
                      forceDirectory: bool = False) -> Tuple[urllib.parse.ParseResult, bool]:
        """Correct any issues with the supplied URI.

        Parameters
        ----------
        parsed : `~urllib.parse.ParseResult`
            The result from parsing a URI using `urllib.parse`.
        root : `str` or `ButlerURI`, ignored
            Not used by this implementation since all URIs are
            absolute except for those representing the local file system.
        forceAbsolute : `bool`, ignored.
            Not used by this implementation. URIs are generally always
            absolute.
        forceDirectory : `bool`, optional
            If `True` forces the URI to end with a separator, otherwise given
            URI is interpreted as is. Specifying that the URI is conceptually
            equivalent to a directory can break some ambiguities when
            interpreting the last element of a path.

        Returns
        -------
        modified : `~urllib.parse.ParseResult`
            Update result if a URI is being handled.
        dirLike : `bool`
            `True` if given parsed URI has a trailing separator or
            forceDirectory is True. Otherwise `False`.

        Notes
        -----
        Relative paths are explicitly not supported by RFC8089 but `urllib`
        does accept URIs of the form ``file:relative/path.ext``. They need
        to be turned into absolute paths before they can be used. This is
        always done regardless of the ``forceAbsolute`` parameter.

        AWS S3 differentiates between keys with trailing POSIX separators (i.e
        `/dir` and `/dir/`) whereas POSIX does not necessarily.

        Scheme-less paths are normalized.
        """
        # This generic implementation only deals with the trailing
        # separator.
        return cls._fixDirectorySep(parsed, forceDirectory)

    def transfer_from(self, src: ButlerURI, transfer: str,
                      overwrite: bool = False,
                      transaction: Optional[Union[DatastoreTransaction, NoTransaction]] = None) -> None:
        """Transfer the resource at ``src`` to this URI.

        Parameters
        ----------
        src : `ButlerURI`
            Source URI.
        transfer : `str`
            Mode to use for transferring the resource. Generically there are
            many standard options: copy, link, symlink, hardlink, relsymlink.
            Not all URIs support all modes.
        overwrite : `bool`, optional
            Allow an existing file to be overwritten. Defaults to `False`.
        transaction : `DatastoreTransaction`, optional
            A transaction object that can (depending on implementation)
            rollback transfers on error. Not guaranteed to be implemented.

        Notes
        -----
        Conceptually this is hard to scale as the number of URI schemes
        grow. The destination URI is more important than the source URI
        since that is where all the transfer modes are relevant (with the
        complication that "move" deletes the source).

        Local file to local file is the fundamental use case but every
        other scheme has to support "copy" to local file (with implicit
        support for "move") and copy from local file.
        All the "link" options tend to be specific to local file systems.

        "move" is a "copy" where the remote resource is deleted at the end.
        Whether this works depends on the source URI rather than the
        destination URI. Reverting a move on transaction rollback is
        expected to be problematic if a remote resource was involved.

        This base implementation always raises `NotImplementedError`.
        """
        raise NotImplementedError(f"No transfer modes supported by URI scheme {self.scheme}")

    def walk(self, file_filter: Optional[Union[str, re.Pattern]] = None
             ) -> Iterator[Union[List, Tuple[ButlerURI, List[str], List[str]]]]:
        """Walk the directory tree returning matching files and directories.

        Parameters
        ----------
        file_filter : `str` or `re.Pattern`, optional
            Regex to filter out files from the list before it is returned.

        Yields
        ------
        dirpath : `ButlerURI`
            Current directory being examined.
        dirnames : `list` of `str`
            Names of subdirectories within dirpath.
        filenames : `list` of `str`
            Names of all the files within dirpath.

        Notes
        -----
        This base implementation always raises `NotImplementedError`.
        """
        raise NotImplementedError()

    @classmethod
    def findFileResources(cls, candidates: Iterable[Union[str, ButlerURI]],
                          file_filter: Optional[str] = None,
                          grouped: bool = False) -> Iterator[Union[ButlerURI, Iterator[ButlerURI]]]:
        """Get all the files from a list of values.

        Parameters
        ----------
        candidates : iterable [`str` or `ButlerURI`]
            The files to return and directories in which to look for files to
            return.
        file_filter : `str`, optional
            The regex to use when searching for files within directories.
            By default returns all the found files.
        grouped : `bool`, optional
            If `True` the results will be grouped by directory and each
            yielded value will be an iterator over URIs. If `False` each
            URI will be returned separately.

        Yields
        ------
        found_file: `ButlerURI`
            The passed-in URIs and URIs found in passed-in directories.
            If grouping is enabled, each of the yielded values will be an
            iterator yielding members of the group. Files given explicitly
            will be returned as a single group at the end.

        Notes
        -----
        If a value is a file it is yielded immediately. If a value is a
        directory, all the files in the directory (recursively) that match
        the regex will be yielded in turn.
        """
        fileRegex = None if file_filter is None else re.compile(file_filter)

        # Explicitly-given files, collected only when grouping.
        singles = []

        # Find all the files of interest
        for location in candidates:
            uri = ButlerURI(location)
            if uri.isdir():
                for found in uri.walk(fileRegex):
                    if not found:
                        # This means the uri does not exist and by
                        # convention we ignore it
                        continue
                    root, dirs, files = found
                    if not files:
                        continue
                    if grouped:
                        yield (root.join(name) for name in files)
                    else:
                        for name in files:
                            yield root.join(name)
            else:
                if grouped:
                    singles.append(uri)
                else:
                    yield uri

        # Finally, return any explicitly given files in one group
        if grouped and singles:
            yield iter(singles)