Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ("Location", "LocationFactory", "ButlerURI") 

25 

26import os 

27import os.path 

28import urllib 

29import posixpath 

30from pathlib import Path, PurePath, PurePosixPath 

31import copy 

32import types 

33 

34from typing import ( 

35 Any, 

36 Optional, 

37 Tuple, 

38 Union, 

39) 

40 

# Flag recording whether the local OS path separator is the POSIX one
IS_POSIX = posixpath.sep == os.sep

# File system root for this operating system, taken from the resolved
# current working directory
OS_ROOT_PATH = Path.cwd().resolve().root

46 

47 

def os2posix(ospath: str) -> str:
    """Translate a path written with the local separator into POSIX form.

    Parameters
    ----------
    ospath : `str`
        Path using the local path separator.

    Returns
    -------
    posix : `str`
        Path using POSIX path separator. A trailing separator on the
        input is retained so that directory-like paths stay recognizable.
    """
    # Nothing to translate when the OS separator already is the POSIX one.
    if IS_POSIX:
        return ospath

    converted = PurePath(ospath).as_posix()

    # PurePath drops a trailing separator, which would lose the hint that
    # the path refers to a directory; restore it when that happened.
    lostTrailingSep = ospath.endswith(os.sep) and not converted.endswith(posixpath.sep)
    return converted + posixpath.sep if lostTrailingSep else converted

73 

74 

def posix2os(posix: Union[PurePath, str]) -> str:
    """Convert a POSIX path description to a local path description.

    Parameters
    ----------
    posix : `str` or `pathlib.PurePath`
        Path using the POSIX path separator.

    Returns
    -------
    ospath : `str`
        Path using OS path separator. A trailing separator on the input
        is retained on the output.

    Notes
    -----
    Paths with no components (``""``, ``"."`` and ``"./"``) are handled
    explicitly; they are returned unchanged apart from the conversion of
    a trailing separator.
    """
    if IS_POSIX:
        return str(posix)

    posixStr = str(posix)
    posixPath = PurePosixPath(posix)
    paths = list(posixPath.parts)

    # An empty or current-directory path has no parts at all, so indexing
    # the first element or joining the parts would raise.
    if not paths:
        if posixStr.endswith(posixpath.sep):
            return os.path.join(os.curdir, "")
        return posixStr

    # Have to convert the root directory after splitting
    if paths[0] == posixPath.root:
        paths[0] = OS_ROOT_PATH

    # Trailing "/" is stripped so we need to add back an empty path
    # for consistency
    if posixStr.endswith(posixpath.sep):
        paths.append("")

    return os.path.join(*paths)

104 

105 

class ButlerURI:
    """Convenience wrapper around URI parsers.

    Provides access to URI components and can convert file
    paths into absolute path URIs. Scheme-less URIs are treated as if
    they are local file system paths and are converted to absolute URIs.

    Parameters
    ----------
    uri : `str`, `urllib.parse.ParseResult` or `ButlerURI`
        URI in string form. Can be scheme-less if referring to a local
        filesystem path. If a `ButlerURI` is given its parsed state is
        copied and the remaining parameters are ignored.
    root : `str`, optional
        When fixing up a relative path in a ``file`` scheme or if scheme-less,
        use this as the root. Must be absolute. If `None` the current
        working directory will be used.
    forceAbsolute : `bool`, optional
        If `True`, scheme-less relative URI will be converted to an absolute
        path using a ``file`` scheme. If `False` scheme-less URI will remain
        scheme-less and will not be updated to ``file`` or absolute path.
    forceDirectory: `bool`, optional
        If `True` forces the URI to end with a separator, otherwise given URI
        is interpreted as is.

    Raises
    ------
    ValueError
        Raised if ``uri`` is not one of the supported types.
    """

    def __init__(self, uri: Union[str, urllib.parse.ParseResult, ButlerURI],
                 root: Optional[str] = None, forceAbsolute: bool = True, forceDirectory: bool = False):
        self._uri: urllib.parse.ParseResult
        self.dirLike: bool

        # Record if we need to post process the URI components
        # or if the instance is already fully configured
        is_configured = False
        if isinstance(uri, str):
            parsed = urllib.parse.urlparse(uri)
        elif isinstance(uri, urllib.parse.ParseResult):
            parsed = copy.copy(uri)
        elif isinstance(uri, ButlerURI):
            self._uri = copy.copy(uri._uri)
            self.dirLike = uri.dirLike
            # No further parsing required
            is_configured = True
        else:
            raise ValueError(f"Supplied URI must be string, ButlerURI, or ParseResult but got '{uri!r}'")

        if not is_configured:
            parsed, dirLike = self._fixupPathUri(parsed, root=root,
                                                 forceAbsolute=forceAbsolute,
                                                 forceDirectory=forceDirectory)

            self.dirLike = dirLike
            self._uri = parsed

    @property
    def scheme(self) -> str:
        """The URI scheme (``://`` is not part of the scheme)."""
        return self._uri.scheme

    @property
    def netloc(self) -> str:
        """The URI network location."""
        return self._uri.netloc

    @property
    def path(self) -> str:
        """The path component of the URI."""
        return self._uri.path

    @property
    def ospath(self) -> str:
        """Path component of the URI localized to current OS.

        Raises `AttributeError` for ``s3`` URIs since they have no
        local path.
        """
        if self.scheme == 's3':
            raise AttributeError('S3 URIs have no OS path.')
        return posix2os(self._uri.path)

    @property
    def relativeToPathRoot(self) -> str:
        """Returns path relative to network location.

        Effectively, this is the path property with posix separator stripped
        from the left hand side of the path. A trailing ``/`` is included
        when the URI is directory-like.
        """
        # Scheme-less paths are in local OS form, everything else is POSIX.
        if not self.scheme:
            p = PurePath(self.path)
        else:
            p = PurePosixPath(self.path)
        relToRoot = str(p.relative_to(p.root))
        # The pure path classes drop a trailing separator; put it back
        # for directory-like URIs.
        if self.dirLike and not relToRoot.endswith("/"):
            relToRoot += "/"
        return relToRoot

    @property
    def fragment(self) -> str:
        """The fragment component of the URI."""
        return self._uri.fragment

    @property
    def params(self) -> str:
        """Any parameters included in the URI."""
        return self._uri.params

    @property
    def query(self) -> str:
        """Any query strings included in the URI."""
        return self._uri.query

    def geturl(self) -> str:
        """Return the URI in string form.

        Returns
        -------
        url : `str`
            String form of URI.
        """
        return self._uri.geturl()

    def split(self) -> Tuple[ButlerURI, str]:
        """Splits URI into head and tail. Equivalent to os.path.split where
        head preserves the URI components.

        Returns
        -------
        head: `ButlerURI`
            Everything leading up to tail, expanded and normalized as per
            ButlerURI rules.
        tail : `str`
            Last `self.path` component. Tail will be empty if path ends on a
            separator. Tail will never contain separators.
        """
        if self.scheme:
            head, tail = posixpath.split(self.path)
        else:
            head, tail = os.path.split(self.path)
        headuri = self._uri._replace(path=head)
        # The head is by construction a directory.
        return self.__class__(headuri, forceDirectory=True), tail

    def basename(self) -> str:
        """Returns the base name, last element of path, of the URI. If URI ends
        on a slash returns an empty string. This is the second element returned
        by split().

        Equivalent of os.path.basename().

        Returns
        -------
        tail : `str`
            Last part of the path attribute. Tail will be empty if path ends
            on a separator.
        """
        return self.split()[1]

    def dirname(self) -> ButlerURI:
        """Returns a ButlerURI containing all the directories of the path
        attribute.

        Equivalent of os.path.dirname()

        Returns
        -------
        head : `ButlerURI`
            Everything except the tail of path attribute, expanded and
            normalized as per ButlerURI rules.
        """
        return self.split()[0]

    def replace(self, **kwargs: Any) -> ButlerURI:
        """Replace components in a URI with new values and return a new
        instance.

        Parameters
        ----------
        **kwargs
            Components to replace, passed to
            `urllib.parse.ParseResult._replace` (for example ``path`` or
            ``scheme``).

        Returns
        -------
        new : `ButlerURI`
            New `ButlerURI` object with updated values.
        """
        return self.__class__(self._uri._replace(**kwargs))

    def updateFile(self, newfile: str) -> None:
        """Update in place the final component of the path with the supplied
        file name.

        Parameters
        ----------
        newfile : `str`
            File name with no path component.

        Notes
        -----
        Updates the URI in place.
        Updates the ButlerURI.dirLike attribute.
        """
        pathclass = posixpath if self.scheme else os.path

        # Mypy can't work out that these specific modules support split
        # and join
        head, _ = pathclass.split(self.path)  # type: ignore
        newpath = pathclass.join(head, newfile)  # type: ignore

        # The URI now refers to a file.
        self.dirLike = False
        self._uri = self._uri._replace(path=newpath)

    def getExtension(self) -> str:
        """Return the file extension(s) associated with this URI path.

        Returns
        -------
        ext : `str`
            The file extension (including the ``.``). Can be empty string
            if there is no file extension. Will return all file extensions
            as a single extension such that ``file.fits.gz`` will return
            a value of ``.fits.gz``.
        """
        if not self.scheme:
            extensions = PurePath(self.path).suffixes
        else:
            extensions = PurePosixPath(self.path).suffixes
        return "".join(extensions)

    def __str__(self) -> str:
        return self.geturl()

    def __repr__(self) -> str:
        return f'ButlerURI("{self.geturl()}")'

    def __eq__(self, other: Any) -> bool:
        # Two URIs are equal if their string forms are identical.
        if not isinstance(other, ButlerURI):
            return False
        return self.geturl() == other.geturl()

    @staticmethod
    def _fixupPathUri(parsed: urllib.parse.ParseResult, root: Optional[str] = None,
                      forceAbsolute: bool = False,
                      forceDirectory: bool = False) -> Tuple[urllib.parse.ParseResult, bool]:
        """Fix up relative paths in URI instances.

        Parameters
        ----------
        parsed : `~urllib.parse.ParseResult`
            The result from parsing a URI using `urllib.parse`.
        root : `str`, optional
            Path to use as root when converting relative to absolute.
            If `None`, it will be the current working directory. This
            is a local file system path, not a URI.
        forceAbsolute : `bool`, optional
            If `True`, scheme-less relative URI will be converted to an
            absolute path using a ``file`` scheme. If `False` scheme-less URI
            will remain scheme-less and will not be updated to ``file`` or
            absolute path. URIs with a defined scheme will not be affected
            by this parameter.
        forceDirectory : `bool`, optional
            If `True` forces the URI to end with a separator, otherwise given
            URI is interpreted as is.

        Returns
        -------
        modified : `~urllib.parse.ParseResult`
            Update result if a URI is being handled.
        dirLike : `bool`
            `True` if given parsed URI has a trailing separator or
            forceDirectory is True. Otherwise `False`.

        Notes
        -----
        Relative paths are explicitly not supported by RFC8089 but `urllib`
        does accept URIs of the form ``file:relative/path.ext``. They need
        to be turned into absolute paths before they can be used. This is
        always done regardless of the ``forceAbsolute`` parameter.

        AWS S3 differentiates between keys with trailing POSIX separators (i.e
        `/dir` and `/dir/`) whereas POSIX does not necessarily.

        Scheme-less paths are normalized.

        A separator is only ever appended when the path does not already end
        with one, so forcing a directory never produces a doubled separator.
        """
        # assume we are not dealing with a directory like URI
        dirLike = False
        if not parsed.scheme or parsed.scheme == "file":

            # Replacement values for the URI
            replacements = {}

            if root is None:
                root = os.path.abspath(os.path.curdir)

            if not parsed.scheme:
                # if there was no scheme this is a local OS file path
                # which can support tilde expansion.
                expandedPath = os.path.expanduser(parsed.path)

                # Ensure that this is a file URI if it is already absolute
                if os.path.isabs(expandedPath):
                    replacements["scheme"] = "file"
                    replacements["path"] = os2posix(os.path.normpath(expandedPath))
                elif forceAbsolute:
                    # This can stay in OS path form, do not change to file
                    # scheme.
                    replacements["path"] = os.path.normpath(os.path.join(root, expandedPath))
                else:
                    # No change needed for relative local path staying relative
                    # except normalization
                    replacements["path"] = os.path.normpath(expandedPath)
                    # normalization of empty path returns "." so we are dirLike
                    if expandedPath == "":
                        dirLike = True

                # normpath strips trailing "/" which makes it hard to keep
                # track of directory vs file when calling replaceFile
                # find the appropriate separator
                sep = posixpath.sep if "scheme" in replacements else os.sep

                # Flag as directory-like if explicitly required or if the
                # supplied path ended on a separator, and restore the
                # separator only when normpath stripped it (normpath keeps
                # it for the root directory).
                if forceDirectory or dirLike or expandedPath.endswith(os.sep):
                    dirLike = True
                    if not replacements["path"].endswith(sep):
                        replacements["path"] += sep

            elif parsed.scheme == "file":
                # file URI implies POSIX path separators so split as POSIX,
                # then join as os, and convert to abspath. Do not handle
                # home directories since "file" scheme is explicitly documented
                # to not do tilde expansion.
                sep = posixpath.sep
                if posixpath.isabs(parsed.path):
                    # Absolute file URIs are returned without normalization;
                    # only the trailing separator needs attention.
                    endsOnSep = parsed.path.endswith(sep)
                    if forceDirectory and not endsOnSep:
                        parsed = parsed._replace(path=parsed.path+sep)
                    dirLike = forceDirectory or endsOnSep
                    return copy.copy(parsed), dirLike

                replacements["path"] = posixpath.normpath(posixpath.join(os2posix(root), parsed.path))

                # normpath strips trailing "/" so put it back if necessary
                # Acknowledge that trailing separator exists.
                if forceDirectory or parsed.path.endswith(sep):
                    dirLike = True
                    if not replacements["path"].endswith(sep):
                        replacements["path"] += sep
            else:
                raise RuntimeError("Unexpectedly got confused by URI scheme")

            # ParseResult is a NamedTuple so _replace is standard API
            parsed = parsed._replace(**replacements)

        # URI is dir-like if explicitly stated or if it ends on a separator
        endsOnSep = parsed.path.endswith(posixpath.sep)
        if forceDirectory or endsOnSep:
            dirLike = True
            # only add the separator if it's not already there
            if not endsOnSep:
                parsed = parsed._replace(path=parsed.path+posixpath.sep)

        return parsed, dirLike

462 

463 

class Location:
    """Identifies a location within the `Datastore`.

    Parameters
    ----------
    datastoreRootUri : `ButlerURI` or `str`
        Base URI for this datastore, must include an absolute path.
    path : `str`
        Relative path within datastore. Assumed to be using the local
        path separator if a ``file`` scheme is being used for the URI,
        else a POSIX separator.

    Raises
    ------
    ValueError
        Raised if the datastore root is neither a `str` nor a `ButlerURI`,
        if the root URI does not have an absolute path, or if ``path`` is
        absolute.
    """

    __slots__ = ("_datastoreRootUri", "_path")

    def __init__(self, datastoreRootUri: Union[ButlerURI, str], path: str):
        if isinstance(datastoreRootUri, str):
            datastoreRootUri = ButlerURI(datastoreRootUri, forceDirectory=True)
        elif not isinstance(datastoreRootUri, ButlerURI):
            raise ValueError("Datastore root must be a ButlerURI instance")

        if not posixpath.isabs(datastoreRootUri.path):
            raise ValueError(f"Supplied URI must be an absolute path (given {datastoreRootUri}).")

        self._datastoreRootUri = datastoreRootUri

        # The path in the store uses the local separator only for file URIs.
        pathModule: types.ModuleType
        if self._datastoreRootUri.scheme == "file":
            pathModule = os.path
        else:
            pathModule = posixpath

        # mypy can not work out that these modules support isabs
        if pathModule.isabs(path):  # type: ignore
            raise ValueError("Path within datastore must be relative not absolute")

        self._path = path

    def __str__(self) -> str:
        return self.uri

    def __repr__(self) -> str:
        uri = self._datastoreRootUri.geturl()
        path = self._path
        return f"{self.__class__.__name__}({uri!r}, {path!r})"

    @property
    def uri(self) -> str:
        """URI string corresponding to fully-specified location in datastore.
        """
        uriPath = os2posix(self.path)
        return self._datastoreRootUri.replace(path=uriPath).geturl()

    @property
    def path(self) -> str:
        """Path corresponding to location.

        This path includes the root of the `Datastore`, but does not include
        non-path components of the root URI. If a file URI scheme is being
        used the path will be returned with the local OS path separator.
        """
        if not self._datastoreRootUri.scheme:
            # Entirely local file system
            return os.path.normpath(os.path.join(self._datastoreRootUri.path, self.pathInStore))
        elif self._datastoreRootUri.scheme == "file":
            return os.path.normpath(os.path.join(posix2os(self._datastoreRootUri.path), self.pathInStore))
        else:
            return posixpath.join(self._datastoreRootUri.path, self.pathInStore)

    @property
    def pathInStore(self) -> str:
        """Path corresponding to location relative to `Datastore` root.

        Uses the same path separator as supplied to the object constructor.
        """
        return self._path

    @property
    def netloc(self) -> str:
        """The URI network location."""
        return self._datastoreRootUri.netloc

    @property
    def relativeToPathRoot(self) -> str:
        """Returns the path component of the URI relative to the network
        location.

        Effectively, this is the path property with POSIX separator stripped
        from the left hand side of the path.
        """
        if self._datastoreRootUri.scheme == 'file' or not self._datastoreRootUri.scheme:
            p = PurePath(os2posix(self.path))
        else:
            p = PurePosixPath(self.path)
        stripped = p.relative_to(p.root)
        return str(posix2os(stripped))

    def updateExtension(self, ext: Optional[str]) -> None:
        """Update the file extension associated with this `Location`.

        All file extensions are replaced.

        Parameters
        ----------
        ext : `str`
            New extension. If an empty string is given any extension will
            be removed. If `None` is given there will be no change.
        """
        if ext is None:
            return

        # Get the extension and remove it from the path if one is found
        # .fits.gz counts as one extension do not use os.path.splitext
        current = self.getExtension()
        path = self.pathInStore
        if current:
            path = path[:-len(current)]

        # Ensure that we have a leading "." on file extension (and we do not
        # try to modify the empty string)
        if ext and not ext.startswith("."):
            ext = "." + ext

        self._path = path + ext

    def getExtension(self) -> str:
        """Return the file extension(s) associated with this location.

        Returns
        -------
        ext : `str`
            The file extension (including the ``.``). Can be empty string
            if there is no file extension. Will return all file extensions
            as a single extension such that ``file.fits.gz`` will return
            a value of ``.fits.gz``.
        """
        # The path property is in local OS form for scheme-less and file
        # URIs and in POSIX form for every other scheme, so parse it with
        # the matching pure path class.
        scheme = self._datastoreRootUri.scheme
        if not scheme or scheme == "file":
            extensions = PurePath(self.path).suffixes
        else:
            extensions = PurePosixPath(self.path).suffixes
        return "".join(extensions)

605 

606 

607class LocationFactory: 

608 """Factory for `Location` instances. 

609 

610 The factory is constructed from the root location of the datastore. 

611 This location can be a path on the file system (absolute or relative) 

612 or as a URI. 

613 

614 Parameters 

615 ---------- 

616 datastoreRoot : `str` 

617 Root location of the `Datastore` either as a path in the local 

618 filesystem or as a URI. File scheme URIs can be used. If a local 

619 filesystem path is used without URI scheme, it will be converted 

620 to an absolute path and any home directory indicators expanded. 

621 If a file scheme is used with a relative path, the path will 

622 be treated as a posixpath but then converted to an absolute path. 

623 """ 

624 

625 def __init__(self, datastoreRoot: str): 

626 self._datastoreRootUri = ButlerURI(datastoreRoot, forceAbsolute=True, 

627 forceDirectory=True) 

628 

629 def __str__(self) -> str: 

630 return f"{self.__class__.__name__}@{self._datastoreRootUri}" 

631 

632 @property 

633 def netloc(self) -> str: 

634 """Returns the network location of root location of the `Datastore`.""" 

635 return self._datastoreRootUri.netloc 

636 

637 def fromPath(self, path: str) -> Location: 

638 """Factory function to create a `Location` from a POSIX path. 

639 

640 Parameters 

641 ---------- 

642 path : `str` 

643 A standard POSIX path, relative to the `Datastore` root. 

644 

645 Returns 

646 ------- 

647 location : `Location` 

648 The equivalent `Location`. 

649 """ 

650 if os.path.isabs(path): 

651 raise ValueError("LocationFactory path must be relative to datastore, not absolute.") 

652 return Location(self._datastoreRootUri, path)