Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ("Location", "LocationFactory", "ButlerURI") 

25 

import copy
import os
import os.path
import posixpath
import types
import urllib
import urllib.parse
from pathlib import Path, PurePath, PurePosixPath

from typing import (
    Any,
    Optional,
    Tuple,
    Union,
)

40 

# True when the local OS separates path components with the POSIX
# separator, in which case no separator translation is ever needed.
IS_POSIX = posixpath.sep == os.sep

# Root of the local file system, taken from the resolved form of the
# current working directory.
OS_ROOT_PATH = Path().resolve().root

46 

47 

def os2posix(ospath: str) -> str:
    """Translate a path written with the local separator into POSIX form.

    Parameters
    ----------
    ospath : `str`
        Path using the local path separator.

    Returns
    -------
    posix : `str`
        Path using the POSIX path separator. A trailing separator on the
        input is kept on the output.
    """
    if not IS_POSIX:
        converted = PurePath(ospath).as_posix()

        # PurePath drops a trailing separator, which would lose the hint
        # that the path refers to a directory, so put it back if the caller
        # supplied one.
        lostTrailingSep = ospath.endswith(os.sep) and not converted.endswith(posixpath.sep)
        return converted + posixpath.sep if lostTrailingSep else converted

    # Local separator already is the POSIX one; nothing to translate.
    return ospath

73 

74 

def posix2os(posix: Union[PurePath, str]) -> str:
    """Convert a POSIX path description to a local path description.

    Parameters
    ----------
    posix : `str` or `pathlib.PurePath`
        Path using the POSIX path separator.

    Returns
    -------
    ospath : `str`
        Path using OS path separator. A trailing separator on the input
        is kept on the output.
    """
    if IS_POSIX:
        return str(posix)

    posixPath = PurePosixPath(posix)
    paths = list(posixPath.parts)

    # An empty path or "." has no components at all; indexing or joining
    # an empty list would fail, and there is nothing to translate anyway.
    if not paths:
        return str(posix)

    # Have to convert the root directory after splitting
    if paths[0] == posixPath.root:
        paths[0] = OS_ROOT_PATH

    # Trailing "/" is stripped so we need to add back an empty path
    # for consistency
    if str(posix).endswith(posixpath.sep):
        paths.append("")

    return os.path.join(*paths)

104 

105 

class ButlerURI:
    """Convenience wrapper around URI parsers.

    Provides access to URI components and can convert file
    paths into absolute path URIs. Scheme-less URIs are treated as if
    they are local file system paths.

    Parameters
    ----------
    uri : `str` or `urllib.parse.ParseResult`
        URI in string form. Can be scheme-less if referring to a local
        filesystem path.
    root : `str`, optional
        When fixing up a relative path in a ``file`` scheme or if scheme-less,
        use this as the root. Must be absolute. If `None` the current
        working directory will be used.
    forceAbsolute : `bool`, optional
        If `True`, a scheme-less relative path is made absolute by joining
        it onto ``root``; this step does not itself add a ``file`` scheme.
        If `False` a scheme-less relative path stays relative. Scheme-less
        paths that are already absolute are always given the ``file``
        scheme.
    forceDirectory : `bool`, optional
        If `True` forces the URI to end with a separator, otherwise given URI
        is interpreted as is.

    Raises
    ------
    ValueError
        Raised if ``uri`` is neither a `str` nor a
        `urllib.parse.ParseResult`.
    """

    def __init__(self, uri: Union[str, urllib.parse.ParseResult],
                 root: Optional[str] = None, forceAbsolute: bool = True, forceDirectory: bool = False):
        if isinstance(uri, str):
            parsed = urllib.parse.urlparse(uri)
        elif isinstance(uri, urllib.parse.ParseResult):
            parsed = copy.copy(uri)
        else:
            raise ValueError("Supplied URI must be either string or ParseResult")

        parsed, dirLike = self._fixupPathUri(parsed, root=root,
                                             forceAbsolute=forceAbsolute,
                                             forceDirectory=forceDirectory)

        # `True` if the URI is known to refer to a directory-like entity.
        self.dirLike = dirLike
        self._uri = parsed

    @property
    def scheme(self) -> str:
        """The URI scheme (``://`` is not part of the scheme)."""
        return self._uri.scheme

    @property
    def netloc(self) -> str:
        """The URI network location."""
        return self._uri.netloc

    @property
    def path(self) -> str:
        """The path component of the URI."""
        return self._uri.path

    @property
    def ospath(self) -> str:
        """Path component of the URI localized to current OS.

        Raises
        ------
        AttributeError
            Raised if the URI uses the ``s3`` scheme.
        """
        if self.scheme == 's3':
            raise AttributeError('S3 URIs have no OS path.')
        return posix2os(self._uri.path)

    @property
    def relativeToPathRoot(self) -> str:
        """Returns path relative to network location.

        Effectively, this is the path property with posix separator stripped
        from the left hand side of the path. A trailing ``/`` is present
        when the URI is flagged as directory-like.
        """
        if not self.scheme:
            # Scheme-less paths are kept in local OS form.
            p = PurePath(self.path)
        else:
            p = PurePosixPath(self.path)
        relToRoot = str(p.relative_to(p.root))
        # The pure path classes drop the trailing separator; restore it so
        # directory-like URIs remain recognisable.
        if self.dirLike and not relToRoot.endswith("/"):
            relToRoot += "/"
        return relToRoot

    @property
    def fragment(self) -> str:
        """The fragment component of the URI."""
        return self._uri.fragment

    @property
    def params(self) -> str:
        """Any parameters included in the URI."""
        return self._uri.params

    @property
    def query(self) -> str:
        """Any query strings included in the URI."""
        return self._uri.query

    def geturl(self) -> str:
        """Return the URI in string form.

        Returns
        -------
        url : `str`
            String form of URI.
        """
        return self._uri.geturl()

    def split(self) -> Tuple[ButlerURI, str]:
        """Splits URI into head and tail. Equivalent to os.path.split where
        head preserves the URI components.

        Returns
        -------
        head: `ButlerURI`
            Everything leading up to tail, expanded and normalized as per
            ButlerURI rules. Always constructed with
            ``forceDirectory=True``.
        tail : `str`
            Last `self.path` component. Tail will be empty if path ends on a
            separator. Tail will never contain separators.
        """
        if self.scheme:
            head, tail = posixpath.split(self.path)
        else:
            head, tail = os.path.split(self.path)
        headuri = self._uri._replace(path=head)
        return self.__class__(headuri, forceDirectory=True), tail

    def basename(self) -> str:
        """Returns the base name, last element of path, of the URI. If URI ends
        on a slash returns an empty string. This is the second element returned
        by split().

        Equivalent of os.path.basename().

        Returns
        -------
        tail : `str`
            Last part of the path attribute. Tail will be empty if path ends
            on a separator.
        """
        return self.split()[1]

    def dirname(self) -> ButlerURI:
        """Returns a ButlerURI containing all the directories of the path
        attribute.

        Equivalent of os.path.dirname()

        Returns
        -------
        head : `ButlerURI`
            Everything except the tail of path attribute, expanded and
            normalized as per ButlerURI rules.
        """
        return self.split()[0]

    def replace(self, **kwargs: Any) -> ButlerURI:
        """Replace components in a URI with new values and return a new
        instance.

        Parameters
        ----------
        **kwargs
            Components to replace, forwarded to the ``_replace`` method of
            the underlying `urllib.parse.ParseResult` (for example
            ``path`` or ``netloc``).

        Returns
        -------
        new : `ButlerURI`
            New `ButlerURI` object with updated values.
        """
        return self.__class__(self._uri._replace(**kwargs))

    def updateFile(self, newfile: str) -> None:
        """Update in place the final component of the path with the supplied
        file name.

        Parameters
        ----------
        newfile : `str`
            File name with no path component.

        Notes
        -----
        Updates the URI in place.
        Updates the ButlerURI.dirLike attribute (it is set to `False`).
        """
        pathclass = posixpath if self.scheme else os.path

        # Mypy can't work out that these specific modules support split
        # and join
        directory, _ = pathclass.split(self.path)  # type: ignore
        newpath = pathclass.join(directory, newfile)  # type: ignore

        self.dirLike = False
        self._uri = self._uri._replace(path=newpath)

    def getExtension(self) -> str:
        """Return the file extension(s) associated with this URI path.

        Returns
        -------
        ext : `str`
            The file extension (including the ``.``). Can be empty string
            if there is no file extension. Will return all file extensions
            as a single extension such that ``file.fits.gz`` will return
            a value of ``.fits.gz``.
        """
        if not self.scheme:
            extensions = PurePath(self.path).suffixes
        else:
            extensions = PurePosixPath(self.path).suffixes
        return "".join(extensions)

    def __str__(self) -> str:
        return self.geturl()

    def __repr__(self) -> str:
        return f'ButlerURI("{self.geturl()}")'

    def __eq__(self, other: Any) -> bool:
        # Two URIs are equal when their string forms are identical.
        if not isinstance(other, ButlerURI):
            return False
        return self.geturl() == other.geturl()

    @staticmethod
    def _fixupPathUri(parsed: urllib.parse.ParseResult, root: Optional[str] = None,
                      forceAbsolute: bool = False,
                      forceDirectory: bool = False) -> Tuple[urllib.parse.ParseResult, bool]:
        """Fix up relative paths in URI instances.

        Parameters
        ----------
        parsed : `~urllib.parse.ParseResult`
            The result from parsing a URI using `urllib.parse`.
        root : `str`, optional
            Path to use as root when converting relative to absolute.
            If `None`, it will be the current working directory. This
            is a local file system path, not a URI.
        forceAbsolute : `bool`, optional
            If `True`, a scheme-less relative path is joined onto ``root``
            to make it absolute (no scheme is added by this step). If
            `False` a scheme-less relative path stays relative. URIs with
            a defined scheme will not be affected by this parameter.
        forceDirectory : `bool`, optional
            If `True` forces the URI to end with a separator, otherwise given
            URI is interpreted as is.

        Returns
        -------
        modified : `~urllib.parse.ParseResult`
            Update result if a URI is being handled.
        dirLike : `bool`
            `True` if given parsed URI has a trailing separator or
            forceDirectory is True. Otherwise `False`.

        Notes
        -----
        Relative paths are explicitly not supported by RFC8089 but `urllib`
        does accept URIs of the form ``file:relative/path.ext``. They need
        to be turned into absolute paths before they can be used. This is
        always done regardless of the ``forceAbsolute`` parameter.

        AWS S3 differentiates between keys with trailing POSIX separators (i.e
        `/dir` and `/dir/`) whereas POSIX does not necessarily.

        Scheme-less paths are normalized.

        A trailing separator is never duplicated: ``forceDirectory`` only
        appends one when the path does not already end with it.
        """
        # assume we are not dealing with a directory like URI
        dirLike = False
        if not parsed.scheme or parsed.scheme == "file":

            # Replacement values for the URI
            replacements = {}

            if root is None:
                root = os.path.abspath(os.path.curdir)

            if not parsed.scheme:
                # if there was no scheme this is a local OS file path
                # which can support tilde expansion.
                expandedPath = os.path.expanduser(parsed.path)

                # Ensure that this is a file URI if it is already absolute
                if os.path.isabs(expandedPath):
                    replacements["scheme"] = "file"
                    replacements["path"] = os2posix(os.path.normpath(expandedPath))
                elif forceAbsolute:
                    # This can stay in OS path form, do not change to file
                    # scheme.
                    replacements["path"] = os.path.normpath(os.path.join(root, expandedPath))
                else:
                    # No change needed for relative local path staying relative
                    # except normalization
                    replacements["path"] = os.path.normpath(expandedPath)
                    # normalization of empty path returns "." so we are dirLike
                    if expandedPath == "":
                        dirLike = True

                # normpath strips trailing "/" which makes it hard to keep
                # track of directory vs file when calling replaceFile
                # find the appropriate separator
                sep = posixpath.sep if "scheme" in replacements else os.sep

                # add the trailing separator only if explicitly required or
                # if it was stripped by normpath. Acknowledge that trailing
                # separator exists.
                endsOnSep = expandedPath.endswith(os.sep) and not replacements["path"].endswith(sep)
                if forceDirectory or endsOnSep or dirLike:
                    dirLike = True
                    # The root directory survives normpath with its
                    # separator intact; do not turn it into a double one.
                    if not replacements["path"].endswith(sep):
                        replacements["path"] += sep

            else:
                # Scheme is "file" (the outer condition leaves no other
                # possibility). file URI implies POSIX path separators so
                # split as POSIX, then join as os, and convert to abspath.
                # Do not handle home directories since "file" scheme is
                # explicitly documented to not do tilde expansion.
                sep = posixpath.sep
                if posixpath.isabs(parsed.path):
                    hasTrailingSep = parsed.path.endswith(sep)
                    if forceDirectory and not hasTrailingSep:
                        parsed = parsed._replace(path=parsed.path + sep)
                    # An absolute file URI is returned as given, but it is
                    # still directory-like if it ends on a separator.
                    dirLike = forceDirectory or hasTrailingSep
                    return copy.copy(parsed), dirLike

                replacements["path"] = posixpath.normpath(posixpath.join(os2posix(root), parsed.path))

                # normpath strips trailing "/" so put it back if necessary
                # Acknowledge that trailing separator exists.
                if forceDirectory or parsed.path.endswith(sep):
                    if not replacements["path"].endswith(sep):
                        replacements["path"] += sep
                    dirLike = True

            # ParseResult is a NamedTuple so _replace is standard API
            parsed = parsed._replace(**replacements)

        # URI is dir-like if explicitly stated or if it ends on a separator
        endsOnSep = parsed.path.endswith(posixpath.sep)
        if forceDirectory or endsOnSep:
            dirLike = True
            # only add the separator if it's not already there
            if not endsOnSep:
                parsed = parsed._replace(path=parsed.path + posixpath.sep)

        return parsed, dirLike

450 

451 

class Location:
    """Identifies a location within the `Datastore`.

    Parameters
    ----------
    datastoreRootUri : `ButlerURI` or `str`
        Base URI for this datastore, must include an absolute path.
    path : `str`
        Relative path within datastore. Assumed to be using the local
        path separator if a ``file`` scheme is being used for the URI,
        else a POSIX separator.

    Raises
    ------
    ValueError
        Raised if the root is neither a `str` nor a `ButlerURI`, if the
        root URI path is not absolute, or if ``path`` is absolute.
    """

    __slots__ = ("_datastoreRootUri", "_path")

    def __init__(self, datastoreRootUri: Union[ButlerURI, str], path: str):
        if isinstance(datastoreRootUri, str):
            datastoreRootUri = ButlerURI(datastoreRootUri, forceDirectory=True)
        elif not isinstance(datastoreRootUri, ButlerURI):
            raise ValueError("Datastore root must be a ButlerURI instance")

        if not posixpath.isabs(datastoreRootUri.path):
            raise ValueError(f"Supplied URI must be an absolute path (given {datastoreRootUri}).")

        self._datastoreRootUri = datastoreRootUri

        # The relative path uses the local separator for file URIs and the
        # POSIX separator otherwise, so pick the matching path module.
        pathModule: types.ModuleType
        if self._datastoreRootUri.scheme == "file":
            pathModule = os.path
        else:
            pathModule = posixpath

        # mypy can not work out that these modules support isabs
        if pathModule.isabs(path):  # type: ignore
            raise ValueError("Path within datastore must be relative not absolute")

        self._path = path

    def __str__(self) -> str:
        return self.uri

    def __repr__(self) -> str:
        uri = self._datastoreRootUri.geturl()
        path = self._path
        return f"{self.__class__.__name__}({uri!r}, {path!r})"

    @property
    def uri(self) -> str:
        """URI string corresponding to fully-specified location in datastore.
        """
        uriPath = os2posix(self.path)
        return self._datastoreRootUri.replace(path=uriPath).geturl()

    @property
    def path(self) -> str:
        """Path corresponding to location.

        This path includes the root of the `Datastore`, but does not include
        non-path components of the root URI. If a file URI scheme is being
        used the path will be returned with the local OS path separator.
        """
        if not self._datastoreRootUri.scheme:
            # Entirely local file system
            return os.path.normpath(os.path.join(self._datastoreRootUri.path, self.pathInStore))
        elif self._datastoreRootUri.scheme == "file":
            return os.path.normpath(os.path.join(posix2os(self._datastoreRootUri.path), self.pathInStore))
        else:
            return posixpath.join(self._datastoreRootUri.path, self.pathInStore)

    @property
    def pathInStore(self) -> str:
        """Path corresponding to location relative to `Datastore` root.

        Uses the same path separator as supplied to the object constructor.
        """
        return self._path

    @property
    def netloc(self) -> str:
        """The URI network location."""
        return self._datastoreRootUri.netloc

    @property
    def relativeToPathRoot(self) -> str:
        """Returns the path component of the URI relative to the network
        location.

        Effectively, this is the path property with POSIX separator stripped
        from the left hand side of the path.
        """
        if self._datastoreRootUri.scheme == 'file' or not self._datastoreRootUri.scheme:
            p = PurePath(os2posix(self.path))
        else:
            p = PurePosixPath(self.path)
        stripped = p.relative_to(p.root)
        return str(posix2os(stripped))

    def updateExtension(self, ext: Optional[str]) -> None:
        """Update the file extension associated with this `Location`.

        All file extensions are replaced.

        Parameters
        ----------
        ext : `str`
            New extension. If an empty string is given any extension will
            be removed. If `None` is given there will be no change.
        """
        if ext is None:
            return

        # Get the extension and remove it from the path if one is found
        # .fits.gz counts as one extension do not use os.path.splitext
        current = self.getExtension()
        path = self.pathInStore
        # The extension is derived from the full path, so only strip it when
        # the in-store path really ends with it; otherwise slicing would
        # remove unrelated characters.
        if current and path.endswith(current):
            path = path[:-len(current)]

        # Ensure that we have a leading "." on file extension (and we do not
        # try to modify the empty string)
        if ext and not ext.startswith("."):
            ext = "." + ext

        self._path = path + ext

    def getExtension(self) -> str:
        """Return the file extension(s) associated with this location.

        Returns
        -------
        ext : `str`
            The file extension (including the ``.``). Can be empty string
            if there is no file extension. Will return all file extensions
            as a single extension such that ``file.fits.gz`` will return
            a value of ``.fits.gz``.
        """
        scheme = self._datastoreRootUri.scheme
        if not scheme or scheme == "file":
            # ``path`` is in local OS form for these schemes.
            extensions = PurePath(self.path).suffixes
        else:
            # Every other scheme yields a POSIX-style path.
            extensions = PurePosixPath(self.path).suffixes
        return "".join(extensions)

593 

594 

class LocationFactory:
    """Builder of `Location` objects that share one datastore root.

    The factory is constructed from the root location of the datastore.
    This location can be a path on the file system (absolute or relative)
    or as a URI.

    Parameters
    ----------
    datastoreRoot : `str`
        Root location of the `Datastore` either as a path in the local
        filesystem or as a URI. File scheme URIs can be used. If a local
        filesystem path is used without URI scheme, it will be converted
        to an absolute path and any home directory indicators expanded.
        If a file scheme is used with a relative path, the path will
        be treated as a posixpath but then converted to an absolute path.
    """

    def __init__(self, datastoreRoot: str):
        # The root is always handled as an absolute, directory-like URI.
        rootUri = ButlerURI(datastoreRoot, forceDirectory=True, forceAbsolute=True)
        self._datastoreRootUri = rootUri

    def __str__(self) -> str:
        return "{}@{}".format(type(self).__name__, self._datastoreRootUri)

    @property
    def netloc(self) -> str:
        """Returns the network location of root location of the `Datastore`."""
        rootUri = self._datastoreRootUri
        return rootUri.netloc

    def fromPath(self, path: str) -> Location:
        """Factory function to create a `Location` from a POSIX path.

        Parameters
        ----------
        path : `str`
            A standard POSIX path, relative to the `Datastore` root.

        Returns
        -------
        location : `Location`
            The equivalent `Location`.

        Raises
        ------
        ValueError
            Raised if ``path`` is absolute according to `os.path.isabs`.
        """
        if not os.path.isabs(path):
            return Location(self._datastoreRootUri, path)
        raise ValueError("LocationFactory path must be relative to datastore, not absolute.")

640 return Location(self._datastoreRootUri, path)