Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ("Location", "LocationFactory", "ButlerURI") 

25 

import copy
import os
import os.path
import posixpath
import types
import urllib
import urllib.parse
from pathlib import Path, PurePath, PurePosixPath

from typing import (
    Any,
    Optional,
    Tuple,
    Union,
)

40 

# True when the native path separator of this OS is the POSIX one ("/").
IS_POSIX = posixpath.sep == os.sep

# Root component of the resolved current directory on this operating system.
OS_ROOT_PATH = Path(os.curdir).resolve().root

46 

47 

def os2posix(ospath: str) -> str:
    """Translate a path written with the local separator into POSIX form.

    Parameters
    ----------
    ospath : `str`
        Path using the local path separator.

    Returns
    -------
    posix : `str`
        Path using POSIX path separator. When the local separator already
        is the POSIX one the input is returned unchanged.
    """
    if not IS_POSIX:
        converted = PurePath(ospath).as_posix()

        # PurePath drops a trailing separator, which would lose the hint
        # that the path refers to a directory; restore it when that happened.
        lostTrailingSep = ospath.endswith(os.sep) and not converted.endswith(posixpath.sep)
        return converted + posixpath.sep if lostTrailingSep else converted

    return ospath

73 

74 

def posix2os(posix: Union[PurePath, str]) -> str:
    """Convert a POSIX path description to a local path description.

    Parameters
    ----------
    posix : `str` or `pathlib.PurePath`
        Path using the POSIX path separator.

    Returns
    -------
    ospath : `str`
        Path using OS path separator

    Notes
    -----
    When the local separator is the POSIX one the string form of the input
    is returned untouched. Otherwise the path is split into components and
    re-joined with `os.path.join`, with a leading POSIX root replaced by
    ``OS_ROOT_PATH``. A path without any components (``""`` or ``"."``)
    is returned with only its separators translated.
    """
    posixStr = str(posix)
    if IS_POSIX:
        return posixStr

    posixPath = PurePosixPath(posix)
    paths = list(posixPath.parts)

    # "" and "." yield no parts at all; indexing paths[0] or calling
    # os.path.join() with no arguments would raise, so handle them here.
    if not paths:
        return posixStr.replace(posixpath.sep, os.sep)

    # Have to convert the root directory after splitting
    if paths[0] == posixPath.root:
        paths[0] = OS_ROOT_PATH

    # Trailing "/" is stripped so we need to add back an empty path
    # for consistency
    if posixStr.endswith(posixpath.sep):
        paths.append("")

    return os.path.join(*paths)

104 

105 

class ButlerURI:
    """Convenience wrapper around URI parsers.

    Provides access to URI components and can convert file
    paths into absolute path URIs. Scheme-less URIs are treated as if
    they are local file system paths and are converted to absolute URIs.

    Parameters
    ----------
    uri : `str` or `urllib.parse.ParseResult`
        URI in string form. Can be scheme-less if referring to a local
        filesystem path.
    root : `str`, optional
        When fixing up a relative path in a ``file`` scheme or if scheme-less,
        use this as the root. Must be absolute. If `None` the current
        working directory will be used.
    forceAbsolute : `bool`, optional
        If `True`, scheme-less relative URI will be converted to an absolute
        path using a ``file`` scheme. If `False` scheme-less URI will remain
        scheme-less and will not be updated to ``file`` or absolute path.
    forceDirectory: `bool`, optional
        If `True` forces the URI to end with a separator, otherwise given URI
        is interpreted as is.

    Raises
    ------
    ValueError
        Raised if ``uri`` is neither a `str` nor a
        `urllib.parse.ParseResult`.

    Notes
    -----
    The ``dirLike`` attribute is `True` when the URI was forced to be a
    directory or its path ends with a separator, and `False` otherwise.
    A separator is only appended when the path does not already end with
    one, so forcing a directory never produces a doubled separator.
    """

    def __init__(self, uri: Union[str, urllib.parse.ParseResult],
                 root: Optional[str] = None, forceAbsolute: bool = True, forceDirectory: bool = False):
        if isinstance(uri, str):
            parsed = urllib.parse.urlparse(uri)
        elif isinstance(uri, urllib.parse.ParseResult):
            parsed = copy.copy(uri)
        else:
            raise ValueError("Supplied URI must be either string or ParseResult")

        parsed, dirLike = self._fixupPathUri(parsed, root=root,
                                             forceAbsolute=forceAbsolute,
                                             forceDirectory=forceDirectory)

        self.dirLike = dirLike
        self._uri = parsed

    @property
    def scheme(self) -> str:
        """The URI scheme (``://`` is not part of the scheme)."""
        return self._uri.scheme

    @property
    def netloc(self) -> str:
        """The URI network location."""
        return self._uri.netloc

    @property
    def path(self) -> str:
        """The path component of the URI."""
        return self._uri.path

    @property
    def ospath(self) -> str:
        """Path component of the URI localized to current OS.

        Raises `AttributeError` for URIs using the ``s3`` scheme.
        """
        if self.scheme == 's3':
            raise AttributeError('S3 URIs have no OS path.')
        return posix2os(self._uri.path)

    @property
    def relativeToPathRoot(self) -> str:
        """Returns path relative to network location.

        Effectively, this is the path property with posix separator stripped
        from the left hand side of the path. A trailing ``/`` is kept for
        directory-like URIs.
        """
        # Scheme-less paths are stored in local OS form, everything else
        # uses POSIX separators.
        if not self.scheme:
            p = PurePath(self.path)
        else:
            p = PurePosixPath(self.path)
        relToRoot = str(p.relative_to(p.root))
        if self.dirLike and not relToRoot.endswith("/"):
            relToRoot += "/"
        return relToRoot

    @property
    def fragment(self) -> str:
        """The fragment component of the URI."""
        return self._uri.fragment

    @property
    def params(self) -> str:
        """Any parameters included in the URI."""
        return self._uri.params

    @property
    def query(self) -> str:
        """Any query strings included in the URI."""
        return self._uri.query

    def geturl(self) -> str:
        """Return the URI in string form.

        Returns
        -------
        url : `str`
            String form of URI.
        """
        return self._uri.geturl()

    def split(self) -> Tuple["ButlerURI", str]:
        """Splits URI into head and tail. Equivalent to os.path.split where
        head preserves the URI components.

        Returns
        -------
        head: `ButlerURI`
            Everything leading up to tail, expanded and normalized as per
            ButlerURI rules.
        tail : `str`
            Last `self.path` component. Tail will be empty if path ends on a
            separator. Tail will never contain separators.
        """
        if self.scheme:
            head, tail = posixpath.split(self.path)
        else:
            head, tail = os.path.split(self.path)
        headuri = self._uri._replace(path=head)
        # The head is always a directory by construction.
        return self.__class__(headuri, forceDirectory=True), tail

    def basename(self) -> str:
        """Returns the base name, last element of path, of the URI. If URI ends
        on a slash returns an empty string. This is the second element returned
        by split().

        Equivalent of os.path.basename().

        Returns
        -------
        tail : `str`
            Last part of the path attribute. Tail will be empty if path ends
            on a separator.
        """
        return self.split()[1]

    def dirname(self) -> "ButlerURI":
        """Returns a ButlerURI containing all the directories of the path
        attribute.

        Equivalent of os.path.dirname()

        Returns
        -------
        head : `ButlerURI`
            Everything except the tail of path attribute, expanded and
            normalized as per ButlerURI rules.
        """
        return self.split()[0]

    def replace(self, **kwargs: Any) -> "ButlerURI":
        """Replace components in a URI with new values and return a new
        instance.

        Parameters
        ----------
        **kwargs
            Components to replace, forwarded to the ``_replace`` method of
            the underlying `urllib.parse.ParseResult`.

        Returns
        -------
        new : `ButlerURI`
            New `ButlerURI` object with updated values.
        """
        return self.__class__(self._uri._replace(**kwargs))

    def updateFile(self, newfile: str) -> None:
        """Update in place the final component of the path with the supplied
        file name.

        Parameters
        ----------
        newfile : `str`
            File name with no path component.

        Notes
        -----
        Updates the URI in place.
        Updates the ButlerURI.dirLike attribute.
        """
        pathclass = posixpath if self.scheme else os.path

        # Mypy can't work out that these specific modules support split
        # and join
        dirPath, _ = pathclass.split(self.path)  # type: ignore
        newpath = pathclass.join(dirPath, newfile)  # type: ignore

        # The URI now refers to a file, whatever it referred to before.
        self.dirLike = False
        self._uri = self._uri._replace(path=newpath)

    def __str__(self) -> str:
        return self.geturl()

    def __repr__(self) -> str:
        return f'ButlerURI("{self.geturl()}")'

    def __eq__(self, other: Any) -> bool:
        if not isinstance(other, ButlerURI):
            return False
        return self.geturl() == other.geturl()

    @staticmethod
    def _fixupPathUri(parsed: urllib.parse.ParseResult, root: Optional[str] = None,
                      forceAbsolute: bool = False,
                      forceDirectory: bool = False) -> Tuple[urllib.parse.ParseResult, bool]:
        """Fix up relative paths in URI instances.

        Parameters
        ----------
        parsed : `~urllib.parse.ParseResult`
            The result from parsing a URI using `urllib.parse`.
        root : `str`, optional
            Path to use as root when converting relative to absolute.
            If `None`, it will be the current working directory. This
            is a local file system path, not a URI.
        forceAbsolute : `bool`, optional
            If `True`, scheme-less relative URI will be converted to an
            absolute path using a ``file`` scheme. If `False` scheme-less URI
            will remain scheme-less and will not be updated to ``file`` or
            absolute path. URIs with a defined scheme will not be affected
            by this parameter.
        forceDirectory : `bool`, optional
            If `True` forces the URI to end with a separator, otherwise given
            URI is interpreted as is.

        Returns
        -------
        modified : `~urllib.parse.ParseResult`
            Update result if a URI is being handled.
        dirLike : `bool`
            `True` if given parsed URI has a trailing separator or
            forceDirectory is True. Otherwise `False`.

        Notes
        -----
        Relative paths are explicitly not supported by RFC8089 but `urllib`
        does accept URIs of the form ``file:relative/path.ext``. They need
        to be turned into absolute paths before they can be used. This is
        always done regardless of the ``forceAbsolute`` parameter.

        AWS S3 differentiates between keys with trailing POSIX separators (i.e
        `/dir` and `/dir/`) whereas POSIX does not necessarily.

        Scheme-less paths are normalized.

        A trailing separator is appended only when the path does not
        already end with one.
        """
        # assume we are not dealing with a directory like URI
        dirLike = False

        if parsed.scheme and parsed.scheme != "file":
            # Non-file schemes always use POSIX separators and are not
            # normalized; only the trailing separator is managed.
            endsOnSep = parsed.path.endswith(posixpath.sep)
            if forceDirectory or endsOnSep:
                dirLike = True
                # only add the separator if it's not already there
                if not endsOnSep:
                    parsed = parsed._replace(path=parsed.path + posixpath.sep)
            return parsed, dirLike

        # Replacement values for the URI
        replacements = {}

        if root is None:
            root = os.path.abspath(os.path.curdir)

        if not parsed.scheme:
            # if there was no scheme this is a local OS file path
            # which can support tilde expansion.
            expandedPath = os.path.expanduser(parsed.path)

            # Ensure that this is a file URI if it is already absolute
            if os.path.isabs(expandedPath):
                replacements["scheme"] = "file"
                replacements["path"] = os2posix(os.path.normpath(expandedPath))
            elif forceAbsolute:
                # This can stay in OS path form, do not change to file
                # scheme.
                replacements["path"] = os.path.normpath(os.path.join(root, expandedPath))
            else:
                # No change needed for relative local path staying relative
                # except normalization
                replacements["path"] = os.path.normpath(expandedPath)
                # normalization of empty path returns "." so we are dirLike
                if expandedPath == "":
                    dirLike = True

            # The separator to use depends on whether the path was
            # converted to a POSIX-style file URI or left in OS form.
            sep = posixpath.sep if "scheme" in replacements else os.sep

            # normpath strips a trailing separator (except for a bare root),
            # so restore it for directory-like paths, but never double it.
            if forceDirectory or dirLike or expandedPath.endswith(os.sep):
                dirLike = True
                if not replacements["path"].endswith(sep):
                    replacements["path"] += sep

            # ParseResult is a NamedTuple so _replace is standard API
            return parsed._replace(**replacements), dirLike

        # file URI implies POSIX path separators so split as POSIX,
        # then join as os, and convert to abspath. Do not handle
        # home directories since "file" scheme is explicitly documented
        # to not do tilde expansion.
        sep = posixpath.sep
        hadTrailingSep = parsed.path.endswith(sep)

        if posixpath.isabs(parsed.path):
            # Absolute file URIs are returned without normalization.
            if forceDirectory or hadTrailingSep:
                dirLike = True
                if not hadTrailingSep:
                    parsed = parsed._replace(path=parsed.path + sep)
            return copy.copy(parsed), dirLike

        replacements["path"] = posixpath.normpath(posixpath.join(os2posix(root), parsed.path))

        # normpath strips trailing "/" so put it back if necessary
        if forceDirectory or hadTrailingSep:
            dirLike = True
            if not replacements["path"].endswith(sep):
                replacements["path"] += sep

        # ParseResult is a NamedTuple so _replace is standard API
        return parsed._replace(**replacements), dirLike

433 

434 

class Location:
    """Identifies a location within the `Datastore`.

    Parameters
    ----------
    datastoreRootUri : `ButlerURI` or `str`
        Base URI for this datastore, must include an absolute path.
    path : `str`
        Relative path within datastore. Assumed to be using the local
        path separator if a ``file`` scheme is being used for the URI,
        else a POSIX separator.

    Raises
    ------
    ValueError
        Raised if the root is neither a `str` nor a `ButlerURI`, if the
        root URI path is not absolute, or if ``path`` is absolute.
    """

    __slots__ = ("_datastoreRootUri", "_path")

    def __init__(self, datastoreRootUri: Union["ButlerURI", str], path: str):
        if isinstance(datastoreRootUri, str):
            # A string root is always interpreted as a directory.
            datastoreRootUri = ButlerURI(datastoreRootUri, forceDirectory=True)
        elif not isinstance(datastoreRootUri, ButlerURI):
            raise ValueError("Datastore root must be a ButlerURI instance")

        if not posixpath.isabs(datastoreRootUri.path):
            raise ValueError(f"Supplied URI must be an absolute path (given {datastoreRootUri}).")

        self._datastoreRootUri = datastoreRootUri

        # For a file scheme the in-store path uses the local separator,
        # otherwise it is POSIX; pick the matching absoluteness check.
        usesOsPaths = self._datastoreRootUri.scheme == "file"
        isAbsolute = os.path.isabs if usesOsPaths else posixpath.isabs
        if isAbsolute(path):
            raise ValueError("Path within datastore must be relative not absolute")

        self._path = path

    def __str__(self) -> str:
        return self.uri

    def __repr__(self) -> str:
        rootUrl = self._datastoreRootUri.geturl()
        return f"{self.__class__.__name__}({rootUrl!r}, {self._path!r})"

    @property
    def uri(self) -> str:
        """URI string corresponding to fully-specified location in datastore.
        """
        fullUri = self._datastoreRootUri.replace(path=os2posix(self.path))
        return fullUri.geturl()

    @property
    def path(self) -> str:
        """Path corresponding to location.

        This path includes the root of the `Datastore`, but does not include
        non-path components of the root URI. If a file URI scheme is being
        used the path will be returned with the local OS path separator.
        """
        rootUri = self._datastoreRootUri
        scheme = rootUri.scheme

        if scheme and scheme != "file":
            # Non-file schemes: plain POSIX join, no normalization
            return posixpath.join(rootUri.path, self.pathInStore)

        # Local file system: a scheme-less root is already in OS form while
        # a file URI root has to be converted from POSIX first.
        rootPath = posix2os(rootUri.path) if scheme else rootUri.path
        return os.path.normpath(os.path.join(rootPath, self.pathInStore))

    @property
    def pathInStore(self) -> str:
        """Path corresponding to location relative to `Datastore` root.

        Uses the same path separator as supplied to the object constructor.
        """
        return self._path

    @property
    def netloc(self) -> str:
        """The URI network location."""
        return self._datastoreRootUri.netloc

    @property
    def relativeToPathRoot(self) -> str:
        """Returns the path component of the URI relative to the network
        location.

        Effectively, this is the path property with POSIX separator stripped
        from the left hand side of the path.
        """
        scheme = self._datastoreRootUri.scheme
        isLocal = not scheme or scheme == 'file'
        p = PurePath(os2posix(self.path)) if isLocal else PurePosixPath(self.path)
        return str(posix2os(p.relative_to(p.root)))

    def updateExtension(self, ext: Optional[str]) -> None:
        """Update the file extension associated with this `Location`.

        Parameters
        ----------
        ext : `str` or `None`
            New extension. If an empty string is given any extension will
            be removed. If `None` is given there will be no change.
        """
        if ext is None:
            return

        stem = os.path.splitext(self.pathInStore)[0]

        # A non-empty extension always gets a leading "."; the empty
        # string is left alone so that it removes the extension.
        suffix = ext if (not ext or ext.startswith(".")) else "." + ext

        self._path = stem + suffix

552 

553 

class LocationFactory:
    """Factory for `Location` instances.

    The factory is constructed from the root location of the datastore.
    This location can be a path on the file system (absolute or relative)
    or as a URI.

    Parameters
    ----------
    datastoreRoot : `str`
        Root location of the `Datastore` either as a path in the local
        filesystem or as a URI. File scheme URIs can be used. If a local
        filesystem path is used without URI scheme, it will be converted
        to an absolute path and any home directory indicators expanded.
        If a file scheme is used with a relative path, the path will
        be treated as a posixpath but then converted to an absolute path.
    """

    def __init__(self, datastoreRoot: str):
        # The root is always an absolute, directory-like URI.
        rootUri = ButlerURI(datastoreRoot, forceAbsolute=True, forceDirectory=True)
        self._datastoreRootUri = rootUri

    def __str__(self) -> str:
        clsName = self.__class__.__name__
        return f"{clsName}@{self._datastoreRootUri}"

    @property
    def netloc(self) -> str:
        """Returns the network location of root location of the `Datastore`."""
        return self._datastoreRootUri.netloc

    def fromPath(self, path: str) -> "Location":
        """Factory function to create a `Location` from a POSIX path.

        Parameters
        ----------
        path : `str`
            A standard POSIX path, relative to the `Datastore` root.

        Returns
        -------
        location : `Location`
            The equivalent `Location`.

        Raises
        ------
        ValueError
            Raised if ``path`` is absolute.
        """
        if not os.path.isabs(path):
            return Location(self._datastoreRootUri, path)
        raise ValueError("LocationFactory path must be relative to datastore, not absolute.")

599 return Location(self._datastoreRootUri, path)