Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22__all__ = ("Location", "LocationFactory", "ButlerURI") 

23 

import copy
import os
import os.path
import posixpath
import urllib
import urllib.parse
from pathlib import Path, PurePath, PurePosixPath

30 

# True when the host operating system uses the POSIX path separator
IS_POSIX = posixpath.sep == os.sep

# Root component of the (resolved) current working directory; used as the
# replacement for the POSIX root when converting paths to local form
OS_ROOT_PATH = Path().resolve().root

36 

37 

def os2posix(ospath):
    """Translate a path written with the local separator into POSIX form.

    Parameters
    ----------
    ospath : `str`
        Path using the local path separator.

    Returns
    -------
    posix : `str`
        Path using the POSIX path separator. On a POSIX host the input is
        returned unchanged.
    """
    if not IS_POSIX:
        converted = PurePath(ospath).as_posix()

        # PurePath drops a trailing separator, which would lose the hint
        # that the path refers to a directory; restore it when that happens.
        lostTrailingSep = ospath.endswith(os.sep) and not converted.endswith(posixpath.sep)
        return converted + posixpath.sep if lostTrailingSep else converted

    return ospath

63 

64 

def posix2os(posix):
    """Convert a POSIX path description to a local path description.

    Parameters
    ----------
    posix : `str`
        Path using the POSIX path separator.

    Returns
    -------
    ospath : `str`
        Path using OS path separator. On a POSIX host the input is
        returned unchanged. An empty string is returned unchanged on every
        platform.
    """
    if IS_POSIX:
        return posix

    posixPath = PurePosixPath(posix)
    paths = list(posixPath.parts)

    # "", "." and "./" have no components at all. Indexing ``paths[0]`` or
    # calling ``os.path.join()`` with no arguments would raise, so handle
    # them explicitly: empty stays empty, anything else is the current dir.
    if not paths:
        if not posix:
            return posix
        paths = [os.curdir]

    # Have to convert the root directory after splitting
    if paths[0] == posixPath.root:
        paths[0] = OS_ROOT_PATH

    # Trailing "/" is stripped so we need to add back an empty path
    # for consistency
    if posix.endswith(posixpath.sep):
        paths.append("")

    return os.path.join(*paths)

94 

95 

class ButlerURI:
    """Convenience wrapper around URI parsers.

    Provides access to URI components and can convert file
    paths into absolute path URIs. Scheme-less URIs are treated as if
    they are local file system paths and are converted to absolute URIs.

    Parameters
    ----------
    uri : `str` or `urllib.parse.ParseResult`
        URI in string form. Can be scheme-less if referring to a local
        filesystem path.
    root : `str`, optional
        When fixing up a relative path in a ``file`` scheme or if scheme-less,
        use this as the root. Must be absolute. If `None` the current
        working directory will be used.
    forceAbsolute : `bool`, optional
        If `True`, scheme-less relative URI will be converted to an absolute
        path using a ``file`` scheme. If `False` scheme-less URI will remain
        scheme-less and will not be updated to ``file`` or absolute path.
    forceDirectory: `bool`, optional
        If `True` forces the URI to end with a separator, otherwise given URI
        is interpreted as is.

    Raises
    ------
    ValueError
        Raised if ``uri`` is neither a string nor a
        `urllib.parse.ParseResult`.

    Notes
    -----
    Instances are mutable (see `updateFile`) and define ``__eq__`` without
    ``__hash__``, so they are not hashable.
    """

    def __init__(self, uri, root=None, forceAbsolute=True, forceDirectory=False):
        if isinstance(uri, str):
            parsed = urllib.parse.urlparse(uri)
        elif isinstance(uri, urllib.parse.ParseResult):
            parsed = copy.copy(uri)
        else:
            raise ValueError("Supplied URI must be either string or ParseResult")

        parsed, dirLike = self._fixupPathUri(parsed, root=root,
                                             forceAbsolute=forceAbsolute,
                                             forceDirectory=forceDirectory)

        # True if the URI refers to something directory-like (ends on a
        # separator), False otherwise.
        self.dirLike = dirLike
        self._uri = parsed

    @property
    def scheme(self):
        """The URI scheme (``://`` is not part of the scheme)."""
        return self._uri.scheme

    @property
    def netloc(self):
        """The URI network location."""
        return self._uri.netloc

    @property
    def path(self):
        """The path component of the URI."""
        return self._uri.path

    @property
    def ospath(self):
        """Path component of the URI localized to current OS.

        Raises `AttributeError` for ``s3`` URIs since they have no local
        path.
        """
        if self.scheme == 's3':
            raise AttributeError('S3 URIs have no OS path.')
        return posix2os(self._uri.path)

    @property
    def relativeToPathRoot(self):
        """Returns path relative to network location.

        Effectively, this is the path property with posix separator stripped
        from the left hand side of the path. A trailing ``/`` is kept for
        directory-like URIs.
        """
        if not self.scheme:
            p = PurePath(self.path)
        else:
            p = PurePosixPath(self.path)
        relToRoot = str(p.relative_to(p.root))
        # The pure path classes drop the trailing separator; restore it so
        # that directory-like URIs remain recognisable.
        if self.dirLike and not relToRoot.endswith("/"):
            relToRoot += "/"
        return relToRoot

    @property
    def fragment(self):
        """The fragment component of the URI."""
        return self._uri.fragment

    @property
    def params(self):
        """Any parameters included in the URI."""
        return self._uri.params

    @property
    def query(self):
        """Any query strings included in the URI."""
        return self._uri.query

    def geturl(self):
        """Return the URI in string form.

        Returns
        -------
        url : `str`
            String form of URI.
        """
        return self._uri.geturl()

    def split(self):
        """Splits URI into head and tail. Equivalent to os.path.split where
        head preserves the URI components.

        Returns
        -------
        head: `ButlerURI`
            Everything leading up to tail, expanded and normalized as per
            ButlerURI rules. The head of a relative scheme-less URI stays
            relative.
        tail : `str`
            Last `self.path` component. Tail will be empty if path ends on a
            separator. Tail will never contain separators.
        """
        if self.scheme:
            head, tail = posixpath.split(self.path)
            forceAbsolute = True
        else:
            head, tail = os.path.split(self.path)
            # Only a scheme-less URI can hold a relative path. Keep the head
            # relative in that case instead of silently anchoring it to the
            # current working directory.
            forceAbsolute = os.path.isabs(self.path)
        headuri = self._uri._replace(path=head)
        return self.__class__(headuri, forceDirectory=True, forceAbsolute=forceAbsolute), tail

    def basename(self):
        """Returns the base name, last element of path, of the URI. If URI ends
        on a slash returns an empty string. This is the second element returned
        by split().

        Equivalent of os.path.basename().

        Returns
        -------
        tail : `str`
            Last part of the path attribute. Tail will be empty if path ends
            on a separator.
        """
        return self.split()[1]

    def dirname(self):
        """Returns a ButlerURI containing all the directories of the path
        attribute.

        Equivalent of os.path.dirname()

        Returns
        -------
        head : `ButlerURI`
            Everything except the tail of path attribute, expanded and
            normalized as per ButlerURI rules.
        """
        return self.split()[0]

    def replace(self, **kwargs):
        """Replace components in a URI with new values and return a new
        instance.

        Parameters
        ----------
        **kwargs
            Components to replace, forwarded to
            `urllib.parse.ParseResult._replace` (for example ``path``).

        Returns
        -------
        new : `ButlerURI`
            New `ButlerURI` object with updated values.
        """
        return self.__class__(self._uri._replace(**kwargs))

    def updateFile(self, newfile):
        """Update in place the final component of the path with the supplied
        file name.

        Parameters
        ----------
        newfile : `str`
            File name with no path component.

        Notes
        -----
        Updates the URI in place.
        Updates the ButlerURI.dirLike attribute.
        """
        if self.scheme:
            # POSIX
            pathclass = posixpath
        else:
            pathclass = os.path

        head, _ = pathclass.split(self.path)
        newpath = pathclass.join(head, newfile)

        self.dirLike = False
        self._uri = self._uri._replace(path=newpath)

    def __str__(self):
        return self.geturl()

    def __repr__(self) -> str:
        return f'ButlerURI("{self.geturl()}")'

    def __eq__(self, other) -> bool:
        # Let Python try the reflected comparison for foreign types; the
        # final outcome for unrelated objects is still ``False``.
        if not isinstance(other, ButlerURI):
            return NotImplemented
        return self.geturl() == other.geturl()

    @staticmethod
    def _fixupPathUri(parsed, root=None, forceAbsolute=False, forceDirectory=False):
        """Fix up relative paths in URI instances.

        Parameters
        ----------
        parsed : `~urllib.parse.ParseResult`
            The result from parsing a URI using `urllib.parse`.
        root : `str`, optional
            Path to use as root when converting relative to absolute.
            If `None`, it will be the current working directory. This
            is a local file system path, not a URI.
        forceAbsolute : `bool`, optional
            If `True`, scheme-less relative URI will be converted to an
            absolute path using a ``file`` scheme. If `False` scheme-less URI
            will remain scheme-less and will not be updated to ``file`` or
            absolute path. URIs with a defined scheme will not be affected
            by this parameter.
        forceDirectory : `bool`, optional
            If `True` forces the URI to end with a separator, otherwise given
            URI is interpreted as is.

        Returns
        -------
        modified : `~urllib.parse.ParseResult`
            Update result if a URI is being handled.
        dirLike : `bool`
            `True` if given parsed URI has a trailing separator or
            forceDirectory is True. Otherwise `False`.

        Notes
        -----
        Relative paths are explicitly not supported by RFC8089 but `urllib`
        does accept URIs of the form ``file:relative/path.ext``. They need
        to be turned into absolute paths before they can be used. This is
        always done regardless of the ``forceAbsolute`` parameter.

        AWS S3 differentiates between keys with trailing POSIX separators (i.e
        `/dir` and `/dir/`) whereas POSIX does not necessarily.

        Scheme-less paths are normalized. A trailing separator is never
        duplicated, even when ``forceDirectory`` is `True` and the path
        already ends on one.
        """
        def markDirectory(path, sep, wantDirectory):
            # Return ``(path, dirLike)``. The separator is appended only
            # when a directory is wanted and it is not already present.
            if path.endswith(sep):
                return path, True
            if wantDirectory:
                return path + sep, True
            return path, False

        if parsed.scheme and parsed.scheme != "file":
            # Non-file URIs always use POSIX separators and are otherwise
            # left untouched.
            newPath, dirLike = markDirectory(parsed.path, posixpath.sep, forceDirectory)
            if newPath != parsed.path:
                parsed = parsed._replace(path=newPath)
            return parsed, dirLike

        if root is None:
            root = os.path.abspath(os.path.curdir)

        # Replacement values for the URI
        replacements = {}

        if not parsed.scheme:
            # if there was no scheme this is a local OS file path
            # which can support tilde expansion.
            expandedPath = os.path.expanduser(parsed.path)

            # normpath strips the trailing separator, so remember whether
            # the caller supplied one before normalizing.
            wantDirectory = forceDirectory or expandedPath.endswith(os.sep)

            if os.path.isabs(expandedPath):
                # Ensure that this is a file URI if it is already absolute
                replacements["scheme"] = "file"
                newPath = os2posix(os.path.normpath(expandedPath))
                sep = posixpath.sep
            else:
                # This stays in OS path form, do not change to file scheme.
                sep = os.sep
                if forceAbsolute:
                    newPath = os.path.normpath(os.path.join(root, expandedPath))
                else:
                    # No change needed for relative local path staying
                    # relative except normalization
                    newPath = os.path.normpath(expandedPath)
                    # normalization of empty path returns "." so we are
                    # dirLike
                    if expandedPath == "":
                        wantDirectory = True
        else:
            # file URI implies POSIX path separators. Do not handle
            # home directories since "file" scheme is explicitly documented
            # to not do tilde expansion.
            sep = posixpath.sep
            wantDirectory = forceDirectory or parsed.path.endswith(sep)
            if posixpath.isabs(parsed.path):
                # Absolute file URIs are used as given (no normalization)
                newPath = parsed.path
            else:
                newPath = posixpath.normpath(posixpath.join(os2posix(root), parsed.path))

        replacements["path"], dirLike = markDirectory(newPath, sep, wantDirectory)

        # ParseResult is a NamedTuple so _replace is standard API
        return parsed._replace(**replacements), dirLike

422 

423 

class Location:
    """Identifies a location within the `Datastore`.

    Parameters
    ----------
    datastoreRootUri : `ButlerURI` or `str`
        Base URI for this datastore, must include an absolute path.
    path : `str`
        Relative path within datastore. Assumed to be using the local
        path separator if a ``file`` scheme is being used for the URI,
        else a POSIX separator.

    Raises
    ------
    ValueError
        Raised if the root is neither a `ButlerURI` nor a `str`, if the root
        URI does not hold an absolute path, or if ``path`` is absolute.
    """

    __slots__ = ("_datastoreRootUri", "_path")

    def __init__(self, datastoreRootUri, path):
        if isinstance(datastoreRootUri, str):
            datastoreRootUri = ButlerURI(datastoreRootUri, forceDirectory=True)
        elif not isinstance(datastoreRootUri, ButlerURI):
            raise ValueError("Datastore root must be a ButlerURI instance")

        if not posixpath.isabs(datastoreRootUri.path):
            raise ValueError(f"Supplied URI must be an absolute path (given {datastoreRootUri}).")

        self._datastoreRootUri = datastoreRootUri

        # Paths inside a ``file`` datastore use the local separator, all
        # other schemes use POSIX separators.
        if self._datastoreRootUri.scheme == "file":
            pathModule = os.path
        else:
            pathModule = posixpath

        if pathModule.isabs(path):
            raise ValueError("Path within datastore must be relative not absolute")

        self._path = path

    def __str__(self):
        return self.uri

    def __repr__(self):
        uri = self._datastoreRootUri.geturl()
        path = self._path
        return f"{self.__class__.__name__}({uri!r}, {path!r})"

    @property
    def uri(self):
        """URI string corresponding to fully-specified location in datastore.
        """
        uriPath = os2posix(self.path)
        return self._datastoreRootUri.replace(path=uriPath).geturl()

    @property
    def path(self):
        """Path corresponding to location.

        This path includes the root of the `Datastore`, but does not include
        non-path components of the root URI. If a file URI scheme is being
        used the path will be returned with the local OS path separator.
        """
        if not self._datastoreRootUri.scheme:
            # Entirely local file system
            return os.path.normpath(os.path.join(self._datastoreRootUri.path, self.pathInStore))
        elif self._datastoreRootUri.scheme == "file":
            return os.path.normpath(os.path.join(posix2os(self._datastoreRootUri.path), self.pathInStore))
        else:
            return posixpath.join(self._datastoreRootUri.path, self.pathInStore)

    @property
    def pathInStore(self):
        """Path corresponding to location relative to `Datastore` root.

        Uses the same path separator as supplied to the object constructor.
        """
        return self._path

    @property
    def netloc(self):
        """The URI network location."""
        return self._datastoreRootUri.netloc

    @property
    def relativeToPathRoot(self):
        """Returns the path component of the URI relative to the network
        location.

        Effectively, this is the path property with the root stripped
        from the left hand side of the path. The result is returned in
        local OS form.
        """
        if self._datastoreRootUri.scheme == 'file' or not self._datastoreRootUri.scheme:
            p = PurePath(os2posix(self.path))
        else:
            p = PurePosixPath(self.path)
        stripped = p.relative_to(p.root)
        # posix2os works on strings (it inspects the trailing separator), so
        # hand it the POSIX text form rather than the path object itself.
        return posix2os(stripped.as_posix())

    def updateExtension(self, ext):
        """Update the file extension associated with this `Location`.

        Parameters
        ----------
        ext : `str`
            New extension. If an empty string is given any extension will
            be removed. If `None` is given there will be no change.
        """
        if ext is None:
            return

        path, _ = os.path.splitext(self.pathInStore)

        # Ensure that we have a leading "." on file extension (and we do not
        # try to modify the empty string)
        if ext and not ext.startswith("."):
            ext = "." + ext

        self._path = path + ext

539 

540 

class LocationFactory:
    """Builds `Location` objects that share a single datastore root.

    The root may be given as a file system path (absolute or relative)
    or as a URI.

    Parameters
    ----------
    datastoreRoot : `str`
        Root location of the `Datastore` either as a path in the local
        filesystem or as a URI. File scheme URIs can be used. If a local
        filesystem path is used without URI scheme, it will be converted
        to an absolute path and any home directory indicators expanded.
        If a file scheme is used with a relative path, the path will
        be treated as a posixpath but then converted to an absolute path.
    """

    def __init__(self, datastoreRoot):
        # The root is always stored as an absolute, directory-like URI
        rootUri = ButlerURI(datastoreRoot, forceDirectory=True, forceAbsolute=True)
        self._datastoreRootUri = rootUri

    def __str__(self):
        return "{}@{}".format(type(self).__name__, self._datastoreRootUri)

    @property
    def netloc(self):
        """Returns the network location of root location of the `Datastore`."""
        rootUri = self._datastoreRootUri
        return rootUri.netloc

    def fromPath(self, path):
        """Create a `Location` for a path relative to the datastore root.

        Parameters
        ----------
        path : `str`
            A standard POSIX path, relative to the `Datastore` root.

        Returns
        -------
        location : `Location`
            The equivalent `Location`.

        Raises
        ------
        ValueError
            Raised if ``path`` is absolute.
        """
        if not os.path.isabs(path):
            return Location(self._datastoreRootUri, path)
        raise ValueError("LocationFactory path must be relative to datastore, not absolute.")