Coverage for python/lsst/resources/http.py: 17%

503 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2023-03-30 02:28 -0700

1# This file is part of lsst-resources. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# Use of this source code is governed by a 3-clause BSD-style 

10# license that can be found in the LICENSE file. 

11 

12from __future__ import annotations 

13 

14__all__ = ("HttpResourcePath",) 

15 

16import contextlib 

17import functools 

18import io 

19import logging 

20import os 

21import os.path 

22import random 

23import re 

24import stat 

25import tempfile 

26import xml.etree.ElementTree as eTree 

27from typing import TYPE_CHECKING, BinaryIO, Iterator, List, Optional, Tuple, Union, cast 

28 

29import requests 

30from astropy import units as u 

31from lsst.utils.timer import time_this 

32from requests.adapters import HTTPAdapter 

33from requests.auth import AuthBase 

34from urllib3.util.retry import Retry 

35 

36from ._resourceHandles import ResourceHandleProtocol 

37from ._resourceHandles._httpResourceHandle import HttpReadResourceHandle 

38from ._resourcePath import ResourcePath 

39 

40if TYPE_CHECKING: 40 ↛ 41line 40 didn't jump to line 41, because the condition on line 40 was never true

41 from .utils import TransactionProtocol 

42 

# Module-level logger, named after this module.
log = logging.getLogger(__name__)


# Default timeouts for all HTTP requests, in seconds.
DEFAULT_TIMEOUT_CONNECT = 300
DEFAULT_TIMEOUT_READ = 1500

# Default number of connections to persist with both the front end and back end
# servers. Kept as strings because they serve as defaults for
# os.environ.get(), whose values are strings converted with int() at use site.
DEFAULT_FRONTEND_PERSISTENT_CONNECTIONS = "2"
DEFAULT_BACKEND_PERSISTENT_CONNECTIONS = "1"

# Accepted digest algorithms for the LSST_HTTP_DIGEST environment variable.
ACCEPTED_DIGESTS = ("adler32", "md5", "sha-256", "sha-512")

57 

58 

class HttpResourcePathConfig:
    """Configuration class to encapsulate the configurable items used by class
    HttpResourcePath.

    Each property below reads its value from the environment on first access
    and caches the result, so the environment is only inspected once per
    configuration instance.
    """

    # Lazily-populated caches for the properties below. Class-level defaults
    # are None; first access stores the computed value on the instance.
    _front_end_connections: Optional[int] = None
    _back_end_connections: Optional[int] = None
    _digest_algorithm: Optional[str] = None
    _send_expect_on_put: Optional[bool] = None
    _timeout: Optional[tuple[int, int]] = None

    @property
    def front_end_connections(self) -> int:
        """Number of persistent connections to the front end server."""
        if self._front_end_connections is not None:
            return self._front_end_connections

        self._front_end_connections = int(
            os.environ.get(
                "LSST_HTTP_FRONTEND_PERSISTENT_CONNECTIONS", DEFAULT_FRONTEND_PERSISTENT_CONNECTIONS
            )
        )
        return self._front_end_connections

    @property
    def back_end_connections(self) -> int:
        """Number of persistent connections to the back end servers."""
        if self._back_end_connections is not None:
            return self._back_end_connections

        self._back_end_connections = int(
            os.environ.get("LSST_HTTP_BACKEND_PERSISTENT_CONNECTIONS", DEFAULT_BACKEND_PERSISTENT_CONNECTIONS)
        )
        return self._back_end_connections

    @property
    def digest_algorithm(self) -> str:
        """Algorithm to ask the server to use for computing and recording
        digests of each file contents in PUT requests.

        Returns
        -------
        digest_algorithm: `str`
            The name of a digest algorithm or the empty string if no algorithm
            is configured. Only values listed in ACCEPTED_DIGESTS are used;
            anything else in LSST_HTTP_DIGEST is silently ignored.
        """
        if self._digest_algorithm is not None:
            return self._digest_algorithm

        digest = os.environ.get("LSST_HTTP_DIGEST", "").lower()
        if digest not in ACCEPTED_DIGESTS:
            digest = ""

        self._digest_algorithm = digest
        return self._digest_algorithm

    @property
    def send_expect_on_put(self) -> bool:
        """Return True if a "Expect: 100-continue" header is to be sent to
        the server on each PUT request.

        Some servers (e.g. dCache) uses this information as an indication that
        the client knows how to handle redirects to the specific server that
        will actually receive the data for PUT requests.

        Controlled by the mere presence of the environment variable
        LSST_HTTP_PUT_SEND_EXPECT_HEADER; its value is irrelevant.
        """
        if self._send_expect_on_put is not None:
            return self._send_expect_on_put

        self._send_expect_on_put = "LSST_HTTP_PUT_SEND_EXPECT_HEADER" in os.environ
        return self._send_expect_on_put

    @property
    def timeout(self) -> tuple[int, int]:
        """Return a tuple with the values of timeouts for connecting to the
        server and reading its response, respectively. Both values are in
        seconds.
        """
        if self._timeout is not None:
            return self._timeout

        self._timeout = (
            int(os.environ.get("LSST_HTTP_TIMEOUT_CONNECT", DEFAULT_TIMEOUT_CONNECT)),
            int(os.environ.get("LSST_HTTP_TIMEOUT_READ", DEFAULT_TIMEOUT_READ)),
        )
        return self._timeout

149 

150 

@functools.lru_cache
def _is_webdav_endpoint(path: Union[ResourcePath, str]) -> bool:
    """Check whether the remote HTTP endpoint implements WebDAV features.

    Parameters
    ----------
    path : `ResourcePath` or `str`
        URL to the resource to be checked.
        Should preferably refer to the root since the status is shared
        by all paths in that server.

    Returns
    -------
    _is_webdav_endpoint : `bool`
        True if the endpoint implements WebDAV, False if it doesn't.

    Raises
    ------
    requests.exceptions.SSLError
        Re-raised after logging a hint about LSST_HTTP_CACERT_BUNDLE when
        server certificate verification fails.
    """
    log.debug("Detecting HTTP endpoint type for '%s'...", path)
    try:
        ca_cert_bundle = os.getenv("LSST_HTTP_CACERT_BUNDLE")
        verify: Union[bool, str] = ca_cert_bundle if ca_cert_bundle else True
        resp = requests.options(str(path), verify=verify, stream=False)

        # Check that "1" is part of the value of the "DAV" header. We don't
        # use locks, so a server complying to class 1 is enough for our
        # purposes. All webDAV servers must advertise at least compliance
        # class "1".
        #
        # Compliance classes are documented in
        # http://www.webdav.org/specs/rfc4918.html#dav.compliance.classes
        #
        # Examples of values for header DAV are:
        #    DAV: 1, 2
        #    DAV: 1, <http://apache.org/dav/propset/fs/1>
        if "DAV" not in resp.headers:
            return False

        # Convert to str to keep mypy happy
        compliance_class = str(resp.headers.get("DAV"))
        return "1" in compliance_class.replace(" ", "").split(",")
    except requests.exceptions.SSLError:
        log.warning(
            "Environment variable LSST_HTTP_CACERT_BUNDLE can be used to "
            "specify a bundle of certificate authorities you trust which are "
            "not included in the default set of trusted authorities of your "
            "system."
        )
        # Bare re-raise preserves the original exception and traceback,
        # unlike the previous `raise e` form.
        raise

198 

199 

# Tuple (path, block_size) pointing to the location of a local directory
# to save temporary files and the block size of the underlying file system.
# Populated lazily by _get_temp_dir().
_TMPDIR: Optional[tuple[str, int]] = None


def _get_temp_dir() -> tuple[str, int]:
    """Return the temporary directory path and block size.

    This function caches its results in _TMPDIR, so the environment and the
    file system are only inspected on the first call.

    Returns
    -------
    path : `str`
        Directory in which temporary files should be created.
    block_size : `int`
        Recommended buffer size (bytes) for I/O against that directory.
    """
    global _TMPDIR
    if _TMPDIR:
        return _TMPDIR

    # Use the value of environment variables 'LSST_RESOURCES_TMPDIR' or
    # 'TMPDIR', if defined. Otherwise use current working directory.
    # Note: 'candidate' avoids shadowing the builtin 'dir'.
    tmpdir = os.getcwd()
    for candidate in (os.getenv(v) for v in ("LSST_RESOURCES_TMPDIR", "TMPDIR")):
        if candidate and os.path.isdir(candidate):
            tmpdir = candidate
            break

    # Compute the block size as 256 blocks of typical size
    # (i.e. 4096 bytes) or 10 times the file system block size,
    # whichever is higher. This is a reasonable compromise between
    # using memory for buffering and the number of system calls
    # issued to read from or write to temporary files.
    fsstats = os.statvfs(tmpdir)
    return (_TMPDIR := (tmpdir, max(10 * fsstats.f_bsize, 256 * 4096)))

229 

230 

class BearerTokenAuth(AuthBase):
    """Attach a bearer token 'Authorization' header to each request.

    Parameters
    ----------
    token : `str`
        Can be either the path to a local protected file which contains the
        value of the token or the token itself.
    """

    def __init__(self, token: str):
        self._token = self._path = None
        self._mtime: float = -1.0
        if not token:
            # No token configured: __call__ becomes a no-op.
            return

        self._token = token
        if not os.path.isfile(token):
            # The token value was provided directly, not via a file.
            return

        # The token lives in a local file: record its absolute path and
        # require that only its owner can access it.
        self._path = os.path.abspath(token)
        if not _is_protected(self._path):
            raise PermissionError(
                f"Bearer token file at {self._path} must be protected for access only by its owner"
            )
        self._refresh()

    def _refresh(self) -> None:
        """Read the token file (if any) if its modification time is more recent
        than the last time we read it.
        """
        if not self._path:
            return

        mtime = os.stat(self._path).st_mtime
        if mtime <= self._mtime:
            # File unchanged since the last read: keep the cached token.
            return

        log.debug("Reading bearer token file at %s", self._path)
        self._mtime = mtime
        with open(self._path) as f:
            self._token = f.read().rstrip("\n")

    def __call__(self, req: requests.PreparedRequest) -> requests.PreparedRequest:
        if not self._token:
            return req

        # Re-read the token file (if any) before each request so a rotated
        # token is picked up without restarting the process.
        self._refresh()
        req.headers["Authorization"] = f"Bearer {self._token}"
        return req

274 

275 

class SessionStore:
    """Cache a reusable HTTP client session per endpoint.

    Parameters
    ----------
    num_pools : `int`, optional
        Number of urllib3 connection pools to keep (one pool per remote
        host).
    max_persistent_connections : `int`, optional
        Maximum number of connections per remote host to persist in each
        connection pool.
    """

    def __init__(self, num_pools: int = 10, max_persistent_connections: int = 1) -> None:
        # Dictionary to store the session associated to a given URI. The key
        # of the dictionary is a root URI and the value is the session.
        self._sessions: dict[str, requests.Session] = {}

        # Number of connection pools to keep: there is one pool per remote
        # host. See documentation of urllib3 PoolManager class:
        #    https://urllib3.readthedocs.io
        self._num_pools = num_pools

        # Maximum number of connections per remote host to persist in each
        # connection pool. See urllib3 Advanced Usage documentation:
        #    https://urllib3.readthedocs.io/en/stable/advanced-usage.html
        self._max_persistent_connections = max_persistent_connections

    def clear(self) -> None:
        """Destroy all previously created sessions and attempt to close
        underlying idle network connections.
        """

        # Close all sessions and empty the store. Idle network connections
        # should be closed as a consequence. We don't have means through
        # the API exposed by Requests to actually force closing the
        # underlying open sockets.
        for session in self._sessions.values():
            session.close()

        self._sessions.clear()

    def get(self, rpath: ResourcePath) -> requests.Session:
        """Retrieve a session for accessing the remote resource at rpath.

        Parameters
        ----------
        rpath : `ResourcePath`
            URL to a resource at the remote server for which a session is to
            be retrieved.

        Returns
        -------
        session : `requests.Session`
            The session cached for the endpoint of ``rpath``; created on
            first use for that endpoint.

        Notes
        -----
        Once a session is created for a given endpoint it is cached and
        returned every time a session is requested for any path under that same
        endpoint. For instance, a single session will be cached and shared
        for paths "https://www.example.org/path/to/file" and
        "https://www.example.org/any/other/path".

        Note that "https://www.example.org" and "https://www.example.org:12345"
        will have different sessions since the port number is not identical.

        In order to configure the session, some environment variables are
        inspected:

        - LSST_HTTP_CACERT_BUNDLE: path to a .pem file containing the CA
          certificates to trust when verifying the server's certificate.

        - LSST_HTTP_AUTH_BEARER_TOKEN: value of a bearer token or path to a
          local file containing a bearer token to be used as the client
          authentication mechanism with all requests.
          The permissions of the token file must be set so that only its
          owner can access it.
          If initialized, takes precedence over LSST_HTTP_AUTH_CLIENT_CERT
          and LSST_HTTP_AUTH_CLIENT_KEY.

        - LSST_HTTP_AUTH_CLIENT_CERT: path to a .pem file which contains the
          client certificate for authenticating to the server.
          If initialized, the variable LSST_HTTP_AUTH_CLIENT_KEY must also be
          initialized with the path of the client private key file.
          The permissions of the client private key must be set so that only
          its owner can access it, at least for reading.
        """
        root_uri = str(rpath.root_uri())
        if root_uri not in self._sessions:
            # We don't have yet a session for this endpoint: create a new one.
            self._sessions[root_uri] = self._make_session(rpath)

        return self._sessions[root_uri]

    def _make_session(self, rpath: ResourcePath) -> requests.Session:
        """Make a new session configured from values from the environment."""
        session = requests.Session()
        root_uri = str(rpath.root_uri())
        log.debug("Creating new HTTP session for endpoint %s ...", root_uri)

        # Retry policy shared by the adapters mounted below; only idempotent
        # requests are retried automatically.
        retries = Retry(
            # Total number of retries to allow. Takes precedence over other
            # counts.
            total=6,
            # How many connection-related errors to retry on.
            connect=3,
            # How many times to retry on read errors.
            read=3,
            # Backoff factor to apply between attempts after the second try
            # (seconds). Randomized to spread retries of concurrent clients.
            backoff_factor=30 * (1 + random.random()),
            # How many times to retry on bad status codes.
            status=5,
            # Set of uppercased HTTP method verbs that we should retry on.
            # We only automatically retry idempotent requests.
            allowed_methods=frozenset(
                [
                    "COPY",
                    "DELETE",
                    "GET",
                    "HEAD",
                    "MKCOL",
                    "OPTIONS",
                    "PROPFIND",
                    "PUT",
                ]
            ),
            # HTTP status codes that we should force a retry on.
            status_forcelist=frozenset(
                [
                    requests.codes.too_many_requests,  # 429
                    requests.codes.internal_server_error,  # 500
                    requests.codes.bad_gateway,  # 502
                    requests.codes.service_unavailable,  # 503
                    requests.codes.gateway_timeout,  # 504
                ]
            ),
            # Whether to respect Retry-After header on status codes defined
            # above.
            respect_retry_after_header=True,
        )

        # Persist the specified number of connections to the front end server.
        session.mount(
            root_uri,
            HTTPAdapter(
                pool_connections=self._num_pools,
                pool_maxsize=self._max_persistent_connections,
                pool_block=False,
                max_retries=retries,
            ),
        )

        # Do not persist the connections to back end servers which may vary
        # from request to request. Systematically persisting connections to
        # those servers may exhaust their capabilities when there are thousands
        # of simultaneous clients.
        session.mount(
            f"{rpath.scheme}://",
            HTTPAdapter(
                pool_connections=self._num_pools,
                pool_maxsize=0,
                pool_block=False,
                max_retries=retries,
            ),
        )

        # If the remote endpoint doesn't use secure HTTP we don't include
        # bearer tokens in the requests nor need to authenticate the remote
        # server.
        if rpath.scheme != "https":
            return session

        # Should we use a specific CA cert bundle for authenticating the
        # server?
        session.verify = True
        if ca_bundle := os.getenv("LSST_HTTP_CACERT_BUNDLE"):
            session.verify = ca_bundle

        # Should we use bearer tokens for client authentication?
        if token := os.getenv("LSST_HTTP_AUTH_BEARER_TOKEN"):
            log.debug("... using bearer token authentication")
            session.auth = BearerTokenAuth(token)
            return session

        # Should we instead use client certificate and private key? If so, both
        # LSST_HTTP_AUTH_CLIENT_CERT and LSST_HTTP_AUTH_CLIENT_KEY must be
        # initialized.
        client_cert = os.getenv("LSST_HTTP_AUTH_CLIENT_CERT")
        client_key = os.getenv("LSST_HTTP_AUTH_CLIENT_KEY")
        if client_cert and client_key:
            if not _is_protected(client_key):
                raise PermissionError(
                    f"Private key file at {client_key} must be protected for access only by its owner"
                )
            log.debug("... using client certificate authentication.")
            session.cert = (client_cert, client_key)
            return session

        if client_cert:
            # Only the client certificate was provided.
            raise ValueError(
                "Environment variable LSST_HTTP_AUTH_CLIENT_KEY must be set to client private key file path"
            )

        if client_key:
            # Only the client private key was provided.
            raise ValueError(
                "Environment variable LSST_HTTP_AUTH_CLIENT_CERT must be set to client certificate file path"
            )

        log.debug(
            "Neither LSST_HTTP_AUTH_BEARER_TOKEN nor (LSST_HTTP_AUTH_CLIENT_CERT and "
            "LSST_HTTP_AUTH_CLIENT_KEY) are initialized. Client authentication is disabled."
        )
        return session

478 

479 

480class HttpResourcePath(ResourcePath): 

481 """General HTTP(S) resource. 

482 

483 Notes 

484 ----- 

485 In order to configure the behavior of instances of this class, the 

486 environment variables below are inspected: 

487 

488 - LSST_HTTP_PUT_SEND_EXPECT_HEADER: if set (with any value), a 

489 "Expect: 100-Continue" header will be added to all HTTP PUT requests. 

490 This header is required by some servers to detect if the client 

491 knows how to handle redirections. In case of redirection, the body 

492 of the PUT request is sent to the redirected location and not to 

493 the front end server. 

494 

495 - LSST_HTTP_TIMEOUT_CONNECT and LSST_HTTP_TIMEOUT_READ: if set to a 

496 numeric value, they are interpreted as the number of seconds to wait 

497 for establishing a connection with the server and for reading its 

498 response, respectively. 

499 

500 - LSST_HTTP_FRONTEND_PERSISTENT_CONNECTIONS and 

501 LSST_HTTP_BACKEND_PERSISTENT_CONNECTIONS: contain the maximum number 

502 of connections to attempt to persist with both the front end servers 

503 and the back end servers. 

504 Default values: DEFAULT_FRONTEND_PERSISTENT_CONNECTIONS and 

505 DEFAULT_BACKEND_PERSISTENT_CONNECTIONS. 

506 

507 - LSST_HTTP_DIGEST: case-insensitive name of the digest algorithm to 

508 ask the server to compute for every file's content sent to the server 

509 via a PUT request. No digest is requested if this variable is not set 

510 or is set to an invalid value. 

511 Valid values are those in ACCEPTED_DIGESTS. 

512 """ 

513 

    # Cached result of the `is_webdav_endpoint` property for this instance.
    _is_webdav: Optional[bool] = None

    # Configuration items for this class instances.
    _config = HttpResourcePathConfig()

    # The session for metadata requests is used for interacting with
    # the front end servers for requests such as PROPFIND, HEAD, etc. Those
    # interactions are typically served by the front end servers. We want to
    # keep the connection to the front end servers open, to reduce the cost
    # associated to TCP and TLS handshaking for each new request.
    _metadata_session_store = SessionStore(
        num_pools=5,
        max_persistent_connections=_config.front_end_connections,
    )

    # The data session is used for interaction with the front end servers which
    # typically redirect to the back end servers for serving our PUT and GET
    # requests. We attempt to keep a single connection open with the front end
    # server, if possible. This depends on how the server behaves and the
    # kind of request. Some servers close the connection when redirecting
    # the client to a back end server, for instance when serving a PUT
    # request.
    _data_session_store = SessionStore(
        num_pools=25,
        max_persistent_connections=_config.back_end_connections,
    )

    # Process ID which created the sessions above. We need to store this
    # to replace sessions created by a parent process and inherited by a
    # child process after a fork, to avoid confusing the SSL layer.
    _pid: int = -1

545 

546 @property 

547 def metadata_session(self) -> requests.Session: 

548 """Client session to send requests which do not require upload or 

549 download of data, i.e. mostly metadata requests. 

550 """ 

551 

552 if hasattr(self, "_metadata_session") and self._pid == os.getpid(): 

553 return self._metadata_session 

554 

555 # Reset the store in case it was created by another process and 

556 # retrieve a session. 

557 self._metadata_session_store.clear() 

558 self._pid = os.getpid() 

559 self._metadata_session: requests.Session = self._metadata_session_store.get(self) 

560 return self._metadata_session 

561 

562 @property 

563 def data_session(self) -> requests.Session: 

564 """Client session for uploading and downloading data.""" 

565 

566 if hasattr(self, "_data_session") and self._pid == os.getpid(): 

567 return self._data_session 

568 

569 # Reset the store in case it was created by another process and 

570 # retrieve a session. 

571 self._data_session_store.clear() 

572 self._pid = os.getpid() 

573 self._data_session: requests.Session = self._data_session_store.get(self) 

574 return self._data_session 

575 

576 def _clear_sessions(self) -> None: 

577 """Internal method to close the socket connections still open. Used 

578 only in test suites to avoid warnings. 

579 """ 

580 self._metadata_session_store.clear() 

581 self._data_session_store.clear() 

582 

583 if hasattr(self, "_metadata_session"): 

584 delattr(self, "_metadata_session") 

585 

586 if hasattr(self, "_data_session"): 

587 delattr(self, "_data_session") 

588 

589 @property 

590 def is_webdav_endpoint(self) -> bool: 

591 """Check if the current endpoint implements WebDAV features. 

592 

593 This is stored per URI but cached by root so there is 

594 only one check per hostname. 

595 """ 

596 if self._is_webdav is not None: 

597 return self._is_webdav 

598 

599 self._is_webdav = _is_webdav_endpoint(self.root_uri()) 

600 return self._is_webdav 

601 

602 def exists(self) -> bool: 

603 """Check that a remote HTTP resource exists.""" 

604 log.debug("Checking if resource exists: %s", self.geturl()) 

605 if not self.is_webdav_endpoint: 

606 # The remote is a plain HTTP server. Let's attempt a HEAD 

607 # request, even if the behavior for such a request against a 

608 # directory is not specified, so it depends on the server 

609 # implementation. 

610 resp = self.metadata_session.head( 

611 self.geturl(), timeout=self._config.timeout, allow_redirects=True, stream=False 

612 ) 

613 return resp.status_code == requests.codes.ok # 200 

614 

615 # The remote endpoint is a webDAV server: send a PROPFIND request 

616 # to determine if it exists. 

617 resp = self._propfind() 

618 if resp.status_code == requests.codes.multi_status: # 207 

619 prop = _parse_propfind_response_body(resp.text)[0] 

620 return prop.exists 

621 else: # 404 Not Found 

622 return False 

623 

624 def size(self) -> int: 

625 """Return the size of the remote resource in bytes.""" 

626 if self.dirLike: 

627 return 0 

628 

629 if not self.is_webdav_endpoint: 

630 # The remote is a plain HTTP server. Send a HEAD request to 

631 # retrieve the size of the resource. 

632 resp = self.metadata_session.head( 

633 self.geturl(), timeout=self._config.timeout, allow_redirects=True, stream=False 

634 ) 

635 if resp.status_code == requests.codes.ok: # 200 

636 if "Content-Length" in resp.headers: 

637 return int(resp.headers["Content-Length"]) 

638 else: 

639 raise ValueError( 

640 f"Response to HEAD request to {self} does not contain 'Content-Length' header" 

641 ) 

642 elif resp.status_code == requests.codes.not_found: 

643 raise FileNotFoundError( 

644 f"Resource {self} does not exist, status: {resp.status_code} {resp.reason}" 

645 ) 

646 else: 

647 raise ValueError( 

648 f"Unexpected response for HEAD request for {self}, status: {resp.status_code} " 

649 f"{resp.reason}" 

650 ) 

651 

652 # The remote is a webDAV server: send a PROPFIND request to retrieve 

653 # the size of the resource. Sizes are only meaningful for files. 

654 resp = self._propfind() 

655 if resp.status_code == requests.codes.multi_status: # 207 

656 prop = _parse_propfind_response_body(resp.text)[0] 

657 if prop.is_file: 

658 return prop.size 

659 elif prop.is_directory: 

660 raise IsADirectoryError( 

661 f"Resource {self} is reported by server as a directory but has a file path" 

662 ) 

663 else: 

664 raise FileNotFoundError(f"Resource {self} does not exist") 

665 else: # 404 Not Found 

666 raise FileNotFoundError( 

667 f"Resource {self} does not exist, status: {resp.status_code} {resp.reason}" 

668 ) 

669 

670 def mkdir(self) -> None: 

671 """Create the directory resource if it does not already exist.""" 

672 # Creating directories is only available on WebDAV back ends. 

673 if not self.is_webdav_endpoint: 

674 raise NotImplementedError( 

675 f"Creation of directory {self} is not implemented by plain HTTP servers" 

676 ) 

677 

678 if not self.dirLike: 

679 raise NotADirectoryError(f"Can not create a 'directory' for file-like URI {self}") 

680 

681 # Check if the target directory already exists. 

682 resp = self._propfind() 

683 if resp.status_code == requests.codes.multi_status: # 207 

684 prop = _parse_propfind_response_body(resp.text)[0] 

685 if prop.exists: 

686 if prop.is_directory: 

687 return 

688 else: 

689 # A file exists at this path 

690 raise NotADirectoryError( 

691 f"Can not create a directory for {self} because a file already exists at that path" 

692 ) 

693 

694 # Target directory does not exist. Create it and its ancestors as 

695 # needed. We need to test if parent URL is different from self URL, 

696 # otherwise we could be stuck in a recursive loop 

697 # where self == parent. 

698 if self.geturl() != self.parent().geturl(): 

699 self.parent().mkdir() 

700 

701 log.debug("Creating new directory: %s", self.geturl()) 

702 self._mkcol() 

703 

    def remove(self) -> None:
        """Remove the remote resource.

        Deletion is delegated to the internal ``_delete`` helper.
        """
        self._delete()

707 

708 def read(self, size: int = -1) -> bytes: 

709 """Open the resource and return the contents in bytes. 

710 

711 Parameters 

712 ---------- 

713 size : `int`, optional 

714 The number of bytes to read. Negative or omitted indicates 

715 that all data should be read. 

716 """ 

717 

718 # Use the data session as a context manager to ensure that the 

719 # network connections to both the front end and back end servers are 

720 # closed after downloading the data. 

721 log.debug("Reading from remote resource: %s", self.geturl()) 

722 stream = True if size > 0 else False 

723 with self.data_session as session: 

724 with time_this(log, msg="GET %s", args=(self,)): 

725 resp = session.get(self.geturl(), stream=stream, timeout=self._config.timeout) 

726 

727 if resp.status_code != requests.codes.ok: # 200 

728 raise FileNotFoundError( 

729 f"Unable to read resource {self}; status: {resp.status_code} {resp.reason}" 

730 ) 

731 if not stream: 

732 return resp.content 

733 else: 

734 return next(resp.iter_content(chunk_size=size)) 

735 

736 def write(self, data: bytes, overwrite: bool = True) -> None: 

737 """Write the supplied bytes to the new resource. 

738 

739 Parameters 

740 ---------- 

741 data : `bytes` 

742 The bytes to write to the resource. The entire contents of the 

743 resource will be replaced. 

744 overwrite : `bool`, optional 

745 If `True` the resource will be overwritten if it exists. Otherwise 

746 the write will fail. 

747 """ 

748 log.debug("Writing to remote resource: %s", self.geturl()) 

749 if not overwrite: 

750 if self.exists(): 

751 raise FileExistsError(f"Remote resource {self} exists and overwrite has been disabled") 

752 

753 # Ensure the parent directory exists. 

754 self.parent().mkdir() 

755 

756 # Upload the data. 

757 log.debug("Writing data to remote resource: %s", self.geturl()) 

758 self._put(data=data) 

759 

    def transfer_from(
        self,
        src: ResourcePath,
        transfer: str = "copy",
        overwrite: bool = False,
        transaction: Optional[TransactionProtocol] = None,
    ) -> None:
        """Transfer the current resource to a Webdav repository.

        Parameters
        ----------
        src : `ResourcePath`
            Source URI.
        transfer : `str`
            Mode to use for transferring the resource. The accepted set is
            defined by ``self.transferModes``; this method handles at least
            ``copy``, ``move`` and ``auto`` (which resolves to the default
            transfer mode).
        overwrite : `bool`, optional
            If `False` (the default), raise `FileExistsError` when the
            destination already exists.
        transaction : `~lsst.resources.utils.TransactionProtocol`, optional
            Currently unused.

        Raises
        ------
        ValueError
            Raised when ``transfer`` is not one of ``self.transferModes``.
        FileExistsError
            Raised when the destination exists and ``overwrite`` is `False`.
        """
        # Fail early to prevent delays if remote resources are requested.
        if transfer not in self.transferModes:
            raise ValueError(f"Transfer mode {transfer} not supported by URI scheme {self.scheme}")

        # Existence checks cost time so do not call this unless we know
        # that debugging is enabled.
        if log.isEnabledFor(logging.DEBUG):
            log.debug(
                "Transferring %s [exists: %s] -> %s [exists: %s] (transfer=%s)",
                src,
                src.exists(),
                self,
                self.exists(),
                transfer,
            )

        # Short circuit immediately if the URIs are identical.
        if self == src:
            log.debug(
                "Target and destination URIs are identical: %s, returning immediately."
                " No further action required.",
                self,
            )
            return

        if not overwrite and self.exists():
            raise FileExistsError(f"Destination path {self} already exists.")

        if transfer == "auto":
            transfer = self.transferDefault

        # We can use webDAV 'COPY' or 'MOVE' if both the current and source
        # resources are located in the same server.
        if isinstance(src, type(self)) and self.root_uri() == src.root_uri() and self.is_webdav_endpoint:
            log.debug("Transfer from %s to %s directly", src, self)
            return self._move(src) if transfer == "move" else self._copy(src)

        # For resources of different classes or for plain HTTP resources we can
        # perform the copy or move operation by downloading to a local file
        # and uploading to the destination.
        self._copy_via_local(src)

        # This was an explicit move, try to remove the source.
        if transfer == "move":
            src.remove()

824 

825 def walk( 

826 self, file_filter: Optional[Union[str, re.Pattern]] = None 

827 ) -> Iterator[Union[List, Tuple[ResourcePath, List[str], List[str]]]]: 

828 """Walk the directory tree returning matching files and directories. 

829 Parameters 

830 ---------- 

831 file_filter : `str` or `re.Pattern`, optional 

832 Regex to filter out files from the list before it is returned. 

833 Yields 

834 ------ 

835 dirpath : `ResourcePath` 

836 Current directory being examined. 

837 dirnames : `list` of `str` 

838 Names of subdirectories within dirpath. 

839 filenames : `list` of `str` 

840 Names of all the files within dirpath. 

841 """ 

842 if not self.dirLike: 

843 raise ValueError("Can not walk a non-directory URI") 

844 

845 # Walking directories is only available on WebDAV back ends. 

846 if not self.is_webdav_endpoint: 

847 raise NotImplementedError(f"Walking directory {self} is not implemented by plain HTTP servers") 

848 

849 if isinstance(file_filter, str): 

850 file_filter = re.compile(file_filter) 

851 

852 resp = self._propfind(depth="1") 

853 if resp.status_code == requests.codes.multi_status: # 207 

854 files: List[str] = [] 

855 dirs: List[str] = [] 

856 

857 for prop in _parse_propfind_response_body(resp.text): 

858 if prop.is_file: 

859 files.append(prop.name) 

860 elif not self.path.endswith(prop.href): 

861 # Only include the names of sub-directories not the 

862 # directory being walked. 

863 dirs.append(prop.name) 

864 

865 if file_filter is not None: 

866 files = [f for f in files if file_filter.search(f)] 

867 

868 if not dirs and not files: 

869 return 

870 else: 

871 yield type(self)(self, forceAbsolute=False, forceDirectory=True), dirs, files 

872 

873 for dir in dirs: 

874 new_uri = self.join(dir, forceDirectory=True) 

875 yield from new_uri.walk(file_filter) 

876 

    def _as_local(self) -> Tuple[str, bool]:
        """Download object over HTTP and place in temporary directory.

        Returns
        -------
        path : `str`
            Path to local temporary file.
        temporary : `bool`
            Always returns `True`. This is always a temporary file.

        Raises
        ------
        FileNotFoundError
            Raised if the server does not answer the GET request with
            status 200.
        ValueError
            Raised if the number of bytes received differs from the size
            announced in the 'Content-Length' response header.
        """

        # Use the session as a context manager to ensure that connections
        # to both the front end and back end servers are closed after the
        # download operation is finished.
        with self.data_session as session:
            # stream=True defers the body download so it can be written to
            # disk chunk by chunk instead of being held in memory in full.
            resp = session.get(self.geturl(), stream=True, timeout=self._config.timeout)
            if resp.status_code != requests.codes.ok:
                raise FileNotFoundError(
                    f"Unable to download resource {self}; status: {resp.status_code} {resp.reason}"
                )

            # expected_length is -1 when the server sent no 'Content-Length'
            # header; in that case the size consistency check below is
            # skipped.
            content_length = 0
            expected_length = int(resp.headers.get("Content-Length", "-1"))
            tmpdir, buffering = _get_temp_dir()

            # delete=False: ownership of the temporary file is handed to the
            # caller, which is responsible for removing it.
            with tempfile.NamedTemporaryFile(
                suffix=self.getExtension(), buffering=buffering, dir=tmpdir, delete=False
            ) as tmpFile:
                with time_this(
                    log,
                    msg="GET %s [length=%d] to local file %s [chunk_size=%d]",
                    args=(self, expected_length, tmpFile.name, buffering),
                    mem_usage=True,
                    mem_unit=u.mebibyte,
                ):
                    # Download in chunks sized to match the file buffering.
                    for chunk in resp.iter_content(chunk_size=buffering):
                        tmpFile.write(chunk)
                        content_length += len(chunk)

            # Check that the expected and actual content lengths match
            if expected_length >= 0 and expected_length != content_length:
                raise ValueError(
                    f"Size of downloaded file does not match value in Content-Length header for {self}: "
                    f"expecting {expected_length} and got {content_length} bytes"
                )

            return tmpFile.name, True

924 

925 def _send_webdav_request( 

926 self, 

927 method: str, 

928 url: Optional[str] = None, 

929 headers: dict[str, str] = {}, 

930 body: Optional[str] = None, 

931 session: Optional[requests.Session] = None, 

932 timeout: Optional[tuple[int, int]] = None, 

933 ) -> requests.Response: 

934 """Send a webDAV request and correctly handle redirects. 

935 

936 Parameters 

937 ---------- 

938 method : `str` 

939 The mthod of the HTTP request to be sent, e.g. PROPFIND, MKCOL. 

940 headers : `dict`, optional 

941 A dictionary of key-value pairs (both strings) to include as 

942 headers in the request. 

943 body: `str`, optional 

944 The body of the request. 

945 

946 Notes 

947 ----- 

948 This way of sending webDAV requests is necessary for handling 

949 redirection ourselves, since the 'requests' package changes the method 

950 of the redirected request when the server responds with status 302 and 

951 the method of the original request is not HEAD (which is the case for 

952 webDAV requests). 

953 

954 That means that when the webDAV server we interact with responds with 

955 a redirection to a PROPFIND or MKCOL request, the request gets 

956 converted to a GET request when sent to the redirected location. 

957 

958 See `requests.sessions.SessionRedirectMixin.rebuild_method()` in 

959 https://github.com/psf/requests/blob/main/requests/sessions.py 

960 

961 This behavior of the 'requests' package is meant to be compatible with 

962 what is specified in RFC 9110: 

963 

964 https://www.rfc-editor.org/rfc/rfc9110#name-302-found 

965 

966 For our purposes, we do need to follow the redirection and send a new 

967 request using the same HTTP verb. 

968 """ 

969 if url is None: 

970 url = self.geturl() 

971 

972 if session is None: 

973 session = self.metadata_session 

974 

975 if timeout is None: 

976 timeout = self._config.timeout 

977 

978 with time_this( 

979 log, 

980 msg="%s %s", 

981 args=( 

982 method, 

983 url, 

984 ), 

985 mem_usage=True, 

986 mem_unit=u.mebibyte, 

987 ): 

988 for _ in range(max_redirects := 5): 

989 resp = session.request( 

990 method, 

991 url, 

992 data=body, 

993 headers=headers, 

994 stream=False, 

995 timeout=timeout, 

996 allow_redirects=False, 

997 ) 

998 if resp.is_redirect: 

999 url = resp.headers["Location"] 

1000 else: 

1001 return resp 

1002 

1003 # We reached the maximum allowed number of redirects. 

1004 # Stop trying. 

1005 raise ValueError( 

1006 f"Could not get a response to {method} request for {self} after " 

1007 f"{max_redirects} redirections" 

1008 ) 

1009 

1010 def _propfind(self, body: Optional[str] = None, depth: str = "0") -> requests.Response: 

1011 """Send a PROPFIND webDAV request and return the response. 

1012 

1013 Parameters 

1014 ---------- 

1015 body : `str`, optional 

1016 The body of the PROPFIND request to send to the server. If 

1017 provided, it is expected to be a XML document. 

1018 depth : `str`, optional 

1019 The value of the 'Depth' header to include in the request. 

1020 

1021 Returns 

1022 ------- 

1023 response : `requests.Response` 

1024 Response to the PROPFIND request. 

1025 

1026 Notes 

1027 ----- 

1028 It raises `ValueError` if the status code of the PROPFIND request 

1029 is different from "207 Multistatus" or "404 Not Found". 

1030 """ 

1031 if body is None: 

1032 # Request only the DAV live properties we are explicitly interested 

1033 # in namely 'resourcetype', 'getcontentlength', 'getlastmodified' 

1034 # and 'displayname'. 

1035 body = ( 

1036 """<?xml version="1.0" encoding="utf-8" ?>""" 

1037 """<D:propfind xmlns:D="DAV:"><D:prop>""" 

1038 """<D:resourcetype/><D:getcontentlength/><D:getlastmodified/><D:displayname/>""" 

1039 """</D:prop></D:propfind>""" 

1040 ) 

1041 headers = { 

1042 "Depth": depth, 

1043 "Content-Type": 'application/xml; charset="utf-8"', 

1044 "Content-Length": str(len(body)), 

1045 } 

1046 resp = self._send_webdav_request("PROPFIND", headers=headers, body=body) 

1047 if resp.status_code in (requests.codes.multi_status, requests.codes.not_found): 

1048 return resp 

1049 else: 

1050 raise ValueError( 

1051 f"Unexpected response for PROPFIND request for {self}, status: {resp.status_code} " 

1052 f"{resp.reason}" 

1053 ) 

1054 

1055 def _options(self) -> requests.Response: 

1056 """Send a OPTIONS webDAV request for this resource.""" 

1057 

1058 return self._send_webdav_request("OPTIONS") 

1059 

1060 def _head(self) -> requests.Response: 

1061 """Send a HEAD webDAV request for this resource.""" 

1062 

1063 return self._send_webdav_request("HEAD") 

1064 

1065 def _mkcol(self) -> None: 

1066 """Send a MKCOL webDAV request to create a collection. The collection 

1067 may already exist. 

1068 """ 

1069 resp = self._send_webdav_request("MKCOL") 

1070 if resp.status_code == requests.codes.created: # 201 

1071 return 

1072 

1073 if resp.status_code == requests.codes.method_not_allowed: # 405 

1074 # The remote directory already exists 

1075 log.debug("Can not create directory: %s may already exist: skipping.", self.geturl()) 

1076 else: 

1077 raise ValueError(f"Can not create directory {self}, status: {resp.status_code} {resp.reason}") 

1078 

1079 def _delete(self) -> None: 

1080 """Send a DELETE webDAV request for this resource.""" 

1081 

1082 log.debug("Deleting %s ...", self.geturl()) 

1083 

1084 # If this is a directory, ensure the remote is a webDAV server because 

1085 # plain HTTP servers don't support DELETE requests on non-file 

1086 # paths. 

1087 if self.dirLike and not self.is_webdav_endpoint: 

1088 raise NotImplementedError( 

1089 f"Deletion of directory {self} is not implemented by plain HTTP servers" 

1090 ) 

1091 

1092 # Deleting non-empty directories may take some time, so increase 

1093 # the timeout for getting a response from the server. 

1094 timeout = self._config.timeout 

1095 if self.dirLike: 

1096 timeout = (timeout[0], timeout[1] * 100) 

1097 resp = self._send_webdav_request("DELETE", timeout=timeout) 

1098 if resp.status_code in ( 

1099 requests.codes.ok, 

1100 requests.codes.accepted, 

1101 requests.codes.no_content, 

1102 requests.codes.not_found, 

1103 ): 

1104 # We can get a "404 Not Found" error when the file or directory 

1105 # does not exist or when the DELETE request was retried several 

1106 # times and a previous attempt actually deleted the resource. 

1107 # Therefore we consider that a "Not Found" response is not an 

1108 # error since we reached the state desired by the user. 

1109 return 

1110 else: 

1111 # TODO: the response to a DELETE request against a webDAV server 

1112 # may be multistatus. If so, we need to parse the reponse body to 

1113 # determine more precisely the reason of the failure (e.g. a lock) 

1114 # and provide a more helpful error message. 

1115 raise ValueError(f"Unable to delete resource {self}; status: {resp.status_code} {resp.reason}") 

1116 

1117 def _copy_via_local(self, src: ResourcePath) -> None: 

1118 """Replace the contents of this resource with the contents of a remote 

1119 resource by using a local temporary file. 

1120 

1121 Parameters 

1122 ---------- 

1123 src : `HttpResourcePath` 

1124 The source of the contents to copy to `self`. 

1125 """ 

1126 with src.as_local() as local_uri: 

1127 log.debug("Transfer from %s to %s via local file %s", src, self, local_uri) 

1128 with open(local_uri.ospath, "rb") as f: 

1129 self._put(data=f) 

1130 

1131 def _copy_or_move(self, method: str, src: HttpResourcePath) -> None: 

1132 """Send a COPY or MOVE webDAV request to copy or replace the contents 

1133 of this resource with the contents of another resource located in the 

1134 same server. 

1135 

1136 Parameters 

1137 ---------- 

1138 method : `str` 

1139 The method to perform. Valid values are "COPY" or "MOVE" (in 

1140 uppercase). 

1141 

1142 src : `HttpResourcePath` 

1143 The source of the contents to move to `self`. 

1144 """ 

1145 headers = {"Destination": self.geturl()} 

1146 resp = self._send_webdav_request(method, url=src.geturl(), headers=headers, session=self.data_session) 

1147 if resp.status_code in (requests.codes.created, requests.codes.no_content): 

1148 return 

1149 

1150 if resp.status_code == requests.codes.multi_status: 

1151 tree = eTree.fromstring(resp.content) 

1152 status_element = tree.find("./{DAV:}response/{DAV:}status") 

1153 status = status_element.text if status_element is not None else "unknown" 

1154 error = tree.find("./{DAV:}response/{DAV:}error") 

1155 raise ValueError(f"{method} returned multistatus reponse with status {status} and error {error}") 

1156 else: 

1157 raise ValueError( 

1158 f"{method} operation from {src} to {self} failed, status: {resp.status_code} {resp.reason}" 

1159 ) 

1160 

1161 def _copy(self, src: HttpResourcePath) -> None: 

1162 """Send a COPY webDAV request to replace the contents of this resource 

1163 (if any) with the contents of another resource located in the same 

1164 server. 

1165 

1166 Parameters 

1167 ---------- 

1168 src : `HttpResourcePath` 

1169 The source of the contents to copy to `self`. 

1170 """ 

1171 # Neither dCache nor XrootD currently implement the COPY 

1172 # webDAV method as documented in 

1173 # http://www.webdav.org/specs/rfc4918.html#METHOD_COPY 

1174 # (See issues DM-37603 and DM-37651 for details) 

1175 # 

1176 # For the time being, we use a temporary local file to 

1177 # perform the copy client side. 

1178 # TODO: when those 2 issues above are solved remove the 3 lines below. 

1179 must_use_local = True 

1180 if must_use_local: 

1181 return self._copy_via_local(src) 

1182 

1183 return self._copy_or_move("COPY", src) 

1184 

1185 def _move(self, src: HttpResourcePath) -> None: 

1186 """Send a MOVE webDAV request to replace the contents of this resource 

1187 with the contents of another resource located in the same server. 

1188 

1189 Parameters 

1190 ---------- 

1191 src : `HttpResourcePath` 

1192 The source of the contents to move to `self`. 

1193 """ 

1194 return self._copy_or_move("MOVE", src) 

1195 

    def _put(self, data: Union[BinaryIO, bytes]) -> None:
        """Perform an HTTP PUT request and handle redirection.

        Parameters
        ----------
        data : `Union[BinaryIO, bytes]`
            The data to be included in the body of the PUT request.

        Raises
        ------
        ValueError
            Raised if the upload is not acknowledged with status 200, 201
            or 204.
        """
        # Retrieve the final URL for this upload by sending a PUT request with
        # no content. Follow a single server redirection to retrieve the
        # final URL.
        headers = {"Content-Length": "0"}
        if self._config.send_expect_on_put:
            headers["Expect"] = "100-continue"

        url = self.geturl()

        # Use the session as a context manager to ensure the underlying
        # connections are closed after finishing uploading the data.
        with self.data_session as session:
            # Send an empty PUT request to get redirected to the final
            # destination.
            log.debug("Sending empty PUT request to %s", url)
            with time_this(log, msg="PUT (no data) %s", args=(url,), mem_usage=True, mem_unit=u.mebibyte):
                resp = session.request(
                    "PUT",
                    url,
                    data=None,
                    headers=headers,
                    stream=False,
                    timeout=self._config.timeout,
                    allow_redirects=False,
                )
                # A redirect points at the back end server where the data
                # must actually be uploaded; otherwise keep the original URL.
                if resp.is_redirect:
                    url = resp.headers["Location"]

            # Upload the data to the final destination.
            log.debug("Uploading data to %s", url)

            # Ask the server to compute and record a checksum of the uploaded
            # file contents, for later integrity checks. Since we don't compute
            # the digest ourselves while uploading the data, we cannot control
            # after the request is complete that the data we uploaded is
            # identical to the data recorded by the server, but at least the
            # server has recorded a digest of the data it stored.
            #
            # See RFC-3230 for details and
            # https://www.iana.org/assignments/http-dig-alg/http-dig-alg.xhtml
            # for the list of supported digest algorithhms.
            # In addition, note that not all servers implement this RFC so
            # the checksum may not be computed by the server.
            put_headers: Optional[dict[str, str]] = None
            if digest := self._config.digest_algorithm:
                put_headers = {"Want-Digest": digest}

            with time_this(log, msg="PUT %s", args=(url,), mem_usage=True, mem_unit=u.mebibyte):
                resp = session.request(
                    "PUT",
                    url,
                    data=data,
                    headers=put_headers,
                    stream=False,
                    timeout=self._config.timeout,
                    allow_redirects=False,
                )
                # 200 OK, 201 Created and 204 No Content all acknowledge a
                # successful upload.
                if resp.status_code in (
                    requests.codes.ok,
                    requests.codes.created,
                    requests.codes.no_content,
                ):
                    return
                else:
                    raise ValueError(f"Can not write file {self}, status: {resp.status_code} {resp.reason}")

1269 

    @contextlib.contextmanager
    def _openImpl(
        self,
        mode: str = "r",
        *,
        encoding: Optional[str] = None,
    ) -> Iterator[ResourceHandleProtocol]:
        """Yield a handle for reading this resource.

        When the resource exists and the server advertises byte-range
        support ('Accept-Ranges: bytes'), a remote, range-based read handle
        is yielded; otherwise the implementation falls back to the base
        class behavior.
        """
        # Probe the server to find out whether ranged reads are possible.
        resp = self._head()
        accepts_range = resp.status_code == requests.codes.ok and resp.headers.get("Accept-Ranges") == "bytes"
        handle: ResourceHandleProtocol
        if mode in ("rb", "r") and accepts_range:
            handle = HttpReadResourceHandle(
                mode, log, url=self.geturl(), session=self.data_session, timeout=self._config.timeout
            )
            if mode == "r":
                # cast because the protocol is compatible, but does not have
                # BytesIO in the inheritance tree
                yield io.TextIOWrapper(cast(io.BytesIO, handle), encoding=encoding)
            else:
                yield handle
        else:
            # Fall back to the generic implementation (e.g. full download).
            with super()._openImpl(mode, encoding=encoding) as http_handle:
                yield http_handle

1293 

1294 

def _dump_response(resp: requests.Response) -> None:
    """Log the contents of a HTTP or webDAV request and its response.

    Parameters
    ----------
    resp : `requests.Response`
        The response to log.

    Notes
    -----
    Intended for development purposes only.
    """
    request = resp.request
    log.debug("-----------------------------------------------")
    log.debug("Request")
    log.debug("   method=%s", request.method)
    log.debug("   URL=%s", request.url)
    log.debug("   headers=%s", request.headers)
    if request.method == "PUT":
        # Don't dump potentially huge upload payloads.
        log.debug("   body=<data>")
    elif request.body is None:
        log.debug("   body=<empty>")
    else:
        log.debug("   body=%r", request.body[:120])

    log.debug("Response:")
    log.debug("   status_code=%d", resp.status_code)
    log.debug("   headers=%s", resp.headers)
    if not resp.content:
        log.debug("   body=<empty>")
    elif resp.headers.get("Content-Type") == "text/plain":
        log.debug("   body=%r", resp.content)
    else:
        # Truncate binary/unknown content to a short prefix.
        log.debug("   body=%r", resp.content[:80])

1328 

1329 

1330def _is_protected(filepath: str) -> bool: 

1331 """Return true if the permissions of file at filepath only allow for access 

1332 by its owner. 

1333 

1334 Parameters 

1335 ---------- 

1336 filepath : `str` 

1337 Path of a local file. 

1338 """ 

1339 if not os.path.isfile(filepath): 

1340 return False 

1341 mode = stat.S_IMODE(os.stat(filepath).st_mode) 

1342 owner_accessible = bool(mode & stat.S_IRWXU) 

1343 group_accessible = bool(mode & stat.S_IRWXG) 

1344 other_accessible = bool(mode & stat.S_IRWXO) 

1345 return owner_accessible and not group_accessible and not other_accessible 

1346 

1347 

def _parse_propfind_response_body(body: str) -> List[DavProperty]:
    """Parse the XML-encoded contents of the response body to a webDAV PROPFIND
    request.

    Parameters
    ----------
    body : `str`
        XML-encoded response body to a PROPFIND request

    Returns
    -------
    responses : `List[DavProperty]`
        One entry per 'response' element found in `body`.

    Raises
    ------
    ValueError
        Raised if `body` contains no 'response' element.

    Notes
    -----
    It is expected that there is at least one response in `body`, otherwise
    this function raises.
    """
    # A response body to a PROPFIND request is of the form (indented for
    # readability):
    #
    # <?xml version="1.0" encoding="UTF-8"?>
    # <D:multistatus xmlns:D="DAV:">
    #     <D:response>
    #         <D:href>path/to/resource</D:href>
    #         <D:propstat>
    #             <D:prop>
    #                 <D:resourcetype>
    #                     <D:collection xmlns:D="DAV:"/>
    #                 </D:resourcetype>
    #                 <D:getlastmodified>
    #                     Fri, 27 Jan 2 023 13:59:01 GMT
    #                 </D:getlastmodified>
    #                 <D:getcontentlength>
    #                     12345
    #                 </D:getcontentlength>
    #             </D:prop>
    #             <D:status>
    #                 HTTP/1.1 200 OK
    #             </D:status>
    #         </D:propstat>
    #     </D:response>
    #     <D:response>
    #         ...
    #     </D:response>
    # </D:multistatus>

    # Scan all the 'response' elements and extract the relevant properties
    multistatus = eTree.fromstring(body.strip())
    responses = [DavProperty(response) for response in multistatus.findall("./{DAV:}response")]

    if not responses:
        # Could not parse the body. Bug fix: the previous implementation
        # interpolated the loop variable ('response') here, which is unbound
        # when no 'response' element was found, so the intended ValueError
        # was masked by an UnboundLocalError. Report the offending body
        # instead.
        raise ValueError(f"Unable to parse response for PROPFIND request: {body}")

    return responses

1409 

1410 

class DavProperty:
    """Helper class to encapsulate select live DAV properties of a single
    resource, as retrieved via a PROPFIND request.
    """

    # Matches the 'status' element of a PROPFIND response's 'propstat'
    # element when the enclosed properties were retrieved successfully.
    _status_ok_rex = re.compile(r"^HTTP/.* 200 .*$", re.IGNORECASE)

    def __init__(self, response: Optional[eTree.Element]):
        # Defaults describe a resource about which nothing is known:
        # a content length of -1 marks "not a file".
        self._href: str = ""
        self._displayname: str = ""
        self._collection: bool = False
        self._getlastmodified: str = ""
        self._getcontentlength: int = -1

        if response is not None:
            self._parse(response)

    def _parse(self, response: eTree.Element) -> None:
        # Extract 'href'. Wrap the text in str() to keep mypy happy.
        href_element = response.find("./{DAV:}href")
        if href_element is not None:
            self._href = str(href_element.text).strip()

        for propstat in response.findall("./{DAV:}propstat"):
            # Only extract properties of interest with status OK.
            status = propstat.find("./{DAV:}status")
            if status is None or not self._status_ok_rex.match(str(status.text)):
                continue

            for prop in propstat.findall("./{DAV:}prop"):
                # "collection": present only for directories.
                if prop.find("./{DAV:}resourcetype/{DAV:}collection") is not None:
                    self._collection = True

                # "getlastmodified".
                modified = prop.find("./{DAV:}getlastmodified")
                if modified is not None:
                    self._getlastmodified = str(modified.text)

                # "getcontentlength".
                length = prop.find("./{DAV:}getcontentlength")
                if length is not None:
                    self._getcontentlength = int(str(length.text))

                # "displayname".
                display = prop.find("./{DAV:}displayname")
                if display is not None:
                    self._displayname = str(display.text)

    @property
    def exists(self) -> bool:
        # It is either a directory or a file with length of at least zero.
        return self._collection or self._getcontentlength >= 0

    @property
    def is_directory(self) -> bool:
        return self._collection

    @property
    def is_file(self) -> bool:
        return self._getcontentlength >= 0

    @property
    def size(self) -> int:
        # Only valid if is_file is True.
        return self._getcontentlength

    @property
    def name(self) -> str:
        return self._displayname

    @property
    def href(self) -> str:
        return self._href