Coverage for python/lsst/resources/_resourceHandles/_httpResourceHandle.py: 24%

90 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2023-03-09 03:06 -0800

1# This file is part of lsst-resources. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# Use of this source code is governed by a 3-clause BSD-style 

10# license that can be found in the LICENSE file. 

11 

12from __future__ import annotations 

13 

14__all__ = ("HttpReadResourceHandle",) 

15 

16import io 

17from logging import Logger 

18from typing import AnyStr, Callable, Iterable, Optional, Union 

19 

20import requests 

21from lsst.utils.timer import time_this 

22 

23from ._baseResourceHandle import BaseResourceHandle, CloseStatus 

24 

25 

class HttpReadResourceHandle(BaseResourceHandle[bytes]):
    """Read-only, seekable, file-like handle for a resource served over HTTP.

    Data is fetched with HTTP ``GET`` requests issued through the supplied
    session. Reading the entire resource from position zero downloads it
    once and caches it in memory; any other read sends a ``Range`` request
    for just the bytes needed. If the server answers a ``Range`` request
    with the full body, that body is cached and all subsequent reads are
    served from the cache.

    Parameters
    ----------
    mode : `str`
        Handle mode, forwarded unchanged to the base class.
    log : `~logging.Logger`
        Logger forwarded to the base class; used to time remote reads.
    session : `requests.Session`
        Session used to issue requests. Required, keyword-only.
    url : `str`
        URL of the remote resource. Required, keyword-only.
    timeout : `tuple` [`float`, `float`]
        Timeout passed straight through to ``session.get``. Required,
        keyword-only.
    newline : `str` or `bytes`, optional
        Forwarded unchanged to the base class.

    Raises
    ------
    ValueError
        Raised if ``url``, ``session`` or ``timeout`` is `None`.
    """

    def __init__(
        self,
        mode: str,
        log: Logger,
        *,
        session: Optional[requests.Session] = None,
        url: Optional[str] = None,
        timeout: Optional[tuple[float, float]] = None,
        newline: Optional[AnyStr] = None,
    ) -> None:
        super().__init__(mode, log, newline=newline)
        if url is None:
            raise ValueError("Url must be specified when constructing this object")
        self._url = url
        if session is None:
            raise ValueError("Session must be specified when constructing this object")
        self._session = session

        if timeout is None:
            raise ValueError("timeout must be specified when constructing this object")
        self._timeout = timeout

        # Holds the full resource once it has been downloaded; None until
        # then. Its internal position is kept equal to _current_position.
        self._completeBuffer: Optional[io.BytesIO] = None

        self._closed = CloseStatus.OPEN
        # Absolute byte offset of the next read.
        self._current_position = 0

    def close(self) -> None:
        """Mark the handle as closed and drop any cached content."""
        self._closed = CloseStatus.CLOSED
        self._completeBuffer = None

    @property
    def closed(self) -> bool:
        """Whether `close` has been called on this handle (`bool`)."""
        return self._closed == CloseStatus.CLOSED

    def fileno(self) -> int:
        """Unsupported; always raises `io.UnsupportedOperation`."""
        raise io.UnsupportedOperation("HttpReadResourceHandle does not have a file number")

    def flush(self) -> None:
        """Unsupported; always raises `io.UnsupportedOperation`."""
        raise io.UnsupportedOperation("HttpReadResourceHandles are read only")

    # NOTE(review): this is a property, so ``handle.isatty`` evaluates to
    # `False` and ``handle.isatty()`` is not callable. The return annotation
    # suggests this is deliberate -- confirm against the base class.
    @property
    def isatty(self) -> Union[bool, Callable[[], bool]]:
        """Always `False`."""
        return False

    def readable(self) -> bool:
        """Return `True`; this handle supports reading."""
        return True

    def readline(self, size: int = -1) -> AnyStr:
        """Unsupported; always raises `io.UnsupportedOperation`."""
        raise io.UnsupportedOperation("HttpReadResourceHandles Do not support line by line reading")

    def readlines(self, size: int = -1) -> Iterable[bytes]:
        """Unsupported; always raises `io.UnsupportedOperation`."""
        raise io.UnsupportedOperation("HttpReadResourceHandles Do not support line by line reading")

    def seek(self, offset: int, whence: int = io.SEEK_SET) -> int:
        """Move the read position.

        Parameters
        ----------
        offset : `int`
            Offset in bytes, interpreted according to ``whence``.
        whence : `int`, optional
            `io.SEEK_SET` (absolute, the default) or `io.SEEK_CUR`
            (relative to the current position).

        Returns
        -------
        position : `int`
            The new absolute position.

        Raises
        ------
        io.UnsupportedOperation
            Raised if the resulting position would be negative or if
            ``whence`` is anything other than `io.SEEK_SET` or
            `io.SEEK_CUR`.
        """
        if whence == io.SEEK_CUR and (self._current_position + offset) >= 0:
            self._current_position += offset
        elif whence == io.SEEK_SET and offset >= 0:
            self._current_position = offset
        else:
            raise io.UnsupportedOperation("Seek value is incorrect, or whence mode is unsupported")

        # Handle the case where the complete file has been read already.
        # _current_position is absolute at this point, so the buffer must be
        # positioned absolutely too; reusing ``whence`` here would apply a
        # SEEK_CUR offset a second time.
        if self._completeBuffer is not None:
            self._completeBuffer.seek(self._current_position)
        return self._current_position

    def seekable(self) -> bool:
        """Return `True`; this handle supports `seek`."""
        return True

    def tell(self) -> int:
        """Return the current absolute read position."""
        return self._current_position

    def truncate(self, size: Optional[int] = None) -> int:
        """Unsupported; always raises `io.UnsupportedOperation`."""
        raise io.UnsupportedOperation("HttpReadResourceHandles Do not support truncation")

    def writable(self) -> bool:
        """Return `False`; this handle is read only."""
        return False

    def write(self, b: bytes, /) -> int:
        """Unsupported; always raises `io.UnsupportedOperation`."""
        raise io.UnsupportedOperation("HttpReadResourceHandles are read only")

    def writelines(self, b: Iterable[bytes], /) -> None:
        """Unsupported; always raises `io.UnsupportedOperation`."""
        raise io.UnsupportedOperation("HttpReadResourceHandles are read only")

    def read(self, size: int = -1) -> bytes:
        """Read bytes starting at the current position.

        Parameters
        ----------
        size : `int`, optional
            Maximum number of bytes to return. A negative value (the
            default) or `None` reads to the end of the resource.

        Returns
        -------
        data : `bytes`
            The bytes read. Empty if ``size`` is zero or the current
            position is at or beyond the end of the resource.

        Raises
        ------
        FileNotFoundError
            Raised if the server responds with a status other than 200,
            206 or 416.
        """
        # Serve from the cache whenever the full resource is already held.
        if self._completeBuffer is not None:
            result = self._completeBuffer.read(size)
            self._current_position += len(result)
            return result

        # Collapse every "read to the end" spelling onto -1.
        if size is None or size < 0:
            size = -1

        # A zero-length read needs no request; "bytes=N-(N-1)" would be an
        # invalid Range header anyway.
        if size == 0:
            return b""

        if size == -1 and self._current_position == 0:
            # The whole file has been requested: fetch it in one go and keep
            # it for later reads.
            with time_this(self._log, msg="Read from remote resource %s", args=(self._url,)):
                resp = self._session.get(self._url, stream=False, timeout=self._timeout)
            if (code := resp.status_code) not in (200, 206):
                raise FileNotFoundError(f"Unable to read resource {self._url}; status code: {code}")
            content = resp.content
            # Only install the cache after a successful response so that a
            # failed request cannot leave an empty buffer behind.
            self._completeBuffer = io.BytesIO(content)
            self._current_position = self._completeBuffer.seek(0, io.SEEK_END)
            return content

        # A partial read is required, either because a size has been
        # specified or because the position is no longer at the start.
        start = self._current_position
        # Range bounds are inclusive; leave the end open to read to EOF.
        end_pos = start + (size - 1) if size > 0 else ""
        headers = {"Range": f"bytes={start}-{end_pos}"}

        with time_this(self._log, msg="Read from remote resource %s", args=(self._url,)):
            resp = self._session.get(self._url, stream=False, timeout=self._timeout, headers=headers)

        code = resp.status_code
        if code == 416:
            # Range Not Satisfiable: the start offset is at or past the end
            # of the resource. Behave like a regular file at EOF and leave
            # the position untouched.
            return b""
        if code not in (200, 206):
            raise FileNotFoundError(
                f"Unable to read resource {self._url}, or bytes are out of range; status code: {code}"
            )

        content = resp.content

        # Verify this is not actually the whole file, i.e. that the server
        # honoured the Range header. A 200, or more bytes than were asked
        # for, is treated as the full body (the over-long 206 case is an
        # assumption carried over from the original implementation).
        if code != 206 or (size > 0 and len(content) > size):
            self._completeBuffer = io.BytesIO(content)
            # Continue from where the caller was, not from byte zero.
            self._completeBuffer.seek(start)
            return self.read(size=size)

        # Advance by what was actually received; a read that reaches EOF can
        # return fewer than ``size`` bytes.
        self._current_position += len(content)
        return content