Coverage for python/lsst/resources/_resourceHandles/_httpResourceHandle.py: 18%
115 statements
« prev ^ index » next coverage.py v7.2.5, created at 2023-05-03 02:26 -0700
« prev ^ index » next coverage.py v7.2.5, created at 2023-05-03 02:26 -0700
# This file is part of lsst-resources.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# Use of this source code is governed by a 3-clause BSD-style
# license that can be found in the LICENSE file.
12from __future__ import annotations
14__all__ = ("HttpReadResourceHandle",)
16import io
17from logging import Logger
18from typing import AnyStr, Callable, Iterable, Optional, Union
20import requests
21from lsst.utils.timer import time_this
23from ._baseResourceHandle import BaseResourceHandle, CloseStatus
class HttpReadResourceHandle(BaseResourceHandle[bytes]):
    """Read-only, seekable file-like handle backed by HTTP GET requests.

    A read of the entire resource starting at position zero downloads the
    body once and keeps it in an in-memory buffer. Every other read issues
    a ``Range`` request for just the bytes that are needed.

    Parameters
    ----------
    mode : `str`
        Mode string forwarded to the base class constructor.
    log : `~logging.Logger`
        Logger forwarded to the base class constructor.
    session : `requests.Session`
        Session used to issue every GET request. Must not be `None`.
    url : `str`
        URL of the remote resource. Must not be `None`.
    timeout : `tuple` [`float`, `float`]
        Passed unchanged as the ``timeout`` argument of ``session.get``.
        Must not be `None`.
    newline : `str` or `bytes`, optional
        Forwarded to the base class constructor.

    Raises
    ------
    ValueError
        Raised if ``url``, ``session`` or ``timeout`` is `None`.

    Notes
    -----
    The methods read ``self._mode`` and ``self._log``; these are presumably
    set by the base class constructor from ``mode`` and ``log`` (not
    visible from this file -- confirm against ``BaseResourceHandle``).
    """

    def __init__(
        self,
        mode: str,
        log: Logger,
        *,
        session: Optional[requests.Session] = None,
        url: Optional[str] = None,
        timeout: Optional[tuple[float, float]] = None,
        newline: Optional[AnyStr] = None,
    ) -> None:
        super().__init__(mode, log, newline=newline)
        # The keyword arguments default to None only so that they can be
        # keyword-only; all three are in fact mandatory.
        if url is None:
            raise ValueError("Url must be specified when constructing this object")
        self._url = url
        if session is None:
            raise ValueError("Session must be specified when constructing this object")
        self._session = session

        if timeout is None:
            raise ValueError("timeout must be specified when constructing this object")
        self._timeout = timeout

        # Holds the whole resource once it has been downloaded in one go.
        self._completeBuffer: Optional[io.BytesIO] = None

        self._closed = CloseStatus.OPEN
        # Absolute byte offset of the next read within the remote resource.
        self._current_position = 0
        # Latched once the end of the resource is reached; cleared by seek().
        self._eof = False

    def close(self) -> None:
        """Mark the handle closed and drop any cached content.

        The EOF flag is also set, so later calls to `read` return an empty
        byte string.
        """
        self._closed = CloseStatus.CLOSED
        self._completeBuffer = None
        self._eof = True

    @property
    def closed(self) -> bool:
        """Whether `close` has been called (`bool`)."""
        return self._closed == CloseStatus.CLOSED

    def fileno(self) -> int:
        """Unsupported: there is no underlying file descriptor.

        Raises
        ------
        io.UnsupportedOperation
            Always raised.
        """
        raise io.UnsupportedOperation("HttpReadResourceHandle does not have a file number")

    def flush(self) -> None:
        """Do nothing, unless the handle was opened with a writing mode.

        Raises
        ------
        io.UnsupportedOperation
            Raised if the mode contains any of ``w``, ``x``, ``a`` or ``+``.
        """
        modes = set(self._mode)
        if {"w", "x", "a", "+"} & modes:
            raise io.UnsupportedOperation("HttpReadResourceHandles are read only")

    # NOTE(review): exposed as a property rather than a method, so
    # ``handle.isatty`` evaluates to `False` and ``handle.isatty()`` is not
    # callable. Kept as-is to preserve the existing interface -- confirm
    # against the base class declaration.
    @property
    def isatty(self) -> Union[bool, Callable[[], bool]]:
        """Always `False`; a remote resource is never a terminal."""
        return False

    def readable(self) -> bool:
        """Return `True`; the handle supports `read`."""
        return True

    def readline(self, size: int = -1) -> AnyStr:
        """Unsupported: line-oriented reading is not implemented.

        Raises
        ------
        io.UnsupportedOperation
            Always raised.
        """
        raise io.UnsupportedOperation("HttpReadResourceHandles Do not support line by line reading")

    def readlines(self, size: int = -1) -> Iterable[bytes]:
        """Unsupported: line-oriented reading is not implemented.

        Raises
        ------
        io.UnsupportedOperation
            Always raised.
        """
        raise io.UnsupportedOperation("HttpReadResourceHandles Do not support line by line reading")

    def seek(self, offset: int, whence: int = io.SEEK_SET) -> int:
        """Move the read position.

        Parameters
        ----------
        offset : `int`
            Offset in bytes, interpreted according to ``whence``.
        whence : `int`, optional
            `io.SEEK_SET` (absolute, the default) or `io.SEEK_CUR`
            (relative to the current position). `io.SEEK_END` is not
            supported.

        Returns
        -------
        position : `int`
            The new absolute position.

        Raises
        ------
        io.UnsupportedOperation
            Raised if the resulting position would be negative or if
            ``whence`` is not supported. The handle state is left untouched
            in that case.
        """
        if whence == io.SEEK_CUR and (self._current_position + offset) >= 0:
            new_position = self._current_position + offset
        elif whence == io.SEEK_SET and offset >= 0:
            new_position = offset
        else:
            raise io.UnsupportedOperation("Seek value is incorrect, or whence mode is unsupported")

        # Only a successful seek may clear the EOF flag.
        self._eof = False
        self._current_position = new_position

        # Handle the case where the complete file has been read already.
        # ``_current_position`` is absolute, so the buffer is always
        # positioned from its start; re-applying ``whence`` here would add
        # the offset a second time for SEEK_CUR.
        if self._completeBuffer is not None:
            self._completeBuffer.seek(self._current_position)
        return self._current_position

    def seekable(self) -> bool:
        """Return `True`; the handle supports `seek` and `tell`."""
        return True

    def tell(self) -> int:
        """Return the current absolute read position in bytes."""
        return self._current_position

    def truncate(self, size: Optional[int] = None) -> int:
        """Unsupported: the handle is read only.

        Raises
        ------
        io.UnsupportedOperation
            Always raised.
        """
        raise io.UnsupportedOperation("HttpReadResourceHandles Do not support truncation")

    def writable(self) -> bool:
        """Return `False`; the handle is read only."""
        return False

    def write(self, b: bytes, /) -> int:
        """Unsupported: the handle is read only.

        Raises
        ------
        io.UnsupportedOperation
            Always raised.
        """
        raise io.UnsupportedOperation("HttpReadResourceHandles are read only")

    def writelines(self, b: Iterable[bytes], /) -> None:
        """Unsupported: the handle is read only.

        Raises
        ------
        io.UnsupportedOperation
            Always raised.
        """
        raise io.UnsupportedOperation("HttpReadResourceHandles are read only")

    def read(self, size: int = -1) -> bytes:
        """Read bytes starting at the current position.

        Parameters
        ----------
        size : `int`, optional
            Maximum number of bytes to return. A negative value (the
            default) reads to the end of the resource.

        Returns
        -------
        data : `bytes`
            The bytes read; empty at end of file or when ``size`` is zero.

        Raises
        ------
        FileNotFoundError
            Raised if the server answers with a status other than 200, 206
            or 416.
        """
        if self._eof:
            # At EOF so always return an empty byte string.
            return b""

        # Branch for when the complete file has been read before.
        if self._completeBuffer is not None:
            result = self._completeBuffer.read(size)
            self._current_position += len(result)
            return result

        if size == 0:
            # Nothing requested. Going to the server would need the
            # malformed header "bytes=N-(N-1)", which can come back as a
            # 416 (wrongly latching EOF) or as the entire body.
            return b""

        read_all = size < 0

        if read_all and self._current_position == 0:
            # The whole file has been requested; fetch it in one request.
            with time_this(self._log, msg="Read from remote resource %s", args=(self._url,)):
                resp = self._session.get(self._url, stream=False, timeout=self._timeout)
            if (code := resp.status_code) not in (requests.codes.ok, requests.codes.partial):
                raise FileNotFoundError(f"Unable to read resource {self._url}; status code: {code}")
            # Install the cache only once the request is known to have
            # succeeded, otherwise a failure would leave an empty buffer
            # behind and later reads would silently return nothing.
            buffer = io.BytesIO()
            buffer.write(resp.content)
            self._completeBuffer = buffer
            # write() left the buffer at its end, which is the new position.
            self._current_position = buffer.tell()
            return resp.content

        # A partial read is required, either because a size has been specified,
        # or a read has previously been done. Any time we specify a byte range
        # we must disable the gzip compression on the server since we want
        # to address ranges in the uncompressed file. If we send ranges that
        # are interpreted by the server as offsets into the compressed file
        # then that is at least confusing and also there is no guarantee that
        # the bytes can be uncompressed.
        end_pos: Union[int, str] = "" if read_all else self._current_position + (size - 1)
        headers = {"Range": f"bytes={self._current_position}-{end_pos}", "Accept-Encoding": "identity"}

        with time_this(
            self._log, msg="Read from remote resource %s using headers %s", args=(self._url, headers)
        ):
            resp = self._session.get(self._url, stream=False, timeout=self._timeout, headers=headers)

        if resp.status_code == requests.codes.range_not_satisfiable:
            # Must have run off the end of the file. A standard file handle
            # will treat this as EOF so be consistent with that. Do not change
            # the current position.
            self._eof = True
            return b""

        if (code := resp.status_code) not in (requests.codes.ok, requests.codes.partial):
            raise FileNotFoundError(
                f"Unable to read resource {self._url}, or bytes are out of range; status code: {code}"
            )

        content = resp.content
        len_content = len(content)

        # Verify this is not actually the whole file and the server did not
        # lie about supporting ranges. The length comparison only makes
        # sense for a bounded read: a read-to-end answered with 206 is a
        # genuine tail and must not be mistaken for the complete file.
        if code != requests.codes.partial or (not read_all and len_content > size):
            self._completeBuffer = io.BytesIO(content)
            # The body starts at byte zero of the resource, so serve the
            # request from the caller's position rather than from the top.
            self._completeBuffer.seek(self._current_position)
            return self.read(size=size)

        # The response header should tell us the total number of bytes
        # in the file and also the current position we have got to in the
        # server.
        content_range = resp.headers.get("Content-Range")
        if content_range is not None:
            units, _, range_string = content_range.partition(" ")
            if units == "bytes":
                byte_range, _, total = range_string.partition("/")
                _, dash, last_byte = byte_range.partition("-")
                # Expected form is "first-last/total"; total may be "*" when
                # unknown. Anything that does not parse is ignored instead
                # of failing a read that otherwise succeeded.
                if dash and last_byte.isdecimal() and total.isdecimal():
                    if int(last_byte) >= int(total) - 1:
                        self._eof = True
            else:
                self._log.warning("Requested byte range from server but instead got: %s", content_range)

        # Try to guess that we overran the end. This will not help if we
        # read exactly the number of bytes to get us to the end and so we
        # will need to do one more read and get a 416. A read-to-end has
        # by definition consumed everything that was left.
        if read_all or len_content < size:
            self._eof = True

        self._current_position += len_content
        return content