Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24import logging 

25import tempfile 

26 

27__all__ = ('ButlerS3URI',) 

28 

29from typing import ( 

30 TYPE_CHECKING, 

31 Optional, 

32 Any, 

33 Callable, 

34 Tuple, 

35 Union, 

36) 

37 

38from .utils import NoTransaction 

39from ._butlerUri import ButlerURI 

40from .s3utils import getS3Client, s3CheckFileExists, bucketExists 

41 

42from botocore.exceptions import ClientError 

43from http.client import ImproperConnectionState, HTTPException 

44from urllib3.exceptions import RequestError, HTTPError 

45 

46if TYPE_CHECKING: 46 ↛ 47line 46 didn't jump to line 47, because the condition on line 46 was never true

47 try: 

48 import boto3 

49 except ImportError: 

50 pass 

51 from ..datastore import DatastoreTransaction 

52 

# https://pypi.org/project/backoff/
try:
    import backoff
except ImportError:
    class Backoff:
        """Do-nothing stand-in used when the optional ``backoff`` package
        is not installed.

        Only the two names used as retry decorators in this module are
        provided. With this stand-in no retries are performed: decorated
        callables are returned unchanged.
        """

        @staticmethod
        def expo(func: Callable, *args: Any, **kwargs: Any) -> Callable:
            """Placeholder for the ``backoff.expo`` wait generator.

            It is only ever passed as an argument to `on_exception`, which
            ignores it. If called directly it returns its first argument
            unchanged.
            """
            return func

        @staticmethod
        def on_exception(*args: Any, **kwargs: Any) -> Callable[[Callable], Callable]:
            """Return a decorator that leaves the decorated callable as is.

            All arguments (wait generator, exception types, ``max_time``,
            ...) are accepted and ignored, so the decorator does not depend
            on which wait generator was supplied.
            """
            def decorator(func: Callable) -> Callable:
                return func

            return decorator

    backoff = Backoff

67 

# Configuration for the "backoff" retry decorators used below. Boto3 already
# retries internally; these extra retries exist because S3-compatible
# providers do not all report errors with the same semantics.

# Transport-level failures.
retryable_io_errors = (
    ImproperConnectionState,  # http.client
    HTTPException,            # http.client
    RequestError,             # urllib3.exceptions
    HTTPError,                # urllib3.exceptions
    TimeoutError,             # built-in
    ConnectionError,          # built-in
)

# Failures reported by the S3 client itself.
retryable_client_errors = (
    ClientError,              # botocore.exceptions
    PermissionError,          # built-in
)

# Client errors first, then I/O errors.
all_retryable_errors = retryable_client_errors + retryable_io_errors

# Passed as ``max_time`` to the backoff decorators.
max_retry_time = 60


log = logging.getLogger(__name__)

88 

89 

class ButlerS3URI(ButlerURI):
    """S3 URI.

    The bucket name is taken from ``netloc`` and the object key from
    ``relativeToPathRoot``.
    """

    @property
    def client(self) -> boto3.client:
        """Client object to address remote resource."""
        return getS3Client()

    @backoff.on_exception(backoff.expo, retryable_client_errors, max_time=max_retry_time)
    def exists(self) -> bool:
        """Check whether the remote resource exists.

        Returns
        -------
        exists : `bool`
            For a root URI, whether the bucket exists. Otherwise the
            existence flag reported by `s3CheckFileExists`.
        """
        if self.is_root:
            # Only check for the bucket since the path is irrelevant
            return bucketExists(self.netloc)
        exists, _ = s3CheckFileExists(self, client=self.client)
        return exists

    @backoff.on_exception(backoff.expo, retryable_client_errors, max_time=max_retry_time)
    def size(self) -> int:
        """Return the size of the remote resource.

        Returns
        -------
        sz : `int`
            ``0`` for a directory-like URI, otherwise the size reported by
            `s3CheckFileExists`.
        """
        if self.dirLike:
            return 0
        _, sz = s3CheckFileExists(self, client=self.client)
        return sz

    @backoff.on_exception(backoff.expo, retryable_client_errors, max_time=max_retry_time)
    def remove(self) -> None:
        """Remove the resource."""

        # https://github.com/boto/boto3/issues/507 - there is no
        # way of knowing if the file was actually deleted except
        # for checking all the keys again, response is HTTP 204 OK
        # response all the time
        self.client.delete_object(Bucket=self.netloc, Key=self.relativeToPathRoot)

    @backoff.on_exception(backoff.expo, all_retryable_errors, max_time=max_retry_time)
    def read(self, size: int = -1) -> bytes:
        """Read the contents of the remote resource.

        Parameters
        ----------
        size : `int`, optional
            If positive, only the first ``size`` bytes are requested.
            Any other value requests the whole object.

        Returns
        -------
        body : `bytes`
            The bytes read.

        Raises
        ------
        FileNotFoundError
            Raised if the key or the bucket does not exist.
        """
        args = {}
        if size > 0:
            args["Range"] = f"bytes=0-{size-1}"
        try:
            response = self.client.get_object(Bucket=self.netloc,
                                              Key=self.relativeToPathRoot,
                                              **args)
        except (self.client.exceptions.NoSuchKey, self.client.exceptions.NoSuchBucket) as err:
            raise FileNotFoundError(f"No such resource: {self}") from err
        stream = response["Body"]
        try:
            return stream.read()
        finally:
            # Release the connection even if the read fails part way.
            stream.close()

    @backoff.on_exception(backoff.expo, all_retryable_errors, max_time=max_retry_time)
    def write(self, data: bytes, overwrite: bool = True) -> None:
        """Write the supplied bytes to the remote resource.

        Parameters
        ----------
        data : `bytes`
            Bytes to store.
        overwrite : `bool`, optional
            If `False`, refuse to replace an existing resource.

        Raises
        ------
        FileExistsError
            Raised if the resource exists and ``overwrite`` is `False`.
        """
        if not overwrite and self.exists():
            raise FileExistsError(f"Remote resource {self} exists and overwrite has been disabled")
        self.client.put_object(Bucket=self.netloc, Key=self.relativeToPathRoot,
                               Body=data)

    @backoff.on_exception(backoff.expo, all_retryable_errors, max_time=max_retry_time)
    def mkdir(self) -> None:
        """Create the "directory" key for a directory-like URI.

        Raises
        ------
        ValueError
            Raised if the bucket does not exist or if this URI is not
            directory-like.
        """
        if not bucketExists(self.netloc):
            raise ValueError(f"Bucket {self.netloc} does not exist for {self}!")

        if not self.dirLike:
            raise ValueError(f"Can not create a 'directory' for file-like URI {self}")

        # don't create S3 key when root is at the top-level of a Bucket
        if self.path != "/":
            self.client.put_object(Bucket=self.netloc, Key=self.relativeToPathRoot)

    @backoff.on_exception(backoff.expo, all_retryable_errors, max_time=max_retry_time)
    def _as_local(self) -> Tuple[str, bool]:
        """Download object from S3 and place in temporary directory.

        Returns
        -------
        path : `str`
            Path to local temporary file.
        temporary : `bool`
            Always returns `True`. This is always a temporary file.

        Notes
        -----
        If the download fails the temporary file is removed before the
        exception propagates, so failed (and retried) attempts do not
        leave files behind.
        """
        import os

        tmpFile = tempfile.NamedTemporaryFile(suffix=self.getExtension(), delete=False)
        try:
            with tmpFile:
                self.client.download_fileobj(self.netloc, self.relativeToPathRoot, tmpFile)
        except Exception:
            # The file was created with delete=False so nothing else will
            # clean it up. Removal is best effort; the original error is
            # the one that matters.
            try:
                os.remove(tmpFile.name)
            except OSError:
                log.debug("Could not remove temporary file %s", tmpFile.name)
            raise
        return tmpFile.name, True

    @backoff.on_exception(backoff.expo, all_retryable_errors, max_time=max_retry_time)
    def transfer_from(self, src: ButlerURI, transfer: str = "copy",
                      overwrite: bool = False,
                      transaction: Optional[Union[DatastoreTransaction, NoTransaction]] = None) -> None:
        """Transfer the resource at ``src`` to this S3 location.

        Parameters
        ----------
        src : `ButlerURI`
            Source URI.
        transfer : `str`
            Mode to use for transferring the resource. Must be one of
            ``self.transferModes``. ``auto`` is replaced by
            ``self.transferDefault`` and ``move`` removes ``src`` once
            the data has been copied.
        overwrite : `bool`, optional
            Allow an existing file to be overwritten. Defaults to `False`.
        transaction : `DatastoreTransaction`, optional
            Currently unused.

        Raises
        ------
        ValueError
            Raised if the transfer mode is not supported.
        FileExistsError
            Raised if the destination exists and ``overwrite`` is `False`.
        """
        # Fail early to prevent delays if remote resources are requested
        if transfer not in self.transferModes:
            raise ValueError(f"Transfer mode '{transfer}' not supported by URI scheme {self.scheme}")

        # The existence checks may be remote calls, so only make them
        # when the message will actually be emitted.
        if log.isEnabledFor(logging.DEBUG):
            log.debug("Transferring %s [exists: %s] -> %s [exists: %s] (transfer=%s)",
                      src, src.exists(), self, self.exists(), transfer)

        if not overwrite and self.exists():
            raise FileExistsError(f"Destination path '{self}' already exists.")

        if transfer == "auto":
            transfer = self.transferDefault

        if isinstance(src, type(self)):
            # Looks like an S3 remote uri so we can use direct copy
            # note that boto3.resource.meta.copy is cleverer than the low
            # level copy_object
            copy_source = {
                "Bucket": src.netloc,
                "Key": src.relativeToPathRoot,
            }
            self.client.copy_object(CopySource=copy_source, Bucket=self.netloc, Key=self.relativeToPathRoot)
        else:
            # Use local file and upload it
            with src.as_local() as local_uri:

                # resource.meta.upload_file seems like the right thing
                # but we have a low level client
                with open(local_uri.ospath, "rb") as fh:
                    self.client.put_object(Bucket=self.netloc,
                                           Key=self.relativeToPathRoot, Body=fh)

        # This was an explicit move requested from a remote resource
        # try to remove that resource
        if transfer == "move":
            # Transactions do not work here
            src.remove()

228 src.remove()