Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24import os 

25import os.path 

26import logging 

27import tempfile 

28 

29__all__ = ('ButlerS3URI',) 

30 

31from typing import ( 

32 TYPE_CHECKING, 

33 Optional, 

34 Tuple, 

35 Union, 

36) 

37 

38from .utils import NoTransaction 

39from ._butlerUri import ButlerURI 

40from .s3utils import getS3Client, s3CheckFileExists, bucketExists 

41 

42 

43if TYPE_CHECKING: 43 ↛ 44line 43 didn't jump to line 44, because the condition on line 43 was never true

44 try: 

45 import boto3 

46 except ImportError: 

47 pass 

48 from ..datastore import DatastoreTransaction 

49 

50log = logging.getLogger(__name__) 

51 

52 

class ButlerS3URI(ButlerURI):
    """URI referring to a resource held in an S3 bucket.

    The bucket name is taken from ``netloc`` and the object key from
    ``relativeToPathRoot``.
    """

    @property
    def client(self) -> boto3.client:
        """Client object to address remote resource."""
        return getS3Client()

    def exists(self) -> bool:
        """Check whether the remote resource exists.

        Returns
        -------
        exists : `bool`
            For a root URI this reports whether the bucket exists, since
            the path is irrelevant. Otherwise it is the existence flag
            returned by `s3CheckFileExists` for this URI.
        """
        if self.is_root:
            # Only check for the bucket since the path is irrelevant
            return bucketExists(self.netloc)
        exists, _ = s3CheckFileExists(self, client=self.client)
        return exists

    def size(self) -> int:
        """Return the size of the remote resource.

        Returns
        -------
        sz : `int`
            Always 0 for a directory-like URI, otherwise the size reported
            by `s3CheckFileExists` for this URI.
        """
        if self.dirLike:
            return 0
        _, sz = s3CheckFileExists(self, client=self.client)
        return sz

    def remove(self) -> None:
        """Remove the resource."""

        # https://github.com/boto/boto3/issues/507 - there is no
        # way of knowing if the file was actually deleted except
        # for checking all the keys again, since the response is
        # HTTP 204 OK all the time.
        self.client.delete_object(Bucket=self.netloc, Key=self.relativeToPathRoot)

    def read(self, size: int = -1) -> bytes:
        """Read the contents of the remote resource.

        Parameters
        ----------
        size : `int`, optional
            Number of bytes to read from the start of the object. Any
            value that is not positive (including the default) results in
            the whole object being read.

        Returns
        -------
        body : `bytes`
            The bytes read from the object.

        Raises
        ------
        FileNotFoundError
            Raised if the client reports that the key or the bucket does
            not exist.
        """
        # Look the client up once; it is needed for the request and for
        # the exception classes in the handler below.
        client = self.client
        args = {}
        if size > 0:
            # HTTP byte ranges are inclusive at both ends.
            args["Range"] = f"bytes=0-{size-1}"
        try:
            response = client.get_object(Bucket=self.netloc,
                                         Key=self.relativeToPathRoot,
                                         **args)
        except (client.exceptions.NoSuchKey, client.exceptions.NoSuchBucket) as err:
            raise FileNotFoundError(f"No such resource: {self}") from err
        stream = response["Body"]
        try:
            return stream.read()
        finally:
            # Always release the body, even if the read itself fails.
            stream.close()

    def write(self, data: bytes, overwrite: bool = True) -> None:
        """Write the supplied bytes to the remote resource.

        Parameters
        ----------
        data : `bytes`
            The bytes to store in the object.
        overwrite : `bool`, optional
            If `False`, refuse to replace a resource that already exists.
            Defaults to `True`.

        Raises
        ------
        FileExistsError
            Raised if ``overwrite`` is `False` and the resource exists.
        """
        if not overwrite and self.exists():
            raise FileExistsError(f"Remote resource {self} exists and overwrite has been disabled")
        self.client.put_object(Bucket=self.netloc, Key=self.relativeToPathRoot,
                               Body=data)

    def mkdir(self) -> None:
        """Create a key representing this directory-like URI.

        Raises
        ------
        ValueError
            Raised if the bucket does not exist or if this URI is not
            directory-like.
        """
        if not bucketExists(self.netloc):
            raise ValueError(f"Bucket {self.netloc} does not exist for {self}!")

        if not self.dirLike:
            raise ValueError(f"Can not create a 'directory' for file-like URI {self}")

        # Don't create an S3 key when the root is at the top level of a
        # bucket.
        if self.path != "/":
            self.client.put_object(Bucket=self.netloc, Key=self.relativeToPathRoot)

    def as_local(self) -> Tuple[str, bool]:
        """Download object from S3 and place in temporary directory.

        Returns
        -------
        path : `str`
            Path to local temporary file.
        temporary : `bool`
            Always returns `True`. This is always a temporary file.

        Notes
        -----
        The temporary file is created with ``delete=False`` so the caller
        owns it on success. If the download fails the file is removed
        before the exception is re-raised.
        """
        tmpFile = tempfile.NamedTemporaryFile(suffix=self.getExtension(), delete=False)
        try:
            with tmpFile:
                self.client.download_fileobj(self.netloc, self.relativeToPathRoot, tmpFile)
        except Exception:
            # The file handle is closed by now; do not leave a partial
            # download behind for a caller that never receives its name.
            os.remove(tmpFile.name)
            raise
        return tmpFile.name, True

    def transfer_from(self, src: ButlerURI, transfer: str = "copy",
                      overwrite: bool = False,
                      transaction: Optional[Union[DatastoreTransaction, NoTransaction]] = None) -> None:
        """Transfer the resource at ``src`` to this S3 location.

        Parameters
        ----------
        src : `ButlerURI`
            Source URI.
        transfer : `str`
            Mode to use for transferring the resource. Supports the following
            options: copy. A value of ``auto`` is replaced by
            ``self.transferDefault`` and ``move`` removes ``src`` once the
            transfer has completed.
        overwrite : `bool`, optional
            Allow an existing file to be overwritten. Defaults to `False`.
        transaction : `DatastoreTransaction`, optional
            Currently unused.

        Raises
        ------
        ValueError
            Raised if ``transfer`` is not one of ``self.transferModes``.
        FileExistsError
            Raised if the destination exists and ``overwrite`` is `False`.
        """
        # Fail early to prevent delays if remote resources are requested
        if transfer not in self.transferModes:
            raise ValueError(f"Transfer mode '{transfer}' not supported by URI scheme {self.scheme}")

        # The existence checks are remote calls, so only make them when the
        # debug message is actually going to be emitted.
        if log.isEnabledFor(logging.DEBUG):
            log.debug("Transferring %s [exists: %s] -> %s [exists: %s] (transfer=%s)",
                      src, src.exists(), self, self.exists(), transfer)

        if not overwrite and self.exists():
            raise FileExistsError(f"Destination path '{self}' already exists.")

        if transfer == "auto":
            transfer = self.transferDefault

        if isinstance(src, type(self)):
            # Looks like an S3 remote uri so we can use direct copy
            # note that boto3.resource.meta.copy is cleverer than the low
            # level copy_object
            copy_source = {
                "Bucket": src.netloc,
                "Key": src.relativeToPathRoot,
            }
            self.client.copy_object(CopySource=copy_source, Bucket=self.netloc, Key=self.relativeToPathRoot)
        else:
            # Use local file and upload it
            local_src, is_temporary = src.as_local()

            # resource.meta.upload_file seems like the right thing
            # but we have a low level client
            try:
                with open(local_src, "rb") as fh:
                    self.client.put_object(Bucket=self.netloc,
                                           Key=self.relativeToPathRoot, Body=fh)
            finally:
                # Clean up the temporary copy even when the upload fails.
                if is_temporary:
                    os.remove(local_src)

        # This was an explicit move requested from a remote resource
        # try to remove that resource
        if transfer == "move":
            # Transactions do not work here
            src.remove()

181 

182 # This was an explicit move requested from a remote resource 

183 # try to remove that resource 

184 if transfer == "move": 184 ↛ 186line 184 didn't jump to line 186, because the condition on line 184 was never true

185 # Transactions do not work here 

186 src.remove()