Coverage for python/lsst/daf/butler/formatters/file.py: 29%

Shortcuts on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

55 statements  

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

"""Support for reading and writing files to a POSIX file system."""

# The docstring above must be the first statement in the module for it to be
# picked up as ``__doc__``; a ``__future__`` import is allowed to follow it.
from __future__ import annotations

__all__ = ("FileFormatter",)

from abc import abstractmethod
from typing import TYPE_CHECKING, Any, Optional, Type

from lsst.daf.butler import Formatter

if TYPE_CHECKING:
    # Only needed for annotations, which are lazy under
    # ``from __future__ import annotations``.
    from lsst.daf.butler import StorageClass

35 

36 

class FileFormatter(Formatter):
    """Interface for reading and writing files on a POSIX file system.

    Subclasses must implement `_readFile` and `_writeFile`. They may
    additionally provide ``_fromBytes`` and ``_toBytes`` methods to enable
    `fromBytes` and `toBytes`; those are looked up dynamically with
    `hasattr`.
    """

    extension: Optional[str] = None
    """Default file extension to use for writing files. None means that no
    modifications will be made to the supplied file extension. (`str`)"""

    @abstractmethod
    def _readFile(self, path: str, pytype: Optional[Type[Any]] = None) -> Any:
        """Read a file from the path in the correct format.

        Parameters
        ----------
        path : `str`
            Path to use to open the file.
        pytype : `class`, optional
            Class to use to read the file.

        Returns
        -------
        data : `object`
            Data read from file. Returns `None` if the file can not be
            found at the given path.

        Raises
        ------
        Exception
            Some problem reading the file.
        """
        pass

    @abstractmethod
    def _writeFile(self, inMemoryDataset: Any) -> None:
        """Write the in memory dataset to file on disk.

        Parameters
        ----------
        inMemoryDataset : `object`
            Object to serialize.

        Raises
        ------
        Exception
            The file could not be written.
        """
        pass

    def _assembleDataset(self, data: Any, component: Optional[str] = None) -> Any:
        """Assemble and coerce the dataset, or one of its components,
        into an appropriate python type and return it.

        Parameters
        ----------
        data : `dict` or `object`
            Composite or a dict that, or which component, needs to be
            coerced to the python type specified in "fileDescriptor"
        component : `str`, optional
            Component to read from the file. Only used if the `StorageClass`
            for reading differed from the `StorageClass` used to write the
            file.

        Returns
        -------
        inMemoryDataset : `object`
            The requested data as a Python object. The type of object
            is controlled by the specific formatter.

        Raises
        ------
        ValueError
            The read and write storage classes differ, no component was
            requested, and the read storage class reports that it can not
            convert from the write storage class.
        """
        fileDescriptor = self.fileDescriptor
        writeStorageClass = fileDescriptor.storageClass

        # if read and write storage classes differ, more work is required
        readStorageClass = fileDescriptor.readStorageClass
        if readStorageClass != writeStorageClass:
            if component is None:
                # This likely means that type conversion is required but
                # it will be an error if no valid converter is available
                # for this pytype.
                if not readStorageClass.can_convert(writeStorageClass):
                    # NOTE: no trailing comma inside the replacement field;
                    # ``{x,}`` would format a one-element tuple.
                    raise ValueError(
                        f"Storage class inconsistency ({readStorageClass.name} vs"
                        f" {writeStorageClass.name}) but no"
                        " component requested or converter registered"
                    )
            else:
                # Concrete composite written as a single file (we hope)
                try:
                    data = writeStorageClass.delegate().getComponent(data, component)
                except AttributeError:
                    # Defer the complaint
                    data = None

        # Coerce to the requested type (not necessarily the type that was
        # written)
        data = self._coerceType(data, writeStorageClass, readStorageClass)

        return data

    def _coerceType(
        self, inMemoryDataset: Any, writeStorageClass: StorageClass, readStorageClass: StorageClass
    ) -> Any:
        """Coerce the supplied inMemoryDataset to the correct python type.

        Parameters
        ----------
        inMemoryDataset : `object`
            Object to coerce to expected type.
        writeStorageClass : `StorageClass`
            Storage class used to serialize this data. Not used by this
            implementation; the coercion is delegated entirely to
            ``readStorageClass``.
        readStorageClass : `StorageClass`
            Storage class requested as the outcome.

        Returns
        -------
        inMemoryDataset : `object`
            Object of expected type ``readStorageClass.pytype``.
        """
        return readStorageClass.coerce_type(inMemoryDataset)

    def read(self, component: Optional[str] = None) -> Any:
        """Read data from a file.

        The file to read is identified by ``self.fileDescriptor``.

        Parameters
        ----------
        component : `str`, optional
            Component to read from the file. Only used if the `StorageClass`
            for reading differed from the `StorageClass` used to write the
            file.

        Returns
        -------
        inMemoryDataset : `object`
            The requested data as a Python object. The type of object
            is controlled by the specific formatter.

        Raises
        ------
        ValueError
            No data could be read and no component was requested, or the
            read and write storage classes are inconsistent and no
            component or converter is available.
        NotImplementedError
            Formatter does not implement a method to read from files.
        """
        fileDescriptor = self.fileDescriptor

        # Read the file naively
        path = fileDescriptor.location.path
        data = self._readFile(path, fileDescriptor.storageClass.pytype)

        # Assemble the requested dataset and potentially return only its
        # component coercing it to its appropriate pytype
        data = self._assembleDataset(data, component)

        # Special case components by allowing a formatter to return None
        # to indicate that the component was understood but is missing
        if data is None and component is None:
            raise ValueError(f"Unable to read data with URI {self.fileDescriptor.location.uri}")

        return data

    def fromBytes(self, serializedDataset: bytes, component: Optional[str] = None) -> Any:
        """Read serialized data into a Dataset or its component.

        Parameters
        ----------
        serializedDataset : `bytes`
            Bytes object to unserialize.
        component : `str`, optional
            Component to read from the Dataset. Only used if the `StorageClass`
            for reading differed from the `StorageClass` used to write the
            file.

        Returns
        -------
        inMemoryDataset : `object`
            The requested data as a Python object. The type of object
            is controlled by the specific formatter.

        Raises
        ------
        NotImplementedError
            Formatter does not support reading from bytes.
        ValueError
            No data could be unpersisted and no component was requested.
        """
        if not hasattr(self, "_fromBytes"):
            raise NotImplementedError("Type does not support reading from bytes.")

        # mypy can not understand that the previous line protects this call
        data = self._fromBytes(serializedDataset, self.fileDescriptor.storageClass.pytype)  # type: ignore

        # Assemble the requested dataset and potentially return only its
        # component coercing it to its appropriate pytype
        data = self._assembleDataset(data, component)

        # Special case components by allowing a formatter to return None
        # to indicate that the component was understood but is missing
        if data is None and component is None:
            nbytes = len(serializedDataset)
            s = "s" if nbytes != 1 else ""
            raise ValueError(
                f"Unable to unpersist {nbytes} byte{s} from URI {self.fileDescriptor.location.uri}"
            )

        return data

    def write(self, inMemoryDataset: Any) -> None:
        """Write a Python object to a file.

        The location held by ``self.fileDescriptor`` is updated in place
        with the formatter-preferred file extension before writing.

        Parameters
        ----------
        inMemoryDataset : `object`
            The Python object to store.
        """
        fileDescriptor = self.fileDescriptor
        # Update the location with the formatter-preferred file extension
        fileDescriptor.location.updateExtension(self.extension)

        self._writeFile(inMemoryDataset)

    def toBytes(self, inMemoryDataset: Any) -> bytes:
        """Serialize the Dataset to bytes based on formatter.

        Parameters
        ----------
        inMemoryDataset : `object`
            Object to serialize.

        Returns
        -------
        serializedDataset : `bytes`
            Bytes representing the serialized dataset.

        Raises
        ------
        NotImplementedError
            Formatter does not support writing to bytes.
        """
        if not hasattr(self, "_toBytes"):
            raise NotImplementedError("Type does not support writing to bytes.")

        # mypy can not understand that the previous line protects this call
        return self._toBytes(inMemoryDataset)  # type: ignore