Coverage for python/lsst/daf/butler/formatters/file.py: 29%

Shortcuts on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

55 statements  

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24"""Support for reading and writing files to a POSIX file system.""" 

25 

26__all__ = ("FileFormatter",) 

27 

28from abc import abstractmethod 

29 

30from typing import ( 

31 TYPE_CHECKING, 

32 Any, 

33 Optional, 

34 Type, 

35) 

36 

37from lsst.daf.butler import Formatter 

38 

39if TYPE_CHECKING:  # 39 ↛ 40: line 39 didn't jump to line 40, because the condition on line 39 was never true

40 from lsst.daf.butler import StorageClass 

41 

42 

43class FileFormatter(Formatter): 

44 """Interface for reading and writing files on a POSIX file system. 

45 """ 

46 

47 extension: Optional[str] = None 

48 """Default file extension to use for writing files. None means that no 

49 modifications will be made to the supplied file extension. (`str`)""" 

50 

@abstractmethod
def _readFile(self, path: str, pytype: Optional[Type[Any]] = None) -> Any:
    """Load the contents of a file using this formatter's format.

    This is an abstract hook: the implementation here performs no work
    and simply returns `None`.

    Parameters
    ----------
    path : `str`
        Location of the file that should be opened.
    pytype : `class`, optional
        Python type that should be used when reading the file.

    Returns
    -------
    data : `object`
        Whatever was read from the file, or `None` when no file exists
        at ``path``.

    Raises
    ------
    Exception
        Raised if the file could not be read for some reason.
    """
    return None

74 

@abstractmethod
def _writeFile(self, inMemoryDataset: Any) -> None:
    """Serialize an in-memory dataset to a file on disk.

    This is an abstract hook: the implementation here performs no work
    and returns `None`.

    Parameters
    ----------
    inMemoryDataset : `object`
        The object that should be serialized.

    Raises
    ------
    Exception
        Raised if the file could not be written.
    """
    return None

90 

def _assembleDataset(self, data: Any, component: Optional[str] = None) -> Any:
    """Assemble and coerce the dataset, or one of its components, into
    an appropriate python type and return it.

    Parameters
    ----------
    data : `dict` or `object`
        Composite or a dict that, or which component, needs to be
        coerced to the python type specified in "fileDescriptor".
    component : `str`, optional
        Component to read from the file. Only used if the `StorageClass`
        for reading differed from the `StorageClass` used to write the
        file.

    Returns
    -------
    inMemoryDataset : `object`
        The requested data as a Python object. The type of object
        is controlled by the specific formatter. Can be `None` if a
        requested component could not be extracted.

    Raises
    ------
    ValueError
        Raised if the read and write storage classes differ, no component
        was requested, and the read storage class reports that it can not
        convert from the write storage class.
    """
    fileDescriptor = self.fileDescriptor
    writeStorageClass = fileDescriptor.storageClass
    readStorageClass = fileDescriptor.readStorageClass

    # If read and write storage classes differ, more work is required.
    if readStorageClass != writeStorageClass:
        if component is None:
            # This likely means that type conversion is required but
            # it will be an error if no valid converter is available
            # for this pytype.
            if not readStorageClass.can_convert(writeStorageClass):
                # NOTE: the name is interpolated directly; a trailing comma
                # inside the braces would format a 1-tuple instead.
                raise ValueError(
                    f"Storage class inconsistency ({readStorageClass.name} vs"
                    f" {writeStorageClass.name}) but no"
                    " component requested or converter registered"
                )
        else:
            # Concrete composite written as a single file (we hope).
            try:
                data = writeStorageClass.delegate().getComponent(data, component)
            except AttributeError:
                # Defer the complaint: the caller decides whether a
                # missing component is an error.
                data = None

    # Coerce to the requested type (not necessarily the type that was
    # written).
    data = self._coerceType(data, writeStorageClass, readStorageClass)

    return data

137 

def _coerceType(self, inMemoryDataset: Any, writeStorageClass: StorageClass,
                readStorageClass: StorageClass) -> Any:
    """Convert the supplied dataset to the python type wanted by the reader.

    Parameters
    ----------
    inMemoryDataset : `object`
        The object that should be coerced to the expected type.
    writeStorageClass : `StorageClass`
        Storage class that was used to serialize the data. It is not
        consulted by this implementation.
    readStorageClass : `StorageClass`
        Storage class requested as the outcome; the conversion is
        delegated to its ``coerce_type`` method.

    Returns
    -------
    inMemoryDataset : `object`
        Object of expected type ``readStorageClass.pytype``.
    """
    coerced = readStorageClass.coerce_type(inMemoryDataset)
    return coerced

157 

def read(self, component: Optional[str] = None) -> Any:
    """Read data from the file identified by this formatter's
    ``fileDescriptor``.

    Parameters
    ----------
    component : `str`, optional
        Component to read from the file. Only used if the `StorageClass`
        for reading differed from the `StorageClass` used to write the
        file.

    Returns
    -------
    inMemoryDataset : `object`
        The requested data as a Python object. The type of object
        is controlled by the specific formatter. Can be `None` when a
        component was requested.

    Raises
    ------
    ValueError
        Raised if no component was requested and no data could be
        obtained from the file.
    """
    descriptor = self.fileDescriptor

    # Naive read of the whole file using the python type it was written as.
    raw = self._readFile(descriptor.location.path, descriptor.storageClass.pytype)

    # Build the requested dataset, possibly narrowing it to a single
    # component, and coerce it to the python type wanted by the caller.
    result = self._assembleDataset(raw, component)

    # A formatter may return None for a component to signal that the
    # component was understood but is missing; only a missing full
    # dataset is treated as an error.
    if result is not None or component is not None:
        return result

    raise ValueError(f"Unable to read data with URI {descriptor.location.uri}")

200 

def fromBytes(self, serializedDataset: bytes, component: Optional[str] = None) -> Any:
    """Deserialize bytes into a Dataset or one of its components.

    Parameters
    ----------
    serializedDataset : `bytes`
        Bytes object to unserialize.
    component : `str`, optional
        Component to read from the Dataset. Only used if the `StorageClass`
        for reading differed from the `StorageClass` used to write the
        file.

    Returns
    -------
    inMemoryDataset : `object`
        The requested data as a Python object. The type of object
        is controlled by the specific formatter. Can be `None` when a
        component was requested.

    Raises
    ------
    NotImplementedError
        Raised if this formatter has no ``_fromBytes`` attribute.
    ValueError
        Raised if no component was requested and no data could be
        obtained from the bytes.
    """
    if not hasattr(self, "_fromBytes"):
        raise NotImplementedError("Type does not support reading from bytes.")

    # mypy can not understand that the check above protects this call
    decoded = self._fromBytes(  # type: ignore
        serializedDataset,
        self.fileDescriptor.storageClass.pytype,
    )

    # Build the requested dataset, possibly narrowing it to a single
    # component, and coerce it to the python type wanted by the caller.
    result = self._assembleDataset(decoded, component)

    # A formatter may return None for a component to signal that the
    # component was understood but is missing; only a missing full
    # dataset is treated as an error.
    if result is not None or component is not None:
        return result

    size = len(serializedDataset)
    plural = "" if size == 1 else "s"
    raise ValueError(f"Unable to unpersist {size} byte{plural} from "
                     f"URI {self.fileDescriptor.location.uri}")

244 

def write(self, inMemoryDataset: Any) -> None:
    """Write a Python object to a file.

    The location held by this formatter's ``fileDescriptor`` is first
    updated with the formatter's ``extension``, then the actual
    serialization is delegated to ``_writeFile``.

    Parameters
    ----------
    inMemoryDataset : `object`
        The Python object to store.

    Returns
    -------
    None
        Nothing is returned by this method.
    """
    # Apply the formatter-preferred file extension before writing.
    self.fileDescriptor.location.updateExtension(self.extension)

    self._writeFile(inMemoryDataset)

263 

def toBytes(self, inMemoryDataset: Any) -> bytes:
    """Serialize the Dataset to bytes based on formatter.

    Parameters
    ----------
    inMemoryDataset : `object`
        Object to serialize.

    Returns
    -------
    serializedDataset : `bytes`
        Bytes representing the serialized dataset, as returned by the
        formatter's ``_toBytes`` method.

    Raises
    ------
    NotImplementedError
        Raised if this formatter has no ``_toBytes`` attribute, i.e. it
        does not support writing to bytes.
    """
    if not hasattr(self, "_toBytes"):
        # This is the serialization path, so the message refers to
        # writing rather than reading.
        raise NotImplementedError("Type does not support writing to bytes.")

    # mypy can not understand that the check above protects this call
    return self._toBytes(inMemoryDataset)  # type: ignore