Coverage for python/lsst/daf/butler/formatters/file.py: 29%

57 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2022-11-08 22:06 -0800

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

"""Support for reading and writing files to a POSIX file system."""

# The docstring must be the first statement in the module to be stored as
# ``__doc__``; a ``from __future__`` import is allowed to follow it.
from __future__ import annotations

__all__ = ("FileFormatter",)

27 

28from abc import abstractmethod 

29from typing import TYPE_CHECKING, Any, Optional, Type 

30 

31from lsst.daf.butler import Formatter 

32 

33if TYPE_CHECKING: 33 ↛ 34line 33 didn't jump to line 34, because the condition on line 33 was never true

34 from lsst.daf.butler import StorageClass 

35 

36 

class FileFormatter(Formatter):
    """Interface for reading and writing files on a POSIX file system.

    Subclasses provide the format-specific I/O by implementing `_readFile`
    and `_writeFile`. Serialization to and from bytes is optional:
    `fromBytes` and `toBytes` raise `NotImplementedError` unless the
    formatter also provides ``_fromBytes`` and ``_toBytes`` respectively.
    """

    extension: Optional[str] = None
    """Default file extension to use for writing files. None means that no
    modifications will be made to the supplied file extension. (`str`)"""

    @abstractmethod
    def _readFile(self, path: str, pytype: Optional[Type[Any]] = None) -> Any:
        """Read a file from the path in the correct format.

        Parameters
        ----------
        path : `str`
            Path to use to open the file.
        pytype : `class`, optional
            Class to use to read the file.

        Returns
        -------
        data : `object`
            Data read from file. Returns `None` if the file can not be
            found at the given path.

        Raises
        ------
        Exception
            Some problem reading the file.
        """
        pass

    @abstractmethod
    def _writeFile(self, inMemoryDataset: Any) -> None:
        """Write the in memory dataset to file on disk.

        Parameters
        ----------
        inMemoryDataset : `object`
            Object to serialize.

        Raises
        ------
        Exception
            The file could not be written.
        """
        pass

    def _assembleDataset(self, data: Any, component: Optional[str] = None) -> Any:
        """Assemble and coerce the dataset, or one of its components,
        into an appropriate python type and return it.

        Parameters
        ----------
        data : `dict` or `object`
            Composite or a dict that, or which component, needs to be
            coerced to the python type specified in "fileDescriptor".
        component : `str`, optional
            Component to read from the file. Only used if the `StorageClass`
            for reading differed from the `StorageClass` used to write the
            file.

        Returns
        -------
        inMemoryDataset : `object`
            The requested data as a Python object. The type of object
            is controlled by the specific formatter.

        Raises
        ------
        ValueError
            Raised if the read and write storage classes differ, no
            component was requested, and the read storage class reports
            that it can not convert from the write storage class.
        """
        fileDescriptor = self.fileDescriptor

        # if read and write storage classes differ, more work is required
        readStorageClass = fileDescriptor.readStorageClass
        if readStorageClass != fileDescriptor.storageClass:
            if component is None:
                # This likely means that type conversion is required but
                # it will be an error if no valid converter is available
                # for this pytype.
                if not readStorageClass.can_convert(fileDescriptor.storageClass):
                    raise ValueError(
                        f"Storage class inconsistency ({readStorageClass.name} vs"
                        f" {fileDescriptor.storageClass.name}) but no"
                        " component requested or converter registered for"
                        f" python type {type(data)}"
                    )
            else:
                # Concrete composite written as a single file (we hope)
                try:
                    data = fileDescriptor.storageClass.delegate().getComponent(data, component)
                except AttributeError:
                    # Defer the complaint: the None is passed on to the
                    # coercion step below and then back to the caller.
                    data = None

        # Coerce to the requested type (not necessarily the type that was
        # written)
        data = self._coerceType(data, fileDescriptor.storageClass, readStorageClass)

        return data

    def _coerceType(
        self, inMemoryDataset: Any, writeStorageClass: StorageClass, readStorageClass: StorageClass
    ) -> Any:
        """Coerce the supplied inMemoryDataset to the correct python type.

        Parameters
        ----------
        inMemoryDataset : `object`
            Object to coerce to expected type.
        writeStorageClass : `StorageClass`
            Storage class used to serialize this data. Not used by this
            implementation; it is part of the signature so that overriding
            implementations can consult it.
        readStorageClass : `StorageClass`
            Storage class requested as the outcome.

        Returns
        -------
        inMemoryDataset : `object`
            Object of expected type ``readStorageClass.pytype``.
        """
        return readStorageClass.coerce_type(inMemoryDataset)

    def read(self, component: Optional[str] = None) -> Any:
        """Read data from a file.

        The file to read is identified by ``self.fileDescriptor``.

        Parameters
        ----------
        component : `str`, optional
            Component to read from the file. Only used if the `StorageClass`
            for reading differed from the `StorageClass` used to write the
            file.

        Returns
        -------
        inMemoryDataset : `object`
            The requested data as a Python object. The type of object
            is controlled by the specific formatter.

        Raises
        ------
        ValueError
            Raised if no data could be obtained and no component was
            requested, or if the read and write storage classes are
            inconsistent and no conversion is possible.
        NotImplementedError
            Formatter does not implement a method to read from files.
        """

        # Read the file naively
        path = self.fileDescriptor.location.path
        data = self._readFile(path, self.fileDescriptor.storageClass.pytype)

        # Assemble the requested dataset and potentially return only its
        # component coercing it to its appropriate pytype
        data = self._assembleDataset(data, component)

        # Special case components by allowing a formatter to return None
        # to indicate that the component was understood but is missing
        if data is None and component is None:
            raise ValueError(f"Unable to read data with URI {self.fileDescriptor.location.uri}")

        return data

    def fromBytes(self, serializedDataset: bytes, component: Optional[str] = None) -> Any:
        """Read serialized data into a Dataset or its component.

        Parameters
        ----------
        serializedDataset : `bytes`
            Bytes object to unserialize.
        component : `str`, optional
            Component to read from the Dataset. Only used if the `StorageClass`
            for reading differed from the `StorageClass` used to write the
            file.

        Returns
        -------
        inMemoryDataset : `object`
            The requested data as a Python object. The type of object
            is controlled by the specific formatter.

        Raises
        ------
        NotImplementedError
            Formatter does not support reading from bytes.
        ValueError
            Raised if no data could be obtained from the bytes and no
            component was requested.
        """
        if not hasattr(self, "_fromBytes"):
            raise NotImplementedError("Type does not support reading from bytes.")

        # mypy can not understand that the previous line protects this call
        data = self._fromBytes(serializedDataset, self.fileDescriptor.storageClass.pytype)  # type: ignore

        # Assemble the requested dataset and potentially return only its
        # component coercing it to its appropriate pytype
        data = self._assembleDataset(data, component)

        # Special case components by allowing a formatter to return None
        # to indicate that the component was understood but is missing
        if data is None and component is None:
            nbytes = len(serializedDataset)
            s = "s" if nbytes != 1 else ""
            raise ValueError(
                f"Unable to unpersist {nbytes} byte{s} from URI {self.fileDescriptor.location.uri}"
            )

        return data

    def write(self, inMemoryDataset: Any) -> None:
        """Write a Python object to a file.

        The location held by ``self.fileDescriptor`` is first updated with
        the formatter-preferred file extension, then the write is delegated
        to `_writeFile`.

        Parameters
        ----------
        inMemoryDataset : `object`
            The Python object to store.
        """
        fileDescriptor = self.fileDescriptor
        # Update the location with the formatter-preferred file extension
        fileDescriptor.location.updateExtension(self.extension)

        self._writeFile(inMemoryDataset)

    def toBytes(self, inMemoryDataset: Any) -> bytes:
        """Serialize the Dataset to bytes based on formatter.

        Parameters
        ----------
        inMemoryDataset : `object`
            Object to serialize.

        Returns
        -------
        serializedDataset : `bytes`
            Bytes representing the serialized dataset.

        Raises
        ------
        NotImplementedError
            Formatter does not support writing to bytes.
        """
        if not hasattr(self, "_toBytes"):
            raise NotImplementedError("Type does not support writing to bytes.")

        # mypy can not understand that the previous line protects this call
        return self._toBytes(inMemoryDataset)  # type: ignore