Coverage for python/lsst/daf/butler/formatters/file.py: 30%

58 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2022-10-28 09:59 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24"""Support for reading and writing files to a POSIX file system.""" 

25 

26__all__ = ("FileFormatter",) 

27 

28from abc import abstractmethod 

29from typing import TYPE_CHECKING, Any, Optional, Type 

30 

31from lsst.daf.butler import Formatter 

32from lsst.utils.introspection import get_full_type_name 

33 

34if TYPE_CHECKING: 34 ↛ 35line 34 didn't jump to line 35, because the condition on line 34 was never true

35 from lsst.daf.butler import StorageClass 

36 

37 

class FileFormatter(Formatter):
    """Interface for reading and writing files on a POSIX file system."""

    extension: Optional[str] = None
    """Default file extension to use for writing files. None means that no
    modifications will be made to the supplied file extension. (`str`)"""

    @abstractmethod
    def _readFile(self, path: str, pytype: Optional[Type[Any]] = None) -> Any:
        """Read a file from the path in the correct format.

        Parameters
        ----------
        path : `str`
            Path to use to open the file.
        pytype : `class`, optional
            Class to use to read the file.

        Returns
        -------
        data : `object`
            Data read from file. Returns `None` if the file can not be
            found at the given path.

        Raises
        ------
        Exception
            Some problem reading the file.
        """
        pass

    @abstractmethod
    def _writeFile(self, inMemoryDataset: Any) -> None:
        """Write the in memory dataset to file on disk.

        Parameters
        ----------
        inMemoryDataset : `object`
            Object to serialize.

        Raises
        ------
        Exception
            The file could not be written.
        """
        pass

    def _assembleDataset(self, data: Any, component: Optional[str] = None) -> Any:
        """Assemble and coerce the dataset, or one of its components,
        into an appropriate python type and return it.

        Parameters
        ----------
        data : `dict` or `object`
            Composite or a dict that, or which component, needs to be
            coerced to the python type specified in "fileDescriptor".
        component : `str`, optional
            Component to read from the file. Only used if the `StorageClass`
            for reading differed from the `StorageClass` used to write the
            file.

        Returns
        -------
        inMemoryDataset : `object`
            The requested data as a Python object. The type of object
            is controlled by the specific formatter.

        Raises
        ------
        ValueError
            Raised if the read and write storage classes differ, no
            component was requested, and the read storage class can not
            convert from the write storage class.
        """
        fileDescriptor = self.fileDescriptor

        # if read and write storage classes differ, more work is required
        readStorageClass = fileDescriptor.readStorageClass
        if readStorageClass != fileDescriptor.storageClass:
            if component is None:
                # This likely means that type conversion is required but
                # it will be an error if no valid converter is available
                # for this pytype.
                if not readStorageClass.can_convert(fileDescriptor.storageClass):
                    raise ValueError(
                        f"Storage class inconsistency ({readStorageClass.name} vs"
                        f" {fileDescriptor.storageClass.name}) but no"
                        " component requested or converter registered for"
                        f" converting type {get_full_type_name(fileDescriptor.storageClass.pytype)}"
                        f" to {get_full_type_name(readStorageClass.pytype)}."
                    )
            else:
                # Concrete composite written as a single file (we hope)
                try:
                    data = fileDescriptor.storageClass.delegate().getComponent(data, component)
                except AttributeError:
                    # Defer the complaint: the caller decides whether a
                    # missing component (None) is an error.
                    data = None

        # Coerce to the requested type (not necessarily the type that was
        # written)
        data = self._coerceType(data, fileDescriptor.storageClass, readStorageClass)

        return data

    def _coerceType(
        self, inMemoryDataset: Any, writeStorageClass: StorageClass, readStorageClass: StorageClass
    ) -> Any:
        """Coerce the supplied inMemoryDataset to the correct python type.

        Parameters
        ----------
        inMemoryDataset : `object`
            Object to coerce to expected type.
        writeStorageClass : `StorageClass`
            Storage class used to serialize this data. Not consulted by
            this implementation; the coercion is delegated entirely to
            ``readStorageClass``.
        readStorageClass : `StorageClass`
            Storage class requested as the outcome.

        Returns
        -------
        inMemoryDataset : `object`
            Object of expected type ``readStorageClass.pytype``.
        """
        return readStorageClass.coerce_type(inMemoryDataset)

    def read(self, component: Optional[str] = None) -> Any:
        """Read data from a file.

        The file to read is identified by ``self.fileDescriptor``.

        Parameters
        ----------
        component : `str`, optional
            Component to read from the file. Only used if the `StorageClass`
            for reading differed from the `StorageClass` used to write the
            file.

        Returns
        -------
        inMemoryDataset : `object`
            The requested data as a Python object. The type of object
            is controlled by the specific formatter.

        Raises
        ------
        ValueError
            Raised if no data could be read and no component was requested,
            or if the read and write storage classes are inconsistent and
            no conversion is possible.
        """

        # Read the file naively
        path = self.fileDescriptor.location.path
        data = self._readFile(path, self.fileDescriptor.storageClass.pytype)

        # Assemble the requested dataset and potentially return only its
        # component coercing it to its appropriate pytype
        data = self._assembleDataset(data, component)

        # Special case components by allowing a formatter to return None
        # to indicate that the component was understood but is missing
        if data is None and component is None:
            raise ValueError(f"Unable to read data with URI {self.fileDescriptor.location.uri}")

        return data

    def fromBytes(self, serializedDataset: bytes, component: Optional[str] = None) -> Any:
        """Read serialized data into a Dataset or its component.

        Parameters
        ----------
        serializedDataset : `bytes`
            Bytes object to unserialize.
        component : `str`, optional
            Component to read from the Dataset. Only used if the `StorageClass`
            for reading differed from the `StorageClass` used to write the
            file.

        Returns
        -------
        inMemoryDataset : `object`
            The requested data as a Python object. The type of object
            is controlled by the specific formatter.

        Raises
        ------
        NotImplementedError
            Formatter does not support reading from bytes.
        ValueError
            Raised if nothing could be unpersisted from the bytes and no
            component was requested.
        """
        if not hasattr(self, "_fromBytes"):
            raise NotImplementedError("Type does not support reading from bytes.")

        # mypy can not understand that the previous line protects this call
        data = self._fromBytes(serializedDataset, self.fileDescriptor.storageClass.pytype)  # type: ignore

        # Assemble the requested dataset and potentially return only its
        # component coercing it to its appropriate pytype
        data = self._assembleDataset(data, component)

        # Special case components by allowing a formatter to return None
        # to indicate that the component was understood but is missing
        if data is None and component is None:
            nbytes = len(serializedDataset)
            s = "s" if nbytes != 1 else ""
            raise ValueError(
                f"Unable to unpersist {nbytes} byte{s} from URI {self.fileDescriptor.location.uri}"
            )

        return data

    def write(self, inMemoryDataset: Any) -> None:
        """Write a Python object to a file.

        The location held by ``self.fileDescriptor`` is updated in place
        with the formatter-preferred file extension before the dataset is
        written.

        Parameters
        ----------
        inMemoryDataset : `object`
            The Python object to store.
        """
        fileDescriptor = self.fileDescriptor
        # Update the location with the formatter-preferred file extension
        fileDescriptor.location.updateExtension(self.extension)

        self._writeFile(inMemoryDataset)

    def toBytes(self, inMemoryDataset: Any) -> bytes:
        """Serialize the Dataset to bytes based on formatter.

        Parameters
        ----------
        inMemoryDataset : `object`
            Object to serialize.

        Returns
        -------
        serializedDataset : `bytes`
            Bytes representing the serialized dataset.

        Raises
        ------
        NotImplementedError
            Formatter does not support writing to bytes.
        """
        if not hasattr(self, "_toBytes"):
            # This is the serialization direction, so report a write
            # failure rather than echoing the fromBytes message.
            raise NotImplementedError("Type does not support writing to bytes.")

        # mypy can not understand that the previous line protects this call
        return self._toBytes(inMemoryDataset)  # type: ignore