Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

"""Support for reading and writing files to a POSIX file system."""

# The docstring above must be the first statement for it to become the
# module's ``__doc__``; a future statement is allowed to follow it.
from __future__ import annotations

# Public API of this module.
__all__ = ("FileFormatter",)

27 

28from abc import abstractmethod 

29 

30from typing import ( 

31 TYPE_CHECKING, 

32 Any, 

33 Optional, 

34 Type, 

35) 

36 

37from lsst.daf.butler import Formatter 

38 

39if TYPE_CHECKING: 39 ↛ 40line 39 didn't jump to line 40, because the condition on line 39 was never true

40 from lsst.daf.butler import StorageClass 

41 

42 

class FileFormatter(Formatter):
    """Interface for reading and writing files on a POSIX file system.

    Subclasses are expected to implement the abstract `_readFile` and
    `_writeFile` methods. A subclass may additionally define ``_fromBytes``
    and ``_toBytes`` methods; their presence is detected at run time and
    enables `fromBytes` and `toBytes` respectively.
    """

    extension: Optional[str] = None
    """Default file extension to use for writing files. None means that no
    modifications will be made to the supplied file extension. (`str`)"""

    @abstractmethod
    def _readFile(self, path: str, pytype: Optional[Type[Any]] = None) -> Any:
        """Read a file from the path in the correct format.

        Parameters
        ----------
        path : `str`
            Path to use to open the file.
        pytype : `class`, optional
            Class to use to read the file.

        Returns
        -------
        data : `object`
            Data read from file. Returns `None` if the file can not be
            found at the given path.

        Raises
        ------
        Exception
            Some problem reading the file.
        """

    @abstractmethod
    def _writeFile(self, inMemoryDataset: Any) -> None:
        """Write the in memory dataset to file on disk.

        Parameters
        ----------
        inMemoryDataset : `object`
            Object to serialize.

        Raises
        ------
        Exception
            The file could not be written.
        """

    def _assembleDataset(self, data: Any, component: Optional[str] = None) -> Any:
        """Assemble and coerce the dataset, or one of its components,
        into an appropriate python type and return it.

        Parameters
        ----------
        data : `dict` or `object`
            The dataset as read, either a composite object or a `dict`.
            Either the whole of it, or the requested component of it, is
            coerced to the python type of the read storage class given in
            ``self.fileDescriptor``.
        component : `str`, optional
            Component to read from the file. Only used if the `StorageClass`
            for reading differed from the `StorageClass` used to write the
            file.

        Returns
        -------
        inMemoryDataset : `object`
            The requested data as a Python object. The type of object
            is controlled by the specific formatter. Can be `None` if the
            component could not be extracted from ``data``.

        Raises
        ------
        ValueError
            The read and write storage classes differ but no component
            was requested.
        """
        fileDescriptor = self.fileDescriptor

        # If read and write storage classes differ, more work is required.
        readStorageClass = fileDescriptor.readStorageClass
        if readStorageClass != fileDescriptor.storageClass:
            if component is None:
                raise ValueError(
                    f"Storage class inconsistency ({readStorageClass.name} vs"
                    f" {fileDescriptor.storageClass.name}) but no"
                    " component requested"
                )

            # Concrete composite written as a single file (we hope)
            try:
                data = fileDescriptor.storageClass.delegate().getComponent(data, component)
            except AttributeError:
                # Defer the complaint: hand back None and let the caller
                # decide how a missing component should be reported.
                data = None

        # Coerce to the requested type (not necessarily the type that was
        # written)
        data = self._coerceType(data, readStorageClass,
                                pytype=readStorageClass.pytype)

        return data

    def _coerceType(self, inMemoryDataset: Any, storageClass: StorageClass,
                    pytype: Optional[Type[Any]] = None) -> Any:
        """Coerce the supplied inMemoryDataset to type `pytype`.

        Usually a no-op. This base implementation returns the object
        unchanged; subclasses can override it to perform a conversion.

        Parameters
        ----------
        inMemoryDataset : `object`
            Object to coerce to expected type.
        storageClass : `StorageClass`
            StorageClass associated with ``inMemoryDataset``.
        pytype : `class`, optional
            Override type to use for conversion.

        Returns
        -------
        inMemoryDataset : `object`
            Object of expected type `pytype`.
        """
        return inMemoryDataset

    def read(self, component: Optional[str] = None) -> Any:
        """Read data from a file.

        The file to read is identified by ``self.fileDescriptor``.

        Parameters
        ----------
        component : `str`, optional
            Component to read from the file. Only used if the `StorageClass`
            for reading differed from the `StorageClass` used to write the
            file.

        Returns
        -------
        inMemoryDataset : `object`
            The requested data as a Python object. The type of object
            is controlled by the specific formatter. Can be `None` when a
            component was requested but is missing.

        Raises
        ------
        ValueError
            No data could be read and no component was requested, or the
            read and write storage classes differ but no component was
            requested.
        Exception
            Any error raised by the formatter's `_readFile` implementation.
        """
        fileDescriptor = self.fileDescriptor

        # Read the file naively
        path = fileDescriptor.location.path
        data = self._readFile(path, fileDescriptor.storageClass.pytype)

        # Assemble the requested dataset and potentially return only its
        # component coercing it to its appropriate pytype
        data = self._assembleDataset(data, component)

        # Special case components by allowing a formatter to return None
        # to indicate that the component was understood but is missing
        if data is None and component is None:
            raise ValueError(f"Unable to read data with URI {self.fileDescriptor.location.uri}")

        return data

    def fromBytes(self, serializedDataset: bytes, component: Optional[str] = None) -> Any:
        """Read serialized data into a Dataset or its component.

        Parameters
        ----------
        serializedDataset : `bytes`
            Bytes object to unserialize.
        component : `str`, optional
            Component to read from the Dataset. Only used if the `StorageClass`
            for reading differed from the `StorageClass` used to write the
            file.

        Returns
        -------
        inMemoryDataset : `object`
            The requested data as a Python object. The type of object
            is controlled by the specific formatter. Can be `None` when a
            component was requested but is missing.

        Raises
        ------
        NotImplementedError
            Formatter does not support reading from bytes.
        ValueError
            No data could be read and no component was requested, or the
            read and write storage classes differ but no component was
            requested.
        """
        if not hasattr(self, '_fromBytes'):
            raise NotImplementedError("Type does not support reading from bytes.")

        # mypy can not understand that the previous line protects this call
        data = self._fromBytes(serializedDataset,  # type: ignore
                               self.fileDescriptor.storageClass.pytype)

        # Assemble the requested dataset and potentially return only its
        # component coercing it to its appropriate pytype
        data = self._assembleDataset(data, component)

        # Same policy as read(): a formatter may return None for a
        # requested component to indicate that the component was understood
        # but is missing. Only a missing whole dataset is an error.
        if data is None and component is None:
            raise ValueError(f"Unable to read data with URI {self.fileDescriptor.location.uri}")

        return data

    def write(self, inMemoryDataset: Any) -> str:
        """Write a Python object to a file.

        Parameters
        ----------
        inMemoryDataset : `object`
            The Python object to store.

        Returns
        -------
        path : `str`
            The path where the primary file is stored within the datastore.
        """
        fileDescriptor = self.fileDescriptor
        # Update the location with the formatter-preferred file extension
        # before writing, so the returned path reflects it.
        fileDescriptor.location.updateExtension(self.extension)

        self._writeFile(inMemoryDataset)

        return fileDescriptor.location.pathInStore

    def toBytes(self, inMemoryDataset: Any) -> bytes:
        """Serialize the Dataset to bytes based on formatter.

        Parameters
        ----------
        inMemoryDataset : `object`
            Object to serialize.

        Returns
        -------
        serializedDataset : `bytes`
            Bytes representing the serialized dataset.

        Raises
        ------
        NotImplementedError
            Formatter does not support writing to bytes.
        """
        if not hasattr(self, '_toBytes'):
            raise NotImplementedError("Type does not support writing to bytes.")

        # mypy can not understand that the previous line protects this call
        return self._toBytes(inMemoryDataset)  # type: ignore

282 return self._toBytes(inMemoryDataset) # type: ignore