Coverage for python / lsst / daf / butler / tests / testFormatters.py: 34%

91 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-05-06 08:30 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27 

28from __future__ import annotations 

29 

# Public API of this test-formatter module.  Every formatter class defined
# below is listed (alphabetically) so that wildcard imports expose the
# complete set; MetricsExampleDataFormatter and
# MetricsExampleModelProvenanceFormatter were previously missing.
__all__ = (
    "DoNothingFormatter",
    "FormatterTest",
    "LenientYamlFormatter",
    "MetricsExampleDataFormatter",
    "MetricsExampleFormatter",
    "MetricsExampleModelProvenanceFormatter",
    "MultipleExtensionsFormatter",
    "SingleExtensionFormatter",
)

38 

39import json 

40from collections.abc import Mapping 

41from typing import TYPE_CHECKING, Any, BinaryIO 

42 

43import yaml 

44 

45from lsst.resources import ResourceHandleProtocol 

46 

47from .._formatter import Formatter, FormatterV2 

48from ..formatters.json import JsonFormatter 

49from ..formatters.yaml import YamlFormatter 

50 

51if TYPE_CHECKING: 

52 from .._dataset_provenance import DatasetProvenance 

53 from .._location import Location 

54 

55 

class DoNothingFormatter(Formatter):
    """Test formatter that never actually formats anything.

    Used to exercise formatter registration and parameter handling
    without performing any real serialization.
    """

    def read(self, component: str | None = None) -> Any:
        # Reading is deliberately unsupported by this test formatter.
        raise NotImplementedError("Type does not support reading")

    def write(self, inMemoryDataset: Any) -> None:
        # Writing is deliberately unsupported by this test formatter.
        raise NotImplementedError("Type does not support writing")

66 

67 

class FormatterTest(Formatter):
    """Test formatter that declares write parameters but formats nothing."""

    supportedWriteParameters = frozenset({"min", "max", "median", "comment", "extra", "recipe"})

    def read(self, component: str | None = None) -> Any:
        # This test formatter never reads anything.
        raise NotImplementedError("Type does not support reading")

    def write(self, inMemoryDataset: Any) -> None:
        # This test formatter never writes anything.
        raise NotImplementedError("Type does not support writing")

    @staticmethod
    def validate_write_recipes(recipes: Mapping[str, Any] | None) -> Mapping[str, Any] | None:
        """Validate that each write recipe carries the mandatory ``mode``
        key, passing an empty or `None` mapping through untouched.
        """
        if not recipes:
            return recipes
        # any() short-circuits on the first offending recipe, matching the
        # original early-exit loop behavior.
        if any("mode" not in recipe for recipe in recipes.values()):
            raise RuntimeError("'mode' is a required write recipe parameter")
        return recipes

87 

88 

class SingleExtensionFormatter(DoNothingFormatter):
    """Do-nothing formatter that registers exactly one file extension."""

    # Only ".fits" files are associated with this formatter.
    extension = ".fits"

93 

94 

class MultipleExtensionsFormatter(SingleExtensionFormatter):
    """Formatter registering several alternative file extensions."""

    # Extensions accepted in addition to the ".fits" inherited default.
    supportedExtensions = frozenset({".fit", ".fits.fz", ".fits.gz"})

99 

100 

class LenientYamlFormatter(YamlFormatter):
    """YAML formatter that tolerates any file extension.

    Regardless of the extension on the target file, reading and writing
    always use YAML.
    """

    @classmethod
    def validate_extension(cls, location: Location) -> None:
        # Deliberately accept every extension.
        return

109 

110 

class MetricsExampleFormatter(FormatterV2):
    """Specialist test formatter for metrics.

    Components are handled directly by this formatter rather than via an
    assembler delegate.
    """

    supported_extensions = frozenset({".json", ".yaml"})
    default_extension = ".yaml"
    can_read_from_stream = True

    def read_from_stream(
        self, stream: BinaryIO | ResourceHandleProtocol, component: str | None = None, expected_size: int = -1
    ) -> Any:
        """Read data from a file.

        Parameters
        ----------
        stream : `typing.BinaryIO` or `lsst.resources.ResourceHandleProtocol`
            Open file handle to read from.
        component : `str`, optional
            Component to read from the file. Only used if the `StorageClass`
            for reading differed from the `StorageClass` used to write the
            file.
        expected_size : `int`, optional
            The expected size of the resource.

        Returns
        -------
        inMemoryDataset : `object`
            The requested data as a Python object. The type of object
            is controlled by the specific formatter.

        Raises
        ------
        ValueError
            Component requested but this file does not seem to be a concrete
            composite.
        KeyError
            Raised when parameters passed with fileDescriptor are not
            supported.
        """
        # Neither YAML nor JSON supports reading a subset from disk, so the
        # whole stream is parsed up front.
        name = stream.name
        if ".yaml" in name:
            parsed = yaml.load(stream, Loader=yaml.SafeLoader)
        elif ".json" in name:
            parsed = json.load(stream)
        else:
            raise RuntimeError(f"Unsupported file extension found in path '{stream.name}'")

        # Apply any requested slice before the Python type is constructed.
        parameters = self.file_descriptor.parameters
        if parameters and "slice" in parameters and "data" in parsed:
            parsed["data"] = parsed["data"][parameters["slice"]]

        in_memory_dataset = self.file_descriptor.storageClass.pytype(**parsed)

        if not component:
            return in_memory_dataset

        # Attribute-backed components map straight onto the dataset;
        # "counter" is derived from the length of the data component.
        if component in ("summary", "output", "data"):
            return getattr(in_memory_dataset, component)
        if component == "counter":
            return len(in_memory_dataset.data)
        raise ValueError(f"Unsupported component: {component}")

    def to_bytes(self, in_memory_dataset: Any) -> bytes:
        """Serialize a Dataset to YAML-encoded bytes.

        Parameters
        ----------
        in_memory_dataset : `object`
            The Dataset to store.

        Returns
        -------
        serialized_dataset : `bytes`
            The in-memory dataset as bytes.
        """
        return yaml.dump(in_memory_dataset._asdict()).encode()

196 

197 

class MetricsExampleDataFormatter(Formatter):
    """Specialist test formatter for the data component of a MetricsExample.

    Needed when a MetricsExample is disassembled and the derived component
    must still be supported.
    """

    # Slicing is left to the assembler delegate.
    unsupportedParameters = None

    # Output is always written as YAML.
    extension = ".yaml"

    def read(self, component: str | None = None) -> Any:
        """Read data from a file.

        Parameters
        ----------
        component : `str`, optional
            Component to read from the file. Only used if the `StorageClass`
            for reading differed from the `StorageClass` used to write the
            file.

        Returns
        -------
        inMemoryDataset : `object`
            The requested data as a Python object. The type of object
            is controlled by the specific formatter.

        Raises
        ------
        ValueError
            Component requested but this file does not seem to be a concrete
            composite.
        KeyError
            Raised when parameters passed with fileDescriptor are not
            supported.
        """
        # YAML requires the complete file to be parsed; a subset cannot be
        # read from disk.
        with open(self.fileDescriptor.location.path) as stream:
            dataset = yaml.safe_load(stream)

        # Slice immediately if the caller requested one.
        parameters = self.fileDescriptor.parameters
        if parameters and "slice" in parameters:
            dataset = dataset[parameters["slice"]]

        # ``dataset`` should now be a native list.
        if not component:
            return dataset

        if component == "counter":
            return len(dataset)
        raise ValueError(f"Unsupported component: {component}")

    def write(self, inMemoryDataset: Any) -> None:
        """Write a Dataset.

        Parameters
        ----------
        inMemoryDataset : `object`
            The Dataset to store.
        """
        descriptor = self.fileDescriptor

        # Make the location carry this formatter's preferred extension.
        descriptor.location.updateExtension(self.extension)

        serialized = yaml.dump(inMemoryDataset)
        descriptor.location.uri.write(serialized.encode("utf-8"))

277 

278 

class MetricsExampleModelProvenanceFormatter(JsonFormatter):
    """Specialist formatter used to test provenance addition."""

    def add_provenance(
        self, in_memory_dataset: Any, /, *, provenance: DatasetProvenance | None = None
    ) -> Any:
        # Operate on a copy so the caller's object is untouched (and to
        # prove that copying works in this code path).
        updated = in_memory_dataset.model_copy()
        updated.provenance = provenance
        updated.dataset_id = self.dataset_ref.id
        return updated