Coverage for python / lsst / images / fits / formatters.py: 0%

143 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-05-07 08:34 +0000

1# This file is part of lsst-images. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# Use of this source code is governed by a 3-clause BSD-style 

10# license that can be found in the LICENSE file. 

11 

12from __future__ import annotations 

13 

14__all__ = ( 

15 "CellCoaddFormatter", 

16 "GenericFormatter", 

17 "ImageFormatter", 

18 "MaskedImageFormatter", 

19 "VisitImageFormatter", 

20) 

21 

22import enum 

23import hashlib 

24import json 

25from typing import Any, ClassVar 

26 

27import astropy.io.fits 

28from astro_metadata_translator import ObservationInfo 

29 

30from lsst.daf.butler import DatasetProvenance, FormatterV2 

31from lsst.resources import ResourcePath 

32 

33from .._geom import Box 

34from .._image import Image 

35from .._mask import Mask 

36from .._masked_image import MaskedImageSerializationModel 

37from .._transforms import Projection, ProjectionSerializationModel 

38from .._visit_image import VisitImageSerializationModel 

39from ..serialization import ButlerInfo 

40from ._common import FitsCompressionOptions, PointerModel 

41from ._input_archive import FitsInputArchive, read 

42from ._output_archive import write 

43 

44 

class GenericFormatter(FormatterV2):
    """The butler interface to FITS archive serialization.

    Serialized types must meet all the requirements of the `read` and `write`
    functions.

    Notes
    -----
    This formatter just forwards all read parameters it receives as
    ``**kwargs`` to `.read` and hence the ``deserialize`` method of the type it
    is reading.  This may or may not be appropriate.

    This formatter must be subclassed to add component support.

    The write parameter configuration for this formatter is designed to be
    identical to that for the legacy FITS formatters defined in
    `lsst.obs.base`.

    Butler provenance is written to both FITS headers and the archive tree.
    """

    default_extension: ClassVar[str] = ".fits"
    can_read_from_uri: ClassVar[bool] = True
    supported_write_parameters: ClassVar[frozenset[str]] = frozenset({"recipe"})

    # Provenance remembered by `add_provenance` for use when writing.
    butler_provenance: DatasetProvenance | None = None

    def read_from_uri(self, uri: ResourcePath, component: str | None = None, expected_size: int = -1) -> Any:
        """Read the full dataset from a URI.

        Parameters
        ----------
        uri : `lsst.resources.ResourcePath`
            Location of the file to read.
        component : `str` or `None`, optional
            Ignored by this implementation; subclasses add component support.
        expected_size : `int`, optional
            Ignored by this implementation.

        Returns
        -------
        dataset : `typing.Any`
            The ``deserialized`` attribute of the object returned by `read`.
        """
        pytype = self.dataset_ref.datasetType.storageClass.pytype
        # All read parameters are forwarded verbatim as keyword arguments.
        kwargs = self.file_descriptor.parameters or {}
        return read(pytype, uri, **kwargs).deserialized

    def write_local_file(self, in_memory_dataset: Any, uri: ResourcePath) -> None:
        """Write a dataset to a local file.

        Parameters
        ----------
        in_memory_dataset : `typing.Any`
            The object to serialize.
        uri : `lsst.resources.ResourcePath`
            Location to write to; its ``ospath`` is passed to `write`.
        """
        # Fall back to an empty provenance object when none was recorded via
        # `add_provenance`.
        butler_info = ButlerInfo(
            dataset=self.dataset_ref.to_simple(),
            provenance=self.butler_provenance if self.butler_provenance is not None else DatasetProvenance(),
        )
        write(
            in_memory_dataset,
            uri.ospath,
            update_header=self._update_header,
            compression_options=self._get_compression_options(),
            compression_seed=self._get_compression_seed(),
            butler_info=butler_info,
        )

    def add_provenance(
        self, in_memory_dataset: Any, /, *, provenance: DatasetProvenance | None = None
    ) -> Any:
        """Record butler provenance for a later write.

        Parameters
        ----------
        in_memory_dataset : `typing.Any`
            The dataset about to be written; returned unmodified.
        provenance : `lsst.daf.butler.DatasetProvenance` or `None`, optional
            Provenance to remember on this formatter.

        Returns
        -------
        in_memory_dataset : `typing.Any`
            The same object that was passed in.
        """
        # Instead of attaching the provenance to the object we remember it on
        # the formatter, since a Formatter instance is only used once.
        self.butler_provenance = provenance
        return in_memory_dataset

    def _get_compression_seed(self) -> int:
        """Return a deterministic compression seed derived from the data ID.

        Returns
        -------
        seed : `int`
            A positive integer computed from an MD5 digest of the JSON-encoded
            required data ID values.
        """
        # Set the seed based on data ID (all logic here duplicated from
        # obs_base).  We can't just use 'hash', since like 'set' that's not
        # deterministic.  And we can't rely on a DimensionPacker because those
        # are only defined for certain combinations of dimensions.  Doing an
        # MD5 of the JSON feels like overkill but I don't really see anything
        # much simpler.
        hash_bytes = hashlib.md5(
            json.dumps(list(self.data_id.required_values)).encode(),
            usedforsecurity=False,
        ).digest()
        # And it *really* feels like overkill when we squash that into the
        # small seed range allowed by FITS.  Note that this expression yields
        # values in [1, 9999]; it is deliberately left unchanged so that seeds
        # stay identical to those produced by the duplicated obs_base logic.
        return 1 + int.from_bytes(hash_bytes) % 9999

    def _get_compression_options(self) -> dict[str, FitsCompressionOptions]:
        """Return the compression options for the configured write recipe.

        Returns
        -------
        options : `dict` [`str`, `FitsCompressionOptions`]
            Validated options from the selected recipe, or an empty `dict` if
            the ``"default"`` recipe is requested but not configured.

        Raises
        ------
        RuntimeError
            Raised if a non-default recipe is requested but not present in
            ``write_recipes``.
        """
        recipe = self.write_parameters.get("recipe", "default")
        try:
            config = self.write_recipes[recipe]
        except KeyError:
            if recipe == "default":
                # If there's no default recipe just use the software defaults.
                return {}
            # Report the concrete formatter class rather than a hard-coded
            # name, since this method is shared by all subclasses.
            raise RuntimeError(f"Invalid recipe for {type(self).__name__}: {recipe!r}.") from None
        return {k: FitsCompressionOptions.model_validate(v) for k, v in config.items()}

    def _update_header(self, header: astropy.io.fits.Header) -> None:
        """Replace butler provenance cards in a FITS header, in place.

        Parameters
        ----------
        header : `astropy.io.fits.Header`
            Header to modify.  Existing keys starting with ``LSST BUTLER`` are
            removed; new ones are added if provenance has been recorded.
        """
        # Logic here largely lifted from lsst.obs.base.utils, which we
        # can't use directly for dependency and maybe mapping-type
        # (PropertyList vs. astropy) reasons.  We assume we can always add
        # long cards (astropy will CONTINUE them) but not comments
        # (astropy will truncate and warn on long cards).
        # Iterate over a snapshot of the keys because we delete while looping.
        for key in list(header):
            if key.startswith("LSST BUTLER"):
                del header[key]
        if self.butler_provenance is not None:
            for key, value in self.butler_provenance.to_flat_dict(
                self.dataset_ref, prefix="HIERARCH LSST BUTLER", sep=" ", simple_types=True, max_inputs=3_000
            ).items():
                header.set(key, value)

139 

140 

class ComponentSentinel(enum.Enum):
    """Marker values that `ImageFormatter.read_component` may return in place
    of an actual component.
    """

    UNRECOGNIZED_COMPONENT = enum.auto()
    """The component name is unknown to this formatter; a subclass may still
    be able to handle it.
    """

    INVALID_COMPONENT_MODEL = enum.auto()
    """The component name is known to this formatter, but the corresponding
    attribute of the top-level `..serialization.ArchiveTree` was missing or
    was not of the expected type.
    """

154 

155 

class ImageFormatter(GenericFormatter):
    """The specialized butler interface to FITS archive serialization of
    image-like objects with ``projection`` and ``bbox`` components.

    Notes
    -----
    Component reads look up an attribute of the top-level
    `..serialization.ArchiveTree`: ``projection`` (expected to be a
    `..ProjectionSerializationModel`), ``bbox`` (expected to be a `..Box`) or
    ``obs_info`` (expected to be an
    `astro_metadata_translator.ObservationInfo`).

    Subclasses can add support for additional components by overriding
    `read_component`, delegating to `super`, and handling the cases where it
    returns a `ComponentSentinel` instance.
    """

    def read_from_uri(self, uri: ResourcePath, component: str | None = None, expected_size: int = -1) -> Any:
        """Read the full dataset or a single component from a URI.

        Parameters
        ----------
        uri : `lsst.resources.ResourcePath`
            Location of the file to read.
        component : `str` or `None`, optional
            Name of the component to read, or `None` for the full dataset.
        expected_size : `int`, optional
            Not used by this implementation.

        Returns
        -------
        result : `typing.Any`
            The full dataset or the requested component.

        Raises
        ------
        NotImplementedError
            Raised if `read_component` returns a `ComponentSentinel`.
        RuntimeError
            Raised if any read parameters are left unconsumed.
        """
        pytype: Any = self.file_descriptor.storageClass.pytype
        if component is None:
            result = read(pytype, uri, bbox=self.pop_bbox_from_parameters()).deserialized
        else:
            with FitsInputArchive.open(uri, partial=True) as archive:
                tree_type = pytype._get_archive_tree_type(PointerModel)
                tree = archive.get_tree(tree_type)
                result = self.read_component(component, tree, archive)
                # Turn sentinel results into errors while the archive is
                # still open.
                if result is ComponentSentinel.UNRECOGNIZED_COMPONENT:
                    raise NotImplementedError(
                        f"Unrecognized component {component!r} for {type(self).__name__}."
                    )
                if result is ComponentSentinel.INVALID_COMPONENT_MODEL:
                    raise NotImplementedError(
                        f"Invalid serialization model for component {component!r} for {type(self).__name__}."
                    )
        self.check_unhandled_parameters()
        return result

    def pop_bbox_from_parameters(self) -> Box | None:
        """Remove and return the ``bbox`` read parameter, if present.

        Returns
        -------
        bbox : `..Box` or `None`
            The value stored under ``"bbox"`` in the file descriptor's
            parameters, or `None` if there are no parameters or no such key.
        """
        parameters = self.file_descriptor.parameters
        if not parameters:
            # No parameters at all (`None` or empty): nothing to pop.
            return None
        return parameters.pop("bbox", None)

    def check_unhandled_parameters(self) -> None:
        """Raise if any read parameters remain on the file descriptor.

        Raises
        ------
        RuntimeError
            Raised if the file descriptor's parameters are non-empty.
        """
        leftover = self.file_descriptor.parameters
        if leftover:
            raise RuntimeError(f"Parameters {list(leftover.keys())} not recognized.")

    def read_component(
        self,
        component: str,
        tree: Any,
        archive: FitsInputArchive,
    ) -> Any:
        """Read one component from an already-opened archive.

        Parameters
        ----------
        component : `str`
            Name of the component to read.
        tree : `typing.Any`
            Top-level archive tree of the serialized object.
        archive : `FitsInputArchive`
            Archive to read from.

        Returns
        -------
        result : `typing.Any`
            The component, or a `ComponentSentinel` member if the component
            name is not recognized or the tree attribute is missing or has
            the wrong type.
        """
        # Maps each supported component (also the name of the tree attribute
        # holding it) to the type that attribute must have.
        expected_types: dict[str, Any] = {
            "projection": ProjectionSerializationModel,
            "bbox": Box,
            "obs_info": ObservationInfo,
        }
        expected_type = expected_types.get(component)
        if expected_type is None:
            return ComponentSentinel.UNRECOGNIZED_COMPONENT
        value = getattr(tree, component, None)
        if not isinstance(value, expected_type):
            return ComponentSentinel.INVALID_COMPONENT_MODEL
        if component == "projection":
            # Projections are stored as a serialization model and must be
            # deserialized; the other components are returned as stored.
            return Projection.deserialize(value, archive)
        return value

223 

224 

225class MaskedImageFormatter(ImageFormatter): 

226 """A specialized butler interface to FITS archive serialization of 

227 the `..MaskedImage` class. 

228 """ 

229 

230 def read_component( 

231 self, 

232 component: str, 

233 tree: Any, 

234 archive: FitsInputArchive, 

235 ) -> Any: 

236 match super().read_component(component, tree, archive): 

237 case ComponentSentinel(): 

238 pass 

239 case handled: 

240 return handled 

241 if not isinstance(tree, MaskedImageSerializationModel): 

242 return ComponentSentinel.INVALID_COMPONENT_MODEL 

243 match component: 

244 case "image": 

245 return Image.deserialize(tree.image, archive, bbox=self.pop_bbox_from_parameters()) 

246 case "mask": 

247 return Mask.deserialize(tree.mask, archive, bbox=self.pop_bbox_from_parameters()) 

248 case "variance": 

249 return Image.deserialize(tree.variance, archive, bbox=self.pop_bbox_from_parameters()) 

250 return ComponentSentinel.UNRECOGNIZED_COMPONENT 

251 

252 

class VisitImageFormatter(MaskedImageFormatter):
    """A specialized butler interface to FITS archive serialization of
    the `..VisitImage` class.
    """

    def read_component(
        self,
        component: str,
        tree: Any,
        archive: FitsInputArchive,
    ) -> Any:
        """Read one component, adding ``psf``, ``summary_stats`` and
        ``aperture_corrections`` to the components supported by the base
        class.

        Parameters
        ----------
        component : `str`
            Name of the component to read.
        tree : `typing.Any`
            Top-level archive tree of the serialized object.
        archive : `FitsInputArchive`
            Archive to read from.

        Returns
        -------
        result : `typing.Any`
            The component, or a `ComponentSentinel` member if it could not be
            read.
        """
        inherited = super().read_component(component, tree, archive)
        if not isinstance(inherited, ComponentSentinel):
            # The base class produced a real component.
            return inherited
        if not isinstance(tree, VisitImageSerializationModel):
            return ComponentSentinel.INVALID_COMPONENT_MODEL
        if component == "psf":
            return tree.deserialize_psf(archive)
        if component == "summary_stats":
            return tree.summary_stats
        if component == "aperture_corrections":
            return tree.aperture_corrections.deserialize(archive)
        return ComponentSentinel.UNRECOGNIZED_COMPONENT

279 

280 

class CellCoaddFormatter(MaskedImageFormatter):
    """A specialized butler interface to FITS archive serialization of
    the `..cells.CellCoadd` class.
    """

    def read_component(
        self,
        component: str,
        tree: Any,
        archive: FitsInputArchive,
    ) -> Any:
        """Read one component, adding ``psf`` and ``provenance`` to the
        components supported by the base class.

        Parameters
        ----------
        component : `str`
            Name of the component to read.
        tree : `typing.Any`
            Top-level archive tree of the serialized object.
        archive : `FitsInputArchive`
            Archive to read from.

        Returns
        -------
        result : `typing.Any`
            The component, or a `ComponentSentinel` member if it could not be
            read.

        Notes
        -----
        The ``bbox`` read parameter is popped and forwarded when reading the
        ``psf`` component.
        """
        # Imported inside the method, as in the original code, rather than at
        # module scope.
        from ..cells import CellCoaddSerializationModel

        inherited = super().read_component(component, tree, archive)
        if not isinstance(inherited, ComponentSentinel):
            # The base class produced a real component.
            return inherited
        if not isinstance(tree, CellCoaddSerializationModel):
            return ComponentSentinel.INVALID_COMPONENT_MODEL
        if component == "psf":
            return tree.deserialize_psf(archive, bbox=self.pop_bbox_from_parameters())
        if component == "provenance":
            return tree.deserialize_provenance(archive)
        return ComponentSentinel.UNRECOGNIZED_COMPONENT