Coverage for python / lsst / images / fits / formatters.py: 0%

127 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-04-18 09:00 +0000

1# This file is part of lsst-images. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# Use of this source code is governed by a 3-clause BSD-style 

10# license that can be found in the LICENSE file. 

11 

12from __future__ import annotations 

13 

14__all__ = ("GenericFormatter", "ImageFormatter", "MaskedImageFormatter", "VisitImageFormatter") 

15 

16import enum 

17import hashlib 

18import json 

19from typing import Any, ClassVar 

20 

21import astropy.io.fits 

22from astro_metadata_translator import ObservationInfo 

23 

24from lsst.daf.butler import DatasetProvenance, FormatterV2 

25from lsst.resources import ResourcePath 

26 

27from .._geom import Box 

28from .._image import Image 

29from .._mask import Mask 

30from .._masked_image import MaskedImageSerializationModel 

31from .._observation_summary_stats import ObservationSummaryStats 

32from .._transforms import Projection, ProjectionSerializationModel 

33from .._visit_image import VisitImageSerializationModel 

34from ..serialization import ButlerInfo, TableCellReferenceModel 

35from ._common import FitsCompressionOptions 

36from ._input_archive import FitsInputArchive, read 

37from ._output_archive import write 

38 

39 

class GenericFormatter(FormatterV2):
    """The butler interface to FITS archive serialization.

    Serialized types must meet all the requirements of the `read` and `write`
    functions.

    Notes
    -----
    This formatter just forwards all read parameters it receives as
    ``**kwargs`` to `.read` and hence the ``deserialize`` method of the type it
    is reading.  This may or may not be appropriate.

    This formatter must be subclassed to add component support.

    The write parameter configuration for this formatter is designed to be
    identical to that for the legacy FITS formatters defined in
    `lsst.obs.base`.

    Butler provenance is written to both FITS headers and the archive tree.
    """

    default_extension: ClassVar[str] = ".fits"
    can_read_from_uri: ClassVar[bool] = True
    supported_write_parameters: ClassVar[frozenset[str]] = frozenset({"recipe"})

    # Provenance remembered by `add_provenance` until the dataset is written.
    butler_provenance: DatasetProvenance | None = None

    def read_from_uri(self, uri: ResourcePath, component: str | None = None, expected_size: int = -1) -> Any:
        """Read the full dataset from ``uri``.

        Parameters
        ----------
        uri
            Location of the FITS file to read.
        component
            Not used by this implementation; component support requires a
            subclass.
        expected_size
            Not used by this implementation.

        Returns
        -------
        Any
            The ``deserialized`` attribute of the result of `read`, called
            with the storage class Python type and all read parameters.
        """
        pytype = self.dataset_ref.datasetType.storageClass.pytype
        kwargs = self.file_descriptor.parameters or {}
        return read(pytype, uri, **kwargs).deserialized

    def write_local_file(self, in_memory_dataset: Any, uri: ResourcePath) -> None:
        """Write ``in_memory_dataset`` to the local file at ``uri``.

        Parameters
        ----------
        in_memory_dataset
            Object to serialize; passed directly to `write`.
        uri
            Destination; only its ``ospath`` is used.
        """
        # The archive tree always gets a provenance object, falling back to
        # an empty one if `add_provenance` never supplied one.
        butler_info = ButlerInfo(
            dataset=self.dataset_ref.to_simple(),
            provenance=self.butler_provenance if self.butler_provenance is not None else DatasetProvenance(),
        )
        write(
            in_memory_dataset,
            uri.ospath,
            update_header=self._update_header,
            compression_options=self._get_compression_options(),
            compression_seed=self._get_compression_seed(),
            butler_info=butler_info,
        )

    def add_provenance(
        self, in_memory_dataset: Any, /, *, provenance: DatasetProvenance | None = None
    ) -> Any:
        """Remember ``provenance`` for the upcoming write.

        Parameters
        ----------
        in_memory_dataset
            The dataset about to be written; returned unchanged.
        provenance
            Provenance to record, or `None`.

        Returns
        -------
        Any
            ``in_memory_dataset``, unmodified.
        """
        # Instead of attaching the provenance to the object we remember it on
        # the formatter, since a Formatter instance is only used once.
        self.butler_provenance = provenance
        return in_memory_dataset

    def _get_compression_seed(self) -> int:
        """Return a deterministic compression seed derived from the data ID.

        Returns
        -------
        int
            A value in ``[1, 9999]``.
        """
        # Set the seed based on data ID (all logic here duplicated from
        # obs_base).  We can't just use 'hash', since like 'set' that's not
        # deterministic.  And we can't rely on a DimensionPacker because those
        # are only defined for certain combinations of dimensions.  Doing an
        # MD5 of the JSON feels like overkill but I don't really see anything
        # much simpler.
        hash_bytes = hashlib.md5(
            json.dumps(list(self.data_id.required_values)).encode(),
            usedforsecurity=False,
        ).digest()
        # And it *really* feels like overkill when we squash that into
        # [1, 9999], which lies inside the [1, 10000] range allowed by FITS.
        # The modulus is deliberately left at 9999 so seeds for existing data
        # IDs do not change.  The byte order is spelled out because the
        # default only exists on Python >= 3.11; "big" is that default.
        return 1 + int.from_bytes(hash_bytes, byteorder="big") % 9999

    def _get_compression_options(self) -> dict[str, FitsCompressionOptions]:
        """Return the compression options for the configured write recipe.

        Returns
        -------
        dict [`str`, `FitsCompressionOptions`]
            Validated options keyed as in the recipe, or an empty `dict` when
            the recipe is ``"default"`` and no such recipe is configured.

        Raises
        ------
        RuntimeError
            Raised if a non-default recipe is requested but not configured.
        """
        recipe = self.write_parameters.get("recipe", "default")
        try:
            config = self.write_recipes[recipe]
        except KeyError:
            if recipe == "default":
                # If there's no default recipe just use the software defaults.
                return {}
            # Report the concrete formatter class rather than a fixed name,
            # since this method is shared by every subclass.
            raise RuntimeError(f"Invalid recipe for {type(self).__name__}: {recipe!r}.") from None
        return {k: FitsCompressionOptions.model_validate(v) for k, v in config.items()}

    def _update_header(self, header: astropy.io.fits.Header) -> None:
        """Replace any butler provenance cards in ``header``.

        Parameters
        ----------
        header
            Header to modify in place.
        """
        # Logic here largely lifted from lsst.obs.base.utils, which we
        # can't use directly for dependency and maybe mapping-type
        # (PropertyList vs. astropy) reasons.  We assume we can always add
        # long cards (astropy will CONTINUE them) but not comments
        # (astropy will truncate and warn on long cards).
        #
        # Deleting by keyword removes every card with that keyword, so each
        # keyword must be visited only once; a repeated keyword would
        # otherwise raise KeyError on its second visit.
        stale_keys = dict.fromkeys(key for key in header if key.startswith("LSST BUTLER"))
        for key in stale_keys:
            del header[key]
        # NOTE(review): with no provenance nothing is written to the header,
        # while write_local_file still stores an empty DatasetProvenance in
        # the archive tree -- confirm this asymmetry is intended.
        if self.butler_provenance is not None:
            for key, value in self.butler_provenance.to_flat_dict(
                self.dataset_ref, prefix="HIERARCH LSST BUTLER", sep=" ", simple_types=True, max_inputs=3_000
            ).items():
                header.set(key, value)

134 

135 

@enum.unique
class ComponentSentinel(enum.Enum):
    """Marker results that `ImageFormatter.read_component` may return in
    place of a component value.
    """

    UNRECOGNIZED_COMPONENT = enum.auto()
    """The component name is unknown to this formatter; a subclass may still
    be able to handle it.
    """

    INVALID_COMPONENT_MODEL = enum.auto()
    """The component name is known to this formatter, but the attribute it
    expected on the top-level `..serialization.ArchiveTree` was missing or
    was not of the expected type.
    """

149 

150 

class ImageFormatter(GenericFormatter):
    """The specialized butler interface to FITS archive serialization of
    image-like objects with ``projection``, ``bbox`` and ``obs_info``
    components.

    Notes
    -----
    Components are looked up as attributes of the top-level
    `..serialization.ArchiveTree`: ``projection`` must be a
    `..ProjectionSerializationModel`, ``bbox`` a `..Box`, and ``obs_info`` an
    `~astro_metadata_translator.ObservationInfo`.

    Subclasses can add support for additional components by overriding
    `read_component`, delegating to `super`, and handling the cases where it
    returns a `ComponentSentinel` instance.
    """

    def read_from_uri(self, uri: ResourcePath, component: str | None = None, expected_size: int = -1) -> Any:
        """Read the full dataset or a single component from ``uri``.

        Parameters
        ----------
        uri
            Location of the FITS file to read.
        component
            Name of the component to read, or `None` for the full dataset.
        expected_size
            Not used by this implementation.

        Returns
        -------
        Any
            The deserialized dataset or component.

        Raises
        ------
        NotImplementedError
            Raised if `read_component` returns a `ComponentSentinel`.
        RuntimeError
            Raised if any read parameters are left over after the read.
        """
        pytype: Any = self.file_descriptor.storageClass.pytype
        if component is not None:
            # Only the tree is needed up front; the component reader pulls
            # whatever else it needs from the partially-opened archive.
            with FitsInputArchive.open(uri, partial=True) as archive:
                tree = archive.get_tree(pytype._get_archive_tree_type(TableCellReferenceModel))
                result = self.read_component(component, tree, archive)
            if result is ComponentSentinel.UNRECOGNIZED_COMPONENT:
                raise NotImplementedError(
                    f"Unrecognized component {component!r} for {type(self).__name__}."
                )
            if result is ComponentSentinel.INVALID_COMPONENT_MODEL:
                raise NotImplementedError(
                    f"Invalid serialization model for component {component!r} for {type(self).__name__}."
                )
        else:
            result = read(pytype, uri, bbox=self.pop_bbox_from_parameters()).deserialized
        self.check_unhandled_parameters()
        return result

    def pop_bbox_from_parameters(self) -> Box | None:
        """Remove and return the ``bbox`` read parameter.

        Returns
        -------
        `..Box` or `None`
            The value stored under ``"bbox"``, or `None` if there are no
            parameters or no such entry.
        """
        params = self.file_descriptor.parameters
        if not params:
            return None
        # Popping (rather than getting) marks the parameter as handled for
        # `check_unhandled_parameters`.
        return params.pop("bbox", None)

    def check_unhandled_parameters(self) -> None:
        """Raise if any read parameters remain.

        Raises
        ------
        RuntimeError
            Raised if the file descriptor's parameters are non-empty.
        """
        if not self.file_descriptor.parameters:
            return
        raise RuntimeError(f"Parameters {list(self.file_descriptor.parameters.keys())} not recognized.")

    def read_component(
        self,
        component: str,
        tree: Any,
        archive: FitsInputArchive,
    ) -> Any:
        """Read a single component.

        Parameters
        ----------
        component
            Name of the component to read.
        tree
            Top-level archive tree of the dataset.
        archive
            Open archive, used to deserialize the projection.

        Returns
        -------
        Any
            The component, or a `ComponentSentinel` if the component is not
            recognized or the tree attribute backing it is missing or of the
            wrong type.
        """
        if component == "projection":
            serialized_projection = getattr(tree, "projection", None)
            if not isinstance(serialized_projection, ProjectionSerializationModel):
                return ComponentSentinel.INVALID_COMPONENT_MODEL
            return Projection.deserialize(serialized_projection, archive)
        if component == "bbox":
            bbox = getattr(tree, "bbox", None)
            if not isinstance(bbox, Box):
                return ComponentSentinel.INVALID_COMPONENT_MODEL
            return bbox
        if component == "obs_info":
            obs_info = getattr(tree, "obs_info", None)
            if not isinstance(obs_info, ObservationInfo):
                return ComponentSentinel.INVALID_COMPONENT_MODEL
            return obs_info
        return ComponentSentinel.UNRECOGNIZED_COMPONENT

218 

219 

220class MaskedImageFormatter(ImageFormatter): 

221 """A specialized butler interface to FITS archive serialization of 

222 the `..MaskedImage` class. 

223 """ 

224 

225 def read_component( 

226 self, 

227 component: str, 

228 tree: Any, 

229 archive: FitsInputArchive, 

230 ) -> Any: 

231 match super().read_component(component, tree, archive): 

232 case ComponentSentinel(): 

233 pass 

234 case handled: 

235 return handled 

236 if not isinstance(tree, MaskedImageSerializationModel): 

237 return ComponentSentinel.INVALID_COMPONENT_MODEL 

238 match component: 

239 case "image": 

240 return Image.deserialize(tree.image, archive, bbox=self.pop_bbox_from_parameters()) 

241 case "mask": 

242 return Mask.deserialize(tree.mask, archive, bbox=self.pop_bbox_from_parameters()) 

243 case "variance": 

244 return Image.deserialize(tree.variance, archive, bbox=self.pop_bbox_from_parameters()) 

245 return ComponentSentinel.UNRECOGNIZED_COMPONENT 

246 

247 

class VisitImageFormatter(MaskedImageFormatter):
    """A specialized butler interface to FITS archive serialization of
    the `..VisitImage` class.

    Notes
    -----
    Adds the ``psf`` and ``summary_stats`` components to those handled by the
    base class.
    """

    def read_component(
        self,
        component: str,
        tree: Any,
        archive: FitsInputArchive,
    ) -> Any:
        """Read a single component.

        Parameters
        ----------
        component
            Name of the component to read.
        tree
            Top-level archive tree of the dataset.
        archive
            Open archive to deserialize the component from.

        Returns
        -------
        Any
            The component, or a `ComponentSentinel`.  For a component this
            class does not add, the base class's sentinel is returned
            unchanged so that an invalid model is not reported as an
            unrecognized component.
        """
        inherited = super().read_component(component, tree, archive)
        if not isinstance(inherited, ComponentSentinel):
            return inherited
        if component not in ("psf", "summary_stats"):
            # Not one of ours: keep the base class's verdict.
            return inherited
        if not isinstance(tree, VisitImageSerializationModel):
            return ComponentSentinel.INVALID_COMPONENT_MODEL
        if component == "psf":
            return tree.deserialize_psf(archive)
        summary_stats = getattr(tree, "summary_stats", None)
        if isinstance(summary_stats, ObservationSummaryStats):
            return summary_stats
        return ComponentSentinel.INVALID_COMPONENT_MODEL