Coverage for python/lsst/images/fits/formatters.py: 0% (122 statements)
coverage.py v7.13.5, created at 2026-04-17 09:16 +0000
1# This file is part of lsst-images.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# Use of this source code is governed by a 3-clause BSD-style
10# license that can be found in the LICENSE file.
12from __future__ import annotations
14__all__ = ("GenericFormatter", "ImageFormatter", "MaskedImageFormatter", "VisitImageFormatter")
16import enum
17import hashlib
18import json
19from typing import Any, ClassVar
21import astropy.io.fits
22from astro_metadata_translator import ObservationInfo
24from lsst.daf.butler import DatasetProvenance, FormatterV2
25from lsst.resources import ResourcePath
27from .._geom import Box
28from .._image import Image
29from .._mask import Mask
30from .._masked_image import MaskedImageSerializationModel
31from .._transforms import Projection, ProjectionSerializationModel
32from .._visit_image import VisitImageSerializationModel
33from ..serialization import ButlerInfo, TableCellReferenceModel
34from ._common import FitsCompressionOptions
35from ._input_archive import FitsInputArchive, read
36from ._output_archive import write
class GenericFormatter(FormatterV2):
    """The butler interface to FITS archive serialization.

    Serialized types must meet all the requirements of the `read` and `write`
    functions.

    Notes
    -----
    This formatter just forwards all read parameters it receives as
    ``**kwargs`` to `.read` and hence the ``deserialize`` method of the type it
    is reading.  This may or may not be appropriate.

    This formatter must be subclassed to add component support.

    The write parameter configuration for this formatter is designed to be
    identical to that for the legacy FITS formatters defined in
    `lsst.obs.base`.

    Butler provenance is written to both FITS headers and the archive tree.
    """

    default_extension: ClassVar[str] = ".fits"
    can_read_from_uri: ClassVar[bool] = True
    supported_write_parameters: ClassVar[frozenset[str]] = frozenset({"recipe"})

    # Provenance stashed by `add_provenance` for later use in
    # `write_local_file` and `_update_header`; safe because a Formatter
    # instance is only used once.
    butler_provenance: DatasetProvenance | None = None

    def read_from_uri(self, uri: ResourcePath, component: str | None = None, expected_size: int = -1) -> Any:
        # Forward all butler read parameters straight through to `read` (and
        # hence to the serialized type's ``deserialize`` method).
        pytype = self.dataset_ref.datasetType.storageClass.pytype
        kwargs = self.file_descriptor.parameters or {}
        return read(pytype, uri, **kwargs).deserialized

    def write_local_file(self, in_memory_dataset: Any, uri: ResourcePath) -> None:
        # Fall back to an empty provenance object if `add_provenance` was
        # never called for this dataset.
        butler_info = ButlerInfo(
            dataset=self.dataset_ref.to_simple(),
            provenance=self.butler_provenance if self.butler_provenance is not None else DatasetProvenance(),
        )
        write(
            in_memory_dataset,
            uri.ospath,
            update_header=self._update_header,
            compression_options=self._get_compression_options(),
            compression_seed=self._get_compression_seed(),
            butler_info=butler_info,
        )

    def add_provenance(
        self, in_memory_dataset: Any, /, *, provenance: DatasetProvenance | None = None
    ) -> Any:
        # Instead of attaching the provenance to the object we remember it on
        # the formatter, since a Formatter instance is only used once.
        self.butler_provenance = provenance
        return in_memory_dataset

    def _get_compression_seed(self) -> int:
        """Return a deterministic compression-dither seed derived from the
        data ID.
        """
        # Set the seed based on data ID (all logic here duplicated from
        # obs_base). We can't just use 'hash', since like 'set' that's not
        # deterministic. And we can't rely on a DimensionPacker because those
        # are only defined for certain combinations of dimensions. Doing an MD5
        # of the JSON feels like overkill but I don't really see anything much
        # simpler.
        hash_bytes = hashlib.md5(
            json.dumps(list(self.data_id.required_values)).encode(),
            usedforsecurity=False,
        ).digest()
        # Squash that into the [1, 10000] range allowed by FITS; this modulus
        # actually yields [1, 9999], which is safely within that range.
        # Pass byteorder explicitly: int.from_bytes only grew a default
        # ("big") in Python 3.11, and being explicit keeps the seed identical
        # while also working on 3.10.
        return 1 + int.from_bytes(hash_bytes, "big") % 9999

    def _get_compression_options(self) -> dict[str, FitsCompressionOptions]:
        """Resolve the configured write recipe into per-HDU compression
        options, falling back to software defaults when no default recipe is
        configured.
        """
        recipe = self.write_parameters.get("recipe", "default")
        try:
            config = self.write_recipes[recipe]
        except KeyError:
            if recipe == "default":
                # If there's no default recipe just use the software defaults.
                return {}
            raise RuntimeError(f"Invalid recipe for ImageFormatter: {recipe!r}.") from None
        return {k: FitsCompressionOptions.model_validate(v) for k, v in config.items()}

    def _update_header(self, header: astropy.io.fits.Header) -> None:
        """Strip any stale butler provenance cards from ``header`` and write
        fresh ones for this dataset.
        """
        # Logic here largely lifted from lsst.obs.base.utils, which we
        # can't use directly for dependency and maybe mapping-type
        # (PropertyList vs. astropy) reasons. We assume we can always add
        # long cards (astropy will CONTINUE them) but not comments
        # (astropy will truncate and warn on long cards).
        for key in list(header):
            if key.startswith("LSST BUTLER"):
                del header[key]
        if self.butler_provenance is not None:
            for key, value in self.butler_provenance.to_flat_dict(
                self.dataset_ref, prefix="HIERARCH LSST BUTLER", sep=" ", simple_types=True, max_inputs=3_000
            ).items():
                header.set(key, value)
class ComponentSentinel(enum.Enum):
    """Special values returned by `ImageFormatter.read_component`."""

    UNRECOGNIZED_COMPONENT = 1
    """The given component is not one this formatter knows about, though a
    subclass may still handle it.
    """

    INVALID_COMPONENT_MODEL = 2
    """The component name is recognized, but the corresponding attribute of
    the top-level `..serialization.ArchiveTree` was missing or not of the
    expected type.
    """
class ImageFormatter(GenericFormatter):
    """The specialized butler interface to FITS archive serialization of
    image-like objects with ``projection`` and ``bbox`` components.

    Notes
    -----
    This formatter works by assuming the `..serialization.ArchiveTree` for the
    top-level object has a ``projection`` attribute (a
    `..ProjectionSerializationModel`) and a ``bbox`` property (a `..Box`).

    Subclasses can add support for additional components by overriding
    `read_component`, delegating to `super`, and handling the cases where it
    returns a `ComponentSentinel` instance.
    """

    def read_from_uri(self, uri: ResourcePath, component: str | None = None, expected_size: int = -1) -> Any:
        pytype: Any = self.file_descriptor.storageClass.pytype
        if component is None:
            # Full-object read; 'bbox' is the only parameter consumed here.
            result = read(pytype, uri, bbox=self.pop_bbox_from_parameters()).deserialized
        else:
            # Component read: open the archive lazily and let
            # `read_component` (possibly overridden) extract what it can.
            with FitsInputArchive.open(uri, partial=True) as archive:
                tree = archive.get_tree(pytype._get_archive_tree_type(TableCellReferenceModel))
                result = self.read_component(component, tree, archive)
            # Turn sentinel results into exceptions with useful messages.
            if result is ComponentSentinel.UNRECOGNIZED_COMPONENT:
                raise NotImplementedError(
                    f"Unrecognized component {component!r} for {type(self).__name__}."
                )
            if result is ComponentSentinel.INVALID_COMPONENT_MODEL:
                raise NotImplementedError(
                    f"Invalid serialization model for component {component!r} for {type(self).__name__}."
                )
        self.check_unhandled_parameters()
        return result

    def pop_bbox_from_parameters(self) -> Box | None:
        """Remove and return the 'bbox' read parameter, if present."""
        params = self.file_descriptor.parameters
        if not params:
            return None
        return params.pop("bbox", None)

    def check_unhandled_parameters(self) -> None:
        """Raise if any read parameters were left unconsumed."""
        remaining = self.file_descriptor.parameters
        if remaining:
            raise RuntimeError(f"Parameters {list(remaining.keys())} not recognized.")

    def read_component(
        self,
        component: str,
        tree: Any,
        archive: FitsInputArchive,
    ) -> Any:
        """Extract a single component from the archive tree, returning a
        `ComponentSentinel` when the component is unknown or its serialized
        form is not of the expected type.
        """
        if component == "projection":
            serialized_projection = getattr(tree, "projection", None)
            if isinstance(serialized_projection, ProjectionSerializationModel):
                return Projection.deserialize(serialized_projection, archive)
            return ComponentSentinel.INVALID_COMPONENT_MODEL
        if component == "bbox":
            serialized_bbox = getattr(tree, "bbox", None)
            if isinstance(serialized_bbox, Box):
                return serialized_bbox
            return ComponentSentinel.INVALID_COMPONENT_MODEL
        if component == "obs_info":
            serialized_obs_info = getattr(tree, "obs_info", None)
            if isinstance(serialized_obs_info, ObservationInfo):
                return serialized_obs_info
            return ComponentSentinel.INVALID_COMPONENT_MODEL
        return ComponentSentinel.UNRECOGNIZED_COMPONENT
219class MaskedImageFormatter(ImageFormatter):
220 """A specialized butler interface to FITS archive serialization of
221 the `..MaskedImage` class.
222 """
224 def read_component(
225 self,
226 component: str,
227 tree: Any,
228 archive: FitsInputArchive,
229 ) -> Any:
230 match super().read_component(component, tree, archive):
231 case ComponentSentinel():
232 pass
233 case handled:
234 return handled
235 if not isinstance(tree, MaskedImageSerializationModel):
236 return ComponentSentinel.INVALID_COMPONENT_MODEL
237 match component:
238 case "image":
239 return Image.deserialize(tree.image, archive, bbox=self.pop_bbox_from_parameters())
240 case "mask":
241 return Mask.deserialize(tree.mask, archive, bbox=self.pop_bbox_from_parameters())
242 case "variance":
243 return Image.deserialize(tree.variance, archive, bbox=self.pop_bbox_from_parameters())
244 return ComponentSentinel.UNRECOGNIZED_COMPONENT
247class VisitImageFormatter(MaskedImageFormatter):
248 """A specialized butler interface to FITS archive serialization of
249 the `..VisitImage` class.
250 """
252 def read_component(
253 self,
254 component: str,
255 tree: Any,
256 archive: FitsInputArchive,
257 ) -> Any:
258 match super().read_component(component, tree, archive):
259 case ComponentSentinel():
260 pass
261 case handled:
262 return handled
263 if not isinstance(tree, VisitImageSerializationModel):
264 return ComponentSentinel.INVALID_COMPONENT_MODEL
265 match component:
266 case "psf":
267 return tree.deserialize_psf(archive)
268 return ComponentSentinel.UNRECOGNIZED_COMPONENT