Coverage for python/lsst/daf/butler/tests/testFormatters.py: 34% (91 statements)
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = (
    "DoNothingFormatter",
    "FormatterTest",
    "LenientYamlFormatter",
    "MetricsExampleFormatter",
    "MultipleExtensionsFormatter",
    "SingleExtensionFormatter",
)

import json
from collections.abc import Mapping
from typing import TYPE_CHECKING, Any, BinaryIO

import yaml

from lsst.resources import ResourceHandleProtocol

from .._formatter import Formatter, FormatterV2
from ..formatters.json import JsonFormatter
from ..formatters.yaml import YamlFormatter

if TYPE_CHECKING:
    from .._dataset_provenance import DatasetProvenance
    from .._location import Location
class DoNothingFormatter(Formatter):
    """A test formatter that does not need to format anything."""

    def read(self, component: str | None = None) -> Any:
        raise NotImplementedError("Type does not support reading")

    def write(self, inMemoryDataset: Any) -> None:
        raise NotImplementedError("Type does not support writing")


class FormatterTest(Formatter):
    """A test formatter that does not need to format anything and has
    parameters.
    """

    supportedWriteParameters = frozenset({"min", "max", "median", "comment", "extra", "recipe"})

    def read(self, component: str | None = None) -> Any:
        raise NotImplementedError("Type does not support reading")

    def write(self, inMemoryDataset: Any) -> None:
        raise NotImplementedError("Type does not support writing")

    @staticmethod
    def validate_write_recipes(recipes: Mapping[str, Any] | None) -> Mapping[str, Any] | None:
        if not recipes:
            return recipes
        for recipeName in recipes:
            if "mode" not in recipes[recipeName]:
                raise RuntimeError("'mode' is a required write recipe parameter")
        return recipes
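

# Illustrative sketch (editorial addition, not part of the original module):
# a write-recipe mapping that passes ``FormatterTest.validate_write_recipes``.
# The recipe names and "mode" values below are hypothetical; the only
# requirement enforced above is that every recipe defines "mode".
def _example_validate_write_recipes() -> None:
    recipes = {
        "default": {"mode": "fast"},
        "archival": {"mode": "careful", "level": 9},
    }
    # Valid recipes are returned unchanged.
    assert FormatterTest.validate_write_recipes(recipes) is recipes
    # A recipe without "mode" would raise RuntimeError instead.
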
class SingleExtensionFormatter(DoNothingFormatter):
    """A do nothing formatter that has a single extension registered."""

    extension = ".fits"


class MultipleExtensionsFormatter(SingleExtensionFormatter):
    """A formatter that has multiple extensions registered."""

    supportedExtensions = frozenset({".fits.gz", ".fits.fz", ".fit"})


class LenientYamlFormatter(YamlFormatter):
    """A test formatter that allows any file extension but always reads and
    writes YAML.
    """

    @classmethod
    def validate_extension(cls, location: Location) -> None:
        return
class MetricsExampleFormatter(FormatterV2):
    """A specialist test formatter for metrics that supports components
    directly without assembler delegate.
    """

    supported_extensions = frozenset({".yaml", ".json"})
    default_extension = ".yaml"
    can_read_from_stream = True

    def read_from_stream(
        self, stream: BinaryIO | ResourceHandleProtocol, component: str | None = None, expected_size: int = -1
    ) -> Any:
        """Read data from a file.

        Parameters
        ----------
        stream : `typing.BinaryIO` or `lsst.resources.ResourceHandleProtocol`
            Open file handle to read from.
        component : `str`, optional
            Component to read from the file. Only used if the `StorageClass`
            for reading differed from the `StorageClass` used to write the
            file.
        expected_size : `int`, optional
            The expected size of the resource.

        Returns
        -------
        inMemoryDataset : `object`
            The requested data as a Python object. The type of object
            is controlled by the specific formatter.

        Raises
        ------
        ValueError
            Component requested but this file does not seem to be a concrete
            composite.
        KeyError
            Raised when parameters passed with fileDescriptor are not
            supported.
        """
        # This formatter cannot read a subset from disk because it
        # uses yaml or json.
        if ".yaml" in stream.name:
            data = yaml.load(stream, Loader=yaml.SafeLoader)
        elif ".json" in stream.name:
            data = json.load(stream)
        else:
            raise RuntimeError(f"Unsupported file extension found in path '{stream.name}'")

        # We can slice up front if required.
        parameters = self.file_descriptor.parameters
        if "data" in data and parameters and "slice" in parameters:
            data["data"] = data["data"][parameters["slice"]]

        pytype = self.file_descriptor.storageClass.pytype
        in_memory_dataset = pytype(**data)

        if not component:
            return in_memory_dataset

        if component == "summary":
            return in_memory_dataset.summary
        elif component == "output":
            return in_memory_dataset.output
        elif component == "data":
            return in_memory_dataset.data
        elif component == "counter":
            return len(in_memory_dataset.data)
        raise ValueError(f"Unsupported component: {component}")

    def to_bytes(self, in_memory_dataset: Any) -> bytes:
        """Serialize a Dataset to bytes.

        Parameters
        ----------
        in_memory_dataset : `object`
            The Dataset to serialize.

        Returns
        -------
        serialized_dataset : `bytes`
            The in-memory dataset as bytes.
        """
        serialized = yaml.dump(in_memory_dataset._asdict())
        return serialized.encode()
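

# Illustrative sketch (editorial addition, not part of the original module):
# ``MetricsExampleFormatter.to_bytes`` dumps the dataset's ``_asdict()`` form
# as YAML and ``read_from_stream`` parses it back with the safe loader, so a
# round trip looks roughly like this. The payload keys mirror the
# summary/output/data components handled above; the helper itself is
# hypothetical.
def _example_metrics_yaml_round_trip() -> None:
    payload = {"summary": {"n": 2}, "output": {"answer": 42}, "data": [1, 2, 3]}
    serialized = yaml.dump(payload).encode()
    restored = yaml.load(serialized, Loader=yaml.SafeLoader)
    assert restored == payload
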
class MetricsExampleDataFormatter(Formatter):
    """A specialist test formatter for the data component of a MetricsExample.

    This is needed if the MetricsExample is disassembled and we want to
    support the derived component.
    """

    unsupportedParameters = None
    """Let the assembler delegate handle slice."""

    extension = ".yaml"
    """Always write YAML."""

    def read(self, component: str | None = None) -> Any:
        """Read data from a file.

        Parameters
        ----------
        component : `str`, optional
            Component to read from the file. Only used if the `StorageClass`
            for reading differed from the `StorageClass` used to write the
            file.

        Returns
        -------
        inMemoryDataset : `object`
            The requested data as a Python object. The type of object
            is controlled by the specific formatter.

        Raises
        ------
        ValueError
            Component requested but this file does not seem to be a concrete
            composite.
        KeyError
            Raised when parameters passed with fileDescriptor are not
            supported.
        """
        # This formatter cannot read a subset from disk because it
        # uses yaml.
        path = self.fileDescriptor.location.path
        with open(path) as fd:
            data = yaml.load(fd, Loader=yaml.SafeLoader)

        # We can slice up front if required.
        parameters = self.fileDescriptor.parameters
        if parameters and "slice" in parameters:
            data = data[parameters["slice"]]

        # This should be a native list.
        inMemoryDataset = data

        if not component:
            return inMemoryDataset

        if component == "counter":
            return len(inMemoryDataset)
        raise ValueError(f"Unsupported component: {component}")

    def write(self, inMemoryDataset: Any) -> None:
        """Write a Dataset.

        Parameters
        ----------
        inMemoryDataset : `object`
            The Dataset to store.
        """
        fileDescriptor = self.fileDescriptor
        # Update the location with the formatter-preferred file extension.
        fileDescriptor.location.updateExtension(self.extension)

        data = yaml.dump(inMemoryDataset)
        fileDescriptor.location.uri.write(data.encode("utf-8"))
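

# Illustrative sketch (editorial addition, not part of the original module):
# how the "slice" parameter consumed by ``MetricsExampleDataFormatter.read``
# trims the data list before it is returned. The values are hypothetical.
def _example_slice_parameter() -> None:
    data = [1, 2, 3, 4, 5]
    parameters = {"slice": slice(1, 3)}
    if parameters and "slice" in parameters:
        data = data[parameters["slice"]]
    assert data == [2, 3]
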
class MetricsExampleModelProvenanceFormatter(JsonFormatter):
    """Specialist formatter to test provenance addition."""

    def add_provenance(
        self, in_memory_dataset: Any, /, *, provenance: DatasetProvenance | None = None
    ) -> Any:
        # Copy it to prove that works.
        new = in_memory_dataset.model_copy()
        new.provenance = provenance
        new.dataset_id = self.dataset_ref.id
        return new
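

# Illustrative sketch (editorial addition, not part of the original module):
# ``add_provenance`` above assumes a pydantic v2 model (hence ``model_copy``)
# with assignable ``provenance`` and ``dataset_id`` fields, roughly shaped
# like the hypothetical model below.
def _example_provenance_model_shape() -> Any:
    import pydantic

    class ExampleMetricsModel(pydantic.BaseModel):
        data: list[int] = []
        provenance: Any = None
        dataset_id: Any = None

    return ExampleMetricsModel(data=[1, 2, 3])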