Coverage for python / lsst / daf / butler / formatters / yaml.py: 28%
42 statements
« prev ^ index » next — coverage.py v7.13.5, created at 2026-04-17 08:49 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
28from __future__ import annotations
30__all__ = ("YamlFormatter",)
32import contextlib
33import dataclasses
34from typing import Any
36import yaml
38from lsst.resources import ResourcePath
40from .typeless import TypelessFormatter
class YamlFormatter(TypelessFormatter):
    """Read and write YAML files."""

    default_extension = ".yaml"
    unsupported_parameters = None
    supported_write_parameters = frozenset({"unsafe_dump"})
    can_read_from_uri = True

    def read_from_uri(self, uri: ResourcePath, component: str | None = None, expected_size: int = -1) -> Any:
        """Read a dataset from the given URI, parsing it as YAML.

        Parameters
        ----------
        uri : `lsst.resources.ResourcePath`
            URI of the resource to read.
        component : `str` or `None`, optional
            Requested component; unused by this formatter.
        expected_size : `int`, optional
            Expected size of the resource in bytes, or -1 if unknown;
            unused by this formatter.

        Returns
        -------
        data : `object`
            Python object reconstructed from the YAML content.

        Notes
        -----
        `yaml.safe_load` is used, so only YAML-native and explicitly
        registered types can be reconstructed.
        """
        # Can not use ResourcePath.open()
        yaml_bytes = uri.read()
        try:
            data = yaml.safe_load(yaml_bytes)
        except Exception as e:
            # Report on the first few bytes of the file.
            bytes_str = yaml_bytes[:60].decode(errors="replace")
            # Fixed: this formatter parses YAML, not JSON.
            e.add_note(f"Error parsing YAML bytes starting with {bytes_str!r}")
            raise
        return data

    def to_bytes(self, in_memory_dataset: Any) -> bytes:
        """Write the in memory dataset to a bytestring.

        Will look for `_asdict()` method to aid YAML serialization, following
        the approach of the ``simplejson`` module. Additionally, can attempt
        to detect `pydantic.BaseModel`.

        The `dict` will be passed to the relevant constructor on read if
        not explicitly handled by PyYAML.

        Parameters
        ----------
        in_memory_dataset : `object`
            Object to serialize.

        Returns
        -------
        serialized_dataset : `bytes`
            YAML string encoded to bytes.

        Raises
        ------
        Exception
            The object could not be serialized.

        Notes
        -----
        `~yaml.SafeDumper` is used when generating the YAML serialization.
        This will fail for data structures that have complex python classes
        without a registered YAML representer.
        """
        converted = False
        if hasattr(in_memory_dataset, "model_dump") and hasattr(in_memory_dataset, "model_dump_json"):
            # Pydantic v2-like model if both model_dump() and model_dump_json()
            # exist.
            with contextlib.suppress(Exception):
                in_memory_dataset = in_memory_dataset.model_dump()
                converted = True

        if not converted and hasattr(in_memory_dataset, "dict") and hasattr(in_memory_dataset, "json"):
            # Pydantic v1-like model if both dict() and json() exist.
            with contextlib.suppress(Exception):
                in_memory_dataset = in_memory_dataset.dict()
                converted = True

        if not converted:
            # The initial check this is not a type is to help mypy.
            if not isinstance(in_memory_dataset, type) and dataclasses.is_dataclass(in_memory_dataset):
                in_memory_dataset = dataclasses.asdict(in_memory_dataset)
            elif hasattr(in_memory_dataset, "_asdict"):
                in_memory_dataset = in_memory_dataset._asdict()

        unsafe_dump = self.write_parameters.get("unsafe_dump", False)
        # Now that Python always uses an ordered dict, do not sort keys
        # on write so that order can be preserved on read.
        if unsafe_dump:
            serialized = yaml.dump(in_memory_dataset, sort_keys=False)
        else:
            serialized = yaml.safe_dump(in_memory_dataset, sort_keys=False)
        return serialized.encode()