Coverage for python / lsst / daf / butler / formatters / yaml.py: 28%

42 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-04-26 08:49 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27 

28from __future__ import annotations 

29 

30__all__ = ("YamlFormatter",) 

31 

32import contextlib 

33import dataclasses 

34from typing import Any 

35 

36import yaml 

37 

38from lsst.resources import ResourcePath 

39 

40from .typeless import TypelessFormatter 

41 

42 

class YamlFormatter(TypelessFormatter):
    """Read and write YAML files."""

    # Extension used when writing datasets to a file store.
    default_extension = ".yaml"
    # All read parameters are accepted by this formatter.
    unsupported_parameters = None
    # "unsafe_dump" switches serialization from yaml.safe_dump to yaml.dump.
    supported_write_parameters = frozenset({"unsafe_dump"})
    can_read_from_uri = True

    def read_from_uri(self, uri: ResourcePath, component: str | None = None, expected_size: int = -1) -> Any:
        """Read a YAML dataset directly from a URI.

        Parameters
        ----------
        uri : `lsst.resources.ResourcePath`
            URI of the YAML resource to read.
        component : `str` or `None`, optional
            Component being requested; unused here since the full parsed
            object is always returned.
        expected_size : `int`, optional
            Expected size of the resource in bytes; ``-1`` if unknown.
            Unused by this implementation.

        Returns
        -------
        data : `object`
            Python object reconstructed from the YAML document.
        """
        # Can not use ResourcePath.open()
        yaml_bytes = uri.read()
        try:
            data = yaml.safe_load(yaml_bytes)
        except Exception as e:
            # Report on the first few bytes of the file.
            bytes_str = yaml_bytes[:60].decode(errors="replace")
            # Bug fix: this formatter parses YAML, not JSON, so the
            # diagnostic note must say YAML.
            e.add_note(f"Error parsing YAML bytes starting with {bytes_str!r}")
            raise
        return data

    def to_bytes(self, in_memory_dataset: Any) -> bytes:
        """Write the in memory dataset to a bytestring.

        Will look for `_asdict()` method to aid YAML serialization, following
        the approach of the ``simplejson`` module. Additionally, can attempt
        to detect `pydantic.BaseModel`.

        The `dict` will be passed to the relevant constructor on read if
        not explicitly handled by Pyyaml.

        Parameters
        ----------
        in_memory_dataset : `object`
            Object to serialize.

        Returns
        -------
        serialized_dataset : `bytes`
            YAML string encoded to bytes.

        Raises
        ------
        Exception
            The object could not be serialized.

        Notes
        -----
        `~yaml.SafeDumper` is used when generating the YAML serialization.
        This will fail for data structures that have complex python classes
        without a registered YAML representer.
        """
        converted = False
        if hasattr(in_memory_dataset, "model_dump") and hasattr(in_memory_dataset, "model_dump_json"):
            # Pydantic v2-like model if both model_dump() and model_dump_json()
            # exist.
            with contextlib.suppress(Exception):
                in_memory_dataset = in_memory_dataset.model_dump()
                converted = True

        if not converted and hasattr(in_memory_dataset, "dict") and hasattr(in_memory_dataset, "json"):
            # Pydantic v1-like model if both dict() and json() exist.
            with contextlib.suppress(Exception):
                in_memory_dataset = in_memory_dataset.dict()
                converted = True

        if not converted:
            # The initial check this is not a type is to help mypy.
            if not isinstance(in_memory_dataset, type) and dataclasses.is_dataclass(in_memory_dataset):
                in_memory_dataset = dataclasses.asdict(in_memory_dataset)
            elif hasattr(in_memory_dataset, "_asdict"):
                in_memory_dataset = in_memory_dataset._asdict()

        unsafe_dump = self.write_parameters.get("unsafe_dump", False)
        # Now that Python always uses an ordered dict, do not sort keys
        # on write so that order can be preserved on read.
        if unsafe_dump:
            serialized = yaml.dump(in_memory_dataset, sort_keys=False)
        else:
            serialized = yaml.safe_dump(in_memory_dataset, sort_keys=False)
        return serialized.encode()