Coverage for python / lsst / daf / butler / formatters / json.py: 37%

35 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-05-01 08:18 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27 

28from __future__ import annotations 

29 

30__all__ = ("JsonFormatter",) 

31 

32import contextlib 

33import dataclasses 

34import json 

35from typing import Any 

36 

37from pydantic_core import from_json 

38 

39from lsst.resources import ResourcePath 

40 

41from .typeless import TypelessFormatter 

42 

43 

class JsonFormatter(TypelessFormatter):
    """Formatter for reading and writing JSON files."""

    # File extension used when writing datasets with this formatter.
    default_extension = ".json"
    # JSON reads accept any parameters supported by the storage class.
    unsupported_parameters = None
    # Reading happens directly from the URI rather than a local staging file.
    can_read_from_uri = True

    def read_from_uri(self, uri: ResourcePath, component: str | None = None, expected_size: int = -1) -> Any:
        """Read a JSON dataset directly from a URI.

        Parameters
        ----------
        uri : `lsst.resources.ResourcePath`
            Location of the serialized JSON.
        component : `str` or `None`, optional
            Component being requested; unused here.
        expected_size : `int`, optional
            Expected size of the resource in bytes; unused here.

        Returns
        -------
        data : `object`
            The deserialized object.
        """
        # There is no incremental JSON parsing mode: json.load() also
        # buffers the entire content, so the full payload must be read
        # into memory whether the resource is local or remote.
        raw = uri.read()

        # Prefer pydantic's model_validate_json() when the target python
        # type supports it, since it parses faster than json.loads().
        pytype = self.file_descriptor.storageClass.pytype
        if not hasattr(pytype, "model_validate_json"):
            try:
                # This can raise ValueError.
                return from_json(raw)
            except ValueError as exc:
                # Attach the first few bytes of the payload to make the
                # failure easier to diagnose.
                snippet = raw[:60].decode(errors="replace")
                exc.add_note(f"Error parsing JSON bytes starting with {snippet!r}")
                raise
        # This can raise ValidationError.
        return pytype.model_validate_json(raw)

    def to_bytes(self, in_memory_dataset: Any) -> bytes:
        """Write the in memory dataset to a bytestring.

        Parameters
        ----------
        in_memory_dataset : `object`
            Object to serialize.

        Returns
        -------
        serialized_dataset : `bytes`
            Bytes representing the serialized dataset.

        Raises
        ------
        Exception
            The object could not be serialized.
        """
        # Attempt the standardized native-JSON serializers first.
        # Pydantic v2 models expose model_dump_json(); v1 models without a
        # compatibility layer expose json().
        try:
            return in_memory_dataset.model_dump_json().encode()
        except AttributeError:
            pass
        try:
            return in_memory_dataset.json().encode()
        except AttributeError:
            pass

        # The leading not-a-type check exists to help mypy narrow the
        # dataclass instance case.
        if not isinstance(in_memory_dataset, type) and dataclasses.is_dataclass(in_memory_dataset):
            in_memory_dataset = dataclasses.asdict(in_memory_dataset)
        elif hasattr(in_memory_dataset, "_asdict"):
            # Named tuples convert via their _asdict() method.
            in_memory_dataset = in_memory_dataset._asdict()
        return json.dumps(in_memory_dataset, ensure_ascii=False).encode()