Coverage for python / lsst / daf / butler / formatters / json.py: 37%
35 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-30 08:41 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
28from __future__ import annotations
30__all__ = ("JsonFormatter",)
32import contextlib
33import dataclasses
34import json
35from typing import Any
37from pydantic_core import from_json
39from lsst.resources import ResourcePath
41from .typeless import TypelessFormatter
class JsonFormatter(TypelessFormatter):
    """Formatter for reading and writing JSON files."""

    default_extension = ".json"
    unsupported_parameters = None
    can_read_from_uri = True

    def read_from_uri(self, uri: ResourcePath, component: str | None = None, expected_size: int = -1) -> Any:
        """Read a JSON dataset from the given URI.

        Parameters
        ----------
        uri : `~lsst.resources.ResourcePath`
            Location of the serialized JSON resource.
        component : `str` or `None`, optional
            Component to read; unused here.
        expected_size : `int`, optional
            Expected size of the resource in bytes; unused here.

        Returns
        -------
        data : `object`
            The deserialized Python object.
        """
        # The full payload must be held in memory whether the resource is
        # local or remote: json.load() offers no incremental parsing and is
        # equivalent to json.loads(uri.read()), so reading the bytes up
        # front loses nothing.
        raw = uri.read()

        # Prefer Pydantic's model_validate_json() when the storage class
        # python type supports it -- faster than going through json.loads().
        model_type = self.file_descriptor.storageClass.pytype
        if hasattr(model_type, "model_validate_json"):
            # May raise pydantic ValidationError.
            return model_type.model_validate_json(raw)

        try:
            # May raise ValueError on malformed JSON.
            return from_json(raw)
        except ValueError as exc:
            # Attach a snippet of the payload to aid debugging.
            snippet = raw[:60].decode(errors="replace")
            exc.add_note(f"Error parsing JSON bytes starting with {snippet!r}")
            raise

    def to_bytes(self, in_memory_dataset: Any) -> bytes:
        """Write the in memory dataset to a bytestring.

        Parameters
        ----------
        in_memory_dataset : `object`
            Object to serialize.

        Returns
        -------
        serialized_dataset : `bytes`
            Bytes representing the serialized dataset.

        Raises
        ------
        Exception
            The object could not be serialized.
        """
        # Try the standardized native-JSON serializers first: Pydantic v2
        # models provide model_dump_json(); v1 models without the
        # compatibility layer provide json(). Fall through on AttributeError.
        for serializer_name in ("model_dump_json", "json"):
            with contextlib.suppress(AttributeError):
                return getattr(in_memory_dataset, serializer_name)().encode()

        # The isinstance check guards against bare classes (and helps mypy):
        # is_dataclass() is also true for dataclass types, but asdict()
        # requires an instance.
        if not isinstance(in_memory_dataset, type) and dataclasses.is_dataclass(in_memory_dataset):
            serializable = dataclasses.asdict(in_memory_dataset)
        elif hasattr(in_memory_dataset, "_asdict"):
            # NamedTuple-style objects.
            serializable = in_memory_dataset._asdict()
        else:
            serializable = in_memory_dataset
        return json.dumps(serializable, ensure_ascii=False).encode()