Coverage for python / lsst / daf / butler / formatters / typeless.py: 18%
41 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-14 23:37 +0000
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-14 23:37 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
28"""Formatter base class that coerces generically-read objects to their expected Python type."""
30from __future__ import annotations
32__all__ = ["TypelessFormatter"]
34import dataclasses
35from typing import TYPE_CHECKING, Any
37from lsst.daf.butler import FormatterV2
39if TYPE_CHECKING:
40 from lsst.daf.butler import StorageClass
41 from lsst.daf.butler.datastore.cache_manager import AbstractDatastoreCacheManager
class TypelessFormatter(FormatterV2):
    """`FormatterV2` base class that converts generically-read objects into
    the Python type expected by the storage class.

    Notes
    -----
    Subclasses keep using the standard `FormatterV2` methods for reading
    bytes/files and writing bytes/files. The ``read()`` method defined here
    calls `FormatterV2.read` and then tries several strategies to coerce
    the returned object to the correct Python type.
    """

    def read(
        self,
        component: str | None = None,
        expected_size: int = -1,
        cache_manager: AbstractDatastoreCacheManager | None = None,
    ) -> Any:
        """Read the dataset via the base class and coerce it to the
        requested Python type.

        Parameters
        ----------
        component : `str`, optional
            Name of the component to return. Forwarded to
            `FormatterV2.read` and also used to extract the component
            from the object that was read.
        expected_size : `int`, optional
            Forwarded unchanged to `FormatterV2.read`.
        cache_manager : `AbstractDatastoreCacheManager`, optional
            Forwarded unchanged to `FormatterV2.read`.

        Returns
        -------
        in_memory_dataset : `object`
            The coerced dataset or component. Can be `None` only when a
            component was requested.

        Raises
        ------
        ValueError
            Raised if no component was requested and nothing was read.
        """
        # Standard base class read first, then assembly/coercion.
        raw = super().read(component, expected_size, cache_manager)
        result = self._assemble_dataset(raw, component)

        # A `None` result is tolerated for components: it signals that the
        # component was understood but is missing. For a full dataset it
        # is an error.
        if result is not None or component is not None:
            return result
        raise ValueError(f"Unable to read data with URI {self.file_descriptor.location.uri}")

    def _assemble_dataset(self, data: Any, component: str | None = None) -> Any:
        """Coerce the dataset, or one component of it, to the Python type
        of the read storage class.

        Parameters
        ----------
        data : `dict` or `object`
            Object obtained from the file. Either the dataset itself or
            the composite from which ``component`` is to be extracted.
        component : `str`, optional
            Name of the component to extract from the composite. If
            `None` the whole dataset is coerced and returned.

        Returns
        -------
        in_memory_dataset : `object`
            The requested data as a Python object. `None` if the
            component could not be extracted from the composite.
        """
        descriptor = self.file_descriptor
        requested_storage_class = descriptor.readStorageClass
        written_storage_class = descriptor.storageClass

        # Simple case: no component, coerce straight to the read type.
        if component is None:
            return self._coerce_type(data, written_storage_class, requested_storage_class)

        # A component can only be extracted from a composite of the
        # correct python type, so first coerce to the type that was
        # written. The read storage class is irrelevant at this stage,
        # hence the write storage class is passed in for both roles.
        composite = self._coerce_type(data, written_storage_class, written_storage_class)

        # Hopefully this is a concrete composite stored in a single file
        # and the delegate can pull the component out of it.
        try:
            extracted = written_storage_class.delegate().getComponent(composite, component)
        except AttributeError:
            # Leave it to the caller to complain about the missing data.
            extracted = None

        # From here on the relevant "written" storage class is that of the
        # component, which should match what getComponent returned.
        component_storage_class = written_storage_class.allComponents()[component]
        return self._coerce_type(extracted, component_storage_class, requested_storage_class)

    def _coerce_builtin_type(self, in_memory_dataset: Any, write_storage_class: StorageClass) -> Any:
        """Convert a built-in object into the Python type that was
        originally written.

        Parameters
        ----------
        in_memory_dataset : `object`
            Object to coerce to expected type.
        write_storage_class : `StorageClass`
            Storage class used to serialize this data.

        Returns
        -------
        in_memory_dataset : `object`
            Object of expected type ``write_storage_class.pytype``, or the
            input object unchanged if no conversion applies.

        Notes
        -----
        The supplied object is returned untouched if it is:

        * `None`.
        * Already an instance of the required type.
        * Not an instance of a type defined in ``builtins``.

        This helper exists for file formats that do not record the
        original Python type and hand back `dict`, `list` and similar
        built-ins instead. It runs before the `StorageClass` converters
        so that the original type is rebuilt first and only then
        converted to the type the user asked for. That ordering matters
        for Pydantic models, whose internal structure may differ from
        their `dict` form when the user requested a `dict`.
        """
        if in_memory_dataset is None:
            return in_memory_dataset

        target_type = write_storage_class.pytype
        if isinstance(in_memory_dataset, target_type):
            return in_memory_dataset
        if type(in_memory_dataset).__module__ != "builtins":
            return in_memory_dataset

        # Prefer a validating constructor if the type has one. Pydantic v2
        # provides model_validate; Pydantic v1 (and some non-pydantic
        # classes following that convention) provide parse_obj.
        for constructor_name in ("model_validate", "parse_obj"):
            constructor = getattr(target_type, constructor_name, None)
            if constructor:
                return constructor(in_memory_dataset)

        # Anything that is not a dict is handed to the constructor as a
        # single positional argument in the hope that it is accepted.
        if not isinstance(in_memory_dataset, dict):
            return target_type(in_memory_dataset)

        # Composites that are not dataclasses are assumed to be buildable
        # from a dict by their registered delegate.
        if not dataclasses.is_dataclass(target_type) and write_storage_class.isComposite():
            return write_storage_class.delegate().assemble(in_memory_dataset, pytype=target_type)

        # Dataclasses accept key/value parameters; for every other type
        # unpack the dict and hope that works.
        return target_type(**in_memory_dataset)

    def _coerce_type(
        self, in_memory_dataset: Any, write_storage_class: StorageClass, read_storage_class: StorageClass
    ) -> Any:
        """Coerce the supplied in-memory dataset to the requested python
        type.

        Parameters
        ----------
        in_memory_dataset : `object`
            Object to coerce to expected type.
        write_storage_class : `StorageClass`
            Storage class used to serialize this data.
        read_storage_class : `StorageClass`
            Storage class requested as the outcome.

        Returns
        -------
        in_memory_dataset : `object`
            Result of ``read_storage_class.coerce_type`` applied to the
            object after built-in types have been converted to the
            written type.
        """
        # Rebuild the written type first, then let the read storage class
        # do any remaining conversion.
        as_written = self._coerce_builtin_type(in_memory_dataset, write_storage_class)
        return read_storage_class.coerce_type(as_written)