Coverage for python/lsst/daf/butler/delegates/arrowtable.py: 27%
35 statements
« prev ^ index » next coverage.py v6.5.0, created at 2023-02-28 10:37 +0000
« prev ^ index » next coverage.py v6.5.0, created at 2023-02-28 10:37 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22"""Support for reading Arrow tables."""
23from __future__ import annotations
25from typing import Any, Mapping, Optional
27import pyarrow as pa
28from lsst.daf.butler import StorageClassDelegate
29from lsst.utils.introspection import get_full_type_name
30from lsst.utils.iteration import ensure_iterable
32__all__ = ["ArrowTableDelegate"]
class ArrowTableDelegate(StorageClassDelegate):
    """Storage class delegate for in-memory Arrow tables.

    Provides component access (``columns``, ``schema``, ``rowcount``) and
    ``columns`` parameter handling for `pyarrow.Table` datasets.
    """

    # Python type that ``handleParameters`` requires its dataset to be.
    _datasetType = pa.Table

    def getComponent(self, composite: pa.Table, componentName: str) -> Any:
        """Get a component from an Arrow table.

        Parameters
        ----------
        composite : `~pyarrow.Table`
            Arrow table to access component.
        componentName : `str`
            Name of component to retrieve. Recognized names are
            ``"columns"``, ``"schema"`` and ``"rowcount"``.

        Returns
        -------
        component : `object`
            The component: the table schema for ``"columns"`` and
            ``"schema"``, the number of rows for ``"rowcount"``.

        Raises
        ------
        AttributeError
            The component can not be found.
        """
        if componentName in ("columns", "schema"):
            # The schema will be translated to column format
            # depending on the input type.
            return composite.schema

        if componentName == "rowcount":
            # Ask the table for its row count directly instead of measuring
            # the first column; this also works for a table that has no
            # columns, where indexing ``schema.names[0]`` would raise
            # IndexError.
            return composite.num_rows

        raise AttributeError(
            f"Do not know how to retrieve component {componentName} from {get_full_type_name(composite)}"
        )

    def handleParameters(self, inMemoryDataset: Any, parameters: Optional[Mapping[str, Any]] = None) -> Any:
        """Apply read parameters to an in-memory Arrow table.

        Only the ``columns`` parameter is acted upon; it may be a single
        column name or an iterable of column names.

        Parameters
        ----------
        inMemoryDataset : `~pyarrow.Table`
            Table to which the parameters are applied.
        parameters : `~collections.abc.Mapping` [`str`, `object`], optional
            Parameters to apply. If `None`, or if there is no ``columns``
            key, the dataset is returned unchanged.

        Returns
        -------
        inMemoryDataset : `~pyarrow.Table`
            The input table, or a table containing only the requested
            columns (duplicates removed, request order preserved).

        Raises
        ------
        ValueError
            Raised if ``inMemoryDataset`` is not a `pyarrow.Table` or if a
            requested column is not present in the table.
        NotImplementedError
            Raised if a requested column name is not a string.
        """
        if not isinstance(inMemoryDataset, self._datasetType):
            raise ValueError(
                f"inMemoryDataset must be a {get_full_type_name(self._datasetType)} and "
                f"not {get_full_type_name(inMemoryDataset)}."
            )

        if parameters is None or "columns" not in parameters:
            return inMemoryDataset

        read_columns = list(ensure_iterable(parameters["columns"]))

        # Look up the available columns once, as a set, rather than
        # re-deriving and linearly scanning the list for every request.
        available_columns = set(self._getColumns(inMemoryDataset))
        for column in read_columns:
            # The type check must precede the membership test so that a
            # non-string request is reported as NotImplementedError.
            if not isinstance(column, str):
                raise NotImplementedError(
                    "InMemoryDataset of an Arrow Table only supports string column names."
                )
            if column not in available_columns:
                raise ValueError(f"Unrecognized column name {column!r}.")

        # Ensure uniqueness, keeping order.
        read_columns = list(dict.fromkeys(read_columns))

        return self._selectColumns(inMemoryDataset, read_columns)

    def _getColumns(self, inMemoryDataset: pa.Table) -> list[str]:
        """Get the column names from the inMemoryDataset.

        Parameters
        ----------
        inMemoryDataset : `~pyarrow.Table`
            Dataset to extract columns.

        Returns
        -------
        columns : `list` [`str`]
            List of columns, taken from the names in the table schema.
        """
        return inMemoryDataset.schema.names

    def _selectColumns(self, inMemoryDataset: pa.Table, columns: list[str]) -> pa.Table:
        """Select a subset of columns from the inMemoryDataset.

        Parameters
        ----------
        inMemoryDataset : `~pyarrow.Table`
            Dataset to extract columns.
        columns : `list` [`str`]
            List of columns to extract.

        Returns
        -------
        subDataset : `~pyarrow.Table`
            Subselection of inMemoryDataset.
        """
        return inMemoryDataset.select(columns)