Coverage for python/lsst/daf/butler/delegates/arrowtable.py: 31%
35 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-07-14 19:21 +0000
« prev ^ index » next coverage.py v7.2.7, created at 2023-07-14 19:21 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22"""Support for reading Arrow tables."""
23from __future__ import annotations
25from collections.abc import Mapping
26from typing import Any
28import pyarrow as pa
29from lsst.daf.butler import StorageClassDelegate
30from lsst.utils.introspection import get_full_type_name
31from lsst.utils.iteration import ensure_iterable
33__all__ = ["ArrowTableDelegate"]
class ArrowTableDelegate(StorageClassDelegate):
    """Delegate that understands the ``ArrowTable`` storage class."""

    # In-memory type that `handleParameters` requires its dataset to be.
    _datasetType = pa.Table

    def getComponent(self, composite: pa.Table, componentName: str) -> Any:
        """Get a component from an Arrow table.

        Parameters
        ----------
        composite : `~pyarrow.Table`
            Arrow table to access component.
        componentName : `str`
            Name of component to retrieve. The names handled here are
            ``columns``, ``schema`` and ``rowcount``.

        Returns
        -------
        component : `object`
            The component. This is the table schema for ``columns`` and
            ``schema``, and the number of rows for ``rowcount``.

        Raises
        ------
        AttributeError
            The component can not be found.
        """
        if componentName in ("columns", "schema"):
            # The schema will be translated to column format
            # depending on the input type.
            return composite.schema
        elif componentName == "rowcount":
            # Ask the table for its row count directly rather than measuring
            # its first column, which fails for a table with no columns.
            return composite.num_rows

        raise AttributeError(
            f"Do not know how to retrieve component {componentName} from {get_full_type_name(composite)}"
        )

    def handleParameters(self, inMemoryDataset: Any, parameters: Mapping[str, Any] | None = None) -> Any:
        """Apply parameters to an in-memory Arrow table.

        Parameters
        ----------
        inMemoryDataset : `~pyarrow.Table`
            Table to which the parameters are applied.
        parameters : `~collections.abc.Mapping` [`str`, `typing.Any`], optional
            Parameters to apply. Only the ``columns`` key is acted on here;
            its value is passed through ``ensure_iterable`` and every
            resulting entry must be the name of a column in the table.

        Returns
        -------
        dataset : `~pyarrow.Table`
            The input table itself if ``parameters`` is `None` or has no
            ``columns`` key, otherwise a table restricted to the requested
            columns, de-duplicated and in the order requested.

        Raises
        ------
        ValueError
            Raised if ``inMemoryDataset`` is not a `~pyarrow.Table` or if a
            requested column is not present in the table.
        NotImplementedError
            Raised if a requested column name is not a `str`.
        """
        if not isinstance(inMemoryDataset, self._datasetType):
            raise ValueError(
                f"inMemoryDataset must be a {get_full_type_name(self._datasetType)} and "
                f"not {get_full_type_name(inMemoryDataset)}."
            )

        if parameters is None or "columns" not in parameters:
            return inMemoryDataset

        read_columns = list(ensure_iterable(parameters["columns"]))

        # Look the available names up once; membership is only tested after
        # the `str` check below, so every candidate is hashable.
        available_columns = set(self._getColumns(inMemoryDataset))
        for column in read_columns:
            if not isinstance(column, str):
                raise NotImplementedError(
                    "InMemoryDataset of an Arrow Table only supports string column names."
                )
            if column not in available_columns:
                raise ValueError(f"Unrecognized column name {column!r}.")

        # Ensure uniqueness, keeping order.
        read_columns = list(dict.fromkeys(read_columns))

        return self._selectColumns(inMemoryDataset, read_columns)

    def _getColumns(self, inMemoryDataset: pa.Table) -> list[str]:
        """Get the column names from the inMemoryDataset.

        Parameters
        ----------
        inMemoryDataset : `~pyarrow.Table`
            Dataset to extract columns.

        Returns
        -------
        columns : `list` [`str`]
            List of columns, as reported by the table schema.
        """
        return inMemoryDataset.schema.names

    def _selectColumns(self, inMemoryDataset: pa.Table, columns: list[str]) -> pa.Table:
        """Select a subset of columns from the inMemoryDataset.

        Parameters
        ----------
        inMemoryDataset : `~pyarrow.Table`
            Dataset to extract columns.
        columns : `list` [`str`]
            List of columns to extract.

        Returns
        -------
        subDataset : `~pyarrow.Table`
            Subselection of inMemoryDataset.
        """
        return inMemoryDataset.select(columns)