Coverage for python/lsst/daf/butler/delegates/dataframe.py: 23%
36 statements
« prev ^ index » next coverage.py v6.5.0, created at 2022-10-04 02:19 -0700
« prev ^ index » next coverage.py v6.5.0, created at 2022-10-04 02:19 -0700
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22"""Support for reading DataFrames."""
23from __future__ import annotations
25import collections.abc
26from typing import Any, Mapping, Optional
28import pandas
29from lsst.daf.butler import StorageClassDelegate
30from lsst.utils.introspection import get_full_type_name
31from lsst.utils.iteration import ensure_iterable
33__all__ = ["DataFrameDelegate"]
class DataFrameDelegate(StorageClassDelegate):
    """Storage class delegate for `pandas.DataFrame` objects.

    Exposes a ``columns`` component and supports sub-selecting columns via
    the ``columns`` parameter.
    """

    def getComponent(self, composite: pandas.DataFrame, componentName: str) -> Any:
        """Get a component from a DataFrame.

        Parameters
        ----------
        composite : `~pandas.DataFrame`
            ``DataFrame`` to access component.
        componentName : `str`
            Name of component to retrieve. Only ``"columns"`` is supported.

        Returns
        -------
        component : `object`
            The component. For ``"columns"`` this is a `pandas.Index` built
            from all column names, including any named index columns.

        Raises
        ------
        AttributeError
            The component can not be found.
        """
        if componentName == "columns":
            return pandas.Index(self._getAllColumns(composite))
        raise AttributeError(
            f"Do not know how to retrieve component {componentName} from {get_full_type_name(composite)}"
        )

    def handleParameters(
        self, inMemoryDataset: pandas.DataFrame, parameters: Optional[Mapping[str, Any]] = None
    ) -> Any:
        """Return possibly new in-memory dataset using the supplied parameters.

        Parameters
        ----------
        inMemoryDataset : `object`
            Object to modify based on the parameters.
        parameters : `dict`, optional
            Parameters to apply. Values are specific to the parameter.
            Supported parameters are defined in the associated
            `StorageClass`. If no relevant parameters are specified the
            ``inMemoryDataset`` will be returned unchanged.

        Returns
        -------
        inMemoryDataset : `object`
            Original in-memory dataset, or updated form after parameters
            have been used.

        Raises
        ------
        ValueError
            Raised if ``inMemoryDataset`` is not a `pandas.DataFrame`, or if
            a requested column name is not present in the dataset.
        NotImplementedError
            Raised if the ``columns`` parameter is not iterable or contains
            a value that is not a string.
        """
        if not isinstance(inMemoryDataset, pandas.DataFrame):
            raise ValueError(
                "handleParameters for a DataFrame must get a DataFrame, "
                f"not {get_full_type_name(inMemoryDataset)}."
            )

        # "columns" is the only parameter acted upon here; anything else
        # leaves the dataset untouched.
        if parameters is None or "columns" not in parameters:
            return inMemoryDataset

        allColumns = self._getAllColumns(inMemoryDataset)

        requested = parameters["columns"]
        if not isinstance(requested, collections.abc.Iterable):
            raise NotImplementedError(
                "InMemoryDataset of a DataFrame only supports list/tuple of string column names"
            )

        # Materialize exactly once: the names are walked twice below (first
        # to validate, then to build the selection). A one-shot iterable
        # such as a generator would otherwise be exhausted by validation and
        # silently produce an empty selection.
        columns = list(ensure_iterable(requested))

        for column in columns:
            if not isinstance(column, str):
                raise NotImplementedError(
                    "InMemoryDataset of a DataFrame only supports string column names."
                )
            if column not in allColumns:
                raise ValueError(f"Unrecognized column name {column!r}.")

        # Exclude index columns from the subset; they are not regular
        # columns and remain attached to the result as its index.
        indexNames = inMemoryDataset.index.names
        readColumns = [name for name in columns if name not in indexNames]

        return inMemoryDataset[readColumns]

    def _getAllColumns(self, inMemoryDataset: pandas.DataFrame) -> list[str]:
        """Get all columns, including index columns.

        Parameters
        ----------
        inMemoryDataset : `~pandas.DataFrame`
            ``DataFrame`` whose column and index names are collected.

        Returns
        -------
        columns : `list` [`str`]
            List of all columns. The index names are appended after the
            regular columns, but only when the first index level is named.
        """
        allColumns = list(inMemoryDataset.columns)
        # An unnamed (default) index reports ``None`` as its first name and
        # contributes no column names.
        if inMemoryDataset.index.names[0] is not None:
            allColumns.extend(inMemoryDataset.index.names)

        return allColumns