Coverage for python/lsst/daf/butler/delegates/dataframe.py: 26%

36 statements  

« prev     ^ index     » next       coverage.py v6.4.4, created at 2022-09-15 09:41 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22"""Support for reading DataFrames.""" 

23from __future__ import annotations 

24 

25import collections.abc 

26from typing import Any, Mapping, Optional 

27 

28import pandas 

29from lsst.daf.butler import StorageClassDelegate 

30from lsst.utils.introspection import get_full_type_name 

31from lsst.utils.iteration import ensure_iterable 

32 

33__all__ = ["DataFrameDelegate"] 

34 

35 

class DataFrameDelegate(StorageClassDelegate):
    """Storage class delegate for `pandas.DataFrame` datasets.

    Supports retrieval of the ``columns`` component and sub-setting of an
    in-memory ``DataFrame`` through the ``columns`` parameter.
    """

    def getComponent(self, composite: pandas.DataFrame, componentName: str) -> Any:
        """Get a component from a DataFrame.

        Parameters
        ----------
        composite : `~pandas.DataFrame`
            ``DataFrame`` to access component.
        componentName : `str`
            Name of component to retrieve. Only ``"columns"`` is supported.

        Returns
        -------
        component : `object`
            The component. For ``"columns"`` this is a `pandas.Index` built
            from the regular columns followed by any named index columns.

        Raises
        ------
        AttributeError
            The component can not be found.
        """
        if componentName == "columns":
            return pandas.Index(self._getAllColumns(composite))

        raise AttributeError(
            f"Do not know how to retrieve component {componentName} from {get_full_type_name(composite)}"
        )

    def handleParameters(
        self, inMemoryDataset: pandas.DataFrame, parameters: Optional[Mapping[str, Any]] = None
    ) -> Any:
        """Return possibly new in-memory dataset using the supplied parameters.

        Parameters
        ----------
        inMemoryDataset : `object`
            Object to modify based on the parameters.
        parameters : `dict`, optional
            Parameters to apply. Values are specific to the parameter.
            Supported parameters are defined in the associated
            `StorageClass`. If no relevant parameters are specified the
            ``inMemoryDataset`` will be returned unchanged.

        Returns
        -------
        inMemoryDataset : `object`
            Original in-memory dataset, or updated form after parameters
            have been used.

        Raises
        ------
        ValueError
            ``inMemoryDataset`` is not a `pandas.DataFrame`, or a requested
            column name is not present in the dataset.
        NotImplementedError
            The ``columns`` parameter is not iterable, or one of the
            requested column names is not a `str`.
        """
        if not isinstance(inMemoryDataset, pandas.DataFrame):
            raise ValueError(
                "handleParameters for a DataFrame must get a DataFrame, "
                f"not {get_full_type_name(inMemoryDataset)}."
            )

        # Only the "columns" parameter is acted upon; anything else leaves
        # the dataset untouched.
        if parameters is None or "columns" not in parameters:
            return inMemoryDataset

        allColumns = self._getAllColumns(inMemoryDataset)

        requested = parameters["columns"]
        if not isinstance(requested, collections.abc.Iterable):
            raise NotImplementedError(
                "InMemoryDataset of a DataFrame only supports list/tuple of string column names"
            )

        # Materialize the request exactly once. A one-shot iterable (for
        # example a generator) would otherwise be exhausted by the validation
        # loop below, and the subsequent selection would silently come back
        # empty.
        columns = list(ensure_iterable(requested))

        for column in columns:
            if not isinstance(column, str):
                raise NotImplementedError(
                    "InMemoryDataset of a DataFrame only supports string column names."
                )
            if column not in allColumns:
                raise ValueError(f"Unrecognized column name {column!r}.")

        # Exclude index columns from the subset; they are not selectable
        # through ``DataFrame.__getitem__`` and stay attached as the index.
        indexNames = inMemoryDataset.index.names
        readColumns = [name for name in columns if name not in indexNames]

        return inMemoryDataset[readColumns]

    def _getAllColumns(self, inMemoryDataset: pandas.DataFrame) -> list[str]:
        """Get all columns, including index columns.

        Parameters
        ----------
        inMemoryDataset : `~pandas.DataFrame`
            ``DataFrame`` whose column and index names are collected.

        Returns
        -------
        columns : `list` [`str`]
            List of all columns. The regular columns come first; the index
            names are appended only when the first index level is named.
        """
        allColumns = list(inMemoryDataset.columns)

        # An unnamed (default) index reports ``None`` as its first name and
        # contributes no column names.
        if inMemoryDataset.index.names[0] is not None:
            allColumns.extend(inMemoryDataset.index.names)

        return allColumns