Coverage for python/lsst/pipe/tasks/dataFrameActions/_evalColumnExpression.py: 60%

68 statements  

« prev     ^ index     » next       coverage.py v6.4.2, created at 2022-07-15 03:30 -0700

1# This file is part of pipe_tasks. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ("makeColumnExpressionAction", ) 

25 

26import ast 

27import operator as op 

28 

29from typing import Mapping, MutableMapping, Set, Type, Union, Optional, Any, Iterable 

30 

31from numpy import log10 as log 

32from numpy import (cos, sin, cosh, sinh) 

33import pandas as pd 

34 

35from ..configurableActions import ConfigurableActionField 

36from ._baseDataFrameActions import DataFrameAction 

37 

38 

# AST operator node types that may appear in an expression, mapped to the
# callable that implements each of them. Operators missing from this table are
# rejected by `ExpressionParser`.
OPERATORS = {ast.Add: op.add, ast.Sub: op.sub, ast.Mult: op.mul,
             ast.Div: op.truediv, ast.Pow: op.pow, ast.BitXor: op.xor,
             ast.USub: op.neg}

# Functions that can be called by name inside an expression. Note that ``log``
# is the base 10 logarithm (it is imported from ``numpy.log10``).
EXTRA_MATH = {"cos": cos, "sin": sin, "cosh": cosh, "sinh": sinh, "log": log}


class ExpressionParser(ast.NodeVisitor):
    """Evaluate a restricted arithmetic expression by walking its AST.

    Only names, numeric/boolean/`None` constants, the operators listed in
    ``OPERATORS`` and single-argument calls to known names are supported.
    Every other construct is rejected with a `ValueError`.

    Parameters
    ----------
    **kwargs
        Mapping of variable name to the value substituted for that name when
        the expression is evaluated. The functions in ``EXTRA_MATH`` are
        always available and take precedence over a variable with the same
        name.
    """

    def __init__(self, **kwargs):
        # Register every supported math function, not only ``log``; otherwise
        # expressions using cos/sin/cosh/sinh are rejected in visit_Call.
        self.variables = {**kwargs, **EXTRA_MATH}

    @staticmethod
    def _operator(node):
        """Return the callable implementing ``node.op``.

        Raises
        ------
        ValueError
            Raised if the operator is not listed in ``OPERATORS``.
        """
        try:
            return OPERATORS[type(node.op)]
        except KeyError:
            raise ValueError(f"Operator {type(node.op).__name__} is not supported") from None

    def visit_Name(self, node):
        """Return the value bound to a name, or `None` if it is unknown."""
        return self.variables.get(node.id)

    def visit_Constant(self, node):
        """Return the value of a numeric, boolean or `None` literal.

        Raises
        ------
        ValueError
            Raised for any other kind of literal (e.g. strings or bytes).
        """
        value = node.value
        if value is None or isinstance(value, (bool, int, float, complex)):
            return value
        raise ValueError("String not recognized")

    # Legacy hook names; literals are dispatched to visit_Constant, these are
    # kept only so the historical method names remain available.
    visit_Num = visit_Constant
    visit_NameConstant = visit_Constant

    def visit_UnaryOp(self, node):
        """Apply a supported unary operator to its evaluated operand."""
        operand = self.visit(node.operand)
        return self._operator(node)(operand)

    def visit_BinOp(self, node):
        """Apply a supported binary operator to its evaluated operands."""
        lhs = self.visit(node.left)
        rhs = self.visit(node.right)
        return self._operator(node)(lhs, rhs)

    def visit_Call(self, node):
        """Call a known name with exactly one evaluated positional argument.

        Raises
        ------
        ValueError
            Raised if the callee is not a plain, known name, or if the call
            does not have exactly one positional argument and no keywords.
        """
        func = node.func
        # Attribute access, subscripts, lambdas etc. are not plain names and
        # have no ``id``; reject them the same way as unknown names.
        if not isinstance(func, ast.Name) or func.id not in self.variables:
            raise ValueError("String not recognized")
        if len(node.args) != 1 or node.keywords:
            raise ValueError(f"Function {func.id} takes exactly one positional argument")
        function = self.visit(func)
        return function(self.visit(node.args[0]))

    def generic_visit(self, node):
        """Reject every node type without a dedicated ``visit_*`` method."""
        raise ValueError("String not recognized")

81 

82 

def makeColumnExpressionAction(className: str, expr: str,
                               exprDefaults: Optional[Mapping[str, Union[DataFrameAction,
                                                                         Type[DataFrameAction]]]] = None,
                               docstring: Optional[str] = None
                               ) -> Type[DataFrameAction]:
    """Factory function for producing ConfigurableAction classes which are
    realizations of arithmetic operations.

    Parameters
    ----------
    className : `str`
        The name of the class that will be produced.
    expr : `str`
        An arithmetic expression that will be parsed to produce the output
        ConfigurableAction. Each variable name in the expression becomes a
        `ConfigurableActionField` of that name on the generated class (i.e.
        "x+y" produces a class with fields ``x`` and ``y``). The expression
        may contain arithmetic python operators as well as the functions
        sin, cos, sinh, cosh and log (which is base 10).
    exprDefaults : `Mapping` of `str` to `DataFrameAction`, optional
        A mapping from names in the expression to the action used as the
        default of the corresponding field. Names without an entry (or with a
        `None` entry) get a field created without an explicit default.
    docstring : `str`, optional
        A string that is assigned as the resulting class's docstring.

    Returns
    -------
    action : `Type` of `DataFrameAction`
        A `DataFrameAction` class that was programmatically constructed from
        the input expression.

    Raises
    ------
    SyntaxError
        Raised by `ast.parse` if ``expr`` is not a valid python expression.
    """
    # inspect is used because this is a factory function used to produce
    # classes and it is desirable that the classes generated appear to be in
    # the module of the calling frame, instead of something defined within the
    # scope of this function call.
    import inspect
    current = inspect.currentframe()
    caller = current.f_back if current is not None else None
    # The module name lives in the caller's globals. Looking in f_locals only
    # works when the factory is called at module level and raises KeyError
    # when it is called from inside a function or class body.
    new_module = __name__ if caller is None else caller.f_globals.get('__name__', __name__)
    # Drop the frame references promptly so they are not kept alive by a
    # reference cycle.
    del current, caller

    node = ast.parse(expr, mode='eval')

    # gather the specified names, leaving out the known math function names
    names: Set[str] = {elm.id for elm in ast.walk(node) if isinstance(elm, ast.Name)}
    names.difference_update(EXTRA_MATH)

    # sorted so that the fields are always declared in a deterministic order
    fields: MutableMapping[str, ConfigurableActionField] = {}
    for name in sorted(names):
        if exprDefaults is not None and (value := exprDefaults.get(name)) is not None:
            kwargs = {"default": value}
        else:
            kwargs = {}
        fields[name] = ConfigurableActionField(doc=f"expression action {name}", **kwargs)

    # skip flake8 on N807 because this is a stand alone function, but it is
    # intended to be patched in as a method on a dynamically generated class
    def __call__(self, df: pd.DataFrame, **kwargs) -> pd.Series:  # noqa: N807
        # evaluate every sub-action on the DataFrame, then substitute the
        # results for the matching names while evaluating the expression
        values_map = {name: getattr(self, name)(df, **kwargs) for name in fields}
        parser = ExpressionParser(**values_map)
        return parser.visit(node.body)

    # create the function to look up the columns for the dynamically created
    # action; it chains the columns reported by each sub-action
    def columns(self) -> Iterable[str]:
        for name in fields:
            yield from getattr(self, name).columns

    dct: MutableMapping[str, Any] = {"__call__": __call__, "columns": property(columns)}
    if docstring is not None:
        dct['__doc__'] = docstring
    dct.update(**fields)
    dct['__module__'] = new_module

    return type(className, (DataFrameAction, ), dct)