Coverage for python/lsst/analysis/tools/actions/scalar/scalarActions.py: 48%
123 statements
« prev ^ index » next coverage.py v6.5.0, created at 2022-12-16 10:52 +0000
« prev ^ index » next coverage.py v6.5.0, created at 2022-12-16 10:52 +0000
1from __future__ import annotations
3import operator
4from typing import cast
6import numpy as np
7from lsst.pex.config import ChoiceField, Field
9from ...interfaces import KeyedData, KeyedDataSchema, Scalar, ScalarAction, Vector
10from ...statistics import nansigmaMad
class MedianAction(ScalarAction):
    """Reduce a vector to its median, ignoring NaN entries."""

    vectorKey = Field[str]("Key of Vector to median")

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the one ``(key, type)`` pair this action reads."""
        schema = ((self.vectorKey, Vector),)
        return schema

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return the NaN-ignoring median of the selected entries.

        Parameters
        ----------
        data : `KeyedData`
            Mapping holding the vector under ``vectorKey`` (after the key
            has been formatted with ``kwargs``).
        **kwargs
            Forwarded to ``self.getMask`` and used to format ``vectorKey``.

        Returns
        -------
        median : `Scalar`
            ``np.nanmedian`` of the masked vector, as a Python float.
        """
        selection = self.getMask(**kwargs)
        key = self.vectorKey.format(**kwargs)
        vector = cast(Vector, data[key])
        median = np.nanmedian(vector[selection])
        return cast(Scalar, float(median))
class MeanAction(ScalarAction):
    """Reduce a vector to its arithmetic mean, ignoring NaN entries."""

    vectorKey = Field[str]("Key of Vector from which to calculate mean")

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the one ``(key, type)`` pair this action reads."""
        schema = ((self.vectorKey, Vector),)
        return schema

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return the NaN-ignoring mean of the selected entries.

        Parameters
        ----------
        data : `KeyedData`
            Mapping holding the vector under ``vectorKey`` (after the key
            has been formatted with ``kwargs``).
        **kwargs
            Forwarded to ``self.getMask`` and used to format ``vectorKey``.

        Returns
        -------
        mean : `Scalar`
            ``np.nanmean`` of the masked vector, as a Python float.
        """
        selection = self.getMask(**kwargs)
        key = self.vectorKey.format(**kwargs)
        vector = cast(Vector, data[key])
        mean = np.nanmean(vector[selection])
        return cast(Scalar, float(mean))
class StdevAction(ScalarAction):
    """Reduce a vector to its standard deviation, ignoring NaN entries."""

    vectorKey = Field[str]("Key of Vector from which to calculate std deviation")

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the one ``(key, type)`` pair this action reads."""
        schema = ((self.vectorKey, Vector),)
        return schema

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return the NaN-ignoring standard deviation of the selected entries.

        Parameters
        ----------
        data : `KeyedData`
            Mapping holding the vector under ``vectorKey`` (after the key
            has been formatted with ``kwargs``).
        **kwargs
            Forwarded to ``self.getMask`` and used to format ``vectorKey``.

        Returns
        -------
        stdev : `Scalar`
            ``np.nanstd`` of the masked vector (NumPy's default ``ddof``),
            as a Python float.
        """
        selection = self.getMask(**kwargs)
        key = self.vectorKey.format(**kwargs)
        vector = cast(Vector, data[key])
        stdev = np.nanstd(vector[selection])
        return cast(Scalar, float(stdev))
class SigmaMadAction(ScalarAction):
    """Reduce a vector to a scalar by applying ``nansigmaMad`` to it.

    NOTE(review): ``nansigmaMad`` comes from the package's ``statistics``
    module; presumably a NaN-ignoring sigma estimate based on the median
    absolute deviation -- confirm against that module.
    """

    # The doc string previously read "Key of Vector to median", a copy-paste
    # from MedianAction that misdescribed this configuration option.
    vectorKey = Field[str]("Key of Vector from which to calculate sigma MAD")

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the one ``(key, type)`` pair this action reads."""
        return ((self.vectorKey, Vector),)

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return ``nansigmaMad`` of the selected entries.

        Parameters
        ----------
        data : `KeyedData`
            Mapping holding the vector under ``vectorKey`` (after the key
            has been formatted with ``kwargs``).
        **kwargs
            Forwarded to ``self.getMask`` and used to format ``vectorKey``.

        Returns
        -------
        sigmaMad : `Scalar`
            Result of ``nansigmaMad`` on the masked vector, as a Python
            float.
        """
        mask = self.getMask(**kwargs)
        key = self.vectorKey.format(**kwargs)
        selected = data[key][mask]  # type: ignore
        return cast(Scalar, float(nansigmaMad(selected)))
class CountAction(ScalarAction):
    """Count the entries of a vector that are not NaN."""

    vectorKey = Field[str]("Key of Vector to count")

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the one ``(key, type)`` pair this action reads."""
        schema = ((self.vectorKey, Vector),)
        return schema

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return how many selected entries are not NaN.

        Parameters
        ----------
        data : `KeyedData`
            Mapping holding the vector under ``vectorKey`` (after the key
            has been formatted with ``kwargs``).
        **kwargs
            Forwarded to ``self.getMask`` and used to format ``vectorKey``.

        Returns
        -------
        count : `Scalar`
            Number of masked entries for which ``np.isnan`` is false.
        """
        selection = self.getMask(**kwargs)
        key = self.vectorKey.format(**kwargs)
        masked = cast(Vector, data[key])[selection]
        notNan = ~np.isnan(masked)
        return cast(Scalar, len(masked[notNan]))
class CountUniqueAction(ScalarAction):
    """Count the distinct values present in a given column."""

    vectorKey = Field[str](doc="Name of column.")

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the one ``(key, type)`` pair this action reads."""
        schema = ((self.vectorKey, Vector),)
        return schema

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return the number of distinct values among the selected rows.

        Parameters
        ----------
        data : `KeyedData`
            Mapping holding the column under ``vectorKey`` (after the key
            has been formatted with ``kwargs``).
        **kwargs
            Forwarded to ``self.getMask`` and used to format ``vectorKey``.

        Returns
        -------
        count : `Scalar`
            Length of ``np.unique`` applied to the masked column.
        """
        selection = self.getMask(**kwargs)
        key = self.vectorKey.format(**kwargs)
        column = cast(Vector, data[key])[selection]
        distinct = np.unique(column)
        return cast(Scalar, len(distinct))
class ApproxFloor(ScalarAction):
    """Median of the highest tenth of the non-NaN values in a vector.

    The selected values are stripped of NaNs, sorted in ascending order, and
    the median of the last ``len // 10`` entries is returned. When fewer
    than ten non-NaN values are available the median of all of them is
    returned instead.
    """

    vectorKey = Field[str](doc="Key for the vector to perform action on", optional=False)

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the one ``(key, type)`` pair this action reads."""
        return ((self.vectorKey, Vector),)

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return the median of the top ~10% of the selected values.

        Parameters
        ----------
        data : `KeyedData`
            Mapping holding the vector under ``vectorKey`` (after the key
            has been formatted with ``kwargs``).
        **kwargs
            Forwarded to ``self.getMask`` and used to format ``vectorKey``.

        Returns
        -------
        value : `Scalar`
            Median of the largest tenth of the non-NaN masked values, as a
            Python float. NaN if no non-NaN values are selected.
        """
        mask = self.getMask(**kwargs)
        selected = np.asarray(data[self.vectorKey.format(**kwargs)][mask])  # type: ignore
        # np.sort places NaNs at the end of the array, so without this filter
        # the "top tenth" slice below could consist mostly or entirely of
        # NaNs and yield NaN even when plenty of valid data exist.
        ordered = np.sort(selected[~np.isnan(selected)])
        nTail = len(ordered) // 10
        # With fewer than ten values the tail would be empty; fall back to
        # the whole sample (what slicing with ``[-0:]`` did implicitly).
        tail = ordered[-nTail:] if nTail > 0 else ordered
        return cast(Scalar, float(np.nanmedian(tail)))
class FracThreshold(ScalarAction):
    """Compute the fraction of a distribution that is above or below a
    specified threshold.

    The comparison is chosen by name: "lt", "le", "ge", "gt" select the
    mathematical operations <, <=, >=, >. For example, to compute the
    fraction of elements with values strictly less than a given threshold,
    use op="lt"; use op="le" to also count values equal to the threshold.
    NaN entries are discarded before the fraction is computed, so they
    contribute to neither the numerator nor the denominator.
    """

    op = ChoiceField[str](
        doc="Operator name string.",
        allowed={
            "lt": "less than threshold",
            "le": "less than or equal to threshold",
            "ge": "greater than or equal to threshold",
            "gt": "greater than threshold",
        },
    )
    threshold = Field[float](doc="Threshold to apply.")
    vectorKey = Field[str](doc="Name of column")
    percent = Field[bool](doc="Express result as percentage", default=False)

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the one ``(key, type)`` pair this action reads."""
        schema = ((self.vectorKey, Vector),)
        return schema

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return the fraction of non-NaN values satisfying the comparison.

        Parameters
        ----------
        data : `KeyedData`
            Mapping holding the column under ``vectorKey`` (after the key
            has been formatted with ``kwargs``).
        **kwargs
            Forwarded to ``self.getMask`` and used to format ``vectorKey``.

        Returns
        -------
        result : `Scalar`
            Fraction of the non-NaN masked values for which
            ``value <op> threshold`` holds, multiplied by 100 when
            ``percent`` is set.
        """
        selection = self.getMask(**kwargs)
        column = data[self.vectorKey.format(**kwargs)]
        masked = column[selection]  # type: ignore
        valid = masked[np.logical_not(np.isnan(masked))]
        # Look up the comparison function (operator.lt, operator.ge, ...)
        # by its configured name.
        compare = getattr(operator, self.op)
        nPassing = np.sum(compare(valid, self.threshold))
        fraction = cast(Scalar, float(nPassing / len(valid)))  # type: ignore
        if not self.percent:
            return fraction
        return 100.0 * fraction
class MaxAction(ScalarAction):
    """Reduce a vector to its maximum, ignoring NaN entries."""

    vectorKey = Field[str]("Key of Vector to find maximum")

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the one ``(key, type)`` pair this action reads."""
        return ((self.vectorKey, Vector),)

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return the largest non-NaN value among the selected entries.

        Parameters
        ----------
        data : `KeyedData`
            Mapping holding the vector under ``vectorKey`` (after the key
            has been formatted with ``kwargs``).
        **kwargs
            Forwarded to ``self.getMask`` and used to format ``vectorKey``.

        Returns
        -------
        maximum : `Scalar`
            ``np.nanmax`` of the masked vector, as a Python float. NaN only
            if every selected entry is NaN.

        Raises
        ------
        ValueError
            Raised by NumPy if no entries are selected.
        """
        mask = self.getMask(**kwargs)
        selected = cast(Vector, data[self.vectorKey.format(**kwargs)])[mask]
        # NaN-ignoring reduction, matching the median/mean/stdev actions; a
        # plain np.max would turn a single NaN entry into a NaN result.
        return cast(Scalar, float(np.nanmax(selected)))
class MinAction(ScalarAction):
    """Reduce a vector to its minimum, ignoring NaN entries."""

    vectorKey = Field[str]("Key for the vector to perform action on")

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the one ``(key, type)`` pair this action reads."""
        return ((self.vectorKey, Vector),)

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return the smallest non-NaN value among the selected entries.

        Parameters
        ----------
        data : `KeyedData`
            Mapping holding the vector under ``vectorKey`` (after the key
            has been formatted with ``kwargs``).
        **kwargs
            Forwarded to ``self.getMask`` and used to format ``vectorKey``.

        Returns
        -------
        minimum : `Scalar`
            ``np.nanmin`` of the masked vector, as a Python float. NaN only
            if every selected entry is NaN.

        Raises
        ------
        ValueError
            Raised by NumPy if no entries are selected.
        """
        mask = self.getMask(**kwargs)
        selected = cast(Vector, data[self.vectorKey.format(**kwargs)])[mask]
        # NaN-ignoring reduction, matching the median/mean/stdev actions; a
        # plain np.min would turn a single NaN entry into a NaN result.
        return cast(Scalar, float(np.nanmin(selected)))
class FracInRange(ScalarAction):
    """Compute the fraction of a distribution that is between specified
    minimum and maximum values, and is not NaN.

    The range is half-open: a value ``v`` is counted when
    ``minimum <= v < maximum``. The denominator is the number of selected
    entries *including* NaNs, so NaN entries lower the fraction.
    """

    vectorKey = Field[str](doc="Name of column")
    # ``np.inf`` is used instead of the ``np.Inf`` alias, which was removed
    # in NumPy 2.0; the defaults are the largest/smallest finite floats.
    maximum = Field[float](doc="The maximum value", default=np.nextafter(np.inf, 0.0))
    minimum = Field[float](doc="The minimum value", default=np.nextafter(-np.inf, 0.0))
    percent = Field[bool](doc="Express result as percentage", default=False)

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the one ``(key, type)`` pair this action reads."""
        return ((self.vectorKey, Vector),)

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return the fraction of rows with values within the specified range.

        Parameters
        ----------
        data : `KeyedData`
            Mapping holding the column under ``vectorKey`` (after the key
            has been formatted with ``kwargs``).
        **kwargs
            Forwarded to ``self.getMask`` and used to format ``vectorKey``.

        Returns
        -------
        result : `Scalar`
            The fraction (or percentage) of rows with values within the
            specified range. NaN if no rows are selected.
        """
        mask = self.getMask(**kwargs)
        values = cast(Vector, data[self.vectorKey.format(**kwargs)])[mask]
        nvalues = len(values)
        if nvalues == 0:
            # The fraction of an empty selection is undefined; return NaN
            # (as FracThreshold does) rather than raising ZeroDivisionError.
            return cast(Scalar, float("nan"))
        values = values[np.logical_not(np.isnan(values))]
        sel_range = (values >= self.minimum) & (values < self.maximum)
        result = cast(
            Scalar,
            float(len(values[sel_range]) / nvalues),  # type: ignore
        )
        if self.percent:
            return 100.0 * result
        return result
class FracNan(ScalarAction):
    """Compute the fraction of vector entries that are NaN."""

    vectorKey = Field[str](doc="Name of column")
    percent = Field[bool](doc="Express result as percentage", default=False)

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the one ``(key, type)`` pair this action reads."""
        return ((self.vectorKey, Vector),)

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return the fraction of rows with NaN values.

        Parameters
        ----------
        data : `KeyedData`
            Mapping holding the column under ``vectorKey`` (after the key
            has been formatted with ``kwargs``).
        **kwargs
            Forwarded to ``self.getMask`` and used to format ``vectorKey``.

        Returns
        -------
        result : `Scalar`
            The fraction (or percentage) of rows with NaN values. NaN if no
            rows are selected.
        """
        mask = self.getMask(**kwargs)
        values = cast(Vector, data[self.vectorKey.format(**kwargs)])[mask]
        nvalues = len(values)
        if nvalues == 0:
            # The fraction of an empty selection is undefined; return NaN
            # rather than raising ZeroDivisionError.
            return cast(Scalar, float("nan"))
        nanValues = values[np.isnan(values)]
        result = cast(
            Scalar,
            float(len(nanValues) / nvalues),  # type: ignore
        )
        if self.percent:
            return 100.0 * result
        return result