Coverage for python/lsst/analysis/tools/actions/scalar/scalarActions.py: 48%
124 statements
« prev ^ index » next coverage.py v7.2.5, created at 2023-05-23 04:22 -0700
« prev ^ index » next coverage.py v7.2.5, created at 2023-05-23 04:22 -0700
1from __future__ import annotations
# Public names of this module: one entry per scalar action class.
__all__ = (
    "MedianAction", "MeanAction", "StdevAction", "SigmaMadAction",
    "CountAction", "CountUniqueAction",
    "ApproxFloor", "FracThreshold",
    "MaxAction", "MinAction",
    "FracInRange", "FracNan",
)
18import operator
19from typing import cast
21import numpy as np
22from lsst.pex.config import ChoiceField, Field
24from ...interfaces import KeyedData, KeyedDataSchema, Scalar, ScalarAction, Vector
25from ...statistics import nansigmaMad
class MedianAction(ScalarAction):
    """Compute the median of a vector, ignoring NaN entries."""

    vectorKey = Field[str]("Key of Vector to median")

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the schema of the single input read by this action.

        Returns
        -------
        schema : `KeyedDataSchema`
            A one-element tuple holding the ``(vectorKey, Vector)`` pair.
        """
        schema = ((self.vectorKey, Vector),)
        return schema

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return the median of the selected elements.

        Parameters
        ----------
        data : `KeyedData`
            Keyed data indexed with ``vectorKey`` after the key has been
            formatted with ``kwargs``.
        **kwargs
            Used to format ``vectorKey`` and forwarded to ``getMask``.

        Returns
        -------
        median : `Scalar`
            Result of ``np.nanmedian`` on the selected elements, converted
            to a Python float.
        """
        selection = self.getMask(**kwargs)
        key = self.vectorKey.format(**kwargs)
        vector = cast(Vector, data[key])
        median = np.nanmedian(vector[selection])
        return cast(Scalar, float(median))
class MeanAction(ScalarAction):
    """Compute the arithmetic mean of a vector, ignoring NaN entries."""

    vectorKey = Field[str]("Key of Vector from which to calculate mean")

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the schema of the single input read by this action.

        Returns
        -------
        schema : `KeyedDataSchema`
            A one-element tuple holding the ``(vectorKey, Vector)`` pair.
        """
        required = (self.vectorKey, Vector)
        return (required,)

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return the mean of the selected elements.

        Parameters
        ----------
        data : `KeyedData`
            Keyed data indexed with ``vectorKey`` after the key has been
            formatted with ``kwargs``.
        **kwargs
            Used to format ``vectorKey`` and forwarded to ``getMask``.

        Returns
        -------
        mean : `Scalar`
            Result of ``np.nanmean`` on the selected elements, converted to
            a Python float.
        """
        selection = self.getMask(**kwargs)
        column = cast(Vector, data[self.vectorKey.format(**kwargs)])
        selected = column[selection]
        return cast(Scalar, float(np.nanmean(selected)))
class StdevAction(ScalarAction):
    """Compute the standard deviation of a vector, ignoring NaN entries."""

    vectorKey = Field[str]("Key of Vector from which to calculate std deviation")

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the schema of the single input read by this action.

        Returns
        -------
        schema : `KeyedDataSchema`
            A one-element tuple holding the ``(vectorKey, Vector)`` pair.
        """
        entry = (self.vectorKey, Vector)
        return (entry,)

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return the standard deviation of the selected elements.

        Parameters
        ----------
        data : `KeyedData`
            Keyed data indexed with ``vectorKey`` after the key has been
            formatted with ``kwargs``.
        **kwargs
            Used to format ``vectorKey`` and forwarded to ``getMask``.

        Returns
        -------
        stdev : `Scalar`
            Result of ``np.nanstd`` (called with its default arguments) on
            the selected elements, converted to a Python float.
        """
        selection = self.getMask(**kwargs)
        resolved_key = self.vectorKey.format(**kwargs)
        selected = cast(Vector, data[resolved_key])[selection]
        spread = float(np.nanstd(selected))
        return cast(Scalar, spread)
class SigmaMadAction(ScalarAction):
    """Compute the sigma-MAD statistic of a vector via ``nansigmaMad``."""

    vectorKey = Field[str]("Key of Vector to median")

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the schema of the single input read by this action.

        Returns
        -------
        schema : `KeyedDataSchema`
            A one-element tuple holding the ``(vectorKey, Vector)`` pair.
        """
        schema = ((self.vectorKey, Vector),)
        return schema

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return ``nansigmaMad`` of the selected elements.

        Parameters
        ----------
        data : `KeyedData`
            Keyed data indexed with ``vectorKey`` after the key has been
            formatted with ``kwargs``.
        **kwargs
            Used to format ``vectorKey`` and forwarded to ``getMask``.

        Returns
        -------
        sigmaMad : `Scalar`
            Value returned by ``nansigmaMad`` for the selected elements,
            converted to a Python float.
        """
        selection = self.getMask(**kwargs)
        resolved_key = self.vectorKey.format(**kwargs)
        selected = data[resolved_key][selection]  # type: ignore
        sigma_mad = float(nansigmaMad(selected))
        return cast(Scalar, sigma_mad)
class CountAction(ScalarAction):
    """Count the entries of a vector that are not NaN."""

    vectorKey = Field[str]("Key of Vector to count")

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the schema of the single input read by this action.

        Returns
        -------
        schema : `KeyedDataSchema`
            A one-element tuple holding the ``(vectorKey, Vector)`` pair.
        """
        required = (self.vectorKey, Vector)
        return (required,)

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return the number of selected elements that are not NaN.

        Parameters
        ----------
        data : `KeyedData`
            Keyed data indexed with ``vectorKey`` after the key has been
            formatted with ``kwargs``.
        **kwargs
            Used to format ``vectorKey`` and forwarded to ``getMask``.

        Returns
        -------
        count : `Scalar`
            Length of the selected vector once NaN entries are removed.
        """
        selection = self.getMask(**kwargs)
        selected = cast(Vector, data[self.vectorKey.format(**kwargs)])[selection]
        is_nan = np.isnan(selected)
        not_nan = selected[np.logical_not(is_nan)]
        return cast(Scalar, len(not_nan))
class CountUniqueAction(ScalarAction):
    """Count the distinct values found in a given column."""

    vectorKey = Field[str](doc="Name of column.")

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the schema of the single input read by this action.

        Returns
        -------
        schema : `KeyedDataSchema`
            A one-element tuple holding the ``(vectorKey, Vector)`` pair.
        """
        schema = ((self.vectorKey, Vector),)
        return schema

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return the number of distinct values among the selected rows.

        Parameters
        ----------
        data : `KeyedData`
            Keyed data indexed with ``vectorKey`` after the key has been
            formatted with ``kwargs``.
        **kwargs
            Used to format ``vectorKey`` and forwarded to ``getMask``.

        Returns
        -------
        count : `Scalar`
            Length of the array returned by ``np.unique`` for the selected
            rows.
        """
        selection = self.getMask(**kwargs)
        column = cast(Vector, data[self.vectorKey.format(**kwargs)])
        distinct = np.unique(column[selection])
        return cast(Scalar, len(distinct))
class ApproxFloor(ScalarAction):
    """Return the median of the largest tenth of the input values.

    The selected values are sorted in ascending order and the final
    ``len(values) // 10`` entries of that ordering are passed to
    ``np.nanmedian``. When fewer than ten values are selected the tail
    would be empty, so the median of all selected values is returned
    instead.
    """

    vectorKey = Field[str](doc="Key for the vector to perform action on", optional=False)

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the schema of the single input read by this action.

        Returns
        -------
        schema : `KeyedDataSchema`
            A one-element tuple holding the ``(vectorKey, Vector)`` pair.
        """
        return ((self.vectorKey, Vector),)

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return the NaN-ignoring median of the top tenth of the values.

        Parameters
        ----------
        data : `KeyedData`
            Keyed data indexed with ``vectorKey`` after the key has been
            formatted with ``kwargs``.
        **kwargs
            Used to format ``vectorKey`` and forwarded to ``getMask``.

        Returns
        -------
        floor : `Scalar`
            ``np.nanmedian`` of the last ``len(values) // 10`` entries of
            the ascending sort, or of every selected value when fewer than
            ten are available, converted to a Python float.
        """
        mask = self.getMask(**kwargs)
        ordered = np.sort(data[self.vectorKey.format(**kwargs)][mask])  # type: ignore
        tail_length = len(ordered) // 10
        # A zero-length tail would be spelled ``ordered[-0:]``, which is the
        # whole array rather than an empty one; make that fallback explicit.
        if tail_length > 0:
            tail = ordered[-tail_length:]
        else:
            tail = ordered
        # NOTE(review): np.sort places NaN at the end, so NaN entries fill the
        # tail before any finite value does; confirm whether NaNs should be
        # dropped before the tail is taken.
        return cast(Scalar, float(np.nanmedian(tail)))
class FracThreshold(ScalarAction):
    """Compute the fraction of a distribution that is above or below a
    specified threshold. The operator is specified as a string, for example,
    "lt", "le", "ge", "gt" for the mathematical operations <, <=, >=, >. To
    compute the fraction of elements with values less than a given threshold,
    use op="lt".

    NaN entries are discarded before the fraction is computed, so the
    denominator is the number of non-NaN selected values.
    """

    op = ChoiceField[str](
        doc="Operator name string.",
        allowed={
            "lt": "less than threshold",
            "le": "less than or equal to threshold",
            "ge": "greater than or equal to threshold",
            "gt": "greater than threshold",
        },
    )
    threshold = Field[float](doc="Threshold to apply.")
    vectorKey = Field[str](doc="Name of column")
    percent = Field[bool](doc="Express result as percentage", default=False)

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the schema of the single input read by this action.

        Returns
        -------
        schema : `KeyedDataSchema`
            A one-element tuple holding the ``(vectorKey, Vector)`` pair.
        """
        return ((self.vectorKey, Vector),)

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return the fraction of non-NaN values satisfying the comparison.

        Parameters
        ----------
        data : `KeyedData`
            Keyed data indexed with ``vectorKey`` after the key has been
            formatted with ``kwargs``.
        **kwargs
            Used to format ``vectorKey`` and forwarded to ``getMask``.

        Returns
        -------
        result : `Scalar`
            Fraction (or percentage when ``percent`` is set) of the non-NaN
            selected values for which ``value <op> threshold`` holds. NaN
            is returned when no non-NaN value is selected.
        """
        mask = self.getMask(**kwargs)
        values = data[self.vectorKey.format(**kwargs)]
        values = values[mask]  # type: ignore
        values = values[np.logical_not(np.isnan(values))]

        n_values = len(values)
        if n_values == 0:
            # The fraction is undefined without any valid value; return NaN
            # explicitly instead of relying on a 0/0 division.
            return cast(Scalar, float(np.nan))

        # ``op`` names a comparison function of the ``operator`` module.
        compare = getattr(operator, self.op)
        n_passing = np.sum(compare(values, self.threshold))  # type: ignore
        result = cast(Scalar, float(n_passing / n_values))
        return 100.0 * result if self.percent else result
class MaxAction(ScalarAction):
    """Returns the maximum of the given data, ignoring NaN entries."""

    vectorKey = Field[str]("Key of Vector to find maximum")

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the schema of the single input read by this action.

        Returns
        -------
        schema : `KeyedDataSchema`
            A one-element tuple holding the ``(vectorKey, Vector)`` pair.
        """
        return ((self.vectorKey, Vector),)

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return the largest non-NaN value among the selected elements.

        Parameters
        ----------
        data : `KeyedData`
            Keyed data indexed with ``vectorKey`` after the key has been
            formatted with ``kwargs``.
        **kwargs
            Used to format ``vectorKey`` and forwarded to ``getMask``.

        Returns
        -------
        maximum : `Scalar`
            Result of ``np.nanmax`` on the selected elements, converted to a
            Python float. NaN is returned only when every selected element
            is NaN.

        Raises
        ------
        ValueError
            Raised by NumPy if no elements are selected.
        """
        mask = self.getMask(**kwargs)
        values = cast(Vector, data[self.vectorKey.format(**kwargs)])[mask]
        # np.nanmax keeps a single NaN from hiding the real maximum, in line
        # with the NaN-ignoring median/mean/stdev actions of this module.
        return cast(Scalar, float(np.nanmax(values)))
class MinAction(ScalarAction):
    """Returns the minimum of the given data, ignoring NaN entries."""

    vectorKey = Field[str]("Key for the vector to perform action on")

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the schema of the single input read by this action.

        Returns
        -------
        schema : `KeyedDataSchema`
            A one-element tuple holding the ``(vectorKey, Vector)`` pair.
        """
        return ((self.vectorKey, Vector),)

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return the smallest non-NaN value among the selected elements.

        Parameters
        ----------
        data : `KeyedData`
            Keyed data indexed with ``vectorKey`` after the key has been
            formatted with ``kwargs``.
        **kwargs
            Used to format ``vectorKey`` and forwarded to ``getMask``.

        Returns
        -------
        minimum : `Scalar`
            Result of ``np.nanmin`` on the selected elements, converted to a
            Python float. NaN is returned only when every selected element
            is NaN.

        Raises
        ------
        ValueError
            Raised by NumPy if no elements are selected.
        """
        mask = self.getMask(**kwargs)
        values = cast(Vector, data[self.vectorKey.format(**kwargs)])[mask]
        # np.nanmin keeps a single NaN from hiding the real minimum, in line
        # with the NaN-ignoring median/mean/stdev actions of this module.
        return cast(Scalar, float(np.nanmin(values)))
class FracInRange(ScalarAction):
    """Compute the fraction of a distribution that is between specified
    minimum and maximum values, and is not NaN.

    The range is half-open: a value counts when
    ``minimum <= value < maximum``. The denominator is the number of
    selected values including NaN entries.
    """

    vectorKey = Field[str](doc="Name of column")
    # ``np.inf`` is used rather than the ``np.Inf`` alias, which NumPy 2.0
    # removed. The defaults are the largest and smallest finite doubles.
    maximum = Field[float](doc="The maximum value", default=np.nextafter(np.inf, 0.0))
    minimum = Field[float](doc="The minimum value", default=np.nextafter(-np.inf, 0.0))
    percent = Field[bool](doc="Express result as percentage", default=False)

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the schema of the single input read by this action.

        Returns
        -------
        schema : `KeyedDataSchema`
            A one-element tuple holding the ``(vectorKey, Vector)`` pair.
        """
        return ((self.vectorKey, Vector),)

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return the fraction of rows with values within the specified range.

        Parameters
        ----------
        data : `KeyedData`
            Keyed data indexed with ``vectorKey`` after the key has been
            formatted with ``kwargs``.
        **kwargs
            Used to format ``vectorKey`` and forwarded to ``getMask``.

        Returns
        -------
        result : `Scalar`
            The fraction (or percentage) of rows with values within the
            specified range. NaN is returned when no rows are selected.
        """
        mask = self.getMask(**kwargs)
        values = cast(Vector, data[self.vectorKey.format(**kwargs)])[mask]
        # Count every selected row, NaN or not, as part of the denominator.
        nvalues = len(values)
        if nvalues == 0:
            # Avoid ZeroDivisionError; the fraction of nothing is undefined.
            return cast(Scalar, float(np.nan))

        values = values[np.logical_not(np.isnan(values))]
        sel_range = (values >= self.minimum) & (values < self.maximum)
        result = cast(
            Scalar,
            float(len(values[sel_range]) / nvalues),  # type: ignore
        )
        return 100.0 * result if self.percent else result
class FracNan(ScalarAction):
    """Compute the fraction of vector entries that are NaN."""

    vectorKey = Field[str](doc="Name of column")
    percent = Field[bool](doc="Express result as percentage", default=False)

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the schema of the single input read by this action.

        Returns
        -------
        schema : `KeyedDataSchema`
            A one-element tuple holding the ``(vectorKey, Vector)`` pair.
        """
        return ((self.vectorKey, Vector),)

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return the fraction of rows with NaN values.

        Parameters
        ----------
        data : `KeyedData`
            Keyed data indexed with ``vectorKey`` after the key has been
            formatted with ``kwargs``.
        **kwargs
            Used to format ``vectorKey`` and forwarded to ``getMask``.

        Returns
        -------
        result : `Scalar`
            The fraction (or percentage) of rows with NaN values. NaN is
            returned when no rows are selected.
        """
        mask = self.getMask(**kwargs)
        values = cast(Vector, data[self.vectorKey.format(**kwargs)])[mask]
        nvalues = len(values)
        if nvalues == 0:
            # Avoid ZeroDivisionError; the fraction of nothing is undefined.
            return cast(Scalar, float(np.nan))

        nan_values = values[np.isnan(values)]
        result = cast(
            Scalar,
            float(len(nan_values) / nvalues),  # type: ignore
        )
        return 100.0 * result if self.percent else result