Coverage for python/lsst/analysis/tools/actions/scalar/scalarActions.py: 45%
148 statements
« prev ^ index » next coverage.py v7.3.2, created at 2023-12-08 13:17 +0000
« prev ^ index » next coverage.py v7.3.2, created at 2023-12-08 13:17 +0000
1from __future__ import annotations
3__all__ = (
4 "MedianAction",
5 "MeanAction",
6 "StdevAction",
7 "ValueAction",
8 "SigmaMadAction",
9 "CountAction",
10 "CountUniqueAction",
11 "ApproxFloor",
12 "FracThreshold",
13 "MaxAction",
14 "MinAction",
15 "FracInRange",
16 "FracNan",
17 "SumAction",
18)
20import operator
21from typing import cast
23import numpy as np
24from lsst.pex.config import ChoiceField, Field
26from ...interfaces import KeyedData, KeyedDataSchema, Scalar, ScalarAction, Vector
27from ...statistics import nansigmaMad
class MedianAction(ScalarAction):
    """Calculates the median of the given data, ignoring NaN entries.

    The vector is read from ``data`` under ``vectorKey`` (after the key has
    been formatted with the call's keyword arguments) and restricted by the
    mask returned from ``self.getMask(**kwargs)``.
    """

    vectorKey = Field[str]("Key of Vector to median")

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the schema of the single vector this action reads."""
        return ((self.vectorKey, Vector),)

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Compute the NaN-ignoring median of the masked vector.

        Parameters
        ----------
        data : `KeyedData`
            Container holding the vector under the formatted ``vectorKey``.
        **kwargs
            Used to format ``vectorKey`` and forwarded to ``getMask``.

        Returns
        -------
        med : `Scalar`
            The median as a `float`, or NaN when the masked vector is empty.
        """
        mask = self.getMask(**kwargs)
        # Look the vector up and apply the mask once, then reuse the result.
        selected = cast(Vector, data[self.vectorKey.format(**kwargs)])[mask]
        if len(selected) == 0:
            # Use ``np.nan``: the ``np.NaN`` alias was removed in NumPy 2.0.
            return cast(Scalar, np.nan)
        return cast(Scalar, float(np.nanmedian(selected)))
class MeanAction(ScalarAction):
    """Calculates the mean of the given data, ignoring NaN entries.

    The vector is read from ``data`` under ``vectorKey`` (after the key has
    been formatted with the call's keyword arguments) and restricted by the
    mask returned from ``self.getMask(**kwargs)``.
    """

    vectorKey = Field[str]("Key of Vector from which to calculate mean")

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the schema of the single vector this action reads."""
        return ((self.vectorKey, Vector),)

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Compute the NaN-ignoring mean of the masked vector.

        Parameters
        ----------
        data : `KeyedData`
            Container holding the vector under the formatted ``vectorKey``.
        **kwargs
            Used to format ``vectorKey`` and forwarded to ``getMask``.

        Returns
        -------
        mean : `Scalar`
            The mean as a `float`, or NaN when the masked vector is empty.
        """
        mask = self.getMask(**kwargs)
        # Look the vector up and apply the mask once, then reuse the result.
        selected = cast(Vector, data[self.vectorKey.format(**kwargs)])[mask]
        if len(selected) == 0:
            # Use ``np.nan``: the ``np.NaN`` alias was removed in NumPy 2.0.
            return cast(Scalar, np.nan)
        return cast(Scalar, float(np.nanmean(selected)))
class StdevAction(ScalarAction):
    """Calculates the standard deviation of the given data, ignoring NaNs."""

    vectorKey = Field[str]("Key of Vector from which to calculate std deviation")

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the schema of the single vector this action reads."""
        entry = (self.vectorKey, Vector)
        return (entry,)

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Compute ``np.nanstd`` over the masked vector.

        Parameters
        ----------
        data : `KeyedData`
            Container holding the vector under the formatted ``vectorKey``.
        **kwargs
            Used to format ``vectorKey`` and forwarded to ``getMask``.

        Returns
        -------
        stdev : `Scalar`
            The standard deviation converted to a `float`.
        """
        selection = self.getMask(**kwargs)
        column = cast(Vector, data[self.vectorKey.format(**kwargs)])
        spread = np.nanstd(column[selection])
        return cast(Scalar, float(spread))
class ValueAction(ScalarAction):
    """Extracts the first value from a vector.

    No mask is applied: element ``0`` of the stored vector is returned.
    """

    vectorKey = Field[str]("Key of Vector from which to extract the first value")

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the schema of the single vector this action reads."""
        entry = (self.vectorKey, Vector)
        return (entry,)

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return the first element of the vector as a `float`.

        Parameters
        ----------
        data : `KeyedData`
            Container holding the vector under the formatted ``vectorKey``.
        **kwargs
            Used to format ``vectorKey``.

        Returns
        -------
        value : `Scalar`
            Element ``0`` of the vector, converted to `float`.
        """
        key = self.vectorKey.format(**kwargs)
        first = data[key][0]
        return cast(Scalar, float(first))
class SigmaMadAction(ScalarAction):
    """Calculates the sigma MAD of the given data.

    The statistic itself is delegated to ``nansigmaMad`` from the package's
    ``statistics`` module.
    NOTE(review): presumably a NaN-ignoring, MAD-based robust estimate of the
    standard deviation -- confirm against ``nansigmaMad``.
    """

    # The field doc previously read "Key of Vector to median", a copy-paste
    # from MedianAction that misdescribed this action.
    vectorKey = Field[str]("Key of Vector from which to calculate sigma MAD")

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the schema of the single vector this action reads."""
        return ((self.vectorKey, Vector),)

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Apply ``nansigmaMad`` to the masked vector.

        Parameters
        ----------
        data : `KeyedData`
            Container holding the vector under the formatted ``vectorKey``.
        **kwargs
            Used to format ``vectorKey`` and forwarded to ``getMask``.

        Returns
        -------
        sigmaMad : `Scalar`
            The result of ``nansigmaMad`` converted to a `float`.
        """
        mask = self.getMask(**kwargs)
        selected = cast(Vector, data[self.vectorKey.format(**kwargs)])[mask]
        return cast(Scalar, float(nansigmaMad(selected)))
class CountAction(ScalarAction):
    """Returns the number of non-NaN entries in the given column."""

    vectorKey = Field[str]("Key of Vector to count")

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the schema of the single vector this action reads."""
        entry = (self.vectorKey, Vector)
        return (entry,)

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Count the masked entries that are not NaN.

        Parameters
        ----------
        data : `KeyedData`
            Container holding the vector under the formatted ``vectorKey``.
        **kwargs
            Used to format ``vectorKey`` and forwarded to ``getMask``.

        Returns
        -------
        count : `Scalar`
            Number of masked entries for which ``np.isnan`` is false.
        """
        selection = self.getMask(**kwargs)
        column = cast(Vector, data[self.vectorKey.format(**kwargs)])[selection]
        is_nan = np.isnan(column)
        kept = column[~is_nan]
        return cast(Scalar, len(kept))
class CountUniqueAction(ScalarAction):
    """Counts the distinct values found in a given column.

    Parameters
    ----------
    data : `KeyedData`
        Container holding the column under the formatted ``vectorKey``.

    Returns
    -------
    count : `Scalar`
        The number of unique values in the masked column.
    """

    vectorKey = Field[str](doc="Name of column.")

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the schema of the single vector this action reads."""
        entry = (self.vectorKey, Vector)
        return (entry,)

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return how many distinct values ``np.unique`` finds after masking."""
        selection = self.getMask(**kwargs)
        column = cast(Vector, data[self.vectorKey.format(**kwargs)])
        distinct = np.unique(column[selection])
        return cast(Scalar, len(distinct))
class ApproxFloor(ScalarAction):
    """Returns the median of the upper tail of the sorted input.

    The masked vector is sorted in ascending order and the NaN-ignoring
    median of its last ``len(vector) // 10`` elements (the largest tenth) is
    returned. When the masked vector has fewer than ten elements the tail
    length is zero and the median of the whole vector is returned instead.

    Notes
    -----
    ``np.sort`` places NaN values at the end of the array, so NaNs occupy
    slots in the tail; ``np.nanmedian`` then ignores them, which means the
    median may be taken over fewer than ``len(vector) // 10`` values.
    """

    vectorKey = Field[str](doc="Key for the vector to perform action on", optional=False)

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the schema of the single vector this action reads."""
        return ((self.vectorKey, Vector),)

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Compute the median of the largest tenth of the masked vector.

        Parameters
        ----------
        data : `KeyedData`
            Container holding the vector under the formatted ``vectorKey``.
        **kwargs
            Used to format ``vectorKey`` and forwarded to ``getMask``.

        Returns
        -------
        floor : `Scalar`
            NaN-ignoring median of the selected tail, as a `float`.
        """
        mask = self.getMask(**kwargs)
        ordered = np.sort(cast(Vector, data[self.vectorKey.format(**kwargs)])[mask])
        tail_length = len(ordered) // 10
        # ``ordered[-0:]`` is the whole array; spell that fallback out so the
        # behaviour for short inputs is visible rather than accidental.
        tail = ordered[-tail_length:] if tail_length > 0 else ordered
        return cast(Scalar, float(np.nanmedian(tail)))
class FracThreshold(ScalarAction):
    """Compute the fraction of a distribution that is above or below a
    specified threshold. The operator is specified as a string, for example,
    "lt", "le", "ge", "gt" for the mathematical operations <, <=, >=, >. To
    compute the fraction of elements with values less than a given threshold,
    use op="lt"; use op="le" to also count values equal to the threshold.

    NaN entries are removed before the fraction is computed, so the
    denominator is the number of non-NaN masked values.
    """

    op = ChoiceField[str](
        doc="Operator name string.",
        allowed={
            "lt": "less than threshold",
            "le": "less than or equal to threshold",
            "ge": "greater than or equal to threshold",
            "gt": "greater than threshold",
        },
    )
    threshold = Field[float](doc="Threshold to apply.")
    vectorKey = Field[str](doc="Name of column")
    percent = Field[bool](doc="Express result as percentage", default=False)
    relative_to_median = Field[bool](doc="Calculate threshold relative to the median?", default=False)

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the schema of the single vector this action reads."""
        return ((self.vectorKey, Vector),)

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return the fraction of non-NaN values satisfying the comparison.

        Parameters
        ----------
        data : `KeyedData`
            Container holding the vector under the formatted ``vectorKey``.
        **kwargs
            Used to format ``vectorKey`` and forwarded to ``getMask``.

        Returns
        -------
        result : `Scalar`
            The fraction (or percentage, when ``percent`` is set) of non-NaN
            masked values for which ``value <op> threshold`` holds, or NaN
            when no non-NaN values remain.
        """
        mask = self.getMask(**kwargs)
        values = cast(Vector, data[self.vectorKey.format(**kwargs)])[mask]
        values = values[np.logical_not(np.isnan(values))]
        if len(values) == 0:
            # Nothing to compare: return NaN explicitly rather than relying on
            # NumPy's divide-by-zero warning path to produce it.
            return cast(Scalar, np.nan)
        threshold = self.threshold
        # If relative_to_median is set, shift the threshold to be median+thresh
        if self.relative_to_median:
            threshold = threshold + np.median(values)
        compare = getattr(operator, self.op)
        result = float(np.sum(compare(values, threshold)) / len(values))
        if self.percent:
            result = 100.0 * result
        return cast(Scalar, result)
class MaxAction(ScalarAction):
    """Returns the maximum of the given data."""

    vectorKey = Field[str]("Key of Vector to find maximum")

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the schema of the single vector this action reads."""
        entry = (self.vectorKey, Vector)
        return (entry,)

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Apply ``np.max`` to the masked vector.

        Parameters
        ----------
        data : `KeyedData`
            Container holding the vector under the formatted ``vectorKey``.
        **kwargs
            Used to format ``vectorKey`` and forwarded to ``getMask``.

        Returns
        -------
        maximum : `Scalar`
            The result of ``np.max`` converted to a `float`.
        """
        selection = self.getMask(**kwargs)
        column = cast(Vector, data[self.vectorKey.format(**kwargs)])
        largest = np.max(column[selection])
        return cast(Scalar, float(largest))
class MinAction(ScalarAction):
    """Returns the minimum of the given data."""

    vectorKey = Field[str]("Key for the vector to perform action on")

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the schema of the single vector this action reads."""
        entry = (self.vectorKey, Vector)
        return (entry,)

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Apply ``np.min`` to the masked vector.

        Parameters
        ----------
        data : `KeyedData`
            Container holding the vector under the formatted ``vectorKey``.
        **kwargs
            Used to format ``vectorKey`` and forwarded to ``getMask``.

        Returns
        -------
        minimum : `Scalar`
            The result of ``np.min`` converted to a `float`.
        """
        selection = self.getMask(**kwargs)
        column = cast(Vector, data[self.vectorKey.format(**kwargs)])
        smallest = np.min(column[selection])
        return cast(Scalar, float(smallest))
class FracInRange(ScalarAction):
    """Compute the fraction of a distribution that is between specified
    minimum and maximum values, and is not NaN.

    The range is half-open: a value counts when
    ``minimum <= value < maximum``. The denominator is the number of masked
    entries *including* NaNs, so NaN entries lower the fraction.
    """

    vectorKey = Field[str](doc="Name of column")
    # Defaults are the largest/smallest finite floats. ``np.inf`` is used
    # because the ``np.Inf`` alias was removed in NumPy 2.0 and, being
    # evaluated at class-definition time, would break import of the module.
    maximum = Field[float](doc="The maximum value", default=np.nextafter(np.inf, 0.0))
    minimum = Field[float](doc="The minimum value", default=np.nextafter(-np.inf, 0.0))
    percent = Field[bool](doc="Express result as percentage", default=False)

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the schema of the single vector this action reads."""
        return ((self.vectorKey, Vector),)

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return the fraction of rows with values within the specified range.

        Parameters
        ----------
        data : `KeyedData`
            Container holding the vector under the formatted ``vectorKey``.
        **kwargs
            Used to format ``vectorKey`` and forwarded to ``getMask``.

        Returns
        -------
        result : `Scalar`
            The fraction (or percentage) of rows with values within the
            specified range, or NaN when the masked vector is empty.
        """
        mask = self.getMask(**kwargs)
        values = cast(Vector, data[self.vectorKey.format(**kwargs)])[mask]
        nvalues = len(values)
        if nvalues == 0:
            # Avoid ZeroDivisionError on an empty selection; the fraction is
            # undefined, so report NaN.
            return cast(Scalar, np.nan)
        values = values[np.logical_not(np.isnan(values))]
        sel_range = (values >= self.minimum) & (values < self.maximum)
        result = float(len(values[sel_range]) / nvalues)
        if self.percent:
            result = 100.0 * result
        return cast(Scalar, result)
class FracNan(ScalarAction):
    """Compute the fraction of vector entries that are NaN."""

    vectorKey = Field[str](doc="Name of column")
    percent = Field[bool](doc="Express result as percentage", default=False)

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the schema of the single vector this action reads."""
        return ((self.vectorKey, Vector),)

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return the fraction of rows with NaN values.

        Parameters
        ----------
        data : `KeyedData`
            Container holding the vector under the formatted ``vectorKey``.
        **kwargs
            Used to format ``vectorKey`` and forwarded to ``getMask``.

        Returns
        -------
        result : `Scalar`
            The fraction (or percentage) of rows with NaN values, or NaN when
            the masked vector is empty.
        """
        mask = self.getMask(**kwargs)
        values = cast(Vector, data[self.vectorKey.format(**kwargs)])[mask]
        nvalues = len(values)
        if nvalues == 0:
            # Avoid ZeroDivisionError on an empty selection; the fraction is
            # undefined, so report NaN.
            return cast(Scalar, np.nan)
        nan_values = values[np.isnan(values)]
        result = float(len(nan_values) / nvalues)
        if self.percent:
            result = 100.0 * result
        return cast(Scalar, result)
class SumAction(ScalarAction):
    """Returns the sum of all values in the column, treating NaNs as zero."""

    vectorKey = Field[str]("Key of Vector to sum")

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the schema of the single vector this action reads."""
        entry = (self.vectorKey, Vector)
        return (entry,)

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Apply ``np.nansum`` to the masked vector.

        Parameters
        ----------
        data : `KeyedData`
            Container holding the vector under the formatted ``vectorKey``.
        **kwargs
            Used to format ``vectorKey`` and forwarded to ``getMask``.

        Returns
        -------
        total : `Scalar`
            The value returned by ``np.nansum`` (not converted to `float`).
        """
        selection = self.getMask(**kwargs)
        column = cast(Vector, data[self.vectorKey.format(**kwargs)])
        total = np.nansum(column[selection])
        return cast(Scalar, total)