Coverage for python/lsst/analysis/tools/actions/scalar/scalarActions.py: 45%
140 statements
« prev ^ index » next coverage.py v7.3.2, created at 2023-11-30 14:27 +0000
« prev ^ index » next coverage.py v7.3.2, created at 2023-11-30 14:27 +0000
1from __future__ import annotations
3__all__ = (
4 "MedianAction",
5 "MeanAction",
6 "StdevAction",
7 "ValueAction",
8 "SigmaMadAction",
9 "CountAction",
10 "CountUniqueAction",
11 "ApproxFloor",
12 "FracThreshold",
13 "MaxAction",
14 "MinAction",
15 "FracInRange",
16 "FracNan",
17)
19import operator
20from typing import cast
22import numpy as np
23from lsst.pex.config import ChoiceField, Field
25from ...interfaces import KeyedData, KeyedDataSchema, Scalar, ScalarAction, Vector
26from ...statistics import nansigmaMad
class MedianAction(ScalarAction):
    """Calculates the median of the given data.

    NaN entries are ignored (`numpy.nanmedian`). If the masked selection
    contains no elements, NaN is returned without calling NumPy.
    """

    vectorKey = Field[str]("Key of Vector to median")

    def getInputSchema(self) -> KeyedDataSchema:
        return ((self.vectorKey, Vector),)

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return the median of the masked vector.

        Parameters
        ----------
        data : `KeyedData`
            Mapping that contains the vector named by ``vectorKey`` (after
            formatting the key with ``kwargs``).
        **kwargs
            Forwarded to ``getMask`` and used to format ``vectorKey``.

        Returns
        -------
        med : `Scalar`
            The NaN-ignoring median, or NaN for an empty selection.
        """
        mask = self.getMask(**kwargs)
        # Look the vector up once and reuse the masked selection.
        values = cast(Vector, data[self.vectorKey.format(**kwargs)])[mask]
        if len(values) == 0:
            # ``np.NaN`` was removed in NumPy 2.0; ``np.nan`` exists in every
            # NumPy release.
            return np.nan
        return cast(Scalar, float(np.nanmedian(values)))
class MeanAction(ScalarAction):
    """Calculates the mean of the given data.

    NaN entries are ignored (`numpy.nanmean`). If the masked selection
    contains no elements, NaN is returned without calling NumPy.
    """

    vectorKey = Field[str]("Key of Vector from which to calculate mean")

    def getInputSchema(self) -> KeyedDataSchema:
        return ((self.vectorKey, Vector),)

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return the mean of the masked vector.

        Parameters
        ----------
        data : `KeyedData`
            Mapping that contains the vector named by ``vectorKey`` (after
            formatting the key with ``kwargs``).
        **kwargs
            Forwarded to ``getMask`` and used to format ``vectorKey``.

        Returns
        -------
        mean : `Scalar`
            The NaN-ignoring mean, or NaN for an empty selection.
        """
        mask = self.getMask(**kwargs)
        # Look the vector up once and reuse the masked selection.
        values = cast(Vector, data[self.vectorKey.format(**kwargs)])[mask]
        if len(values) == 0:
            # ``np.NaN`` was removed in NumPy 2.0; ``np.nan`` exists in every
            # NumPy release.
            return np.nan
        return cast(Scalar, float(np.nanmean(values)))
class StdevAction(ScalarAction):
    """Compute the standard deviation of a vector, ignoring NaN entries."""

    vectorKey = Field[str]("Key of Vector from which to calculate std deviation")

    def getInputSchema(self) -> KeyedDataSchema:
        # The only required input is the configured vector.
        schema = ((self.vectorKey, Vector),)
        return schema

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return ``numpy.nanstd`` of the masked vector as a float.

        Parameters
        ----------
        data : `KeyedData`
            Mapping that contains the vector named by ``vectorKey`` (after
            formatting the key with ``kwargs``).
        **kwargs
            Forwarded to ``getMask`` and used to format ``vectorKey``.

        Returns
        -------
        result : `Scalar`
            Standard deviation of the selected values.
        """
        selection = self.getMask(**kwargs)
        column = cast(Vector, data[self.vectorKey.format(**kwargs)])
        spread = np.nanstd(column[selection])
        return cast(Scalar, float(spread))
class ValueAction(ScalarAction):
    """Return the first element of a vector as a float.

    No mask is applied: the element at index 0 of the stored vector is used.
    """

    vectorKey = Field[str]("Key of Vector from which to extract the first value")

    def getInputSchema(self) -> KeyedDataSchema:
        # The only required input is the configured vector.
        schema = ((self.vectorKey, Vector),)
        return schema

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return element 0 of the vector named by ``vectorKey``.

        Parameters
        ----------
        data : `KeyedData`
            Mapping that contains the vector named by ``vectorKey`` (after
            formatting the key with ``kwargs``).
        **kwargs
            Used to format ``vectorKey``.

        Returns
        -------
        result : `Scalar`
            The first value, converted to `float`.
        """
        column = data[self.vectorKey.format(**kwargs)]
        first = column[0]
        return cast(Scalar, float(first))
class SigmaMadAction(ScalarAction):
    """Compute the sigma MAD of a vector using ``nansigmaMad``."""

    vectorKey = Field[str]("Key of Vector to median")

    def getInputSchema(self) -> KeyedDataSchema:
        # The only required input is the configured vector.
        schema = ((self.vectorKey, Vector),)
        return schema

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return ``nansigmaMad`` of the masked vector as a float.

        Parameters
        ----------
        data : `KeyedData`
            Mapping that contains the vector named by ``vectorKey`` (after
            formatting the key with ``kwargs``).
        **kwargs
            Forwarded to ``getMask`` and used to format ``vectorKey``.

        Returns
        -------
        result : `Scalar`
            The value returned by ``nansigmaMad``, converted to `float`.
        """
        selection = self.getMask(**kwargs)
        selected = data[self.vectorKey.format(**kwargs)][selection]  # type: ignore
        sigma = nansigmaMad(selected)
        return cast(Scalar, float(sigma))
class CountAction(ScalarAction):
    """Count the entries of a vector that are not NaN."""

    vectorKey = Field[str]("Key of Vector to count")

    def getInputSchema(self) -> KeyedDataSchema:
        # The only required input is the configured vector.
        schema = ((self.vectorKey, Vector),)
        return schema

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return the number of non-NaN values in the masked vector.

        Parameters
        ----------
        data : `KeyedData`
            Mapping that contains the vector named by ``vectorKey`` (after
            formatting the key with ``kwargs``).
        **kwargs
            Forwarded to ``getMask`` and used to format ``vectorKey``.

        Returns
        -------
        count : `Scalar`
            Number of selected entries that are not NaN.
        """
        selection = self.getMask(**kwargs)
        selected = cast(Vector, data[self.vectorKey.format(**kwargs)])[selection]
        is_nan = np.isnan(selected)
        kept = selected[~is_nan]
        return cast(Scalar, len(kept))
class CountUniqueAction(ScalarAction):
    """Count the distinct values found in a column.

    The mask is applied first; the result is the length of
    ``numpy.unique`` of the selected values.
    """

    vectorKey = Field[str](doc="Name of column.")

    def getInputSchema(self) -> KeyedDataSchema:
        # The only required input is the configured vector.
        schema = ((self.vectorKey, Vector),)
        return schema

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return the number of unique values in the masked column.

        Parameters
        ----------
        data : `KeyedData`
            Mapping that contains the vector named by ``vectorKey`` (after
            formatting the key with ``kwargs``).
        **kwargs
            Forwarded to ``getMask`` and used to format ``vectorKey``.

        Returns
        -------
        count : `Scalar`
            The number of unique values among the selected rows.
        """
        selection = self.getMask(**kwargs)
        column = cast(Vector, data[self.vectorKey.format(**kwargs)])
        distinct = np.unique(column[selection])
        return cast(Scalar, len(distinct))
class ApproxFloor(ScalarAction):
    """Returns the median of the highest tenth of the sorted input.

    The masked values are sorted in ascending order and the NaN-ignoring
    median of the last ``len(values) // 10`` entries is returned.

    Notes
    -----
    With fewer than ten selected values the count is zero, and the slice
    ``values[-0:]`` spans the whole array, so the median of all selected
    values is returned.

    `numpy.sort` places NaN at the end of the array, so NaN entries occupy
    the tail that is sliced off; `numpy.nanmedian` then ignores them.
    """

    vectorKey = Field[str](doc="Key for the vector to perform action on", optional=False)

    def getInputSchema(self) -> KeyedDataSchema:
        return ((self.vectorKey, Vector),)

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return the median of the top tenth of the masked, sorted vector.

        Parameters
        ----------
        data : `KeyedData`
            Mapping that contains the vector named by ``vectorKey`` (after
            formatting the key with ``kwargs``).
        **kwargs
            Forwarded to ``getMask`` and used to format ``vectorKey``.

        Returns
        -------
        result : `Scalar`
            NaN-ignoring median of the last ``len // 10`` sorted values.
        """
        mask = self.getMask(**kwargs)
        value = np.sort(data[self.vectorKey.format(**kwargs)][mask])  # type: ignore
        # Size of the tail to use; zero here makes the slice below cover
        # the entire array.
        x = len(value) // 10
        return cast(Scalar, float(np.nanmedian(value[-x:])))
class FracThreshold(ScalarAction):
    """Compute the fraction of a distribution that is above or below a
    specified threshold.

    The operator is specified as a string: "lt", "le", "ge", "gt" for the
    mathematical operations <, <=, >=, >. For example, to compute the
    fraction of elements with values strictly less than a given threshold,
    use op="lt"; use op="le" to also count values equal to the threshold.

    NaN entries are removed before the fraction is computed, so the
    denominator is the number of non-NaN selected values. If no values
    remain, NaN is returned.
    """

    op = ChoiceField[str](
        doc="Operator name string.",
        allowed={
            "lt": "less than threshold",
            "le": "less than or equal to threshold",
            "ge": "greater than or equal to threshold",
            "gt": "greater than threshold",
        },
    )
    threshold = Field[float](doc="Threshold to apply.")
    vectorKey = Field[str](doc="Name of column")
    percent = Field[bool](doc="Express result as percentage", default=False)
    relative_to_median = Field[bool](doc="Calculate threshold relative to the median?", default=False)

    def getInputSchema(self) -> KeyedDataSchema:
        return ((self.vectorKey, Vector),)

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return the fraction of values satisfying the comparison.

        Parameters
        ----------
        data : `KeyedData`
            Mapping that contains the vector named by ``vectorKey`` (after
            formatting the key with ``kwargs``).
        **kwargs
            Forwarded to ``getMask`` and used to format ``vectorKey``.

        Returns
        -------
        result : `Scalar`
            The fraction (or percentage, if ``percent`` is set) of non-NaN
            selected values for which ``value <op> threshold`` holds, or NaN
            if there are no such values to test.
        """
        mask = self.getMask(**kwargs)
        values = data[self.vectorKey.format(**kwargs)]
        values = values[mask]  # type: ignore
        values = values[np.logical_not(np.isnan(values))]
        n_values = len(values)
        if n_values == 0:
            # Nothing to compare: return NaN explicitly rather than relying
            # on a 0/0 division and the median of an empty array.
            return np.nan
        # If relative_to_median is set, shift the threshold to be median+thresh
        if self.relative_to_median:
            threshold = self.threshold + np.median(values)
        else:
            threshold = self.threshold
        compare = getattr(operator, self.op)
        result = cast(
            Scalar,
            float(np.sum(compare(values, threshold)) / n_values),  # type: ignore
        )
        if self.percent:
            return 100.0 * result
        else:
            return result
class MaxAction(ScalarAction):
    """Returns the maximum of the given data.

    NaN entries are not ignored: `numpy.max` propagates NaN. If the masked
    selection contains no elements, NaN is returned instead of raising.
    """

    vectorKey = Field[str]("Key of Vector to find maximum")

    def getInputSchema(self) -> KeyedDataSchema:
        return ((self.vectorKey, Vector),)

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return the maximum of the masked vector.

        Parameters
        ----------
        data : `KeyedData`
            Mapping that contains the vector named by ``vectorKey`` (after
            formatting the key with ``kwargs``).
        **kwargs
            Forwarded to ``getMask`` and used to format ``vectorKey``.

        Returns
        -------
        result : `Scalar`
            The maximum selected value, or NaN for an empty selection.
        """
        mask = self.getMask(**kwargs)
        values = cast(Vector, data[self.vectorKey.format(**kwargs)])[mask]
        if len(values) == 0:
            # ``np.max`` raises ValueError on a zero-size array.
            return np.nan
        return cast(Scalar, float(np.max(values)))
class MinAction(ScalarAction):
    """Returns the minimum of the given data.

    NaN entries are not ignored: `numpy.min` propagates NaN. If the masked
    selection contains no elements, NaN is returned instead of raising.
    """

    vectorKey = Field[str]("Key for the vector to perform action on")

    def getInputSchema(self) -> KeyedDataSchema:
        return ((self.vectorKey, Vector),)

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return the minimum of the masked vector.

        Parameters
        ----------
        data : `KeyedData`
            Mapping that contains the vector named by ``vectorKey`` (after
            formatting the key with ``kwargs``).
        **kwargs
            Forwarded to ``getMask`` and used to format ``vectorKey``.

        Returns
        -------
        result : `Scalar`
            The minimum selected value, or NaN for an empty selection.
        """
        mask = self.getMask(**kwargs)
        values = cast(Vector, data[self.vectorKey.format(**kwargs)])[mask]
        if len(values) == 0:
            # ``np.min`` raises ValueError on a zero-size array.
            return np.nan
        return cast(Scalar, float(np.min(values)))
class FracInRange(ScalarAction):
    """Compute the fraction of a distribution that is between specified
    minimum and maximum values, and is not NaN.

    The range is half-open: a value counts when
    ``minimum <= value < maximum``. The denominator is the number of
    selected values *including* NaN entries, so NaNs lower the fraction.
    """

    vectorKey = Field[str](doc="Name of column")
    # ``np.Inf`` was removed in NumPy 2.0; ``np.inf`` is the portable
    # spelling. The defaults are the largest and smallest finite floats.
    maximum = Field[float](doc="The maximum value", default=np.nextafter(np.inf, 0.0))
    minimum = Field[float](doc="The minimum value", default=np.nextafter(-np.inf, 0.0))
    percent = Field[bool](doc="Express result as percentage", default=False)

    def getInputSchema(self) -> KeyedDataSchema:
        return ((self.vectorKey, Vector),)

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return the fraction of rows with values within the specified range.

        Parameters
        ----------
        data : `KeyedData`
            Mapping that contains the vector named by ``vectorKey`` (after
            formatting the key with ``kwargs``).
        **kwargs
            Forwarded to ``getMask`` and used to format ``vectorKey``.

        Returns
        -------
        result : `Scalar`
            The fraction (or percentage) of rows with values within the
            specified range, or NaN if the masked selection is empty.
        """
        mask = self.getMask(**kwargs)
        values = cast(Vector, data[self.vectorKey.format(**kwargs)])[mask]
        # Count before dropping NaNs: NaN rows stay in the denominator.
        nvalues = len(values)
        if nvalues == 0:
            # Avoid ZeroDivisionError on an empty selection.
            return np.nan
        values = values[np.logical_not(np.isnan(values))]
        sel_range = (values >= self.minimum) & (values < self.maximum)
        result = cast(
            Scalar,
            float(len(values[sel_range]) / nvalues),  # type: ignore
        )
        if self.percent:
            return 100.0 * result
        else:
            return result
class FracNan(ScalarAction):
    """Compute the fraction of vector entries that are NaN.

    The denominator is the number of masked entries. If the masked
    selection is empty, NaN is returned.
    """

    vectorKey = Field[str](doc="Name of column")
    percent = Field[bool](doc="Express result as percentage", default=False)

    def getInputSchema(self) -> KeyedDataSchema:
        return ((self.vectorKey, Vector),)

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return the fraction of rows with NaN values.

        Parameters
        ----------
        data : `KeyedData`
            Mapping that contains the vector named by ``vectorKey`` (after
            formatting the key with ``kwargs``).
        **kwargs
            Forwarded to ``getMask`` and used to format ``vectorKey``.

        Returns
        -------
        result : `Scalar`
            The fraction (or percentage) of rows with NaN values, or NaN if
            the masked selection is empty.
        """
        mask = self.getMask(**kwargs)
        values = cast(Vector, data[self.vectorKey.format(**kwargs)])[mask]
        nvalues = len(values)
        if nvalues == 0:
            # Avoid ZeroDivisionError on an empty selection.
            return np.nan
        values = values[np.isnan(values)]
        result = cast(
            Scalar,
            float(len(values) / nvalues),  # type: ignore
        )
        if self.percent:
            return 100.0 * result
        else:
            return result