Coverage for python/lsst/analysis/tools/actions/scalar/scalarActions.py: 47%
128 statements
« prev ^ index » next coverage.py v7.3.1, created at 2023-09-21 19:24 +0000
« prev ^ index » next coverage.py v7.3.1, created at 2023-09-21 19:24 +0000
1from __future__ import annotations
3__all__ = (
4 "MedianAction",
5 "MeanAction",
6 "StdevAction",
7 "SigmaMadAction",
8 "CountAction",
9 "CountUniqueAction",
10 "ApproxFloor",
11 "FracThreshold",
12 "MaxAction",
13 "MinAction",
14 "FracInRange",
15 "FracNan",
16)
18import operator
19from typing import cast
21import numpy as np
22from lsst.pex.config import ChoiceField, Field
24from ...interfaces import KeyedData, KeyedDataSchema, Scalar, ScalarAction, Vector
25from ...statistics import nansigmaMad
class MedianAction(ScalarAction):
    """Reduce one vector to its median, ignoring NaN entries.

    The vector is read from the input data under ``vectorKey`` (formatted
    with the keyword arguments of the call) and indexed with the mask
    returned by ``getMask`` before the median is taken.
    """

    vectorKey = Field[str](doc="Key of Vector to median")

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the input schema: a single `Vector` under ``vectorKey``."""
        schema = ((self.vectorKey, Vector),)
        return schema

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return the NaN-ignoring median of the selected vector.

        Parameters
        ----------
        data : `KeyedData`
            Mapping that holds the vector under the formatted ``vectorKey``.
        **kwargs
            Forwarded to ``getMask`` and used to format ``vectorKey``.

        Returns
        -------
        median : `Scalar`
            Result of `numpy.nanmedian`, converted to a Python `float`.
        """
        selection = self.getMask(**kwargs)
        column = cast(Vector, data[self.vectorKey.format(**kwargs)])
        return cast(Scalar, float(np.nanmedian(column[selection])))
class MeanAction(ScalarAction):
    """Reduce one vector to its arithmetic mean, ignoring NaN entries.

    The vector is read from the input data under ``vectorKey`` (formatted
    with the keyword arguments of the call) and indexed with the mask
    returned by ``getMask`` before the mean is taken.
    """

    vectorKey = Field[str](doc="Key of Vector from which to calculate mean")

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the input schema: a single `Vector` under ``vectorKey``."""
        schema = ((self.vectorKey, Vector),)
        return schema

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return the NaN-ignoring mean of the selected vector.

        Parameters
        ----------
        data : `KeyedData`
            Mapping that holds the vector under the formatted ``vectorKey``.
        **kwargs
            Forwarded to ``getMask`` and used to format ``vectorKey``.

        Returns
        -------
        mean : `Scalar`
            Result of `numpy.nanmean`, converted to a Python `float`.
        """
        selection = self.getMask(**kwargs)
        column = cast(Vector, data[self.vectorKey.format(**kwargs)])
        return cast(Scalar, float(np.nanmean(column[selection])))
class StdevAction(ScalarAction):
    """Reduce one vector to its standard deviation, ignoring NaN entries.

    The vector is read from the input data under ``vectorKey`` (formatted
    with the keyword arguments of the call) and indexed with the mask
    returned by ``getMask`` before the statistic is evaluated.
    """

    vectorKey = Field[str](doc="Key of Vector from which to calculate std deviation")

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the input schema: a single `Vector` under ``vectorKey``."""
        schema = ((self.vectorKey, Vector),)
        return schema

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return the NaN-ignoring standard deviation of the selected vector.

        Parameters
        ----------
        data : `KeyedData`
            Mapping that holds the vector under the formatted ``vectorKey``.
        **kwargs
            Forwarded to ``getMask`` and used to format ``vectorKey``.

        Returns
        -------
        stdev : `Scalar`
            Result of `numpy.nanstd` (called with its default arguments),
            converted to a Python `float`.
        """
        selection = self.getMask(**kwargs)
        column = cast(Vector, data[self.vectorKey.format(**kwargs)])
        return cast(Scalar, float(np.nanstd(column[selection])))
class SigmaMadAction(ScalarAction):
    """Reduce one vector to its sigma MAD, as computed by ``nansigmaMad``.

    The vector is read from the input data under ``vectorKey`` (formatted
    with the keyword arguments of the call) and indexed with the mask
    returned by ``getMask`` before being handed to ``nansigmaMad``.
    """

    # The original doc string was copy-pasted from MedianAction ("Key of
    # Vector to median"); it now describes what this action computes.
    vectorKey = Field[str](doc="Key of Vector from which to calculate sigma MAD")

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the input schema: a single `Vector` under ``vectorKey``."""
        return ((self.vectorKey, Vector),)

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return the sigma MAD of the selected vector.

        Parameters
        ----------
        data : `KeyedData`
            Mapping that holds the vector under the formatted ``vectorKey``.
        **kwargs
            Forwarded to ``getMask`` and used to format ``vectorKey``.

        Returns
        -------
        sigmaMad : `Scalar`
            Result of ``nansigmaMad``, converted to a Python `float`.
        """
        mask = self.getMask(**kwargs)
        selected = data[self.vectorKey.format(**kwargs)][mask]  # type: ignore
        return cast(Scalar, float(nansigmaMad(selected)))
class CountAction(ScalarAction):
    """Count the entries of a vector that are not NaN.

    The vector is read from the input data under ``vectorKey`` (formatted
    with the keyword arguments of the call) and indexed with the mask
    returned by ``getMask`` before counting.
    """

    vectorKey = Field[str](doc="Key of Vector to count")

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the input schema: a single `Vector` under ``vectorKey``."""
        schema = ((self.vectorKey, Vector),)
        return schema

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return how many selected entries are not NaN.

        Parameters
        ----------
        data : `KeyedData`
            Mapping that holds the vector under the formatted ``vectorKey``.
        **kwargs
            Forwarded to ``getMask`` and used to format ``vectorKey``.

        Returns
        -------
        count : `Scalar`
            Number of masked entries for which `numpy.isnan` is false.
        """
        selection = self.getMask(**kwargs)
        selected = cast(Vector, data[self.vectorKey.format(**kwargs)])[selection]
        is_valid = np.logical_not(np.isnan(selected))
        return cast(Scalar, len(selected[is_valid]))
class CountUniqueAction(ScalarAction):
    """Count the distinct values found in a given column.

    The column is read from the input data under ``vectorKey`` (formatted
    with the keyword arguments of the call) and indexed with the mask
    returned by ``getMask``; distinct values are determined by
    `numpy.unique`.
    """

    vectorKey = Field[str](doc="Name of column.")

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the input schema: a single `Vector` under ``vectorKey``."""
        schema = ((self.vectorKey, Vector),)
        return schema

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return the number of unique values in the selected column.

        Parameters
        ----------
        data : `KeyedData`
            Mapping that holds the column under the formatted ``vectorKey``.
        **kwargs
            Forwarded to ``getMask`` and used to format ``vectorKey``.

        Returns
        -------
        count : `Scalar`
            Length of the array returned by `numpy.unique` for the masked
            column.
        """
        selection = self.getMask(**kwargs)
        column = cast(Vector, data[self.vectorKey.format(**kwargs)])
        distinct = np.unique(column[selection])
        return cast(Scalar, len(distinct))
class ApproxFloor(ScalarAction):
    """Return the median of the upper tenth of the sorted input.

    The masked vector is sorted in ascending order and the NaN-ignoring
    median of its last ``len(vector) // 10`` entries (the largest values) is
    returned. When the vector has fewer than ten entries that count is zero
    and the median of the whole vector is returned instead.

    Notes
    -----
    An earlier version of this docstring described the result as "the median
    of the lowest ten values"; the implementation has always used the top
    10% of the ascending sort, which is what is documented here.
    NOTE(review): confirm with the callers that the upper tail is what is
    intended.
    """

    vectorKey = Field[str](doc="Key for the vector to perform action on", optional=False)

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the input schema: a single `Vector` under ``vectorKey``."""
        return ((self.vectorKey, Vector),)

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return the NaN-ignoring median of the largest tenth of the values.

        Parameters
        ----------
        data : `KeyedData`
            Mapping that holds the vector under the formatted ``vectorKey``.
        **kwargs
            Forwarded to ``getMask`` and used to format ``vectorKey``.

        Returns
        -------
        floor : `Scalar`
            Median of the upper tail (or of the whole vector when it has
            fewer than ten entries), converted to a Python `float`.
        """
        mask = self.getMask(**kwargs)
        ordered = np.sort(data[self.vectorKey.format(**kwargs)][mask])  # type: ignore
        n_tail = len(ordered) // 10
        # A tail length of zero would make ``ordered[-0:]`` select everything;
        # spell that fallback out instead of relying on the ``-0`` slice.
        tail = ordered[-n_tail:] if n_tail else ordered
        # For floating-point input numpy.sort places NaN last, so any NaN
        # lands in this tail; nanmedian then ignores it.
        return cast(Scalar, float(np.nanmedian(tail)))
class FracThreshold(ScalarAction):
    """Compute the fraction of a distribution that is above or below a
    specified threshold.

    The comparison is selected by name: "lt", "le", "ge", "gt" correspond to
    the mathematical operations <, <=, >=, >. For example, to compute the
    fraction of elements with values strictly less than the threshold use
    ``op="lt"``; use ``op="le"`` to also include values equal to it.

    NaN entries are removed before the fraction is formed, so they count
    towards neither the numerator nor the denominator.
    """

    op = ChoiceField[str](
        doc="Operator name string.",
        allowed={
            "lt": "less than threshold",
            "le": "less than or equal to threshold",
            "ge": "greater than or equal to threshold",
            "gt": "greater than threshold",
        },
    )
    threshold = Field[float](doc="Threshold to apply.")
    vectorKey = Field[str](doc="Name of column")
    percent = Field[bool](doc="Express result as percentage", default=False)
    relative_to_median = Field[bool](doc="Calculate threshold relative to the median?", default=False)

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the input schema: a single `Vector` under ``vectorKey``."""
        return ((self.vectorKey, Vector),)

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return the fraction of non-NaN values satisfying the comparison.

        Parameters
        ----------
        data : `KeyedData`
            Mapping that holds the column under the formatted ``vectorKey``.
        **kwargs
            Forwarded to ``getMask`` and used to format ``vectorKey``.

        Returns
        -------
        result : `Scalar`
            Fraction (or percentage, if ``percent`` is set) of the masked,
            non-NaN values for which ``value <op> threshold`` holds. NaN is
            returned when no non-NaN values remain after masking.
        """
        mask = self.getMask(**kwargs)
        values = data[self.vectorKey.format(**kwargs)]
        values = values[mask]  # type: ignore
        values = values[np.logical_not(np.isnan(values))]

        n_valid = len(values)
        if n_valid == 0:
            # The fraction is undefined here. Previously this fell through to
            # a 0/0 (and a median of an empty array) that produced NaN with
            # RuntimeWarnings; return the same NaN explicitly.
            return cast(Scalar, float("nan"))

        # If relative_to_median is set, shift the threshold to be median+thresh
        threshold = self.threshold
        if self.relative_to_median:
            threshold = threshold + np.median(values)

        compare = getattr(operator, self.op)
        result = cast(
            Scalar,
            float(np.sum(compare(values, threshold)) / n_valid),  # type: ignore
        )
        if self.percent:
            return 100.0 * result
        return result
class MaxAction(ScalarAction):
    """Reduce one vector to its largest value.

    The vector is read from the input data under ``vectorKey`` (formatted
    with the keyword arguments of the call) and indexed with the mask
    returned by ``getMask``. The maximum is taken with `numpy.max`, which,
    unlike the ``nan*`` reductions, does not skip NaN entries.
    """

    vectorKey = Field[str](doc="Key of Vector to find maximum")

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the input schema: a single `Vector` under ``vectorKey``."""
        schema = ((self.vectorKey, Vector),)
        return schema

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return the maximum of the selected vector.

        Parameters
        ----------
        data : `KeyedData`
            Mapping that holds the vector under the formatted ``vectorKey``.
        **kwargs
            Forwarded to ``getMask`` and used to format ``vectorKey``.

        Returns
        -------
        maximum : `Scalar`
            Result of `numpy.max`, converted to a Python `float`.
        """
        selection = self.getMask(**kwargs)
        column = cast(Vector, data[self.vectorKey.format(**kwargs)])
        return cast(Scalar, float(np.max(column[selection])))
class MinAction(ScalarAction):
    """Reduce one vector to its smallest value.

    The vector is read from the input data under ``vectorKey`` (formatted
    with the keyword arguments of the call) and indexed with the mask
    returned by ``getMask``. The minimum is taken with `numpy.min`, which,
    unlike the ``nan*`` reductions, does not skip NaN entries.
    """

    vectorKey = Field[str](doc="Key for the vector to perform action on")

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the input schema: a single `Vector` under ``vectorKey``."""
        schema = ((self.vectorKey, Vector),)
        return schema

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return the minimum of the selected vector.

        Parameters
        ----------
        data : `KeyedData`
            Mapping that holds the vector under the formatted ``vectorKey``.
        **kwargs
            Forwarded to ``getMask`` and used to format ``vectorKey``.

        Returns
        -------
        minimum : `Scalar`
            Result of `numpy.min`, converted to a Python `float`.
        """
        selection = self.getMask(**kwargs)
        column = cast(Vector, data[self.vectorKey.format(**kwargs)])
        return cast(Scalar, float(np.min(column[selection])))
class FracInRange(ScalarAction):
    """Compute the fraction of a distribution that is between specified
    minimum and maximum values, and is not NaN.

    The range is half-open: a value is counted when
    ``minimum <= value < maximum``. The denominator is the number of masked
    entries *including* NaN entries, so NaNs lower the resulting fraction.
    """

    vectorKey = Field[str](doc="Name of column")
    # ``np.inf`` is used rather than the ``np.Inf`` alias, which was removed
    # in NumPy 2.0. The defaults are the finite floats adjacent to +/-inf.
    maximum = Field[float](doc="The maximum value", default=np.nextafter(np.inf, 0.0))
    minimum = Field[float](doc="The minimum value", default=np.nextafter(-np.inf, 0.0))
    percent = Field[bool](doc="Express result as percentage", default=False)

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the input schema: a single `Vector` under ``vectorKey``."""
        return ((self.vectorKey, Vector),)

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return the fraction of rows with values within the specified range.

        Parameters
        ----------
        data : `KeyedData`
            Mapping that holds the column under the formatted ``vectorKey``.
        **kwargs
            Forwarded to ``getMask`` and used to format ``vectorKey``.

        Returns
        -------
        result : `Scalar`
            The fraction (or percentage) of rows with values within the
            specified range. NaN is returned when the mask selects no rows.
        """
        mask = self.getMask(**kwargs)
        values = cast(Vector, data[self.vectorKey.format(**kwargs)])[mask]
        # Count rows before dropping NaNs: NaN rows stay in the denominator.
        nvalues = len(values)
        if nvalues == 0:
            # The fraction of an empty selection is undefined; return NaN
            # rather than raising ZeroDivisionError.
            return cast(Scalar, float("nan"))
        values = values[np.logical_not(np.isnan(values))]
        sel_range = (values >= self.minimum) & (values < self.maximum)
        result = cast(
            Scalar,
            float(len(values[sel_range]) / nvalues),  # type: ignore
        )
        if self.percent:
            return 100.0 * result
        return result
class FracNan(ScalarAction):
    """Compute the fraction of vector entries that are NaN.

    The vector is read from the input data under ``vectorKey`` (formatted
    with the keyword arguments of the call) and indexed with the mask
    returned by ``getMask``; the fraction is taken over the masked entries.
    """

    vectorKey = Field[str](doc="Name of column")
    percent = Field[bool](doc="Express result as percentage", default=False)

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the input schema: a single `Vector` under ``vectorKey``."""
        return ((self.vectorKey, Vector),)

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return the fraction of rows with NaN values.

        Parameters
        ----------
        data : `KeyedData`
            Mapping that holds the column under the formatted ``vectorKey``.
        **kwargs
            Forwarded to ``getMask`` and used to format ``vectorKey``.

        Returns
        -------
        result : `Scalar`
            The fraction (or percentage) of rows with NaN values. NaN is
            returned when the mask selects no rows.
        """
        mask = self.getMask(**kwargs)
        values = cast(Vector, data[self.vectorKey.format(**kwargs)])[mask]
        nvalues = len(values)
        if nvalues == 0:
            # The fraction of an empty selection is undefined; return NaN
            # rather than raising ZeroDivisionError.
            return cast(Scalar, float("nan"))
        values = values[np.isnan(values)]
        result = cast(
            Scalar,
            float(len(values) / nvalues),  # type: ignore
        )
        if self.percent:
            return 100.0 * result
        return result