Coverage for python/lsst/analysis/tools/actions/scalar/scalarActions.py: 34%
187 statements
« prev ^ index » next coverage.py v7.5.0, created at 2024-05-04 03:35 -0700
« prev ^ index » next coverage.py v7.5.0, created at 2024-05-04 03:35 -0700
1# This file is part of analysis_tools.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <https://www.gnu.org/licenses/>.
22from __future__ import annotations
# Names exported by ``from <module> import *``. The shared base class
# ``ScalarFromVectorAction`` is not listed here.
__all__ = (
    "MedianAction",
    "MeanAction",
    "StdevAction",
    "ValueAction",
    "SigmaMadAction",
    "CountAction",
    "CountUniqueAction",
    "ApproxFloor",
    "FracThreshold",
    "MaxAction",
    "MinAction",
    "FracInRange",
    "FracNan",
    "SumAction",
    "MedianHistAction",
    "IqrHistAction",
    "DivideScalar",
    "RmsAction",
)
45import operator
46from math import nan
47from typing import cast
49import numpy as np
50from lsst.pex.config import ChoiceField, Field
51from lsst.pex.config.configurableActions import ConfigurableActionField
53from ...interfaces import KeyedData, KeyedDataSchema, Scalar, ScalarAction, Vector
54from ...math import nanMax, nanMean, nanMedian, nanMin, nanSigmaMad, nanStd
class ScalarFromVectorAction(ScalarAction):
    """Base class for actions that reduce one vector to a single scalar."""

    vectorKey = Field[str]("Key of Vector to compute statistic from.")

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the schema of the single vector this action reads."""
        required_column = (self.vectorKey, Vector)
        return (required_column,)
class MedianAction(ScalarFromVectorAction):
    """Calculates the median of the given data.

    The vector named by ``vectorKey`` (formatted with ``kwargs``) is
    restricted by the mask from ``getMask`` and reduced with ``nanMedian``.
    NaN is returned when no values are selected.
    """

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return the median of the selected values, or NaN if none.

        Parameters
        ----------
        data : `KeyedData`
            Mapping that contains the vector named by ``vectorKey``.
        **kwargs
            Used to format ``vectorKey`` and forwarded to ``getMask``.

        Returns
        -------
        med : `Scalar`
            Median of the selected values; NaN for an empty selection.
        """
        mask = self.getMask(**kwargs)
        values = data[self.vectorKey.format(**kwargs)][mask]
        # Use ``np.nan``: the ``np.NaN`` alias was removed in NumPy 2.0.
        return nanMedian(values) if len(values) else np.nan
class MeanAction(ScalarFromVectorAction):
    """Calculates the mean of the given data.

    The vector named by ``vectorKey`` (formatted with ``kwargs``) is
    restricted by the mask from ``getMask`` and reduced with ``nanMean``.
    NaN is returned when no values are selected.
    """

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return the mean of the selected values, or NaN if none.

        Parameters
        ----------
        data : `KeyedData`
            Mapping that contains the vector named by ``vectorKey``.
        **kwargs
            Used to format ``vectorKey`` and forwarded to ``getMask``.

        Returns
        -------
        mean : `Scalar`
            Mean of the selected values; NaN for an empty selection.
        """
        mask = self.getMask(**kwargs)
        values = data[self.vectorKey.format(**kwargs)][mask]
        # Use ``np.nan``: the ``np.NaN`` alias was removed in NumPy 2.0.
        return nanMean(values) if len(values) else np.nan
class StdevAction(ScalarFromVectorAction):
    """Calculates the standard deviation of the given data."""

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return ``nanStd`` of the masked vector named by ``vectorKey``."""
        selection = self.getMask(**kwargs)
        column = data[self.vectorKey.format(**kwargs)]
        return nanStd(column[selection])
class RmsAction(ScalarFromVectorAction):
    """Calculates the root mean square of the given data (without subtracting
    the mean as in StdevAction)."""

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return sqrt(mean(x**2)) over the selected, non-NaN values."""
        selection = self.getMask(**kwargs)
        samples = data[self.vectorKey.format(**kwargs)][selection]
        # NaN entries are dropped before squaring and averaging.
        is_missing = np.isnan(samples)
        samples = samples[~is_missing]
        mean_square = np.mean(samples**2)
        return np.sqrt(mean_square)
class ValueAction(ScalarFromVectorAction):
    """Extracts the first value from a vector."""

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return element 0 of the vector named by ``vectorKey`` as a float.

        No mask is applied here; the first stored element is used as is.
        """
        column = data[self.vectorKey.format(**kwargs)]
        first = float(column[0])
        return cast(Scalar, first)
class SigmaMadAction(ScalarFromVectorAction):
    """Calculates the sigma mad of the given data."""

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return ``nanSigmaMad`` of the masked vector named by ``vectorKey``."""
        selection = self.getMask(**kwargs)
        column = data[self.vectorKey.format(**kwargs)]
        return nanSigmaMad(column[selection])
class CountAction(ScalarAction):
    """Performs count actions, with threshold-based filtering.

    The operator is specified as a string, for example, "lt", "le", "ge",
    "gt", "ne", and "eq" for the mathematical operations <, <=, >=, >, !=,
    and == respectively. To count non-NaN values, only pass the column name
    as vector key (the defaults are op="ne" and threshold=nan). To count NaN
    values, pass op="eq" and threshold = nan (from math.nan).
    Optionally to configure from a YAML file, pass "threshold: !!float nan".
    To compute the number of elements with values less than or equal to a
    given threshold, use op="le"; for strictly less than, use op="lt".
    """

    vectorKey = Field[str]("Key of Vector to count")
    op = ChoiceField[str](
        doc="Operator name string.",
        allowed={
            "lt": "less than threshold",
            "le": "less than or equal to threshold",
            "ge": "greater than or equal to threshold",
            "ne": "not equal to a given value",
            "eq": "equal to a given value",
            "gt": "greater than threshold",
        },
        default="ne",
    )
    threshold = Field[float](doc="Threshold to apply.", default=nan)

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the schema of the single vector this action reads."""
        return ((self.vectorKey, Vector),)

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Count the selected elements that satisfy ``op`` and ``threshold``.

        Parameters
        ----------
        data : `KeyedData`
            Mapping that contains the vector named by ``vectorKey``.
        **kwargs
            Used to format ``vectorKey`` and forwarded to ``getMask``.

        Returns
        -------
        count : `Scalar`
            With a NaN threshold: the number of NaN (op="eq") or non-NaN
            (op="ne") elements. Otherwise the number of non-NaN elements
            for which ``element <op> threshold`` holds.

        Raises
        ------
        ValueError
            Raised if the threshold is NaN and ``op`` is neither "eq" nor
            "ne".
        """
        mask = self.getMask(**kwargs)
        arr = cast(Vector, data[self.vectorKey.format(**kwargs)])[mask]
        is_nan = np.isnan(arr)

        # NaN never compares equal to anything (itself included), so an
        # ``== nan`` test can never succeed; the threshold must be checked
        # with ``isnan``.
        threshold = self.threshold
        if threshold is not None and np.isnan(threshold):
            n_nan = int(is_nan.sum())
            if self.op == "eq":
                # Count number of NaNs
                return cast(Scalar, n_nan)
            if self.op == "ne":
                # Count number of non-NaNs
                return cast(Scalar, len(arr) - n_nan)
            raise ValueError("Invalid operator for counting NaNs.")

        # Count for given threshold ignoring all NaNs
        compare = getattr(operator, self.op)
        n_matching = np.sum(compare(arr[~is_nan], threshold))
        return cast(Scalar, int(n_matching))
class CountUniqueAction(ScalarFromVectorAction):
    """Counts the number of unique rows in a given column."""

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return how many distinct values ``np.unique`` finds after masking."""
        selection = self.getMask(**kwargs)
        column = cast(Vector, data[self.vectorKey.format(**kwargs)])
        distinct = np.unique(column[selection])
        return cast(Scalar, len(distinct))
class ApproxFloor(ScalarFromVectorAction):
    """Returns the median of the largest tenth of the sorted input.

    The selected values are sorted in ascending order and the last
    ``len(values) // 10`` of them are reduced with ``nanMedian``. When fewer
    than ten values are selected that count is zero and the median is taken
    over all of them.
    """

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return the median of the top tenth of the selected values.

        Parameters
        ----------
        data : `KeyedData`
            Mapping that contains the vector named by ``vectorKey``.
        **kwargs
            Used to format ``vectorKey`` and forwarded to ``getMask``.

        Returns
        -------
        floor : `Scalar`
            ``nanMedian`` of the last ``len(values) // 10`` sorted values.
        """
        mask = self.getMask(**kwargs)
        value = np.sort(data[self.vectorKey.format(**kwargs)][mask])  # type: ignore
        x = len(value) // 10
        # For x == 0 the slice ``value[-0:]`` is the whole array, so short
        # inputs fall back to the median of everything selected.
        return nanMedian(value[-x:])
class FracThreshold(ScalarFromVectorAction):
    """Compute the fraction of a distribution above or below a threshold.

    The operator is specified as a string, for example,
    "lt", "le", "ge", "gt" for the mathematical operations <, <=, >=, >. To
    compute the fraction of elements with values less than or equal to a
    given threshold, use op="le"; for strictly less than, use op="lt".
    """

    op = ChoiceField[str](
        doc="Operator name string.",
        allowed={
            "lt": "less than threshold",
            "le": "less than or equal to threshold",
            "ge": "greater than or equal to threshold",
            "gt": "greater than threshold",
        },
    )
    threshold = Field[float](doc="Threshold to apply.")
    percent = Field[bool](doc="Express result as percentage", default=False)
    relative_to_median = Field[bool](doc="Calculate threshold relative to the median?", default=False)
    use_absolute_value = Field[bool](
        doc=(
            "Calculate threshold after taking absolute value. If relative_to_median"
            " is true the absolute value will be applied after the median is subtracted"
        ),
        default=False,
    )

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return the fraction of non-NaN values satisfying the threshold.

        Parameters
        ----------
        data : `KeyedData`
            Mapping that contains the vector named by ``vectorKey``.
        **kwargs
            Used to format ``vectorKey`` and forwarded to ``getMask``.

        Returns
        -------
        result : `Scalar`
            Fraction (or percentage if ``percent`` is set) of the selected
            non-NaN values for which ``value <op> threshold`` holds; NaN if
            no non-NaN values are selected.
        """
        mask = self.getMask(**kwargs)
        values = data[self.vectorKey.format(**kwargs)]
        values = values[mask]  # type: ignore
        values = values[np.logical_not(np.isnan(values))]
        n_values = len(values)
        if n_values == 0:
            return np.nan
        # If relative_to_median is set, compare (value - median) against the
        # threshold. The subtraction is out-of-place so that integer vectors
        # are promoted to float instead of failing an in-place cast.
        if self.relative_to_median:
            offset = nanMedian(values)
            if np.isfinite(offset):
                values = values - offset
        if self.use_absolute_value:
            values = np.abs(values)
        compare = getattr(operator, self.op)
        result = cast(
            Scalar,
            float(np.sum(compare(values, self.threshold)) / n_values),  # type: ignore
        )
        return 100.0 * result if self.percent else result
class MaxAction(ScalarFromVectorAction):
    """Returns the maximum of the given data."""

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return ``nanMax`` of the masked vector named by ``vectorKey``."""
        selection = self.getMask(**kwargs)
        column = data[self.vectorKey.format(**kwargs)]
        return nanMax(column[selection])
class MinAction(ScalarFromVectorAction):
    """Returns the minimum of the given data."""

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return ``nanMin`` of the masked vector named by ``vectorKey``."""
        selection = self.getMask(**kwargs)
        column = data[self.vectorKey.format(**kwargs)]
        return nanMin(column[selection])
class FracInRange(ScalarFromVectorAction):
    """Compute the fraction of a distribution that is between specified
    minimum and maximum values, and is not NaN.

    The range is half-open: ``minimum <= value < maximum``. The denominator
    is the number of selected values including NaNs, so NaN entries lower
    the fraction.
    """

    # ``np.inf`` rather than the ``np.Inf`` alias, which NumPy 2.0 removed;
    # these defaults are evaluated when the class is defined.
    maximum = Field[float](doc="The maximum value", default=np.nextafter(np.inf, 0.0))
    minimum = Field[float](doc="The minimum value", default=np.nextafter(-np.inf, 0.0))
    percent = Field[bool](doc="Express result as percentage", default=False)

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return the fraction of selected values inside the range.

        Parameters
        ----------
        data : `KeyedData`
            Mapping that contains the vector named by ``vectorKey``.
        **kwargs
            Used to format ``vectorKey`` and forwarded to ``getMask``.

        Returns
        -------
        result : `Scalar`
            Fraction (or percentage if ``percent`` is set) of the selected
            values that are in range; NaN if nothing is selected.
        """
        mask = self.getMask(**kwargs)
        values = cast(Vector, data[self.vectorKey.format(**kwargs)])[mask]
        nvalues = len(values)
        if nvalues == 0:
            # Nothing selected: the fraction is undefined, not an error.
            return np.nan
        values = values[np.logical_not(np.isnan(values))]
        sel_range = (values >= self.minimum) & (values < self.maximum)
        result = cast(
            Scalar,
            float(len(values[sel_range]) / nvalues),  # type: ignore
        )
        return 100.0 * result if self.percent else result
class FracNan(ScalarFromVectorAction):
    """Compute the fraction of vector entries that are NaN."""

    percent = Field[bool](doc="Express result as percentage", default=False)

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return the fraction of selected values that are NaN.

        Parameters
        ----------
        data : `KeyedData`
            Mapping that contains the vector named by ``vectorKey``.
        **kwargs
            Used to format ``vectorKey`` and forwarded to ``getMask``.

        Returns
        -------
        result : `Scalar`
            Fraction (or percentage if ``percent`` is set) of the selected
            values that are NaN; NaN if nothing is selected.
        """
        mask = self.getMask(**kwargs)
        values = cast(Vector, data[self.vectorKey.format(**kwargs)])[mask]
        nvalues = len(values)
        if nvalues == 0:
            # Nothing selected: the fraction is undefined, not an error.
            return np.nan
        n_nan = len(values[np.isnan(values)])
        result = cast(
            Scalar,
            float(n_nan / nvalues),  # type: ignore
        )
        return 100.0 * result if self.percent else result
class SumAction(ScalarFromVectorAction):
    """Returns the sum of all values in the column."""

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return ``np.nansum`` of the masked vector named by ``vectorKey``."""
        selection = self.getMask(**kwargs)
        column = cast(Vector, data[self.vectorKey.format(**kwargs)])
        total = np.nansum(column[selection])
        return cast(Scalar, total)
class MedianHistAction(ScalarAction):
    """Calculates the median of the given histogram data."""

    histKey = Field[str]("Key of frequency Vector")
    midKey = Field[str]("Key of bin midpoints Vector")

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the schema of the frequency and bin-midpoint vectors."""
        return (
            (self.histKey, Vector),
            (self.midKey, Vector),
        )

    def histMedian(self, hist, bin_mid):
        """Calculates the median of a histogram with binned values

        Parameters
        ----------
        hist : `numpy.ndarray`
            Frequency array
        bin_mid : `numpy.ndarray`
            Bin midpoints array

        Returns
        -------
        median : `float`
            Median of histogram with binned values
        """
        cumulative_sum = np.cumsum(hist)
        # First bin whose cumulative count reaches half of the total.
        median_index = np.searchsorted(cumulative_sum, cumulative_sum[-1] / 2)
        median = bin_mid[median_index]
        return median

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return the histogram median, or NaN for an empty histogram.

        Parameters
        ----------
        data : `KeyedData`
            Mapping that contains the vectors named by ``histKey`` and
            ``midKey``.
        **kwargs
            Used to format ``histKey`` and ``midKey``.

        Returns
        -------
        med : `Scalar`
            Midpoint of the median bin; NaN if the frequency vector is
            empty.
        """
        hist = cast(Vector, data[self.histKey.format(**kwargs)])
        if len(hist) == 0:
            # Use ``np.nan``: the ``np.NaN`` alias was removed in NumPy 2.0.
            return np.nan
        bin_mid = cast(Vector, data[self.midKey.format(**kwargs)])
        return cast(Scalar, float(self.histMedian(hist, bin_mid)))
class IqrHistAction(ScalarAction):
    """Calculates the interquartile range of the given histogram data."""

    histKey = Field[str]("Key of frequency Vector")
    midKey = Field[str]("Key of bin midpoints Vector")

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the schema of the frequency and bin-midpoint vectors."""
        return (
            (self.histKey, Vector),
            (self.midKey, Vector),
        )

    def histIqr(self, hist, bin_mid):
        """Calculates the interquartile range of a histogram with binned values

        Parameters
        ----------
        hist : `numpy.ndarray`
            Frequency array
        bin_mid : `numpy.ndarray`
            Bin midpoints array

        Returns
        -------
        iqr : `float`
            Inter-quartile range of histogram with binned values
        """
        cumulative_sum = np.cumsum(hist)
        # First bins whose cumulative counts reach 1/4 and 3/4 of the total.
        liqr_index = np.searchsorted(cumulative_sum, cumulative_sum[-1] / 4)
        uiqr_index = np.searchsorted(cumulative_sum, (3 / 4) * cumulative_sum[-1])
        liqr = bin_mid[liqr_index]
        uiqr = bin_mid[uiqr_index]
        iqr = uiqr - liqr
        return iqr

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return the histogram IQR, or NaN for an empty histogram.

        Parameters
        ----------
        data : `KeyedData`
            Mapping that contains the vectors named by ``histKey`` and
            ``midKey``.
        **kwargs
            Used to format ``histKey`` and ``midKey``.

        Returns
        -------
        iqr : `Scalar`
            Difference between the upper- and lower-quartile bin midpoints;
            NaN if the frequency vector is empty.
        """
        hist = cast(Vector, data[self.histKey.format(**kwargs)])
        if len(hist) == 0:
            # Use ``np.nan``: the ``np.NaN`` alias was removed in NumPy 2.0.
            return np.nan
        bin_mid = cast(Vector, data[self.midKey.format(**kwargs)])
        return cast(Scalar, float(self.histIqr(hist, bin_mid)))
class DivideScalar(ScalarAction):
    """Calculate (A/B) for scalars."""

    actionA = ConfigurableActionField[ScalarAction](doc="Action which supplies scalar A")
    actionB = ConfigurableActionField[ScalarAction](doc="Action which supplies scalar B")

    def getInputSchema(self) -> KeyedDataSchema:
        """Yield the input schema of action A followed by that of action B."""
        for entry in self.actionA.getInputSchema():
            yield entry
        for entry in self.actionB.getInputSchema():
            yield entry

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return the result of A/B.

        Parameters
        ----------
        data : `KeyedData`
            Input passed unchanged, together with ``kwargs``, to both
            sub-actions.

        Returns
        -------
        result : `Scalar`
            The result of dividing A by B.

        Raises
        ------
        ValueError
            Raised if scalar B compares equal to zero.
        """
        numerator = self.actionA(data, **kwargs)
        denominator = self.actionB(data, **kwargs)
        if denominator == 0:
            raise ValueError("Denominator is zero!")
        quotient = numerator / denominator
        return quotient