Coverage for python/lsst/analysis/tools/actions/scalar/scalarActions.py: 35%
178 statements
« prev ^ index » next coverage.py v7.4.4, created at 2024-03-26 04:09 -0700
« prev ^ index » next coverage.py v7.4.4, created at 2024-03-26 04:09 -0700
1# This file is part of analysis_tools.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <https://www.gnu.org/licenses/>.
22from __future__ import annotations
24__all__ = (
25 "MedianAction",
26 "MeanAction",
27 "StdevAction",
28 "ValueAction",
29 "SigmaMadAction",
30 "CountAction",
31 "CountUniqueAction",
32 "ApproxFloor",
33 "FracThreshold",
34 "MaxAction",
35 "MinAction",
36 "FracInRange",
37 "FracNan",
38 "SumAction",
39 "MedianHistAction",
40 "IqrHistAction",
41 "DivideScalar",
42)
44import operator
45from math import nan
46from typing import cast
48import numpy as np
49from lsst.pex.config import ChoiceField, Field
50from lsst.pex.config.configurableActions import ConfigurableActionField
52from ...interfaces import KeyedData, KeyedDataSchema, Scalar, ScalarAction, Vector
53from ...math import nanMax, nanMean, nanMedian, nanMin, nanSigmaMad, nanStd
class ScalarFromVectorAction(ScalarAction):
    """Common base for actions that reduce one vector to a single scalar.

    This class only declares the configuration field that names the input
    vector and reports the corresponding input schema; the statistic itself
    is computed by the ``__call__`` of each subclass in this module.
    """

    vectorKey = Field[str]("Key of Vector to compute statistic from.")

    def getInputSchema(self) -> KeyedDataSchema:
        """Describe the single input this action needs.

        Returns
        -------
        schema : `KeyedDataSchema`
            A one-element tuple pairing ``vectorKey`` with the `Vector` type.
        """
        requirement = (self.vectorKey, Vector)
        return (requirement,)
class MedianAction(ScalarFromVectorAction):
    """Calculates the median of the given data.

    NaN is returned when no elements remain after the mask is applied.
    """

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Compute the median of the masked vector.

        Parameters
        ----------
        data : `KeyedData`
            Mapping from which the vector named by ``vectorKey`` is read.
        **kwargs
            Used to format ``vectorKey`` and forwarded to ``getMask``.

        Returns
        -------
        median : `Scalar`
            Result of ``nanMedian`` on the masked values, or NaN if the
            masked vector is empty.
        """
        mask = self.getMask(**kwargs)
        values = data[self.vectorKey.format(**kwargs)][mask]
        # ``np.NaN`` was removed in NumPy 2.0; ``np.nan`` exists in every
        # NumPy release, so the empty-input path no longer raises
        # AttributeError on modern installations.
        return nanMedian(values) if len(values) else np.nan
class MeanAction(ScalarFromVectorAction):
    """Calculates the mean of the given data.

    NaN is returned when no elements remain after the mask is applied.
    """

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Compute the mean of the masked vector.

        Parameters
        ----------
        data : `KeyedData`
            Mapping from which the vector named by ``vectorKey`` is read.
        **kwargs
            Used to format ``vectorKey`` and forwarded to ``getMask``.

        Returns
        -------
        mean : `Scalar`
            Result of ``nanMean`` on the masked values, or NaN if the masked
            vector is empty.
        """
        mask = self.getMask(**kwargs)
        values = data[self.vectorKey.format(**kwargs)][mask]
        # ``np.NaN`` was removed in NumPy 2.0; ``np.nan`` exists in every
        # NumPy release, so the empty-input path no longer raises
        # AttributeError on modern installations.
        return nanMean(values) if len(values) else np.nan
class StdevAction(ScalarFromVectorAction):
    """Computes the standard deviation of a vector via ``nanStd``."""

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Apply ``nanStd`` to the masked vector.

        Parameters
        ----------
        data : `KeyedData`
            Mapping from which the vector named by ``vectorKey`` is read.
        **kwargs
            Used to format ``vectorKey`` and forwarded to ``getMask``.

        Returns
        -------
        stdev : `Scalar`
            Whatever ``nanStd`` returns for the selected values.
        """
        selection = self.getMask(**kwargs)
        column = data[self.vectorKey.format(**kwargs)]
        selected = column[selection]
        return nanStd(selected)
class ValueAction(ScalarFromVectorAction):
    """Returns element zero of a vector as a float."""

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Read the first entry of the vector named by ``vectorKey``.

        No mask is applied here; the raw vector is indexed directly.

        Parameters
        ----------
        data : `KeyedData`
            Mapping from which the vector is read.
        **kwargs
            Used to format ``vectorKey``.

        Returns
        -------
        value : `Scalar`
            The first element, converted with ``float``.
        """
        column = data[self.vectorKey.format(**kwargs)]
        first = float(column[0])
        return cast(Scalar, first)
class SigmaMadAction(ScalarFromVectorAction):
    """Computes the sigma-MAD of a vector via ``nanSigmaMad``."""

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Apply ``nanSigmaMad`` to the masked vector.

        Parameters
        ----------
        data : `KeyedData`
            Mapping from which the vector named by ``vectorKey`` is read.
        **kwargs
            Used to format ``vectorKey`` and forwarded to ``getMask``.

        Returns
        -------
        sigmaMad : `Scalar`
            Whatever ``nanSigmaMad`` returns for the selected values.
        """
        selection = self.getMask(**kwargs)
        column = data[self.vectorKey.format(**kwargs)]
        selected = column[selection]
        return nanSigmaMad(selected)
class CountAction(ScalarAction):
    """Performs count actions, with threshold-based filtering.

    The operator is specified as a string, for example, "lt", "le", "ge",
    "gt", "ne", and "eq" for the mathematical operations <, <=, >=, >, !=,
    and == respectively. To count non-NaN values, only pass the column name
    as vector key. To count NaN values, pass threshold = nan (from math.nan)
    together with op="eq". Optionally to configure from a YAML file, pass
    "threshold: !!float nan". To compute the number of elements with values
    strictly less than a given threshold, use op="lt"; use op="le" to also
    include elements equal to the threshold.
    """

    vectorKey = Field[str]("Key of Vector to count")
    op = ChoiceField[str](
        doc="Operator name string.",
        allowed={
            "lt": "less than threshold",
            "le": "less than or equal to threshold",
            "ge": "greater than or equal to threshold",
            "ne": "not equal to a given value",
            "eq": "equal to a given value",
            "gt": "greater than threshold",
        },
        default="ne",
    )
    threshold = Field[float](doc="Threshold to apply.", default=nan)

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the schema: one entry pairing ``vectorKey`` with `Vector`."""
        return ((self.vectorKey, Vector),)

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Count the elements of the masked vector that satisfy ``op``.

        Parameters
        ----------
        data : `KeyedData`
            Mapping from which the vector named by ``vectorKey`` is read.
        **kwargs
            Used to format ``vectorKey`` and forwarded to ``getMask``.

        Returns
        -------
        count : `Scalar`
            With a NaN threshold: the number of NaN elements (``op="eq"``)
            or non-NaN elements (``op="ne"``). Otherwise the number of
            non-NaN elements for which ``element <op> threshold`` holds.

        Raises
        ------
        ValueError
            Raised if the threshold is NaN and ``op`` is neither "eq" nor
            "ne", since ordering comparisons against NaN are meaningless.
        """
        mask = self.getMask(**kwargs)
        arr = cast(Vector, data[self.vectorKey.format(**kwargs)])[mask]
        isNan = np.isnan(arr)

        # NaN never compares equal to itself, so ``threshold == nan`` is
        # always False; an explicit isnan test is required to reach the
        # NaN-counting branch.
        if np.isnan(self.threshold):
            nNan = int(isNan.sum())
            if self.op == "eq":
                # Count number of NaNs
                return cast(Scalar, nNan)
            if self.op == "ne":
                # Count number of non-NaNs
                return cast(Scalar, len(arr) - nNan)
            raise ValueError("Invalid operator for counting NaNs.")

        # Count for given threshold ignoring all NaNs
        comparison = getattr(operator, self.op)
        nPassing = int(np.sum(comparison(arr[~isNan], self.threshold)))
        return cast(Scalar, nPassing)
class CountUniqueAction(ScalarFromVectorAction):
    """Reports how many distinct values ``np.unique`` finds in a column."""

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Count the distinct entries of the masked vector.

        Parameters
        ----------
        data : `KeyedData`
            Mapping from which the vector named by ``vectorKey`` is read.
        **kwargs
            Used to format ``vectorKey`` and forwarded to ``getMask``.

        Returns
        -------
        count : `Scalar`
            Number of elements in ``np.unique`` of the selected values.
        """
        selection = self.getMask(**kwargs)
        column = cast(Vector, data[self.vectorKey.format(**kwargs)])
        distinct = np.unique(column[selection])
        return cast(Scalar, distinct.size)
class ApproxFloor(ScalarFromVectorAction):
    """Returns the median of the trailing tenth of the sorted input.

    The masked vector is sorted in ascending order and the last
    ``len(values) // 10`` entries are passed to ``nanMedian``. When fewer
    than ten values are present that count is zero and the slice covers
    the whole sorted vector.

    NOTE(review): an earlier summary of this class described "the lowest
    ten values"; the slice taken here is the end of the ascending sort --
    confirm which is intended.
    """

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Compute the median of the last tenth of the sorted values.

        Parameters
        ----------
        data : `KeyedData`
            Mapping from which the vector named by ``vectorKey`` is read.
        **kwargs
            Used to format ``vectorKey`` and forwarded to ``getMask``.

        Returns
        -------
        floor : `Scalar`
            Whatever ``nanMedian`` returns for the selected slice.
        """
        selection = self.getMask(**kwargs)
        column = data[self.vectorKey.format(**kwargs)]
        ordered = np.sort(column[selection])  # type: ignore
        tenth = len(ordered) // 10
        # A slice start of ``-0`` is ``0``, i.e. the full array.
        tail = ordered[-tenth:]
        return nanMedian(tail)
class FracThreshold(ScalarFromVectorAction):
    """Fraction of the non-NaN values that satisfy a threshold comparison.

    ``op`` names a function in the `operator` module: "lt", "le", "ge" or
    "gt" for <, <=, >= and > respectively. Each non-NaN element is compared
    as ``element <op> threshold``; for example op="lt" gives the fraction of
    elements strictly below the threshold, and op="le" also includes
    elements equal to it.
    """

    op = ChoiceField[str](
        doc="Operator name string.",
        allowed={
            "lt": "less than threshold",
            "le": "less than or equal to threshold",
            "ge": "greater than or equal to threshold",
            "gt": "greater than threshold",
        },
    )
    threshold = Field[float](doc="Threshold to apply.")
    percent = Field[bool](doc="Express result as percentage", default=False)
    relative_to_median = Field[bool](doc="Calculate threshold relative to the median?", default=False)

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Evaluate the fraction of values passing the comparison.

        Parameters
        ----------
        data : `KeyedData`
            Mapping from which the vector named by ``vectorKey`` is read.
        **kwargs
            Used to format ``vectorKey`` and forwarded to ``getMask``.

        Returns
        -------
        fraction : `Scalar`
            Passing count divided by the number of non-NaN masked values,
            multiplied by 100 when ``percent`` is set. NaN if no non-NaN
            values remain.
        """
        selection = self.getMask(**kwargs)
        column = data[self.vectorKey.format(**kwargs)]
        column = column[selection]  # type: ignore
        valid = column[~np.isnan(column)]
        n_valid = len(valid)
        if n_valid == 0:
            return np.nan

        cut = self.threshold
        if self.relative_to_median:
            # Interpret the configured threshold as an offset from the
            # median, unless the median itself is not finite.
            median = nanMedian(valid)
            if np.isfinite(median):
                cut = cut + median

        compare = getattr(operator, self.op)
        n_passing = np.sum(compare(valid, cut))
        fraction = cast(Scalar, float(n_passing / n_valid))  # type: ignore
        return 100.0 * fraction if self.percent else fraction
class MaxAction(ScalarFromVectorAction):
    """Computes the maximum of a vector via ``nanMax``."""

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Apply ``nanMax`` to the masked vector.

        Parameters
        ----------
        data : `KeyedData`
            Mapping from which the vector named by ``vectorKey`` is read.
        **kwargs
            Used to format ``vectorKey`` and forwarded to ``getMask``.

        Returns
        -------
        maximum : `Scalar`
            Whatever ``nanMax`` returns for the selected values.
        """
        selection = self.getMask(**kwargs)
        column = data[self.vectorKey.format(**kwargs)]
        selected = column[selection]
        return nanMax(selected)
class MinAction(ScalarFromVectorAction):
    """Computes the minimum of a vector via ``nanMin``."""

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Apply ``nanMin`` to the masked vector.

        Parameters
        ----------
        data : `KeyedData`
            Mapping from which the vector named by ``vectorKey`` is read.
        **kwargs
            Used to format ``vectorKey`` and forwarded to ``getMask``.

        Returns
        -------
        minimum : `Scalar`
            Whatever ``nanMin`` returns for the selected values.
        """
        selection = self.getMask(**kwargs)
        column = data[self.vectorKey.format(**kwargs)]
        selected = column[selection]
        return nanMin(selected)
class FracInRange(ScalarFromVectorAction):
    """Compute the fraction of a distribution that is between specified
    minimum and maximum values, and is not NaN.

    The range is half-open: ``minimum <= value < maximum``. The denominator
    is the number of masked values including NaNs, so NaN entries lower the
    fraction. NaN is returned when no values remain after masking.
    """

    # ``np.Inf`` was removed in NumPy 2.0; ``np.inf`` is available in every
    # NumPy release. The defaults are the largest/smallest finite floats.
    maximum = Field[float](doc="The maximum value", default=np.nextafter(np.inf, 0.0))
    minimum = Field[float](doc="The minimum value", default=np.nextafter(-np.inf, 0.0))
    percent = Field[bool](doc="Express result as percentage", default=False)

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Evaluate the fraction of values inside the configured range.

        Parameters
        ----------
        data : `KeyedData`
            Mapping from which the vector named by ``vectorKey`` is read.
        **kwargs
            Used to format ``vectorKey`` and forwarded to ``getMask``.

        Returns
        -------
        fraction : `Scalar`
            Number of non-NaN values in range divided by the number of
            masked values, multiplied by 100 when ``percent`` is set. NaN
            if the masked vector is empty.
        """
        mask = self.getMask(**kwargs)
        values = cast(Vector, data[self.vectorKey.format(**kwargs)])[mask]
        nvalues = len(values)
        if nvalues == 0:
            # Avoid ZeroDivisionError; mirrors FracThreshold's empty result.
            return np.nan
        values = values[np.logical_not(np.isnan(values))]
        sel_range = (values >= self.minimum) & (values < self.maximum)
        result = cast(
            Scalar,
            float(np.count_nonzero(sel_range) / nvalues),  # type: ignore
        )
        return 100.0 * result if self.percent else result
class FracNan(ScalarFromVectorAction):
    """Compute the fraction of vector entries that are NaN.

    NaN is returned when no values remain after masking.
    """

    percent = Field[bool](doc="Express result as percentage", default=False)

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Evaluate the fraction of masked values that are NaN.

        Parameters
        ----------
        data : `KeyedData`
            Mapping from which the vector named by ``vectorKey`` is read.
        **kwargs
            Used to format ``vectorKey`` and forwarded to ``getMask``.

        Returns
        -------
        fraction : `Scalar`
            Number of NaN values divided by the number of masked values,
            multiplied by 100 when ``percent`` is set. NaN if the masked
            vector is empty.
        """
        mask = self.getMask(**kwargs)
        values = cast(Vector, data[self.vectorKey.format(**kwargs)])[mask]
        nvalues = len(values)
        if nvalues == 0:
            # Avoid ZeroDivisionError; mirrors FracThreshold's empty result.
            return np.nan
        result = cast(
            Scalar,
            float(np.count_nonzero(np.isnan(values)) / nvalues),  # type: ignore
        )
        return 100.0 * result if self.percent else result
class SumAction(ScalarFromVectorAction):
    """Adds up the values of a column with ``np.nansum``."""

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Sum the masked vector.

        Parameters
        ----------
        data : `KeyedData`
            Mapping from which the vector named by ``vectorKey`` is read.
        **kwargs
            Used to format ``vectorKey`` and forwarded to ``getMask``.

        Returns
        -------
        total : `Scalar`
            Result of ``np.nansum`` on the selected values.
        """
        selection = self.getMask(**kwargs)
        column = cast(Vector, data[self.vectorKey.format(**kwargs)])
        total = np.nansum(column[selection])
        return cast(Scalar, total)
class MedianHistAction(ScalarAction):
    """Calculates the median of the given histogram data.

    NaN is returned when the frequency vector is empty.
    """

    histKey = Field[str]("Key of frequency Vector")
    midKey = Field[str]("Key of bin midpoints Vector")

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the schema: ``histKey`` and ``midKey``, both `Vector`."""
        return (
            (self.histKey, Vector),
            (self.midKey, Vector),
        )

    def histMedian(self, hist, bin_mid):
        """Calculates the median of a histogram with binned values

        Parameters
        ----------
        hist : `numpy.ndarray`
            Frequency array
        bin_mid : `numpy.ndarray`
            Bin midpoints array

        Returns
        -------
        median : `float`
            Median of histogram with binned values
        """
        cumulative_sum = np.cumsum(hist)
        # First bin whose cumulative count reaches half of the total.
        median_index = np.searchsorted(cumulative_sum, cumulative_sum[-1] / 2)
        return bin_mid[median_index]

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Compute the histogram median from the keyed data.

        Parameters
        ----------
        data : `KeyedData`
            Mapping from which the frequency and bin-midpoint vectors are
            read.
        **kwargs
            Used to format ``histKey`` and ``midKey``.

        Returns
        -------
        median : `Scalar`
            Midpoint of the median bin as a float, or NaN if the frequency
            vector is empty.
        """
        hist = cast(Vector, data[self.histKey.format(**kwargs)])
        if len(hist) == 0:
            # ``np.NaN`` was removed in NumPy 2.0; ``np.nan`` works on all
            # NumPy releases.
            return np.nan
        bin_mid = cast(Vector, data[self.midKey.format(**kwargs)])
        return cast(Scalar, float(self.histMedian(hist, bin_mid)))
class IqrHistAction(ScalarAction):
    """Calculates the interquartile range of the given histogram data.

    NaN is returned when the frequency vector is empty.
    """

    histKey = Field[str]("Key of frequency Vector")
    midKey = Field[str]("Key of bin midpoints Vector")

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the schema: ``histKey`` and ``midKey``, both `Vector`."""
        return (
            (self.histKey, Vector),
            (self.midKey, Vector),
        )

    def histIqr(self, hist, bin_mid):
        """Calculates the interquartile range of a histogram with binned values

        Parameters
        ----------
        hist : `numpy.ndarray`
            Frequency array
        bin_mid : `numpy.ndarray`
            Bin midpoints array

        Returns
        -------
        iqr : `float`
            Inter-quartile range of histogram with binned values
        """
        cumulative_sum = np.cumsum(hist)
        total = cumulative_sum[-1]
        # First bins whose cumulative counts reach 1/4 and 3/4 of the total.
        liqr_index = np.searchsorted(cumulative_sum, total / 4)
        uiqr_index = np.searchsorted(cumulative_sum, (3 / 4) * total)
        return bin_mid[uiqr_index] - bin_mid[liqr_index]

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Compute the histogram interquartile range from the keyed data.

        Parameters
        ----------
        data : `KeyedData`
            Mapping from which the frequency and bin-midpoint vectors are
            read.
        **kwargs
            Used to format ``histKey`` and ``midKey``.

        Returns
        -------
        iqr : `Scalar`
            Difference between the upper- and lower-quartile bin midpoints
            as a float, or NaN if the frequency vector is empty.
        """
        hist = cast(Vector, data[self.histKey.format(**kwargs)])
        if len(hist) == 0:
            # ``np.NaN`` was removed in NumPy 2.0; ``np.nan`` works on all
            # NumPy releases.
            return np.nan
        bin_mid = cast(Vector, data[self.midKey.format(**kwargs)])
        return cast(Scalar, float(self.histIqr(hist, bin_mid)))
class DivideScalar(ScalarAction):
    """Divides the scalar from one action by the scalar from another."""

    actionA = ConfigurableActionField[ScalarAction](doc="Action which supplies scalar A")
    actionB = ConfigurableActionField[ScalarAction](doc="Action which supplies scalar B")

    def getInputSchema(self) -> KeyedDataSchema:
        """Yield the schema entries of ``actionA`` followed by ``actionB``."""
        for entry in self.actionA.getInputSchema():
            yield entry
        for entry in self.actionB.getInputSchema():
            yield entry

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Evaluate both actions and return their quotient.

        Parameters
        ----------
        data : `KeyedData`
            Passed unchanged to both ``actionA`` and ``actionB``.
        **kwargs
            Forwarded unchanged to both actions.

        Returns
        -------
        result : `Scalar`
            Scalar A divided by scalar B.

        Raises
        ------
        ValueError
            Raised if scalar B compares equal to zero.
        """
        numerator = self.actionA(data, **kwargs)
        denominator = self.actionB(data, **kwargs)
        if denominator == 0:
            raise ValueError("Denominator is zero!")
        quotient = numerator / denominator
        return quotient