Coverage for python/lsst/analysis/tools/actions/scalar/scalarActions.py: 36%
150 statements
« prev ^ index » next coverage.py v7.4.0, created at 2024-01-05 14:05 +0000
« prev ^ index » next coverage.py v7.4.0, created at 2024-01-05 14:05 +0000
1# This file is part of analysis_tools.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <https://www.gnu.org/licenses/>.
22from __future__ import annotations
24__all__ = (
25 "MedianAction",
26 "MeanAction",
27 "StdevAction",
28 "ValueAction",
29 "SigmaMadAction",
30 "CountAction",
31 "CountUniqueAction",
32 "ApproxFloor",
33 "FracThreshold",
34 "MaxAction",
35 "MinAction",
36 "FracInRange",
37 "FracNan",
38 "SumAction",
39 "MedianHistAction",
40 "IqrHistAction",
41)
43import operator
44from typing import cast
46import numpy as np
47from lsst.pex.config import ChoiceField, Field
49from ...interfaces import KeyedData, KeyedDataSchema, Scalar, ScalarAction, Vector
50from ...math import nanMax, nanMean, nanMedian, nanMin, nanSigmaMad, nanStd
class ScalarFromVectorAction(ScalarAction):
    """Base class for actions that reduce a single vector to one scalar.

    This class only contributes the configuration field naming the input
    vector and the input schema derived from it; subclasses supply
    ``__call__``.
    """

    vectorKey = Field[str](doc="Key of Vector to compute statistic from.")

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the input schema for this action.

        Returns
        -------
        schema : `KeyedDataSchema`
            A one-element tuple pairing ``vectorKey`` with `Vector`.
        """
        schema_entry = (self.vectorKey, Vector)
        return (schema_entry,)
class MedianAction(ScalarFromVectorAction):
    """Calculates the median of the given data.

    The median is computed with ``nanMedian`` over the entries picked out by
    the action's mask. If nothing is selected, NaN is returned instead.
    """

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Compute the median of the selected vector entries.

        Parameters
        ----------
        data : `KeyedData`
            Container holding the vector stored under ``vectorKey``.
        **kwargs
            Used to fill the ``vectorKey`` format template and forwarded to
            ``getMask``.

        Returns
        -------
        median : `Scalar`
            ``nanMedian`` of the selected values, or NaN when the selection
            is empty.
        """
        mask = self.getMask(**kwargs)
        values = data[self.vectorKey.format(**kwargs)][mask]
        # ``np.NaN`` was removed in NumPy 2.0; ``np.nan`` exists in every
        # NumPy release and matches the spelling used elsewhere in this file.
        return nanMedian(values) if len(values) else np.nan
class MeanAction(ScalarFromVectorAction):
    """Calculates the mean of the given data.

    The mean is computed with ``nanMean`` over the entries picked out by the
    action's mask. If nothing is selected, NaN is returned instead.
    """

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Compute the mean of the selected vector entries.

        Parameters
        ----------
        data : `KeyedData`
            Container holding the vector stored under ``vectorKey``.
        **kwargs
            Used to fill the ``vectorKey`` format template and forwarded to
            ``getMask``.

        Returns
        -------
        mean : `Scalar`
            ``nanMean`` of the selected values, or NaN when the selection is
            empty.
        """
        mask = self.getMask(**kwargs)
        values = data[self.vectorKey.format(**kwargs)][mask]
        # ``np.NaN`` was removed in NumPy 2.0; ``np.nan`` exists in every
        # NumPy release and matches the spelling used elsewhere in this file.
        return nanMean(values) if len(values) else np.nan
class StdevAction(ScalarFromVectorAction):
    """Calculates the standard deviation of the given data."""

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return ``nanStd`` of the masked vector stored under ``vectorKey``.

        Parameters
        ----------
        data : `KeyedData`
            Container holding the vector stored under ``vectorKey``.
        **kwargs
            Used to fill the ``vectorKey`` format template and forwarded to
            ``getMask``.

        Returns
        -------
        stdev : `Scalar`
            Result of ``nanStd`` on the selected values.
        """
        selection = self.getMask(**kwargs)
        column = data[self.vectorKey.format(**kwargs)]
        return nanStd(column[selection])
class ValueAction(ScalarFromVectorAction):
    """Extracts the first value from a vector.

    Unlike the other actions in this module, no mask is applied here; the
    element at index 0 of the stored vector is returned as a float.
    """

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return element 0 of the vector stored under ``vectorKey``.

        Parameters
        ----------
        data : `KeyedData`
            Container holding the vector stored under ``vectorKey``.
        **kwargs
            Used to fill the ``vectorKey`` format template.

        Returns
        -------
        value : `Scalar`
            The first element, converted with ``float``.
        """
        column = data[self.vectorKey.format(**kwargs)]
        first = float(column[0])
        return cast(Scalar, first)
class SigmaMadAction(ScalarFromVectorAction):
    """Calculates the sigma mad of the given data."""

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return ``nanSigmaMad`` of the masked vector stored under ``vectorKey``.

        Parameters
        ----------
        data : `KeyedData`
            Container holding the vector stored under ``vectorKey``.
        **kwargs
            Used to fill the ``vectorKey`` format template and forwarded to
            ``getMask``.

        Returns
        -------
        sigmaMad : `Scalar`
            Result of ``nanSigmaMad`` on the selected values.
        """
        selection = self.getMask(**kwargs)
        column = data[self.vectorKey.format(**kwargs)]
        return nanSigmaMad(column[selection])
class CountAction(ScalarFromVectorAction):
    """Returns the number of non-NaN entries in the given column."""

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Count the selected entries that are not NaN.

        Parameters
        ----------
        data : `KeyedData`
            Container holding the vector stored under ``vectorKey``.
        **kwargs
            Used to fill the ``vectorKey`` format template and forwarded to
            ``getMask``.

        Returns
        -------
        count : `Scalar`
            Number of selected entries for which ``np.isnan`` is false.
        """
        selection = self.getMask(**kwargs)
        column = cast(Vector, data[self.vectorKey.format(**kwargs)])
        selected = column[selection]
        not_nan = selected[np.logical_not(np.isnan(selected))]
        return cast(Scalar, len(not_nan))
class CountUniqueAction(ScalarFromVectorAction):
    """Counts the number of unique rows in a given column."""

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Count the distinct values among the selected entries.

        Parameters
        ----------
        data : `KeyedData`
            Container holding the vector stored under ``vectorKey``.
        **kwargs
            Used to fill the ``vectorKey`` format template and forwarded to
            ``getMask``.

        Returns
        -------
        count : `Scalar`
            Length of ``np.unique`` applied to the selected values.
        """
        selection = self.getMask(**kwargs)
        column = cast(Vector, data[self.vectorKey.format(**kwargs)])
        distinct = np.unique(column[selection])
        return cast(Scalar, len(distinct))
class ApproxFloor(ScalarFromVectorAction):
    """Returns the median of the largest ~10% of the selected values.

    The selected values are sorted in ascending order and ``nanMedian`` is
    taken over the trailing ``len(values) // 10`` entries, i.e. the top tenth
    of the distribution (not the lowest ten values). When fewer than ten
    values are selected the tail length is zero and the median of *all*
    selected values is returned.
    """

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Compute the median of the upper tenth of the selected values.

        Parameters
        ----------
        data : `KeyedData`
            Container holding the vector stored under ``vectorKey``.
        **kwargs
            Used to fill the ``vectorKey`` format template and forwarded to
            ``getMask``.

        Returns
        -------
        floor : `Scalar`
            ``nanMedian`` of the trailing tenth of the ascending-sorted
            selection, or of the whole selection when it has fewer than ten
            entries.
        """
        mask = self.getMask(**kwargs)
        ordered = np.sort(data[self.vectorKey.format(**kwargs)][mask])  # type: ignore
        tail_len = len(ordered) // 10
        # NOTE(review): np.sort places NaN entries last, so NaNs in the input
        # end up inside this tail -- confirm whether they should be dropped
        # before sorting.
        # With tail_len == 0 the slice ``ordered[-0:]`` is the whole array,
        # which is the small-sample fallback described in the class docstring.
        return nanMedian(ordered[-tail_len:])
class FracThreshold(ScalarFromVectorAction):
    """Compute the fraction of a distribution above or below a threshold.

    The operator is specified as a string: "lt", "le", "ge", "gt" for the
    mathematical operations <, <=, >=, >. For example, to compute the
    fraction of elements with values strictly less than a given threshold,
    use op="lt"; use op="le" to also count values equal to the threshold.

    NaN entries are removed before counting, so the denominator is the number
    of selected non-NaN values. NaN is returned when no such values remain.
    """

    op = ChoiceField[str](
        doc="Operator name string.",
        allowed={
            "lt": "less than threshold",
            "le": "less than or equal to threshold",
            "ge": "greater than or equal to threshold",
            "gt": "greater than threshold",
        },
    )
    threshold = Field[float](doc="Threshold to apply.")
    percent = Field[bool](doc="Express result as percentage", default=False)
    relative_to_median = Field[bool](doc="Calculate threshold relative to the median?", default=False)

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Compute the fraction of selected values satisfying the comparison.

        Parameters
        ----------
        data : `KeyedData`
            Container holding the vector stored under ``vectorKey``.
        **kwargs
            Used to fill the ``vectorKey`` format template and forwarded to
            ``getMask``.

        Returns
        -------
        fraction : `Scalar`
            Fraction (or percentage when ``percent`` is set) of non-NaN
            selected values for which ``value <op> threshold`` holds; NaN if
            there are no non-NaN selected values.
        """
        mask = self.getMask(**kwargs)
        values = data[self.vectorKey.format(**kwargs)]
        values = values[mask]  # type: ignore
        values = values[np.logical_not(np.isnan(values))]
        n_values = len(values)
        if n_values == 0:
            return np.nan

        threshold = self.threshold
        # If relative_to_median is set, shift the threshold to be
        # median + threshold. ``values`` is known to be non-empty here.
        if self.relative_to_median:
            offset = nanMedian(values)
            if np.isfinite(offset):
                threshold += offset

        # ``self.op`` is one of the ChoiceField keys, each of which names a
        # comparison function in the ``operator`` module.
        compare = getattr(operator, self.op)
        n_pass = np.sum(compare(values, threshold))
        result = cast(Scalar, float(n_pass / n_values))  # type: ignore
        return 100.0 * result if self.percent else result
class MaxAction(ScalarFromVectorAction):
    """Returns the maximum of the given data."""

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return ``nanMax`` of the masked vector stored under ``vectorKey``.

        Parameters
        ----------
        data : `KeyedData`
            Container holding the vector stored under ``vectorKey``.
        **kwargs
            Used to fill the ``vectorKey`` format template and forwarded to
            ``getMask``.

        Returns
        -------
        maximum : `Scalar`
            Result of ``nanMax`` on the selected values.
        """
        selection = self.getMask(**kwargs)
        column = data[self.vectorKey.format(**kwargs)]
        return nanMax(column[selection])
class MinAction(ScalarFromVectorAction):
    """Returns the minimum of the given data."""

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return ``nanMin`` of the masked vector stored under ``vectorKey``.

        Parameters
        ----------
        data : `KeyedData`
            Container holding the vector stored under ``vectorKey``.
        **kwargs
            Used to fill the ``vectorKey`` format template and forwarded to
            ``getMask``.

        Returns
        -------
        minimum : `Scalar`
            Result of ``nanMin`` on the selected values.
        """
        selection = self.getMask(**kwargs)
        column = data[self.vectorKey.format(**kwargs)]
        return nanMin(column[selection])
class FracInRange(ScalarFromVectorAction):
    """Compute the fraction of a distribution that is between specified
    minimum and maximum values, and is not NaN.

    The range is half-open: a value counts when
    ``minimum <= value < maximum``. The denominator is the number of selected
    entries *including* NaNs, so NaN entries lower the fraction. NaN is
    returned when nothing is selected.
    """

    # ``np.Inf`` was removed in NumPy 2.0; ``np.inf`` exists in every NumPy
    # release. These defaults are evaluated when the class is defined, so the
    # old spelling would prevent the module from importing at all.
    maximum = Field[float](doc="The maximum value", default=np.nextafter(np.inf, 0.0))
    minimum = Field[float](doc="The minimum value", default=np.nextafter(-np.inf, 0.0))
    percent = Field[bool](doc="Express result as percentage", default=False)

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Compute the fraction of selected values lying inside the range.

        Parameters
        ----------
        data : `KeyedData`
            Container holding the vector stored under ``vectorKey``.
        **kwargs
            Used to fill the ``vectorKey`` format template and forwarded to
            ``getMask``.

        Returns
        -------
        fraction : `Scalar`
            Fraction (or percentage when ``percent`` is set) of selected
            entries that are non-NaN and within ``[minimum, maximum)``; NaN
            if the selection is empty.
        """
        mask = self.getMask(**kwargs)
        values = cast(Vector, data[self.vectorKey.format(**kwargs)])[mask]
        nvalues = len(values)
        if nvalues == 0:
            # The fraction of an empty selection is undefined; dividing by
            # zero below would raise ZeroDivisionError.
            return np.nan
        values = values[np.logical_not(np.isnan(values))]
        sel_range = (values >= self.minimum) & (values < self.maximum)
        result = cast(
            Scalar,
            float(len(values[sel_range]) / nvalues),  # type: ignore
        )
        return 100.0 * result if self.percent else result
class FracNan(ScalarFromVectorAction):
    """Compute the fraction of vector entries that are NaN.

    NaN is returned when nothing is selected, since the fraction is then
    undefined.
    """

    percent = Field[bool](doc="Express result as percentage", default=False)

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Compute the fraction of selected entries that are NaN.

        Parameters
        ----------
        data : `KeyedData`
            Container holding the vector stored under ``vectorKey``.
        **kwargs
            Used to fill the ``vectorKey`` format template and forwarded to
            ``getMask``.

        Returns
        -------
        fraction : `Scalar`
            Fraction (or percentage when ``percent`` is set) of selected
            entries for which ``np.isnan`` is true; NaN if the selection is
            empty.
        """
        mask = self.getMask(**kwargs)
        values = cast(Vector, data[self.vectorKey.format(**kwargs)])[mask]
        nvalues = len(values)
        if nvalues == 0:
            # Dividing by zero below would raise ZeroDivisionError.
            return np.nan
        nan_values = values[np.isnan(values)]
        result = cast(
            Scalar,
            float(len(nan_values) / nvalues),  # type: ignore
        )
        return 100.0 * result if self.percent else result
class SumAction(ScalarFromVectorAction):
    """Returns the sum of all values in the column."""

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return ``np.nansum`` of the masked vector stored under ``vectorKey``.

        Parameters
        ----------
        data : `KeyedData`
            Container holding the vector stored under ``vectorKey``.
        **kwargs
            Used to fill the ``vectorKey`` format template and forwarded to
            ``getMask``.

        Returns
        -------
        total : `Scalar`
            Sum of the selected values as computed by ``np.nansum``.
        """
        selection = self.getMask(**kwargs)
        column = cast(Vector, data[self.vectorKey.format(**kwargs)])
        total = np.nansum(column[selection])
        return cast(Scalar, total)
class MedianHistAction(ScalarAction):
    """Calculates the median of the given histogram data.

    The histogram is supplied as two vectors: the per-bin frequencies
    (``histKey``) and the matching bin midpoints (``midKey``). NaN is
    returned when the histogram has no bins or no counts.
    """

    histKey = Field[str]("Key of frequency Vector")
    midKey = Field[str]("Key of bin midpoints Vector")

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the input schema: the frequency and bin-midpoint vectors."""
        return (
            (self.histKey, Vector),
            (self.midKey, Vector),
        )

    def histMedian(self, hist, bin_mid):
        """Calculates the median of a histogram with binned values

        Parameters
        ----------
        hist : `numpy.ndarray`
            Frequency array
        bin_mid : `numpy.ndarray`
            Bin midpoints array

        Returns
        -------
        median : `float`
            Midpoint of the first bin at which the cumulative frequency
            reaches half of the total, or NaN if the total frequency is zero.
        """
        cumulative_sum = np.cumsum(hist)
        total = cumulative_sum[-1]
        if total == 0:
            # With no counts every cumulative value equals the target, so the
            # search would report the first bin; the median is undefined.
            return np.nan
        median_index = np.searchsorted(cumulative_sum, total / 2)
        return bin_mid[median_index]

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Compute the histogram median from the keyed data.

        Parameters
        ----------
        data : `KeyedData`
            Container holding the vectors stored under ``histKey`` and
            ``midKey``.
        **kwargs
            Used to fill the ``histKey`` and ``midKey`` format templates.

        Returns
        -------
        median : `Scalar`
            Histogram median as a float, or NaN for an empty histogram.
        """
        hist = cast(Vector, data[self.histKey.format(**kwargs)])
        if len(hist) == 0:
            # ``np.NaN`` was removed in NumPy 2.0; use ``np.nan``.
            return np.nan
        bin_mid = cast(Vector, data[self.midKey.format(**kwargs)])
        return cast(Scalar, float(self.histMedian(hist, bin_mid)))
class IqrHistAction(ScalarAction):
    """Calculates the interquartile range of the given histogram data.

    The histogram is supplied as two vectors: the per-bin frequencies
    (``histKey``) and the matching bin midpoints (``midKey``). NaN is
    returned when the histogram has no bins or no counts.
    """

    histKey = Field[str]("Key of frequency Vector")
    midKey = Field[str]("Key of bin midpoints Vector")

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the input schema: the frequency and bin-midpoint vectors."""
        return (
            (self.histKey, Vector),
            (self.midKey, Vector),
        )

    def histIqr(self, hist, bin_mid):
        """Calculates the interquartile range of a histogram with binned values

        Parameters
        ----------
        hist : `numpy.ndarray`
            Frequency array
        bin_mid : `numpy.ndarray`
            Bin midpoints array

        Returns
        -------
        iqr : `float`
            Difference between the midpoints of the first bins at which the
            cumulative frequency reaches three quarters and one quarter of
            the total, or NaN if the total frequency is zero.
        """
        cumulative_sum = np.cumsum(hist)
        total = cumulative_sum[-1]
        if total == 0:
            # With no counts both quartile searches would report the first
            # bin and yield a spurious range of zero.
            return np.nan
        liqr_index = np.searchsorted(cumulative_sum, total / 4)
        uiqr_index = np.searchsorted(cumulative_sum, (3 / 4) * total)
        return bin_mid[uiqr_index] - bin_mid[liqr_index]

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Compute the histogram interquartile range from the keyed data.

        Parameters
        ----------
        data : `KeyedData`
            Container holding the vectors stored under ``histKey`` and
            ``midKey``.
        **kwargs
            Used to fill the ``histKey`` and ``midKey`` format templates.

        Returns
        -------
        iqr : `Scalar`
            Interquartile range as a float, or NaN for an empty histogram.
        """
        hist = cast(Vector, data[self.histKey.format(**kwargs)])
        if len(hist) == 0:
            # ``np.NaN`` was removed in NumPy 2.0; use ``np.nan``.
            return np.nan
        bin_mid = cast(Vector, data[self.midKey.format(**kwargs)])
        return cast(Scalar, float(self.histIqr(hist, bin_mid)))