Coverage for python/lsst/analysis/tools/actions/scalar/scalarActions.py: 48%
124 statements
« prev ^ index » next coverage.py v6.5.0, created at 2023-03-31 04:09 -0700
« prev ^ index » next coverage.py v6.5.0, created at 2023-03-31 04:09 -0700
1from __future__ import annotations
# Names exported by a star-import of this module; one entry per action class
# defined below, kept in definition order.
__all__ = (
    # Summary statistics of a vector.
    "MedianAction",
    "MeanAction",
    "StdevAction",
    "SigmaMadAction",
    # Counting actions.
    "CountAction",
    "CountUniqueAction",
    # Tail / threshold / extremum actions.
    "ApproxFloor",
    "FracThreshold",
    "MaxAction",
    "MinAction",
    # Fraction-of-rows actions.
    "FracInRange",
    "FracNan",
)
18import operator
19from typing import cast
21import numpy as np
22from lsst.pex.config import ChoiceField, Field
24from ...interfaces import KeyedData, KeyedDataSchema, Scalar, ScalarAction, Vector
25from ...statistics import nansigmaMad
class MedianAction(ScalarAction):
    """Reduce a vector to its median, skipping NaN entries."""

    vectorKey = Field[str]("Key of Vector to median")

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the input schema: the configured key, typed as a `Vector`."""
        schema = ((self.vectorKey, Vector),)
        return schema

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Compute the NaN-ignoring median of the selected vector.

        Parameters
        ----------
        data : `KeyedData`
            Mapping holding the vector; it is looked up under ``vectorKey``
            after formatting the key with ``kwargs``.
        **kwargs
            Forwarded to ``self.getMask`` and used to format ``vectorKey``.

        Returns
        -------
        median : `Scalar`
            ``np.nanmedian`` of the masked vector, converted to `float`.
        """
        selection = self.getMask(**kwargs)
        column = cast(Vector, data[self.vectorKey.format(**kwargs)])
        median = np.nanmedian(column[selection])
        return cast(Scalar, float(median))
class MeanAction(ScalarAction):
    """Reduce a vector to its arithmetic mean, skipping NaN entries."""

    vectorKey = Field[str]("Key of Vector from which to calculate mean")

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the input schema: the configured key, typed as a `Vector`."""
        schema = ((self.vectorKey, Vector),)
        return schema

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Compute the NaN-ignoring mean of the selected vector.

        Parameters
        ----------
        data : `KeyedData`
            Mapping holding the vector; it is looked up under ``vectorKey``
            after formatting the key with ``kwargs``.
        **kwargs
            Forwarded to ``self.getMask`` and used to format ``vectorKey``.

        Returns
        -------
        mean : `Scalar`
            ``np.nanmean`` of the masked vector, converted to `float`.
        """
        selection = self.getMask(**kwargs)
        column = cast(Vector, data[self.vectorKey.format(**kwargs)])
        mean = np.nanmean(column[selection])
        return cast(Scalar, float(mean))
class StdevAction(ScalarAction):
    """Reduce a vector to its standard deviation, skipping NaN entries."""

    vectorKey = Field[str]("Key of Vector from which to calculate std deviation")

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the input schema: the configured key, typed as a `Vector`."""
        schema = ((self.vectorKey, Vector),)
        return schema

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Compute the NaN-ignoring standard deviation of the selected vector.

        Parameters
        ----------
        data : `KeyedData`
            Mapping holding the vector; it is looked up under ``vectorKey``
            after formatting the key with ``kwargs``.
        **kwargs
            Forwarded to ``self.getMask`` and used to format ``vectorKey``.

        Returns
        -------
        stdev : `Scalar`
            ``np.nanstd`` of the masked vector (called with NumPy's default
            arguments), converted to `float`.
        """
        selection = self.getMask(**kwargs)
        column = cast(Vector, data[self.vectorKey.format(**kwargs)])
        stdev = np.nanstd(column[selection])
        return cast(Scalar, float(stdev))
class SigmaMadAction(ScalarAction):
    """Reduce a vector to its sigma-MAD as computed by `nansigmaMad`."""

    # The original help text ("Key of Vector to median") was copied from
    # MedianAction; this action computes sigma-MAD, not a median.
    vectorKey = Field[str]("Key of Vector from which to calculate sigma MAD")

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the input schema: the configured key, typed as a `Vector`."""
        return ((self.vectorKey, Vector),)

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Compute ``nansigmaMad`` of the selected vector.

        Parameters
        ----------
        data : `KeyedData`
            Mapping holding the vector; it is looked up under ``vectorKey``
            after formatting the key with ``kwargs``.
        **kwargs
            Forwarded to ``self.getMask`` and used to format ``vectorKey``.

        Returns
        -------
        sigmaMad : `Scalar`
            Result of ``nansigmaMad`` on the masked vector, converted to
            `float`.
        """
        mask = self.getMask(**kwargs)
        # Narrow the type with cast(), as the sibling actions do, instead of
        # silencing the checker with a bare "type: ignore".
        values = cast(Vector, data[self.vectorKey.format(**kwargs)])[mask]
        return cast(Scalar, float(nansigmaMad(values)))
class CountAction(ScalarAction):
    """Count the entries of a vector that are not NaN."""

    vectorKey = Field[str]("Key of Vector to count")

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the input schema: the configured key, typed as a `Vector`."""
        schema = ((self.vectorKey, Vector),)
        return schema

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return how many masked entries are not NaN.

        Parameters
        ----------
        data : `KeyedData`
            Mapping holding the vector; it is looked up under ``vectorKey``
            after formatting the key with ``kwargs``.
        **kwargs
            Forwarded to ``self.getMask`` and used to format ``vectorKey``.

        Returns
        -------
        count : `Scalar`
            Number of non-NaN entries remaining after the mask is applied.
        """
        selection = self.getMask(**kwargs)
        column = cast(Vector, data[self.vectorKey.format(**kwargs)])[selection]
        is_valid = np.logical_not(np.isnan(column))
        return cast(Scalar, len(column[is_valid]))
class CountUniqueAction(ScalarAction):
    """Count the distinct values present in a column.

    Calling the action on a `KeyedData` returns, as a `Scalar`, the number
    of unique values found in the masked column named by ``vectorKey``.
    """

    vectorKey = Field[str](doc="Name of column.")

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the input schema: the configured key, typed as a `Vector`."""
        schema = ((self.vectorKey, Vector),)
        return schema

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return the number of unique values in the masked column.

        Parameters
        ----------
        data : `KeyedData`
            Mapping holding the column; it is looked up under ``vectorKey``
            after formatting the key with ``kwargs``.
        **kwargs
            Forwarded to ``self.getMask`` and used to format ``vectorKey``.

        Returns
        -------
        count : `Scalar`
            Length of ``np.unique`` applied to the masked column.
        """
        selection = self.getMask(**kwargs)
        column = cast(Vector, data[self.vectorKey.format(**kwargs)])[selection]
        distinct = np.unique(column)
        return cast(Scalar, len(distinct))
class ApproxFloor(ScalarAction):
    """Median of the last tenth of the sorted vector.

    The masked vector is sorted with ``np.sort`` and the NaN-ignoring median
    of its final ``len // 10`` entries is returned.
    """

    vectorKey = Field[str](doc="Key for the vector to perform action on", optional=False)

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the input schema: the configured key, typed as a `Vector`."""
        schema = ((self.vectorKey, Vector),)
        return schema

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return the NaN-ignoring median of the tail of the sorted vector.

        Parameters
        ----------
        data : `KeyedData`
            Mapping holding the vector; it is looked up under ``vectorKey``
            after formatting the key with ``kwargs``.
        **kwargs
            Forwarded to ``self.getMask`` and used to format ``vectorKey``.

        Returns
        -------
        value : `Scalar`
            ``np.nanmedian`` of the last ``len // 10`` sorted entries,
            converted to `float`.
        """
        selection = self.getMask(**kwargs)
        ordered = np.sort(data[self.vectorKey.format(**kwargs)][selection])  # type: ignore
        tail_length = len(ordered) // 10
        # With fewer than 10 entries tail_length is 0 and ``ordered[-0:]`` is
        # the whole array, so the median of every entry is returned.
        tail = ordered[-tail_length:]
        return cast(Scalar, float(np.nanmedian(tail)))
class FracThreshold(ScalarAction):
    """Compute the fraction of a distribution that is above or below a
    specified threshold.

    The comparison is selected by name: "lt", "le", "ge", "gt" stand for the
    mathematical operations <, <=, >=, >. For example, to compute the
    fraction of elements with values strictly less than the threshold use
    op="lt"; use op="le" to also include values equal to the threshold.

    NaN entries are removed before the fraction is computed, so the
    denominator is the number of non-NaN masked values.
    """

    op = ChoiceField[str](
        doc="Operator name string.",
        allowed={
            "lt": "less than threshold",
            "le": "less than or equal to threshold",
            "ge": "greater than or equal to threshold",
            "gt": "greater than threshold",
        },
    )
    threshold = Field[float](doc="Threshold to apply.")
    vectorKey = Field[str](doc="Name of column")
    percent = Field[bool](doc="Express result as percentage", default=False)

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the input schema: the configured key, typed as a `Vector`."""
        return ((self.vectorKey, Vector),)

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return the fraction of non-NaN values satisfying the comparison.

        Parameters
        ----------
        data : `KeyedData`
            Mapping holding the column; it is looked up under ``vectorKey``
            after formatting the key with ``kwargs``.
        **kwargs
            Forwarded to ``self.getMask`` and used to format ``vectorKey``.

        Returns
        -------
        result : `Scalar`
            Fraction of non-NaN masked values for which
            ``value <op> threshold`` holds, multiplied by 100 when
            ``percent`` is set.
        """
        mask = self.getMask(**kwargs)
        values = data[self.vectorKey.format(**kwargs)]
        values = values[mask]  # type: ignore
        values = values[np.logical_not(np.isnan(values))]
        # ``op`` is restricted by the ChoiceField to names that exist in the
        # ``operator`` module, so this lookup resolves to lt/le/ge/gt.
        compare = getattr(operator, self.op)
        result = cast(
            Scalar,
            float(np.sum(compare(values, self.threshold)) / len(values)),  # type: ignore
        )
        if self.percent:
            return 100.0 * result
        else:
            return result
class MaxAction(ScalarAction):
    """Reduce a vector to its maximum, skipping NaN entries."""

    vectorKey = Field[str]("Key of Vector to find maximum")

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the input schema: the configured key, typed as a `Vector`."""
        return ((self.vectorKey, Vector),)

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Compute the NaN-ignoring maximum of the selected vector.

        Parameters
        ----------
        data : `KeyedData`
            Mapping holding the vector; it is looked up under ``vectorKey``
            after formatting the key with ``kwargs``.
        **kwargs
            Forwarded to ``self.getMask`` and used to format ``vectorKey``.

        Returns
        -------
        maximum : `Scalar`
            ``np.nanmax`` of the masked vector, converted to `float`.
        """
        mask = self.getMask(**kwargs)
        values = cast(Vector, data[self.vectorKey.format(**kwargs)])[mask]
        # Use the NaN-ignoring reduction, as the median/mean/stdev actions in
        # this module do; plain np.max would return NaN if any entry is NaN.
        return cast(Scalar, float(np.nanmax(values)))
class MinAction(ScalarAction):
    """Reduce a vector to its minimum, skipping NaN entries."""

    vectorKey = Field[str]("Key for the vector to perform action on")

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the input schema: the configured key, typed as a `Vector`."""
        return ((self.vectorKey, Vector),)

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Compute the NaN-ignoring minimum of the selected vector.

        Parameters
        ----------
        data : `KeyedData`
            Mapping holding the vector; it is looked up under ``vectorKey``
            after formatting the key with ``kwargs``.
        **kwargs
            Forwarded to ``self.getMask`` and used to format ``vectorKey``.

        Returns
        -------
        minimum : `Scalar`
            ``np.nanmin`` of the masked vector, converted to `float`.
        """
        mask = self.getMask(**kwargs)
        values = cast(Vector, data[self.vectorKey.format(**kwargs)])[mask]
        # Use the NaN-ignoring reduction, as the median/mean/stdev actions in
        # this module do; plain np.min would return NaN if any entry is NaN.
        return cast(Scalar, float(np.nanmin(values)))
class FracInRange(ScalarAction):
    """Compute the fraction of a distribution that is between specified
    minimum and maximum values, and is not NaN.

    The range is half-open: a value counts when
    ``minimum <= value < maximum``. The denominator is the number of masked
    rows *including* NaN rows, so NaN entries lower the fraction.
    """

    vectorKey = Field[str](doc="Name of column")
    # Defaults are the largest-magnitude finite floats (one step in from
    # +/-inf). ``np.inf`` is used because the ``np.Inf`` alias was removed in
    # NumPy 2.0; the resulting values are identical.
    maximum = Field[float](doc="The maximum value", default=np.nextafter(np.inf, 0.0))
    minimum = Field[float](doc="The minimum value", default=np.nextafter(-np.inf, 0.0))
    percent = Field[bool](doc="Express result as percentage", default=False)

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the input schema: the configured key, typed as a `Vector`."""
        return ((self.vectorKey, Vector),)

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return the fraction of rows with values within the specified range.

        Parameters
        ----------
        data : `KeyedData`
            Mapping holding the column; it is looked up under ``vectorKey``
            after formatting the key with ``kwargs``.
        **kwargs
            Forwarded to ``self.getMask`` and used to format ``vectorKey``.

        Returns
        -------
        result : `Scalar`
            The fraction (or percentage) of rows with values within the
            specified range. NaN is returned when no rows remain after
            masking, since the fraction is then undefined.
        """
        mask = self.getMask(**kwargs)
        values = cast(Vector, data[self.vectorKey.format(**kwargs)])[mask]
        # Count rows before dropping NaNs: NaN rows stay in the denominator.
        nvalues = len(values)
        if nvalues == 0:
            # Avoid ZeroDivisionError on an empty selection; NaN matches what
            # the NumPy-based reductions in this module yield for no data.
            result = cast(Scalar, float("nan"))
        else:
            values = values[np.logical_not(np.isnan(values))]
            sel_range = (values >= self.minimum) & (values < self.maximum)
            result = cast(
                Scalar,
                float(len(values[sel_range]) / nvalues),  # type: ignore
            )
        if self.percent:
            return 100.0 * result
        else:
            return result
class FracNan(ScalarAction):
    """Compute the fraction of vector entries that are NaN."""

    vectorKey = Field[str](doc="Name of column")
    percent = Field[bool](doc="Express result as percentage", default=False)

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the input schema: the configured key, typed as a `Vector`."""
        return ((self.vectorKey, Vector),)

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return the fraction of rows with NaN values.

        Parameters
        ----------
        data : `KeyedData`
            Mapping holding the column; it is looked up under ``vectorKey``
            after formatting the key with ``kwargs``.
        **kwargs
            Forwarded to ``self.getMask`` and used to format ``vectorKey``.

        Returns
        -------
        result : `Scalar`
            The fraction (or percentage) of rows with NaN values. NaN is
            returned when no rows remain after masking, since the fraction
            is then undefined.
        """
        mask = self.getMask(**kwargs)
        values = cast(Vector, data[self.vectorKey.format(**kwargs)])[mask]
        nvalues = len(values)
        if nvalues == 0:
            # Avoid ZeroDivisionError on an empty selection; NaN matches what
            # the NumPy-based reductions in this module yield for no data.
            result = cast(Scalar, float("nan"))
        else:
            nan_values = values[np.isnan(values)]
            result = cast(
                Scalar,
                float(len(nan_values) / nvalues),  # type: ignore
            )
        if self.percent:
            return 100.0 * result
        else:
            return result