Coverage for python/lsst/analysis/tools/actions/vector/vectorActions.py: 45%
153 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-06-23 04:04 -0700
« prev ^ index » next coverage.py v7.2.7, created at 2023-06-23 04:04 -0700
1# This file is part of analysis_tools.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <https://www.gnu.org/licenses/>.
21from __future__ import annotations
23__all__ = (
24 "LoadVector",
25 "DownselectVector",
26 "MultiCriteriaDownselectVector",
27 "ConvertFluxToMag",
28 "ConvertUnits",
29 "CalcSn",
30 "MagDiff",
31 "ExtinctionCorrectedMagDiff",
32 "PerGroupStatistic",
33 "ResidualWithPerGroupStatistic",
34 "RAcosDec",
35)
37import logging
38import warnings
39from typing import Optional, cast
41import numpy as np
42import pandas as pd
43from astropy import units as u
44from lsst.pex.config import DictField, Field
45from lsst.pex.config.configurableActions import ConfigurableActionField, ConfigurableActionStructField
47from ...interfaces import KeyedData, KeyedDataSchema, Vector, VectorAction
48from .selectors import VectorSelector
50_LOG = logging.getLogger(__name__)
52# Basic vectorActions
class LoadVector(VectorAction):
    """Fetch one Vector from KeyedData and return it as a numpy array.

    The configured key is treated as a format template: any placeholders are
    filled from the keyword arguments supplied when the action is called.
    """

    vectorKey = Field[str](doc="Key of vector which should be loaded")

    def getInputSchema(self) -> KeyedDataSchema:
        # A single (key, type) pair describing the one column this action reads.
        schema = ((self.vectorKey, Vector),)
        return schema

    def __call__(self, data: KeyedData, **kwargs) -> Vector:
        resolvedKey = self.vectorKey.format(**kwargs)
        loaded = cast(Vector, data[resolvedKey])
        return np.array(loaded)
class DownselectVector(VectorAction):
    """Load a vector from KeyedData and keep only the entries picked out by
    the configured selector.

    The selector is itself an action; whatever it returns is used to index
    the loaded vector.
    """

    vectorKey = Field[str](doc="column key to load from KeyedData")

    selector = ConfigurableActionField[VectorAction](
        doc="Action which returns a selection mask", default=VectorSelector
    )

    def getInputSchema(self) -> KeyedDataSchema:
        # This action's own column first, then everything the selector needs.
        yield self.vectorKey, Vector
        selectorAction = cast(VectorAction, self.selector)
        yield from selectorAction.getInputSchema()

    def __call__(self, data: KeyedData, **kwargs) -> Vector:
        selectorAction = cast(VectorAction, self.selector)
        selection = selectorAction(data, **kwargs)
        fullVector = cast(Vector, data[self.vectorKey.format(**kwargs)])
        return fullVector[selection]
class MultiCriteriaDownselectVector(VectorAction):
    """Get a vector from KeyedData, apply specified set of selectors with AND
    logic, and return the shorter Vector.

    Notes
    -----
    Each selector is called with the same ``data`` and keyword arguments and
    is expected to return a boolean mask. The masks are combined without
    modifying any of the arrays returned by the selectors. If no selectors
    are configured there is nothing to filter on, so the full vector is
    returned as-is.
    """

    vectorKey = Field[str](doc="column key to load from KeyedData")

    selectors = ConfigurableActionStructField[VectorAction](
        doc="Selectors for selecting rows, will be AND together",
    )

    def getInputSchema(self) -> KeyedDataSchema:
        yield (self.vectorKey, Vector)
        for action in self.selectors:
            yield from action.getInputSchema()

    def __call__(self, data: KeyedData, **kwargs) -> Vector:
        mask: Optional[Vector] = None
        for selector in self.selectors:
            subMask = selector(data, **kwargs)
            # Combine out-of-place: an in-place update would overwrite the
            # array handed back by the first selector, which may be shared
            # with (or cached by) that selector or the input data.
            mask = subMask if mask is None else mask & subMask  # type: ignore

        vector = cast(Vector, data[self.vectorKey.format(**kwargs)])
        if mask is None:
            # No selectors configured. Indexing with None would add an axis
            # (numpy) rather than select rows, so return the vector untouched.
            return vector
        return vector[mask]
114# Astronomical vectorActions
class CalcSn(VectorAction):
    """Compute signal-to-noise as a flux vector divided by its uncertainty.

    The uncertainty column name is the flux column name with
    ``uncertaintySuffix`` appended; both names are format templates filled
    from the call-time keyword arguments.
    """

    fluxType = Field[str](doc="Flux type (vector key) to calculate the S/N.", default="{band}_psfFlux")
    uncertaintySuffix = Field[str](
        doc="Suffix to add to fluxType to specify the uncertainty column", default="Err"
    )

    def getInputSchema(self) -> KeyedDataSchema:
        uncertaintyKey = f"{self.fluxType}{self.uncertaintySuffix}"
        yield self.fluxType, Vector
        yield uncertaintyKey, Vector

    def __call__(self, data: KeyedData, **kwargs) -> Vector:
        fluxKey = self.fluxType.format(**kwargs)
        flux = np.array(data[fluxKey])

        # The suffix is appended before formatting, so placeholders in either
        # part are resolved together.
        uncertaintyKey = f"{self.fluxType}{self.uncertaintySuffix}".format(**kwargs)
        fluxErr = np.array(data[uncertaintyKey])

        return np.array(flux / fluxErr)
class ConvertFluxToMag(VectorAction):
    """Convert a flux vector into AB magnitudes.

    The flux is interpreted in the Astropy unit named by ``fluxUnit``
    (nJy by default). When ``returnMillimags`` is set the magnitudes are
    multiplied by 1000 before being returned.
    """

    vectorKey = Field[str](doc="Key of flux vector to convert to mags")
    fluxUnit = Field[str](doc="Astropy unit of flux vector", default="nJy")
    returnMillimags = Field[bool](doc="Use millimags or not?", default=False)

    def getInputSchema(self) -> KeyedDataSchema:
        schema = ((self.vectorKey, Vector),)
        return schema

    def __call__(self, data: KeyedData, **kwargs) -> Vector:
        with warnings.catch_warnings():
            # Silence the numpy warnings matching these message patterns
            # while the conversion runs.
            for pattern in (r"invalid value encountered", r"divide by zero"):
                warnings.filterwarnings("ignore", pattern)

            fluxes = np.array(cast(Vector, data[self.vectorKey.format(**kwargs)]))
            quantity = fluxes * u.Unit(self.fluxUnit)
            mags = quantity.to(u.ABmag).value  # type: ignore
            if self.returnMillimags:
                mags *= 1000
            return mags
class ConvertUnits(VectorAction):
    """Build a vector with another action and convert it between Astropy
    units.

    The vector produced by ``buildAction`` is tagged with ``inUnit``,
    converted to ``outUnit``, and returned as plain values.
    """

    buildAction = ConfigurableActionField(doc="Action to build vector", default=LoadVector)
    inUnit = Field[str](doc="input Astropy unit")
    outUnit = Field[str](doc="output Astropy unit")

    def getInputSchema(self) -> KeyedDataSchema:
        # Inputs are exactly those of the wrapped action, materialized as a
        # tuple.
        return tuple(self.buildAction.getInputSchema())

    def __call__(self, data: KeyedData, **kwargs) -> Vector:
        rawVector = self.buildAction(data, **kwargs)
        quantity = rawVector * u.Unit(self.inUnit)
        converted = quantity.to(self.outUnit)
        return converted.value
class MagDiff(VectorAction):
    """Calculate the difference between two magnitudes, each derived from a
    flux column.

    Both flux columns are converted to AB magnitudes and the magnitude of
    ``col2`` is subtracted from that of ``col1``.

    Returns
    -------
    The magnitude difference, in millimags when ``returnMillimags`` is set
    (the default) and in mags otherwise.

    Notes
    -----
    The flux columns need to be in units (specifiable in the ``fluxUnits1``
    and ``fluxUnits2`` config options) that can be converted to janskies.
    This action doesn't have any calibration information and assumes that
    the fluxes are already calibrated.
    """

    col1 = Field[str](doc="Column to subtract from")
    fluxUnits1 = Field[str](doc="Units for col1", default="nanojansky")
    col2 = Field[str](doc="Column to subtract")
    fluxUnits2 = Field[str](doc="Units for col2", default="nanojansky")
    returnMillimags = Field[bool](doc="Use millimags or not?", default=True)

    def getInputSchema(self) -> KeyedDataSchema:
        schema = ((self.col1, Vector), (self.col2, Vector))
        return schema

    def __call__(self, data: KeyedData, **kwargs) -> Vector:
        # Convert each (column, unit) pair to AB magnitudes, col1 first.
        columnsAndUnits = ((self.col1, self.fluxUnits1), (self.col2, self.fluxUnits2))
        firstMag, secondMag = [
            (np.array(data[column.format(**kwargs)]) * u.Unit(unit)).to(u.ABmag)
            for column, unit in columnsAndUnits
        ]

        difference = firstMag - secondMag
        if self.returnMillimags:
            difference = difference.to(u.mmag)

        return np.array(difference.value)
class ExtinctionCorrectedMagDiff(VectorAction):
    """Compute the difference between two magnitudes and correct for
    extinction.

    By default bands are derived from the <band>_ prefix on flux columns,
    per the naming convention in the Object Table:
    e.g. the band of 'g_psfFlux' is 'g'. If column names follow another
    convention, bands can alternatively be supplied via the band1 or band2
    config parameters.
    If band1 and band2 are supplied, the flux column names are ignored.

    Notes
    -----
    Column names are format templates; they are filled from the call-time
    keyword arguments before the band prefix is extracted, so a column such
    as ``"{band}_psfFlux"`` resolves to the actual band.

    The uncorrected difference from ``magDiff`` is returned unchanged when no
    extinction coefficients are configured, or when either band is missing
    from the coefficient dictionary (a warning is logged in the latter case).
    """

    magDiff = ConfigurableActionField[VectorAction](
        doc="Action that returns a difference in magnitudes", default=MagDiff
    )
    ebvCol = Field[str](doc="E(B-V) Column Name", default="ebv")
    band1 = Field[str](
        doc="Optional band for magDiff.col1. Supercedes column name prefix",
        optional=True,
        default=None,
    )
    band2 = Field[str](
        doc="Optional band for magDiff.col2. Supercedes column name prefix",
        optional=True,
        default=None,
    )
    extinctionCoeffs = DictField[str, float](
        doc="Dictionary of extinction coefficients for conversion from E(B-V) to extinction, A_band."
        "Key must be the band",
        optional=True,
        default=None,
    )

    def getInputSchema(self) -> KeyedDataSchema:
        # Materialize the wrapped schema first: an action's getInputSchema
        # may be a generator, which cannot be concatenated with a tuple.
        return tuple(self.magDiff.getInputSchema()) + ((self.ebvCol, Vector),)

    def __call__(self, data: KeyedData, **kwargs) -> Vector:
        diff = self.magDiff(data, **kwargs)
        if not self.extinctionCoeffs:
            _LOG.debug("No extinction Coefficients. Not applying extinction correction")
            return diff

        # Resolve any template placeholders before taking the prefix,
        # otherwise "{band}_psfFlux" would yield the literal band "{band}".
        col1Band = self.band1 if self.band1 else self.magDiff.col1.format(**kwargs).split("_")[0]
        col2Band = self.band2 if self.band2 else self.magDiff.col2.format(**kwargs).split("_")[0]

        # Return plain MagDiff with warning if either coeff not found
        for band in (col1Band, col2Band):
            if band not in self.extinctionCoeffs:
                _LOG.warning(
                    "%s band not found in coefficients dictionary: %s" " Not applying extinction correction",
                    band,
                    self.extinctionCoeffs,
                )
                return diff

        av1: float = self.extinctionCoeffs[col1Band]
        av2: float = self.extinctionCoeffs[col2Band]

        # Format the key like every other column lookup in this module.
        ebv = data[self.ebvCol.format(**kwargs)]
        # Ignore type until a more complete Vector protocol
        correction = np.array((av1 - av2) * ebv) * u.mag  # type: ignore

        # Keep the correction in the same unit as the difference it is
        # subtracted from.
        if self.magDiff.returnMillimags:
            correction = correction.to(u.mmag)

        return np.array(diff - correction.value)
class RAcosDec(VectorAction):
    """Construct a vector of RA*cos(Dec) in order to have commensurate values
    between RA and Dec.

    Notes
    -----
    Dec is interpreted as degrees and converted to radians before taking the
    cosine. RA is only scaled, so the result is in whatever unit the RA
    column uses. Both columns are converted with ``np.array`` so that any
    array-like Vector (pandas Series, numpy array, list) is accepted.
    """

    raKey = Field[str](doc="RA coordinate", default="coord_ra")
    decKey = Field[str](doc="Dec coordinate", default="coord_dec")

    def getInputSchema(self) -> KeyedDataSchema:
        return ((self.decKey, Vector), (self.raKey, Vector))

    def __call__(self, data: KeyedData, **kwargs) -> Vector:
        # np.array rather than .to_numpy(): the latter only exists on pandas
        # objects and fails for plain ndarray inputs.
        ra = np.array(data[self.raKey])
        dec = np.array(data[self.decKey])
        return ra * np.cos((dec * u.degree).to(u.radian).value)
297# Statistical vectorActions
class PerGroupStatistic(VectorAction):
    """Reduce a vector to one value per group.

    Rows are grouped by the ``groupKey`` column and the values built by
    ``buildAction`` are aggregated within each group using ``func``, the
    name of any function accepted by pandas groupby ``aggregate``. The
    returned array has one element per group.
    """

    groupKey = Field[str](doc="Column key to use for forming groups", default="obj_index")
    buildAction = ConfigurableActionField[VectorAction](doc="Action to build vector", default=LoadVector)
    func = Field[str](doc="Name of function to be applied per group")

    def getInputSchema(self) -> KeyedDataSchema:
        # Everything the wrapped action needs, plus the grouping column.
        return (*self.buildAction.getInputSchema(), (self.groupKey, Vector))

    def __call__(self, data: KeyedData, **kwargs) -> Vector:
        frame = pd.DataFrame({"groupKey": data[self.groupKey], "value": self.buildAction(data, **kwargs)})
        groupedValues = frame.groupby("groupKey")["value"]
        perGroup = groupedValues.aggregate(self.func)
        return np.array(perGroup)
class ResidualWithPerGroupStatistic(VectorAction):
    """Subtract a per-group statistic from every element of its group.

    Values built by ``buildAction`` are grouped by the ``groupKey`` column,
    ``func`` (``"mean"`` by default) is evaluated per group, and each
    individual value has its own group's statistic subtracted from it. The
    result has one element per input row.
    """

    groupKey = Field[str](doc="Column key to use for forming groups", default="obj_index")
    buildAction = ConfigurableActionField(doc="Action to build vector", default=LoadVector)
    func = Field[str](doc="Name of function to be applied per group", default="mean")

    def getInputSchema(self) -> KeyedDataSchema:
        # Everything the wrapped action needs, plus the grouping column.
        return (*self.buildAction.getInputSchema(), (self.groupKey, Vector))

    def __call__(self, data: KeyedData, **kwargs) -> Vector:
        individualValues = self.buildAction(data, **kwargs)
        frame = pd.DataFrame({"groupKey": data[self.groupKey], "value": individualValues})
        groupStatistic = frame.groupby("groupKey")["value"].aggregate(self.func)

        # Attach each row's group statistic. Both sides carry a "value"
        # column, so the suffixes tell the individual and group values apart.
        merged = frame.join(
            groupStatistic, on="groupKey", validate="m:1", lsuffix="_individual", rsuffix="_group"
        )

        residual = merged["value_individual"] - merged["value_group"]
        return np.array(residual)