Coverage for python/lsst/analysis/tools/actions/vector/vectorActions.py: 44%
152 statements
« prev ^ index » next coverage.py v7.2.5, created at 2023-05-23 04:22 -0700
« prev ^ index » next coverage.py v7.2.5, created at 2023-05-23 04:22 -0700
1# This file is part of analysis_tools.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <https://www.gnu.org/licenses/>.
21from __future__ import annotations
23__all__ = (
24 "LoadVector",
25 "DownselectVector",
26 "MultiCriteriaDownselectVector",
27 "ConvertFluxToMag",
28 "ConvertUnits",
29 "CalcSn",
30 "MagDiff",
31 "ExtinctionCorrectedMagDiff",
32 "PerGroupStatistic",
33 "ResidualWithPerGroupStatistic",
34 "RAcosDec",
35)
37import logging
38from typing import Optional, cast
40import numpy as np
41import pandas as pd
42from astropy import units as u
43from lsst.pex.config import DictField, Field
44from lsst.pex.config.configurableActions import ConfigurableActionField, ConfigurableActionStructField
46from ...interfaces import KeyedData, KeyedDataSchema, Vector, VectorAction
47from .selectors import VectorSelector
49_LOG = logging.getLogger(__name__)
51# Basic vectorActions
class LoadVector(VectorAction):
    """Fetch one column from KeyedData and return it as a numpy array.

    The configured key is treated as a format template: it is filled in with
    the keyword arguments supplied at call time before the lookup is made.
    """

    vectorKey = Field[str](doc="Key of vector which should be loaded")

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the (unformatted) key this action needs, typed as a Vector."""
        schema = ((self.vectorKey, Vector),)
        return schema

    def __call__(self, data: KeyedData, **kwargs) -> Vector:
        """Look up the formatted key in ``data`` and copy it into an array."""
        resolvedKey = self.vectorKey.format(**kwargs)
        column = cast(Vector, data[resolvedKey])
        return np.array(column)
class DownselectVector(VectorAction):
    """Load a vector from KeyedData and keep only the entries picked out by
    the configured selector action.
    """

    vectorKey = Field[str](doc="column key to load from KeyedData")

    selector = ConfigurableActionField[VectorAction](
        doc="Action which returns a selection mask", default=VectorSelector
    )

    def getInputSchema(self) -> KeyedDataSchema:
        """Yield this action's own key followed by the selector's schema."""
        yield (self.vectorKey, Vector)
        selectorAction = cast(VectorAction, self.selector)
        for entry in selectorAction.getInputSchema():
            yield entry

    def __call__(self, data: KeyedData, **kwargs) -> Vector:
        """Evaluate the selector, then index the loaded vector with its
        result.
        """
        selectorAction = cast(VectorAction, self.selector)
        keep = selectorAction(data, **kwargs)
        column = cast(Vector, data[self.vectorKey.format(**kwargs)])
        return column[keep]
class MultiCriteriaDownselectVector(VectorAction):
    """Get a vector from KeyedData, apply specified set of selectors with AND
    logic, and return the shorter Vector.

    Notes
    -----
    The masks returned by the individual selectors are multiplied together,
    which is a logical AND for boolean masks. If no selectors are configured
    nothing is rejected and the full vector is returned.
    """

    vectorKey = Field[str](doc="column key to load from KeyedData")

    selectors = ConfigurableActionStructField[VectorAction](
        doc="Selectors for selecting rows, will be AND together",
    )

    def getInputSchema(self) -> KeyedDataSchema:
        """Yield this action's own key followed by every selector's schema."""
        yield (self.vectorKey, Vector)
        for action in self.selectors:
            yield from action.getInputSchema()

    def __call__(self, data: KeyedData, **kwargs) -> Vector:
        """Combine all selector masks and apply the result to the vector.

        Parameters
        ----------
        data : `KeyedData`
            Mapping holding the vector and whatever the selectors need.
        **kwargs
            Values used to format ``vectorKey``; also forwarded to each
            selector.

        Returns
        -------
        result : `Vector`
            The entries of the loaded vector that pass every selector.
        """
        mask: Optional[Vector] = None
        for selector in self.selectors:
            subMask = selector(data, **kwargs)
            if mask is None:
                mask = subMask
            else:
                # Combine out of place: an in-place ``*=`` would modify the
                # array handed back by the first selector, which that
                # selector (or its caller) may still hold a reference to.
                mask = mask * subMask  # type: ignore

        vector = cast(Vector, data[self.vectorKey.format(**kwargs)])
        if mask is None:
            # No selectors configured. Indexing with ``None`` would add an
            # axis to a numpy array rather than select anything, so return
            # the vector as-is.
            return vector
        return vector[mask]
113# Astronomical vectorActions
class CalcSn(VectorAction):
    """Compute signal-to-noise as a flux vector divided by its uncertainty
    vector.

    The uncertainty column name is the flux column name with
    ``uncertaintySuffix`` appended.
    """

    fluxType = Field[str](doc="Flux type (vector key) to calculate the S/N.", default="{band}_psfFlux")
    uncertaintySuffix = Field[str](
        doc="Suffix to add to fluxType to specify the uncertainty column", default="Err"
    )

    def getInputSchema(self) -> KeyedDataSchema:
        """Yield the flux key and then the matching uncertainty key."""
        for key in (self.fluxType, f"{self.fluxType}{self.uncertaintySuffix}"):
            yield key, Vector

    def __call__(self, data: KeyedData, **kwargs) -> Vector:
        """Return flux / uncertainty for the formatted column names."""
        fluxKey = self.fluxType.format(**kwargs)
        signal = np.array(data[fluxKey])

        errTemplate = f"{self.fluxType}{self.uncertaintySuffix}"
        noise = np.array(data[errTemplate.format(**kwargs)])

        ratio = signal / noise
        return np.array(ratio)
class ConvertFluxToMag(VectorAction):
    """Convert a flux vector into AB magnitudes.

    The input is interpreted in the Astropy unit named by ``fluxUnit``
    (nanojanskies by default) and converted with Astropy's ``ABmag`` unit.
    """

    vectorKey = Field[str](doc="Key of flux vector to convert to mags")
    fluxUnit = Field[str](doc="Astropy unit of flux vector", default="nJy")
    returnMillimags = Field[bool](doc="Use millimags or not?", default=False)

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the (unformatted) flux key, typed as a Vector."""
        return ((self.vectorKey, Vector),)

    def __call__(self, data: KeyedData, **kwargs) -> Vector:
        """Load the flux vector and return it as (milli)magnitudes.

        Parameters
        ----------
        data : `KeyedData`
            Mapping holding the flux vector.
        **kwargs
            Values used to format ``vectorKey``.

        Returns
        -------
        mags : `Vector`
            AB magnitudes, multiplied by 1000 when ``returnMillimags`` is
            set.
        """
        # ``np.warnings`` was only ever an accidental alias of the standard
        # library module and no longer exists in NumPy >= 1.24, so use the
        # stdlib module directly.
        import warnings

        with warnings.catch_warnings():
            # Silence the NumPy runtime warnings emitted during the
            # conversion (presumably from non-positive fluxes); the
            # resulting values are returned unchanged.
            warnings.filterwarnings("ignore", r"invalid value encountered")
            warnings.filterwarnings("ignore", r"divide by zero")
            vec = cast(Vector, data[self.vectorKey.format(**kwargs)])
            mags = (np.array(vec) * u.Unit(self.fluxUnit)).to(u.ABmag).value  # type: ignore
            if self.returnMillimags:
                mags *= 1000
            return mags
class ConvertUnits(VectorAction):
    """Build a vector with a nested action and convert it from one Astropy
    unit to another.
    """

    buildAction = ConfigurableActionField(doc="Action to build vector", default=LoadVector)
    inUnit = Field[str](doc="input Astropy unit")
    outUnit = Field[str](doc="output Astropy unit")

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the nested build action's schema as a tuple."""
        return (*self.buildAction.getInputSchema(),)

    def __call__(self, data: KeyedData, **kwargs) -> Vector:
        """Return the built vector's values expressed in ``outUnit``."""
        rawValues = self.buildAction(data, **kwargs)
        sourceUnit = u.Unit(self.inUnit)
        quantity = rawValues * sourceUnit
        converted = quantity.to(self.outUnit)
        return converted.value
class MagDiff(VectorAction):
    """Compute the difference of two magnitudes, each derived from a flux
    column.

    Both flux columns are converted to AB magnitudes and the second is
    subtracted from the first.

    Returns
    -------
    The magnitude difference, in millimags when ``returnMillimags`` is set
    (the default) and in magnitudes otherwise.

    Notes
    -----
    The flux columns need to be in units (specifiable in
    the fluxUnits1 and 2 config options) that can be converted
    to janskies. This action doesn't have any calibration
    information and assumes that the fluxes are already
    calibrated.
    """

    col1 = Field[str](doc="Column to subtract from")
    fluxUnits1 = Field[str](doc="Units for col1", default="nanojansky")
    col2 = Field[str](doc="Column to subtract")
    fluxUnits2 = Field[str](doc="Units for col2", default="nanojansky")
    returnMillimags = Field[bool](doc="Use millimags or not?", default=True)

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the two (unformatted) flux keys, typed as Vectors."""
        return tuple((column, Vector) for column in (self.col1, self.col2))

    def __call__(self, data: KeyedData, **kwargs) -> Vector:
        """Return mag(col1) - mag(col2) as a plain numpy array."""
        columnsAndUnits = ((self.col1, self.fluxUnits1), (self.col2, self.fluxUnits2))

        # Convert each flux column to AB magnitudes, first column first.
        mags = []
        for column, unitName in columnsAndUnits:
            flux = np.array(data[column.format(**kwargs)]) * u.Unit(unitName)
            mags.append(flux.to(u.ABmag))

        difference = mags[0] - mags[1]
        if self.returnMillimags:
            difference = difference.to(u.mmag)

        return np.array(difference.value)
class ExtinctionCorrectedMagDiff(VectorAction):
    """Compute the difference between two magnitudes and correct for extinction

    By default bands are derived from the <band>_ prefix on flux columns,
    per the naming convention in the Object Table:
    e.g. the band of 'g_psfFlux' is 'g'. If column names follow another
    convention, bands can alternatively be supplied via the band1 or band2
    config parameters.
    If band1 and band2 are supplied, the flux column names are ignored.

    Notes
    -----
    The uncorrected difference is returned (with a log message) when no
    extinction coefficients are configured or when either band has no entry
    in ``extinctionCoeffs``.
    """

    magDiff = ConfigurableActionField[VectorAction](
        doc="Action that returns a difference in magnitudes", default=MagDiff
    )
    ebvCol = Field[str](doc="E(B-V) Column Name", default="ebv")
    band1 = Field[str](
        doc="Optional band for magDiff.col1. Supercedes column name prefix",
        optional=True,
        default=None,
    )
    band2 = Field[str](
        doc="Optional band for magDiff.col2. Supercedes column name prefix",
        optional=True,
        default=None,
    )
    extinctionCoeffs = DictField[str, float](
        doc="Dictionary of extinction coefficients for conversion from E(B-V) to extinction, A_band."
        "Key must be the band",
        optional=True,
        default=None,
    )

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the nested magDiff schema plus the E(B-V) column."""
        # Materialise first: the nested action may implement its schema as a
        # generator, which cannot be concatenated with ``+``.
        return tuple(self.magDiff.getInputSchema()) + ((self.ebvCol, Vector),)

    def __call__(self, data: KeyedData, **kwargs) -> Vector:
        """Return the magnitude difference with the extinction term removed.

        Parameters
        ----------
        data : `KeyedData`
            Mapping holding the flux columns and the E(B-V) column.
        **kwargs
            Values used to format column-name templates; also forwarded to
            the nested ``magDiff`` action.

        Returns
        -------
        result : `Vector`
            ``magDiff`` minus ``(coeff[band1] - coeff[band2]) * E(B-V)``, in
            the same units (mag or mmag) as ``magDiff`` returns, or the
            plain ``magDiff`` when the correction cannot be applied.
        """
        diff = self.magDiff(data, **kwargs)
        if not self.extinctionCoeffs:
            _LOG.debug("No extinction Coefficients. Not applying extinction correction")
            return diff

        # Format the column templates before taking the prefix; otherwise a
        # template such as "{band}_psfFlux" would yield the band "{band}".
        col1Band = self.band1 if self.band1 else self.magDiff.col1.format(**kwargs).split("_")[0]
        col2Band = self.band2 if self.band2 else self.magDiff.col2.format(**kwargs).split("_")[0]

        # Return plain MagDiff with warning if either coeff not found
        for band in (col1Band, col2Band):
            if band not in self.extinctionCoeffs:
                _LOG.warning(
                    "%s band not found in coefficients dictionary: %s" " Not applying extinction correction",
                    band,
                    self.extinctionCoeffs,
                )
                return diff

        av1: float = self.extinctionCoeffs[col1Band]
        av2: float = self.extinctionCoeffs[col2Band]

        ebv = data[self.ebvCol.format(**kwargs)]
        # Ignore type until a more complete Vector protocol
        correction = np.array((av1 - av2) * ebv) * u.mag  # type: ignore

        # Match the units of the difference produced by the nested action.
        if self.magDiff.returnMillimags:
            correction = correction.to(u.mmag)

        return np.array(diff - correction.value)
class RAcosDec(VectorAction):
    """Construct a vector of RA*cos(Dec) in order to have commensurate values
    between RA and Dec.

    Notes
    -----
    The Dec values are interpreted as degrees when taking the cosine. The RA
    values are only scaled, so the result is in whatever units RA was
    supplied in.
    """

    raKey = Field[str](doc="RA coordinate", default="coord_ra")
    decKey = Field[str](doc="Dec coordinate", default="coord_dec")

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the (unformatted) Dec and RA keys, typed as Vectors."""
        return ((self.decKey, Vector), (self.raKey, Vector))

    def __call__(self, data: KeyedData, **kwargs) -> Vector:
        """Return RA * cos(Dec) as a numpy array.

        Parameters
        ----------
        data : `KeyedData`
            Mapping holding the RA and Dec vectors.
        **kwargs
            Values used to format ``raKey`` and ``decKey``.
        """
        # ``np.asarray`` accepts pandas Series as well as plain ndarrays,
        # whereas ``.to_numpy()`` exists only on pandas objects.
        ra = np.asarray(data[self.raKey.format(**kwargs)])
        dec = np.asarray(data[self.decKey.format(**kwargs)])
        return ra * np.cos((dec * u.degree).to(u.radian).value)
296# Statistical vectorActions
class PerGroupStatistic(VectorAction):
    """Aggregate a vector within groups and return one value per group.

    The vector produced by ``buildAction`` is grouped by the ``groupKey``
    column and reduced with ``func``, which is handed to pandas as a string
    function name.
    """

    groupKey = Field[str](doc="Column key to use for forming groups", default="obj_index")
    buildAction = ConfigurableActionField[VectorAction](doc="Action to build vector", default=LoadVector)
    func = Field[str](doc="Name of function to be applied per group")

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the build action's schema followed by the group key."""
        return (*self.buildAction.getInputSchema(), (self.groupKey, Vector))

    def __call__(self, data: KeyedData, **kwargs) -> Vector:
        """Return the per-group statistic as a numpy array."""
        groupIds = data[self.groupKey]
        values = self.buildAction(data, **kwargs)
        frame = pd.DataFrame({"groupKey": groupIds, "value": values})

        grouped = frame.groupby("groupKey")["value"]
        perGroup = grouped.aggregate(self.func)
        return np.array(perGroup)
class ResidualWithPerGroupStatistic(VectorAction):
    """Subtract a per-group statistic from every element of that group.

    The vector produced by ``buildAction`` is grouped by the ``groupKey``
    column, reduced with ``func`` (the mean by default), and the group value
    is subtracted from each individual value.
    """

    groupKey = Field[str](doc="Column key to use for forming groups", default="obj_index")
    buildAction = ConfigurableActionField(doc="Action to build vector", default=LoadVector)
    func = Field[str](doc="Name of function to be applied per group", default="mean")

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the build action's schema followed by the group key."""
        return (*self.buildAction.getInputSchema(), (self.groupKey, Vector))

    def __call__(self, data: KeyedData, **kwargs) -> Vector:
        """Return (individual value - group statistic) as a numpy array."""
        values = self.buildAction(data, **kwargs)
        frame = pd.DataFrame({"groupKey": data[self.groupKey], "value": values})
        groupStat = frame.groupby("groupKey")["value"].aggregate(self.func)

        # Attach each row's group statistic; both sides carry a "value"
        # column, so the suffixes tell them apart.
        merged = frame.join(
            groupStat,
            on="groupKey",
            validate="m:1",
            lsuffix="_individual",
            rsuffix="_group",
        )

        residual = merged["value_individual"] - merged["value_group"]
        return np.array(residual)