Coverage for python/lsst/pipe/tasks/functors.py : 35%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of pipe_tasks.
2#
3# LSST Data Management System
4# This product includes software developed by the
5# LSST Project (http://www.lsst.org/).
6# See COPYRIGHT file at the top of the source tree.
7#
8# This program is free software: you can redistribute it and/or modify
9# it under the terms of the GNU General Public License as published by
10# the Free Software Foundation, either version 3 of the License, or
11# (at your option) any later version.
12#
13# This program is distributed in the hope that it will be useful,
14# but WITHOUT ANY WARRANTY; without even the implied warranty of
15# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16# GNU General Public License for more details.
17#
18# You should have received a copy of the LSST License Statement and
19# the GNU General Public License along with this program. If not,
20# see <https://www.lsstcorp.org/LegalNotices/>.
21#
import os.path
import re
import warnings
from itertools import product

import astropy.units as u
import numpy as np
import pandas as pd
import yaml

from lsst.daf.butler import DeferredDatasetHandle
from lsst.daf.persistence import doImport

from .parquetTable import ParquetTable, MultilevelParquetTable
def init_fromDict(initDict, basePath='lsst.pipe.tasks.functors',
                  typeKey='functor', name=None):
    """Initialize an object defined in a dictionary

    The object needs to be importable as
    f'{basePath}.{initDict[typeKey]}'
    The positional and keyword arguments (if any) are contained in
    "args" and "kwargs" entries in the dictionary, respectively.
    This is used in `functors.CompositeFunctor.from_yaml` to initialize
    a composite functor from a specification in a YAML file.

    Parameters
    ----------
    initDict : dictionary
        Dictionary describing object's initialization.  Must contain
        an entry keyed by ``typeKey`` that is the name of the object,
        relative to ``basePath``.
    basePath : str
        Path relative to module in which ``initDict[typeKey]`` is defined.
    typeKey : str
        Key of ``initDict`` that is the name of the object
        (relative to ``basePath``).
    name : str, optional
        Name of the functor being constructed; used only to build the
        error message if construction fails.
    """
    initDict = initDict.copy()
    # TO DO: DM-21956 We should be able to define functors outside this module
    pythonType = doImport(f'{basePath}.{initDict.pop(typeKey)}')
    args = []
    if 'args' in initDict:
        args = initDict.pop('args')
        if isinstance(args, str):
            args = [args]
    try:
        element = pythonType(*args, **initDict)
    except Exception as e:
        message = f'Error in constructing functor "{name}" of type {pythonType.__name__} with args: {args}'
        # Chain the original exception (`from e`) so its traceback and
        # context are preserved instead of being flattened into e.args.
        raise type(e)(message, e.args) from e
    return element
class Functor(object):
    """Define and execute a calculation on a ParquetTable

    The `__call__` method accepts either a `ParquetTable` object or a
    `DeferredDatasetHandle`, and returns the
    result of the calculation as a single column.  Each functor defines what
    columns are needed for the calculation, and only these columns are read
    from the `ParquetTable`.

    The action of `__call__` consists of two steps: first, loading the
    necessary columns from disk into memory as a `pandas.DataFrame` object;
    and second, performing the computation on this dataframe and returning the
    result.

    To define a new `Functor`, a subclass must define a `_func` method,
    that takes a `pandas.DataFrame` and returns result in a `pandas.Series`.
    In addition, it must define the following attributes

    * `_columns`: The columns necessary to perform the calculation
    * `name`: A name appropriate for a figure axis label
    * `shortname`: A name appropriate for use as a dictionary key

    On initialization, a `Functor` should declare what band (`filt` kwarg)
    and dataset (e.g. `'ref'`, `'meas'`, `'forced_src'`) it is intended to be
    applied to.  This enables the `_get_data` method to extract the proper
    columns from the parquet file.  If not specified, the dataset will fall
    back on the `_defaultDataset` attribute.  If band is not specified and
    `dataset` is anything other than `'ref'`, then an error will be raised
    when trying to perform the calculation.

    Originally, `Functor` was set up to expect
    datasets formatted like the `deepCoadd_obj` dataset; that is, a
    dataframe with a multi-level column index, with the levels of the
    column index being `band`, `dataset`, and `column`.
    It has since been generalized to apply to dataframes without multi-level
    indices and multi-level indices with just `dataset` and `column` levels.
    In addition, the `_get_data` method that reads
    the dataframe from the `ParquetTable` will return a dataframe with column
    index levels defined by the `_dfLevels` attribute; by default, this is
    `column`.

    The `_dfLevels` attributes should generally not need to
    be changed, unless `_func` needs columns from multiple filters or datasets
    to do the calculation.
    An example of this is the `lsst.pipe.tasks.functors.Color` functor, for
    which `_dfLevels = ('band', 'column')`, and `_func` expects the dataframe
    it gets to have those levels in the column index.

    Parameters
    ----------
    filt : str
        Filter upon which to do the calculation

    dataset : str
        Dataset upon which to do the calculation
        (e.g., 'ref', 'meas', 'forced_src').
    """

    # Dataset used when the caller does not specify one.
    _defaultDataset = 'ref'
    # Column-index levels the dataframe handed to `_func` should retain.
    _dfLevels = ('column',)
    # Default for the `noDup` property when not set per-instance.
    _defaultNoDup = False

    def __init__(self, filt=None, dataset=None, noDup=None):
        self.filt = filt
        self.dataset = dataset if dataset is not None else self._defaultDataset
        self._noDup = noDup

    @property
    def noDup(self):
        # Per-instance setting wins; otherwise fall back to the class default.
        if self._noDup is not None:
            return self._noDup
        else:
            return self._defaultNoDup

    @property
    def columns(self):
        """Columns required to perform calculation
        """
        # Subclasses either set a `_columns` attribute or override this
        # property entirely.
        if not hasattr(self, '_columns'):
            raise NotImplementedError('Must define columns property or _columns attribute')
        return self._columns

    def _get_data_columnLevels(self, data, columnIndex=None):
        """Gets the names of the column index levels

        This should only be called in the context of a multilevel table.
        The logic here is to enable this to work both with the gen2
        `MultilevelParquetTable` and with the gen3 `DeferredDatasetHandle`.

        Parameters
        ----------
        data : `MultilevelParquetTable` or `DeferredDatasetHandle`

        columnIndex (optional): pandas `Index` object
            if not passed, then it is read from the `DeferredDatasetHandle`
        """
        if isinstance(data, DeferredDatasetHandle):
            if columnIndex is None:
                columnIndex = data.get(component="columns")
        # A supplied (or just-fetched) columnIndex takes precedence; this also
        # covers the case where `data` is None but a columnIndex was passed.
        if columnIndex is not None:
            return columnIndex.names
        if isinstance(data, MultilevelParquetTable):
            return data.columnLevels
        else:
            raise TypeError(f"Unknown type for data: {type(data)}!")

    def _get_data_columnLevelNames(self, data, columnIndex=None):
        """Gets the content of each of the column levels for a multilevel table

        Similar to `_get_data_columnLevels`, this enables backward
        compatibility with gen2.

        Mirrors original gen2 implementation within
        `pipe.tasks.parquetTable.MultilevelParquetTable`
        """
        if isinstance(data, DeferredDatasetHandle):
            if columnIndex is None:
                columnIndex = data.get(component="columns")
        if columnIndex is not None:
            # Unique values present at each level of the column MultiIndex.
            columnLevels = columnIndex.names
            columnLevelNames = {
                level: list(np.unique(np.array([c for c in columnIndex])[:, i]))
                for i, level in enumerate(columnLevels)
            }
            return columnLevelNames
        if isinstance(data, MultilevelParquetTable):
            return data.columnLevelNames
        else:
            raise TypeError(f"Unknown type for data: {type(data)}!")

    def _colsFromDict(self, colDict, columnIndex=None):
        """Converts dictionary column specification to a list of columns

        This mirrors the original gen2 implementation within
        `pipe.tasks.parquetTable.MultilevelParquetTable`
        """
        new_colDict = {}
        # `data=None` is fine here because `columnIndex` is supplied.
        columnLevels = self._get_data_columnLevels(None, columnIndex=columnIndex)

        for i, lev in enumerate(columnLevels):
            if lev in colDict:
                if isinstance(colDict[lev], str):
                    new_colDict[lev] = [colDict[lev]]
                else:
                    new_colDict[lev] = colDict[lev]
            else:
                # Level not constrained: take every value it can have.
                new_colDict[lev] = columnIndex.levels[i]

        # Cartesian product over the per-level choices gives the full tuples.
        levelCols = [new_colDict[lev] for lev in columnLevels]
        cols = product(*levelCols)
        return list(cols)

    def multilevelColumns(self, data, columnIndex=None, returnTuple=False):
        """Returns columns needed by functor from multilevel dataset

        To access tables with multilevel column structure, the
        `MultilevelParquetTable` or `DeferredDatasetHandle` need to be
        passed either a list of tuples or a dictionary.

        Parameters
        ----------
        data : `MultilevelParquetTable` or `DeferredDatasetHandle`

        columnIndex (optional): pandas `Index` object
            either passed or read in from `DeferredDatasetHandle`.

        `returnTuple` : bool
            If true, then return a list of tuples rather than the column
            dictionary specification.  This is set to `True` by
            `CompositeFunctor` in order to be able to combine columns from
            the various component functors.
        """
        if isinstance(data, DeferredDatasetHandle) and columnIndex is None:
            columnIndex = data.get(component="columns")

        # Confirm that the dataset has the column levels the functor is expecting it to have.
        columnLevels = self._get_data_columnLevels(data, columnIndex)

        columnDict = {'column': self.columns,
                      'dataset': self.dataset}
        if self.filt is None:
            columnLevelNames = self._get_data_columnLevelNames(data, columnIndex)
            if "band" in columnLevels:
                if self.dataset == "ref":
                    # 'ref' columns are identical across bands, so any band
                    # (here the first) will do.
                    columnDict["band"] = columnLevelNames["band"][0]
                else:
                    raise ValueError(f"'filt' not set for functor {self.name}"
                                     f"(dataset {self.dataset}) "
                                     "and ParquetTable "
                                     "contains multiple filters in column index. "
                                     "Set 'filt' or set 'dataset' to 'ref'.")
        else:
            columnDict['band'] = self.filt

        if isinstance(data, MultilevelParquetTable):
            return data._colsFromDict(columnDict)
        elif isinstance(data, DeferredDatasetHandle):
            if returnTuple:
                return self._colsFromDict(columnDict, columnIndex=columnIndex)
            else:
                return columnDict

    def _func(self, df, dropna=True):
        # Subclasses implement the actual calculation here.  NOTE(review):
        # the `dropna` parameter is unused in this base implementation.
        raise NotImplementedError('Must define calculation on dataframe')

    def _get_columnIndex(self, data):
        """Return columnIndex
        """
        # Only a gen3 handle exposes a column index component; everything
        # else returns None.
        if isinstance(data, DeferredDatasetHandle):
            return data.get(component="columns")
        else:
            return None

    def _get_data(self, data):
        """Retrieve dataframe necessary for calculation.

        The data argument can be a DataFrame, a ParquetTable instance, or a
        gen3 DeferredDatasetHandle

        Returns dataframe upon which `self._func` can act.

        N.B. while passing a raw pandas `DataFrame` *should* work here, it
        has not been tested.
        """
        if isinstance(data, pd.DataFrame):
            return data

        # First thing to do: check to see if the data source has a multilevel column index or not.
        columnIndex = self._get_columnIndex(data)
        is_multiLevel = isinstance(data, MultilevelParquetTable) or isinstance(columnIndex, pd.MultiIndex)

        # Simple single-level parquet table, gen2
        if isinstance(data, ParquetTable) and not is_multiLevel:
            columns = self.columns
            df = data.toDataFrame(columns=columns)
            return df

        # Get proper columns specification for this functor
        if is_multiLevel:
            columns = self.multilevelColumns(data, columnIndex=columnIndex)
        else:
            columns = self.columns

        if isinstance(data, MultilevelParquetTable):
            # Load in-memory dataframe with appropriate columns the gen2 way
            df = data.toDataFrame(columns=columns, droplevels=False)
        elif isinstance(data, DeferredDatasetHandle):
            # Load in-memory dataframe with appropriate columns the gen3 way
            df = data.get(parameters={"columns": columns})

        # Drop unnecessary column levels
        if is_multiLevel:
            df = self._setLevels(df)

        return df

    def _setLevels(self, df):
        # Strip column-index levels not listed in `_dfLevels` so `_func`
        # sees the layout it expects.
        levelsToDrop = [n for n in df.columns.names if n not in self._dfLevels]
        df.columns = df.columns.droplevel(levelsToDrop)
        return df

    def _dropna(self, vals):
        return vals.dropna()

    def __call__(self, data, dropna=False):
        try:
            df = self._get_data(data)
            vals = self._func(df)
        except Exception:
            # NOTE(review): if `_get_data` itself raises, `df` is unbound
            # here and `self.fail(df)` raises NameError — confirm intended.
            vals = self.fail(df)
        if dropna:
            vals = self._dropna(vals)

        return vals

    def difference(self, data1, data2, **kwargs):
        """Computes difference between functor called on two different
        ParquetTable objects
        """
        return self(data1, **kwargs) - self(data2, **kwargs)

    def fail(self, df):
        # All-NaN fallback column, aligned to the input's index.
        return pd.Series(np.full(len(df), np.nan), index=df.index)

    @property
    def name(self):
        """Full name of functor (suitable for figure labels)
        """
        # NOTE(review): this *returns* NotImplementedError rather than
        # raising it — likely unintentional, but preserved here.
        return NotImplementedError

    @property
    def shortname(self):
        """Short name of functor (suitable for column name/dict key)
        """
        return self.name
class CompositeFunctor(Functor):
    """Perform multiple calculations at once on a catalog

    The role of a `CompositeFunctor` is to group together computations from
    multiple functors.  Instead of returning `pandas.Series` a
    `CompositeFunctor` returns a `pandas.Dataframe`, with the column names
    being the keys of `funcDict`.

    The `columns` attribute of a `CompositeFunctor` is the union of all
    columns in all the component functors.

    A `CompositeFunctor` does not use a `_func` method itself; rather,
    when a `CompositeFunctor` is called, all its columns are loaded
    at once, and the resulting dataframe is passed to the `_func` method of
    each component functor.  This has the advantage of only doing I/O
    (reading from parquet file) once, and works because each individual
    `_func` method of each component functor does not care if there are
    *extra* columns in the dataframe being passed; only that it must contain
    *at least* the `columns` it expects.

    An important and useful class method is `from_yaml`, which takes as
    argument the path to a YAML file specifying a collection of functors.

    Parameters
    ----------
    funcs : `dict` or `list`
        Dictionary or list of functors.  If a list, then it will be
        converted into a dictonary according to the `.shortname` attribute
        of each functor.
    """
    dataset = None

    def __init__(self, funcs, **kwargs):

        # isinstance (rather than `type(funcs) == dict`) also accepts
        # dict subclasses such as OrderedDict.
        if isinstance(funcs, dict):
            self.funcDict = funcs
        else:
            self.funcDict = {f.shortname: f for f in funcs}

        self._filt = None

        super().__init__(**kwargs)

    @property
    def filt(self):
        return self._filt

    @filt.setter
    def filt(self, filt):
        # Propagate the filter to every component functor.
        if filt is not None:
            for _, f in self.funcDict.items():
                f.filt = filt
        self._filt = filt

    def update(self, new):
        """Add the functors from a dict or another `CompositeFunctor`.

        Parameters
        ----------
        new : `dict` or `CompositeFunctor`
            Source of additional functors.

        Raises
        ------
        TypeError
            If ``new`` is neither a dict nor a `CompositeFunctor`.
        """
        if isinstance(new, dict):
            self.funcDict.update(new)
        elif isinstance(new, CompositeFunctor):
            self.funcDict.update(new.funcDict)
        else:
            raise TypeError('Can only update with dictionary or CompositeFunctor.')

        # Make sure new functors have the same 'filt' set
        if self.filt is not None:
            self.filt = self.filt

    @property
    def columns(self):
        # Union of the columns required by all component functors.
        return list(set([x for y in [f.columns for f in self.funcDict.values()] for x in y]))

    def multilevelColumns(self, data, **kwargs):
        # Get the union of columns for all component functors.  Note the need to have `returnTuple=True` here.
        return list(
            set(
                [
                    x
                    for y in [
                        f.multilevelColumns(data, returnTuple=True, **kwargs) for f in self.funcDict.values()
                    ]
                    for x in y
                ]
            )
        )

    def __call__(self, data, **kwargs):
        """Apply the functor to the data table

        Parameters
        ----------
        data : `lsst.daf.butler.DeferredDatasetHandle`,
               `lsst.pipe.tasks.parquetTable.MultilevelParquetTable`,
               `lsst.pipe.tasks.parquetTable.ParquetTable`,
               or `pandas.DataFrame`.
            The table or a pointer to a table on disk from which columns can
            be accessed
        """
        columnIndex = self._get_columnIndex(data)

        # First, determine whether data has a multilevel index (either gen2 or gen3)
        is_multiLevel = isinstance(data, MultilevelParquetTable) or isinstance(columnIndex, pd.MultiIndex)

        # Multilevel index, gen2 or gen3
        if is_multiLevel:
            columns = self.multilevelColumns(data, columnIndex=columnIndex)

            if isinstance(data, MultilevelParquetTable):
                # Read data into memory the gen2 way
                df = data.toDataFrame(columns=columns, droplevels=False)
            elif isinstance(data, DeferredDatasetHandle):
                # Read data into memory the gen3 way
                df = data.get(parameters={"columns": columns})

            valDict = {}
            for k, f in self.funcDict.items():
                # Pre-bind subdf so the except branch below cannot hit an
                # unbound local (previously a NameError if the sub-frame
                # selection itself raised).
                subdf = None
                try:
                    subdf = f._setLevels(
                        df[f.multilevelColumns(data, returnTuple=True, columnIndex=columnIndex)]
                    )
                    valDict[k] = f._func(subdf)
                except Exception:
                    valDict[k] = f.fail(subdf if subdf is not None else df)

        else:
            if isinstance(data, DeferredDatasetHandle):
                # input if Gen3 deferLoad=True
                df = data.get(parameters={"columns": self.columns})
            elif isinstance(data, pd.DataFrame):
                # input if Gen3 deferLoad=False
                df = data
            else:
                # Original Gen2 input is type ParquetTable and the fallback
                df = data.toDataFrame(columns=self.columns)

            valDict = {k: f._func(df) for k, f in self.funcDict.items()}

        try:
            valDf = pd.concat(valDict, axis=1)
        except TypeError:
            print([(k, type(v)) for k, v in valDict.items()])
            raise

        if kwargs.get('dropna', False):
            valDf = valDf.dropna(how='any')

        return valDf

    @classmethod
    def renameCol(cls, col, renameRules):
        """Rename a column according to the first matching prefix rule."""
        if renameRules is None:
            return col
        for old, new in renameRules:
            if col.startswith(old):
                col = col.replace(old, new)
        return col

    @classmethod
    def from_file(cls, filename, **kwargs):
        """Build a `CompositeFunctor` from a YAML file on disk."""
        # Allow environment variables in the filename.
        filename = os.path.expandvars(filename)
        with open(filename) as f:
            translationDefinition = yaml.safe_load(f)

        return cls.from_yaml(translationDefinition, **kwargs)

    @classmethod
    def from_yaml(cls, translationDefinition, **kwargs):
        """Build a `CompositeFunctor` from a parsed YAML specification."""
        funcs = {}
        for func, val in translationDefinition['funcs'].items():
            funcs[func] = init_fromDict(val, name=func)

        if 'flag_rename_rules' in translationDefinition:
            renameRules = translationDefinition['flag_rename_rules']
        else:
            renameRules = None

        if 'calexpFlags' in translationDefinition:
            for flag in translationDefinition['calexpFlags']:
                funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='calexp')

        if 'refFlags' in translationDefinition:
            for flag in translationDefinition['refFlags']:
                funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='ref')

        if 'forcedFlags' in translationDefinition:
            for flag in translationDefinition['forcedFlags']:
                funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='forced_src')

        if 'flags' in translationDefinition:
            for flag in translationDefinition['flags']:
                funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='meas')

        return cls(funcs, **kwargs)
def mag_aware_eval(df, expr):
    """Evaluate an expression on a DataFrame, knowing what the 'mag' function means

    Builds on `pandas.DataFrame.eval`, which parses and executes math on
    dataframes.

    Parameters
    ----------
    df : pandas.DataFrame
        Dataframe on which to evaluate expression.

    expr : str
        Expression.

    Returns
    -------
    val : `pandas.Series`
        Result of evaluating ``expr`` on ``df``.
    """
    try:
        # mag(x) -> -2.5*log10(x); pandas eval only provides natural log.
        expr_new = re.sub(r'mag\((\w+)\)', r'-2.5*log(\g<1>)/log(10)', expr)
        # The `truediv` keyword was deprecated in pandas 1.0 and removed in
        # 2.0; true division is the only behavior, so it is simply omitted.
        val = df.eval(expr_new)
    except Exception:  # Should check what actually gets raised
        # Retry, treating bare names in mag() as shorthand for *_instFlux.
        expr_new = re.sub(r'mag\((\w+)\)', r'-2.5*log(\g<1>_instFlux)/log(10)', expr)
        val = df.eval(expr_new)
    return val
class CustomFunctor(Functor):
    """Arbitrary computation on a catalog

    Column names (and thus the columns to be loaded from catalog) are found
    by finding all words and trying to ignore all "math-y" words.

    Parameters
    ----------
    expr : str
        Expression to evaluate, to be parsed and executed by `mag_aware_eval`.
    """
    _ignore_words = ('mag', 'sin', 'cos', 'exp', 'log', 'sqrt')

    def __init__(self, expr, **kwargs):
        self.expr = expr
        super().__init__(**kwargs)

    @property
    def name(self):
        return self.expr

    @property
    def columns(self):
        # Arguments of mag(...) are flux columns; every other bare word in
        # the expression is a candidate column unless it is a math function.
        flux_args = re.findall(r'mag\(\s*(\w+)\s*\)', self.expr)
        candidates = [word for word in re.findall(r'[a-zA-Z_]+', self.expr)
                      if word not in self._ignore_words]
        shorthand = []
        for arg in flux_args:
            if re.search('_instFlux$', arg):
                candidates.append(arg)
            else:
                # mag(x) is shorthand for the x_instFlux column; the bare
                # name itself is not a real column.
                candidates.append(f'{arg}_instFlux')
                shorthand.append(arg)
        return list({c for c in candidates if c not in shorthand})

    def _func(self, df):
        return mag_aware_eval(df, self.expr)
class Column(Functor):
    """Get column with specified name
    """

    def __init__(self, col, **kwargs):
        self.col = col
        super().__init__(**kwargs)

    @property
    def columns(self):
        # The only column needed is the one being returned.
        return [self.col]

    @property
    def name(self):
        # The column name doubles as the functor's display name.
        return self.col

    def _func(self, df):
        return df[self.col]
class Index(Functor):
    """Return the value of the index for each object
    """
    # The functor machinery requires at least one column to load; only the
    # dataframe index is actually used by `_func`.
    columns = ['coord_ra']  # just a dummy; something has to be here
    _defaultDataset = 'ref'
    _defaultNoDup = True

    def _func(self, df):
        # Promote the index to a Series aligned with itself.
        return pd.Series(df.index, index=df.index)
class IDColumn(Column):
    """Return the object ID, taken from the dataframe index."""
    # Nominal column name; note that `_func` below ignores it and returns
    # the index instead.
    col = 'id'
    _allow_difference = False
    _defaultNoDup = True

    def _func(self, df):
        return pd.Series(df.index, index=df.index)
class FootprintNPix(Column):
    """Return the number of pixels in the source footprint."""
    col = 'base_Footprint_nPix'
class CoordColumn(Column):
    """Base class for coordinate column, in degrees
    """
    # When True, the underlying column is assumed to be in radians and is
    # converted to degrees on output.
    _radians = True

    # The previous pass-through __init__ added nothing over Column.__init__
    # and has been removed; construction behavior is unchanged.

    def _func(self, df):
        # Must not modify original column in case that column is used by another functor
        output = df[self.col] * 180 / np.pi if self._radians else df[self.col]
        return output
class RAColumn(CoordColumn):
    """Right Ascension, in degrees
    """
    name = 'RA'
    _defaultNoDup = True

    def __init__(self, **kwargs):
        # Always reads the 'coord_ra' column (stored in radians; converted
        # to degrees by CoordColumn._func).
        super().__init__('coord_ra', **kwargs)

    # The previous __call__ override only delegated to super().__call__ with
    # unchanged arguments and has been removed; behavior is identical.
class DecColumn(CoordColumn):
    """Declination, in degrees
    """
    name = 'Dec'
    _defaultNoDup = True

    def __init__(self, **kwargs):
        # Always reads the 'coord_dec' column (stored in radians; converted
        # to degrees by CoordColumn._func).
        super().__init__('coord_dec', **kwargs)

    # The previous __call__ override only delegated to super().__call__ with
    # unchanged arguments and has been removed; behavior is identical.
def fluxName(col):
    """Return *col* with an '_instFlux' suffix, adding it only if absent."""
    suffix = '_instFlux'
    return col if col.endswith(suffix) else col + suffix
def fluxErrName(col):
    """Return *col* with an '_instFluxErr' suffix, adding it only if absent."""
    suffix = '_instFluxErr'
    return col if col.endswith(suffix) else col + suffix
class Mag(Functor):
    """Compute calibrated magnitude

    Takes a `calib` argument, which returns the flux at mag=0
    as `calib.getFluxMag0()`.  If not provided, then the default
    `fluxMag0` is 63095734448.0194, which is default for HSC.
    This default should be removed in DM-21955

    This calculation hides warnings about invalid values and dividing by
    zero.

    As for all functors, a `dataset` and `filt` kwarg should be provided
    upon initialization.  Unlike the default `Functor`, however, the default
    dataset for a `Mag` is `'meas'`, rather than `'ref'`.

    Parameters
    ----------
    col : `str`
        Name of flux column from which to compute magnitude.  Can be
        parseable by `lsst.pipe.tasks.functors.fluxName` function---that is,
        you can pass `'modelfit_CModel'` instead of
        `'modelfit_CModel_instFlux'`) and it will understand.
    calib : `lsst.afw.image.calib.Calib` (optional)
        Object that knows zero point.
    """
    _defaultDataset = 'meas'

    def __init__(self, col, calib=None, **kwargs):
        self.col = fluxName(col)
        self.calib = calib
        if calib is not None:
            self.fluxMag0 = calib.getFluxMag0()[0]
        else:
            # TO DO: DM-21955 Replace hard coded photometic calibration values
            self.fluxMag0 = 63095734448.0194

        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.col]

    def _func(self, df):
        # The np.warnings alias was removed in NumPy 1.25; use the stdlib
        # warnings module directly.
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', r'invalid value encountered')
            warnings.filterwarnings('ignore', r'divide by zero')
            return -2.5*np.log10(df[self.col] / self.fluxMag0)

    @property
    def name(self):
        return f'mag_{self.col}'
class MagErr(Mag):
    """Compute calibrated magnitude uncertainty

    Takes the same `calib` object as `lsst.pipe.tasks.functors.Mag`.

    Parameters
    ----------
    col : `str`
        Name of flux column
    calib : `lsst.afw.image.calib.Calib` (optional)
        Object that knows zero point.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        if self.calib is not None:
            self.fluxMag0Err = self.calib.getFluxMag0()[1]
        else:
            self.fluxMag0Err = 0.

    @property
    def columns(self):
        # Requires both the flux column and its error column.
        return [self.col, self.col + 'Err']

    def _func(self, df):
        # The np.warnings alias was removed in NumPy 1.25; use the stdlib
        # warnings module directly.
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', r'invalid value encountered')
            warnings.filterwarnings('ignore', r'divide by zero')
            fluxCol, fluxErrCol = self.columns
            # Propagate relative errors of flux and zero point in quadrature.
            x = df[fluxErrCol] / df[fluxCol]
            y = self.fluxMag0Err / self.fluxMag0
            magErr = (2.5 / np.log(10.)) * np.sqrt(x*x + y*y)
            return magErr

    @property
    def name(self):
        return super().name + '_err'
class NanoMaggie(Mag):
    """Convert instrumental flux to a zero-point-normalized flux.

    Divides the flux column by the zero-point flux ``fluxMag0`` (inherited
    from `Mag`) and scales by 1e9; presumably this yields nanomaggies —
    TODO confirm the intended unit.
    """

    def _func(self, df):
        return (df[self.col] / self.fluxMag0) * 1e9
class MagDiff(Functor):
    """Functor to calculate magnitude difference

    Computes -2.5*log10(flux1/flux2) between two flux columns of the same
    row.  (This docstring was previously placed after `_defaultDataset`,
    where it was a no-op string statement rather than the class docstring.)

    Parameters
    ----------
    col1, col2 : `str`
        Names of the flux columns; parsed through
        `lsst.pipe.tasks.functors.fluxName`, so the '_instFlux' suffix may
        be omitted.
    """
    _defaultDataset = 'meas'

    def __init__(self, col1, col2, **kwargs):
        self.col1 = fluxName(col1)
        self.col2 = fluxName(col2)
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.col1, self.col2]

    def _func(self, df):
        # The np.warnings alias was removed in NumPy 1.25; use the stdlib
        # warnings module directly.
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', r'invalid value encountered')
            warnings.filterwarnings('ignore', r'divide by zero')
            return -2.5*np.log10(df[self.col1]/df[self.col2])

    @property
    def name(self):
        return f'(mag_{self.col1} - mag_{self.col2})'

    @property
    def shortname(self):
        return f'magDiff_{self.col1}_{self.col2}'
class Color(Functor):
    """Compute the color between two filters

    Computes color by initializing two different `Mag`
    functors based on the `col` and filters provided, and
    then returning the difference.

    This is enabled by the `_func` expecting a dataframe with a
    multilevel column index, with both `'band'` and `'column'`,
    instead of just `'column'`, which is the `Functor` default.
    This is controlled by the `_dfLevels` attribute.

    Also of note, the default dataset for `Color` is `'forced_src'`,
    whereas for `Mag` it is `'meas'`.

    Parameters
    ----------
    col : str
        Name of flux column from which to compute; same as would be passed
        to `lsst.pipe.tasks.functors.Mag`.

    filt2, filt1 : str
        Filters from which to compute magnitude difference.
        Color computed is `Mag(filt2) - Mag(filt1)`.
    """
    _defaultDataset = 'forced_src'
    # Keep both levels so `_func` can select per-band sub-frames below.
    _dfLevels = ('band', 'column')
    _defaultNoDup = True

    def __init__(self, col, filt2, filt1, **kwargs):
        self.col = fluxName(col)
        if filt2 == filt1:
            raise RuntimeError("Cannot compute Color for %s: %s - %s " % (col, filt2, filt1))
        self.filt2 = filt2
        self.filt1 = filt1

        # One Mag functor per band; their difference is the color.
        self.mag2 = Mag(col, filt=filt2, **kwargs)
        self.mag1 = Mag(col, filt=filt1, **kwargs)

        super().__init__(**kwargs)

    @property
    def filt(self):
        # A Color spans two filters, so it has no single `filt`.
        return None

    @filt.setter
    def filt(self, filt):
        # Deliberately ignore attempts to set a single filter (e.g. by
        # CompositeFunctor.filt propagation).
        pass

    def _func(self, df):
        # df has a ('band', 'column') column index; select each band's
        # sub-frame before delegating to the per-band Mag functors.
        mag2 = self.mag2._func(df[self.filt2])
        mag1 = self.mag1._func(df[self.filt1])
        return mag2 - mag1

    @property
    def columns(self):
        return [self.mag1.col, self.mag2.col]

    def multilevelColumns(self, parq, **kwargs):
        # Explicit (dataset, band, column) tuples for the two bands.
        return [(self.dataset, self.filt1, self.col), (self.dataset, self.filt2, self.col)]

    @property
    def name(self):
        return f'{self.filt2} - {self.filt1} ({self.col})'

    @property
    def shortname(self):
        return f"{self.col}_{self.filt2.replace('-', '')}m{self.filt1.replace('-', '')}"
class Labeller(Functor):
    """Base class for functors that produce a categorical label column.

    Its main purpose is to override `__call__` so that `dropna` is always
    forced to `False`: missing values are mapped to `_null_label` by
    subclasses rather than dropped.
    """
    _null_label = 'null'
    _allow_difference = False
    name = 'label'
    # When True, subclasses cast the categorical label Series to str.
    _force_str = False

    def __call__(self, parq, dropna=False, **kwargs):
        # Ignore any caller-supplied dropna and always pass False.
        return super().__call__(parq, dropna=False, **kwargs)
class StarGalaxyLabeller(Labeller):
    """Label each source 'star', 'galaxy', or 'null' from extendedness."""
    _columns = ["base_ClassificationExtendedness_value"]
    _column = "base_ClassificationExtendedness_value"

    def _func(self, df):
        x = df[self._columns][self._column]
        mask = x.isnull()
        # Codes: 0 where extendedness >= 0.5, 1 where < 0.5, 2 where null.
        test = (x < 0.5).astype(int)
        test = test.mask(mask, 2)

        # TODO: DM-21954 Look into veracity of inline comment below
        # are these backwards?
        categories = ['galaxy', 'star', self._null_label]
        label = pd.Series(pd.Categorical.from_codes(test, categories=categories),
                          index=x.index, name='label')
        if self._force_str:
            label = label.astype(str)
        return label
class NumStarLabeller(Labeller):
    """Label sources by how many filters flagged them as a star."""
    _columns = ['numStarFlags']
    # NOTE(review): this class attribute is not used by `_func` below, and
    # its 'notStar' key differs from the local 'noStar' label — confirm
    # which set is authoritative.
    labels = {"star": 0, "maybe": 1, "notStar": 2}

    def _func(self, df):
        x = df[self._columns][self._columns[0]]

        # Number of filters
        n = len(x.unique()) - 1

        # Bin counts into noStar (== 0), maybe (1..n-1), star (== n).
        labels = ['noStar', 'maybe', 'star']
        label = pd.Series(pd.cut(x, [-1, 0, n-1, n], labels=labels),
                          index=x.index, name='label')

        if self._force_str:
            label = label.astype(str)

        return label
class DeconvolvedMoments(Functor):
    """Compute deconvolved second moments: source moments minus PSF moments."""
    name = 'Deconvolved Moments'
    shortname = 'deconvolvedMoments'
    _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
                "ext_shapeHSM_HsmSourceMoments_yy",
                "base_SdssShape_xx", "base_SdssShape_yy",
                "ext_shapeHSM_HsmPsfMoments_xx",
                "ext_shapeHSM_HsmPsfMoments_yy")

    def _func(self, df):
        """Calculate deconvolved moments"""
        # Prefer HSM source moments; fall back to SdssShape per-row below.
        if "ext_shapeHSM_HsmSourceMoments_xx" in df.columns:  # _xx added by tdm
            hsm = df["ext_shapeHSM_HsmSourceMoments_xx"] + df["ext_shapeHSM_HsmSourceMoments_yy"]
        else:
            hsm = np.ones(len(df))*np.nan
        sdss = df["base_SdssShape_xx"] + df["base_SdssShape_yy"]
        if "ext_shapeHSM_HsmPsfMoments_xx" in df.columns:
            psf = df["ext_shapeHSM_HsmPsfMoments_xx"] + df["ext_shapeHSM_HsmPsfMoments_yy"]
        else:
            # LSST does not have shape.sdss.psf.  Could instead add base_PsfShape to catalog using
            # exposure.getPsf().computeShape(s.getCentroid()).getIxx()
            # raise TaskError("No psf shape parameter found in catalog")
            raise RuntimeError('No psf shape parameter found in catalog')

        # Use HSM where finite, SdssShape otherwise, then subtract the PSF.
        return hsm.where(np.isfinite(hsm), sdss) - psf
class SdssTraceSize(Functor):
    """Functor to calculate SDSS trace radius size for sources"""
    name = "SDSS Trace Size"
    shortname = 'sdssTrace'
    _columns = ("base_SdssShape_xx", "base_SdssShape_yy")

    def _func(self, df):
        # Trace radius: sqrt((Ixx + Iyy)/2).
        xx = df["base_SdssShape_xx"]
        yy = df["base_SdssShape_yy"]
        return np.sqrt(0.5*(xx + yy))
class PsfSdssTraceSizeDiff(Functor):
    """Functor to calculate SDSS trace radius size difference (%) between object and psf model"""
    name = "PSF - SDSS Trace Size"
    shortname = 'psf_sdssTrace'
    _columns = ("base_SdssShape_xx", "base_SdssShape_yy",
                "base_SdssShape_psf_xx", "base_SdssShape_psf_yy")

    def _func(self, df):
        # Trace radii of the source and of the PSF model at its position.
        objSize = np.sqrt(0.5*(df["base_SdssShape_xx"] + df["base_SdssShape_yy"]))
        modelSize = np.sqrt(0.5*(df["base_SdssShape_psf_xx"] + df["base_SdssShape_psf_yy"]))
        # Difference expressed as a percentage of the mean of the two sizes.
        return 100*(objSize - modelSize)/(0.5*(objSize + modelSize))
class HsmTraceSize(Functor):
    """Functor to calculate HSM trace radius size for sources"""
    name = 'HSM Trace Size'
    shortname = 'hsmTrace'
    _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
                "ext_shapeHSM_HsmSourceMoments_yy")

    def _func(self, df):
        # Trace radius: sqrt of the mean of the two diagonal second moments.
        halfTrace = 0.5*(df["ext_shapeHSM_HsmSourceMoments_xx"]
                         + df["ext_shapeHSM_HsmSourceMoments_yy"])
        return np.sqrt(halfTrace)
class PsfHsmTraceSizeDiff(Functor):
    """Functor to calculate HSM trace radius size difference (%) between object and psf model"""
    name = 'PSF - HSM Trace Size'
    shortname = 'psf_HsmTrace'
    _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
                "ext_shapeHSM_HsmSourceMoments_yy",
                "ext_shapeHSM_HsmPsfMoments_xx",
                "ext_shapeHSM_HsmPsfMoments_yy")

    def _func(self, df):
        # HSM trace radii of the source and of the PSF model.
        src = np.sqrt(0.5*(df["ext_shapeHSM_HsmSourceMoments_xx"]
                           + df["ext_shapeHSM_HsmSourceMoments_yy"]))
        psf = np.sqrt(0.5*(df["ext_shapeHSM_HsmPsfMoments_xx"]
                           + df["ext_shapeHSM_HsmPsfMoments_yy"]))
        # Percent difference, normalized by the mean of the two sizes.
        return 100*(src - psf)/(0.5*(src + psf))
class HsmFwhm(Functor):
    """Functor to compute the PSF FWHM in arcseconds from HSM PSF moments."""
    name = 'HSM Psf FWHM'
    _columns = ('ext_shapeHSM_HsmPsfMoments_xx', 'ext_shapeHSM_HsmPsfMoments_yy')
    # TODO: DM-21403 pixel scale should be computed from the CD matrix or transform matrix
    pixelScale = 0.168
    # Conversion factor between a Gaussian sigma and its FWHM.
    SIGMA2FWHM = 2*np.sqrt(2*np.log(2))

    def _func(self, df):
        # sigma from the mean diagonal second moment, then scale to FWHM arcsec.
        sigma = np.sqrt(
            0.5*(df['ext_shapeHSM_HsmPsfMoments_xx'] + df['ext_shapeHSM_HsmPsfMoments_yy']))
        return self.pixelScale*self.SIGMA2FWHM*sigma
class E1(Functor):
    """Distortion-convention ellipticity e1 = (Ixx - Iyy) / (Ixx + Iyy).

    Parameters
    ----------
    colXX, colXY, colYY : `str`
        Names of the second-moment columns.
    """
    name = "Distortion Ellipticity (e1)"
    shortname = "Distortion"

    def __init__(self, colXX, colXY, colYY, **kwargs):
        self.colXX = colXX
        self.colXY = colXY
        self.colYY = colYY
        self._columns = [self.colXX, self.colXY, self.colYY]
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.colXX, self.colXY, self.colYY]

    def _func(self, df):
        # Fixed operator precedence: the whole difference (xx - yy) must be
        # divided by the trace (xx + yy); previously only yy was divided,
        # yielding xx - yy/(xx + yy), which is dimensionally inconsistent.
        return (df[self.colXX] - df[self.colYY]) / (df[self.colXX] + df[self.colYY])
class E2(Functor):
    """Ellipticity e2 = 2*Ixy / (Ixx + Iyy) computed from second moments.

    Parameters
    ----------
    colXX, colXY, colYY : `str`
        Names of the second-moment columns.
    """
    name = "Ellipticity e2"

    def __init__(self, colXX, colXY, colYY, **kwargs):
        self.colXX = colXX
        self.colXY = colXY
        self.colYY = colYY
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.colXX, self.colXY, self.colYY]

    def _func(self, df):
        trace = df[self.colXX] + df[self.colYY]
        return 2*df[self.colXY] / trace
class RadiusFromQuadrupole(Functor):
    """Determinant radius |Q|^(1/4) from quadrupole (second) moments.

    Parameters
    ----------
    colXX, colXY, colYY : `str`
        Names of the second-moment columns.
    """

    def __init__(self, colXX, colXY, colYY, **kwargs):
        self.colXX = colXX
        self.colXY = colXY
        self.colYY = colYY
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.colXX, self.colXY, self.colYY]

    def _func(self, df):
        # |Q| = Ixx*Iyy - Ixy^2; radius is the fourth root of the determinant.
        det = df[self.colXX]*df[self.colYY] - df[self.colXY]**2
        return det**0.25
class LocalWcs(Functor):
    """Computations using the stored localWcs.

    Fixes copy-pasted docstrings: ``computeDeltaRaDec`` previously claimed to
    compute a spherical distance and ``computeSkySeperation`` a pixel-scale
    conversion; the parameter lists also duplicated ``cd11``. Code unchanged.
    """
    name = "LocalWcsOperations"

    def __init__(self,
                 colCD_1_1,
                 colCD_1_2,
                 colCD_2_1,
                 colCD_2_2,
                 **kwargs):
        self.colCD_1_1 = colCD_1_1
        self.colCD_1_2 = colCD_1_2
        self.colCD_2_1 = colCD_2_1
        self.colCD_2_2 = colCD_2_2
        super().__init__(**kwargs)

    def computeDeltaRaDec(self, x, y, cd11, cd12, cd21, cd22):
        """Convert a pixel offset to a (delta RA, delta Dec) offset using the
        local WCS affine (CD matrix) approximation.

        Parameters
        ----------
        x : `pandas.Series`
            X pixel coordinate offset.
        y : `pandas.Series`
            Y pixel coordinate offset.
        cd11 : `pandas.Series`
            [1, 1] element of the local Wcs affine transform.
        cd12 : `pandas.Series`
            [1, 2] element of the local Wcs affine transform.
        cd21 : `pandas.Series`
            [2, 1] element of the local Wcs affine transform.
        cd22 : `pandas.Series`
            [2, 2] element of the local Wcs affine transform.

        Returns
        -------
        raDecTuple : tuple
            RA and dec conversion of x and y given the local Wcs. Returned
            units are in radians.
        """
        return (x * cd11 + y * cd12, x * cd21 + y * cd22)

    def computeSkySeperation(self, ra1, dec1, ra2, dec2):
        """Compute the great-circle separation between two sky coordinates
        using the haversine formula.

        Parameters
        ----------
        ra1 : `pandas.Series`
            Ra of the first coordinate in radians.
        dec1 : `pandas.Series`
            Dec of the first coordinate in radians.
        ra2 : `pandas.Series`
            Ra of the second coordinate in radians.
        dec2 : `pandas.Series`
            Dec of the second coordinate in radians.

        Returns
        -------
        dist : `pandas.Series`
            Distance on the sphere in radians.
        """
        deltaDec = dec2 - dec1
        deltaRa = ra2 - ra1
        # Haversine formula: numerically stable for small separations.
        return 2 * np.arcsin(
            np.sqrt(
                np.sin(deltaDec / 2) ** 2
                + np.cos(dec2) * np.cos(dec1) * np.sin(deltaRa / 2) ** 2))

    def getSkySeperationFromPixel(self, x1, y1, x2, y2, cd11, cd12, cd21, cd22):
        """Compute the distance on the sphere from (x1, y1) to (x2, y2).

        Parameters
        ----------
        x1 : `pandas.Series`
            X pixel coordinate.
        y1 : `pandas.Series`
            Y pixel coordinate.
        x2 : `pandas.Series`
            X pixel coordinate.
        y2 : `pandas.Series`
            Y pixel coordinate.
        cd11 : `pandas.Series`
            [1, 1] element of the local Wcs affine transform.
        cd12 : `pandas.Series`
            [1, 2] element of the local Wcs affine transform.
        cd21 : `pandas.Series`
            [2, 1] element of the local Wcs affine transform.
        cd22 : `pandas.Series`
            [2, 2] element of the local Wcs affine transform.

        Returns
        -------
        dist : `pandas.Series`
            Distance on the sphere in radians.
        """
        ra1, dec1 = self.computeDeltaRaDec(x1, y1, cd11, cd12, cd21, cd22)
        ra2, dec2 = self.computeDeltaRaDec(x2, y2, cd11, cd12, cd21, cd22)
        # Great circle distance for small separations.
        return self.computeSkySeperation(ra1, dec1, ra2, dec2)
class ComputePixelScale(LocalWcs):
    """Compute the local pixel scale from the stored CDMatrix.
    """
    name = "PixelScale"

    @property
    def columns(self):
        return [self.colCD_1_1,
                self.colCD_1_2,
                self.colCD_2_1,
                self.colCD_2_2]

    def pixelScaleArcseconds(self, cd11, cd12, cd21, cd22):
        """Compute the local pixel to scale conversion in arcseconds.

        Parameters
        ----------
        cd11 : `pandas.Series`
            [1, 1] element of the local Wcs affine transform in radians.
        cd12 : `pandas.Series`
            [1, 2] element of the local Wcs affine transform in radians.
        cd21 : `pandas.Series`
            [2, 1] element of the local Wcs affine transform in radians.
        cd22 : `pandas.Series`
            [2, 2] element of the local Wcs affine transform in radians.

        Returns
        -------
        pixScale : `pandas.Series`
            Arcseconds per pixel at the location of the local WCS.
        """
        # sqrt(|det CD|) is the linear pixel scale in radians; convert to arcsec.
        return 3600 * np.degrees(np.sqrt(np.fabs(cd11 * cd22 - cd12 * cd21)))

    def _func(self, df):
        return self.pixelScaleArcseconds(df[self.colCD_1_1],
                                         df[self.colCD_1_2],
                                         df[self.colCD_2_1],
                                         df[self.colCD_2_2])
class ConvertPixelToArcseconds(ComputePixelScale):
    """Convert a value in units of pixels to units of arcseconds.

    (Docstring fixed: it previously described the pixels-squared conversion,
    swapped with ConvertPixelSqToArcsecondsSq; this class multiplies by the
    pixel scale exactly once.)

    Parameters
    ----------
    col : `str`
        Name of the column (in pixels) to convert.
    colCD_1_1, colCD_1_2, colCD_2_1, colCD_2_2 : `str`
        Names of the local CD-matrix element columns.
    """

    def __init__(self,
                 col,
                 colCD_1_1,
                 colCD_1_2,
                 colCD_2_1,
                 colCD_2_2,
                 **kwargs):
        self.col = col
        super().__init__(colCD_1_1,
                         colCD_1_2,
                         colCD_2_1,
                         colCD_2_2,
                         **kwargs)

    @property
    def name(self):
        return f"{self.col}_asArcseconds"

    @property
    def columns(self):
        return [self.col,
                self.colCD_1_1,
                self.colCD_1_2,
                self.colCD_2_1,
                self.colCD_2_2]

    def _func(self, df):
        return df[self.col] * self.pixelScaleArcseconds(df[self.colCD_1_1],
                                                        df[self.colCD_1_2],
                                                        df[self.colCD_2_1],
                                                        df[self.colCD_2_2])
class ConvertPixelSqToArcsecondsSq(ComputePixelScale):
    """Convert a value in units of pixels squared to units of arcseconds
    squared.

    (Docstring fixed: it previously described the single-power conversion,
    swapped with ConvertPixelToArcseconds; this class multiplies by the
    pixel scale twice.)

    Parameters
    ----------
    col : `str`
        Name of the column (in pixels squared) to convert.
    colCD_1_1, colCD_1_2, colCD_2_1, colCD_2_2 : `str`
        Names of the local CD-matrix element columns.
    """

    def __init__(self,
                 col,
                 colCD_1_1,
                 colCD_1_2,
                 colCD_2_1,
                 colCD_2_2,
                 **kwargs):
        self.col = col
        super().__init__(colCD_1_1,
                         colCD_1_2,
                         colCD_2_1,
                         colCD_2_2,
                         **kwargs)

    @property
    def name(self):
        return f"{self.col}_asArcsecondsSq"

    @property
    def columns(self):
        return [self.col,
                self.colCD_1_1,
                self.colCD_1_2,
                self.colCD_2_1,
                self.colCD_2_2]

    def _func(self, df):
        pixScale = self.pixelScaleArcseconds(df[self.colCD_1_1],
                                             df[self.colCD_1_2],
                                             df[self.colCD_2_1],
                                             df[self.colCD_2_2])
        return df[self.col] * pixScale * pixScale
class ReferenceBand(Functor):
    """Functor returning, per object, the name of the band whose
    merge_measurement_* flag is set."""
    name = 'Reference Band'
    shortname = 'refBand'

    @property
    def columns(self):
        # Order matters: idxmax returns the first max, so this is the
        # priority order when several flags are set.
        return ["merge_measurement_i",
                "merge_measurement_r",
                "merge_measurement_z",
                "merge_measurement_y",
                "merge_measurement_g"]

    def _func(self, df):
        def getFilterAliasName(row):
            # idxmax gives the column holding the max value (True > False);
            # strip the prefix to leave the bare band name.
            return row.idxmax().replace('merge_measurement_', '')

        return df[self.columns].apply(getFilterAliasName, axis=1)
class Photometry(Functor):
    """Base class for converting instrumental fluxes (DN) to calibrated
    AB fluxes and magnitudes.

    Parameters
    ----------
    colFlux : `str`
        Name of the instrument flux column.
    colFluxErr : `str`, optional
        Name of the associated error column for ``colFlux``.
    calib : optional
        Calibration object providing ``getFluxMag0()``; if `None`, the
        hard-coded ``COADD_ZP`` zeropoint is used.
    """
    # AB to NanoJansky (3631 Jansky)
    AB_FLUX_SCALE = (0 * u.ABmag).to_value(u.nJy)
    LOG_AB_FLUX_SCALE = 12.56
    # 2.5 / ln(10): converts relative flux error to magnitude error.
    FIVE_OVER_2LOG10 = 1.085736204758129569
    # TO DO: DM-21955 Replace hard coded photometic calibration values
    COADD_ZP = 27

    def __init__(self, colFlux, colFluxErr=None, calib=None, **kwargs):
        self.vhypot = np.vectorize(self.hypot)
        self.col = colFlux
        self.colFluxErr = colFluxErr

        self.calib = calib
        if calib is not None:
            self.fluxMag0, self.fluxMag0Err = calib.getFluxMag0()
        else:
            self.fluxMag0 = 1./np.power(10, -0.4*self.COADD_ZP)
            self.fluxMag0Err = 0.

        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.col]

    @property
    def name(self):
        return f'mag_{self.col}'

    @classmethod
    def hypot(cls, a, b):
        # Overflow-safe scalar hypot: factor out the larger magnitude.
        if np.abs(a) < np.abs(b):
            a, b = b, a
        if a == 0.:
            return 0.
        q = b/a
        return np.abs(a) * np.sqrt(1. + q*q)

    def dn2flux(self, dn, fluxMag0):
        """Convert instrumental flux (DN) to nanojansky."""
        return self.AB_FLUX_SCALE * dn / fluxMag0

    def dn2mag(self, dn, fluxMag0):
        """Convert instrumental flux (DN) to AB magnitude."""
        # Local import: the np.warnings alias was removed in NumPy >= 1.25.
        import warnings
        with warnings.catch_warnings():
            # Non-positive fluxes legitimately produce NaN/inf magnitudes.
            warnings.filterwarnings('ignore', r'invalid value encountered')
            warnings.filterwarnings('ignore', r'divide by zero')
            return -2.5 * np.log10(dn/fluxMag0)

    def dn2fluxErr(self, dn, dnErr, fluxMag0, fluxMag0Err):
        """Propagate DN and zeropoint errors into a nanojansky flux error."""
        retVal = self.vhypot(dn * fluxMag0Err, dnErr * fluxMag0)
        retVal *= self.AB_FLUX_SCALE / fluxMag0 / fluxMag0
        return retVal

    def dn2MagErr(self, dn, dnErr, fluxMag0, fluxMag0Err):
        """Propagate DN and zeropoint errors into a magnitude error."""
        retVal = self.dn2fluxErr(dn, dnErr, fluxMag0, fluxMag0Err) / self.dn2flux(dn, fluxMag0)
        return self.FIVE_OVER_2LOG10 * retVal
class NanoJansky(Photometry):
    """Return the flux column calibrated to nanojansky."""

    def _func(self, df):
        dn = df[self.col]
        return self.dn2flux(dn, self.fluxMag0)
class NanoJanskyErr(Photometry):
    """Return the flux error column calibrated to nanojansky."""

    @property
    def columns(self):
        return [self.col, self.colFluxErr]

    def _func(self, df):
        errors = self.dn2fluxErr(df[self.col], df[self.colFluxErr],
                                 self.fluxMag0, self.fluxMag0Err)
        # dn2fluxErr goes through np.vectorize, so re-wrap as a Series.
        return pd.Series(errors, index=df.index)
class Magnitude(Photometry):
    """Return the flux column converted to AB magnitude."""

    def _func(self, df):
        dn = df[self.col]
        return self.dn2mag(dn, self.fluxMag0)
class MagnitudeErr(Photometry):
    """Return the flux error column converted to an AB magnitude error."""

    @property
    def columns(self):
        return [self.col, self.colFluxErr]

    def _func(self, df):
        errors = self.dn2MagErr(df[self.col], df[self.colFluxErr],
                                self.fluxMag0, self.fluxMag0Err)
        # dn2MagErr goes through np.vectorize, so re-wrap as a Series.
        return pd.Series(errors, index=df.index)
class LocalPhotometry(Functor):
    """Base class for calibrating the specified instrument flux column using
    the local photometric calibration.

    Parameters
    ----------
    instFluxCol : `str`
        Name of the instrument flux column.
    instFluxErrCol : `str`
        Name of the assocated error columns for ``instFluxCol``.
    photoCalibCol : `str`
        Name of local calibration column.
    photoCalibErrCol : `str`
        Error associated with ``photoCalibCol``

    See also
    --------
    LocalPhotometry
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    """
    # AB magnitude of 1 nJy; additive offset for nJy -> mag conversion.
    logNJanskyToAB = (1 * u.nJy).to_value(u.ABmag)

    def __init__(self,
                 instFluxCol,
                 instFluxErrCol,
                 photoCalibCol,
                 photoCalibErrCol,
                 **kwargs):
        self.instFluxCol = instFluxCol
        self.instFluxErrCol = instFluxErrCol
        self.photoCalibCol = photoCalibCol
        self.photoCalibErrCol = photoCalibErrCol
        super().__init__(**kwargs)

    def instFluxToNanojansky(self, instFlux, localCalib):
        """Convert instrument flux to nanojanskys.

        Parameters
        ----------
        instFlux : `numpy.ndarray` or `pandas.Series`
            Array of instrument flux measurements
        localCalib : `numpy.ndarray` or `pandas.Series`
            Array of local photometric calibration estimates.

        Returns
        -------
        calibFlux : `numpy.ndarray` or `pandas.Series`
            Array of calibrated flux measurements.
        """
        return instFlux * localCalib

    def instFluxErrToNanojanskyErr(self, instFlux, instFluxErr, localCalib, localCalibErr):
        """Convert instrument flux error to a nanojansky flux error.

        Parameters
        ----------
        instFlux : `numpy.ndarray` or `pandas.Series`
            Array of instrument flux measurements
        instFluxErr : `numpy.ndarray` or `pandas.Series`
            Errors on associated ``instFlux`` values
        localCalib : `numpy.ndarray` or `pandas.Series`
            Array of local photometric calibration estimates.
        localCalibErr : `numpy.ndarray` or `pandas.Series`
            Errors on associated ``localCalib`` values

        Returns
        -------
        calibFluxErr : `numpy.ndarray` or `pandas.Series`
            Errors on calibrated flux measurements.
        """
        return np.hypot(instFluxErr * localCalib, instFlux * localCalibErr)

    def instFluxToMagnitude(self, instFlux, localCalib):
        """Convert instrument flux to an AB magnitude.

        Parameters
        ----------
        instFlux : `numpy.ndarray` or `pandas.Series`
            Array of instrument flux measurements
        localCalib : `numpy.ndarray` or `pandas.Series`
            Array of local photometric calibration estimates.

        Returns
        -------
        calibMag : `numpy.ndarray` or `pandas.Series`
            Array of calibrated AB magnitudes.
        """
        return -2.5 * np.log10(self.instFluxToNanojansky(instFlux, localCalib)) + self.logNJanskyToAB

    def instFluxErrToMagnitudeErr(self, instFlux, instFluxErr, localCalib, localCalibErr):
        """Convert instrument flux error to an AB magnitude error.

        Parameters
        ----------
        instFlux : `numpy.ndarray` or `pandas.Series`
            Array of instrument flux measurements
        instFluxErr : `numpy.ndarray` or `pandas.Series`
            Errors on associated ``instFlux`` values
        localCalib : `numpy.ndarray` or `pandas.Series`
            Array of local photometric calibration estimates.
        localCalibErr : `numpy.ndarray` or `pandas.Series`
            Errors on associated ``localCalib`` values

        Returns
        -------
        calibMagErr: `numpy.ndarray` or `pandas.Series`
            Error on calibrated AB magnitudes.
        """
        err = self.instFluxErrToNanojanskyErr(instFlux, instFluxErr, localCalib, localCalibErr)
        # Fixed: the denominator is the calibrated flux instFlux*localCalib;
        # previously instFluxErr was passed in place of localCalib.
        return 2.5 / np.log(10) * err / self.instFluxToNanojansky(instFlux, localCalib)
class LocalNanojansky(LocalPhotometry):
    """Compute calibrated fluxes using the local calibration value.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    """

    @property
    def columns(self):
        return [self.instFluxCol, self.photoCalibCol]

    @property
    def name(self):
        return f'flux_{self.instFluxCol}'

    def _func(self, df):
        instFlux = df[self.instFluxCol]
        localCalib = df[self.photoCalibCol]
        return self.instFluxToNanojansky(instFlux, localCalib)
class LocalNanojanskyErr(LocalPhotometry):
    """Compute calibrated flux errors using the local calibration value.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    """

    @property
    def columns(self):
        return [self.instFluxCol, self.instFluxErrCol,
                self.photoCalibCol, self.photoCalibErrCol]

    @property
    def name(self):
        return f'fluxErr_{self.instFluxCol}'

    def _func(self, df):
        instFlux = df[self.instFluxCol]
        instFluxErr = df[self.instFluxErrCol]
        localCalib = df[self.photoCalibCol]
        localCalibErr = df[self.photoCalibErrCol]
        return self.instFluxErrToNanojanskyErr(instFlux, instFluxErr,
                                               localCalib, localCalibErr)
class LocalMagnitude(LocalPhotometry):
    """Compute calibrated AB magnitudes using the local calibration value.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    """

    @property
    def columns(self):
        return [self.instFluxCol, self.photoCalibCol]

    @property
    def name(self):
        return f'mag_{self.instFluxCol}'

    def _func(self, df):
        instFlux = df[self.instFluxCol]
        localCalib = df[self.photoCalibCol]
        return self.instFluxToMagnitude(instFlux, localCalib)
class LocalMagnitudeErr(LocalPhotometry):
    """Compute calibrated AB magnitude errors using the local calibration value.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    """

    @property
    def columns(self):
        return [self.instFluxCol, self.instFluxErrCol,
                self.photoCalibCol, self.photoCalibErrCol]

    @property
    def name(self):
        return f'magErr_{self.instFluxCol}'

    def _func(self, df):
        instFlux = df[self.instFluxCol]
        instFluxErr = df[self.instFluxErrCol]
        localCalib = df[self.photoCalibCol]
        localCalibErr = df[self.photoCalibErrCol]
        return self.instFluxErrToMagnitudeErr(instFlux, instFluxErr,
                                              localCalib, localCalibErr)
class LocalDipoleMeanFlux(LocalPhotometry):
    """Compute absolute mean of dipole fluxes.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    LocalDipoleMeanFlux
    LocalDipoleMeanFluxErr
    LocalDipoleDiffFlux
    LocalDipoleDiffFluxErr
    """
    def __init__(self,
                 instFluxPosCol,
                 instFluxNegCol,
                 instFluxPosErrCol,
                 instFluxNegErrCol,
                 photoCalibCol,
                 photoCalibErrCol,
                 **kwargs):
        self.instFluxNegCol = instFluxNegCol
        self.instFluxPosCol = instFluxPosCol
        self.instFluxNegErrCol = instFluxNegErrCol
        self.instFluxPosErrCol = instFluxPosErrCol
        self.photoCalibCol = photoCalibCol
        self.photoCalibErrCol = photoCalibErrCol
        # The negative lobe plays the role of the single flux column in the
        # base-class constructor.
        super().__init__(instFluxNegCol,
                         instFluxNegErrCol,
                         photoCalibCol,
                         photoCalibErrCol,
                         **kwargs)

    @property
    def columns(self):
        return [self.instFluxPosCol,
                self.instFluxNegCol,
                self.photoCalibCol]

    @property
    def name(self):
        return f'dipMeanFlux_{self.instFluxPosCol}_{self.instFluxNegCol}'

    def _func(self, df):
        localCalib = df[self.photoCalibCol]
        negFlux = np.fabs(self.instFluxToNanojansky(df[self.instFluxNegCol], localCalib))
        posFlux = np.fabs(self.instFluxToNanojansky(df[self.instFluxPosCol], localCalib))
        # Mean of the absolute calibrated lobe fluxes.
        return 0.5*(negFlux + posFlux)
class LocalDipoleMeanFluxErr(LocalDipoleMeanFlux):
    """Compute the error on the absolute mean of dipole fluxes.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    LocalDipoleMeanFlux
    LocalDipoleMeanFluxErr
    LocalDipoleDiffFlux
    LocalDipoleDiffFluxErr
    """

    @property
    def columns(self):
        return [self.instFluxPosCol,
                self.instFluxNegCol,
                self.instFluxPosErrCol,
                self.instFluxNegErrCol,
                self.photoCalibCol,
                self.photoCalibErrCol]

    @property
    def name(self):
        return f'dipMeanFluxErr_{self.instFluxPosCol}_{self.instFluxNegCol}'

    def _func(self, df):
        # Error propagation for 0.5*(|neg| + |pos|)*calib. Fixed missing
        # parentheses: the calibration-error term must scale the full sum
        # (|neg| + |pos|); previously only |pos| was multiplied by calibErr
        # (compare the correctly-parenthesized LocalDipoleDiffFluxErr).
        return 0.5*np.sqrt(
            ((np.fabs(df[self.instFluxNegCol]) + np.fabs(df[self.instFluxPosCol]))
             * df[self.photoCalibErrCol])**2
            + (df[self.instFluxNegErrCol]**2 + df[self.instFluxPosErrCol]**2)
            * df[self.photoCalibCol]**2)
class LocalDipoleDiffFlux(LocalDipoleMeanFlux):
    """Compute the absolute difference of dipole fluxes.

    Value is (abs(pos) - abs(neg))

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    LocalDipoleMeanFlux
    LocalDipoleMeanFluxErr
    LocalDipoleDiffFlux
    LocalDipoleDiffFluxErr
    """

    @property
    def columns(self):
        return [self.instFluxPosCol,
                self.instFluxNegCol,
                self.photoCalibCol]

    @property
    def name(self):
        return f'dipDiffFlux_{self.instFluxPosCol}_{self.instFluxNegCol}'

    def _func(self, df):
        localCalib = df[self.photoCalibCol]
        posFlux = np.fabs(self.instFluxToNanojansky(df[self.instFluxPosCol], localCalib))
        negFlux = np.fabs(self.instFluxToNanojansky(df[self.instFluxNegCol], localCalib))
        return posFlux - negFlux
class LocalDipoleDiffFluxErr(LocalDipoleMeanFlux):
    """Compute the error on the absolute difference of dipole fluxes.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    LocalDipoleMeanFlux
    LocalDipoleMeanFluxErr
    LocalDipoleDiffFlux
    LocalDipoleDiffFluxErr
    """

    @property
    def columns(self):
        return [self.instFluxPosCol,
                self.instFluxNegCol,
                self.instFluxPosErrCol,
                self.instFluxNegErrCol,
                self.photoCalibCol,
                self.photoCalibErrCol]

    @property
    def name(self):
        return f'dipDiffFluxErr_{self.instFluxPosCol}_{self.instFluxNegCol}'

    def _func(self, df):
        # Error propagation for (|pos| - |neg|)*calib.
        absDiff = np.fabs(df[self.instFluxPosCol]) - np.fabs(df[self.instFluxNegCol])
        calibTerm = (absDiff * df[self.photoCalibErrCol])**2
        fluxTerm = ((df[self.instFluxPosErrCol]**2 + df[self.instFluxNegErrCol]**2)
                    * df[self.photoCalibCol]**2)
        return np.sqrt(calibTerm + fluxTerm)
class Ratio(Functor):
    """Base class for returning the ratio of 2 columns.

    Can be used to compute a Signal to Noise ratio for any input flux.

    Parameters
    ----------
    numerator : `str`
        Name of the column to use at the numerator in the ratio
    denominator : `str`
        Name of the column to use as the denominator in the ratio.
    """
    def __init__(self,
                 numerator,
                 denominator,
                 **kwargs):
        self.numerator = numerator
        self.denominator = denominator
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.numerator, self.denominator]

    @property
    def name(self):
        return f'ratio_{self.numerator}_{self.denominator}'

    def _func(self, df):
        # Local import: the np.warnings alias was removed in NumPy >= 1.25.
        import warnings
        with warnings.catch_warnings():
            # Zero denominators and NaNs are expected inputs; suppress the noise.
            warnings.filterwarnings('ignore', r'invalid value encountered')
            warnings.filterwarnings('ignore', r'divide by zero')
            return df[self.numerator] / df[self.denominator]