Coverage for python/lsst/pipe/tasks/functors.py : 36%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of pipe_tasks.
2#
3# LSST Data Management System
4# This product includes software developed by the
5# LSST Project (http://www.lsst.org/).
6# See COPYRIGHT file at the top of the source tree.
7#
8# This program is free software: you can redistribute it and/or modify
9# it under the terms of the GNU General Public License as published by
10# the Free Software Foundation, either version 3 of the License, or
11# (at your option) any later version.
12#
13# This program is distributed in the hope that it will be useful,
14# but WITHOUT ANY WARRANTY; without even the implied warranty of
15# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16# GNU General Public License for more details.
17#
18# You should have received a copy of the LSST License Statement and
19# the GNU General Public License along with this program. If not,
20# see <https://www.lsstcorp.org/LegalNotices/>.
21#
import os.path
import re
import warnings
from itertools import product

import astropy.units as u
import numpy as np
import pandas as pd
import yaml

from lsst.daf.butler import DeferredDatasetHandle
from lsst.daf.persistence import doImport

from .parquetTable import ParquetTable, MultilevelParquetTable
def init_fromDict(initDict, basePath='lsst.pipe.tasks.functors',
                  typeKey='functor', name=None):
    """Initialize an object defined in a dictionary

    The object needs to be importable as
        f'{basePath}.{initDict[typeKey]}'
    The positional and keyword arguments (if any) are contained in
    "args" and "kwargs" entries in the dictionary, respectively.
    This is used in `functors.CompositeFunctor.from_yaml` to initialize
    a composite functor from a specification in a YAML file.

    Parameters
    ----------
    initDict : dictionary
        Dictionary describing object's initialization.  Must contain
        an entry keyed by ``typeKey`` that is the name of the object,
        relative to ``basePath``.
    basePath : str
        Path relative to module in which ``initDict[typeKey]`` is defined.
    typeKey : str
        Key of ``initDict`` that is the name of the object
        (relative to ``basePath``).
    name : str, optional
        Name of the functor being built; used only to produce a more
        informative error message if construction fails.
    """
    initDict = initDict.copy()
    # TO DO: DM-21956 We should be able to define functors outside this module
    pythonType = doImport(f'{basePath}.{initDict.pop(typeKey)}')
    # 'args' may be a single string or a list; normalize to a list.  All
    # remaining entries of the dictionary are passed as keyword arguments.
    args = initDict.pop('args', [])
    if isinstance(args, str):
        args = [args]
    try:
        element = pythonType(*args, **initDict)
    except Exception as e:
        message = f'Error in constructing functor "{name}" of type {pythonType.__name__} with args: {args}'
        # Chain the original exception so the underlying cause and its
        # traceback are not lost (previously the cause was discarded).
        raise type(e)(message, e.args) from e
    return element
class Functor(object):
    """Define and execute a calculation on a ParquetTable

    The `__call__` method accepts either a `ParquetTable` object or a
    `DeferredDatasetHandle`, and returns the
    result of the calculation as a single column.  Each functor defines what
    columns are needed for the calculation, and only these columns are read
    from the `ParquetTable`.

    The action of `__call__` consists of two steps: first, loading the
    necessary columns from disk into memory as a `pandas.DataFrame` object;
    and second, performing the computation on this dataframe and returning the
    result.

    To define a new `Functor`, a subclass must define a `_func` method,
    that takes a `pandas.DataFrame` and returns result in a `pandas.Series`.
    In addition, it must define the following attributes

    * `_columns`: The columns necessary to perform the calculation
    * `name`: A name appropriate for a figure axis label
    * `shortname`: A name appropriate for use as a dictionary key

    On initialization, a `Functor` should declare what band (`filt` kwarg)
    and dataset (e.g. `'ref'`, `'meas'`, `'forced_src'`) it is intended to be
    applied to. This enables the `_get_data` method to extract the proper
    columns from the parquet file. If not specified, the dataset will fall back
    on the `_defaultDataset` attribute. If band is not specified and `dataset`
    is anything other than `'ref'`, then an error will be raised when trying to
    perform the calculation.

    As currently implemented, `Functor` is only set up to expect a
    dataset of the format of the `deepCoadd_obj` dataset; that is, a
    dataframe with a multi-level column index, with the levels of the
    column index being `band`, `dataset`, and `column`.  This is defined
    in the `_columnLevels` attribute, as well as being implicit in the role
    of the `filt` and `dataset` attributes defined at initialization.  In
    addition, the `_get_data` method that reads the dataframe from the
    `ParquetTable` will return a dataframe with column index levels defined
    by the `_dfLevels` attribute; by default, this is `column`.

    The `_columnLevels` and `_dfLevels` attributes should generally not need
    to be changed, unless `_func` needs columns from multiple filters or
    datasets to do the calculation.  An example of this is the
    `lsst.pipe.tasks.functors.Color` functor, for which
    `_dfLevels = ('band', 'column')`, and `_func` expects the dataframe it
    gets to have those levels in the column index.

    Parameters
    ----------
    filt : str
        Filter upon which to do the calculation
    dataset : str
        Dataset upon which to do the calculation
        (e.g., 'ref', 'meas', 'forced_src').
    noDup : bool, optional
        If set, overrides the class-level `_defaultNoDup` flag; see the
        `noDup` property.
    """

    _defaultDataset = 'ref'
    _columnLevels = ('band', 'dataset', 'column')
    _dfLevels = ('column',)
    _defaultNoDup = False

    def __init__(self, filt=None, dataset=None, noDup=None):
        self.filt = filt
        self.dataset = dataset if dataset is not None else self._defaultDataset
        self._noDup = noDup

    @property
    def noDup(self):
        # Instance-level override takes precedence over the class default.
        if self._noDup is not None:
            return self._noDup
        else:
            return self._defaultNoDup

    @property
    def columns(self):
        """Columns required to perform calculation
        """
        if not hasattr(self, '_columns'):
            raise NotImplementedError('Must define columns property or _columns attribute')
        return self._columns

    def _get_data_columnLevels(self, data, columnIndex=None):
        """Gets the names of the column index levels

        This should only be called in the context of a multilevel table.
        The logic here is to enable this to work both with the gen2
        `MultilevelParquetTable` and with the gen3 `DeferredDatasetHandle`.

        Parameters
        ----------
        data : `MultilevelParquetTable` or `DeferredDatasetHandle`
        columnIndex (optional): pandas `Index` object
            if not passed, then it is read from the `DeferredDatasetHandle`
        """
        if isinstance(data, DeferredDatasetHandle):
            if columnIndex is None:
                columnIndex = data.get(component="columns")
        # If we have a columnIndex (either passed in or just read), use it.
        if columnIndex is not None:
            return columnIndex.names
        # Otherwise fall back to the gen2 table's own accessor.
        if isinstance(data, MultilevelParquetTable):
            return data.columnLevels
        else:
            raise TypeError(f"Unknown type for data: {type(data)}!")

    def _get_data_columnLevelNames(self, data, columnIndex=None):
        """Gets the content of each of the column levels for a multilevel table

        Similar to `_get_data_columnLevels`, this enables backward
        compatibility with gen2.

        Mirrors original gen2 implementation within
        `pipe.tasks.parquetTable.MultilevelParquetTable`
        """
        if isinstance(data, DeferredDatasetHandle):
            if columnIndex is None:
                columnIndex = data.get(component="columns")
        if columnIndex is not None:
            # Unique values of each level, keyed by level name.
            columnLevels = columnIndex.names
            columnLevelNames = {
                level: list(np.unique(np.array([c for c in columnIndex])[:, i]))
                for i, level in enumerate(columnLevels)
            }
            return columnLevelNames

        if isinstance(data, MultilevelParquetTable):
            return data.columnLevelNames
        else:
            raise TypeError(f"Unknown type for data: {type(data)}!")

    def _colsFromDict(self, colDict, columnIndex=None):
        """Converts dictionary column specification to a list of columns

        This mirrors the original gen2 implementation within
        `pipe.tasks.parquetTable.MultilevelParquetTable`
        """
        new_colDict = {}
        columnLevels = self._get_data_columnLevels(None, columnIndex=columnIndex)

        for i, lev in enumerate(columnLevels):
            if lev in colDict:
                if isinstance(colDict[lev], str):
                    new_colDict[lev] = [colDict[lev]]
                else:
                    new_colDict[lev] = colDict[lev]
            else:
                # Level not constrained by the dict: take all of its values.
                new_colDict[lev] = columnIndex.levels[i]

        levelCols = [new_colDict[lev] for lev in columnLevels]
        # Cartesian product over all levels gives the full tuple list.
        cols = product(*levelCols)
        return list(cols)

    def multilevelColumns(self, data, columnIndex=None, returnTuple=False):
        """Returns columns needed by functor from multilevel dataset

        To access tables with multilevel column structure, the
        `MultilevelParquetTable` or `DeferredDatasetHandle` need to be
        passed either a list of tuples or a dictionary.

        Parameters
        ----------
        data : `MultilevelParquetTable` or `DeferredDatasetHandle`
        columnIndex (optional): pandas `Index` object
            either passed or read in from `DeferredDatasetHandle`.
        returnTuple : bool
            If true, then return a list of tuples rather than the column
            dictionary specification.  This is set to `True` by
            `CompositeFunctor` in order to be able to combine columns from
            the various component functors.
        """
        if isinstance(data, DeferredDatasetHandle) and columnIndex is None:
            columnIndex = data.get(component="columns")

        # Confirm that the dataset has the column levels the functor is
        # expecting it to have.
        columnLevels = self._get_data_columnLevels(data, columnIndex)

        if not set(columnLevels) == set(self._columnLevels):
            raise ValueError(
                "ParquetTable does not have the expected column levels. "
                f"Got {columnLevels}; expected {self._columnLevels}."
            )

        columnDict = {'column': self.columns,
                      'dataset': self.dataset}
        if self.filt is None:
            columnLevelNames = self._get_data_columnLevelNames(data, columnIndex)
            if "band" in columnLevels:
                if self.dataset == "ref":
                    # 'ref' columns are band-independent; any band will do.
                    columnDict["band"] = columnLevelNames["band"][0]
                else:
                    raise ValueError(f"'filt' not set for functor {self.name}"
                                     f"(dataset {self.dataset}) "
                                     "and ParquetTable "
                                     "contains multiple filters in column index. "
                                     "Set 'filt' or set 'dataset' to 'ref'.")
        else:
            columnDict['band'] = self.filt

        if isinstance(data, MultilevelParquetTable):
            return data._colsFromDict(columnDict)
        elif isinstance(data, DeferredDatasetHandle):
            if returnTuple:
                return self._colsFromDict(columnDict, columnIndex=columnIndex)
            else:
                return columnDict

    def _func(self, df, dropna=True):
        raise NotImplementedError('Must define calculation on dataframe')

    def _get_columnIndex(self, data):
        """Return columnIndex, or None for inputs with no stored index.
        """
        if isinstance(data, DeferredDatasetHandle):
            return data.get(component="columns")
        else:
            return None

    def _get_data(self, data):
        """Retrieve dataframe necessary for calculation.

        The data argument can be a DataFrame, a ParquetTable instance, or a
        gen3 DeferredDatasetHandle

        Returns dataframe upon which `self._func` can act.

        N.B. while passing a raw pandas `DataFrame` *should* work here, it
        has not been tested.
        """
        if isinstance(data, pd.DataFrame):
            return data

        # First thing to do: check to see if the data source has a multilevel
        # column index or not.
        columnIndex = self._get_columnIndex(data)
        is_multiLevel = isinstance(data, MultilevelParquetTable) or isinstance(columnIndex, pd.MultiIndex)

        # Simple single-level parquet table, gen2
        if isinstance(data, ParquetTable) and not is_multiLevel:
            columns = self.columns
            df = data.toDataFrame(columns=columns)
            return df

        # Get proper columns specification for this functor
        if is_multiLevel:
            columns = self.multilevelColumns(data, columnIndex=columnIndex)
        else:
            columns = self.columns

        if isinstance(data, MultilevelParquetTable):
            # Load in-memory dataframe with appropriate columns the gen2 way
            df = data.toDataFrame(columns=columns, droplevels=False)
        elif isinstance(data, DeferredDatasetHandle):
            # Load in-memory dataframe with appropriate columns the gen3 way
            df = data.get(parameters={"columns": columns})
        else:
            # Previously an unrecognized type fell through here and caused
            # an obscure UnboundLocalError on `df`; fail explicitly instead.
            raise TypeError(f"Unknown type for data: {type(data)}!")

        # Drop unnecessary column levels
        if is_multiLevel:
            df = self._setLevels(df)

        return df

    def _setLevels(self, df):
        # Keep only the column index levels named in _dfLevels.
        levelsToDrop = [n for n in df.columns.names if n not in self._dfLevels]
        df.columns = df.columns.droplevel(levelsToDrop)
        return df

    def _dropna(self, vals):
        return vals.dropna()

    def __call__(self, data, dropna=False):
        """Load the required columns and perform the calculation.

        If `_func` raises, fall back to `fail`, which produces an all-NaN
        result on the correct index.
        """
        # NOTE: errors raised by _get_data now propagate directly.
        # Previously _get_data was inside the try block, so a failure there
        # surfaced as a confusing NameError on `df` inside the handler.
        df = self._get_data(data)
        try:
            vals = self._func(df)
        except Exception:
            vals = self.fail(df)
        if dropna:
            vals = self._dropna(vals)

        return vals

    def difference(self, data1, data2, **kwargs):
        """Computes difference between functor called on two different ParquetTable objects
        """
        return self(data1, **kwargs) - self(data2, **kwargs)

    def fail(self, df):
        # Fallback result: all-NaN series aligned to the input index.
        return pd.Series(np.full(len(df), np.nan), index=df.index)

    @property
    def name(self):
        """Full name of functor (suitable for figure labels)
        """
        # NOTE(review): deliberately *returns* (not raises) the class so
        # that error messages interpolating self.name still get a value;
        # left unchanged for backward compatibility.
        return NotImplementedError

    @property
    def shortname(self):
        """Short name of functor (suitable for column name/dict key)
        """
        return self.name
class CompositeFunctor(Functor):
    """Perform multiple calculations at once on a catalog

    The role of a `CompositeFunctor` is to group together computations from
    multiple functors.  Instead of returning `pandas.Series` a
    `CompositeFunctor` returns a `pandas.Dataframe`, with the column names
    being the keys of `funcDict`.

    The `columns` attribute of a `CompositeFunctor` is the union of all columns
    in all the component functors.

    A `CompositeFunctor` does not use a `_func` method itself; rather,
    when a `CompositeFunctor` is called, all its columns are loaded
    at once, and the resulting dataframe is passed to the `_func` method of
    each component functor.  This has the advantage of only doing I/O
    (reading from parquet file) once, and works because each individual
    `_func` method of each component functor does not care if there are
    *extra* columns in the dataframe being passed; only that it must contain
    *at least* the `columns` it expects.

    An important and useful class method is `from_yaml`, which takes as
    argument the path to a YAML file specifying a collection of functors.

    Parameters
    ----------
    funcs : `dict` or `list`
        Dictionary or list of functors.  If a list, then it will be converted
        into a dictonary according to the `.shortname` attribute of each functor.
    """
    dataset = None

    def __init__(self, funcs, **kwargs):

        # Accept any dict, including subclasses such as OrderedDict; the
        # previous exact `type(funcs) == dict` comparison rejected them.
        if isinstance(funcs, dict):
            self.funcDict = funcs
        else:
            self.funcDict = {f.shortname: f for f in funcs}

        self._filt = None

        super().__init__(**kwargs)

    @property
    def filt(self):
        return self._filt

    @filt.setter
    def filt(self, filt):
        # Propagate the filter setting to every component functor.
        if filt is not None:
            for _, f in self.funcDict.items():
                f.filt = filt
        self._filt = filt

    def update(self, new):
        """Add the functors from a dict or another CompositeFunctor."""
        if isinstance(new, dict):
            self.funcDict.update(new)
        elif isinstance(new, CompositeFunctor):
            self.funcDict.update(new.funcDict)
        else:
            raise TypeError('Can only update with dictionary or CompositeFunctor.')

        # Make sure new functors have the same 'filt' set
        if self.filt is not None:
            self.filt = self.filt

    @property
    def columns(self):
        # Union of the columns needed by all component functors.
        return list(set([x for y in [f.columns for f in self.funcDict.values()] for x in y]))

    def multilevelColumns(self, data, **kwargs):
        # Get the union of columns for all component functors.
        # Note the need to have `returnTuple=True` here.
        return list(
            set(
                [
                    x
                    for y in [
                        f.multilevelColumns(data, returnTuple=True, **kwargs) for f in self.funcDict.values()
                    ]
                    for x in y
                ]
            )
        )

    def __call__(self, data, **kwargs):
        """Apply the functor to the data table

        Parameters
        ----------
        data : `lsst.daf.butler.DeferredDatasetHandle`,
               `lsst.pipe.tasks.parquetTable.MultilevelParquetTable`,
               `lsst.pipe.tasks.parquetTable.ParquetTable`,
               or `pandas.DataFrame`.
            The table or a pointer to a table on disk from which columns can
            be accessed
        """
        columnIndex = self._get_columnIndex(data)

        # First, determine whether data has a multilevel index (either gen2 or gen3)
        is_multiLevel = isinstance(data, MultilevelParquetTable) or isinstance(columnIndex, pd.MultiIndex)

        # Multilevel index, gen2 or gen3
        if is_multiLevel:
            columns = self.multilevelColumns(data, columnIndex=columnIndex)

            if isinstance(data, MultilevelParquetTable):
                # Read data into memory the gen2 way
                df = data.toDataFrame(columns=columns, droplevels=False)
            elif isinstance(data, DeferredDatasetHandle):
                # Read data into memory the gen3 way
                df = data.get(parameters={"columns": columns})

            valDict = {}
            for k, f in self.funcDict.items():
                # `subdf` must be bound before the try block: if the column
                # selection itself raises, the handler previously hit a
                # NameError on `subdf` instead of producing a fail() column.
                subdf = None
                try:
                    subdf = f._setLevels(
                        df[f.multilevelColumns(data, returnTuple=True, columnIndex=columnIndex)]
                    )
                    valDict[k] = f._func(subdf)
                except Exception:
                    # If selection failed, use the full dataframe so fail()
                    # still produces a NaN column with the right index.
                    valDict[k] = f.fail(subdf if subdf is not None else df)

        else:
            if isinstance(data, DeferredDatasetHandle):
                # input if Gen3 deferLoad=True
                df = data.get(parameters={"columns": self.columns})
            elif isinstance(data, pd.DataFrame):
                # input if Gen3 deferLoad=False
                df = data
            else:
                # Original Gen2 input is type ParquetTable and the fallback
                df = data.toDataFrame(columns=self.columns)

            valDict = {k: f._func(df) for k, f in self.funcDict.items()}

        try:
            valDf = pd.concat(valDict, axis=1)
        except TypeError:
            # Surface which functor produced a non-concatenatable result.
            print([(k, type(v)) for k, v in valDict.items()])
            raise

        if kwargs.get('dropna', False):
            valDf = valDf.dropna(how='any')

        return valDf

    @classmethod
    def renameCol(cls, col, renameRules):
        """Apply prefix-rename rules to ``col``.

        Every rule whose ``old`` prefix matches is applied, in order.
        """
        if renameRules is None:
            return col
        for old, new in renameRules:
            if col.startswith(old):
                col = col.replace(old, new)
        return col

    @classmethod
    def from_file(cls, filename, **kwargs):
        """Build a CompositeFunctor from a YAML specification file."""
        # Allow environment variables in the filename.
        filename = os.path.expandvars(filename)
        with open(filename) as f:
            translationDefinition = yaml.safe_load(f)

        return cls.from_yaml(translationDefinition, **kwargs)

    @classmethod
    def from_yaml(cls, translationDefinition, **kwargs):
        """Build a CompositeFunctor from a parsed YAML specification."""
        funcs = {}
        for func, val in translationDefinition['funcs'].items():
            funcs[func] = init_fromDict(val, name=func)

        if 'flag_rename_rules' in translationDefinition:
            renameRules = translationDefinition['flag_rename_rules']
        else:
            renameRules = None

        if 'refFlags' in translationDefinition:
            for flag in translationDefinition['refFlags']:
                funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='ref')

        if 'forcedFlags' in translationDefinition:
            for flag in translationDefinition['forcedFlags']:
                funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='forced_src')

        if 'flags' in translationDefinition:
            for flag in translationDefinition['flags']:
                funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='meas')

        return cls(funcs, **kwargs)
def mag_aware_eval(df, expr):
    """Evaluate an expression on a DataFrame, knowing what the 'mag' function means

    Builds on `pandas.DataFrame.eval`, which parses and executes math on dataframes.

    Parameters
    ----------
    df : pandas.DataFrame
        Dataframe on which to evaluate expression.
    expr : str
        Expression.

    Returns
    -------
    val : result of `DataFrame.eval` on the translated expression.
    """
    # NOTE: the `truediv=True` keyword previously passed to df.eval was
    # deprecated in pandas 1.0 and removed in pandas 2.0; true division is
    # the only behavior under Python 3, so dropping it preserves results.
    try:
        # Translate mag(col) into an AB-style log expression on the column.
        expr_new = re.sub(r'mag\((\w+)\)', r'-2.5*log(\g<1>)/log(10)', expr)
        val = df.eval(expr_new)
    except Exception:  # Should check what actually gets raised
        # Fall back to treating the mag() argument as a flux-column prefix,
        # appending the '_instFlux' suffix before evaluating.
        expr_new = re.sub(r'mag\((\w+)\)', r'-2.5*log(\g<1>_instFlux)/log(10)', expr)
        val = df.eval(expr_new)
    return val
class CustomFunctor(Functor):
    """Arbitrary computation on a catalog

    Column names (and thus the columns to be loaded from catalog) are found
    by finding all words and trying to ignore all "math-y" words.

    Parameters
    ----------
    expr : str
        Expression to evaluate, to be parsed and executed by `mag_aware_eval`.
    """
    # Function names to exclude when scanning the expression for columns.
    _ignore_words = ('mag', 'sin', 'cos', 'exp', 'log', 'sqrt')

    def __init__(self, expr, **kwargs):
        self.expr = expr
        super().__init__(**kwargs)

    @property
    def name(self):
        return self.expr

    @property
    def columns(self):
        """Columns referenced by the expression."""
        # Arguments of mag(...) refer to flux columns.
        flux_args = re.findall(r'mag\(\s*(\w+)\s*\)', self.expr)

        # Every identifier-like word that is not a known math function.
        words = re.findall(r'[a-zA-Z_]+', self.expr)
        cols = [w for w in words if w not in self._ignore_words]

        bare_prefixes = []
        for arg in flux_args:
            if re.search('_instFlux$', arg):
                cols.append(arg)
            else:
                # mag() was given a bare prefix; the real column carries the
                # '_instFlux' suffix, and the bare prefix is not a column.
                cols.append(f'{arg}_instFlux')
                bare_prefixes.append(arg)

        return list({c for c in cols if c not in bare_prefixes})

    def _func(self, df):
        return mag_aware_eval(df, self.expr)
class Column(Functor):
    """Functor that returns the column with the specified name.

    Parameters
    ----------
    col : str
        Name of the column to extract.
    """

    def __init__(self, col, **kwargs):
        self.col = col
        super().__init__(**kwargs)

    @property
    def name(self):
        return self.col

    @property
    def columns(self):
        return [self.col]

    def _func(self, df):
        # Simply select the stored column from the dataframe.
        return df[self.col]
class Index(Functor):
    """Return the value of the index for each object
    """
    # Dummy column: the I/O layer requires at least one column to be
    # requested even though only the index itself is used.
    columns = ['coord_ra']
    _defaultDataset = 'ref'
    _defaultNoDup = True

    def _func(self, df):
        idx = df.index
        return pd.Series(idx, index=idx)
class IDColumn(Column):
    """Expose the dataframe index (object id) as a column."""
    col = 'id'
    _allow_difference = False
    _defaultNoDup = True

    def _func(self, df):
        # The id lives in the index, not in the 'id' column itself.
        idx = df.index
        return pd.Series(idx, index=idx)
class FootprintNPix(Column):
    """Column functor fixed to the footprint pixel-count column."""
    col = 'base_Footprint_nPix'
class CoordColumn(Column):
    """Base class for coordinate column, in degrees
    """
    # Subclasses may set False if the stored values are already in degrees.
    _radians = True

    def __init__(self, col, **kwargs):
        super().__init__(col, **kwargs)

    def _func(self, df):
        # Must not modify the original column in case it is used by another
        # functor; the arithmetic below produces a new series.
        if self._radians:
            return df[self.col] * 180 / np.pi
        return df[self.col]
class RAColumn(CoordColumn):
    """Right Ascension, in degrees
    """
    name = 'RA'
    _defaultNoDup = True

    def __init__(self, **kwargs):
        # Always reads the 'coord_ra' column (radians, per CoordColumn).
        super().__init__('coord_ra', **kwargs)

    def __call__(self, catalog, **kwargs):
        # Delegates straight to CoordColumn.__call__.
        return super().__call__(catalog, **kwargs)
class DecColumn(CoordColumn):
    """Declination, in degrees
    """
    name = 'Dec'
    _defaultNoDup = True

    def __init__(self, **kwargs):
        # Always reads the 'coord_dec' column (radians, per CoordColumn).
        super().__init__('coord_dec', **kwargs)

    def __call__(self, catalog, **kwargs):
        # Delegates straight to CoordColumn.__call__.
        return super().__call__(catalog, **kwargs)
def fluxName(col):
    """Return ``col`` with an '_instFlux' suffix, appending it if missing."""
    suffix = '_instFlux'
    return col if col.endswith(suffix) else col + suffix
def fluxErrName(col):
    """Return ``col`` with an '_instFluxErr' suffix, appending it if missing."""
    suffix = '_instFluxErr'
    return col if col.endswith(suffix) else col + suffix
class Mag(Functor):
    """Compute calibrated magnitude

    Takes a `calib` argument, which returns the flux at mag=0
    as `calib.getFluxMag0()`.  If not provided, then the default
    `fluxMag0` is 63095734448.0194, which is default for HSC.
    This default should be removed in DM-21955

    This calculation hides warnings about invalid values and dividing by zero.

    As for all functors, a `dataset` and `filt` kwarg should be provided upon
    initialization.  Unlike the default `Functor`, however, the default
    dataset for a `Mag` is `'meas'`, rather than `'ref'`.

    Parameters
    ----------
    col : `str`
        Name of flux column from which to compute magnitude.  Can be parseable
        by `lsst.pipe.tasks.functors.fluxName` function---that is, you can pass
        `'modelfit_CModel'` instead of `'modelfit_CModel_instFlux'`) and it will
        understand.
    calib : `lsst.afw.image.calib.Calib` (optional)
        Object that knows zero point.
    """
    _defaultDataset = 'meas'

    def __init__(self, col, calib=None, **kwargs):
        self.col = fluxName(col)
        self.calib = calib
        if calib is not None:
            self.fluxMag0 = calib.getFluxMag0()[0]
        else:
            # TO DO: DM-21955 Replace hard coded photometic calibration values
            self.fluxMag0 = 63095734448.0194

        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.col]

    def _func(self, df):
        # `np.warnings` was an accidental alias of the stdlib warnings
        # module, removed in NumPy 2.0; use the stdlib module directly.
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', r'invalid value encountered')
            warnings.filterwarnings('ignore', r'divide by zero')
            return -2.5*np.log10(df[self.col] / self.fluxMag0)

    @property
    def name(self):
        return f'mag_{self.col}'
class MagErr(Mag):
    """Compute calibrated magnitude uncertainty

    Takes the same `calib` object as `lsst.pipe.tasks.functors.Mag`.

    Parameters
    ----------
    col : `str`
        Name of flux column
    calib : `lsst.afw.image.calib.Calib` (optional)
        Object that knows zero point.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        if self.calib is not None:
            self.fluxMag0Err = self.calib.getFluxMag0()[1]
        else:
            # No calib: assume a perfectly known zero point.
            self.fluxMag0Err = 0.

    @property
    def columns(self):
        return [self.col, self.col + 'Err']

    def _func(self, df):
        # `np.warnings` was an accidental alias of the stdlib warnings
        # module, removed in NumPy 2.0; use the stdlib module directly.
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', r'invalid value encountered')
            warnings.filterwarnings('ignore', r'divide by zero')
            fluxCol, fluxErrCol = self.columns
            # Propagate the relative flux error and the zero-point error in
            # quadrature, converted to magnitudes.
            x = df[fluxErrCol] / df[fluxCol]
            y = self.fluxMag0Err / self.fluxMag0
            magErr = (2.5 / np.log(10.)) * np.sqrt(x*x + y*y)
            return magErr

    @property
    def name(self):
        return super().name + '_err'
class NanoMaggie(Mag):
    """Flux scaled relative to the zero-point flux (``fluxMag0``) times 1e9.
    """

    def _func(self, df):
        # fluxMag0 is the flux at mag=0, set in Mag.__init__; the 1e9
        # factor expresses the flux ratio in nanomaggie-style units.
        return (df[self.col] / self.fluxMag0) * 1e9
class MagDiff(Functor):
    """Functor to calculate magnitude difference"""
    # NOTE: the docstring above previously appeared *after* this assignment,
    # making it a no-op string expression rather than the class docstring.
    _defaultDataset = 'meas'

    def __init__(self, col1, col2, **kwargs):
        self.col1 = fluxName(col1)
        self.col2 = fluxName(col2)
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.col1, self.col2]

    def _func(self, df):
        # `np.warnings` was an accidental alias of the stdlib warnings
        # module, removed in NumPy 2.0; use the stdlib module directly.
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', r'invalid value encountered')
            warnings.filterwarnings('ignore', r'divide by zero')
            # Magnitude difference is -2.5 log10 of the flux ratio; no zero
            # point is needed since it cancels in the difference.
            return -2.5*np.log10(df[self.col1]/df[self.col2])

    @property
    def name(self):
        return f'(mag_{self.col1} - mag_{self.col2})'

    @property
    def shortname(self):
        return f'magDiff_{self.col1}_{self.col2}'
class Color(Functor):
    """Compute the color between two filters

    Computes color by initializing two different `Mag`
    functors based on the `col` and filters provided, and
    then returning the difference.

    This is enabled by the `_func` expecting a dataframe with a
    multilevel column index, with both `'band'` and `'column'`,
    instead of just `'column'`, which is the `Functor` default.
    This is controlled by the `_dfLevels` attribute.

    Also of note, the default dataset for `Color` is `'forced_src'`,
    whereas for `Mag` it is `'meas'`.

    Parameters
    ----------
    col : str
        Name of flux column from which to compute; same as would be passed to
        `lsst.pipe.tasks.functors.Mag`.

    filt2, filt1 : str
        Filters from which to compute magnitude difference.
        Color computed is `Mag(filt2) - Mag(filt1)`.
    """
    _defaultDataset = 'forced_src'
    _dfLevels = ('band', 'column')
    _defaultNoDup = True

    def __init__(self, col, filt2, filt1, **kwargs):
        self.col = fluxName(col)
        if filt2 == filt1:
            raise RuntimeError("Cannot compute Color for %s: %s - %s " % (col, filt2, filt1))
        self.filt2 = filt2
        self.filt1 = filt1

        # Component magnitude functors, each pinned to one of the two bands.
        self.mag2 = Mag(col, filt=filt2, **kwargs)
        self.mag1 = Mag(col, filt=filt1, **kwargs)

        super().__init__(**kwargs)

    @property
    def filt(self):
        # A Color spans two bands, so it has no single filt; always None.
        return None

    @filt.setter
    def filt(self, filt):
        # Deliberately ignore attempts to set filt (e.g. by
        # CompositeFunctor's filt setter): the two bands are fixed
        # at construction.
        pass

    def _func(self, df):
        # Expects a dataframe with ('band', 'column') column levels (see
        # _dfLevels): select each band's columns, then difference the mags.
        mag2 = self.mag2._func(df[self.filt2])
        mag1 = self.mag1._func(df[self.filt1])
        return mag2 - mag1

    @property
    def columns(self):
        return [self.mag1.col, self.mag2.col]

    def multilevelColumns(self, parq, **kwargs):
        # NOTE(review): these tuples are ordered (dataset, band, column),
        # while `_columnLevels` declares ('band', 'dataset', 'column') --
        # confirm against the actual deepCoadd_obj column index ordering.
        return [(self.dataset, self.filt1, self.col), (self.dataset, self.filt2, self.col)]

    @property
    def name(self):
        return f'{self.filt2} - {self.filt1} ({self.col})'

    @property
    def shortname(self):
        return f"{self.col}_{self.filt2.replace('-', '')}m{self.filt1.replace('-', '')}"
class Labeller(Functor):
    """Main function of this subclass is to override the dropna=True
    """
    # Label used for objects whose classification input is missing.
    _null_label = 'null'
    # Labels are categorical, so difference() is not meaningful.
    _allow_difference = False
    name = 'label'
    # Subclasses may set True to return plain strings instead of Categorical.
    _force_str = False

    def __call__(self, parq, dropna=False, **kwargs):
        # Force dropna=False regardless of the caller's setting, so rows
        # with null labels are never silently removed.
        return super().__call__(parq, dropna=False, **kwargs)
class StarGalaxyLabeller(Labeller):
    """Label objects 'star'/'galaxy'/'null' from the extendedness column."""
    _columns = ["base_ClassificationExtendedness_value"]
    _column = "base_ClassificationExtendedness_value"

    def _func(self, df):
        extendedness = df[self._columns][self._column]
        missing = extendedness.isnull()
        # Code 1 where extendedness < 0.5, 0 otherwise; code 2 marks rows
        # with a missing extendedness value.
        codes = (extendedness < 0.5).astype(int).mask(missing, 2)

        # TODO: DM-21954 Look into veracity of inline comment below
        # are these backwards?
        categories = ['galaxy', 'star', self._null_label]
        label = pd.Series(pd.Categorical.from_codes(codes, categories=categories),
                          index=extendedness.index, name='label')
        if self._force_str:
            label = label.astype(str)
        return label
class NumStarLabeller(Labeller):
    # Column holding, per object, the number of bands in which it is
    # flagged as a star.
    _columns = ['numStarFlags']
    # NOTE(review): this mapping appears unused -- _func builds its own
    # label list below; confirm whether this is dead code.
    labels = {"star": 0, "maybe": 1, "notStar": 2}

    def _func(self, df):
        x = df[self._columns][self._columns[0]]

        # Number of filters
        n = len(x.unique()) - 1

        # Bin edges: counts <= 0 -> 'noStar', 1..n-1 -> 'maybe', n -> 'star'.
        labels = ['noStar', 'maybe', 'star']
        label = pd.Series(pd.cut(x, [-1, 0, n-1, n], labels=labels),
                          index=x.index, name='label')

        if self._force_str:
            label = label.astype(str)

        return label
class DeconvolvedMoments(Functor):
    """Source second-moment sum with the PSF moment sum subtracted."""
    name = 'Deconvolved Moments'
    shortname = 'deconvolvedMoments'
    _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
                "ext_shapeHSM_HsmSourceMoments_yy",
                "base_SdssShape_xx", "base_SdssShape_yy",
                "ext_shapeHSM_HsmPsfMoments_xx",
                "ext_shapeHSM_HsmPsfMoments_yy")

    def _func(self, df):
        """Calculate deconvolved moments"""
        # Prefer the HSM source moments; produce NaN so the SdssShape sum
        # is substituted below where HSM is unavailable.
        if "ext_shapeHSM_HsmSourceMoments_xx" in df.columns:
            hsm = df["ext_shapeHSM_HsmSourceMoments_xx"] + df["ext_shapeHSM_HsmSourceMoments_yy"]
        else:
            hsm = np.ones(len(df))*np.nan

        sdss = df["base_SdssShape_xx"] + df["base_SdssShape_yy"]

        if "ext_shapeHSM_HsmPsfMoments_xx" in df.columns:
            psf = df["ext_shapeHSM_HsmPsfMoments_xx"] + df["ext_shapeHSM_HsmPsfMoments_yy"]
        else:
            # LSST does not have shape.sdss.psf.  Could instead add
            # base_PsfShape to the catalog using
            # exposure.getPsf().computeShape(s.getCentroid()).getIxx()
            raise RuntimeError('No psf shape parameter found in catalog')

        # Fall back to the SdssShape sum wherever the HSM sum is non-finite.
        return hsm.where(np.isfinite(hsm), sdss) - psf
class SdssTraceSize(Functor):
    """Functor to calculate SDSS trace radius size for sources"""
    name = "SDSS Trace Size"
    shortname = 'sdssTrace'
    _columns = ("base_SdssShape_xx", "base_SdssShape_yy")

    def _func(self, df):
        # Trace radius: sqrt of the mean of the two second moments.
        return np.sqrt(0.5*(df["base_SdssShape_xx"] + df["base_SdssShape_yy"]))
class PsfSdssTraceSizeDiff(Functor):
    """Functor to calculate SDSS trace radius size difference (%) between object and psf model"""
    name = "PSF - SDSS Trace Size"
    shortname = 'psf_sdssTrace'
    _columns = ("base_SdssShape_xx", "base_SdssShape_yy",
                "base_SdssShape_psf_xx", "base_SdssShape_psf_yy")

    def _func(self, df):
        srcSize = np.sqrt(0.5*(df["base_SdssShape_xx"] + df["base_SdssShape_yy"]))
        psfSize = np.sqrt(0.5*(df["base_SdssShape_psf_xx"] + df["base_SdssShape_psf_yy"]))
        # Percent difference, normalized by the mean of the two sizes.
        return 100*(srcSize - psfSize)/(0.5*(srcSize + psfSize))
class HsmTraceSize(Functor):
    """Functor computing the HSM trace radius size of a source,
    ``sqrt(0.5 * (Ixx + Iyy))``, from the HSM source moments.
    """
    name = 'HSM Trace Size'
    shortname = 'hsmTrace'
    _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
                "ext_shapeHSM_HsmSourceMoments_yy")

    def _func(self, df):
        traceSq = 0.5*(df["ext_shapeHSM_HsmSourceMoments_xx"]
                       + df["ext_shapeHSM_HsmSourceMoments_yy"])
        return np.sqrt(traceSq)
class PsfHsmTraceSizeDiff(Functor):
    """Percent difference between the HSM trace radius of a source and
    that of its PSF model, normalized by the mean of the two sizes.
    """
    name = 'PSF - HSM Trace Size'
    shortname = 'psf_HsmTrace'
    _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
                "ext_shapeHSM_HsmSourceMoments_yy",
                "ext_shapeHSM_HsmPsfMoments_xx",
                "ext_shapeHSM_HsmPsfMoments_yy")

    def _func(self, df):
        src = np.sqrt(0.5*(df["ext_shapeHSM_HsmSourceMoments_xx"]
                           + df["ext_shapeHSM_HsmSourceMoments_yy"]))
        psf = np.sqrt(0.5*(df["ext_shapeHSM_HsmPsfMoments_xx"]
                           + df["ext_shapeHSM_HsmPsfMoments_yy"]))
        # Difference in percent, relative to the mean of source and PSF sizes.
        return 100*(src - psf)/(0.5*(src + psf))
class HsmFwhm(Functor):
    """PSF FWHM in arcseconds derived from the HSM PSF trace moments,
    assuming a Gaussian profile and a fixed pixel scale.
    """
    name = 'HSM Psf FWHM'
    _columns = ('ext_shapeHSM_HsmPsfMoments_xx', 'ext_shapeHSM_HsmPsfMoments_yy')
    # TODO: DM-21403 pixel scale should be computed from the CD matrix or transform matrix
    pixelScale = 0.168
    # Gaussian sigma-to-FWHM conversion factor.
    SIGMA2FWHM = 2*np.sqrt(2*np.log(2))

    def _func(self, df):
        traceRadius = np.sqrt(
            0.5*(df['ext_shapeHSM_HsmPsfMoments_xx'] + df['ext_shapeHSM_HsmPsfMoments_yy']))
        return self.pixelScale*self.SIGMA2FWHM*traceRadius
class E1(Functor):
    """Distortion-definition ellipticity component,
    ``e1 = (Ixx - Iyy) / (Ixx + Iyy)``.

    Parameters
    ----------
    colXX, colXY, colYY : `str`
        Names of the second-moment columns. ``colXY`` does not enter the
        computation but is kept for interface symmetry with `E2`.
    """
    name = "Distortion Ellipticity (e1)"
    shortname = "Distortion"

    def __init__(self, colXX, colXY, colYY, **kwargs):
        self.colXX = colXX
        self.colXY = colXY
        self.colYY = colYY
        self._columns = [self.colXX, self.colXY, self.colYY]
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.colXX, self.colXY, self.colYY]

    def _func(self, df):
        # The numerator must be parenthesized: without it, operator
        # precedence computed Ixx - (Iyy / (Ixx + Iyy)), which is not e1.
        return (df[self.colXX] - df[self.colYY]) / (df[self.colXX] + df[self.colYY])
class E2(Functor):
    """Ellipticity component ``e2 = 2*Ixy / (Ixx + Iyy)``.

    Parameters
    ----------
    colXX, colXY, colYY : `str`
        Names of the second-moment columns.
    """
    name = "Ellipticity e2"

    def __init__(self, colXX, colXY, colYY, **kwargs):
        self.colXX = colXX
        self.colXY = colXY
        self.colYY = colYY
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.colXX, self.colXY, self.colYY]

    def _func(self, df):
        trace = df[self.colXX] + df[self.colYY]
        return 2*df[self.colXY] / trace
class RadiusFromQuadrupole(Functor):
    """Determinant radius from the quadrupole moments:
    ``(Ixx*Iyy - Ixy**2) ** 0.25``.

    Parameters
    ----------
    colXX, colXY, colYY : `str`
        Names of the second-moment columns.
    """

    def __init__(self, colXX, colXY, colYY, **kwargs):
        self.colXX = colXX
        self.colXY = colXY
        self.colYY = colYY
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.colXX, self.colXY, self.colYY]

    def _func(self, df):
        determinant = df[self.colXX]*df[self.colYY] - df[self.colXY]**2
        return determinant**0.25
class LocalWcs(Functor):
    """Computations using the stored localWcs.

    Parameters
    ----------
    colCD_1_1, colCD_1_2, colCD_2_1, colCD_2_2 : `str`
        Names of the columns holding the elements of the local WCS
        affine (CD) transform matrix.
    """
    name = "LocalWcsOperations"

    def __init__(self,
                 colCD_1_1,
                 colCD_1_2,
                 colCD_2_1,
                 colCD_2_2,
                 **kwargs):
        self.colCD_1_1 = colCD_1_1
        self.colCD_1_2 = colCD_1_2
        self.colCD_2_1 = colCD_2_1
        self.colCD_2_2 = colCD_2_2
        super().__init__(**kwargs)

    def computeDeltaRaDec(self, x, y, cd11, cd12, cd21, cd22):
        """Compute the RA/Dec offsets corresponding to the pixel offsets
        (x, y) under the local WCS affine (CD) transform.

        Parameters
        ----------
        x : `pandas.Series`
            X pixel coordinate.
        y : `pandas.Series`
            Y pixel coordinate.
        cd11 : `pandas.Series`
            [1, 1] element of the local Wcs affine transform.
        cd12 : `pandas.Series`
            [1, 2] element of the local Wcs affine transform.
        cd21 : `pandas.Series`
            [2, 1] element of the local Wcs affine transform.
        cd22 : `pandas.Series`
            [2, 2] element of the local Wcs affine transform.

        Returns
        -------
        raDecTuple : tuple
            RA and dec conversion of x and y given the local Wcs. Returned
            units are in radians.
        """
        # Plain matrix-vector product with the CD matrix.
        return (x * cd11 + y * cd12, x * cd21 + y * cd22)

    def computeSkySeperation(self, ra1, dec1, ra2, dec2):
        """Compute the on-sky separation between two (RA, Dec) positions.

        NOTE: the method name keeps the historical misspelling of
        "separation" for backward compatibility with existing callers.

        Parameters
        ----------
        ra1 : `pandas.Series`
            Ra of the first coordinate in radians.
        dec1 : `pandas.Series`
            Dec of the first coordinate in radians.
        ra2 : `pandas.Series`
            Ra of the second coordinate in radians.
        dec2 : `pandas.Series`
            Dec of the second coordinate in radians.

        Returns
        -------
        dist : `pandas.Series`
            Distance on the sphere in radians.
        """
        deltaDec = dec2 - dec1
        deltaRa = ra2 - ra1
        # Haversine formula: numerically stable for small separations.
        return 2 * np.arcsin(
            np.sqrt(
                np.sin(deltaDec / 2) ** 2
                + np.cos(dec2) * np.cos(dec1) * np.sin(deltaRa / 2) ** 2))

    def getSkySeperationFromPixel(self, x1, y1, x2, y2, cd11, cd12, cd21, cd22):
        """Compute the distance on the sphere from (x1, y1) to (x2, y2)
        given the local WCS affine transform.

        Parameters
        ----------
        x1 : `pandas.Series`
            X pixel coordinate.
        y1 : `pandas.Series`
            Y pixel coordinate.
        x2 : `pandas.Series`
            X pixel coordinate.
        y2 : `pandas.Series`
            Y pixel coordinate.
        cd11 : `pandas.Series`
            [1, 1] element of the local Wcs affine transform.
        cd12 : `pandas.Series`
            [1, 2] element of the local Wcs affine transform.
        cd21 : `pandas.Series`
            [2, 1] element of the local Wcs affine transform.
        cd22 : `pandas.Series`
            [2, 2] element of the local Wcs affine transform.

        Returns
        -------
        Distance : `pandas.Series`
            Distance on the sphere in radians.
        """
        ra1, dec1 = self.computeDeltaRaDec(x1, y1, cd11, cd12, cd21, cd22)
        ra2, dec2 = self.computeDeltaRaDec(x2, y2, cd11, cd12, cd21, cd22)
        # Great circle distance for small separations.
        return self.computeSkySeperation(ra1, dec1, ra2, dec2)
class ComputePixelScale(LocalWcs):
    """Compute the local pixel scale from the stored CDMatrix.
    """
    name = "PixelScale"

    @property
    def columns(self):
        return [self.colCD_1_1,
                self.colCD_1_2,
                self.colCD_2_1,
                self.colCD_2_2]

    def pixelScaleArcseconds(self, cd11, cd12, cd21, cd22):
        """Compute the local pixel to scale conversion in arcseconds.

        Parameters
        ----------
        cd11 : `pandas.Series`
            [1, 1] element of the local Wcs affine transform in radians.
        cd12 : `pandas.Series`
            [1, 2] element of the local Wcs affine transform in radians.
        cd21 : `pandas.Series`
            [2, 1] element of the local Wcs affine transform in radians.
        cd22 : `pandas.Series`
            [2, 2] element of the local Wcs affine transform in radians.

        Returns
        -------
        pixScale : `pandas.Series`
            Arcseconds per pixel at the location of the local WC
        """
        # sqrt(|det(CD)|) is the geometric-mean pixel scale in radians;
        # convert radians -> degrees -> arcseconds.
        return 3600 * np.degrees(np.sqrt(np.fabs(cd11 * cd22 - cd12 * cd21)))

    def _func(self, df):
        return self.pixelScaleArcseconds(df[self.colCD_1_1],
                                         df[self.colCD_1_2],
                                         df[self.colCD_2_1],
                                         df[self.colCD_2_2])
class ConvertPixelToArcseconds(ComputePixelScale):
    """Convert a value in units of pixels to units of arcseconds.

    (Docstring corrected: it was previously swapped with
    `ConvertPixelSqToArcsecondsSq`.)

    Parameters
    ----------
    col : `str`
        Name of the column (in pixel units) to convert.
    colCD_1_1, colCD_1_2, colCD_2_1, colCD_2_2 : `str`
        Names of the local WCS CD-matrix element columns.
    """

    def __init__(self,
                 col,
                 colCD_1_1,
                 colCD_1_2,
                 colCD_2_1,
                 colCD_2_2,
                 **kwargs):
        self.col = col
        super().__init__(colCD_1_1,
                         colCD_1_2,
                         colCD_2_1,
                         colCD_2_2,
                         **kwargs)

    @property
    def name(self):
        return f"{self.col}_asArcseconds"

    @property
    def columns(self):
        return [self.col,
                self.colCD_1_1,
                self.colCD_1_2,
                self.colCD_2_1,
                self.colCD_2_2]

    def _func(self, df):
        # Multiply by the pixel scale once: value is linear in pixels.
        return df[self.col] * self.pixelScaleArcseconds(df[self.colCD_1_1],
                                                        df[self.colCD_1_2],
                                                        df[self.colCD_2_1],
                                                        df[self.colCD_2_2])
class ConvertPixelSqToArcsecondsSq(ComputePixelScale):
    """Convert a value in units of pixels squared to units of arcseconds
    squared.

    (Docstring corrected: it was previously swapped with
    `ConvertPixelToArcseconds`.)

    Parameters
    ----------
    col : `str`
        Name of the column (in pixel-squared units) to convert.
    colCD_1_1, colCD_1_2, colCD_2_1, colCD_2_2 : `str`
        Names of the local WCS CD-matrix element columns.
    """

    def __init__(self,
                 col,
                 colCD_1_1,
                 colCD_1_2,
                 colCD_2_1,
                 colCD_2_2,
                 **kwargs):
        self.col = col
        super().__init__(colCD_1_1,
                         colCD_1_2,
                         colCD_2_1,
                         colCD_2_2,
                         **kwargs)

    @property
    def name(self):
        return f"{self.col}_asArcsecondsSq"

    @property
    def columns(self):
        return [self.col,
                self.colCD_1_1,
                self.colCD_1_2,
                self.colCD_2_1,
                self.colCD_2_2]

    def _func(self, df):
        pixScale = self.pixelScaleArcseconds(df[self.colCD_1_1],
                                             df[self.colCD_1_2],
                                             df[self.colCD_2_1],
                                             df[self.colCD_2_2])
        # Quadratic quantity: apply the pixel scale twice.
        return df[self.col] * pixScale * pixScale
class ReferenceBand(Functor):
    """Return, per object, the band used as the reference for the merged
    measurement, decoded from the ``merge_measurement_*`` flag columns.
    """
    name = 'Reference Band'
    shortname = 'refBand'

    @property
    def columns(self):
        return ["merge_measurement_i",
                "merge_measurement_r",
                "merge_measurement_z",
                "merge_measurement_y",
                "merge_measurement_g"]

    def _func(self, df):
        def bandOfRow(row):
            # idxmax returns the column holding the maximum value; with
            # boolean flags (True > False) that is the single True column.
            return row.idxmax().replace('merge_measurement_', '')

        return df[self.columns].apply(bandOfRow, axis=1)
class Photometry(Functor):
    """Base class for converting instrumental fluxes (DN) into calibrated
    fluxes (nJy) and AB magnitudes.

    Parameters
    ----------
    colFlux : `str`
        Name of the instrument flux column.
    colFluxErr : `str`, optional
        Name of the error column associated with ``colFlux``.
    calib : optional
        Calibration object providing ``getFluxMag0()``; if not supplied,
        the hard-coded coadd zeropoint ``COADD_ZP`` is used.
    """
    # AB to NanoJansky (3631 Jansky)
    AB_FLUX_SCALE = (0 * u.ABmag).to_value(u.nJy)
    LOG_AB_FLUX_SCALE = 12.56
    FIVE_OVER_2LOG10 = 1.085736204758129569
    # TO DO: DM-21955 Replace hard coded photometic calibration values
    COADD_ZP = 27

    def __init__(self, colFlux, colFluxErr=None, calib=None, **kwargs):
        # Vectorized form of the numerically-stable scalar hypot below.
        self.vhypot = np.vectorize(self.hypot)
        self.col = colFlux
        self.colFluxErr = colFluxErr

        self.calib = calib
        if calib is not None:
            self.fluxMag0, self.fluxMag0Err = calib.getFluxMag0()
        else:
            # Fall back to the hard-coded coadd zeropoint (see DM-21955).
            self.fluxMag0 = 1./np.power(10, -0.4*self.COADD_ZP)
            self.fluxMag0Err = 0.

        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.col]

    @property
    def name(self):
        return f'mag_{self.col}'

    @classmethod
    def hypot(cls, a, b):
        """Numerically stable scalar hypotenuse, sqrt(a**2 + b**2)."""
        if np.abs(a) < np.abs(b):
            a, b = b, a
        if a == 0.:
            return 0.
        q = b/a
        return np.abs(a) * np.sqrt(1. + q*q)

    def dn2flux(self, dn, fluxMag0):
        """Convert raw counts to calibrated flux in nanojansky."""
        return self.AB_FLUX_SCALE * dn / fluxMag0

    def dn2mag(self, dn, fluxMag0):
        """Convert raw counts to AB magnitude, silencing log-of-zero and
        log-of-negative warnings (those entries become inf/NaN)."""
        import warnings
        # np.warnings was an undocumented alias removed in NumPy >= 1.24;
        # use the stdlib warnings module directly.
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', r'invalid value encountered')
            warnings.filterwarnings('ignore', r'divide by zero')
            return -2.5 * np.log10(dn/fluxMag0)

    def dn2fluxErr(self, dn, dnErr, fluxMag0, fluxMag0Err):
        """Propagate count and zeropoint errors into a flux error (nJy)."""
        retVal = self.vhypot(dn * fluxMag0Err, dnErr * fluxMag0)
        retVal *= self.AB_FLUX_SCALE / fluxMag0 / fluxMag0
        return retVal

    def dn2MagErr(self, dn, dnErr, fluxMag0, fluxMag0Err):
        """Propagate count and zeropoint errors into a magnitude error."""
        retVal = self.dn2fluxErr(dn, dnErr, fluxMag0, fluxMag0Err) / self.dn2flux(dn, fluxMag0)
        return self.FIVE_OVER_2LOG10 * retVal
class NanoJansky(Photometry):
    """Calibrated flux in nanojansky from the instrument flux column."""

    def _func(self, df):
        dn = df[self.col]
        return self.dn2flux(dn, self.fluxMag0)
class NanoJanskyErr(Photometry):
    """Calibrated flux error in nanojansky from the instrument flux and
    flux-error columns."""

    @property
    def columns(self):
        return [self.col, self.colFluxErr]

    def _func(self, df):
        errArr = self.dn2fluxErr(df[self.col], df[self.colFluxErr],
                                 self.fluxMag0, self.fluxMag0Err)
        # dn2fluxErr returns an ndarray; re-wrap with the input index.
        return pd.Series(errArr, index=df.index)
class Magnitude(Photometry):
    """AB magnitude from the instrument flux column."""

    def _func(self, df):
        dn = df[self.col]
        return self.dn2mag(dn, self.fluxMag0)
class MagnitudeErr(Photometry):
    """AB magnitude error from the instrument flux and flux-error columns."""

    @property
    def columns(self):
        return [self.col, self.colFluxErr]

    def _func(self, df):
        errArr = self.dn2MagErr(df[self.col], df[self.colFluxErr],
                                self.fluxMag0, self.fluxMag0Err)
        # dn2MagErr returns an ndarray; re-wrap with the input index.
        return pd.Series(errArr, index=df.index)
class LocalPhotometry(Functor):
    """Base class for calibrating the specified instrument flux column using
    the local photometric calibration.

    Parameters
    ----------
    instFluxCol : `str`
        Name of the instrument flux column.
    instFluxErrCol : `str`
        Name of the assocated error columns for ``instFluxCol``.
    photoCalibCol : `str`
        Name of local calibration column.
    photoCalibErrCol : `str`
        Error associated with ``photoCalibCol``

    See also
    --------
    LocalPhotometry
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    """
    # Additive constant converting log10(flux [nJy]) to AB magnitude.
    logNJanskyToAB = (1 * u.nJy).to_value(u.ABmag)

    def __init__(self,
                 instFluxCol,
                 instFluxErrCol,
                 photoCalibCol,
                 photoCalibErrCol,
                 **kwargs):
        self.instFluxCol = instFluxCol
        self.instFluxErrCol = instFluxErrCol
        self.photoCalibCol = photoCalibCol
        self.photoCalibErrCol = photoCalibErrCol
        super().__init__(**kwargs)

    def instFluxToNanojansky(self, instFlux, localCalib):
        """Convert instrument flux to nanojanskys.

        Parameters
        ----------
        instFlux : `numpy.ndarray` or `pandas.Series`
            Array of instrument flux measurements
        localCalib : `numpy.ndarray` or `pandas.Series`
            Array of local photometric calibration estimates.

        Returns
        -------
        calibFlux : `numpy.ndarray` or `pandas.Series`
            Array of calibrated flux measurements.
        """
        return instFlux * localCalib

    def instFluxErrToNanojanskyErr(self, instFlux, instFluxErr, localCalib, localCalibErr):
        """Convert instrument flux error to a nanojansky error.

        Parameters
        ----------
        instFlux : `numpy.ndarray` or `pandas.Series`
            Array of instrument flux measurements
        instFluxErr : `numpy.ndarray` or `pandas.Series`
            Errors on associated ``instFlux`` values
        localCalib : `numpy.ndarray` or `pandas.Series`
            Array of local photometric calibration estimates.
        localCalibErr : `numpy.ndarray` or `pandas.Series`
            Errors on associated ``localCalib`` values

        Returns
        -------
        calibFluxErr : `numpy.ndarray` or `pandas.Series`
            Errors on calibrated flux measurements.
        """
        # Standard propagation for the product instFlux * localCalib.
        return np.hypot(instFluxErr * localCalib, instFlux * localCalibErr)

    def instFluxToMagnitude(self, instFlux, localCalib):
        """Convert instrument flux to an AB magnitude.

        Parameters
        ----------
        instFlux : `numpy.ndarray` or `pandas.Series`
            Array of instrument flux measurements
        localCalib : `numpy.ndarray` or `pandas.Series`
            Array of local photometric calibration estimates.

        Returns
        -------
        calibMag : `numpy.ndarray` or `pandas.Series`
            Array of calibrated AB magnitudes.
        """
        return -2.5 * np.log10(self.instFluxToNanojansky(instFlux, localCalib)) + self.logNJanskyToAB

    def instFluxErrToMagnitudeErr(self, instFlux, instFluxErr, localCalib, localCalibErr):
        """Convert instrument flux error to an AB magnitude error.

        Parameters
        ----------
        instFlux : `numpy.ndarray` or `pandas.Series`
            Array of instrument flux measurements
        instFluxErr : `numpy.ndarray` or `pandas.Series`
            Errors on associated ``instFlux`` values
        localCalib : `numpy.ndarray` or `pandas.Series`
            Array of local photometric calibration estimates.
        localCalibErr : `numpy.ndarray` or `pandas.Series`
            Errors on associated ``localCalib`` values

        Returns
        -------
        calibMagErr: `numpy.ndarray` or `pandas.Series`
            Error on calibrated AB magnitudes.
        """
        err = self.instFluxErrToNanojanskyErr(instFlux, instFluxErr, localCalib, localCalibErr)
        # Bug fix: the denominator must be the calibrated flux
        # instFluxToNanojansky(instFlux, localCalib); previously
        # instFluxErr was passed in place of localCalib.
        return 2.5 / np.log(10) * err / self.instFluxToNanojansky(instFlux, localCalib)
class LocalNanojansky(LocalPhotometry):
    """Compute calibrated fluxes using the local calibration value.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    """

    @property
    def columns(self):
        return [self.instFluxCol, self.photoCalibCol]

    @property
    def name(self):
        return f'flux_{self.instFluxCol}'

    def _func(self, df):
        instFlux = df[self.instFluxCol]
        localCalib = df[self.photoCalibCol]
        return self.instFluxToNanojansky(instFlux, localCalib)
class LocalNanojanskyErr(LocalPhotometry):
    """Compute calibrated flux errors using the local calibration value.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    """

    @property
    def columns(self):
        return [self.instFluxCol, self.instFluxErrCol,
                self.photoCalibCol, self.photoCalibErrCol]

    @property
    def name(self):
        return f'fluxErr_{self.instFluxCol}'

    def _func(self, df):
        return self.instFluxErrToNanojanskyErr(
            df[self.instFluxCol], df[self.instFluxErrCol],
            df[self.photoCalibCol], df[self.photoCalibErrCol])
class LocalMagnitude(LocalPhotometry):
    """Compute calibrated AB magnitudes using the local calibration value.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    """

    @property
    def columns(self):
        return [self.instFluxCol, self.photoCalibCol]

    @property
    def name(self):
        return f'mag_{self.instFluxCol}'

    def _func(self, df):
        instFlux = df[self.instFluxCol]
        localCalib = df[self.photoCalibCol]
        return self.instFluxToMagnitude(instFlux, localCalib)
class LocalMagnitudeErr(LocalPhotometry):
    """Compute calibrated AB magnitude errors using the local calibration value.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    """

    @property
    def columns(self):
        return [self.instFluxCol, self.instFluxErrCol,
                self.photoCalibCol, self.photoCalibErrCol]

    @property
    def name(self):
        return f'magErr_{self.instFluxCol}'

    def _func(self, df):
        return self.instFluxErrToMagnitudeErr(
            df[self.instFluxCol], df[self.instFluxErrCol],
            df[self.photoCalibCol], df[self.photoCalibErrCol])
class LocalDipoleMeanFlux(LocalPhotometry):
    """Compute absolute mean of dipole fluxes.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    LocalDipoleMeanFlux
    LocalDipoleMeanFluxErr
    LocalDipoleDiffFlux
    LocalDipoleDiffFluxErr
    """
    def __init__(self,
                 instFluxPosCol,
                 instFluxNegCol,
                 instFluxPosErrCol,
                 instFluxNegErrCol,
                 photoCalibCol,
                 photoCalibErrCol,
                 **kwargs):
        self.instFluxNegCol = instFluxNegCol
        self.instFluxPosCol = instFluxPosCol
        self.instFluxNegErrCol = instFluxNegErrCol
        self.instFluxPosErrCol = instFluxPosErrCol
        self.photoCalibCol = photoCalibCol
        self.photoCalibErrCol = photoCalibErrCol
        super().__init__(instFluxNegCol,
                         instFluxNegErrCol,
                         photoCalibCol,
                         photoCalibErrCol,
                         **kwargs)

    @property
    def columns(self):
        return [self.instFluxPosCol,
                self.instFluxNegCol,
                self.photoCalibCol]

    @property
    def name(self):
        return f'dipMeanFlux_{self.instFluxPosCol}_{self.instFluxNegCol}'

    def _func(self, df):
        localCalib = df[self.photoCalibCol]
        negFlux = np.fabs(self.instFluxToNanojansky(df[self.instFluxNegCol], localCalib))
        posFlux = np.fabs(self.instFluxToNanojansky(df[self.instFluxPosCol], localCalib))
        # Mean of the absolute calibrated fluxes of the two lobes.
        return 0.5*(negFlux + posFlux)
class LocalDipoleMeanFluxErr(LocalDipoleMeanFlux):
    """Compute the error on the absolute mean of dipole fluxes.

    Propagates calibration and flux errors through
    ``F = 0.5 * (|pos| + |neg|) * calib``.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    LocalDipoleMeanFlux
    LocalDipoleMeanFluxErr
    LocalDipoleDiffFlux
    LocalDipoleDiffFluxErr
    """

    @property
    def columns(self):
        return [self.instFluxPosCol,
                self.instFluxNegCol,
                self.instFluxPosErrCol,
                self.instFluxNegErrCol,
                self.photoCalibCol,
                self.photoCalibErrCol]

    @property
    def name(self):
        return f'dipMeanFluxErr_{self.instFluxPosCol}_{self.instFluxNegCol}'

    def _func(self, df):
        # Bug fix: the whole sum (|neg| + |pos|) must be scaled by the
        # calibration error before squaring. Previously, operator
        # precedence multiplied only |pos| by the calibration error
        # (compare the correctly-parenthesized LocalDipoleDiffFluxErr).
        return 0.5*np.sqrt(
            ((np.fabs(df[self.instFluxNegCol]) + np.fabs(df[self.instFluxPosCol]))
             * df[self.photoCalibErrCol])**2
            + (df[self.instFluxNegErrCol]**2 + df[self.instFluxPosErrCol]**2)
            * df[self.photoCalibCol]**2)
class LocalDipoleDiffFlux(LocalDipoleMeanFlux):
    """Compute the absolute difference of dipole fluxes.

    Value is (abs(pos) - abs(neg))

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    LocalDipoleMeanFlux
    LocalDipoleMeanFluxErr
    LocalDipoleDiffFlux
    LocalDipoleDiffFluxErr
    """

    @property
    def columns(self):
        return [self.instFluxPosCol,
                self.instFluxNegCol,
                self.photoCalibCol]

    @property
    def name(self):
        return f'dipDiffFlux_{self.instFluxPosCol}_{self.instFluxNegCol}'

    def _func(self, df):
        localCalib = df[self.photoCalibCol]
        posFlux = np.fabs(self.instFluxToNanojansky(df[self.instFluxPosCol], localCalib))
        negFlux = np.fabs(self.instFluxToNanojansky(df[self.instFluxNegCol], localCalib))
        return posFlux - negFlux
class LocalDipoleDiffFluxErr(LocalDipoleMeanFlux):
    """Compute the error on the absolute difference of dipole fluxes.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    LocalDipoleMeanFlux
    LocalDipoleMeanFluxErr
    LocalDipoleDiffFlux
    LocalDipoleDiffFluxErr
    """

    @property
    def columns(self):
        return [self.instFluxPosCol,
                self.instFluxNegCol,
                self.instFluxPosErrCol,
                self.instFluxNegErrCol,
                self.photoCalibCol,
                self.photoCalibErrCol]

    @property
    def name(self):
        return f'dipDiffFluxErr_{self.instFluxPosCol}_{self.instFluxNegCol}'

    def _func(self, df):
        localCalib = df[self.photoCalibCol]
        localCalibErr = df[self.photoCalibErrCol]
        # Propagate errors through D = (|pos| - |neg|) * calib.
        absDiff = np.fabs(df[self.instFluxPosCol]) - np.fabs(df[self.instFluxNegCol])
        fluxVar = df[self.instFluxPosErrCol]**2 + df[self.instFluxNegErrCol]**2
        return np.sqrt((absDiff * localCalibErr)**2 + fluxVar * localCalib**2)
class Ratio(Functor):
    """Base class for returning the ratio of 2 columns.

    Can be used to compute a Signal to Noise ratio for any input flux.

    Parameters
    ----------
    numerator : `str`
        Name of the column to use as the numerator in the ratio.
    denominator : `str`
        Name of the column to use as the denominator in the ratio.
    """
    def __init__(self,
                 numerator,
                 denominator,
                 **kwargs):
        self.numerator = numerator
        self.denominator = denominator
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.numerator, self.denominator]

    @property
    def name(self):
        return f'ratio_{self.numerator}_{self.denominator}'

    def _func(self, df):
        import warnings
        # np.warnings was an undocumented alias removed in NumPy >= 1.24;
        # use the stdlib warnings module directly. Division warnings are
        # silenced; zero denominators yield inf/NaN as before.
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', r'invalid value encountered')
            warnings.filterwarnings('ignore', r'divide by zero')
            return df[self.numerator] / df[self.denominator]