# python/lsst/pipe/tasks/functors.py (coverage report: 41%)
# This file is part of pipe_tasks.
#
# LSST Data Management System
# This product includes software developed by the
# LSST Project (http://www.lsst.org/).
# See COPYRIGHT file at the top of the source tree.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the LSST License Statement and
# the GNU General Public License along with this program. If not,
# see <https://www.lsstcorp.org/LegalNotices/>.
#
import yaml
import re
from itertools import product
import os.path

import pandas as pd
import numpy as np
import astropy.units as u

from lsst.daf.persistence import doImport
from lsst.daf.butler import DeferredDatasetHandle
import lsst.geom as geom
import lsst.sphgeom as sphgeom

from .parquetTable import ParquetTable, MultilevelParquetTable


def init_fromDict(initDict, basePath='lsst.pipe.tasks.functors',
                  typeKey='functor', name=None):
    """Initialize an object defined in a dictionary

    The object needs to be importable as
    f'{basePath}.{initDict[typeKey]}'
    The positional and keyword arguments (if any) are contained in
    "args" and "kwargs" entries in the dictionary, respectively.
    This is used in `functors.CompositeFunctor.from_yaml` to initialize
    a composite functor from a specification in a YAML file.

    Parameters
    ----------
    initDict : dictionary
        Dictionary describing object's initialization. Must contain
        an entry keyed by ``typeKey`` that is the name of the object,
        relative to ``basePath``.
    basePath : str
        Path relative to module in which ``initDict[typeKey]`` is defined.
    typeKey : str
        Key of ``initDict`` that is the name of the object
        (relative to ``basePath``).
    name : str, optional
        Name of the functor being initialized; used only in the error
        message raised if construction fails.
    """
    initDict = initDict.copy()
    # TO DO: DM-21956 We should be able to define functors outside this module
    pythonType = doImport(f'{basePath}.{initDict.pop(typeKey)}')
    args = []
    if 'args' in initDict:
        args = initDict.pop('args')
        if isinstance(args, str):
            args = [args]
    try:
        element = pythonType(*args, **initDict)
    except Exception as e:
        message = f'Error in constructing functor "{name}" of type {pythonType.__name__} with args: {args}'
        raise type(e)(message, e.args)
    return element
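
# Illustrative sketch (not part of the original module): how `init_fromDict`
# builds a functor from the kind of spec dict found under `funcs` in a YAML
# translation file. `Column` is defined later in this module; the column
# name is a hypothetical example.
#
#     >>> spec = {'functor': 'Column', 'args': 'coord_ra'}
#     >>> func = init_fromDict(spec, name='ra')
#     >>> func.columns
#     ['coord_ra']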


class Functor(object):
    """Define and execute a calculation on a ParquetTable

    The `__call__` method accepts either a `ParquetTable` object or a
    `DeferredDatasetHandle`, and returns the
    result of the calculation as a single column. Each functor defines what
    columns are needed for the calculation, and only these columns are read
    from the `ParquetTable`.

    The action of `__call__` consists of two steps: first, loading the
    necessary columns from disk into memory as a `pandas.DataFrame` object;
    and second, performing the computation on this dataframe and returning the
    result.

    To define a new `Functor`, a subclass must define a `_func` method,
    that takes a `pandas.DataFrame` and returns the result as a `pandas.Series`.
    In addition, it must define the following attributes:

    * `_columns`: The columns necessary to perform the calculation
    * `name`: A name appropriate for a figure axis label
    * `shortname`: A name appropriate for use as a dictionary key

    On initialization, a `Functor` should declare what band (`filt` kwarg)
    and dataset (e.g. `'ref'`, `'meas'`, `'forced_src'`) it is intended to be
    applied to. This enables the `_get_data` method to extract the proper
    columns from the parquet file. If not specified, the dataset will fall back
    on the `_defaultDataset` attribute. If band is not specified and `dataset`
    is anything other than `'ref'`, then an error will be raised when trying to
    perform the calculation.

    Originally, `Functor` was set up to expect
    datasets formatted like the `deepCoadd_obj` dataset; that is, a
    dataframe with a multi-level column index, with the levels of the
    column index being `band`, `dataset`, and `column`.
    It has since been generalized to apply to dataframes without multi-level
    indices and multi-level indices with just `dataset` and `column` levels.
    In addition, the `_get_data` method that reads
    the dataframe from the `ParquetTable` will return a dataframe with column
    index levels defined by the `_dfLevels` attribute; by default, this is
    `column`.

    The `_dfLevels` attributes should generally not need to
    be changed, unless `_func` needs columns from multiple filters or datasets
    to do the calculation.
    An example of this is the `lsst.pipe.tasks.functors.Color` functor, for
    which `_dfLevels = ('band', 'column')`, and `_func` expects the dataframe
    it gets to have those levels in the column index.

    Parameters
    ----------
    filt : str
        Filter upon which to do the calculation

    dataset : str
        Dataset upon which to do the calculation
        (e.g., 'ref', 'meas', 'forced_src').
    """

    _defaultDataset = 'ref'
    _dfLevels = ('column',)
    _defaultNoDup = False

    def __init__(self, filt=None, dataset=None, noDup=None):
        self.filt = filt
        self.dataset = dataset if dataset is not None else self._defaultDataset
        self._noDup = noDup

    @property
    def noDup(self):
        if self._noDup is not None:
            return self._noDup
        else:
            return self._defaultNoDup

    @property
    def columns(self):
        """Columns required to perform calculation
        """
        if not hasattr(self, '_columns'):
            raise NotImplementedError('Must define columns property or _columns attribute')
        return self._columns

    def _get_data_columnLevels(self, data, columnIndex=None):
        """Gets the names of the column index levels

        This should only be called in the context of a multilevel table.
        The logic here is to enable this to work both with the gen2 `MultilevelParquetTable`
        and with the gen3 `DeferredDatasetHandle`.

        Parameters
        ----------
        data : `MultilevelParquetTable` or `DeferredDatasetHandle`

        columnIndex : `pandas.Index`, optional
            If not passed, then it is read from the `DeferredDatasetHandle`.
        """
        if isinstance(data, DeferredDatasetHandle):
            if columnIndex is None:
                columnIndex = data.get(component="columns")
        if columnIndex is not None:
            return columnIndex.names
        if isinstance(data, MultilevelParquetTable):
            return data.columnLevels
        else:
            raise TypeError(f"Unknown type for data: {type(data)}!")

    def _get_data_columnLevelNames(self, data, columnIndex=None):
        """Gets the content of each of the column levels for a multilevel table

        Similar to `_get_data_columnLevels`, this enables backward compatibility with gen2.

        Mirrors the original gen2 implementation within `pipe.tasks.parquetTable.MultilevelParquetTable`
        """
        if isinstance(data, DeferredDatasetHandle):
            if columnIndex is None:
                columnIndex = data.get(component="columns")
        if columnIndex is not None:
            columnLevels = columnIndex.names
            columnLevelNames = {
                level: list(np.unique(np.array([c for c in columnIndex])[:, i]))
                for i, level in enumerate(columnLevels)
            }
            return columnLevelNames
        if isinstance(data, MultilevelParquetTable):
            return data.columnLevelNames
        else:
            raise TypeError(f"Unknown type for data: {type(data)}!")

    def _colsFromDict(self, colDict, columnIndex=None):
        """Converts dictionary column specification to a list of columns

        This mirrors the original gen2 implementation within `pipe.tasks.parquetTable.MultilevelParquetTable`
        """
        new_colDict = {}
        columnLevels = self._get_data_columnLevels(None, columnIndex=columnIndex)

        for i, lev in enumerate(columnLevels):
            if lev in colDict:
                if isinstance(colDict[lev], str):
                    new_colDict[lev] = [colDict[lev]]
                else:
                    new_colDict[lev] = colDict[lev]
            else:
                new_colDict[lev] = columnIndex.levels[i]

        levelCols = [new_colDict[lev] for lev in columnLevels]
        cols = product(*levelCols)
        return list(cols)

    def multilevelColumns(self, data, columnIndex=None, returnTuple=False):
        """Returns columns needed by functor from multilevel dataset

        To access tables with multilevel column structure, the `MultilevelParquetTable`
        or `DeferredDatasetHandle` need to be passed either a list of tuples or a
        dictionary.

        Parameters
        ----------
        data : `MultilevelParquetTable` or `DeferredDatasetHandle`

        columnIndex : `pandas.Index`, optional
            Either passed or read in from the `DeferredDatasetHandle`.

        returnTuple : bool
            If `True`, then return a list of tuples rather than the column dictionary
            specification. This is set to `True` by `CompositeFunctor` in order to be able to
            combine columns from the various component functors.
        """
        if isinstance(data, DeferredDatasetHandle) and columnIndex is None:
            columnIndex = data.get(component="columns")

        # Confirm that the dataset has the column levels the functor is expecting it to have.
        columnLevels = self._get_data_columnLevels(data, columnIndex)

        columnDict = {'column': self.columns,
                      'dataset': self.dataset}
        if self.filt is None:
            columnLevelNames = self._get_data_columnLevelNames(data, columnIndex)
            if "band" in columnLevels:
                if self.dataset == "ref":
                    columnDict["band"] = columnLevelNames["band"][0]
                else:
                    raise ValueError(f"'filt' not set for functor {self.name} "
                                     f"(dataset {self.dataset}) "
                                     "and ParquetTable "
                                     "contains multiple filters in column index. "
                                     "Set 'filt' or set 'dataset' to 'ref'.")
        else:
            columnDict['band'] = self.filt

        if isinstance(data, MultilevelParquetTable):
            return data._colsFromDict(columnDict)
        elif isinstance(data, DeferredDatasetHandle):
            if returnTuple:
                return self._colsFromDict(columnDict, columnIndex=columnIndex)
            else:
                return columnDict

    def _func(self, df, dropna=True):
        raise NotImplementedError('Must define calculation on dataframe')

    def _get_columnIndex(self, data):
        """Return columnIndex
        """
        if isinstance(data, DeferredDatasetHandle):
            return data.get(component="columns")
        else:
            return None

    def _get_data(self, data):
        """Retrieve dataframe necessary for calculation.

        The data argument can be a DataFrame, a ParquetTable instance, or a gen3 DeferredDatasetHandle.

        Returns a dataframe upon which `self._func` can act.

        N.B. while passing a raw pandas `DataFrame` *should* work here, it has not been tested.
        """
        if isinstance(data, pd.DataFrame):
            return data

        # First thing to do: check to see if the data source has a multilevel column index or not.
        columnIndex = self._get_columnIndex(data)
        is_multiLevel = isinstance(data, MultilevelParquetTable) or isinstance(columnIndex, pd.MultiIndex)

        # Simple single-level parquet table, gen2
        if isinstance(data, ParquetTable) and not is_multiLevel:
            columns = self.columns
            df = data.toDataFrame(columns=columns)
            return df

        # Get proper columns specification for this functor
        if is_multiLevel:
            columns = self.multilevelColumns(data, columnIndex=columnIndex)
        else:
            columns = self.columns

        if isinstance(data, MultilevelParquetTable):
            # Load in-memory dataframe with appropriate columns the gen2 way
            df = data.toDataFrame(columns=columns, droplevels=False)
        elif isinstance(data, DeferredDatasetHandle):
            # Load in-memory dataframe with appropriate columns the gen3 way
            df = data.get(parameters={"columns": columns})

        # Drop unnecessary column levels
        if is_multiLevel:
            df = self._setLevels(df)

        return df

    def _setLevels(self, df):
        levelsToDrop = [n for n in df.columns.names if n not in self._dfLevels]
        df.columns = df.columns.droplevel(levelsToDrop)
        return df

    def _dropna(self, vals):
        return vals.dropna()

    def __call__(self, data, dropna=False):
        try:
            df = self._get_data(data)
            vals = self._func(df)
        except Exception:
            vals = self.fail(df)
        if dropna:
            vals = self._dropna(vals)

        return vals

    def difference(self, data1, data2, **kwargs):
        """Computes difference between functor called on two different ParquetTable objects
        """
        return self(data1, **kwargs) - self(data2, **kwargs)

    def fail(self, df):
        return pd.Series(np.full(len(df), np.nan), index=df.index)

    @property
    def name(self):
        """Full name of functor (suitable for figure labels)
        """
        raise NotImplementedError

    @property
    def shortname(self):
        """Short name of functor (suitable for column name/dict key)
        """
        return self.name
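
# Illustrative sketch (not part of the original module): a minimal `Functor`
# subclass. It declares its inputs via `_columns`, its labels via `name` and
# `shortname`, and its calculation via `_func`; the column names are
# hypothetical.
#
#     >>> class FluxDiff(Functor):
#     ...     _columns = ['a_instFlux', 'b_instFlux']
#     ...     name = 'flux difference'
#     ...     shortname = 'fluxDiff'
#     ...     def _func(self, df):
#     ...         return df['a_instFlux'] - df['b_instFlux']
#     >>> df = pd.DataFrame({'a_instFlux': [2.0], 'b_instFlux': [1.5]})
#     >>> FluxDiff()(df).iloc[0]
#     0.5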


class CompositeFunctor(Functor):
    """Perform multiple calculations at once on a catalog

    The role of a `CompositeFunctor` is to group together computations from
    multiple functors. Instead of returning a `pandas.Series`, a
    `CompositeFunctor` returns a `pandas.DataFrame`, with the column names
    being the keys of `funcDict`.

    The `columns` attribute of a `CompositeFunctor` is the union of all columns
    in all the component functors.

    A `CompositeFunctor` does not use a `_func` method itself; rather,
    when a `CompositeFunctor` is called, all its columns are loaded
    at once, and the resulting dataframe is passed to the `_func` method of each component
    functor. This has the advantage of only doing I/O (reading from parquet file) once,
    and works because each individual `_func` method of each component functor does not
    care if there are *extra* columns in the dataframe being passed; only that it must contain
    *at least* the `columns` it expects.

    An important and useful class method is `from_yaml`, which takes as argument the path to a YAML
    file specifying a collection of functors.

    Parameters
    ----------
    funcs : `dict` or `list`
        Dictionary or list of functors. If a list, then it will be converted
        into a dictionary according to the `.shortname` attribute of each functor.
    """

    dataset = None

    def __init__(self, funcs, **kwargs):

        if isinstance(funcs, dict):
            self.funcDict = funcs
        else:
            self.funcDict = {f.shortname: f for f in funcs}

        self._filt = None

        super().__init__(**kwargs)

    @property
    def filt(self):
        return self._filt

    @filt.setter
    def filt(self, filt):
        if filt is not None:
            for _, f in self.funcDict.items():
                f.filt = filt
        self._filt = filt

    def update(self, new):
        if isinstance(new, dict):
            self.funcDict.update(new)
        elif isinstance(new, CompositeFunctor):
            self.funcDict.update(new.funcDict)
        else:
            raise TypeError('Can only update with dictionary or CompositeFunctor.')

        # Make sure new functors have the same 'filt' set
        if self.filt is not None:
            self.filt = self.filt

    @property
    def columns(self):
        return list(set([x for y in [f.columns for f in self.funcDict.values()] for x in y]))

    def multilevelColumns(self, data, **kwargs):
        # Get the union of columns for all component functors. Note the need to have `returnTuple=True` here.
        return list(
            set(
                [
                    x
                    for y in [
                        f.multilevelColumns(data, returnTuple=True, **kwargs) for f in self.funcDict.values()
                    ]
                    for x in y
                ]
            )
        )

    def __call__(self, data, **kwargs):
        """Apply the functor to the data table

        Parameters
        ----------
        data : `lsst.daf.butler.DeferredDatasetHandle`,
               `lsst.pipe.tasks.parquetTable.MultilevelParquetTable`,
               `lsst.pipe.tasks.parquetTable.ParquetTable`,
               or `pandas.DataFrame`.
            The table or a pointer to a table on disk from which columns can
            be accessed.
        """
        columnIndex = self._get_columnIndex(data)

        # First, determine whether data has a multilevel index (either gen2 or gen3)
        is_multiLevel = isinstance(data, MultilevelParquetTable) or isinstance(columnIndex, pd.MultiIndex)

        # Multilevel index, gen2 or gen3
        if is_multiLevel:
            columns = self.multilevelColumns(data, columnIndex=columnIndex)

            if isinstance(data, MultilevelParquetTable):
                # Read data into memory the gen2 way
                df = data.toDataFrame(columns=columns, droplevels=False)
            elif isinstance(data, DeferredDatasetHandle):
                # Read data into memory the gen3 way
                df = data.get(parameters={"columns": columns})

            valDict = {}
            for k, f in self.funcDict.items():
                try:
                    subdf = f._setLevels(
                        df[f.multilevelColumns(data, returnTuple=True, columnIndex=columnIndex)]
                    )
                    valDict[k] = f._func(subdf)
                except Exception as e:
                    try:
                        valDict[k] = f.fail(subdf)
                    except NameError:
                        raise e

        else:
            if isinstance(data, DeferredDatasetHandle):
                # input if Gen3 deferLoad=True
                df = data.get(parameters={"columns": self.columns})
            elif isinstance(data, pd.DataFrame):
                # input if Gen3 deferLoad=False
                df = data
            else:
                # Original Gen2 input is type ParquetTable and the fallback
                df = data.toDataFrame(columns=self.columns)

            valDict = {k: f._func(df) for k, f in self.funcDict.items()}

        # Check that output columns are actually columns
        for name, colVal in valDict.items():
            if len(colVal.shape) != 1:
                raise RuntimeError("Transformed column '%s' is not the shape of a column. "
                                   "It is shaped %s and type %s." % (name, colVal.shape, type(colVal)))

        try:
            valDf = pd.concat(valDict, axis=1)
        except TypeError:
            print([(k, type(v)) for k, v in valDict.items()])
            raise

        if kwargs.get('dropna', False):
            valDf = valDf.dropna(how='any')

        return valDf

    @classmethod
    def renameCol(cls, col, renameRules):
        if renameRules is None:
            return col
        for old, new in renameRules:
            if col.startswith(old):
                col = col.replace(old, new)
        return col

    @classmethod
    def from_file(cls, filename, **kwargs):
        # Allow environment variables in the filename.
        filename = os.path.expandvars(filename)
        with open(filename) as f:
            translationDefinition = yaml.safe_load(f)

        return cls.from_yaml(translationDefinition, **kwargs)

    @classmethod
    def from_yaml(cls, translationDefinition, **kwargs):
        funcs = {}
        for func, val in translationDefinition['funcs'].items():
            funcs[func] = init_fromDict(val, name=func)

        if 'flag_rename_rules' in translationDefinition:
            renameRules = translationDefinition['flag_rename_rules']
        else:
            renameRules = None

        if 'calexpFlags' in translationDefinition:
            for flag in translationDefinition['calexpFlags']:
                funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='calexp')

        if 'refFlags' in translationDefinition:
            for flag in translationDefinition['refFlags']:
                funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='ref')

        if 'forcedFlags' in translationDefinition:
            for flag in translationDefinition['forcedFlags']:
                funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='forced_src')

        if 'flags' in translationDefinition:
            for flag in translationDefinition['flags']:
                funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='meas')

        return cls(funcs, **kwargs)
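
# Illustrative sketch (not part of the original module): building a
# `CompositeFunctor` from a YAML-style specification. The functor and column
# names are hypothetical, but the structure ('funcs' with per-key 'functor'
# and 'args' entries) is what `from_yaml` expects.
#
#     >>> spec = yaml.safe_load('''
#     ... funcs:
#     ...   ra:
#     ...     functor: RAColumn
#     ...   psfMag:
#     ...     functor: Mag
#     ...     args: base_PsfFlux
#     ...     dataset: meas
#     ... ''')
#     >>> cf = CompositeFunctor.from_yaml(spec)
#     >>> sorted(cf.funcDict)
#     ['psfMag', 'ra']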


def mag_aware_eval(df, expr):
    """Evaluate an expression on a DataFrame, knowing what the 'mag' function means

    Builds on `pandas.DataFrame.eval`, which parses and executes math on dataframes.

    Parameters
    ----------
    df : `pandas.DataFrame`
        Dataframe on which to evaluate expression.

    expr : str
        Expression.
    """
    try:
        expr_new = re.sub(r'mag\((\w+)\)', r'-2.5*log(\g<1>)/log(10)', expr)
        val = df.eval(expr_new, truediv=True)
    except Exception:  # Should check what actually gets raised
        expr_new = re.sub(r'mag\((\w+)\)', r'-2.5*log(\g<1>_instFlux)/log(10)', expr)
        val = df.eval(expr_new, truediv=True)
    return val
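
# Illustrative sketch (not part of the original module): `mag(x)` in an
# expression is rewritten to -2.5*log(x)/log(10) before being handed to
# `pandas.DataFrame.eval`. Column names are hypothetical.
#
#     >>> df = pd.DataFrame({'a_instFlux': [100.0], 'b_instFlux': [10.0]})
#     >>> val = mag_aware_eval(df, 'mag(a_instFlux) - mag(b_instFlux)')
#     >>> round(float(val.iloc[0]), 6)
#     -2.5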


class CustomFunctor(Functor):
    """Arbitrary computation on a catalog

    Column names (and thus the columns to be loaded from catalog) are found
    by finding all words and trying to ignore all "math-y" words.

    Parameters
    ----------
    expr : str
        Expression to evaluate, to be parsed and executed by `mag_aware_eval`.
    """
    _ignore_words = ('mag', 'sin', 'cos', 'exp', 'log', 'sqrt')

    def __init__(self, expr, **kwargs):
        self.expr = expr
        super().__init__(**kwargs)

    @property
    def name(self):
        return self.expr

    @property
    def columns(self):
        flux_cols = re.findall(r'mag\(\s*(\w+)\s*\)', self.expr)

        cols = [c for c in re.findall(r'[a-zA-Z_]+', self.expr) if c not in self._ignore_words]
        not_a_col = []
        for c in flux_cols:
            if not re.search('_instFlux$', c):
                cols.append(f'{c}_instFlux')
                not_a_col.append(c)
            else:
                cols.append(c)

        return list(set([c for c in cols if c not in not_a_col]))

    def _func(self, df):
        return mag_aware_eval(df, self.expr)
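
# Illustrative sketch (not part of the original module): `CustomFunctor`
# extracts the needed columns from the expression, appending '_instFlux' to
# arguments of mag() that lack it. Column names are hypothetical.
#
#     >>> func = CustomFunctor('mag(modelfit_CModel) - mag(base_PsfFlux)')
#     >>> sorted(func.columns)
#     ['base_PsfFlux_instFlux', 'modelfit_CModel_instFlux']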


class Column(Functor):
    """Get column with specified name
    """

    def __init__(self, col, **kwargs):
        self.col = col
        super().__init__(**kwargs)

    @property
    def name(self):
        return self.col

    @property
    def columns(self):
        return [self.col]

    def _func(self, df):
        return df[self.col]


class Index(Functor):
    """Return the value of the index for each object
    """

    columns = ['coord_ra']  # just a dummy; something has to be here
    _defaultDataset = 'ref'
    _defaultNoDup = True

    def _func(self, df):
        return pd.Series(df.index, index=df.index)


class IDColumn(Column):
    col = 'id'
    _allow_difference = False
    _defaultNoDup = True

    def _func(self, df):
        return pd.Series(df.index, index=df.index)


class FootprintNPix(Column):
    col = 'base_Footprint_nPix'


class CoordColumn(Column):
    """Base class for coordinate column, in degrees
    """
    _radians = True

    def __init__(self, col, **kwargs):
        super().__init__(col, **kwargs)

    def _func(self, df):
        # Must not modify original column in case that column is used by another functor
        output = df[self.col] * 180 / np.pi if self._radians else df[self.col]
        return output


class RAColumn(CoordColumn):
    """Right Ascension, in degrees
    """
    name = 'RA'
    _defaultNoDup = True

    def __init__(self, **kwargs):
        super().__init__('coord_ra', **kwargs)

    def __call__(self, catalog, **kwargs):
        return super().__call__(catalog, **kwargs)


class DecColumn(CoordColumn):
    """Declination, in degrees
    """
    name = 'Dec'
    _defaultNoDup = True

    def __init__(self, **kwargs):
        super().__init__('coord_dec', **kwargs)

    def __call__(self, catalog, **kwargs):
        return super().__call__(catalog, **kwargs)


class HtmIndex20(Functor):
    """Compute the level 20 HtmIndex for the catalog.

    Notes
    -----
    This functor was implemented to satisfy requirements of the old APDB
    interface, which required a ``pixelId`` column in DiaObject with the HTM20
    index. The APDB interface has since migrated away from needing that
    information, but we keep this class in case it may be useful for something
    else.
    """
    name = "Htm20"
    htmLevel = 20
    _radians = True

    def __init__(self, ra, decl, **kwargs):
        self.pixelator = sphgeom.HtmPixelization(self.htmLevel)
        self.ra = ra
        self.decl = decl
        self._columns = [self.ra, self.decl]
        super().__init__(**kwargs)

    def _func(self, df):

        def computePixel(row):
            if self._radians:
                sphPoint = geom.SpherePoint(row[self.ra],
                                            row[self.decl],
                                            geom.radians)
            else:
                sphPoint = geom.SpherePoint(row[self.ra],
                                            row[self.decl],
                                            geom.degrees)
            return self.pixelator.index(sphPoint.getVector())

        return df.apply(computePixel, axis=1, result_type='reduce').astype('int64')


def fluxName(col):
    if not col.endswith('_instFlux'):
        col += '_instFlux'
    return col


def fluxErrName(col):
    if not col.endswith('_instFluxErr'):
        col += '_instFluxErr'
    return col
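
# Illustrative sketch (not part of the original module): the helpers above
# append the instFlux/instFluxErr suffixes only when missing.
#
#     >>> fluxName('modelfit_CModel')
#     'modelfit_CModel_instFlux'
#     >>> fluxErrName('modelfit_CModel_instFluxErr')
#     'modelfit_CModel_instFluxErr'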


class Mag(Functor):
    """Compute calibrated magnitude

    Takes a `calib` argument, which returns the flux at mag=0
    as `calib.getFluxMag0()`. If not provided, then the default
    `fluxMag0` is 63095734448.0194, which is the default for HSC.
    This default should be removed in DM-21955.

    This calculation hides warnings about invalid values and dividing by zero.

    As for all functors, a `dataset` and `filt` kwarg should be provided upon
    initialization. Unlike the default `Functor`, however, the default dataset
    for a `Mag` is `'meas'`, rather than `'ref'`.

    Parameters
    ----------
    col : `str`
        Name of flux column from which to compute magnitude. Can be parseable
        by the `lsst.pipe.tasks.functors.fluxName` function; that is, you can
        pass `'modelfit_CModel'` instead of `'modelfit_CModel_instFlux'` and it
        will understand.
    calib : `lsst.afw.image.calib.Calib` (optional)
        Object that knows zero point.
    """
    _defaultDataset = 'meas'

    def __init__(self, col, calib=None, **kwargs):
        self.col = fluxName(col)
        self.calib = calib
        if calib is not None:
            self.fluxMag0 = calib.getFluxMag0()[0]
        else:
            # TO DO: DM-21955 Replace hard coded photometric calibration values
            self.fluxMag0 = 63095734448.0194

        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.col]

    def _func(self, df):
        with np.warnings.catch_warnings():
            np.warnings.filterwarnings('ignore', r'invalid value encountered')
            np.warnings.filterwarnings('ignore', r'divide by zero')
            return -2.5*np.log10(df[self.col] / self.fluxMag0)

    @property
    def name(self):
        return f'mag_{self.col}'
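
# Illustrative sketch (not part of the original module): with the default HSC
# fluxMag0, a source 100x fainter than the zero point is at magnitude 5.
# The column name is hypothetical.
#
#     >>> df = pd.DataFrame({'base_PsfFlux_instFlux': [63095734448.0194 / 100]})
#     >>> round(float(Mag('base_PsfFlux')(df).iloc[0]), 6)
#     5.0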


class MagErr(Mag):
    """Compute calibrated magnitude uncertainty

    Takes the same `calib` object as `lsst.pipe.tasks.functors.Mag`.

    Parameters
    ----------
    col : `str`
        Name of flux column
    calib : `lsst.afw.image.calib.Calib` (optional)
        Object that knows zero point.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        if self.calib is not None:
            self.fluxMag0Err = self.calib.getFluxMag0()[1]
        else:
            self.fluxMag0Err = 0.

    @property
    def columns(self):
        return [self.col, self.col + 'Err']

    def _func(self, df):
        with np.warnings.catch_warnings():
            np.warnings.filterwarnings('ignore', r'invalid value encountered')
            np.warnings.filterwarnings('ignore', r'divide by zero')
            fluxCol, fluxErrCol = self.columns
            x = df[fluxErrCol] / df[fluxCol]
            y = self.fluxMag0Err / self.fluxMag0
            magErr = (2.5 / np.log(10.)) * np.sqrt(x*x + y*y)
            return magErr

    @property
    def name(self):
        return super().name + '_err'


class NanoMaggie(Mag):
    """Convert flux to nanomaggies (flux relative to the zero-point flux
    `fluxMag0`, scaled by 1e9).
    """

    def _func(self, df):
        return (df[self.col] / self.fluxMag0) * 1e9


class MagDiff(Functor):
    """Functor to calculate magnitude difference"""
    _defaultDataset = 'meas'

    def __init__(self, col1, col2, **kwargs):
        self.col1 = fluxName(col1)
        self.col2 = fluxName(col2)
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.col1, self.col2]

    def _func(self, df):
        with np.warnings.catch_warnings():
            np.warnings.filterwarnings('ignore', r'invalid value encountered')
            np.warnings.filterwarnings('ignore', r'divide by zero')
            return -2.5*np.log10(df[self.col1]/df[self.col2])

    @property
    def name(self):
        return f'(mag_{self.col1} - mag_{self.col2})'

    @property
    def shortname(self):
        return f'magDiff_{self.col1}_{self.col2}'


class Color(Functor):
    """Compute the color between two filters

    Computes color by initializing two different `Mag`
    functors based on the `col` and filters provided, and
    then returning the difference.

    This is enabled by the `_func` expecting a dataframe with a
    multilevel column index, with both `'band'` and `'column'`,
    instead of just `'column'`, which is the `Functor` default.
    This is controlled by the `_dfLevels` attribute.

    Also of note, the default dataset for `Color` is `'forced_src'`,
    whereas for `Mag` it is `'meas'`.

    Parameters
    ----------
    col : str
        Name of flux column from which to compute; same as would be passed to
        `lsst.pipe.tasks.functors.Mag`.

    filt2, filt1 : str
        Filters from which to compute magnitude difference.
        Color computed is `Mag(filt2) - Mag(filt1)`.
    """
    _defaultDataset = 'forced_src'
    _dfLevels = ('band', 'column')
    _defaultNoDup = True

    def __init__(self, col, filt2, filt1, **kwargs):
        self.col = fluxName(col)
        if filt2 == filt1:
            raise RuntimeError("Cannot compute Color for %s: %s - %s " % (col, filt2, filt1))
        self.filt2 = filt2
        self.filt1 = filt1

        self.mag2 = Mag(col, filt=filt2, **kwargs)
        self.mag1 = Mag(col, filt=filt1, **kwargs)

        super().__init__(**kwargs)

    @property
    def filt(self):
        return None

    @filt.setter
    def filt(self, filt):
        pass

    def _func(self, df):
        mag2 = self.mag2._func(df[self.filt2])
        mag1 = self.mag1._func(df[self.filt1])
        return mag2 - mag1

    @property
    def columns(self):
        return [self.mag1.col, self.mag2.col]

    def multilevelColumns(self, parq, **kwargs):
        return [(self.dataset, self.filt1, self.col), (self.dataset, self.filt2, self.col)]

    @property
    def name(self):
        return f'{self.filt2} - {self.filt1} ({self.col})'

    @property
    def shortname(self):
        return f"{self.col}_{self.filt2.replace('-', '')}m{self.filt1.replace('-', '')}"
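
# Illustrative sketch (not part of the original module): `Color._func` expects
# a dataframe with ('band', 'column') levels, as `_get_data` produces when
# `_dfLevels = ('band', 'column')`. The fluxMag0 cancels in the difference,
# so a factor-of-10 flux ratio between bands gives a color of -2.5.
#
#     >>> cols = pd.MultiIndex.from_product(
#     ...     [['g', 'r'], ['base_PsfFlux_instFlux']], names=['band', 'column'])
#     >>> df = pd.DataFrame([[100.0, 10.0]], columns=cols)
#     >>> color = Color('base_PsfFlux', 'g', 'r')
#     >>> round(float(color._func(df).iloc[0]), 6)
#     -2.5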


class Labeller(Functor):
    """The main purpose of this subclass is to force dropna=False in
    `__call__`, so that rows with null labels are preserved.
    """
    _null_label = 'null'
    _allow_difference = False
    name = 'label'
    _force_str = False

    def __call__(self, parq, dropna=False, **kwargs):
        return super().__call__(parq, dropna=False, **kwargs)


class StarGalaxyLabeller(Labeller):
    _columns = ["base_ClassificationExtendedness_value"]
    _column = "base_ClassificationExtendedness_value"

    def _func(self, df):
        x = df[self._columns][self._column]
        mask = x.isnull()
        test = (x < 0.5).astype(int)
        test = test.mask(mask, 2)

        # TODO: DM-21954 Look into veracity of inline comment below
        # are these backwards?
        categories = ['galaxy', 'star', self._null_label]
        label = pd.Series(pd.Categorical.from_codes(test, categories=categories),
                          index=x.index, name='label')
        if self._force_str:
            label = label.astype(str)
        return label


class NumStarLabeller(Labeller):
    _columns = ['numStarFlags']
    labels = {"star": 0, "maybe": 1, "notStar": 2}

    def _func(self, df):
        x = df[self._columns][self._columns[0]]

        # Number of filters
        n = len(x.unique()) - 1

        labels = ['noStar', 'maybe', 'star']
        label = pd.Series(pd.cut(x, [-1, 0, n-1, n], labels=labels),
                          index=x.index, name='label')

        if self._force_str:
            label = label.astype(str)

        return label


class DeconvolvedMoments(Functor):
    name = 'Deconvolved Moments'
    shortname = 'deconvolvedMoments'
    _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
                "ext_shapeHSM_HsmSourceMoments_yy",
                "base_SdssShape_xx", "base_SdssShape_yy",
                "ext_shapeHSM_HsmPsfMoments_xx",
                "ext_shapeHSM_HsmPsfMoments_yy")

    def _func(self, df):
        """Calculate deconvolved moments"""
        if "ext_shapeHSM_HsmSourceMoments_xx" in df.columns:  # _xx added by tdm
            hsm = df["ext_shapeHSM_HsmSourceMoments_xx"] + df["ext_shapeHSM_HsmSourceMoments_yy"]
        else:
            hsm = np.ones(len(df))*np.nan
        sdss = df["base_SdssShape_xx"] + df["base_SdssShape_yy"]
        if "ext_shapeHSM_HsmPsfMoments_xx" in df.columns:
            psf = df["ext_shapeHSM_HsmPsfMoments_xx"] + df["ext_shapeHSM_HsmPsfMoments_yy"]
        else:
            # LSST does not have shape.sdss.psf. Could instead add base_PsfShape to catalog using
            # exposure.getPsf().computeShape(s.getCentroid()).getIxx()
            # raise TaskError("No psf shape parameter found in catalog")
            raise RuntimeError('No psf shape parameter found in catalog')

        return hsm.where(np.isfinite(hsm), sdss) - psf


class SdssTraceSize(Functor):
    """Functor to calculate SDSS trace radius size for sources"""
    name = "SDSS Trace Size"
    shortname = 'sdssTrace'
    _columns = ("base_SdssShape_xx", "base_SdssShape_yy")

    def _func(self, df):
        srcSize = np.sqrt(0.5*(df["base_SdssShape_xx"] + df["base_SdssShape_yy"]))
        return srcSize


class PsfSdssTraceSizeDiff(Functor):
    """Functor to calculate SDSS trace radius size difference (%) between object and psf model"""
    name = "PSF - SDSS Trace Size"
    shortname = 'psf_sdssTrace'
    _columns = ("base_SdssShape_xx", "base_SdssShape_yy",
                "base_SdssShape_psf_xx", "base_SdssShape_psf_yy")

    def _func(self, df):
        srcSize = np.sqrt(0.5*(df["base_SdssShape_xx"] + df["base_SdssShape_yy"]))
        psfSize = np.sqrt(0.5*(df["base_SdssShape_psf_xx"] + df["base_SdssShape_psf_yy"]))
        sizeDiff = 100*(srcSize - psfSize)/(0.5*(srcSize + psfSize))
        return sizeDiff


class HsmTraceSize(Functor):
    """Functor to calculate HSM trace radius size for sources"""
    name = 'HSM Trace Size'
    shortname = 'hsmTrace'
    _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
                "ext_shapeHSM_HsmSourceMoments_yy")

    def _func(self, df):
        srcSize = np.sqrt(0.5*(df["ext_shapeHSM_HsmSourceMoments_xx"]
                               + df["ext_shapeHSM_HsmSourceMoments_yy"]))
        return srcSize


class PsfHsmTraceSizeDiff(Functor):
    """Functor to calculate HSM trace radius size difference (%) between object and psf model"""
    name = 'PSF - HSM Trace Size'
    shortname = 'psf_HsmTrace'
    _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
                "ext_shapeHSM_HsmSourceMoments_yy",
                "ext_shapeHSM_HsmPsfMoments_xx",
                "ext_shapeHSM_HsmPsfMoments_yy")

    def _func(self, df):
        srcSize = np.sqrt(0.5*(df["ext_shapeHSM_HsmSourceMoments_xx"]
                               + df["ext_shapeHSM_HsmSourceMoments_yy"]))
        psfSize = np.sqrt(0.5*(df["ext_shapeHSM_HsmPsfMoments_xx"]
                               + df["ext_shapeHSM_HsmPsfMoments_yy"]))
        sizeDiff = 100*(srcSize - psfSize)/(0.5*(srcSize + psfSize))
        return sizeDiff


class HsmFwhm(Functor):
    name = 'HSM Psf FWHM'
    _columns = ('ext_shapeHSM_HsmPsfMoments_xx', 'ext_shapeHSM_HsmPsfMoments_yy')
    # TODO: DM-21403 pixel scale should be computed from the CD matrix or transform matrix
    pixelScale = 0.168
    SIGMA2FWHM = 2*np.sqrt(2*np.log(2))

    def _func(self, df):
        return self.pixelScale*self.SIGMA2FWHM*np.sqrt(
            0.5*(df['ext_shapeHSM_HsmPsfMoments_xx'] + df['ext_shapeHSM_HsmPsfMoments_yy']))


class E1(Functor):
    name = "Distortion Ellipticity (e1)"
    shortname = "Distortion"

    def __init__(self, colXX, colXY, colYY, **kwargs):
        self.colXX = colXX
        self.colXY = colXY
        self.colYY = colYY
        self._columns = [self.colXX, self.colXY, self.colYY]
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.colXX, self.colXY, self.colYY]

    def _func(self, df):
        # e1 = (Ixx - Iyy) / (Ixx + Iyy); the numerator must be parenthesized
        # so the division does not bind to Iyy alone.
        return (df[self.colXX] - df[self.colYY]) / (df[self.colXX] + df[self.colYY])


class E2(Functor):
    name = "Ellipticity e2"

    def __init__(self, colXX, colXY, colYY, **kwargs):
        self.colXX = colXX
        self.colXY = colXY
        self.colYY = colYY
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.colXX, self.colXY, self.colYY]

    def _func(self, df):
        return 2*df[self.colXY] / (df[self.colXX] + df[self.colYY])


class RadiusFromQuadrupole(Functor):

    def __init__(self, colXX, colXY, colYY, **kwargs):
        self.colXX = colXX
        self.colXY = colXY
        self.colYY = colYY
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.colXX, self.colXY, self.colYY]

    def _func(self, df):
        return (df[self.colXX]*df[self.colYY] - df[self.colXY]**2)**0.25


class LocalWcs(Functor):
    """Computations using the stored localWcs.
    """
    name = "LocalWcsOperations"

    def __init__(self,
                 colCD_1_1,
                 colCD_1_2,
                 colCD_2_1,
                 colCD_2_2,
                 **kwargs):
        self.colCD_1_1 = colCD_1_1
        self.colCD_1_2 = colCD_1_2
        self.colCD_2_1 = colCD_2_1
        self.colCD_2_2 = colCD_2_2
        super().__init__(**kwargs)

    def computeDeltaRaDec(self, x, y, cd11, cd12, cd21, cd22):
        """Compute the RA/Dec offsets corresponding to the pixel offsets x, y,
        using the local linear Wcs.

        Parameters
        ----------
        x : `pandas.Series`
            X pixel coordinate.
        y : `pandas.Series`
            Y pixel coordinate.
        cd11 : `pandas.Series`
            [1, 1] element of the local Wcs affine transform.
        cd12 : `pandas.Series`
            [1, 2] element of the local Wcs affine transform.
        cd21 : `pandas.Series`
            [2, 1] element of the local Wcs affine transform.
        cd22 : `pandas.Series`
            [2, 2] element of the local Wcs affine transform.

        Returns
        -------
        raDecTuple : tuple
            RA and dec conversion of x and y given the local Wcs. Returned
            units are in radians.
        """
        return (x * cd11 + y * cd12, x * cd21 + y * cd22)

    def computeSkySeperation(self, ra1, dec1, ra2, dec2):
        """Compute the distance on the sphere between two RA/Dec coordinates,
        using the haversine formula.

        Parameters
        ----------
        ra1 : `pandas.Series`
            Ra of the first coordinate in radians.
        dec1 : `pandas.Series`
            Dec of the first coordinate in radians.
        ra2 : `pandas.Series`
            Ra of the second coordinate in radians.
        dec2 : `pandas.Series`
            Dec of the second coordinate in radians.

        Returns
        -------
        dist : `pandas.Series`
            Distance on the sphere in radians.
        """
        deltaDec = dec2 - dec1
        deltaRa = ra2 - ra1
        return 2 * np.arcsin(
            np.sqrt(
                np.sin(deltaDec / 2) ** 2
                + np.cos(dec2) * np.cos(dec1) * np.sin(deltaRa / 2) ** 2))

    def getSkySeperationFromPixel(self, x1, y1, x2, y2, cd11, cd12, cd21, cd22):
        """Compute the distance on the sphere from (x1, y1) to (x2, y2).

        Parameters
        ----------
        x1 : `pandas.Series`
            X pixel coordinate.
        y1 : `pandas.Series`
            Y pixel coordinate.
        x2 : `pandas.Series`
            X pixel coordinate.
        y2 : `pandas.Series`
            Y pixel coordinate.
        cd11 : `pandas.Series`
            [1, 1] element of the local Wcs affine transform.
        cd12 : `pandas.Series`
            [1, 2] element of the local Wcs affine transform.
        cd21 : `pandas.Series`
            [2, 1] element of the local Wcs affine transform.
        cd22 : `pandas.Series`
            [2, 2] element of the local Wcs affine transform.

        Returns
        -------
        dist : `pandas.Series`
            Distance on the sphere in radians.
        """
        ra1, dec1 = self.computeDeltaRaDec(x1, y1, cd11, cd12, cd21, cd22)
        ra2, dec2 = self.computeDeltaRaDec(x2, y2, cd11, cd12, cd21, cd22)
        # Great circle distance for small separations.
        return self.computeSkySeperation(ra1, dec1, ra2, dec2)
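
# Illustrative sketch (not part of the original module): the haversine formula
# in `computeSkySeperation` recovers a pure 1-arcsecond declination offset.
# The CD column names are hypothetical placeholders.
#
#     >>> lw = LocalWcs('cd11', 'cd12', 'cd21', 'cd22')
#     >>> sep = lw.computeSkySeperation(0.0, 0.0, 0.0, np.deg2rad(1 / 3600))
#     >>> round(np.rad2deg(sep) * 3600, 6)
#     1.0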


class ComputePixelScale(LocalWcs):
    """Compute the local pixel scale from the stored CDMatrix.
    """
    name = "PixelScale"

    @property
    def columns(self):
        return [self.colCD_1_1,
                self.colCD_1_2,
                self.colCD_2_1,
                self.colCD_2_2]

    def pixelScaleArcseconds(self, cd11, cd12, cd21, cd22):
        """Compute the local pixel to sky scale conversion in arcseconds.

        Parameters
        ----------
        cd11 : `pandas.Series`
            [1, 1] element of the local Wcs affine transform in radians.
        cd12 : `pandas.Series`
            [1, 2] element of the local Wcs affine transform in radians.
        cd21 : `pandas.Series`
            [2, 1] element of the local Wcs affine transform in radians.
        cd22 : `pandas.Series`
            [2, 2] element of the local Wcs affine transform in radians.

        Returns
        -------
        pixScale : `pandas.Series`
            Arcseconds per pixel at the location of the local WCS.
        """
        return 3600 * np.degrees(np.sqrt(np.fabs(cd11 * cd22 - cd12 * cd21)))

    def _func(self, df):
        return self.pixelScaleArcseconds(df[self.colCD_1_1],
                                         df[self.colCD_1_2],
                                         df[self.colCD_2_1],
                                         df[self.colCD_2_2])
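
# Illustrative sketch (not part of the original module): a diagonal CD matrix
# with 0.2 arcsec/pixel on the diagonal (expressed in radians) yields a pixel
# scale of 0.2, since the scale is sqrt(|det CD|). Column names are
# hypothetical placeholders.
#
#     >>> cd = np.deg2rad(0.2 / 3600)
#     >>> ps = ComputePixelScale('cd11', 'cd12', 'cd21', 'cd22')
#     >>> round(ps.pixelScaleArcseconds(cd, 0.0, 0.0, cd), 6)
#     0.2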


class ConvertPixelToArcseconds(ComputePixelScale):
    """Convert a value in units of pixels to units of arcseconds.
    """

    def __init__(self,
                 col,
                 colCD_1_1,
                 colCD_1_2,
                 colCD_2_1,
                 colCD_2_2,
                 **kwargs):
        self.col = col
        super().__init__(colCD_1_1,
                         colCD_1_2,
                         colCD_2_1,
                         colCD_2_2,
                         **kwargs)

    @property
    def name(self):
        return f"{self.col}_asArcseconds"

    @property
    def columns(self):
        return [self.col,
                self.colCD_1_1,
                self.colCD_1_2,
                self.colCD_2_1,
                self.colCD_2_2]

    def _func(self, df):
        return df[self.col] * self.pixelScaleArcseconds(df[self.colCD_1_1],
                                                        df[self.colCD_1_2],
                                                        df[self.colCD_2_1],
                                                        df[self.colCD_2_2])


class ConvertPixelSqToArcsecondsSq(ComputePixelScale):
    """Convert a value in units of pixels squared to units of arcseconds squared.
    """

    def __init__(self,
                 col,
                 colCD_1_1,
                 colCD_1_2,
                 colCD_2_1,
                 colCD_2_2,
                 **kwargs):
        self.col = col
        super().__init__(colCD_1_1,
                         colCD_1_2,
                         colCD_2_1,
                         colCD_2_2,
                         **kwargs)

    @property
    def name(self):
        return f"{self.col}_asArcsecondsSq"

    @property
    def columns(self):
        return [self.col,
                self.colCD_1_1,
                self.colCD_1_2,
                self.colCD_2_1,
                self.colCD_2_2]

    def _func(self, df):
        pixScale = self.pixelScaleArcseconds(df[self.colCD_1_1],
                                             df[self.colCD_1_2],
                                             df[self.colCD_2_1],
                                             df[self.colCD_2_2])
        return df[self.col] * pixScale * pixScale


class ReferenceBand(Functor):
    name = 'Reference Band'
    shortname = 'refBand'

    @property
    def columns(self):
        return ["merge_measurement_i",
                "merge_measurement_r",
                "merge_measurement_z",
                "merge_measurement_y",
                "merge_measurement_g",
                "merge_measurement_u"]

    def _func(self, df: pd.DataFrame) -> pd.Series:
        def getFilterAliasName(row):
            # get column name with the max value (True > False)
            colName = row.idxmax()
            return colName.replace('merge_measurement_', '')

        # Makes a Series of dtype object if df is empty
        return df[self.columns].apply(getFilterAliasName, axis=1,
                                      result_type='reduce').astype('object')


class Photometry(Functor):
    # AB to NanoJansky (3631 Jansky)
    AB_FLUX_SCALE = (0 * u.ABmag).to_value(u.nJy)
    LOG_AB_FLUX_SCALE = 12.56
    FIVE_OVER_2LOG10 = 1.085736204758129569
    # TO DO: DM-21955 Replace hard coded photometric calibration values
    COADD_ZP = 27

    def __init__(self, colFlux, colFluxErr=None, calib=None, **kwargs):
        self.vhypot = np.vectorize(self.hypot)
        self.col = colFlux
        self.colFluxErr = colFluxErr

        self.calib = calib
        if calib is not None:
            self.fluxMag0, self.fluxMag0Err = calib.getFluxMag0()
        else:
            self.fluxMag0 = 1./np.power(10, -0.4*self.COADD_ZP)
            self.fluxMag0Err = 0.

        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.col]

    @property
    def name(self):
        return f'mag_{self.col}'

    @classmethod
    def hypot(cls, a, b):
        if np.abs(a) < np.abs(b):
            a, b = b, a
        if a == 0.:
            return 0.
        q = b/a
        return np.abs(a) * np.sqrt(1. + q*q)

    def dn2flux(self, dn, fluxMag0):
        return self.AB_FLUX_SCALE * dn / fluxMag0

    def dn2mag(self, dn, fluxMag0):
        with np.warnings.catch_warnings():
            np.warnings.filterwarnings('ignore', r'invalid value encountered')
            np.warnings.filterwarnings('ignore', r'divide by zero')
            return -2.5 * np.log10(dn/fluxMag0)

    def dn2fluxErr(self, dn, dnErr, fluxMag0, fluxMag0Err):
        retVal = self.vhypot(dn * fluxMag0Err, dnErr * fluxMag0)
        retVal *= self.AB_FLUX_SCALE / fluxMag0 / fluxMag0
        return retVal

    def dn2MagErr(self, dn, dnErr, fluxMag0, fluxMag0Err):
        retVal = self.dn2fluxErr(dn, dnErr, fluxMag0, fluxMag0Err) / self.dn2flux(dn, fluxMag0)
        return self.FIVE_OVER_2LOG10 * retVal
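
# Illustrative sketch (not part of the original module): with the default
# coadd zeropoint of 27, one count corresponds to magnitude 27, and dn2flux
# scales counts by AB_FLUX_SCALE / fluxMag0 to give nanojanskys. The column
# name is hypothetical.
#
#     >>> phot = Photometry('base_PsfFlux_instFlux')
#     >>> round(float(phot.dn2mag(1.0, phot.fluxMag0)), 6)
#     27.0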


class NanoJansky(Photometry):
    def _func(self, df):
        return self.dn2flux(df[self.col], self.fluxMag0)


class NanoJanskyErr(Photometry):
    @property
    def columns(self):
        return [self.col, self.colFluxErr]

    def _func(self, df):
        retArr = self.dn2fluxErr(df[self.col], df[self.colFluxErr], self.fluxMag0, self.fluxMag0Err)
        return pd.Series(retArr, index=df.index)


class Magnitude(Photometry):
    def _func(self, df):
        return self.dn2mag(df[self.col], self.fluxMag0)


class MagnitudeErr(Photometry):
    @property
    def columns(self):
        return [self.col, self.colFluxErr]

    def _func(self, df):
        retArr = self.dn2MagErr(df[self.col], df[self.colFluxErr], self.fluxMag0, self.fluxMag0Err)
        return pd.Series(retArr, index=df.index)


class LocalPhotometry(Functor):
    """Base class for calibrating the specified instrument flux column using
    the local photometric calibration.

    Parameters
    ----------
    instFluxCol : `str`
        Name of the instrument flux column.
    instFluxErrCol : `str`
        Name of the associated error columns for ``instFluxCol``.
    photoCalibCol : `str`
        Name of local calibration column.
    photoCalibErrCol : `str`
        Error associated with ``photoCalibCol``

    See also
    --------
    LocalPhotometry
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    """
    logNJanskyToAB = (1 * u.nJy).to_value(u.ABmag)

    def __init__(self,
                 instFluxCol,
                 instFluxErrCol,
                 photoCalibCol,
                 photoCalibErrCol,
                 **kwargs):
        self.instFluxCol = instFluxCol
        self.instFluxErrCol = instFluxErrCol
        self.photoCalibCol = photoCalibCol
        self.photoCalibErrCol = photoCalibErrCol
        super().__init__(**kwargs)

    def instFluxToNanojansky(self, instFlux, localCalib):
        """Convert instrument flux to nanojanskys.

        Parameters
        ----------
        instFlux : `numpy.ndarray` or `pandas.Series`
            Array of instrument flux measurements
        localCalib : `numpy.ndarray` or `pandas.Series`
            Array of local photometric calibration estimates.

        Returns
        -------
        calibFlux : `numpy.ndarray` or `pandas.Series`
            Array of calibrated flux measurements.
        """
        return instFlux * localCalib

    def instFluxErrToNanojanskyErr(self, instFlux, instFluxErr, localCalib, localCalibErr):
        """Convert instrument flux error to nanojansky error.

        Parameters
        ----------
        instFlux : `numpy.ndarray` or `pandas.Series`
            Array of instrument flux measurements
        instFluxErr : `numpy.ndarray` or `pandas.Series`
            Errors on associated ``instFlux`` values
        localCalib : `numpy.ndarray` or `pandas.Series`
            Array of local photometric calibration estimates.
        localCalibErr : `numpy.ndarray` or `pandas.Series`
            Errors on associated ``localCalib`` values

        Returns
        -------
        calibFluxErr : `numpy.ndarray` or `pandas.Series`
            Errors on calibrated flux measurements.
        """
        return np.hypot(instFluxErr * localCalib, instFlux * localCalibErr)

    def instFluxToMagnitude(self, instFlux, localCalib):
        """Convert instrument flux to AB magnitude.

        Parameters
        ----------
        instFlux : `numpy.ndarray` or `pandas.Series`
            Array of instrument flux measurements
        localCalib : `numpy.ndarray` or `pandas.Series`
            Array of local photometric calibration estimates.

        Returns
        -------
        calibMag : `numpy.ndarray` or `pandas.Series`
            Array of calibrated AB magnitudes.
        """
        return -2.5 * np.log10(self.instFluxToNanojansky(instFlux, localCalib)) + self.logNJanskyToAB

    def instFluxErrToMagnitudeErr(self, instFlux, instFluxErr, localCalib, localCalibErr):
        """Convert instrument flux error to AB magnitude error.

        Parameters
        ----------
        instFlux : `numpy.ndarray` or `pandas.Series`
            Array of instrument flux measurements
        instFluxErr : `numpy.ndarray` or `pandas.Series`
            Errors on associated ``instFlux`` values
        localCalib : `numpy.ndarray` or `pandas.Series`
            Array of local photometric calibration estimates.
        localCalibErr : `numpy.ndarray` or `pandas.Series`
            Errors on associated ``localCalib`` values

        Returns
        -------
        calibMagErr : `numpy.ndarray` or `pandas.Series`
            Error on calibrated AB magnitudes.
        """
        err = self.instFluxErrToNanojanskyErr(instFlux, instFluxErr, localCalib, localCalibErr)
        # The denominator must be the calibrated flux, so pass localCalib
        # (not instFluxErr) to instFluxToNanojansky.
        return 2.5 / np.log(10) * err / self.instFluxToNanojansky(instFlux, localCalib)
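
# Illustrative sketch (not part of the original module): the local calibration
# is a per-row scale factor (nJy per count), so calibration is multiplication
# and the error follows from standard propagation. Column names are
# hypothetical placeholders.
#
#     >>> lp = LocalPhotometry('instFlux', 'instFluxErr',
#     ...                      'localCalib', 'localCalibErr')
#     >>> lp.instFluxToNanojansky(100.0, 2.0)
#     200.0
#     >>> lp.instFluxErrToNanojanskyErr(100.0, 3.0, 2.0, 0.0)
#     6.0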


class LocalNanojansky(LocalPhotometry):
    """Compute calibrated fluxes using the local calibration value.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    """

    @property
    def columns(self):
        return [self.instFluxCol, self.photoCalibCol]

    @property
    def name(self):
        return f'flux_{self.instFluxCol}'

    def _func(self, df):
        return self.instFluxToNanojansky(df[self.instFluxCol], df[self.photoCalibCol])


class LocalNanojanskyErr(LocalPhotometry):
    """Compute calibrated flux errors using the local calibration value.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    """

    @property
    def columns(self):
        return [self.instFluxCol, self.instFluxErrCol,
                self.photoCalibCol, self.photoCalibErrCol]

    @property
    def name(self):
        return f'fluxErr_{self.instFluxCol}'

    def _func(self, df):
        return self.instFluxErrToNanojanskyErr(df[self.instFluxCol], df[self.instFluxErrCol],
                                               df[self.photoCalibCol], df[self.photoCalibErrCol])


class LocalMagnitude(LocalPhotometry):
    """Compute calibrated AB magnitudes using the local calibration value.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    """

    @property
    def columns(self):
        return [self.instFluxCol, self.photoCalibCol]

    @property
    def name(self):
        return f'mag_{self.instFluxCol}'

    def _func(self, df):
        return self.instFluxToMagnitude(df[self.instFluxCol],
                                        df[self.photoCalibCol])


class LocalMagnitudeErr(LocalPhotometry):
    """Compute calibrated AB magnitude errors using the local calibration value.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    """

    @property
    def columns(self):
        return [self.instFluxCol, self.instFluxErrCol,
                self.photoCalibCol, self.photoCalibErrCol]

    @property
    def name(self):
        return f'magErr_{self.instFluxCol}'

    def _func(self, df):
        return self.instFluxErrToMagnitudeErr(df[self.instFluxCol],
                                              df[self.instFluxErrCol],
                                              df[self.photoCalibCol],
                                              df[self.photoCalibErrCol])


class LocalDipoleMeanFlux(LocalPhotometry):
    """Compute absolute mean of dipole fluxes.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    LocalDipoleMeanFlux
    LocalDipoleMeanFluxErr
    LocalDipoleDiffFlux
    LocalDipoleDiffFluxErr
    """
    def __init__(self,
                 instFluxPosCol,
                 instFluxNegCol,
                 instFluxPosErrCol,
                 instFluxNegErrCol,
                 photoCalibCol,
                 photoCalibErrCol,
                 **kwargs):
        self.instFluxNegCol = instFluxNegCol
        self.instFluxPosCol = instFluxPosCol
        self.instFluxNegErrCol = instFluxNegErrCol
        self.instFluxPosErrCol = instFluxPosErrCol
        self.photoCalibCol = photoCalibCol
        self.photoCalibErrCol = photoCalibErrCol
        super().__init__(instFluxNegCol,
                         instFluxNegErrCol,
                         photoCalibCol,
                         photoCalibErrCol,
                         **kwargs)

    @property
    def columns(self):
        return [self.instFluxPosCol,
                self.instFluxNegCol,
                self.photoCalibCol]

    @property
    def name(self):
        return f'dipMeanFlux_{self.instFluxPosCol}_{self.instFluxNegCol}'

    def _func(self, df):
        return 0.5*(np.fabs(self.instFluxToNanojansky(df[self.instFluxNegCol], df[self.photoCalibCol]))
                    + np.fabs(self.instFluxToNanojansky(df[self.instFluxPosCol], df[self.photoCalibCol])))


class LocalDipoleMeanFluxErr(LocalDipoleMeanFlux):
    """Compute the error on the absolute mean of dipole fluxes.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    LocalDipoleMeanFlux
    LocalDipoleMeanFluxErr
    LocalDipoleDiffFlux
    LocalDipoleDiffFluxErr
    """

    @property
    def columns(self):
        return [self.instFluxPosCol,
                self.instFluxNegCol,
                self.instFluxPosErrCol,
                self.instFluxNegErrCol,
                self.photoCalibCol,
                self.photoCalibErrCol]

    @property
    def name(self):
        return f'dipMeanFluxErr_{self.instFluxPosCol}_{self.instFluxNegCol}'

    def _func(self, df):
        # Parenthesize (|neg| + |pos|) so the calibration error multiplies the
        # summed flux, mirroring LocalDipoleDiffFluxErr below.
        return 0.5*np.sqrt(
            ((np.fabs(df[self.instFluxNegCol]) + np.fabs(df[self.instFluxPosCol]))
             * df[self.photoCalibErrCol])**2
            + (df[self.instFluxNegErrCol]**2 + df[self.instFluxPosErrCol]**2)
            * df[self.photoCalibCol]**2)


class LocalDipoleDiffFlux(LocalDipoleMeanFlux):
    """Compute the absolute difference of dipole fluxes.

    Value is (abs(pos) - abs(neg)).

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    LocalDipoleMeanFlux
    LocalDipoleMeanFluxErr
    LocalDipoleDiffFlux
    LocalDipoleDiffFluxErr
    """

    @property
    def columns(self):
        return [self.instFluxPosCol,
                self.instFluxNegCol,
                self.photoCalibCol]

    @property
    def name(self):
        return f'dipDiffFlux_{self.instFluxPosCol}_{self.instFluxNegCol}'

    def _func(self, df):
        return (np.fabs(self.instFluxToNanojansky(df[self.instFluxPosCol], df[self.photoCalibCol]))
                - np.fabs(self.instFluxToNanojansky(df[self.instFluxNegCol], df[self.photoCalibCol])))


class LocalDipoleDiffFluxErr(LocalDipoleMeanFlux):
    """Compute the error on the absolute difference of dipole fluxes.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    LocalDipoleMeanFlux
    LocalDipoleMeanFluxErr
    LocalDipoleDiffFlux
    LocalDipoleDiffFluxErr
    """

    @property
    def columns(self):
        return [self.instFluxPosCol,
                self.instFluxNegCol,
                self.instFluxPosErrCol,
                self.instFluxNegErrCol,
                self.photoCalibCol,
                self.photoCalibErrCol]

    @property
    def name(self):
        return f'dipDiffFluxErr_{self.instFluxPosCol}_{self.instFluxNegCol}'

    def _func(self, df):
        return np.sqrt(
            ((np.fabs(df[self.instFluxPosCol]) - np.fabs(df[self.instFluxNegCol]))
             * df[self.photoCalibErrCol])**2
            + (df[self.instFluxPosErrCol]**2 + df[self.instFluxNegErrCol]**2)
            * df[self.photoCalibCol]**2)


class Ratio(Functor):
    """Base class for returning the ratio of 2 columns.

    Can be used to compute a signal-to-noise ratio for any input flux.

    Parameters
    ----------
    numerator : `str`
        Name of the column to use as the numerator in the ratio.
    denominator : `str`
        Name of the column to use as the denominator in the ratio.
    """
    def __init__(self,
                 numerator,
                 denominator,
                 **kwargs):
        self.numerator = numerator
        self.denominator = denominator
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.numerator, self.denominator]

    @property
    def name(self):
        return f'ratio_{self.numerator}_{self.denominator}'

    def _func(self, df):
        with np.warnings.catch_warnings():
            np.warnings.filterwarnings('ignore', r'invalid value encountered')
            np.warnings.filterwarnings('ignore', r'divide by zero')
            return df[self.numerator] / df[self.denominator]
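
# Illustrative sketch (not part of the original module): `Ratio` as a
# signal-to-noise estimate. Column names are hypothetical.
#
#     >>> df = pd.DataFrame({'flux': [10.0], 'fluxErr': [2.0]})
#     >>> round(float(Ratio('flux', 'fluxErr')(df).iloc[0]), 6)
#     5.0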