Coverage for python/lsst/pipe/tasks/functors.py : 35%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of pipe_tasks.
2#
3# LSST Data Management System
4# This product includes software developed by the
5# LSST Project (http://www.lsst.org/).
6# See COPYRIGHT file at the top of the source tree.
7#
8# This program is free software: you can redistribute it and/or modify
9# it under the terms of the GNU General Public License as published by
10# the Free Software Foundation, either version 3 of the License, or
11# (at your option) any later version.
12#
13# This program is distributed in the hope that it will be useful,
14# but WITHOUT ANY WARRANTY; without even the implied warranty of
15# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16# GNU General Public License for more details.
17#
18# You should have received a copy of the LSST License Statement and
19# the GNU General Public License along with this program. If not,
20# see <https://www.lsstcorp.org/LegalNotices/>.
21#
22import yaml
23import re
24from itertools import product
25import os.path
27import pandas as pd
28import numpy as np
29import astropy.units as u
31from lsst.daf.persistence import doImport
32from lsst.daf.butler import DeferredDatasetHandle
33import lsst.geom as geom
34import lsst.sphgeom as sphgeom
36from .parquetTable import ParquetTable, MultilevelParquetTable
39def init_fromDict(initDict, basePath='lsst.pipe.tasks.functors',
40 typeKey='functor', name=None):
41 """Initialize an object defined in a dictionary
43 The object needs to be importable as
44 f'{basePath}.{initDict[typeKey]}'
45 The positional and keyword arguments (if any) are contained in
46 "args" and "kwargs" entries in the dictionary, respectively.
47 This is used in `functors.CompositeFunctor.from_yaml` to initialize
48 a composite functor from a specification in a YAML file.
50 Parameters
51 ----------
52 initDict : dictionary
53 Dictionary describing object's initialization. Must contain
54 an entry keyed by ``typeKey`` that is the name of the object,
55 relative to ``basePath``.
56 basePath : str
57 Path relative to module in which ``initDict[typeKey]`` is defined.
58 typeKey : str
59 Key of ``initDict`` that is the name of the object
60 (relative to `basePath`).
61 """
62 initDict = initDict.copy()
63 # TO DO: DM-21956 We should be able to define functors outside this module
64 pythonType = doImport(f'{basePath}.{initDict.pop(typeKey)}')
65 args = []
66 if 'args' in initDict:
67 args = initDict.pop('args')
68 if isinstance(args, str):
69 args = [args]
70 try:
71 element = pythonType(*args, **initDict)
72 except Exception as e:
73 message = f'Error in constructing functor "{name}" of type {pythonType.__name__} with args: {args}'
74 raise type(e)(message, e.args)
75 return element
78class Functor(object):
79 """Define and execute a calculation on a ParquetTable
81 The `__call__` method accepts either a `ParquetTable` object or a
82 `DeferredDatasetHandle`, and returns the
83 result of the calculation as a single column. Each functor defines what
84 columns are needed for the calculation, and only these columns are read
85 from the `ParquetTable`.
87 The action of `__call__` consists of two steps: first, loading the
88 necessary columns from disk into memory as a `pandas.DataFrame` object;
89 and second, performing the computation on this dataframe and returning the
90 result.
93 To define a new `Functor`, a subclass must define a `_func` method,
94 that takes a `pandas.DataFrame` and returns result in a `pandas.Series`.
95 In addition, it must define the following attributes
97 * `_columns`: The columns necessary to perform the calculation
98 * `name`: A name appropriate for a figure axis label
99 * `shortname`: A name appropriate for use as a dictionary key
101 On initialization, a `Functor` should declare what band (`filt` kwarg)
102 and dataset (e.g. `'ref'`, `'meas'`, `'forced_src'`) it is intended to be
103 applied to. This enables the `_get_data` method to extract the proper
104 columns from the parquet file. If not specified, the dataset will fall back
105 on the `_defaultDataset`attribute. If band is not specified and `dataset`
106 is anything other than `'ref'`, then an error will be raised when trying to
107 perform the calculation.
109 Originally, `Functor` was set up to expect
110 datasets formatted like the `deepCoadd_obj` dataset; that is, a
111 dataframe with a multi-level column index, with the levels of the
112 column index being `band`, `dataset`, and `column`.
113 It has since been generalized to apply to dataframes without mutli-level
114 indices and multi-level indices with just `dataset` and `column` levels.
115 In addition, the `_get_data` method that reads
116 the dataframe from the `ParquetTable` will return a dataframe with column
117 index levels defined by the `_dfLevels` attribute; by default, this is
118 `column`.
120 The `_dfLevels` attributes should generally not need to
121 be changed, unless `_func` needs columns from multiple filters or datasets
122 to do the calculation.
123 An example of this is the `lsst.pipe.tasks.functors.Color` functor, for
124 which `_dfLevels = ('band', 'column')`, and `_func` expects the dataframe
125 it gets to have those levels in the column index.
127 Parameters
128 ----------
129 filt : str
130 Filter upon which to do the calculation
132 dataset : str
133 Dataset upon which to do the calculation
134 (e.g., 'ref', 'meas', 'forced_src').
136 """
138 _defaultDataset = 'ref'
139 _dfLevels = ('column',)
140 _defaultNoDup = False
142 def __init__(self, filt=None, dataset=None, noDup=None):
143 self.filt = filt
144 self.dataset = dataset if dataset is not None else self._defaultDataset
145 self._noDup = noDup
147 @property
148 def noDup(self):
149 if self._noDup is not None:
150 return self._noDup
151 else:
152 return self._defaultNoDup
154 @property
155 def columns(self):
156 """Columns required to perform calculation
157 """
158 if not hasattr(self, '_columns'):
159 raise NotImplementedError('Must define columns property or _columns attribute')
160 return self._columns
162 def _get_data_columnLevels(self, data, columnIndex=None):
163 """Gets the names of the column index levels
165 This should only be called in the context of a multilevel table.
166 The logic here is to enable this to work both with the gen2 `MultilevelParquetTable`
167 and with the gen3 `DeferredDatasetHandle`.
169 Parameters
170 ----------
171 data : `MultilevelParquetTable` or `DeferredDatasetHandle`
173 columnnIndex (optional): pandas `Index` object
174 if not passed, then it is read from the `DeferredDatasetHandle`
175 """
176 if isinstance(data, DeferredDatasetHandle):
177 if columnIndex is None:
178 columnIndex = data.get(component="columns")
179 if columnIndex is not None:
180 return columnIndex.names
181 if isinstance(data, MultilevelParquetTable):
182 return data.columnLevels
183 else:
184 raise TypeError(f"Unknown type for data: {type(data)}!")
186 def _get_data_columnLevelNames(self, data, columnIndex=None):
187 """Gets the content of each of the column levels for a multilevel table
189 Similar to `_get_data_columnLevels`, this enables backward compatibility with gen2.
191 Mirrors original gen2 implementation within `pipe.tasks.parquetTable.MultilevelParquetTable`
192 """
193 if isinstance(data, DeferredDatasetHandle):
194 if columnIndex is None:
195 columnIndex = data.get(component="columns")
196 if columnIndex is not None:
197 columnLevels = columnIndex.names
198 columnLevelNames = {
199 level: list(np.unique(np.array([c for c in columnIndex])[:, i]))
200 for i, level in enumerate(columnLevels)
201 }
202 return columnLevelNames
203 if isinstance(data, MultilevelParquetTable):
204 return data.columnLevelNames
205 else:
206 raise TypeError(f"Unknown type for data: {type(data)}!")
208 def _colsFromDict(self, colDict, columnIndex=None):
209 """Converts dictionary column specficiation to a list of columns
211 This mirrors the original gen2 implementation within `pipe.tasks.parquetTable.MultilevelParquetTable`
212 """
213 new_colDict = {}
214 columnLevels = self._get_data_columnLevels(None, columnIndex=columnIndex)
216 for i, lev in enumerate(columnLevels):
217 if lev in colDict:
218 if isinstance(colDict[lev], str):
219 new_colDict[lev] = [colDict[lev]]
220 else:
221 new_colDict[lev] = colDict[lev]
222 else:
223 new_colDict[lev] = columnIndex.levels[i]
225 levelCols = [new_colDict[lev] for lev in columnLevels]
226 cols = product(*levelCols)
227 return list(cols)
229 def multilevelColumns(self, data, columnIndex=None, returnTuple=False):
230 """Returns columns needed by functor from multilevel dataset
232 To access tables with multilevel column structure, the `MultilevelParquetTable`
233 or `DeferredDatasetHandle` need to be passed either a list of tuples or a
234 dictionary.
236 Parameters
237 ----------
238 data : `MultilevelParquetTable` or `DeferredDatasetHandle`
240 columnIndex (optional): pandas `Index` object
241 either passed or read in from `DeferredDatasetHandle`.
243 `returnTuple` : bool
244 If true, then return a list of tuples rather than the column dictionary
245 specification. This is set to `True` by `CompositeFunctor` in order to be able to
246 combine columns from the various component functors.
248 """
249 if isinstance(data, DeferredDatasetHandle) and columnIndex is None:
250 columnIndex = data.get(component="columns")
252 # Confirm that the dataset has the column levels the functor is expecting it to have.
253 columnLevels = self._get_data_columnLevels(data, columnIndex)
255 columnDict = {'column': self.columns,
256 'dataset': self.dataset}
257 if self.filt is None:
258 columnLevelNames = self._get_data_columnLevelNames(data, columnIndex)
259 if "band" in columnLevels:
260 if self.dataset == "ref":
261 columnDict["band"] = columnLevelNames["band"][0]
262 else:
263 raise ValueError(f"'filt' not set for functor {self.name}"
264 f"(dataset {self.dataset}) "
265 "and ParquetTable "
266 "contains multiple filters in column index. "
267 "Set 'filt' or set 'dataset' to 'ref'.")
268 else:
269 columnDict['band'] = self.filt
271 if isinstance(data, MultilevelParquetTable):
272 return data._colsFromDict(columnDict)
273 elif isinstance(data, DeferredDatasetHandle):
274 if returnTuple:
275 return self._colsFromDict(columnDict, columnIndex=columnIndex)
276 else:
277 return columnDict
279 def _func(self, df, dropna=True):
280 raise NotImplementedError('Must define calculation on dataframe')
282 def _get_columnIndex(self, data):
283 """Return columnIndex
284 """
286 if isinstance(data, DeferredDatasetHandle):
287 return data.get(component="columns")
288 else:
289 return None
291 def _get_data(self, data):
292 """Retrieve dataframe necessary for calculation.
294 The data argument can be a DataFrame, a ParquetTable instance, or a gen3 DeferredDatasetHandle
296 Returns dataframe upon which `self._func` can act.
298 N.B. while passing a raw pandas `DataFrame` *should* work here, it has not been tested.
299 """
300 if isinstance(data, pd.DataFrame):
301 return data
303 # First thing to do: check to see if the data source has a multilevel column index or not.
304 columnIndex = self._get_columnIndex(data)
305 is_multiLevel = isinstance(data, MultilevelParquetTable) or isinstance(columnIndex, pd.MultiIndex)
307 # Simple single-level parquet table, gen2
308 if isinstance(data, ParquetTable) and not is_multiLevel:
309 columns = self.columns
310 df = data.toDataFrame(columns=columns)
311 return df
313 # Get proper columns specification for this functor
314 if is_multiLevel:
315 columns = self.multilevelColumns(data, columnIndex=columnIndex)
316 else:
317 columns = self.columns
319 if isinstance(data, MultilevelParquetTable):
320 # Load in-memory dataframe with appropriate columns the gen2 way
321 df = data.toDataFrame(columns=columns, droplevels=False)
322 elif isinstance(data, DeferredDatasetHandle):
323 # Load in-memory dataframe with appropriate columns the gen3 way
324 df = data.get(parameters={"columns": columns})
326 # Drop unnecessary column levels
327 if is_multiLevel:
328 df = self._setLevels(df)
330 return df
332 def _setLevels(self, df):
333 levelsToDrop = [n for n in df.columns.names if n not in self._dfLevels]
334 df.columns = df.columns.droplevel(levelsToDrop)
335 return df
337 def _dropna(self, vals):
338 return vals.dropna()
340 def __call__(self, data, dropna=False):
341 try:
342 df = self._get_data(data)
343 vals = self._func(df)
344 except Exception:
345 vals = self.fail(df)
346 if dropna:
347 vals = self._dropna(vals)
349 return vals
351 def difference(self, data1, data2, **kwargs):
352 """Computes difference between functor called on two different ParquetTable objects
353 """
354 return self(data1, **kwargs) - self(data2, **kwargs)
356 def fail(self, df):
357 return pd.Series(np.full(len(df), np.nan), index=df.index)
359 @property
360 def name(self):
361 """Full name of functor (suitable for figure labels)
362 """
363 return NotImplementedError
365 @property
366 def shortname(self):
367 """Short name of functor (suitable for column name/dict key)
368 """
369 return self.name
372class CompositeFunctor(Functor):
373 """Perform multiple calculations at once on a catalog
375 The role of a `CompositeFunctor` is to group together computations from
376 multiple functors. Instead of returning `pandas.Series` a
377 `CompositeFunctor` returns a `pandas.Dataframe`, with the column names
378 being the keys of `funcDict`.
380 The `columns` attribute of a `CompositeFunctor` is the union of all columns
381 in all the component functors.
383 A `CompositeFunctor` does not use a `_func` method itself; rather,
384 when a `CompositeFunctor` is called, all its columns are loaded
385 at once, and the resulting dataframe is passed to the `_func` method of each component
386 functor. This has the advantage of only doing I/O (reading from parquet file) once,
387 and works because each individual `_func` method of each component functor does not
388 care if there are *extra* columns in the dataframe being passed; only that it must contain
389 *at least* the `columns` it expects.
391 An important and useful class method is `from_yaml`, which takes as argument the path to a YAML
392 file specifying a collection of functors.
394 Parameters
395 ----------
396 funcs : `dict` or `list`
397 Dictionary or list of functors. If a list, then it will be converted
398 into a dictonary according to the `.shortname` attribute of each functor.
400 """
401 dataset = None
403 def __init__(self, funcs, **kwargs):
405 if type(funcs) == dict:
406 self.funcDict = funcs
407 else:
408 self.funcDict = {f.shortname: f for f in funcs}
410 self._filt = None
412 super().__init__(**kwargs)
414 @property
415 def filt(self):
416 return self._filt
418 @filt.setter
419 def filt(self, filt):
420 if filt is not None:
421 for _, f in self.funcDict.items():
422 f.filt = filt
423 self._filt = filt
425 def update(self, new):
426 if isinstance(new, dict):
427 self.funcDict.update(new)
428 elif isinstance(new, CompositeFunctor):
429 self.funcDict.update(new.funcDict)
430 else:
431 raise TypeError('Can only update with dictionary or CompositeFunctor.')
433 # Make sure new functors have the same 'filt' set
434 if self.filt is not None:
435 self.filt = self.filt
437 @property
438 def columns(self):
439 return list(set([x for y in [f.columns for f in self.funcDict.values()] for x in y]))
441 def multilevelColumns(self, data, **kwargs):
442 # Get the union of columns for all component functors. Note the need to have `returnTuple=True` here.
443 return list(
444 set(
445 [
446 x
447 for y in [
448 f.multilevelColumns(data, returnTuple=True, **kwargs) for f in self.funcDict.values()
449 ]
450 for x in y
451 ]
452 )
453 )
455 def __call__(self, data, **kwargs):
456 """Apply the functor to the data table
458 Parameters
459 ----------
460 data : `lsst.daf.butler.DeferredDatasetHandle`,
461 `lsst.pipe.tasks.parquetTable.MultilevelParquetTable`,
462 `lsst.pipe.tasks.parquetTable.ParquetTable`,
463 or `pandas.DataFrame`.
464 The table or a pointer to a table on disk from which columns can
465 be accessed
466 """
467 columnIndex = self._get_columnIndex(data)
469 # First, determine whether data has a multilevel index (either gen2 or gen3)
470 is_multiLevel = isinstance(data, MultilevelParquetTable) or isinstance(columnIndex, pd.MultiIndex)
472 # Multilevel index, gen2 or gen3
473 if is_multiLevel:
474 columns = self.multilevelColumns(data, columnIndex=columnIndex)
476 if isinstance(data, MultilevelParquetTable):
477 # Read data into memory the gen2 way
478 df = data.toDataFrame(columns=columns, droplevels=False)
479 elif isinstance(data, DeferredDatasetHandle):
480 # Read data into memory the gen3 way
481 df = data.get(parameters={"columns": columns})
483 valDict = {}
484 for k, f in self.funcDict.items():
485 try:
486 subdf = f._setLevels(
487 df[f.multilevelColumns(data, returnTuple=True, columnIndex=columnIndex)]
488 )
489 valDict[k] = f._func(subdf)
490 except Exception as e:
491 try:
492 valDict[k] = f.fail(subdf)
493 except NameError:
494 raise e
496 else:
497 if isinstance(data, DeferredDatasetHandle):
498 # input if Gen3 deferLoad=True
499 df = data.get(parameters={"columns": self.columns})
500 elif isinstance(data, pd.DataFrame):
501 # input if Gen3 deferLoad=False
502 df = data
503 else:
504 # Original Gen2 input is type ParquetTable and the fallback
505 df = data.toDataFrame(columns=self.columns)
507 valDict = {k: f._func(df) for k, f in self.funcDict.items()}
509 try:
510 valDf = pd.concat(valDict, axis=1)
511 except TypeError:
512 print([(k, type(v)) for k, v in valDict.items()])
513 raise
515 if kwargs.get('dropna', False):
516 valDf = valDf.dropna(how='any')
518 return valDf
520 @classmethod
521 def renameCol(cls, col, renameRules):
522 if renameRules is None:
523 return col
524 for old, new in renameRules:
525 if col.startswith(old):
526 col = col.replace(old, new)
527 return col
529 @classmethod
530 def from_file(cls, filename, **kwargs):
531 # Allow environment variables in the filename.
532 filename = os.path.expandvars(filename)
533 with open(filename) as f:
534 translationDefinition = yaml.safe_load(f)
536 return cls.from_yaml(translationDefinition, **kwargs)
538 @classmethod
539 def from_yaml(cls, translationDefinition, **kwargs):
540 funcs = {}
541 for func, val in translationDefinition['funcs'].items():
542 funcs[func] = init_fromDict(val, name=func)
544 if 'flag_rename_rules' in translationDefinition:
545 renameRules = translationDefinition['flag_rename_rules']
546 else:
547 renameRules = None
549 if 'calexpFlags' in translationDefinition:
550 for flag in translationDefinition['calexpFlags']:
551 funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='calexp')
553 if 'refFlags' in translationDefinition:
554 for flag in translationDefinition['refFlags']:
555 funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='ref')
557 if 'forcedFlags' in translationDefinition:
558 for flag in translationDefinition['forcedFlags']:
559 funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='forced_src')
561 if 'flags' in translationDefinition:
562 for flag in translationDefinition['flags']:
563 funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='meas')
565 return cls(funcs, **kwargs)
568def mag_aware_eval(df, expr):
569 """Evaluate an expression on a DataFrame, knowing what the 'mag' function means
571 Builds on `pandas.DataFrame.eval`, which parses and executes math on dataframes.
573 Parameters
574 ----------
575 df : pandas.DataFrame
576 Dataframe on which to evaluate expression.
578 expr : str
579 Expression.
580 """
581 try:
582 expr_new = re.sub(r'mag\((\w+)\)', r'-2.5*log(\g<1>)/log(10)', expr)
583 val = df.eval(expr_new, truediv=True)
584 except Exception: # Should check what actually gets raised
585 expr_new = re.sub(r'mag\((\w+)\)', r'-2.5*log(\g<1>_instFlux)/log(10)', expr)
586 val = df.eval(expr_new, truediv=True)
587 return val
590class CustomFunctor(Functor):
591 """Arbitrary computation on a catalog
593 Column names (and thus the columns to be loaded from catalog) are found
594 by finding all words and trying to ignore all "math-y" words.
596 Parameters
597 ----------
598 expr : str
599 Expression to evaluate, to be parsed and executed by `mag_aware_eval`.
600 """
601 _ignore_words = ('mag', 'sin', 'cos', 'exp', 'log', 'sqrt')
603 def __init__(self, expr, **kwargs):
604 self.expr = expr
605 super().__init__(**kwargs)
607 @property
608 def name(self):
609 return self.expr
611 @property
612 def columns(self):
613 flux_cols = re.findall(r'mag\(\s*(\w+)\s*\)', self.expr)
615 cols = [c for c in re.findall(r'[a-zA-Z_]+', self.expr) if c not in self._ignore_words]
616 not_a_col = []
617 for c in flux_cols:
618 if not re.search('_instFlux$', c):
619 cols.append(f'{c}_instFlux')
620 not_a_col.append(c)
621 else:
622 cols.append(c)
624 return list(set([c for c in cols if c not in not_a_col]))
626 def _func(self, df):
627 return mag_aware_eval(df, self.expr)
630class Column(Functor):
631 """Get column with specified name
632 """
634 def __init__(self, col, **kwargs):
635 self.col = col
636 super().__init__(**kwargs)
638 @property
639 def name(self):
640 return self.col
642 @property
643 def columns(self):
644 return [self.col]
646 def _func(self, df):
647 return df[self.col]
650class Index(Functor):
651 """Return the value of the index for each object
652 """
654 columns = ['coord_ra'] # just a dummy; something has to be here
655 _defaultDataset = 'ref'
656 _defaultNoDup = True
658 def _func(self, df):
659 return pd.Series(df.index, index=df.index)
662class IDColumn(Column):
663 col = 'id'
664 _allow_difference = False
665 _defaultNoDup = True
667 def _func(self, df):
668 return pd.Series(df.index, index=df.index)
671class FootprintNPix(Column):
672 col = 'base_Footprint_nPix'
675class CoordColumn(Column):
676 """Base class for coordinate column, in degrees
677 """
678 _radians = True
680 def __init__(self, col, **kwargs):
681 super().__init__(col, **kwargs)
683 def _func(self, df):
684 # Must not modify original column in case that column is used by another functor
685 output = df[self.col] * 180 / np.pi if self._radians else df[self.col]
686 return output
689class RAColumn(CoordColumn):
690 """Right Ascension, in degrees
691 """
692 name = 'RA'
693 _defaultNoDup = True
695 def __init__(self, **kwargs):
696 super().__init__('coord_ra', **kwargs)
698 def __call__(self, catalog, **kwargs):
699 return super().__call__(catalog, **kwargs)
702class DecColumn(CoordColumn):
703 """Declination, in degrees
704 """
705 name = 'Dec'
706 _defaultNoDup = True
708 def __init__(self, **kwargs):
709 super().__init__('coord_dec', **kwargs)
711 def __call__(self, catalog, **kwargs):
712 return super().__call__(catalog, **kwargs)
715class HtmIndex20(Functor):
716 """Compute the level 20 HtmIndex for the catalog.
717 """
718 name = "Htm20"
719 htmLevel = 20
720 _radians = True
722 def __init__(self, ra, decl, **kwargs):
723 self.pixelator = sphgeom.HtmPixelization(self.htmLevel)
724 self.ra = ra
725 self.decl = decl
726 self._columns = [self.ra, self.decl]
727 super().__init__(**kwargs)
729 def _func(self, df):
731 def computePixel(row):
732 if self._radians:
733 sphPoint = geom.SpherePoint(row[self.ra],
734 row[self.decl],
735 geom.radians)
736 else:
737 sphPoint = geom.SpherePoint(row[self.ra],
738 row[self.decl],
739 geom.degrees)
740 return self.pixelator.index(sphPoint.getVector())
742 return df.apply(computePixel, axis=1)
745def fluxName(col):
746 if not col.endswith('_instFlux'):
747 col += '_instFlux'
748 return col
751def fluxErrName(col):
752 if not col.endswith('_instFluxErr'):
753 col += '_instFluxErr'
754 return col
757class Mag(Functor):
758 """Compute calibrated magnitude
760 Takes a `calib` argument, which returns the flux at mag=0
761 as `calib.getFluxMag0()`. If not provided, then the default
762 `fluxMag0` is 63095734448.0194, which is default for HSC.
763 This default should be removed in DM-21955
765 This calculation hides warnings about invalid values and dividing by zero.
767 As for all functors, a `dataset` and `filt` kwarg should be provided upon
768 initialization. Unlike the default `Functor`, however, the default dataset
769 for a `Mag` is `'meas'`, rather than `'ref'`.
771 Parameters
772 ----------
773 col : `str`
774 Name of flux column from which to compute magnitude. Can be parseable
775 by `lsst.pipe.tasks.functors.fluxName` function---that is, you can pass
776 `'modelfit_CModel'` instead of `'modelfit_CModel_instFlux'`) and it will
777 understand.
778 calib : `lsst.afw.image.calib.Calib` (optional)
779 Object that knows zero point.
780 """
781 _defaultDataset = 'meas'
783 def __init__(self, col, calib=None, **kwargs):
784 self.col = fluxName(col)
785 self.calib = calib
786 if calib is not None:
787 self.fluxMag0 = calib.getFluxMag0()[0]
788 else:
789 # TO DO: DM-21955 Replace hard coded photometic calibration values
790 self.fluxMag0 = 63095734448.0194
792 super().__init__(**kwargs)
794 @property
795 def columns(self):
796 return [self.col]
798 def _func(self, df):
799 with np.warnings.catch_warnings():
800 np.warnings.filterwarnings('ignore', r'invalid value encountered')
801 np.warnings.filterwarnings('ignore', r'divide by zero')
802 return -2.5*np.log10(df[self.col] / self.fluxMag0)
804 @property
805 def name(self):
806 return f'mag_{self.col}'
809class MagErr(Mag):
810 """Compute calibrated magnitude uncertainty
812 Takes the same `calib` object as `lsst.pipe.tasks.functors.Mag`.
814 Parameters
815 col : `str`
816 Name of flux column
817 calib : `lsst.afw.image.calib.Calib` (optional)
818 Object that knows zero point.
819 """
821 def __init__(self, *args, **kwargs):
822 super().__init__(*args, **kwargs)
823 if self.calib is not None:
824 self.fluxMag0Err = self.calib.getFluxMag0()[1]
825 else:
826 self.fluxMag0Err = 0.
828 @property
829 def columns(self):
830 return [self.col, self.col + 'Err']
832 def _func(self, df):
833 with np.warnings.catch_warnings():
834 np.warnings.filterwarnings('ignore', r'invalid value encountered')
835 np.warnings.filterwarnings('ignore', r'divide by zero')
836 fluxCol, fluxErrCol = self.columns
837 x = df[fluxErrCol] / df[fluxCol]
838 y = self.fluxMag0Err / self.fluxMag0
839 magErr = (2.5 / np.log(10.)) * np.sqrt(x*x + y*y)
840 return magErr
842 @property
843 def name(self):
844 return super().name + '_err'
847class NanoMaggie(Mag):
848 """
849 """
851 def _func(self, df):
852 return (df[self.col] / self.fluxMag0) * 1e9
855class MagDiff(Functor):
856 _defaultDataset = 'meas'
858 """Functor to calculate magnitude difference"""
860 def __init__(self, col1, col2, **kwargs):
861 self.col1 = fluxName(col1)
862 self.col2 = fluxName(col2)
863 super().__init__(**kwargs)
865 @property
866 def columns(self):
867 return [self.col1, self.col2]
869 def _func(self, df):
870 with np.warnings.catch_warnings():
871 np.warnings.filterwarnings('ignore', r'invalid value encountered')
872 np.warnings.filterwarnings('ignore', r'divide by zero')
873 return -2.5*np.log10(df[self.col1]/df[self.col2])
875 @property
876 def name(self):
877 return f'(mag_{self.col1} - mag_{self.col2})'
879 @property
880 def shortname(self):
881 return f'magDiff_{self.col1}_{self.col2}'
884class Color(Functor):
885 """Compute the color between two filters
887 Computes color by initializing two different `Mag`
888 functors based on the `col` and filters provided, and
889 then returning the difference.
891 This is enabled by the `_func` expecting a dataframe with a
892 multilevel column index, with both `'band'` and `'column'`,
893 instead of just `'column'`, which is the `Functor` default.
894 This is controlled by the `_dfLevels` attribute.
896 Also of note, the default dataset for `Color` is `forced_src'`,
897 whereas for `Mag` it is `'meas'`.
899 Parameters
900 ----------
901 col : str
902 Name of flux column from which to compute; same as would be passed to
903 `lsst.pipe.tasks.functors.Mag`.
905 filt2, filt1 : str
906 Filters from which to compute magnitude difference.
907 Color computed is `Mag(filt2) - Mag(filt1)`.
908 """
909 _defaultDataset = 'forced_src'
910 _dfLevels = ('band', 'column')
911 _defaultNoDup = True
913 def __init__(self, col, filt2, filt1, **kwargs):
914 self.col = fluxName(col)
915 if filt2 == filt1:
916 raise RuntimeError("Cannot compute Color for %s: %s - %s " % (col, filt2, filt1))
917 self.filt2 = filt2
918 self.filt1 = filt1
920 self.mag2 = Mag(col, filt=filt2, **kwargs)
921 self.mag1 = Mag(col, filt=filt1, **kwargs)
923 super().__init__(**kwargs)
925 @property
926 def filt(self):
927 return None
929 @filt.setter
930 def filt(self, filt):
931 pass
933 def _func(self, df):
934 mag2 = self.mag2._func(df[self.filt2])
935 mag1 = self.mag1._func(df[self.filt1])
936 return mag2 - mag1
938 @property
939 def columns(self):
940 return [self.mag1.col, self.mag2.col]
942 def multilevelColumns(self, parq, **kwargs):
943 return [(self.dataset, self.filt1, self.col), (self.dataset, self.filt2, self.col)]
945 @property
946 def name(self):
947 return f'{self.filt2} - {self.filt1} ({self.col})'
949 @property
950 def shortname(self):
951 return f"{self.col}_{self.filt2.replace('-', '')}m{self.filt1.replace('-', '')}"
954class Labeller(Functor):
955 """Main function of this subclass is to override the dropna=True
956 """
957 _null_label = 'null'
958 _allow_difference = False
959 name = 'label'
960 _force_str = False
962 def __call__(self, parq, dropna=False, **kwargs):
963 return super().__call__(parq, dropna=False, **kwargs)
966class StarGalaxyLabeller(Labeller):
967 _columns = ["base_ClassificationExtendedness_value"]
968 _column = "base_ClassificationExtendedness_value"
970 def _func(self, df):
971 x = df[self._columns][self._column]
972 mask = x.isnull()
973 test = (x < 0.5).astype(int)
974 test = test.mask(mask, 2)
976 # TODO: DM-21954 Look into veracity of inline comment below
977 # are these backwards?
978 categories = ['galaxy', 'star', self._null_label]
979 label = pd.Series(pd.Categorical.from_codes(test, categories=categories),
980 index=x.index, name='label')
981 if self._force_str:
982 label = label.astype(str)
983 return label
986class NumStarLabeller(Labeller):
987 _columns = ['numStarFlags']
988 labels = {"star": 0, "maybe": 1, "notStar": 2}
990 def _func(self, df):
991 x = df[self._columns][self._columns[0]]
993 # Number of filters
994 n = len(x.unique()) - 1
996 labels = ['noStar', 'maybe', 'star']
997 label = pd.Series(pd.cut(x, [-1, 0, n-1, n], labels=labels),
998 index=x.index, name='label')
1000 if self._force_str:
1001 label = label.astype(str)
1003 return label
1006class DeconvolvedMoments(Functor):
1007 name = 'Deconvolved Moments'
1008 shortname = 'deconvolvedMoments'
1009 _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
1010 "ext_shapeHSM_HsmSourceMoments_yy",
1011 "base_SdssShape_xx", "base_SdssShape_yy",
1012 "ext_shapeHSM_HsmPsfMoments_xx",
1013 "ext_shapeHSM_HsmPsfMoments_yy")
1015 def _func(self, df):
1016 """Calculate deconvolved moments"""
1017 if "ext_shapeHSM_HsmSourceMoments_xx" in df.columns: # _xx added by tdm
1018 hsm = df["ext_shapeHSM_HsmSourceMoments_xx"] + df["ext_shapeHSM_HsmSourceMoments_yy"]
1019 else:
1020 hsm = np.ones(len(df))*np.nan
1021 sdss = df["base_SdssShape_xx"] + df["base_SdssShape_yy"]
1022 if "ext_shapeHSM_HsmPsfMoments_xx" in df.columns:
1023 psf = df["ext_shapeHSM_HsmPsfMoments_xx"] + df["ext_shapeHSM_HsmPsfMoments_yy"]
1024 else:
1025 # LSST does not have shape.sdss.psf. Could instead add base_PsfShape to catalog using
1026 # exposure.getPsf().computeShape(s.getCentroid()).getIxx()
1027 # raise TaskError("No psf shape parameter found in catalog")
1028 raise RuntimeError('No psf shape parameter found in catalog')
1030 return hsm.where(np.isfinite(hsm), sdss) - psf
1033class SdssTraceSize(Functor):
1034 """Functor to calculate SDSS trace radius size for sources"""
1035 name = "SDSS Trace Size"
1036 shortname = 'sdssTrace'
1037 _columns = ("base_SdssShape_xx", "base_SdssShape_yy")
1039 def _func(self, df):
1040 srcSize = np.sqrt(0.5*(df["base_SdssShape_xx"] + df["base_SdssShape_yy"]))
1041 return srcSize
1044class PsfSdssTraceSizeDiff(Functor):
1045 """Functor to calculate SDSS trace radius size difference (%) between object and psf model"""
1046 name = "PSF - SDSS Trace Size"
1047 shortname = 'psf_sdssTrace'
1048 _columns = ("base_SdssShape_xx", "base_SdssShape_yy",
1049 "base_SdssShape_psf_xx", "base_SdssShape_psf_yy")
1051 def _func(self, df):
1052 srcSize = np.sqrt(0.5*(df["base_SdssShape_xx"] + df["base_SdssShape_yy"]))
1053 psfSize = np.sqrt(0.5*(df["base_SdssShape_psf_xx"] + df["base_SdssShape_psf_yy"]))
1054 sizeDiff = 100*(srcSize - psfSize)/(0.5*(srcSize + psfSize))
1055 return sizeDiff
1058class HsmTraceSize(Functor):
1059 """Functor to calculate HSM trace radius size for sources"""
1060 name = 'HSM Trace Size'
1061 shortname = 'hsmTrace'
1062 _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
1063 "ext_shapeHSM_HsmSourceMoments_yy")
1065 def _func(self, df):
1066 srcSize = np.sqrt(0.5*(df["ext_shapeHSM_HsmSourceMoments_xx"]
1067 + df["ext_shapeHSM_HsmSourceMoments_yy"]))
1068 return srcSize
1071class PsfHsmTraceSizeDiff(Functor):
1072 """Functor to calculate HSM trace radius size difference (%) between object and psf model"""
1073 name = 'PSF - HSM Trace Size'
1074 shortname = 'psf_HsmTrace'
1075 _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
1076 "ext_shapeHSM_HsmSourceMoments_yy",
1077 "ext_shapeHSM_HsmPsfMoments_xx",
1078 "ext_shapeHSM_HsmPsfMoments_yy")
1080 def _func(self, df):
1081 srcSize = np.sqrt(0.5*(df["ext_shapeHSM_HsmSourceMoments_xx"]
1082 + df["ext_shapeHSM_HsmSourceMoments_yy"]))
1083 psfSize = np.sqrt(0.5*(df["ext_shapeHSM_HsmPsfMoments_xx"]
1084 + df["ext_shapeHSM_HsmPsfMoments_yy"]))
1085 sizeDiff = 100*(srcSize - psfSize)/(0.5*(srcSize + psfSize))
1086 return sizeDiff
1089class HsmFwhm(Functor):
1090 name = 'HSM Psf FWHM'
1091 _columns = ('ext_shapeHSM_HsmPsfMoments_xx', 'ext_shapeHSM_HsmPsfMoments_yy')
1092 # TODO: DM-21403 pixel scale should be computed from the CD matrix or transform matrix
1093 pixelScale = 0.168
1094 SIGMA2FWHM = 2*np.sqrt(2*np.log(2))
1096 def _func(self, df):
1097 return self.pixelScale*self.SIGMA2FWHM*np.sqrt(
1098 0.5*(df['ext_shapeHSM_HsmPsfMoments_xx'] + df['ext_shapeHSM_HsmPsfMoments_yy']))
1101class E1(Functor):
1102 name = "Distortion Ellipticity (e1)"
1103 shortname = "Distortion"
1105 def __init__(self, colXX, colXY, colYY, **kwargs):
1106 self.colXX = colXX
1107 self.colXY = colXY
1108 self.colYY = colYY
1109 self._columns = [self.colXX, self.colXY, self.colYY]
1110 super().__init__(**kwargs)
1112 @property
1113 def columns(self):
1114 return [self.colXX, self.colXY, self.colYY]
1116 def _func(self, df):
1117 return df[self.colXX] - df[self.colYY] / (df[self.colXX] + df[self.colYY])
1120class E2(Functor):
1121 name = "Ellipticity e2"
1123 def __init__(self, colXX, colXY, colYY, **kwargs):
1124 self.colXX = colXX
1125 self.colXY = colXY
1126 self.colYY = colYY
1127 super().__init__(**kwargs)
1129 @property
1130 def columns(self):
1131 return [self.colXX, self.colXY, self.colYY]
1133 def _func(self, df):
1134 return 2*df[self.colXY] / (df[self.colXX] + df[self.colYY])
1137class RadiusFromQuadrupole(Functor):
1139 def __init__(self, colXX, colXY, colYY, **kwargs):
1140 self.colXX = colXX
1141 self.colXY = colXY
1142 self.colYY = colYY
1143 super().__init__(**kwargs)
1145 @property
1146 def columns(self):
1147 return [self.colXX, self.colXY, self.colYY]
1149 def _func(self, df):
1150 return (df[self.colXX]*df[self.colYY] - df[self.colXY]**2)**0.25
1153class LocalWcs(Functor):
1154 """Computations using the stored localWcs.
1155 """
1156 name = "LocalWcsOperations"
1158 def __init__(self,
1159 colCD_1_1,
1160 colCD_1_2,
1161 colCD_2_1,
1162 colCD_2_2,
1163 **kwargs):
1164 self.colCD_1_1 = colCD_1_1
1165 self.colCD_1_2 = colCD_1_2
1166 self.colCD_2_1 = colCD_2_1
1167 self.colCD_2_2 = colCD_2_2
1168 super().__init__(**kwargs)
1170 def computeDeltaRaDec(self, x, y, cd11, cd12, cd21, cd22):
1171 """Compute the distance on the sphere from x2, y1 to x1, y1.
1173 Parameters
1174 ----------
1175 x : `pandas.Series`
1176 X pixel coordinate.
1177 y : `pandas.Series`
1178 Y pixel coordinate.
1179 cd11 : `pandas.Series`
1180 [1, 1] element of the local Wcs affine transform.
1181 cd11 : `pandas.Series`
1182 [1, 1] element of the local Wcs affine transform.
1183 cd12 : `pandas.Series`
1184 [1, 2] element of the local Wcs affine transform.
1185 cd21 : `pandas.Series`
1186 [2, 1] element of the local Wcs affine transform.
1187 cd22 : `pandas.Series`
1188 [2, 2] element of the local Wcs affine transform.
1190 Returns
1191 -------
1192 raDecTuple : tuple
1193 RA and dec conversion of x and y given the local Wcs. Returned
1194 units are in radians.
1196 """
1197 return (x * cd11 + y * cd12, x * cd21 + y * cd22)
1199 def computeSkySeperation(self, ra1, dec1, ra2, dec2):
1200 """Compute the local pixel scale conversion.
1202 Parameters
1203 ----------
1204 ra1 : `pandas.Series`
1205 Ra of the first coordinate in radians.
1206 dec1 : `pandas.Series`
1207 Dec of the first coordinate in radians.
1208 ra2 : `pandas.Series`
1209 Ra of the second coordinate in radians.
1210 dec2 : `pandas.Series`
1211 Dec of the second coordinate in radians.
1213 Returns
1214 -------
1215 dist : `pandas.Series`
1216 Distance on the sphere in radians.
1217 """
1218 deltaDec = dec2 - dec1
1219 deltaRa = ra2 - ra1
1220 return 2 * np.arcsin(
1221 np.sqrt(
1222 np.sin(deltaDec / 2) ** 2
1223 + np.cos(dec2) * np.cos(dec1) * np.sin(deltaRa / 2) ** 2))
1225 def getSkySeperationFromPixel(self, x1, y1, x2, y2, cd11, cd12, cd21, cd22):
1226 """Compute the distance on the sphere from x2, y1 to x1, y1.
1228 Parameters
1229 ----------
1230 x1 : `pandas.Series`
1231 X pixel coordinate.
1232 y1 : `pandas.Series`
1233 Y pixel coordinate.
1234 x2 : `pandas.Series`
1235 X pixel coordinate.
1236 y2 : `pandas.Series`
1237 Y pixel coordinate.
1238 cd11 : `pandas.Series`
1239 [1, 1] element of the local Wcs affine transform.
1240 cd11 : `pandas.Series`
1241 [1, 1] element of the local Wcs affine transform.
1242 cd12 : `pandas.Series`
1243 [1, 2] element of the local Wcs affine transform.
1244 cd21 : `pandas.Series`
1245 [2, 1] element of the local Wcs affine transform.
1246 cd22 : `pandas.Series`
1247 [2, 2] element of the local Wcs affine transform.
1249 Returns
1250 -------
1251 Distance : `pandas.Series`
1252 Arcseconds per pixel at the location of the local WC
1253 """
1254 ra1, dec1 = self.computeDeltaRaDec(x1, y1, cd11, cd12, cd21, cd22)
1255 ra2, dec2 = self.computeDeltaRaDec(x2, y2, cd11, cd12, cd21, cd22)
1256 # Great circle distance for small separations.
1257 return self.computeSkySeperation(ra1, dec1, ra2, dec2)
1260class ComputePixelScale(LocalWcs):
1261 """Compute the local pixel scale from the stored CDMatrix.
1262 """
1263 name = "PixelScale"
1265 @property
1266 def columns(self):
1267 return [self.colCD_1_1,
1268 self.colCD_1_2,
1269 self.colCD_2_1,
1270 self.colCD_2_2]
1272 def pixelScaleArcseconds(self, cd11, cd12, cd21, cd22):
1273 """Compute the local pixel to scale conversion in arcseconds.
1275 Parameters
1276 ----------
1277 cd11 : `pandas.Series`
1278 [1, 1] element of the local Wcs affine transform in radians.
1279 cd11 : `pandas.Series`
1280 [1, 1] element of the local Wcs affine transform in radians.
1281 cd12 : `pandas.Series`
1282 [1, 2] element of the local Wcs affine transform in radians.
1283 cd21 : `pandas.Series`
1284 [2, 1] element of the local Wcs affine transform in radians.
1285 cd22 : `pandas.Series`
1286 [2, 2] element of the local Wcs affine transform in radians.
1288 Returns
1289 -------
1290 pixScale : `pandas.Series`
1291 Arcseconds per pixel at the location of the local WC
1292 """
1293 return 3600 * np.degrees(np.sqrt(np.fabs(cd11 * cd22 - cd12 * cd21)))
1295 def _func(self, df):
1296 return self.pixelScaleArcseconds(df[self.colCD_1_1],
1297 df[self.colCD_1_2],
1298 df[self.colCD_2_1],
1299 df[self.colCD_2_2])
1302class ConvertPixelToArcseconds(ComputePixelScale):
1303 """Convert a value in units pixels squared to units arcseconds squared.
1304 """
1306 def __init__(self,
1307 col,
1308 colCD_1_1,
1309 colCD_1_2,
1310 colCD_2_1,
1311 colCD_2_2,
1312 **kwargs):
1313 self.col = col
1314 super().__init__(colCD_1_1,
1315 colCD_1_2,
1316 colCD_2_1,
1317 colCD_2_2,
1318 **kwargs)
1320 @property
1321 def name(self):
1322 return f"{self.col}_asArcseconds"
1324 @property
1325 def columns(self):
1326 return [self.col,
1327 self.colCD_1_1,
1328 self.colCD_1_2,
1329 self.colCD_2_1,
1330 self.colCD_2_2]
1332 def _func(self, df):
1333 return df[self.col] * self.pixelScaleArcseconds(df[self.colCD_1_1],
1334 df[self.colCD_1_2],
1335 df[self.colCD_2_1],
1336 df[self.colCD_2_2])
1339class ConvertPixelSqToArcsecondsSq(ComputePixelScale):
1340 """Convert a value in units pixels to units arcseconds.
1341 """
1343 def __init__(self,
1344 col,
1345 colCD_1_1,
1346 colCD_1_2,
1347 colCD_2_1,
1348 colCD_2_2,
1349 **kwargs):
1350 self.col = col
1351 super().__init__(colCD_1_1,
1352 colCD_1_2,
1353 colCD_2_1,
1354 colCD_2_2,
1355 **kwargs)
1357 @property
1358 def name(self):
1359 return f"{self.col}_asArcsecondsSq"
1361 @property
1362 def columns(self):
1363 return [self.col,
1364 self.colCD_1_1,
1365 self.colCD_1_2,
1366 self.colCD_2_1,
1367 self.colCD_2_2]
1369 def _func(self, df):
1370 pixScale = self.pixelScaleArcseconds(df[self.colCD_1_1],
1371 df[self.colCD_1_2],
1372 df[self.colCD_2_1],
1373 df[self.colCD_2_2])
1374 return df[self.col] * pixScale * pixScale
1377class ReferenceBand(Functor):
1378 name = 'Reference Band'
1379 shortname = 'refBand'
1381 @property
1382 def columns(self):
1383 return ["merge_measurement_i",
1384 "merge_measurement_r",
1385 "merge_measurement_z",
1386 "merge_measurement_y",
1387 "merge_measurement_g"]
1389 def _func(self, df):
1390 def getFilterAliasName(row):
1391 # get column name with the max value (True > False)
1392 colName = row.idxmax()
1393 return colName.replace('merge_measurement_', '')
1395 return df[self.columns].apply(getFilterAliasName, axis=1)
1398class Photometry(Functor):
1399 # AB to NanoJansky (3631 Jansky)
1400 AB_FLUX_SCALE = (0 * u.ABmag).to_value(u.nJy)
1401 LOG_AB_FLUX_SCALE = 12.56
1402 FIVE_OVER_2LOG10 = 1.085736204758129569
1403 # TO DO: DM-21955 Replace hard coded photometic calibration values
1404 COADD_ZP = 27
1406 def __init__(self, colFlux, colFluxErr=None, calib=None, **kwargs):
1407 self.vhypot = np.vectorize(self.hypot)
1408 self.col = colFlux
1409 self.colFluxErr = colFluxErr
1411 self.calib = calib
1412 if calib is not None:
1413 self.fluxMag0, self.fluxMag0Err = calib.getFluxMag0()
1414 else:
1415 self.fluxMag0 = 1./np.power(10, -0.4*self.COADD_ZP)
1416 self.fluxMag0Err = 0.
1418 super().__init__(**kwargs)
1420 @property
1421 def columns(self):
1422 return [self.col]
1424 @property
1425 def name(self):
1426 return f'mag_{self.col}'
1428 @classmethod
1429 def hypot(cls, a, b):
1430 if np.abs(a) < np.abs(b):
1431 a, b = b, a
1432 if a == 0.:
1433 return 0.
1434 q = b/a
1435 return np.abs(a) * np.sqrt(1. + q*q)
1437 def dn2flux(self, dn, fluxMag0):
1438 return self.AB_FLUX_SCALE * dn / fluxMag0
1440 def dn2mag(self, dn, fluxMag0):
1441 with np.warnings.catch_warnings():
1442 np.warnings.filterwarnings('ignore', r'invalid value encountered')
1443 np.warnings.filterwarnings('ignore', r'divide by zero')
1444 return -2.5 * np.log10(dn/fluxMag0)
1446 def dn2fluxErr(self, dn, dnErr, fluxMag0, fluxMag0Err):
1447 retVal = self.vhypot(dn * fluxMag0Err, dnErr * fluxMag0)
1448 retVal *= self.AB_FLUX_SCALE / fluxMag0 / fluxMag0
1449 return retVal
1451 def dn2MagErr(self, dn, dnErr, fluxMag0, fluxMag0Err):
1452 retVal = self.dn2fluxErr(dn, dnErr, fluxMag0, fluxMag0Err) / self.dn2flux(dn, fluxMag0)
1453 return self.FIVE_OVER_2LOG10 * retVal
1456class NanoJansky(Photometry):
1457 def _func(self, df):
1458 return self.dn2flux(df[self.col], self.fluxMag0)
1461class NanoJanskyErr(Photometry):
1462 @property
1463 def columns(self):
1464 return [self.col, self.colFluxErr]
1466 def _func(self, df):
1467 retArr = self.dn2fluxErr(df[self.col], df[self.colFluxErr], self.fluxMag0, self.fluxMag0Err)
1468 return pd.Series(retArr, index=df.index)
1471class Magnitude(Photometry):
1472 def _func(self, df):
1473 return self.dn2mag(df[self.col], self.fluxMag0)
1476class MagnitudeErr(Photometry):
1477 @property
1478 def columns(self):
1479 return [self.col, self.colFluxErr]
1481 def _func(self, df):
1482 retArr = self.dn2MagErr(df[self.col], df[self.colFluxErr], self.fluxMag0, self.fluxMag0Err)
1483 return pd.Series(retArr, index=df.index)
1486class LocalPhotometry(Functor):
1487 """Base class for calibrating the specified instrument flux column using
1488 the local photometric calibration.
1490 Parameters
1491 ----------
1492 instFluxCol : `str`
1493 Name of the instrument flux column.
1494 instFluxErrCol : `str`
1495 Name of the assocated error columns for ``instFluxCol``.
1496 photoCalibCol : `str`
1497 Name of local calibration column.
1498 photoCalibErrCol : `str`
1499 Error associated with ``photoCalibCol``
1501 See also
1502 --------
1503 LocalPhotometry
1504 LocalNanojansky
1505 LocalNanojanskyErr
1506 LocalMagnitude
1507 LocalMagnitudeErr
1508 """
1509 logNJanskyToAB = (1 * u.nJy).to_value(u.ABmag)
1511 def __init__(self,
1512 instFluxCol,
1513 instFluxErrCol,
1514 photoCalibCol,
1515 photoCalibErrCol,
1516 **kwargs):
1517 self.instFluxCol = instFluxCol
1518 self.instFluxErrCol = instFluxErrCol
1519 self.photoCalibCol = photoCalibCol
1520 self.photoCalibErrCol = photoCalibErrCol
1521 super().__init__(**kwargs)
1523 def instFluxToNanojansky(self, instFlux, localCalib):
1524 """Convert instrument flux to nanojanskys.
1526 Parameters
1527 ----------
1528 instFlux : `numpy.ndarray` or `pandas.Series`
1529 Array of instrument flux measurements
1530 localCalib : `numpy.ndarray` or `pandas.Series`
1531 Array of local photometric calibration estimates.
1533 Returns
1534 -------
1535 calibFlux : `numpy.ndarray` or `pandas.Series`
1536 Array of calibrated flux measurements.
1537 """
1538 return instFlux * localCalib
1540 def instFluxErrToNanojanskyErr(self, instFlux, instFluxErr, localCalib, localCalibErr):
1541 """Convert instrument flux to nanojanskys.
1543 Parameters
1544 ----------
1545 instFlux : `numpy.ndarray` or `pandas.Series`
1546 Array of instrument flux measurements
1547 instFluxErr : `numpy.ndarray` or `pandas.Series`
1548 Errors on associated ``instFlux`` values
1549 localCalib : `numpy.ndarray` or `pandas.Series`
1550 Array of local photometric calibration estimates.
1551 localCalibErr : `numpy.ndarray` or `pandas.Series`
1552 Errors on associated ``localCalib`` values
1554 Returns
1555 -------
1556 calibFluxErr : `numpy.ndarray` or `pandas.Series`
1557 Errors on calibrated flux measurements.
1558 """
1559 return np.hypot(instFluxErr * localCalib, instFlux * localCalibErr)
1561 def instFluxToMagnitude(self, instFlux, localCalib):
1562 """Convert instrument flux to nanojanskys.
1564 Parameters
1565 ----------
1566 instFlux : `numpy.ndarray` or `pandas.Series`
1567 Array of instrument flux measurements
1568 localCalib : `numpy.ndarray` or `pandas.Series`
1569 Array of local photometric calibration estimates.
1571 Returns
1572 -------
1573 calibMag : `numpy.ndarray` or `pandas.Series`
1574 Array of calibrated AB magnitudes.
1575 """
1576 return -2.5 * np.log10(self.instFluxToNanojansky(instFlux, localCalib)) + self.logNJanskyToAB
1578 def instFluxErrToMagnitudeErr(self, instFlux, instFluxErr, localCalib, localCalibErr):
1579 """Convert instrument flux err to nanojanskys.
1581 Parameters
1582 ----------
1583 instFlux : `numpy.ndarray` or `pandas.Series`
1584 Array of instrument flux measurements
1585 instFluxErr : `numpy.ndarray` or `pandas.Series`
1586 Errors on associated ``instFlux`` values
1587 localCalib : `numpy.ndarray` or `pandas.Series`
1588 Array of local photometric calibration estimates.
1589 localCalibErr : `numpy.ndarray` or `pandas.Series`
1590 Errors on associated ``localCalib`` values
1592 Returns
1593 -------
1594 calibMagErr: `numpy.ndarray` or `pandas.Series`
1595 Error on calibrated AB magnitudes.
1596 """
1597 err = self.instFluxErrToNanojanskyErr(instFlux, instFluxErr, localCalib, localCalibErr)
1598 return 2.5 / np.log(10) * err / self.instFluxToNanojansky(instFlux, instFluxErr)
1601class LocalNanojansky(LocalPhotometry):
1602 """Compute calibrated fluxes using the local calibration value.
1604 See also
1605 --------
1606 LocalNanojansky
1607 LocalNanojanskyErr
1608 LocalMagnitude
1609 LocalMagnitudeErr
1610 """
1612 @property
1613 def columns(self):
1614 return [self.instFluxCol, self.photoCalibCol]
1616 @property
1617 def name(self):
1618 return f'flux_{self.instFluxCol}'
1620 def _func(self, df):
1621 return self.instFluxToNanojansky(df[self.instFluxCol], df[self.photoCalibCol])
1624class LocalNanojanskyErr(LocalPhotometry):
1625 """Compute calibrated flux errors using the local calibration value.
1627 See also
1628 --------
1629 LocalNanojansky
1630 LocalNanojanskyErr
1631 LocalMagnitude
1632 LocalMagnitudeErr
1633 """
1635 @property
1636 def columns(self):
1637 return [self.instFluxCol, self.instFluxErrCol,
1638 self.photoCalibCol, self.photoCalibErrCol]
1640 @property
1641 def name(self):
1642 return f'fluxErr_{self.instFluxCol}'
1644 def _func(self, df):
1645 return self.instFluxErrToNanojanskyErr(df[self.instFluxCol], df[self.instFluxErrCol],
1646 df[self.photoCalibCol], df[self.photoCalibErrCol])
1649class LocalMagnitude(LocalPhotometry):
1650 """Compute calibrated AB magnitudes using the local calibration value.
1652 See also
1653 --------
1654 LocalNanojansky
1655 LocalNanojanskyErr
1656 LocalMagnitude
1657 LocalMagnitudeErr
1658 """
1660 @property
1661 def columns(self):
1662 return [self.instFluxCol, self.photoCalibCol]
1664 @property
1665 def name(self):
1666 return f'mag_{self.instFluxCol}'
1668 def _func(self, df):
1669 return self.instFluxToMagnitude(df[self.instFluxCol],
1670 df[self.photoCalibCol])
1673class LocalMagnitudeErr(LocalPhotometry):
1674 """Compute calibrated AB magnitude errors using the local calibration value.
1676 See also
1677 --------
1678 LocalNanojansky
1679 LocalNanojanskyErr
1680 LocalMagnitude
1681 LocalMagnitudeErr
1682 """
1684 @property
1685 def columns(self):
1686 return [self.instFluxCol, self.instFluxErrCol,
1687 self.photoCalibCol, self.photoCalibErrCol]
1689 @property
1690 def name(self):
1691 return f'magErr_{self.instFluxCol}'
1693 def _func(self, df):
1694 return self.instFluxErrToMagnitudeErr(df[self.instFluxCol],
1695 df[self.instFluxErrCol],
1696 df[self.photoCalibCol],
1697 df[self.photoCalibErrCol])
1700class LocalDipoleMeanFlux(LocalPhotometry):
1701 """Compute absolute mean of dipole fluxes.
1703 See also
1704 --------
1705 LocalNanojansky
1706 LocalNanojanskyErr
1707 LocalMagnitude
1708 LocalMagnitudeErr
1709 LocalDipoleMeanFlux
1710 LocalDipoleMeanFluxErr
1711 LocalDipoleDiffFlux
1712 LocalDipoleDiffFluxErr
1713 """
1714 def __init__(self,
1715 instFluxPosCol,
1716 instFluxNegCol,
1717 instFluxPosErrCol,
1718 instFluxNegErrCol,
1719 photoCalibCol,
1720 photoCalibErrCol,
1721 **kwargs):
1722 self.instFluxNegCol = instFluxNegCol
1723 self.instFluxPosCol = instFluxPosCol
1724 self.instFluxNegErrCol = instFluxNegErrCol
1725 self.instFluxPosErrCol = instFluxPosErrCol
1726 self.photoCalibCol = photoCalibCol
1727 self.photoCalibErrCol = photoCalibErrCol
1728 super().__init__(instFluxNegCol,
1729 instFluxNegErrCol,
1730 photoCalibCol,
1731 photoCalibErrCol,
1732 **kwargs)
1734 @property
1735 def columns(self):
1736 return [self.instFluxPosCol,
1737 self.instFluxNegCol,
1738 self.photoCalibCol]
1740 @property
1741 def name(self):
1742 return f'dipMeanFlux_{self.instFluxPosCol}_{self.instFluxNegCol}'
1744 def _func(self, df):
1745 return 0.5*(np.fabs(self.instFluxToNanojansky(df[self.instFluxNegCol], df[self.photoCalibCol]))
1746 + np.fabs(self.instFluxToNanojansky(df[self.instFluxPosCol], df[self.photoCalibCol])))
1749class LocalDipoleMeanFluxErr(LocalDipoleMeanFlux):
1750 """Compute the error on the absolute mean of dipole fluxes.
1752 See also
1753 --------
1754 LocalNanojansky
1755 LocalNanojanskyErr
1756 LocalMagnitude
1757 LocalMagnitudeErr
1758 LocalDipoleMeanFlux
1759 LocalDipoleMeanFluxErr
1760 LocalDipoleDiffFlux
1761 LocalDipoleDiffFluxErr
1762 """
1764 @property
1765 def columns(self):
1766 return [self.instFluxPosCol,
1767 self.instFluxNegCol,
1768 self.instFluxPosErrCol,
1769 self.instFluxNegErrCol,
1770 self.photoCalibCol,
1771 self.photoCalibErrCol]
1773 @property
1774 def name(self):
1775 return f'dipMeanFluxErr_{self.instFluxPosCol}_{self.instFluxNegCol}'
1777 def _func(self, df):
1778 return 0.5*np.sqrt(
1779 (np.fabs(df[self.instFluxNegCol]) + np.fabs(df[self.instFluxPosCol])
1780 * df[self.photoCalibErrCol])**2
1781 + (df[self.instFluxNegErrCol]**2 + df[self.instFluxPosErrCol]**2)
1782 * df[self.photoCalibCol]**2)
1785class LocalDipoleDiffFlux(LocalDipoleMeanFlux):
1786 """Compute the absolute difference of dipole fluxes.
1788 Value is (abs(pos) - abs(neg))
1790 See also
1791 --------
1792 LocalNanojansky
1793 LocalNanojanskyErr
1794 LocalMagnitude
1795 LocalMagnitudeErr
1796 LocalDipoleMeanFlux
1797 LocalDipoleMeanFluxErr
1798 LocalDipoleDiffFlux
1799 LocalDipoleDiffFluxErr
1800 """
1802 @property
1803 def columns(self):
1804 return [self.instFluxPosCol,
1805 self.instFluxNegCol,
1806 self.photoCalibCol]
1808 @property
1809 def name(self):
1810 return f'dipDiffFlux_{self.instFluxPosCol}_{self.instFluxNegCol}'
1812 def _func(self, df):
1813 return (np.fabs(self.instFluxToNanojansky(df[self.instFluxPosCol], df[self.photoCalibCol]))
1814 - np.fabs(self.instFluxToNanojansky(df[self.instFluxNegCol], df[self.photoCalibCol])))
1817class LocalDipoleDiffFluxErr(LocalDipoleMeanFlux):
1818 """Compute the error on the absolute difference of dipole fluxes.
1820 See also
1821 --------
1822 LocalNanojansky
1823 LocalNanojanskyErr
1824 LocalMagnitude
1825 LocalMagnitudeErr
1826 LocalDipoleMeanFlux
1827 LocalDipoleMeanFluxErr
1828 LocalDipoleDiffFlux
1829 LocalDipoleDiffFluxErr
1830 """
1832 @property
1833 def columns(self):
1834 return [self.instFluxPosCol,
1835 self.instFluxNegCol,
1836 self.instFluxPosErrCol,
1837 self.instFluxNegErrCol,
1838 self.photoCalibCol,
1839 self.photoCalibErrCol]
1841 @property
1842 def name(self):
1843 return f'dipDiffFluxErr_{self.instFluxPosCol}_{self.instFluxNegCol}'
1845 def _func(self, df):
1846 return np.sqrt(
1847 ((np.fabs(df[self.instFluxPosCol]) - np.fabs(df[self.instFluxNegCol]))
1848 * df[self.photoCalibErrCol])**2
1849 + (df[self.instFluxPosErrCol]**2 + df[self.instFluxNegErrCol]**2)
1850 * df[self.photoCalibCol]**2)
1853class Ratio(Functor):
1854 """Base class for returning the ratio of 2 columns.
1856 Can be used to compute a Signal to Noise ratio for any input flux.
1858 Parameters
1859 ----------
1860 numerator : `str`
1861 Name of the column to use at the numerator in the ratio
1862 denominator : `str`
1863 Name of the column to use as the denominator in the ratio.
1864 """
1865 def __init__(self,
1866 numerator,
1867 denominator,
1868 **kwargs):
1869 self.numerator = numerator
1870 self.denominator = denominator
1871 super().__init__(**kwargs)
1873 @property
1874 def columns(self):
1875 return [self.numerator, self.denominator]
1877 @property
1878 def name(self):
1879 return f'ratio_{self.numerator}_{self.denominator}'
1881 def _func(self, df):
1882 with np.warnings.catch_warnings():
1883 np.warnings.filterwarnings('ignore', r'invalid value encountered')
1884 np.warnings.filterwarnings('ignore', r'divide by zero')
1885 return df[self.numerator] / df[self.denominator]