Coverage for python/lsst/pipe/tasks/functors.py : 35%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of pipe_tasks.
2#
3# LSST Data Management System
4# This product includes software developed by the
5# LSST Project (http://www.lsst.org/).
6# See COPYRIGHT file at the top of the source tree.
7#
8# This program is free software: you can redistribute it and/or modify
9# it under the terms of the GNU General Public License as published by
10# the Free Software Foundation, either version 3 of the License, or
11# (at your option) any later version.
12#
13# This program is distributed in the hope that it will be useful,
14# but WITHOUT ANY WARRANTY; without even the implied warranty of
15# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16# GNU General Public License for more details.
17#
18# You should have received a copy of the LSST License Statement and
19# the GNU General Public License along with this program. If not,
20# see <https://www.lsstcorp.org/LegalNotices/>.
21#
22import yaml
23import re
24from itertools import product
25import os.path
27import pandas as pd
28import numpy as np
29import astropy.units as u
31from lsst.daf.persistence import doImport
32from lsst.daf.butler import DeferredDatasetHandle
33import lsst.geom as geom
34import lsst.sphgeom as sphgeom
36from .parquetTable import ParquetTable, MultilevelParquetTable
39def init_fromDict(initDict, basePath='lsst.pipe.tasks.functors',
40 typeKey='functor', name=None):
41 """Initialize an object defined in a dictionary
43 The object needs to be importable as
44 f'{basePath}.{initDict[typeKey]}'
45 The positional and keyword arguments (if any) are contained in
46 "args" and "kwargs" entries in the dictionary, respectively.
47 This is used in `functors.CompositeFunctor.from_yaml` to initialize
48 a composite functor from a specification in a YAML file.
50 Parameters
51 ----------
52 initDict : dictionary
53 Dictionary describing object's initialization. Must contain
54 an entry keyed by ``typeKey`` that is the name of the object,
55 relative to ``basePath``.
56 basePath : str
57 Path relative to module in which ``initDict[typeKey]`` is defined.
58 typeKey : str
59 Key of ``initDict`` that is the name of the object
60 (relative to `basePath`).
61 """
62 initDict = initDict.copy()
63 # TO DO: DM-21956 We should be able to define functors outside this module
64 pythonType = doImport(f'{basePath}.{initDict.pop(typeKey)}')
65 args = []
66 if 'args' in initDict:
67 args = initDict.pop('args')
68 if isinstance(args, str):
69 args = [args]
70 try:
71 element = pythonType(*args, **initDict)
72 except Exception as e:
73 message = f'Error in constructing functor "{name}" of type {pythonType.__name__} with args: {args}'
74 raise type(e)(message, e.args)
75 return element
78class Functor(object):
79 """Define and execute a calculation on a ParquetTable
81 The `__call__` method accepts either a `ParquetTable` object or a
82 `DeferredDatasetHandle`, and returns the
83 result of the calculation as a single column. Each functor defines what
84 columns are needed for the calculation, and only these columns are read
85 from the `ParquetTable`.
87 The action of `__call__` consists of two steps: first, loading the
88 necessary columns from disk into memory as a `pandas.DataFrame` object;
89 and second, performing the computation on this dataframe and returning the
90 result.
93 To define a new `Functor`, a subclass must define a `_func` method,
94 that takes a `pandas.DataFrame` and returns result in a `pandas.Series`.
95 In addition, it must define the following attributes
97 * `_columns`: The columns necessary to perform the calculation
98 * `name`: A name appropriate for a figure axis label
99 * `shortname`: A name appropriate for use as a dictionary key
101 On initialization, a `Functor` should declare what band (`filt` kwarg)
102 and dataset (e.g. `'ref'`, `'meas'`, `'forced_src'`) it is intended to be
103 applied to. This enables the `_get_data` method to extract the proper
104 columns from the parquet file. If not specified, the dataset will fall back
105 on the `_defaultDataset`attribute. If band is not specified and `dataset`
106 is anything other than `'ref'`, then an error will be raised when trying to
107 perform the calculation.
109 Originally, `Functor` was set up to expect
110 datasets formatted like the `deepCoadd_obj` dataset; that is, a
111 dataframe with a multi-level column index, with the levels of the
112 column index being `band`, `dataset`, and `column`.
113 It has since been generalized to apply to dataframes without mutli-level
114 indices and multi-level indices with just `dataset` and `column` levels.
115 In addition, the `_get_data` method that reads
116 the dataframe from the `ParquetTable` will return a dataframe with column
117 index levels defined by the `_dfLevels` attribute; by default, this is
118 `column`.
120 The `_dfLevels` attributes should generally not need to
121 be changed, unless `_func` needs columns from multiple filters or datasets
122 to do the calculation.
123 An example of this is the `lsst.pipe.tasks.functors.Color` functor, for
124 which `_dfLevels = ('band', 'column')`, and `_func` expects the dataframe
125 it gets to have those levels in the column index.
127 Parameters
128 ----------
129 filt : str
130 Filter upon which to do the calculation
132 dataset : str
133 Dataset upon which to do the calculation
134 (e.g., 'ref', 'meas', 'forced_src').
136 """
138 _defaultDataset = 'ref'
139 _dfLevels = ('column',)
140 _defaultNoDup = False
142 def __init__(self, filt=None, dataset=None, noDup=None):
143 self.filt = filt
144 self.dataset = dataset if dataset is not None else self._defaultDataset
145 self._noDup = noDup
147 @property
148 def noDup(self):
149 if self._noDup is not None:
150 return self._noDup
151 else:
152 return self._defaultNoDup
154 @property
155 def columns(self):
156 """Columns required to perform calculation
157 """
158 if not hasattr(self, '_columns'):
159 raise NotImplementedError('Must define columns property or _columns attribute')
160 return self._columns
162 def _get_data_columnLevels(self, data, columnIndex=None):
163 """Gets the names of the column index levels
165 This should only be called in the context of a multilevel table.
166 The logic here is to enable this to work both with the gen2 `MultilevelParquetTable`
167 and with the gen3 `DeferredDatasetHandle`.
169 Parameters
170 ----------
171 data : `MultilevelParquetTable` or `DeferredDatasetHandle`
173 columnnIndex (optional): pandas `Index` object
174 if not passed, then it is read from the `DeferredDatasetHandle`
175 """
176 if isinstance(data, DeferredDatasetHandle):
177 if columnIndex is None:
178 columnIndex = data.get(component="columns")
179 if columnIndex is not None:
180 return columnIndex.names
181 if isinstance(data, MultilevelParquetTable):
182 return data.columnLevels
183 else:
184 raise TypeError(f"Unknown type for data: {type(data)}!")
186 def _get_data_columnLevelNames(self, data, columnIndex=None):
187 """Gets the content of each of the column levels for a multilevel table
189 Similar to `_get_data_columnLevels`, this enables backward compatibility with gen2.
191 Mirrors original gen2 implementation within `pipe.tasks.parquetTable.MultilevelParquetTable`
192 """
193 if isinstance(data, DeferredDatasetHandle):
194 if columnIndex is None:
195 columnIndex = data.get(component="columns")
196 if columnIndex is not None:
197 columnLevels = columnIndex.names
198 columnLevelNames = {
199 level: list(np.unique(np.array([c for c in columnIndex])[:, i]))
200 for i, level in enumerate(columnLevels)
201 }
202 return columnLevelNames
203 if isinstance(data, MultilevelParquetTable):
204 return data.columnLevelNames
205 else:
206 raise TypeError(f"Unknown type for data: {type(data)}!")
208 def _colsFromDict(self, colDict, columnIndex=None):
209 """Converts dictionary column specficiation to a list of columns
211 This mirrors the original gen2 implementation within `pipe.tasks.parquetTable.MultilevelParquetTable`
212 """
213 new_colDict = {}
214 columnLevels = self._get_data_columnLevels(None, columnIndex=columnIndex)
216 for i, lev in enumerate(columnLevels):
217 if lev in colDict:
218 if isinstance(colDict[lev], str):
219 new_colDict[lev] = [colDict[lev]]
220 else:
221 new_colDict[lev] = colDict[lev]
222 else:
223 new_colDict[lev] = columnIndex.levels[i]
225 levelCols = [new_colDict[lev] for lev in columnLevels]
226 cols = product(*levelCols)
227 return list(cols)
229 def multilevelColumns(self, data, columnIndex=None, returnTuple=False):
230 """Returns columns needed by functor from multilevel dataset
232 To access tables with multilevel column structure, the `MultilevelParquetTable`
233 or `DeferredDatasetHandle` need to be passed either a list of tuples or a
234 dictionary.
236 Parameters
237 ----------
238 data : `MultilevelParquetTable` or `DeferredDatasetHandle`
240 columnIndex (optional): pandas `Index` object
241 either passed or read in from `DeferredDatasetHandle`.
243 `returnTuple` : bool
244 If true, then return a list of tuples rather than the column dictionary
245 specification. This is set to `True` by `CompositeFunctor` in order to be able to
246 combine columns from the various component functors.
248 """
249 if isinstance(data, DeferredDatasetHandle) and columnIndex is None:
250 columnIndex = data.get(component="columns")
252 # Confirm that the dataset has the column levels the functor is expecting it to have.
253 columnLevels = self._get_data_columnLevels(data, columnIndex)
255 columnDict = {'column': self.columns,
256 'dataset': self.dataset}
257 if self.filt is None:
258 columnLevelNames = self._get_data_columnLevelNames(data, columnIndex)
259 if "band" in columnLevels:
260 if self.dataset == "ref":
261 columnDict["band"] = columnLevelNames["band"][0]
262 else:
263 raise ValueError(f"'filt' not set for functor {self.name}"
264 f"(dataset {self.dataset}) "
265 "and ParquetTable "
266 "contains multiple filters in column index. "
267 "Set 'filt' or set 'dataset' to 'ref'.")
268 else:
269 columnDict['band'] = self.filt
271 if isinstance(data, MultilevelParquetTable):
272 return data._colsFromDict(columnDict)
273 elif isinstance(data, DeferredDatasetHandle):
274 if returnTuple:
275 return self._colsFromDict(columnDict, columnIndex=columnIndex)
276 else:
277 return columnDict
279 def _func(self, df, dropna=True):
280 raise NotImplementedError('Must define calculation on dataframe')
282 def _get_columnIndex(self, data):
283 """Return columnIndex
284 """
286 if isinstance(data, DeferredDatasetHandle):
287 return data.get(component="columns")
288 else:
289 return None
291 def _get_data(self, data):
292 """Retrieve dataframe necessary for calculation.
294 The data argument can be a DataFrame, a ParquetTable instance, or a gen3 DeferredDatasetHandle
296 Returns dataframe upon which `self._func` can act.
298 N.B. while passing a raw pandas `DataFrame` *should* work here, it has not been tested.
299 """
300 if isinstance(data, pd.DataFrame):
301 return data
303 # First thing to do: check to see if the data source has a multilevel column index or not.
304 columnIndex = self._get_columnIndex(data)
305 is_multiLevel = isinstance(data, MultilevelParquetTable) or isinstance(columnIndex, pd.MultiIndex)
307 # Simple single-level parquet table, gen2
308 if isinstance(data, ParquetTable) and not is_multiLevel:
309 columns = self.columns
310 df = data.toDataFrame(columns=columns)
311 return df
313 # Get proper columns specification for this functor
314 if is_multiLevel:
315 columns = self.multilevelColumns(data, columnIndex=columnIndex)
316 else:
317 columns = self.columns
319 if isinstance(data, MultilevelParquetTable):
320 # Load in-memory dataframe with appropriate columns the gen2 way
321 df = data.toDataFrame(columns=columns, droplevels=False)
322 elif isinstance(data, DeferredDatasetHandle):
323 # Load in-memory dataframe with appropriate columns the gen3 way
324 df = data.get(parameters={"columns": columns})
326 # Drop unnecessary column levels
327 if is_multiLevel:
328 df = self._setLevels(df)
330 return df
332 def _setLevels(self, df):
333 levelsToDrop = [n for n in df.columns.names if n not in self._dfLevels]
334 df.columns = df.columns.droplevel(levelsToDrop)
335 return df
337 def _dropna(self, vals):
338 return vals.dropna()
340 def __call__(self, data, dropna=False):
341 try:
342 df = self._get_data(data)
343 vals = self._func(df)
344 except Exception:
345 vals = self.fail(df)
346 if dropna:
347 vals = self._dropna(vals)
349 return vals
351 def difference(self, data1, data2, **kwargs):
352 """Computes difference between functor called on two different ParquetTable objects
353 """
354 return self(data1, **kwargs) - self(data2, **kwargs)
356 def fail(self, df):
357 return pd.Series(np.full(len(df), np.nan), index=df.index)
359 @property
360 def name(self):
361 """Full name of functor (suitable for figure labels)
362 """
363 return NotImplementedError
365 @property
366 def shortname(self):
367 """Short name of functor (suitable for column name/dict key)
368 """
369 return self.name
372class CompositeFunctor(Functor):
373 """Perform multiple calculations at once on a catalog
375 The role of a `CompositeFunctor` is to group together computations from
376 multiple functors. Instead of returning `pandas.Series` a
377 `CompositeFunctor` returns a `pandas.Dataframe`, with the column names
378 being the keys of `funcDict`.
380 The `columns` attribute of a `CompositeFunctor` is the union of all columns
381 in all the component functors.
383 A `CompositeFunctor` does not use a `_func` method itself; rather,
384 when a `CompositeFunctor` is called, all its columns are loaded
385 at once, and the resulting dataframe is passed to the `_func` method of each component
386 functor. This has the advantage of only doing I/O (reading from parquet file) once,
387 and works because each individual `_func` method of each component functor does not
388 care if there are *extra* columns in the dataframe being passed; only that it must contain
389 *at least* the `columns` it expects.
391 An important and useful class method is `from_yaml`, which takes as argument the path to a YAML
392 file specifying a collection of functors.
394 Parameters
395 ----------
396 funcs : `dict` or `list`
397 Dictionary or list of functors. If a list, then it will be converted
398 into a dictonary according to the `.shortname` attribute of each functor.
400 """
401 dataset = None
403 def __init__(self, funcs, **kwargs):
405 if type(funcs) == dict:
406 self.funcDict = funcs
407 else:
408 self.funcDict = {f.shortname: f for f in funcs}
410 self._filt = None
412 super().__init__(**kwargs)
414 @property
415 def filt(self):
416 return self._filt
418 @filt.setter
419 def filt(self, filt):
420 if filt is not None:
421 for _, f in self.funcDict.items():
422 f.filt = filt
423 self._filt = filt
425 def update(self, new):
426 if isinstance(new, dict):
427 self.funcDict.update(new)
428 elif isinstance(new, CompositeFunctor):
429 self.funcDict.update(new.funcDict)
430 else:
431 raise TypeError('Can only update with dictionary or CompositeFunctor.')
433 # Make sure new functors have the same 'filt' set
434 if self.filt is not None:
435 self.filt = self.filt
437 @property
438 def columns(self):
439 return list(set([x for y in [f.columns for f in self.funcDict.values()] for x in y]))
441 def multilevelColumns(self, data, **kwargs):
442 # Get the union of columns for all component functors. Note the need to have `returnTuple=True` here.
443 return list(
444 set(
445 [
446 x
447 for y in [
448 f.multilevelColumns(data, returnTuple=True, **kwargs) for f in self.funcDict.values()
449 ]
450 for x in y
451 ]
452 )
453 )
455 def __call__(self, data, **kwargs):
456 """Apply the functor to the data table
458 Parameters
459 ----------
460 data : `lsst.daf.butler.DeferredDatasetHandle`,
461 `lsst.pipe.tasks.parquetTable.MultilevelParquetTable`,
462 `lsst.pipe.tasks.parquetTable.ParquetTable`,
463 or `pandas.DataFrame`.
464 The table or a pointer to a table on disk from which columns can
465 be accessed
466 """
467 columnIndex = self._get_columnIndex(data)
469 # First, determine whether data has a multilevel index (either gen2 or gen3)
470 is_multiLevel = isinstance(data, MultilevelParquetTable) or isinstance(columnIndex, pd.MultiIndex)
472 # Multilevel index, gen2 or gen3
473 if is_multiLevel:
474 columns = self.multilevelColumns(data, columnIndex=columnIndex)
476 if isinstance(data, MultilevelParquetTable):
477 # Read data into memory the gen2 way
478 df = data.toDataFrame(columns=columns, droplevels=False)
479 elif isinstance(data, DeferredDatasetHandle):
480 # Read data into memory the gen3 way
481 df = data.get(parameters={"columns": columns})
483 valDict = {}
484 for k, f in self.funcDict.items():
485 try:
486 subdf = f._setLevels(
487 df[f.multilevelColumns(data, returnTuple=True, columnIndex=columnIndex)]
488 )
489 valDict[k] = f._func(subdf)
490 except Exception as e:
491 try:
492 valDict[k] = f.fail(subdf)
493 except NameError:
494 raise e
496 else:
497 if isinstance(data, DeferredDatasetHandle):
498 # input if Gen3 deferLoad=True
499 df = data.get(parameters={"columns": self.columns})
500 elif isinstance(data, pd.DataFrame):
501 # input if Gen3 deferLoad=False
502 df = data
503 else:
504 # Original Gen2 input is type ParquetTable and the fallback
505 df = data.toDataFrame(columns=self.columns)
507 valDict = {k: f._func(df) for k, f in self.funcDict.items()}
509 try:
510 valDf = pd.concat(valDict, axis=1)
511 except TypeError:
512 print([(k, type(v)) for k, v in valDict.items()])
513 raise
515 if kwargs.get('dropna', False):
516 valDf = valDf.dropna(how='any')
518 return valDf
520 @classmethod
521 def renameCol(cls, col, renameRules):
522 if renameRules is None:
523 return col
524 for old, new in renameRules:
525 if col.startswith(old):
526 col = col.replace(old, new)
527 return col
529 @classmethod
530 def from_file(cls, filename, **kwargs):
531 # Allow environment variables in the filename.
532 filename = os.path.expandvars(filename)
533 with open(filename) as f:
534 translationDefinition = yaml.safe_load(f)
536 return cls.from_yaml(translationDefinition, **kwargs)
538 @classmethod
539 def from_yaml(cls, translationDefinition, **kwargs):
540 funcs = {}
541 for func, val in translationDefinition['funcs'].items():
542 funcs[func] = init_fromDict(val, name=func)
544 if 'flag_rename_rules' in translationDefinition:
545 renameRules = translationDefinition['flag_rename_rules']
546 else:
547 renameRules = None
549 if 'calexpFlags' in translationDefinition:
550 for flag in translationDefinition['calexpFlags']:
551 funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='calexp')
553 if 'refFlags' in translationDefinition:
554 for flag in translationDefinition['refFlags']:
555 funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='ref')
557 if 'forcedFlags' in translationDefinition:
558 for flag in translationDefinition['forcedFlags']:
559 funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='forced_src')
561 if 'flags' in translationDefinition:
562 for flag in translationDefinition['flags']:
563 funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='meas')
565 return cls(funcs, **kwargs)
568def mag_aware_eval(df, expr):
569 """Evaluate an expression on a DataFrame, knowing what the 'mag' function means
571 Builds on `pandas.DataFrame.eval`, which parses and executes math on dataframes.
573 Parameters
574 ----------
575 df : pandas.DataFrame
576 Dataframe on which to evaluate expression.
578 expr : str
579 Expression.
580 """
581 try:
582 expr_new = re.sub(r'mag\((\w+)\)', r'-2.5*log(\g<1>)/log(10)', expr)
583 val = df.eval(expr_new, truediv=True)
584 except Exception: # Should check what actually gets raised
585 expr_new = re.sub(r'mag\((\w+)\)', r'-2.5*log(\g<1>_instFlux)/log(10)', expr)
586 val = df.eval(expr_new, truediv=True)
587 return val
590class CustomFunctor(Functor):
591 """Arbitrary computation on a catalog
593 Column names (and thus the columns to be loaded from catalog) are found
594 by finding all words and trying to ignore all "math-y" words.
596 Parameters
597 ----------
598 expr : str
599 Expression to evaluate, to be parsed and executed by `mag_aware_eval`.
600 """
601 _ignore_words = ('mag', 'sin', 'cos', 'exp', 'log', 'sqrt')
603 def __init__(self, expr, **kwargs):
604 self.expr = expr
605 super().__init__(**kwargs)
607 @property
608 def name(self):
609 return self.expr
611 @property
612 def columns(self):
613 flux_cols = re.findall(r'mag\(\s*(\w+)\s*\)', self.expr)
615 cols = [c for c in re.findall(r'[a-zA-Z_]+', self.expr) if c not in self._ignore_words]
616 not_a_col = []
617 for c in flux_cols:
618 if not re.search('_instFlux$', c):
619 cols.append(f'{c}_instFlux')
620 not_a_col.append(c)
621 else:
622 cols.append(c)
624 return list(set([c for c in cols if c not in not_a_col]))
626 def _func(self, df):
627 return mag_aware_eval(df, self.expr)
630class Column(Functor):
631 """Get column with specified name
632 """
634 def __init__(self, col, **kwargs):
635 self.col = col
636 super().__init__(**kwargs)
638 @property
639 def name(self):
640 return self.col
642 @property
643 def columns(self):
644 return [self.col]
646 def _func(self, df):
647 return df[self.col]
650class Index(Functor):
651 """Return the value of the index for each object
652 """
654 columns = ['coord_ra'] # just a dummy; something has to be here
655 _defaultDataset = 'ref'
656 _defaultNoDup = True
658 def _func(self, df):
659 return pd.Series(df.index, index=df.index)
662class IDColumn(Column):
663 col = 'id'
664 _allow_difference = False
665 _defaultNoDup = True
667 def _func(self, df):
668 return pd.Series(df.index, index=df.index)
671class FootprintNPix(Column):
672 col = 'base_Footprint_nPix'
675class CoordColumn(Column):
676 """Base class for coordinate column, in degrees
677 """
678 _radians = True
680 def __init__(self, col, **kwargs):
681 super().__init__(col, **kwargs)
683 def _func(self, df):
684 # Must not modify original column in case that column is used by another functor
685 output = df[self.col] * 180 / np.pi if self._radians else df[self.col]
686 return output
689class RAColumn(CoordColumn):
690 """Right Ascension, in degrees
691 """
692 name = 'RA'
693 _defaultNoDup = True
695 def __init__(self, **kwargs):
696 super().__init__('coord_ra', **kwargs)
698 def __call__(self, catalog, **kwargs):
699 return super().__call__(catalog, **kwargs)
702class DecColumn(CoordColumn):
703 """Declination, in degrees
704 """
705 name = 'Dec'
706 _defaultNoDup = True
708 def __init__(self, **kwargs):
709 super().__init__('coord_dec', **kwargs)
711 def __call__(self, catalog, **kwargs):
712 return super().__call__(catalog, **kwargs)
715class HtmIndex20(Functor):
716 """Compute the level 20 HtmIndex for the catalog.
718 Notes
719 -----
720 This functor was implemented to satisfy requirements of old APDB interface
721 which required ``pixelId`` column in DiaObject with HTM20 index. APDB
722 interface had migrated to not need that information, but we keep this
723 class in case it may be useful for something else.
724 """
725 name = "Htm20"
726 htmLevel = 20
727 _radians = True
729 def __init__(self, ra, decl, **kwargs):
730 self.pixelator = sphgeom.HtmPixelization(self.htmLevel)
731 self.ra = ra
732 self.decl = decl
733 self._columns = [self.ra, self.decl]
734 super().__init__(**kwargs)
736 def _func(self, df):
738 def computePixel(row):
739 if self._radians:
740 sphPoint = geom.SpherePoint(row[self.ra],
741 row[self.decl],
742 geom.radians)
743 else:
744 sphPoint = geom.SpherePoint(row[self.ra],
745 row[self.decl],
746 geom.degrees)
747 return self.pixelator.index(sphPoint.getVector())
749 return df.apply(computePixel, axis=1)
752def fluxName(col):
753 if not col.endswith('_instFlux'):
754 col += '_instFlux'
755 return col
758def fluxErrName(col):
759 if not col.endswith('_instFluxErr'):
760 col += '_instFluxErr'
761 return col
764class Mag(Functor):
765 """Compute calibrated magnitude
767 Takes a `calib` argument, which returns the flux at mag=0
768 as `calib.getFluxMag0()`. If not provided, then the default
769 `fluxMag0` is 63095734448.0194, which is default for HSC.
770 This default should be removed in DM-21955
772 This calculation hides warnings about invalid values and dividing by zero.
774 As for all functors, a `dataset` and `filt` kwarg should be provided upon
775 initialization. Unlike the default `Functor`, however, the default dataset
776 for a `Mag` is `'meas'`, rather than `'ref'`.
778 Parameters
779 ----------
780 col : `str`
781 Name of flux column from which to compute magnitude. Can be parseable
782 by `lsst.pipe.tasks.functors.fluxName` function---that is, you can pass
783 `'modelfit_CModel'` instead of `'modelfit_CModel_instFlux'`) and it will
784 understand.
785 calib : `lsst.afw.image.calib.Calib` (optional)
786 Object that knows zero point.
787 """
788 _defaultDataset = 'meas'
790 def __init__(self, col, calib=None, **kwargs):
791 self.col = fluxName(col)
792 self.calib = calib
793 if calib is not None:
794 self.fluxMag0 = calib.getFluxMag0()[0]
795 else:
796 # TO DO: DM-21955 Replace hard coded photometic calibration values
797 self.fluxMag0 = 63095734448.0194
799 super().__init__(**kwargs)
801 @property
802 def columns(self):
803 return [self.col]
805 def _func(self, df):
806 with np.warnings.catch_warnings():
807 np.warnings.filterwarnings('ignore', r'invalid value encountered')
808 np.warnings.filterwarnings('ignore', r'divide by zero')
809 return -2.5*np.log10(df[self.col] / self.fluxMag0)
811 @property
812 def name(self):
813 return f'mag_{self.col}'
816class MagErr(Mag):
817 """Compute calibrated magnitude uncertainty
819 Takes the same `calib` object as `lsst.pipe.tasks.functors.Mag`.
821 Parameters
822 col : `str`
823 Name of flux column
824 calib : `lsst.afw.image.calib.Calib` (optional)
825 Object that knows zero point.
826 """
828 def __init__(self, *args, **kwargs):
829 super().__init__(*args, **kwargs)
830 if self.calib is not None:
831 self.fluxMag0Err = self.calib.getFluxMag0()[1]
832 else:
833 self.fluxMag0Err = 0.
835 @property
836 def columns(self):
837 return [self.col, self.col + 'Err']
839 def _func(self, df):
840 with np.warnings.catch_warnings():
841 np.warnings.filterwarnings('ignore', r'invalid value encountered')
842 np.warnings.filterwarnings('ignore', r'divide by zero')
843 fluxCol, fluxErrCol = self.columns
844 x = df[fluxErrCol] / df[fluxCol]
845 y = self.fluxMag0Err / self.fluxMag0
846 magErr = (2.5 / np.log(10.)) * np.sqrt(x*x + y*y)
847 return magErr
849 @property
850 def name(self):
851 return super().name + '_err'
854class NanoMaggie(Mag):
855 """
856 """
858 def _func(self, df):
859 return (df[self.col] / self.fluxMag0) * 1e9
862class MagDiff(Functor):
863 _defaultDataset = 'meas'
865 """Functor to calculate magnitude difference"""
867 def __init__(self, col1, col2, **kwargs):
868 self.col1 = fluxName(col1)
869 self.col2 = fluxName(col2)
870 super().__init__(**kwargs)
872 @property
873 def columns(self):
874 return [self.col1, self.col2]
876 def _func(self, df):
877 with np.warnings.catch_warnings():
878 np.warnings.filterwarnings('ignore', r'invalid value encountered')
879 np.warnings.filterwarnings('ignore', r'divide by zero')
880 return -2.5*np.log10(df[self.col1]/df[self.col2])
882 @property
883 def name(self):
884 return f'(mag_{self.col1} - mag_{self.col2})'
886 @property
887 def shortname(self):
888 return f'magDiff_{self.col1}_{self.col2}'
891class Color(Functor):
892 """Compute the color between two filters
894 Computes color by initializing two different `Mag`
895 functors based on the `col` and filters provided, and
896 then returning the difference.
898 This is enabled by the `_func` expecting a dataframe with a
899 multilevel column index, with both `'band'` and `'column'`,
900 instead of just `'column'`, which is the `Functor` default.
901 This is controlled by the `_dfLevels` attribute.
903 Also of note, the default dataset for `Color` is `forced_src'`,
904 whereas for `Mag` it is `'meas'`.
906 Parameters
907 ----------
908 col : str
909 Name of flux column from which to compute; same as would be passed to
910 `lsst.pipe.tasks.functors.Mag`.
912 filt2, filt1 : str
913 Filters from which to compute magnitude difference.
914 Color computed is `Mag(filt2) - Mag(filt1)`.
915 """
916 _defaultDataset = 'forced_src'
917 _dfLevels = ('band', 'column')
918 _defaultNoDup = True
920 def __init__(self, col, filt2, filt1, **kwargs):
921 self.col = fluxName(col)
922 if filt2 == filt1:
923 raise RuntimeError("Cannot compute Color for %s: %s - %s " % (col, filt2, filt1))
924 self.filt2 = filt2
925 self.filt1 = filt1
927 self.mag2 = Mag(col, filt=filt2, **kwargs)
928 self.mag1 = Mag(col, filt=filt1, **kwargs)
930 super().__init__(**kwargs)
932 @property
933 def filt(self):
934 return None
936 @filt.setter
937 def filt(self, filt):
938 pass
940 def _func(self, df):
941 mag2 = self.mag2._func(df[self.filt2])
942 mag1 = self.mag1._func(df[self.filt1])
943 return mag2 - mag1
945 @property
946 def columns(self):
947 return [self.mag1.col, self.mag2.col]
949 def multilevelColumns(self, parq, **kwargs):
950 return [(self.dataset, self.filt1, self.col), (self.dataset, self.filt2, self.col)]
952 @property
953 def name(self):
954 return f'{self.filt2} - {self.filt1} ({self.col})'
956 @property
957 def shortname(self):
958 return f"{self.col}_{self.filt2.replace('-', '')}m{self.filt1.replace('-', '')}"
961class Labeller(Functor):
962 """Main function of this subclass is to override the dropna=True
963 """
964 _null_label = 'null'
965 _allow_difference = False
966 name = 'label'
967 _force_str = False
969 def __call__(self, parq, dropna=False, **kwargs):
970 return super().__call__(parq, dropna=False, **kwargs)
973class StarGalaxyLabeller(Labeller):
974 _columns = ["base_ClassificationExtendedness_value"]
975 _column = "base_ClassificationExtendedness_value"
977 def _func(self, df):
978 x = df[self._columns][self._column]
979 mask = x.isnull()
980 test = (x < 0.5).astype(int)
981 test = test.mask(mask, 2)
983 # TODO: DM-21954 Look into veracity of inline comment below
984 # are these backwards?
985 categories = ['galaxy', 'star', self._null_label]
986 label = pd.Series(pd.Categorical.from_codes(test, categories=categories),
987 index=x.index, name='label')
988 if self._force_str:
989 label = label.astype(str)
990 return label
993class NumStarLabeller(Labeller):
994 _columns = ['numStarFlags']
995 labels = {"star": 0, "maybe": 1, "notStar": 2}
997 def _func(self, df):
998 x = df[self._columns][self._columns[0]]
1000 # Number of filters
1001 n = len(x.unique()) - 1
1003 labels = ['noStar', 'maybe', 'star']
1004 label = pd.Series(pd.cut(x, [-1, 0, n-1, n], labels=labels),
1005 index=x.index, name='label')
1007 if self._force_str:
1008 label = label.astype(str)
1010 return label
1013class DeconvolvedMoments(Functor):
1014 name = 'Deconvolved Moments'
1015 shortname = 'deconvolvedMoments'
1016 _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
1017 "ext_shapeHSM_HsmSourceMoments_yy",
1018 "base_SdssShape_xx", "base_SdssShape_yy",
1019 "ext_shapeHSM_HsmPsfMoments_xx",
1020 "ext_shapeHSM_HsmPsfMoments_yy")
1022 def _func(self, df):
1023 """Calculate deconvolved moments"""
1024 if "ext_shapeHSM_HsmSourceMoments_xx" in df.columns: # _xx added by tdm
1025 hsm = df["ext_shapeHSM_HsmSourceMoments_xx"] + df["ext_shapeHSM_HsmSourceMoments_yy"]
1026 else:
1027 hsm = np.ones(len(df))*np.nan
1028 sdss = df["base_SdssShape_xx"] + df["base_SdssShape_yy"]
1029 if "ext_shapeHSM_HsmPsfMoments_xx" in df.columns:
1030 psf = df["ext_shapeHSM_HsmPsfMoments_xx"] + df["ext_shapeHSM_HsmPsfMoments_yy"]
1031 else:
1032 # LSST does not have shape.sdss.psf. Could instead add base_PsfShape to catalog using
1033 # exposure.getPsf().computeShape(s.getCentroid()).getIxx()
1034 # raise TaskError("No psf shape parameter found in catalog")
1035 raise RuntimeError('No psf shape parameter found in catalog')
1037 return hsm.where(np.isfinite(hsm), sdss) - psf
1040class SdssTraceSize(Functor):
1041 """Functor to calculate SDSS trace radius size for sources"""
1042 name = "SDSS Trace Size"
1043 shortname = 'sdssTrace'
1044 _columns = ("base_SdssShape_xx", "base_SdssShape_yy")
1046 def _func(self, df):
1047 srcSize = np.sqrt(0.5*(df["base_SdssShape_xx"] + df["base_SdssShape_yy"]))
1048 return srcSize
1051class PsfSdssTraceSizeDiff(Functor):
1052 """Functor to calculate SDSS trace radius size difference (%) between object and psf model"""
1053 name = "PSF - SDSS Trace Size"
1054 shortname = 'psf_sdssTrace'
1055 _columns = ("base_SdssShape_xx", "base_SdssShape_yy",
1056 "base_SdssShape_psf_xx", "base_SdssShape_psf_yy")
1058 def _func(self, df):
1059 srcSize = np.sqrt(0.5*(df["base_SdssShape_xx"] + df["base_SdssShape_yy"]))
1060 psfSize = np.sqrt(0.5*(df["base_SdssShape_psf_xx"] + df["base_SdssShape_psf_yy"]))
1061 sizeDiff = 100*(srcSize - psfSize)/(0.5*(srcSize + psfSize))
1062 return sizeDiff
1065class HsmTraceSize(Functor):
1066 """Functor to calculate HSM trace radius size for sources"""
1067 name = 'HSM Trace Size'
1068 shortname = 'hsmTrace'
1069 _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
1070 "ext_shapeHSM_HsmSourceMoments_yy")
1072 def _func(self, df):
1073 srcSize = np.sqrt(0.5*(df["ext_shapeHSM_HsmSourceMoments_xx"]
1074 + df["ext_shapeHSM_HsmSourceMoments_yy"]))
1075 return srcSize
1078class PsfHsmTraceSizeDiff(Functor):
1079 """Functor to calculate HSM trace radius size difference (%) between object and psf model"""
1080 name = 'PSF - HSM Trace Size'
1081 shortname = 'psf_HsmTrace'
1082 _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
1083 "ext_shapeHSM_HsmSourceMoments_yy",
1084 "ext_shapeHSM_HsmPsfMoments_xx",
1085 "ext_shapeHSM_HsmPsfMoments_yy")
1087 def _func(self, df):
1088 srcSize = np.sqrt(0.5*(df["ext_shapeHSM_HsmSourceMoments_xx"]
1089 + df["ext_shapeHSM_HsmSourceMoments_yy"]))
1090 psfSize = np.sqrt(0.5*(df["ext_shapeHSM_HsmPsfMoments_xx"]
1091 + df["ext_shapeHSM_HsmPsfMoments_yy"]))
1092 sizeDiff = 100*(srcSize - psfSize)/(0.5*(srcSize + psfSize))
1093 return sizeDiff
1096class HsmFwhm(Functor):
1097 name = 'HSM Psf FWHM'
1098 _columns = ('ext_shapeHSM_HsmPsfMoments_xx', 'ext_shapeHSM_HsmPsfMoments_yy')
1099 # TODO: DM-21403 pixel scale should be computed from the CD matrix or transform matrix
1100 pixelScale = 0.168
1101 SIGMA2FWHM = 2*np.sqrt(2*np.log(2))
1103 def _func(self, df):
1104 return self.pixelScale*self.SIGMA2FWHM*np.sqrt(
1105 0.5*(df['ext_shapeHSM_HsmPsfMoments_xx'] + df['ext_shapeHSM_HsmPsfMoments_yy']))
1108class E1(Functor):
1109 name = "Distortion Ellipticity (e1)"
1110 shortname = "Distortion"
1112 def __init__(self, colXX, colXY, colYY, **kwargs):
1113 self.colXX = colXX
1114 self.colXY = colXY
1115 self.colYY = colYY
1116 self._columns = [self.colXX, self.colXY, self.colYY]
1117 super().__init__(**kwargs)
1119 @property
1120 def columns(self):
1121 return [self.colXX, self.colXY, self.colYY]
1123 def _func(self, df):
1124 return df[self.colXX] - df[self.colYY] / (df[self.colXX] + df[self.colYY])
1127class E2(Functor):
1128 name = "Ellipticity e2"
1130 def __init__(self, colXX, colXY, colYY, **kwargs):
1131 self.colXX = colXX
1132 self.colXY = colXY
1133 self.colYY = colYY
1134 super().__init__(**kwargs)
1136 @property
1137 def columns(self):
1138 return [self.colXX, self.colXY, self.colYY]
1140 def _func(self, df):
1141 return 2*df[self.colXY] / (df[self.colXX] + df[self.colYY])
1144class RadiusFromQuadrupole(Functor):
1146 def __init__(self, colXX, colXY, colYY, **kwargs):
1147 self.colXX = colXX
1148 self.colXY = colXY
1149 self.colYY = colYY
1150 super().__init__(**kwargs)
1152 @property
1153 def columns(self):
1154 return [self.colXX, self.colXY, self.colYY]
1156 def _func(self, df):
1157 return (df[self.colXX]*df[self.colYY] - df[self.colXY]**2)**0.25
1160class LocalWcs(Functor):
1161 """Computations using the stored localWcs.
1162 """
1163 name = "LocalWcsOperations"
1165 def __init__(self,
1166 colCD_1_1,
1167 colCD_1_2,
1168 colCD_2_1,
1169 colCD_2_2,
1170 **kwargs):
1171 self.colCD_1_1 = colCD_1_1
1172 self.colCD_1_2 = colCD_1_2
1173 self.colCD_2_1 = colCD_2_1
1174 self.colCD_2_2 = colCD_2_2
1175 super().__init__(**kwargs)
1177 def computeDeltaRaDec(self, x, y, cd11, cd12, cd21, cd22):
1178 """Compute the distance on the sphere from x2, y1 to x1, y1.
1180 Parameters
1181 ----------
1182 x : `pandas.Series`
1183 X pixel coordinate.
1184 y : `pandas.Series`
1185 Y pixel coordinate.
1186 cd11 : `pandas.Series`
1187 [1, 1] element of the local Wcs affine transform.
1188 cd11 : `pandas.Series`
1189 [1, 1] element of the local Wcs affine transform.
1190 cd12 : `pandas.Series`
1191 [1, 2] element of the local Wcs affine transform.
1192 cd21 : `pandas.Series`
1193 [2, 1] element of the local Wcs affine transform.
1194 cd22 : `pandas.Series`
1195 [2, 2] element of the local Wcs affine transform.
1197 Returns
1198 -------
1199 raDecTuple : tuple
1200 RA and dec conversion of x and y given the local Wcs. Returned
1201 units are in radians.
1203 """
1204 return (x * cd11 + y * cd12, x * cd21 + y * cd22)
1206 def computeSkySeperation(self, ra1, dec1, ra2, dec2):
1207 """Compute the local pixel scale conversion.
1209 Parameters
1210 ----------
1211 ra1 : `pandas.Series`
1212 Ra of the first coordinate in radians.
1213 dec1 : `pandas.Series`
1214 Dec of the first coordinate in radians.
1215 ra2 : `pandas.Series`
1216 Ra of the second coordinate in radians.
1217 dec2 : `pandas.Series`
1218 Dec of the second coordinate in radians.
1220 Returns
1221 -------
1222 dist : `pandas.Series`
1223 Distance on the sphere in radians.
1224 """
1225 deltaDec = dec2 - dec1
1226 deltaRa = ra2 - ra1
1227 return 2 * np.arcsin(
1228 np.sqrt(
1229 np.sin(deltaDec / 2) ** 2
1230 + np.cos(dec2) * np.cos(dec1) * np.sin(deltaRa / 2) ** 2))
1232 def getSkySeperationFromPixel(self, x1, y1, x2, y2, cd11, cd12, cd21, cd22):
1233 """Compute the distance on the sphere from x2, y1 to x1, y1.
1235 Parameters
1236 ----------
1237 x1 : `pandas.Series`
1238 X pixel coordinate.
1239 y1 : `pandas.Series`
1240 Y pixel coordinate.
1241 x2 : `pandas.Series`
1242 X pixel coordinate.
1243 y2 : `pandas.Series`
1244 Y pixel coordinate.
1245 cd11 : `pandas.Series`
1246 [1, 1] element of the local Wcs affine transform.
1247 cd11 : `pandas.Series`
1248 [1, 1] element of the local Wcs affine transform.
1249 cd12 : `pandas.Series`
1250 [1, 2] element of the local Wcs affine transform.
1251 cd21 : `pandas.Series`
1252 [2, 1] element of the local Wcs affine transform.
1253 cd22 : `pandas.Series`
1254 [2, 2] element of the local Wcs affine transform.
1256 Returns
1257 -------
1258 Distance : `pandas.Series`
1259 Arcseconds per pixel at the location of the local WC
1260 """
1261 ra1, dec1 = self.computeDeltaRaDec(x1, y1, cd11, cd12, cd21, cd22)
1262 ra2, dec2 = self.computeDeltaRaDec(x2, y2, cd11, cd12, cd21, cd22)
1263 # Great circle distance for small separations.
1264 return self.computeSkySeperation(ra1, dec1, ra2, dec2)
1267class ComputePixelScale(LocalWcs):
1268 """Compute the local pixel scale from the stored CDMatrix.
1269 """
1270 name = "PixelScale"
1272 @property
1273 def columns(self):
1274 return [self.colCD_1_1,
1275 self.colCD_1_2,
1276 self.colCD_2_1,
1277 self.colCD_2_2]
1279 def pixelScaleArcseconds(self, cd11, cd12, cd21, cd22):
1280 """Compute the local pixel to scale conversion in arcseconds.
1282 Parameters
1283 ----------
1284 cd11 : `pandas.Series`
1285 [1, 1] element of the local Wcs affine transform in radians.
1286 cd11 : `pandas.Series`
1287 [1, 1] element of the local Wcs affine transform in radians.
1288 cd12 : `pandas.Series`
1289 [1, 2] element of the local Wcs affine transform in radians.
1290 cd21 : `pandas.Series`
1291 [2, 1] element of the local Wcs affine transform in radians.
1292 cd22 : `pandas.Series`
1293 [2, 2] element of the local Wcs affine transform in radians.
1295 Returns
1296 -------
1297 pixScale : `pandas.Series`
1298 Arcseconds per pixel at the location of the local WC
1299 """
1300 return 3600 * np.degrees(np.sqrt(np.fabs(cd11 * cd22 - cd12 * cd21)))
1302 def _func(self, df):
1303 return self.pixelScaleArcseconds(df[self.colCD_1_1],
1304 df[self.colCD_1_2],
1305 df[self.colCD_2_1],
1306 df[self.colCD_2_2])
1309class ConvertPixelToArcseconds(ComputePixelScale):
1310 """Convert a value in units pixels squared to units arcseconds squared.
1311 """
1313 def __init__(self,
1314 col,
1315 colCD_1_1,
1316 colCD_1_2,
1317 colCD_2_1,
1318 colCD_2_2,
1319 **kwargs):
1320 self.col = col
1321 super().__init__(colCD_1_1,
1322 colCD_1_2,
1323 colCD_2_1,
1324 colCD_2_2,
1325 **kwargs)
1327 @property
1328 def name(self):
1329 return f"{self.col}_asArcseconds"
1331 @property
1332 def columns(self):
1333 return [self.col,
1334 self.colCD_1_1,
1335 self.colCD_1_2,
1336 self.colCD_2_1,
1337 self.colCD_2_2]
1339 def _func(self, df):
1340 return df[self.col] * self.pixelScaleArcseconds(df[self.colCD_1_1],
1341 df[self.colCD_1_2],
1342 df[self.colCD_2_1],
1343 df[self.colCD_2_2])
1346class ConvertPixelSqToArcsecondsSq(ComputePixelScale):
1347 """Convert a value in units pixels to units arcseconds.
1348 """
1350 def __init__(self,
1351 col,
1352 colCD_1_1,
1353 colCD_1_2,
1354 colCD_2_1,
1355 colCD_2_2,
1356 **kwargs):
1357 self.col = col
1358 super().__init__(colCD_1_1,
1359 colCD_1_2,
1360 colCD_2_1,
1361 colCD_2_2,
1362 **kwargs)
1364 @property
1365 def name(self):
1366 return f"{self.col}_asArcsecondsSq"
1368 @property
1369 def columns(self):
1370 return [self.col,
1371 self.colCD_1_1,
1372 self.colCD_1_2,
1373 self.colCD_2_1,
1374 self.colCD_2_2]
1376 def _func(self, df):
1377 pixScale = self.pixelScaleArcseconds(df[self.colCD_1_1],
1378 df[self.colCD_1_2],
1379 df[self.colCD_2_1],
1380 df[self.colCD_2_2])
1381 return df[self.col] * pixScale * pixScale
1384class ReferenceBand(Functor):
1385 name = 'Reference Band'
1386 shortname = 'refBand'
1388 @property
1389 def columns(self):
1390 return ["merge_measurement_i",
1391 "merge_measurement_r",
1392 "merge_measurement_z",
1393 "merge_measurement_y",
1394 "merge_measurement_g"]
1396 def _func(self, df):
1397 def getFilterAliasName(row):
1398 # get column name with the max value (True > False)
1399 colName = row.idxmax()
1400 return colName.replace('merge_measurement_', '')
1402 return df[self.columns].apply(getFilterAliasName, axis=1)
1405class Photometry(Functor):
1406 # AB to NanoJansky (3631 Jansky)
1407 AB_FLUX_SCALE = (0 * u.ABmag).to_value(u.nJy)
1408 LOG_AB_FLUX_SCALE = 12.56
1409 FIVE_OVER_2LOG10 = 1.085736204758129569
1410 # TO DO: DM-21955 Replace hard coded photometic calibration values
1411 COADD_ZP = 27
1413 def __init__(self, colFlux, colFluxErr=None, calib=None, **kwargs):
1414 self.vhypot = np.vectorize(self.hypot)
1415 self.col = colFlux
1416 self.colFluxErr = colFluxErr
1418 self.calib = calib
1419 if calib is not None:
1420 self.fluxMag0, self.fluxMag0Err = calib.getFluxMag0()
1421 else:
1422 self.fluxMag0 = 1./np.power(10, -0.4*self.COADD_ZP)
1423 self.fluxMag0Err = 0.
1425 super().__init__(**kwargs)
1427 @property
1428 def columns(self):
1429 return [self.col]
1431 @property
1432 def name(self):
1433 return f'mag_{self.col}'
1435 @classmethod
1436 def hypot(cls, a, b):
1437 if np.abs(a) < np.abs(b):
1438 a, b = b, a
1439 if a == 0.:
1440 return 0.
1441 q = b/a
1442 return np.abs(a) * np.sqrt(1. + q*q)
1444 def dn2flux(self, dn, fluxMag0):
1445 return self.AB_FLUX_SCALE * dn / fluxMag0
1447 def dn2mag(self, dn, fluxMag0):
1448 with np.warnings.catch_warnings():
1449 np.warnings.filterwarnings('ignore', r'invalid value encountered')
1450 np.warnings.filterwarnings('ignore', r'divide by zero')
1451 return -2.5 * np.log10(dn/fluxMag0)
1453 def dn2fluxErr(self, dn, dnErr, fluxMag0, fluxMag0Err):
1454 retVal = self.vhypot(dn * fluxMag0Err, dnErr * fluxMag0)
1455 retVal *= self.AB_FLUX_SCALE / fluxMag0 / fluxMag0
1456 return retVal
1458 def dn2MagErr(self, dn, dnErr, fluxMag0, fluxMag0Err):
1459 retVal = self.dn2fluxErr(dn, dnErr, fluxMag0, fluxMag0Err) / self.dn2flux(dn, fluxMag0)
1460 return self.FIVE_OVER_2LOG10 * retVal
1463class NanoJansky(Photometry):
1464 def _func(self, df):
1465 return self.dn2flux(df[self.col], self.fluxMag0)
1468class NanoJanskyErr(Photometry):
1469 @property
1470 def columns(self):
1471 return [self.col, self.colFluxErr]
1473 def _func(self, df):
1474 retArr = self.dn2fluxErr(df[self.col], df[self.colFluxErr], self.fluxMag0, self.fluxMag0Err)
1475 return pd.Series(retArr, index=df.index)
1478class Magnitude(Photometry):
1479 def _func(self, df):
1480 return self.dn2mag(df[self.col], self.fluxMag0)
1483class MagnitudeErr(Photometry):
1484 @property
1485 def columns(self):
1486 return [self.col, self.colFluxErr]
1488 def _func(self, df):
1489 retArr = self.dn2MagErr(df[self.col], df[self.colFluxErr], self.fluxMag0, self.fluxMag0Err)
1490 return pd.Series(retArr, index=df.index)
1493class LocalPhotometry(Functor):
1494 """Base class for calibrating the specified instrument flux column using
1495 the local photometric calibration.
1497 Parameters
1498 ----------
1499 instFluxCol : `str`
1500 Name of the instrument flux column.
1501 instFluxErrCol : `str`
1502 Name of the assocated error columns for ``instFluxCol``.
1503 photoCalibCol : `str`
1504 Name of local calibration column.
1505 photoCalibErrCol : `str`
1506 Error associated with ``photoCalibCol``
1508 See also
1509 --------
1510 LocalPhotometry
1511 LocalNanojansky
1512 LocalNanojanskyErr
1513 LocalMagnitude
1514 LocalMagnitudeErr
1515 """
1516 logNJanskyToAB = (1 * u.nJy).to_value(u.ABmag)
1518 def __init__(self,
1519 instFluxCol,
1520 instFluxErrCol,
1521 photoCalibCol,
1522 photoCalibErrCol,
1523 **kwargs):
1524 self.instFluxCol = instFluxCol
1525 self.instFluxErrCol = instFluxErrCol
1526 self.photoCalibCol = photoCalibCol
1527 self.photoCalibErrCol = photoCalibErrCol
1528 super().__init__(**kwargs)
1530 def instFluxToNanojansky(self, instFlux, localCalib):
1531 """Convert instrument flux to nanojanskys.
1533 Parameters
1534 ----------
1535 instFlux : `numpy.ndarray` or `pandas.Series`
1536 Array of instrument flux measurements
1537 localCalib : `numpy.ndarray` or `pandas.Series`
1538 Array of local photometric calibration estimates.
1540 Returns
1541 -------
1542 calibFlux : `numpy.ndarray` or `pandas.Series`
1543 Array of calibrated flux measurements.
1544 """
1545 return instFlux * localCalib
1547 def instFluxErrToNanojanskyErr(self, instFlux, instFluxErr, localCalib, localCalibErr):
1548 """Convert instrument flux to nanojanskys.
1550 Parameters
1551 ----------
1552 instFlux : `numpy.ndarray` or `pandas.Series`
1553 Array of instrument flux measurements
1554 instFluxErr : `numpy.ndarray` or `pandas.Series`
1555 Errors on associated ``instFlux`` values
1556 localCalib : `numpy.ndarray` or `pandas.Series`
1557 Array of local photometric calibration estimates.
1558 localCalibErr : `numpy.ndarray` or `pandas.Series`
1559 Errors on associated ``localCalib`` values
1561 Returns
1562 -------
1563 calibFluxErr : `numpy.ndarray` or `pandas.Series`
1564 Errors on calibrated flux measurements.
1565 """
1566 return np.hypot(instFluxErr * localCalib, instFlux * localCalibErr)
1568 def instFluxToMagnitude(self, instFlux, localCalib):
1569 """Convert instrument flux to nanojanskys.
1571 Parameters
1572 ----------
1573 instFlux : `numpy.ndarray` or `pandas.Series`
1574 Array of instrument flux measurements
1575 localCalib : `numpy.ndarray` or `pandas.Series`
1576 Array of local photometric calibration estimates.
1578 Returns
1579 -------
1580 calibMag : `numpy.ndarray` or `pandas.Series`
1581 Array of calibrated AB magnitudes.
1582 """
1583 return -2.5 * np.log10(self.instFluxToNanojansky(instFlux, localCalib)) + self.logNJanskyToAB
1585 def instFluxErrToMagnitudeErr(self, instFlux, instFluxErr, localCalib, localCalibErr):
1586 """Convert instrument flux err to nanojanskys.
1588 Parameters
1589 ----------
1590 instFlux : `numpy.ndarray` or `pandas.Series`
1591 Array of instrument flux measurements
1592 instFluxErr : `numpy.ndarray` or `pandas.Series`
1593 Errors on associated ``instFlux`` values
1594 localCalib : `numpy.ndarray` or `pandas.Series`
1595 Array of local photometric calibration estimates.
1596 localCalibErr : `numpy.ndarray` or `pandas.Series`
1597 Errors on associated ``localCalib`` values
1599 Returns
1600 -------
1601 calibMagErr: `numpy.ndarray` or `pandas.Series`
1602 Error on calibrated AB magnitudes.
1603 """
1604 err = self.instFluxErrToNanojanskyErr(instFlux, instFluxErr, localCalib, localCalibErr)
1605 return 2.5 / np.log(10) * err / self.instFluxToNanojansky(instFlux, instFluxErr)
1608class LocalNanojansky(LocalPhotometry):
1609 """Compute calibrated fluxes using the local calibration value.
1611 See also
1612 --------
1613 LocalNanojansky
1614 LocalNanojanskyErr
1615 LocalMagnitude
1616 LocalMagnitudeErr
1617 """
1619 @property
1620 def columns(self):
1621 return [self.instFluxCol, self.photoCalibCol]
1623 @property
1624 def name(self):
1625 return f'flux_{self.instFluxCol}'
1627 def _func(self, df):
1628 return self.instFluxToNanojansky(df[self.instFluxCol], df[self.photoCalibCol])
1631class LocalNanojanskyErr(LocalPhotometry):
1632 """Compute calibrated flux errors using the local calibration value.
1634 See also
1635 --------
1636 LocalNanojansky
1637 LocalNanojanskyErr
1638 LocalMagnitude
1639 LocalMagnitudeErr
1640 """
1642 @property
1643 def columns(self):
1644 return [self.instFluxCol, self.instFluxErrCol,
1645 self.photoCalibCol, self.photoCalibErrCol]
1647 @property
1648 def name(self):
1649 return f'fluxErr_{self.instFluxCol}'
1651 def _func(self, df):
1652 return self.instFluxErrToNanojanskyErr(df[self.instFluxCol], df[self.instFluxErrCol],
1653 df[self.photoCalibCol], df[self.photoCalibErrCol])
1656class LocalMagnitude(LocalPhotometry):
1657 """Compute calibrated AB magnitudes using the local calibration value.
1659 See also
1660 --------
1661 LocalNanojansky
1662 LocalNanojanskyErr
1663 LocalMagnitude
1664 LocalMagnitudeErr
1665 """
1667 @property
1668 def columns(self):
1669 return [self.instFluxCol, self.photoCalibCol]
1671 @property
1672 def name(self):
1673 return f'mag_{self.instFluxCol}'
1675 def _func(self, df):
1676 return self.instFluxToMagnitude(df[self.instFluxCol],
1677 df[self.photoCalibCol])
1680class LocalMagnitudeErr(LocalPhotometry):
1681 """Compute calibrated AB magnitude errors using the local calibration value.
1683 See also
1684 --------
1685 LocalNanojansky
1686 LocalNanojanskyErr
1687 LocalMagnitude
1688 LocalMagnitudeErr
1689 """
1691 @property
1692 def columns(self):
1693 return [self.instFluxCol, self.instFluxErrCol,
1694 self.photoCalibCol, self.photoCalibErrCol]
1696 @property
1697 def name(self):
1698 return f'magErr_{self.instFluxCol}'
1700 def _func(self, df):
1701 return self.instFluxErrToMagnitudeErr(df[self.instFluxCol],
1702 df[self.instFluxErrCol],
1703 df[self.photoCalibCol],
1704 df[self.photoCalibErrCol])
1707class LocalDipoleMeanFlux(LocalPhotometry):
1708 """Compute absolute mean of dipole fluxes.
1710 See also
1711 --------
1712 LocalNanojansky
1713 LocalNanojanskyErr
1714 LocalMagnitude
1715 LocalMagnitudeErr
1716 LocalDipoleMeanFlux
1717 LocalDipoleMeanFluxErr
1718 LocalDipoleDiffFlux
1719 LocalDipoleDiffFluxErr
1720 """
1721 def __init__(self,
1722 instFluxPosCol,
1723 instFluxNegCol,
1724 instFluxPosErrCol,
1725 instFluxNegErrCol,
1726 photoCalibCol,
1727 photoCalibErrCol,
1728 **kwargs):
1729 self.instFluxNegCol = instFluxNegCol
1730 self.instFluxPosCol = instFluxPosCol
1731 self.instFluxNegErrCol = instFluxNegErrCol
1732 self.instFluxPosErrCol = instFluxPosErrCol
1733 self.photoCalibCol = photoCalibCol
1734 self.photoCalibErrCol = photoCalibErrCol
1735 super().__init__(instFluxNegCol,
1736 instFluxNegErrCol,
1737 photoCalibCol,
1738 photoCalibErrCol,
1739 **kwargs)
1741 @property
1742 def columns(self):
1743 return [self.instFluxPosCol,
1744 self.instFluxNegCol,
1745 self.photoCalibCol]
1747 @property
1748 def name(self):
1749 return f'dipMeanFlux_{self.instFluxPosCol}_{self.instFluxNegCol}'
1751 def _func(self, df):
1752 return 0.5*(np.fabs(self.instFluxToNanojansky(df[self.instFluxNegCol], df[self.photoCalibCol]))
1753 + np.fabs(self.instFluxToNanojansky(df[self.instFluxPosCol], df[self.photoCalibCol])))
1756class LocalDipoleMeanFluxErr(LocalDipoleMeanFlux):
1757 """Compute the error on the absolute mean of dipole fluxes.
1759 See also
1760 --------
1761 LocalNanojansky
1762 LocalNanojanskyErr
1763 LocalMagnitude
1764 LocalMagnitudeErr
1765 LocalDipoleMeanFlux
1766 LocalDipoleMeanFluxErr
1767 LocalDipoleDiffFlux
1768 LocalDipoleDiffFluxErr
1769 """
1771 @property
1772 def columns(self):
1773 return [self.instFluxPosCol,
1774 self.instFluxNegCol,
1775 self.instFluxPosErrCol,
1776 self.instFluxNegErrCol,
1777 self.photoCalibCol,
1778 self.photoCalibErrCol]
1780 @property
1781 def name(self):
1782 return f'dipMeanFluxErr_{self.instFluxPosCol}_{self.instFluxNegCol}'
1784 def _func(self, df):
1785 return 0.5*np.sqrt(
1786 (np.fabs(df[self.instFluxNegCol]) + np.fabs(df[self.instFluxPosCol])
1787 * df[self.photoCalibErrCol])**2
1788 + (df[self.instFluxNegErrCol]**2 + df[self.instFluxPosErrCol]**2)
1789 * df[self.photoCalibCol]**2)
1792class LocalDipoleDiffFlux(LocalDipoleMeanFlux):
1793 """Compute the absolute difference of dipole fluxes.
1795 Value is (abs(pos) - abs(neg))
1797 See also
1798 --------
1799 LocalNanojansky
1800 LocalNanojanskyErr
1801 LocalMagnitude
1802 LocalMagnitudeErr
1803 LocalDipoleMeanFlux
1804 LocalDipoleMeanFluxErr
1805 LocalDipoleDiffFlux
1806 LocalDipoleDiffFluxErr
1807 """
1809 @property
1810 def columns(self):
1811 return [self.instFluxPosCol,
1812 self.instFluxNegCol,
1813 self.photoCalibCol]
1815 @property
1816 def name(self):
1817 return f'dipDiffFlux_{self.instFluxPosCol}_{self.instFluxNegCol}'
1819 def _func(self, df):
1820 return (np.fabs(self.instFluxToNanojansky(df[self.instFluxPosCol], df[self.photoCalibCol]))
1821 - np.fabs(self.instFluxToNanojansky(df[self.instFluxNegCol], df[self.photoCalibCol])))
1824class LocalDipoleDiffFluxErr(LocalDipoleMeanFlux):
1825 """Compute the error on the absolute difference of dipole fluxes.
1827 See also
1828 --------
1829 LocalNanojansky
1830 LocalNanojanskyErr
1831 LocalMagnitude
1832 LocalMagnitudeErr
1833 LocalDipoleMeanFlux
1834 LocalDipoleMeanFluxErr
1835 LocalDipoleDiffFlux
1836 LocalDipoleDiffFluxErr
1837 """
1839 @property
1840 def columns(self):
1841 return [self.instFluxPosCol,
1842 self.instFluxNegCol,
1843 self.instFluxPosErrCol,
1844 self.instFluxNegErrCol,
1845 self.photoCalibCol,
1846 self.photoCalibErrCol]
1848 @property
1849 def name(self):
1850 return f'dipDiffFluxErr_{self.instFluxPosCol}_{self.instFluxNegCol}'
1852 def _func(self, df):
1853 return np.sqrt(
1854 ((np.fabs(df[self.instFluxPosCol]) - np.fabs(df[self.instFluxNegCol]))
1855 * df[self.photoCalibErrCol])**2
1856 + (df[self.instFluxPosErrCol]**2 + df[self.instFluxNegErrCol]**2)
1857 * df[self.photoCalibCol]**2)
1860class Ratio(Functor):
1861 """Base class for returning the ratio of 2 columns.
1863 Can be used to compute a Signal to Noise ratio for any input flux.
1865 Parameters
1866 ----------
1867 numerator : `str`
1868 Name of the column to use at the numerator in the ratio
1869 denominator : `str`
1870 Name of the column to use as the denominator in the ratio.
1871 """
1872 def __init__(self,
1873 numerator,
1874 denominator,
1875 **kwargs):
1876 self.numerator = numerator
1877 self.denominator = denominator
1878 super().__init__(**kwargs)
1880 @property
1881 def columns(self):
1882 return [self.numerator, self.denominator]
1884 @property
1885 def name(self):
1886 return f'ratio_{self.numerator}_{self.denominator}'
1888 def _func(self, df):
1889 with np.warnings.catch_warnings():
1890 np.warnings.filterwarnings('ignore', r'invalid value encountered')
1891 np.warnings.filterwarnings('ignore', r'divide by zero')
1892 return df[self.numerator] / df[self.denominator]