# python/lsst/pipe/tasks/functors.py

import yaml
import re

import pandas as pd
import numpy as np
import astropy.units as u

from lsst.daf.persistence import doImport
from .parquetTable import MultilevelParquetTable

def init_fromDict(initDict, basePath='lsst.pipe.tasks.functors', typeKey='functor'):
    """Initialize an object defined in a dictionary

    The object needs to be importable as
    '{0}.{1}'.format(basePath, initDict[typeKey])

    The positional arguments (if any) are contained in an "args" entry of
    the dictionary; all remaining entries are passed as keyword arguments.
    This is used in `functors.CompositeFunctor.from_yaml` to initialize
    a composite functor from a specification in a YAML file.

    Parameters
    ----------
    initDict : `dict`
        Dictionary describing the object's initialization. Must contain
        an entry keyed by ``typeKey`` that is the name of the object,
        relative to ``basePath``.
    basePath : `str`
        Import path of the module in which ``initDict[typeKey]`` is defined.
    typeKey : `str`
        Key of ``initDict`` that is the name of the object
        (relative to ``basePath``).
    """
    initDict = initDict.copy()
    # TO DO: DM-21956 We should be able to define functors outside this module
    pythonType = doImport('{0}.{1}'.format(basePath, initDict.pop(typeKey)))
    args = []
    if 'args' in initDict:
        args = initDict.pop('args')
        if isinstance(args, str):
            args = [args]
    return pythonType(*args, **initDict)
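
# A minimal sketch of how `init_fromDict` is used (the spec dict below is
# hypothetical; `Mag` and its arguments are real names from this module):
#
#     >>> spec = {'functor': 'Mag', 'args': 'base_PsfFlux', 'filt': 'HSC-I'}
#     >>> func = init_fromDict(spec)  # equivalent to Mag('base_PsfFlux', filt='HSC-I')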

class Functor(object):
    """Define and execute a calculation on a ParquetTable

    The `__call__` method accepts a `ParquetTable` object, and returns the
    result of the calculation as a single column. Each functor defines what
    columns are needed for the calculation, and only these columns are read
    from the `ParquetTable`.

    The action of `__call__` consists of two steps: first, loading the
    necessary columns from disk into memory as a `pandas.DataFrame` object;
    and second, performing the computation on this dataframe and returning the
    result.

    To define a new `Functor`, a subclass must define a `_func` method that
    takes a `pandas.DataFrame` and returns the result as a `pandas.Series`.
    In addition, it must define the following attributes:

    * `_columns`: The columns necessary to perform the calculation
    * `name`: A name appropriate for a figure axis label
    * `shortname`: A name appropriate for use as a dictionary key

    On initialization, a `Functor` should declare what filter (`filt` kwarg)
    and dataset (e.g. `'ref'`, `'meas'`, `'forced_src'`) it is intended to be
    applied to. This enables the `_get_cols` method to extract the proper
    columns from the parquet file. If not specified, the dataset will fall
    back on the `_defaultDataset` attribute. If `filt` is not specified and
    `dataset` is anything other than `'ref'`, then an error will be raised
    when trying to perform the calculation.

    As currently implemented, `Functor` is only set up to expect a
    `ParquetTable` of the format of the `deepCoadd_obj` dataset; that is, a
    `MultilevelParquetTable` with the levels of the column index being
    `filter`, `dataset`, and `column`. This is defined in the `_columnLevels`
    attribute, as well as being implicit in the role of the `filt` and
    `dataset` attributes defined at initialization. In addition, the
    `_get_cols` method that reads the dataframe from the `ParquetTable` will
    return a dataframe with column index levels defined by the `_dfLevels`
    attribute; by default, this is `column`.

    The `_columnLevels` and `_dfLevels` attributes should generally not need
    to be changed, unless `_func` needs columns from multiple filters or
    datasets to do the calculation. An example of this is the
    `lsst.pipe.tasks.functors.Color` functor, for which
    `_dfLevels = ('filter', 'column')`, and `_func` expects the dataframe it
    gets to have those levels in the column index.

    Parameters
    ----------
    filt : str
        Filter upon which to do the calculation

    dataset : str
        Dataset upon which to do the calculation
        (e.g., 'ref', 'meas', 'forced_src').
    """

    _defaultDataset = 'ref'
    _columnLevels = ('filter', 'dataset', 'column')
    _dfLevels = ('column',)
    _defaultNoDup = False

    def __init__(self, filt=None, dataset=None, noDup=None):
        self.filt = filt
        self.dataset = dataset if dataset is not None else self._defaultDataset
        self._noDup = noDup

    @property
    def noDup(self):
        if self._noDup is not None:
            return self._noDup
        else:
            return self._defaultNoDup

    @property
    def columns(self):
        """Columns required to perform calculation
        """
        if not hasattr(self, '_columns'):
            raise NotImplementedError('Must define columns property or _columns attribute')
        return self._columns

    def multilevelColumns(self, parq):
        if set(parq.columnLevels) != set(self._columnLevels):
            raise ValueError('ParquetTable does not have the expected column levels. ' +
                             'Got {0}; expected {1}.'.format(parq.columnLevels, self._columnLevels))

        columnDict = {'column': self.columns,
                      'dataset': self.dataset}
        if self.filt is None:
            if 'filter' in parq.columnLevels:
                if self.dataset == 'ref':
                    columnDict['filter'] = parq.columnLevelNames['filter'][0]
                else:
                    raise ValueError("'filt' not set for functor {} ".format(self.name) +
                                     "(dataset {}) ".format(self.dataset) +
                                     "and ParquetTable " +
                                     "contains multiple filters in column index. " +
                                     "Set 'filt' or set 'dataset' to 'ref'.")
        else:
            columnDict['filter'] = self.filt

        return parq._colsFromDict(columnDict)

    def _func(self, df, dropna=True):
        raise NotImplementedError('Must define calculation on dataframe')

    def _get_cols(self, parq):
        """Retrieve dataframe necessary for calculation.

        Returns dataframe upon which `self._func` can act.
        """
        if isinstance(parq, MultilevelParquetTable):
            columns = self.multilevelColumns(parq)
            df = parq.toDataFrame(columns=columns, droplevels=False)
            df = self._setLevels(df)
        else:
            columns = self.columns
            df = parq.toDataFrame(columns=columns)

        return df

    def _setLevels(self, df):
        levelsToDrop = [n for n in df.columns.names if n not in self._dfLevels]
        df.columns = df.columns.droplevel(levelsToDrop)
        return df

    def _dropna(self, vals):
        return vals.dropna()

    def __call__(self, parq, dropna=False):
        # Load the columns before entering the `try`, so that `df` is always
        # bound when `self.fail` is called.
        df = self._get_cols(parq)
        try:
            vals = self._func(df)
        except Exception:
            vals = self.fail(df)
        if dropna:
            vals = self._dropna(vals)

        return vals

    def fail(self, df):
        return pd.Series(np.full(len(df), np.nan), index=df.index)

    @property
    def name(self):
        """Full name of functor (suitable for figure labels)
        """
        raise NotImplementedError

    @property
    def shortname(self):
        """Short name of functor (suitable for column name/dict key)
        """
        return self.name
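
# A minimal sketch of a concrete `Functor` subclass, following the recipe in
# the class docstring above (define `_func` plus the `_columns`, `name`, and
# `shortname` attributes). This example class is illustrative, not part of
# the module, and the column names are assumptions:
#
#     >>> class FluxRatio(Functor):
#     ...     """Ratio of PSF flux to CModel flux"""
#     ...     _columns = ['base_PsfFlux_instFlux', 'modelfit_CModel_instFlux']
#     ...     name = 'PSF/CModel flux ratio'
#     ...     shortname = 'fluxRatio'
#     ...
#     ...     def _func(self, df):
#     ...         return df['base_PsfFlux_instFlux'] / df['modelfit_CModel_instFlux']
#     >>> f = FluxRatio(filt='HSC-I', dataset='meas')
#     >>> series = f(parq)  # parq: a MultilevelParquetTable, e.g. deepCoadd_obj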

class CompositeFunctor(Functor):
    """Perform multiple calculations at once on a catalog

    The role of a `CompositeFunctor` is to group together computations from
    multiple functors. Instead of returning a `pandas.Series`, a
    `CompositeFunctor` returns a `pandas.DataFrame`, with the column names
    being the keys of `funcDict`.

    The `columns` attribute of a `CompositeFunctor` is the union of all
    columns in all the component functors.

    A `CompositeFunctor` does not use a `_func` method itself; rather, when
    a `CompositeFunctor` is called, all its columns are loaded at once, and
    the resulting dataframe is passed to the `_func` method of each component
    functor. This has the advantage of performing the I/O (reading from the
    parquet file) only once, and works because each individual `_func` method
    of each component functor does not care if there are *extra* columns in
    the dataframe being passed; only that it must contain *at least* the
    `columns` it expects.

    An important and useful class method is `from_yaml`, which takes as its
    argument the path to a YAML file specifying a collection of functors.

    Parameters
    ----------
    funcs : `dict` or `list`
        Dictionary or list of functors. If a list, then it will be converted
        into a dictionary according to the `.shortname` attribute of each
        functor.
    """
    dataset = None

    def __init__(self, funcs, **kwargs):

        if isinstance(funcs, dict):
            self.funcDict = funcs
        else:
            self.funcDict = {f.shortname: f for f in funcs}

        self._filt = None

        super().__init__(**kwargs)

    @property
    def filt(self):
        return self._filt

    @filt.setter
    def filt(self, filt):
        if filt is not None:
            for _, f in self.funcDict.items():
                f.filt = filt
        self._filt = filt

    def update(self, new):
        if isinstance(new, dict):
            self.funcDict.update(new)
        elif isinstance(new, CompositeFunctor):
            self.funcDict.update(new.funcDict)
        else:
            raise TypeError('Can only update with dictionary or CompositeFunctor.')

        # Make sure new functors have the same 'filt' set
        if self.filt is not None:
            self.filt = self.filt

    @property
    def columns(self):
        return list(set([x for y in [f.columns for f in self.funcDict.values()] for x in y]))

    def multilevelColumns(self, parq):
        return list(set([x for y in [f.multilevelColumns(parq)
                                     for f in self.funcDict.values()] for x in y]))

    def __call__(self, parq, **kwargs):
        if isinstance(parq, MultilevelParquetTable):
            columns = self.multilevelColumns(parq)
            df = parq.toDataFrame(columns=columns, droplevels=False)
            valDict = {}
            for k, f in self.funcDict.items():
                # Select this functor's columns before entering the `try`, so
                # that `subdf` is always bound when `f.fail` is called.
                subdf = f._setLevels(df[f.multilevelColumns(parq)])
                try:
                    valDict[k] = f._func(subdf)
                except Exception:
                    valDict[k] = f.fail(subdf)
        else:
            columns = self.columns
            df = parq.toDataFrame(columns=columns)
            valDict = {k: f._func(df) for k, f in self.funcDict.items()}

        try:
            valDf = pd.concat(valDict, axis=1)
        except TypeError:
            print([(k, type(v)) for k, v in valDict.items()])
            raise

        if kwargs.get('dropna', False):
            valDf = valDf.dropna(how='any')

        return valDf

    @classmethod
    def renameCol(cls, col, renameRules):
        if renameRules is None:
            return col
        for old, new in renameRules:
            if col.startswith(old):
                col = col.replace(old, new)
        return col

    @classmethod
    def from_file(cls, filename, **kwargs):
        with open(filename) as f:
            translationDefinition = yaml.safe_load(f)

        return cls.from_yaml(translationDefinition, **kwargs)

    @classmethod
    def from_yaml(cls, translationDefinition, **kwargs):
        funcs = {}
        for func, val in translationDefinition['funcs'].items():
            funcs[func] = init_fromDict(val)

        if 'flag_rename_rules' in translationDefinition:
            renameRules = translationDefinition['flag_rename_rules']
        else:
            renameRules = None

        if 'refFlags' in translationDefinition:
            for flag in translationDefinition['refFlags']:
                funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='ref')

        if 'flags' in translationDefinition:
            for flag in translationDefinition['flags']:
                funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='meas')

        return cls(funcs, **kwargs)
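
# A sketch of the translation definition `from_yaml` expects (a `dict`,
# typically loaded from a YAML file by `from_file`). The keys shown ('funcs',
# 'flag_rename_rules', 'refFlags', 'flags') are the ones this method reads;
# the functor and flag names themselves are illustrative:
#
#     >>> translationDefinition = {
#     ...     'funcs': {
#     ...         'psfMag': {'functor': 'Mag', 'args': 'base_PsfFlux'},
#     ...         'ra': {'functor': 'RAColumn'},
#     ...     },
#     ...     'flag_rename_rules': [('base_', '')],
#     ...     'refFlags': ['detect_isPrimary'],
#     ...     'flags': ['base_PixelFlags_flag_interpolated'],
#     ... }
#     >>> func = CompositeFunctor.from_yaml(translationDefinition)
#     >>> df = func(parq)  # one output column per entry in func.funcDict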

def mag_aware_eval(df, expr):
    """Evaluate an expression on a DataFrame, knowing what the 'mag' function means

    Builds on `pandas.DataFrame.eval`, which parses and executes math on
    dataframes.

    Parameters
    ----------
    df : pandas.DataFrame
        Dataframe on which to evaluate expression.

    expr : str
        Expression.
    """
    try:
        expr_new = re.sub(r'mag\((\w+)\)', r'-2.5*log(\g<1>)/log(10)', expr)
        val = df.eval(expr_new, truediv=True)
    except Exception:  # Should check what actually gets raised
        expr_new = re.sub(r'mag\((\w+)\)', r'-2.5*log(\g<1>_instFlux)/log(10)', expr)
        val = df.eval(expr_new, truediv=True)
    return val
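
# For example (illustrative), on a dataframe with a column 'a_instFlux' the
# substitution above turns 'mag(a_instFlux)' into
# '-2.5*log(a_instFlux)/log(10)', which `pandas.DataFrame.eval` can evaluate
# directly; the except branch retries after appending '_instFlux' to bare
# column stems, so 'mag(a)' is evaluated as if it were 'mag(a_instFlux)'.
#
#     >>> df = pd.DataFrame({'a_instFlux': [100., 1000.]})
#     >>> mag_aware_eval(df, 'mag(a)')  # -2.5*log10([100, 1000]) = [-5.0, -7.5]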

class CustomFunctor(Functor):
    """Arbitrary computation on a catalog

    Column names (and thus the columns to be loaded from the catalog) are
    found by finding all words and trying to ignore all "math-y" words.

    Parameters
    ----------
    expr : str
        Expression to evaluate, to be parsed and executed by `mag_aware_eval`.
    """
    _ignore_words = ('mag', 'sin', 'cos', 'exp', 'log', 'sqrt')

    def __init__(self, expr, **kwargs):
        self.expr = expr
        super().__init__(**kwargs)

    @property
    def name(self):
        return self.expr

    @property
    def columns(self):
        flux_cols = re.findall(r'mag\(\s*(\w+)\s*\)', self.expr)

        cols = [c for c in re.findall(r'[a-zA-Z_]+', self.expr) if c not in self._ignore_words]
        not_a_col = []
        for c in flux_cols:
            if not re.search('_instFlux$', c):
                cols.append('{}_instFlux'.format(c))
                not_a_col.append(c)
            else:
                cols.append(c)

        return list(set([c for c in cols if c not in not_a_col]))

    def _func(self, df):
        return mag_aware_eval(df, self.expr)
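
# A sketch of `CustomFunctor` usage (column names illustrative): the `columns`
# property parses the expression to determine what must be loaded, appending
# '_instFlux' to bare column stems inside mag():
#
#     >>> func = CustomFunctor('mag(modelfit_CModel) - mag(base_PsfFlux)')
#     >>> sorted(func.columns)
#     ['base_PsfFlux_instFlux', 'modelfit_CModel_instFlux']
#     >>> series = func(parq)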

class Column(Functor):
    """Get column with specified name
    """

    def __init__(self, col, **kwargs):
        self.col = col
        super().__init__(**kwargs)

    @property
    def name(self):
        return self.col

    @property
    def columns(self):
        return [self.col]

    def _func(self, df):
        return df[self.col]


class Index(Functor):
    """Return the value of the index for each object
    """
    columns = ['coord_ra']  # just a dummy; something has to be here
    _defaultDataset = 'ref'
    _defaultNoDup = True

    def _func(self, df):
        return pd.Series(df.index, index=df.index)


class IDColumn(Column):
    col = 'id'
    _allow_difference = False
    _defaultNoDup = True

    def _func(self, df):
        return pd.Series(df.index, index=df.index)


class FootprintNPix(Column):
    col = 'base_Footprint_nPix'

class CoordColumn(Column):
    """Base class for coordinate column, in degrees
    """
    _radians = True

    def __init__(self, col, **kwargs):
        super().__init__(col, **kwargs)

    def _func(self, df):
        res = df[self.col]
        if self._radians:
            # Convert to degrees without modifying the input dataframe in place
            res = res * 180 / np.pi
        return res


class RAColumn(CoordColumn):
    """Right Ascension, in degrees
    """
    name = 'RA'
    _defaultNoDup = True

    def __init__(self, **kwargs):
        super().__init__('coord_ra', **kwargs)

    def __call__(self, catalog, **kwargs):
        return super().__call__(catalog, **kwargs)


class DecColumn(CoordColumn):
    """Declination, in degrees
    """
    name = 'Dec'
    _defaultNoDup = True

    def __init__(self, **kwargs):
        super().__init__('coord_dec', **kwargs)

    def __call__(self, catalog, **kwargs):
        return super().__call__(catalog, **kwargs)

def fluxName(col):
    if not col.endswith('_instFlux'):
        col += '_instFlux'
    return col


def fluxErrName(col):
    if not col.endswith('_instFluxErr'):
        col += '_instFluxErr'
    return col

class Mag(Functor):
    """Compute calibrated magnitude

    Takes a `calib` argument, which returns the flux at mag=0
    as `calib.getFluxMag0()`. If not provided, then the default
    `fluxMag0` is 63095734448.0194, which is the default for HSC.
    This default should be removed in DM-21955.

    This calculation hides warnings about invalid values and dividing by zero.

    As for all functors, a `dataset` and `filt` kwarg should be provided upon
    initialization. Unlike the default `Functor`, however, the default dataset
    for a `Mag` is `'meas'`, rather than `'ref'`.

    Parameters
    ----------
    col : `str`
        Name of flux column from which to compute magnitude. Is parsed by the
        `lsst.pipe.tasks.functors.fluxName` function---that is, you can pass
        `'modelfit_CModel'` instead of `'modelfit_CModel_instFlux'` and it
        will understand.
    calib : `lsst.afw.image.calib.Calib` (optional)
        Object that knows the zero point.
    """
    _defaultDataset = 'meas'

    def __init__(self, col, calib=None, **kwargs):
        self.col = fluxName(col)
        self.calib = calib
        if calib is not None:
            self.fluxMag0 = calib.getFluxMag0()[0]
        else:
            # TO DO: DM-21955 Replace hard coded photometric calibration values
            self.fluxMag0 = 63095734448.0194

        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.col]

    def _func(self, df):
        with np.warnings.catch_warnings():
            np.warnings.filterwarnings('ignore', r'invalid value encountered')
            np.warnings.filterwarnings('ignore', r'divide by zero')
            return -2.5*np.log10(df[self.col] / self.fluxMag0)

    @property
    def name(self):
        return 'mag_{0}'.format(self.col)
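
# Sketch of typical usage (filter name illustrative; the dataset defaults
# to 'meas'):
#
#     >>> cmodelMag = Mag('modelfit_CModel', filt='HSC-G')
#     >>> mags = cmodelMag(parq)  # -2.5*log10(instFlux / fluxMag0)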

class MagErr(Mag):
    """Compute calibrated magnitude uncertainty

    Takes the same `calib` object as `lsst.pipe.tasks.functors.Mag`.

    Parameters
    ----------
    col : `str`
        Name of flux column
    calib : `lsst.afw.image.calib.Calib` (optional)
        Object that knows the zero point.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        if self.calib is not None:
            self.fluxMag0Err = self.calib.getFluxMag0()[1]
        else:
            self.fluxMag0Err = 0.

    @property
    def columns(self):
        return [self.col, self.col + 'Err']

    def _func(self, df):
        with np.warnings.catch_warnings():
            np.warnings.filterwarnings('ignore', r'invalid value encountered')
            np.warnings.filterwarnings('ignore', r'divide by zero')
            fluxCol, fluxErrCol = self.columns
            x = df[fluxErrCol] / df[fluxCol]
            y = self.fluxMag0Err / self.fluxMag0
            # Standard error propagation:
            # sigma_mag = (2.5/ln10)*sqrt((fluxErr/flux)**2 + (fluxMag0Err/fluxMag0)**2)
            magErr = (2.5 / np.log(10.)) * np.sqrt(x*x + y*y)
            return magErr

    @property
    def name(self):
        return super().name + '_err'

class NanoMaggie(Mag):
    """Instrumental flux converted to nanomaggies (1e-9 of the zero-point flux)
    """

    def _func(self, df):
        return (df[self.col] / self.fluxMag0) * 1e9

class MagDiff(Functor):
    """Functor to calculate magnitude difference"""
    _defaultDataset = 'meas'

    def __init__(self, col1, col2, **kwargs):
        self.col1 = fluxName(col1)
        self.col2 = fluxName(col2)
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.col1, self.col2]

    def _func(self, df):
        with np.warnings.catch_warnings():
            np.warnings.filterwarnings('ignore', r'invalid value encountered')
            np.warnings.filterwarnings('ignore', r'divide by zero')
            return -2.5*np.log10(df[self.col1]/df[self.col2])

    @property
    def name(self):
        return '(mag_{0} - mag_{1})'.format(self.col1, self.col2)

    @property
    def shortname(self):
        return 'magDiff_{0}_{1}'.format(self.col1, self.col2)

class Color(Functor):
    """Compute the color between two filters

    Computes color by initializing two different `Mag` functors based on the
    `col` and filters provided, and then returning the difference.

    This is enabled by the `_func` method expecting a dataframe with a
    multilevel column index, with both `'filter'` and `'column'`, instead of
    just `'column'`, which is the `Functor` default. This is controlled by
    the `_dfLevels` attribute.

    Also of note, the default dataset for `Color` is `'forced_src'`, whereas
    for `Mag` it is `'meas'`.

    Parameters
    ----------
    col : str
        Name of flux column from which to compute; same as would be passed to
        `lsst.pipe.tasks.functors.Mag`.

    filt2, filt1 : str
        Filters from which to compute magnitude difference.
        Color computed is `Mag(filt2) - Mag(filt1)`.
    """
    _defaultDataset = 'forced_src'
    _dfLevels = ('filter', 'column')
    _defaultNoDup = True

    def __init__(self, col, filt2, filt1, **kwargs):
        self.col = fluxName(col)
        if filt2 == filt1:
            raise RuntimeError("Cannot compute Color for %s: %s - %s " % (col, filt2, filt1))
        self.filt2 = filt2
        self.filt1 = filt1

        self.mag2 = Mag(col, filt=filt2, **kwargs)
        self.mag1 = Mag(col, filt=filt1, **kwargs)

        super().__init__(**kwargs)

    @property
    def filt(self):
        return None

    @filt.setter
    def filt(self, filt):
        pass

    def _func(self, df):
        mag2 = self.mag2._func(df[self.filt2])
        mag1 = self.mag1._func(df[self.filt1])
        return mag2 - mag1

    @property
    def columns(self):
        return [self.mag1.col, self.mag2.col]

    def multilevelColumns(self, parq):
        return [(self.dataset, self.filt1, self.col),
                (self.dataset, self.filt2, self.col)]

    @property
    def name(self):
        return '{0} - {1} ({2})'.format(self.filt2, self.filt1, self.col)

    @property
    def shortname(self):
        return '{0}_{1}m{2}'.format(self.col, self.filt2.replace('-', ''),
                                    self.filt1.replace('-', ''))
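
# Sketch of typical usage: a g-i color from CModel fluxes (filter names
# illustrative; the dataset defaults to 'forced_src'). Note the argument
# order: the color is Mag(filt2) - Mag(filt1):
#
#     >>> gmi = Color('modelfit_CModel', 'HSC-G', 'HSC-I')
#     >>> series = gmi(parq)  # mag_g - mag_i, per object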

class Labeller(Functor):
    """Assign a categorical label to each object; overrides `__call__` so
    that `dropna` is always forced to `False` and no labels are dropped.
    """
    _null_label = 'null'
    _allow_difference = False
    name = 'label'
    _force_str = False

    def __call__(self, parq, dropna=False, **kwargs):
        return super().__call__(parq, dropna=False, **kwargs)

class StarGalaxyLabeller(Labeller):
    _columns = ["base_ClassificationExtendedness_value"]
    _column = "base_ClassificationExtendedness_value"

    def _func(self, df):
        x = df[self._columns][self._column]
        mask = x.isnull()
        test = (x < 0.5).astype(int)
        test = test.mask(mask, 2)

        # TODO: DM-21954 Look into veracity of inline comment below
        # are these backwards?
        categories = ['galaxy', 'star', self._null_label]
        label = pd.Series(pd.Categorical.from_codes(test, categories=categories),
                          index=x.index, name='label')
        if self._force_str:
            label = label.astype(str)
        return label

class NumStarLabeller(Labeller):
    _columns = ['numStarFlags']
    labels = {"star": 0, "maybe": 1, "notStar": 2}

    def _func(self, df):
        x = df[self._columns][self._columns[0]]

        # Number of filters
        n = len(x.unique()) - 1

        labels = ['noStar', 'maybe', 'star']
        label = pd.Series(pd.cut(x, [-1, 0, n-1, n], labels=labels),
                          index=x.index, name='label')

        if self._force_str:
            label = label.astype(str)

        return label

class DeconvolvedMoments(Functor):
    name = 'Deconvolved Moments'
    shortname = 'deconvolvedMoments'
    _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
                "ext_shapeHSM_HsmSourceMoments_yy",
                "base_SdssShape_xx", "base_SdssShape_yy",
                "ext_shapeHSM_HsmPsfMoments_xx",
                "ext_shapeHSM_HsmPsfMoments_yy")

    def _func(self, df):
        """Calculate deconvolved moments"""
        if "ext_shapeHSM_HsmSourceMoments_xx" in df.columns:  # _xx added by tdm
            hsm = df["ext_shapeHSM_HsmSourceMoments_xx"] + df["ext_shapeHSM_HsmSourceMoments_yy"]
        else:
            hsm = np.ones(len(df))*np.nan
        sdss = df["base_SdssShape_xx"] + df["base_SdssShape_yy"]
        if "ext_shapeHSM_HsmPsfMoments_xx" in df.columns:
            psf = df["ext_shapeHSM_HsmPsfMoments_xx"] + df["ext_shapeHSM_HsmPsfMoments_yy"]
        else:
            # LSST does not have shape.sdss.psf. Could instead add base_PsfShape
            # to the catalog using
            # exposure.getPsf().computeShape(s.getCentroid()).getIxx()
            # raise TaskError("No psf shape parameter found in catalog")
            raise RuntimeError('No psf shape parameter found in catalog')

        return hsm.where(np.isfinite(hsm), sdss) - psf

class SdssTraceSize(Functor):
    """Functor to calculate SDSS trace radius size for sources"""
    name = "SDSS Trace Size"
    shortname = 'sdssTrace'
    _columns = ("base_SdssShape_xx", "base_SdssShape_yy")

    def _func(self, df):
        srcSize = np.sqrt(0.5*(df["base_SdssShape_xx"] + df["base_SdssShape_yy"]))
        return srcSize


class PsfSdssTraceSizeDiff(Functor):
    """Functor to calculate SDSS trace radius size difference (%) between object and PSF model"""
    name = "PSF - SDSS Trace Size"
    shortname = 'psf_sdssTrace'
    _columns = ("base_SdssShape_xx", "base_SdssShape_yy",
                "base_SdssShape_psf_xx", "base_SdssShape_psf_yy")

    def _func(self, df):
        srcSize = np.sqrt(0.5*(df["base_SdssShape_xx"] + df["base_SdssShape_yy"]))
        psfSize = np.sqrt(0.5*(df["base_SdssShape_psf_xx"] + df["base_SdssShape_psf_yy"]))
        sizeDiff = 100*(srcSize - psfSize)/(0.5*(srcSize + psfSize))
        return sizeDiff


class HsmTraceSize(Functor):
    """Functor to calculate HSM trace radius size for sources"""
    name = 'HSM Trace Size'
    shortname = 'hsmTrace'
    _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
                "ext_shapeHSM_HsmSourceMoments_yy")

    def _func(self, df):
        srcSize = np.sqrt(0.5*(df["ext_shapeHSM_HsmSourceMoments_xx"]
                               + df["ext_shapeHSM_HsmSourceMoments_yy"]))
        return srcSize


class PsfHsmTraceSizeDiff(Functor):
    """Functor to calculate HSM trace radius size difference (%) between object and PSF model"""
    name = 'PSF - HSM Trace Size'
    shortname = 'psf_HsmTrace'
    _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
                "ext_shapeHSM_HsmSourceMoments_yy",
                "ext_shapeHSM_HsmPsfMoments_xx",
                "ext_shapeHSM_HsmPsfMoments_yy")

    def _func(self, df):
        srcSize = np.sqrt(0.5*(df["ext_shapeHSM_HsmSourceMoments_xx"]
                               + df["ext_shapeHSM_HsmSourceMoments_yy"]))
        psfSize = np.sqrt(0.5*(df["ext_shapeHSM_HsmPsfMoments_xx"]
                               + df["ext_shapeHSM_HsmPsfMoments_yy"]))
        sizeDiff = 100*(srcSize - psfSize)/(0.5*(srcSize + psfSize))
        return sizeDiff

class HsmFwhm(Functor):
    name = 'HSM Psf FWHM'
    _columns = ('ext_shapeHSM_HsmPsfMoments_xx', 'ext_shapeHSM_HsmPsfMoments_yy')
    # TODO: DM-21403 pixel scale should be computed from the CD matrix or transform matrix
    pixelScale = 0.168
    SIGMA2FWHM = 2*np.sqrt(2*np.log(2))

    def _func(self, df):
        return self.pixelScale*self.SIGMA2FWHM*np.sqrt(
            0.5*(df['ext_shapeHSM_HsmPsfMoments_xx'] + df['ext_shapeHSM_HsmPsfMoments_yy']))

class E1(Functor):
    name = "Distortion Ellipticity (e1)"
    shortname = "Distortion"

    def __init__(self, colXX, colXY, colYY, **kwargs):
        self.colXX = colXX
        self.colXY = colXY
        self.colYY = colYY
        self._columns = [self.colXX, self.colXY, self.colYY]
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.colXX, self.colXY, self.colYY]

    def _func(self, df):
        # e1 = (Ixx - Iyy) / (Ixx + Iyy); parentheses are required so the
        # whole difference, not just Iyy, is divided by the trace
        return (df[self.colXX] - df[self.colYY]) / (df[self.colXX] + df[self.colYY])

class E2(Functor):
    name = "Ellipticity e2"

    def __init__(self, colXX, colXY, colYY, **kwargs):
        self.colXX = colXX
        self.colXY = colXY
        self.colYY = colYY
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.colXX, self.colXY, self.colYY]

    def _func(self, df):
        return 2*df[self.colXY] / (df[self.colXX] + df[self.colYY])

class RadiusFromQuadrupole(Functor):

    def __init__(self, colXX, colXY, colYY, **kwargs):
        self.colXX = colXX
        self.colXY = colXY
        self.colYY = colYY
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.colXX, self.colXY, self.colYY]

    def _func(self, df):
        # Determinant radius: |Q|**0.25 = (Ixx*Iyy - Ixy**2)**0.25
        return (df[self.colXX]*df[self.colYY] - df[self.colXY]**2)**0.25

class ReferenceBand(Functor):
    name = 'Reference Band'
    shortname = 'refBand'

    @property
    def columns(self):
        return ["merge_measurement_i",
                "merge_measurement_r",
                "merge_measurement_z",
                "merge_measurement_y",
                "merge_measurement_g"]

    def _func(self, df):
        def getFilterAliasName(row):
            # get column name with the max value (True > False)
            colName = row.idxmax()
            return colName.replace('merge_measurement_', '')

        return df[self.columns].apply(getFilterAliasName, axis=1)

class Photometry(Functor):
    # AB to NanoJansky (3631 Jansky)
    AB_FLUX_SCALE = (0 * u.ABmag).to_value(u.nJy)
    LOG_AB_FLUX_SCALE = 12.56
    FIVE_OVER_2LOG10 = 1.085736204758129569
    # TO DO: DM-21955 Replace hard coded photometric calibration values
    COADD_ZP = 27

    def __init__(self, colFlux, colFluxErr=None, calib=None, **kwargs):
        self.vhypot = np.vectorize(self.hypot)
        self.col = colFlux
        self.colFluxErr = colFluxErr

        self.calib = calib
        if calib is not None:
            self.fluxMag0, self.fluxMag0Err = calib.getFluxMag0()
        else:
            self.fluxMag0 = 1./np.power(10, -0.4*self.COADD_ZP)
            self.fluxMag0Err = 0.

        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.col]

    @property
    def name(self):
        return 'mag_{0}'.format(self.col)

    @classmethod
    def hypot(cls, a, b):
        # sqrt(a**2 + b**2), computed without overflow by factoring out
        # the larger argument
        if np.abs(a) < np.abs(b):
            a, b = b, a
        if a == 0.:
            return 0.
        q = b/a
        return np.abs(a) * np.sqrt(1. + q*q)

    def dn2flux(self, dn, fluxMag0):
        return self.AB_FLUX_SCALE * dn / fluxMag0

    def dn2mag(self, dn, fluxMag0):
        with np.warnings.catch_warnings():
            np.warnings.filterwarnings('ignore', r'invalid value encountered')
            np.warnings.filterwarnings('ignore', r'divide by zero')
            return -2.5 * np.log10(dn/fluxMag0)

    def dn2fluxErr(self, dn, dnErr, fluxMag0, fluxMag0Err):
        retVal = self.vhypot(dn * fluxMag0Err, dnErr * fluxMag0)
        retVal *= self.AB_FLUX_SCALE / fluxMag0 / fluxMag0
        return retVal

    def dn2MagErr(self, dn, dnErr, fluxMag0, fluxMag0Err):
        retVal = self.dn2fluxErr(dn, dnErr, fluxMag0, fluxMag0Err) / self.dn2flux(dn, fluxMag0)
        return self.FIVE_OVER_2LOG10 * retVal
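
# A quick numeric sanity check of the conversions above, assuming no `calib`
# is passed (so fluxMag0 = 10**(0.4*COADD_ZP) and fluxMag0Err = 0); the
# column name is illustrative. A source with dn == fluxMag0 sits exactly at
# the zero point:
#
#     >>> phot = Photometry('base_PsfFlux_instFlux')
#     >>> phot.dn2mag(phot.fluxMag0, phot.fluxMag0)   # -2.5*log10(1) == 0
#     >>> phot.dn2flux(phot.fluxMag0, phot.fluxMag0)  # == AB_FLUX_SCALE, ~3.631e12 nJy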

class NanoJansky(Photometry):
    def _func(self, df):
        return self.dn2flux(df[self.col], self.fluxMag0)


class NanoJanskyErr(Photometry):
    @property
    def columns(self):
        return [self.col, self.colFluxErr]

    def _func(self, df):
        retArr = self.dn2fluxErr(df[self.col], df[self.colFluxErr], self.fluxMag0, self.fluxMag0Err)
        return pd.Series(retArr, index=df.index)


class Magnitude(Photometry):
    def _func(self, df):
        return self.dn2mag(df[self.col], self.fluxMag0)


class MagnitudeErr(Photometry):
    @property
    def columns(self):
        return [self.col, self.colFluxErr]

    def _func(self, df):
        retArr = self.dn2MagErr(df[self.col], df[self.colFluxErr], self.fluxMag0, self.fluxMag0Err)
        return pd.Series(retArr, index=df.index)