Coverage for python/lsst/pipe/tasks/functors.py : 36%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1import yaml
2import re
4import pandas as pd
5import numpy as np
6import astropy.units as u
8from lsst.daf.persistence import doImport
9from .parquetTable import MultilevelParquetTable
def init_fromDict(initDict, basePath='lsst.pipe.tasks.functors',
                  typeKey='functor', name=None):
    """Initialize an object defined in a dictionary

    The object needs to be importable as
    '{0}.{1}'.format(basePath, initDict[typeKey])
    Positional arguments (if any) are taken from the "args" entry of the
    dictionary; every other remaining entry is passed to the constructor
    as a keyword argument.
    This is used in `functors.CompositeFunctor.from_yaml` to initialize
    a composite functor from a specification in a YAML file.

    Parameters
    ----------
    initDict : dictionary
        Dictionary describing object's initialization.  Must contain
        an entry keyed by ``typeKey`` that is the name of the object,
        relative to ``basePath``.  The dictionary is copied, so the
        caller's object is not modified.
    basePath : str
        Path relative to module in which ``initDict[typeKey]`` is defined.
    typeKey : str
        Key of ``initDict`` that is the name of the object
        (relative to `basePath`).
    name : str, optional
        Label of the functor being built; only used in the error message
        emitted when construction fails.

    Returns
    -------
    element
        The constructed object.

    Raises
    ------
    Exception
        If the constructor fails, an exception of the same type is raised
        with a message naming the functor, chained to the original error.
    """
    initDict = initDict.copy()
    # TO DO: DM-21956 We should be able to define functors outside this module
    pythonType = doImport('{0}.{1}'.format(basePath, initDict.pop(typeKey)))
    args = initDict.pop('args', [])
    # A bare string is a single positional argument, not a sequence of chars.
    if isinstance(args, str):
        args = [args]
    try:
        element = pythonType(*args, **initDict)
    except Exception as e:
        message = f'Error in constructing functor "{name}" of type {pythonType.__name__} with args: {args}'
        # Chain explicitly so the original traceback is reported as the cause.
        raise type(e)(message, e.args) from e
    return element
class Functor(object):
    """Define and execute a calculation on a ParquetTable

    The `__call__` method accepts a `ParquetTable` object, and returns the
    result of the calculation as a single column.  Each functor defines what
    columns are needed for the calculation, and only these columns are read
    from the `ParquetTable`.

    The action of `__call__` consists of two steps: first, loading the
    necessary columns from disk into memory as a `pandas.DataFrame` object;
    and second, performing the computation on this dataframe and returning the
    result.

    To define a new `Functor`, a subclass must define a `_func` method,
    that takes a `pandas.DataFrame` and returns result in a `pandas.Series`.
    In addition, it must define the following attributes

    * `_columns`: The columns necessary to perform the calculation
    * `name`: A name appropriate for a figure axis label
    * `shortname`: A name appropriate for use as a dictionary key

    On initialization, a `Functor` should declare what filter (`filt` kwarg)
    and dataset (e.g. `'ref'`, `'meas'`, `'forced_src'`) it is intended to be
    applied to.  This enables the `_get_cols` method to extract the proper
    columns from the parquet file.  If not specified, the dataset will fall
    back on the `_defaultDataset` attribute.  If filter is not specified and
    `dataset` is anything other than `'ref'`, then an error will be raised
    when trying to perform the calculation.

    As currently implemented, `Functor` is only set up to expect a
    `ParquetTable` of the format of the `deepCoadd_obj` dataset; that is, a
    `MultilevelParquetTable` with the levels of the column index being
    `filter`, `dataset`, and `column`.  This is defined in the `_columnLevels`
    attribute, as well as being implicit in the role of the `filt` and
    `dataset` attributes defined at initialization.  In addition, the
    `_get_cols` method that reads the dataframe from the `ParquetTable` will
    return a dataframe with column index levels defined by the `_dfLevels`
    attribute; by default, this is `column`.

    The `_columnLevels` and `_dfLevels` attributes should generally not need
    to be changed, unless `_func` needs columns from multiple filters or
    datasets to do the calculation.
    An example of this is the `lsst.pipe.tasks.functors.Color` functor, for
    which `_dfLevels = ('filter', 'column')`, and `_func` expects the
    dataframe it gets to have those levels in the column index.

    Parameters
    ----------
    filt : str
        Filter upon which to do the calculation

    dataset : str
        Dataset upon which to do the calculation
        (e.g., 'ref', 'meas', 'forced_src').

    noDup : bool, optional
        Stored and exposed through the `noDup` property; when `None` the
        class-level `_defaultNoDup` is reported instead.
    """

    _defaultDataset = 'ref'
    _columnLevels = ('filter', 'dataset', 'column')
    _dfLevels = ('column',)
    _defaultNoDup = False

    def __init__(self, filt=None, dataset=None, noDup=None):
        self.filt = filt
        self.dataset = dataset if dataset is not None else self._defaultDataset
        self._noDup = noDup

    @property
    def noDup(self):
        """Explicit ``noDup`` setting, or the class default if unset."""
        if self._noDup is not None:
            return self._noDup
        return self._defaultNoDup

    @property
    def columns(self):
        """Columns required to perform calculation
        """
        if not hasattr(self, '_columns'):
            raise NotImplementedError('Must define columns property or _columns attribute')
        return self._columns

    def multilevelColumns(self, parq):
        """Build the multilevel column selection for ``parq``.

        Raises
        ------
        ValueError
            If the table's column levels differ from `_columnLevels`, or if
            no filter is set and the dataset is not ``'ref'``.
        """
        if not set(parq.columnLevels) == set(self._columnLevels):
            raise ValueError('ParquetTable does not have the expected column levels. ' +
                             'Got {0}; expected {1}.'.format(parq.columnLevels, self._columnLevels))

        columnDict = {'column': self.columns,
                      'dataset': self.dataset}
        if self.filt is None:
            if 'filter' in parq.columnLevels:
                if self.dataset == 'ref':
                    # With no filter requested, 'ref' falls back to the first
                    # filter name the table reports.
                    columnDict['filter'] = parq.columnLevelNames['filter'][0]
                else:
                    raise ValueError("'filt' not set for functor {}".format(self.name) +
                                     "(dataset {}) ".format(self.dataset) +
                                     "and ParquetTable " +
                                     "contains multiple filters in column index. " +
                                     "Set 'filt' or set 'dataset' to 'ref'.")
        else:
            columnDict['filter'] = self.filt

        return parq._colsFromDict(columnDict)

    def _func(self, df, dropna=True):
        raise NotImplementedError('Must define calculation on dataframe')

    def _get_cols(self, parq):
        """Retrieve dataframe necessary for calculation.

        Returns dataframe upon which `self._func` can act.
        """
        if isinstance(parq, MultilevelParquetTable):
            columns = self.multilevelColumns(parq)
            df = parq.toDataFrame(columns=columns, droplevels=False)
            df = self._setLevels(df)
        else:
            columns = self.columns
            df = parq.toDataFrame(columns=columns)

        return df

    def _setLevels(self, df):
        """Drop every column-index level not listed in `_dfLevels`.

        The column index of ``df`` is replaced in place and ``df`` returned.
        """
        levelsToDrop = [n for n in df.columns.names if n not in self._dfLevels]
        df.columns = df.columns.droplevel(levelsToDrop)
        return df

    def _dropna(self, vals):
        return vals.dropna()

    def __call__(self, parq, dropna=False):
        """Load the needed columns and evaluate the functor.

        Errors while loading columns propagate to the caller: without a
        dataframe there is no index from which to build a fallback result.
        Errors raised by `_func` are swallowed and replaced by the all-NaN
        result of `fail`.
        """
        df = self._get_cols(parq)
        try:
            vals = self._func(df)
        except Exception:
            vals = self.fail(df)
        if dropna:
            vals = self._dropna(vals)

        return vals

    def fail(self, df):
        """Return an all-NaN series sharing the index of ``df``."""
        return pd.Series(np.full(len(df), np.nan), index=df.index)

    @property
    def name(self):
        """Full name of functor (suitable for figure labels)
        """
        # NOTE(review): this returns the exception class instead of raising
        # it; left unchanged because `shortname` and error messages rely on
        # `name` being readable for subclasses that never define it.
        return NotImplementedError

    @property
    def shortname(self):
        """Short name of functor (suitable for column name/dict key)
        """
        return self.name
class CompositeFunctor(Functor):
    """Perform multiple calculations at once on a catalog

    The role of a `CompositeFunctor` is to group together computations from
    multiple functors.  Instead of returning `pandas.Series` a
    `CompositeFunctor` returns a `pandas.DataFrame`, with the column names
    being the keys of `funcDict`.

    The `columns` attribute of a `CompositeFunctor` is the union of all
    columns in all the component functors.

    A `CompositeFunctor` does not use a `_func` method itself; rather,
    when a `CompositeFunctor` is called, all its columns are loaded
    at once, and the resulting dataframe is passed to the `_func` method of
    each component functor.  This has the advantage of only doing I/O
    (reading from parquet file) once, and works because each individual
    `_func` method of each component functor does not care if there are
    *extra* columns in the dataframe being passed; only that it must contain
    *at least* the `columns` it expects.

    An important and useful class method is `from_yaml`, which takes as
    argument a parsed YAML specification of a collection of functors;
    `from_file` reads such a specification from a path.

    Parameters
    ----------
    funcs : `dict` or `list`
        Dictionary or list of functors.  If a list, then it will be converted
        into a dictionary according to the `.shortname` attribute of each
        functor.
    """
    dataset = None

    def __init__(self, funcs, **kwargs):

        # isinstance (not an exact type match) so dict subclasses such as
        # OrderedDict are taken as mappings rather than iterated as lists.
        if isinstance(funcs, dict):
            self.funcDict = funcs
        else:
            self.funcDict = {f.shortname: f for f in funcs}

        # Must exist before the base initializer assigns ``self.filt``.
        self._filt = None

        super().__init__(**kwargs)

    @property
    def filt(self):
        return self._filt

    @filt.setter
    def filt(self, filt):
        # A non-None filter is pushed down to every component functor.
        if filt is not None:
            for f in self.funcDict.values():
                f.filt = filt
        self._filt = filt

    def update(self, new):
        """Merge functors from a dictionary or another `CompositeFunctor`.

        Raises
        ------
        TypeError
            If ``new`` is neither a `dict` nor a `CompositeFunctor`.
        """
        if isinstance(new, dict):
            self.funcDict.update(new)
        elif isinstance(new, CompositeFunctor):
            self.funcDict.update(new.funcDict)
        else:
            raise TypeError('Can only update with dictionary or CompositeFunctor.')

        # Make sure new functors have the same 'filt' set
        if self.filt is not None:
            self.filt = self.filt

    @property
    def columns(self):
        return list(set([x for y in [f.columns for f in self.funcDict.values()] for x in y]))

    def multilevelColumns(self, parq):
        return list(set([x for y in [f.multilevelColumns(parq)
                                     for f in self.funcDict.values()] for x in y]))

    def __call__(self, parq, **kwargs):
        """Evaluate every component functor and return one dataframe.

        Only the ``dropna`` keyword is consulted; when true, rows with any
        missing value are dropped from the result.
        """
        if isinstance(parq, MultilevelParquetTable):
            columns = self.multilevelColumns(parq)
            df = parq.toDataFrame(columns=columns, droplevels=False)
            valDict = {}
            for k, f in self.funcDict.items():
                try:
                    subdf = f._setLevels(df[f.multilevelColumns(parq)])
                    valDict[k] = f._func(subdf)
                except Exception:
                    # Use ``df`` for the fallback: it has the same index as
                    # any column subset, and unlike ``subdf`` it is always
                    # bound even if the selection itself raised.
                    valDict[k] = f.fail(df)
        else:
            columns = self.columns
            df = parq.toDataFrame(columns=columns)
            valDict = {k: f._func(df) for k, f in self.funcDict.items()}

        try:
            valDf = pd.concat(valDict, axis=1)
        except TypeError:
            # Show what each functor returned before re-raising.
            print([(k, type(v)) for k, v in valDict.items()])
            raise

        if kwargs.get('dropna', False):
            valDf = valDf.dropna(how='any')

        return valDf

    @classmethod
    def renameCol(cls, col, renameRules):
        """Apply ``(old, new)`` rename rules to a column name.

        A rule fires when ``col`` starts with ``old``; it then replaces every
        occurrence of ``old`` in the name.  Rules are applied in order.
        """
        if renameRules is None:
            return col
        for old, new in renameRules:
            if col.startswith(old):
                col = col.replace(old, new)
        return col

    @classmethod
    def from_file(cls, filename, **kwargs):
        """Build a composite functor from a YAML file at ``filename``."""
        with open(filename) as f:
            translationDefinition = yaml.safe_load(f)

        return cls.from_yaml(translationDefinition, **kwargs)

    @classmethod
    def from_yaml(cls, translationDefinition, **kwargs):
        """Build a composite functor from a parsed YAML specification.

        Entries under ``funcs`` are constructed with `init_fromDict`.
        Names under ``refFlags`` and ``flags`` become `Column` functors on
        the ``'ref'`` and ``'meas'`` datasets respectively, keyed by the name
        after applying ``flag_rename_rules`` (if present).
        """
        funcs = {}
        for func, val in translationDefinition['funcs'].items():
            funcs[func] = init_fromDict(val, name=func)

        renameRules = translationDefinition.get('flag_rename_rules')

        if 'refFlags' in translationDefinition:
            for flag in translationDefinition['refFlags']:
                funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='ref')

        if 'flags' in translationDefinition:
            for flag in translationDefinition['flags']:
                funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='meas')

        return cls(funcs, **kwargs)
def mag_aware_eval(df, expr):
    """Evaluate an expression on a DataFrame, knowing what the 'mag' function means

    Builds on `pandas.DataFrame.eval`, which parses and executes math on
    dataframes.  Each ``mag(x)`` is rewritten as ``-2.5*log(x)/log(10)``;
    if evaluating that fails for any reason, the expression is retried with
    ``x`` replaced by ``x_instFlux``.

    Parameters
    ----------
    df : pandas.DataFrame
        Dataframe on which to evaluate expression.

    expr : str
        Expression.

    Returns
    -------
    val
        Whatever `pandas.DataFrame.eval` returns for the rewritten
        expression.
    """
    # ``truediv`` is no longer passed to ``eval``: it has been deprecated
    # since pandas 1.0 and newer releases reject it; true division is the
    # default behaviour anyway.
    try:
        expr_new = re.sub(r'mag\((\w+)\)', r'-2.5*log(\g<1>)/log(10)', expr)
        val = df.eval(expr_new)
    except Exception:  # Should check what actually gets raised
        expr_new = re.sub(r'mag\((\w+)\)', r'-2.5*log(\g<1>_instFlux)/log(10)', expr)
        val = df.eval(expr_new)
    return val
class CustomFunctor(Functor):
    """Arbitrary computation on a catalog

    Column names (and thus the columns to be loaded from catalog) are found
    by finding all identifiers and trying to ignore all "math-y" words.

    Parameters
    ----------
    expr : str
        Expression to evaluate, to be parsed and executed by `mag_aware_eval`.
    """
    _ignore_words = ('mag', 'sin', 'cos', 'exp', 'log', 'sqrt')

    def __init__(self, expr, **kwargs):
        self.expr = expr
        super().__init__(**kwargs)

    @property
    def name(self):
        return self.expr

    @property
    def columns(self):
        """Column names referenced by the expression.

        Arguments of ``mag(...)`` that do not already end in ``_instFlux``
        are replaced by their ``_instFlux`` counterpart.
        """
        flux_cols = re.findall(r'mag\(\s*(\w+)\s*\)', self.expr)

        # Identifiers start with a letter/underscore and may contain digits.
        # The leading word boundary keeps the exponent of a literal such as
        # ``1e5`` from being mistaken for a column.
        cols = [c for c in re.findall(r'\b[a-zA-Z_]\w*', self.expr) if c not in self._ignore_words]
        not_a_col = []
        for c in flux_cols:
            if not re.search('_instFlux$', c):
                cols.append('{}_instFlux'.format(c))
                not_a_col.append(c)
            else:
                cols.append(c)

        return list(set([c for c in cols if c not in not_a_col]))

    def _func(self, df):
        return mag_aware_eval(df, self.expr)
class Column(Functor):
    """Get column with specified name

    Parameters
    ----------
    col : str
        Name of the column to return.
    """

    def __init__(self, col, **kwargs):
        # Record the column before the base initializer runs.
        self.col = col
        super().__init__(**kwargs)

    @property
    def name(self):
        """The functor is named after the column it returns."""
        return self.col

    @property
    def columns(self):
        """Single-element list holding the requested column."""
        return [self.col]

    def _func(self, df):
        selected = df[self.col]
        return selected
class Index(Functor):
    """Return the value of the index for each object
    """

    _defaultDataset = 'ref'
    _defaultNoDup = True
    # Only a placeholder: some column has to be requested so that a
    # dataframe (and therefore an index) gets loaded at all.
    columns = ['coord_ra']

    def _func(self, df):
        idx = df.index
        return pd.Series(idx, index=idx)
class IDColumn(Column):
    """Return the dataframe index as a series indexed by itself."""
    _defaultNoDup = True
    _allow_difference = False
    col = 'id'

    def _func(self, df):
        idx = df.index
        return pd.Series(idx, index=idx)
class FootprintNPix(Column):
    """`Column` subclass with a class-level default column name."""
    # NOTE(review): `Column.__init__` still requires ``col`` and overwrites
    # this attribute on the instance -- confirm the intended usage.
    col = "base_Footprint_nPix"
class CoordColumn(Column):
    """Base class for coordinate column, in degrees

    When the class attribute ``_radians`` is true, the stored values are
    multiplied by ``180 / pi`` on output; otherwise they are returned as is.
    """
    _radians = True

    def __init__(self, col, **kwargs):
        super().__init__(col, **kwargs)

    def _func(self, df):
        # Must not modify original column in case that column is used by
        # another functor, hence a new series is built instead of an
        # in-place update.
        if self._radians:
            return df[self.col] * 180 / np.pi
        return df[self.col]
class RAColumn(CoordColumn):
    """Right Ascension, in degrees

    Reads the ``coord_ra`` column.
    """
    _defaultNoDup = True
    name = 'RA'

    def __init__(self, **kwargs):
        super().__init__('coord_ra', **kwargs)

    def __call__(self, catalog, **kwargs):
        # Pure pass-through to the inherited implementation.
        result = super().__call__(catalog, **kwargs)
        return result
class DecColumn(CoordColumn):
    """Declination, in degrees

    Reads the ``coord_dec`` column.
    """
    _defaultNoDup = True
    name = 'Dec'

    def __init__(self, **kwargs):
        super().__init__('coord_dec', **kwargs)

    def __call__(self, catalog, **kwargs):
        # Pure pass-through to the inherited implementation.
        result = super().__call__(catalog, **kwargs)
        return result
def fluxName(col):
    """Return ``col`` with an ``_instFlux`` suffix, adding it if absent."""
    suffix = '_instFlux'
    return col if col.endswith(suffix) else col + suffix
def fluxErrName(col):
    """Return the ``_instFluxErr`` column name corresponding to ``col``.

    Names already ending in ``_instFluxErr`` are returned unchanged.  A name
    ending in ``_instFlux`` only gets ``Err`` appended, so passing a full
    flux column name does not produce a doubled suffix.  Any other name gets
    the whole ``_instFluxErr`` suffix.
    """
    if col.endswith('_instFluxErr'):
        return col
    if col.endswith('_instFlux'):
        return col + 'Err'
    return col + '_instFluxErr'
class Mag(Functor):
    """Compute calibrated magnitude

    Takes a `calib` argument, which returns the flux at mag=0
    as `calib.getFluxMag0()`.  If not provided, then the default
    `fluxMag0` is 63095734448.0194, which is default for HSC.
    This default should be removed in DM-21955

    This calculation hides warnings about invalid values and dividing by zero.

    As for all functors, a `dataset` and `filt` kwarg should be provided upon
    initialization.  Unlike the default `Functor`, however, the default dataset
    for a `Mag` is `'meas'`, rather than `'ref'`.

    Parameters
    ----------
    col : `str`
        Name of flux column from which to compute magnitude.  Can be parseable
        by `lsst.pipe.tasks.functors.fluxName` function---that is, you can pass
        `'modelfit_CModel'` instead of `'modelfit_CModel_instFlux'`) and it will
        understand.
    calib : `lsst.afw.image.calib.Calib` (optional)
        Object that knows zero point.
    """
    _defaultDataset = 'meas'

    def __init__(self, col, calib=None, **kwargs):
        self.col = fluxName(col)
        self.calib = calib
        if calib is not None:
            self.fluxMag0 = calib.getFluxMag0()[0]
        else:
            # TO DO: DM-21955 Replace hard coded photometic calibration values
            self.fluxMag0 = 63095734448.0194

        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.col]

    def _func(self, df):
        # ``np.warnings`` was never public API and is gone from recent NumPy;
        # ``np.errstate`` silences the same divide/invalid floating-point
        # warnings at their source.
        with np.errstate(divide='ignore', invalid='ignore'):
            return -2.5*np.log10(df[self.col] / self.fluxMag0)

    @property
    def name(self):
        return 'mag_{0}'.format(self.col)
class MagErr(Mag):
    """Compute calibrated magnitude uncertainty

    Takes the same `calib` object as `lsst.pipe.tasks.functors.Mag`.
    Without a `calib`, the zero-point uncertainty is taken to be zero.

    Parameters
    ----------
    col : `str`
        Name of flux column
    calib : `lsst.afw.image.calib.Calib` (optional)
        Object that knows zero point.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        if self.calib is not None:
            self.fluxMag0Err = self.calib.getFluxMag0()[1]
        else:
            self.fluxMag0Err = 0.

    @property
    def columns(self):
        # The error column is the flux column name with 'Err' appended.
        return [self.col, self.col + 'Err']

    def _func(self, df):
        # ``np.warnings`` was never public API and is gone from recent NumPy;
        # ``np.errstate`` silences the same divide/invalid floating-point
        # warnings at their source.
        with np.errstate(divide='ignore', invalid='ignore'):
            fluxCol, fluxErrCol = self.columns
            # Relative flux error and relative zero-point error, combined in
            # quadrature and scaled by 2.5 / ln(10).
            x = df[fluxErrCol] / df[fluxCol]
            y = self.fluxMag0Err / self.fluxMag0
            magErr = (2.5 / np.log(10.)) * np.sqrt(x*x + y*y)
            return magErr

    @property
    def name(self):
        return super().name + '_err'
class NanoMaggie(Mag):
    """Return the flux column divided by ``fluxMag0`` and scaled by 1e9.
    """

    def _func(self, df):
        ratio = df[self.col] / self.fluxMag0
        return ratio * 1e9
class MagDiff(Functor):
    """Functor to calculate magnitude difference

    Computes ``-2.5*log10(flux1 / flux2)`` for two flux columns.  Warnings
    about invalid values and dividing by zero are hidden.

    Parameters
    ----------
    col1, col2 : `str`
        Names of the two flux columns; each is passed through `fluxName`,
        so the ``_instFlux`` suffix may be omitted.
    """
    _defaultDataset = 'meas'

    def __init__(self, col1, col2, **kwargs):
        self.col1 = fluxName(col1)
        self.col2 = fluxName(col2)
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.col1, self.col2]

    def _func(self, df):
        # ``np.warnings`` was never public API and is gone from recent NumPy;
        # ``np.errstate`` silences the same divide/invalid floating-point
        # warnings at their source.
        with np.errstate(divide='ignore', invalid='ignore'):
            return -2.5*np.log10(df[self.col1]/df[self.col2])

    @property
    def name(self):
        return '(mag_{0} - mag_{1})'.format(self.col1, self.col2)

    @property
    def shortname(self):
        return 'magDiff_{0}_{1}'.format(self.col1, self.col2)
class Color(Functor):
    """Compute the color between two filters

    Computes color by initializing two different `Mag`
    functors based on the `col` and filters provided, and
    then returning the difference.

    This is enabled by the `_func` expecting a dataframe with a
    multilevel column index, with both `'filter'` and `'column'`,
    instead of just `'column'`, which is the `Functor` default.
    This is controlled by the `_dfLevels` attribute.

    Also of note, the default dataset for `Color` is `'forced_src'`,
    whereas for `Mag` it is `'meas'`.

    Parameters
    ----------
    col : str
        Name of flux column from which to compute; same as would be passed to
        `lsst.pipe.tasks.functors.Mag`.

    filt2, filt1 : str
        Filters from which to compute magnitude difference.
        Color computed is `Mag(filt2) - Mag(filt1)`.
    """
    _defaultNoDup = True
    _dfLevels = ('filter', 'column')
    _defaultDataset = 'forced_src'

    def __init__(self, col, filt2, filt1, **kwargs):
        self.col = fluxName(col)
        # A color needs two distinct filters.
        if filt2 == filt1:
            raise RuntimeError("Cannot compute Color for %s: %s - %s " % (col, filt2, filt1))
        self.filt2 = filt2
        self.filt1 = filt1

        # One magnitude functor per filter; `_func` subtracts their results.
        self.mag2 = Mag(col, filt=filt2, **kwargs)
        self.mag1 = Mag(col, filt=filt1, **kwargs)

        super().__init__(**kwargs)

    @property
    def filt(self):
        # A color spans two filters, so no single filter is ever reported.
        return None

    @filt.setter
    def filt(self, filt):
        # Assignments are accepted and deliberately ignored.
        pass

    def _func(self, df):
        second = self.mag2._func(df[self.filt2])
        first = self.mag1._func(df[self.filt1])
        return second - first

    @property
    def columns(self):
        return [self.mag1.col, self.mag2.col]

    def multilevelColumns(self, parq):
        # ``parq`` is not inspected; one tuple is emitted per filter.
        return [(self.dataset, band, self.col) for band in (self.filt1, self.filt2)]

    @property
    def name(self):
        return '{0} - {1} ({2})'.format(self.filt2, self.filt1, self.col)

    @property
    def shortname(self):
        # Hyphens are stripped from the filter names.
        band2 = self.filt2.replace('-', '')
        band1 = self.filt1.replace('-', '')
        return '{0}_{1}m{2}'.format(self.col, band2, band1)
class Labeller(Functor):
    """Main function of this subclass is to override the dropna=True

    Whatever ``dropna`` value the caller passes, the base implementation is
    always invoked with ``dropna=False``.
    """
    name = 'label'
    _null_label = 'null'
    _force_str = False
    _allow_difference = False

    def __call__(self, parq, dropna=False, **kwargs):
        # The incoming ``dropna`` is intentionally discarded.
        labels = super().__call__(parq, dropna=False, **kwargs)
        return labels
class StarGalaxyLabeller(Labeller):
    """Label rows ``'star'``, ``'galaxy'`` or the null label.

    Values of the extendedness column below 0.5 are labelled ``'star'``,
    other non-null values ``'galaxy'``, and null values get `_null_label`.
    """
    _column = "base_ClassificationExtendedness_value"
    _columns = ["base_ClassificationExtendedness_value"]

    def _func(self, df):
        extendedness = df[self._columns][self._column]
        isNull = extendedness.isnull()
        # 1 where the value is below 0.5, 0 otherwise, 2 where it is null.
        codes = (extendedness < 0.5).astype(int).mask(isNull, 2)

        # TODO: DM-21954 Look into veracity of inline comment below
        # are these backwards?
        categories = ['galaxy', 'star', self._null_label]
        categorical = pd.Categorical.from_codes(codes, categories=categories)
        label = pd.Series(categorical, index=extendedness.index, name='label')
        return label.astype(str) if self._force_str else label
class NumStarLabeller(Labeller):
    """Bin the ``numStarFlags`` column into three labels.

    The upper bin edge is one less than the number of distinct values
    found in the column.
    """
    labels = {"star": 0, "maybe": 1, "notStar": 2}
    _columns = ['numStarFlags']

    def _func(self, df):
        counts = df[self._columns][self._columns[0]]

        # Number of filters
        n = len(counts.unique()) - 1

        binLabels = ['noStar', 'maybe', 'star']
        binned = pd.cut(counts, [-1, 0, n-1, n], labels=binLabels)
        label = pd.Series(binned, index=counts.index, name='label')

        return label.astype(str) if self._force_str else label
class DeconvolvedMoments(Functor):
    """Trace of the source moments minus the trace of the PSF moments.

    The HSM source moments are used where they are finite; elsewhere the
    SDSS shape moments are substituted.
    """
    name = 'Deconvolved Moments'
    shortname = 'deconvolvedMoments'
    _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
                "ext_shapeHSM_HsmSourceMoments_yy",
                "base_SdssShape_xx", "base_SdssShape_yy",
                "ext_shapeHSM_HsmPsfMoments_xx",
                "ext_shapeHSM_HsmPsfMoments_yy")

    def _func(self, df):
        """Calculate deconvolved moments

        Raises
        ------
        RuntimeError
            If the PSF moment columns are missing from ``df``.
        """
        if "ext_shapeHSM_HsmSourceMoments_xx" in df.columns:  # _xx added by tdm
            hsm = df["ext_shapeHSM_HsmSourceMoments_xx"] + df["ext_shapeHSM_HsmSourceMoments_yy"]
        else:
            # Must be a Series (not a bare ndarray) because ``.where`` is
            # called on it below; all-NaN makes every row fall back to SDSS.
            hsm = pd.Series(np.nan, index=df.index)
        sdss = df["base_SdssShape_xx"] + df["base_SdssShape_yy"]
        if "ext_shapeHSM_HsmPsfMoments_xx" in df.columns:
            psf = df["ext_shapeHSM_HsmPsfMoments_xx"] + df["ext_shapeHSM_HsmPsfMoments_yy"]
        else:
            # LSST does not have shape.sdss.psf.  Could instead add base_PsfShape to catalog using
            # exposure.getPsf().computeShape(s.getCentroid()).getIxx()
            # raise TaskError("No psf shape parameter found in catalog")
            raise RuntimeError('No psf shape parameter found in catalog')

        return hsm.where(np.isfinite(hsm), sdss) - psf
class SdssTraceSize(Functor):
    """Functor to calculate SDSS trace radius size for sources

    Evaluates ``sqrt(0.5 * (xx + yy))`` of the SDSS shape moments.
    """
    shortname = 'sdssTrace'
    name = "SDSS Trace Size"
    _columns = ("base_SdssShape_xx", "base_SdssShape_yy")

    def _func(self, df):
        trace = df["base_SdssShape_xx"] + df["base_SdssShape_yy"]
        return np.sqrt(0.5*trace)
class PsfSdssTraceSizeDiff(Functor):
    """Functor to calculate SDSS trace radius size difference (%) between object and psf model

    The difference is taken relative to the mean of the two sizes.
    """
    shortname = 'psf_sdssTrace'
    name = "PSF - SDSS Trace Size"
    _columns = ("base_SdssShape_xx", "base_SdssShape_yy",
                "base_SdssShape_psf_xx", "base_SdssShape_psf_yy")

    def _func(self, df):
        srcTrace = df["base_SdssShape_xx"] + df["base_SdssShape_yy"]
        psfTrace = df["base_SdssShape_psf_xx"] + df["base_SdssShape_psf_yy"]
        srcSize = np.sqrt(0.5*srcTrace)
        psfSize = np.sqrt(0.5*psfTrace)
        meanSize = 0.5*(srcSize + psfSize)
        return 100*(srcSize - psfSize)/meanSize
class HsmTraceSize(Functor):
    """Functor to calculate HSM trace radius size for sources

    Evaluates ``sqrt(0.5 * (xx + yy))`` of the HSM source moments.
    """
    shortname = 'hsmTrace'
    name = 'HSM Trace Size'
    _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
                "ext_shapeHSM_HsmSourceMoments_yy")

    def _func(self, df):
        trace = (df["ext_shapeHSM_HsmSourceMoments_xx"] +
                 df["ext_shapeHSM_HsmSourceMoments_yy"])
        return np.sqrt(0.5*trace)
class PsfHsmTraceSizeDiff(Functor):
    """Functor to calculate HSM trace radius size difference (%) between object and psf model

    The difference is taken relative to the mean of the two sizes.
    """
    shortname = 'psf_HsmTrace'
    name = 'PSF - HSM Trace Size'
    _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
                "ext_shapeHSM_HsmSourceMoments_yy",
                "ext_shapeHSM_HsmPsfMoments_xx",
                "ext_shapeHSM_HsmPsfMoments_yy")

    def _func(self, df):
        srcTrace = (df["ext_shapeHSM_HsmSourceMoments_xx"] +
                    df["ext_shapeHSM_HsmSourceMoments_yy"])
        psfTrace = (df["ext_shapeHSM_HsmPsfMoments_xx"] +
                    df["ext_shapeHSM_HsmPsfMoments_yy"])
        srcSize = np.sqrt(0.5*srcTrace)
        psfSize = np.sqrt(0.5*psfTrace)
        meanSize = 0.5*(srcSize + psfSize)
        return 100*(srcSize - psfSize)/meanSize
class HsmFwhm(Functor):
    """FWHM derived from the HSM PSF moments.

    Evaluates ``pixelScale * SIGMA2FWHM * sqrt(0.5 * (xx + yy))``.
    """
    name = 'HSM Psf FWHM'
    _columns = ('ext_shapeHSM_HsmPsfMoments_xx', 'ext_shapeHSM_HsmPsfMoments_yy')
    # TODO: DM-21403 pixel scale should be computed from the CD matrix or transform matrix
    pixelScale = 0.168
    # Conversion factor 2*sqrt(2*ln 2).
    SIGMA2FWHM = 2*np.sqrt(2*np.log(2))

    def _func(self, df):
        trace = df['ext_shapeHSM_HsmPsfMoments_xx'] + df['ext_shapeHSM_HsmPsfMoments_yy']
        return self.pixelScale*self.SIGMA2FWHM*np.sqrt(0.5*trace)
class E1(Functor):
    """Distortion ellipticity ``(xx - yy) / (xx + yy)`` from second moments.

    Parameters
    ----------
    colXX, colXY, colYY : `str`
        Names of the columns holding the xx, xy and yy moments.  The xy
        column is loaded but does not enter the calculation.
    """
    name = "Distortion Ellipticity (e1)"
    shortname = "Distortion"

    def __init__(self, colXX, colXY, colYY, **kwargs):
        self.colXX = colXX
        self.colXY = colXY
        self.colYY = colYY
        self._columns = [self.colXX, self.colXY, self.colYY]
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.colXX, self.colXY, self.colYY]

    def _func(self, df):
        # The numerator must be parenthesised: without it, only ``yy`` was
        # divided by the trace and the result was ``xx - yy/(xx + yy)``.
        return (df[self.colXX] - df[self.colYY]) / (df[self.colXX] + df[self.colYY])
class E2(Functor):
    """Ellipticity component ``2*xy / (xx + yy)`` from second moments.

    Parameters
    ----------
    colXX, colXY, colYY : `str`
        Names of the columns holding the xx, xy and yy moments.
    """
    name = "Ellipticity e2"

    def __init__(self, colXX, colXY, colYY, **kwargs):
        self.colXX, self.colXY, self.colYY = colXX, colXY, colYY
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.colXX, self.colXY, self.colYY]

    def _func(self, df):
        trace = df[self.colXX] + df[self.colYY]
        return 2*df[self.colXY] / trace
class RadiusFromQuadrupole(Functor):
    """Fourth root of the moment determinant ``xx*yy - xy**2``.

    Parameters
    ----------
    colXX, colXY, colYY : `str`
        Names of the columns holding the xx, xy and yy moments.
    """

    def __init__(self, colXX, colXY, colYY, **kwargs):
        self.colXX, self.colXY, self.colYY = colXX, colXY, colYY
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.colXX, self.colXY, self.colYY]

    def _func(self, df):
        determinant = df[self.colXX]*df[self.colYY] - df[self.colXY]**2
        return determinant**0.25
class ComputePixelScale(Functor):
    """Compute the local pixel scale from the stored CDMatrix.

    Parameters
    ----------
    colCD_1_1, colCD_1_2, colCD_2_1, colCD_2_2 : `str`
        Names of the columns holding the four elements of the local
        CD matrix.
    """
    name = "Pixel Scale"

    def __init__(self,
                 colCD_1_1,
                 colCD_1_2,
                 colCD_2_1,
                 colCD_2_2,
                 **kwargs):
        self.colCD_1_1, self.colCD_1_2 = colCD_1_1, colCD_1_2
        self.colCD_2_1, self.colCD_2_2 = colCD_2_1, colCD_2_2
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.colCD_1_1, self.colCD_1_2,
                self.colCD_2_1, self.colCD_2_2]

    def pixelScale(self, cd11, cd12, cd21, cd22):
        """Compute the local pixel scale conversion.

        Parameters
        ----------
        cd11 : `pandas.Series`
            [1, 1] element of the local CD matrices.
        cd12 : `pandas.Series`
            [1, 2] element of the local CD matrices.
        cd21 : `pandas.Series`
            [2, 1] element of the local CD matrices.
        cd22 : `pandas.Series`
            [2, 2] element of the local CD matrices.

        Returns
        -------
        pixScale : `pandas.Series`
            Arcseconds per pixel at the location of the local WCS
        """
        # 3600 times the square root of the absolute CD-matrix determinant.
        determinant = cd11 * cd22 - cd12 * cd21
        return 3600 * np.sqrt(np.fabs(determinant))

    def _func(self, df):
        elements = [df[c] for c in (self.colCD_1_1, self.colCD_1_2,
                                    self.colCD_2_1, self.colCD_2_2)]
        return self.pixelScale(*elements)
class ConvertPixelToArcseconds(ComputePixelScale):
    """Convert a value in units pixels to units arcseconds.

    Parameters
    ----------
    col : `str`
        Name of the column to convert.
    colCD_1_1, colCD_1_2, colCD_2_1, colCD_2_2 : `str`
        Names of the columns holding the four elements of the local
        CD matrix.
    """
    name = "Pixel scale converter"

    def __init__(self,
                 col,
                 colCD_1_1,
                 colCD_1_2,
                 colCD_2_1,
                 colCD_2_2, **kwargs):
        self.col = col
        super().__init__(colCD_1_1, colCD_1_2, colCD_2_1, colCD_2_2, **kwargs)

    @property
    def name(self):
        # This property replaces the class-level ``name`` string above.
        return f"{self.col}_asArcseconds"

    @property
    def columns(self):
        return [self.col,
                self.colCD_1_1, self.colCD_1_2,
                self.colCD_2_1, self.colCD_2_2]

    def _func(self, df):
        scale = self.pixelScale(df[self.colCD_1_1], df[self.colCD_1_2],
                                df[self.colCD_2_1], df[self.colCD_2_2])
        return df[self.col] * scale
class ReferenceBand(Functor):
    """Name the band whose ``merge_measurement_*`` column is largest per row.

    The ``merge_measurement_`` prefix is stripped from the winning column
    name.
    """
    shortname = 'refBand'
    name = 'Reference Band'

    @property
    def columns(self):
        return ["merge_measurement_i",
                "merge_measurement_r",
                "merge_measurement_z",
                "merge_measurement_y",
                "merge_measurement_g"]

    def _func(self, df):
        def bandOfMax(row):
            # get column name with the max value (True > False)
            winner = row.idxmax()
            return winner.replace('merge_measurement_', '')

        flags = df[self.columns]
        return flags.apply(bandOfMax, axis=1)
class Photometry(Functor):
    """Base class providing flux/magnitude conversion helpers.

    Parameters
    ----------
    colFlux : `str`
        Name of the flux column.
    colFluxErr : `str`, optional
        Name of the flux error column.
    calib : optional
        Object whose ``getFluxMag0()`` returns ``(fluxMag0, fluxMag0Err)``.
        When omitted, `fluxMag0` is derived from `COADD_ZP` and its error
        is set to zero.
    """
    # AB to NanoJansky (3631 Jansky)
    AB_FLUX_SCALE = (0 * u.ABmag).to_value(u.nJy)
    LOG_AB_FLUX_SCALE = 12.56
    FIVE_OVER_2LOG10 = 1.085736204758129569
    # TO DO: DM-21955 Replace hard coded photometic calibration values
    COADD_ZP = 27

    def __init__(self, colFlux, colFluxErr=None, calib=None, **kwargs):
        # Element-wise version of `hypot` for array inputs.
        self.vhypot = np.vectorize(self.hypot)
        self.col = colFlux
        self.colFluxErr = colFluxErr

        self.calib = calib
        if calib is not None:
            self.fluxMag0, self.fluxMag0Err = calib.getFluxMag0()
        else:
            self.fluxMag0 = 1./np.power(10, -0.4*self.COADD_ZP)
            self.fluxMag0Err = 0.

        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.col]

    @property
    def name(self):
        return 'mag_{0}'.format(self.col)

    @classmethod
    def hypot(cls, a, b):
        """Return ``sqrt(a**2 + b**2)`` for scalars.

        The larger-magnitude argument is factored out before squaring.
        """
        if np.abs(a) < np.abs(b):
            a, b = b, a
        if a == 0.:
            return 0.
        q = b/a
        return np.abs(a) * np.sqrt(1. + q*q)

    def dn2flux(self, dn, fluxMag0):
        """Scale ``dn`` by ``AB_FLUX_SCALE / fluxMag0``."""
        return self.AB_FLUX_SCALE * dn / fluxMag0

    def dn2mag(self, dn, fluxMag0):
        """Return ``-2.5*log10(dn / fluxMag0)`` with warnings suppressed."""
        # ``np.warnings`` was never public API and is gone from recent NumPy;
        # ``np.errstate`` silences the same divide/invalid floating-point
        # warnings at their source.
        with np.errstate(divide='ignore', invalid='ignore'):
            return -2.5 * np.log10(dn/fluxMag0)

    def dn2fluxErr(self, dn, dnErr, fluxMag0, fluxMag0Err):
        """Propagate count and zero-point errors into a flux error."""
        retVal = self.vhypot(dn * fluxMag0Err, dnErr * fluxMag0)
        retVal *= self.AB_FLUX_SCALE / fluxMag0 / fluxMag0
        return retVal

    def dn2MagErr(self, dn, dnErr, fluxMag0, fluxMag0Err):
        """Return the relative flux error scaled by `FIVE_OVER_2LOG10`."""
        retVal = self.dn2fluxErr(dn, dnErr, fluxMag0, fluxMag0Err) / self.dn2flux(dn, fluxMag0)
        return self.FIVE_OVER_2LOG10 * retVal
class NanoJansky(Photometry):
    """Apply `dn2flux` to the flux column using the stored `fluxMag0`."""

    def _func(self, df):
        counts = df[self.col]
        return self.dn2flux(counts, self.fluxMag0)
class NanoJanskyErr(Photometry):
    """Apply `dn2fluxErr` to the flux and flux-error columns."""

    @property
    def columns(self):
        return [self.col, self.colFluxErr]

    def _func(self, df):
        counts = df[self.col]
        countsErr = df[self.colFluxErr]
        errors = self.dn2fluxErr(counts, countsErr, self.fluxMag0, self.fluxMag0Err)
        # Re-attach the dataframe index to the computed values.
        return pd.Series(errors, index=df.index)
class Magnitude(Photometry):
    """Apply `dn2mag` to the flux column using the stored `fluxMag0`."""

    def _func(self, df):
        counts = df[self.col]
        return self.dn2mag(counts, self.fluxMag0)
class MagnitudeErr(Photometry):
    """Apply `dn2MagErr` to the flux and flux-error columns."""

    @property
    def columns(self):
        return [self.col, self.colFluxErr]

    def _func(self, df):
        counts = df[self.col]
        countsErr = df[self.colFluxErr]
        errors = self.dn2MagErr(counts, countsErr, self.fluxMag0, self.fluxMag0Err)
        # Re-attach the dataframe index to the computed values.
        return pd.Series(errors, index=df.index)
class LocalPhotometry(Functor):
    """Base class for calibrating the specified instrument flux column using
    the local photometric calibration.

    Parameters
    ----------
    instFluxCol : `str`
        Name of the instrument flux column.
    instFluxErrCol : `str`
        Name of the associated error column for ``instFluxCol``.
    photoCalibCol : `str`
        Name of local calibration column.
    photoCalibErrCol : `str`
        Error associated with ``photoCalibCol``

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    """
    # AB magnitude of a 1 nJy source; additive offset used when turning a
    # flux in nanojanskys into an AB magnitude.
    logNJanskyToAB = (1 * u.nJy).to_value(u.ABmag)

    def __init__(self,
                 instFluxCol,
                 instFluxErrCol,
                 photoCalibCol,
                 photoCalibErrCol,
                 **kwargs):
        self.instFluxCol = instFluxCol
        self.instFluxErrCol = instFluxErrCol
        self.photoCalibCol = photoCalibCol
        self.photoCalibErrCol = photoCalibErrCol
        super().__init__(**kwargs)

    def instFluxToNanojansky(self, instFlux, localCalib):
        """Convert instrument flux to nanojanskys.

        Parameters
        ----------
        instFlux : `numpy.ndarray` or `pandas.Series`
            Array of instrument flux measurements
        localCalib : `numpy.ndarray` or `pandas.Series`
            Array of local photometric calibration estimates.

        Returns
        -------
        calibFlux : `numpy.ndarray` or `pandas.Series`
            Array of calibrated flux measurements.
        """
        return instFlux * localCalib

    def instFluxErrToNanojanskyErr(self, instFlux, instFluxErr, localCalib, localCalibErr):
        """Convert instrument flux error to nanojansky error.

        The flux error and the calibration error are combined in
        quadrature, each weighted by the other quantity's value.

        Parameters
        ----------
        instFlux : `numpy.ndarray` or `pandas.Series`
            Array of instrument flux measurements
        instFluxErr : `numpy.ndarray` or `pandas.Series`
            Errors on associated ``instFlux`` values
        localCalib : `numpy.ndarray` or `pandas.Series`
            Array of local photometric calibration estimates.
        localCalibErr : `numpy.ndarray` or `pandas.Series`
            Errors on associated ``localCalib`` values

        Returns
        -------
        calibFluxErr : `numpy.ndarray` or `pandas.Series`
            Errors on calibrated flux measurements.
        """
        return np.hypot(instFluxErr * localCalib, instFlux * localCalibErr)

    def instFluxToMagnitude(self, instFlux, localCalib):
        """Convert instrument flux to an AB magnitude.

        Parameters
        ----------
        instFlux : `numpy.ndarray` or `pandas.Series`
            Array of instrument flux measurements
        localCalib : `numpy.ndarray` or `pandas.Series`
            Array of local photometric calibration estimates.

        Returns
        -------
        calibMag : `numpy.ndarray` or `pandas.Series`
            Array of calibrated AB magnitudes.
        """
        return -2.5 * np.log10(self.instFluxToNanojansky(instFlux, localCalib)) + self.logNJanskyToAB

    def instFluxErrToMagnitudeErr(self, instFlux, instFluxErr, localCalib, localCalibErr):
        """Convert instrument flux error to an AB magnitude error.

        Parameters
        ----------
        instFlux : `numpy.ndarray` or `pandas.Series`
            Array of instrument flux measurements
        instFluxErr : `numpy.ndarray` or `pandas.Series`
            Errors on associated ``instFlux`` values
        localCalib : `numpy.ndarray` or `pandas.Series`
            Array of local photometric calibration estimates.
        localCalibErr : `numpy.ndarray` or `pandas.Series`
            Errors on associated ``localCalib`` values

        Returns
        -------
        calibMagErr: `numpy.ndarray` or `pandas.Series`
            Error on calibrated AB magnitudes.
        """
        err = self.instFluxErrToNanojanskyErr(instFlux, instFluxErr, localCalib, localCalibErr)
        # The magnitude error is (2.5 / ln 10) times the fractional flux
        # error, so the denominator must be the calibrated flux
        # (instFlux * localCalib), not instFlux * instFluxErr.
        return 2.5 / np.log(10) * err / self.instFluxToNanojansky(instFlux, localCalib)
class LocalNanojansky(LocalPhotometry):
    """Calibrated flux obtained by applying the local calibration value.

    See also
    --------
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    """

    @property
    def columns(self):
        """Instrument flux column and local calibration column."""
        required = [self.instFluxCol, self.photoCalibCol]
        return required

    @property
    def name(self):
        """``flux_`` followed by the instrument flux column name."""
        label = f'flux_{self.instFluxCol}'
        return label

    def _func(self, df):
        instFlux = df[self.instFluxCol]
        localCalib = df[self.photoCalibCol]
        return self.instFluxToNanojansky(instFlux, localCalib)
class LocalNanojanskyErr(LocalPhotometry):
    """Calibrated flux error obtained using the local calibration value.

    See also
    --------
    LocalNanojansky
    LocalMagnitude
    LocalMagnitudeErr
    """

    @property
    def columns(self):
        """Flux, flux error, calibration and calibration error columns."""
        required = [
            self.instFluxCol,
            self.instFluxErrCol,
            self.photoCalibCol,
            self.photoCalibErrCol,
        ]
        return required

    @property
    def name(self):
        """``fluxErr_`` followed by the instrument flux column name."""
        label = f'fluxErr_{self.instFluxCol}'
        return label

    def _func(self, df):
        instFlux = df[self.instFluxCol]
        instFluxErr = df[self.instFluxErrCol]
        localCalib = df[self.photoCalibCol]
        localCalibErr = df[self.photoCalibErrCol]
        return self.instFluxErrToNanojanskyErr(instFlux, instFluxErr, localCalib, localCalibErr)
class LocalMagnitude(LocalPhotometry):
    """Calibrated AB magnitude obtained using the local calibration value.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitudeErr
    """

    @property
    def columns(self):
        """Instrument flux column and local calibration column."""
        required = [self.instFluxCol, self.photoCalibCol]
        return required

    @property
    def name(self):
        """``mag_`` followed by the instrument flux column name."""
        label = f'mag_{self.instFluxCol}'
        return label

    def _func(self, df):
        instFlux = df[self.instFluxCol]
        localCalib = df[self.photoCalibCol]
        return self.instFluxToMagnitude(instFlux, localCalib)
class LocalMagnitudeErr(LocalPhotometry):
    """Calibrated AB magnitude error obtained using the local calibration value.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    """

    @property
    def columns(self):
        """Flux, flux error, calibration and calibration error columns."""
        required = [
            self.instFluxCol,
            self.instFluxErrCol,
            self.photoCalibCol,
            self.photoCalibErrCol,
        ]
        return required

    @property
    def name(self):
        """``magErr_`` followed by the instrument flux column name."""
        label = f'magErr_{self.instFluxCol}'
        return label

    def _func(self, df):
        instFlux = df[self.instFluxCol]
        instFluxErr = df[self.instFluxErrCol]
        localCalib = df[self.photoCalibCol]
        localCalibErr = df[self.photoCalibErrCol]
        return self.instFluxErrToMagnitudeErr(instFlux, instFluxErr, localCalib, localCalibErr)