Coverage for python/lsst/pipe/tasks/functors.py : 36%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
import re
import warnings

import yaml

import astropy.units as u
import numpy as np
import pandas as pd

from lsst.daf.persistence import doImport
from .parquetTable import MultilevelParquetTable
def init_fromDict(initDict, basePath='lsst.pipe.tasks.functors', typeKey='functor'):
    """Instantiate an object described by a dictionary.

    The object must be importable as
    ``'{0}.{1}'.format(basePath, initDict[typeKey])``.  Positional
    arguments, if any, live in the dictionary's ``'args'`` entry; every
    remaining entry is passed through as a keyword argument.  This is used
    in `functors.CompositeFunctor.from_yaml` to initialize a composite
    functor from a specification in a YAML file.

    Parameters
    ----------
    initDict : dictionary
        Dictionary describing object's initialization.  Must contain an
        entry keyed by ``typeKey`` that is the name of the object, relative
        to ``basePath``.
    basePath : str
        Path relative to module in which ``initDict[typeKey]`` is defined.
    typeKey : str
        Key of ``initDict`` that is the name of the object
        (relative to ``basePath``).
    """
    spec = dict(initDict)  # work on a copy; we pop entries below
    # TO DO: DM-21956 We should be able to define functors outside this module
    pythonType = doImport('{0}.{1}'.format(basePath, spec.pop(typeKey)))
    positional = spec.pop('args', [])
    if isinstance(positional, str):
        # A bare string is shorthand for a single positional argument.
        positional = [positional]
    return pythonType(*positional, **spec)
class Functor(object):
    """Define and execute a calculation on a ParquetTable

    The `__call__` method accepts a `ParquetTable` object, and returns the
    result of the calculation as a single column. Each functor defines what
    columns are needed for the calculation, and only these columns are read
    from the `ParquetTable`.

    The action of `__call__` consists of two steps: first, loading the
    necessary columns from disk into memory as a `pandas.DataFrame` object;
    and second, performing the computation on this dataframe and returning the
    result.

    To define a new `Functor`, a subclass must define a `_func` method,
    that takes a `pandas.DataFrame` and returns result in a `pandas.Series`.
    In addition, it must define the following attributes:

    * `_columns`: The columns necessary to perform the calculation
    * `name`: A name appropriate for a figure axis label
    * `shortname`: A name appropriate for use as a dictionary key

    On initialization, a `Functor` should declare what filter (`filt` kwarg)
    and dataset (e.g. `'ref'`, `'meas'`, `'forced_src'`) it is intended to be
    applied to. This enables the `_get_cols` method to extract the proper
    columns from the parquet file. If not specified, the dataset will fall back
    on the `_defaultDataset` attribute. If filter is not specified and `dataset`
    is anything other than `'ref'`, then an error will be raised when trying to
    perform the calculation.

    As currently implemented, `Functor` is only set up to expect a
    `ParquetTable` of the format of the `deepCoadd_obj` dataset; that is, a
    `MultilevelParquetTable` with the levels of the column index being `filter`,
    `dataset`, and `column` (see `_columnLevels`). The `_get_cols` method
    returns a dataframe with column index levels defined by the `_dfLevels`
    attribute; by default, this is `column`.

    `_columnLevels` and `_dfLevels` should generally not need to be changed,
    unless `_func` needs columns from multiple filters or datasets to do the
    calculation.  An example of this is `lsst.pipe.tasks.functors.Color`, for
    which `_dfLevels = ('filter', 'column')`.

    Parameters
    ----------
    filt : str
        Filter upon which to do the calculation
    dataset : str
        Dataset upon which to do the calculation
        (e.g., 'ref', 'meas', 'forced_src').
    """

    _defaultDataset = 'ref'
    _columnLevels = ('filter', 'dataset', 'column')
    _dfLevels = ('column',)
    _defaultNoDup = False

    def __init__(self, filt=None, dataset=None, noDup=None):
        self.filt = filt
        self.dataset = dataset if dataset is not None else self._defaultDataset
        self._noDup = noDup

    @property
    def noDup(self):
        # Explicit per-instance setting wins over the class default.
        if self._noDup is not None:
            return self._noDup
        else:
            return self._defaultNoDup

    @property
    def columns(self):
        """Columns required to perform calculation
        """
        if not hasattr(self, '_columns'):
            raise NotImplementedError('Must define columns property or _columns attribute')
        return self._columns

    def multilevelColumns(self, parq):
        """Return the multilevel column-index keys this functor needs from ``parq``."""
        if not set(parq.columnLevels) == set(self._columnLevels):
            raise ValueError('ParquetTable does not have the expected column levels. ' +
                             'Got {0}; expected {1}.'.format(parq.columnLevels, self._columnLevels))

        columnDict = {'column': self.columns,
                      'dataset': self.dataset}
        if self.filt is None:
            if 'filter' in parq.columnLevels:
                if self.dataset == 'ref':
                    # 'ref' columns are filter-independent; any filter works.
                    columnDict['filter'] = parq.columnLevelNames['filter'][0]
                else:
                    raise ValueError("'filt' not set for functor {}".format(self.name) +
                                     "(dataset {}) ".format(self.dataset) +
                                     "and ParquetTable " +
                                     "contains multiple filters in column index. " +
                                     "Set 'filt' or set 'dataset' to 'ref'.")
        else:
            columnDict['filter'] = self.filt

        return parq._colsFromDict(columnDict)

    def _func(self, df, dropna=True):
        raise NotImplementedError('Must define calculation on dataframe')

    def _get_cols(self, parq):
        """Retrieve dataframe necessary for calculation.

        Returns dataframe upon which `self._func` can act.
        """
        if isinstance(parq, MultilevelParquetTable):
            columns = self.multilevelColumns(parq)
            df = parq.toDataFrame(columns=columns, droplevels=False)
            df = self._setLevels(df)
        else:
            columns = self.columns
            df = parq.toDataFrame(columns=columns)

        return df

    def _setLevels(self, df):
        # Drop every column-index level not requested via _dfLevels.
        levelsToDrop = [n for n in df.columns.names if n not in self._dfLevels]
        df.columns = df.columns.droplevel(levelsToDrop)
        return df

    def _dropna(self, vals):
        return vals.dropna()

    def __call__(self, parq, dropna=False):
        # Bug fix: load the dataframe *outside* the try block.  Previously a
        # failure inside _get_cols left ``df`` unbound, so the except clause
        # raised UnboundLocalError and masked the real I/O/column error.
        df = self._get_cols(parq)
        try:
            vals = self._func(df)
        except Exception:
            vals = self.fail(df)
        if dropna:
            vals = self._dropna(vals)

        return vals

    def fail(self, df):
        """Return an all-NaN series aligned to ``df``'s index (used on failure)."""
        return pd.Series(np.full(len(df), np.nan), index=df.index)

    @property
    def name(self):
        """Full name of functor (suitable for figure labels)
        """
        # Bug fix: this previously *returned* the NotImplementedError class
        # instead of raising it.
        raise NotImplementedError

    @property
    def shortname(self):
        """Short name of functor (suitable for column name/dict key)
        """
        return self.name
class CompositeFunctor(Functor):
    """Perform multiple calculations at once on a catalog

    The role of a `CompositeFunctor` is to group together computations from
    multiple functors. Instead of returning `pandas.Series` a
    `CompositeFunctor` returns a `pandas.Dataframe`, with the column names
    being the keys of `funcDict`.

    The `columns` attribute of a `CompositeFunctor` is the union of all columns
    in all the component functors.

    A `CompositeFunctor` does not use a `_func` method itself; rather,
    when a `CompositeFunctor` is called, all its columns are loaded
    at once, and the resulting dataframe is passed to the `_func` method of
    each component functor. This has the advantage of only doing I/O (reading
    from parquet file) once, and works because each individual `_func` method
    of each component functor does not care if there are *extra* columns in
    the dataframe being passed; only that it must contain *at least* the
    `columns` it expects.

    An important and useful class method is `from_yaml`, which takes as
    argument the path to a YAML file specifying a collection of functors.

    Parameters
    ----------
    funcs : `dict` or `list`
        Dictionary or list of functors. If a list, then it will be converted
        into a dictonary according to the `.shortname` attribute of each functor.
    """
    dataset = None

    def __init__(self, funcs, **kwargs):
        # Bug fix: use isinstance so dict subclasses (e.g. OrderedDict) are
        # accepted; ``type(funcs) == dict`` rejected them.
        if isinstance(funcs, dict):
            self.funcDict = funcs
        else:
            self.funcDict = {f.shortname: f for f in funcs}

        self._filt = None

        super().__init__(**kwargs)

    @property
    def filt(self):
        return self._filt

    @filt.setter
    def filt(self, filt):
        # Propagate the filter to every component functor.
        if filt is not None:
            for _, f in self.funcDict.items():
                f.filt = filt
        self._filt = filt

    def update(self, new):
        """Add the functors of a dict or another `CompositeFunctor`."""
        if isinstance(new, dict):
            self.funcDict.update(new)
        elif isinstance(new, CompositeFunctor):
            self.funcDict.update(new.funcDict)
        else:
            raise TypeError('Can only update with dictionary or CompositeFunctor.')

        # Make sure new functors have the same 'filt' set
        if self.filt is not None:
            self.filt = self.filt

    @property
    def columns(self):
        return list(set([x for y in [f.columns for f in self.funcDict.values()] for x in y]))

    def multilevelColumns(self, parq):
        return list(set([x for y in [f.multilevelColumns(parq)
                                     for f in self.funcDict.values()] for x in y]))

    def __call__(self, parq, **kwargs):
        if isinstance(parq, MultilevelParquetTable):
            columns = self.multilevelColumns(parq)
            df = parq.toDataFrame(columns=columns, droplevels=False)
            valDict = {}
            for k, f in self.funcDict.items():
                try:
                    subdf = f._setLevels(df[f.multilevelColumns(parq)])
                    valDict[k] = f._func(subdf)
                except Exception:
                    # Bug fix: fall back on the full dataframe (same index).
                    # Previously this referenced ``subdf``, which is unbound
                    # (or stale from a previous iteration) when the failure
                    # happens before its assignment.
                    valDict[k] = f.fail(df)
        else:
            columns = self.columns
            df = parq.toDataFrame(columns=columns)
            valDict = {k: f._func(df) for k, f in self.funcDict.items()}

        try:
            valDf = pd.concat(valDict, axis=1)
        except TypeError:
            print([(k, type(v)) for k, v in valDict.items()])
            raise

        if kwargs.get('dropna', False):
            valDf = valDf.dropna(how='any')

        return valDf

    @classmethod
    def renameCol(cls, col, renameRules):
        """Apply prefix rename rules (list of (old, new) pairs) to a column name."""
        if renameRules is None:
            return col
        for old, new in renameRules:
            if col.startswith(old):
                col = col.replace(old, new)
        return col

    @classmethod
    def from_file(cls, filename, **kwargs):
        """Build a `CompositeFunctor` from a YAML file on disk."""
        with open(filename) as f:
            translationDefinition = yaml.safe_load(f)

        return cls.from_yaml(translationDefinition, **kwargs)

    @classmethod
    def from_yaml(cls, translationDefinition, **kwargs):
        """Build a `CompositeFunctor` from a parsed YAML specification."""
        funcs = {}
        for func, val in translationDefinition['funcs'].items():
            funcs[func] = init_fromDict(val)

        renameRules = translationDefinition.get('flag_rename_rules', None)

        if 'refFlags' in translationDefinition:
            for flag in translationDefinition['refFlags']:
                funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='ref')

        if 'flags' in translationDefinition:
            for flag in translationDefinition['flags']:
                funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='meas')

        return cls(funcs, **kwargs)
def mag_aware_eval(df, expr):
    """Evaluate an expression on a DataFrame, knowing what the 'mag' function means

    Builds on `pandas.DataFrame.eval`, which parses and executes math on
    dataframes.  ``mag(x)`` in the expression is rewritten to
    ``-2.5*log(x)/log(10)``; if that fails, ``_instFlux`` is appended to the
    column name and the evaluation retried.

    Parameters
    ----------
    df : pandas.DataFrame
        Dataframe on which to evaluate expression.

    expr : str
        Expression.
    """
    # Note: the ``truediv=True`` argument formerly passed to ``df.eval`` was
    # dropped; true division is the only behavior on Python 3 and the keyword
    # was removed in pandas 2.0.
    try:
        expr_new = re.sub(r'mag\((\w+)\)', r'-2.5*log(\g<1>)/log(10)', expr)
        val = df.eval(expr_new)
    except Exception:  # Should check what actually gets raised
        expr_new = re.sub(r'mag\((\w+)\)', r'-2.5*log(\g<1>_instFlux)/log(10)', expr)
        val = df.eval(expr_new)
    return val
class CustomFunctor(Functor):
    """Arbitrary computation on a catalog

    Column names (and thus the columns to be loaded from catalog) are found
    by finding all words and trying to ignore all "math-y" words.

    Parameters
    ----------
    expr : str
        Expression to evaluate, to be parsed and executed by `mag_aware_eval`.
    """
    # Words that look like identifiers but are math functions, not columns.
    _ignore_words = ('mag', 'sin', 'cos', 'exp', 'log', 'sqrt')

    def __init__(self, expr, **kwargs):
        self.expr = expr
        super().__init__(**kwargs)

    @property
    def name(self):
        return self.expr

    @property
    def columns(self):
        """Derive the set of required columns by parsing the expression."""
        flux_cols = re.findall(r'mag\(\s*(\w+)\s*\)', self.expr)

        cols = [word for word in re.findall(r'[a-zA-Z_]+', self.expr)
                if word not in self._ignore_words]
        not_a_col = []
        for flux_col in flux_cols:
            if re.search('_instFlux$', flux_col):
                cols.append(flux_col)
            else:
                # mag() arguments get '_instFlux' appended; the bare name
                # itself is then not a real column.
                cols.append('{}_instFlux'.format(flux_col))
                not_a_col.append(flux_col)

        return list(set([c for c in cols if c not in not_a_col]))

    def _func(self, df):
        return mag_aware_eval(df, self.expr)
class Column(Functor):
    """Return a single named column from the catalog, unchanged."""

    def __init__(self, col, **kwargs):
        self.col = col
        super().__init__(**kwargs)

    @property
    def name(self):
        # The column name doubles as the functor name.
        return self.col

    @property
    def columns(self):
        return [self.col]

    def _func(self, df):
        return df[self.col]
class Index(Functor):
    """Return the value of the index for each object."""

    columns = ['coord_ra']  # just a dummy; something has to be here
    _defaultDataset = 'ref'
    _defaultNoDup = True

    def _func(self, df):
        # Materialize the index as a Series aligned to itself.
        return pd.Series(df.index, index=df.index)
class IDColumn(Column):
    """Return the object's id (the dataframe index) as a column."""

    col = 'id'
    _allow_difference = False
    _defaultNoDup = True

    def _func(self, df):
        # Ignore the 'id' column contents; the index is authoritative.
        return pd.Series(df.index, index=df.index)
class FootprintNPix(Column):
    """Number of pixels in the source footprint (`base_Footprint_nPix`)."""

    col = 'base_Footprint_nPix'
class CoordColumn(Column):
    """Base class for coordinate column, in degrees

    If `_radians` is true (the default), the stored column is assumed to be
    in radians and is converted to degrees on output.
    """
    _radians = True

    def __init__(self, col, **kwargs):
        super().__init__(col, **kwargs)

    def _func(self, df):
        res = df[self.col]
        if self._radians:
            # Bug fix: convert out-of-place.  ``res *= 180/np.pi`` operated
            # in place on the Series and therefore modified the column
            # inside the caller's dataframe.
            res = res * (180 / np.pi)
        return res
class RAColumn(CoordColumn):
    """Right Ascension, in degrees (from `coord_ra`)."""

    name = 'RA'
    _defaultNoDup = True

    def __init__(self, **kwargs):
        super().__init__('coord_ra', **kwargs)

    def __call__(self, catalog, **kwargs):
        # Pass-through override; kept for interface compatibility.
        return super().__call__(catalog, **kwargs)
class DecColumn(CoordColumn):
    """Declination, in degrees (from `coord_dec`)."""

    name = 'Dec'
    _defaultNoDup = True

    def __init__(self, **kwargs):
        super().__init__('coord_dec', **kwargs)

    def __call__(self, catalog, **kwargs):
        # Pass-through override; kept for interface compatibility.
        return super().__call__(catalog, **kwargs)
def fluxName(col):
    """Return ``col`` with an ``'_instFlux'`` suffix, adding it if missing."""
    return col if col.endswith('_instFlux') else col + '_instFlux'
def fluxErrName(col):
    """Return ``col`` with an ``'_instFluxErr'`` suffix, adding it if missing."""
    return col if col.endswith('_instFluxErr') else col + '_instFluxErr'
class Mag(Functor):
    """Compute calibrated magnitude

    Takes a `calib` argument, which returns the flux at mag=0
    as `calib.getFluxMag0()`. If not provided, then the default
    `fluxMag0` is 63095734448.0194, which is default for HSC.
    This default should be removed in DM-21955

    This calculation hides warnings about invalid values and dividing by zero.

    As for all functors, a `dataset` and `filt` kwarg should be provided upon
    initialization. Unlike the default `Functor`, however, the default dataset
    for a `Mag` is `'meas'`, rather than `'ref'`.

    Parameters
    ----------
    col : `str`
        Name of flux column from which to compute magnitude. Can be parseable
        by `lsst.pipe.tasks.functors.fluxName` function---that is, you can pass
        `'modelfit_CModel'` instead of `'modelfit_CModel_instFlux'`) and it will
        understand.
    calib : `lsst.afw.image.calib.Calib` (optional)
        Object that knows zero point.
    """
    _defaultDataset = 'meas'

    def __init__(self, col, calib=None, **kwargs):
        self.col = fluxName(col)
        self.calib = calib
        if calib is not None:
            self.fluxMag0 = calib.getFluxMag0()[0]
        else:
            # TO DO: DM-21955 Replace hard coded photometic calibration values
            self.fluxMag0 = 63095734448.0194

        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.col]

    def _func(self, df):
        # Fix: use the stdlib ``warnings`` module.  ``np.warnings`` was an
        # undocumented alias removed in numpy >= 1.25.
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', r'invalid value encountered')
            warnings.filterwarnings('ignore', r'divide by zero')
            return -2.5*np.log10(df[self.col] / self.fluxMag0)

    @property
    def name(self):
        return 'mag_{0}'.format(self.col)
class MagErr(Mag):
    """Compute calibrated magnitude uncertainty

    Takes the same `calib` object as `lsst.pipe.tasks.functors.Mag`.

    Parameters
    ----------
    col : `str`
        Name of flux column
    calib : `lsst.afw.image.calib.Calib` (optional)
        Object that knows zero point.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        if self.calib is not None:
            self.fluxMag0Err = self.calib.getFluxMag0()[1]
        else:
            self.fluxMag0Err = 0.

    @property
    def columns(self):
        return [self.col, self.col + 'Err']

    def _func(self, df):
        # Fix: use the stdlib ``warnings`` module.  ``np.warnings`` was an
        # undocumented alias removed in numpy >= 1.25.
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', r'invalid value encountered')
            warnings.filterwarnings('ignore', r'divide by zero')
            fluxCol, fluxErrCol = self.columns
            # Propagate flux error and zero-point error in quadrature.
            x = df[fluxErrCol] / df[fluxCol]
            y = self.fluxMag0Err / self.fluxMag0
            magErr = (2.5 / np.log(10.)) * np.sqrt(x*x + y*y)
            return magErr

    @property
    def name(self):
        return super().name + '_err'
class NanoMaggie(Mag):
    """Flux in units of nanomaggies: instrumental flux scaled by
    ``fluxMag0`` and multiplied by 1e9.
    """

    def _func(self, df):
        scaled = df[self.col] / self.fluxMag0
        return scaled * 1e9
class MagDiff(Functor):
    """Functor to calculate magnitude difference

    Computes ``-2.5*log10(flux1/flux2)`` between the two given flux columns.
    (Docstring moved to the proper first-statement position so it becomes the
    class ``__doc__``; it was previously a dead string expression.)

    Parameters
    ----------
    col1, col2 : `str`
        Flux column names; parseable by `fluxName`.
    """
    _defaultDataset = 'meas'

    def __init__(self, col1, col2, **kwargs):
        self.col1 = fluxName(col1)
        self.col2 = fluxName(col2)
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.col1, self.col2]

    def _func(self, df):
        # Fix: use the stdlib ``warnings`` module.  ``np.warnings`` was an
        # undocumented alias removed in numpy >= 1.25.
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', r'invalid value encountered')
            warnings.filterwarnings('ignore', r'divide by zero')
            return -2.5*np.log10(df[self.col1]/df[self.col2])

    @property
    def name(self):
        return '(mag_{0} - mag_{1})'.format(self.col1, self.col2)

    @property
    def shortname(self):
        return 'magDiff_{0}_{1}'.format(self.col1, self.col2)
class Color(Functor):
    """Compute the color between two filters

    Computes color by initializing two different `Mag` functors based on the
    `col` and filters provided, and then returning the difference.

    This is enabled by the `_func` expecting a dataframe with a multilevel
    column index, with both `'filter'` and `'column'`, instead of just
    `'column'`, which is the `Functor` default. This is controlled by the
    `_dfLevels` attribute.

    Also of note, the default dataset for `Color` is `forced_src'`, whereas
    for `Mag` it is `'meas'`.

    Parameters
    ----------
    col : str
        Name of flux column from which to compute; same as would be passed to
        `lsst.pipe.tasks.functors.Mag`.

    filt2, filt1 : str
        Filters from which to compute magnitude difference.
        Color computed is `Mag(filt2) - Mag(filt1)`.
    """
    _defaultDataset = 'forced_src'
    _dfLevels = ('filter', 'column')
    _defaultNoDup = True

    def __init__(self, col, filt2, filt1, **kwargs):
        self.col = fluxName(col)
        if filt2 == filt1:
            raise RuntimeError("Cannot compute Color for %s: %s - %s " % (col, filt2, filt1))
        self.filt2 = filt2
        self.filt1 = filt1

        # One Mag functor per filter; the color is their difference.
        self.mag2 = Mag(col, filt=filt2, **kwargs)
        self.mag1 = Mag(col, filt=filt1, **kwargs)

        super().__init__(**kwargs)

    @property
    def filt(self):
        # A color spans two filters, so a single 'filt' is meaningless.
        return None

    @filt.setter
    def filt(self, filt):
        pass

    def _func(self, df):
        return self.mag2._func(df[self.filt2]) - self.mag1._func(df[self.filt1])

    @property
    def columns(self):
        return [self.mag1.col, self.mag2.col]

    def multilevelColumns(self, parq):
        # Column-index keys for the flux column in each of the two filters.
        return [(self.dataset, self.filt1, self.col),
                (self.dataset, self.filt2, self.col)]

    @property
    def name(self):
        return '{0} - {1} ({2})'.format(self.filt2, self.filt1, self.col)

    @property
    def shortname(self):
        return '{0}_{1}m{2}'.format(self.col, self.filt2.replace('-', ''),
                                    self.filt1.replace('-', ''))
class Labeller(Functor):
    """Main function of this subclass is to override the dropna=True
    """
    _null_label = 'null'
    _allow_difference = False
    name = 'label'
    _force_str = False

    def __call__(self, parq, dropna=False, **kwargs):
        # Always force dropna=False: labels are produced for every row.
        return super().__call__(parq, dropna=False, **kwargs)
class StarGalaxyLabeller(Labeller):
    """Label each row 'star'/'galaxy'/'null' from the extendedness column."""

    _columns = ["base_ClassificationExtendedness_value"]
    _column = "base_ClassificationExtendedness_value"

    def _func(self, df):
        extendedness = df[self._columns][self._column]
        missing = extendedness.isnull()
        # Code 0/1 from the 0.5 threshold; code 2 marks missing values.
        codes = (extendedness < 0.5).astype(int).mask(missing, 2)

        # TODO: DM-21954 Look into veracity of inline comment below
        # are these backwards?
        categories = ['galaxy', 'star', self._null_label]
        label = pd.Series(pd.Categorical.from_codes(codes, categories=categories),
                          index=extendedness.index, name='label')
        if self._force_str:
            label = label.astype(str)
        return label
class NumStarLabeller(Labeller):
    """Label rows by star count bins from the 'numStarFlags' column."""

    _columns = ['numStarFlags']
    # NOTE(review): this mapping uses 'notStar' while _func's categories use
    # 'noStar' (and inverse ordering) — confirm which naming is intended.
    labels = {"star": 0, "maybe": 1, "notStar": 2}

    def _func(self, df):
        counts = df[self._columns][self._columns[0]]

        # Number of filters
        n = len(counts.unique()) - 1

        labels = ['noStar', 'maybe', 'star']
        label = pd.Series(pd.cut(counts, [-1, 0, n-1, n], labels=labels),
                          index=counts.index, name='label')

        if self._force_str:
            label = label.astype(str)

        return label
class DeconvolvedMoments(Functor):
    """Source moments with the PSF moments subtracted (HSM when available,
    falling back on SDSS shape for the source term)."""

    name = 'Deconvolved Moments'
    shortname = 'deconvolvedMoments'
    _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
                "ext_shapeHSM_HsmSourceMoments_yy",
                "base_SdssShape_xx", "base_SdssShape_yy",
                "ext_shapeHSM_HsmPsfMoments_xx",
                "ext_shapeHSM_HsmPsfMoments_yy")

    def _func(self, df):
        """Calculate deconvolved moments"""
        if "ext_shapeHSM_HsmSourceMoments_xx" in df.columns:  # _xx added by tdm
            hsm = df["ext_shapeHSM_HsmSourceMoments_xx"] + df["ext_shapeHSM_HsmSourceMoments_yy"]
        else:
            hsm = np.ones(len(df))*np.nan
        sdss = df["base_SdssShape_xx"] + df["base_SdssShape_yy"]
        if "ext_shapeHSM_HsmPsfMoments_xx" in df.columns:
            psf = df["ext_shapeHSM_HsmPsfMoments_xx"] + df["ext_shapeHSM_HsmPsfMoments_yy"]
        else:
            # LSST does not have shape.sdss.psf. Could instead add base_PsfShape to catalog using
            # exposure.getPsf().computeShape(s.getCentroid()).getIxx()
            # raise TaskError("No psf shape parameter found in catalog")
            raise RuntimeError('No psf shape parameter found in catalog')

        # Prefer HSM source moments where finite; fall back on SDSS.
        return hsm.where(np.isfinite(hsm), sdss) - psf
class SdssTraceSize(Functor):
    """Functor to calculate SDSS trace radius size for sources"""

    name = "SDSS Trace Size"
    shortname = 'sdssTrace'
    _columns = ("base_SdssShape_xx", "base_SdssShape_yy")

    def _func(self, df):
        # Trace radius: sqrt((Ixx + Iyy) / 2)
        return np.sqrt(0.5*(df["base_SdssShape_xx"] + df["base_SdssShape_yy"]))
class PsfSdssTraceSizeDiff(Functor):
    """Functor to calculate SDSS trace radius size difference (%) between object and psf model"""

    name = "PSF - SDSS Trace Size"
    shortname = 'psf_sdssTrace'
    _columns = ("base_SdssShape_xx", "base_SdssShape_yy",
                "base_SdssShape_psf_xx", "base_SdssShape_psf_yy")

    def _func(self, df):
        objSize = np.sqrt(0.5*(df["base_SdssShape_xx"] + df["base_SdssShape_yy"]))
        modelSize = np.sqrt(0.5*(df["base_SdssShape_psf_xx"] + df["base_SdssShape_psf_yy"]))
        # Percentage difference relative to the mean of the two sizes.
        return 100*(objSize - modelSize)/(0.5*(objSize + modelSize))
class HsmTraceSize(Functor):
    """Functor to calculate HSM trace radius size for sources"""

    name = 'HSM Trace Size'
    shortname = 'hsmTrace'
    _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
                "ext_shapeHSM_HsmSourceMoments_yy")

    def _func(self, df):
        # Trace radius: sqrt((Ixx + Iyy) / 2)
        return np.sqrt(0.5*(df["ext_shapeHSM_HsmSourceMoments_xx"]
                            + df["ext_shapeHSM_HsmSourceMoments_yy"]))
class PsfHsmTraceSizeDiff(Functor):
    """Functor to calculate HSM trace radius size difference (%) between object and psf model"""

    name = 'PSF - HSM Trace Size'
    shortname = 'psf_HsmTrace'
    _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
                "ext_shapeHSM_HsmSourceMoments_yy",
                "ext_shapeHSM_HsmPsfMoments_xx",
                "ext_shapeHSM_HsmPsfMoments_yy")

    def _func(self, df):
        objSize = np.sqrt(0.5*(df["ext_shapeHSM_HsmSourceMoments_xx"]
                               + df["ext_shapeHSM_HsmSourceMoments_yy"]))
        modelSize = np.sqrt(0.5*(df["ext_shapeHSM_HsmPsfMoments_xx"]
                                 + df["ext_shapeHSM_HsmPsfMoments_yy"]))
        # Percentage difference relative to the mean of the two sizes.
        return 100*(objSize - modelSize)/(0.5*(objSize + modelSize))
class HsmFwhm(Functor):
    """PSF FWHM in arcseconds, from HSM PSF moments and a fixed pixel scale."""

    name = 'HSM Psf FWHM'
    _columns = ('ext_shapeHSM_HsmPsfMoments_xx', 'ext_shapeHSM_HsmPsfMoments_yy')
    # TODO: DM-21403 pixel scale should be computed from the CD matrix or transform matrix
    pixelScale = 0.168
    SIGMA2FWHM = 2*np.sqrt(2*np.log(2))

    def _func(self, df):
        traceSigma = np.sqrt(0.5*(df['ext_shapeHSM_HsmPsfMoments_xx']
                                  + df['ext_shapeHSM_HsmPsfMoments_yy']))
        return self.pixelScale*self.SIGMA2FWHM*traceSigma
class E1(Functor):
    """Distortion ellipticity component e1 = (Ixx - Iyy) / (Ixx + Iyy).

    Parameters
    ----------
    colXX, colXY, colYY : `str`
        Names of the second-moment columns.
    """
    name = "Distortion Ellipticity (e1)"
    shortname = "Distortion"

    def __init__(self, colXX, colXY, colYY, **kwargs):
        self.colXX = colXX
        self.colXY = colXY
        self.colYY = colYY
        self._columns = [self.colXX, self.colXY, self.colYY]
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.colXX, self.colXY, self.colYY]

    def _func(self, df):
        # Bug fix: parenthesize the numerator.  The previous expression
        # ``xx - yy / (xx + yy)`` computed ``xx - (yy / (xx + yy))`` due to
        # operator precedence, which is not the distortion ellipticity.
        return (df[self.colXX] - df[self.colYY]) / (df[self.colXX] + df[self.colYY])
class E2(Functor):
    """Distortion ellipticity component e2 = 2*Ixy / (Ixx + Iyy).

    Parameters
    ----------
    colXX, colXY, colYY : `str`
        Names of the second-moment columns.
    """
    name = "Ellipticity e2"

    def __init__(self, colXX, colXY, colYY, **kwargs):
        self.colXX = colXX
        self.colXY = colXY
        self.colYY = colYY
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.colXX, self.colXY, self.colYY]

    def _func(self, df):
        trace = df[self.colXX] + df[self.colYY]
        return 2*df[self.colXY] / trace
class RadiusFromQuadrupole(Functor):
    """Determinant radius from quadrupole moments:
    ``(Ixx*Iyy - Ixy**2) ** 0.25``.

    Parameters
    ----------
    colXX, colXY, colYY : `str`
        Names of the second-moment columns.
    """

    def __init__(self, colXX, colXY, colYY, **kwargs):
        self.colXX = colXX
        self.colXY = colXY
        self.colYY = colYY
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.colXX, self.colXY, self.colYY]

    def _func(self, df):
        determinant = df[self.colXX]*df[self.colYY] - df[self.colXY]**2
        return determinant**0.25
class ComputePixelScale(Functor):
    """Compute the local pixel scale from the stored CDMatrix.
    """
    name = "Pixel Scale"

    def __init__(self,
                 colCD_1_1,
                 colCD_1_2,
                 colCD_2_1,
                 colCD_2_2,
                 **kwargs):
        self.colCD_1_1 = colCD_1_1
        self.colCD_1_2 = colCD_1_2
        self.colCD_2_1 = colCD_2_1
        self.colCD_2_2 = colCD_2_2
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.colCD_1_1, self.colCD_1_2,
                self.colCD_2_1, self.colCD_2_2]

    def pixelScale(self, cd11, cd12, cd21, cd22):
        """Compute the local pixel scale conversion.

        Parameters
        ----------
        cd11 : `pandas.Series`
            [1, 1] element of the local CDMatricies.
        cd12 : `pandas.Series`
            [1, 2] element of the local CDMatricies.
        cd21 : `pandas.Series`
            [2, 1] element of the local CDMatricies.
        cd22 : `pandas.Series`
            [2, 2] element of the local CDMatricies.

        Returns
        -------
        pixScale : `pandas.Series`
            Arcseconds per pixel at the location of the local WC
        """
        # 3600 converts the CD-matrix determinant (degrees) to arcseconds.
        determinant = cd11 * cd22 - cd12 * cd21
        return 3600 * np.sqrt(np.fabs(determinant))

    def _func(self, df):
        return self.pixelScale(df[self.colCD_1_1], df[self.colCD_1_2],
                               df[self.colCD_2_1], df[self.colCD_2_2])
class ConvertPixelToArcseconds(ComputePixelScale):
    """Convert a value in units pixels to units arcseconds.
    """
    # NOTE: this class attribute is shadowed by the ``name`` property below;
    # kept for fidelity with the original definition.
    name = "Pixel scale converter"

    def __init__(self,
                 col,
                 colCD_1_1,
                 colCD_1_2,
                 colCD_2_1,
                 colCD_2_2, **kwargs):
        self.col = col
        super().__init__(colCD_1_1, colCD_1_2, colCD_2_1, colCD_2_2, **kwargs)

    @property
    def name(self):
        return f"{self.col}_asArcseconds"

    @property
    def columns(self):
        return [self.col,
                self.colCD_1_1, self.colCD_1_2,
                self.colCD_2_1, self.colCD_2_2]

    def _func(self, df):
        scale = self.pixelScale(df[self.colCD_1_1], df[self.colCD_1_2],
                                df[self.colCD_2_1], df[self.colCD_2_2])
        return df[self.col] * scale
class ReferenceBand(Functor):
    """For each row, the band whose merge_measurement flag is set."""

    name = 'Reference Band'
    shortname = 'refBand'

    @property
    def columns(self):
        return ["merge_measurement_i",
                "merge_measurement_r",
                "merge_measurement_z",
                "merge_measurement_y",
                "merge_measurement_g"]

    def _func(self, df):
        def getFilterAliasName(row):
            # get column name with the max value (True > False)
            colName = row.idxmax()
            return colName.replace('merge_measurement_', '')

        return df[self.columns].apply(getFilterAliasName, axis=1)
class Photometry(Functor):
    """Base class for photometric conversions from instrumental flux.

    Provides helpers to convert raw counts (dn) to calibrated fluxes in
    nanojanskys, magnitudes, and their errors, using either a supplied
    `calib` object or a hard-coded coadd zero point.

    Parameters
    ----------
    colFlux : `str`
        Name of the instrumental flux column.
    colFluxErr : `str`, optional
        Name of the associated flux-error column.
    calib : optional
        Object providing ``getFluxMag0()``; if omitted, ``COADD_ZP`` is used.
    """
    # AB to NanoJansky (3631 Jansky)
    AB_FLUX_SCALE = (0 * u.ABmag).to_value(u.nJy)
    LOG_AB_FLUX_SCALE = 12.56
    FIVE_OVER_2LOG10 = 1.085736204758129569
    # TO DO: DM-21955 Replace hard coded photometic calibration values
    COADD_ZP = 27

    def __init__(self, colFlux, colFluxErr=None, calib=None, **kwargs):
        # Vectorize the scalar hypot helper for elementwise use on arrays.
        self.vhypot = np.vectorize(self.hypot)
        self.col = colFlux
        self.colFluxErr = colFluxErr

        self.calib = calib
        if calib is not None:
            self.fluxMag0, self.fluxMag0Err = calib.getFluxMag0()
        else:
            self.fluxMag0 = 1./np.power(10, -0.4*self.COADD_ZP)
            self.fluxMag0Err = 0.

        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.col]

    @property
    def name(self):
        return 'mag_{0}'.format(self.col)

    @classmethod
    def hypot(cls, a, b):
        """Numerically stable scalar hypot (avoids overflow in a*a + b*b)."""
        if np.abs(a) < np.abs(b):
            a, b = b, a
        if a == 0.:
            return 0.
        q = b/a
        return np.abs(a) * np.sqrt(1. + q*q)

    def dn2flux(self, dn, fluxMag0):
        """Convert counts to calibrated flux in nanojanskys."""
        return self.AB_FLUX_SCALE * dn / fluxMag0

    def dn2mag(self, dn, fluxMag0):
        """Convert counts to AB magnitude (invalid/zero fluxes give NaN/inf silently)."""
        # Fix: use the stdlib ``warnings`` module.  ``np.warnings`` was an
        # undocumented alias removed in numpy >= 1.25.
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', r'invalid value encountered')
            warnings.filterwarnings('ignore', r'divide by zero')
            return -2.5 * np.log10(dn/fluxMag0)

    def dn2fluxErr(self, dn, dnErr, fluxMag0, fluxMag0Err):
        """Propagate count and zero-point errors to a flux error in nanojanskys."""
        retVal = self.vhypot(dn * fluxMag0Err, dnErr * fluxMag0)
        retVal *= self.AB_FLUX_SCALE / fluxMag0 / fluxMag0
        return retVal

    def dn2MagErr(self, dn, dnErr, fluxMag0, fluxMag0Err):
        """Propagate count and zero-point errors to a magnitude error."""
        retVal = self.dn2fluxErr(dn, dnErr, fluxMag0, fluxMag0Err) / self.dn2flux(dn, fluxMag0)
        return self.FIVE_OVER_2LOG10 * retVal
class NanoJansky(Photometry):
    """Calibrated flux in nanojanskys."""

    def _func(self, df):
        return self.dn2flux(df[self.col], self.fluxMag0)
class NanoJanskyErr(Photometry):
    """Error on the calibrated flux in nanojanskys."""

    @property
    def columns(self):
        return [self.col, self.colFluxErr]

    def _func(self, df):
        errors = self.dn2fluxErr(df[self.col], df[self.colFluxErr],
                                 self.fluxMag0, self.fluxMag0Err)
        return pd.Series(errors, index=df.index)
class Magnitude(Photometry):
    """Calibrated AB magnitude."""

    def _func(self, df):
        return self.dn2mag(df[self.col], self.fluxMag0)
class MagnitudeErr(Photometry):
    """Error on the calibrated AB magnitude."""

    @property
    def columns(self):
        return [self.col, self.colFluxErr]

    def _func(self, df):
        errors = self.dn2MagErr(df[self.col], df[self.colFluxErr],
                                self.fluxMag0, self.fluxMag0Err)
        return pd.Series(errors, index=df.index)
class LocalPhotometry(Functor):
    """Base class for calibrating the specified instrument flux column using
    the local photometric calibration.

    Parameters
    ----------
    instFluxCol : `str`
        Name of the instrument flux column.
    instFluxErrCol : `str`
        Name of the associated error columns for ``instFluxCol``.
    photoCalibCol : `str`
        Name of local calibration column.
    photoCalibErrCol : `str`
        Error associated with ``photoCalibCol``

    See also
    --------
    LocalPhotometry
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    """
    # AB magnitude of a 1 nJy source; added to -2.5*log10(flux [nJy])
    # to produce a calibrated AB magnitude.
    logNJanskyToAB = (1 * u.nJy).to_value(u.ABmag)

    def __init__(self,
                 instFluxCol,
                 instFluxErrCol,
                 photoCalibCol,
                 photoCalibErrCol,
                 **kwargs):
        self.instFluxCol = instFluxCol
        self.instFluxErrCol = instFluxErrCol
        self.photoCalibCol = photoCalibCol
        self.photoCalibErrCol = photoCalibErrCol
        super().__init__(**kwargs)

    def instFluxToNanojansky(self, instFlux, localCalib):
        """Convert instrument flux to nanojanskys.

        Parameters
        ----------
        instFlux : `numpy.ndarray` or `pandas.Series`
            Array of instrument flux measurements
        localCalib : `numpy.ndarray` or `pandas.Series`
            Array of local photometric calibration estimates.

        Returns
        -------
        calibFlux : `numpy.ndarray` or `pandas.Series`
            Array of calibrated flux measurements.
        """
        return instFlux * localCalib

    def instFluxErrToNanojanskyErr(self, instFlux, instFluxErr, localCalib, localCalibErr):
        """Convert instrument flux error to a nanojansky flux error.

        Parameters
        ----------
        instFlux : `numpy.ndarray` or `pandas.Series`
            Array of instrument flux measurements
        instFluxErr : `numpy.ndarray` or `pandas.Series`
            Errors on associated ``instFlux`` values
        localCalib : `numpy.ndarray` or `pandas.Series`
            Array of local photometric calibration estimates.
        localCalibErr : `numpy.ndarray` or `pandas.Series`
            Errors on associated ``localCalib`` values

        Returns
        -------
        calibFluxErr : `numpy.ndarray` or `pandas.Series`
            Errors on calibrated flux measurements.
        """
        # Standard error propagation for the product instFlux * localCalib.
        return np.hypot(instFluxErr * localCalib, instFlux * localCalibErr)

    def instFluxToMagnitude(self, instFlux, localCalib):
        """Convert instrument flux to an AB magnitude.

        Parameters
        ----------
        instFlux : `numpy.ndarray` or `pandas.Series`
            Array of instrument flux measurements
        localCalib : `numpy.ndarray` or `pandas.Series`
            Array of local photometric calibration estimates.

        Returns
        -------
        calibMag : `numpy.ndarray` or `pandas.Series`
            Array of calibrated AB magnitudes.
        """
        return -2.5 * np.log10(self.instFluxToNanojansky(instFlux, localCalib)) + self.logNJanskyToAB

    def instFluxErrToMagnitudeErr(self, instFlux, instFluxErr, localCalib, localCalibErr):
        """Convert instrument flux error to an AB magnitude error.

        Parameters
        ----------
        instFlux : `numpy.ndarray` or `pandas.Series`
            Array of instrument flux measurements
        instFluxErr : `numpy.ndarray` or `pandas.Series`
            Errors on associated ``instFlux`` values
        localCalib : `numpy.ndarray` or `pandas.Series`
            Array of local photometric calibration estimates.
        localCalibErr : `numpy.ndarray` or `pandas.Series`
            Errors on associated ``localCalib`` values

        Returns
        -------
        calibMagErr: `numpy.ndarray` or `pandas.Series`
            Error on calibrated AB magnitudes.
        """
        err = self.instFluxErrToNanojanskyErr(instFlux, instFluxErr, localCalib, localCalibErr)
        # BUGFIX: the denominator is the calibrated flux, which must be
        # computed from the local calibration (previously ``instFluxErr``
        # was passed in place of ``localCalib``, giving a wrong flux and
        # hence a wrong magnitude error).
        return 2.5 / np.log(10) * err / self.instFluxToNanojansky(instFlux, localCalib)
class LocalNanojansky(LocalPhotometry):
    """Compute calibrated fluxes using the local calibration value.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    """

    @property
    def columns(self):
        # Flux column plus the per-row calibration value.
        return [self.instFluxCol, self.photoCalibCol]

    @property
    def name(self):
        return f'flux_{self.instFluxCol}'

    def _func(self, df):
        instFlux = df[self.instFluxCol]
        calib = df[self.photoCalibCol]
        return self.instFluxToNanojansky(instFlux, calib)
class LocalNanojanskyErr(LocalPhotometry):
    """Compute calibrated flux errors using the local calibration value.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    """

    @property
    def columns(self):
        # Flux, flux error, calibration, and calibration error columns.
        return [self.instFluxCol, self.instFluxErrCol,
                self.photoCalibCol, self.photoCalibErrCol]

    @property
    def name(self):
        return f'fluxErr_{self.instFluxCol}'

    def _func(self, df):
        args = (df[self.instFluxCol], df[self.instFluxErrCol],
                df[self.photoCalibCol], df[self.photoCalibErrCol])
        return self.instFluxErrToNanojanskyErr(*args)
class LocalMagnitude(LocalPhotometry):
    """Compute calibrated AB magnitudes using the local calibration value.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    """

    @property
    def columns(self):
        # Flux column plus the per-row calibration value.
        return [self.instFluxCol, self.photoCalibCol]

    @property
    def name(self):
        return f'mag_{self.instFluxCol}'

    def _func(self, df):
        instFlux = df[self.instFluxCol]
        calib = df[self.photoCalibCol]
        return self.instFluxToMagnitude(instFlux, calib)
class LocalMagnitudeErr(LocalPhotometry):
    """Compute calibrated AB magnitude errors using the local calibration value.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    """

    @property
    def columns(self):
        # Flux, flux error, calibration, and calibration error columns.
        return [self.instFluxCol, self.instFluxErrCol,
                self.photoCalibCol, self.photoCalibErrCol]

    @property
    def name(self):
        return f'magErr_{self.instFluxCol}'

    def _func(self, df):
        args = (df[self.instFluxCol], df[self.instFluxErrCol],
                df[self.photoCalibCol], df[self.photoCalibErrCol])
        return self.instFluxErrToMagnitudeErr(*args)