Coverage for python/lsst/pipe/tasks/functors.py : 67%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
import re
import warnings

import yaml
import pandas as pd
import numpy as np
import astropy.units as u

from lsst.daf.persistence import doImport
from .parquetTable import MultilevelParquetTable
def init_fromDict(initDict, basePath='lsst.pipe.tasks.functors',
                  typeKey='functor', name=None):
    """Initialize an object defined in a dictionary

    The object needs to be importable as
    f'{basePath}.{initDict[typeKey]}'
    The positional and keyword arguments (if any) are contained in
    "args" and "kwargs" entries in the dictionary, respectively.
    This is used in `functors.CompositeFunctor.from_yaml` to initialize
    a composite functor from a specification in a YAML file.

    Parameters
    ----------
    initDict : dictionary
        Dictionary describing object's initialization. Must contain
        an entry keyed by ``typeKey`` that is the name of the object,
        relative to ``basePath``.
    basePath : str
        Path relative to module in which ``initDict[typeKey]`` is defined.
    typeKey : str
        Key of ``initDict`` that is the name of the object
        (relative to `basePath`).
    name : str, optional
        Label used in the error message if construction fails.
    """
    initDict = initDict.copy()
    # TO DO: DM-21956 We should be able to define functors outside this module
    pythonType = doImport(f'{basePath}.{initDict.pop(typeKey)}')
    args = []
    if 'args' in initDict:
        args = initDict.pop('args')
        if isinstance(args, str):
            args = [args]
    try:
        element = pythonType(*args, **initDict)
    except Exception as e:
        message = f'Error in constructing functor "{name}" of type {pythonType.__name__} with args: {args}'
        # Chain the original exception (`from e`) so the root cause and its
        # traceback are preserved for debugging.
        raise type(e)(message, e.args) from e
    return element
class Functor(object):
    """Define and execute a calculation on a ParquetTable

    The `__call__` method accepts a `ParquetTable` object, and returns the
    result of the calculation as a single column. Each functor defines what
    columns are needed for the calculation, and only these columns are read
    from the `ParquetTable`.

    The action of `__call__` consists of two steps: first, loading the
    necessary columns from disk into memory as a `pandas.DataFrame` object;
    and second, performing the computation on this dataframe and returning the
    result.

    To define a new `Functor`, a subclass must define a `_func` method,
    that takes a `pandas.DataFrame` and returns result in a `pandas.Series`.
    In addition, it must define the following attributes

    * `_columns`: The columns necessary to perform the calculation
    * `name`: A name appropriate for a figure axis label
    * `shortname`: A name appropriate for use as a dictionary key

    On initialization, a `Functor` should declare what filter (`filt` kwarg)
    and dataset (e.g. `'ref'`, `'meas'`, `'forced_src'`) it is intended to be
    applied to. This enables the `_get_cols` method to extract the proper
    columns from the parquet file. If not specified, the dataset will fall back
    on the `_defaultDataset` attribute. If filter is not specified and `dataset`
    is anything other than `'ref'`, then an error will be raised when trying to
    perform the calculation.

    As currently implemented, `Functor` is only set up to expect a
    `ParquetTable` of the format of the `deepCoadd_obj` dataset; that is, a
    `MultilevelParquetTable` with the levels of the column index being `filter`,
    `dataset`, and `column`. This is defined in the `_columnLevels` attribute,
    as well as being implicit in the role of the `filt` and `dataset` attributes
    defined at initialization. In addition, the `_get_cols` method that reads
    the dataframe from the `ParquetTable` will return a dataframe with column
    index levels defined by the `_dfLevels` attribute; by default, this is
    `column`.

    The `_columnLevels` and `_dfLevels` attributes should generally not need to
    be changed, unless `_func` needs columns from multiple filters or datasets
    to do the calculation.
    An example of this is the `lsst.pipe.tasks.functors.Color` functor, for
    which `_dfLevels = ('filter', 'column')`, and `_func` expects the dataframe
    it gets to have those levels in the column index.

    Parameters
    ----------
    filt : str
        Filter upon which to do the calculation

    dataset : str
        Dataset upon which to do the calculation
        (e.g., 'ref', 'meas', 'forced_src').
    """

    _defaultDataset = 'ref'
    _columnLevels = ('filter', 'dataset', 'column')
    _dfLevels = ('column',)
    _defaultNoDup = False

    def __init__(self, filt=None, dataset=None, noDup=None):
        self.filt = filt
        self.dataset = dataset if dataset is not None else self._defaultDataset
        self._noDup = noDup

    @property
    def noDup(self):
        # An explicitly-passed noDup wins over the class default.
        if self._noDup is not None:
            return self._noDup
        else:
            return self._defaultNoDup

    @property
    def columns(self):
        """Columns required to perform calculation
        """
        if not hasattr(self, '_columns'):
            raise NotImplementedError('Must define columns property or _columns attribute')
        return self._columns

    def multilevelColumns(self, parq):
        # Translate `self.columns` into the (filter, dataset, column)
        # selections of the multilevel column index.
        if not set(parq.columnLevels) == set(self._columnLevels):
            raise ValueError('ParquetTable does not have the expected column levels. '
                             f'Got {parq.columnLevels}; expected {self._columnLevels}.')

        columnDict = {'column': self.columns,
                      'dataset': self.dataset}
        if self.filt is None:
            if 'filter' in parq.columnLevels:
                if self.dataset == 'ref':
                    # The ref dataset is filter-independent, so any filter
                    # present in the table will do.
                    columnDict['filter'] = parq.columnLevelNames['filter'][0]
                else:
                    raise ValueError(f"'filt' not set for functor {self.name}"
                                     f"(dataset {self.dataset}) "
                                     "and ParquetTable "
                                     "contains multiple filters in column index. "
                                     "Set 'filt' or set 'dataset' to 'ref'.")
        else:
            columnDict['filter'] = self.filt

        return parq._colsFromDict(columnDict)

    def _func(self, df, dropna=True):
        raise NotImplementedError('Must define calculation on dataframe')

    def _get_cols(self, parq):
        """Retrieve dataframe necessary for calculation.

        Returns dataframe upon which `self._func` can act.
        """
        if isinstance(parq, MultilevelParquetTable):
            columns = self.multilevelColumns(parq)
            df = parq.toDataFrame(columns=columns, droplevels=False)
            df = self._setLevels(df)
        else:
            columns = self.columns
            df = parq.toDataFrame(columns=columns)

        return df

    def _setLevels(self, df):
        # Drop every column-index level not listed in `_dfLevels`.
        levelsToDrop = [n for n in df.columns.names if n not in self._dfLevels]
        df.columns = df.columns.droplevel(levelsToDrop)
        return df

    def _dropna(self, vals):
        return vals.dropna()

    def __call__(self, parq, dropna=False):
        # BUG FIX: `_get_cols` used to be inside the try block, so a failure
        # there left `df` unbound and `self.fail(df)` raised a confusing
        # NameError.  Column-retrieval errors now propagate; only the
        # computation itself falls back to `fail`.
        df = self._get_cols(parq)
        try:
            vals = self._func(df)
        except Exception:
            vals = self.fail(df)
        if dropna:
            vals = self._dropna(vals)

        return vals

    def fail(self, df):
        # All-NaN Series aligned with the input dataframe's index.
        return pd.Series(np.full(len(df), np.nan), index=df.index)

    @property
    def name(self):
        """Full name of functor (suitable for figure labels)
        """
        # BUG FIX: previously this *returned* the NotImplementedError class
        # instead of raising it.
        raise NotImplementedError('Subclasses must define a name')

    @property
    def shortname(self):
        """Short name of functor (suitable for column name/dict key)
        """
        return self.name
class CompositeFunctor(Functor):
    """Perform multiple calculations at once on a catalog

    The role of a `CompositeFunctor` is to group together computations from
    multiple functors. Instead of returning `pandas.Series` a
    `CompositeFunctor` returns a `pandas.Dataframe`, with the column names
    being the keys of `funcDict`.

    The `columns` attribute of a `CompositeFunctor` is the union of all columns
    in all the component functors.

    A `CompositeFunctor` does not use a `_func` method itself; rather,
    when a `CompositeFunctor` is called, all its columns are loaded
    at once, and the resulting dataframe is passed to the `_func` method of each component
    functor. This has the advantage of only doing I/O (reading from parquet file) once,
    and works because each individual `_func` method of each component functor does not
    care if there are *extra* columns in the dataframe being passed; only that it must contain
    *at least* the `columns` it expects.

    An important and useful class method is `from_yaml`, which takes as argument the path to a YAML
    file specifying a collection of functors.

    Parameters
    ----------
    funcs : `dict` or `list`
        Dictionary or list of functors. If a list, then it will be converted
        into a dictonary according to the `.shortname` attribute of each functor.
    """
    dataset = None

    def __init__(self, funcs, **kwargs):

        # isinstance rather than `type(...) == dict` so dict subclasses
        # (e.g. OrderedDict) are treated as mappings too.
        if isinstance(funcs, dict):
            self.funcDict = funcs
        else:
            self.funcDict = {f.shortname: f for f in funcs}

        self._filt = None

        super().__init__(**kwargs)

    @property
    def filt(self):
        return self._filt

    @filt.setter
    def filt(self, filt):
        if filt is not None:
            # Propagate the filter to every component functor.
            for _, f in self.funcDict.items():
                f.filt = filt
        self._filt = filt

    def update(self, new):
        """Merge in functors from a dict or another CompositeFunctor."""
        if isinstance(new, dict):
            self.funcDict.update(new)
        elif isinstance(new, CompositeFunctor):
            self.funcDict.update(new.funcDict)
        else:
            raise TypeError('Can only update with dictionary or CompositeFunctor.')

        # Make sure new functors have the same 'filt' set
        if self.filt is not None:
            self.filt = self.filt

    @property
    def columns(self):
        # Union of the columns of all component functors.
        return list(set([x for y in [f.columns for f in self.funcDict.values()] for x in y]))

    def multilevelColumns(self, parq):
        return list(set([x for y in [f.multilevelColumns(parq)
                                     for f in self.funcDict.values()] for x in y]))

    def __call__(self, parq, **kwargs):
        if isinstance(parq, MultilevelParquetTable):
            columns = self.multilevelColumns(parq)
            df = parq.toDataFrame(columns=columns, droplevels=False)
            valDict = {}
            for k, f in self.funcDict.items():
                try:
                    subdf = f._setLevels(df[f.multilevelColumns(parq)])
                    valDict[k] = f._func(subdf)
                except Exception:
                    # BUG FIX: fail on `df` (same length and index) rather
                    # than `subdf`, which is unbound when the column
                    # selection itself raised.
                    valDict[k] = f.fail(df)
        else:
            columns = self.columns
            df = parq.toDataFrame(columns=columns)
            valDict = {k: f._func(df) for k, f in self.funcDict.items()}

        try:
            valDf = pd.concat(valDict, axis=1)
        except TypeError:
            print([(k, type(v)) for k, v in valDict.items()])
            raise

        if kwargs.get('dropna', False):
            valDf = valDf.dropna(how='any')

        return valDf

    @classmethod
    def renameCol(cls, col, renameRules):
        """Apply each (old, new) rule in turn: if `col` starts with `old`,
        every occurrence of `old` in `col` is replaced with `new`."""
        if renameRules is None:
            return col
        for old, new in renameRules:
            if col.startswith(old):
                col = col.replace(old, new)
        return col

    @classmethod
    def from_file(cls, filename, **kwargs):
        with open(filename) as f:
            translationDefinition = yaml.safe_load(f)

        return cls.from_yaml(translationDefinition, **kwargs)

    @classmethod
    def from_yaml(cls, translationDefinition, **kwargs):
        funcs = {}
        for func, val in translationDefinition['funcs'].items():
            funcs[func] = init_fromDict(val, name=func)

        if 'flag_rename_rules' in translationDefinition:
            renameRules = translationDefinition['flag_rename_rules']
        else:
            renameRules = None

        if 'refFlags' in translationDefinition:
            for flag in translationDefinition['refFlags']:
                funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='ref')

        if 'flags' in translationDefinition:
            for flag in translationDefinition['flags']:
                funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='meas')

        return cls(funcs, **kwargs)
def mag_aware_eval(df, expr):
    """Evaluate an expression on a DataFrame, knowing what the 'mag' function means

    Builds on `pandas.DataFrame.eval`, which parses and executes math on dataframes.

    Parameters
    ----------
    df : pandas.DataFrame
        Dataframe on which to evaluate expression.

    expr : str
        Expression.
    """
    try:
        expr_new = re.sub(r'mag\((\w+)\)', r'-2.5*log(\g<1>)/log(10)', expr)
        # NOTE: the `truediv` keyword was deprecated in pandas 0.25 and
        # removed in pandas 1.0; true division has long been the default,
        # so passing it is both unnecessary and a TypeError on modern pandas.
        val = df.eval(expr_new)
    except Exception:  # Should check what actually gets raised
        # Retry, assuming bare column names refer to their _instFlux columns.
        expr_new = re.sub(r'mag\((\w+)\)', r'-2.5*log(\g<1>_instFlux)/log(10)', expr)
        val = df.eval(expr_new)
    return val
class CustomFunctor(Functor):
    """Arbitrary computation on a catalog

    Column names (and thus the columns to be loaded from catalog) are found
    by finding all words and trying to ignore all "math-y" words.

    Parameters
    ----------
    expr : str
        Expression to evaluate, to be parsed and executed by `mag_aware_eval`.
    """
    _ignore_words = ('mag', 'sin', 'cos', 'exp', 'log', 'sqrt')

    def __init__(self, expr, **kwargs):
        self.expr = expr
        super().__init__(**kwargs)

    @property
    def name(self):
        return self.expr

    @property
    def columns(self):
        # Arguments of mag(...) calls need special treatment below.
        flux_cols = re.findall(r'mag\(\s*(\w+)\s*\)', self.expr)

        cols = [word for word in re.findall(r'[a-zA-Z_]+', self.expr)
                if word not in self._ignore_words]
        bare_flux_names = []
        for colname in flux_cols:
            if re.search('_instFlux$', colname):
                cols.append(colname)
            else:
                # A bare name inside mag() refers to its "_instFlux" column;
                # the bare name itself is not a real column.
                cols.append(f'{colname}_instFlux')
                bare_flux_names.append(colname)

        return list({c for c in cols if c not in bare_flux_names})

    def _func(self, df):
        return mag_aware_eval(df, self.expr)
class Column(Functor):
    """Return a single named column of the catalog, unmodified."""

    def __init__(self, col, **kwargs):
        self.col = col
        super().__init__(**kwargs)

    @property
    def name(self):
        # The column name doubles as the functor name.
        return self.col

    @property
    def columns(self):
        return [self.col]

    def _func(self, df):
        return df[self.col]
class Index(Functor):
    """Return the value of the index for each object."""

    # Dummy entry; the functor machinery requires at least one column here.
    columns = ['coord_ra']
    _defaultDataset = 'ref'
    _defaultNoDup = True

    def _func(self, df):
        # The result is the index itself, re-wrapped as a Series on that index.
        return pd.Series(df.index, index=df.index)
class IDColumn(Column):
    """Return each object's id, which is stored in the dataframe index."""
    col = 'id'
    _allow_difference = False
    _defaultNoDup = True

    def _func(self, df):
        # The id lives in the index, not in an 'id' data column.
        return pd.Series(df.index, index=df.index)
class FootprintNPix(Column):
    """Return the ``base_Footprint_nPix`` column (footprint pixel count)."""
    col = 'base_Footprint_nPix'
class CoordColumn(Column):
    """Base class for coordinate column, in degrees
    """
    # Subclasses may set this False if the stored column is already degrees.
    _radians = True

    def __init__(self, col, **kwargs):
        super().__init__(col, **kwargs)

    def _func(self, df):
        # Build a new Series so the original column, which may be shared with
        # other functors, is never modified in place.
        res = df[self.col]
        if self._radians:
            res = res * 180 / np.pi
        return res
class RAColumn(CoordColumn):
    """Right Ascension, in degrees

    Reads the ``coord_ra`` column (radians) and converts via `CoordColumn`.
    """
    name = 'RA'
    _defaultNoDup = True

    def __init__(self, **kwargs):
        super().__init__('coord_ra', **kwargs)

    # NOTE: a previous `__call__` override that only delegated to
    # `super().__call__` was removed as dead code; behavior is unchanged.
class DecColumn(CoordColumn):
    """Declination, in degrees

    Reads the ``coord_dec`` column (radians) and converts via `CoordColumn`.
    """
    name = 'Dec'
    _defaultNoDup = True

    def __init__(self, **kwargs):
        super().__init__('coord_dec', **kwargs)

    # NOTE: a previous `__call__` override that only delegated to
    # `super().__call__` was removed as dead code; behavior is unchanged.
def fluxName(col):
    """Return `col` with an '_instFlux' suffix, adding it only if missing."""
    return col if col.endswith('_instFlux') else col + '_instFlux'
def fluxErrName(col):
    """Return `col` with an '_instFluxErr' suffix, adding it only if missing."""
    return col if col.endswith('_instFluxErr') else col + '_instFluxErr'
class Mag(Functor):
    """Compute calibrated magnitude

    Takes a `calib` argument, which returns the flux at mag=0
    as `calib.getFluxMag0()`. If not provided, then the default
    `fluxMag0` is 63095734448.0194, which is default for HSC.
    This default should be removed in DM-21955

    This calculation hides warnings about invalid values and dividing by zero.

    As for all functors, a `dataset` and `filt` kwarg should be provided upon
    initialization. Unlike the default `Functor`, however, the default dataset
    for a `Mag` is `'meas'`, rather than `'ref'`.

    Parameters
    ----------
    col : `str`
        Name of flux column from which to compute magnitude. Can be parseable
        by `lsst.pipe.tasks.functors.fluxName` function---that is, you can pass
        `'modelfit_CModel'` instead of `'modelfit_CModel_instFlux'`) and it will
        understand.
    calib : `lsst.afw.image.calib.Calib` (optional)
        Object that knows zero point.
    """
    _defaultDataset = 'meas'

    def __init__(self, col, calib=None, **kwargs):
        self.col = fluxName(col)
        self.calib = calib
        if calib is not None:
            self.fluxMag0 = calib.getFluxMag0()[0]
        else:
            # TO DO: DM-21955 Replace hard coded photometic calibration values
            self.fluxMag0 = 63095734448.0194

        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.col]

    def _func(self, df):
        # `np.warnings` was a deprecated alias for the stdlib warnings module
        # and was removed in NumPy 2.0; use `warnings` directly.
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', r'invalid value encountered')
            warnings.filterwarnings('ignore', r'divide by zero')
            return -2.5*np.log10(df[self.col] / self.fluxMag0)

    @property
    def name(self):
        return f'mag_{self.col}'
class MagErr(Mag):
    """Compute calibrated magnitude uncertainty

    Takes the same `calib` object as `lsst.pipe.tasks.functors.Mag`.

    Parameters
    ----------
    col : `str`
        Name of flux column
    calib : `lsst.afw.image.calib.Calib` (optional)
        Object that knows zero point.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        if self.calib is not None:
            self.fluxMag0Err = self.calib.getFluxMag0()[1]
        else:
            self.fluxMag0Err = 0.

    @property
    def columns(self):
        return [self.col, self.col + 'Err']

    def _func(self, df):
        # Standard error propagation for m = -2.5*log10(f/f0):
        # sigma_m = (2.5/ln10) * sqrt((sigma_f/f)^2 + (sigma_f0/f0)^2)
        # `np.warnings` was removed in NumPy 2.0; use `warnings` directly.
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', r'invalid value encountered')
            warnings.filterwarnings('ignore', r'divide by zero')
            fluxCol, fluxErrCol = self.columns
            x = df[fluxErrCol] / df[fluxCol]
            y = self.fluxMag0Err / self.fluxMag0
            magErr = (2.5 / np.log(10.)) * np.sqrt(x*x + y*y)
            return magErr

    @property
    def name(self):
        return super().name + '_err'
class NanoMaggie(Mag):
    """Calibrated flux in units of nanomaggies: 1e9 * flux / fluxMag0."""

    def _func(self, df):
        return (df[self.col] / self.fluxMag0) * 1e9
class MagDiff(Functor):
    """Functor to calculate magnitude difference"""
    # BUG FIX: the docstring above must be the first statement in the class
    # body; it previously appeared after `_defaultDataset` and so was a stray
    # string expression, not a docstring.
    _defaultDataset = 'meas'

    def __init__(self, col1, col2, **kwargs):
        self.col1 = fluxName(col1)
        self.col2 = fluxName(col2)
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.col1, self.col2]

    def _func(self, df):
        # `np.warnings` was removed in NumPy 2.0; use `warnings` directly.
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', r'invalid value encountered')
            warnings.filterwarnings('ignore', r'divide by zero')
            return -2.5*np.log10(df[self.col1]/df[self.col2])

    @property
    def name(self):
        return f'(mag_{self.col1} - mag_{self.col2})'

    @property
    def shortname(self):
        return f'magDiff_{self.col1}_{self.col2}'
class Color(Functor):
    """Compute the color between two filters

    Computes color by initializing two different `Mag`
    functors based on the `col` and filters provided, and
    then returning the difference.

    This is enabled by the `_func` expecting a dataframe with a
    multilevel column index, with both `'filter'` and `'column'`,
    instead of just `'column'`, which is the `Functor` default.
    This is controlled by the `_dfLevels` attribute.

    Also of note, the default dataset for `Color` is `forced_src'`,
    whereas for `Mag` it is `'meas'`.

    Parameters
    ----------
    col : str
        Name of flux column from which to compute; same as would be passed to
        `lsst.pipe.tasks.functors.Mag`.

    filt2, filt1 : str
        Filters from which to compute magnitude difference.
        Color computed is `Mag(filt2) - Mag(filt1)`.
    """
    _defaultDataset = 'forced_src'
    _dfLevels = ('filter', 'column')
    _defaultNoDup = True

    def __init__(self, col, filt2, filt1, **kwargs):
        self.col = fluxName(col)
        if filt2 == filt1:
            raise RuntimeError("Cannot compute Color for %s: %s - %s " % (col, filt2, filt1))
        self.filt2 = filt2
        self.filt1 = filt1

        # Component magnitude functors, one per filter.
        self.mag2 = Mag(col, filt=filt2, **kwargs)
        self.mag1 = Mag(col, filt=filt1, **kwargs)

        super().__init__(**kwargs)

    @property
    def filt(self):
        # A Color spans two filters, so a single `filt` is meaningless here.
        return None

    @filt.setter
    def filt(self, filt):
        # Deliberate no-op; the component Mag functors carry the filters.
        pass

    def _func(self, df):
        # df has a 'filter' column level (see _dfLevels), so indexing by
        # filter name selects that filter's columns.
        mag2 = self.mag2._func(df[self.filt2])
        mag1 = self.mag1._func(df[self.filt1])
        return mag2 - mag1

    @property
    def columns(self):
        return [self.mag1.col, self.mag2.col]

    def multilevelColumns(self, parq):
        # NOTE(review): these tuples are ordered (dataset, filter, column),
        # but `Functor._columnLevels` is ('filter', 'dataset', 'column') —
        # confirm this ordering matches the ParquetTable's column index.
        return [(self.dataset, self.filt1, self.col),
                (self.dataset, self.filt2, self.col)]

    @property
    def name(self):
        return f'{self.filt2} - {self.filt1} ({self.col})'

    @property
    def shortname(self):
        return f"{self.col}_{self.filt2.replace('-', '')}m{self.filt1.replace('-', '')}"
class Labeller(Functor):
    """Main function of this subclass is to override the dropna=True

    Labellers assign a value to every row, so NaN rows must be kept:
    `__call__` always forces ``dropna=False``.
    """
    # Label assigned to rows that cannot be classified.
    _null_label = 'null'
    _allow_difference = False
    name = 'label'
    # If True, subclasses cast the categorical label Series to plain str.
    _force_str = False

    def __call__(self, parq, dropna=False, **kwargs):
        # The caller-supplied ``dropna`` value is deliberately ignored.
        return super().__call__(parq, dropna=False, **kwargs)
class StarGalaxyLabeller(Labeller):
    """Label each row 'galaxy', 'star', or 'null' from the extendedness column."""
    _columns = ["base_ClassificationExtendedness_value"]
    _column = "base_ClassificationExtendedness_value"

    def _func(self, df):
        extendedness = df[self._columns][self._column]
        nullMask = extendedness.isnull()
        # Codes: 0 -> 'galaxy', 1 -> 'star', 2 -> null label.
        codes = (extendedness < 0.5).astype(int).mask(nullMask, 2)

        # TODO: DM-21954 Look into veracity of inline comment below
        # are these backwards?
        categories = ['galaxy', 'star', self._null_label]
        label = pd.Series(pd.Categorical.from_codes(codes, categories=categories),
                          index=extendedness.index, name='label')
        return label.astype(str) if self._force_str else label
class NumStarLabeller(Labeller):
    """Bin the number-of-star flags into 'noStar' / 'maybe' / 'star' labels."""
    _columns = ['numStarFlags']
    # NOTE(review): this attribute appears unused by `_func`, and its keys
    # ("notStar") do not match the labels used below ('noStar') — verify.
    labels = {"star": 0, "maybe": 1, "notStar": 2}

    def _func(self, df):
        x = df[self._columns][self._columns[0]]

        # Number of filters
        n = len(x.unique()) - 1

        # Bins: 0 -> 'noStar', 1..n-1 -> 'maybe', n -> 'star'.
        labels = ['noStar', 'maybe', 'star']
        label = pd.Series(pd.cut(x, [-1, 0, n-1, n], labels=labels),
                          index=x.index, name='label')

        if self._force_str:
            label = label.astype(str)

        return label
class DeconvolvedMoments(Functor):
    """Difference of source moments and PSF moments (deconvolved size proxy)."""
    name = 'Deconvolved Moments'
    shortname = 'deconvolvedMoments'
    _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
                "ext_shapeHSM_HsmSourceMoments_yy",
                "base_SdssShape_xx", "base_SdssShape_yy",
                "ext_shapeHSM_HsmPsfMoments_xx",
                "ext_shapeHSM_HsmPsfMoments_yy")

    def _func(self, df):
        """Calculate deconvolved moments"""
        if "ext_shapeHSM_HsmSourceMoments_xx" in df.columns:  # _xx added by tdm
            hsm = df["ext_shapeHSM_HsmSourceMoments_xx"] + df["ext_shapeHSM_HsmSourceMoments_yy"]
        else:
            hsm = np.ones(len(df))*np.nan
        sdss = df["base_SdssShape_xx"] + df["base_SdssShape_yy"]

        if "ext_shapeHSM_HsmPsfMoments_xx" not in df.columns:
            # LSST does not have shape.sdss.psf. Could instead add base_PsfShape to catalog using
            # exposure.getPsf().computeShape(s.getCentroid()).getIxx()
            # raise TaskError("No psf shape parameter found in catalog")
            raise RuntimeError('No psf shape parameter found in catalog')
        psf = df["ext_shapeHSM_HsmPsfMoments_xx"] + df["ext_shapeHSM_HsmPsfMoments_yy"]

        # Prefer the HSM moments; fall back to SDSS wherever HSM is not finite.
        return hsm.where(np.isfinite(hsm), sdss) - psf
class SdssTraceSize(Functor):
    """Functor to calculate SDSS trace radius size for sources"""
    name = "SDSS Trace Size"
    shortname = 'sdssTrace'
    _columns = ("base_SdssShape_xx", "base_SdssShape_yy")

    def _func(self, df):
        # Trace radius: sqrt of the mean of the two second moments.
        return np.sqrt(0.5*(df["base_SdssShape_xx"] + df["base_SdssShape_yy"]))
class PsfSdssTraceSizeDiff(Functor):
    """Functor to calculate SDSS trace radius size difference (%) between object and psf model"""
    name = "PSF - SDSS Trace Size"
    shortname = 'psf_sdssTrace'
    _columns = ("base_SdssShape_xx", "base_SdssShape_yy",
                "base_SdssShape_psf_xx", "base_SdssShape_psf_yy")

    def _func(self, df):
        srcSize = np.sqrt(0.5*(df["base_SdssShape_xx"] + df["base_SdssShape_yy"]))
        psfSize = np.sqrt(0.5*(df["base_SdssShape_psf_xx"] + df["base_SdssShape_psf_yy"]))
        # Percent difference, normalized by the mean of the two sizes.
        return 100*(srcSize - psfSize)/(0.5*(srcSize + psfSize))
class HsmTraceSize(Functor):
    """Functor to calculate HSM trace radius size for sources"""
    name = 'HSM Trace Size'
    shortname = 'hsmTrace'
    _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
                "ext_shapeHSM_HsmSourceMoments_yy")

    def _func(self, df):
        # Trace radius: sqrt of the mean of the two second moments.
        momentSum = (df["ext_shapeHSM_HsmSourceMoments_xx"]
                     + df["ext_shapeHSM_HsmSourceMoments_yy"])
        return np.sqrt(0.5*momentSum)
class PsfHsmTraceSizeDiff(Functor):
    """Functor to calculate HSM trace radius size difference (%) between object and psf model"""
    name = 'PSF - HSM Trace Size'
    shortname = 'psf_HsmTrace'
    _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
                "ext_shapeHSM_HsmSourceMoments_yy",
                "ext_shapeHSM_HsmPsfMoments_xx",
                "ext_shapeHSM_HsmPsfMoments_yy")

    def _func(self, df):
        srcSize = np.sqrt(0.5*(df["ext_shapeHSM_HsmSourceMoments_xx"]
                               + df["ext_shapeHSM_HsmSourceMoments_yy"]))
        psfSize = np.sqrt(0.5*(df["ext_shapeHSM_HsmPsfMoments_xx"]
                               + df["ext_shapeHSM_HsmPsfMoments_yy"]))
        # Percent difference, normalized by the mean of the two sizes.
        return 100*(srcSize - psfSize)/(0.5*(srcSize + psfSize))
class HsmFwhm(Functor):
    """PSF FWHM in arcseconds, derived from the HSM PSF second moments."""
    name = 'HSM Psf FWHM'
    _columns = ('ext_shapeHSM_HsmPsfMoments_xx', 'ext_shapeHSM_HsmPsfMoments_yy')
    # TODO: DM-21403 pixel scale should be computed from the CD matrix or transform matrix
    pixelScale = 0.168
    # Conversion factor between a Gaussian sigma and its FWHM.
    SIGMA2FWHM = 2*np.sqrt(2*np.log(2))

    def _func(self, df):
        traceRadius = np.sqrt(
            0.5*(df['ext_shapeHSM_HsmPsfMoments_xx'] + df['ext_shapeHSM_HsmPsfMoments_yy']))
        return self.pixelScale*self.SIGMA2FWHM*traceRadius
class E1(Functor):
    """Distortion ellipticity e1 = (Ixx - Iyy) / (Ixx + Iyy)."""
    name = "Distortion Ellipticity (e1)"
    shortname = "Distortion"

    def __init__(self, colXX, colXY, colYY, **kwargs):
        self.colXX = colXX
        self.colXY = colXY
        self.colYY = colYY
        self._columns = [self.colXX, self.colXY, self.colYY]
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.colXX, self.colXY, self.colYY]

    def _func(self, df):
        # BUG FIX: the numerator must be parenthesized. The previous
        # expression `xx - yy / (xx + yy)` computed `xx - (yy / (xx + yy))`
        # because division binds tighter than subtraction.
        return (df[self.colXX] - df[self.colYY]) / (df[self.colXX] + df[self.colYY])
class E2(Functor):
    """Distortion ellipticity e2 = 2*Ixy / (Ixx + Iyy)."""
    name = "Ellipticity e2"

    def __init__(self, colXX, colXY, colYY, **kwargs):
        self.colXX = colXX
        self.colXY = colXY
        self.colYY = colYY
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.colXX, self.colXY, self.colYY]

    def _func(self, df):
        trace = df[self.colXX] + df[self.colYY]
        return 2*df[self.colXY] / trace
class RadiusFromQuadrupole(Functor):
    """Size estimate: fourth root of the quadrupole-moment determinant."""

    def __init__(self, colXX, colXY, colYY, **kwargs):
        self.colXX = colXX
        self.colXY = colXY
        self.colYY = colYY
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.colXX, self.colXY, self.colYY]

    def _func(self, df):
        # det = Ixx*Iyy - Ixy^2; the radius is det**(1/4).
        determinant = df[self.colXX]*df[self.colYY] - df[self.colXY]**2
        return determinant**0.25
class LocalWcs(Functor):
    """Computations using the stored localWcs.
    """
    name = "LocalWcsOperations"

    def __init__(self,
                 colCD_1_1,
                 colCD_1_2,
                 colCD_2_1,
                 colCD_2_2,
                 **kwargs):
        self.colCD_1_1 = colCD_1_1
        self.colCD_1_2 = colCD_1_2
        self.colCD_2_1 = colCD_2_1
        self.colCD_2_2 = colCD_2_2
        super().__init__(**kwargs)

    def computeDeltaRaDec(self, x, y, cd11, cd12, cd21, cd22):
        """Convert a pixel offset (x, y) into a (delta RA, delta Dec) offset
        by applying the local Wcs affine (CD matrix) transform.

        Parameters
        ----------
        x : `pandas.Series`
            X pixel coordinate.
        y : `pandas.Series`
            Y pixel coordinate.
        cd11 : `pandas.Series`
            [1, 1] element of the local Wcs affine transform.
        cd12 : `pandas.Series`
            [1, 2] element of the local Wcs affine transform.
        cd21 : `pandas.Series`
            [2, 1] element of the local Wcs affine transform.
        cd22 : `pandas.Series`
            [2, 2] element of the local Wcs affine transform.

        Returns
        -------
        raDecTuple : tuple
            RA and dec conversion of x and y given the local Wcs. Returned
            units are in radians.
        """
        return (x * cd11 + y * cd12, x * cd21 + y * cd22)

    def computeSkySeperation(self, ra1, dec1, ra2, dec2):
        """Compute the great-circle distance between two sky positions using
        the haversine formula.

        Parameters
        ----------
        ra1 : `pandas.Series`
            Ra of the first coordinate in radians.
        dec1 : `pandas.Series`
            Dec of the first coordinate in radians.
        ra2 : `pandas.Series`
            Ra of the second coordinate in radians.
        dec2 : `pandas.Series`
            Dec of the second coordinate in radians.

        Returns
        -------
        dist : `pandas.Series`
            Distance on the sphere in radians.
        """
        deltaDec = dec2 - dec1
        deltaRa = ra2 - ra1
        # Haversine formula; numerically stable for small separations.
        return 2 * np.arcsin(
            np.sqrt(
                np.sin(deltaDec / 2) ** 2
                + np.cos(dec2) * np.cos(dec1) * np.sin(deltaRa / 2) ** 2))

    def getSkySeperationFromPixel(self, x1, y1, x2, y2, cd11, cd12, cd21, cd22):
        """Compute the distance on the sphere between pixel positions
        (x1, y1) and (x2, y2).

        Parameters
        ----------
        x1 : `pandas.Series`
            X pixel coordinate.
        y1 : `pandas.Series`
            Y pixel coordinate.
        x2 : `pandas.Series`
            X pixel coordinate.
        y2 : `pandas.Series`
            Y pixel coordinate.
        cd11 : `pandas.Series`
            [1, 1] element of the local Wcs affine transform.
        cd12 : `pandas.Series`
            [1, 2] element of the local Wcs affine transform.
        cd21 : `pandas.Series`
            [2, 1] element of the local Wcs affine transform.
        cd22 : `pandas.Series`
            [2, 2] element of the local Wcs affine transform.

        Returns
        -------
        dist : `pandas.Series`
            Distance on the sphere in radians.
        """
        ra1, dec1 = self.computeDeltaRaDec(x1, y1, cd11, cd12, cd21, cd22)
        ra2, dec2 = self.computeDeltaRaDec(x2, y2, cd11, cd12, cd21, cd22)
        # Great circle distance for small separations.
        return self.computeSkySeperation(ra1, dec1, ra2, dec2)
class ComputePixelScale(LocalWcs):
    """Compute the local pixel scale from the stored CDMatrix.
    """
    name = "PixelScale"

    @property
    def columns(self):
        return [self.colCD_1_1,
                self.colCD_1_2,
                self.colCD_2_1,
                self.colCD_2_2]

    def pixelScaleArcseconds(self, cd11, cd12, cd21, cd22):
        """Compute the local pixel to scale conversion in arcseconds.

        Parameters
        ----------
        cd11 : `pandas.Series`
            [1, 1] element of the local Wcs affine transform in radians.
        cd12 : `pandas.Series`
            [1, 2] element of the local Wcs affine transform in radians.
        cd21 : `pandas.Series`
            [2, 1] element of the local Wcs affine transform in radians.
        cd22 : `pandas.Series`
            [2, 2] element of the local Wcs affine transform in radians.

        Returns
        -------
        pixScale : `pandas.Series`
            Arcseconds per pixel at the location of the local WC
        """
        # |det CD| is the pixel solid angle; its square root is the linear
        # scale, converted from radians to arcseconds.
        determinant = cd11 * cd22 - cd12 * cd21
        return 3600 * np.degrees(np.sqrt(np.fabs(determinant)))

    def _func(self, df):
        return self.pixelScaleArcseconds(df[self.colCD_1_1],
                                         df[self.colCD_1_2],
                                         df[self.colCD_2_1],
                                         df[self.colCD_2_2])
class ConvertPixelToArcseconds(ComputePixelScale):
    """Convert a value in units pixels to units arcseconds.
    """

    def __init__(self, col, colCD_1_1, colCD_1_2, colCD_2_1, colCD_2_2,
                 **kwargs):
        # Column holding the pixel-unit quantity to be rescaled.
        self.col = col
        super().__init__(colCD_1_1, colCD_1_2, colCD_2_1, colCD_2_2, **kwargs)

    @property
    def name(self):
        return f"{self.col}_asArcseconds"

    @property
    def columns(self):
        # The value column plus the four CD-matrix elements.
        return [self.col,
                self.colCD_1_1,
                self.colCD_1_2,
                self.colCD_2_1,
                self.colCD_2_2]

    def _func(self, df):
        # Multiply the pixel-unit values by the per-row pixel scale.
        pixScale = self.pixelScaleArcseconds(df[self.colCD_1_1],
                                             df[self.colCD_1_2],
                                             df[self.colCD_2_1],
                                             df[self.colCD_2_2])
        return df[self.col] * pixScale
class ReferenceBand(Functor):
    """Return the band whose measurement was used as the merge reference.
    """
    name = 'Reference Band'
    shortname = 'refBand'

    @property
    def columns(self):
        # Per-band boolean flags; the set flag marks the reference band.
        return ["merge_measurement_i",
                "merge_measurement_r",
                "merge_measurement_z",
                "merge_measurement_y",
                "merge_measurement_g"]

    def _func(self, df):
        def _bandOfRow(row):
            # idxmax returns the first column holding the maximum value
            # (True > False), i.e. the flag column that is set.
            return row.idxmax().replace('merge_measurement_', '')

        return df[self.columns].apply(_bandOfRow, axis=1)
class Photometry(Functor):
    """Base class converting instrumental fluxes to calibrated fluxes or
    AB magnitudes.

    Parameters
    ----------
    colFlux : `str`
        Name of the instrument flux column.
    colFluxErr : `str`, optional
        Name of the associated error column for ``colFlux``.
    calib : optional
        Calibration object providing ``getFluxMag0``; when `None`, the
        hard-coded ``COADD_ZP`` zero point is used instead.
    """
    # AB to NanoJansky (3631 Jansky)
    AB_FLUX_SCALE = (0 * u.ABmag).to_value(u.nJy)
    LOG_AB_FLUX_SCALE = 12.56
    FIVE_OVER_2LOG10 = 1.085736204758129569
    # TO DO: DM-21955 Replace hard coded photometic calibration values
    COADD_ZP = 27

    def __init__(self, colFlux, colFluxErr=None, calib=None, **kwargs):
        # Vectorized form of the scalar numerically-stable hypot below.
        self.vhypot = np.vectorize(self.hypot)
        self.col = colFlux
        self.colFluxErr = colFluxErr

        self.calib = calib
        if calib is not None:
            self.fluxMag0, self.fluxMag0Err = calib.getFluxMag0()
        else:
            # Fall back to the hard-coded coadd zero point (DM-21955).
            self.fluxMag0 = 1./np.power(10, -0.4*self.COADD_ZP)
            self.fluxMag0Err = 0.

        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.col]

    @property
    def name(self):
        return f'mag_{self.col}'

    @classmethod
    def hypot(cls, a, b):
        """Return sqrt(a**2 + b**2) for scalars, avoiding overflow by
        factoring out the larger magnitude."""
        if np.abs(a) < np.abs(b):
            a, b = b, a
        if a == 0.:
            return 0.
        q = b/a
        return np.abs(a) * np.sqrt(1. + q*q)

    def dn2flux(self, dn, fluxMag0):
        """Convert instrumental flux (counts) to nanojanskys."""
        return self.AB_FLUX_SCALE * dn / fluxMag0

    def dn2mag(self, dn, fluxMag0):
        """Convert instrumental flux (counts) to AB magnitudes.

        Non-positive fluxes yield NaN/inf magnitudes; the expected numpy
        warnings are suppressed rather than propagated.
        """
        # Use the stdlib warnings module directly: the np.warnings alias
        # was deprecated and removed in NumPy >= 1.25.
        import warnings
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', r'invalid value encountered')
            warnings.filterwarnings('ignore', r'divide by zero')
            return -2.5 * np.log10(dn/fluxMag0)

    def dn2fluxErr(self, dn, dnErr, fluxMag0, fluxMag0Err):
        """Propagate flux and zero-point errors to a nanojansky flux error."""
        retVal = self.vhypot(dn * fluxMag0Err, dnErr * fluxMag0)
        retVal *= self.AB_FLUX_SCALE / fluxMag0 / fluxMag0
        return retVal

    def dn2MagErr(self, dn, dnErr, fluxMag0, fluxMag0Err):
        """Propagate flux and zero-point errors to a magnitude error."""
        retVal = self.dn2fluxErr(dn, dnErr, fluxMag0, fluxMag0Err) / self.dn2flux(dn, fluxMag0)
        return self.FIVE_OVER_2LOG10 * retVal
class NanoJansky(Photometry):
    """Calibrated flux of the configured column, in nanojanskys."""

    def _func(self, df):
        dn = df[self.col]
        return self.dn2flux(dn, self.fluxMag0)
class NanoJanskyErr(Photometry):
    """Error on the calibrated flux, in nanojanskys."""

    @property
    def columns(self):
        # Requires the flux column and its error column.
        return [self.col, self.colFluxErr]

    def _func(self, df):
        fluxErr = self.dn2fluxErr(df[self.col], df[self.colFluxErr],
                                  self.fluxMag0, self.fluxMag0Err)
        # dn2fluxErr returns an ndarray; re-wrap with the frame's index.
        return pd.Series(fluxErr, index=df.index)
class Magnitude(Photometry):
    """Calibrated AB magnitude of the configured flux column."""

    def _func(self, df):
        dn = df[self.col]
        return self.dn2mag(dn, self.fluxMag0)
class MagnitudeErr(Photometry):
    """Error on the calibrated AB magnitude."""

    @property
    def columns(self):
        # Requires the flux column and its error column.
        return [self.col, self.colFluxErr]

    def _func(self, df):
        magErr = self.dn2MagErr(df[self.col], df[self.colFluxErr],
                                self.fluxMag0, self.fluxMag0Err)
        # dn2MagErr returns an ndarray; re-wrap with the frame's index.
        return pd.Series(magErr, index=df.index)
class LocalPhotometry(Functor):
    """Base class for calibrating the specified instrument flux column using
    the local photometric calibration.

    Parameters
    ----------
    instFluxCol : `str`
        Name of the instrument flux column.
    instFluxErrCol : `str`
        Name of the assocated error columns for ``instFluxCol``.
    photoCalibCol : `str`
        Name of local calibration column.
    photoCalibErrCol : `str`
        Error associated with ``photoCalibCol``

    See also
    --------
    LocalPhotometry
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    """
    # AB magnitude of a 1 nJy source; offsets log10(flux) to the AB system.
    logNJanskyToAB = (1 * u.nJy).to_value(u.ABmag)

    def __init__(self,
                 instFluxCol,
                 instFluxErrCol,
                 photoCalibCol,
                 photoCalibErrCol,
                 **kwargs):
        self.instFluxCol = instFluxCol
        self.instFluxErrCol = instFluxErrCol
        self.photoCalibCol = photoCalibCol
        self.photoCalibErrCol = photoCalibErrCol
        super().__init__(**kwargs)

    def instFluxToNanojansky(self, instFlux, localCalib):
        """Convert instrument flux to nanojanskys.

        Parameters
        ----------
        instFlux : `numpy.ndarray` or `pandas.Series`
            Array of instrument flux measurements
        localCalib : `numpy.ndarray` or `pandas.Series`
            Array of local photometric calibration estimates.

        Returns
        -------
        calibFlux : `numpy.ndarray` or `pandas.Series`
            Array of calibrated flux measurements.
        """
        return instFlux * localCalib

    def instFluxErrToNanojanskyErr(self, instFlux, instFluxErr, localCalib, localCalibErr):
        """Convert the instrument flux error to a nanojansky flux error.

        Parameters
        ----------
        instFlux : `numpy.ndarray` or `pandas.Series`
            Array of instrument flux measurements
        instFluxErr : `numpy.ndarray` or `pandas.Series`
            Errors on associated ``instFlux`` values
        localCalib : `numpy.ndarray` or `pandas.Series`
            Array of local photometric calibration estimates.
        localCalibErr : `numpy.ndarray` or `pandas.Series`
            Errors on associated ``localCalib`` values

        Returns
        -------
        calibFluxErr : `numpy.ndarray` or `pandas.Series`
            Errors on calibrated flux measurements.
        """
        # Standard propagation for a product of two independent quantities.
        return np.hypot(instFluxErr * localCalib, instFlux * localCalibErr)

    def instFluxToMagnitude(self, instFlux, localCalib):
        """Convert instrument flux to an AB magnitude.

        Parameters
        ----------
        instFlux : `numpy.ndarray` or `pandas.Series`
            Array of instrument flux measurements
        localCalib : `numpy.ndarray` or `pandas.Series`
            Array of local photometric calibration estimates.

        Returns
        -------
        calibMag : `numpy.ndarray` or `pandas.Series`
            Array of calibrated AB magnitudes.
        """
        return -2.5 * np.log10(self.instFluxToNanojansky(instFlux, localCalib)) + self.logNJanskyToAB

    def instFluxErrToMagnitudeErr(self, instFlux, instFluxErr, localCalib, localCalibErr):
        """Convert the instrument flux error to an AB magnitude error.

        Parameters
        ----------
        instFlux : `numpy.ndarray` or `pandas.Series`
            Array of instrument flux measurements
        instFluxErr : `numpy.ndarray` or `pandas.Series`
            Errors on associated ``instFlux`` values
        localCalib : `numpy.ndarray` or `pandas.Series`
            Array of local photometric calibration estimates.
        localCalibErr : `numpy.ndarray` or `pandas.Series`
            Errors on associated ``localCalib`` values

        Returns
        -------
        calibMagErr: `numpy.ndarray` or `pandas.Series`
            Error on calibrated AB magnitudes.
        """
        err = self.instFluxErrToNanojanskyErr(instFlux, instFluxErr, localCalib, localCalibErr)
        # Bug fix: the denominator flux must be computed with the local
        # calibration; previously instFluxErr was passed in its place.
        return 2.5 / np.log(10) * err / self.instFluxToNanojansky(instFlux, localCalib)
class LocalNanojansky(LocalPhotometry):
    """Compute calibrated fluxes using the local calibration value.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    """

    @property
    def columns(self):
        # Flux column plus its local calibration column.
        return [self.instFluxCol, self.photoCalibCol]

    @property
    def name(self):
        return f'flux_{self.instFluxCol}'

    def _func(self, df):
        instFlux = df[self.instFluxCol]
        localCalib = df[self.photoCalibCol]
        return self.instFluxToNanojansky(instFlux, localCalib)
class LocalNanojanskyErr(LocalPhotometry):
    """Compute calibrated flux errors using the local calibration value.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    """

    @property
    def columns(self):
        # Flux and calibration columns together with both error columns.
        return [self.instFluxCol, self.instFluxErrCol,
                self.photoCalibCol, self.photoCalibErrCol]

    @property
    def name(self):
        return f'fluxErr_{self.instFluxCol}'

    def _func(self, df):
        return self.instFluxErrToNanojanskyErr(df[self.instFluxCol],
                                               df[self.instFluxErrCol],
                                               df[self.photoCalibCol],
                                               df[self.photoCalibErrCol])
class LocalMagnitude(LocalPhotometry):
    """Compute calibrated AB magnitudes using the local calibration value.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    """

    @property
    def columns(self):
        # Flux column plus its local calibration column.
        return [self.instFluxCol, self.photoCalibCol]

    @property
    def name(self):
        return f'mag_{self.instFluxCol}'

    def _func(self, df):
        instFlux = df[self.instFluxCol]
        localCalib = df[self.photoCalibCol]
        return self.instFluxToMagnitude(instFlux, localCalib)
class LocalMagnitudeErr(LocalPhotometry):
    """Compute calibrated AB magnitude errors using the local calibration value.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    """

    @property
    def columns(self):
        # Flux and calibration columns together with both error columns.
        return [self.instFluxCol, self.instFluxErrCol,
                self.photoCalibCol, self.photoCalibErrCol]

    @property
    def name(self):
        return f'magErr_{self.instFluxCol}'

    def _func(self, df):
        return self.instFluxErrToMagnitudeErr(df[self.instFluxCol],
                                              df[self.instFluxErrCol],
                                              df[self.photoCalibCol],
                                              df[self.photoCalibErrCol])