Coverage for python/lsst/pipe/tasks/functors.py : 67%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
import re
import warnings

import yaml
import pandas as pd
import numpy as np
import astropy.units as u

from lsst.daf.persistence import doImport

from .parquetTable import MultilevelParquetTable
def init_fromDict(initDict, basePath='lsst.pipe.tasks.functors',
                  typeKey='functor', name=None):
    """Initialize an object defined in a dictionary.

    The object needs to be importable as f'{basePath}.{initDict[typeKey]}'.
    The positional and keyword arguments (if any) are contained in "args" and
    "kwargs" entries in the dictionary, respectively.
    This is used in `functors.CompositeFunctor.from_yaml` to initialize
    a composite functor from a specification in a YAML file.

    Parameters
    ----------
    initDict : dictionary
        Dictionary describing object's initialization. Must contain
        an entry keyed by ``typeKey`` that is the name of the object,
        relative to ``basePath``.
    basePath : str
        Path relative to module in which ``initDict[typeKey]`` is defined.
    typeKey : str
        Key of ``initDict`` that is the name of the object
        (relative to `basePath`).
    name : str, optional
        Label for this functor, used only in the error message when
        construction fails.

    Returns
    -------
    element
        The newly constructed object.
    """
    initDict = initDict.copy()
    # TO DO: DM-21956 We should be able to define functors outside this module
    pythonType = doImport(f'{basePath}.{initDict.pop(typeKey)}')
    # 'args' may be given as a single string or a list of positional args.
    args = initDict.pop('args', [])
    if isinstance(args, str):
        args = [args]
    try:
        element = pythonType(*args, **initDict)
    except Exception as e:
        message = f'Error in constructing functor "{name}" of type {pythonType.__name__} with args: {args}'
        # Chain the original exception so the root cause is preserved in the
        # traceback instead of being swallowed by the re-raise.
        raise type(e)(message, e.args) from e
    return element
class Functor(object):
    """Define and execute a calculation on a ParquetTable.

    The `__call__` method accepts a `ParquetTable` object, and returns the
    result of the calculation as a single column. Each functor defines what
    columns are needed for the calculation, and only these columns are read
    from the `ParquetTable`.

    The action of `__call__` consists of two steps: first, loading the
    necessary columns from disk into memory as a `pandas.DataFrame` object;
    and second, performing the computation on this dataframe and returning the
    result.

    To define a new `Functor`, a subclass must define a `_func` method,
    that takes a `pandas.DataFrame` and returns result in a `pandas.Series`.
    In addition, it must define the following attributes:

    * `_columns`: The columns necessary to perform the calculation
    * `name`: A name appropriate for a figure axis label
    * `shortname`: A name appropriate for use as a dictionary key

    On initialization, a `Functor` should declare what filter (`filt` kwarg)
    and dataset (e.g. `'ref'`, `'meas'`, `'forced_src'`) it is intended to be
    applied to. This enables the `_get_cols` method to extract the proper
    columns from the parquet file. If not specified, the dataset will fall
    back on the `_defaultDataset` attribute. If filter is not specified and
    `dataset` is anything other than `'ref'`, then an error will be raised
    when trying to perform the calculation.

    As currently implemented, `Functor` is only set up to expect a
    `ParquetTable` of the format of the `deepCoadd_obj` dataset; that is, a
    `MultilevelParquetTable` with the levels of the column index being
    `filter`, `dataset`, and `column`. This is defined in the `_columnLevels`
    attribute, as well as being implicit in the role of the `filt` and
    `dataset` attributes defined at initialization. In addition, the
    `_get_cols` method that reads the dataframe from the `ParquetTable` will
    return a dataframe with column index levels defined by the `_dfLevels`
    attribute; by default, this is `column`.

    The `_columnLevels` and `_dfLevels` attributes should generally not need
    to be changed, unless `_func` needs columns from multiple filters or
    datasets to do the calculation.
    An example of this is the `lsst.pipe.tasks.functors.Color` functor, for
    which `_dfLevels = ('filter', 'column')`, and `_func` expects the
    dataframe it gets to have those levels in the column index.

    Parameters
    ----------
    filt : str
        Filter upon which to do the calculation

    dataset : str
        Dataset upon which to do the calculation
        (e.g., 'ref', 'meas', 'forced_src').
    """

    _defaultDataset = 'ref'
    _columnLevels = ('filter', 'dataset', 'column')
    _dfLevels = ('column',)
    _defaultNoDup = False

    def __init__(self, filt=None, dataset=None, noDup=None):
        self.filt = filt
        self.dataset = dataset if dataset is not None else self._defaultDataset
        self._noDup = noDup

    @property
    def noDup(self):
        # An explicit per-instance setting wins; otherwise use class default.
        if self._noDup is not None:
            return self._noDup
        else:
            return self._defaultNoDup

    @property
    def columns(self):
        """Columns required to perform calculation
        """
        if not hasattr(self, '_columns'):
            raise NotImplementedError('Must define columns property or _columns attribute')
        return self._columns

    def multilevelColumns(self, parq):
        """Map the required columns onto the multilevel column index of ``parq``."""
        if not set(parq.columnLevels) == set(self._columnLevels):
            raise ValueError('ParquetTable does not have the expected column levels. '
                             f'Got {parq.columnLevels}; expected {self._columnLevels}.')

        columnDict = {'column': self.columns,
                      'dataset': self.dataset}
        if self.filt is None:
            if 'filter' in parq.columnLevels:
                if self.dataset == 'ref':
                    # The 'ref' dataset is filter-independent, so any filter
                    # in the table works; take the first.
                    columnDict['filter'] = parq.columnLevelNames['filter'][0]
                else:
                    raise ValueError(f"'filt' not set for functor {self.name}"
                                     f"(dataset {self.dataset}) "
                                     "and ParquetTable "
                                     "contains multiple filters in column index. "
                                     "Set 'filt' or set 'dataset' to 'ref'.")
        else:
            columnDict['filter'] = self.filt

        return parq._colsFromDict(columnDict)

    def _func(self, df, dropna=True):
        raise NotImplementedError('Must define calculation on dataframe')

    def _get_cols(self, parq):
        """Retrieve dataframe necessary for calculation.

        Returns dataframe upon which `self._func` can act.
        """
        if isinstance(parq, MultilevelParquetTable):
            columns = self.multilevelColumns(parq)
            df = parq.toDataFrame(columns=columns, droplevels=False)
            df = self._setLevels(df)
        else:
            columns = self.columns
            df = parq.toDataFrame(columns=columns)

        return df

    def _setLevels(self, df):
        # Drop every column-index level not named in `_dfLevels`.
        levelsToDrop = [n for n in df.columns.names if n not in self._dfLevels]
        df.columns = df.columns.droplevel(levelsToDrop)
        return df

    def _dropna(self, vals):
        return vals.dropna()

    def __call__(self, parq, dropna=False):
        # Load columns outside the try block: if I/O fails we want that
        # original exception to propagate. Previously a failure in
        # `_get_cols` left `df` unbound and the except clause then raised a
        # confusing UnboundLocalError from `self.fail(df)`.
        df = self._get_cols(parq)
        try:
            vals = self._func(df)
        except Exception:
            vals = self.fail(df)
        if dropna:
            vals = self._dropna(vals)

        return vals

    def difference(self, parq1, parq2, **kwargs):
        """Computes difference between functor called on two different ParquetTable objects
        """
        return self(parq1, **kwargs) - self(parq2, **kwargs)

    def fail(self, df):
        """Return an all-NaN Series with the same index as ``df``."""
        return pd.Series(np.full(len(df), np.nan), index=df.index)

    @property
    def name(self):
        """Full name of functor (suitable for figure labels)
        """
        # NOTE: deliberately *returns* (does not raise) NotImplementedError,
        # so error messages that interpolate `self.name` still work for
        # subclasses that never define a name.
        return NotImplementedError

    @property
    def shortname(self):
        """Short name of functor (suitable for column name/dict key)
        """
        return self.name
class CompositeFunctor(Functor):
    """Perform multiple calculations at once on a catalog.

    The role of a `CompositeFunctor` is to group together computations from
    multiple functors. Instead of returning `pandas.Series` a
    `CompositeFunctor` returns a `pandas.Dataframe`, with the column names
    being the keys of `funcDict`.

    The `columns` attribute of a `CompositeFunctor` is the union of all
    columns in all the component functors.

    A `CompositeFunctor` does not use a `_func` method itself; rather,
    when a `CompositeFunctor` is called, all its columns are loaded
    at once, and the resulting dataframe is passed to the `_func` method of
    each component functor. This has the advantage of only doing I/O (reading
    from parquet file) once, and works because each individual `_func` method
    of each component functor does not care if there are *extra* columns in
    the dataframe being passed; only that it must contain *at least* the
    `columns` it expects.

    An important and useful class method is `from_yaml`, which takes as
    argument the path to a YAML file specifying a collection of functors.

    Parameters
    ----------
    funcs : `dict` or `list`
        Dictionary or list of functors. If a list, then it will be converted
        into a dictonary according to the `.shortname` attribute of each
        functor.
    """
    dataset = None

    def __init__(self, funcs, **kwargs):

        if isinstance(funcs, dict):
            self.funcDict = funcs
        else:
            self.funcDict = {f.shortname: f for f in funcs}

        self._filt = None

        super().__init__(**kwargs)

    @property
    def filt(self):
        return self._filt

    @filt.setter
    def filt(self, filt):
        # Propagate the filter to every component functor.
        if filt is not None:
            for _, f in self.funcDict.items():
                f.filt = filt
        self._filt = filt

    def update(self, new):
        """Add functors from a dictionary or another `CompositeFunctor`."""
        if isinstance(new, dict):
            self.funcDict.update(new)
        elif isinstance(new, CompositeFunctor):
            self.funcDict.update(new.funcDict)
        else:
            raise TypeError('Can only update with dictionary or CompositeFunctor.')

        # Make sure new functors have the same 'filt' set
        if self.filt is not None:
            self.filt = self.filt

    @property
    def columns(self):
        return list(set([x for y in [f.columns for f in self.funcDict.values()] for x in y]))

    def multilevelColumns(self, parq):
        return list(set([x for y in [f.multilevelColumns(parq)
                                     for f in self.funcDict.values()] for x in y]))

    def __call__(self, parq, **kwargs):
        if isinstance(parq, MultilevelParquetTable):
            columns = self.multilevelColumns(parq)
            df = parq.toDataFrame(columns=columns, droplevels=False)
            valDict = {}
            for k, f in self.funcDict.items():
                # Select the sub-frame *outside* the try block: previously a
                # failure here left `subdf` unbound (or stale from the
                # previous iteration) when the except clause called
                # `f.fail(subdf)`.
                subdf = f._setLevels(df[f.multilevelColumns(parq)])
                try:
                    valDict[k] = f._func(subdf)
                except Exception:
                    valDict[k] = f.fail(subdf)
        else:
            columns = self.columns
            df = parq.toDataFrame(columns=columns)
            valDict = {k: f._func(df) for k, f in self.funcDict.items()}

        try:
            valDf = pd.concat(valDict, axis=1)
        except TypeError:
            # Aid debugging: show which functor produced a non-concatenable
            # value before re-raising.
            print([(k, type(v)) for k, v in valDict.items()])
            raise

        if kwargs.get('dropna', False):
            valDf = valDf.dropna(how='any')

        return valDf

    @classmethod
    def renameCol(cls, col, renameRules):
        """Rename a column according to a list of (old, new) prefix rules."""
        if renameRules is None:
            return col
        for old, new in renameRules:
            if col.startswith(old):
                col = col.replace(old, new)
        return col

    @classmethod
    def from_file(cls, filename, **kwargs):
        """Construct a `CompositeFunctor` from a YAML file on disk."""
        with open(filename) as f:
            translationDefinition = yaml.safe_load(f)

        return cls.from_yaml(translationDefinition, **kwargs)

    @classmethod
    def from_yaml(cls, translationDefinition, **kwargs):
        """Construct a `CompositeFunctor` from a parsed YAML specification."""
        funcs = {}
        for func, val in translationDefinition['funcs'].items():
            funcs[func] = init_fromDict(val, name=func)

        if 'flag_rename_rules' in translationDefinition:
            renameRules = translationDefinition['flag_rename_rules']
        else:
            renameRules = None

        if 'refFlags' in translationDefinition:
            for flag in translationDefinition['refFlags']:
                funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='ref')

        if 'flags' in translationDefinition:
            for flag in translationDefinition['flags']:
                funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='meas')

        return cls(funcs, **kwargs)
def mag_aware_eval(df, expr):
    """Evaluate an expression on a DataFrame, knowing what the 'mag' function means.

    Builds on `pandas.DataFrame.eval`, which parses and executes math on
    dataframes.

    Parameters
    ----------
    df : pandas.DataFrame
        Dataframe on which to evaluate expression.

    expr : str
        Expression.

    Returns
    -------
    val : `pandas.Series`
        Result of the evaluated expression.
    """
    # NOTE: the `truediv=True` keyword previously passed to `df.eval` was
    # removed in pandas 2.0; true division is the default under Python 3, so
    # dropping it preserves behavior.
    try:
        # Translate mag(x) -> -2.5*log10(x).
        expr_new = re.sub(r'mag\((\w+)\)', r'-2.5*log(\g<1>)/log(10)', expr)
        val = df.eval(expr_new)
    except Exception:  # Should check what actually gets raised
        # Retry assuming the mag() argument names an instFlux column.
        expr_new = re.sub(r'mag\((\w+)\)', r'-2.5*log(\g<1>_instFlux)/log(10)', expr)
        val = df.eval(expr_new)
    return val
class CustomFunctor(Functor):
    """Arbitrary computation on a catalog.

    Column names (and thus the columns to be loaded from catalog) are found
    by finding all words and trying to ignore all "math-y" words.

    Parameters
    ----------
    expr : str
        Expression to evaluate, to be parsed and executed by `mag_aware_eval`.
    """
    # Identifier-like tokens that are functions, not column names.
    _ignore_words = ('mag', 'sin', 'cos', 'exp', 'log', 'sqrt')

    def __init__(self, expr, **kwargs):
        self.expr = expr
        super().__init__(**kwargs)

    @property
    def name(self):
        return self.expr

    @property
    def columns(self):
        """Column names referenced by the expression."""
        mag_args = re.findall(r'mag\(\s*(\w+)\s*\)', self.expr)
        candidates = [tok for tok in re.findall(r'[a-zA-Z_]+', self.expr)
                      if tok not in self._ignore_words]
        aliases = []  # mag() arguments that are aliases, not real columns
        for arg in mag_args:
            if re.search('_instFlux$', arg):
                candidates.append(arg)
            else:
                # A bare name inside mag() refers to its instFlux column.
                candidates.append(f'{arg}_instFlux')
                aliases.append(arg)

        return list({c for c in candidates if c not in aliases})

    def _func(self, df):
        return mag_aware_eval(df, self.expr)
class Column(Functor):
    """Get column with specified name.
    """

    def __init__(self, col, **kwargs):
        self.col = col
        super().__init__(**kwargs)

    @property
    def name(self):
        # The column name doubles as the functor name.
        return self.col

    @property
    def columns(self):
        return [self.col]

    def _func(self, df):
        # Simply select the column from the dataframe.
        return df[self.col]
class Index(Functor):
    """Return the value of the index for each object.
    """

    columns = ['coord_ra']  # just a dummy; something has to be here
    _defaultDataset = 'ref'
    _defaultNoDup = True

    def _func(self, df):
        # The result is the dataframe's own index, aligned on that index.
        return pd.Series(df.index, index=df.index)
class IDColumn(Column):
    """Return the object id (the dataframe index) as a column."""
    col = 'id'
    _allow_difference = False
    _defaultNoDup = True

    def _func(self, df):
        # Note: returns the index itself, not df['id']; `col` only controls
        # which column gets read.
        return pd.Series(df.index, index=df.index)
class FootprintNPix(Column):
    """Select the `base_Footprint_nPix` column."""
    col = 'base_Footprint_nPix'
class CoordColumn(Column):
    """Base class for coordinate column, in degrees.
    """
    # Catalog values are stored in radians; convert on read.
    _radians = True

    def __init__(self, col, **kwargs):
        super().__init__(col, **kwargs)

    def _func(self, df):
        # Must not modify original column in case that column is used by
        # another functor; the multiplication produces a new Series.
        res = df[self.col]
        if self._radians:
            res = res * 180 / np.pi
        return res
class RAColumn(CoordColumn):
    """Right Ascension, in degrees.
    """
    name = 'RA'
    _defaultNoDup = True

    def __init__(self, **kwargs):
        # Always reads the 'coord_ra' column.
        super().__init__('coord_ra', **kwargs)

    def __call__(self, catalog, **kwargs):
        # Delegates directly to the base implementation.
        return super().__call__(catalog, **kwargs)
class DecColumn(CoordColumn):
    """Declination, in degrees.
    """
    name = 'Dec'
    _defaultNoDup = True

    def __init__(self, **kwargs):
        # Always reads the 'coord_dec' column.
        super().__init__('coord_dec', **kwargs)

    def __call__(self, catalog, **kwargs):
        # Delegates directly to the base implementation.
        return super().__call__(catalog, **kwargs)
def fluxName(col):
    """Append '_instFlux' to a column name unless it is already present."""
    return col if col.endswith('_instFlux') else col + '_instFlux'
def fluxErrName(col):
    """Append '_instFluxErr' to a column name unless it is already present."""
    return col if col.endswith('_instFluxErr') else col + '_instFluxErr'
class Mag(Functor):
    """Compute calibrated magnitude.

    Takes a `calib` argument, which returns the flux at mag=0
    as `calib.getFluxMag0()`. If not provided, then the default
    `fluxMag0` is 63095734448.0194, which is default for HSC.
    This default should be removed in DM-21955

    This calculation hides warnings about invalid values and dividing by zero.

    As for all functors, a `dataset` and `filt` kwarg should be provided upon
    initialization. Unlike the default `Functor`, however, the default dataset
    for a `Mag` is `'meas'`, rather than `'ref'`.

    Parameters
    ----------
    col : `str`
        Name of flux column from which to compute magnitude. Can be parseable
        by `lsst.pipe.tasks.functors.fluxName` function---that is, you can
        pass `'modelfit_CModel'` instead of `'modelfit_CModel_instFlux'`) and
        it will understand.
    calib : `lsst.afw.image.calib.Calib` (optional)
        Object that knows zero point.
    """
    _defaultDataset = 'meas'

    def __init__(self, col, calib=None, **kwargs):
        self.col = fluxName(col)
        self.calib = calib
        if calib is not None:
            self.fluxMag0 = calib.getFluxMag0()[0]
        else:
            # TO DO: DM-21955 Replace hard coded photometic calibration values
            self.fluxMag0 = 63095734448.0194

        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.col]

    def _func(self, df):
        # `np.warnings` was removed in NumPy 1.25; use the stdlib `warnings`
        # module instead. Suppress NaN/zero-flux noise from log10.
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', r'invalid value encountered')
            warnings.filterwarnings('ignore', r'divide by zero')
            return -2.5*np.log10(df[self.col] / self.fluxMag0)

    @property
    def name(self):
        return f'mag_{self.col}'
class MagErr(Mag):
    """Compute calibrated magnitude uncertainty.

    Takes the same `calib` object as `lsst.pipe.tasks.functors.Mag`.

    Parameters
    ----------
    col : `str`
        Name of flux column
    calib : `lsst.afw.image.calib.Calib` (optional)
        Object that knows zero point.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        if self.calib is not None:
            self.fluxMag0Err = self.calib.getFluxMag0()[1]
        else:
            self.fluxMag0Err = 0.

    @property
    def columns(self):
        # The flux column and its corresponding error column.
        return [self.col, self.col + 'Err']

    def _func(self, df):
        # `np.warnings` was removed in NumPy 1.25; use the stdlib `warnings`
        # module instead.
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', r'invalid value encountered')
            warnings.filterwarnings('ignore', r'divide by zero')
            fluxCol, fluxErrCol = self.columns
            x = df[fluxErrCol] / df[fluxCol]
            y = self.fluxMag0Err / self.fluxMag0
            # Standard error propagation for mag = -2.5*log10(flux/fluxMag0).
            magErr = (2.5 / np.log(10.)) * np.sqrt(x*x + y*y)
            return magErr

    @property
    def name(self):
        return super().name + '_err'
class NanoMaggie(Mag):
    """Flux in units of nanomaggies: flux over the mag-0 flux, times 1e9.
    """

    def _func(self, df):
        ratio = df[self.col] / self.fluxMag0
        return ratio * 1e9
class MagDiff(Functor):
    """Functor to calculate magnitude difference"""
    # NOTE: the docstring above was previously placed *after* this attribute,
    # making it a no-op string literal rather than the class `__doc__`.
    _defaultDataset = 'meas'

    def __init__(self, col1, col2, **kwargs):
        self.col1 = fluxName(col1)
        self.col2 = fluxName(col2)
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.col1, self.col2]

    def _func(self, df):
        # `np.warnings` was removed in NumPy 1.25; use the stdlib `warnings`
        # module instead.
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', r'invalid value encountered')
            warnings.filterwarnings('ignore', r'divide by zero')
            return -2.5*np.log10(df[self.col1]/df[self.col2])

    @property
    def name(self):
        return f'(mag_{self.col1} - mag_{self.col2})'

    @property
    def shortname(self):
        return f'magDiff_{self.col1}_{self.col2}'
class Color(Functor):
    """Compute the color between two filters.

    Computes color by initializing two different `Mag`
    functors based on the `col` and filters provided, and
    then returning the difference.

    This is enabled by the `_func` expecting a dataframe with a
    multilevel column index, with both `'filter'` and `'column'`,
    instead of just `'column'`, which is the `Functor` default.
    This is controlled by the `_dfLevels` attribute.

    Also of note, the default dataset for `Color` is `forced_src'`,
    whereas for `Mag` it is `'meas'`.

    Parameters
    ----------
    col : str
        Name of flux column from which to compute; same as would be passed to
        `lsst.pipe.tasks.functors.Mag`.

    filt2, filt1 : str
        Filters from which to compute magnitude difference.
        Color computed is `Mag(filt2) - Mag(filt1)`.
    """
    _defaultDataset = 'forced_src'
    _dfLevels = ('filter', 'column')
    _defaultNoDup = True

    def __init__(self, col, filt2, filt1, **kwargs):
        self.col = fluxName(col)
        if filt2 == filt1:
            raise RuntimeError("Cannot compute Color for %s: %s - %s " % (col, filt2, filt1))
        self.filt2 = filt2
        self.filt1 = filt1

        # One Mag functor per filter; their difference is the color.
        self.mag2 = Mag(col, filt=filt2, **kwargs)
        self.mag1 = Mag(col, filt=filt1, **kwargs)

        super().__init__(**kwargs)

    @property
    def filt(self):
        # A Color spans two filters, so it exposes no single filter.
        return None

    @filt.setter
    def filt(self, filt):
        # Deliberately a no-op; the component Mag functors keep their filters.
        pass

    def _func(self, df):
        mag2 = self.mag2._func(df[self.filt2])
        mag1 = self.mag1._func(df[self.filt1])
        return mag2 - mag1

    @property
    def columns(self):
        return [self.mag1.col, self.mag2.col]

    def multilevelColumns(self, parq):
        # NOTE(review): tuple order here is (dataset, filter, column); confirm
        # this matches the column-index level order of the target table.
        return [(self.dataset, self.filt1, self.col),
                (self.dataset, self.filt2, self.col)]

    @property
    def name(self):
        return f'{self.filt2} - {self.filt1} ({self.col})'

    @property
    def shortname(self):
        return f"{self.col}_{self.filt2.replace('-', '')}m{self.filt1.replace('-', '')}"
class Labeller(Functor):
    """Main function of this subclass is to override the dropna=True
    """
    _null_label = 'null'
    _allow_difference = False
    name = 'label'
    _force_str = False

    def __call__(self, parq, dropna=False, **kwargs):
        # Always force dropna=False, regardless of the argument supplied.
        return super().__call__(parq, dropna=False, **kwargs)
class StarGalaxyLabeller(Labeller):
    """Label sources from the extendedness classification column."""
    _columns = ["base_ClassificationExtendedness_value"]
    _column = "base_ClassificationExtendedness_value"

    def _func(self, df):
        ext = df[self._columns][self._column]
        missing = ext.isnull()
        # Code 1 where extendedness < 0.5, 0 otherwise; nulls get code 2.
        codes = (ext < 0.5).astype(int).mask(missing, 2)

        # TODO: DM-21954 Look into veracity of inline comment below
        # are these backwards?
        categories = ['galaxy', 'star', self._null_label]
        result = pd.Series(pd.Categorical.from_codes(codes, categories=categories),
                           index=ext.index, name='label')
        return result.astype(str) if self._force_str else result
class NumStarLabeller(Labeller):
    """Label objects based on the 'numStarFlags' column."""
    _columns = ['numStarFlags']
    # NOTE(review): these keys disagree with the label list used in `_func`
    # ('noStar'/'maybe'/'star') — confirm which naming is authoritative.
    labels = {"star": 0, "maybe": 1, "notStar": 2}

    def _func(self, df):
        counts = df[self._columns][self._columns[0]]

        # Number of filters
        n = len(counts.unique()) - 1

        labels = ['noStar', 'maybe', 'star']
        result = pd.Series(pd.cut(counts, [-1, 0, n-1, n], labels=labels),
                           index=counts.index, name='label')

        return result.astype(str) if self._force_str else result
class DeconvolvedMoments(Functor):
    """Trace of the source second moments minus the PSF moment trace,
    preferring HSM moments and falling back to SdssShape where HSM is
    not finite."""
    name = 'Deconvolved Moments'
    shortname = 'deconvolvedMoments'
    _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
                "ext_shapeHSM_HsmSourceMoments_yy",
                "base_SdssShape_xx", "base_SdssShape_yy",
                "ext_shapeHSM_HsmPsfMoments_xx",
                "ext_shapeHSM_HsmPsfMoments_yy")

    def _func(self, df):
        """Calculate deconvolved moments"""
        if "ext_shapeHSM_HsmSourceMoments_xx" in df.columns:  # _xx added by tdm
            hsm = df["ext_shapeHSM_HsmSourceMoments_xx"] + df["ext_shapeHSM_HsmSourceMoments_yy"]
        else:
            # No HSM source moments available: fill with NaN so the SDSS
            # fallback below takes over everywhere.
            hsm = np.ones(len(df))*np.nan
        sdss = df["base_SdssShape_xx"] + df["base_SdssShape_yy"]
        if "ext_shapeHSM_HsmPsfMoments_xx" in df.columns:
            psf = df["ext_shapeHSM_HsmPsfMoments_xx"] + df["ext_shapeHSM_HsmPsfMoments_yy"]
        else:
            # LSST does not have shape.sdss.psf. Could instead add base_PsfShape to catalog using
            # exposure.getPsf().computeShape(s.getCentroid()).getIxx()
            # raise TaskError("No psf shape parameter found in catalog")
            raise RuntimeError('No psf shape parameter found in catalog')

        # Use HSM where finite; otherwise fall back to the SDSS trace.
        return hsm.where(np.isfinite(hsm), sdss) - psf
class SdssTraceSize(Functor):
    """Functor to calculate SDSS trace radius size for sources"""
    name = "SDSS Trace Size"
    shortname = 'sdssTrace'
    _columns = ("base_SdssShape_xx", "base_SdssShape_yy")

    def _func(self, df):
        # Trace radius: sqrt of half the trace of the second-moment matrix.
        trace = 0.5*(df["base_SdssShape_xx"] + df["base_SdssShape_yy"])
        return np.sqrt(trace)
class PsfSdssTraceSizeDiff(Functor):
    """Functor to calculate SDSS trace radius size difference (%) between object and psf model"""
    name = "PSF - SDSS Trace Size"
    shortname = 'psf_sdssTrace'
    _columns = ("base_SdssShape_xx", "base_SdssShape_yy",
                "base_SdssShape_psf_xx", "base_SdssShape_psf_yy")

    def _func(self, df):
        srcSize = np.sqrt(0.5*(df["base_SdssShape_xx"] + df["base_SdssShape_yy"]))
        psfSize = np.sqrt(0.5*(df["base_SdssShape_psf_xx"] + df["base_SdssShape_psf_yy"]))
        # Percent difference, normalized by the mean of the two sizes.
        return 100*(srcSize - psfSize)/(0.5*(srcSize + psfSize))
class HsmTraceSize(Functor):
    """Functor to calculate HSM trace radius size for sources"""
    name = 'HSM Trace Size'
    shortname = 'hsmTrace'
    _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
                "ext_shapeHSM_HsmSourceMoments_yy")

    def _func(self, df):
        # Trace radius: sqrt of half the trace of the second-moment matrix.
        trace = 0.5*(df["ext_shapeHSM_HsmSourceMoments_xx"]
                     + df["ext_shapeHSM_HsmSourceMoments_yy"])
        return np.sqrt(trace)
class PsfHsmTraceSizeDiff(Functor):
    """Functor to calculate HSM trace radius size difference (%) between object and psf model"""
    name = 'PSF - HSM Trace Size'
    shortname = 'psf_HsmTrace'
    _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
                "ext_shapeHSM_HsmSourceMoments_yy",
                "ext_shapeHSM_HsmPsfMoments_xx",
                "ext_shapeHSM_HsmPsfMoments_yy")

    def _func(self, df):
        srcSize = np.sqrt(0.5*(df["ext_shapeHSM_HsmSourceMoments_xx"]
                               + df["ext_shapeHSM_HsmSourceMoments_yy"]))
        psfSize = np.sqrt(0.5*(df["ext_shapeHSM_HsmPsfMoments_xx"]
                               + df["ext_shapeHSM_HsmPsfMoments_yy"]))
        # Percent difference, normalized by the mean of the two sizes.
        return 100*(srcSize - psfSize)/(0.5*(srcSize + psfSize))
class HsmFwhm(Functor):
    """FWHM from the HSM PSF moment trace, scaled by a hard-coded pixel
    scale (see DM-21403)."""
    name = 'HSM Psf FWHM'
    _columns = ('ext_shapeHSM_HsmPsfMoments_xx', 'ext_shapeHSM_HsmPsfMoments_yy')
    # TODO: DM-21403 pixel scale should be computed from the CD matrix or transform matrix
    pixelScale = 0.168
    # Conversion between Gaussian sigma and full width at half maximum.
    SIGMA2FWHM = 2*np.sqrt(2*np.log(2))

    def _func(self, df):
        trace = 0.5*(df['ext_shapeHSM_HsmPsfMoments_xx']
                     + df['ext_shapeHSM_HsmPsfMoments_yy'])
        return self.pixelScale*self.SIGMA2FWHM*np.sqrt(trace)
class E1(Functor):
    """Distortion-definition ellipticity component e1.

    Computed from second moments as (Ixx - Iyy) / (Ixx + Iyy), mirroring
    `E2`'s 2*Ixy / (Ixx + Iyy).
    """
    name = "Distortion Ellipticity (e1)"
    shortname = "Distortion"

    def __init__(self, colXX, colXY, colYY, **kwargs):
        self.colXX = colXX
        self.colXY = colXY
        self.colYY = colYY
        self._columns = [self.colXX, self.colXY, self.colYY]
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.colXX, self.colXY, self.colYY]

    def _func(self, df):
        # Bug fix: the numerator must be parenthesized. The previous code
        # computed xx - (yy / (xx + yy)) because of operator precedence.
        return (df[self.colXX] - df[self.colYY]) / (df[self.colXX] + df[self.colYY])
class E2(Functor):
    """Distortion-definition ellipticity component e2: 2*Ixy / (Ixx + Iyy)."""
    name = "Ellipticity e2"

    def __init__(self, colXX, colXY, colYY, **kwargs):
        self.colXX = colXX
        self.colXY = colXY
        self.colYY = colYY
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.colXX, self.colXY, self.colYY]

    def _func(self, df):
        trace = df[self.colXX] + df[self.colYY]
        return 2*df[self.colXY] / trace
class RadiusFromQuadrupole(Functor):
    """Determinant radius from quadrupole moments: (Ixx*Iyy - Ixy**2)**0.25."""

    def __init__(self, colXX, colXY, colYY, **kwargs):
        self.colXX = colXX
        self.colXY = colXY
        self.colYY = colYY
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.colXX, self.colXY, self.colYY]

    def _func(self, df):
        det = df[self.colXX]*df[self.colYY] - df[self.colXY]**2
        # Fourth root of the determinant of the second-moment matrix.
        return det**0.25
class LocalWcs(Functor):
    """Computations using the stored localWcs.
    """
    name = "LocalWcsOperations"

    def __init__(self,
                 colCD_1_1,
                 colCD_1_2,
                 colCD_2_1,
                 colCD_2_2,
                 **kwargs):
        self.colCD_1_1 = colCD_1_1
        self.colCD_1_2 = colCD_1_2
        self.colCD_2_1 = colCD_2_1
        self.colCD_2_2 = colCD_2_2
        super().__init__(**kwargs)

    def computeDeltaRaDec(self, x, y, cd11, cd12, cd21, cd22):
        """Convert a pixel offset (x, y) to a (delta RA, delta Dec) offset.

        Applies the local WCS affine transform (the CD matrix) to the pixel
        offset.  (Docstring corrected: this method does not compute a
        spherical distance.)

        Parameters
        ----------
        x : `pandas.Series`
            X pixel coordinate.
        y : `pandas.Series`
            Y pixel coordinate.
        cd11 : `pandas.Series`
            [1, 1] element of the local Wcs affine transform.
        cd12 : `pandas.Series`
            [1, 2] element of the local Wcs affine transform.
        cd21 : `pandas.Series`
            [2, 1] element of the local Wcs affine transform.
        cd22 : `pandas.Series`
            [2, 2] element of the local Wcs affine transform.

        Returns
        -------
        raDecTuple : tuple
            RA and dec conversion of x and y given the local Wcs. Returned
            units are in radians.
        """
        return (x * cd11 + y * cd12, x * cd21 + y * cd22)

    def computeSkySeperation(self, ra1, dec1, ra2, dec2):
        """Compute the great-circle separation between two sky positions.

        (Docstring corrected: this computes an angular distance, not a pixel
        scale conversion.)

        Parameters
        ----------
        ra1 : `pandas.Series`
            Ra of the first coordinate in radians.
        dec1 : `pandas.Series`
            Dec of the first coordinate in radians.
        ra2 : `pandas.Series`
            Ra of the second coordinate in radians.
        dec2 : `pandas.Series`
            Dec of the second coordinate in radians.

        Returns
        -------
        dist : `pandas.Series`
            Distance on the sphere in radians.
        """
        deltaDec = dec2 - dec1
        deltaRa = ra2 - ra1
        # Haversine formula: numerically stable for small separations.
        return 2 * np.arcsin(
            np.sqrt(
                np.sin(deltaDec / 2) ** 2
                + np.cos(dec2) * np.cos(dec1) * np.sin(deltaRa / 2) ** 2))

    def getSkySeperationFromPixel(self, x1, y1, x2, y2, cd11, cd12, cd21, cd22):
        """Compute the distance on the sphere from (x1, y1) to (x2, y2).

        Parameters
        ----------
        x1 : `pandas.Series`
            X pixel coordinate.
        y1 : `pandas.Series`
            Y pixel coordinate.
        x2 : `pandas.Series`
            X pixel coordinate.
        y2 : `pandas.Series`
            Y pixel coordinate.
        cd11 : `pandas.Series`
            [1, 1] element of the local Wcs affine transform.
        cd12 : `pandas.Series`
            [1, 2] element of the local Wcs affine transform.
        cd21 : `pandas.Series`
            [2, 1] element of the local Wcs affine transform.
        cd22 : `pandas.Series`
            [2, 2] element of the local Wcs affine transform.

        Returns
        -------
        dist : `pandas.Series`
            Distance on the sphere in radians. (Docstring corrected: this is
            an angular distance, not arcseconds per pixel.)
        """
        ra1, dec1 = self.computeDeltaRaDec(x1, y1, cd11, cd12, cd21, cd22)
        ra2, dec2 = self.computeDeltaRaDec(x2, y2, cd11, cd12, cd21, cd22)
        # Great circle distance for small separations.
        return self.computeSkySeperation(ra1, dec1, ra2, dec2)
class ComputePixelScale(LocalWcs):
    """Compute the local pixel scale from the stored CDMatrix.
    """
    name = "PixelScale"

    @property
    def columns(self):
        # The four CD-matrix element columns.
        return [self.colCD_1_1,
                self.colCD_1_2,
                self.colCD_2_1,
                self.colCD_2_2]

    def pixelScaleArcseconds(self, cd11, cd12, cd21, cd22):
        """Compute the local pixel to scale conversion in arcseconds.

        Parameters
        ----------
        cd11 : `pandas.Series`
            [1, 1] element of the local Wcs affine transform in radians.
        cd12 : `pandas.Series`
            [1, 2] element of the local Wcs affine transform in radians.
        cd21 : `pandas.Series`
            [2, 1] element of the local Wcs affine transform in radians.
        cd22 : `pandas.Series`
            [2, 2] element of the local Wcs affine transform in radians.

        Returns
        -------
        pixScale : `pandas.Series`
            Arcseconds per pixel at the location of the local WC
        """
        # sqrt(|det CD|) gives the scale; convert degrees -> arcseconds.
        det = cd11 * cd22 - cd12 * cd21
        return 3600 * np.degrees(np.sqrt(np.fabs(det)))

    def _func(self, df):
        return self.pixelScaleArcseconds(df[self.colCD_1_1],
                                         df[self.colCD_1_2],
                                         df[self.colCD_2_1],
                                         df[self.colCD_2_2])
class ConvertPixelToArcseconds(ComputePixelScale):
    """Convert a value in units pixels to units arcseconds.
    """

    def __init__(self, col, colCD_1_1, colCD_1_2, colCD_2_1, colCD_2_2,
                 **kwargs):
        # Column holding the pixel-valued quantity to convert.
        self.col = col
        super().__init__(colCD_1_1, colCD_1_2, colCD_2_1, colCD_2_2, **kwargs)

    @property
    def name(self):
        return f"{self.col}_asArcseconds"

    @property
    def columns(self):
        return [self.col,
                self.colCD_1_1,
                self.colCD_1_2,
                self.colCD_2_1,
                self.colCD_2_2]

    def _func(self, df):
        # Scale the pixel-valued column by the local arcsec/pixel scale.
        pixScale = self.pixelScaleArcseconds(df[self.colCD_1_1],
                                             df[self.colCD_1_2],
                                             df[self.colCD_2_1],
                                             df[self.colCD_2_2])
        return df[self.col] * pixScale
class ReferenceBand(Functor):
    """Return, per row, which band's merge_measurement_* flag is set."""
    name = 'Reference Band'
    shortname = 'refBand'

    @property
    def columns(self):
        # Order matters: on ties, idxmax picks the first column listed.
        return [f"merge_measurement_{band}"
                for band in ("i", "r", "z", "y", "g")]

    def _func(self, df):
        def getFilterAliasName(row):
            # get column name with the max value (True > False)
            return row.idxmax().replace('merge_measurement_', '')

        return df[self.columns].apply(getFilterAliasName, axis=1)
class Photometry(Functor):
    """Base class converting instrumental (DN) fluxes to calibrated
    fluxes (nanojansky) or AB magnitudes.

    Parameters
    ----------
    colFlux : `str`
        Name of the instrumental flux column.
    colFluxErr : `str`, optional
        Name of the associated flux-error column.
    calib : optional
        Calibration object providing ``getFluxMag0``; if `None`, the
        hard-coded ``COADD_ZP`` zero point is used.
    """
    # AB to NanoJansky (3631 Jansky)
    AB_FLUX_SCALE = (0 * u.ABmag).to_value(u.nJy)
    LOG_AB_FLUX_SCALE = 12.56
    FIVE_OVER_2LOG10 = 1.085736204758129569
    # TO DO: DM-21955 Replace hard coded photometic calibration values
    COADD_ZP = 27

    def __init__(self, colFlux, colFluxErr=None, calib=None, **kwargs):
        self.vhypot = np.vectorize(self.hypot)
        self.col = colFlux
        self.colFluxErr = colFluxErr

        self.calib = calib
        if calib is not None:
            self.fluxMag0, self.fluxMag0Err = calib.getFluxMag0()
        else:
            self.fluxMag0 = 1./np.power(10, -0.4*self.COADD_ZP)
            self.fluxMag0Err = 0.

        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.col]

    @property
    def name(self):
        return f'mag_{self.col}'

    @classmethod
    def hypot(cls, a, b):
        """Overflow-safe sqrt(a**2 + b**2): factor out the larger operand."""
        if np.abs(a) < np.abs(b):
            a, b = b, a
        if a == 0.:
            return 0.
        q = b/a
        return np.abs(a) * np.sqrt(1. + q*q)

    def dn2flux(self, dn, fluxMag0):
        """Convert instrumental flux (DN) to nanojansky."""
        return self.AB_FLUX_SCALE * dn / fluxMag0

    def dn2mag(self, dn, fluxMag0):
        """Convert instrumental flux (DN) to AB magnitude.

        Non-positive fluxes produce NaN/inf silently.
        """
        # Use the stdlib ``warnings`` module: ``np.warnings`` was an
        # undocumented alias for it and was removed in NumPy >= 1.25.
        import warnings
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', r'invalid value encountered')
            warnings.filterwarnings('ignore', r'divide by zero')
            return -2.5 * np.log10(dn/fluxMag0)

    def dn2fluxErr(self, dn, dnErr, fluxMag0, fluxMag0Err):
        """Propagate DN and zero-point errors to a nanojansky flux error."""
        retVal = self.vhypot(dn * fluxMag0Err, dnErr * fluxMag0)
        retVal *= self.AB_FLUX_SCALE / fluxMag0 / fluxMag0
        return retVal

    def dn2MagErr(self, dn, dnErr, fluxMag0, fluxMag0Err):
        """Propagate DN and zero-point errors to a magnitude error."""
        retVal = self.dn2fluxErr(dn, dnErr, fluxMag0, fluxMag0Err) / self.dn2flux(dn, fluxMag0)
        return self.FIVE_OVER_2LOG10 * retVal
class NanoJansky(Photometry):
    """Functor returning the calibrated flux in nanojansky."""

    def _func(self, df):
        instFlux = df[self.col]
        return self.dn2flux(instFlux, self.fluxMag0)
class NanoJanskyErr(Photometry):
    """Functor returning the calibrated flux error in nanojansky."""

    @property
    def columns(self):
        return [self.col, self.colFluxErr]

    def _func(self, df):
        fluxErr = self.dn2fluxErr(df[self.col], df[self.colFluxErr],
                                  self.fluxMag0, self.fluxMag0Err)
        # dn2fluxErr yields a bare ndarray; re-attach the frame's index.
        return pd.Series(fluxErr, index=df.index)
class Magnitude(Photometry):
    """Functor returning the AB magnitude."""

    def _func(self, df):
        instFlux = df[self.col]
        return self.dn2mag(instFlux, self.fluxMag0)
class MagnitudeErr(Photometry):
    """Functor returning the error on the AB magnitude."""

    @property
    def columns(self):
        return [self.col, self.colFluxErr]

    def _func(self, df):
        magErr = self.dn2MagErr(df[self.col], df[self.colFluxErr],
                                self.fluxMag0, self.fluxMag0Err)
        # dn2MagErr yields a bare ndarray; re-attach the frame's index.
        return pd.Series(magErr, index=df.index)
class LocalPhotometry(Functor):
    """Base class for calibrating the specified instrument flux column using
    the local photometric calibration.

    Parameters
    ----------
    instFluxCol : `str`
        Name of the instrument flux column.
    instFluxErrCol : `str`
        Name of the assocated error columns for ``instFluxCol``.
    photoCalibCol : `str`
        Name of local calibration column.
    photoCalibErrCol : `str`
        Error associated with ``photoCalibCol``

    See also
    --------
    LocalPhotometry
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    """
    logNJanskyToAB = (1 * u.nJy).to_value(u.ABmag)

    def __init__(self,
                 instFluxCol,
                 instFluxErrCol,
                 photoCalibCol,
                 photoCalibErrCol,
                 **kwargs):
        self.instFluxCol = instFluxCol
        self.instFluxErrCol = instFluxErrCol
        self.photoCalibCol = photoCalibCol
        self.photoCalibErrCol = photoCalibErrCol
        super().__init__(**kwargs)

    def instFluxToNanojansky(self, instFlux, localCalib):
        """Convert instrument flux to nanojanskys.

        Parameters
        ----------
        instFlux : `numpy.ndarray` or `pandas.Series`
            Array of instrument flux measurements
        localCalib : `numpy.ndarray` or `pandas.Series`
            Array of local photometric calibration estimates.

        Returns
        -------
        calibFlux : `numpy.ndarray` or `pandas.Series`
            Array of calibrated flux measurements.
        """
        return instFlux * localCalib

    def instFluxErrToNanojanskyErr(self, instFlux, instFluxErr, localCalib, localCalibErr):
        """Convert instrument flux error to nanojansky error.

        Parameters
        ----------
        instFlux : `numpy.ndarray` or `pandas.Series`
            Array of instrument flux measurements
        instFluxErr : `numpy.ndarray` or `pandas.Series`
            Errors on associated ``instFlux`` values
        localCalib : `numpy.ndarray` or `pandas.Series`
            Array of local photometric calibration estimates.
        localCalibErr : `numpy.ndarray` or `pandas.Series`
            Errors on associated ``localCalib`` values

        Returns
        -------
        calibFluxErr : `numpy.ndarray` or `pandas.Series`
            Errors on calibrated flux measurements.
        """
        # Standard error propagation for the product instFlux * localCalib.
        return np.hypot(instFluxErr * localCalib, instFlux * localCalibErr)

    def instFluxToMagnitude(self, instFlux, localCalib):
        """Convert instrument flux to an AB magnitude.

        Parameters
        ----------
        instFlux : `numpy.ndarray` or `pandas.Series`
            Array of instrument flux measurements
        localCalib : `numpy.ndarray` or `pandas.Series`
            Array of local photometric calibration estimates.

        Returns
        -------
        calibMag : `numpy.ndarray` or `pandas.Series`
            Array of calibrated AB magnitudes.
        """
        return -2.5 * np.log10(self.instFluxToNanojansky(instFlux, localCalib)) + self.logNJanskyToAB

    def instFluxErrToMagnitudeErr(self, instFlux, instFluxErr, localCalib, localCalibErr):
        """Convert instrument flux error to an AB magnitude error.

        Parameters
        ----------
        instFlux : `numpy.ndarray` or `pandas.Series`
            Array of instrument flux measurements
        instFluxErr : `numpy.ndarray` or `pandas.Series`
            Errors on associated ``instFlux`` values
        localCalib : `numpy.ndarray` or `pandas.Series`
            Array of local photometric calibration estimates.
        localCalibErr : `numpy.ndarray` or `pandas.Series`
            Errors on associated ``localCalib`` values

        Returns
        -------
        calibMagErr: `numpy.ndarray` or `pandas.Series`
            Error on calibrated AB magnitudes.
        """
        err = self.instFluxErrToNanojanskyErr(instFlux, instFluxErr, localCalib, localCalibErr)
        # Bug fix: the denominator must be the flux calibrated with
        # ``localCalib`` (previously ``instFluxErr`` was passed by mistake,
        # producing wrong magnitude errors).
        return 2.5 / np.log(10) * err / self.instFluxToNanojansky(instFlux, localCalib)
class LocalNanojansky(LocalPhotometry):
    """Compute calibrated fluxes using the local calibration value.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    """

    @property
    def columns(self):
        return [self.instFluxCol, self.photoCalibCol]

    @property
    def name(self):
        return f'flux_{self.instFluxCol}'

    def _func(self, df):
        instFlux = df[self.instFluxCol]
        localCalib = df[self.photoCalibCol]
        return self.instFluxToNanojansky(instFlux, localCalib)
class LocalNanojanskyErr(LocalPhotometry):
    """Compute calibrated flux errors using the local calibration value.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    """

    @property
    def columns(self):
        return [self.instFluxCol, self.instFluxErrCol,
                self.photoCalibCol, self.photoCalibErrCol]

    @property
    def name(self):
        return f'fluxErr_{self.instFluxCol}'

    def _func(self, df):
        instFlux = df[self.instFluxCol]
        instFluxErr = df[self.instFluxErrCol]
        localCalib = df[self.photoCalibCol]
        localCalibErr = df[self.photoCalibErrCol]
        return self.instFluxErrToNanojanskyErr(instFlux, instFluxErr,
                                               localCalib, localCalibErr)
class LocalMagnitude(LocalPhotometry):
    """Compute calibrated AB magnitudes using the local calibration value.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    """

    @property
    def columns(self):
        return [self.instFluxCol, self.photoCalibCol]

    @property
    def name(self):
        return f'mag_{self.instFluxCol}'

    def _func(self, df):
        instFlux = df[self.instFluxCol]
        localCalib = df[self.photoCalibCol]
        return self.instFluxToMagnitude(instFlux, localCalib)
class LocalMagnitudeErr(LocalPhotometry):
    """Compute calibrated AB magnitude errors using the local calibration value.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    """

    @property
    def columns(self):
        return [self.instFluxCol, self.instFluxErrCol,
                self.photoCalibCol, self.photoCalibErrCol]

    @property
    def name(self):
        return f'magErr_{self.instFluxCol}'

    def _func(self, df):
        instFlux = df[self.instFluxCol]
        instFluxErr = df[self.instFluxErrCol]
        localCalib = df[self.photoCalibCol]
        localCalibErr = df[self.photoCalibErrCol]
        return self.instFluxErrToMagnitudeErr(instFlux, instFluxErr,
                                              localCalib, localCalibErr)