lsst.pipe.tasks  19.0.0-41-g13db8fbc+1
functors.py
Go to the documentation of this file.
import re
import warnings

import astropy.units as u
import numpy as np
import pandas as pd
import yaml

from lsst.daf.persistence import doImport
from .parquetTable import MultilevelParquetTable
11 
def init_fromDict(initDict, basePath='lsst.pipe.tasks.functors',
                  typeKey='functor', name=None):
    """Initialize an object defined in a dictionary

    The object needs to be importable as
    '{0}.{1}'.format(basePath, initDict[typeKey])
    The positional and keyword arguments (if any) are contained in
    "args" and "kwargs" entries in the dictionary, respectively.
    This is used in `functors.CompositeFunctor.from_yaml` to initialize
    a composite functor from a specification in a YAML file.

    Parameters
    ----------
    initDict : dictionary
        Dictionary describing object's initialization.  Must contain
        an entry keyed by ``typeKey`` that is the name of the object,
        relative to ``basePath``.
    basePath : str
        Path relative to module in which ``initDict[typeKey]`` is defined.
    typeKey : str
        Key of ``initDict`` that is the name of the object
        (relative to ``basePath``).
    name : str, optional
        Label used in the error message if construction fails.
    """
    initDict = initDict.copy()
    # TO DO: DM-21956 We should be able to define functors outside this module
    pythonType = doImport('{0}.{1}'.format(basePath, initDict.pop(typeKey)))
    # 'args' may be absent, a single string, or a list.
    args = initDict.pop('args', [])
    if isinstance(args, str):
        args = [args]
    try:
        element = pythonType(*args, **initDict)
    except Exception as e:
        message = f'Error in constructing functor "{name}" of type {pythonType.__name__} with args: {args}'
        # Chain the original exception so the real traceback is preserved.
        raise type(e)(message, e.args) from e
    return element
49 
50 
class Functor(object):
    """Define and execute a calculation on a ParquetTable

    The `__call__` method accepts a `ParquetTable` object, and returns the
    result of the calculation as a single column.  Each functor defines what
    columns are needed for the calculation, and only these columns are read
    from the `ParquetTable`.

    The action of `__call__` consists of two steps: first, loading the
    necessary columns from disk into memory as a `pandas.DataFrame` object;
    and second, performing the computation on this dataframe and returning the
    result.

    To define a new `Functor`, a subclass must define a `_func` method,
    that takes a `pandas.DataFrame` and returns result in a `pandas.Series`.
    In addition, it must define the following attributes

    * `_columns`: The columns necessary to perform the calculation
    * `name`: A name appropriate for a figure axis label
    * `shortname`: A name appropriate for use as a dictionary key

    On initialization, a `Functor` should declare what filter (`filt` kwarg)
    and dataset (e.g. `'ref'`, `'meas'`, `'forced_src'`) it is intended to be
    applied to.  This enables the `_get_cols` method to extract the proper
    columns from the parquet file.  If not specified, the dataset will fall
    back on the `_defaultDataset` attribute.  If filter is not specified and
    `dataset` is anything other than `'ref'`, then an error will be raised
    when trying to perform the calculation.

    As currently implemented, `Functor` is only set up to expect a
    `ParquetTable` of the format of the `deepCoadd_obj` dataset; that is, a
    `MultilevelParquetTable` with the levels of the column index being
    `filter`, `dataset`, and `column` (see `_columnLevels`).  The `_get_cols`
    method returns a dataframe whose column index levels are given by
    `_dfLevels` (by default just `column`).

    `_columnLevels` and `_dfLevels` should generally not need to be changed,
    unless `_func` needs columns from multiple filters or datasets to do the
    calculation; see `lsst.pipe.tasks.functors.Color` for an example where
    `_dfLevels = ('filter', 'column')`.

    Parameters
    ----------
    filt : str
        Filter upon which to do the calculation

    dataset : str
        Dataset upon which to do the calculation
        (e.g., 'ref', 'meas', 'forced_src').
    """

    _defaultDataset = 'ref'
    _columnLevels = ('filter', 'dataset', 'column')
    _dfLevels = ('column',)
    _defaultNoDup = False

    def __init__(self, filt=None, dataset=None, noDup=None):
        self.filt = filt
        self.dataset = dataset if dataset is not None else self._defaultDataset
        self._noDup = noDup

    @property
    def noDup(self):
        # An explicit per-instance setting wins; otherwise the class default.
        if self._noDup is not None:
            return self._noDup
        else:
            return self._defaultNoDup

    @property
    def columns(self):
        """Columns required to perform calculation
        """
        if not hasattr(self, '_columns'):
            raise NotImplementedError('Must define columns property or _columns attribute')
        return self._columns

    def multilevelColumns(self, parq):
        """Return the column keys needed from a multilevel ParquetTable."""
        if not set(parq.columnLevels) == set(self._columnLevels):
            raise ValueError('ParquetTable does not have the expected column levels. ' +
                             'Got {0}; expected {1}.'.format(parq.columnLevels, self._columnLevels))

        columnDict = {'column': self.columns,
                      'dataset': self.dataset}
        if self.filt is None:
            if 'filter' in parq.columnLevels:
                if self.dataset == 'ref':
                    # For the 'ref' dataset any filter's copy of the column
                    # will do, so take the first one available.
                    columnDict['filter'] = parq.columnLevelNames['filter'][0]
                else:
                    raise ValueError("'filt' not set for functor {}".format(self.name) +
                                     "(dataset {}) ".format(self.dataset) +
                                     "and ParquetTable " +
                                     "contains multiple filters in column index. " +
                                     "Set 'filt' or set 'dataset' to 'ref'.")
        else:
            columnDict['filter'] = self.filt

        return parq._colsFromDict(columnDict)

    def _func(self, df, dropna=True):
        raise NotImplementedError('Must define calculation on dataframe')

    def _get_cols(self, parq):
        """Retrieve dataframe necessary for calculation.

        Returns dataframe upon which `self._func` can act.
        """
        if isinstance(parq, MultilevelParquetTable):
            columns = self.multilevelColumns(parq)
            df = parq.toDataFrame(columns=columns, droplevels=False)
            df = self._setLevels(df)
        else:
            columns = self.columns
            df = parq.toDataFrame(columns=columns)

        return df

    def _setLevels(self, df):
        # Drop every column-index level that _dfLevels does not ask for.
        levelsToDrop = [n for n in df.columns.names if n not in self._dfLevels]
        df.columns = df.columns.droplevel(levelsToDrop)
        return df

    def _dropna(self, vals):
        return vals.dropna()

    def __call__(self, parq, dropna=False):
        """Load the required columns and evaluate the functor.

        Bug fix: previously both `_get_cols` and `_func` were inside one
        ``try`` block, so a failure in `_get_cols` left ``df`` unbound and
        the handler raised a confusing NameError.  Column-loading errors now
        propagate directly; only `_func` failures fall back to `fail`.
        """
        df = self._get_cols(parq)
        try:
            vals = self._func(df)
        except Exception:
            vals = self.fail(df)
        if dropna:
            vals = self._dropna(vals)

        return vals

    def fail(self, df):
        """Return an all-NaN Series aligned with the dataframe's index."""
        return pd.Series(np.full(len(df), np.nan), index=df.index)

    @property
    def name(self):
        """Full name of functor (suitable for figure labels)
        """
        # Bug fix: previously this *returned* the NotImplementedError class
        # instead of raising it.
        raise NotImplementedError('Subclasses must define a name')

    @property
    def shortname(self):
        """Short name of functor (suitable for column name/dict key)
        """
        return self.name
207 
208 
210  """Perform multiple calculations at once on a catalog
211 
212  The role of a `CompositeFunctor` is to group together computations from
213  multiple functors. Instead of returning `pandas.Series` a
214  `CompositeFunctor` returns a `pandas.Dataframe`, with the column names
215  being the keys of `funcDict`.
216 
217  The `columns` attribute of a `CompositeFunctor` is the union of all columns
218  in all the component functors.
219 
220  A `CompositeFunctor` does not use a `_func` method itself; rather,
221  when a `CompositeFunctor` is called, all its columns are loaded
222  at once, and the resulting dataframe is passed to the `_func` method of each component
223  functor. This has the advantage of only doing I/O (reading from parquet file) once,
224  and works because each individual `_func` method of each component functor does not
225  care if there are *extra* columns in the dataframe being passed; only that it must contain
226  *at least* the `columns` it expects.
227 
228  An important and useful class method is `from_yaml`, which takes as argument the path to a YAML
229  file specifying a collection of functors.
230 
231  Parameters
232  ----------
233  funcs : `dict` or `list`
234  Dictionary or list of functors. If a list, then it will be converted
235  into a dictionary according to the `.shortname` attribute of each functor.
236 
237  """
238  dataset = None
239 
240  def __init__(self, funcs, **kwargs):
241 
242  if type(funcs) == dict:
243  self.funcDict = funcs
244  else:
245  self.funcDict = {f.shortname: f for f in funcs}
246 
247  self._filt = None
248 
249  super().__init__(**kwargs)
250 
    @property
    def filt(self):
        # The composite's filter; backed by _filt, set via the setter below.
        return self._filt

    @filt.setter
    def filt(self, filt):
        # Propagate the filter to every component functor before recording it.
        if filt is not None:
            for _, f in self.funcDict.items():
                f.filt = filt
        self._filt = filt
261 
    def update(self, new):
        """Merge another set of functors into this composite.

        Parameters
        ----------
        new : `dict` or `CompositeFunctor`
            Functors to add, keyed by name.
        """
        if isinstance(new, dict):
            self.funcDict.update(new)
        elif isinstance(new, CompositeFunctor):
            self.funcDict.update(new.funcDict)
        else:
            raise TypeError('Can only update with dictionary or CompositeFunctor.')

        # Make sure new functors have the same 'filt' set
        if self.filt is not None:
            self.filt = self.filt
273 
274  @property
275  def columns(self):
276  return list(set([x for y in [f.columns for f in self.funcDict.values()] for x in y]))
277 
278  def multilevelColumns(self, parq):
279  return list(set([x for y in [f.multilevelColumns(parq)
280  for f in self.funcDict.values()] for x in y]))
281 
282  def __call__(self, parq, **kwargs):
283  if isinstance(parq, MultilevelParquetTable):
284  columns = self.multilevelColumns(parq)
285  df = parq.toDataFrame(columns=columns, droplevels=False)
286  valDict = {}
287  for k, f in self.funcDict.items():
288  try:
289  subdf = f._setLevels(df[f.multilevelColumns(parq)])
290  valDict[k] = f._func(subdf)
291  except Exception:
292  valDict[k] = f.fail(subdf)
293  else:
294  columns = self.columns
295  df = parq.toDataFrame(columns=columns)
296  valDict = {k: f._func(df) for k, f in self.funcDict.items()}
297 
298  try:
299  valDf = pd.concat(valDict, axis=1)
300  except TypeError:
301  print([(k, type(v)) for k, v in valDict.items()])
302  raise
303 
304  if kwargs.get('dropna', False):
305  valDf = valDf.dropna(how='any')
306 
307  return valDf
308 
309  @classmethod
310  def renameCol(cls, col, renameRules):
311  if renameRules is None:
312  return col
313  for old, new in renameRules:
314  if col.startswith(old):
315  col = col.replace(old, new)
316  return col
317 
318  @classmethod
319  def from_file(cls, filename, **kwargs):
320  with open(filename) as f:
321  translationDefinition = yaml.safe_load(f)
322 
323  return cls.from_yaml(translationDefinition, **kwargs)
324 
325  @classmethod
326  def from_yaml(cls, translationDefinition, **kwargs):
327  funcs = {}
328  for func, val in translationDefinition['funcs'].items():
329  funcs[func] = init_fromDict(val, name=func)
330 
331  if 'flag_rename_rules' in translationDefinition:
332  renameRules = translationDefinition['flag_rename_rules']
333  else:
334  renameRules = None
335 
336  if 'refFlags' in translationDefinition:
337  for flag in translationDefinition['refFlags']:
338  funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='ref')
339 
340  if 'flags' in translationDefinition:
341  for flag in translationDefinition['flags']:
342  funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='meas')
343 
344  return cls(funcs, **kwargs)
345 
346 
def mag_aware_eval(df, expr):
    """Evaluate an expression on a DataFrame, knowing what the 'mag' function means

    Builds on `pandas.DataFrame.eval`, which parses and executes math on
    dataframes.  ``mag(x)`` is rewritten to ``-2.5*log(x)/log(10)``; if that
    fails (e.g. because the bare column does not exist), it is retried with
    the ``_instFlux`` suffix appended to the column name.

    Parameters
    ----------
    df : pandas.DataFrame
        Dataframe on which to evaluate expression.

    expr : str
        Expression.
    """
    # NOTE: the `truediv` keyword formerly passed to DataFrame.eval was
    # deprecated and then removed from pandas; true division is the default
    # under Python 3, so behavior is unchanged.
    try:
        expr_new = re.sub(r'mag\((\w+)\)', r'-2.5*log(\g<1>)/log(10)', expr)
        val = df.eval(expr_new)
    except Exception:  # Should check what actually gets raised
        expr_new = re.sub(r'mag\((\w+)\)', r'-2.5*log(\g<1>_instFlux)/log(10)', expr)
        val = df.eval(expr_new)
    return val
367 
368 
370  """Arbitrary computation on a catalog
371 
372  Column names (and thus the columns to be loaded from catalog) are found
373  by finding all words and trying to ignore all "math-y" words.
374 
375  Parameters
376  ----------
377  expr : str
378  Expression to evaluate, to be parsed and executed by `mag_aware_eval`.
379  """
    _ignore_words = ('mag', 'sin', 'cos', 'exp', 'log', 'sqrt')  # math tokens, never column names

    def __init__(self, expr, **kwargs):
        # expr : str — expression evaluated by mag_aware_eval.
        self.expr = expr
        super().__init__(**kwargs)

    @property
    def name(self):
        # The expression itself doubles as the display name.
        return self.expr

    @property
    def columns(self):
        """Columns referenced by the expression.

        Every word in the expression is a candidate column, minus the known
        math tokens.  Any argument of ``mag(...)`` without an ``_instFlux``
        suffix is replaced by its ``_instFlux`` counterpart (the bare name is
        dropped from the final list).
        """
        flux_cols = re.findall(r'mag\(\s*(\w+)\s*\)', self.expr)

        cols = [c for c in re.findall(r'[a-zA-Z_]+', self.expr) if c not in self._ignore_words]
        not_a_col = []
        for c in flux_cols:
            if not re.search('_instFlux$', c):
                cols.append('{}_instFlux'.format(c))
                not_a_col.append(c)
            else:
                cols.append(c)

        return list(set([c for c in cols if c not in not_a_col]))

    def _func(self, df):
        # Delegate to the module-level helper that understands 'mag(...)'.
        return mag_aware_eval(df, self.expr)
407 
408 
410  """Get column with specified name
411  """
412 
    def __init__(self, col, **kwargs):
        # col : str — name of the single column to return verbatim.
        self.col = col
        super().__init__(**kwargs)

    @property
    def name(self):
        return self.col

    @property
    def columns(self):
        return [self.col]

    def _func(self, df):
        # Pass the column through unchanged.
        return df[self.col]
427 
428 
class Index(Functor):
    """Functor returning each object's index value as a column."""

    # Dummy column: the framework requires at least one column to be loaded.
    columns = ['coord_ra']
    _defaultDataset = 'ref'
    _defaultNoDup = True

    def _func(self, df):
        idx = df.index
        return pd.Series(idx, index=idx)
439 
440 
    col = 'id'  # placeholder; the returned value is the index, not this column
    _allow_difference = False
    _defaultNoDup = True

    def _func(self, df):
        # The object id is the dataframe index itself.
        return pd.Series(df.index, index=df.index)
448 
449 
451  col = 'base_Footprint_nPix'
452 
453 
455  """Base class for coordinate column, in degrees
456  """
    _radians = True  # stored coordinates are radians; output is converted to degrees

    def __init__(self, col, **kwargs):
        super().__init__(col, **kwargs)

    def _func(self, df):
        # Must not modify original column in case that column is used by another functor
        output = df[self.col] * 180 / np.pi if self._radians else df[self.col]
        return output
466 
467 
469  """Right Ascension, in degrees
470  """
    name = 'RA'
    _defaultNoDup = True

    def __init__(self, **kwargs):
        # Fixed column: the reference right ascension.
        super().__init__('coord_ra', **kwargs)

    def __call__(self, catalog, **kwargs):
        # NOTE(review): this override only delegates to the base
        # implementation and appears redundant — confirm before removing.
        return super().__call__(catalog, **kwargs)
479 
480 
482  """Declination, in degrees
483  """
    name = 'Dec'
    _defaultNoDup = True

    def __init__(self, **kwargs):
        # Fixed column: the reference declination.
        super().__init__('coord_dec', **kwargs)

    def __call__(self, catalog, **kwargs):
        # NOTE(review): this override only delegates to the base
        # implementation and appears redundant — confirm before removing.
        return super().__call__(catalog, **kwargs)
492 
493 
def fluxName(col):
    """Append the '_instFlux' suffix to a column name when it is missing."""
    suffix = '_instFlux'
    return col if col.endswith(suffix) else col + suffix
498 
499 
def fluxErrName(col):
    """Append the '_instFluxErr' suffix to a column name when it is missing."""
    suffix = '_instFluxErr'
    return col if col.endswith(suffix) else col + suffix
504 
505 
class Mag(Functor):
    """Compute calibrated magnitude

    Takes a `calib` argument, which returns the flux at mag=0
    as `calib.getFluxMag0()`.  If not provided, then the default
    `fluxMag0` is 63095734448.0194, which is default for HSC.
    This default should be removed in DM-21955

    This calculation hides warnings about invalid values and dividing by zero.

    As for all functors, a `dataset` and `filt` kwarg should be provided upon
    initialization.  Unlike the default `Functor`, however, the default
    dataset for a `Mag` is `'meas'`, rather than `'ref'`.

    Parameters
    ----------
    col : `str`
        Name of flux column from which to compute magnitude.  Can be parseable
        by `lsst.pipe.tasks.functors.fluxName` function---that is, you can
        pass `'modelfit_CModel'` instead of `'modelfit_CModel_instFlux'`) and
        it will understand.
    calib : `lsst.afw.image.calib.Calib` (optional)
        Object that knows zero point.
    """
    _defaultDataset = 'meas'

    def __init__(self, col, calib=None, **kwargs):
        self.col = fluxName(col)
        self.calib = calib
        if calib is not None:
            self.fluxMag0 = calib.getFluxMag0()[0]
        else:
            # TO DO: DM-21955 Replace hard coded photometric calibration values
            self.fluxMag0 = 63095734448.0194

        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.col]

    def _func(self, df):
        # `np.warnings` was an accidental alias removed in NumPy 2.0;
        # use the stdlib warnings module instead.
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', r'invalid value encountered')
            warnings.filterwarnings('ignore', r'divide by zero')
            return -2.5*np.log10(df[self.col] / self.fluxMag0)

    @property
    def name(self):
        return 'mag_{0}'.format(self.col)
556 
557 
class MagErr(Mag):
    """Compute calibrated magnitude uncertainty

    Takes the same `calib` object as `lsst.pipe.tasks.functors.Mag`.

    Parameters
    ----------
    col : `str`
        Name of flux column
    calib : `lsst.afw.image.calib.Calib` (optional)
        Object that knows zero point.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        if self.calib is not None:
            self.fluxMag0Err = self.calib.getFluxMag0()[1]
        else:
            self.fluxMag0Err = 0.

    @property
    def columns(self):
        # The flux column and its matching error column.
        return [self.col, self.col + 'Err']

    def _func(self, df):
        # Standard propagation: dm = (2.5/ln 10) * sqrt((dF/F)^2 + (dZP/ZP)^2).
        # `np.warnings` was removed in NumPy 2.0; use the stdlib module.
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', r'invalid value encountered')
            warnings.filterwarnings('ignore', r'divide by zero')
            fluxCol, fluxErrCol = self.columns
            x = df[fluxErrCol] / df[fluxCol]
            y = self.fluxMag0Err / self.fluxMag0
            magErr = (2.5 / np.log(10.)) * np.sqrt(x*x + y*y)
        return magErr

    @property
    def name(self):
        return super().name + '_err'
594 
595 
597  """
598  """
599 
600  def _func(self, df):
601  return (df[self.col] / self.fluxMag0) * 1e9
602 
603 
    _defaultDataset = 'meas'

    # NOTE(review): the string below is a no-op statement, not the class
    # docstring (it follows the first class attribute).
    """Functor to calculate magnitude difference"""

    def __init__(self, col1, col2, **kwargs):
        # Both columns are normalized to their '_instFlux' form.
        self.col1 = fluxName(col1)
        self.col2 = fluxName(col2)
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.col1, self.col2]
617 
618  def _func(self, df):
619  with np.warnings.catch_warnings():
620  np.warnings.filterwarnings('ignore', r'invalid value encountered')
621  np.warnings.filterwarnings('ignore', r'divide by zero')
622  return -2.5*np.log10(df[self.col1]/df[self.col2])
623 
    @property
    def name(self):
        # Figure-label form, e.g. "(mag_a_instFlux - mag_b_instFlux)".
        return '(mag_{0} - mag_{1})'.format(self.col1, self.col2)

    @property
    def shortname(self):
        # Dictionary-key form, e.g. "magDiff_a_instFlux_b_instFlux".
        return 'magDiff_{0}_{1}'.format(self.col1, self.col2)
631 
632 
class Color(Functor):
    """Compute the color between two filters

    Computes color by initializing two different `Mag`
    functors based on the `col` and filters provided, and
    then returning the difference.

    This is enabled by the `_func` expecting a dataframe with a
    multilevel column index, with both `'filter'` and `'column'`,
    instead of just `'column'`, which is the `Functor` default.
    This is controlled by the `_dfLevels` attribute.

    Also of note, the default dataset for `Color` is `forced_src'`,
    whereas for `Mag` it is `'meas'`.

    Parameters
    ----------
    col : str
        Name of flux column from which to compute; same as would be passed to
        `lsst.pipe.tasks.functors.Mag`.

    filt2, filt1 : str
        Filters from which to compute magnitude difference.
        Color computed is `Mag(filt2) - Mag(filt1)`.
    """
    _defaultDataset = 'forced_src'
    _dfLevels = ('filter', 'column')
    _defaultNoDup = True

    def __init__(self, col, filt2, filt1, **kwargs):
        self.col = fluxName(col)
        if filt2 == filt1:
            raise RuntimeError("Cannot compute Color for %s: %s - %s " % (col, filt2, filt1))
        self.filt2 = filt2
        self.filt1 = filt1

        # Delegate the per-filter magnitude calculations to two Mag functors.
        self.mag2 = Mag(col, filt=filt2, **kwargs)
        self.mag1 = Mag(col, filt=filt1, **kwargs)

        super().__init__(**kwargs)

    @property
    def filt(self):
        # A color spans two filters, so a single 'filt' is meaningless here.
        return None

    @filt.setter
    def filt(self, filt):
        # Deliberate no-op; see the property above.
        pass

    def _func(self, df):
        # df keeps the ('filter', 'column') levels; select each filter slice.
        mag2 = self.mag2._func(df[self.filt2])
        mag1 = self.mag1._func(df[self.filt1])
        return mag2 - mag1

    @property
    def columns(self):
        return [self.mag1.col, self.mag2.col]

    def multilevelColumns(self, parq):
        # NOTE(review): tuples here are ordered (dataset, filter, column),
        # which differs from Functor._columnLevels ('filter', 'dataset',
        # 'column') — confirm against the ParquetTable column ordering.
        return [(self.dataset, self.filt1, self.col),
                (self.dataset, self.filt2, self.col)]

    @property
    def name(self):
        return '{0} - {1} ({2})'.format(self.filt2, self.filt1, self.col)

    @property
    def shortname(self):
        return '{0}_{1}m{2}'.format(self.col, self.filt2.replace('-', ''),
                                    self.filt1.replace('-', ''))
703 
704 
706  """Main function of this subclass is to override the dropna=True
707  """
708  _null_label = 'null'
709  _allow_difference = False
710  name = 'label'
711  _force_str = False
712 
713  def __call__(self, parq, dropna=False, **kwargs):
714  return super().__call__(parq, dropna=False, **kwargs)
715 
716 
    _columns = ["base_ClassificationExtendedness_value"]
    _column = "base_ClassificationExtendedness_value"

    def _func(self, df):
        # Codes: extendedness < 0.5 -> 1 ('star'), >= 0.5 -> 0 ('galaxy'),
        # NaN -> 2 (the null label).
        x = df[self._columns][self._column]
        mask = x.isnull()
        test = (x < 0.5).astype(int)
        test = test.mask(mask, 2)

        # TODO: DM-21954 Look into veracity of inline comment below
        # are these backwards?
        categories = ['galaxy', 'star', self._null_label]
        label = pd.Series(pd.Categorical.from_codes(test, categories=categories),
                          index=x.index, name='label')
        if self._force_str:
            label = label.astype(str)
        return label
735 
736 
    _columns = ['numStarFlags']
    # NOTE(review): this mapping appears unused — _func below shadows it
    # with its own local label list; confirm before relying on it.
    labels = {"star": 0, "maybe": 1, "notStar": 2}

    def _func(self, df):
        x = df[self._columns][self._columns[0]]

        # Number of filters
        n = len(x.unique()) - 1

        # Bin the star-flag count: 0 -> noStar, 1..n-1 -> maybe, n -> star.
        labels = ['noStar', 'maybe', 'star']
        label = pd.Series(pd.cut(x, [-1, 0, n-1, n], labels=labels),
                          index=x.index, name='label')

        if self._force_str:
            label = label.astype(str)

        return label
755 
756 
    name = 'Deconvolved Moments'
    shortname = 'deconvolvedMoments'
    _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
                "ext_shapeHSM_HsmSourceMoments_yy",
                "base_SdssShape_xx", "base_SdssShape_yy",
                "ext_shapeHSM_HsmPsfMoments_xx",
                "ext_shapeHSM_HsmPsfMoments_yy")

    def _func(self, df):
        """Calculate deconvolved moments"""
        # Prefer the HSM source trace (Ixx + Iyy); NaN placeholder otherwise.
        if "ext_shapeHSM_HsmSourceMoments_xx" in df.columns:  # _xx added by tdm
            hsm = df["ext_shapeHSM_HsmSourceMoments_xx"] + df["ext_shapeHSM_HsmSourceMoments_yy"]
        else:
            hsm = np.ones(len(df))*np.nan
        sdss = df["base_SdssShape_xx"] + df["base_SdssShape_yy"]
        if "ext_shapeHSM_HsmPsfMoments_xx" in df.columns:
            psf = df["ext_shapeHSM_HsmPsfMoments_xx"] + df["ext_shapeHSM_HsmPsfMoments_yy"]
        else:
            # LSST does not have shape.sdss.psf.  Could instead add base_PsfShape to catalog using
            # exposure.getPsf().computeShape(s.getCentroid()).getIxx()
            # raise TaskError("No psf shape parameter found in catalog")
            raise RuntimeError('No psf shape parameter found in catalog')

        # Use the HSM trace where finite, else the SDSS trace; subtract PSF.
        return hsm.where(np.isfinite(hsm), sdss) - psf
782 
783 
785  """Functor to calculate SDSS trace radius size for sources"""
786  name = "SDSS Trace Size"
787  shortname = 'sdssTrace'
788  _columns = ("base_SdssShape_xx", "base_SdssShape_yy")
789 
790  def _func(self, df):
791  srcSize = np.sqrt(0.5*(df["base_SdssShape_xx"] + df["base_SdssShape_yy"]))
792  return srcSize
793 
794 
796  """Functor to calculate SDSS trace radius size difference (%) between object and psf model"""
797  name = "PSF - SDSS Trace Size"
798  shortname = 'psf_sdssTrace'
799  _columns = ("base_SdssShape_xx", "base_SdssShape_yy",
800  "base_SdssShape_psf_xx", "base_SdssShape_psf_yy")
801 
802  def _func(self, df):
803  srcSize = np.sqrt(0.5*(df["base_SdssShape_xx"] + df["base_SdssShape_yy"]))
804  psfSize = np.sqrt(0.5*(df["base_SdssShape_psf_xx"] + df["base_SdssShape_psf_yy"]))
805  sizeDiff = 100*(srcSize - psfSize)/(0.5*(srcSize + psfSize))
806  return sizeDiff
807 
808 
810  """Functor to calculate HSM trace radius size for sources"""
811  name = 'HSM Trace Size'
812  shortname = 'hsmTrace'
813  _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
814  "ext_shapeHSM_HsmSourceMoments_yy")
815 
816  def _func(self, df):
817  srcSize = np.sqrt(0.5*(df["ext_shapeHSM_HsmSourceMoments_xx"] +
818  df["ext_shapeHSM_HsmSourceMoments_yy"]))
819  return srcSize
820 
821 
823  """Functor to calculate HSM trace radius size difference (%) between object and psf model"""
824  name = 'PSF - HSM Trace Size'
825  shortname = 'psf_HsmTrace'
826  _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
827  "ext_shapeHSM_HsmSourceMoments_yy",
828  "ext_shapeHSM_HsmPsfMoments_xx",
829  "ext_shapeHSM_HsmPsfMoments_yy")
830 
831  def _func(self, df):
832  srcSize = np.sqrt(0.5*(df["ext_shapeHSM_HsmSourceMoments_xx"] +
833  df["ext_shapeHSM_HsmSourceMoments_yy"]))
834  psfSize = np.sqrt(0.5*(df["ext_shapeHSM_HsmPsfMoments_xx"] +
835  df["ext_shapeHSM_HsmPsfMoments_yy"]))
836  sizeDiff = 100*(srcSize - psfSize)/(0.5*(srcSize + psfSize))
837  return sizeDiff
838 
839 
    name = 'HSM Psf FWHM'
    _columns = ('ext_shapeHSM_HsmPsfMoments_xx', 'ext_shapeHSM_HsmPsfMoments_yy')
    # TODO: DM-21403 pixel scale should be computed from the CD matrix or transform matrix
    pixelScale = 0.168              # hard-coded pixel scale; see DM-21403
    SIGMA2FWHM = 2*np.sqrt(2*np.log(2))  # Gaussian sigma -> FWHM conversion factor

    def _func(self, df):
        # FWHM from the PSF trace radius, scaled to sky units by pixelScale.
        return self.pixelScale*self.SIGMA2FWHM*np.sqrt(
            0.5*(df['ext_shapeHSM_HsmPsfMoments_xx'] + df['ext_shapeHSM_HsmPsfMoments_yy']))
850 
851 
class E1(Functor):
    """Distortion ellipticity e1 = (Ixx - Iyy) / (Ixx + Iyy) from second moments."""
    name = "Distortion Ellipticity (e1)"
    shortname = "Distortion"

    def __init__(self, colXX, colXY, colYY, **kwargs):
        self.colXX = colXX
        self.colXY = colXY
        self.colYY = colYY
        self._columns = [self.colXX, self.colXY, self.colYY]
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.colXX, self.colXY, self.colYY]

    def _func(self, df):
        # Bug fix: the numerator must be parenthesized.  The previous
        # expression computed Ixx - (Iyy / (Ixx + Iyy)) because '/' binds
        # tighter than '-', which is not the e1 distortion ellipticity.
        return (df[self.colXX] - df[self.colYY]) / (df[self.colXX] + df[self.colYY])
869 
870 
class E2(Functor):
    """Distortion ellipticity e2 = 2*Ixy / (Ixx + Iyy) from second moments."""
    name = "Ellipticity e2"

    def __init__(self, colXX, colXY, colYY, **kwargs):
        self.colXX = colXX
        self.colXY = colXY
        self.colYY = colYY
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.colXX, self.colXY, self.colYY]

    def _func(self, df):
        trace = df[self.colXX] + df[self.colYY]
        return 2 * df[self.colXY] / trace
886 
887 
889 
    def __init__(self, colXX, colXY, colYY, **kwargs):
        # Column names of the three second-moment components.
        self.colXX = colXX
        self.colXY = colXY
        self.colYY = colYY
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.colXX, self.colXY, self.colYY]

    def _func(self, df):
        # Determinant radius: |Q|^(1/4) = (Ixx*Iyy - Ixy^2)^(1/4).
        return (df[self.colXX]*df[self.colYY] - df[self.colXY]**2)**0.25
902 
903 
905  """Compute the local pixel scale from the stored CDMatrix.
906  """
907  name = "Pixel Scale"
908 
909  def __init__(self,
910  colCD_1_1,
911  colCD_1_2,
912  colCD_2_1,
913  colCD_2_2,
914  **kwargs):
915  self.colCD_1_1 = colCD_1_1
916  self.colCD_1_2 = colCD_1_2
917  self.colCD_2_1 = colCD_2_1
918  self.colCD_2_2 = colCD_2_2
919  super().__init__(**kwargs)
920 
921  @property
922  def columns(self):
923  return [self.colCD_1_1, self.colCD_1_2,
924  self.colCD_2_1, self.colCD_2_2]
925 
926  def pixelScale(self, cd11, cd12, cd21, cd22):
927  """Compute the local pixel scale conversion.
928 
929  Parameters
930  ----------
931  cd11 : `pandas.Series`
932  [1, 1] element of the local CDMatricies.
933  cd12 : `pandas.Series`
934  [1, 2] element of the local CDMatricies.
935  cd21 : `pandas.Series`
936  [2, 1] element of the local CDMatricies.
937  cd2 : `pandas.Series`
938  [2, 2] element of the local CDMatricies.
939 
940  Returns
941  -------
942  pixScale : `pandas.Series`
943  Arcseconds per pixel at the location of the local WC
944  """
945  return 3600 * np.sqrt(np.fabs(cd11 * cd22 - cd12 * cd21))
946 
947  def _func(self, df):
948  return self.pixelScale(df[self.colCD_1_1], df[self.colCD_1_2],
949  df[self.colCD_2_1], df[self.colCD_2_2])
950 
951 
953  """Convert a value in units pixels to units arcseconds.
954  """
955  name = "Pixel scale converter"
956 
957  def __init__(self,
958  col,
959  colCD_1_1,
960  colCD_1_2,
961  colCD_2_1,
962  colCD_2_2, **kwargs):
963  self.col = col
964  super().__init__(colCD_1_1, colCD_1_2, colCD_2_1, colCD_2_2, **kwargs)
965 
966  @property
967  def name(self):
968  return f"{self.col}_asArcseconds"
969 
970  @property
971  def columns(self):
972  return [self.col,
973  self.colCD_1_1, self.colCD_1_2,
974  self.colCD_2_1, self.colCD_2_2]
975 
976  def _func(self, df):
977  return df[self.col] * self.pixelScale(df[self.colCD_1_1], df[self.colCD_1_2],
978  df[self.colCD_2_1], df[self.colCD_2_2])
979 
980 
    name = 'Reference Band'
    shortname = 'refBand'

    @property
    def columns(self):
        # Order matters: idxmax picks the FIRST True flag, so this is the
        # band priority (i, r, z, y, g).
        return ["merge_measurement_i",
                "merge_measurement_r",
                "merge_measurement_z",
                "merge_measurement_y",
                "merge_measurement_g"]

    def _func(self, df):
        def getFilterAliasName(row):
            # get column name with the max value (True > False)
            colName = row.idxmax()
            return colName.replace('merge_measurement_', '')

        return df[self.columns].apply(getFilterAliasName, axis=1)
1000 
1001 
    # AB to NanoJansky (3631 Jansky)
    AB_FLUX_SCALE = (0 * u.ABmag).to_value(u.nJy)
    LOG_AB_FLUX_SCALE = 12.56
    FIVE_OVER_2LOG10 = 1.085736204758129569  # 5 / (2*ln(10)); flux-error -> mag-error factor
    # TO DO: DM-21955 Replace hard coded photometric calibration values
    COADD_ZP = 27

    def __init__(self, colFlux, colFluxErr=None, calib=None, **kwargs):
        # Element-wise, overflow-safe hypot (see the classmethod below).
        self.vhypot = np.vectorize(self.hypot)
        self.col = colFlux
        self.colFluxErr = colFluxErr

        self.calib = calib
        if calib is not None:
            self.fluxMag0, self.fluxMag0Err = calib.getFluxMag0()
        else:
            # Fall back to the hard-coded coadd zero point (DM-21955).
            self.fluxMag0 = 1./np.power(10, -0.4*self.COADD_ZP)
            self.fluxMag0Err = 0.

        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.col]

    @property
    def name(self):
        return 'mag_{0}'.format(self.col)

    @classmethod
    def hypot(cls, a, b):
        """Overflow-safe scalar hypot: |a| * sqrt(1 + (b/a)^2) with |a| >= |b|."""
        if np.abs(a) < np.abs(b):
            a, b = b, a
        if a == 0.:
            return 0.
        q = b/a
        return np.abs(a) * np.sqrt(1. + q*q)

    def dn2flux(self, dn, fluxMag0):
        """Convert raw counts to flux in nanojansky."""
        return self.AB_FLUX_SCALE * dn / fluxMag0
1043 
1044  def dn2mag(self, dn, fluxMag0):
1045  with np.warnings.catch_warnings():
1046  np.warnings.filterwarnings('ignore', r'invalid value encountered')
1047  np.warnings.filterwarnings('ignore', r'divide by zero')
1048  return -2.5 * np.log10(dn/fluxMag0)
1049 
1050  def dn2fluxErr(self, dn, dnErr, fluxMag0, fluxMag0Err):
1051  retVal = self.vhypot(dn * fluxMag0Err, dnErr * fluxMag0)
1052  retVal *= self.AB_FLUX_SCALE / fluxMag0 / fluxMag0
1053  return retVal
1054 
1055  def dn2MagErr(self, dn, dnErr, fluxMag0, fluxMag0Err):
1056  retVal = self.dn2fluxErr(dn, dnErr, fluxMag0, fluxMag0Err) / self.dn2flux(dn, fluxMag0)
1057  return self.FIVE_OVER_2LOG10 * retVal
1058 
1059 
1061  def _func(self, df):
1062  return self.dn2flux(df[self.col], self.fluxMag0)
1063 
1064 
1066  @property
1067  def columns(self):
1068  return [self.col, self.colFluxErr]
1069 
1070  def _func(self, df):
1071  retArr = self.dn2fluxErr(df[self.col], df[self.colFluxErr], self.fluxMag0, self.fluxMag0Err)
1072  return pd.Series(retArr, index=df.index)
1073 
1074 
1076  def _func(self, df):
1077  return self.dn2mag(df[self.col], self.fluxMag0)
1078 
1079 
1081  @property
1082  def columns(self):
1083  return [self.col, self.colFluxErr]
1084 
1085  def _func(self, df):
1086  retArr = self.dn2MagErr(df[self.col], df[self.colFluxErr], self.fluxMag0, self.fluxMag0Err)
1087  return pd.Series(retArr, index=df.index)
1088 
1089 
1091  """Base class for calibrating the specified instrument flux column using
1092  the local photometric calibration.
1093 
1094  Parameters
1095  ----------
1096  instFluxCol : `str`
1097  Name of the instrument flux column.
1098  instFluxErrCol : `str`
1099  Name of the assocated error columns for ``instFluxCol``.
1100  photoCalibCol : `str`
1101  Name of local calibration column.
1102  photoCalibErrCol : `str`
1103  Error associated with ``photoCalibCol``
1104 
1105  See also
1106  --------
1107  LocalPhotometry
1108  LocalNanojansky
1109  LocalNanojanskyErr
1110  LocalMagnitude
1111  LocalMagnitudeErr
1112  """
1113  logNJanskyToAB = (1 * u.nJy).to_value(u.ABmag)
1114 
1115  def __init__(self,
1116  instFluxCol,
1117  instFluxErrCol,
1118  photoCalibCol,
1119  photoCalibErrCol,
1120  **kwargs):
1121  self.instFluxCol = instFluxCol
1122  self.instFluxErrCol = instFluxErrCol
1123  self.photoCalibCol = photoCalibCol
1124  self.photoCalibErrCol = photoCalibErrCol
1125  super().__init__(**kwargs)
1126 
1127  def instFluxToNanojansky(self, instFlux, localCalib):
1128  """Convert instrument flux to nanojanskys.
1129 
1130  Parameters
1131  ----------
1132  instFlux : `numpy.ndarray` or `pandas.Series`
1133  Array of instrument flux measurements
1134  localCalib : `numpy.ndarray` or `pandas.Series`
1135  Array of local photometric calibration estimates.
1136 
1137  Returns
1138  -------
1139  calibFlux : `numpy.ndarray` or `pandas.Series`
1140  Array of calibrated flux measurements.
1141  """
1142  return instFlux * localCalib
1143 
1144  def instFluxErrToNanojanskyErr(self, instFlux, instFluxErr, localCalib, localCalibErr):
1145  """Convert instrument flux to nanojanskys.
1146 
1147  Parameters
1148  ----------
1149  instFlux : `numpy.ndarray` or `pandas.Series`
1150  Array of instrument flux measurements
1151  instFluxErr : `numpy.ndarray` or `pandas.Series`
1152  Errors on associated ``instFlux`` values
1153  localCalib : `numpy.ndarray` or `pandas.Series`
1154  Array of local photometric calibration estimates.
1155  localCalibErr : `numpy.ndarray` or `pandas.Series`
1156  Errors on associated ``localCalib`` values
1157 
1158  Returns
1159  -------
1160  calibFluxErr : `numpy.ndarray` or `pandas.Series`
1161  Errors on calibrated flux measurements.
1162  """
1163  return np.hypot(instFluxErr * localCalib, instFlux * localCalibErr)
1164 
1165  def instFluxToMagnitude(self, instFlux, localCalib):
1166  """Convert instrument flux to nanojanskys.
1167 
1168  Parameters
1169  ----------
1170  instFlux : `numpy.ndarray` or `pandas.Series`
1171  Array of instrument flux measurements
1172  localCalib : `numpy.ndarray` or `pandas.Series`
1173  Array of local photometric calibration estimates.
1174 
1175  Returns
1176  -------
1177  calibMag : `numpy.ndarray` or `pandas.Series`
1178  Array of calibrated AB magnitudes.
1179  """
1180  return -2.5 * np.log10(self.instFluxToNanojansky(instFlux, localCalib)) + self.logNJanskyToAB
1181 
1182  def instFluxErrToMagnitudeErr(self, instFlux, instFluxErr, localCalib, localCalibErr):
1183  """Convert instrument flux err to nanojanskys.
1184 
1185  Parameters
1186  ----------
1187  instFlux : `numpy.ndarray` or `pandas.Series`
1188  Array of instrument flux measurements
1189  instFluxErr : `numpy.ndarray` or `pandas.Series`
1190  Errors on associated ``instFlux`` values
1191  localCalib : `numpy.ndarray` or `pandas.Series`
1192  Array of local photometric calibration estimates.
1193  localCalibErr : `numpy.ndarray` or `pandas.Series`
1194  Errors on associated ``localCalib`` values
1195 
1196  Returns
1197  -------
1198  calibMagErr: `numpy.ndarray` or `pandas.Series`
1199  Error on calibrated AB magnitudes.
1200  """
1201  err = self.instFluxErrToNanojanskyErr(instFlux, instFluxErr, localCalib, localCalibErr)
1202  return 2.5 / np.log(10) * err / self.instFluxToNanojansky(instFlux, instFluxErr)
1203 
1204 
1206  """Compute calibrated fluxes using the local calibration value.
1207 
1208  See also
1209  --------
1210  LocalNanojansky
1211  LocalNanojanskyErr
1212  LocalMagnitude
1213  LocalMagnitudeErr
1214  """
1215 
1216  @property
1217  def columns(self):
1218  return [self.instFluxCol, self.photoCalibCol]
1219 
1220  @property
1221  def name(self):
1222  return f'flux_{self.instFluxCol}'
1223 
1224  def _func(self, df):
1225  return self.instFluxToNanojansky(df[self.instFluxCol], df[self.photoCalibCol])
1226 
1227 
1229  """Compute calibrated flux errors using the local calibration value.
1230 
1231  See also
1232  --------
1233  LocalNanojansky
1234  LocalNanojanskyErr
1235  LocalMagnitude
1236  LocalMagnitudeErr
1237  """
1238 
1239  @property
1240  def columns(self):
1241  return [self.instFluxCol, self.instFluxErrCol,
1242  self.photoCalibCol, self.photoCalibErrCol]
1243 
1244  @property
1245  def name(self):
1246  return f'fluxErr_{self.instFluxCol}'
1247 
1248  def _func(self, df):
1249  return self.instFluxErrToNanojanskyErr(df[self.instFluxCol], df[self.instFluxErrCol],
1250  df[self.photoCalibCol], df[self.photoCalibErrCol])
1251 
1252 
1254  """Compute calibrated AB magnitudes using the local calibration value.
1255 
1256  See also
1257  --------
1258  LocalNanojansky
1259  LocalNanojanskyErr
1260  LocalMagnitude
1261  LocalMagnitudeErr
1262  """
1263 
1264  @property
1265  def columns(self):
1266  return [self.instFluxCol, self.photoCalibCol]
1267 
1268  @property
1269  def name(self):
1270  return f'mag_{self.instFluxCol}'
1271 
1272  def _func(self, df):
1273  return self.instFluxToMagnitude(df[self.instFluxCol],
1274  df[self.photoCalibCol])
1275 
1276 
1278  """Compute calibrated AB magnitude errors using the local calibration value.
1279 
1280  See also
1281  --------
1282  LocalNanojansky
1283  LocalNanojanskyErr
1284  LocalMagnitude
1285  LocalMagnitudeErr
1286  """
1287 
1288  @property
1289  def columns(self):
1290  return [self.instFluxCol, self.instFluxErrCol,
1291  self.photoCalibCol, self.photoCalibErrCol]
1292 
1293  @property
1294  def name(self):
1295  return f'magErr_{self.instFluxCol}'
1296 
1297  def _func(self, df):
1298  return self.instFluxErrToMagnitudeErr(df[self.instFluxCol],
1299  df[self.instFluxErrCol],
1300  df[self.photoCalibCol],
1301  df[self.photoCalibErrCol])
def instFluxToNanojansky(self, instFlux, localCalib)
Definition: functors.py:1127
def __init__(self, expr, kwargs)
Definition: functors.py:382
def __call__(self, parq, dropna=False)
Definition: functors.py:182
def pixelScale(self, cd11, cd12, cd21, cd22)
Definition: functors.py:926
def __init__(self, col, kwargs)
Definition: functors.py:413
def dn2MagErr(self, dn, dnErr, fluxMag0, fluxMag0Err)
Definition: functors.py:1055
def instFluxErrToNanojanskyErr(self, instFlux, instFluxErr, localCalib, localCalibErr)
Definition: functors.py:1144
def __call__(self, catalog, kwargs)
Definition: functors.py:477
def _func(self, df, dropna=True)
Definition: functors.py:156
def __init__(self, colCD_1_1, colCD_1_2, colCD_2_1, colCD_2_2, kwargs)
Definition: functors.py:914
def __call__(self, parq, kwargs)
Definition: functors.py:282
def __call__(self, catalog, kwargs)
Definition: functors.py:490
def __init__(self, colXX, colXY, colYY, kwargs)
Definition: functors.py:890
def __init__(self, col1, col2, kwargs)
Definition: functors.py:609
def multilevelColumns(self, parq)
Definition: functors.py:691
def __init__(self, col, filt2, filt1, kwargs)
Definition: functors.py:662
def init_fromDict(initDict, basePath='lsst.pipe.tasks.functors', typeKey='functor', name=None)
Definition: functors.py:13
def __call__(self, parq, dropna=False, kwargs)
Definition: functors.py:713
def mag_aware_eval(df, expr)
Definition: functors.py:347
def renameCol(cls, col, renameRules)
Definition: functors.py:310
def __init__(self, instFluxCol, instFluxErrCol, photoCalibCol, photoCalibErrCol, kwargs)
Definition: functors.py:1120
def __init__(self, filt=None, dataset=None, noDup=None)
Definition: functors.py:114
def __init__(self, colXX, colXY, colYY, kwargs)
Definition: functors.py:856
def from_yaml(cls, translationDefinition, kwargs)
Definition: functors.py:326
def from_file(cls, filename, kwargs)
Definition: functors.py:319
def __init__(self, colFlux, colFluxErr=None, calib=None, kwargs)
Definition: functors.py:1010
def __init__(self, kwargs)
Definition: functors.py:474
def dn2mag(self, dn, fluxMag0)
Definition: functors.py:1044
def __init__(self, col, calib=None, kwargs)
Definition: functors.py:532
def __init__(self, args, kwargs)
Definition: functors.py:570
def dn2fluxErr(self, dn, dnErr, fluxMag0, fluxMag0Err)
Definition: functors.py:1050
def multilevelColumns(self, parq)
Definition: functors.py:134
def dn2flux(self, dn, fluxMag0)
Definition: functors.py:1041
def __init__(self, funcs, kwargs)
Definition: functors.py:240
def instFluxErrToMagnitudeErr(self, instFlux, instFluxErr, localCalib, localCalibErr)
Definition: functors.py:1182
def __init__(self, colXX, colXY, colYY, kwargs)
Definition: functors.py:874
def instFluxToMagnitude(self, instFlux, localCalib)
Definition: functors.py:1165
def __init__(self, col, colCD_1_1, colCD_1_2, colCD_2_1, colCD_2_2, kwargs)
Definition: functors.py:962
def __init__(self, col, kwargs)
Definition: functors.py:459