lsst.pipe.tasks  19.0.0-35-gf07cdd31
functors.py
Go to the documentation of this file.
import re
import warnings

import yaml

import pandas as pd
import numpy as np
import astropy.units as u

from lsst.daf.persistence import doImport
from .parquetTable import MultilevelParquetTable
10 
11 
def init_fromDict(initDict, basePath='lsst.pipe.tasks.functors', typeKey='functor'):
    """Instantiate an object described by a dictionary.

    The object must be importable as
    ``'{0}.{1}'.format(basePath, initDict[typeKey])``.  The positional and
    keyword arguments (if any) are contained in "args" and "kwargs" entries
    in the dictionary, respectively.  This is used in
    `functors.CompositeFunctor.from_yaml` to initialize a composite functor
    from a specification in a YAML file.

    Parameters
    ----------
    initDict : dictionary
        Dictionary describing object's initialization. Must contain
        an entry keyed by ``typeKey`` that is the name of the object,
        relative to ``basePath``.
    basePath : str
        Path relative to module in which ``initDict[typeKey]`` is defined.
    typeKey : str
        Key of ``initDict`` that is the name of the object
        (relative to `basePath`).
    """
    spec = initDict.copy()
    # TO DO: DM-21956 We should be able to define functors outside this module
    pythonType = doImport('{0}.{1}'.format(basePath, spec.pop(typeKey)))
    posArgs = spec.pop('args', [])
    if isinstance(posArgs, str):
        # A lone string counts as a single positional argument.
        posArgs = [posArgs]
    # Everything left over is passed through as keyword arguments.
    return pythonType(*posArgs, **spec)
44 
45 
class Functor(object):
    """Define and execute a calculation on a ParquetTable

    The `__call__` method accepts a `ParquetTable` object, and returns the
    result of the calculation as a single column. Each functor defines what
    columns are needed for the calculation, and only these columns are read
    from the `ParquetTable`.

    The action of `__call__` consists of two steps: first, loading the
    necessary columns from disk into memory as a `pandas.DataFrame` object;
    and second, performing the computation on this dataframe and returning the
    result.

    To define a new `Functor`, a subclass must define a `_func` method,
    that takes a `pandas.DataFrame` and returns result in a `pandas.Series`.
    In addition, it must define the following attributes

    * `_columns`: The columns necessary to perform the calculation
    * `name`: A name appropriate for a figure axis label
    * `shortname`: A name appropriate for use as a dictionary key

    On initialization, a `Functor` should declare what filter (`filt` kwarg)
    and dataset (e.g. `'ref'`, `'meas'`, `'forced_src'`) it is intended to be
    applied to. This enables the `_get_cols` method to extract the proper
    columns from the parquet file. If not specified, the dataset will fall back
    on the `_defaultDataset` attribute. If filter is not specified and `dataset`
    is anything other than `'ref'`, then an error will be raised when trying to
    perform the calculation.

    As currently implemented, `Functor` is only set up to expect a
    `ParquetTable` of the format of the `deepCoadd_obj` dataset; that is, a
    `MultilevelParquetTable` with the levels of the column index being `filter`,
    `dataset`, and `column`. This is defined in the `_columnLevels` attribute,
    as well as being implicit in the role of the `filt` and `dataset` attributes
    defined at initialization. In addition, the `_get_cols` method that reads
    the dataframe from the `ParquetTable` will return a dataframe with column
    index levels defined by the `_dfLevels` attribute; by default, this is
    `column`.

    The `_columnLevels` and `_dfLevels` attributes should generally not need to
    be changed, unless `_func` needs columns from multiple filters or datasets
    to do the calculation.
    An example of this is the `lsst.pipe.tasks.functors.Color` functor, for
    which `_dfLevels = ('filter', 'column')`, and `_func` expects the dataframe
    it gets to have those levels in the column index.

    Parameters
    ----------
    filt : str
        Filter upon which to do the calculation

    dataset : str
        Dataset upon which to do the calculation
        (e.g., 'ref', 'meas', 'forced_src').
    """

    _defaultDataset = 'ref'
    _columnLevels = ('filter', 'dataset', 'column')
    _dfLevels = ('column',)
    _defaultNoDup = False

    def __init__(self, filt=None, dataset=None, noDup=None):
        self.filt = filt
        self.dataset = dataset if dataset is not None else self._defaultDataset
        self._noDup = noDup

    @property
    def noDup(self):
        # An explicit per-instance setting wins over the class default.
        if self._noDup is not None:
            return self._noDup
        else:
            return self._defaultNoDup

    @property
    def columns(self):
        """Columns required to perform calculation
        """
        if not hasattr(self, '_columns'):
            raise NotImplementedError('Must define columns property or _columns attribute')
        return self._columns

    def multilevelColumns(self, parq):
        """Build the multilevel column selection for a `MultilevelParquetTable`.

        Raises
        ------
        ValueError
            If the table's column levels differ from `_columnLevels`, or if
            `filt` is unset for a non-'ref' dataset with multiple filters.
        """
        if not set(parq.columnLevels) == set(self._columnLevels):
            raise ValueError('ParquetTable does not have the expected column levels. ' +
                             'Got {0}; expected {1}.'.format(parq.columnLevels, self._columnLevels))

        columnDict = {'column': self.columns,
                      'dataset': self.dataset}
        if self.filt is None:
            if 'filter' in parq.columnLevels:
                if self.dataset == 'ref':
                    # 'ref' columns are filter-independent, so any filter
                    # present in the table will do.
                    columnDict['filter'] = parq.columnLevelNames['filter'][0]
                else:
                    raise ValueError("'filt' not set for functor {}".format(self.name) +
                                     "(dataset {}) ".format(self.dataset) +
                                     "and ParquetTable " +
                                     "contains multiple filters in column index. " +
                                     "Set 'filt' or set 'dataset' to 'ref'.")
        else:
            columnDict['filter'] = self.filt

        return parq._colsFromDict(columnDict)

    def _func(self, df, dropna=True):
        # Subclasses implement the actual computation here.
        raise NotImplementedError('Must define calculation on dataframe')

    def _get_cols(self, parq):
        """Retrieve dataframe necessary for calculation.

        Returns dataframe upon which `self._func` can act.
        """
        if isinstance(parq, MultilevelParquetTable):
            columns = self.multilevelColumns(parq)
            df = parq.toDataFrame(columns=columns, droplevels=False)
            df = self._setLevels(df)
        else:
            columns = self.columns
            df = parq.toDataFrame(columns=columns)

        return df

    def _setLevels(self, df):
        # Drop every column-index level not listed in `_dfLevels`.
        levelsToDrop = [n for n in df.columns.names if n not in self._dfLevels]
        df.columns = df.columns.droplevel(levelsToDrop)
        return df

    def _dropna(self, vals):
        return vals.dropna()

    def __call__(self, parq, dropna=False):
        """Run the functor on ``parq``; return the result as a `pandas.Series`."""
        # Load columns outside the try: previously a failure in _get_cols
        # reached `self.fail(df)` with `df` unbound, raising a NameError
        # that masked the real error.  A load failure now propagates.
        df = self._get_cols(parq)
        try:
            vals = self._func(df)
        except Exception:
            # Computation failed: return an all-NaN Series of matching shape.
            vals = self.fail(df)
        if dropna:
            vals = self._dropna(vals)

        return vals

    def fail(self, df):
        # All-NaN Series aligned with the input dataframe's index.
        return pd.Series(np.full(len(df), np.nan), index=df.index)

    @property
    def name(self):
        """Full name of functor (suitable for figure labels)
        """
        # NOTE(review): this *returns* (not raises) the NotImplementedError
        # class, so it can appear harmlessly inside format strings — confirm
        # that this is intended before changing it to `raise`.
        return NotImplementedError

    @property
    def shortname(self):
        """Short name of functor (suitable for column name/dict key)
        """
        return self.name
203 
205  """Perform multiple calculations at once on a catalog
206 
207  The role of a `CompositeFunctor` is to group together computations from
208  multiple functors. Instead of returning `pandas.Series` a
209  `CompositeFunctor` returns a `pandas.Dataframe`, with the column names
210  being the keys of `funcDict`.
211 
212  The `columns` attribute of a `CompositeFunctor` is the union of all columns
213  in all the component functors.
214 
215  A `CompositeFunctor` does not use a `_func` method itself; rather,
216  when a `CompositeFunctor` is called, all its columns are loaded
217  at once, and the resulting dataframe is passed to the `_func` method of each component
218  functor. This has the advantage of only doing I/O (reading from parquet file) once,
219  and works because each individual `_func` method of each component functor does not
220  care if there are *extra* columns in the dataframe being passed; only that it must contain
221  *at least* the `columns` it expects.
222 
223  An important and useful class method is `from_yaml`, which takes as argument the path to a YAML
224  file specifying a collection of functors.
225 
226  Parameters
227  ----------
228  funcs : `dict` or `list`
229  Dictionary or list of functors. If a list, then it will be converted
230  into a dictonary according to the `.shortname` attribute of each functor.
231 
232  """
233  dataset = None
234 
235  def __init__(self, funcs, **kwargs):
236 
237  if type(funcs) == dict:
238  self.funcDict = funcs
239  else:
240  self.funcDict = {f.shortname: f for f in funcs}
241 
242  self._filt = None
243 
244  super().__init__(**kwargs)
245 
246  @property
247  def filt(self):
248  return self._filt
249 
250  @filt.setter
251  def filt(self, filt):
252  if filt is not None:
253  for _, f in self.funcDict.items():
254  f.filt = filt
255  self._filt = filt
256 
257  def update(self, new):
258  if isinstance(new, dict):
259  self.funcDict.update(new)
260  elif isinstance(new, CompositeFunctor):
261  self.funcDict.update(new.funcDict)
262  else:
263  raise TypeError('Can only update with dictionary or CompositeFunctor.')
264 
265  # Make sure new functors have the same 'filt' set
266  if self.filt is not None:
267  self.filt = self.filt
268 
269  @property
270  def columns(self):
271  return list(set([x for y in [f.columns for f in self.funcDict.values()] for x in y]))
272 
273  def multilevelColumns(self, parq):
274  return list(set([x for y in [f.multilevelColumns(parq)
275  for f in self.funcDict.values()] for x in y]))
276 
277  def __call__(self, parq, **kwargs):
278  if isinstance(parq, MultilevelParquetTable):
279  columns = self.multilevelColumns(parq)
280  df = parq.toDataFrame(columns=columns, droplevels=False)
281  valDict = {}
282  for k, f in self.funcDict.items():
283  try:
284  subdf = f._setLevels(df[f.multilevelColumns(parq)])
285  valDict[k] = f._func(subdf)
286  except Exception:
287  valDict[k] = f.fail(subdf)
288  else:
289  columns = self.columns
290  df = parq.toDataFrame(columns=columns)
291  valDict = {k: f._func(df) for k, f in self.funcDict.items()}
292 
293  try:
294  valDf = pd.concat(valDict, axis=1)
295  except TypeError:
296  print([(k, type(v)) for k, v in valDict.items()])
297  raise
298 
299  if kwargs.get('dropna', False):
300  valDf = valDf.dropna(how='any')
301 
302  return valDf
303 
304  @classmethod
305  def renameCol(cls, col, renameRules):
306  if renameRules is None:
307  return col
308  for old, new in renameRules:
309  if col.startswith(old):
310  col = col.replace(old, new)
311  return col
312 
313  @classmethod
314  def from_file(cls, filename, **kwargs):
315  with open(filename) as f:
316  translationDefinition = yaml.safe_load(f)
317 
318  return cls.from_yaml(translationDefinition, **kwargs)
319 
320  @classmethod
321  def from_yaml(cls, translationDefinition, **kwargs):
322  funcs = {}
323  for func, val in translationDefinition['funcs'].items():
324  funcs[func] = init_fromDict(val)
325 
326  if 'flag_rename_rules' in translationDefinition:
327  renameRules = translationDefinition['flag_rename_rules']
328  else:
329  renameRules = None
330 
331  if 'refFlags' in translationDefinition:
332  for flag in translationDefinition['refFlags']:
333  funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='ref')
334 
335  if 'flags' in translationDefinition:
336  for flag in translationDefinition['flags']:
337  funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='meas')
338 
339  return cls(funcs, **kwargs)
340 
341 
def mag_aware_eval(df, expr):
    """Evaluate an expression on a DataFrame, knowing what the 'mag' function means

    Builds on `pandas.DataFrame.eval`, which parses and executes math on dataframes.

    Parameters
    ----------
    df : pandas.DataFrame
        Dataframe on which to evaluate expression.

    expr : str
        Expression.
    """
    # 'mag(x)' is shorthand for -2.5*log10(x); DataFrame.eval only knows
    # the natural log, hence the log(...)/log(10) rewrite.
    # NOTE: the deprecated `truediv=True` keyword (removed from pandas'
    # eval) has been dropped; true division is always on under Python 3.
    try:
        expr_new = re.sub(r'mag\((\w+)\)', r'-2.5*log(\g<1>)/log(10)', expr)
        val = df.eval(expr_new)
    except Exception:  # Should check what actually gets raised
        # Fall back to treating the mag() argument as a column stem and
        # appending the '_instFlux' suffix.
        expr_new = re.sub(r'mag\((\w+)\)', r'-2.5*log(\g<1>_instFlux)/log(10)', expr)
        val = df.eval(expr_new)
    return val
362 
363 
365  """Arbitrary computation on a catalog
366 
367  Column names (and thus the columns to be loaded from catalog) are found
368  by finding all words and trying to ignore all "math-y" words.
369 
370  Parameters
371  ----------
372  expr : str
373  Expression to evaluate, to be parsed and executed by `mag_aware_eval`.
374  """
    # Tokens that are math functions, not column names, when parsing `expr`.
    _ignore_words = ('mag', 'sin', 'cos', 'exp', 'log', 'sqrt')

    def __init__(self, expr, **kwargs):
        # The expression string; required columns are derived from it lazily.
        self.expr = expr
        super().__init__(**kwargs)

    @property
    def name(self):
        # The expression itself doubles as the functor's display name.
        return self.expr
384 
    @property
    def columns(self):
        """Column names inferred by parsing the expression.

        Every word in the expression is a candidate column name; known math
        function names (`_ignore_words`) are skipped.  A name ``x`` that
        appears as ``mag(x)`` is translated to the ``x_instFlux`` column
        unless it already carries the ``_instFlux`` suffix.
        """
        flux_cols = re.findall(r'mag\(\s*(\w+)\s*\)', self.expr)

        cols = [c for c in re.findall(r'[a-zA-Z_]+', self.expr) if c not in self._ignore_words]
        not_a_col = []
        for c in flux_cols:
            if not re.search('_instFlux$', c):
                # Bare stem inside mag(): request the _instFlux column and
                # drop the stem itself from the column list below.
                cols.append('{}_instFlux'.format(c))
                not_a_col.append(c)
            else:
                cols.append(c)

        return list(set([c for c in cols if c not in not_a_col]))
399 
    def _func(self, df):
        # Delegate to mag_aware_eval, which understands the mag() shorthand.
        return mag_aware_eval(df, self.expr)
402 
403 
405  """Get column with specified name
406  """
407 
408  def __init__(self, col, **kwargs):
409  self.col = col
410  super().__init__(**kwargs)
411 
412  @property
413  def name(self):
414  return self.col
415 
416  @property
417  def columns(self):
418  return [self.col]
419 
420  def _func(self, df):
421  return df[self.col]
422 
423 
class Index(Functor):
    """Return the value of the index for each object
    """

    # A dummy column: something must be requested even though only the
    # dataframe index is actually used.
    columns = ['coord_ra']
    _defaultDataset = 'ref'
    _defaultNoDup = True

    def _func(self, df):
        # Expose the index itself as a Series aligned with the dataframe.
        return pd.Series(df.index, index=df.index)
435 
    # Nominal column name; the value returned is actually the index.
    col = 'id'
    _allow_difference = False
    _defaultNoDup = True

    def _func(self, df):
        # The object id is the dataframe index; return it as a Series.
        return pd.Series(df.index, index=df.index)
443 
444 
446  col = 'base_Footprint_nPix'
447 
448 
450  """Base class for coordinate column, in degrees
451  """
452  _radians = True
453 
454  def __init__(self, col, **kwargs):
455  super().__init__(col, **kwargs)
456 
457  def _func(self, df):
458  res = df[self.col]
459  if self._radians:
460  res *= 180 / np.pi
461  return res
462 
463 
465  """Right Ascension, in degrees
466  """
467  name = 'RA'
468  _defaultNoDup = True
469 
470  def __init__(self, **kwargs):
471  super().__init__('coord_ra', **kwargs)
472 
473  def __call__(self, catalog, **kwargs):
474  return super().__call__(catalog, **kwargs)
475 
476 
478  """Declination, in degrees
479  """
480  name = 'Dec'
481  _defaultNoDup = True
482 
483  def __init__(self, **kwargs):
484  super().__init__('coord_dec', **kwargs)
485 
486  def __call__(self, catalog, **kwargs):
487  return super().__call__(catalog, **kwargs)
488 
489 
def fluxName(col):
    """Append '_instFlux' to ``col`` unless it already carries the suffix."""
    return col if col.endswith('_instFlux') else col + '_instFlux'
494 
495 
def fluxErrName(col):
    """Append '_instFluxErr' to ``col`` unless it already carries the suffix."""
    return col if col.endswith('_instFluxErr') else col + '_instFluxErr'
500 
501 
class Mag(Functor):
    """Compute calibrated magnitude

    Takes a `calib` argument, which returns the flux at mag=0
    as `calib.getFluxMag0()`. If not provided, then the default
    `fluxMag0` is 63095734448.0194, which is default for HSC.
    This default should be removed in DM-21955

    This calculation hides warnings about invalid values and dividing by zero.

    As for all functors, a `dataset` and `filt` kwarg should be provided upon
    initialization. Unlike the default `Functor`, however, the default dataset
    for a `Mag` is `'meas'`, rather than `'ref'`.

    Parameters
    ----------
    col : `str`
        Name of flux column from which to compute magnitude. Can be parseable
        by `lsst.pipe.tasks.functors.fluxName` function---that is, you can pass
        `'modelfit_CModel'` instead of `'modelfit_CModel_instFlux'`) and it will
        understand.
    calib : `lsst.afw.image.calib.Calib` (optional)
        Object that knows zero point.
    """
    _defaultDataset = 'meas'

    def __init__(self, col, calib=None, **kwargs):
        self.col = fluxName(col)
        self.calib = calib
        if calib is not None:
            self.fluxMag0 = calib.getFluxMag0()[0]
        else:
            # TO DO: DM-21955 Replace hard coded photometic calibration values
            self.fluxMag0 = 63095734448.0194

        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.col]

    def _func(self, df):
        # `np.warnings` is a deprecated alias removed in NumPy >= 1.24;
        # use the stdlib warnings module directly.
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', r'invalid value encountered')
            warnings.filterwarnings('ignore', r'divide by zero')
            return -2.5*np.log10(df[self.col] / self.fluxMag0)

    @property
    def name(self):
        return 'mag_{0}'.format(self.col)
553 
class MagErr(Mag):
    """Compute calibrated magnitude uncertainty

    Takes the same `calib` object as `lsst.pipe.tasks.functors.Mag`.

    Parameters
    ----------
    col : `str`
        Name of flux column
    calib : `lsst.afw.image.calib.Calib` (optional)
        Object that knows zero point.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        if self.calib is not None:
            self.fluxMag0Err = self.calib.getFluxMag0()[1]
        else:
            self.fluxMag0Err = 0.

    @property
    def columns(self):
        # The flux column and its associated error column.
        return [self.col, self.col + 'Err']

    def _func(self, df):
        # np.warnings was removed in NumPy >= 1.24; use stdlib warnings.
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', r'invalid value encountered')
            warnings.filterwarnings('ignore', r'divide by zero')
            fluxCol, fluxErrCol = self.columns
            x = df[fluxErrCol] / df[fluxCol]
            y = self.fluxMag0Err / self.fluxMag0
            # Standard error propagation:
            # dm = (2.5/ln10) * sqrt((dF/F)^2 + (dZP/ZP)^2)
            magErr = (2.5 / np.log(10.)) * np.sqrt(x*x + y*y)
            return magErr

    @property
    def name(self):
        return super().name + '_err'
590 
591 
593  """
594  """
595 
596  def _func(self, df):
597  return (df[self.col] / self.fluxMag0) * 1e9
598 
599 
601  _defaultDataset = 'meas'
602 
603  """Functor to calculate magnitude difference"""
604 
605  def __init__(self, col1, col2, **kwargs):
606  self.col1 = fluxName(col1)
607  self.col2 = fluxName(col2)
608  super().__init__(**kwargs)
609 
610  @property
611  def columns(self):
612  return [self.col1, self.col2]
613 
614  def _func(self, df):
615  with np.warnings.catch_warnings():
616  np.warnings.filterwarnings('ignore', r'invalid value encountered')
617  np.warnings.filterwarnings('ignore', r'divide by zero')
618  return -2.5*np.log10(df[self.col1]/df[self.col2])
619 
620  @property
621  def name(self):
622  return '(mag_{0} - mag_{1})'.format(self.col1, self.col2)
623 
624  @property
625  def shortname(self):
626  return 'magDiff_{0}_{1}'.format(self.col1, self.col2)
627 
628 
class Color(Functor):
    """Compute the color between two filters

    Computes color by initializing two different `Mag`
    functors based on the `col` and filters provided, and
    then returning the difference.

    This is enabled by the `_func` expecting a dataframe with a
    multilevel column index, with both `'filter'` and `'column'`,
    instead of just `'column'`, which is the `Functor` default.
    This is controlled by the `_dfLevels` attribute.

    Also of note, the default dataset for `Color` is `forced_src'`,
    whereas for `Mag` it is `'meas'`.

    Parameters
    ----------
    col : str
        Name of flux column from which to compute; same as would be passed to
        `lsst.pipe.tasks.functors.Mag`.

    filt2, filt1 : str
        Filters from which to compute magnitude difference.
        Color computed is `Mag(filt2) - Mag(filt1)`.
    """
    _defaultDataset = 'forced_src'
    _dfLevels = ('filter', 'column')
    _defaultNoDup = True

    def __init__(self, col, filt2, filt1, **kwargs):
        self.col = fluxName(col)
        if filt2 == filt1:
            raise RuntimeError("Cannot compute Color for %s: %s - %s " % (col, filt2, filt1))
        self.filt2 = filt2
        self.filt1 = filt1

        # One Mag functor per filter; the color is their difference.
        self.mag2 = Mag(col, filt=filt2, **kwargs)
        self.mag1 = Mag(col, filt=filt1, **kwargs)

        super().__init__(**kwargs)

    @property
    def filt(self):
        # A Color spans two filters, so it has no single filter.
        return None

    @filt.setter
    def filt(self, filt):
        # Deliberately ignore attempts to set a single filter.
        pass

    def _func(self, df):
        # df carries a 'filter' column level; select per-filter sub-frames.
        mag2 = self.mag2._func(df[self.filt2])
        mag1 = self.mag1._func(df[self.filt1])
        return mag2 - mag1

    @property
    def columns(self):
        return [self.mag1.col, self.mag2.col]

    def multilevelColumns(self, parq):
        # One (dataset, filter, column) tuple per filter.
        return [(self.dataset, self.filt1, self.col),
                (self.dataset, self.filt2, self.col)]

    @property
    def name(self):
        return '{0} - {1} ({2})'.format(self.filt2, self.filt1, self.col)

    @property
    def shortname(self):
        return '{0}_{1}m{2}'.format(self.col, self.filt2.replace('-', ''),
                                    self.filt1.replace('-', ''))
699 
700 
702  """Main function of this subclass is to override the dropna=True
703  """
704  _null_label = 'null'
705  _allow_difference = False
706  name = 'label'
707  _force_str = False
708 
709  def __call__(self, parq, dropna=False, **kwargs):
710  return super().__call__(parq, dropna=False, **kwargs)
711 
712 
    _columns = ["base_ClassificationExtendedness_value"]
    _column = "base_ClassificationExtendedness_value"

    def _func(self, df):
        # Extendedness < 0.5 -> code 1, otherwise code 0; null measurements
        # are masked to the sentinel code 2 (the null label).
        x = df[self._columns][self._column]
        mask = x.isnull()
        test = (x < 0.5).astype(int)
        test = test.mask(mask, 2)

        # TODO: DM-21954 Look into veracity of inline comment below
        # are these backwards?
        categories = ['galaxy', 'star', self._null_label]
        label = pd.Series(pd.Categorical.from_codes(test, categories=categories),
                          index=x.index, name='label')
        if self._force_str:
            label = label.astype(str)
        return label
731 
732 
    _columns = ['numStarFlags']
    # NOTE(review): this mapping disagrees with the local `labels` list in
    # _func ('notStar' vs 'noStar', different ordering) — confirm which is
    # authoritative.
    labels = {"star": 0, "maybe": 1, "notStar": 2}

    def _func(self, df):
        x = df[self._columns][self._columns[0]]

        # Number of filters
        n = len(x.unique()) - 1

        # Bin the star-flag count: 0 -> noStar, 1..n-1 -> maybe, n -> star.
        labels = ['noStar', 'maybe', 'star']
        label = pd.Series(pd.cut(x, [-1, 0, n-1, n], labels=labels),
                          index=x.index, name='label')

        if self._force_str:
            label = label.astype(str)

        return label
751 
752 
754  name = 'Deconvolved Moments'
755  shortname = 'deconvolvedMoments'
756  _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
757  "ext_shapeHSM_HsmSourceMoments_yy",
758  "base_SdssShape_xx", "base_SdssShape_yy",
759  "ext_shapeHSM_HsmPsfMoments_xx",
760  "ext_shapeHSM_HsmPsfMoments_yy")
761 
762  def _func(self, df):
763  """Calculate deconvolved moments"""
764  if "ext_shapeHSM_HsmSourceMoments_xx" in df.columns: # _xx added by tdm
765  hsm = df["ext_shapeHSM_HsmSourceMoments_xx"] + df["ext_shapeHSM_HsmSourceMoments_yy"]
766  else:
767  hsm = np.ones(len(df))*np.nan
768  sdss = df["base_SdssShape_xx"] + df["base_SdssShape_yy"]
769  if "ext_shapeHSM_HsmPsfMoments_xx" in df.columns:
770  psf = df["ext_shapeHSM_HsmPsfMoments_xx"] + df["ext_shapeHSM_HsmPsfMoments_yy"]
771  else:
772  # LSST does not have shape.sdss.psf. Could instead add base_PsfShape to catalog using
773  # exposure.getPsf().computeShape(s.getCentroid()).getIxx()
774  # raise TaskError("No psf shape parameter found in catalog")
775  raise RuntimeError('No psf shape parameter found in catalog')
776 
777  return hsm.where(np.isfinite(hsm), sdss) - psf
778 
779 
781  """Functor to calculate SDSS trace radius size for sources"""
782  name = "SDSS Trace Size"
783  shortname = 'sdssTrace'
784  _columns = ("base_SdssShape_xx", "base_SdssShape_yy")
785 
786  def _func(self, df):
787  srcSize = np.sqrt(0.5*(df["base_SdssShape_xx"] + df["base_SdssShape_yy"]))
788  return srcSize
789 
790 
792  """Functor to calculate SDSS trace radius size difference (%) between object and psf model"""
793  name = "PSF - SDSS Trace Size"
794  shortname = 'psf_sdssTrace'
795  _columns = ("base_SdssShape_xx", "base_SdssShape_yy",
796  "base_SdssShape_psf_xx", "base_SdssShape_psf_yy")
797 
798  def _func(self, df):
799  srcSize = np.sqrt(0.5*(df["base_SdssShape_xx"] + df["base_SdssShape_yy"]))
800  psfSize = np.sqrt(0.5*(df["base_SdssShape_psf_xx"] + df["base_SdssShape_psf_yy"]))
801  sizeDiff = 100*(srcSize - psfSize)/(0.5*(srcSize + psfSize))
802  return sizeDiff
803 
804 
806  """Functor to calculate HSM trace radius size for sources"""
807  name = 'HSM Trace Size'
808  shortname = 'hsmTrace'
809  _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
810  "ext_shapeHSM_HsmSourceMoments_yy")
811 
812  def _func(self, df):
813  srcSize = np.sqrt(0.5*(df["ext_shapeHSM_HsmSourceMoments_xx"] +
814  df["ext_shapeHSM_HsmSourceMoments_yy"]))
815  return srcSize
816 
817 
819  """Functor to calculate HSM trace radius size difference (%) between object and psf model"""
820  name = 'PSF - HSM Trace Size'
821  shortname = 'psf_HsmTrace'
822  _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
823  "ext_shapeHSM_HsmSourceMoments_yy",
824  "ext_shapeHSM_HsmPsfMoments_xx",
825  "ext_shapeHSM_HsmPsfMoments_yy")
826 
827  def _func(self, df):
828  srcSize = np.sqrt(0.5*(df["ext_shapeHSM_HsmSourceMoments_xx"] +
829  df["ext_shapeHSM_HsmSourceMoments_yy"]))
830  psfSize = np.sqrt(0.5*(df["ext_shapeHSM_HsmPsfMoments_xx"] +
831  df["ext_shapeHSM_HsmPsfMoments_yy"]))
832  sizeDiff = 100*(srcSize - psfSize)/(0.5*(srcSize + psfSize))
833  return sizeDiff
834 
835 
    name = 'HSM Psf FWHM'
    _columns = ('ext_shapeHSM_HsmPsfMoments_xx', 'ext_shapeHSM_HsmPsfMoments_yy')
    # TODO: DM-21403 pixel scale should be computed from the CD matrix or transform matrix
    pixelScale = 0.168  # hard-coded arcsec/pixel — presumably the HSC scale; see TODO above
    SIGMA2FWHM = 2*np.sqrt(2*np.log(2))  # Gaussian sigma-to-FWHM conversion factor

    def _func(self, df):
        # FWHM (arcsec) from the PSF trace radius in pixels.
        return self.pixelScale*self.SIGMA2FWHM*np.sqrt(
            0.5*(df['ext_shapeHSM_HsmPsfMoments_xx'] + df['ext_shapeHSM_HsmPsfMoments_yy']))
846 
847 
class E1(Functor):
    """Distortion ellipticity e1 = (Ixx - Iyy) / (Ixx + Iyy).

    Parameters
    ----------
    colXX, colXY, colYY : `str`
        Names of the second-moment columns.
    """
    name = "Distortion Ellipticity (e1)"
    shortname = "Distortion"

    def __init__(self, colXX, colXY, colYY, **kwargs):
        self.colXX = colXX
        self.colXY = colXY
        self.colYY = colYY
        self._columns = [self.colXX, self.colXY, self.colYY]
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.colXX, self.colXY, self.colYY]

    def _func(self, df):
        # Parenthesize the numerator: the previous code computed
        # Ixx - Iyy/(Ixx + Iyy) due to operator precedence, rather than the
        # distortion (Ixx - Iyy)/(Ixx + Iyy) matching e2 = 2*Ixy/(Ixx + Iyy).
        return (df[self.colXX] - df[self.colYY]) / (df[self.colXX] + df[self.colYY])
865 
866 
class E2(Functor):
    """Distortion ellipticity e2 = 2*Ixy / (Ixx + Iyy)."""
    name = "Ellipticity e2"

    def __init__(self, colXX, colXY, colYY, **kwargs):
        # Column names of the three second moments.
        self.colXX = colXX
        self.colXY = colXY
        self.colYY = colYY
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.colXX, self.colXY, self.colYY]

    def _func(self, df):
        return 2*df[self.colXY] / (df[self.colXX] + df[self.colYY])
882 
883 
885 
    def __init__(self, colXX, colXY, colYY, **kwargs):
        # Column names of the three second moments.
        self.colXX = colXX
        self.colXY = colXY
        self.colYY = colYY
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.colXX, self.colXY, self.colYY]

    def _func(self, df):
        # Determinant radius: |Q|^(1/4) of the quadrupole moment matrix.
        return (df[self.colXX]*df[self.colYY] - df[self.colXY]**2)**0.25
898 
899 
901  """Compute the local pixel scale from the stored CDMatrix.
902  """
903  name = "Pixel Scale"
904 
905  def __init__(self,
906  colCD_1_1,
907  colCD_1_2,
908  colCD_2_1,
909  colCD_2_2,
910  **kwargs):
911  self.colCD_1_1 = colCD_1_1
912  self.colCD_1_2 = colCD_1_2
913  self.colCD_2_1 = colCD_2_1
914  self.colCD_2_2 = colCD_2_2
915  super().__init__(**kwargs)
916 
917  @property
918  def columns(self):
919  return [self.colCD_1_1, self.colCD_1_2,
920  self.colCD_2_1, self.colCD_2_2]
921 
922  def pixelScale(self, cd11, cd12, cd21, cd22):
923  """Compute the local pixel scale conversion.
924 
925  Parameters
926  ----------
927  cd11 : `pandas.Series`
928  [1, 1] element of the local CDMatricies.
929  cd12 : `pandas.Series`
930  [1, 2] element of the local CDMatricies.
931  cd21 : `pandas.Series`
932  [2, 1] element of the local CDMatricies.
933  cd2 : `pandas.Series`
934  [2, 2] element of the local CDMatricies.
935 
936  Returns
937  -------
938  pixScale : `pandas.Series`
939  Arcseconds per pixel at the location of the local WC
940  """
941  return 3600 * np.sqrt(np.fabs(cd11 * cd22 - cd12 * cd21))
942 
943  def _func(self, df):
944  return self.pixelScale(df[self.colCD_1_1], df[self.colCD_1_2],
945  df[self.colCD_2_1], df[self.colCD_2_2])
946 
947 
949  """Convert a value in units pixels to units arcseconds.
950  """
951  name = "Pixel scale converter"
952 
953  def __init__(self,
954  col,
955  colCD_1_1,
956  colCD_1_2,
957  colCD_2_1,
958  colCD_2_2, **kwargs):
959  self.col = col
960  super().__init__(colCD_1_1, colCD_1_2, colCD_2_1, colCD_2_2, **kwargs)
961 
962  @property
963  def name(self):
964  return f"{self.col}_asArcseconds"
965 
966  @property
967  def columns(self):
968  return [self.col,
969  self.colCD_1_1, self.colCD_1_2,
970  self.colCD_2_1, self.colCD_2_2]
971 
972  def _func(self, df):
973  return df[self.col] * self.pixelScale(df[self.colCD_1_1], df[self.colCD_1_2],
974  df[self.colCD_2_1], df[self.colCD_2_2])
975 
976 
    name = 'Reference Band'
    shortname = 'refBand'

    @property
    def columns(self):
        # Boolean merge-measurement flags, one per band.
        return ["merge_measurement_i",
                "merge_measurement_r",
                "merge_measurement_z",
                "merge_measurement_y",
                "merge_measurement_g"]

    def _func(self, df):
        def getFilterAliasName(row):
            # get column name with the max value (True > False)
            colName = row.idxmax()
            return colName.replace('merge_measurement_', '')

        # One band name per row; idxmax returns the first True flag in the
        # priority order given by `columns` (i, r, z, y, g).
        return df[self.columns].apply(getFilterAliasName, axis=1)
996 
997 
999  # AB to NanoJansky (3631 Jansky)
1000  AB_FLUX_SCALE = (0 * u.ABmag).to_value(u.nJy)
1001  LOG_AB_FLUX_SCALE = 12.56
1002  FIVE_OVER_2LOG10 = 1.085736204758129569
1003  # TO DO: DM-21955 Replace hard coded photometic calibration values
1004  COADD_ZP = 27
1005 
1006  def __init__(self, colFlux, colFluxErr=None, calib=None, **kwargs):
1007  self.vhypot = np.vectorize(self.hypot)
1008  self.col = colFlux
1009  self.colFluxErr = colFluxErr
1010 
1011  self.calib = calib
1012  if calib is not None:
1013  self.fluxMag0, self.fluxMag0Err = calib.getFluxMag0()
1014  else:
1015  self.fluxMag0 = 1./np.power(10, -0.4*self.COADD_ZP)
1016  self.fluxMag0Err = 0.
1017 
1018  super().__init__(**kwargs)
1019 
1020  @property
1021  def columns(self):
1022  return [self.col]
1023 
1024  @property
1025  def name(self):
1026  return 'mag_{0}'.format(self.col)
1027 
1028  @classmethod
1029  def hypot(cls, a, b):
1030  if np.abs(a) < np.abs(b):
1031  a, b = b, a
1032  if a == 0.:
1033  return 0.
1034  q = b/a
1035  return np.abs(a) * np.sqrt(1. + q*q)
1036 
1037  def dn2flux(self, dn, fluxMag0):
1038  return self.AB_FLUX_SCALE * dn / fluxMag0
1039 
1040  def dn2mag(self, dn, fluxMag0):
1041  with np.warnings.catch_warnings():
1042  np.warnings.filterwarnings('ignore', r'invalid value encountered')
1043  np.warnings.filterwarnings('ignore', r'divide by zero')
1044  return -2.5 * np.log10(dn/fluxMag0)
1045 
1046  def dn2fluxErr(self, dn, dnErr, fluxMag0, fluxMag0Err):
1047  retVal = self.vhypot(dn * fluxMag0Err, dnErr * fluxMag0)
1048  retVal *= self.AB_FLUX_SCALE / fluxMag0 / fluxMag0
1049  return retVal
1050 
1051  def dn2MagErr(self, dn, dnErr, fluxMag0, fluxMag0Err):
1052  retVal = self.dn2fluxErr(dn, dnErr, fluxMag0, fluxMag0Err) / self.dn2flux(dn, fluxMag0)
1053  return self.FIVE_OVER_2LOG10 * retVal
1054 
1055 
1057  def _func(self, df):
1058  return self.dn2flux(df[self.col], self.fluxMag0)
1059 
1060 
1062  @property
1063  def columns(self):
1064  return [self.col, self.colFluxErr]
1065 
1066  def _func(self, df):
1067  retArr = self.dn2fluxErr(df[self.col], df[self.colFluxErr], self.fluxMag0, self.fluxMag0Err)
1068  return pd.Series(retArr, index=df.index)
1069 
1070 
1072  def _func(self, df):
1073  return self.dn2mag(df[self.col], self.fluxMag0)
1074 
1075 
1077  @property
1078  def columns(self):
1079  return [self.col, self.colFluxErr]
1080 
1081  def _func(self, df):
1082  retArr = self.dn2MagErr(df[self.col], df[self.colFluxErr], self.fluxMag0, self.fluxMag0Err)
1083  return pd.Series(retArr, index=df.index)
1084 
1085 
1087  """Base class for calibrating the specified instrument flux column using
1088  the local photometric calibration.
1089 
1090  Parameters
1091  ----------
1092  instFluxCol : `str`
1093  Name of the instrument flux column.
1094  instFluxErrCol : `str`
        Name of the associated error column for ``instFluxCol``.
1096  photoCalibCol : `str`
1097  Name of local calibration column.
1098  photoCalibErrCol : `str`
1099  Error associated with ``photoCalibCol``
1100 
1101  See also
1102  --------
1103  LocalPhotometry
1104  LocalNanojansky
1105  LocalNanojanskyErr
1106  LocalMagnitude
1107  LocalMagnitudeErr
1108  """
1109  logNJanskyToAB = (1 * u.nJy).to_value(u.ABmag)
1110 
1111  def __init__(self,
1112  instFluxCol,
1113  instFluxErrCol,
1114  photoCalibCol,
1115  photoCalibErrCol,
1116  **kwargs):
1117  self.instFluxCol = instFluxCol
1118  self.instFluxErrCol = instFluxErrCol
1119  self.photoCalibCol = photoCalibCol
1120  self.photoCalibErrCol = photoCalibErrCol
1121  super().__init__(**kwargs)
1122 
1123  def instFluxToNanojansky(self, instFlux, localCalib):
1124  """Convert instrument flux to nanojanskys.
1125 
1126  Parameters
1127  ----------
1128  instFlux : `numpy.ndarray` or `pandas.Series`
1129  Array of instrument flux measurements
1130  localCalib : `numpy.ndarray` or `pandas.Series`
1131  Array of local photometric calibration estimates.
1132 
1133  Returns
1134  -------
1135  calibFlux : `numpy.ndarray` or `pandas.Series`
1136  Array of calibrated flux measurements.
1137  """
1138  return instFlux * localCalib
1139 
1140  def instFluxErrToNanojanskyErr(self, instFlux, instFluxErr, localCalib, localCalibErr):
1141  """Convert instrument flux to nanojanskys.
1142 
1143  Parameters
1144  ----------
1145  instFlux : `numpy.ndarray` or `pandas.Series`
1146  Array of instrument flux measurements
1147  instFluxErr : `numpy.ndarray` or `pandas.Series`
1148  Errors on associated ``instFlux`` values
1149  localCalib : `numpy.ndarray` or `pandas.Series`
1150  Array of local photometric calibration estimates.
1151  localCalibErr : `numpy.ndarray` or `pandas.Series`
1152  Errors on associated ``localCalib`` values
1153 
1154  Returns
1155  -------
1156  calibFluxErr : `numpy.ndarray` or `pandas.Series`
1157  Errors on calibrated flux measurements.
1158  """
1159  return np.hypot(instFluxErr * localCalib, instFlux * localCalibErr)
1160 
1161  def instFluxToMagnitude(self, instFlux, localCalib):
1162  """Convert instrument flux to nanojanskys.
1163 
1164  Parameters
1165  ----------
1166  instFlux : `numpy.ndarray` or `pandas.Series`
1167  Array of instrument flux measurements
1168  localCalib : `numpy.ndarray` or `pandas.Series`
1169  Array of local photometric calibration estimates.
1170 
1171  Returns
1172  -------
1173  calibMag : `numpy.ndarray` or `pandas.Series`
1174  Array of calibrated AB magnitudes.
1175  """
1176  return -2.5 * np.log10(self.instFluxToNanojansky(instFlux, localCalib)) + self.logNJanskyToAB
1177 
1178  def instFluxErrToMagnitudeErr(self, instFlux, instFluxErr, localCalib, localCalibErr):
1179  """Convert instrument flux err to nanojanskys.
1180 
1181  Parameters
1182  ----------
1183  instFlux : `numpy.ndarray` or `pandas.Series`
1184  Array of instrument flux measurements
1185  instFluxErr : `numpy.ndarray` or `pandas.Series`
1186  Errors on associated ``instFlux`` values
1187  localCalib : `numpy.ndarray` or `pandas.Series`
1188  Array of local photometric calibration estimates.
1189  localCalibErr : `numpy.ndarray` or `pandas.Series`
1190  Errors on associated ``localCalib`` values
1191 
1192  Returns
1193  -------
1194  calibMagErr: `numpy.ndarray` or `pandas.Series`
1195  Error on calibrated AB magnitudes.
1196  """
1197  err = self.instFluxErrToNanojanskyErr(instFlux, instFluxErr, localCalib, localCalibErr)
1198  return 2.5 / np.log(10) * err / self.instFluxToNanojansky(instFlux, instFluxErr)
1199 
1200 
1202  """Compute calibrated fluxes using the local calibration value.
1203 
1204  See also
1205  --------
1206  LocalNanojansky
1207  LocalNanojanskyErr
1208  LocalMagnitude
1209  LocalMagnitudeErr
1210  """
1211 
1212  @property
1213  def columns(self):
1214  return [self.instFluxCol, self.photoCalibCol]
1215 
1216  @property
1217  def name(self):
1218  return f'flux_{self.instFluxCol}'
1219 
1220  def _func(self, df):
1221  return self.instFluxToNanojansky(df[self.instFluxCol], df[self.photoCalibCol])
1222 
1223 
1225  """Compute calibrated flux errors using the local calibration value.
1226 
1227  See also
1228  --------
1229  LocalNanojansky
1230  LocalNanojanskyErr
1231  LocalMagnitude
1232  LocalMagnitudeErr
1233  """
1234 
1235  @property
1236  def columns(self):
1237  return [self.instFluxCol, self.instFluxErrCol,
1238  self.photoCalibCol, self.photoCalibErrCol]
1239 
1240  @property
1241  def name(self):
1242  return f'fluxErr_{self.instFluxCol}'
1243 
1244  def _func(self, df):
1245  return self.instFluxErrToNanojanskyErr(df[self.instFluxCol], df[self.instFluxErrCol],
1246  df[self.photoCalibCol], df[self.photoCalibErrCol])
1247 
1248 
1250  """Compute calibrated AB magnitudes using the local calibration value.
1251 
1252  See also
1253  --------
1254  LocalNanojansky
1255  LocalNanojanskyErr
1256  LocalMagnitude
1257  LocalMagnitudeErr
1258  """
1259 
1260  @property
1261  def columns(self):
1262  return [self.instFluxCol, self.photoCalibCol]
1263 
1264  @property
1265  def name(self):
1266  return f'mag_{self.instFluxCol}'
1267 
1268  def _func(self, df):
1269  return self.instFluxToMagnitude(df[self.instFluxCol],
1270  df[self.photoCalibCol])
1271 
1272 
1274  """Compute calibrated AB magnitude errors using the local calibration value.
1275 
1276  See also
1277  --------
1278  LocalNanojansky
1279  LocalNanojanskyErr
1280  LocalMagnitude
1281  LocalMagnitudeErr
1282  """
1283 
1284  @property
1285  def columns(self):
1286  return [self.instFluxCol, self.instFluxErrCol,
1287  self.photoCalibCol, self.photoCalibErrCol]
1288 
1289  @property
1290  def name(self):
1291  return f'magErr_{self.instFluxCol}'
1292 
1293  def _func(self, df):
1294  return self.instFluxErrToMagnitudeErr(df[self.instFluxCol],
1295  df[self.instFluxErrCol],
1296  df[self.photoCalibCol],
1297  df[self.photoCalibErrCol])
def instFluxToNanojansky(self, instFlux, localCalib)
Definition: functors.py:1123
def __init__(self, expr, kwargs)
Definition: functors.py:377
def __call__(self, parq, dropna=False)
Definition: functors.py:177
def pixelScale(self, cd11, cd12, cd21, cd22)
Definition: functors.py:922
def __init__(self, col, kwargs)
Definition: functors.py:408
def dn2MagErr(self, dn, dnErr, fluxMag0, fluxMag0Err)
Definition: functors.py:1051
def instFluxErrToNanojanskyErr(self, instFlux, instFluxErr, localCalib, localCalibErr)
Definition: functors.py:1140
def __call__(self, catalog, kwargs)
Definition: functors.py:473
def _func(self, df, dropna=True)
Definition: functors.py:151
def __init__(self, colCD_1_1, colCD_1_2, colCD_2_1, colCD_2_2, kwargs)
Definition: functors.py:910
def __call__(self, parq, kwargs)
Definition: functors.py:277
def __call__(self, catalog, kwargs)
Definition: functors.py:486
def __init__(self, colXX, colXY, colYY, kwargs)
Definition: functors.py:886
def __init__(self, col1, col2, kwargs)
Definition: functors.py:605
def multilevelColumns(self, parq)
Definition: functors.py:687
def __init__(self, col, filt2, filt1, kwargs)
Definition: functors.py:658
def __call__(self, parq, dropna=False, kwargs)
Definition: functors.py:709
def mag_aware_eval(df, expr)
Definition: functors.py:342
def renameCol(cls, col, renameRules)
Definition: functors.py:305
def __init__(self, instFluxCol, instFluxErrCol, photoCalibCol, photoCalibErrCol, kwargs)
Definition: functors.py:1116
def __init__(self, filt=None, dataset=None, noDup=None)
Definition: functors.py:109
def __init__(self, colXX, colXY, colYY, kwargs)
Definition: functors.py:852
def from_yaml(cls, translationDefinition, kwargs)
Definition: functors.py:321
def from_file(cls, filename, kwargs)
Definition: functors.py:314
def __init__(self, colFlux, colFluxErr=None, calib=None, kwargs)
Definition: functors.py:1006
def __init__(self, kwargs)
Definition: functors.py:470
def dn2mag(self, dn, fluxMag0)
Definition: functors.py:1040
def __init__(self, col, calib=None, kwargs)
Definition: functors.py:528
def __init__(self, args, kwargs)
Definition: functors.py:566
def dn2fluxErr(self, dn, dnErr, fluxMag0, fluxMag0Err)
Definition: functors.py:1046
def multilevelColumns(self, parq)
Definition: functors.py:129
def dn2flux(self, dn, fluxMag0)
Definition: functors.py:1037
def __init__(self, funcs, kwargs)
Definition: functors.py:235
def init_fromDict(initDict, basePath='lsst.pipe.tasks.functors', typeKey='functor')
Definition: functors.py:12
def instFluxErrToMagnitudeErr(self, instFlux, instFluxErr, localCalib, localCalibErr)
Definition: functors.py:1178
def __init__(self, colXX, colXY, colYY, kwargs)
Definition: functors.py:870
def instFluxToMagnitude(self, instFlux, localCalib)
Definition: functors.py:1161
def __init__(self, col, colCD_1_1, colCD_1_2, colCD_2_1, colCD_2_2, kwargs)
Definition: functors.py:958
def __init__(self, col, kwargs)
Definition: functors.py:454