# lsst.pipe.tasks 21.0.0-141-gec8a224e+c9dc52cf11
# functors.py (recovered from a documentation-site export of this file)
1 # This file is part of pipe_tasks.
2 #
3 # LSST Data Management System
4 # This product includes software developed by the
5 # LSST Project (http://www.lsst.org/).
6 # See COPYRIGHT file at the top of the source tree.
7 #
8 # This program is free software: you can redistribute it and/or modify
9 # it under the terms of the GNU General Public License as published by
10 # the Free Software Foundation, either version 3 of the License, or
11 # (at your option) any later version.
12 #
13 # This program is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details.
17 #
18 # You should have received a copy of the LSST License Statement and
19 # the GNU General Public License along with this program. If not,
20 # see <https://www.lsstcorp.org/LegalNotices/>.
21 #
22 import yaml
23 import re
24 from itertools import product
25 import os.path
26 
27 import pandas as pd
28 import numpy as np
29 import astropy.units as u
30 
31 from lsst.daf.persistence import doImport
32 from lsst.daf.butler import DeferredDatasetHandle
33 from .parquetTable import ParquetTable, MultilevelParquetTable
34 
35 
def init_fromDict(initDict, basePath='lsst.pipe.tasks.functors',
                  typeKey='functor', name=None):
    """Initialize an object defined in a dictionary

    The object needs to be importable as
    f'{basePath}.{initDict[typeKey]}'
    The positional and keyword arguments (if any) are contained in
    "args" and "kwargs" entries in the dictionary, respectively.
    This is used in `functors.CompositeFunctor.from_yaml` to initialize
    a composite functor from a specification in a YAML file.

    Parameters
    ----------
    initDict : dictionary
        Dictionary describing object's initialization.  Must contain
        an entry keyed by ``typeKey`` that is the name of the object,
        relative to ``basePath``.
    basePath : str
        Path relative to module in which ``initDict[typeKey]`` is defined.
    typeKey : str
        Key of ``initDict`` that is the name of the object
        (relative to ``basePath``).
    name : str, optional
        Label used only to make the error message more informative.
    """
    initDict = initDict.copy()
    # TO DO: DM-21956 We should be able to define functors outside this module
    pythonType = doImport(f'{basePath}.{initDict.pop(typeKey)}')
    args = []
    if 'args' in initDict:
        args = initDict.pop('args')
        if isinstance(args, str):
            args = [args]
    try:
        element = pythonType(*args, **initDict)
    except Exception as e:
        message = f'Error in constructing functor "{name}" of type {pythonType.__name__} with args: {args}'
        # Chain the original exception so the real traceback is preserved.
        raise type(e)(message, e.args) from e
    return element
73 
74 
class Functor(object):
    """Define and execute a calculation on a ParquetTable

    The `__call__` method accepts either a `ParquetTable` object or a
    `DeferredDatasetHandle`, and returns the
    result of the calculation as a single column.  Each functor defines what
    columns are needed for the calculation, and only these columns are read
    from the `ParquetTable`.

    The action of `__call__` consists of two steps: first, loading the
    necessary columns from disk into memory as a `pandas.DataFrame` object;
    and second, performing the computation on this dataframe and returning the
    result.

    To define a new `Functor`, a subclass must define a `_func` method,
    that takes a `pandas.DataFrame` and returns result in a `pandas.Series`.
    In addition, it must define the following attributes

    * `_columns`: The columns necessary to perform the calculation
    * `name`: A name appropriate for a figure axis label
    * `shortname`: A name appropriate for use as a dictionary key

    On initialization, a `Functor` should declare what band (`filt` kwarg)
    and dataset (e.g. `'ref'`, `'meas'`, `'forced_src'`) it is intended to be
    applied to.  This enables the `_get_data` method to extract the proper
    columns from the parquet file.  If not specified, the dataset will fall
    back on the `_defaultDataset` attribute.  If band is not specified and
    `dataset` is anything other than `'ref'`, then an error will be raised
    when trying to perform the calculation.

    Originally, `Functor` was set up to expect datasets formatted like the
    `deepCoadd_obj` dataset; that is, a dataframe with a multi-level column
    index, with the levels of the column index being `band`, `dataset`, and
    `column`.  It has since been generalized to apply to dataframes without
    multi-level indices and multi-level indices with just `dataset` and
    `column` levels.  In addition, the `_get_data` method that reads the
    dataframe from the `ParquetTable` will return a dataframe with column
    index levels defined by the `_dfLevels` attribute; by default, this is
    `column`.

    The `_dfLevels` attributes should generally not need to be changed,
    unless `_func` needs columns from multiple filters or datasets to do the
    calculation.  An example of this is the
    `lsst.pipe.tasks.functors.Color` functor, for which
    `_dfLevels = ('band', 'column')`, and `_func` expects the dataframe it
    gets to have those levels in the column index.

    Parameters
    ----------
    filt : str
        Filter upon which to do the calculation

    dataset : str
        Dataset upon which to do the calculation
        (e.g., 'ref', 'meas', 'forced_src').
    """

    _defaultDataset = 'ref'
    _dfLevels = ('column',)
    _defaultNoDup = False

    def __init__(self, filt=None, dataset=None, noDup=None):
        self.filt = filt
        self.dataset = dataset if dataset is not None else self._defaultDataset
        self._noDup = noDup

    @property
    def noDup(self):
        # Explicit setting wins; otherwise fall back to the class default.
        if self._noDup is not None:
            return self._noDup
        else:
            return self._defaultNoDup

    @property
    def columns(self):
        """Columns required to perform calculation
        """
        if not hasattr(self, '_columns'):
            raise NotImplementedError('Must define columns property or _columns attribute')
        return self._columns

    def _get_data_columnLevels(self, data, columnIndex=None):
        """Gets the names of the column index levels

        This should only be called in the context of a multilevel table.
        The logic here is to enable this to work both with the gen2
        `MultilevelParquetTable` and with the gen3 `DeferredDatasetHandle`.

        Parameters
        ----------
        data : `MultilevelParquetTable` or `DeferredDatasetHandle`

        columnIndex (optional): pandas `Index` object
            if not passed, then it is read from the `DeferredDatasetHandle`
        """
        if isinstance(data, DeferredDatasetHandle):
            if columnIndex is None:
                columnIndex = data.get(component="columns")
        if columnIndex is not None:
            return columnIndex.names
        if isinstance(data, MultilevelParquetTable):
            return data.columnLevels
        else:
            raise TypeError(f"Unknown type for data: {type(data)}!")

    def _get_data_columnLevelNames(self, data, columnIndex=None):
        """Gets the content of each of the column levels for a multilevel table

        Similar to `_get_data_columnLevels`, this enables backward
        compatibility with gen2.

        Mirrors original gen2 implementation within
        `pipe.tasks.parquetTable.MultilevelParquetTable`
        """
        if isinstance(data, DeferredDatasetHandle):
            if columnIndex is None:
                columnIndex = data.get(component="columns")
        if columnIndex is not None:
            columnLevels = columnIndex.names
            columnLevelNames = {
                level: list(np.unique(np.array([c for c in columnIndex])[:, i]))
                for i, level in enumerate(columnLevels)
            }
            return columnLevelNames
        if isinstance(data, MultilevelParquetTable):
            return data.columnLevelNames
        else:
            raise TypeError(f"Unknown type for data: {type(data)}!")

    def _colsFromDict(self, colDict, columnIndex=None):
        """Converts dictionary column specficiation to a list of columns

        This mirrors the original gen2 implementation within
        `pipe.tasks.parquetTable.MultilevelParquetTable`
        """
        new_colDict = {}
        columnLevels = self._get_data_columnLevels(None, columnIndex=columnIndex)

        for i, lev in enumerate(columnLevels):
            if lev in colDict:
                if isinstance(colDict[lev], str):
                    new_colDict[lev] = [colDict[lev]]
                else:
                    new_colDict[lev] = colDict[lev]
            else:
                # Level not constrained: take every value present in the index.
                new_colDict[lev] = columnIndex.levels[i]

        levelCols = [new_colDict[lev] for lev in columnLevels]
        cols = product(*levelCols)
        return list(cols)

    def multilevelColumns(self, data, columnIndex=None, returnTuple=False):
        """Returns columns needed by functor from multilevel dataset

        To access tables with multilevel column structure, the
        `MultilevelParquetTable` or `DeferredDatasetHandle` need to be passed
        either a list of tuples or a dictionary.

        Parameters
        ----------
        data : `MultilevelParquetTable` or `DeferredDatasetHandle`

        columnIndex (optional): pandas `Index` object
            either passed or read in from `DeferredDatasetHandle`.

        `returnTuple` : bool
            If true, then return a list of tuples rather than the column
            dictionary specification.  This is set to `True` by
            `CompositeFunctor` in order to be able to combine columns from the
            various component functors.
        """
        if isinstance(data, DeferredDatasetHandle) and columnIndex is None:
            columnIndex = data.get(component="columns")

        # Confirm that the dataset has the column levels the functor is
        # expecting it to have.
        columnLevels = self._get_data_columnLevels(data, columnIndex)

        columnDict = {'column': self.columns,
                      'dataset': self.dataset}
        if self.filt is None:
            columnLevelNames = self._get_data_columnLevelNames(data, columnIndex)
            if "band" in columnLevels:
                if self.dataset == "ref":
                    # 'ref' columns are band-independent; any band will do.
                    columnDict["band"] = columnLevelNames["band"][0]
                else:
                    raise ValueError(f"'filt' not set for functor {self.name}"
                                     f"(dataset {self.dataset}) "
                                     "and ParquetTable "
                                     "contains multiple filters in column index. "
                                     "Set 'filt' or set 'dataset' to 'ref'.")
        else:
            columnDict['band'] = self.filt

        if isinstance(data, MultilevelParquetTable):
            return data._colsFromDict(columnDict)
        elif isinstance(data, DeferredDatasetHandle):
            if returnTuple:
                return self._colsFromDict(columnDict, columnIndex=columnIndex)
            else:
                return columnDict

    def _func(self, df, dropna=True):
        raise NotImplementedError('Must define calculation on dataframe')

    def _get_columnIndex(self, data):
        """Return columnIndex
        """
        if isinstance(data, DeferredDatasetHandle):
            return data.get(component="columns")
        else:
            return None

    def _get_data(self, data):
        """Retrieve dataframe necessary for calculation.

        The data argument can be a DataFrame, a ParquetTable instance, or a
        gen3 DeferredDatasetHandle

        Returns dataframe upon which `self._func` can act.

        N.B. while passing a raw pandas `DataFrame` *should* work here, it has
        not been tested.
        """
        if isinstance(data, pd.DataFrame):
            return data

        # First thing to do: check to see if the data source has a multilevel
        # column index or not.
        columnIndex = self._get_columnIndex(data)
        is_multiLevel = isinstance(data, MultilevelParquetTable) or isinstance(columnIndex, pd.MultiIndex)

        # Simple single-level parquet table, gen2
        if isinstance(data, ParquetTable) and not is_multiLevel:
            columns = self.columns
            df = data.toDataFrame(columns=columns)
            return df

        # Get proper columns specification for this functor
        if is_multiLevel:
            columns = self.multilevelColumns(data, columnIndex=columnIndex)
        else:
            columns = self.columns

        if isinstance(data, MultilevelParquetTable):
            # Load in-memory dataframe with appropriate columns the gen2 way
            df = data.toDataFrame(columns=columns, droplevels=False)
        elif isinstance(data, DeferredDatasetHandle):
            # Load in-memory dataframe with appropriate columns the gen3 way
            df = data.get(parameters={"columns": columns})

        # Drop unnecessary column levels
        if is_multiLevel:
            df = self._setLevels(df)

        return df

    def _setLevels(self, df):
        # Keep only the column-index levels named in `_dfLevels`.
        levelsToDrop = [n for n in df.columns.names if n not in self._dfLevels]
        df.columns = df.columns.droplevel(levelsToDrop)
        return df

    def _dropna(self, vals):
        return vals.dropna()

    def __call__(self, data, dropna=False):
        try:
            df = self._get_data(data)
            vals = self._func(df)
        except Exception:
            # NOTE(review): if `_get_data` itself raised, `df` is unbound here
            # and `fail` raises NameError — confirm upstream callers rely on
            # the current behavior before tightening this.
            vals = self.fail(df)
        if dropna:
            vals = self._dropna(vals)

        return vals

    def difference(self, data1, data2, **kwargs):
        """Computes difference between functor called on two different
        ParquetTable objects
        """
        return self(data1, **kwargs) - self(data2, **kwargs)

    def fail(self, df):
        # All-NaN series aligned with the input dataframe's index.
        return pd.Series(np.full(len(df), np.nan), index=df.index)

    @property
    def name(self):
        """Full name of functor (suitable for figure labels)
        """
        return NotImplementedError

    @property
    def shortname(self):
        """Short name of functor (suitable for column name/dict key)
        """
        return self.name
367 
368 
class CompositeFunctor(Functor):
    """Perform multiple calculations at once on a catalog

    The role of a `CompositeFunctor` is to group together computations from
    multiple functors.  Instead of returning `pandas.Series` a
    `CompositeFunctor` returns a `pandas.Dataframe`, with the column names
    being the keys of `funcDict`.

    The `columns` attribute of a `CompositeFunctor` is the union of all
    columns in all the component functors.

    A `CompositeFunctor` does not use a `_func` method itself; rather, when a
    `CompositeFunctor` is called, all its columns are loaded at once, and the
    resulting dataframe is passed to the `_func` method of each component
    functor.  This has the advantage of only doing I/O (reading from parquet
    file) once, and works because each individual `_func` method of each
    component functor does not care if there are *extra* columns in the
    dataframe being passed; only that it must contain *at least* the
    `columns` it expects.

    An important and useful class method is `from_yaml`, which takes as
    argument the path to a YAML file specifying a collection of functors.

    Parameters
    ----------
    funcs : `dict` or `list`
        Dictionary or list of functors.  If a list, then it will be converted
        into a dictonary according to the `.shortname` attribute of each
        functor.
    """
    dataset = None

    def __init__(self, funcs, **kwargs):

        if isinstance(funcs, dict):
            self.funcDict = funcs
        else:
            self.funcDict = {f.shortname: f for f in funcs}

        self._filt = None

        super().__init__(**kwargs)

    @property
    def filt(self):
        return self._filt

    @filt.setter
    def filt(self, filt):
        # Propagate the band to every component functor.
        if filt is not None:
            for _, f in self.funcDict.items():
                f.filt = filt
        self._filt = filt

    def update(self, new):
        """Add the functors from `new` (a dict or CompositeFunctor)."""
        if isinstance(new, dict):
            self.funcDict.update(new)
        elif isinstance(new, CompositeFunctor):
            self.funcDict.update(new.funcDict)
        else:
            raise TypeError('Can only update with dictionary or CompositeFunctor.')

        # Make sure new functors have the same 'filt' set.  The
        # self-assignment deliberately re-triggers the property setter.
        if self.filt is not None:
            self.filt = self.filt

    @property
    def columns(self):
        return list(set([x for y in [f.columns for f in self.funcDict.values()] for x in y]))

    def multilevelColumns(self, data, **kwargs):
        # Get the union of columns for all component functors.  Note the need
        # to have `returnTuple=True` here.
        return list(
            set(
                [
                    x
                    for y in [
                        f.multilevelColumns(data, returnTuple=True, **kwargs) for f in self.funcDict.values()
                    ]
                    for x in y
                ]
            )
        )

    def __call__(self, data, **kwargs):
        """Apply the functor to the data table

        Parameters
        ----------
        data : `lsst.daf.butler.DeferredDatasetHandle`,
               `lsst.pipe.tasks.parquetTable.MultilevelParquetTable`,
               `lsst.pipe.tasks.parquetTable.ParquetTable`,
               or `pandas.DataFrame`.
            The table or a pointer to a table on disk from which columns can
            be accessed
        """
        columnIndex = self._get_columnIndex(data)

        # First, determine whether data has a multilevel index (either gen2
        # or gen3)
        is_multiLevel = isinstance(data, MultilevelParquetTable) or isinstance(columnIndex, pd.MultiIndex)

        # Multilevel index, gen2 or gen3
        if is_multiLevel:
            columns = self.multilevelColumns(data, columnIndex=columnIndex)

            if isinstance(data, MultilevelParquetTable):
                # Read data into memory the gen2 way
                df = data.toDataFrame(columns=columns, droplevels=False)
            elif isinstance(data, DeferredDatasetHandle):
                # Read data into memory the gen3 way
                df = data.get(parameters={"columns": columns})

            valDict = {}
            for k, f in self.funcDict.items():
                try:
                    subdf = f._setLevels(
                        df[f.multilevelColumns(data, returnTuple=True, columnIndex=columnIndex)]
                    )
                    valDict[k] = f._func(subdf)
                except Exception:
                    valDict[k] = f.fail(subdf)

        else:
            if isinstance(data, DeferredDatasetHandle):
                # input if Gen3 deferLoad=True
                df = data.get(parameters={"columns": self.columns})
            elif isinstance(data, pd.DataFrame):
                # input if Gen3 deferLoad=False
                df = data
            else:
                # Original Gen2 input is type ParquetTable and the fallback
                df = data.toDataFrame(columns=self.columns)

            valDict = {k: f._func(df) for k, f in self.funcDict.items()}

        try:
            valDf = pd.concat(valDict, axis=1)
        except TypeError:
            print([(k, type(v)) for k, v in valDict.items()])
            raise

        if kwargs.get('dropna', False):
            valDf = valDf.dropna(how='any')

        return valDf

    @classmethod
    def renameCol(cls, col, renameRules):
        """Apply the first-match prefix rename rules to a column name."""
        if renameRules is None:
            return col
        for old, new in renameRules:
            if col.startswith(old):
                col = col.replace(old, new)
        return col

    @classmethod
    def from_file(cls, filename, **kwargs):
        """Build a CompositeFunctor from a YAML specification file."""
        # Allow environment variables in the filename.
        filename = os.path.expandvars(filename)
        with open(filename) as f:
            translationDefinition = yaml.safe_load(f)

        return cls.from_yaml(translationDefinition, **kwargs)

    @classmethod
    def from_yaml(cls, translationDefinition, **kwargs):
        """Build a CompositeFunctor from an already-parsed YAML dictionary."""
        funcs = {}
        for func, val in translationDefinition['funcs'].items():
            funcs[func] = init_fromDict(val, name=func)

        if 'flag_rename_rules' in translationDefinition:
            renameRules = translationDefinition['flag_rename_rules']
        else:
            renameRules = None

        if 'calexpFlags' in translationDefinition:
            for flag in translationDefinition['calexpFlags']:
                funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='calexp')

        if 'refFlags' in translationDefinition:
            for flag in translationDefinition['refFlags']:
                funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='ref')

        if 'forcedFlags' in translationDefinition:
            for flag in translationDefinition['forcedFlags']:
                funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='forced_src')

        if 'flags' in translationDefinition:
            for flag in translationDefinition['flags']:
                funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='meas')

        return cls(funcs, **kwargs)
560 
561 
def mag_aware_eval(df, expr):
    """Evaluate an expression on a DataFrame, knowing what the 'mag' function means

    Builds on `pandas.DataFrame.eval`, which parses and executes math on
    dataframes.

    Parameters
    ----------
    df : pandas.DataFrame
        Dataframe on which to evaluate expression.

    expr : str
        Expression.
    """
    # NOTE: the `truediv` kwarg to `DataFrame.eval` was deprecated and then
    # removed from pandas (division is always true division in Python 3), so
    # it is intentionally not passed here.
    try:
        expr_new = re.sub(r'mag\((\w+)\)', r'-2.5*log(\g<1>)/log(10)', expr)
        val = df.eval(expr_new)
    except Exception:  # Should check what actually gets raised
        # Retry assuming the mag() argument is a flux column missing its
        # '_instFlux' suffix.
        expr_new = re.sub(r'mag\((\w+)\)', r'-2.5*log(\g<1>_instFlux)/log(10)', expr)
        val = df.eval(expr_new)
    return val
582 
583 
class CustomFunctor(Functor):
    """Arbitrary computation on a catalog

    Column names (and thus the columns to be loaded from catalog) are found
    by finding all words and trying to ignore all "math-y" words.

    Parameters
    ----------
    expr : str
        Expression to evaluate, to be parsed and executed by
        `mag_aware_eval`.
    """
    # Words that appear in expressions but are functions, not column names.
    _ignore_words = ('mag', 'sin', 'cos', 'exp', 'log', 'sqrt')

    def __init__(self, expr, **kwargs):
        self.expr = expr
        super().__init__(**kwargs)

    @property
    def name(self):
        return self.expr

    @property
    def columns(self):
        flux_cols = re.findall(r'mag\(\s*(\w+)\s*\)', self.expr)

        cols = [c for c in re.findall(r'[a-zA-Z_]+', self.expr) if c not in self._ignore_words]
        not_a_col = []
        for c in flux_cols:
            if not re.search('_instFlux$', c):
                # mag() arguments get the '_instFlux' suffix appended; the
                # bare name is then not itself a column.
                cols.append(f'{c}_instFlux')
                not_a_col.append(c)
            else:
                cols.append(c)

        return list(set([c for c in cols if c not in not_a_col]))

    def _func(self, df):
        return mag_aware_eval(df, self.expr)
622 
623 
class Column(Functor):
    """Get column with specified name
    """

    def __init__(self, col, **kwargs):
        self.col = col
        super().__init__(**kwargs)

    @property
    def name(self):
        return self.col

    @property
    def columns(self):
        return [self.col]

    def _func(self, df):
        return df[self.col]
642 
643 
class Index(Functor):
    """Return the value of the index for each object
    """

    columns = ['coord_ra']  # just a dummy; something has to be here
    _defaultDataset = 'ref'
    _defaultNoDup = True

    def _func(self, df):
        return pd.Series(df.index, index=df.index)
654 
655 
class IDColumn(Column):
    """Return the object ID (the dataframe index) as a column."""
    col = 'id'
    _allow_difference = False
    _defaultNoDup = True

    def _func(self, df):
        return pd.Series(df.index, index=df.index)
663 
664 
class FootprintNPix(Column):
    """Number of pixels in the source footprint."""
    col = 'base_Footprint_nPix'
667 
668 
class CoordColumn(Column):
    """Base class for coordinate column, in degrees
    """
    _radians = True

    def __init__(self, col, **kwargs):
        super().__init__(col, **kwargs)

    def _func(self, df):
        # Must not modify original column in case that column is used by
        # another functor
        output = df[self.col] * 180 / np.pi if self._radians else df[self.col]
        return output
681 
682 
class RAColumn(CoordColumn):
    """Right Ascension, in degrees
    """
    name = 'RA'
    _defaultNoDup = True

    def __init__(self, **kwargs):
        super().__init__('coord_ra', **kwargs)

    def __call__(self, catalog, **kwargs):
        return super().__call__(catalog, **kwargs)
694 
695 
class DecColumn(CoordColumn):
    """Declination, in degrees
    """
    name = 'Dec'
    _defaultNoDup = True

    def __init__(self, **kwargs):
        super().__init__('coord_dec', **kwargs)

    def __call__(self, catalog, **kwargs):
        return super().__call__(catalog, **kwargs)
707 
708 
def fluxName(col):
    """Append '_instFlux' to ``col`` if it does not already end with it."""
    if not col.endswith('_instFlux'):
        col += '_instFlux'
    return col
713 
714 
def fluxErrName(col):
    """Append '_instFluxErr' to ``col`` if it does not already end with it."""
    if not col.endswith('_instFluxErr'):
        col += '_instFluxErr'
    return col
719 
720 
class Mag(Functor):
    """Compute calibrated magnitude

    Takes a `calib` argument, which returns the flux at mag=0
    as `calib.getFluxMag0()`.  If not provided, then the default
    `fluxMag0` is 63095734448.0194, which is default for HSC.
    This default should be removed in DM-21955

    This calculation hides warnings about invalid values and dividing by
    zero.

    As for all functors, a `dataset` and `filt` kwarg should be provided upon
    initialization.  Unlike the default `Functor`, however, the default
    dataset for a `Mag` is `'meas'`, rather than `'ref'`.

    Parameters
    ----------
    col : `str`
        Name of flux column from which to compute magnitude.  Can be
        parseable by `lsst.pipe.tasks.functors.fluxName` function---that is,
        you can pass `'modelfit_CModel'` instead of
        `'modelfit_CModel_instFlux'`) and it will understand.
    calib : `lsst.afw.image.calib.Calib` (optional)
        Object that knows zero point.
    """
    _defaultDataset = 'meas'

    def __init__(self, col, calib=None, **kwargs):
        self.col = fluxName(col)
        self.calib = calib
        if calib is not None:
            self.fluxMag0 = calib.getFluxMag0()[0]
        else:
            # TO DO: DM-21955 Replace hard coded photometic calibration values
            self.fluxMag0 = 63095734448.0194

        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.col]

    def _func(self, df):
        with np.warnings.catch_warnings():
            np.warnings.filterwarnings('ignore', r'invalid value encountered')
            np.warnings.filterwarnings('ignore', r'divide by zero')
            return -2.5*np.log10(df[self.col] / self.fluxMag0)

    @property
    def name(self):
        return f'mag_{self.col}'
771 
772 
class MagErr(Mag):
    """Compute calibrated magnitude uncertainty

    Takes the same `calib` object as `lsst.pipe.tasks.functors.Mag`.

    Parameters
    ----------
    col : `str`
        Name of flux column
    calib : `lsst.afw.image.calib.Calib` (optional)
        Object that knows zero point.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        if self.calib is not None:
            self.fluxMag0Err = self.calib.getFluxMag0()[1]
        else:
            self.fluxMag0Err = 0.

    @property
    def columns(self):
        return [self.col, self.col + 'Err']

    def _func(self, df):
        with np.warnings.catch_warnings():
            np.warnings.filterwarnings('ignore', r'invalid value encountered')
            np.warnings.filterwarnings('ignore', r'divide by zero')
            fluxCol, fluxErrCol = self.columns
            x = df[fluxErrCol] / df[fluxCol]
            y = self.fluxMag0Err / self.fluxMag0
            # Standard first-order error propagation for -2.5*log10(flux/f0).
            magErr = (2.5 / np.log(10.)) * np.sqrt(x*x + y*y)
            return magErr

    @property
    def name(self):
        return super().name + '_err'
809 
810 
class NanoMaggie(Mag):
    """Flux in units of nanomaggies (flux relative to zero point, times 1e9).
    """

    def _func(self, df):
        return (df[self.col] / self.fluxMag0) * 1e9
817 
818 
class MagDiff(Functor):
    """Functor to calculate magnitude difference"""
    _defaultDataset = 'meas'

    def __init__(self, col1, col2, **kwargs):
        self.col1 = fluxName(col1)
        self.col2 = fluxName(col2)
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.col1, self.col2]

    def _func(self, df):
        with np.warnings.catch_warnings():
            np.warnings.filterwarnings('ignore', r'invalid value encountered')
            np.warnings.filterwarnings('ignore', r'divide by zero')
            # mag1 - mag2 == -2.5*log10(flux1/flux2); zero points cancel.
            return -2.5*np.log10(df[self.col1]/df[self.col2])

    @property
    def name(self):
        return f'(mag_{self.col1} - mag_{self.col2})'

    @property
    def shortname(self):
        return f'magDiff_{self.col1}_{self.col2}'
846 
847 
class Color(Functor):
    """Compute the color between two filters

    Computes color by initializing two different `Mag` functors based on the
    `col` and filters provided, and then returning the difference.

    This is enabled by the `_func` expecting a dataframe with a multilevel
    column index, with both `'band'` and `'column'`, instead of just
    `'column'`, which is the `Functor` default.  This is controlled by the
    `_dfLevels` attribute.

    Also of note, the default dataset for `Color` is `forced_src'`, whereas
    for `Mag` it is `'meas'`.

    Parameters
    ----------
    col : str
        Name of flux column from which to compute; same as would be passed to
        `lsst.pipe.tasks.functors.Mag`.

    filt2, filt1 : str
        Filters from which to compute magnitude difference.
        Color computed is `Mag(filt2) - Mag(filt1)`.
    """
    _defaultDataset = 'forced_src'
    _dfLevels = ('band', 'column')
    _defaultNoDup = True

    def __init__(self, col, filt2, filt1, **kwargs):
        self.col = fluxName(col)
        if filt2 == filt1:
            raise RuntimeError("Cannot compute Color for %s: %s - %s " % (col, filt2, filt1))
        self.filt2 = filt2
        self.filt1 = filt1

        self.mag2 = Mag(col, filt=filt2, **kwargs)
        self.mag1 = Mag(col, filt=filt1, **kwargs)

        super().__init__(**kwargs)

    @property
    def filt(self):
        # A Color spans two bands, so it has no single filter.
        return None

    @filt.setter
    def filt(self, filt):
        # Deliberately a no-op: the component Mag functors own the bands.
        pass

    def _func(self, df):
        mag2 = self.mag2._func(df[self.filt2])
        mag1 = self.mag1._func(df[self.filt1])
        return mag2 - mag1

    @property
    def columns(self):
        return [self.mag1.col, self.mag2.col]

    def multilevelColumns(self, parq, **kwargs):
        return [(self.dataset, self.filt1, self.col), (self.dataset, self.filt2, self.col)]

    @property
    def name(self):
        return f'{self.filt2} - {self.filt1} ({self.col})'

    @property
    def shortname(self):
        return f"{self.col}_{self.filt2.replace('-', '')}m{self.filt1.replace('-', '')}"
916 
917 
class Labeller(Functor):
    """Main function of this subclass is to override the dropna=True
    """
    _null_label = 'null'
    _allow_difference = False
    name = 'label'
    _force_str = False

    def __call__(self, parq, dropna=False, **kwargs):
        # Force dropna=False so null labels are kept in the output.
        return super().__call__(parq, dropna=False, **kwargs)
928 
929 
class StarGalaxyLabeller(Labeller):
    """Label each source 'star', 'galaxy', or 'null' from extendedness."""
    _columns = ["base_ClassificationExtendedness_value"]
    _column = "base_ClassificationExtendedness_value"

    def _func(self, df):
        x = df[self._columns][self._column]
        mask = x.isnull()
        # extendedness < 0.5 -> code 1, >= 0.5 -> code 0, null -> code 2.
        test = (x < 0.5).astype(int)
        test = test.mask(mask, 2)

        # TODO: DM-21954 Look into veracity of inline comment below
        # are these backwards?
        categories = ['galaxy', 'star', self._null_label]
        label = pd.Series(pd.Categorical.from_codes(test, categories=categories),
                          index=x.index, name='label')
        if self._force_str:
            label = label.astype(str)
        return label
948 
949 
class NumStarLabeller(Labeller):
    """Label sources by how many bands flagged them as a star."""
    _columns = ['numStarFlags']
    labels = {"star": 0, "maybe": 1, "notStar": 2}

    def _func(self, df):
        x = df[self._columns][self._columns[0]]

        # Number of filters
        n = len(x.unique()) - 1

        # Bins: 0 -> 'noStar', 1..n-1 -> 'maybe', n -> 'star'.
        labels = ['noStar', 'maybe', 'star']
        label = pd.Series(pd.cut(x, [-1, 0, n-1, n], labels=labels),
                          index=x.index, name='label')

        if self._force_str:
            label = label.astype(str)

        return label
968 
969 
971  name = 'Deconvolved Moments'
972  shortname = 'deconvolvedMoments'
973  _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
974  "ext_shapeHSM_HsmSourceMoments_yy",
975  "base_SdssShape_xx", "base_SdssShape_yy",
976  "ext_shapeHSM_HsmPsfMoments_xx",
977  "ext_shapeHSM_HsmPsfMoments_yy")
978 
979  def _func(self, df):
980  """Calculate deconvolved moments"""
981  if "ext_shapeHSM_HsmSourceMoments_xx" in df.columns: # _xx added by tdm
982  hsm = df["ext_shapeHSM_HsmSourceMoments_xx"] + df["ext_shapeHSM_HsmSourceMoments_yy"]
983  else:
984  hsm = np.ones(len(df))*np.nan
985  sdss = df["base_SdssShape_xx"] + df["base_SdssShape_yy"]
986  if "ext_shapeHSM_HsmPsfMoments_xx" in df.columns:
987  psf = df["ext_shapeHSM_HsmPsfMoments_xx"] + df["ext_shapeHSM_HsmPsfMoments_yy"]
988  else:
989  # LSST does not have shape.sdss.psf. Could instead add base_PsfShape to catalog using
990  # exposure.getPsf().computeShape(s.getCentroid()).getIxx()
991  # raise TaskError("No psf shape parameter found in catalog")
992  raise RuntimeError('No psf shape parameter found in catalog')
993 
994  return hsm.where(np.isfinite(hsm), sdss) - psf
995 
996 
998  """Functor to calculate SDSS trace radius size for sources"""
999  name = "SDSS Trace Size"
1000  shortname = 'sdssTrace'
1001  _columns = ("base_SdssShape_xx", "base_SdssShape_yy")
1002 
1003  def _func(self, df):
1004  srcSize = np.sqrt(0.5*(df["base_SdssShape_xx"] + df["base_SdssShape_yy"]))
1005  return srcSize
1006 
1007 
1009  """Functor to calculate SDSS trace radius size difference (%) between object and psf model"""
1010  name = "PSF - SDSS Trace Size"
1011  shortname = 'psf_sdssTrace'
1012  _columns = ("base_SdssShape_xx", "base_SdssShape_yy",
1013  "base_SdssShape_psf_xx", "base_SdssShape_psf_yy")
1014 
1015  def _func(self, df):
1016  srcSize = np.sqrt(0.5*(df["base_SdssShape_xx"] + df["base_SdssShape_yy"]))
1017  psfSize = np.sqrt(0.5*(df["base_SdssShape_psf_xx"] + df["base_SdssShape_psf_yy"]))
1018  sizeDiff = 100*(srcSize - psfSize)/(0.5*(srcSize + psfSize))
1019  return sizeDiff
1020 
1021 
1023  """Functor to calculate HSM trace radius size for sources"""
1024  name = 'HSM Trace Size'
1025  shortname = 'hsmTrace'
1026  _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
1027  "ext_shapeHSM_HsmSourceMoments_yy")
1028 
1029  def _func(self, df):
1030  srcSize = np.sqrt(0.5*(df["ext_shapeHSM_HsmSourceMoments_xx"]
1031  + df["ext_shapeHSM_HsmSourceMoments_yy"]))
1032  return srcSize
1033 
1034 
1036  """Functor to calculate HSM trace radius size difference (%) between object and psf model"""
1037  name = 'PSF - HSM Trace Size'
1038  shortname = 'psf_HsmTrace'
1039  _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
1040  "ext_shapeHSM_HsmSourceMoments_yy",
1041  "ext_shapeHSM_HsmPsfMoments_xx",
1042  "ext_shapeHSM_HsmPsfMoments_yy")
1043 
1044  def _func(self, df):
1045  srcSize = np.sqrt(0.5*(df["ext_shapeHSM_HsmSourceMoments_xx"]
1046  + df["ext_shapeHSM_HsmSourceMoments_yy"]))
1047  psfSize = np.sqrt(0.5*(df["ext_shapeHSM_HsmPsfMoments_xx"]
1048  + df["ext_shapeHSM_HsmPsfMoments_yy"]))
1049  sizeDiff = 100*(srcSize - psfSize)/(0.5*(srcSize + psfSize))
1050  return sizeDiff
1051 
1052 
1054  name = 'HSM Psf FWHM'
1055  _columns = ('ext_shapeHSM_HsmPsfMoments_xx', 'ext_shapeHSM_HsmPsfMoments_yy')
1056  # TODO: DM-21403 pixel scale should be computed from the CD matrix or transform matrix
1057  pixelScale = 0.168
1058  SIGMA2FWHM = 2*np.sqrt(2*np.log(2))
1059 
1060  def _func(self, df):
1061  return self.pixelScalepixelScale*self.SIGMA2FWHMSIGMA2FWHM*np.sqrt(
1062  0.5*(df['ext_shapeHSM_HsmPsfMoments_xx'] + df['ext_shapeHSM_HsmPsfMoments_yy']))
1063 
1064 
class E1(Functor):
    """Distortion ellipticity component e1 = (Ixx - Iyy) / (Ixx + Iyy)."""
    name = "Distortion Ellipticity (e1)"
    shortname = "Distortion"

    def __init__(self, colXX, colXY, colYY, **kwargs):
        self.colXX = colXX
        self.colXY = colXY
        self.colYY = colYY
        self._columns = [self.colXX, self.colXY, self.colYY]
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.colXX, self.colXY, self.colYY]

    def _func(self, df):
        # Bug fix: parenthesize the numerator. The original evaluated
        # Ixx - Iyy/(Ixx + Iyy), which is not the distortion e1.
        return (df[self.colXX] - df[self.colYY]) / (df[self.colXX] + df[self.colYY])
1082 
1083 
class E2(Functor):
    """Distortion ellipticity component e2 = 2*Ixy / (Ixx + Iyy)."""
    name = "Ellipticity e2"

    def __init__(self, colXX, colXY, colYY, **kwargs):
        self.colXX = colXX
        self.colXY = colXY
        self.colYY = colYY
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.colXX, self.colXY, self.colYY]

    def _func(self, df):
        return 2*df[self.colXY] / (df[self.colXX] + df[self.colYY])
1099 
1100 
1102 
1103  def __init__(self, colXX, colXY, colYY, **kwargs):
1104  self.colXXcolXX = colXX
1105  self.colXYcolXY = colXY
1106  self.colYYcolYY = colYY
1107  super().__init__(**kwargs)
1108 
1109  @property
1110  def columns(self):
1111  return [self.colXXcolXX, self.colXYcolXY, self.colYYcolYY]
1112 
1113  def _func(self, df):
1114  return (df[self.colXXcolXX]*df[self.colYYcolYY] - df[self.colXYcolXY]**2)**0.25
1115 
1116 
1118  """Computations using the stored localWcs.
1119  """
1120  name = "LocalWcsOperations"
1121 
1122  def __init__(self,
1123  colCD_1_1,
1124  colCD_1_2,
1125  colCD_2_1,
1126  colCD_2_2,
1127  **kwargs):
1128  self.colCD_1_1colCD_1_1 = colCD_1_1
1129  self.colCD_1_2colCD_1_2 = colCD_1_2
1130  self.colCD_2_1colCD_2_1 = colCD_2_1
1131  self.colCD_2_2colCD_2_2 = colCD_2_2
1132  super().__init__(**kwargs)
1133 
1134  def computeDeltaRaDec(self, x, y, cd11, cd12, cd21, cd22):
1135  """Compute the distance on the sphere from x2, y1 to x1, y1.
1136 
1137  Parameters
1138  ----------
1139  x : `pandas.Series`
1140  X pixel coordinate.
1141  y : `pandas.Series`
1142  Y pixel coordinate.
1143  cd11 : `pandas.Series`
1144  [1, 1] element of the local Wcs affine transform.
1145  cd11 : `pandas.Series`
1146  [1, 1] element of the local Wcs affine transform.
1147  cd12 : `pandas.Series`
1148  [1, 2] element of the local Wcs affine transform.
1149  cd21 : `pandas.Series`
1150  [2, 1] element of the local Wcs affine transform.
1151  cd22 : `pandas.Series`
1152  [2, 2] element of the local Wcs affine transform.
1153 
1154  Returns
1155  -------
1156  raDecTuple : tuple
1157  RA and dec conversion of x and y given the local Wcs. Returned
1158  units are in radians.
1159 
1160  """
1161  return (x * cd11 + y * cd12, x * cd21 + y * cd22)
1162 
    def computeSkySeperation(self, ra1, dec1, ra2, dec2):
        """Compute the great-circle separation between two sky positions.

        Uses the haversine formula, which is numerically stable for small
        separations.

        Parameters
        ----------
        ra1 : `pandas.Series`
            Ra of the first coordinate in radians.
        dec1 : `pandas.Series`
            Dec of the first coordinate in radians.
        ra2 : `pandas.Series`
            Ra of the second coordinate in radians.
        dec2 : `pandas.Series`
            Dec of the second coordinate in radians.

        Returns
        -------
        dist : `pandas.Series`
            Distance on the sphere in radians.
        """
        deltaDec = dec2 - dec1
        deltaRa = ra2 - ra1
        return 2 * np.arcsin(
            np.sqrt(
                np.sin(deltaDec / 2) ** 2
                + np.cos(dec2) * np.cos(dec1) * np.sin(deltaRa / 2) ** 2))
1188 
1189  def getSkySeperationFromPixel(self, x1, y1, x2, y2, cd11, cd12, cd21, cd22):
1190  """Compute the distance on the sphere from x2, y1 to x1, y1.
1191 
1192  Parameters
1193  ----------
1194  x1 : `pandas.Series`
1195  X pixel coordinate.
1196  y1 : `pandas.Series`
1197  Y pixel coordinate.
1198  x2 : `pandas.Series`
1199  X pixel coordinate.
1200  y2 : `pandas.Series`
1201  Y pixel coordinate.
1202  cd11 : `pandas.Series`
1203  [1, 1] element of the local Wcs affine transform.
1204  cd11 : `pandas.Series`
1205  [1, 1] element of the local Wcs affine transform.
1206  cd12 : `pandas.Series`
1207  [1, 2] element of the local Wcs affine transform.
1208  cd21 : `pandas.Series`
1209  [2, 1] element of the local Wcs affine transform.
1210  cd22 : `pandas.Series`
1211  [2, 2] element of the local Wcs affine transform.
1212 
1213  Returns
1214  -------
1215  Distance : `pandas.Series`
1216  Arcseconds per pixel at the location of the local WC
1217  """
1218  ra1, dec1 = self.computeDeltaRaDeccomputeDeltaRaDec(x1, y1, cd11, cd12, cd21, cd22)
1219  ra2, dec2 = self.computeDeltaRaDeccomputeDeltaRaDec(x2, y2, cd11, cd12, cd21, cd22)
1220  # Great circle distance for small separations.
1221  return self.computeSkySeperationcomputeSkySeperation(ra1, dec1, ra2, dec2)
1222 
1223 
1225  """Compute the local pixel scale from the stored CDMatrix.
1226  """
1227  name = "PixelScale"
1228 
1229  @property
1230  def columns(self):
1231  return [self.colCD_1_1colCD_1_1,
1232  self.colCD_1_2colCD_1_2,
1233  self.colCD_2_1colCD_2_1,
1234  self.colCD_2_2colCD_2_2]
1235 
1236  def pixelScaleArcseconds(self, cd11, cd12, cd21, cd22):
1237  """Compute the local pixel to scale conversion in arcseconds.
1238 
1239  Parameters
1240  ----------
1241  cd11 : `pandas.Series`
1242  [1, 1] element of the local Wcs affine transform in radians.
1243  cd11 : `pandas.Series`
1244  [1, 1] element of the local Wcs affine transform in radians.
1245  cd12 : `pandas.Series`
1246  [1, 2] element of the local Wcs affine transform in radians.
1247  cd21 : `pandas.Series`
1248  [2, 1] element of the local Wcs affine transform in radians.
1249  cd22 : `pandas.Series`
1250  [2, 2] element of the local Wcs affine transform in radians.
1251 
1252  Returns
1253  -------
1254  pixScale : `pandas.Series`
1255  Arcseconds per pixel at the location of the local WC
1256  """
1257  return 3600 * np.degrees(np.sqrt(np.fabs(cd11 * cd22 - cd12 * cd21)))
1258 
1259  def _func(self, df):
1260  return self.pixelScaleArcsecondspixelScaleArcseconds(df[self.colCD_1_1colCD_1_1],
1261  df[self.colCD_1_2colCD_1_2],
1262  df[self.colCD_2_1colCD_2_1],
1263  df[self.colCD_2_2colCD_2_2])
1264 
1265 
1267  """Convert a value in units pixels squared to units arcseconds squared.
1268  """
1269 
1270  def __init__(self,
1271  col,
1272  colCD_1_1,
1273  colCD_1_2,
1274  colCD_2_1,
1275  colCD_2_2,
1276  **kwargs):
1277  self.colcol = col
1278  super().__init__(colCD_1_1,
1279  colCD_1_2,
1280  colCD_2_1,
1281  colCD_2_2,
1282  **kwargs)
1283 
1284  @property
1285  def name(self):
1286  return f"{self.col}_asArcseconds"
1287 
1288  @property
1289  def columns(self):
1290  return [self.colcol,
1291  self.colCD_1_1colCD_1_1,
1292  self.colCD_1_2colCD_1_2,
1293  self.colCD_2_1colCD_2_1,
1294  self.colCD_2_2colCD_2_2]
1295 
1296  def _func(self, df):
1297  return df[self.colcol] * self.pixelScaleArcsecondspixelScaleArcseconds(df[self.colCD_1_1colCD_1_1],
1298  df[self.colCD_1_2colCD_1_2],
1299  df[self.colCD_2_1colCD_2_1],
1300  df[self.colCD_2_2colCD_2_2])
1301 
1302 
1304  """Convert a value in units pixels to units arcseconds.
1305  """
1306 
1307  def __init__(self,
1308  col,
1309  colCD_1_1,
1310  colCD_1_2,
1311  colCD_2_1,
1312  colCD_2_2,
1313  **kwargs):
1314  self.colcol = col
1315  super().__init__(colCD_1_1,
1316  colCD_1_2,
1317  colCD_2_1,
1318  colCD_2_2,
1319  **kwargs)
1320 
1321  @property
1322  def name(self):
1323  return f"{self.col}_asArcsecondsSq"
1324 
1325  @property
1326  def columns(self):
1327  return [self.colcol,
1328  self.colCD_1_1colCD_1_1,
1329  self.colCD_1_2colCD_1_2,
1330  self.colCD_2_1colCD_2_1,
1331  self.colCD_2_2colCD_2_2]
1332 
1333  def _func(self, df):
1334  pixScale = self.pixelScaleArcsecondspixelScaleArcseconds(df[self.colCD_1_1colCD_1_1],
1335  df[self.colCD_1_2colCD_1_2],
1336  df[self.colCD_2_1colCD_2_1],
1337  df[self.colCD_2_2colCD_2_2])
1338  return df[self.colcol] * pixScale * pixScale
1339 
1340 
1342  name = 'Reference Band'
1343  shortname = 'refBand'
1344 
1345  @property
1346  def columns(self):
1347  return ["merge_measurement_i",
1348  "merge_measurement_r",
1349  "merge_measurement_z",
1350  "merge_measurement_y",
1351  "merge_measurement_g"]
1352 
1353  def _func(self, df):
1354  def getFilterAliasName(row):
1355  # get column name with the max value (True > False)
1356  colName = row.idxmax()
1357  return colName.replace('merge_measurement_', '')
1358 
1359  return df[self.columnscolumnscolumns].apply(getFilterAliasName, axis=1)
1360 
1361 
1363  # AB to NanoJansky (3631 Jansky)
1364  AB_FLUX_SCALE = (0 * u.ABmag).to_value(u.nJy)
1365  LOG_AB_FLUX_SCALE = 12.56
1366  FIVE_OVER_2LOG10 = 1.085736204758129569
1367  # TO DO: DM-21955 Replace hard coded photometic calibration values
1368  COADD_ZP = 27
1369 
1370  def __init__(self, colFlux, colFluxErr=None, calib=None, **kwargs):
1371  self.vhypotvhypot = np.vectorize(self.hypothypot)
1372  self.colcol = colFlux
1373  self.colFluxErrcolFluxErr = colFluxErr
1374 
1375  self.calibcalib = calib
1376  if calib is not None:
1377  self.fluxMag0fluxMag0, self.fluxMag0ErrfluxMag0Err = calib.getFluxMag0()
1378  else:
1379  self.fluxMag0fluxMag0 = 1./np.power(10, -0.4*self.COADD_ZPCOADD_ZP)
1380  self.fluxMag0ErrfluxMag0Err = 0.
1381 
1382  super().__init__(**kwargs)
1383 
1384  @property
1385  def columns(self):
1386  return [self.colcol]
1387 
1388  @property
1389  def name(self):
1390  return f'mag_{self.col}'
1391 
1392  @classmethod
1393  def hypot(cls, a, b):
1394  if np.abs(a) < np.abs(b):
1395  a, b = b, a
1396  if a == 0.:
1397  return 0.
1398  q = b/a
1399  return np.abs(a) * np.sqrt(1. + q*q)
1400 
1401  def dn2flux(self, dn, fluxMag0):
1402  return self.AB_FLUX_SCALEAB_FLUX_SCALE * dn / fluxMag0
1403 
1404  def dn2mag(self, dn, fluxMag0):
1405  with np.warnings.catch_warnings():
1406  np.warnings.filterwarnings('ignore', r'invalid value encountered')
1407  np.warnings.filterwarnings('ignore', r'divide by zero')
1408  return -2.5 * np.log10(dn/fluxMag0)
1409 
1410  def dn2fluxErr(self, dn, dnErr, fluxMag0, fluxMag0Err):
1411  retVal = self.vhypotvhypot(dn * fluxMag0Err, dnErr * fluxMag0)
1412  retVal *= self.AB_FLUX_SCALEAB_FLUX_SCALE / fluxMag0 / fluxMag0
1413  return retVal
1414 
1415  def dn2MagErr(self, dn, dnErr, fluxMag0, fluxMag0Err):
1416  retVal = self.dn2fluxErrdn2fluxErr(dn, dnErr, fluxMag0, fluxMag0Err) / self.dn2fluxdn2flux(dn, fluxMag0)
1417  return self.FIVE_OVER_2LOG10FIVE_OVER_2LOG10 * retVal
1418 
1419 
1421  def _func(self, df):
1422  return self.dn2fluxdn2flux(df[self.colcol], self.fluxMag0fluxMag0)
1423 
1424 
1426  @property
1427  def columns(self):
1428  return [self.colcol, self.colFluxErrcolFluxErr]
1429 
1430  def _func(self, df):
1431  retArr = self.dn2fluxErrdn2fluxErr(df[self.colcol], df[self.colFluxErrcolFluxErr], self.fluxMag0fluxMag0, self.fluxMag0ErrfluxMag0Err)
1432  return pd.Series(retArr, index=df.index)
1433 
1434 
1436  def _func(self, df):
1437  return self.dn2magdn2mag(df[self.colcol], self.fluxMag0fluxMag0)
1438 
1439 
1441  @property
1442  def columns(self):
1443  return [self.colcol, self.colFluxErrcolFluxErr]
1444 
1445  def _func(self, df):
1446  retArr = self.dn2MagErrdn2MagErr(df[self.colcol], df[self.colFluxErrcolFluxErr], self.fluxMag0fluxMag0, self.fluxMag0ErrfluxMag0Err)
1447  return pd.Series(retArr, index=df.index)
1448 
1449 
1451  """Base class for calibrating the specified instrument flux column using
1452  the local photometric calibration.
1453 
1454  Parameters
1455  ----------
1456  instFluxCol : `str`
1457  Name of the instrument flux column.
1458  instFluxErrCol : `str`
1459  Name of the assocated error columns for ``instFluxCol``.
1460  photoCalibCol : `str`
1461  Name of local calibration column.
1462  photoCalibErrCol : `str`
1463  Error associated with ``photoCalibCol``
1464 
1465  See also
1466  --------
1467  LocalPhotometry
1468  LocalNanojansky
1469  LocalNanojanskyErr
1470  LocalMagnitude
1471  LocalMagnitudeErr
1472  """
1473  logNJanskyToAB = (1 * u.nJy).to_value(u.ABmag)
1474 
1475  def __init__(self,
1476  instFluxCol,
1477  instFluxErrCol,
1478  photoCalibCol,
1479  photoCalibErrCol,
1480  **kwargs):
1481  self.instFluxColinstFluxCol = instFluxCol
1482  self.instFluxErrColinstFluxErrCol = instFluxErrCol
1483  self.photoCalibColphotoCalibCol = photoCalibCol
1484  self.photoCalibErrColphotoCalibErrCol = photoCalibErrCol
1485  super().__init__(**kwargs)
1486 
    def instFluxToNanojansky(self, instFlux, localCalib):
        """Convert instrument flux to nanojanskys.

        Parameters
        ----------
        instFlux : `numpy.ndarray` or `pandas.Series`
            Array of instrument flux measurements
        localCalib : `numpy.ndarray` or `pandas.Series`
            Array of local photometric calibration estimates.

        Returns
        -------
        calibFlux : `numpy.ndarray` or `pandas.Series`
            Array of calibrated flux measurements.
        """
        # The local calibration is a direct counts -> nJy scale factor.
        return instFlux * localCalib
1503 
    def instFluxErrToNanojanskyErr(self, instFlux, instFluxErr, localCalib, localCalibErr):
        """Convert an instrument flux error to a nanojansky error.

        Parameters
        ----------
        instFlux : `numpy.ndarray` or `pandas.Series`
            Array of instrument flux measurements
        instFluxErr : `numpy.ndarray` or `pandas.Series`
            Errors on associated ``instFlux`` values
        localCalib : `numpy.ndarray` or `pandas.Series`
            Array of local photometric calibration estimates.
        localCalibErr : `numpy.ndarray` or `pandas.Series`
            Errors on associated ``localCalib`` values

        Returns
        -------
        calibFluxErr : `numpy.ndarray` or `pandas.Series`
            Errors on calibrated flux measurements.
        """
        # Standard propagation for the product instFlux * localCalib.
        return np.hypot(instFluxErr * localCalib, instFlux * localCalibErr)
1524 
1525  def instFluxToMagnitude(self, instFlux, localCalib):
1526  """Convert instrument flux to nanojanskys.
1527 
1528  Parameters
1529  ----------
1530  instFlux : `numpy.ndarray` or `pandas.Series`
1531  Array of instrument flux measurements
1532  localCalib : `numpy.ndarray` or `pandas.Series`
1533  Array of local photometric calibration estimates.
1534 
1535  Returns
1536  -------
1537  calibMag : `numpy.ndarray` or `pandas.Series`
1538  Array of calibrated AB magnitudes.
1539  """
1540  return -2.5 * np.log10(self.instFluxToNanojanskyinstFluxToNanojansky(instFlux, localCalib)) + self.logNJanskyToABlogNJanskyToAB
1541 
1542  def instFluxErrToMagnitudeErr(self, instFlux, instFluxErr, localCalib, localCalibErr):
1543  """Convert instrument flux err to nanojanskys.
1544 
1545  Parameters
1546  ----------
1547  instFlux : `numpy.ndarray` or `pandas.Series`
1548  Array of instrument flux measurements
1549  instFluxErr : `numpy.ndarray` or `pandas.Series`
1550  Errors on associated ``instFlux`` values
1551  localCalib : `numpy.ndarray` or `pandas.Series`
1552  Array of local photometric calibration estimates.
1553  localCalibErr : `numpy.ndarray` or `pandas.Series`
1554  Errors on associated ``localCalib`` values
1555 
1556  Returns
1557  -------
1558  calibMagErr: `numpy.ndarray` or `pandas.Series`
1559  Error on calibrated AB magnitudes.
1560  """
1561  err = self.instFluxErrToNanojanskyErrinstFluxErrToNanojanskyErr(instFlux, instFluxErr, localCalib, localCalibErr)
1562  return 2.5 / np.log(10) * err / self.instFluxToNanojanskyinstFluxToNanojansky(instFlux, instFluxErr)
1563 
1564 
1566  """Compute calibrated fluxes using the local calibration value.
1567 
1568  See also
1569  --------
1570  LocalNanojansky
1571  LocalNanojanskyErr
1572  LocalMagnitude
1573  LocalMagnitudeErr
1574  """
1575 
1576  @property
1577  def columns(self):
1578  return [self.instFluxColinstFluxCol, self.photoCalibColphotoCalibCol]
1579 
1580  @property
1581  def name(self):
1582  return f'flux_{self.instFluxCol}'
1583 
1584  def _func(self, df):
1585  return self.instFluxToNanojanskyinstFluxToNanojansky(df[self.instFluxColinstFluxCol], df[self.photoCalibColphotoCalibCol])
1586 
1587 
1589  """Compute calibrated flux errors using the local calibration value.
1590 
1591  See also
1592  --------
1593  LocalNanojansky
1594  LocalNanojanskyErr
1595  LocalMagnitude
1596  LocalMagnitudeErr
1597  """
1598 
1599  @property
1600  def columns(self):
1601  return [self.instFluxColinstFluxCol, self.instFluxErrColinstFluxErrCol,
1602  self.photoCalibColphotoCalibCol, self.photoCalibErrColphotoCalibErrCol]
1603 
1604  @property
1605  def name(self):
1606  return f'fluxErr_{self.instFluxCol}'
1607 
1608  def _func(self, df):
1609  return self.instFluxErrToNanojanskyErrinstFluxErrToNanojanskyErr(df[self.instFluxColinstFluxCol], df[self.instFluxErrColinstFluxErrCol],
1610  df[self.photoCalibColphotoCalibCol], df[self.photoCalibErrColphotoCalibErrCol])
1611 
1612 
1614  """Compute calibrated AB magnitudes using the local calibration value.
1615 
1616  See also
1617  --------
1618  LocalNanojansky
1619  LocalNanojanskyErr
1620  LocalMagnitude
1621  LocalMagnitudeErr
1622  """
1623 
1624  @property
1625  def columns(self):
1626  return [self.instFluxColinstFluxCol, self.photoCalibColphotoCalibCol]
1627 
1628  @property
1629  def name(self):
1630  return f'mag_{self.instFluxCol}'
1631 
1632  def _func(self, df):
1633  return self.instFluxToMagnitudeinstFluxToMagnitude(df[self.instFluxColinstFluxCol],
1634  df[self.photoCalibColphotoCalibCol])
1635 
1636 
1638  """Compute calibrated AB magnitude errors using the local calibration value.
1639 
1640  See also
1641  --------
1642  LocalNanojansky
1643  LocalNanojanskyErr
1644  LocalMagnitude
1645  LocalMagnitudeErr
1646  """
1647 
1648  @property
1649  def columns(self):
1650  return [self.instFluxColinstFluxCol, self.instFluxErrColinstFluxErrCol,
1651  self.photoCalibColphotoCalibCol, self.photoCalibErrColphotoCalibErrCol]
1652 
1653  @property
1654  def name(self):
1655  return f'magErr_{self.instFluxCol}'
1656 
1657  def _func(self, df):
1658  return self.instFluxErrToMagnitudeErrinstFluxErrToMagnitudeErr(df[self.instFluxColinstFluxCol],
1659  df[self.instFluxErrColinstFluxErrCol],
1660  df[self.photoCalibColphotoCalibCol],
1661  df[self.photoCalibErrColphotoCalibErrCol])
1662 
1663 
1665  """Compute absolute mean of dipole fluxes.
1666 
1667  See also
1668  --------
1669  LocalNanojansky
1670  LocalNanojanskyErr
1671  LocalMagnitude
1672  LocalMagnitudeErr
1673  LocalDipoleMeanFlux
1674  LocalDipoleMeanFluxErr
1675  LocalDipoleDiffFlux
1676  LocalDipoleDiffFluxErr
1677  """
1678  def __init__(self,
1679  instFluxPosCol,
1680  instFluxNegCol,
1681  instFluxPosErrCol,
1682  instFluxNegErrCol,
1683  photoCalibCol,
1684  photoCalibErrCol,
1685  **kwargs):
1686  self.instFluxNegColinstFluxNegCol = instFluxNegCol
1687  self.instFluxPosColinstFluxPosCol = instFluxPosCol
1688  self.instFluxNegErrColinstFluxNegErrCol = instFluxNegErrCol
1689  self.instFluxPosErrColinstFluxPosErrCol = instFluxPosErrCol
1690  self.photoCalibColphotoCalibColphotoCalibCol = photoCalibCol
1691  self.photoCalibErrColphotoCalibErrColphotoCalibErrCol = photoCalibErrCol
1692  super().__init__(instFluxNegCol,
1693  instFluxNegErrCol,
1694  photoCalibCol,
1695  photoCalibErrCol,
1696  **kwargs)
1697 
1698  @property
1699  def columns(self):
1700  return [self.instFluxPosColinstFluxPosCol,
1701  self.instFluxNegColinstFluxNegCol,
1702  self.photoCalibColphotoCalibColphotoCalibCol]
1703 
1704  @property
1705  def name(self):
1706  return f'dipMeanFlux_{self.instFluxPosCol}_{self.instFluxNegCol}'
1707 
1708  def _func(self, df):
1709  return 0.5*(np.fabs(self.instFluxToNanojanskyinstFluxToNanojansky(df[self.instFluxNegColinstFluxNegCol], df[self.photoCalibColphotoCalibColphotoCalibCol]))
1710  + np.fabs(self.instFluxToNanojanskyinstFluxToNanojansky(df[self.instFluxPosColinstFluxPosCol], df[self.photoCalibColphotoCalibColphotoCalibCol])))
1711 
1712 
1714  """Compute the error on the absolute mean of dipole fluxes.
1715 
1716  See also
1717  --------
1718  LocalNanojansky
1719  LocalNanojanskyErr
1720  LocalMagnitude
1721  LocalMagnitudeErr
1722  LocalDipoleMeanFlux
1723  LocalDipoleMeanFluxErr
1724  LocalDipoleDiffFlux
1725  LocalDipoleDiffFluxErr
1726  """
1727 
1728  @property
1729  def columns(self):
1730  return [self.instFluxPosColinstFluxPosCol,
1731  self.instFluxNegColinstFluxNegCol,
1732  self.instFluxPosErrColinstFluxPosErrCol,
1733  self.instFluxNegErrColinstFluxNegErrCol,
1734  self.photoCalibColphotoCalibColphotoCalibCol,
1735  self.photoCalibErrColphotoCalibErrColphotoCalibErrCol]
1736 
1737  @property
1738  def name(self):
1739  return f'dipMeanFluxErr_{self.instFluxPosCol}_{self.instFluxNegCol}'
1740 
1741  def _func(self, df):
1742  return 0.5*np.sqrt(
1743  (np.fabs(df[self.instFluxNegColinstFluxNegCol]) + np.fabs(df[self.instFluxPosColinstFluxPosCol])
1744  * df[self.photoCalibErrColphotoCalibErrColphotoCalibErrCol])**2
1745  + (df[self.instFluxNegErrColinstFluxNegErrCol]**2 + df[self.instFluxPosErrColinstFluxPosErrCol]**2)
1746  * df[self.photoCalibColphotoCalibColphotoCalibCol]**2)
1747 
1748 
1750  """Compute the absolute difference of dipole fluxes.
1751 
1752  Value is (abs(pos) - abs(neg))
1753 
1754  See also
1755  --------
1756  LocalNanojansky
1757  LocalNanojanskyErr
1758  LocalMagnitude
1759  LocalMagnitudeErr
1760  LocalDipoleMeanFlux
1761  LocalDipoleMeanFluxErr
1762  LocalDipoleDiffFlux
1763  LocalDipoleDiffFluxErr
1764  """
1765 
1766  @property
1767  def columns(self):
1768  return [self.instFluxPosColinstFluxPosCol,
1769  self.instFluxNegColinstFluxNegCol,
1770  self.photoCalibColphotoCalibColphotoCalibCol]
1771 
1772  @property
1773  def name(self):
1774  return f'dipDiffFlux_{self.instFluxPosCol}_{self.instFluxNegCol}'
1775 
1776  def _func(self, df):
1777  return (np.fabs(self.instFluxToNanojanskyinstFluxToNanojansky(df[self.instFluxPosColinstFluxPosCol], df[self.photoCalibColphotoCalibColphotoCalibCol]))
1778  - np.fabs(self.instFluxToNanojanskyinstFluxToNanojansky(df[self.instFluxNegColinstFluxNegCol], df[self.photoCalibColphotoCalibColphotoCalibCol])))
1779 
1780 
1782  """Compute the error on the absolute difference of dipole fluxes.
1783 
1784  See also
1785  --------
1786  LocalNanojansky
1787  LocalNanojanskyErr
1788  LocalMagnitude
1789  LocalMagnitudeErr
1790  LocalDipoleMeanFlux
1791  LocalDipoleMeanFluxErr
1792  LocalDipoleDiffFlux
1793  LocalDipoleDiffFluxErr
1794  """
1795 
1796  @property
1797  def columns(self):
1798  return [self.instFluxPosColinstFluxPosCol,
1799  self.instFluxNegColinstFluxNegCol,
1800  self.instFluxPosErrColinstFluxPosErrCol,
1801  self.instFluxNegErrColinstFluxNegErrCol,
1802  self.photoCalibColphotoCalibColphotoCalibCol,
1803  self.photoCalibErrColphotoCalibErrColphotoCalibErrCol]
1804 
1805  @property
1806  def name(self):
1807  return f'dipDiffFluxErr_{self.instFluxPosCol}_{self.instFluxNegCol}'
1808 
1809  def _func(self, df):
1810  return np.sqrt(
1811  ((np.fabs(df[self.instFluxPosColinstFluxPosCol]) - np.fabs(df[self.instFluxNegColinstFluxNegCol]))
1812  * df[self.photoCalibErrColphotoCalibErrColphotoCalibErrCol])**2
1813  + (df[self.instFluxPosErrColinstFluxPosErrCol]**2 + df[self.instFluxNegErrColinstFluxNegErrCol]**2)
1814  * df[self.photoCalibColphotoCalibColphotoCalibCol]**2)
1815 
1816 
1818  """Base class for returning the ratio of 2 columns.
1819 
1820  Can be used to compute a Signal to Noise ratio for any input flux.
1821 
1822  Parameters
1823  ----------
1824  numerator : `str`
1825  Name of the column to use at the numerator in the ratio
1826  denominator : `str`
1827  Name of the column to use as the denominator in the ratio.
1828  """
1829  def __init__(self,
1830  numerator,
1831  denominator,
1832  **kwargs):
1833  self.numeratornumerator = numerator
1834  self.denominatordenominator = denominator
1835  super().__init__(**kwargs)
1836 
1837  @property
1838  def columns(self):
1839  return [self.numeratornumerator, self.denominatordenominator]
1840 
1841  @property
1842  def name(self):
1843  return f'ratio_{self.numerator}_{self.denominator}'
1844 
1845  def _func(self, df):
1846  with np.warnings.catch_warnings():
1847  np.warnings.filterwarnings('ignore', r'invalid value encountered')
1848  np.warnings.filterwarnings('ignore', r'divide by zero')
1849  return df[self.numeratornumerator] / df[self.denominatordenominator]
def multilevelColumns(self, parq, **kwargs)
Definition: functors.py:906
def __init__(self, col, filt2, filt1, **kwargs)
Definition: functors.py:877
def __init__(self, col, **kwargs)
Definition: functors.py:628
def __init__(self, funcs, **kwargs)
Definition: functors.py:400
def __call__(self, data, **kwargs)
Definition: functors.py:452
def from_file(cls, filename, **kwargs)
Definition: functors.py:524
def from_yaml(cls, translationDefinition, **kwargs)
Definition: functors.py:533
def renameCol(cls, col, renameRules)
Definition: functors.py:515
def multilevelColumns(self, data, **kwargs)
Definition: functors.py:438
def pixelScaleArcseconds(self, cd11, cd12, cd21, cd22)
Definition: functors.py:1236
def __init__(self, col, colCD_1_1, colCD_1_2, colCD_2_1, colCD_2_2, **kwargs)
Definition: functors.py:1313
def __init__(self, col, colCD_1_1, colCD_1_2, colCD_2_1, colCD_2_2, **kwargs)
Definition: functors.py:1276
def __init__(self, col, **kwargs)
Definition: functors.py:674
def __init__(self, expr, **kwargs)
Definition: functors.py:597
def __init__(self, **kwargs)
Definition: functors.py:702
def __call__(self, catalog, **kwargs)
Definition: functors.py:705
def __init__(self, colXX, colXY, colYY, **kwargs)
Definition: functors.py:1069
def __init__(self, colXX, colXY, colYY, **kwargs)
Definition: functors.py:1087
def __call__(self, data, dropna=False)
Definition: functors.py:337
def _func(self, df, dropna=True)
Definition: functors.py:276
def multilevelColumns(self, data, columnIndex=None, returnTuple=False)
Definition: functors.py:226
def _get_data_columnLevelNames(self, data, columnIndex=None)
Definition: functors.py:183
def difference(self, data1, data2, **kwargs)
Definition: functors.py:348
def __init__(self, filt=None, dataset=None, noDup=None)
Definition: functors.py:139
def _get_columnIndex(self, data)
Definition: functors.py:279
def _colsFromDict(self, colDict, columnIndex=None)
Definition: functors.py:205
def _get_data_columnLevels(self, data, columnIndex=None)
Definition: functors.py:159
def __call__(self, parq, dropna=False, **kwargs)
Definition: functors.py:926
def __init__(self, instFluxPosCol, instFluxNegCol, instFluxPosErrCol, instFluxNegErrCol, photoCalibCol, photoCalibErrCol, **kwargs)
Definition: functors.py:1685
def instFluxToNanojansky(self, instFlux, localCalib)
Definition: functors.py:1487
def instFluxErrToMagnitudeErr(self, instFlux, instFluxErr, localCalib, localCalibErr)
Definition: functors.py:1542
def __init__(self, instFluxCol, instFluxErrCol, photoCalibCol, photoCalibErrCol, **kwargs)
Definition: functors.py:1480
def instFluxErrToNanojanskyErr(self, instFlux, instFluxErr, localCalib, localCalibErr)
Definition: functors.py:1504
def instFluxToMagnitude(self, instFlux, localCalib)
Definition: functors.py:1525
def __init__(self, colCD_1_1, colCD_1_2, colCD_2_1, colCD_2_2, **kwargs)
Definition: functors.py:1127
def computeDeltaRaDec(self, x, y, cd11, cd12, cd21, cd22)
Definition: functors.py:1134
def computeSkySeperation(self, ra1, dec1, ra2, dec2)
Definition: functors.py:1163
def getSkySeperationFromPixel(self, x1, y1, x2, y2, cd11, cd12, cd21, cd22)
Definition: functors.py:1189
def __init__(self, col1, col2, **kwargs)
Definition: functors.py:824
def __init__(self, *args, **kwargs)
Definition: functors.py:785
def __init__(self, col, calib=None, **kwargs)
Definition: functors.py:747
def dn2mag(self, dn, fluxMag0)
Definition: functors.py:1404
def dn2flux(self, dn, fluxMag0)
Definition: functors.py:1401
def dn2fluxErr(self, dn, dnErr, fluxMag0, fluxMag0Err)
Definition: functors.py:1410
def dn2MagErr(self, dn, dnErr, fluxMag0, fluxMag0Err)
Definition: functors.py:1415
def __init__(self, colFlux, colFluxErr=None, calib=None, **kwargs)
Definition: functors.py:1370
def __call__(self, catalog, **kwargs)
Definition: functors.py:692
def __init__(self, **kwargs)
Definition: functors.py:689
def __init__(self, colXX, colXY, colYY, **kwargs)
Definition: functors.py:1103
def __init__(self, numerator, denominator, **kwargs)
Definition: functors.py:1832
def mag_aware_eval(df, expr)
Definition: functors.py:562
def init_fromDict(initDict, basePath='lsst.pipe.tasks.functors', typeKey='functor', name=None)
Definition: functors.py:37