lsst.pipe.tasks  21.0.0-120-g57749b33+77c36da417
functors.py
Go to the documentation of this file.
1 # This file is part of pipe_tasks.
2 #
3 # LSST Data Management System
4 # This product includes software developed by the
5 # LSST Project (http://www.lsst.org/).
6 # See COPYRIGHT file at the top of the source tree.
7 #
8 # This program is free software: you can redistribute it and/or modify
9 # it under the terms of the GNU General Public License as published by
10 # the Free Software Foundation, either version 3 of the License, or
11 # (at your option) any later version.
12 #
13 # This program is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details.
17 #
18 # You should have received a copy of the LSST License Statement and
19 # the GNU General Public License along with this program. If not,
20 # see <https://www.lsstcorp.org/LegalNotices/>.
21 #
import os.path
import re
import warnings
from itertools import product

import astropy.units as u
import numpy as np
import pandas as pd
import yaml

from lsst.daf.butler import DeferredDatasetHandle
from lsst.daf.persistence import doImport

from .parquetTable import ParquetTable, MultilevelParquetTable
34 
35 
def init_fromDict(initDict, basePath='lsst.pipe.tasks.functors',
                  typeKey='functor', name=None):
    """Initialize an object defined in a dictionary

    The object needs to be importable as
    f'{basePath}.{initDict[typeKey]}'.
    The positional and keyword arguments (if any) are contained in
    "args" and "kwargs" entries in the dictionary, respectively.
    This is used in `functors.CompositeFunctor.from_yaml` to initialize
    a composite functor from a specification in a YAML file.

    Parameters
    ----------
    initDict : dictionary
        Dictionary describing object's initialization. Must contain
        an entry keyed by ``typeKey`` that is the name of the object,
        relative to ``basePath``.
    basePath : str
        Path relative to module in which ``initDict[typeKey]`` is defined.
    typeKey : str
        Key of ``initDict`` that is the name of the object
        (relative to ``basePath``).
    name : str, optional
        Name of the functor being constructed; used only to build a more
        informative error message if construction fails.
    """
    initDict = initDict.copy()
    # TO DO: DM-21956 We should be able to define functors outside this module
    pythonType = doImport(f'{basePath}.{initDict.pop(typeKey)}')
    args = []
    if 'args' in initDict:
        args = initDict.pop('args')
        if isinstance(args, str):
            args = [args]
    try:
        element = pythonType(*args, **initDict)
    except Exception as e:
        message = f'Error in constructing functor "{name}" of type {pythonType.__name__} with args: {args}'
        # Chain the original exception so the root-cause traceback is preserved.
        raise type(e)(message, e.args) from e
    return element
73 
74 
class Functor(object):
    """Define and execute a calculation on a ParquetTable

    The `__call__` method accepts either a `ParquetTable` object or a
    `DeferredDatasetHandle`, and returns the
    result of the calculation as a single column. Each functor defines what
    columns are needed for the calculation, and only these columns are read
    from the `ParquetTable`.

    The action of `__call__` consists of two steps: first, loading the
    necessary columns from disk into memory as a `pandas.DataFrame` object;
    and second, performing the computation on this dataframe and returning the
    result.


    To define a new `Functor`, a subclass must define a `_func` method,
    that takes a `pandas.DataFrame` and returns result in a `pandas.Series`.
    In addition, it must define the following attributes

    * `_columns`: The columns necessary to perform the calculation
    * `name`: A name appropriate for a figure axis label
    * `shortname`: A name appropriate for use as a dictionary key

    On initialization, a `Functor` should declare what band (`filt` kwarg)
    and dataset (e.g. `'ref'`, `'meas'`, `'forced_src'`) it is intended to be
    applied to. This enables the `_get_data` method to extract the proper
    columns from the parquet file. If not specified, the dataset will fall back
    on the `_defaultDataset` attribute. If band is not specified and `dataset`
    is anything other than `'ref'`, then an error will be raised when trying to
    perform the calculation.

    As currently implemented, `Functor` is only set up to expect a
    dataset of the format of the `deepCoadd_obj` dataset; that is, a
    dataframe with a multi-level column index,
    with the levels of the column index being `band`,
    `dataset`, and `column`. This is defined in the `_columnLevels` attribute,
    as well as being implicit in the role of the `filt` and `dataset` attributes
    defined at initialization. In addition, the `_get_data` method that reads
    the dataframe from the `ParquetTable` will return a dataframe with column
    index levels defined by the `_dfLevels` attribute; by default, this is
    `column`.

    The `_columnLevels` and `_dfLevels` attributes should generally not need to
    be changed, unless `_func` needs columns from multiple filters or datasets
    to do the calculation.
    An example of this is the `lsst.pipe.tasks.functors.Color` functor, for
    which `_dfLevels = ('band', 'column')`, and `_func` expects the dataframe
    it gets to have those levels in the column index.

    Parameters
    ----------
    filt : str
        Filter upon which to do the calculation

    dataset : str
        Dataset upon which to do the calculation
        (e.g., 'ref', 'meas', 'forced_src').

    """

    _defaultDataset = 'ref'
    _columnLevels = ('band', 'dataset', 'column')
    _dfLevels = ('column',)
    _defaultNoDup = False

    def __init__(self, filt=None, dataset=None, noDup=None):
        self.filt = filt
        self.dataset = dataset if dataset is not None else self._defaultDataset
        self._noDup = noDup

    @property
    def noDup(self):
        # An explicit per-instance setting wins; otherwise use the class default.
        if self._noDup is not None:
            return self._noDup
        else:
            return self._defaultNoDup

    @property
    def columns(self):
        """Columns required to perform calculation
        """
        if not hasattr(self, '_columns'):
            raise NotImplementedError('Must define columns property or _columns attribute')
        return self._columns

    def _get_data_columnLevels(self, data, columnIndex=None):
        """Gets the names of the column index levels

        This should only be called in the context of a multilevel table.
        The logic here is to enable this to work both with the gen2 `MultilevelParquetTable`
        and with the gen3 `DeferredDatasetHandle`.

        Parameters
        ----------
        data : `MultilevelParquetTable` or `DeferredDatasetHandle`

        columnIndex (optional): pandas `Index` object
            if not passed, then it is read from the `DeferredDatasetHandle`
        """
        if isinstance(data, DeferredDatasetHandle):
            if columnIndex is None:
                columnIndex = data.get(component="columns")
        if columnIndex is not None:
            return columnIndex.names
        if isinstance(data, MultilevelParquetTable):
            return data.columnLevels
        else:
            raise TypeError(f"Unknown type for data: {type(data)}!")

    def _get_data_columnLevelNames(self, data, columnIndex=None):
        """Gets the content of each of the column levels for a multilevel table

        Similar to `_get_data_columnLevels`, this enables backward compatibility with gen2.

        Mirrors original gen2 implementation within `pipe.tasks.parquetTable.MultilevelParquetTable`
        """
        if isinstance(data, DeferredDatasetHandle):
            if columnIndex is None:
                columnIndex = data.get(component="columns")
        if columnIndex is not None:
            columnLevels = columnIndex.names
            # For each level, the sorted unique values found at that position
            # across all column tuples.
            columnLevelNames = {
                level: list(np.unique(np.array([c for c in columnIndex])[:, i]))
                for i, level in enumerate(columnLevels)
            }
            return columnLevelNames
        if isinstance(data, MultilevelParquetTable):
            return data.columnLevelNames
        else:
            raise TypeError(f"Unknown type for data: {type(data)}!")

    def _colsFromDict(self, colDict, columnIndex=None):
        """Converts dictionary column specification to a list of columns

        This mirrors the original gen2 implementation within `pipe.tasks.parquetTable.MultilevelParquetTable`
        """
        new_colDict = {}
        columnLevels = self._get_data_columnLevels(None, columnIndex=columnIndex)

        for i, lev in enumerate(columnLevels):
            if lev in colDict:
                if isinstance(colDict[lev], str):
                    new_colDict[lev] = [colDict[lev]]
                else:
                    new_colDict[lev] = colDict[lev]
            else:
                # Level not constrained: take every value present in the index.
                new_colDict[lev] = columnIndex.levels[i]

        levelCols = [new_colDict[lev] for lev in columnLevels]
        cols = product(*levelCols)
        return list(cols)

    def multilevelColumns(self, data, columnIndex=None, returnTuple=False):
        """Returns columns needed by functor from multilevel dataset

        To access tables with multilevel column structure, the `MultilevelParquetTable`
        or `DeferredDatasetHandle` need to be passed either a list of tuples or a
        dictionary.

        Parameters
        ----------
        data : `MultilevelParquetTable` or `DeferredDatasetHandle`

        columnIndex (optional): pandas `Index` object
            either passed or read in from `DeferredDatasetHandle`.

        `returnTuple` : bool
            If true, then return a list of tuples rather than the column dictionary
            specification. This is set to `True` by `CompositeFunctor` in order to be able to
            combine columns from the various component functors.

        """
        if isinstance(data, DeferredDatasetHandle) and columnIndex is None:
            columnIndex = data.get(component="columns")

        # Confirm that the dataset has the column levels the functor is expecting it to have.
        columnLevels = self._get_data_columnLevels(data, columnIndex)

        if not set(columnLevels) == set(self._columnLevels):
            raise ValueError(
                "ParquetTable does not have the expected column levels. "
                f"Got {columnLevels}; expected {self._columnLevels}."
            )

        columnDict = {'column': self.columns,
                      'dataset': self.dataset}
        if self.filt is None:
            columnLevelNames = self._get_data_columnLevelNames(data, columnIndex)
            if "band" in columnLevels:
                if self.dataset == "ref":
                    # 'ref' values are band-independent: any band will do.
                    columnDict["band"] = columnLevelNames["band"][0]
                else:
                    raise ValueError(f"'filt' not set for functor {self.name}"
                                     f"(dataset {self.dataset}) "
                                     "and ParquetTable "
                                     "contains multiple filters in column index. "
                                     "Set 'filt' or set 'dataset' to 'ref'.")
        else:
            columnDict['band'] = self.filt

        if isinstance(data, MultilevelParquetTable):
            return data._colsFromDict(columnDict)
        elif isinstance(data, DeferredDatasetHandle):
            if returnTuple:
                return self._colsFromDict(columnDict, columnIndex=columnIndex)
            else:
                return columnDict

    def _func(self, df, dropna=True):
        raise NotImplementedError('Must define calculation on dataframe')

    def _get_columnIndex(self, data):
        """Return columnIndex
        """

        if isinstance(data, DeferredDatasetHandle):
            return data.get(component="columns")
        else:
            return None

    def _get_data(self, data):
        """Retrieve dataframe necessary for calculation.

        The data argument can be a DataFrame, a ParquetTable instance, or a gen3 DeferredDatasetHandle

        Returns dataframe upon which `self._func` can act.

        N.B. while passing a raw pandas `DataFrame` *should* work here, it has not been tested.
        """
        if isinstance(data, pd.DataFrame):
            return data

        # First thing to do: check to see if the data source has a multilevel column index or not.
        columnIndex = self._get_columnIndex(data)
        is_multiLevel = isinstance(data, MultilevelParquetTable) or isinstance(columnIndex, pd.MultiIndex)

        # Simple single-level parquet table, gen2
        if isinstance(data, ParquetTable) and not is_multiLevel:
            columns = self.columns
            df = data.toDataFrame(columns=columns)
            return df

        # Get proper columns specification for this functor
        if is_multiLevel:
            columns = self.multilevelColumns(data, columnIndex=columnIndex)
        else:
            columns = self.columns

        if isinstance(data, MultilevelParquetTable):
            # Load in-memory dataframe with appropriate columns the gen2 way
            df = data.toDataFrame(columns=columns, droplevels=False)
        elif isinstance(data, DeferredDatasetHandle):
            # Load in-memory dataframe with appropriate columns the gen3 way
            df = data.get(parameters={"columns": columns})

        # Drop unnecessary column levels
        if is_multiLevel:
            df = self._setLevels(df)

        return df

    def _setLevels(self, df):
        # Keep only the column-index levels named in `_dfLevels`.
        levelsToDrop = [n for n in df.columns.names if n not in self._dfLevels]
        df.columns = df.columns.droplevel(levelsToDrop)
        return df

    def _dropna(self, vals):
        return vals.dropna()

    def __call__(self, data, dropna=False):
        try:
            df = self._get_data(data)
            vals = self._func(df)
        except Exception:
            # NOTE(review): if `_get_data` itself raises, `df` is unbound here
            # and `self.fail(df)` raises NameError — confirm this is intended.
            vals = self.fail(df)
        if dropna:
            vals = self._dropna(vals)

        return vals

    def difference(self, data1, data2, **kwargs):
        """Computes difference between functor called on two different ParquetTable objects
        """
        return self(data1, **kwargs) - self(data2, **kwargs)

    def fail(self, df):
        # All-NaN fallback with the same index as the input dataframe.
        return pd.Series(np.full(len(df), np.nan), index=df.index)

    @property
    def name(self):
        """Full name of functor (suitable for figure labels)
        """
        # NOTE(review): returns (not raises) the exception class, matching the
        # historical behavior that f-strings interpolating `self.name` rely on.
        return NotImplementedError

    @property
    def shortname(self):
        """Short name of functor (suitable for column name/dict key)
        """
        return self.name
374 
375 
class CompositeFunctor(Functor):
    """Perform multiple calculations at once on a catalog

    The role of a `CompositeFunctor` is to group together computations from
    multiple functors. Instead of returning `pandas.Series` a
    `CompositeFunctor` returns a `pandas.Dataframe`, with the column names
    being the keys of `funcDict`.

    The `columns` attribute of a `CompositeFunctor` is the union of all columns
    in all the component functors.

    A `CompositeFunctor` does not use a `_func` method itself; rather,
    when a `CompositeFunctor` is called, all its columns are loaded
    at once, and the resulting dataframe is passed to the `_func` method of each component
    functor. This has the advantage of only doing I/O (reading from parquet file) once,
    and works because each individual `_func` method of each component functor does not
    care if there are *extra* columns in the dataframe being passed; only that it must contain
    *at least* the `columns` it expects.

    An important and useful class method is `from_yaml`, which takes as argument the path to a YAML
    file specifying a collection of functors.

    Parameters
    ----------
    funcs : `dict` or `list`
        Dictionary or list of functors. If a list, then it will be converted
        into a dictonary according to the `.shortname` attribute of each functor.

    """
    dataset = None

    def __init__(self, funcs, **kwargs):

        if isinstance(funcs, dict):
            self.funcDict = funcs
        else:
            self.funcDict = {f.shortname: f for f in funcs}

        self._filt = None

        super().__init__(**kwargs)

    @property
    def filt(self):
        return self._filt

    @filt.setter
    def filt(self, filt):
        # Propagate the filter setting to every component functor.
        if filt is not None:
            for _, f in self.funcDict.items():
                f.filt = filt
        self._filt = filt

    def update(self, new):
        """Add functors from a dict or another `CompositeFunctor`."""
        if isinstance(new, dict):
            self.funcDict.update(new)
        elif isinstance(new, CompositeFunctor):
            self.funcDict.update(new.funcDict)
        else:
            raise TypeError('Can only update with dictionary or CompositeFunctor.')

        # Make sure new functors have the same 'filt' set
        # (re-assignment deliberately triggers the property setter above).
        if self.filt is not None:
            self.filt = self.filt

    @property
    def columns(self):
        # Union of the columns needed by all component functors.
        return list(set([x for y in [f.columns for f in self.funcDict.values()] for x in y]))

    def multilevelColumns(self, data, **kwargs):
        # Get the union of columns for all component functors. Note the need to have `returnTuple=True` here.
        return list(
            set(
                [
                    x
                    for y in [
                        f.multilevelColumns(data, returnTuple=True, **kwargs) for f in self.funcDict.values()
                    ]
                    for x in y
                ]
            )
        )

    def __call__(self, data, **kwargs):
        """Apply the functor to the data table

        Parameters
        ----------
        data : `lsst.daf.butler.DeferredDatasetHandle`,
               `lsst.pipe.tasks.parquetTable.MultilevelParquetTable`,
               `lsst.pipe.tasks.parquetTable.ParquetTable`,
               or `pandas.DataFrame`.
            The table or a pointer to a table on disk from which columns can
            be accessed
        """
        columnIndex = self._get_columnIndex(data)

        # First, determine whether data has a multilevel index (either gen2 or gen3)
        is_multiLevel = isinstance(data, MultilevelParquetTable) or isinstance(columnIndex, pd.MultiIndex)

        # Multilevel index, gen2 or gen3
        if is_multiLevel:
            columns = self.multilevelColumns(data, columnIndex=columnIndex)

            if isinstance(data, MultilevelParquetTable):
                # Read data into memory the gen2 way
                df = data.toDataFrame(columns=columns, droplevels=False)
            elif isinstance(data, DeferredDatasetHandle):
                # Read data into memory the gen3 way
                df = data.get(parameters={"columns": columns})

            valDict = {}
            for k, f in self.funcDict.items():
                try:
                    subdf = f._setLevels(
                        df[f.multilevelColumns(data, returnTuple=True, columnIndex=columnIndex)]
                    )
                    valDict[k] = f._func(subdf)
                except Exception:
                    # NOTE(review): if `multilevelColumns` itself raised, `subdf`
                    # is unbound and `f.fail(subdf)` raises NameError — confirm.
                    valDict[k] = f.fail(subdf)

        else:
            if isinstance(data, DeferredDatasetHandle):
                # input if Gen3 deferLoad=True
                df = data.get(parameters={"columns": self.columns})
            elif isinstance(data, pd.DataFrame):
                # input if Gen3 deferLoad=False
                df = data
            else:
                # Original Gen2 input is type ParquetTable and the fallback
                df = data.toDataFrame(columns=self.columns)

            valDict = {k: f._func(df) for k, f in self.funcDict.items()}

        try:
            valDf = pd.concat(valDict, axis=1)
        except TypeError:
            print([(k, type(v)) for k, v in valDict.items()])
            raise

        if kwargs.get('dropna', False):
            valDf = valDf.dropna(how='any')

        return valDf

    @classmethod
    def renameCol(cls, col, renameRules):
        """Apply each (old, new) rule whose ``old`` prefix matches ``col``."""
        if renameRules is None:
            return col
        for old, new in renameRules:
            if col.startswith(old):
                col = col.replace(old, new)
        return col

    @classmethod
    def from_file(cls, filename, **kwargs):
        """Construct a `CompositeFunctor` from a YAML file on disk."""
        # Allow environment variables in the filename.
        filename = os.path.expandvars(filename)
        with open(filename) as f:
            translationDefinition = yaml.safe_load(f)

        return cls.from_yaml(translationDefinition, **kwargs)

    @classmethod
    def from_yaml(cls, translationDefinition, **kwargs):
        """Construct a `CompositeFunctor` from a parsed YAML specification."""
        funcs = {}
        for func, val in translationDefinition['funcs'].items():
            funcs[func] = init_fromDict(val, name=func)

        if 'flag_rename_rules' in translationDefinition:
            renameRules = translationDefinition['flag_rename_rules']
        else:
            renameRules = None

        if 'refFlags' in translationDefinition:
            for flag in translationDefinition['refFlags']:
                funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='ref')

        if 'flags' in translationDefinition:
            for flag in translationDefinition['flags']:
                funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='meas')

        return cls(funcs, **kwargs)
559 
560 
def mag_aware_eval(df, expr):
    """Evaluate an expression on a DataFrame, knowing what the 'mag' function means

    Builds on `pandas.DataFrame.eval`, which parses and executes math on dataframes.

    `mag(col)` in the expression is rewritten to the Pogson magnitude
    `-2.5*log10(col)`; if that evaluation fails, `_instFlux` is appended to the
    column name and the evaluation retried.

    Parameters
    ----------
    df : pandas.DataFrame
        Dataframe on which to evaluate expression.

    expr : str
        Expression.
    """
    # The `truediv` kwarg was removed from DataFrame.eval in pandas 2.0;
    # true division has been the (only) behavior in Python 3 regardless.
    try:
        expr_new = re.sub(r'mag\((\w+)\)', r'-2.5*log(\g<1>)/log(10)', expr)
        val = df.eval(expr_new)
    except Exception:  # Should check what actually gets raised
        expr_new = re.sub(r'mag\((\w+)\)', r'-2.5*log(\g<1>_instFlux)/log(10)', expr)
        val = df.eval(expr_new)
    return val
581 
582 
class CustomFunctor(Functor):
    """Arbitrary computation on a catalog

    Column names (and thus the columns to be loaded from catalog) are found
    by finding all words and trying to ignore all "math-y" words.

    Parameters
    ----------
    expr : str
        Expression to evaluate, to be parsed and executed by `mag_aware_eval`.
    """
    # Words that are functions, not column names, in the expression.
    _ignore_words = ('mag', 'sin', 'cos', 'exp', 'log', 'sqrt')

    def __init__(self, expr, **kwargs):
        self.expr = expr
        super().__init__(**kwargs)

    @property
    def name(self):
        return self.expr

    @property
    def columns(self):
        """Columns referenced by the expression, with `mag(x)` arguments
        normalized to their `_instFlux` column names."""
        flux_cols = re.findall(r'mag\(\s*(\w+)\s*\)', self.expr)

        cols = [c for c in re.findall(r'[a-zA-Z_]+', self.expr) if c not in self._ignore_words]
        not_a_col = []
        for c in flux_cols:
            if not re.search('_instFlux$', c):
                cols.append(f'{c}_instFlux')
                not_a_col.append(c)
            else:
                cols.append(c)

        return list(set([c for c in cols if c not in not_a_col]))

    def _func(self, df):
        return mag_aware_eval(df, self.expr)
621 
622 
class Column(Functor):
    """Get column with specified name
    """

    def __init__(self, col, **kwargs):
        self.col = col
        super().__init__(**kwargs)

    @property
    def name(self):
        return self.col

    @property
    def columns(self):
        return [self.col]

    def _func(self, df):
        return df[self.col]
641 
642 
class Index(Functor):
    """Return the value of the index for each object
    """

    # Some column must be requested for the read to happen; coord_ra is a
    # dummy stand-in — only the resulting index is used.
    columns = ['coord_ra']
    _defaultDataset = 'ref'
    _defaultNoDup = True

    def _func(self, df):
        return pd.Series(df.index, index=df.index)
653 
654 
class IDColumn(Column):
    """Return the index (object ID) of the table as a column.

    `col` determines which column is read; the returned values come from
    the dataframe index, not from that column.
    """
    col = 'id'
    _allow_difference = False
    _defaultNoDup = True

    def _func(self, df):
        return pd.Series(df.index, index=df.index)
662 
663 
class FootprintNPix(Column):
    """Get the `base_Footprint_nPix` column (number of footprint pixels)."""
    col = 'base_Footprint_nPix'
666 
667 
class CoordColumn(Column):
    """Base class for coordinate column, in degrees
    """
    # When True, the stored column is assumed to be in radians and is
    # converted to degrees on read.
    _radians = True

    def __init__(self, col, **kwargs):
        super().__init__(col, **kwargs)

    def _func(self, df):
        # Must not modify original column in case that column is used by another functor
        output = df[self.col] * 180 / np.pi if self._radians else df[self.col]
        return output
680 
681 
class RAColumn(CoordColumn):
    """Right Ascension, in degrees
    """
    name = 'RA'
    _defaultNoDup = True

    def __init__(self, **kwargs):
        super().__init__('coord_ra', **kwargs)

    def __call__(self, catalog, **kwargs):
        return super().__call__(catalog, **kwargs)
693 
694 
class DecColumn(CoordColumn):
    """Declination, in degrees
    """
    name = 'Dec'
    _defaultNoDup = True

    def __init__(self, **kwargs):
        super().__init__('coord_dec', **kwargs)

    def __call__(self, catalog, **kwargs):
        return super().__call__(catalog, **kwargs)
706 
707 
def fluxName(col):
    """Return *col* with an '_instFlux' suffix, adding it only if absent."""
    suffix = '_instFlux'
    return col if col.endswith(suffix) else col + suffix
712 
713 
def fluxErrName(col):
    """Return *col* with an '_instFluxErr' suffix, adding it only if absent."""
    suffix = '_instFluxErr'
    return col if col.endswith(suffix) else col + suffix
718 
719 
class Mag(Functor):
    """Compute calibrated magnitude

    Takes a `calib` argument, which returns the flux at mag=0
    as `calib.getFluxMag0()`. If not provided, then the default
    `fluxMag0` is 63095734448.0194, which is default for HSC.
    This default should be removed in DM-21955

    This calculation hides warnings about invalid values and dividing by zero.

    As for all functors, a `dataset` and `filt` kwarg should be provided upon
    initialization. Unlike the default `Functor`, however, the default dataset
    for a `Mag` is `'meas'`, rather than `'ref'`.

    Parameters
    ----------
    col : `str`
        Name of flux column from which to compute magnitude. Can be parseable
        by `lsst.pipe.tasks.functors.fluxName` function---that is, you can pass
        `'modelfit_CModel'` instead of `'modelfit_CModel_instFlux'`) and it will
        understand.
    calib : `lsst.afw.image.calib.Calib` (optional)
        Object that knows zero point.
    """
    _defaultDataset = 'meas'

    def __init__(self, col, calib=None, **kwargs):
        self.col = fluxName(col)
        self.calib = calib
        if calib is not None:
            self.fluxMag0 = calib.getFluxMag0()[0]
        else:
            # TO DO: DM-21955 Replace hard coded photometic calibration values
            self.fluxMag0 = 63095734448.0194

        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.col]

    def _func(self, df):
        # `np.warnings` was removed from NumPy; use the stdlib module directly.
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', r'invalid value encountered')
            warnings.filterwarnings('ignore', r'divide by zero')
            return -2.5*np.log10(df[self.col] / self.fluxMag0)

    @property
    def name(self):
        return f'mag_{self.col}'
770 
771 
class MagErr(Mag):
    """Compute calibrated magnitude uncertainty

    Takes the same `calib` object as `lsst.pipe.tasks.functors.Mag`.

    Parameters
    ----------
    col : `str`
        Name of flux column
    calib : `lsst.afw.image.calib.Calib` (optional)
        Object that knows zero point.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        if self.calib is not None:
            self.fluxMag0Err = self.calib.getFluxMag0()[1]
        else:
            self.fluxMag0Err = 0.

    @property
    def columns(self):
        return [self.col, self.col + 'Err']

    def _func(self, df):
        # `np.warnings` was removed from NumPy; use the stdlib module directly.
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', r'invalid value encountered')
            warnings.filterwarnings('ignore', r'divide by zero')
            fluxCol, fluxErrCol = self.columns
            x = df[fluxErrCol] / df[fluxCol]
            y = self.fluxMag0Err / self.fluxMag0
            # Standard error propagation for -2.5*log10(flux/fluxMag0).
            magErr = (2.5 / np.log(10.)) * np.sqrt(x*x + y*y)
            return magErr

    @property
    def name(self):
        return super().name + '_err'
808 
809 
class NanoMaggie(Mag):
    """Flux scaled by the zero-point flux, in units of 1e-9 (nanomaggies).
    """

    def _func(self, df):
        return (df[self.col] / self.fluxMag0) * 1e9
816 
817 
class MagDiff(Functor):
    """Functor to calculate magnitude difference"""
    _defaultDataset = 'meas'

    def __init__(self, col1, col2, **kwargs):
        self.col1 = fluxName(col1)
        self.col2 = fluxName(col2)
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.col1, self.col2]

    def _func(self, df):
        # `np.warnings` was removed from NumPy; use the stdlib module directly.
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', r'invalid value encountered')
            warnings.filterwarnings('ignore', r'divide by zero')
            return -2.5*np.log10(df[self.col1]/df[self.col2])

    @property
    def name(self):
        return f'(mag_{self.col1} - mag_{self.col2})'

    @property
    def shortname(self):
        return f'magDiff_{self.col1}_{self.col2}'
845 
846 
class Color(Functor):
    """Compute the color between two filters

    Computes color by initializing two different `Mag`
    functors based on the `col` and filters provided, and
    then returning the difference.

    This is enabled by the `_func` expecting a dataframe with a
    multilevel column index, with both `'band'` and `'column'`,
    instead of just `'column'`, which is the `Functor` default.
    This is controlled by the `_dfLevels` attribute.

    Also of note, the default dataset for `Color` is `forced_src'`,
    whereas for `Mag` it is `'meas'`.

    Parameters
    ----------
    col : str
        Name of flux column from which to compute; same as would be passed to
        `lsst.pipe.tasks.functors.Mag`.

    filt2, filt1 : str
        Filters from which to compute magnitude difference.
        Color computed is `Mag(filt2) - Mag(filt1)`.
    """
    _defaultDataset = 'forced_src'
    _dfLevels = ('band', 'column')
    _defaultNoDup = True

    def __init__(self, col, filt2, filt1, **kwargs):
        self.col = fluxName(col)
        if filt2 == filt1:
            raise RuntimeError("Cannot compute Color for %s: %s - %s " % (col, filt2, filt1))
        self.filt2 = filt2
        self.filt1 = filt1

        self.mag2 = Mag(col, filt=filt2, **kwargs)
        self.mag1 = Mag(col, filt=filt1, **kwargs)

        super().__init__(**kwargs)

    @property
    def filt(self):
        # A Color spans two filters, so the single-filter attribute is
        # deliberately inert.
        return None

    @filt.setter
    def filt(self, filt):
        pass

    def _func(self, df):
        mag2 = self.mag2._func(df[self.filt2])
        mag1 = self.mag1._func(df[self.filt1])
        return mag2 - mag1

    @property
    def columns(self):
        return [self.mag1.col, self.mag2.col]

    def multilevelColumns(self, parq, **kwargs):
        return [(self.dataset, self.filt1, self.col), (self.dataset, self.filt2, self.col)]

    @property
    def name(self):
        return f'{self.filt2} - {self.filt1} ({self.col})'

    @property
    def shortname(self):
        return f"{self.col}_{self.filt2.replace('-', '')}m{self.filt1.replace('-', '')}"
915 
916 
class Labeller(Functor):
    """Main function of this subclass is to override the dropna=True
    """
    _null_label = 'null'
    _allow_difference = False
    name = 'label'
    _force_str = False

    def __call__(self, parq, dropna=False, **kwargs):
        # Always force dropna=False so that every row receives a label.
        return super().__call__(parq, dropna=False, **kwargs)
927 
928 
class StarGalaxyLabeller(Labeller):
    """Label objects 'star'/'galaxy'/null from the extendedness column."""
    _columns = ["base_ClassificationExtendedness_value"]
    _column = "base_ClassificationExtendedness_value"

    def _func(self, df):
        x = df[self._columns][self._column]
        mask = x.isnull()
        # extendedness < 0.5 -> code 1, >= 0.5 -> code 0, NaN -> code 2.
        test = (x < 0.5).astype(int)
        test = test.mask(mask, 2)

        # TODO: DM-21954 Look into veracity of inline comment below
        # are these backwards?
        categories = ['galaxy', 'star', self._null_label]
        label = pd.Series(pd.Categorical.from_codes(test, categories=categories),
                          index=x.index, name='label')
        if self._force_str:
            label = label.astype(str)
        return label
947 
948 
class NumStarLabeller(Labeller):
    """Label objects by the number of bands in which they are flagged as a star."""
    _columns = ['numStarFlags']
    labels = {"star": 0, "maybe": 1, "notStar": 2}

    def _func(self, df):
        x = df[self._columns][self._columns[0]]

        # Number of filters
        n = len(x.unique()) - 1

        # Bins: 0 -> 'noStar', 1..n-1 -> 'maybe', n -> 'star'.
        labels = ['noStar', 'maybe', 'star']
        label = pd.Series(pd.cut(x, [-1, 0, n-1, n], labels=labels),
                          index=x.index, name='label')

        if self._force_str:
            label = label.astype(str)

        return label
967 
968 
970  name = 'Deconvolved Moments'
971  shortname = 'deconvolvedMoments'
972  _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
973  "ext_shapeHSM_HsmSourceMoments_yy",
974  "base_SdssShape_xx", "base_SdssShape_yy",
975  "ext_shapeHSM_HsmPsfMoments_xx",
976  "ext_shapeHSM_HsmPsfMoments_yy")
977 
978  def _func(self, df):
979  """Calculate deconvolved moments"""
980  if "ext_shapeHSM_HsmSourceMoments_xx" in df.columns: # _xx added by tdm
981  hsm = df["ext_shapeHSM_HsmSourceMoments_xx"] + df["ext_shapeHSM_HsmSourceMoments_yy"]
982  else:
983  hsm = np.ones(len(df))*np.nan
984  sdss = df["base_SdssShape_xx"] + df["base_SdssShape_yy"]
985  if "ext_shapeHSM_HsmPsfMoments_xx" in df.columns:
986  psf = df["ext_shapeHSM_HsmPsfMoments_xx"] + df["ext_shapeHSM_HsmPsfMoments_yy"]
987  else:
988  # LSST does not have shape.sdss.psf. Could instead add base_PsfShape to catalog using
989  # exposure.getPsf().computeShape(s.getCentroid()).getIxx()
990  # raise TaskError("No psf shape parameter found in catalog")
991  raise RuntimeError('No psf shape parameter found in catalog')
992 
993  return hsm.where(np.isfinite(hsm), sdss) - psf
994 
995 
997  """Functor to calculate SDSS trace radius size for sources"""
998  name = "SDSS Trace Size"
999  shortname = 'sdssTrace'
1000  _columns = ("base_SdssShape_xx", "base_SdssShape_yy")
1001 
1002  def _func(self, df):
1003  srcSize = np.sqrt(0.5*(df["base_SdssShape_xx"] + df["base_SdssShape_yy"]))
1004  return srcSize
1005 
1006 
1008  """Functor to calculate SDSS trace radius size difference (%) between object and psf model"""
1009  name = "PSF - SDSS Trace Size"
1010  shortname = 'psf_sdssTrace'
1011  _columns = ("base_SdssShape_xx", "base_SdssShape_yy",
1012  "base_SdssShape_psf_xx", "base_SdssShape_psf_yy")
1013 
1014  def _func(self, df):
1015  srcSize = np.sqrt(0.5*(df["base_SdssShape_xx"] + df["base_SdssShape_yy"]))
1016  psfSize = np.sqrt(0.5*(df["base_SdssShape_psf_xx"] + df["base_SdssShape_psf_yy"]))
1017  sizeDiff = 100*(srcSize - psfSize)/(0.5*(srcSize + psfSize))
1018  return sizeDiff
1019 
1020 
1022  """Functor to calculate HSM trace radius size for sources"""
1023  name = 'HSM Trace Size'
1024  shortname = 'hsmTrace'
1025  _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
1026  "ext_shapeHSM_HsmSourceMoments_yy")
1027 
1028  def _func(self, df):
1029  srcSize = np.sqrt(0.5*(df["ext_shapeHSM_HsmSourceMoments_xx"]
1030  + df["ext_shapeHSM_HsmSourceMoments_yy"]))
1031  return srcSize
1032 
1033 
1035  """Functor to calculate HSM trace radius size difference (%) between object and psf model"""
1036  name = 'PSF - HSM Trace Size'
1037  shortname = 'psf_HsmTrace'
1038  _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
1039  "ext_shapeHSM_HsmSourceMoments_yy",
1040  "ext_shapeHSM_HsmPsfMoments_xx",
1041  "ext_shapeHSM_HsmPsfMoments_yy")
1042 
1043  def _func(self, df):
1044  srcSize = np.sqrt(0.5*(df["ext_shapeHSM_HsmSourceMoments_xx"]
1045  + df["ext_shapeHSM_HsmSourceMoments_yy"]))
1046  psfSize = np.sqrt(0.5*(df["ext_shapeHSM_HsmPsfMoments_xx"]
1047  + df["ext_shapeHSM_HsmPsfMoments_yy"]))
1048  sizeDiff = 100*(srcSize - psfSize)/(0.5*(srcSize + psfSize))
1049  return sizeDiff
1050 
1051 
1053  name = 'HSM Psf FWHM'
1054  _columns = ('ext_shapeHSM_HsmPsfMoments_xx', 'ext_shapeHSM_HsmPsfMoments_yy')
1055  # TODO: DM-21403 pixel scale should be computed from the CD matrix or transform matrix
1056  pixelScale = 0.168
1057  SIGMA2FWHM = 2*np.sqrt(2*np.log(2))
1058 
1059  def _func(self, df):
1060  return self.pixelScalepixelScale*self.SIGMA2FWHMSIGMA2FWHM*np.sqrt(
1061  0.5*(df['ext_shapeHSM_HsmPsfMoments_xx'] + df['ext_shapeHSM_HsmPsfMoments_yy']))
1062 
1063 
class E1(Functor):
    """Distortion ellipticity component e1 = (Ixx - Iyy) / (Ixx + Iyy).

    Parameters
    ----------
    colXX, colXY, colYY : `str`
        Names of the second-moment columns.
    """
    name = "Distortion Ellipticity (e1)"
    shortname = "Distortion"

    def __init__(self, colXX, colXY, colYY, **kwargs):
        self.colXX = colXX
        self.colXY = colXY
        self.colYY = colYY
        self._columns = [self.colXX, self.colXY, self.colYY]
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.colXX, self.colXY, self.colYY]

    def _func(self, df):
        # Parentheses matter: the distortion is the ratio
        # (Ixx - Iyy) / (Ixx + Iyy); the original divided only Iyy.
        return (df[self.colXX] - df[self.colYY]) / (df[self.colXX] + df[self.colYY])
1081 
1082 
class E2(Functor):
    """Distortion ellipticity component e2 = 2*Ixy / (Ixx + Iyy)."""
    name = "Ellipticity e2"

    def __init__(self, colXX, colXY, colYY, **kwargs):
        self.colXX = colXX
        self.colXY = colXY
        self.colYY = colYY
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.colXX, self.colXY, self.colYY]

    def _func(self, df):
        return 2*df[self.colXY] / (df[self.colXX] + df[self.colYY])
1098 
1099 
1101 
1102  def __init__(self, colXX, colXY, colYY, **kwargs):
1103  self.colXXcolXX = colXX
1104  self.colXYcolXY = colXY
1105  self.colYYcolYY = colYY
1106  super().__init__(**kwargs)
1107 
1108  @property
1109  def columns(self):
1110  return [self.colXXcolXX, self.colXYcolXY, self.colYYcolYY]
1111 
1112  def _func(self, df):
1113  return (df[self.colXXcolXX]*df[self.colYYcolYY] - df[self.colXYcolXY]**2)**0.25
1114 
1115 
1117  """Computations using the stored localWcs.
1118  """
1119  name = "LocalWcsOperations"
1120 
1121  def __init__(self,
1122  colCD_1_1,
1123  colCD_1_2,
1124  colCD_2_1,
1125  colCD_2_2,
1126  **kwargs):
1127  self.colCD_1_1colCD_1_1 = colCD_1_1
1128  self.colCD_1_2colCD_1_2 = colCD_1_2
1129  self.colCD_2_1colCD_2_1 = colCD_2_1
1130  self.colCD_2_2colCD_2_2 = colCD_2_2
1131  super().__init__(**kwargs)
1132 
1133  def computeDeltaRaDec(self, x, y, cd11, cd12, cd21, cd22):
1134  """Compute the distance on the sphere from x2, y1 to x1, y1.
1135 
1136  Parameters
1137  ----------
1138  x : `pandas.Series`
1139  X pixel coordinate.
1140  y : `pandas.Series`
1141  Y pixel coordinate.
1142  cd11 : `pandas.Series`
1143  [1, 1] element of the local Wcs affine transform.
1144  cd11 : `pandas.Series`
1145  [1, 1] element of the local Wcs affine transform.
1146  cd12 : `pandas.Series`
1147  [1, 2] element of the local Wcs affine transform.
1148  cd21 : `pandas.Series`
1149  [2, 1] element of the local Wcs affine transform.
1150  cd22 : `pandas.Series`
1151  [2, 2] element of the local Wcs affine transform.
1152 
1153  Returns
1154  -------
1155  raDecTuple : tuple
1156  RA and dec conversion of x and y given the local Wcs. Returned
1157  units are in radians.
1158 
1159  """
1160  return (x * cd11 + y * cd12, x * cd21 + y * cd22)
1161 
1162  def computeSkySeperation(self, ra1, dec1, ra2, dec2):
1163  """Compute the local pixel scale conversion.
1164 
1165  Parameters
1166  ----------
1167  ra1 : `pandas.Series`
1168  Ra of the first coordinate in radians.
1169  dec1 : `pandas.Series`
1170  Dec of the first coordinate in radians.
1171  ra2 : `pandas.Series`
1172  Ra of the second coordinate in radians.
1173  dec2 : `pandas.Series`
1174  Dec of the second coordinate in radians.
1175 
1176  Returns
1177  -------
1178  dist : `pandas.Series`
1179  Distance on the sphere in radians.
1180  """
1181  deltaDec = dec2 - dec1
1182  deltaRa = ra2 - ra1
1183  return 2 * np.arcsin(
1184  np.sqrt(
1185  np.sin(deltaDec / 2) ** 2
1186  + np.cos(dec2) * np.cos(dec1) * np.sin(deltaRa / 2) ** 2))
1187 
1188  def getSkySeperationFromPixel(self, x1, y1, x2, y2, cd11, cd12, cd21, cd22):
1189  """Compute the distance on the sphere from x2, y1 to x1, y1.
1190 
1191  Parameters
1192  ----------
1193  x1 : `pandas.Series`
1194  X pixel coordinate.
1195  y1 : `pandas.Series`
1196  Y pixel coordinate.
1197  x2 : `pandas.Series`
1198  X pixel coordinate.
1199  y2 : `pandas.Series`
1200  Y pixel coordinate.
1201  cd11 : `pandas.Series`
1202  [1, 1] element of the local Wcs affine transform.
1203  cd11 : `pandas.Series`
1204  [1, 1] element of the local Wcs affine transform.
1205  cd12 : `pandas.Series`
1206  [1, 2] element of the local Wcs affine transform.
1207  cd21 : `pandas.Series`
1208  [2, 1] element of the local Wcs affine transform.
1209  cd22 : `pandas.Series`
1210  [2, 2] element of the local Wcs affine transform.
1211 
1212  Returns
1213  -------
1214  Distance : `pandas.Series`
1215  Arcseconds per pixel at the location of the local WC
1216  """
1217  ra1, dec1 = self.computeDeltaRaDeccomputeDeltaRaDec(x1, y1, cd11, cd12, cd21, cd22)
1218  ra2, dec2 = self.computeDeltaRaDeccomputeDeltaRaDec(x2, y2, cd11, cd12, cd21, cd22)
1219  # Great circle distance for small separations.
1220  return self.computeSkySeperationcomputeSkySeperation(ra1, dec1, ra2, dec2)
1221 
1222 
1224  """Compute the local pixel scale from the stored CDMatrix.
1225  """
1226  name = "PixelScale"
1227 
1228  @property
1229  def columns(self):
1230  return [self.colCD_1_1colCD_1_1,
1231  self.colCD_1_2colCD_1_2,
1232  self.colCD_2_1colCD_2_1,
1233  self.colCD_2_2colCD_2_2]
1234 
1235  def pixelScaleArcseconds(self, cd11, cd12, cd21, cd22):
1236  """Compute the local pixel to scale conversion in arcseconds.
1237 
1238  Parameters
1239  ----------
1240  cd11 : `pandas.Series`
1241  [1, 1] element of the local Wcs affine transform in radians.
1242  cd11 : `pandas.Series`
1243  [1, 1] element of the local Wcs affine transform in radians.
1244  cd12 : `pandas.Series`
1245  [1, 2] element of the local Wcs affine transform in radians.
1246  cd21 : `pandas.Series`
1247  [2, 1] element of the local Wcs affine transform in radians.
1248  cd22 : `pandas.Series`
1249  [2, 2] element of the local Wcs affine transform in radians.
1250 
1251  Returns
1252  -------
1253  pixScale : `pandas.Series`
1254  Arcseconds per pixel at the location of the local WC
1255  """
1256  return 3600 * np.degrees(np.sqrt(np.fabs(cd11 * cd22 - cd12 * cd21)))
1257 
1258  def _func(self, df):
1259  return self.pixelScaleArcsecondspixelScaleArcseconds(df[self.colCD_1_1colCD_1_1],
1260  df[self.colCD_1_2colCD_1_2],
1261  df[self.colCD_2_1colCD_2_1],
1262  df[self.colCD_2_2colCD_2_2])
1263 
1264 
1266  """Convert a value in units pixels squared to units arcseconds squared.
1267  """
1268 
1269  def __init__(self,
1270  col,
1271  colCD_1_1,
1272  colCD_1_2,
1273  colCD_2_1,
1274  colCD_2_2,
1275  **kwargs):
1276  self.colcol = col
1277  super().__init__(colCD_1_1,
1278  colCD_1_2,
1279  colCD_2_1,
1280  colCD_2_2,
1281  **kwargs)
1282 
1283  @property
1284  def name(self):
1285  return f"{self.col}_asArcseconds"
1286 
1287  @property
1288  def columns(self):
1289  return [self.colcol,
1290  self.colCD_1_1colCD_1_1,
1291  self.colCD_1_2colCD_1_2,
1292  self.colCD_2_1colCD_2_1,
1293  self.colCD_2_2colCD_2_2]
1294 
1295  def _func(self, df):
1296  return df[self.colcol] * self.pixelScaleArcsecondspixelScaleArcseconds(df[self.colCD_1_1colCD_1_1],
1297  df[self.colCD_1_2colCD_1_2],
1298  df[self.colCD_2_1colCD_2_1],
1299  df[self.colCD_2_2colCD_2_2])
1300 
1301 
1303  """Convert a value in units pixels to units arcseconds.
1304  """
1305 
1306  def __init__(self,
1307  col,
1308  colCD_1_1,
1309  colCD_1_2,
1310  colCD_2_1,
1311  colCD_2_2,
1312  **kwargs):
1313  self.colcol = col
1314  super().__init__(colCD_1_1,
1315  colCD_1_2,
1316  colCD_2_1,
1317  colCD_2_2,
1318  **kwargs)
1319 
1320  @property
1321  def name(self):
1322  return f"{self.col}_asArcsecondsSq"
1323 
1324  @property
1325  def columns(self):
1326  return [self.colcol,
1327  self.colCD_1_1colCD_1_1,
1328  self.colCD_1_2colCD_1_2,
1329  self.colCD_2_1colCD_2_1,
1330  self.colCD_2_2colCD_2_2]
1331 
1332  def _func(self, df):
1333  pixScale = self.pixelScaleArcsecondspixelScaleArcseconds(df[self.colCD_1_1colCD_1_1],
1334  df[self.colCD_1_2colCD_1_2],
1335  df[self.colCD_2_1colCD_2_1],
1336  df[self.colCD_2_2colCD_2_2])
1337  return df[self.colcol] * pixScale * pixScale
1338 
1339 
1341  name = 'Reference Band'
1342  shortname = 'refBand'
1343 
1344  @property
1345  def columns(self):
1346  return ["merge_measurement_i",
1347  "merge_measurement_r",
1348  "merge_measurement_z",
1349  "merge_measurement_y",
1350  "merge_measurement_g"]
1351 
1352  def _func(self, df):
1353  def getFilterAliasName(row):
1354  # get column name with the max value (True > False)
1355  colName = row.idxmax()
1356  return colName.replace('merge_measurement_', '')
1357 
1358  return df[self.columnscolumnscolumns].apply(getFilterAliasName, axis=1)
1359 
1360 
1362  # AB to NanoJansky (3631 Jansky)
1363  AB_FLUX_SCALE = (0 * u.ABmag).to_value(u.nJy)
1364  LOG_AB_FLUX_SCALE = 12.56
1365  FIVE_OVER_2LOG10 = 1.085736204758129569
1366  # TO DO: DM-21955 Replace hard coded photometic calibration values
1367  COADD_ZP = 27
1368 
1369  def __init__(self, colFlux, colFluxErr=None, calib=None, **kwargs):
1370  self.vhypotvhypot = np.vectorize(self.hypothypot)
1371  self.colcol = colFlux
1372  self.colFluxErrcolFluxErr = colFluxErr
1373 
1374  self.calibcalib = calib
1375  if calib is not None:
1376  self.fluxMag0fluxMag0, self.fluxMag0ErrfluxMag0Err = calib.getFluxMag0()
1377  else:
1378  self.fluxMag0fluxMag0 = 1./np.power(10, -0.4*self.COADD_ZPCOADD_ZP)
1379  self.fluxMag0ErrfluxMag0Err = 0.
1380 
1381  super().__init__(**kwargs)
1382 
1383  @property
1384  def columns(self):
1385  return [self.colcol]
1386 
1387  @property
1388  def name(self):
1389  return f'mag_{self.col}'
1390 
1391  @classmethod
1392  def hypot(cls, a, b):
1393  if np.abs(a) < np.abs(b):
1394  a, b = b, a
1395  if a == 0.:
1396  return 0.
1397  q = b/a
1398  return np.abs(a) * np.sqrt(1. + q*q)
1399 
1400  def dn2flux(self, dn, fluxMag0):
1401  return self.AB_FLUX_SCALEAB_FLUX_SCALE * dn / fluxMag0
1402 
1403  def dn2mag(self, dn, fluxMag0):
1404  with np.warnings.catch_warnings():
1405  np.warnings.filterwarnings('ignore', r'invalid value encountered')
1406  np.warnings.filterwarnings('ignore', r'divide by zero')
1407  return -2.5 * np.log10(dn/fluxMag0)
1408 
1409  def dn2fluxErr(self, dn, dnErr, fluxMag0, fluxMag0Err):
1410  retVal = self.vhypotvhypot(dn * fluxMag0Err, dnErr * fluxMag0)
1411  retVal *= self.AB_FLUX_SCALEAB_FLUX_SCALE / fluxMag0 / fluxMag0
1412  return retVal
1413 
1414  def dn2MagErr(self, dn, dnErr, fluxMag0, fluxMag0Err):
1415  retVal = self.dn2fluxErrdn2fluxErr(dn, dnErr, fluxMag0, fluxMag0Err) / self.dn2fluxdn2flux(dn, fluxMag0)
1416  return self.FIVE_OVER_2LOG10FIVE_OVER_2LOG10 * retVal
1417 
1418 
1420  def _func(self, df):
1421  return self.dn2fluxdn2flux(df[self.colcol], self.fluxMag0fluxMag0)
1422 
1423 
1425  @property
1426  def columns(self):
1427  return [self.colcol, self.colFluxErrcolFluxErr]
1428 
1429  def _func(self, df):
1430  retArr = self.dn2fluxErrdn2fluxErr(df[self.colcol], df[self.colFluxErrcolFluxErr], self.fluxMag0fluxMag0, self.fluxMag0ErrfluxMag0Err)
1431  return pd.Series(retArr, index=df.index)
1432 
1433 
1435  def _func(self, df):
1436  return self.dn2magdn2mag(df[self.colcol], self.fluxMag0fluxMag0)
1437 
1438 
1440  @property
1441  def columns(self):
1442  return [self.colcol, self.colFluxErrcolFluxErr]
1443 
1444  def _func(self, df):
1445  retArr = self.dn2MagErrdn2MagErr(df[self.colcol], df[self.colFluxErrcolFluxErr], self.fluxMag0fluxMag0, self.fluxMag0ErrfluxMag0Err)
1446  return pd.Series(retArr, index=df.index)
1447 
1448 
1450  """Base class for calibrating the specified instrument flux column using
1451  the local photometric calibration.
1452 
1453  Parameters
1454  ----------
1455  instFluxCol : `str`
1456  Name of the instrument flux column.
1457  instFluxErrCol : `str`
1458  Name of the assocated error columns for ``instFluxCol``.
1459  photoCalibCol : `str`
1460  Name of local calibration column.
1461  photoCalibErrCol : `str`
1462  Error associated with ``photoCalibCol``
1463 
1464  See also
1465  --------
1466  LocalPhotometry
1467  LocalNanojansky
1468  LocalNanojanskyErr
1469  LocalMagnitude
1470  LocalMagnitudeErr
1471  """
1472  logNJanskyToAB = (1 * u.nJy).to_value(u.ABmag)
1473 
1474  def __init__(self,
1475  instFluxCol,
1476  instFluxErrCol,
1477  photoCalibCol,
1478  photoCalibErrCol,
1479  **kwargs):
1480  self.instFluxColinstFluxCol = instFluxCol
1481  self.instFluxErrColinstFluxErrCol = instFluxErrCol
1482  self.photoCalibColphotoCalibCol = photoCalibCol
1483  self.photoCalibErrColphotoCalibErrCol = photoCalibErrCol
1484  super().__init__(**kwargs)
1485 
    def instFluxToNanojansky(self, instFlux, localCalib):
        """Convert instrument flux to nanojanskys.

        Parameters
        ----------
        instFlux : `numpy.ndarray` or `pandas.Series`
            Array of instrument flux measurements
        localCalib : `numpy.ndarray` or `pandas.Series`
            Array of local photometric calibration estimates.

        Returns
        -------
        calibFlux : `numpy.ndarray` or `pandas.Series`
            Array of calibrated flux measurements.
        """
        # The local calibration is a direct counts -> nJy multiplier.
        return instFlux * localCalib
1502 
    def instFluxErrToNanojanskyErr(self, instFlux, instFluxErr, localCalib, localCalibErr):
        """Convert instrument flux error to a nanojansky error.

        Parameters
        ----------
        instFlux : `numpy.ndarray` or `pandas.Series`
            Array of instrument flux measurements
        instFluxErr : `numpy.ndarray` or `pandas.Series`
            Errors on associated ``instFlux`` values
        localCalib : `numpy.ndarray` or `pandas.Series`
            Array of local photometric calibration estimates.
        localCalibErr : `numpy.ndarray` or `pandas.Series`
            Errors on associated ``localCalib`` values

        Returns
        -------
        calibFluxErr : `numpy.ndarray` or `pandas.Series`
            Errors on calibrated flux measurements.
        """
        # Quadrature sum of the two first-order error terms.
        return np.hypot(instFluxErr * localCalib, instFlux * localCalibErr)
1523 
1524  def instFluxToMagnitude(self, instFlux, localCalib):
1525  """Convert instrument flux to nanojanskys.
1526 
1527  Parameters
1528  ----------
1529  instFlux : `numpy.ndarray` or `pandas.Series`
1530  Array of instrument flux measurements
1531  localCalib : `numpy.ndarray` or `pandas.Series`
1532  Array of local photometric calibration estimates.
1533 
1534  Returns
1535  -------
1536  calibMag : `numpy.ndarray` or `pandas.Series`
1537  Array of calibrated AB magnitudes.
1538  """
1539  return -2.5 * np.log10(self.instFluxToNanojanskyinstFluxToNanojansky(instFlux, localCalib)) + self.logNJanskyToABlogNJanskyToAB
1540 
1541  def instFluxErrToMagnitudeErr(self, instFlux, instFluxErr, localCalib, localCalibErr):
1542  """Convert instrument flux err to nanojanskys.
1543 
1544  Parameters
1545  ----------
1546  instFlux : `numpy.ndarray` or `pandas.Series`
1547  Array of instrument flux measurements
1548  instFluxErr : `numpy.ndarray` or `pandas.Series`
1549  Errors on associated ``instFlux`` values
1550  localCalib : `numpy.ndarray` or `pandas.Series`
1551  Array of local photometric calibration estimates.
1552  localCalibErr : `numpy.ndarray` or `pandas.Series`
1553  Errors on associated ``localCalib`` values
1554 
1555  Returns
1556  -------
1557  calibMagErr: `numpy.ndarray` or `pandas.Series`
1558  Error on calibrated AB magnitudes.
1559  """
1560  err = self.instFluxErrToNanojanskyErrinstFluxErrToNanojanskyErr(instFlux, instFluxErr, localCalib, localCalibErr)
1561  return 2.5 / np.log(10) * err / self.instFluxToNanojanskyinstFluxToNanojansky(instFlux, instFluxErr)
1562 
1563 
1565  """Compute calibrated fluxes using the local calibration value.
1566 
1567  See also
1568  --------
1569  LocalNanojansky
1570  LocalNanojanskyErr
1571  LocalMagnitude
1572  LocalMagnitudeErr
1573  """
1574 
1575  @property
1576  def columns(self):
1577  return [self.instFluxColinstFluxCol, self.photoCalibColphotoCalibCol]
1578 
1579  @property
1580  def name(self):
1581  return f'flux_{self.instFluxCol}'
1582 
1583  def _func(self, df):
1584  return self.instFluxToNanojanskyinstFluxToNanojansky(df[self.instFluxColinstFluxCol], df[self.photoCalibColphotoCalibCol])
1585 
1586 
1588  """Compute calibrated flux errors using the local calibration value.
1589 
1590  See also
1591  --------
1592  LocalNanojansky
1593  LocalNanojanskyErr
1594  LocalMagnitude
1595  LocalMagnitudeErr
1596  """
1597 
1598  @property
1599  def columns(self):
1600  return [self.instFluxColinstFluxCol, self.instFluxErrColinstFluxErrCol,
1601  self.photoCalibColphotoCalibCol, self.photoCalibErrColphotoCalibErrCol]
1602 
1603  @property
1604  def name(self):
1605  return f'fluxErr_{self.instFluxCol}'
1606 
1607  def _func(self, df):
1608  return self.instFluxErrToNanojanskyErrinstFluxErrToNanojanskyErr(df[self.instFluxColinstFluxCol], df[self.instFluxErrColinstFluxErrCol],
1609  df[self.photoCalibColphotoCalibCol], df[self.photoCalibErrColphotoCalibErrCol])
1610 
1611 
1613  """Compute calibrated AB magnitudes using the local calibration value.
1614 
1615  See also
1616  --------
1617  LocalNanojansky
1618  LocalNanojanskyErr
1619  LocalMagnitude
1620  LocalMagnitudeErr
1621  """
1622 
1623  @property
1624  def columns(self):
1625  return [self.instFluxColinstFluxCol, self.photoCalibColphotoCalibCol]
1626 
1627  @property
1628  def name(self):
1629  return f'mag_{self.instFluxCol}'
1630 
1631  def _func(self, df):
1632  return self.instFluxToMagnitudeinstFluxToMagnitude(df[self.instFluxColinstFluxCol],
1633  df[self.photoCalibColphotoCalibCol])
1634 
1635 
1637  """Compute calibrated AB magnitude errors using the local calibration value.
1638 
1639  See also
1640  --------
1641  LocalNanojansky
1642  LocalNanojanskyErr
1643  LocalMagnitude
1644  LocalMagnitudeErr
1645  """
1646 
1647  @property
1648  def columns(self):
1649  return [self.instFluxColinstFluxCol, self.instFluxErrColinstFluxErrCol,
1650  self.photoCalibColphotoCalibCol, self.photoCalibErrColphotoCalibErrCol]
1651 
1652  @property
1653  def name(self):
1654  return f'magErr_{self.instFluxCol}'
1655 
1656  def _func(self, df):
1657  return self.instFluxErrToMagnitudeErrinstFluxErrToMagnitudeErr(df[self.instFluxColinstFluxCol],
1658  df[self.instFluxErrColinstFluxErrCol],
1659  df[self.photoCalibColphotoCalibCol],
1660  df[self.photoCalibErrColphotoCalibErrCol])
1661 
1662 
1664  """Compute absolute mean of dipole fluxes.
1665 
1666  See also
1667  --------
1668  LocalNanojansky
1669  LocalNanojanskyErr
1670  LocalMagnitude
1671  LocalMagnitudeErr
1672  LocalDipoleMeanFlux
1673  LocalDipoleMeanFluxErr
1674  LocalDipoleDiffFlux
1675  LocalDipoleDiffFluxErr
1676  """
1677  def __init__(self,
1678  instFluxPosCol,
1679  instFluxNegCol,
1680  instFluxPosErrCol,
1681  instFluxNegErrCol,
1682  photoCalibCol,
1683  photoCalibErrCol,
1684  **kwargs):
1685  self.instFluxNegColinstFluxNegCol = instFluxNegCol
1686  self.instFluxPosColinstFluxPosCol = instFluxPosCol
1687  self.instFluxNegErrColinstFluxNegErrCol = instFluxNegErrCol
1688  self.instFluxPosErrColinstFluxPosErrCol = instFluxPosErrCol
1689  self.photoCalibColphotoCalibColphotoCalibCol = photoCalibCol
1690  self.photoCalibErrColphotoCalibErrColphotoCalibErrCol = photoCalibErrCol
1691  super().__init__(instFluxNegCol,
1692  instFluxNegErrCol,
1693  photoCalibCol,
1694  photoCalibErrCol,
1695  **kwargs)
1696 
1697  @property
1698  def columns(self):
1699  return [self.instFluxPosColinstFluxPosCol,
1700  self.instFluxNegColinstFluxNegCol,
1701  self.photoCalibColphotoCalibColphotoCalibCol]
1702 
1703  @property
1704  def name(self):
1705  return f'dipMeanFlux_{self.instFluxPosCol}_{self.instFluxNegCol}'
1706 
1707  def _func(self, df):
1708  return 0.5*(np.fabs(self.instFluxToNanojanskyinstFluxToNanojansky(df[self.instFluxNegColinstFluxNegCol], df[self.photoCalibColphotoCalibColphotoCalibCol]))
1709  + np.fabs(self.instFluxToNanojanskyinstFluxToNanojansky(df[self.instFluxPosColinstFluxPosCol], df[self.photoCalibColphotoCalibColphotoCalibCol])))
1710 
1711 
1713  """Compute the error on the absolute mean of dipole fluxes.
1714 
1715  See also
1716  --------
1717  LocalNanojansky
1718  LocalNanojanskyErr
1719  LocalMagnitude
1720  LocalMagnitudeErr
1721  LocalDipoleMeanFlux
1722  LocalDipoleMeanFluxErr
1723  LocalDipoleDiffFlux
1724  LocalDipoleDiffFluxErr
1725  """
1726 
1727  @property
1728  def columns(self):
1729  return [self.instFluxPosColinstFluxPosCol,
1730  self.instFluxNegColinstFluxNegCol,
1731  self.instFluxPosErrColinstFluxPosErrCol,
1732  self.instFluxNegErrColinstFluxNegErrCol,
1733  self.photoCalibColphotoCalibColphotoCalibCol,
1734  self.photoCalibErrColphotoCalibErrColphotoCalibErrCol]
1735 
1736  @property
1737  def name(self):
1738  return f'dipMeanFluxErr_{self.instFluxPosCol}_{self.instFluxNegCol}'
1739 
1740  def _func(self, df):
1741  return 0.5*np.sqrt(
1742  (np.fabs(df[self.instFluxNegColinstFluxNegCol]) + np.fabs(df[self.instFluxPosColinstFluxPosCol])
1743  * df[self.photoCalibErrColphotoCalibErrColphotoCalibErrCol])**2
1744  + (df[self.instFluxNegErrColinstFluxNegErrCol]**2 + df[self.instFluxPosErrColinstFluxPosErrCol]**2)
1745  * df[self.photoCalibColphotoCalibColphotoCalibCol]**2)
1746 
1747 
1749  """Compute the absolute difference of dipole fluxes.
1750 
1751  Value is (abs(pos) - abs(neg))
1752 
1753  See also
1754  --------
1755  LocalNanojansky
1756  LocalNanojanskyErr
1757  LocalMagnitude
1758  LocalMagnitudeErr
1759  LocalDipoleMeanFlux
1760  LocalDipoleMeanFluxErr
1761  LocalDipoleDiffFlux
1762  LocalDipoleDiffFluxErr
1763  """
1764 
1765  @property
1766  def columns(self):
1767  return [self.instFluxPosColinstFluxPosCol,
1768  self.instFluxNegColinstFluxNegCol,
1769  self.photoCalibColphotoCalibColphotoCalibCol]
1770 
1771  @property
1772  def name(self):
1773  return f'dipDiffFlux_{self.instFluxPosCol}_{self.instFluxNegCol}'
1774 
1775  def _func(self, df):
1776  return (np.fabs(self.instFluxToNanojanskyinstFluxToNanojansky(df[self.instFluxPosColinstFluxPosCol], df[self.photoCalibColphotoCalibColphotoCalibCol]))
1777  - np.fabs(self.instFluxToNanojanskyinstFluxToNanojansky(df[self.instFluxNegColinstFluxNegCol], df[self.photoCalibColphotoCalibColphotoCalibCol])))
1778 
1779 
1781  """Compute the error on the absolute difference of dipole fluxes.
1782 
1783  See also
1784  --------
1785  LocalNanojansky
1786  LocalNanojanskyErr
1787  LocalMagnitude
1788  LocalMagnitudeErr
1789  LocalDipoleMeanFlux
1790  LocalDipoleMeanFluxErr
1791  LocalDipoleDiffFlux
1792  LocalDipoleDiffFluxErr
1793  """
1794 
1795  @property
1796  def columns(self):
1797  return [self.instFluxPosColinstFluxPosCol,
1798  self.instFluxNegColinstFluxNegCol,
1799  self.instFluxPosErrColinstFluxPosErrCol,
1800  self.instFluxNegErrColinstFluxNegErrCol,
1801  self.photoCalibColphotoCalibColphotoCalibCol,
1802  self.photoCalibErrColphotoCalibErrColphotoCalibErrCol]
1803 
1804  @property
1805  def name(self):
1806  return f'dipDiffFluxErr_{self.instFluxPosCol}_{self.instFluxNegCol}'
1807 
1808  def _func(self, df):
1809  return np.sqrt(
1810  ((np.fabs(df[self.instFluxPosColinstFluxPosCol]) - np.fabs(df[self.instFluxNegColinstFluxNegCol]))
1811  * df[self.photoCalibErrColphotoCalibErrColphotoCalibErrCol])**2
1812  + (df[self.instFluxPosErrColinstFluxPosErrCol]**2 + df[self.instFluxNegErrColinstFluxNegErrCol]**2)
1813  * df[self.photoCalibColphotoCalibColphotoCalibCol]**2)
1814 
1815 
1817  """Base class for returning the ratio of 2 columns.
1818 
1819  Can be used to compute a Signal to Noise ratio for any input flux.
1820 
1821  Parameters
1822  ----------
1823  numerator : `str`
1824  Name of the column to use at the numerator in the ratio
1825  denominator : `str`
1826  Name of the column to use as the denominator in the ratio.
1827  """
1828  def __init__(self,
1829  numerator,
1830  denominator,
1831  **kwargs):
1832  self.numeratornumerator = numerator
1833  self.denominatordenominator = denominator
1834  super().__init__(**kwargs)
1835 
1836  @property
1837  def columns(self):
1838  return [self.numeratornumerator, self.denominatordenominator]
1839 
1840  @property
1841  def name(self):
1842  return f'ratio_{self.numerator}_{self.denominator}'
1843 
1844  def _func(self, df):
1845  with np.warnings.catch_warnings():
1846  np.warnings.filterwarnings('ignore', r'invalid value encountered')
1847  np.warnings.filterwarnings('ignore', r'divide by zero')
1848  return df[self.numeratornumerator] / df[self.denominatordenominator]
def multilevelColumns(self, parq, **kwargs)
Definition: functors.py:905
def __init__(self, col, filt2, filt1, **kwargs)
Definition: functors.py:876
def __init__(self, col, **kwargs)
Definition: functors.py:627
def __init__(self, funcs, **kwargs)
Definition: functors.py:407
def __call__(self, data, **kwargs)
Definition: functors.py:459
def from_file(cls, filename, **kwargs)
Definition: functors.py:531
def from_yaml(cls, translationDefinition, **kwargs)
Definition: functors.py:540
def renameCol(cls, col, renameRules)
Definition: functors.py:522
def multilevelColumns(self, data, **kwargs)
Definition: functors.py:445
def pixelScaleArcseconds(self, cd11, cd12, cd21, cd22)
Definition: functors.py:1235
def __init__(self, col, colCD_1_1, colCD_1_2, colCD_2_1, colCD_2_2, **kwargs)
Definition: functors.py:1312
def __init__(self, col, colCD_1_1, colCD_1_2, colCD_2_1, colCD_2_2, **kwargs)
Definition: functors.py:1275
def __init__(self, col, **kwargs)
Definition: functors.py:673
def __init__(self, expr, **kwargs)
Definition: functors.py:596
def __init__(self, **kwargs)
Definition: functors.py:701
def __call__(self, catalog, **kwargs)
Definition: functors.py:704
def __init__(self, colXX, colXY, colYY, **kwargs)
Definition: functors.py:1068
def __init__(self, colXX, colXY, colYY, **kwargs)
Definition: functors.py:1086
def __call__(self, data, dropna=False)
Definition: functors.py:344
def _func(self, df, dropna=True)
Definition: functors.py:283
def multilevelColumns(self, data, columnIndex=None, returnTuple=False)
Definition: functors.py:227
def _get_data_columnLevelNames(self, data, columnIndex=None)
Definition: functors.py:184
def difference(self, data1, data2, **kwargs)
Definition: functors.py:355
def __init__(self, filt=None, dataset=None, noDup=None)
Definition: functors.py:140
def _get_columnIndex(self, data)
Definition: functors.py:286
def _colsFromDict(self, colDict, columnIndex=None)
Definition: functors.py:206
def _get_data_columnLevels(self, data, columnIndex=None)
Definition: functors.py:160
def __call__(self, parq, dropna=False, **kwargs)
Definition: functors.py:925
def __init__(self, instFluxPosCol, instFluxNegCol, instFluxPosErrCol, instFluxNegErrCol, photoCalibCol, photoCalibErrCol, **kwargs)
Definition: functors.py:1684
def instFluxToNanojansky(self, instFlux, localCalib)
Definition: functors.py:1486
def instFluxErrToMagnitudeErr(self, instFlux, instFluxErr, localCalib, localCalibErr)
Definition: functors.py:1541
def __init__(self, instFluxCol, instFluxErrCol, photoCalibCol, photoCalibErrCol, **kwargs)
Definition: functors.py:1479
def instFluxErrToNanojanskyErr(self, instFlux, instFluxErr, localCalib, localCalibErr)
Definition: functors.py:1503
def instFluxToMagnitude(self, instFlux, localCalib)
Definition: functors.py:1524
def __init__(self, colCD_1_1, colCD_1_2, colCD_2_1, colCD_2_2, **kwargs)
Definition: functors.py:1126
def computeDeltaRaDec(self, x, y, cd11, cd12, cd21, cd22)
Definition: functors.py:1133
def computeSkySeperation(self, ra1, dec1, ra2, dec2)
Definition: functors.py:1162
def getSkySeperationFromPixel(self, x1, y1, x2, y2, cd11, cd12, cd21, cd22)
Definition: functors.py:1188
def __init__(self, col1, col2, **kwargs)
Definition: functors.py:823
def __init__(self, *args, **kwargs)
Definition: functors.py:784
def __init__(self, col, calib=None, **kwargs)
Definition: functors.py:746
def dn2mag(self, dn, fluxMag0)
Definition: functors.py:1403
def dn2flux(self, dn, fluxMag0)
Definition: functors.py:1400
def dn2fluxErr(self, dn, dnErr, fluxMag0, fluxMag0Err)
Definition: functors.py:1409
def dn2MagErr(self, dn, dnErr, fluxMag0, fluxMag0Err)
Definition: functors.py:1414
def __init__(self, colFlux, colFluxErr=None, calib=None, **kwargs)
Definition: functors.py:1369
def __call__(self, catalog, **kwargs)
Definition: functors.py:691
def __init__(self, **kwargs)
Definition: functors.py:688
def __init__(self, colXX, colXY, colYY, **kwargs)
Definition: functors.py:1102
def __init__(self, numerator, denominator, **kwargs)
Definition: functors.py:1831
def mag_aware_eval(df, expr)
Definition: functors.py:561
def init_fromDict(initDict, basePath='lsst.pipe.tasks.functors', typeKey='functor', name=None)
Definition: functors.py:37