lsst.pipe.tasks  21.0.0-125-g25893231+3ac9af0c75
functors.py
Go to the documentation of this file.
1 # This file is part of pipe_tasks.
2 #
3 # LSST Data Management System
4 # This product includes software developed by the
5 # LSST Project (http://www.lsst.org/).
6 # See COPYRIGHT file at the top of the source tree.
7 #
8 # This program is free software: you can redistribute it and/or modify
9 # it under the terms of the GNU General Public License as published by
10 # the Free Software Foundation, either version 3 of the License, or
11 # (at your option) any later version.
12 #
13 # This program is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details.
17 #
18 # You should have received a copy of the LSST License Statement and
19 # the GNU General Public License along with this program. If not,
20 # see <https://www.lsstcorp.org/LegalNotices/>.
21 #
import os.path
import re
import warnings
from itertools import product

import yaml
import pandas as pd
import numpy as np
import astropy.units as u

from lsst.daf.persistence import doImport
from lsst.daf.butler import DeferredDatasetHandle
from .parquetTable import ParquetTable, MultilevelParquetTable
34 
35 
def init_fromDict(initDict, basePath='lsst.pipe.tasks.functors',
                  typeKey='functor', name=None):
    """Initialize an object defined in a dictionary

    The object needs to be importable as
    f'{basePath}.{initDict[typeKey]}'
    The positional and keyword arguments (if any) are contained in
    "args" and "kwargs" entries in the dictionary, respectively.
    This is used in `functors.CompositeFunctor.from_yaml` to initialize
    a composite functor from a specification in a YAML file.

    Parameters
    ----------
    initDict : dictionary
        Dictionary describing object's initialization.  Must contain
        an entry keyed by ``typeKey`` that is the name of the object,
        relative to ``basePath``.
    basePath : str
        Path relative to module in which ``initDict[typeKey]`` is defined.
    typeKey : str
        Key of ``initDict`` that is the name of the object
        (relative to ``basePath``).
    name : str, optional
        Label used in the error message if construction fails.
    """
    initDict = initDict.copy()
    # TO DO: DM-21956 We should be able to define functors outside this module
    pythonType = doImport(f'{basePath}.{initDict.pop(typeKey)}')
    args = []
    if 'args' in initDict:
        args = initDict.pop('args')
        if isinstance(args, str):
            # A single string argument is treated as a one-element list.
            args = [args]
    try:
        element = pythonType(*args, **initDict)
    except Exception as e:
        message = f'Error in constructing functor "{name}" of type {pythonType.__name__} with args: {args}'
        # Re-raise as the same exception type, chaining the original so the
        # full traceback context is preserved.
        raise type(e)(message, e.args) from e
    return element
73 
74 
class Functor(object):
    """Define and execute a calculation on a ParquetTable

    The `__call__` method accepts either a `ParquetTable` object or a
    `DeferredDatasetHandle`, and returns the result of the calculation as a
    single column.  Each functor defines what columns are needed for the
    calculation, and only these columns are read from the `ParquetTable`.

    The action of `__call__` consists of two steps: first, loading the
    necessary columns from disk into memory as a `pandas.DataFrame` object;
    and second, performing the computation on this dataframe and returning the
    result.

    To define a new `Functor`, a subclass must define a `_func` method,
    that takes a `pandas.DataFrame` and returns result in a `pandas.Series`.
    In addition, it must define the following attributes:

    * `_columns`: The columns necessary to perform the calculation
    * `name`: A name appropriate for a figure axis label
    * `shortname`: A name appropriate for use as a dictionary key

    On initialization, a `Functor` should declare what band (`filt` kwarg)
    and dataset (e.g. `'ref'`, `'meas'`, `'forced_src'`) it is intended to be
    applied to.  This enables the `_get_data` method to extract the proper
    columns from the parquet file.  If not specified, the dataset will fall
    back on the `_defaultDataset` attribute.  If band is not specified and
    `dataset` is anything other than `'ref'`, then an error will be raised
    when trying to perform the calculation.

    As currently implemented, `Functor` is only set up to expect a dataset of
    the format of the `deepCoadd_obj` dataset; that is, a dataframe with a
    multi-level column index, with the levels of the column index being
    `band`, `dataset`, and `column`.  This is defined in the `_columnLevels`
    attribute, as well as being implicit in the role of the `filt` and
    `dataset` attributes defined at initialization.  In addition, the
    `_get_data` method that reads the dataframe from the `ParquetTable` will
    return a dataframe with column index levels defined by the `_dfLevels`
    attribute; by default, this is `column`.

    The `_columnLevels` and `_dfLevels` attributes should generally not need
    to be changed, unless `_func` needs columns from multiple filters or
    datasets to do the calculation.  An example of this is the
    `lsst.pipe.tasks.functors.Color` functor, for which
    `_dfLevels = ('band', 'column')`, and `_func` expects the dataframe it
    gets to have those levels in the column index.

    Parameters
    ----------
    filt : str
        Filter upon which to do the calculation.

    dataset : str
        Dataset upon which to do the calculation
        (e.g., 'ref', 'meas', 'forced_src').

    noDup : bool, optional
        If set, overrides the class-level `_defaultNoDup` (see `noDup`).
    """

    _defaultDataset = 'ref'
    _columnLevels = ('band', 'dataset', 'column')
    _dfLevels = ('column',)
    _defaultNoDup = False

    def __init__(self, filt=None, dataset=None, noDup=None):
        self.filt = filt
        self.dataset = dataset if dataset is not None else self._defaultDataset
        self._noDup = noDup

    @property
    def noDup(self):
        # Per-instance setting wins; otherwise use the class default.
        if self._noDup is not None:
            return self._noDup
        else:
            return self._defaultNoDup

    @property
    def columns(self):
        """Columns required to perform calculation
        """
        if not hasattr(self, '_columns'):
            raise NotImplementedError('Must define columns property or _columns attribute')
        return self._columns

    def _get_data_columnLevels(self, data, columnIndex=None):
        """Gets the names of the column index levels

        This should only be called in the context of a multilevel table.
        The logic here is to enable this to work both with the gen2
        `MultilevelParquetTable` and with the gen3 `DeferredDatasetHandle`.

        Parameters
        ----------
        data : `MultilevelParquetTable` or `DeferredDatasetHandle`

        columnIndex (optional): pandas `Index` object
            if not passed, then it is read from the `DeferredDatasetHandle`
        """
        if isinstance(data, DeferredDatasetHandle):
            if columnIndex is None:
                columnIndex = data.get(component="columns")
        # A passed-in columnIndex takes precedence regardless of data type.
        if columnIndex is not None:
            return columnIndex.names
        if isinstance(data, MultilevelParquetTable):
            return data.columnLevels
        else:
            raise TypeError(f"Unknown type for data: {type(data)}!")

    def _get_data_columnLevelNames(self, data, columnIndex=None):
        """Gets the content of each of the column levels for a multilevel table

        Similar to `_get_data_columnLevels`, this enables backward
        compatibility with gen2.

        Mirrors original gen2 implementation within
        `pipe.tasks.parquetTable.MultilevelParquetTable`
        """
        if isinstance(data, DeferredDatasetHandle):
            if columnIndex is None:
                columnIndex = data.get(component="columns")
        if columnIndex is not None:
            columnLevels = columnIndex.names
            # For each level, collect the unique values appearing in the
            # column tuples at that position.
            columnLevelNames = {
                level: list(np.unique(np.array([c for c in columnIndex])[:, i]))
                for i, level in enumerate(columnLevels)
            }
            return columnLevelNames
        if isinstance(data, MultilevelParquetTable):
            return data.columnLevelNames
        else:
            raise TypeError(f"Unknown type for data: {type(data)}!")

    def _colsFromDict(self, colDict, columnIndex=None):
        """Converts dictionary column specification to a list of columns

        This mirrors the original gen2 implementation within
        `pipe.tasks.parquetTable.MultilevelParquetTable`
        """
        new_colDict = {}
        columnLevels = self._get_data_columnLevels(None, columnIndex=columnIndex)

        for i, lev in enumerate(columnLevels):
            if lev in colDict:
                if isinstance(colDict[lev], str):
                    new_colDict[lev] = [colDict[lev]]
                else:
                    new_colDict[lev] = colDict[lev]
            else:
                # Level not constrained: take every value present at this level.
                new_colDict[lev] = columnIndex.levels[i]

        levelCols = [new_colDict[lev] for lev in columnLevels]
        # Full cartesian product over the per-level selections.
        cols = product(*levelCols)
        return list(cols)

    def multilevelColumns(self, data, columnIndex=None, returnTuple=False):
        """Returns columns needed by functor from multilevel dataset

        To access tables with multilevel column structure, the
        `MultilevelParquetTable` or `DeferredDatasetHandle` need to be passed
        either a list of tuples or a dictionary.

        Parameters
        ----------
        data : `MultilevelParquetTable` or `DeferredDatasetHandle`

        columnIndex (optional): pandas `Index` object
            either passed or read in from `DeferredDatasetHandle`.

        `returnTuple` : bool
            If true, then return a list of tuples rather than the column
            dictionary specification.  This is set to `True` by
            `CompositeFunctor` in order to be able to combine columns from the
            various component functors.
        """
        if isinstance(data, DeferredDatasetHandle) and columnIndex is None:
            columnIndex = data.get(component="columns")

        # Confirm that the dataset has the column levels the functor is
        # expecting it to have.
        columnLevels = self._get_data_columnLevels(data, columnIndex)

        if not set(columnLevels) == set(self._columnLevels):
            raise ValueError(
                "ParquetTable does not have the expected column levels. "
                f"Got {columnLevels}; expected {self._columnLevels}."
            )

        columnDict = {'column': self.columns,
                      'dataset': self.dataset}
        if self.filt is None:
            columnLevelNames = self._get_data_columnLevelNames(data, columnIndex)
            if "band" in columnLevels:
                if self.dataset == "ref":
                    # 'ref' values are band-independent; any band will do.
                    columnDict["band"] = columnLevelNames["band"][0]
                else:
                    raise ValueError(f"'filt' not set for functor {self.name}"
                                     f"(dataset {self.dataset}) "
                                     "and ParquetTable "
                                     "contains multiple filters in column index. "
                                     "Set 'filt' or set 'dataset' to 'ref'.")
        else:
            columnDict['band'] = self.filt

        if isinstance(data, MultilevelParquetTable):
            return data._colsFromDict(columnDict)
        elif isinstance(data, DeferredDatasetHandle):
            if returnTuple:
                return self._colsFromDict(columnDict, columnIndex=columnIndex)
            else:
                return columnDict

    def _func(self, df, dropna=True):
        raise NotImplementedError('Must define calculation on dataframe')

    def _get_columnIndex(self, data):
        """Return columnIndex
        """
        if isinstance(data, DeferredDatasetHandle):
            return data.get(component="columns")
        else:
            return None

    def _get_data(self, data):
        """Retrieve dataframe necessary for calculation.

        The data argument can be a DataFrame, a ParquetTable instance, or a
        gen3 DeferredDatasetHandle

        Returns dataframe upon which `self._func` can act.

        N.B. while passing a raw pandas `DataFrame` *should* work here, it has
        not been tested.
        """
        if isinstance(data, pd.DataFrame):
            return data

        # First thing to do: check to see if the data source has a multilevel
        # column index or not.
        columnIndex = self._get_columnIndex(data)
        is_multiLevel = isinstance(data, MultilevelParquetTable) or isinstance(columnIndex, pd.MultiIndex)

        # Simple single-level parquet table, gen2
        if isinstance(data, ParquetTable) and not is_multiLevel:
            columns = self.columns
            df = data.toDataFrame(columns=columns)
            return df

        # Get proper columns specification for this functor
        if is_multiLevel:
            columns = self.multilevelColumns(data, columnIndex=columnIndex)
        else:
            columns = self.columns

        if isinstance(data, MultilevelParquetTable):
            # Load in-memory dataframe with appropriate columns the gen2 way
            df = data.toDataFrame(columns=columns, droplevels=False)
        elif isinstance(data, DeferredDatasetHandle):
            # Load in-memory dataframe with appropriate columns the gen3 way
            df = data.get(parameters={"columns": columns})

        # Drop unnecessary column levels
        if is_multiLevel:
            df = self._setLevels(df)

        return df

    def _setLevels(self, df):
        # Keep only the column index levels named in _dfLevels.
        levelsToDrop = [n for n in df.columns.names if n not in self._dfLevels]
        df.columns = df.columns.droplevel(levelsToDrop)
        return df

    def _dropna(self, vals):
        return vals.dropna()

    def __call__(self, data, dropna=False):
        try:
            df = self._get_data(data)
            vals = self._func(df)
        except Exception:
            # NOTE(review): if _get_data itself raises, `df` is unbound here
            # and fail(df) raises NameError — confirm whether that path can
            # occur in practice before relying on the NaN fallback.
            vals = self.fail(df)
        if dropna:
            vals = self._dropna(vals)

        return vals

    def difference(self, data1, data2, **kwargs):
        """Computes difference between functor called on two different
        ParquetTable objects
        """
        return self(data1, **kwargs) - self(data2, **kwargs)

    def fail(self, df):
        # NaN-filled fallback result, aligned with the input's index.
        return pd.Series(np.full(len(df), np.nan), index=df.index)

    @property
    def name(self):
        """Full name of functor (suitable for figure labels)
        """
        # NOTE(review): returns the NotImplementedError class rather than
        # raising it (original behavior); subclasses are expected to override.
        return NotImplementedError

    @property
    def shortname(self):
        """Short name of functor (suitable for column name/dict key)
        """
        return self.name
374 
375 
class CompositeFunctor(Functor):
    """Perform multiple calculations at once on a catalog

    The role of a `CompositeFunctor` is to group together computations from
    multiple functors.  Instead of returning `pandas.Series` a
    `CompositeFunctor` returns a `pandas.Dataframe`, with the column names
    being the keys of `funcDict`.

    The `columns` attribute of a `CompositeFunctor` is the union of all
    columns in all the component functors.

    A `CompositeFunctor` does not use a `_func` method itself; rather, when a
    `CompositeFunctor` is called, all its columns are loaded at once, and the
    resulting dataframe is passed to the `_func` method of each component
    functor.  This has the advantage of only doing I/O (reading from parquet
    file) once, and works because each individual `_func` method of each
    component functor does not care if there are *extra* columns in the
    dataframe being passed; only that it must contain *at least* the
    `columns` it expects.

    An important and useful class method is `from_yaml`, which takes as
    argument the path to a YAML file specifying a collection of functors.

    Parameters
    ----------
    funcs : `dict` or `list`
        Dictionary or list of functors.  If a list, then it will be converted
        into a dictonary according to the `.shortname` attribute of each
        functor.
    """
    dataset = None

    def __init__(self, funcs, **kwargs):

        if isinstance(funcs, dict):
            self.funcDict = funcs
        else:
            self.funcDict = {f.shortname: f for f in funcs}

        self._filt = None

        super().__init__(**kwargs)

    @property
    def filt(self):
        return self._filt

    @filt.setter
    def filt(self, filt):
        # Propagate the filter to every component functor.
        if filt is not None:
            for _, f in self.funcDict.items():
                f.filt = filt
        self._filt = filt

    def update(self, new):
        """Add functors from a dict or another CompositeFunctor."""
        if isinstance(new, dict):
            self.funcDict.update(new)
        elif isinstance(new, CompositeFunctor):
            self.funcDict.update(new.funcDict)
        else:
            raise TypeError('Can only update with dictionary or CompositeFunctor.')

        # Make sure new functors have the same 'filt' set
        if self.filt is not None:
            self.filt = self.filt

    @property
    def columns(self):
        # Union of the columns of all component functors.
        return list(set([x for y in [f.columns for f in self.funcDict.values()] for x in y]))

    def multilevelColumns(self, data, **kwargs):
        # Get the union of columns for all component functors.
        # Note the need to have `returnTuple=True` here.
        return list(
            set(
                [
                    x
                    for y in [
                        f.multilevelColumns(data, returnTuple=True, **kwargs) for f in self.funcDict.values()
                    ]
                    for x in y
                ]
            )
        )

    def __call__(self, data, **kwargs):
        """Apply the functor to the data table

        Parameters
        ----------
        data : `lsst.daf.butler.DeferredDatasetHandle`,
               `lsst.pipe.tasks.parquetTable.MultilevelParquetTable`,
               `lsst.pipe.tasks.parquetTable.ParquetTable`,
               or `pandas.DataFrame`.
            The table or a pointer to a table on disk from which columns can
            be accessed
        """
        columnIndex = self._get_columnIndex(data)

        # First, determine whether data has a multilevel index
        # (either gen2 or gen3)
        is_multiLevel = isinstance(data, MultilevelParquetTable) or isinstance(columnIndex, pd.MultiIndex)

        # Multilevel index, gen2 or gen3
        if is_multiLevel:
            columns = self.multilevelColumns(data, columnIndex=columnIndex)

            if isinstance(data, MultilevelParquetTable):
                # Read data into memory the gen2 way
                df = data.toDataFrame(columns=columns, droplevels=False)
            elif isinstance(data, DeferredDatasetHandle):
                # Read data into memory the gen3 way
                df = data.get(parameters={"columns": columns})

            valDict = {}
            for k, f in self.funcDict.items():
                try:
                    subdf = f._setLevels(
                        df[f.multilevelColumns(data, returnTuple=True, columnIndex=columnIndex)]
                    )
                    valDict[k] = f._func(subdf)
                except Exception:
                    # Fall back on NaN-filled output for this functor.
                    valDict[k] = f.fail(subdf)

        else:
            if isinstance(data, DeferredDatasetHandle):
                # input if Gen3 deferLoad=True
                df = data.get(parameters={"columns": self.columns})
            elif isinstance(data, pd.DataFrame):
                # input if Gen3 deferLoad=False
                df = data
            else:
                # Original Gen2 input is type ParquetTable and the fallback
                df = data.toDataFrame(columns=self.columns)

            valDict = {k: f._func(df) for k, f in self.funcDict.items()}

        try:
            valDf = pd.concat(valDict, axis=1)
        except TypeError:
            print([(k, type(v)) for k, v in valDict.items()])
            raise

        if kwargs.get('dropna', False):
            valDf = valDf.dropna(how='any')

        return valDf

    @classmethod
    def renameCol(cls, col, renameRules):
        """Apply (old, new) prefix rename rules to a column name."""
        if renameRules is None:
            return col
        for old, new in renameRules:
            if col.startswith(old):
                col = col.replace(old, new)
        return col

    @classmethod
    def from_file(cls, filename, **kwargs):
        """Build a CompositeFunctor from a YAML file on disk."""
        # Allow environment variables in the filename.
        filename = os.path.expandvars(filename)
        with open(filename) as f:
            translationDefinition = yaml.safe_load(f)

        return cls.from_yaml(translationDefinition, **kwargs)

    @classmethod
    def from_yaml(cls, translationDefinition, **kwargs):
        """Build a CompositeFunctor from a parsed YAML specification."""
        funcs = {}
        for func, val in translationDefinition['funcs'].items():
            funcs[func] = init_fromDict(val, name=func)

        if 'flag_rename_rules' in translationDefinition:
            renameRules = translationDefinition['flag_rename_rules']
        else:
            renameRules = None

        if 'refFlags' in translationDefinition:
            for flag in translationDefinition['refFlags']:
                funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='ref')

        if 'forcedFlags' in translationDefinition:
            for flag in translationDefinition['forcedFlags']:
                funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='forced_src')

        if 'flags' in translationDefinition:
            for flag in translationDefinition['flags']:
                funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='meas')

        return cls(funcs, **kwargs)
563 
564 
def mag_aware_eval(df, expr):
    """Evaluate an expression on a DataFrame, knowing what the 'mag' function means

    Builds on `pandas.DataFrame.eval`, which parses and executes math on
    dataframes.

    Parameters
    ----------
    df : pandas.DataFrame
        Dataframe on which to evaluate expression.

    expr : str
        Expression.
    """
    # Note: the `truediv` keyword was removed in pandas 2.0; true division is
    # the (and only) behavior of `eval`, so it is simply not passed here.
    try:
        # mag(x) -> -2.5*log10(x)
        expr_new = re.sub(r'mag\((\w+)\)', r'-2.5*log(\g<1>)/log(10)', expr)
        val = df.eval(expr_new)
    except Exception:  # Should check what actually gets raised
        # Retry assuming the argument names an instFlux column.
        expr_new = re.sub(r'mag\((\w+)\)', r'-2.5*log(\g<1>_instFlux)/log(10)', expr)
        val = df.eval(expr_new)
    return val
585 
586 
class CustomFunctor(Functor):
    """Arbitrary computation on a catalog

    Column names (and thus the columns to be loaded from catalog) are found
    by finding all words and trying to ignore all "math-y" words.

    Parameters
    ----------
    expr : str
        Expression to evaluate, to be parsed and executed by `mag_aware_eval`.
    """
    # Words that look like identifiers but are functions, not column names.
    _ignore_words = ('mag', 'sin', 'cos', 'exp', 'log', 'sqrt')

    def __init__(self, expr, **kwargs):
        self.expr = expr
        super().__init__(**kwargs)

    @property
    def name(self):
        return self.expr

    @property
    def columns(self):
        # Arguments of mag(...) calls; these refer to flux columns.
        flux_cols = re.findall(r'mag\(\s*(\w+)\s*\)', self.expr)

        cols = [c for c in re.findall(r'[a-zA-Z_]+', self.expr) if c not in self._ignore_words]
        not_a_col = []
        for c in flux_cols:
            if not re.search('_instFlux$', c):
                # mag() argument without suffix: load the _instFlux column
                # and drop the bare name from the column list.
                cols.append(f'{c}_instFlux')
                not_a_col.append(c)
            else:
                cols.append(c)

        return list(set([c for c in cols if c not in not_a_col]))

    def _func(self, df):
        return mag_aware_eval(df, self.expr)
625 
626 
class Column(Functor):
    """Get column with specified name
    """

    def __init__(self, col, **kwargs):
        self.col = col
        super().__init__(**kwargs)

    @property
    def name(self):
        return self.col

    @property
    def columns(self):
        return [self.col]

    def _func(self, df):
        return df[self.col]
645 
646 
class Index(Functor):
    """Return the value of the index for each object
    """

    # Placeholder: the functor machinery requires at least one column to be
    # read even though only the dataframe index is used.
    columns = ['coord_ra']
    _defaultDataset = 'ref'
    _defaultNoDup = True

    def _func(self, df):
        idx = df.index
        return pd.Series(idx, index=idx)
657 
658 
class IDColumn(Column):
    """Return the object id (the dataframe index) for each row."""
    col = 'id'
    _allow_difference = False
    _defaultNoDup = True

    def _func(self, df):
        return pd.Series(df.index, index=df.index)
666 
667 
class FootprintNPix(Column):
    """Return the footprint pixel count column."""
    col = 'base_Footprint_nPix'
671 
class CoordColumn(Column):
    """Base class for coordinate column, in degrees
    """
    # When True, the stored column is in radians and is converted to degrees.
    _radians = True

    def __init__(self, col, **kwargs):
        super().__init__(col, **kwargs)

    def _func(self, df):
        # Must not modify original column in case that column is used by
        # another functor
        output = df[self.col] * 180 / np.pi if self._radians else df[self.col]
        return output
684 
685 
class RAColumn(CoordColumn):
    """Right Ascension, in degrees
    """
    name = 'RA'
    _defaultNoDup = True

    def __init__(self, **kwargs):
        super().__init__('coord_ra', **kwargs)

    def __call__(self, catalog, **kwargs):
        return super().__call__(catalog, **kwargs)
697 
698 
class DecColumn(CoordColumn):
    """Declination, in degrees
    """
    name = 'Dec'
    _defaultNoDup = True

    def __init__(self, **kwargs):
        super().__init__('coord_dec', **kwargs)

    def __call__(self, catalog, **kwargs):
        return super().__call__(catalog, **kwargs)
710 
711 
def fluxName(col):
    """Return *col* with the ``_instFlux`` suffix appended if not present."""
    return col if col.endswith('_instFlux') else col + '_instFlux'
716 
717 
def fluxErrName(col):
    """Return *col* with the ``_instFluxErr`` suffix appended if not present."""
    return col if col.endswith('_instFluxErr') else col + '_instFluxErr'
722 
723 
class Mag(Functor):
    """Compute calibrated magnitude

    Takes a `calib` argument, which returns the flux at mag=0
    as `calib.getFluxMag0()`.  If not provided, then the default
    `fluxMag0` is 63095734448.0194, which is default for HSC.
    This default should be removed in DM-21955

    This calculation hides warnings about invalid values and dividing by zero.

    As for all functors, a `dataset` and `filt` kwarg should be provided upon
    initialization.  Unlike the default `Functor`, however, the default
    dataset for a `Mag` is `'meas'`, rather than `'ref'`.

    Parameters
    ----------
    col : `str`
        Name of flux column from which to compute magnitude.  Can be
        parseable by `lsst.pipe.tasks.functors.fluxName` function---that is,
        you can pass `'modelfit_CModel'` instead of
        `'modelfit_CModel_instFlux'`) and it will understand.
    calib : `lsst.afw.image.calib.Calib` (optional)
        Object that knows zero point.
    """
    _defaultDataset = 'meas'

    def __init__(self, col, calib=None, **kwargs):
        self.col = fluxName(col)
        self.calib = calib
        if calib is not None:
            self.fluxMag0 = calib.getFluxMag0()[0]
        else:
            # TO DO: DM-21955 Replace hard coded photometic calibration values
            self.fluxMag0 = 63095734448.0194

        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.col]

    def _func(self, df):
        # Use the stdlib warnings module: the np.warnings alias was removed
        # in NumPy >= 1.25.
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', r'invalid value encountered')
            warnings.filterwarnings('ignore', r'divide by zero')
            return -2.5*np.log10(df[self.col] / self.fluxMag0)

    @property
    def name(self):
        return f'mag_{self.col}'
774 
775 
class MagErr(Mag):
    """Compute calibrated magnitude uncertainty

    Takes the same `calib` object as `lsst.pipe.tasks.functors.Mag`.

    Parameters
    ----------
    col : `str`
        Name of flux column
    calib : `lsst.afw.image.calib.Calib` (optional)
        Object that knows zero point.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        if self.calib is not None:
            self.fluxMag0Err = self.calib.getFluxMag0()[1]
        else:
            self.fluxMag0Err = 0.

    @property
    def columns(self):
        return [self.col, self.col + 'Err']

    def _func(self, df):
        # Use the stdlib warnings module: the np.warnings alias was removed
        # in NumPy >= 1.25.
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', r'invalid value encountered')
            warnings.filterwarnings('ignore', r'divide by zero')
            fluxCol, fluxErrCol = self.columns
            # Standard error propagation for mag = -2.5*log10(flux/fluxMag0).
            x = df[fluxErrCol] / df[fluxCol]
            y = self.fluxMag0Err / self.fluxMag0
            magErr = (2.5 / np.log(10.)) * np.sqrt(x*x + y*y)
            return magErr

    @property
    def name(self):
        return super().name + '_err'
812 
813 
815  """
816  """
817 
818  def _func(self, df):
819  return (df[self.colcol] / self.fluxMag0fluxMag0) * 1e9
820 
821 
class MagDiff(Functor):
    """Functor to calculate magnitude difference"""
    _defaultDataset = 'meas'

    def __init__(self, col1, col2, **kwargs):
        self.col1 = fluxName(col1)
        self.col2 = fluxName(col2)
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.col1, self.col2]

    def _func(self, df):
        # Use the stdlib warnings module: the np.warnings alias was removed
        # in NumPy >= 1.25.
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', r'invalid value encountered')
            warnings.filterwarnings('ignore', r'divide by zero')
            # mag1 - mag2 reduces to -2.5*log10(flux1/flux2); the zero point
            # cancels.
            return -2.5*np.log10(df[self.col1]/df[self.col2])

    @property
    def name(self):
        return f'(mag_{self.col1} - mag_{self.col2})'

    @property
    def shortname(self):
        return f'magDiff_{self.col1}_{self.col2}'
849 
850 
class Color(Functor):
    """Compute the color between two filters

    Computes color by initializing two different `Mag` functors based on the
    `col` and filters provided, and then returning the difference.

    This is enabled by the `_func` expecting a dataframe with a multilevel
    column index, with both `'band'` and `'column'`, instead of just
    `'column'`, which is the `Functor` default.  This is controlled by the
    `_dfLevels` attribute.

    Also of note, the default dataset for `Color` is `forced_src'`, whereas
    for `Mag` it is `'meas'`.

    Parameters
    ----------
    col : str
        Name of flux column from which to compute; same as would be passed to
        `lsst.pipe.tasks.functors.Mag`.

    filt2, filt1 : str
        Filters from which to compute magnitude difference.
        Color computed is `Mag(filt2) - Mag(filt1)`.
    """
    _defaultDataset = 'forced_src'
    _dfLevels = ('band', 'column')
    _defaultNoDup = True

    def __init__(self, col, filt2, filt1, **kwargs):
        self.col = fluxName(col)
        if filt2 == filt1:
            raise RuntimeError("Cannot compute Color for %s: %s - %s " % (col, filt2, filt1))
        self.filt2 = filt2
        self.filt1 = filt1

        # Component magnitude functors, one per band.
        self.mag2 = Mag(col, filt=filt2, **kwargs)
        self.mag1 = Mag(col, filt=filt1, **kwargs)

        super().__init__(**kwargs)

    @property
    def filt(self):
        # A Color spans two bands, so it has no single 'filt'.
        return None

    @filt.setter
    def filt(self, filt):
        # Deliberately ignore attempts to set a single filter.
        pass

    def _func(self, df):
        mag2 = self.mag2._func(df[self.filt2])
        mag1 = self.mag1._func(df[self.filt1])
        return mag2 - mag1

    @property
    def columns(self):
        return [self.mag1.col, self.mag2.col]

    def multilevelColumns(self, parq, **kwargs):
        return [(self.dataset, self.filt1, self.col), (self.dataset, self.filt2, self.col)]

    @property
    def name(self):
        return f'{self.filt2} - {self.filt1} ({self.col})'

    @property
    def shortname(self):
        return f"{self.col}_{self.filt2.replace('-', '')}m{self.filt1.replace('-', '')}"
919 
920 
class Labeller(Functor):
    """Main function of this subclass is to override the dropna=True
    """
    _null_label = 'null'
    _allow_difference = False
    name = 'label'
    _force_str = False

    def __call__(self, parq, dropna=False, **kwargs):
        # Always force dropna=False: label columns keep a row per object.
        return super().__call__(parq, dropna=False, **kwargs)
931 
932 
class StarGalaxyLabeller(Labeller):
    """Label each object 'star', 'galaxy', or 'null' from extendedness."""
    _columns = ["base_ClassificationExtendedness_value"]
    _column = "base_ClassificationExtendedness_value"

    def _func(self, df):
        x = df[self._columns][self._column]
        mask = x.isnull()
        # Code 0/1 by extendedness threshold; null values get code 2.
        test = (x < 0.5).astype(int)
        test = test.mask(mask, 2)

        # TODO: DM-21954 Look into veracity of inline comment below
        # are these backwards?
        categories = ['galaxy', 'star', self._null_label]
        label = pd.Series(pd.Categorical.from_codes(test, categories=categories),
                          index=x.index, name='label')
        if self._force_str:
            label = label.astype(str)
        return label
951 
952 
class NumStarLabeller(Labeller):
    """Label objects by how many bands flag them as a star."""
    _columns = ['numStarFlags']
    labels = {"star": 0, "maybe": 1, "notStar": 2}

    def _func(self, df):
        x = df[self._columns][self._columns[0]]

        # Number of filters
        n = len(x.unique()) - 1

        # Bin counts into noStar (0), maybe (1..n-1), star (n).
        labels = ['noStar', 'maybe', 'star']
        label = pd.Series(pd.cut(x, [-1, 0, n-1, n], labels=labels),
                          index=x.index, name='label')

        if self._force_str:
            label = label.astype(str)

        return label
971 
972 
974  name = 'Deconvolved Moments'
975  shortname = 'deconvolvedMoments'
976  _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
977  "ext_shapeHSM_HsmSourceMoments_yy",
978  "base_SdssShape_xx", "base_SdssShape_yy",
979  "ext_shapeHSM_HsmPsfMoments_xx",
980  "ext_shapeHSM_HsmPsfMoments_yy")
981 
982  def _func(self, df):
983  """Calculate deconvolved moments"""
984  if "ext_shapeHSM_HsmSourceMoments_xx" in df.columns: # _xx added by tdm
985  hsm = df["ext_shapeHSM_HsmSourceMoments_xx"] + df["ext_shapeHSM_HsmSourceMoments_yy"]
986  else:
987  hsm = np.ones(len(df))*np.nan
988  sdss = df["base_SdssShape_xx"] + df["base_SdssShape_yy"]
989  if "ext_shapeHSM_HsmPsfMoments_xx" in df.columns:
990  psf = df["ext_shapeHSM_HsmPsfMoments_xx"] + df["ext_shapeHSM_HsmPsfMoments_yy"]
991  else:
992  # LSST does not have shape.sdss.psf. Could instead add base_PsfShape to catalog using
993  # exposure.getPsf().computeShape(s.getCentroid()).getIxx()
994  # raise TaskError("No psf shape parameter found in catalog")
995  raise RuntimeError('No psf shape parameter found in catalog')
996 
997  return hsm.where(np.isfinite(hsm), sdss) - psf
998 
999 
1001  """Functor to calculate SDSS trace radius size for sources"""
1002  name = "SDSS Trace Size"
1003  shortname = 'sdssTrace'
1004  _columns = ("base_SdssShape_xx", "base_SdssShape_yy")
1005 
1006  def _func(self, df):
1007  srcSize = np.sqrt(0.5*(df["base_SdssShape_xx"] + df["base_SdssShape_yy"]))
1008  return srcSize
1009 
1010 
1012  """Functor to calculate SDSS trace radius size difference (%) between object and psf model"""
1013  name = "PSF - SDSS Trace Size"
1014  shortname = 'psf_sdssTrace'
1015  _columns = ("base_SdssShape_xx", "base_SdssShape_yy",
1016  "base_SdssShape_psf_xx", "base_SdssShape_psf_yy")
1017 
1018  def _func(self, df):
1019  srcSize = np.sqrt(0.5*(df["base_SdssShape_xx"] + df["base_SdssShape_yy"]))
1020  psfSize = np.sqrt(0.5*(df["base_SdssShape_psf_xx"] + df["base_SdssShape_psf_yy"]))
1021  sizeDiff = 100*(srcSize - psfSize)/(0.5*(srcSize + psfSize))
1022  return sizeDiff
1023 
1024 
1026  """Functor to calculate HSM trace radius size for sources"""
1027  name = 'HSM Trace Size'
1028  shortname = 'hsmTrace'
1029  _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
1030  "ext_shapeHSM_HsmSourceMoments_yy")
1031 
1032  def _func(self, df):
1033  srcSize = np.sqrt(0.5*(df["ext_shapeHSM_HsmSourceMoments_xx"]
1034  + df["ext_shapeHSM_HsmSourceMoments_yy"]))
1035  return srcSize
1036 
1037 
1039  """Functor to calculate HSM trace radius size difference (%) between object and psf model"""
1040  name = 'PSF - HSM Trace Size'
1041  shortname = 'psf_HsmTrace'
1042  _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
1043  "ext_shapeHSM_HsmSourceMoments_yy",
1044  "ext_shapeHSM_HsmPsfMoments_xx",
1045  "ext_shapeHSM_HsmPsfMoments_yy")
1046 
1047  def _func(self, df):
1048  srcSize = np.sqrt(0.5*(df["ext_shapeHSM_HsmSourceMoments_xx"]
1049  + df["ext_shapeHSM_HsmSourceMoments_yy"]))
1050  psfSize = np.sqrt(0.5*(df["ext_shapeHSM_HsmPsfMoments_xx"]
1051  + df["ext_shapeHSM_HsmPsfMoments_yy"]))
1052  sizeDiff = 100*(srcSize - psfSize)/(0.5*(srcSize + psfSize))
1053  return sizeDiff
1054 
1055 
# NOTE(review): the enclosing class statement is missing from this extract;
# this is its body.
name = 'HSM Psf FWHM'
_columns = ('ext_shapeHSM_HsmPsfMoments_xx', 'ext_shapeHSM_HsmPsfMoments_yy')
# TODO: DM-21403 pixel scale should be computed from the CD matrix or transform matrix
pixelScale = 0.168                    # arcsec/pixel; hard coded, see DM-21403
SIGMA2FWHM = 2*np.sqrt(2*np.log(2))   # Gaussian sigma -> FWHM conversion factor

def _func(self, df):
    """PSF FWHM in arcseconds from the HSM PSF trace radius."""
    return self.pixelScale*self.SIGMA2FWHM*np.sqrt(
        0.5*(df['ext_shapeHSM_HsmPsfMoments_xx'] + df['ext_shapeHSM_HsmPsfMoments_yy']))
1066 
1067 
class E1(Functor):
    """Distortion-definition ellipticity component
    e1 = (Ixx - Iyy) / (Ixx + Iyy).

    Parameters
    ----------
    colXX, colXY, colYY : `str`
        Names of the second-moment columns.
    """
    name = "Distortion Ellipticity (e1)"
    shortname = "Distortion"

    def __init__(self, colXX, colXY, colYY, **kwargs):
        self.colXX = colXX
        self.colXY = colXY
        self.colYY = colYY
        self._columns = [self.colXX, self.colXY, self.colYY]
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.colXX, self.colXY, self.colYY]

    def _func(self, df):
        # BUGFIX(review): the previous expression lacked parentheses around the
        # numerator, so it computed Ixx - Iyy/(Ixx+Iyy) instead of the
        # distortion ellipticity (Ixx - Iyy)/(Ixx + Iyy).
        return (df[self.colXX] - df[self.colYY]) / (df[self.colXX] + df[self.colYY])
1086 
class E2(Functor):
    """Ellipticity component e2 = 2*Ixy / (Ixx + Iyy).

    Parameters
    ----------
    colXX, colXY, colYY : `str`
        Names of the second-moment columns.
    """
    name = "Ellipticity e2"

    def __init__(self, colXX, colXY, colYY, **kwargs):
        self.colXX = colXX
        self.colXY = colXY
        self.colYY = colYY
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.colXX, self.colXY, self.colYY]

    def _func(self, df):
        return 2*df[self.colXY] / (df[self.colXX] + df[self.colYY])
1103 
1105 
# NOTE(review): the enclosing class statement is missing from this extract;
# this is the body of a determinant-radius-from-quadrupole functor.

def __init__(self, colXX, colXY, colYY, **kwargs):
    self.colXX = colXX
    self.colXY = colXY
    self.colYY = colYY
    super().__init__(**kwargs)

@property
def columns(self):
    return [self.colXX, self.colXY, self.colYY]

def _func(self, df):
    """Determinant radius: |Q|**(1/4) = (Ixx*Iyy - Ixy**2)**(1/4)."""
    return (df[self.colXX]*df[self.colYY] - df[self.colXY]**2)**0.25
1118 
1119 
1121  """Computations using the stored localWcs.
1122  """
1123  name = "LocalWcsOperations"
1124 
1125  def __init__(self,
1126  colCD_1_1,
1127  colCD_1_2,
1128  colCD_2_1,
1129  colCD_2_2,
1130  **kwargs):
1131  self.colCD_1_1colCD_1_1 = colCD_1_1
1132  self.colCD_1_2colCD_1_2 = colCD_1_2
1133  self.colCD_2_1colCD_2_1 = colCD_2_1
1134  self.colCD_2_2colCD_2_2 = colCD_2_2
1135  super().__init__(**kwargs)
1136 
1137  def computeDeltaRaDec(self, x, y, cd11, cd12, cd21, cd22):
1138  """Compute the distance on the sphere from x2, y1 to x1, y1.
1139 
1140  Parameters
1141  ----------
1142  x : `pandas.Series`
1143  X pixel coordinate.
1144  y : `pandas.Series`
1145  Y pixel coordinate.
1146  cd11 : `pandas.Series`
1147  [1, 1] element of the local Wcs affine transform.
1148  cd11 : `pandas.Series`
1149  [1, 1] element of the local Wcs affine transform.
1150  cd12 : `pandas.Series`
1151  [1, 2] element of the local Wcs affine transform.
1152  cd21 : `pandas.Series`
1153  [2, 1] element of the local Wcs affine transform.
1154  cd22 : `pandas.Series`
1155  [2, 2] element of the local Wcs affine transform.
1156 
1157  Returns
1158  -------
1159  raDecTuple : tuple
1160  RA and dec conversion of x and y given the local Wcs. Returned
1161  units are in radians.
1162 
1163  """
1164  return (x * cd11 + y * cd12, x * cd21 + y * cd22)
1165 
1166  def computeSkySeperation(self, ra1, dec1, ra2, dec2):
1167  """Compute the local pixel scale conversion.
1168 
1169  Parameters
1170  ----------
1171  ra1 : `pandas.Series`
1172  Ra of the first coordinate in radians.
1173  dec1 : `pandas.Series`
1174  Dec of the first coordinate in radians.
1175  ra2 : `pandas.Series`
1176  Ra of the second coordinate in radians.
1177  dec2 : `pandas.Series`
1178  Dec of the second coordinate in radians.
1179 
1180  Returns
1181  -------
1182  dist : `pandas.Series`
1183  Distance on the sphere in radians.
1184  """
1185  deltaDec = dec2 - dec1
1186  deltaRa = ra2 - ra1
1187  return 2 * np.arcsin(
1188  np.sqrt(
1189  np.sin(deltaDec / 2) ** 2
1190  + np.cos(dec2) * np.cos(dec1) * np.sin(deltaRa / 2) ** 2))
1191 
1192  def getSkySeperationFromPixel(self, x1, y1, x2, y2, cd11, cd12, cd21, cd22):
1193  """Compute the distance on the sphere from x2, y1 to x1, y1.
1194 
1195  Parameters
1196  ----------
1197  x1 : `pandas.Series`
1198  X pixel coordinate.
1199  y1 : `pandas.Series`
1200  Y pixel coordinate.
1201  x2 : `pandas.Series`
1202  X pixel coordinate.
1203  y2 : `pandas.Series`
1204  Y pixel coordinate.
1205  cd11 : `pandas.Series`
1206  [1, 1] element of the local Wcs affine transform.
1207  cd11 : `pandas.Series`
1208  [1, 1] element of the local Wcs affine transform.
1209  cd12 : `pandas.Series`
1210  [1, 2] element of the local Wcs affine transform.
1211  cd21 : `pandas.Series`
1212  [2, 1] element of the local Wcs affine transform.
1213  cd22 : `pandas.Series`
1214  [2, 2] element of the local Wcs affine transform.
1215 
1216  Returns
1217  -------
1218  Distance : `pandas.Series`
1219  Arcseconds per pixel at the location of the local WC
1220  """
1221  ra1, dec1 = self.computeDeltaRaDeccomputeDeltaRaDec(x1, y1, cd11, cd12, cd21, cd22)
1222  ra2, dec2 = self.computeDeltaRaDeccomputeDeltaRaDec(x2, y2, cd11, cd12, cd21, cd22)
1223  # Great circle distance for small separations.
1224  return self.computeSkySeperationcomputeSkySeperation(ra1, dec1, ra2, dec2)
1225 
1226 
1228  """Compute the local pixel scale from the stored CDMatrix.
1229  """
1230  name = "PixelScale"
1231 
1232  @property
1233  def columns(self):
1234  return [self.colCD_1_1colCD_1_1,
1235  self.colCD_1_2colCD_1_2,
1236  self.colCD_2_1colCD_2_1,
1237  self.colCD_2_2colCD_2_2]
1238 
1239  def pixelScaleArcseconds(self, cd11, cd12, cd21, cd22):
1240  """Compute the local pixel to scale conversion in arcseconds.
1241 
1242  Parameters
1243  ----------
1244  cd11 : `pandas.Series`
1245  [1, 1] element of the local Wcs affine transform in radians.
1246  cd11 : `pandas.Series`
1247  [1, 1] element of the local Wcs affine transform in radians.
1248  cd12 : `pandas.Series`
1249  [1, 2] element of the local Wcs affine transform in radians.
1250  cd21 : `pandas.Series`
1251  [2, 1] element of the local Wcs affine transform in radians.
1252  cd22 : `pandas.Series`
1253  [2, 2] element of the local Wcs affine transform in radians.
1254 
1255  Returns
1256  -------
1257  pixScale : `pandas.Series`
1258  Arcseconds per pixel at the location of the local WC
1259  """
1260  return 3600 * np.degrees(np.sqrt(np.fabs(cd11 * cd22 - cd12 * cd21)))
1261 
1262  def _func(self, df):
1263  return self.pixelScaleArcsecondspixelScaleArcseconds(df[self.colCD_1_1colCD_1_1],
1264  df[self.colCD_1_2colCD_1_2],
1265  df[self.colCD_2_1colCD_2_1],
1266  df[self.colCD_2_2colCD_2_2])
1267 
1268 
1270  """Convert a value in units pixels squared to units arcseconds squared.
1271  """
1272 
1273  def __init__(self,
1274  col,
1275  colCD_1_1,
1276  colCD_1_2,
1277  colCD_2_1,
1278  colCD_2_2,
1279  **kwargs):
1280  self.colcol = col
1281  super().__init__(colCD_1_1,
1282  colCD_1_2,
1283  colCD_2_1,
1284  colCD_2_2,
1285  **kwargs)
1286 
1287  @property
1288  def name(self):
1289  return f"{self.col}_asArcseconds"
1290 
1291  @property
1292  def columns(self):
1293  return [self.colcol,
1294  self.colCD_1_1colCD_1_1,
1295  self.colCD_1_2colCD_1_2,
1296  self.colCD_2_1colCD_2_1,
1297  self.colCD_2_2colCD_2_2]
1298 
1299  def _func(self, df):
1300  return df[self.colcol] * self.pixelScaleArcsecondspixelScaleArcseconds(df[self.colCD_1_1colCD_1_1],
1301  df[self.colCD_1_2colCD_1_2],
1302  df[self.colCD_2_1colCD_2_1],
1303  df[self.colCD_2_2colCD_2_2])
1304 
1305 
1307  """Convert a value in units pixels to units arcseconds.
1308  """
1309 
1310  def __init__(self,
1311  col,
1312  colCD_1_1,
1313  colCD_1_2,
1314  colCD_2_1,
1315  colCD_2_2,
1316  **kwargs):
1317  self.colcol = col
1318  super().__init__(colCD_1_1,
1319  colCD_1_2,
1320  colCD_2_1,
1321  colCD_2_2,
1322  **kwargs)
1323 
1324  @property
1325  def name(self):
1326  return f"{self.col}_asArcsecondsSq"
1327 
1328  @property
1329  def columns(self):
1330  return [self.colcol,
1331  self.colCD_1_1colCD_1_1,
1332  self.colCD_1_2colCD_1_2,
1333  self.colCD_2_1colCD_2_1,
1334  self.colCD_2_2colCD_2_2]
1335 
1336  def _func(self, df):
1337  pixScale = self.pixelScaleArcsecondspixelScaleArcseconds(df[self.colCD_1_1colCD_1_1],
1338  df[self.colCD_1_2colCD_1_2],
1339  df[self.colCD_2_1colCD_2_1],
1340  df[self.colCD_2_2colCD_2_2])
1341  return df[self.colcol] * pixScale * pixScale
1342 
1343 
# NOTE(review): the enclosing class statement is missing from this extract;
# this is its body.
name = 'Reference Band'
shortname = 'refBand'

@property
def columns(self):
    return ["merge_measurement_i",
            "merge_measurement_r",
            "merge_measurement_z",
            "merge_measurement_y",
            "merge_measurement_g"]

def _func(self, df):
    """Reference-band letter per row: the first band (in i, r, z, y, g
    priority order) whose merge_measurement flag is True."""
    def getFilterAliasName(row):
        # get column name with the max value (True > False)
        colName = row.idxmax()
        return colName.replace('merge_measurement_', '')

    return df[self.columns].apply(getFilterAliasName, axis=1)
1363 
1364 
# NOTE(review): the enclosing ``class Photometry(...)``-style statement is
# missing from this extract; these are its class constants and __init__.
# AB to NanoJansky (3631 Jansky)
AB_FLUX_SCALE = (0 * u.ABmag).to_value(u.nJy)
LOG_AB_FLUX_SCALE = 12.56
FIVE_OVER_2LOG10 = 1.085736204758129569   # 2.5 / ln(10)
# TO DO: DM-21955 Replace hard coded photometic calibration values
COADD_ZP = 27

def __init__(self, colFlux, colFluxErr=None, calib=None, **kwargs):
    # Element-wise, overflow-safe hypotenuse for error propagation.
    self.vhypot = np.vectorize(self.hypot)
    self.col = colFlux
    self.colFluxErr = colFluxErr

    self.calib = calib
    if calib is not None:
        self.fluxMag0, self.fluxMag0Err = calib.getFluxMag0()
    else:
        # Fall back to the fixed coadd zero point when no calib is given.
        self.fluxMag0 = 1./np.power(10, -0.4*self.COADD_ZP)
        self.fluxMag0Err = 0.

    super().__init__(**kwargs)
1386 
@property
def columns(self):
    return [self.col]

@property
def name(self):
    return f'mag_{self.col}'

@classmethod
def hypot(cls, a, b):
    """Overflow-safe scalar hypotenuse sqrt(a**2 + b**2)."""
    if np.abs(a) < np.abs(b):
        a, b = b, a
    if a == 0.:
        return 0.
    q = b/a
    return np.abs(a) * np.sqrt(1. + q*q)
1403 
def dn2flux(self, dn, fluxMag0):
    """Convert instrumental flux (counts) to nanojanskys."""
    return self.AB_FLUX_SCALE * dn / fluxMag0

def dn2mag(self, dn, fluxMag0):
    """Convert instrumental flux (counts) to AB magnitudes."""
    # np.warnings was removed in numpy >= 1.25; use the stdlib module,
    # which is what the alias pointed to.
    import warnings
    with warnings.catch_warnings():
        warnings.filterwarnings('ignore', r'invalid value encountered')
        warnings.filterwarnings('ignore', r'divide by zero')
        return -2.5 * np.log10(dn/fluxMag0)

def dn2fluxErr(self, dn, dnErr, fluxMag0, fluxMag0Err):
    """Propagate count and zero-point errors to a flux error in nJy."""
    retVal = self.vhypot(dn * fluxMag0Err, dnErr * fluxMag0)
    retVal *= self.AB_FLUX_SCALE / fluxMag0 / fluxMag0
    return retVal

def dn2MagErr(self, dn, dnErr, fluxMag0, fluxMag0Err):
    """Propagate count and zero-point errors to a magnitude error."""
    retVal = self.dn2fluxErr(dn, dnErr, fluxMag0, fluxMag0Err) / self.dn2flux(dn, fluxMag0)
    return self.FIVE_OVER_2LOG10 * retVal
1421 
1422 
def _func(self, df):
    # Instrumental counts -> calibrated flux in nanojansky.
    return self.dn2flux(df[self.col], self.fluxMag0)
1426 
1427 
@property
def columns(self):
    return [self.col, self.colFluxErr]

def _func(self, df):
    # Propagated flux error in nanojansky, re-indexed to the input frame.
    retArr = self.dn2fluxErr(df[self.col], df[self.colFluxErr],
                             self.fluxMag0, self.fluxMag0Err)
    return pd.Series(retArr, index=df.index)
1436 
1437 
def _func(self, df):
    # Instrumental counts -> calibrated AB magnitude.
    return self.dn2mag(df[self.col], self.fluxMag0)
1441 
1442 
@property
def columns(self):
    return [self.col, self.colFluxErr]

def _func(self, df):
    # Propagated magnitude error, re-indexed to the input frame.
    retArr = self.dn2MagErr(df[self.col], df[self.colFluxErr],
                            self.fluxMag0, self.fluxMag0Err)
    return pd.Series(retArr, index=df.index)
1451 
1452 
1454  """Base class for calibrating the specified instrument flux column using
1455  the local photometric calibration.
1456 
1457  Parameters
1458  ----------
1459  instFluxCol : `str`
1460  Name of the instrument flux column.
1461  instFluxErrCol : `str`
1462  Name of the assocated error columns for ``instFluxCol``.
1463  photoCalibCol : `str`
1464  Name of local calibration column.
1465  photoCalibErrCol : `str`
1466  Error associated with ``photoCalibCol``
1467 
1468  See also
1469  --------
1470  LocalPhotometry
1471  LocalNanojansky
1472  LocalNanojanskyErr
1473  LocalMagnitude
1474  LocalMagnitudeErr
1475  """
1476  logNJanskyToAB = (1 * u.nJy).to_value(u.ABmag)
1477 
1478  def __init__(self,
1479  instFluxCol,
1480  instFluxErrCol,
1481  photoCalibCol,
1482  photoCalibErrCol,
1483  **kwargs):
1484  self.instFluxColinstFluxCol = instFluxCol
1485  self.instFluxErrColinstFluxErrCol = instFluxErrCol
1486  self.photoCalibColphotoCalibCol = photoCalibCol
1487  self.photoCalibErrColphotoCalibErrCol = photoCalibErrCol
1488  super().__init__(**kwargs)
1489 
def instFluxToNanojansky(self, instFlux, localCalib):
    """Convert instrument flux to nanojanskys.

    Parameters
    ----------
    instFlux : `numpy.ndarray` or `pandas.Series`
        Array of instrument flux measurements
    localCalib : `numpy.ndarray` or `pandas.Series`
        Array of local photometric calibration estimates.

    Returns
    -------
    calibFlux : `numpy.ndarray` or `pandas.Series`
        Array of calibrated flux measurements.
    """
    return instFlux * localCalib
1506 
def instFluxErrToNanojanskyErr(self, instFlux, instFluxErr, localCalib, localCalibErr):
    """Convert an instrument flux error to a nanojansky error.

    Parameters
    ----------
    instFlux : `numpy.ndarray` or `pandas.Series`
        Array of instrument flux measurements
    instFluxErr : `numpy.ndarray` or `pandas.Series`
        Errors on associated ``instFlux`` values
    localCalib : `numpy.ndarray` or `pandas.Series`
        Array of local photometric calibration estimates.
    localCalibErr : `numpy.ndarray` or `pandas.Series`
        Errors on associated ``localCalib`` values

    Returns
    -------
    calibFluxErr : `numpy.ndarray` or `pandas.Series`
        Errors on calibrated flux measurements.
    """
    # Standard propagation for a product: hypot of the two error terms.
    return np.hypot(instFluxErr * localCalib, instFlux * localCalibErr)
1527 
def instFluxToMagnitude(self, instFlux, localCalib):
    """Convert instrument flux to an AB magnitude.

    Parameters
    ----------
    instFlux : `numpy.ndarray` or `pandas.Series`
        Array of instrument flux measurements
    localCalib : `numpy.ndarray` or `pandas.Series`
        Array of local photometric calibration estimates.

    Returns
    -------
    calibMag : `numpy.ndarray` or `pandas.Series`
        Array of calibrated AB magnitudes.
    """
    return -2.5 * np.log10(self.instFluxToNanojansky(instFlux, localCalib)) + self.logNJanskyToAB
1544 
def instFluxErrToMagnitudeErr(self, instFlux, instFluxErr, localCalib, localCalibErr):
    """Propagate instrument flux and local calibration errors to an error
    on the calibrated AB magnitude.

    Parameters
    ----------
    instFlux : `numpy.ndarray` or `pandas.Series`
        Array of instrument flux measurements
    instFluxErr : `numpy.ndarray` or `pandas.Series`
        Errors on associated ``instFlux`` values
    localCalib : `numpy.ndarray` or `pandas.Series`
        Array of local photometric calibration estimates.
    localCalibErr : `numpy.ndarray` or `pandas.Series`
        Errors on associated ``localCalib`` values

    Returns
    -------
    calibMagErr: `numpy.ndarray` or `pandas.Series`
        Error on calibrated AB magnitudes.
    """
    err = self.instFluxErrToNanojanskyErr(instFlux, instFluxErr, localCalib, localCalibErr)
    # BUGFIX(review): the calibrated flux is instFlux * localCalib; the
    # previous code passed ``instFluxErr`` as the calibration here, dividing
    # by a meaningless quantity.
    return 2.5 / np.log(10) * err / self.instFluxToNanojansky(instFlux, localCalib)
1566 
1567 
1569  """Compute calibrated fluxes using the local calibration value.
1570 
1571  See also
1572  --------
1573  LocalNanojansky
1574  LocalNanojanskyErr
1575  LocalMagnitude
1576  LocalMagnitudeErr
1577  """
1578 
1579  @property
1580  def columns(self):
1581  return [self.instFluxColinstFluxCol, self.photoCalibColphotoCalibCol]
1582 
1583  @property
1584  def name(self):
1585  return f'flux_{self.instFluxCol}'
1586 
1587  def _func(self, df):
1588  return self.instFluxToNanojanskyinstFluxToNanojansky(df[self.instFluxColinstFluxCol], df[self.photoCalibColphotoCalibCol])
1589 
1590 
1592  """Compute calibrated flux errors using the local calibration value.
1593 
1594  See also
1595  --------
1596  LocalNanojansky
1597  LocalNanojanskyErr
1598  LocalMagnitude
1599  LocalMagnitudeErr
1600  """
1601 
1602  @property
1603  def columns(self):
1604  return [self.instFluxColinstFluxCol, self.instFluxErrColinstFluxErrCol,
1605  self.photoCalibColphotoCalibCol, self.photoCalibErrColphotoCalibErrCol]
1606 
1607  @property
1608  def name(self):
1609  return f'fluxErr_{self.instFluxCol}'
1610 
1611  def _func(self, df):
1612  return self.instFluxErrToNanojanskyErrinstFluxErrToNanojanskyErr(df[self.instFluxColinstFluxCol], df[self.instFluxErrColinstFluxErrCol],
1613  df[self.photoCalibColphotoCalibCol], df[self.photoCalibErrColphotoCalibErrCol])
1614 
1615 
1617  """Compute calibrated AB magnitudes using the local calibration value.
1618 
1619  See also
1620  --------
1621  LocalNanojansky
1622  LocalNanojanskyErr
1623  LocalMagnitude
1624  LocalMagnitudeErr
1625  """
1626 
1627  @property
1628  def columns(self):
1629  return [self.instFluxColinstFluxCol, self.photoCalibColphotoCalibCol]
1630 
1631  @property
1632  def name(self):
1633  return f'mag_{self.instFluxCol}'
1634 
1635  def _func(self, df):
1636  return self.instFluxToMagnitudeinstFluxToMagnitude(df[self.instFluxColinstFluxCol],
1637  df[self.photoCalibColphotoCalibCol])
1638 
1639 
1641  """Compute calibrated AB magnitude errors using the local calibration value.
1642 
1643  See also
1644  --------
1645  LocalNanojansky
1646  LocalNanojanskyErr
1647  LocalMagnitude
1648  LocalMagnitudeErr
1649  """
1650 
1651  @property
1652  def columns(self):
1653  return [self.instFluxColinstFluxCol, self.instFluxErrColinstFluxErrCol,
1654  self.photoCalibColphotoCalibCol, self.photoCalibErrColphotoCalibErrCol]
1655 
1656  @property
1657  def name(self):
1658  return f'magErr_{self.instFluxCol}'
1659 
1660  def _func(self, df):
1661  return self.instFluxErrToMagnitudeErrinstFluxErrToMagnitudeErr(df[self.instFluxColinstFluxCol],
1662  df[self.instFluxErrColinstFluxErrCol],
1663  df[self.photoCalibColphotoCalibCol],
1664  df[self.photoCalibErrColphotoCalibErrCol])
1665 
1666 
1668  """Compute absolute mean of dipole fluxes.
1669 
1670  See also
1671  --------
1672  LocalNanojansky
1673  LocalNanojanskyErr
1674  LocalMagnitude
1675  LocalMagnitudeErr
1676  LocalDipoleMeanFlux
1677  LocalDipoleMeanFluxErr
1678  LocalDipoleDiffFlux
1679  LocalDipoleDiffFluxErr
1680  """
1681  def __init__(self,
1682  instFluxPosCol,
1683  instFluxNegCol,
1684  instFluxPosErrCol,
1685  instFluxNegErrCol,
1686  photoCalibCol,
1687  photoCalibErrCol,
1688  **kwargs):
1689  self.instFluxNegColinstFluxNegCol = instFluxNegCol
1690  self.instFluxPosColinstFluxPosCol = instFluxPosCol
1691  self.instFluxNegErrColinstFluxNegErrCol = instFluxNegErrCol
1692  self.instFluxPosErrColinstFluxPosErrCol = instFluxPosErrCol
1693  self.photoCalibColphotoCalibColphotoCalibCol = photoCalibCol
1694  self.photoCalibErrColphotoCalibErrColphotoCalibErrCol = photoCalibErrCol
1695  super().__init__(instFluxNegCol,
1696  instFluxNegErrCol,
1697  photoCalibCol,
1698  photoCalibErrCol,
1699  **kwargs)
1700 
1701  @property
1702  def columns(self):
1703  return [self.instFluxPosColinstFluxPosCol,
1704  self.instFluxNegColinstFluxNegCol,
1705  self.photoCalibColphotoCalibColphotoCalibCol]
1706 
1707  @property
1708  def name(self):
1709  return f'dipMeanFlux_{self.instFluxPosCol}_{self.instFluxNegCol}'
1710 
1711  def _func(self, df):
1712  return 0.5*(np.fabs(self.instFluxToNanojanskyinstFluxToNanojansky(df[self.instFluxNegColinstFluxNegCol], df[self.photoCalibColphotoCalibColphotoCalibCol]))
1713  + np.fabs(self.instFluxToNanojanskyinstFluxToNanojansky(df[self.instFluxPosColinstFluxPosCol], df[self.photoCalibColphotoCalibColphotoCalibCol])))
1714 
1715 
1717  """Compute the error on the absolute mean of dipole fluxes.
1718 
1719  See also
1720  --------
1721  LocalNanojansky
1722  LocalNanojanskyErr
1723  LocalMagnitude
1724  LocalMagnitudeErr
1725  LocalDipoleMeanFlux
1726  LocalDipoleMeanFluxErr
1727  LocalDipoleDiffFlux
1728  LocalDipoleDiffFluxErr
1729  """
1730 
1731  @property
1732  def columns(self):
1733  return [self.instFluxPosColinstFluxPosCol,
1734  self.instFluxNegColinstFluxNegCol,
1735  self.instFluxPosErrColinstFluxPosErrCol,
1736  self.instFluxNegErrColinstFluxNegErrCol,
1737  self.photoCalibColphotoCalibColphotoCalibCol,
1738  self.photoCalibErrColphotoCalibErrColphotoCalibErrCol]
1739 
1740  @property
1741  def name(self):
1742  return f'dipMeanFluxErr_{self.instFluxPosCol}_{self.instFluxNegCol}'
1743 
1744  def _func(self, df):
1745  return 0.5*np.sqrt(
1746  (np.fabs(df[self.instFluxNegColinstFluxNegCol]) + np.fabs(df[self.instFluxPosColinstFluxPosCol])
1747  * df[self.photoCalibErrColphotoCalibErrColphotoCalibErrCol])**2
1748  + (df[self.instFluxNegErrColinstFluxNegErrCol]**2 + df[self.instFluxPosErrColinstFluxPosErrCol]**2)
1749  * df[self.photoCalibColphotoCalibColphotoCalibCol]**2)
1750 
1751 
1753  """Compute the absolute difference of dipole fluxes.
1754 
1755  Value is (abs(pos) - abs(neg))
1756 
1757  See also
1758  --------
1759  LocalNanojansky
1760  LocalNanojanskyErr
1761  LocalMagnitude
1762  LocalMagnitudeErr
1763  LocalDipoleMeanFlux
1764  LocalDipoleMeanFluxErr
1765  LocalDipoleDiffFlux
1766  LocalDipoleDiffFluxErr
1767  """
1768 
1769  @property
1770  def columns(self):
1771  return [self.instFluxPosColinstFluxPosCol,
1772  self.instFluxNegColinstFluxNegCol,
1773  self.photoCalibColphotoCalibColphotoCalibCol]
1774 
1775  @property
1776  def name(self):
1777  return f'dipDiffFlux_{self.instFluxPosCol}_{self.instFluxNegCol}'
1778 
1779  def _func(self, df):
1780  return (np.fabs(self.instFluxToNanojanskyinstFluxToNanojansky(df[self.instFluxPosColinstFluxPosCol], df[self.photoCalibColphotoCalibColphotoCalibCol]))
1781  - np.fabs(self.instFluxToNanojanskyinstFluxToNanojansky(df[self.instFluxNegColinstFluxNegCol], df[self.photoCalibColphotoCalibColphotoCalibCol])))
1782 
1783 
1785  """Compute the error on the absolute difference of dipole fluxes.
1786 
1787  See also
1788  --------
1789  LocalNanojansky
1790  LocalNanojanskyErr
1791  LocalMagnitude
1792  LocalMagnitudeErr
1793  LocalDipoleMeanFlux
1794  LocalDipoleMeanFluxErr
1795  LocalDipoleDiffFlux
1796  LocalDipoleDiffFluxErr
1797  """
1798 
1799  @property
1800  def columns(self):
1801  return [self.instFluxPosColinstFluxPosCol,
1802  self.instFluxNegColinstFluxNegCol,
1803  self.instFluxPosErrColinstFluxPosErrCol,
1804  self.instFluxNegErrColinstFluxNegErrCol,
1805  self.photoCalibColphotoCalibColphotoCalibCol,
1806  self.photoCalibErrColphotoCalibErrColphotoCalibErrCol]
1807 
1808  @property
1809  def name(self):
1810  return f'dipDiffFluxErr_{self.instFluxPosCol}_{self.instFluxNegCol}'
1811 
1812  def _func(self, df):
1813  return np.sqrt(
1814  ((np.fabs(df[self.instFluxPosColinstFluxPosCol]) - np.fabs(df[self.instFluxNegColinstFluxNegCol]))
1815  * df[self.photoCalibErrColphotoCalibErrColphotoCalibErrCol])**2
1816  + (df[self.instFluxPosErrColinstFluxPosErrCol]**2 + df[self.instFluxNegErrColinstFluxNegErrCol]**2)
1817  * df[self.photoCalibColphotoCalibColphotoCalibCol]**2)
1818 
1819 
1821  """Base class for returning the ratio of 2 columns.
1822 
1823  Can be used to compute a Signal to Noise ratio for any input flux.
1824 
1825  Parameters
1826  ----------
1827  numerator : `str`
1828  Name of the column to use at the numerator in the ratio
1829  denominator : `str`
1830  Name of the column to use as the denominator in the ratio.
1831  """
1832  def __init__(self,
1833  numerator,
1834  denominator,
1835  **kwargs):
1836  self.numeratornumerator = numerator
1837  self.denominatordenominator = denominator
1838  super().__init__(**kwargs)
1839 
1840  @property
1841  def columns(self):
1842  return [self.numeratornumerator, self.denominatordenominator]
1843 
1844  @property
1845  def name(self):
1846  return f'ratio_{self.numerator}_{self.denominator}'
1847 
1848  def _func(self, df):
1849  with np.warnings.catch_warnings():
1850  np.warnings.filterwarnings('ignore', r'invalid value encountered')
1851  np.warnings.filterwarnings('ignore', r'divide by zero')
1852  return df[self.numeratornumerator] / df[self.denominatordenominator]
def multilevelColumns(self, parq, **kwargs)
Definition: functors.py:909
def __init__(self, col, filt2, filt1, **kwargs)
Definition: functors.py:880
def __init__(self, col, **kwargs)
Definition: functors.py:631
def __init__(self, funcs, **kwargs)
Definition: functors.py:407
def __call__(self, data, **kwargs)
Definition: functors.py:459
def from_file(cls, filename, **kwargs)
Definition: functors.py:531
def from_yaml(cls, translationDefinition, **kwargs)
Definition: functors.py:540
def renameCol(cls, col, renameRules)
Definition: functors.py:522
def multilevelColumns(self, data, **kwargs)
Definition: functors.py:445
def pixelScaleArcseconds(self, cd11, cd12, cd21, cd22)
Definition: functors.py:1239
def __init__(self, col, colCD_1_1, colCD_1_2, colCD_2_1, colCD_2_2, **kwargs)
Definition: functors.py:1316
def __init__(self, col, colCD_1_1, colCD_1_2, colCD_2_1, colCD_2_2, **kwargs)
Definition: functors.py:1279
def __init__(self, col, **kwargs)
Definition: functors.py:677
def __init__(self, expr, **kwargs)
Definition: functors.py:600
def __init__(self, **kwargs)
Definition: functors.py:705
def __call__(self, catalog, **kwargs)
Definition: functors.py:708
def __init__(self, colXX, colXY, colYY, **kwargs)
Definition: functors.py:1072
def __init__(self, colXX, colXY, colYY, **kwargs)
Definition: functors.py:1090
def __call__(self, data, dropna=False)
Definition: functors.py:344
def _func(self, df, dropna=True)
Definition: functors.py:283
def multilevelColumns(self, data, columnIndex=None, returnTuple=False)
Definition: functors.py:227
def _get_data_columnLevelNames(self, data, columnIndex=None)
Definition: functors.py:184
def difference(self, data1, data2, **kwargs)
Definition: functors.py:355
def __init__(self, filt=None, dataset=None, noDup=None)
Definition: functors.py:140
def _get_columnIndex(self, data)
Definition: functors.py:286
def _colsFromDict(self, colDict, columnIndex=None)
Definition: functors.py:206
def _get_data_columnLevels(self, data, columnIndex=None)
Definition: functors.py:160
def __call__(self, parq, dropna=False, **kwargs)
Definition: functors.py:929
def __init__(self, instFluxPosCol, instFluxNegCol, instFluxPosErrCol, instFluxNegErrCol, photoCalibCol, photoCalibErrCol, **kwargs)
Definition: functors.py:1688
def instFluxToNanojansky(self, instFlux, localCalib)
Definition: functors.py:1490
def instFluxErrToMagnitudeErr(self, instFlux, instFluxErr, localCalib, localCalibErr)
Definition: functors.py:1545
def __init__(self, instFluxCol, instFluxErrCol, photoCalibCol, photoCalibErrCol, **kwargs)
Definition: functors.py:1483
def instFluxErrToNanojanskyErr(self, instFlux, instFluxErr, localCalib, localCalibErr)
Definition: functors.py:1507
def instFluxToMagnitude(self, instFlux, localCalib)
Definition: functors.py:1528
def __init__(self, colCD_1_1, colCD_1_2, colCD_2_1, colCD_2_2, **kwargs)
Definition: functors.py:1130
def computeDeltaRaDec(self, x, y, cd11, cd12, cd21, cd22)
Definition: functors.py:1137
def computeSkySeperation(self, ra1, dec1, ra2, dec2)
Definition: functors.py:1166
def getSkySeperationFromPixel(self, x1, y1, x2, y2, cd11, cd12, cd21, cd22)
Definition: functors.py:1192
def __init__(self, col1, col2, **kwargs)
Definition: functors.py:827
def __init__(self, *args, **kwargs)
Definition: functors.py:788
def __init__(self, col, calib=None, **kwargs)
Definition: functors.py:750
def dn2mag(self, dn, fluxMag0)
Definition: functors.py:1407
def dn2flux(self, dn, fluxMag0)
Definition: functors.py:1404
def dn2fluxErr(self, dn, dnErr, fluxMag0, fluxMag0Err)
Definition: functors.py:1413
def dn2MagErr(self, dn, dnErr, fluxMag0, fluxMag0Err)
Definition: functors.py:1418
def __init__(self, colFlux, colFluxErr=None, calib=None, **kwargs)
Definition: functors.py:1373
def __call__(self, catalog, **kwargs)
Definition: functors.py:695
def __init__(self, **kwargs)
Definition: functors.py:692
def __init__(self, colXX, colXY, colYY, **kwargs)
Definition: functors.py:1106
def __init__(self, numerator, denominator, **kwargs)
Definition: functors.py:1835
def mag_aware_eval(df, expr)
Definition: functors.py:565
def init_fromDict(initDict, basePath='lsst.pipe.tasks.functors', typeKey='functor', name=None)
Definition: functors.py:37