Coverage for python/lsst/pipe/tasks/functors.py: 40%
825 statements
« prev ^ index » next coverage.py v6.4.4, created at 2022-09-13 02:59 -0700
« prev ^ index » next coverage.py v6.4.4, created at 2022-09-13 02:59 -0700
1# This file is part of pipe_tasks.
2#
3# LSST Data Management System
4# This product includes software developed by the
5# LSST Project (http://www.lsst.org/).
6# See COPYRIGHT file at the top of the source tree.
7#
8# This program is free software: you can redistribute it and/or modify
9# it under the terms of the GNU General Public License as published by
10# the Free Software Foundation, either version 3 of the License, or
11# (at your option) any later version.
12#
13# This program is distributed in the hope that it will be useful,
14# but WITHOUT ANY WARRANTY; without even the implied warranty of
15# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16# GNU General Public License for more details.
17#
18# You should have received a copy of the LSST License Statement and
19# the GNU General Public License along with this program. If not,
20# see <https://www.lsstcorp.org/LegalNotices/>.
21#
import logging
import os.path
import re
import warnings
from itertools import product

import yaml
import pandas as pd
import numpy as np
import astropy.units as u
from dustmaps.sfd import SFDQuery
from astropy.coordinates import SkyCoord

from lsst.utils import doImport
from lsst.daf.butler import DeferredDatasetHandle
import lsst.geom as geom
import lsst.sphgeom as sphgeom

from .parquetTable import ParquetTable, MultilevelParquetTable
def init_fromDict(initDict, basePath='lsst.pipe.tasks.functors',
                  typeKey='functor', name=None):
    """Initialize an object defined in a dictionary

    The object needs to be importable as
    f'{basePath}.{initDict[typeKey]}'
    Positional arguments (if any) are taken from the "args" entry of the
    dictionary; every other remaining entry is passed to the constructor
    as a keyword argument.
    This is used in `functors.CompositeFunctor.from_yaml` to initialize
    a composite functor from a specification in a YAML file.

    Parameters
    ----------
    initDict : dictionary
        Dictionary describing object's initialization. Must contain
        an entry keyed by ``typeKey`` that is the name of the object,
        relative to ``basePath``. The dictionary is copied, not modified.
    basePath : str
        Path relative to module in which ``initDict[typeKey]`` is defined.
    typeKey : str
        Key of ``initDict`` that is the name of the object
        (relative to `basePath`).
    name : str, optional
        Label of the object being built; only used in the error message.

    Returns
    -------
    element
        The constructed object.

    Raises
    ------
    Exception
        An exception of the same type as the one raised by the
        constructor, carrying a message that identifies the functor and
        chained to the original exception.
    """
    initDict = initDict.copy()
    # TO DO: DM-21956 We should be able to define functors outside this module
    pythonType = doImport(f'{basePath}.{initDict.pop(typeKey)}')
    args = initDict.pop('args', [])
    if isinstance(args, str):
        # A bare string is a single positional argument, not a sequence.
        args = [args]
    try:
        element = pythonType(*args, **initDict)
    except Exception as e:
        message = f'Error in constructing functor "{name}" of type {pythonType.__name__} with args: {args}'
        # Chain explicitly so the original traceback is reported as the cause.
        raise type(e)(message, e.args) from e
    return element
class Functor(object):
    """Define and execute a calculation on a ParquetTable

    The `__call__` method accepts either a `ParquetTable` object or a
    `DeferredDatasetHandle`, and returns the
    result of the calculation as a single column.  Each functor defines what
    columns are needed for the calculation, and only these columns are read
    from the `ParquetTable`.

    The action of  `__call__` consists of two steps: first, loading the
    necessary columns from disk into memory as a `pandas.DataFrame` object;
    and second, performing the computation on this dataframe and returning the
    result.

    To define a new `Functor`, a subclass must define a `_func` method,
    that takes a `pandas.DataFrame` and returns result in a `pandas.Series`.
    In addition, it must define the following attributes

    * `_columns`: The columns necessary to perform the calculation
    * `name`: A name appropriate for a figure axis label
    * `shortname`: A name appropriate for use as a dictionary key

    On initialization, a `Functor` should declare what band (`filt` kwarg)
    and dataset (e.g. `'ref'`, `'meas'`, `'forced_src'`) it is intended to be
    applied to. This enables the `_get_data` method to extract the proper
    columns from the parquet file. If not specified, the dataset will fall back
    on the `_defaultDataset` attribute. If band is not specified and `dataset`
    is anything other than `'ref'`, then an error will be raised when trying to
    perform the calculation.

    Originally, `Functor` was set up to expect
    datasets formatted like the `deepCoadd_obj` dataset; that is, a
    dataframe with a multi-level column index, with the levels of the
    column index being `band`, `dataset`, and `column`.
    It has since been generalized to apply to dataframes without multi-level
    indices and multi-level indices with just `dataset` and `column` levels.
    In addition, the `_get_data` method that reads
    the dataframe from the `ParquetTable` will return a dataframe with column
    index levels defined by the `_dfLevels` attribute; by default, this is
    `column`.

    The `_dfLevels` attributes should generally not need to
    be changed, unless `_func` needs columns from multiple filters or datasets
    to do the calculation.
    An example of this is the `lsst.pipe.tasks.functors.Color` functor, for
    which `_dfLevels = ('band', 'column')`, and `_func` expects the dataframe
    it gets to have those levels in the column index.

    Parameters
    ----------
    filt : str
        Filter upon which to do the calculation

    dataset : str
        Dataset upon which to do the calculation
        (e.g., 'ref', 'meas', 'forced_src').

    noDup : bool, optional
        Overrides the class-level `_defaultNoDup` when not `None`.
    """

    _defaultDataset = 'ref'
    _dfLevels = ('column',)
    _defaultNoDup = False

    def __init__(self, filt=None, dataset=None, noDup=None):
        self.filt = filt
        self.dataset = dataset if dataset is not None else self._defaultDataset
        self._noDup = noDup
        self.log = logging.getLogger(type(self).__name__)

    @property
    def noDup(self):
        """Instance-level ``noDup`` if it was given, else the class default
        """
        if self._noDup is not None:
            return self._noDup
        else:
            return self._defaultNoDup

    @property
    def columns(self):
        """Columns required to perform calculation
        """
        if not hasattr(self, '_columns'):
            raise NotImplementedError('Must define columns property or _columns attribute')
        return self._columns

    def _get_data_columnLevels(self, data, columnIndex=None):
        """Gets the names of the column index levels

        This should only be called in the context of a multilevel table.
        The logic here is to enable this to work both with the gen2 `MultilevelParquetTable`
        and with the gen3 `DeferredDatasetHandle`.

        Parameters
        ----------
        data : `MultilevelParquetTable` or `DeferredDatasetHandle`

        columnIndex (optional): pandas `Index` object
            if not passed, then it is read from the `DeferredDatasetHandle`

        Raises
        ------
        TypeError
            If no column index is available and ``data`` is not a
            `MultilevelParquetTable`.
        """
        if isinstance(data, DeferredDatasetHandle):
            if columnIndex is None:
                columnIndex = data.get(component="columns")
        # An explicitly supplied index wins regardless of the type of `data`
        # (`_colsFromDict` relies on this by passing ``data=None``).
        if columnIndex is not None:
            return columnIndex.names
        if isinstance(data, MultilevelParquetTable):
            return data.columnLevels
        else:
            raise TypeError(f"Unknown type for data: {type(data)}!")

    def _get_data_columnLevelNames(self, data, columnIndex=None):
        """Gets the content of each of the column levels for a multilevel table

        Similar to `_get_data_columnLevels`, this enables backward compatibility with gen2.

        Mirrors original gen2 implementation within `pipe.tasks.parquetTable.MultilevelParquetTable`
        """
        if isinstance(data, DeferredDatasetHandle):
            if columnIndex is None:
                columnIndex = data.get(component="columns")
        if columnIndex is not None:
            columnLevels = columnIndex.names
            # One entry per level: the unique values found at that position
            # of the column tuples.
            columnLevelNames = {
                level: list(np.unique(np.array([c for c in columnIndex])[:, i]))
                for i, level in enumerate(columnLevels)
            }
            return columnLevelNames
        if isinstance(data, MultilevelParquetTable):
            return data.columnLevelNames
        else:
            raise TypeError(f"Unknown type for data: {type(data)}!")

    def _colsFromDict(self, colDict, columnIndex=None):
        """Converts dictionary column specification to a list of columns

        This mirrors the original gen2 implementation within `pipe.tasks.parquetTable.MultilevelParquetTable`
        """
        new_colDict = {}
        columnLevels = self._get_data_columnLevels(None, columnIndex=columnIndex)

        for i, lev in enumerate(columnLevels):
            if lev in colDict:
                if isinstance(colDict[lev], str):
                    new_colDict[lev] = [colDict[lev]]
                else:
                    new_colDict[lev] = colDict[lev]
            else:
                # Level not constrained by the caller: accept every value.
                new_colDict[lev] = columnIndex.levels[i]

        levelCols = [new_colDict[lev] for lev in columnLevels]
        cols = list(product(*levelCols))
        # Keep only the combinations that actually exist in the table.
        colsAvailable = [col for col in cols if col in columnIndex]
        return colsAvailable

    def multilevelColumns(self, data, columnIndex=None, returnTuple=False):
        """Returns columns needed by functor from multilevel dataset

        To access tables with multilevel column structure, the `MultilevelParquetTable`
        or `DeferredDatasetHandle` need to be passed either a list of tuples or a
        dictionary.

        Parameters
        ----------
        data : `MultilevelParquetTable` or `DeferredDatasetHandle`

        columnIndex (optional): pandas `Index` object
            either passed or read in from `DeferredDatasetHandle`.

        `returnTuple` : bool
            If true, then return a list of tuples rather than the column dictionary
            specification.  This is set to `True` by `CompositeFunctor` in order to be able to
            combine columns from the various component functors.

        """
        if isinstance(data, DeferredDatasetHandle) and columnIndex is None:
            columnIndex = data.get(component="columns")

        # Confirm that the dataset has the column levels the functor is expecting it to have.
        columnLevels = self._get_data_columnLevels(data, columnIndex)

        columnDict = {'column': self.columns,
                      'dataset': self.dataset}
        if self.filt is None:
            columnLevelNames = self._get_data_columnLevelNames(data, columnIndex)
            if "band" in columnLevels:
                if self.dataset == "ref":
                    columnDict["band"] = columnLevelNames["band"][0]
                else:
                    raise ValueError(f"'filt' not set for functor {self.name}"
                                     f"(dataset {self.dataset}) "
                                     "and ParquetTable "
                                     "contains multiple filters in column index. "
                                     "Set 'filt' or set 'dataset' to 'ref'.")
        else:
            columnDict['band'] = self.filt

        if isinstance(data, MultilevelParquetTable):
            return data._colsFromDict(columnDict)
        elif isinstance(data, DeferredDatasetHandle):
            if returnTuple:
                return self._colsFromDict(columnDict, columnIndex=columnIndex)
            else:
                return columnDict

    def _func(self, df, dropna=True):
        raise NotImplementedError('Must define calculation on dataframe')

    def _get_columnIndex(self, data):
        """Return columnIndex

        Only a `DeferredDatasetHandle` is queried; `None` is returned for
        any other input.
        """

        if isinstance(data, DeferredDatasetHandle):
            return data.get(component="columns")
        else:
            return None

    def _get_data(self, data):
        """Retrieve dataframe necessary for calculation.

        The data argument can be a DataFrame, a ParquetTable instance, or a gen3 DeferredDatasetHandle

        Returns dataframe upon which `self._func` can act.

        N.B. while passing a raw pandas `DataFrame` *should* work here, it has not been tested.

        Raises
        ------
        TypeError
            If ``data`` is none of the supported types.
        """
        if isinstance(data, pd.DataFrame):
            return data

        # First thing to do: check to see if the data source has a multilevel column index or not.
        columnIndex = self._get_columnIndex(data)
        is_multiLevel = isinstance(data, MultilevelParquetTable) or isinstance(columnIndex, pd.MultiIndex)

        # Simple single-level parquet table, gen2
        if isinstance(data, ParquetTable) and not is_multiLevel:
            columns = self.columns
            df = data.toDataFrame(columns=columns)
            return df

        # Get proper columns specification for this functor
        if is_multiLevel:
            columns = self.multilevelColumns(data, columnIndex=columnIndex)
        else:
            columns = self.columns

        if isinstance(data, MultilevelParquetTable):
            # Load in-memory dataframe with appropriate columns the gen2 way
            df = data.toDataFrame(columns=columns, droplevels=False)
        elif isinstance(data, DeferredDatasetHandle):
            # Load in-memory dataframe with appropriate columns the gen3 way
            df = data.get(parameters={"columns": columns})
        else:
            # Previously this fell through to an unbound ``df``.
            raise TypeError(f"Unknown type for data: {type(data)}!")

        # Drop unnecessary column levels
        if is_multiLevel:
            df = self._setLevels(df)

        return df

    def _setLevels(self, df):
        """Drop every column-index level not listed in `_dfLevels`

        The column index of ``df`` is replaced in place; ``df`` is returned.
        """
        levelsToDrop = [n for n in df.columns.names if n not in self._dfLevels]
        df.columns = df.columns.droplevel(levelsToDrop)
        return df

    def _dropna(self, vals):
        return vals.dropna()

    def __call__(self, data, dropna=False):
        """Load the required columns and run the calculation

        A failure while loading the data propagates to the caller. A failure
        inside `_func` is logged and replaced by the output of `fail`.
        """
        # Loading is kept outside the ``try``: if it fails there is no
        # dataframe from which `fail` could build a placeholder result.
        df = self._get_data(data)
        try:
            vals = self._func(df)
        except Exception as e:
            self.log.error("Exception in %s call: %s: %s", self.name, type(e).__name__, e)
            vals = self.fail(df)
        if dropna:
            vals = self._dropna(vals)

        return vals

    def difference(self, data1, data2, **kwargs):
        """Computes difference between functor called on two different ParquetTable objects
        """
        return self(data1, **kwargs) - self(data2, **kwargs)

    def fail(self, df):
        """Return an all-NaN series with the same index as ``df``
        """
        return pd.Series(np.full(len(df), np.nan), index=df.index)

    @property
    def name(self):
        """Full name of functor (suitable for figure labels)
        """
        # NOTE(review): this returns the exception class rather than raising
        # it. Left unchanged because subclasses without their own `name`
        # (and the logging calls that format `self.name`) rely on the
        # attribute access not raising.
        return NotImplementedError

    @property
    def shortname(self):
        """Short name of functor (suitable for column name/dict key)
        """
        return self.name
class CompositeFunctor(Functor):
    """Perform multiple calculations at once on a catalog

    The role of a `CompositeFunctor` is to group together computations from
    multiple functors.  Instead of returning `pandas.Series` a
    `CompositeFunctor` returns a `pandas.Dataframe`, with the column names
    being the keys of `funcDict`.

    The `columns` attribute of a `CompositeFunctor` is the union of all columns
    in all the component functors.

    A `CompositeFunctor` does not use a `_func` method itself; rather,
    when a `CompositeFunctor` is called, all its columns are loaded
    at once, and the resulting dataframe is passed to the `_func` method of each component
    functor.  This has the advantage of only doing I/O (reading from parquet file) once,
    and works because each individual `_func` method of each component functor does not
    care if there are *extra* columns in the dataframe being passed; only that it must contain
    *at least* the `columns` it expects.

    An important and useful class method is `from_yaml`, which takes as argument a
    dictionary (as loaded from a YAML file by `from_file`) specifying a collection of functors.

    Parameters
    ----------
    funcs : `dict` or `list`
        Dictionary or list of functors.  If a list, then it will be converted
        into a dictionary according to the `.shortname` attribute of each functor.

    """
    dataset = None

    def __init__(self, funcs, **kwargs):

        # isinstance (rather than an exact type comparison) so that dict
        # subclasses are used as mappings instead of being iterated by key.
        if isinstance(funcs, dict):
            self.funcDict = funcs
        else:
            self.funcDict = {f.shortname: f for f in funcs}

        # Must exist before the base constructor assigns ``self.filt``,
        # which goes through the property setter below.
        self._filt = None

        super().__init__(**kwargs)

    @property
    def filt(self):
        return self._filt

    @filt.setter
    def filt(self, filt):
        # A non-None filter is pushed down to every component functor.
        if filt is not None:
            for f in self.funcDict.values():
                f.filt = filt
        self._filt = filt

    def update(self, new):
        """Add the functors of a dictionary or another `CompositeFunctor`

        Raises
        ------
        TypeError
            If ``new`` is neither a `dict` nor a `CompositeFunctor`.
        """
        if isinstance(new, dict):
            self.funcDict.update(new)
        elif isinstance(new, CompositeFunctor):
            self.funcDict.update(new.funcDict)
        else:
            raise TypeError('Can only update with dictionary or CompositeFunctor.')

        # Make sure new functors have the same 'filt' set
        if self.filt is not None:
            self.filt = self.filt

    @property
    def columns(self):
        """Union of the columns of all component functors (order not defined)
        """
        return list({col for f in self.funcDict.values() for col in f.columns})

    def multilevelColumns(self, data, **kwargs):
        # Get the union of columns for all component functors.  Note the need to have `returnTuple=True` here.
        return list(
            {
                col
                for f in self.funcDict.values()
                for col in f.multilevelColumns(data, returnTuple=True, **kwargs)
            }
        )

    def __call__(self, data, **kwargs):
        """Apply the functor to the data table

        Parameters
        ----------
        data : `lsst.daf.butler.DeferredDatasetHandle`,
               `lsst.pipe.tasks.parquetTable.MultilevelParquetTable`,
               `lsst.pipe.tasks.parquetTable.ParquetTable`,
               or `pandas.DataFrame`.
            The table or a pointer to a table on disk from which columns can
            be accessed
        dropna : `bool`, optional
            Keyword only; if true, rows containing any null are dropped
            from the result.

        Returns
        -------
        valDf : `pandas.DataFrame`
            One column per entry of `funcDict`.

        Raises
        ------
        RuntimeError
            If a component functor returns something that is not
            one-dimensional.
        """
        columnIndex = self._get_columnIndex(data)

        # First, determine whether data has a multilevel index (either gen2 or gen3)
        is_multiLevel = isinstance(data, MultilevelParquetTable) or isinstance(columnIndex, pd.MultiIndex)

        # Multilevel index, gen2 or gen3
        if is_multiLevel:
            columns = self.multilevelColumns(data, columnIndex=columnIndex)

            if isinstance(data, MultilevelParquetTable):
                # Read data into memory the gen2 way
                df = data.toDataFrame(columns=columns, droplevels=False)
            elif isinstance(data, DeferredDatasetHandle):
                # Read data into memory the gen3 way
                df = data.get(parameters={"columns": columns})

            valDict = {}
            for k, f in self.funcDict.items():
                try:
                    subdf = f._setLevels(
                        df[f.multilevelColumns(data, returnTuple=True, columnIndex=columnIndex)]
                    )
                    valDict[k] = f._func(subdf)
                except Exception as e:
                    self.log.error("Exception in %s call: %s: %s", self.name, type(e).__name__, e)
                    # NOTE(review): `subdf` is unbound only if the very first
                    # functor fails before its sub-frame is built (then the
                    # error is re-raised); for later functors a sub-frame
                    # left over from a previous iteration may be used here.
                    try:
                        valDict[k] = f.fail(subdf)
                    except NameError:
                        raise e

        else:
            if isinstance(data, DeferredDatasetHandle):
                # input if Gen3 deferLoad=True
                df = data.get(parameters={"columns": self.columns})
            elif isinstance(data, pd.DataFrame):
                # input if Gen3 deferLoad=False
                df = data
            else:
                # Original Gen2 input is type ParquetTable and the fallback
                df = data.toDataFrame(columns=self.columns)

            valDict = {k: f._func(df) for k, f in self.funcDict.items()}

            # Check that output columns are actually columns
            for name, colVal in valDict.items():
                if len(colVal.shape) != 1:
                    raise RuntimeError("Transformed column '%s' is not the shape of a column. "
                                       "It is shaped %s and type %s." % (name, colVal.shape, type(colVal)))

        try:
            valDf = pd.concat(valDict, axis=1)
        except TypeError:
            # Report what each functor produced through the logger instead
            # of printing to stdout, then let the error propagate.
            self.log.error("Could not concatenate functor outputs: %s",
                           [(k, type(v)) for k, v in valDict.items()])
            raise

        if kwargs.get('dropna', False):
            valDf = valDf.dropna(how='any')

        return valDf

    @classmethod
    def renameCol(cls, col, renameRules):
        """Apply ``(old, new)`` rename rules to a column name

        Each rule whose ``old`` is a prefix of the current name replaces
        every occurrence of ``old`` in it; rules are applied in order.
        `None` rules leave the name unchanged.
        """
        if renameRules is None:
            return col
        for old, new in renameRules:
            if col.startswith(old):
                col = col.replace(old, new)
        return col

    @classmethod
    def from_file(cls, filename, **kwargs):
        """Build a composite functor from a YAML file

        Environment variables in ``filename`` are expanded before opening.
        """
        # Allow environment variables in the filename.
        filename = os.path.expandvars(filename)
        with open(filename) as f:
            translationDefinition = yaml.safe_load(f)

        return cls.from_yaml(translationDefinition, **kwargs)

    @classmethod
    def from_yaml(cls, translationDefinition, **kwargs):
        """Build a composite functor from an already-loaded specification

        Parameters
        ----------
        translationDefinition : `dict`
            Must contain a ``funcs`` mapping of name to functor
            specification (see `init_fromDict`). The optional flag lists
            ``calexpFlags``, ``refFlags``, ``forcedFlags`` and ``flags``
            each add one `Column` functor per flag, optionally renamed
            according to ``flag_rename_rules``.
        """
        funcs = {}
        for func, val in translationDefinition['funcs'].items():
            funcs[func] = init_fromDict(val, name=func)

        renameRules = translationDefinition.get('flag_rename_rules', None)

        # (section in the specification, dataset the flags are read from),
        # processed in this order so later sections override earlier ones.
        flagSections = (
            ('calexpFlags', 'calexp'),
            ('refFlags', 'ref'),
            ('forcedFlags', 'forced_src'),
            ('flags', 'meas'),
        )
        for section, flagDataset in flagSections:
            if section in translationDefinition:
                for flag in translationDefinition[section]:
                    funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset=flagDataset)

        return cls(funcs, **kwargs)
def mag_aware_eval(df, expr, log):
    """Evaluate an expression on a DataFrame, knowing what the 'mag' function means

    Builds on `pandas.DataFrame.eval`, which parses and executes math on dataframes.

    ``mag(x)`` is rewritten to ``-2.5*log(x)/log(10)``. If evaluating that
    fails for any reason, the error is logged and the expression is retried
    with ``x_instFlux`` in place of ``x``. Whitespace inside the parentheses
    of ``mag( x )`` is accepted, matching `CustomFunctor.columns`.

    Parameters
    ----------
    df : pandas.DataFrame
        Dataframe on which to evaluate expression.

    expr : str
        Expression.

    log : logger
        Object with an ``error`` method, used to report the first failure.

    Returns
    -------
    val
        Whatever `pandas.DataFrame.eval` returns for the rewritten
        expression.
    """
    # Same pattern as used by CustomFunctor.columns to find flux columns.
    magPattern = r'mag\(\s*(\w+)\s*\)'
    try:
        expr_new = re.sub(magPattern, r'-2.5*log(\g<1>)/log(10)', expr)
        val = df.eval(expr_new)
    except Exception as e:  # Should check what actually gets raised
        log.error("Exception in mag_aware_eval: %s: %s", type(e).__name__, e)
        expr_new = re.sub(magPattern, r'-2.5*log(\g<1>_instFlux)/log(10)', expr)
        val = df.eval(expr_new)
    return val
class CustomFunctor(Functor):
    """Arbitrary computation on a catalog

    Column names (and thus the columns to be loaded from catalog) are found
    by finding all identifiers in the expression and ignoring the "math-y"
    ones listed in `_ignore_words`.

    Parameters
    ----------
    expr : str
        Expression to evaluate, to be parsed and executed by `mag_aware_eval`.
    """
    # 'log10' is listed explicitly because identifiers are now matched
    # including their digits (previously it was tokenized as 'log').
    _ignore_words = ('mag', 'sin', 'cos', 'exp', 'log', 'log10', 'sqrt')

    def __init__(self, expr, **kwargs):
        self.expr = expr
        super().__init__(**kwargs)

    @property
    def name(self):
        return self.expr

    @property
    def columns(self):
        """Columns referenced by the expression

        Arguments of ``mag(...)`` that do not already end in ``_instFlux``
        are replaced by their ``_instFlux`` counterpart.
        """
        flux_cols = re.findall(r'mag\(\s*(\w+)\s*\)', self.expr)

        # Whole identifiers, digits included, so that names such as
        # ``base_CircularApertureFlux_12_0_instFlux`` are not split apart;
        # the word boundary keeps the exponent of a literal like ``1e9``
        # from being read as a column.
        cols = [c for c in re.findall(r'\b[a-zA-Z_]\w*', self.expr) if c not in self._ignore_words]
        not_a_col = []
        for c in flux_cols:
            if not re.search('_instFlux$', c):
                cols.append(f'{c}_instFlux')
                not_a_col.append(c)
            else:
                cols.append(c)

        return list({c for c in cols if c not in not_a_col})

    def _func(self, df):
        return mag_aware_eval(df, self.expr, self.log)
class Column(Functor):
    """Get column with specified name

    Parameters
    ----------
    col : `str`
        Name of the column to return.
    **kwargs
        Passed on to the `Functor` constructor.
    """

    def __init__(self, col, **kwargs):
        self.col = col
        super().__init__(**kwargs)

    @property
    def columns(self):
        """Single-element list holding the column name
        """
        return [self.col]

    @property
    def name(self):
        """The column name doubles as the functor name
        """
        return self.col

    def _func(self, df):
        selected = df[self.col]
        return selected
class Index(Functor):
    """Return the value of the index for each object
    """

    # Placeholder only: some column has to be requested so that a
    # dataframe (and hence its index) gets loaded.
    columns = ['coord_ra']
    _defaultDataset = 'ref'
    _defaultNoDup = True

    def _func(self, df):
        rowIndex = df.index
        return pd.Series(rowIndex, index=rowIndex)
class IDColumn(Column):
    """Return the dataframe index as the value of each row
    """
    col = 'id'
    _allow_difference = False
    _defaultNoDup = True

    def _func(self, df):
        rowIndex = df.index
        return pd.Series(rowIndex, index=rowIndex)
class FootprintNPix(Column):
    """`Column` whose class-level column name is ``base_Footprint_nPix``
    """
    col = 'base_Footprint_nPix'
class CoordColumn(Column):
    """Base class for coordinate column, in degrees

    When `_radians` is true the stored values are multiplied by
    ``180 / pi``; otherwise they are returned unchanged.
    """
    _radians = True

    def __init__(self, col, **kwargs):
        super().__init__(col, **kwargs)

    def _func(self, df):
        # A new object is produced in the converting branch, so the stored
        # column (possibly shared with other functors) is never modified.
        if self._radians:
            return df[self.col] * 180 / np.pi
        return df[self.col]
class RAColumn(CoordColumn):
    """Right Ascension, in degrees

    Reads the ``coord_ra`` column.
    """
    name = 'RA'
    _defaultNoDup = True

    def __init__(self, **kwargs):
        # The column name is fixed; only keyword options are configurable.
        super().__init__('coord_ra', **kwargs)

    def __call__(self, catalog, **kwargs):
        result = super().__call__(catalog, **kwargs)
        return result
class DecColumn(CoordColumn):
    """Declination, in degrees

    Reads the ``coord_dec`` column.
    """
    name = 'Dec'
    _defaultNoDup = True

    def __init__(self, **kwargs):
        # The column name is fixed; only keyword options are configurable.
        super().__init__('coord_dec', **kwargs)

    def __call__(self, catalog, **kwargs):
        result = super().__call__(catalog, **kwargs)
        return result
class HtmIndex20(Functor):
    """Compute the level 20 HtmIndex for the catalog.

    Parameters
    ----------
    ra : `str`
        Name of the column holding the first coordinate.
    decl : `str`
        Name of the column holding the second coordinate.

    Notes
    -----
    This functor was implemented to satisfy requirements of old APDB interface
    which required ``pixelId`` column in DiaObject with HTM20 index. APDB
    interface had migrated to not need that information, but we keep this
    class in case it may be useful for something else.
    """
    name = "Htm20"
    htmLevel = 20
    _radians = True

    def __init__(self, ra, decl, **kwargs):
        self.pixelator = sphgeom.HtmPixelization(self.htmLevel)
        self.ra = ra
        self.decl = decl
        self._columns = [self.ra, self.decl]
        super().__init__(**kwargs)

    def _func(self, df):
        # The angular unit depends only on the functor, not on the row.
        angleUnit = geom.radians if self._radians else geom.degrees

        def pixelOfRow(row):
            point = geom.SpherePoint(row[self.ra], row[self.decl], angleUnit)
            return self.pixelator.index(point.getVector())

        pixels = df.apply(pixelOfRow, axis=1, result_type='reduce')
        return pixels.astype('int64')
def fluxName(col):
    """Return ``col`` with an ``_instFlux`` suffix, adding it if missing
    """
    suffix = '_instFlux'
    return col if col.endswith(suffix) else col + suffix
def fluxErrName(col):
    """Return ``col`` with an ``_instFluxErr`` suffix, adding it if missing
    """
    suffix = '_instFluxErr'
    return col if col.endswith(suffix) else col + suffix
class Mag(Functor):
    """Compute calibrated magnitude

    Takes a `calib` argument, which returns the flux at mag=0
    as `calib.getFluxMag0()`.  If not provided, then the default
    `fluxMag0` is 63095734448.0194, which is default for HSC.
    This default should be removed in DM-21955

    This calculation hides warnings about invalid values and dividing by zero.

    As for all functors, a `dataset` and `filt` kwarg should be provided upon
    initialization.  Unlike the default `Functor`, however, the default dataset
    for a `Mag` is `'meas'`, rather than `'ref'`.

    Parameters
    ----------
    col : `str`
        Name of flux column from which to compute magnitude.  Can be parseable
        by `lsst.pipe.tasks.functors.fluxName` function---that is, you can pass
        `'modelfit_CModel'` instead of `'modelfit_CModel_instFlux'`) and it will
        understand.
    calib : `lsst.afw.image.calib.Calib` (optional)
        Object that knows zero point.
    """
    _defaultDataset = 'meas'

    def __init__(self, col, calib=None, **kwargs):
        self.col = fluxName(col)
        self.calib = calib
        if calib is not None:
            self.fluxMag0 = calib.getFluxMag0()[0]
        else:
            # TO DO: DM-21955 Replace hard coded photometic calibration values
            self.fluxMag0 = 63095734448.0194

        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.col]

    def _func(self, df):
        # Standard-library `warnings` is used directly: `np.warnings` was
        # never public NumPy API and no longer exists in NumPy >= 1.24.
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', r'invalid value encountered')
            warnings.filterwarnings('ignore', r'divide by zero')
            return -2.5*np.log10(df[self.col] / self.fluxMag0)

    @property
    def name(self):
        return f'mag_{self.col}'
class MagErr(Mag):
    """Compute calibrated magnitude uncertainty

    Takes the same `calib` object as `lsst.pipe.tasks.functors.Mag`.
    The zero-point uncertainty is taken from ``calib.getFluxMag0()[1]``,
    or set to zero when no `calib` is given.

    Parameters
    ----------
    col : `str`
        Name of flux column
    calib : `lsst.afw.image.calib.Calib` (optional)
        Object that knows zero point.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        if self.calib is not None:
            self.fluxMag0Err = self.calib.getFluxMag0()[1]
        else:
            self.fluxMag0Err = 0.

    @property
    def columns(self):
        # Flux column followed by its error column.
        return [self.col, self.col + 'Err']

    def _func(self, df):
        # Standard-library `warnings` is used directly: `np.warnings` was
        # never public NumPy API and no longer exists in NumPy >= 1.24.
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', r'invalid value encountered')
            warnings.filterwarnings('ignore', r'divide by zero')
            fluxCol, fluxErrCol = self.columns
            # Relative flux error and relative zero-point error, added in
            # quadrature and converted to magnitudes.
            x = df[fluxErrCol] / df[fluxCol]
            y = self.fluxMag0Err / self.fluxMag0
            magErr = (2.5 / np.log(10.)) * np.sqrt(x*x + y*y)
            return magErr

    @property
    def name(self):
        return super().name + '_err'
class NanoMaggie(Mag):
    """Flux column divided by `fluxMag0` and multiplied by 1e9
    """

    def _func(self, df):
        relativeFlux = df[self.col] / self.fluxMag0
        return relativeFlux * 1e9
class MagDiff(Functor):
    """Functor to calculate magnitude difference

    Computes ``-2.5*log10(flux1/flux2)``; warnings about invalid values
    and division by zero are hidden.

    Parameters
    ----------
    col1, col2 : `str`
        Names of the two flux columns; each is passed through `fluxName`.
    """
    _defaultDataset = 'meas'

    def __init__(self, col1, col2, **kwargs):
        self.col1 = fluxName(col1)
        self.col2 = fluxName(col2)
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.col1, self.col2]

    def _func(self, df):
        # Standard-library `warnings` is used directly: `np.warnings` was
        # never public NumPy API and no longer exists in NumPy >= 1.24.
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', r'invalid value encountered')
            warnings.filterwarnings('ignore', r'divide by zero')
            return -2.5*np.log10(df[self.col1]/df[self.col2])

    @property
    def name(self):
        return f'(mag_{self.col1} - mag_{self.col2})'

    @property
    def shortname(self):
        return f'magDiff_{self.col1}_{self.col2}'
class Color(Functor):
    """Compute the color between two filters

    Computes color by initializing two different `Mag`
    functors based on the `col` and filters provided, and
    then returning the difference.

    This is enabled by the `_func` expecting a dataframe with a
    multilevel column index, with both `'band'` and `'column'`,
    instead of just `'column'`, which is the `Functor` default.
    This is controlled by the `_dfLevels` attribute.

    Also of note, the default dataset for `Color` is `forced_src'`,
    whereas for `Mag` it is `'meas'`.

    Parameters
    ----------
    col : str
        Name of flux column from which to compute; same as would be passed to
        `lsst.pipe.tasks.functors.Mag`.

    filt2, filt1 : str
        Filters from which to compute magnitude difference.
        Color computed is `Mag(filt2) - Mag(filt1)`.
    """
    _defaultDataset = 'forced_src'
    _dfLevels = ('band', 'column')
    _defaultNoDup = True

    def __init__(self, col, filt2, filt1, **kwargs):
        self.col = fluxName(col)
        if filt2 == filt1:
            raise RuntimeError("Cannot compute Color for %s: %s - %s " % (col, filt2, filt1))
        self.filt2 = filt2
        self.filt1 = filt1

        # One magnitude functor per band, both on the same flux column.
        self.mag2 = Mag(col, filt=filt2, **kwargs)
        self.mag1 = Mag(col, filt=filt1, **kwargs)

        super().__init__(**kwargs)

    @property
    def filt(self):
        # A color spans two bands, so there is no single filter to report.
        return None

    @filt.setter
    def filt(self, filt):
        # Assignments (including the one in the base constructor) are ignored.
        pass

    def _func(self, df):
        # Top column level is the band; each slice holds that band's columns.
        secondBand = df[self.filt2]
        firstBand = df[self.filt1]
        return self.mag2._func(secondBand) - self.mag1._func(firstBand)

    @property
    def columns(self):
        return [self.mag1.col, self.mag2.col]

    def multilevelColumns(self, parq, **kwargs):
        return [(self.dataset, band, self.col) for band in (self.filt1, self.filt2)]

    @property
    def name(self):
        return f'{self.filt2} - {self.filt1} ({self.col})'

    @property
    def shortname(self):
        return f"{self.col}_{self.filt2.replace('-', '')}m{self.filt1.replace('-', '')}"
class Labeller(Functor):
    """Main function of this subclass is to override the dropna=True

    Whatever ``dropna`` value the caller passes, the base implementation
    is always invoked with ``dropna=False``.
    """
    _null_label = 'null'
    _allow_difference = False
    name = 'label'
    _force_str = False

    def __call__(self, parq, dropna=False, **kwargs):
        labels = super().__call__(parq, dropna=False, **kwargs)
        return labels
class StarGalaxyLabeller(Labeller):
    """Label rows from ``base_ClassificationExtendedness_value``

    Values below 0.5 get the second category, other non-null values the
    first, and null values the null label.
    """
    _columns = ["base_ClassificationExtendedness_value"]
    _column = "base_ClassificationExtendedness_value"

    def _func(self, df):
        extendedness = df[self._columns][self._column]
        isMissing = extendedness.isnull()
        # Codes index into `categories` below; missing values get code 2.
        codes = (extendedness < 0.5).astype(int)
        codes = codes.mask(isMissing, 2)

        # TODO: DM-21954 Look into veracity of inline comment below
        # are these backwards?
        categories = ['galaxy', 'star', self._null_label]
        categorical = pd.Categorical.from_codes(codes, categories=categories)
        label = pd.Series(categorical, index=extendedness.index, name='label')
        if self._force_str:
            label = label.astype(str)
        return label
class NumStarLabeller(Labeller):
    """Bin the ``numStarFlags`` column into three labels
    """
    _columns = ['numStarFlags']
    labels = {"star": 0, "maybe": 1, "notStar": 2}

    def _func(self, df):
        flagCounts = df[self._columns][self._columns[0]]

        # Number of filters
        n = len(flagCounts.unique()) - 1

        binEdges = [-1, 0, n-1, n]
        binLabels = ['noStar', 'maybe', 'star']
        binned = pd.cut(flagCounts, binEdges, labels=binLabels)
        label = pd.Series(binned, index=flagCounts.index, name='label')

        if self._force_str:
            label = label.astype(str)

        return label
class DeconvolvedMoments(Functor):
    """Source second-moment trace minus the PSF second-moment trace

    The HSM source moments are used where finite; the SDSS shape moments
    fill in elsewhere.
    """
    name = 'Deconvolved Moments'
    shortname = 'deconvolvedMoments'
    _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
                "ext_shapeHSM_HsmSourceMoments_yy",
                "base_SdssShape_xx", "base_SdssShape_yy",
                "ext_shapeHSM_HsmPsfMoments_xx",
                "ext_shapeHSM_HsmPsfMoments_yy")

    def _func(self, df):
        """Calculate deconvolved moments

        Raises
        ------
        RuntimeError
            If the HSM PSF moment columns are not present in ``df``.
        """
        if "ext_shapeHSM_HsmSourceMoments_xx" in df.columns:  # _xx added by tdm
            hsm = df["ext_shapeHSM_HsmSourceMoments_xx"] + df["ext_shapeHSM_HsmSourceMoments_yy"]
        else:
            # Must be a Series (not a bare ndarray) because `.where` is
            # called on it below; all-NaN so the SDSS values are used.
            hsm = pd.Series(np.nan, index=df.index)
        sdss = df["base_SdssShape_xx"] + df["base_SdssShape_yy"]
        if "ext_shapeHSM_HsmPsfMoments_xx" in df.columns:
            psf = df["ext_shapeHSM_HsmPsfMoments_xx"] + df["ext_shapeHSM_HsmPsfMoments_yy"]
        else:
            # LSST does not have shape.sdss.psf.  Could instead add base_PsfShape to catalog using
            # exposure.getPsf().computeShape(s.getCentroid()).getIxx()
            # raise TaskError("No psf shape parameter found in catalog")
            raise RuntimeError('No psf shape parameter found in catalog')

        return hsm.where(np.isfinite(hsm), sdss) - psf
class SdssTraceSize(Functor):
    """Functor to calculate SDSS trace radius size for sources.

    The size is ``sqrt(0.5*(xx + yy))`` of the ``base_SdssShape`` moments.
    """
    name = "SDSS Trace Size"
    shortname = 'sdssTrace'
    _columns = ("base_SdssShape_xx", "base_SdssShape_yy")

    def _func(self, df):
        trace = df["base_SdssShape_xx"] + df["base_SdssShape_yy"]
        return np.sqrt(0.5*trace)
class PsfSdssTraceSizeDiff(Functor):
    """Functor to calculate SDSS trace radius size difference (%) between
    object and psf model.

    The value is ``100*(src - psf)/(0.5*(src + psf))`` where each size is
    ``sqrt(0.5*(xx + yy))``.
    """
    name = "PSF - SDSS Trace Size"
    shortname = 'psf_sdssTrace'
    _columns = ("base_SdssShape_xx", "base_SdssShape_yy",
                "base_SdssShape_psf_xx", "base_SdssShape_psf_yy")

    def _func(self, df):
        srcTrace = df["base_SdssShape_xx"] + df["base_SdssShape_yy"]
        srcSize = np.sqrt(0.5*srcTrace)
        psfTrace = df["base_SdssShape_psf_xx"] + df["base_SdssShape_psf_yy"]
        psfSize = np.sqrt(0.5*psfTrace)
        meanSize = 0.5*(srcSize + psfSize)
        return 100*(srcSize - psfSize)/meanSize
class HsmTraceSize(Functor):
    """Functor to calculate HSM trace radius size for sources.

    The size is ``sqrt(0.5*(xx + yy))`` of the HSM source moments.
    """
    name = 'HSM Trace Size'
    shortname = 'hsmTrace'
    _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
                "ext_shapeHSM_HsmSourceMoments_yy")

    def _func(self, df):
        trace = df["ext_shapeHSM_HsmSourceMoments_xx"] + df["ext_shapeHSM_HsmSourceMoments_yy"]
        return np.sqrt(0.5*trace)
class PsfHsmTraceSizeDiff(Functor):
    """Functor to calculate HSM trace radius size difference (%) between
    object and psf model.

    The value is ``100*(src - psf)/(0.5*(src + psf))`` where each size is
    ``sqrt(0.5*(xx + yy))`` of the corresponding HSM moments.
    """
    name = 'PSF - HSM Trace Size'
    shortname = 'psf_HsmTrace'
    _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
                "ext_shapeHSM_HsmSourceMoments_yy",
                "ext_shapeHSM_HsmPsfMoments_xx",
                "ext_shapeHSM_HsmPsfMoments_yy")

    def _func(self, df):
        srcTrace = df["ext_shapeHSM_HsmSourceMoments_xx"] + df["ext_shapeHSM_HsmSourceMoments_yy"]
        srcSize = np.sqrt(0.5*srcTrace)
        psfTrace = df["ext_shapeHSM_HsmPsfMoments_xx"] + df["ext_shapeHSM_HsmPsfMoments_yy"]
        psfSize = np.sqrt(0.5*psfTrace)
        meanSize = 0.5*(srcSize + psfSize)
        return 100*(srcSize - psfSize)/meanSize
class HsmFwhm(Functor):
    """Functor to calculate a PSF FWHM from the HSM PSF moments.

    The trace radius ``sqrt(0.5*(xx + yy))`` is scaled by the sigma-to-FWHM
    factor and by the hard-coded ``pixelScale``.
    """
    name = 'HSM Psf FWHM'
    _columns = ('ext_shapeHSM_HsmPsfMoments_xx', 'ext_shapeHSM_HsmPsfMoments_yy')
    # TODO: DM-21403 pixel scale should be computed from the CD matrix or transform matrix
    pixelScale = 0.168
    SIGMA2FWHM = 2*np.sqrt(2*np.log(2))

    def _func(self, df):
        trace = df['ext_shapeHSM_HsmPsfMoments_xx'] + df['ext_shapeHSM_HsmPsfMoments_yy']
        sigma = np.sqrt(0.5*trace)
        return self.pixelScale*self.SIGMA2FWHM*sigma
class E1(Functor):
    """Functor to calculate the e1 distortion ellipticity component,
    ``(Ixx - Iyy)/(Ixx + Iyy)``.

    Parameters
    ----------
    colXX : `str`
        Name of the column holding the xx second moment.
    colXY : `str`
        Name of the column holding the xy second moment. It is requested
        via ``columns`` but is not used in the e1 computation itself.
    colYY : `str`
        Name of the column holding the yy second moment.
    """
    name = "Distortion Ellipticity (e1)"
    shortname = "Distortion"

    def __init__(self, colXX, colXY, colYY, **kwargs):
        self.colXX = colXX
        self.colXY = colXY
        self.colYY = colYY
        self._columns = [self.colXX, self.colXY, self.colYY]
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.colXX, self.colXY, self.colYY]

    def _func(self, df):
        # The difference must be parenthesised: without it, division binds
        # tighter and the result is Ixx - Iyy/(Ixx + Iyy).
        return (df[self.colXX] - df[self.colYY]) / (df[self.colXX] + df[self.colYY])
class E2(Functor):
    """Functor to calculate the e2 ellipticity component,
    ``2*Ixy/(Ixx + Iyy)``.

    Parameters
    ----------
    colXX : `str`
        Name of the column holding the xx second moment.
    colXY : `str`
        Name of the column holding the xy second moment.
    colYY : `str`
        Name of the column holding the yy second moment.
    """
    name = "Ellipticity e2"

    def __init__(self, colXX, colXY, colYY, **kwargs):
        self.colXX, self.colXY, self.colYY = colXX, colXY, colYY
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.colXX, self.colXY, self.colYY]

    def _func(self, df):
        twiceXY = 2*df[self.colXY]
        trace = df[self.colXX] + df[self.colYY]
        return twiceXY / trace
class RadiusFromQuadrupole(Functor):
    """Functor to calculate a radius from second moments as the fourth
    root of the moment-matrix determinant, ``(Ixx*Iyy - Ixy**2)**0.25``.

    Parameters
    ----------
    colXX : `str`
        Name of the column holding the xx second moment.
    colXY : `str`
        Name of the column holding the xy second moment.
    colYY : `str`
        Name of the column holding the yy second moment.
    """

    def __init__(self, colXX, colXY, colYY, **kwargs):
        self.colXX, self.colXY, self.colYY = colXX, colXY, colYY
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.colXX, self.colXY, self.colYY]

    def _func(self, df):
        determinant = df[self.colXX]*df[self.colYY] - df[self.colXY]**2
        return determinant**0.25
class LocalWcs(Functor):
    """Computations using the stored localWcs.

    Parameters
    ----------
    colCD_1_1 : `str`
        Name of the column holding the [1, 1] element of the local Wcs
        affine transform.
    colCD_1_2 : `str`
        Name of the column holding the [1, 2] element.
    colCD_2_1 : `str`
        Name of the column holding the [2, 1] element.
    colCD_2_2 : `str`
        Name of the column holding the [2, 2] element.
    """
    name = "LocalWcsOperations"

    def __init__(self,
                 colCD_1_1,
                 colCD_1_2,
                 colCD_2_1,
                 colCD_2_2,
                 **kwargs):
        (self.colCD_1_1, self.colCD_1_2,
         self.colCD_2_1, self.colCD_2_2) = (colCD_1_1, colCD_1_2,
                                            colCD_2_1, colCD_2_2)
        super().__init__(**kwargs)

    def computeDeltaRaDec(self, x, y, cd11, cd12, cd21, cd22):
        """Apply the local Wcs affine transform to a pixel coordinate.

        Parameters
        ----------
        x : `pandas.Series`
            X pixel coordinate.
        y : `pandas.Series`
            Y pixel coordinate.
        cd11 : `pandas.Series`
            [1, 1] element of the local Wcs affine transform.
        cd12 : `pandas.Series`
            [1, 2] element of the local Wcs affine transform.
        cd21 : `pandas.Series`
            [2, 1] element of the local Wcs affine transform.
        cd22 : `pandas.Series`
            [2, 2] element of the local Wcs affine transform.

        Returns
        -------
        raDecTuple : tuple
            RA and dec conversion of x and y given the local Wcs. Returned
            units are in radians.

        """
        deltaRa = x * cd11 + y * cd12
        deltaDec = x * cd21 + y * cd22
        return (deltaRa, deltaDec)

    def computeSkySeperation(self, ra1, dec1, ra2, dec2):
        """Compute the distance on the sphere between two coordinates,
        using the haversine formula.

        Parameters
        ----------
        ra1 : `pandas.Series`
            Ra of the first coordinate in radians.
        dec1 : `pandas.Series`
            Dec of the first coordinate in radians.
        ra2 : `pandas.Series`
            Ra of the second coordinate in radians.
        dec2 : `pandas.Series`
            Dec of the second coordinate in radians.

        Returns
        -------
        dist : `pandas.Series`
            Distance on the sphere in radians.
        """
        deltaDec = dec2 - dec1
        deltaRa = ra2 - ra1
        haversine = (np.sin(deltaDec / 2) ** 2
                     + np.cos(dec2) * np.cos(dec1) * np.sin(deltaRa / 2) ** 2)
        return 2 * np.arcsin(np.sqrt(haversine))

    def getSkySeperationFromPixel(self, x1, y1, x2, y2, cd11, cd12, cd21, cd22):
        """Compute the distance on the sphere from x2, y2 to x1, y1.

        Parameters
        ----------
        x1 : `pandas.Series`
            X pixel coordinate.
        y1 : `pandas.Series`
            Y pixel coordinate.
        x2 : `pandas.Series`
            X pixel coordinate.
        y2 : `pandas.Series`
            Y pixel coordinate.
        cd11 : `pandas.Series`
            [1, 1] element of the local Wcs affine transform.
        cd12 : `pandas.Series`
            [1, 2] element of the local Wcs affine transform.
        cd21 : `pandas.Series`
            [2, 1] element of the local Wcs affine transform.
        cd22 : `pandas.Series`
            [2, 2] element of the local Wcs affine transform.

        Returns
        -------
        Distance : `pandas.Series`
            Separation returned by `computeSkySeperation` for the two
            transformed coordinates.
        """
        cdMatrix = (cd11, cd12, cd21, cd22)
        ra1, dec1 = self.computeDeltaRaDec(x1, y1, *cdMatrix)
        ra2, dec2 = self.computeDeltaRaDec(x2, y2, *cdMatrix)
        # Great circle distance for small separations.
        return self.computeSkySeperation(ra1, dec1, ra2, dec2)
class ComputePixelScale(LocalWcs):
    """Compute the local pixel scale from the stored CDMatrix.
    """
    name = "PixelScale"

    @property
    def columns(self):
        return [self.colCD_1_1, self.colCD_1_2, self.colCD_2_1, self.colCD_2_2]

    def pixelScaleArcseconds(self, cd11, cd12, cd21, cd22):
        """Compute the local pixel to scale conversion in arcseconds.

        The scale is the square root of the absolute determinant of the CD
        matrix, converted from radians to arcseconds.

        Parameters
        ----------
        cd11 : `pandas.Series`
            [1, 1] element of the local Wcs affine transform in radians.
        cd12 : `pandas.Series`
            [1, 2] element of the local Wcs affine transform in radians.
        cd21 : `pandas.Series`
            [2, 1] element of the local Wcs affine transform in radians.
        cd22 : `pandas.Series`
            [2, 2] element of the local Wcs affine transform in radians.

        Returns
        -------
        pixScale : `pandas.Series`
            Arcseconds per pixel at the location of the local WC
        """
        determinant = cd11 * cd22 - cd12 * cd21
        scaleRadians = np.sqrt(np.fabs(determinant))
        return 3600 * np.degrees(scaleRadians)

    def _func(self, df):
        cd11 = df[self.colCD_1_1]
        cd12 = df[self.colCD_1_2]
        cd21 = df[self.colCD_2_1]
        cd22 = df[self.colCD_2_2]
        return self.pixelScaleArcseconds(cd11, cd12, cd21, cd22)
class ConvertPixelToArcseconds(ComputePixelScale):
    """Convert a value in units pixels to units arcseconds.

    The input column is multiplied once by the local pixel scale.

    Parameters
    ----------
    col : `str`
        Name of the column to convert.
    colCD_1_1, colCD_1_2, colCD_2_1, colCD_2_2 : `str`
        Names of the columns holding the local Wcs affine transform
        elements.
    """

    def __init__(self,
                 col,
                 colCD_1_1,
                 colCD_1_2,
                 colCD_2_1,
                 colCD_2_2,
                 **kwargs):
        self.col = col
        super().__init__(colCD_1_1, colCD_1_2, colCD_2_1, colCD_2_2, **kwargs)

    @property
    def name(self):
        return f"{self.col}_asArcseconds"

    @property
    def columns(self):
        return [self.col, self.colCD_1_1, self.colCD_1_2, self.colCD_2_1, self.colCD_2_2]

    def _func(self, df):
        pixelValues = df[self.col]
        pixScale = self.pixelScaleArcseconds(df[self.colCD_1_1],
                                             df[self.colCD_1_2],
                                             df[self.colCD_2_1],
                                             df[self.colCD_2_2])
        return pixelValues * pixScale
class ConvertPixelSqToArcsecondsSq(ComputePixelScale):
    """Convert a value in units pixels squared to units arcseconds squared.

    The input column is multiplied twice by the local pixel scale.

    Parameters
    ----------
    col : `str`
        Name of the column to convert.
    colCD_1_1, colCD_1_2, colCD_2_1, colCD_2_2 : `str`
        Names of the columns holding the local Wcs affine transform
        elements.
    """

    def __init__(self,
                 col,
                 colCD_1_1,
                 colCD_1_2,
                 colCD_2_1,
                 colCD_2_2,
                 **kwargs):
        self.col = col
        super().__init__(colCD_1_1, colCD_1_2, colCD_2_1, colCD_2_2, **kwargs)

    @property
    def name(self):
        return f"{self.col}_asArcsecondsSq"

    @property
    def columns(self):
        return [self.col, self.colCD_1_1, self.colCD_1_2, self.colCD_2_1, self.colCD_2_2]

    def _func(self, df):
        cdElements = (df[self.colCD_1_1], df[self.colCD_1_2],
                      df[self.colCD_2_1], df[self.colCD_2_2])
        pixScale = self.pixelScaleArcseconds(*cdElements)
        return df[self.col] * pixScale * pixScale
class ReferenceBand(Functor):
    """Functor returning, per row, the band whose ``merge_measurement_*``
    column holds the maximum value.
    """
    name = 'Reference Band'
    shortname = 'refBand'

    @property
    def columns(self):
        return ["merge_measurement_i",
                "merge_measurement_r",
                "merge_measurement_z",
                "merge_measurement_y",
                "merge_measurement_g",
                "merge_measurement_u"]

    def _func(self, df: pd.DataFrame) -> pd.Series:
        def getFilterAliasName(row):
            # get column name with the max value (True > False)
            return row.idxmax().replace('merge_measurement_', '')

        # Skip columns that are unavailable, because this functor requests the
        # superset of bands that could be included in the object table
        available = [col for col in self.columns if col in df.columns]
        bands = df[available].apply(getFilterAliasName, axis=1, result_type='reduce')
        # Makes a Series of dtype object if df is empty
        return bands.astype('object')
class Photometry(Functor):
    """Base class for converting an instrumental flux column to calibrated
    fluxes or magnitudes using a single zero point.

    Parameters
    ----------
    colFlux : `str`
        Name of the instrumental flux column.
    colFluxErr : `str`, optional
        Name of the associated flux error column.
    calib : optional
        Object providing ``getFluxMag0()``, returning the zero-point flux
        and its error. If `None`, the zero point is derived from
        ``COADD_ZP`` with zero error.
    """
    # AB to NanoJansky (3631 Jansky)
    AB_FLUX_SCALE = (0 * u.ABmag).to_value(u.nJy)
    LOG_AB_FLUX_SCALE = 12.56
    FIVE_OVER_2LOG10 = 1.085736204758129569
    # TO DO: DM-21955 Replace hard coded photometic calibration values
    COADD_ZP = 27

    def __init__(self, colFlux, colFluxErr=None, calib=None, **kwargs):
        self.vhypot = np.vectorize(self.hypot)
        self.col = colFlux
        self.colFluxErr = colFluxErr

        self.calib = calib
        if calib is not None:
            self.fluxMag0, self.fluxMag0Err = calib.getFluxMag0()
        else:
            self.fluxMag0 = 1./np.power(10, -0.4*self.COADD_ZP)
            self.fluxMag0Err = 0.

        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.col]

    @property
    def name(self):
        return f'mag_{self.col}'

    @classmethod
    def hypot(cls, a, b):
        """Return ``sqrt(a**2 + b**2)`` for scalars, factoring out the
        larger-magnitude argument before squaring.
        """
        if np.abs(a) < np.abs(b):
            a, b = b, a
        if a == 0.:
            return 0.
        q = b/a
        return np.abs(a) * np.sqrt(1. + q*q)

    def dn2flux(self, dn, fluxMag0):
        """Scale ``dn`` by ``AB_FLUX_SCALE / fluxMag0``."""
        return self.AB_FLUX_SCALE * dn / fluxMag0

    def dn2mag(self, dn, fluxMag0):
        """Return ``-2.5*log10(dn/fluxMag0)``, silencing the NumPy warnings
        raised for non-positive or invalid inputs.
        """
        # ``np.warnings`` is not available in NumPy >= 1.24, so use the
        # standard-library module. Imported locally so this block does not
        # depend on a module-level import.
        import warnings
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', r'invalid value encountered')
            warnings.filterwarnings('ignore', r'divide by zero')
            return -2.5 * np.log10(dn/fluxMag0)

    def dn2fluxErr(self, dn, dnErr, fluxMag0, fluxMag0Err):
        """Combine the ``dn`` and zero-point errors in quadrature and apply
        the same scaling as `dn2flux`.
        """
        retVal = self.vhypot(dn * fluxMag0Err, dnErr * fluxMag0)
        retVal *= self.AB_FLUX_SCALE / fluxMag0 / fluxMag0
        return retVal

    def dn2MagErr(self, dn, dnErr, fluxMag0, fluxMag0Err):
        """Return the magnitude error, ``FIVE_OVER_2LOG10`` times the
        fractional flux error.
        """
        retVal = self.dn2fluxErr(dn, dnErr, fluxMag0, fluxMag0Err) / self.dn2flux(dn, fluxMag0)
        return self.FIVE_OVER_2LOG10 * retVal
class NanoJansky(Photometry):
    """Convert the flux column with ``dn2flux`` and the stored zero point.
    """

    def _func(self, df):
        instFlux = df[self.col]
        return self.dn2flux(instFlux, self.fluxMag0)
class NanoJanskyErr(Photometry):
    """Compute the flux error with ``dn2fluxErr`` from the flux and flux
    error columns and the stored zero point.
    """

    @property
    def columns(self):
        return [self.col, self.colFluxErr]

    def _func(self, df):
        instFlux = df[self.col]
        instFluxErr = df[self.colFluxErr]
        fluxErr = self.dn2fluxErr(instFlux, instFluxErr, self.fluxMag0, self.fluxMag0Err)
        # Re-wrap with the input index before returning.
        return pd.Series(fluxErr, index=df.index)
class Magnitude(Photometry):
    """Convert the flux column with ``dn2mag`` and the stored zero point.
    """

    def _func(self, df):
        instFlux = df[self.col]
        return self.dn2mag(instFlux, self.fluxMag0)
class MagnitudeErr(Photometry):
    """Compute the magnitude error with ``dn2MagErr`` from the flux and
    flux error columns and the stored zero point.
    """

    @property
    def columns(self):
        return [self.col, self.colFluxErr]

    def _func(self, df):
        instFlux = df[self.col]
        instFluxErr = df[self.colFluxErr]
        magErr = self.dn2MagErr(instFlux, instFluxErr, self.fluxMag0, self.fluxMag0Err)
        # Re-wrap with the input index before returning.
        return pd.Series(magErr, index=df.index)
class LocalPhotometry(Functor):
    """Base class for calibrating the specified instrument flux column using
    the local photometric calibration.

    Parameters
    ----------
    instFluxCol : `str`
        Name of the instrument flux column.
    instFluxErrCol : `str`
        Name of the assocated error columns for ``instFluxCol``.
    photoCalibCol : `str`
        Name of local calibration column.
    photoCalibErrCol : `str`
        Error associated with ``photoCalibCol``

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    """
    # AB magnitude corresponding to a flux of 1 nJy.
    logNJanskyToAB = (1 * u.nJy).to_value(u.ABmag)

    def __init__(self,
                 instFluxCol,
                 instFluxErrCol,
                 photoCalibCol,
                 photoCalibErrCol,
                 **kwargs):
        self.instFluxCol = instFluxCol
        self.instFluxErrCol = instFluxErrCol
        self.photoCalibCol = photoCalibCol
        self.photoCalibErrCol = photoCalibErrCol
        super().__init__(**kwargs)

    def instFluxToNanojansky(self, instFlux, localCalib):
        """Convert instrument flux to nanojanskys.

        Parameters
        ----------
        instFlux : `numpy.ndarray` or `pandas.Series`
            Array of instrument flux measurements
        localCalib : `numpy.ndarray` or `pandas.Series`
            Array of local photometric calibration estimates.

        Returns
        -------
        calibFlux : `numpy.ndarray` or `pandas.Series`
            Array of calibrated flux measurements.
        """
        return instFlux * localCalib

    def instFluxErrToNanojanskyErr(self, instFlux, instFluxErr, localCalib, localCalibErr):
        """Convert instrument flux error to nanojansky error.

        The flux and calibration error terms are added in quadrature.

        Parameters
        ----------
        instFlux : `numpy.ndarray` or `pandas.Series`
            Array of instrument flux measurements
        instFluxErr : `numpy.ndarray` or `pandas.Series`
            Errors on associated ``instFlux`` values
        localCalib : `numpy.ndarray` or `pandas.Series`
            Array of local photometric calibration estimates.
        localCalibErr : `numpy.ndarray` or `pandas.Series`
            Errors on associated ``localCalib`` values

        Returns
        -------
        calibFluxErr : `numpy.ndarray` or `pandas.Series`
            Errors on calibrated flux measurements.
        """
        return np.hypot(instFluxErr * localCalib, instFlux * localCalibErr)

    def instFluxToMagnitude(self, instFlux, localCalib):
        """Convert instrument flux to AB magnitudes.

        Parameters
        ----------
        instFlux : `numpy.ndarray` or `pandas.Series`
            Array of instrument flux measurements
        localCalib : `numpy.ndarray` or `pandas.Series`
            Array of local photometric calibration estimates.

        Returns
        -------
        calibMag : `numpy.ndarray` or `pandas.Series`
            Array of calibrated AB magnitudes.
        """
        return -2.5 * np.log10(self.instFluxToNanojansky(instFlux, localCalib)) + self.logNJanskyToAB

    def instFluxErrToMagnitudeErr(self, instFlux, instFluxErr, localCalib, localCalibErr):
        """Convert instrument flux error to AB magnitude error.

        Parameters
        ----------
        instFlux : `numpy.ndarray` or `pandas.Series`
            Array of instrument flux measurements
        instFluxErr : `numpy.ndarray` or `pandas.Series`
            Errors on associated ``instFlux`` values
        localCalib : `numpy.ndarray` or `pandas.Series`
            Array of local photometric calibration estimates.
        localCalibErr : `numpy.ndarray` or `pandas.Series`
            Errors on associated ``localCalib`` values

        Returns
        -------
        calibMagErr: `numpy.ndarray` or `pandas.Series`
            Error on calibrated AB magnitudes.
        """
        err = self.instFluxErrToNanojanskyErr(instFlux, instFluxErr, localCalib, localCalibErr)
        # The denominator is the calibrated flux, so the instrument flux must
        # be scaled by the local calibration (not by the flux error).
        return 2.5 / np.log(10) * err / self.instFluxToNanojansky(instFlux, localCalib)
class LocalNanojansky(LocalPhotometry):
    """Compute calibrated fluxes using the local calibration value.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    """

    @property
    def columns(self):
        return [self.instFluxCol, self.photoCalibCol]

    @property
    def name(self):
        return f'flux_{self.instFluxCol}'

    def _func(self, df):
        instFlux = df[self.instFluxCol]
        localCalib = df[self.photoCalibCol]
        return self.instFluxToNanojansky(instFlux, localCalib)
class LocalNanojanskyErr(LocalPhotometry):
    """Compute calibrated flux errors using the local calibration value.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    """

    @property
    def columns(self):
        return [self.instFluxCol, self.instFluxErrCol,
                self.photoCalibCol, self.photoCalibErrCol]

    @property
    def name(self):
        return f'fluxErr_{self.instFluxCol}'

    def _func(self, df):
        instFlux = df[self.instFluxCol]
        instFluxErr = df[self.instFluxErrCol]
        localCalib = df[self.photoCalibCol]
        localCalibErr = df[self.photoCalibErrCol]
        return self.instFluxErrToNanojanskyErr(instFlux, instFluxErr, localCalib, localCalibErr)
class LocalMagnitude(LocalPhotometry):
    """Compute calibrated AB magnitudes using the local calibration value.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    """

    @property
    def columns(self):
        return [self.instFluxCol, self.photoCalibCol]

    @property
    def name(self):
        return f'mag_{self.instFluxCol}'

    def _func(self, df):
        instFlux = df[self.instFluxCol]
        localCalib = df[self.photoCalibCol]
        return self.instFluxToMagnitude(instFlux, localCalib)
class LocalMagnitudeErr(LocalPhotometry):
    """Compute calibrated AB magnitude errors using the local calibration value.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    """

    @property
    def columns(self):
        return [self.instFluxCol, self.instFluxErrCol,
                self.photoCalibCol, self.photoCalibErrCol]

    @property
    def name(self):
        return f'magErr_{self.instFluxCol}'

    def _func(self, df):
        instFlux = df[self.instFluxCol]
        instFluxErr = df[self.instFluxErrCol]
        localCalib = df[self.photoCalibCol]
        localCalibErr = df[self.photoCalibErrCol]
        return self.instFluxErrToMagnitudeErr(instFlux, instFluxErr, localCalib, localCalibErr)
class LocalDipoleMeanFlux(LocalPhotometry):
    """Compute absolute mean of dipole fluxes.

    The value is ``0.5*(abs(neg) + abs(pos))`` of the calibrated fluxes.

    Parameters
    ----------
    instFluxPosCol : `str`
        Name of the positive-lobe instrument flux column.
    instFluxNegCol : `str`
        Name of the negative-lobe instrument flux column.
    instFluxPosErrCol : `str`
        Name of the error column for ``instFluxPosCol``.
    instFluxNegErrCol : `str`
        Name of the error column for ``instFluxNegCol``.
    photoCalibCol : `str`
        Name of local calibration column.
    photoCalibErrCol : `str`
        Error associated with ``photoCalibCol``

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    LocalDipoleMeanFlux
    LocalDipoleMeanFluxErr
    LocalDipoleDiffFlux
    LocalDipoleDiffFluxErr
    """
    def __init__(self,
                 instFluxPosCol,
                 instFluxNegCol,
                 instFluxPosErrCol,
                 instFluxNegErrCol,
                 photoCalibCol,
                 photoCalibErrCol,
                 **kwargs):
        self.instFluxPosCol = instFluxPosCol
        self.instFluxPosErrCol = instFluxPosErrCol
        self.instFluxNegCol = instFluxNegCol
        self.instFluxNegErrCol = instFluxNegErrCol
        self.photoCalibCol = photoCalibCol
        self.photoCalibErrCol = photoCalibErrCol
        # The parent is initialised with the negative-lobe columns.
        super().__init__(instFluxNegCol,
                         instFluxNegErrCol,
                         photoCalibCol,
                         photoCalibErrCol,
                         **kwargs)

    @property
    def columns(self):
        return [self.instFluxPosCol, self.instFluxNegCol, self.photoCalibCol]

    @property
    def name(self):
        return f'dipMeanFlux_{self.instFluxPosCol}_{self.instFluxNegCol}'

    def _func(self, df):
        negFlux = self.instFluxToNanojansky(df[self.instFluxNegCol], df[self.photoCalibCol])
        posFlux = self.instFluxToNanojansky(df[self.instFluxPosCol], df[self.photoCalibCol])
        return 0.5*(np.fabs(negFlux) + np.fabs(posFlux))
class LocalDipoleMeanFluxErr(LocalDipoleMeanFlux):
    """Compute the error on the absolute mean of dipole fluxes.

    The calibration-error term ``((abs(neg) + abs(pos)) * calibErr)**2`` and
    the measurement-error term ``(negErr**2 + posErr**2) * calib**2`` are
    summed, and half the square root is returned.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    LocalDipoleMeanFlux
    LocalDipoleMeanFluxErr
    LocalDipoleDiffFlux
    LocalDipoleDiffFluxErr
    """

    @property
    def columns(self):
        return [self.instFluxPosCol,
                self.instFluxNegCol,
                self.instFluxPosErrCol,
                self.instFluxNegErrCol,
                self.photoCalibCol,
                self.photoCalibErrCol]

    @property
    def name(self):
        return f'dipMeanFluxErr_{self.instFluxPosCol}_{self.instFluxNegCol}'

    def _func(self, df):
        # The calibration error scales the *sum* of the absolute fluxes, so
        # the sum is parenthesised before multiplying (this mirrors the
        # difference form used in LocalDipoleDiffFluxErr).
        return 0.5*np.sqrt(
            ((np.fabs(df[self.instFluxNegCol]) + np.fabs(df[self.instFluxPosCol]))
             * df[self.photoCalibErrCol])**2
            + (df[self.instFluxNegErrCol]**2 + df[self.instFluxPosErrCol]**2)
            * df[self.photoCalibCol]**2)
class LocalDipoleDiffFlux(LocalDipoleMeanFlux):
    """Compute the absolute difference of dipole fluxes.

    Value is (abs(pos) - abs(neg))

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    LocalDipoleMeanFlux
    LocalDipoleMeanFluxErr
    LocalDipoleDiffFlux
    LocalDipoleDiffFluxErr
    """

    @property
    def columns(self):
        return [self.instFluxPosCol, self.instFluxNegCol, self.photoCalibCol]

    @property
    def name(self):
        return f'dipDiffFlux_{self.instFluxPosCol}_{self.instFluxNegCol}'

    def _func(self, df):
        posFlux = self.instFluxToNanojansky(df[self.instFluxPosCol], df[self.photoCalibCol])
        negFlux = self.instFluxToNanojansky(df[self.instFluxNegCol], df[self.photoCalibCol])
        return np.fabs(posFlux) - np.fabs(negFlux)
class LocalDipoleDiffFluxErr(LocalDipoleMeanFlux):
    """Compute the error on the absolute difference of dipole fluxes.

    The calibration-error term ``((abs(pos) - abs(neg)) * calibErr)**2`` and
    the measurement-error term ``(posErr**2 + negErr**2) * calib**2`` are
    summed, and the square root is returned.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    LocalDipoleMeanFlux
    LocalDipoleMeanFluxErr
    LocalDipoleDiffFlux
    LocalDipoleDiffFluxErr
    """

    @property
    def columns(self):
        return [self.instFluxPosCol,
                self.instFluxNegCol,
                self.instFluxPosErrCol,
                self.instFluxNegErrCol,
                self.photoCalibCol,
                self.photoCalibErrCol]

    @property
    def name(self):
        return f'dipDiffFluxErr_{self.instFluxPosCol}_{self.instFluxNegCol}'

    def _func(self, df):
        absFluxDiff = np.fabs(df[self.instFluxPosCol]) - np.fabs(df[self.instFluxNegCol])
        calibTerm = (absFluxDiff * df[self.photoCalibErrCol])**2
        measurementVar = df[self.instFluxPosErrCol]**2 + df[self.instFluxNegErrCol]**2
        measurementTerm = measurementVar * df[self.photoCalibCol]**2
        return np.sqrt(calibTerm + measurementTerm)
class Ratio(Functor):
    """Base class for returning the ratio of 2 columns.

    Can be used to compute a Signal to Noise ratio for any input flux.

    Parameters
    ----------
    numerator : `str`
        Name of the column to use at the numerator in the ratio
    denominator : `str`
        Name of the column to use as the denominator in the ratio.
    """
    def __init__(self,
                 numerator,
                 denominator,
                 **kwargs):
        self.numerator = numerator
        self.denominator = denominator
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.numerator, self.denominator]

    @property
    def name(self):
        return f'ratio_{self.numerator}_{self.denominator}'

    def _func(self, df):
        # ``np.warnings`` is not available in NumPy >= 1.24, so use the
        # standard-library module. Imported locally so this block does not
        # depend on a module-level import.
        import warnings
        with warnings.catch_warnings():
            # Division by zero or by NaN is expected here; suppress the
            # corresponding warnings rather than the resulting inf/NaN values.
            warnings.filterwarnings('ignore', r'invalid value encountered')
            warnings.filterwarnings('ignore', r'divide by zero')
            return df[self.numerator] / df[self.denominator]
class Ebv(Functor):
    """Compute E(B-V) from dustmaps.sfd

    The ``coord_ra`` and ``coord_dec`` columns are interpreted as radians
    and passed to an `SFDQuery` instance created at construction time.
    """
    _defaultDataset = 'ref'
    name = "E(B-V)"
    shortname = "ebv"

    def __init__(self, **kwargs):
        self._columns = ['coord_ra', 'coord_dec']
        self.sfd = SFDQuery()
        super().__init__(**kwargs)

    def _func(self, df):
        ra = df['coord_ra']*u.rad
        dec = df['coord_dec']*u.rad
        reddening = self.sfd(SkyCoord(ra, dec))
        # Double precision unnecessary scientifically
        # but currently needed for ingest to qserv
        result = pd.Series(reddening, index=df.index)
        return result.astype('float64')