Coverage for python/lsst/pipe/tasks/functors.py: 34%
826 statements
« prev ^ index » next coverage.py v6.5.0, created at 2023-01-25 03:45 -0800
« prev ^ index » next coverage.py v6.5.0, created at 2023-01-25 03:45 -0800
1# This file is part of pipe_tasks.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <https://www.gnu.org/licenses/>.
# Names exported by `from lsst.pipe.tasks.functors import *`; this is the
# declared public API of the module.
__all__ = ["Functor", "CompositeFunctor", "CustomFunctor", "Column", "Index",
           "IDColumn", "FootprintNPix", "CoordColumn", "RAColumn", "DecColumn",
           "HtmIndex20", "Mag", "MagErr", "NanoMaggie", "MagDiff", "Color",
           "Labeller", "StarGalaxyLabeller", "NumStarLabeller", "DeconvolvedMoments",
           "SdssTraceSize", "PsfSdssTraceSizeDiff", "HsmTraceSize", "PsfHsmTraceSizeDiff",
           "HsmFwhm", "E1", "E2", "RadiusFromQuadrupole", "LocalWcs", "ComputePixelScale",
           "ConvertPixelToArcseconds", "ConvertPixelSqToArcsecondsSq", "ReferenceBand",
           "Photometry", "NanoJansky", "NanoJanskyErr", "Magnitude", "MagnitudeErr",
           "LocalPhotometry", "LocalNanojansky", "LocalNanojanskyErr",
           "LocalMagnitude", "LocalMagnitudeErr", "LocalDipoleMeanFlux",
           "LocalDipoleMeanFluxErr", "LocalDipoleDiffFlux", "LocalDipoleDiffFluxErr",
           "Ratio", "Ebv"]
import logging
import os.path
import re
import warnings
from itertools import product

import astropy.units as u
import numpy as np
import pandas as pd
import yaml
from astropy.coordinates import SkyCoord

import lsst.geom as geom
import lsst.sphgeom as sphgeom
from lsst.daf.butler import DeferredDatasetHandle
from lsst.utils import doImport

from .parquetTable import ParquetTable, MultilevelParquetTable
def init_fromDict(initDict, basePath='lsst.pipe.tasks.functors',
                  typeKey='functor', name=None):
    """Initialize an object defined in a dictionary.

    The object needs to be importable as f'{basePath}.{initDict[typeKey]}'.
    The positional and keyword arguments (if any) are contained in "args" and
    "kwargs" entries in the dictionary, respectively.  This is used in
    `functors.CompositeFunctor.from_yaml` to initialize a composite functor
    from a specification in a YAML file.

    Parameters
    ----------
    initDict : dictionary
        Dictionary describing object's initialization. Must contain an entry
        keyed by ``typeKey`` that is the name of the object, relative to
        ``basePath``.
    basePath : str
        Path relative to module in which ``initDict[typeKey]`` is defined.
    typeKey : str
        Key of ``initDict`` that is the name of the object
        (relative to ``basePath``).
    name : str, optional
        Human-readable name of the functor being built; used only to make the
        error message clearer if construction fails.

    Returns
    -------
    The constructed object.
    """
    initDict = initDict.copy()
    # TO DO: DM-21956 We should be able to define functors outside this module
    pythonType = doImport(f'{basePath}.{initDict.pop(typeKey)}')
    args = []
    if 'args' in initDict:
        args = initDict.pop('args')
        # A bare string is treated as a single positional argument.
        if isinstance(args, str):
            args = [args]
    try:
        element = pythonType(*args, **initDict)
    except Exception as e:
        message = f'Error in constructing functor "{name}" of type {pythonType.__name__} with args: {args}'
        # Chain with `from e` so the original traceback/cause is preserved
        # instead of being swallowed by the re-raise.
        raise type(e)(message, e.args) from e
    return element
class Functor(object):
    """Define and execute a calculation on a ParquetTable.

    The `__call__` method accepts either a `ParquetTable` object or a
    `DeferredDatasetHandle`, and returns the result of the calculation as a
    single column. Each functor defines what columns are needed for the
    calculation, and only these columns are read from the `ParquetTable`.

    The action of `__call__` consists of two steps: first, loading the
    necessary columns from disk into memory as a `pandas.DataFrame` object;
    and second, performing the computation on this dataframe and returning
    the result.

    To define a new `Functor`, a subclass must define a `_func` method,
    that takes a `pandas.DataFrame` and returns result in a `pandas.Series`.
    In addition, it must define the following attributes:

    * `_columns`: The columns necessary to perform the calculation
    * `name`: A name appropriate for a figure axis label
    * `shortname`: A name appropriate for use as a dictionary key

    On initialization, a `Functor` should declare what band (`filt` kwarg)
    and dataset (e.g. `'ref'`, `'meas'`, `'forced_src'`) it is intended to be
    applied to. This enables the `_get_data` method to extract the proper
    columns from the parquet file. If not specified, the dataset will fall
    back on the `_defaultDataset` attribute. If band is not specified and
    `dataset` is anything other than `'ref'`, then an error will be raised
    when trying to perform the calculation.

    Originally, `Functor` was set up to expect datasets formatted like the
    `deepCoadd_obj` dataset; that is, a dataframe with a multi-level column
    index, with the levels of the column index being `band`, `dataset`, and
    `column`. It has since been generalized to apply to dataframes without
    multi-level indices and multi-level indices with just `dataset` and
    `column` levels. In addition, the `_get_data` method that reads the
    dataframe from the `ParquetTable` will return a dataframe with column
    index levels defined by the `_dfLevels` attribute; by default, this is
    `column`.

    The `_dfLevels` attributes should generally not need to be changed,
    unless `_func` needs columns from multiple filters or datasets to do the
    calculation. An example of this is the `lsst.pipe.tasks.functors.Color`
    functor, for which `_dfLevels = ('band', 'column')`, and `_func` expects
    the dataframe it gets to have those levels in the column index.

    Parameters
    ----------
    filt : str
        Filter upon which to do the calculation.
    dataset : str
        Dataset upon which to do the calculation
        (e.g., 'ref', 'meas', 'forced_src').
    """
    # Dataset used when the caller does not specify one.
    _defaultDataset = 'ref'
    # Column-index levels the dataframe handed to `_func` retains.
    _dfLevels = ('column',)
    # Default for the `noDup` property when not set per-instance.
    _defaultNoDup = False

    def __init__(self, filt=None, dataset=None, noDup=None):
        self.filt = filt
        self.dataset = dataset if dataset is not None else self._defaultDataset
        self._noDup = noDup
        self.log = logging.getLogger(type(self).__name__)

    @property
    def noDup(self):
        # Explicit per-instance setting wins; otherwise use the class default.
        if self._noDup is not None:
            return self._noDup
        else:
            return self._defaultNoDup

    @property
    def columns(self):
        """Columns required to perform calculation.
        """
        if not hasattr(self, '_columns'):
            raise NotImplementedError('Must define columns property or _columns attribute')
        return self._columns

    def _get_data_columnLevels(self, data, columnIndex=None):
        """Gets the names of the column index levels.

        This should only be called in the context of a multilevel table.
        The logic here is to enable this to work both with the gen2
        `MultilevelParquetTable` and with the gen3 `DeferredDatasetHandle`.

        Parameters
        ----------
        data : `MultilevelParquetTable` or `DeferredDatasetHandle`
            May also be `None` when `columnIndex` is supplied directly.
        columnIndex : `pandas.Index`, optional
            If not passed, then it is read from the `DeferredDatasetHandle`.
        """
        if isinstance(data, DeferredDatasetHandle):
            if columnIndex is None:
                columnIndex = data.get(component="columns")
        # A supplied (or just-fetched) index takes precedence over `data`.
        if columnIndex is not None:
            return columnIndex.names
        if isinstance(data, MultilevelParquetTable):
            return data.columnLevels
        else:
            raise TypeError(f"Unknown type for data: {type(data)}!")

    def _get_data_columnLevelNames(self, data, columnIndex=None):
        """Gets the content of each of the column levels for a multilevel table.

        Similar to `_get_data_columnLevels`, this enables backward
        compatibility with gen2.

        Mirrors original gen2 implementation within
        `pipe.tasks.parquetTable.MultilevelParquetTable`.
        """
        if isinstance(data, DeferredDatasetHandle):
            if columnIndex is None:
                columnIndex = data.get(component="columns")
        if columnIndex is not None:
            # Build {level name: sorted unique values at that level} from the
            # tuples that make up the MultiIndex.
            columnLevels = columnIndex.names
            columnLevelNames = {
                level: list(np.unique(np.array([c for c in columnIndex])[:, i]))
                for i, level in enumerate(columnLevels)
            }
            return columnLevelNames
        if isinstance(data, MultilevelParquetTable):
            return data.columnLevelNames
        else:
            raise TypeError(f"Unknown type for data: {type(data)}!")

    def _colsFromDict(self, colDict, columnIndex=None):
        """Converts dictionary column specficiation to a list of columns.

        This mirrors the original gen2 implementation within
        `pipe.tasks.parquetTable.MultilevelParquetTable`.
        """
        new_colDict = {}
        # `data=None` is fine here: only columnIndex is needed for the lookup.
        columnLevels = self._get_data_columnLevels(None, columnIndex=columnIndex)

        for i, lev in enumerate(columnLevels):
            if lev in colDict:
                if isinstance(colDict[lev], str):
                    # Promote a scalar spec to a one-element list.
                    new_colDict[lev] = [colDict[lev]]
                else:
                    new_colDict[lev] = colDict[lev]
            else:
                # Level not constrained: take every value present at that level.
                new_colDict[lev] = columnIndex.levels[i]

        # Cartesian product of the per-level choices, filtered to columns
        # that actually exist in the index.
        levelCols = [new_colDict[lev] for lev in columnLevels]
        cols = list(product(*levelCols))
        colsAvailable = [col for col in cols if col in columnIndex]
        return colsAvailable

    def multilevelColumns(self, data, columnIndex=None, returnTuple=False):
        """Returns columns needed by functor from multilevel dataset.

        To access tables with multilevel column structure, the
        `MultilevelParquetTable` or `DeferredDatasetHandle` need to be passed
        either a list of tuples or a dictionary.

        Parameters
        ----------
        data : `MultilevelParquetTable` or `DeferredDatasetHandle`
        columnIndex : `pandas.Index`, optional
            Either passed or read in from `DeferredDatasetHandle`.
        returnTuple : bool
            If true, then return a list of tuples rather than the column
            dictionary specification. This is set to `True` by
            `CompositeFunctor` in order to be able to combine columns from
            the various component functors.
        """
        if isinstance(data, DeferredDatasetHandle) and columnIndex is None:
            columnIndex = data.get(component="columns")

        # Confirm that the dataset has the column levels the functor is expecting it to have.
        columnLevels = self._get_data_columnLevels(data, columnIndex)

        columnDict = {'column': self.columns,
                      'dataset': self.dataset}
        if self.filt is None:
            columnLevelNames = self._get_data_columnLevelNames(data, columnIndex)
            if "band" in columnLevels:
                if self.dataset == "ref":
                    # 'ref' is band-independent: arbitrarily pick the first band.
                    columnDict["band"] = columnLevelNames["band"][0]
                else:
                    raise ValueError(f"'filt' not set for functor {self.name}"
                                     f"(dataset {self.dataset}) "
                                     "and ParquetTable "
                                     "contains multiple filters in column index. "
                                     "Set 'filt' or set 'dataset' to 'ref'.")
        else:
            columnDict['band'] = self.filt

        if isinstance(data, MultilevelParquetTable):
            return data._colsFromDict(columnDict)
        elif isinstance(data, DeferredDatasetHandle):
            if returnTuple:
                return self._colsFromDict(columnDict, columnIndex=columnIndex)
            else:
                return columnDict

    def _func(self, df, dropna=True):
        # Subclasses must override.  NOTE(review): the `dropna` parameter is
        # never used by callers of `_func` in this file — confirm before
        # relying on it in subclasses.
        raise NotImplementedError('Must define calculation on dataframe')

    def _get_columnIndex(self, data):
        """Return columnIndex (gen3 only; `None` for anything else).
        """
        if isinstance(data, DeferredDatasetHandle):
            return data.get(component="columns")
        else:
            return None

    def _get_data(self, data):
        """Retrieve dataframe necessary for calculation.

        The data argument can be a DataFrame, a ParquetTable instance, or a
        gen3 DeferredDatasetHandle.

        Returns dataframe upon which `self._func` can act.

        N.B. while passing a raw pandas `DataFrame` *should* work here, it
        has not been tested.
        """
        # An in-memory DataFrame is used as-is; no column selection is done.
        if isinstance(data, pd.DataFrame):
            return data

        # First thing to do: check to see if the data source has a multilevel column index or not.
        columnIndex = self._get_columnIndex(data)
        is_multiLevel = isinstance(data, MultilevelParquetTable) or isinstance(columnIndex, pd.MultiIndex)

        # Simple single-level parquet table, gen2
        if isinstance(data, ParquetTable) and not is_multiLevel:
            columns = self.columns
            df = data.toDataFrame(columns=columns)
            return df

        # Get proper columns specification for this functor
        if is_multiLevel:
            columns = self.multilevelColumns(data, columnIndex=columnIndex)
        else:
            columns = self.columns

        if isinstance(data, MultilevelParquetTable):
            # Load in-memory dataframe with appropriate columns the gen2 way
            df = data.toDataFrame(columns=columns, droplevels=False)
        elif isinstance(data, DeferredDatasetHandle):
            # Load in-memory dataframe with appropriate columns the gen3 way
            df = data.get(parameters={"columns": columns})

        # Drop unnecessary column levels
        if is_multiLevel:
            df = self._setLevels(df)

        return df

    def _setLevels(self, df):
        # Drop every column-index level the functor does not need
        # (per `_dfLevels`).
        levelsToDrop = [n for n in df.columns.names if n not in self._dfLevels]
        df.columns = df.columns.droplevel(levelsToDrop)
        return df

    def _dropna(self, vals):
        return vals.dropna()

    def __call__(self, data, dropna=False):
        try:
            df = self._get_data(data)
            vals = self._func(df)
        except Exception as e:
            self.log.error("Exception in %s call: %s: %s", self.name, type(e).__name__, e)
            # NOTE(review): if `_get_data` itself raised, `df` is unbound
            # here and `self.fail(df)` raises NameError — confirm whether
            # that path can occur in practice.
            vals = self.fail(df)
        if dropna:
            vals = self._dropna(vals)

        return vals

    def difference(self, data1, data2, **kwargs):
        """Computes difference between functor called on two different
        ParquetTable objects.
        """
        return self(data1, **kwargs) - self(data2, **kwargs)

    def fail(self, df):
        # Fallback result: an all-NaN Series aligned with the input.
        return pd.Series(np.full(len(df), np.nan), index=df.index)

    @property
    def name(self):
        """Full name of functor (suitable for figure labels).
        """
        # NOTE(review): this *returns* the NotImplementedError class rather
        # than raising; callers interpolating `self.name` into strings will
        # get "<class 'NotImplementedError'>" — confirm this is intended.
        return NotImplementedError

    @property
    def shortname(self):
        """Short name of functor (suitable for column name/dict key).
        """
        return self.name
class CompositeFunctor(Functor):
    """Perform multiple calculations at once on a catalog.

    The role of a `CompositeFunctor` is to group together computations from
    multiple functors. Instead of returning `pandas.Series` a
    `CompositeFunctor` returns a `pandas.Dataframe`, with the column names
    being the keys of `funcDict`.

    The `columns` attribute of a `CompositeFunctor` is the union of all
    columns in all the component functors.

    A `CompositeFunctor` does not use a `_func` method itself; rather, when a
    `CompositeFunctor` is called, all its columns are loaded at once, and the
    resulting dataframe is passed to the `_func` method of each component
    functor. This has the advantage of only doing I/O (reading from parquet
    file) once, and works because each individual `_func` method of each
    component functor does not care if there are *extra* columns in the
    dataframe being passed; only that it must contain *at least* the
    `columns` it expects.

    An important and useful class method is `from_yaml`, which takes as
    argument the path to a YAML file specifying a collection of functors.

    Parameters
    ----------
    funcs : `dict` or `list`
        Dictionary or list of functors. If a list, then it will be converted
        into a dictonary according to the `.shortname` attribute of each
        functor.
    """
    dataset = None

    def __init__(self, funcs, **kwargs):

        # Use isinstance (not a type-equality check) so dict subclasses such
        # as OrderedDict are accepted as dictionaries too.
        if isinstance(funcs, dict):
            self.funcDict = funcs
        else:
            self.funcDict = {f.shortname: f for f in funcs}

        self._filt = None

        super().__init__(**kwargs)

    @property
    def filt(self):
        return self._filt

    @filt.setter
    def filt(self, filt):
        # Propagate the filter to every component functor.
        if filt is not None:
            for _, f in self.funcDict.items():
                f.filt = filt
        self._filt = filt

    def update(self, new):
        """Merge the functors from ``new`` (a dict or another
        `CompositeFunctor`) into this one.
        """
        if isinstance(new, dict):
            self.funcDict.update(new)
        elif isinstance(new, CompositeFunctor):
            self.funcDict.update(new.funcDict)
        else:
            raise TypeError('Can only update with dictionary or CompositeFunctor.')

        # Make sure new functors have the same 'filt' set
        if self.filt is not None:
            self.filt = self.filt

    @property
    def columns(self):
        # Union (de-duplicated) of the columns of every component functor.
        return list(set([x for y in [f.columns for f in self.funcDict.values()] for x in y]))

    def multilevelColumns(self, data, **kwargs):
        # Get the union of columns for all component functors.
        # Note the need to have `returnTuple=True` here.
        return list(
            set(
                [
                    x
                    for y in [
                        f.multilevelColumns(data, returnTuple=True, **kwargs) for f in self.funcDict.values()
                    ]
                    for x in y
                ]
            )
        )

    def __call__(self, data, **kwargs):
        """Apply the functor to the data table.

        Parameters
        ----------
        data : `lsst.daf.butler.DeferredDatasetHandle`,
            `lsst.pipe.tasks.parquetTable.MultilevelParquetTable`,
            `lsst.pipe.tasks.parquetTable.ParquetTable`,
            or `pandas.DataFrame`.
            The table or a pointer to a table on disk from which columns can
            be accessed.
        """
        columnIndex = self._get_columnIndex(data)

        # First, determine whether data has a multilevel index (either gen2 or gen3)
        is_multiLevel = isinstance(data, MultilevelParquetTable) or isinstance(columnIndex, pd.MultiIndex)

        # Multilevel index, gen2 or gen3
        if is_multiLevel:
            columns = self.multilevelColumns(data, columnIndex=columnIndex)

            if isinstance(data, MultilevelParquetTable):
                # Read data into memory the gen2 way
                df = data.toDataFrame(columns=columns, droplevels=False)
            elif isinstance(data, DeferredDatasetHandle):
                # Read data into memory the gen3 way
                df = data.get(parameters={"columns": columns})

            valDict = {}
            for k, f in self.funcDict.items():
                try:
                    # Hand each component functor only the columns it needs,
                    # with the index levels it expects.
                    subdf = f._setLevels(
                        df[f.multilevelColumns(data, returnTuple=True, columnIndex=columnIndex)]
                    )
                    valDict[k] = f._func(subdf)
                except Exception as e:
                    self.log.error("Exception in %s call: %s: %s", self.name, type(e).__name__, e)
                    try:
                        valDict[k] = f.fail(subdf)
                    except NameError:
                        # `subdf` was never assigned: the column selection
                        # itself failed, so there is nothing to fall back on.
                        raise e

        else:
            if isinstance(data, DeferredDatasetHandle):
                # input if Gen3 deferLoad=True
                df = data.get(parameters={"columns": self.columns})
            elif isinstance(data, pd.DataFrame):
                # input if Gen3 deferLoad=False
                df = data
            else:
                # Original Gen2 input is type ParquetTable and the fallback
                df = data.toDataFrame(columns=self.columns)

            valDict = {k: f._func(df) for k, f in self.funcDict.items()}

        # Check that output columns are actually columns
        for name, colVal in valDict.items():
            if len(colVal.shape) != 1:
                raise RuntimeError("Transformed column '%s' is not the shape of a column. "
                                   "It is shaped %s and type %s." % (name, colVal.shape, type(colVal)))

        try:
            valDf = pd.concat(valDict, axis=1)
        except TypeError:
            # Log (rather than print) the offending value types before
            # re-raising, so the diagnostic ends up in the task log.
            self.log.error("Exception when concatenating output columns: %s",
                           [(k, type(v)) for k, v in valDict.items()])
            raise

        if kwargs.get('dropna', False):
            valDf = valDf.dropna(how='any')

        return valDf

    @classmethod
    def renameCol(cls, col, renameRules):
        """Apply the first matching (old, new) prefix rename rule to ``col``."""
        if renameRules is None:
            return col
        for old, new in renameRules:
            if col.startswith(old):
                col = col.replace(old, new)
        return col

    @classmethod
    def from_file(cls, filename, **kwargs):
        """Construct a `CompositeFunctor` from a YAML file on disk."""
        # Allow environment variables in the filename.
        filename = os.path.expandvars(filename)
        with open(filename) as f:
            translationDefinition = yaml.safe_load(f)

        return cls.from_yaml(translationDefinition, **kwargs)

    @classmethod
    def from_yaml(cls, translationDefinition, **kwargs):
        """Construct a `CompositeFunctor` from a parsed YAML specification.

        The 'funcs' section defines arbitrary functors; the optional
        '*Flags' sections define plain `Column` functors bound to a
        particular dataset ('calexp', 'ref', 'forced_src', or 'meas').
        """
        funcs = {}
        for func, val in translationDefinition['funcs'].items():
            funcs[func] = init_fromDict(val, name=func)

        if 'flag_rename_rules' in translationDefinition:
            renameRules = translationDefinition['flag_rename_rules']
        else:
            renameRules = None

        if 'calexpFlags' in translationDefinition:
            for flag in translationDefinition['calexpFlags']:
                funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='calexp')

        if 'refFlags' in translationDefinition:
            for flag in translationDefinition['refFlags']:
                funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='ref')

        if 'forcedFlags' in translationDefinition:
            for flag in translationDefinition['forcedFlags']:
                funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='forced_src')

        if 'flags' in translationDefinition:
            for flag in translationDefinition['flags']:
                funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='meas')

        return cls(funcs, **kwargs)
def mag_aware_eval(df, expr, log):
    """Evaluate an expression on a DataFrame, knowing what the 'mag' function means.

    Builds on `pandas.DataFrame.eval`, which parses and executes math on
    dataframes.  Occurrences of ``mag(col)`` in the expression are rewritten
    to the magnitude formula ``-2.5*log(col)/log(10)`` before evaluation.

    Parameters
    ----------
    df : pandas.DataFrame
        Dataframe on which to evaluate expression.
    expr : str
        Expression.
    log : logging.Logger
        Logger used to report a failed first evaluation attempt.
    """
    mag_call = r'mag\((\w+)\)'
    try:
        translated = re.sub(mag_call, r'-2.5*log(\g<1>)/log(10)', expr)
        return df.eval(translated)
    except Exception as e:  # Should check what actually gets raised
        log.error("Exception in mag_aware_eval: %s: %s", type(e).__name__, e)
        # Retry assuming the mag() argument was a bare column stem that
        # needs the '_instFlux' suffix appended.
        translated = re.sub(mag_call, r'-2.5*log(\g<1>_instFlux)/log(10)', expr)
        return df.eval(translated)
class CustomFunctor(Functor):
    """Arbitrary computation on a catalog.

    Column names (and thus the columns to be loaded from catalog) are found
    by finding all words and trying to ignore all "math-y" words.

    Parameters
    ----------
    expr : str
        Expression to evaluate, to be parsed and executed by
        `mag_aware_eval`.
    """
    # Words that look like identifiers but are math functions, not columns.
    _ignore_words = ('mag', 'sin', 'cos', 'exp', 'log', 'sqrt')

    def __init__(self, expr, **kwargs):
        self.expr = expr
        super().__init__(**kwargs)

    @property
    def name(self):
        return self.expr

    @property
    def columns(self):
        # Arguments of mag(...) calls need special handling: they refer to
        # instFlux columns, possibly without the '_instFlux' suffix.
        mag_args = re.findall(r'mag\(\s*(\w+)\s*\)', self.expr)

        candidates = [word for word in re.findall(r'[a-zA-Z_]+', self.expr)
                      if word not in self._ignore_words]
        bare_stems = []
        for arg in mag_args:
            if re.search('_instFlux$', arg):
                candidates.append(arg)
            else:
                # The bare stem itself is not a column; the suffixed name is.
                candidates.append(f'{arg}_instFlux')
                bare_stems.append(arg)

        return list({c for c in candidates if c not in bare_stems})

    def _func(self, df):
        return mag_aware_eval(df, self.expr, self.log)
class Column(Functor):
    """Get column with specified name.

    The simplest functor: returns the named column unchanged.
    """
    def __init__(self, col, **kwargs):
        self.col = col
        super().__init__(**kwargs)

    @property
    def name(self):
        return self.col

    @property
    def columns(self):
        return [self.col]

    def _func(self, df):
        return df[self.col]
class Index(Functor):
    """Return the value of the index for each object.
    """
    columns = ['coord_ra']  # just a dummy; something has to be here
    _defaultDataset = 'ref'
    _defaultNoDup = True

    def _func(self, df):
        # The result is the dataframe's own row index, as a Series aligned to it.
        return pd.Series(df.index, index=df.index)
class IDColumn(Column):
    """Object ID, taken from the dataframe index."""
    # NOTE(review): `col` drives only which column is *read* (via Column);
    # `_func` ignores it and returns the index — confirm this is intended.
    col = 'id'
    _allow_difference = False
    _defaultNoDup = True

    def _func(self, df):
        return pd.Series(df.index, index=df.index)
class FootprintNPix(Column):
    """The `base_Footprint_nPix` column (source footprint pixel count)."""
    col = 'base_Footprint_nPix'
class CoordColumn(Column):
    """Base class for coordinate column, in degrees.

    If `_radians` is true the stored column is assumed to be in radians and
    is converted to degrees on read.
    """
    _radians = True

    def __init__(self, col, **kwargs):
        super().__init__(col, **kwargs)

    def _func(self, df):
        # Work on a fresh Series so the underlying column (which may be used
        # by another functor) is never modified in place.
        values = df[self.col]
        if self._radians:
            values = values * 180 / np.pi
        return values
class RAColumn(CoordColumn):
    """Right Ascension, in degrees (read from the `coord_ra` column).
    """
    name = 'RA'
    _defaultNoDup = True

    def __init__(self, **kwargs):
        super().__init__('coord_ra', **kwargs)

    def __call__(self, catalog, **kwargs):
        # Pure pass-through; kept so subclass call behavior is explicit.
        return super().__call__(catalog, **kwargs)
class DecColumn(CoordColumn):
    """Declination, in degrees (read from the `coord_dec` column).
    """
    name = 'Dec'
    _defaultNoDup = True

    def __init__(self, **kwargs):
        super().__init__('coord_dec', **kwargs)

    def __call__(self, catalog, **kwargs):
        # Pure pass-through; kept so subclass call behavior is explicit.
        return super().__call__(catalog, **kwargs)
class HtmIndex20(Functor):
    """Compute the level 20 HtmIndex for the catalog.

    Notes
    -----
    This functor was implemented to satisfy requirements of old APDB interface
    which required ``pixelId`` column in DiaObject with HTM20 index. APDB
    interface had migrated to not need that information, but we keep this
    class in case it may be useful for something else.
    """
    name = "Htm20"
    htmLevel = 20
    # Input coordinate columns are assumed to be in radians when True.
    _radians = True

    def __init__(self, ra, decl, **kwargs):
        self.pixelator = sphgeom.HtmPixelization(self.htmLevel)
        self.ra = ra
        self.decl = decl
        self._columns = [self.ra, self.decl]
        super().__init__(**kwargs)

    def _func(self, df):

        def computePixel(row):
            # Build the sphere point in the units the catalog uses, then
            # pixelate its unit vector.
            units = geom.radians if self._radians else geom.degrees
            sphPoint = geom.SpherePoint(row[self.ra], row[self.decl], units)
            return self.pixelator.index(sphPoint.getVector())

        # 'reduce' keeps the per-row scalar results as a plain Series.
        return df.apply(computePixel, axis=1, result_type='reduce').astype('int64')
def fluxName(col):
    """Return *col* with the '_instFlux' suffix appended if not already present."""
    return col if col.endswith('_instFlux') else col + '_instFlux'
def fluxErrName(col):
    """Return *col* with the '_instFluxErr' suffix appended if not already present."""
    return col if col.endswith('_instFluxErr') else col + '_instFluxErr'
class Mag(Functor):
    """Compute calibrated magnitude.

    Takes a `calib` argument, which returns the flux at mag=0
    as `calib.getFluxMag0()`. If not provided, then the default
    `fluxMag0` is 63095734448.0194, which is default for HSC.
    This default should be removed in DM-21955

    This calculation hides warnings about invalid values and dividing by zero.

    As for all functors, a `dataset` and `filt` kwarg should be provided upon
    initialization. Unlike the default `Functor`, however, the default dataset
    for a `Mag` is `'meas'`, rather than `'ref'`.

    Parameters
    ----------
    col : `str`
        Name of flux column from which to compute magnitude. Can be parseable
        by `lsst.pipe.tasks.functors.fluxName` function---that is, you can pass
        `'modelfit_CModel'` instead of `'modelfit_CModel_instFlux'`) and it will
        understand.
    calib : `lsst.afw.image.calib.Calib` (optional)
        Object that knows zero point.
    """
    _defaultDataset = 'meas'

    def __init__(self, col, calib=None, **kwargs):
        self.col = fluxName(col)
        self.calib = calib
        if calib is not None:
            self.fluxMag0 = calib.getFluxMag0()[0]
        else:
            # TO DO: DM-21955 Replace hard coded photometic calibration values
            self.fluxMag0 = 63095734448.0194

        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.col]

    def _func(self, df):
        # Use the stdlib `warnings` module directly: the `np.warnings` alias
        # was removed in NumPy >= 1.25.
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', r'invalid value encountered')
            warnings.filterwarnings('ignore', r'divide by zero')
            return -2.5*np.log10(df[self.col] / self.fluxMag0)

    @property
    def name(self):
        return f'mag_{self.col}'
class MagErr(Mag):
    """Compute calibrated magnitude uncertainty.

    Takes the same `calib` object as `lsst.pipe.tasks.functors.Mag`.

    Parameters
    ----------
    col : `str`
        Name of flux column
    calib : `lsst.afw.image.calib.Calib` (optional)
        Object that knows zero point.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        if self.calib is not None:
            self.fluxMag0Err = self.calib.getFluxMag0()[1]
        else:
            self.fluxMag0Err = 0.

    @property
    def columns(self):
        return [self.col, self.col + 'Err']

    def _func(self, df):
        # Use the stdlib `warnings` module directly: the `np.warnings` alias
        # was removed in NumPy >= 1.25.
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', r'invalid value encountered')
            warnings.filterwarnings('ignore', r'divide by zero')
            fluxCol, fluxErrCol = self.columns
            # Standard error propagation: sigma_mag = 2.5/ln(10) *
            # sqrt((fluxErr/flux)^2 + (fluxMag0Err/fluxMag0)^2)
            x = df[fluxErrCol] / df[fluxCol]
            y = self.fluxMag0Err / self.fluxMag0
            magErr = (2.5 / np.log(10.)) * np.sqrt(x*x + y*y)
            return magErr

    @property
    def name(self):
        return super().name + '_err'
class NanoMaggie(Mag):
    """Flux in nanomaggies: ``1e9 * flux / fluxMag0`` (flux scaled by the
    magnitude-zero-point flux inherited from `Mag`).
    """
    def _func(self, df):
        return (df[self.col] / self.fluxMag0) * 1e9
class MagDiff(Functor):
    """Functor to calculate magnitude difference between two flux columns.

    Parameters
    ----------
    col1, col2 : `str`
        Names of the flux columns (parseable by
        `lsst.pipe.tasks.functors.fluxName`); the result is
        ``mag(col1) - mag(col2)``.
    """
    # Docstring moved above this assignment so it is a real class docstring.
    _defaultDataset = 'meas'

    def __init__(self, col1, col2, **kwargs):
        self.col1 = fluxName(col1)
        self.col2 = fluxName(col2)
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.col1, self.col2]

    def _func(self, df):
        # Use the stdlib `warnings` module directly: the `np.warnings` alias
        # was removed in NumPy >= 1.25.
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', r'invalid value encountered')
            warnings.filterwarnings('ignore', r'divide by zero')
            return -2.5*np.log10(df[self.col1]/df[self.col2])

    @property
    def name(self):
        return f'(mag_{self.col1} - mag_{self.col2})'

    @property
    def shortname(self):
        return f'magDiff_{self.col1}_{self.col2}'
class Color(Functor):
    """Compute the color between two filters.

    Computes color by initializing two different `Mag` functors based on the
    `col` and filters provided, and then returning the difference.

    This is enabled by the `_func` expecting a dataframe with a multilevel
    column index, with both `'band'` and `'column'`, instead of just
    `'column'`, which is the `Functor` default. This is controlled by the
    `_dfLevels` attribute.

    Also of note, the default dataset for `Color` is `forced_src'`, whereas
    for `Mag` it is `'meas'`.

    Parameters
    ----------
    col : str
        Name of flux column from which to compute; same as would be passed to
        `lsst.pipe.tasks.functors.Mag`.
    filt2, filt1 : str
        Filters from which to compute magnitude difference.
        Color computed is `Mag(filt2) - Mag(filt1)`.
    """
    _defaultDataset = 'forced_src'
    # _func needs both the band and the column levels of the multilevel index.
    _dfLevels = ('band', 'column')
    _defaultNoDup = True

    def __init__(self, col, filt2, filt1, **kwargs):
        self.col = fluxName(col)
        if filt2 == filt1:
            raise RuntimeError("Cannot compute Color for %s: %s - %s " % (col, filt2, filt1))
        self.filt2 = filt2
        self.filt1 = filt1

        # Delegate the per-band magnitude calculations to two Mag functors.
        self.mag2 = Mag(col, filt=filt2, **kwargs)
        self.mag1 = Mag(col, filt=filt1, **kwargs)

        super().__init__(**kwargs)

    @property
    def filt(self):
        # A color spans two bands, so a single `filt` is meaningless.
        return None

    @filt.setter
    def filt(self, filt):
        # Deliberately ignore assignments (including the one made by
        # Functor.__init__) for the same reason.
        pass

    def _func(self, df):
        # df carries a `band` column level: select each band's sub-frame and
        # difference the magnitudes.
        mag2 = self.mag2._func(df[self.filt2])
        mag1 = self.mag1._func(df[self.filt1])
        return mag2 - mag1

    @property
    def columns(self):
        return [self.mag1.col, self.mag2.col]

    def multilevelColumns(self, parq, **kwargs):
        # Explicit (dataset, band, column) tuples for both bands.
        return [(self.dataset, self.filt1, self.col), (self.dataset, self.filt2, self.col)]

    @property
    def name(self):
        return f'{self.filt2} - {self.filt1} ({self.col})'

    @property
    def shortname(self):
        return f"{self.col}_{self.filt2.replace('-', '')}m{self.filt1.replace('-', '')}"
class Labeller(Functor):
    """Base class for functors that produce categorical labels.

    Main function of this subclass is to override the dropna=True: label
    columns must keep a row for every object, so NaN rows are never dropped.
    """
    # Category name assigned to objects that cannot be classified.
    _null_label = 'null'
    _allow_difference = False
    name = 'label'
    # When True, subclasses return plain strings instead of a Categorical.
    _force_str = False

    def __call__(self, parq, dropna=False, **kwargs):
        # The `dropna` argument is accepted but deliberately ignored:
        # dropna=False is always forced (see class docstring).
        return super().__call__(parq, dropna=False, **kwargs)
class StarGalaxyLabeller(Labeller):
    """Label each source 'star' or 'galaxy' from its extendedness measurement."""
    _columns = ["base_ClassificationExtendedness_value"]
    _column = "base_ClassificationExtendedness_value"

    def _func(self, df):
        extendedness = df[self._columns][self._column]
        missing = extendedness.isnull()

        # Code 0/1 from the extendedness threshold; code 2 marks missing values.
        codes = (extendedness < 0.5).astype(int)
        codes = codes.mask(missing, 2)

        # TODO: DM-21954 Look into veracity of inline comment below
        # are these backwards?
        categories = ['galaxy', 'star', self._null_label]
        labels = pd.Series(pd.Categorical.from_codes(codes, categories=categories),
                           index=extendedness.index, name='label')
        if self._force_str:
            labels = labels.astype(str)
        return labels
class NumStarLabeller(Labeller):
    """Label objects by how many bands flagged them as a star."""
    _columns = ['numStarFlags']
    labels = {"star": 0, "maybe": 1, "notStar": 2}

    def _func(self, df):
        starFlags = df[self._columns][self._columns[0]]

        # Number of filters
        n = len(starFlags.unique()) - 1

        # Bin the count: <=0 -> noStar, 1..n-1 -> maybe, ==n -> star.
        label = pd.Series(pd.cut(starFlags, [-1, 0, n-1, n],
                                 labels=['noStar', 'maybe', 'star']),
                          index=starFlags.index, name='label')
        return label.astype(str) if self._force_str else label
class DeconvolvedMoments(Functor):
    """Source second moments (trace Ixx+Iyy) minus the PSF model's moments.

    Uses HSM source moments where present (falling back row-wise to SDSS
    shape where the HSM value is not finite), and HSM PSF moments; raises
    if the PSF moment columns are absent.
    """
    name = 'Deconvolved Moments'
    shortname = 'deconvolvedMoments'
    _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
                "ext_shapeHSM_HsmSourceMoments_yy",
                "base_SdssShape_xx", "base_SdssShape_yy",
                "ext_shapeHSM_HsmPsfMoments_xx",
                "ext_shapeHSM_HsmPsfMoments_yy")

    def _func(self, df):
        """Calculate deconvolved moments"""
        if "ext_shapeHSM_HsmSourceMoments_xx" in df.columns:  # _xx added by tdm
            hsm = df["ext_shapeHSM_HsmSourceMoments_xx"] + df["ext_shapeHSM_HsmSourceMoments_yy"]
        else:
            # No HSM source moments at all: NaN placeholder per row.
            hsm = np.ones(len(df))*np.nan
        sdss = df["base_SdssShape_xx"] + df["base_SdssShape_yy"]
        if "ext_shapeHSM_HsmPsfMoments_xx" in df.columns:
            psf = df["ext_shapeHSM_HsmPsfMoments_xx"] + df["ext_shapeHSM_HsmPsfMoments_yy"]
        else:
            # LSST does not have shape.sdss.psf. Could instead add base_PsfShape to catalog using
            # exposure.getPsf().computeShape(s.getCentroid()).getIxx()
            # raise TaskError("No psf shape parameter found in catalog")
            raise RuntimeError('No psf shape parameter found in catalog')

        # Prefer HSM moments; substitute SDSS where HSM is not finite.
        return hsm.where(np.isfinite(hsm), sdss) - psf
class SdssTraceSize(Functor):
    """Functor to calculate SDSS trace radius size for sources"""
    name = "SDSS Trace Size"
    shortname = 'sdssTrace'
    _columns = ("base_SdssShape_xx", "base_SdssShape_yy")

    def _func(self, df):
        # Trace radius: sqrt of the mean of the diagonal second moments.
        return np.sqrt(0.5*(df["base_SdssShape_xx"] + df["base_SdssShape_yy"]))
class PsfSdssTraceSizeDiff(Functor):
    """Functor to calculate SDSS trace radius size difference (%) between object and psf model"""
    name = "PSF - SDSS Trace Size"
    shortname = 'psf_sdssTrace'
    _columns = ("base_SdssShape_xx", "base_SdssShape_yy",
                "base_SdssShape_psf_xx", "base_SdssShape_psf_yy")

    def _func(self, df):
        objSize = np.sqrt(0.5*(df["base_SdssShape_xx"] + df["base_SdssShape_yy"]))
        psfSize = np.sqrt(0.5*(df["base_SdssShape_psf_xx"] + df["base_SdssShape_psf_yy"]))
        # Percent difference, normalized by the mean of the two sizes.
        return 100*(objSize - psfSize)/(0.5*(objSize + psfSize))
class HsmTraceSize(Functor):
    """Functor to calculate HSM trace radius size for sources"""
    name = 'HSM Trace Size'
    shortname = 'hsmTrace'
    _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
                "ext_shapeHSM_HsmSourceMoments_yy")

    def _func(self, df):
        # Trace radius: sqrt of the mean of the diagonal second moments.
        trace = df["ext_shapeHSM_HsmSourceMoments_xx"] + df["ext_shapeHSM_HsmSourceMoments_yy"]
        return np.sqrt(0.5*trace)
class PsfHsmTraceSizeDiff(Functor):
    """Functor to calculate HSM trace radius size difference (%) between object and psf model"""
    name = 'PSF - HSM Trace Size'
    shortname = 'psf_HsmTrace'
    _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
                "ext_shapeHSM_HsmSourceMoments_yy",
                "ext_shapeHSM_HsmPsfMoments_xx",
                "ext_shapeHSM_HsmPsfMoments_yy")

    def _func(self, df):
        objSize = np.sqrt(0.5*(df["ext_shapeHSM_HsmSourceMoments_xx"]
                               + df["ext_shapeHSM_HsmSourceMoments_yy"]))
        psfSize = np.sqrt(0.5*(df["ext_shapeHSM_HsmPsfMoments_xx"]
                               + df["ext_shapeHSM_HsmPsfMoments_yy"]))
        # Percent difference, normalized by the mean of the two sizes.
        return 100*(objSize - psfSize)/(0.5*(objSize + psfSize))
class HsmFwhm(Functor):
    """PSF FWHM in arcseconds derived from the HSM PSF second moments."""
    name = 'HSM Psf FWHM'
    _columns = ('ext_shapeHSM_HsmPsfMoments_xx', 'ext_shapeHSM_HsmPsfMoments_yy')
    # TODO: DM-21403 pixel scale should be computed from the CD matrix or transform matrix
    pixelScale = 0.168
    SIGMA2FWHM = 2*np.sqrt(2*np.log(2))

    def _func(self, df):
        # Trace sigma in pixels, then convert to FWHM in arcseconds.
        sigma = np.sqrt(0.5*(df['ext_shapeHSM_HsmPsfMoments_xx']
                             + df['ext_shapeHSM_HsmPsfMoments_yy']))
        return self.pixelScale*self.SIGMA2FWHM*sigma
class E1(Functor):
    """Distortion ellipticity e1 = (Ixx - Iyy) / (Ixx + Iyy).

    Parameters
    ----------
    colXX, colXY, colYY : `str`
        Names of the second-moment columns.
    """
    name = "Distortion Ellipticity (e1)"
    shortname = "Distortion"

    def __init__(self, colXX, colXY, colYY, **kwargs):
        self.colXX = colXX
        self.colXY = colXY
        self.colYY = colYY
        self._columns = [self.colXX, self.colXY, self.colYY]
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.colXX, self.colXY, self.colYY]

    def _func(self, df):
        # e1 = (Ixx - Iyy) / (Ixx + Iyy).  The original lacked the
        # parentheses around the numerator, so it computed
        # Ixx - Iyy/(Ixx + Iyy) — inconsistent with the e1 definition
        # and with the paired E2 = 2*Ixy/(Ixx + Iyy).
        return (df[self.colXX] - df[self.colYY]) / (df[self.colXX] + df[self.colYY])
class E2(Functor):
    """Distortion ellipticity e2 = 2*Ixy / (Ixx + Iyy)."""
    name = "Ellipticity e2"

    def __init__(self, colXX, colXY, colYY, **kwargs):
        self.colXX = colXX
        self.colXY = colXY
        self.colYY = colYY
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.colXX, self.colXY, self.colYY]

    def _func(self, df):
        trace = df[self.colXX] + df[self.colYY]
        return 2*df[self.colXY] / trace
class RadiusFromQuadrupole(Functor):
    """Determinant radius from quadrupole moments: |Ixx*Iyy - Ixy^2|^(1/4)."""

    def __init__(self, colXX, colXY, colYY, **kwargs):
        self.colXX = colXX
        self.colXY = colXY
        self.colYY = colYY
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.colXX, self.colXY, self.colYY]

    def _func(self, df):
        # Fourth root of the determinant of the quadrupole matrix.
        det = df[self.colXX]*df[self.colYY] - df[self.colXY]**2
        return det**0.25
class LocalWcs(Functor):
    """Computations using the stored localWcs.
    """
    name = "LocalWcsOperations"

    def __init__(self,
                 colCD_1_1,
                 colCD_1_2,
                 colCD_2_1,
                 colCD_2_2,
                 **kwargs):
        self.colCD_1_1 = colCD_1_1
        self.colCD_1_2 = colCD_1_2
        self.colCD_2_1 = colCD_2_1
        self.colCD_2_2 = colCD_2_2
        super().__init__(**kwargs)

    def computeDeltaRaDec(self, x, y, cd11, cd12, cd21, cd22):
        """Apply the local affine (CD-matrix) transform to the pixel offset
        (x, y), giving the corresponding RA/Dec offset.

        Parameters
        ----------
        x : `pandas.Series`
            X pixel coordinate.
        y : `pandas.Series`
            Y pixel coordinate.
        cd11 : `pandas.Series`
            [1, 1] element of the local Wcs affine transform.
        cd12 : `pandas.Series`
            [1, 2] element of the local Wcs affine transform.
        cd21 : `pandas.Series`
            [2, 1] element of the local Wcs affine transform.
        cd22 : `pandas.Series`
            [2, 2] element of the local Wcs affine transform.

        Returns
        -------
        raDecTuple : tuple
            RA and dec conversion of x and y given the local Wcs. Returned
            units are in radians.
        """
        return (x * cd11 + y * cd12, x * cd21 + y * cd22)

    def computeSkySeperation(self, ra1, dec1, ra2, dec2):
        """Compute the on-sky separation between two (RA, Dec) positions
        using the haversine formula.

        Parameters
        ----------
        ra1 : `pandas.Series`
            Ra of the first coordinate in radians.
        dec1 : `pandas.Series`
            Dec of the first coordinate in radians.
        ra2 : `pandas.Series`
            Ra of the second coordinate in radians.
        dec2 : `pandas.Series`
            Dec of the second coordinate in radians.

        Returns
        -------
        dist : `pandas.Series`
            Distance on the sphere in radians.
        """
        deltaDec = dec2 - dec1
        deltaRa = ra2 - ra1
        return 2 * np.arcsin(
            np.sqrt(
                np.sin(deltaDec / 2) ** 2
                + np.cos(dec2) * np.cos(dec1) * np.sin(deltaRa / 2) ** 2))

    def getSkySeperationFromPixel(self, x1, y1, x2, y2, cd11, cd12, cd21, cd22):
        """Compute the distance on the sphere from (x1, y1) to (x2, y2),
        converting both pixel offsets through the local Wcs.

        Parameters
        ----------
        x1 : `pandas.Series`
            X pixel coordinate.
        y1 : `pandas.Series`
            Y pixel coordinate.
        x2 : `pandas.Series`
            X pixel coordinate.
        y2 : `pandas.Series`
            Y pixel coordinate.
        cd11 : `pandas.Series`
            [1, 1] element of the local Wcs affine transform.
        cd12 : `pandas.Series`
            [1, 2] element of the local Wcs affine transform.
        cd21 : `pandas.Series`
            [2, 1] element of the local Wcs affine transform.
        cd22 : `pandas.Series`
            [2, 2] element of the local Wcs affine transform.

        Returns
        -------
        Distance : `pandas.Series`
            Separation between the two positions, in radians.
        """
        ra1, dec1 = self.computeDeltaRaDec(x1, y1, cd11, cd12, cd21, cd22)
        ra2, dec2 = self.computeDeltaRaDec(x2, y2, cd11, cd12, cd21, cd22)
        # Great circle distance for small separations.
        return self.computeSkySeperation(ra1, dec1, ra2, dec2)
class ComputePixelScale(LocalWcs):
    """Compute the local pixel scale from the stored CDMatrix.
    """
    name = "PixelScale"

    @property
    def columns(self):
        return [self.colCD_1_1,
                self.colCD_1_2,
                self.colCD_2_1,
                self.colCD_2_2]

    def pixelScaleArcseconds(self, cd11, cd12, cd21, cd22):
        """Compute the local pixel to scale conversion in arcseconds.

        Parameters
        ----------
        cd11 : `pandas.Series`
            [1, 1] element of the local Wcs affine transform in radians.
        cd12 : `pandas.Series`
            [1, 2] element of the local Wcs affine transform in radians.
        cd21 : `pandas.Series`
            [2, 1] element of the local Wcs affine transform in radians.
        cd22 : `pandas.Series`
            [2, 2] element of the local Wcs affine transform in radians.

        Returns
        -------
        pixScale : `pandas.Series`
            Arcseconds per pixel at the location of the local WC
        """
        # Pixel scale = sqrt(|det CD|), converted from radians to arcsec.
        return 3600 * np.degrees(np.sqrt(np.fabs(cd11 * cd22 - cd12 * cd21)))

    def _func(self, df):
        return self.pixelScaleArcseconds(df[self.colCD_1_1],
                                         df[self.colCD_1_2],
                                         df[self.colCD_2_1],
                                         df[self.colCD_2_2])
class ConvertPixelToArcseconds(ComputePixelScale):
    """Convert a value in units of pixels to units of arcseconds.

    Multiplies column ``col`` by the local pixel scale derived from the
    CD-matrix columns.  (The original docstring said "pixels squared",
    which describes `ConvertPixelSqToArcsecondsSq` instead.)
    """

    def __init__(self,
                 col,
                 colCD_1_1,
                 colCD_1_2,
                 colCD_2_1,
                 colCD_2_2,
                 **kwargs):
        self.col = col
        super().__init__(colCD_1_1,
                         colCD_1_2,
                         colCD_2_1,
                         colCD_2_2,
                         **kwargs)

    @property
    def name(self):
        return f"{self.col}_asArcseconds"

    @property
    def columns(self):
        return [self.col,
                self.colCD_1_1,
                self.colCD_1_2,
                self.colCD_2_1,
                self.colCD_2_2]

    def _func(self, df):
        # Linear quantity: multiply by the pixel scale once.
        return df[self.col] * self.pixelScaleArcseconds(df[self.colCD_1_1],
                                                        df[self.colCD_1_2],
                                                        df[self.colCD_2_1],
                                                        df[self.colCD_2_2])
class ConvertPixelSqToArcsecondsSq(ComputePixelScale):
    """Convert a value in units of pixels squared to units of arcseconds
    squared.

    Multiplies column ``col`` by the square of the local pixel scale.
    (The original docstring said "pixels to arcseconds", which describes
    `ConvertPixelToArcseconds` instead.)
    """

    def __init__(self,
                 col,
                 colCD_1_1,
                 colCD_1_2,
                 colCD_2_1,
                 colCD_2_2,
                 **kwargs):
        self.col = col
        super().__init__(colCD_1_1,
                         colCD_1_2,
                         colCD_2_1,
                         colCD_2_2,
                         **kwargs)

    @property
    def name(self):
        return f"{self.col}_asArcsecondsSq"

    @property
    def columns(self):
        return [self.col,
                self.colCD_1_1,
                self.colCD_1_2,
                self.colCD_2_1,
                self.colCD_2_2]

    def _func(self, df):
        pixScale = self.pixelScaleArcseconds(df[self.colCD_1_1],
                                             df[self.colCD_1_2],
                                             df[self.colCD_2_1],
                                             df[self.colCD_2_2])
        # Quadratic quantity: multiply by the pixel scale twice.
        return df[self.col] * pixScale * pixScale
class ReferenceBand(Functor):
    """Return, per object, the band flagged as the reference measurement."""
    name = 'Reference Band'
    shortname = 'refBand'

    @property
    def columns(self):
        return ["merge_measurement_i",
                "merge_measurement_r",
                "merge_measurement_z",
                "merge_measurement_y",
                "merge_measurement_g",
                "merge_measurement_u"]

    def _func(self, df: pd.DataFrame) -> pd.Series:
        def bandOfRow(row):
            # get column name with the max value (True > False)
            return row.idxmax().replace('merge_measurement_', '')

        # Skip columns that are unavailable, because this functor requests the
        # superset of bands that could be included in the object table
        present = [col for col in self.columns if col in df.columns]
        # Makes a Series of dtype object if df is empty
        return df[present].apply(bandOfRow, axis=1,
                                 result_type='reduce').astype('object')
class Photometry(Functor):
    """Base class for converting instrumental fluxes (DN) to calibrated units.

    Parameters
    ----------
    colFlux : `str`
        Name of the flux column to calibrate.
    colFluxErr : `str`, optional
        Name of the associated flux-error column.
    calib : optional
        Calibration object providing ``getFluxMag0()``; when `None`, the
        hard-coded ``COADD_ZP`` zeropoint is used.
    """
    # AB to NanoJansky (3631 Jansky)
    AB_FLUX_SCALE = (0 * u.ABmag).to_value(u.nJy)
    LOG_AB_FLUX_SCALE = 12.56
    FIVE_OVER_2LOG10 = 1.085736204758129569
    # TO DO: DM-21955 Replace hard coded photometic calibration values
    COADD_ZP = 27

    def __init__(self, colFlux, colFluxErr=None, calib=None, **kwargs):
        self.vhypot = np.vectorize(self.hypot)
        self.col = colFlux
        self.colFluxErr = colFluxErr

        self.calib = calib
        if calib is not None:
            self.fluxMag0, self.fluxMag0Err = calib.getFluxMag0()
        else:
            # Fall back to the hard-coded coadd zeropoint.
            self.fluxMag0 = 1./np.power(10, -0.4*self.COADD_ZP)
            self.fluxMag0Err = 0.

        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.col]

    @property
    def name(self):
        return f'mag_{self.col}'

    @classmethod
    def hypot(cls, a, b):
        """Overflow-safe scalar hypotenuse (vectorized as ``self.vhypot``)."""
        if np.abs(a) < np.abs(b):
            a, b = b, a
        if a == 0.:
            return 0.
        q = b/a
        return np.abs(a) * np.sqrt(1. + q*q)

    def dn2flux(self, dn, fluxMag0):
        """Convert raw DN to flux in nanojansky."""
        return self.AB_FLUX_SCALE * dn / fluxMag0

    def dn2mag(self, dn, fluxMag0):
        """Convert raw DN to AB magnitude; non-positive fluxes yield
        NaN/inf without emitting warnings."""
        # np.errstate replaces the ``np.warnings`` alias used previously
        # (an accidental re-export removed in NumPy 1.25); it suppresses
        # the same divide-by-zero / invalid-value warnings at the source.
        with np.errstate(invalid='ignore', divide='ignore'):
            return -2.5 * np.log10(dn/fluxMag0)

    def dn2fluxErr(self, dn, dnErr, fluxMag0, fluxMag0Err):
        """Propagate DN and zeropoint errors to a flux error in nanojansky."""
        retVal = self.vhypot(dn * fluxMag0Err, dnErr * fluxMag0)
        retVal *= self.AB_FLUX_SCALE / fluxMag0 / fluxMag0
        return retVal

    def dn2MagErr(self, dn, dnErr, fluxMag0, fluxMag0Err):
        """Propagate DN and zeropoint errors to a magnitude error."""
        retVal = self.dn2fluxErr(dn, dnErr, fluxMag0, fluxMag0Err) / self.dn2flux(dn, fluxMag0)
        return self.FIVE_OVER_2LOG10 * retVal
class NanoJansky(Photometry):
    """Calibrated flux in nanojansky from the configured DN column."""
    def _func(self, df):
        dn = df[self.col]
        return self.dn2flux(dn, self.fluxMag0)
class NanoJanskyErr(Photometry):
    """Error on the calibrated nanojansky flux."""
    @property
    def columns(self):
        return [self.col, self.colFluxErr]

    def _func(self, df):
        fluxErr = self.dn2fluxErr(df[self.col], df[self.colFluxErr],
                                  self.fluxMag0, self.fluxMag0Err)
        # dn2fluxErr returns a bare array; re-attach the frame's index.
        return pd.Series(fluxErr, index=df.index)
class Magnitude(Photometry):
    """AB magnitude from the configured DN column."""
    def _func(self, df):
        dn = df[self.col]
        return self.dn2mag(dn, self.fluxMag0)
class MagnitudeErr(Photometry):
    """Error on the AB magnitude."""
    @property
    def columns(self):
        return [self.col, self.colFluxErr]

    def _func(self, df):
        magErr = self.dn2MagErr(df[self.col], df[self.colFluxErr],
                                self.fluxMag0, self.fluxMag0Err)
        # dn2MagErr returns a bare array; re-attach the frame's index.
        return pd.Series(magErr, index=df.index)
class LocalPhotometry(Functor):
    """Base class for calibrating the specified instrument flux column using
    the local photometric calibration.

    Parameters
    ----------
    instFluxCol : `str`
        Name of the instrument flux column.
    instFluxErrCol : `str`
        Name of the assocated error columns for ``instFluxCol``.
    photoCalibCol : `str`
        Name of local calibration column.
    photoCalibErrCol : `str`
        Error associated with ``photoCalibCol``

    See also
    --------
    LocalPhotometry
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    """
    logNJanskyToAB = (1 * u.nJy).to_value(u.ABmag)

    def __init__(self,
                 instFluxCol,
                 instFluxErrCol,
                 photoCalibCol,
                 photoCalibErrCol,
                 **kwargs):
        self.instFluxCol = instFluxCol
        self.instFluxErrCol = instFluxErrCol
        self.photoCalibCol = photoCalibCol
        self.photoCalibErrCol = photoCalibErrCol
        super().__init__(**kwargs)

    def instFluxToNanojansky(self, instFlux, localCalib):
        """Convert instrument flux to nanojanskys.

        Parameters
        ----------
        instFlux : `numpy.ndarray` or `pandas.Series`
            Array of instrument flux measurements
        localCalib : `numpy.ndarray` or `pandas.Series`
            Array of local photometric calibration estimates.

        Returns
        -------
        calibFlux : `numpy.ndarray` or `pandas.Series`
            Array of calibrated flux measurements.
        """
        return instFlux * localCalib

    def instFluxErrToNanojanskyErr(self, instFlux, instFluxErr, localCalib, localCalibErr):
        """Convert instrument flux error to a nanojansky error.

        Parameters
        ----------
        instFlux : `numpy.ndarray` or `pandas.Series`
            Array of instrument flux measurements
        instFluxErr : `numpy.ndarray` or `pandas.Series`
            Errors on associated ``instFlux`` values
        localCalib : `numpy.ndarray` or `pandas.Series`
            Array of local photometric calibration estimates.
        localCalibErr : `numpy.ndarray` or `pandas.Series`
            Errors on associated ``localCalib`` values

        Returns
        -------
        calibFluxErr : `numpy.ndarray` or `pandas.Series`
            Errors on calibrated flux measurements.
        """
        return np.hypot(instFluxErr * localCalib, instFlux * localCalibErr)

    def instFluxToMagnitude(self, instFlux, localCalib):
        """Convert instrument flux to an AB magnitude.

        Parameters
        ----------
        instFlux : `numpy.ndarray` or `pandas.Series`
            Array of instrument flux measurements
        localCalib : `numpy.ndarray` or `pandas.Series`
            Array of local photometric calibration estimates.

        Returns
        -------
        calibMag : `numpy.ndarray` or `pandas.Series`
            Array of calibrated AB magnitudes.
        """
        return -2.5 * np.log10(self.instFluxToNanojansky(instFlux, localCalib)) + self.logNJanskyToAB

    def instFluxErrToMagnitudeErr(self, instFlux, instFluxErr, localCalib, localCalibErr):
        """Convert instrument flux error to an AB magnitude error.

        Parameters
        ----------
        instFlux : `numpy.ndarray` or `pandas.Series`
            Array of instrument flux measurements
        instFluxErr : `numpy.ndarray` or `pandas.Series`
            Errors on associated ``instFlux`` values
        localCalib : `numpy.ndarray` or `pandas.Series`
            Array of local photometric calibration estimates.
        localCalibErr : `numpy.ndarray` or `pandas.Series`
            Errors on associated ``localCalib`` values

        Returns
        -------
        calibMagErr: `numpy.ndarray` or `pandas.Series`
            Error on calibrated AB magnitudes.
        """
        err = self.instFluxErrToNanojanskyErr(instFlux, instFluxErr, localCalib, localCalibErr)
        # The denominator is the calibrated flux, so it must be built from
        # localCalib; the original mistakenly passed instFluxErr here.
        return 2.5 / np.log(10) * err / self.instFluxToNanojansky(instFlux, localCalib)
class LocalNanojansky(LocalPhotometry):
    """Compute calibrated fluxes using the local calibration value.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    """

    @property
    def columns(self):
        return [self.instFluxCol, self.photoCalibCol]

    @property
    def name(self):
        return f'flux_{self.instFluxCol}'

    def _func(self, df):
        instFlux = df[self.instFluxCol]
        localCalib = df[self.photoCalibCol]
        return self.instFluxToNanojansky(instFlux, localCalib)
class LocalNanojanskyErr(LocalPhotometry):
    """Compute calibrated flux errors using the local calibration value.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    """

    @property
    def columns(self):
        return [self.instFluxCol, self.instFluxErrCol,
                self.photoCalibCol, self.photoCalibErrCol]

    @property
    def name(self):
        return f'fluxErr_{self.instFluxCol}'

    def _func(self, df):
        return self.instFluxErrToNanojanskyErr(
            df[self.instFluxCol], df[self.instFluxErrCol],
            df[self.photoCalibCol], df[self.photoCalibErrCol])
class LocalMagnitude(LocalPhotometry):
    """Compute calibrated AB magnitudes using the local calibration value.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    """

    @property
    def columns(self):
        return [self.instFluxCol, self.photoCalibCol]

    @property
    def name(self):
        return f'mag_{self.instFluxCol}'

    def _func(self, df):
        instFlux = df[self.instFluxCol]
        localCalib = df[self.photoCalibCol]
        return self.instFluxToMagnitude(instFlux, localCalib)
class LocalMagnitudeErr(LocalPhotometry):
    """Compute calibrated AB magnitude errors using the local calibration value.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    """

    @property
    def columns(self):
        return [self.instFluxCol, self.instFluxErrCol,
                self.photoCalibCol, self.photoCalibErrCol]

    @property
    def name(self):
        return f'magErr_{self.instFluxCol}'

    def _func(self, df):
        return self.instFluxErrToMagnitudeErr(
            df[self.instFluxCol],
            df[self.instFluxErrCol],
            df[self.photoCalibCol],
            df[self.photoCalibErrCol])
class LocalDipoleMeanFlux(LocalPhotometry):
    """Compute absolute mean of dipole fluxes.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    LocalDipoleMeanFlux
    LocalDipoleMeanFluxErr
    LocalDipoleDiffFlux
    LocalDipoleDiffFluxErr
    """
    def __init__(self,
                 instFluxPosCol,
                 instFluxNegCol,
                 instFluxPosErrCol,
                 instFluxNegErrCol,
                 photoCalibCol,
                 photoCalibErrCol,
                 **kwargs):
        self.instFluxNegCol = instFluxNegCol
        self.instFluxPosCol = instFluxPosCol
        self.instFluxNegErrCol = instFluxNegErrCol
        self.instFluxPosErrCol = instFluxPosErrCol
        self.photoCalibCol = photoCalibCol
        self.photoCalibErrCol = photoCalibErrCol
        # NOTE(review): the base class is initialised with the *negative*
        # lobe columns as its generic flux columns — confirm intentional.
        super().__init__(instFluxNegCol,
                         instFluxNegErrCol,
                         photoCalibCol,
                         photoCalibErrCol,
                         **kwargs)

    @property
    def columns(self):
        return [self.instFluxPosCol,
                self.instFluxNegCol,
                self.photoCalibCol]

    @property
    def name(self):
        return f'dipMeanFlux_{self.instFluxPosCol}_{self.instFluxNegCol}'

    def _func(self, df):
        # Mean of the absolute calibrated fluxes of the two dipole lobes.
        localCalib = df[self.photoCalibCol]
        negFlux = np.fabs(self.instFluxToNanojansky(df[self.instFluxNegCol], localCalib))
        posFlux = np.fabs(self.instFluxToNanojansky(df[self.instFluxPosCol], localCalib))
        return 0.5*(negFlux + posFlux)
class LocalDipoleMeanFluxErr(LocalDipoleMeanFlux):
    """Compute the error on the absolute mean of dipole fluxes.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    LocalDipoleMeanFlux
    LocalDipoleMeanFluxErr
    LocalDipoleDiffFlux
    LocalDipoleDiffFluxErr
    """

    @property
    def columns(self):
        return [self.instFluxPosCol,
                self.instFluxNegCol,
                self.instFluxPosErrCol,
                self.instFluxNegErrCol,
                self.photoCalibCol,
                self.photoCalibErrCol]

    @property
    def name(self):
        return f'dipMeanFluxErr_{self.instFluxPosCol}_{self.instFluxNegCol}'

    def _func(self, df):
        # Standard propagation for 0.5*(|neg| + |pos|)*calib: the
        # calibration-error term scales the *sum* of the absolute lobe
        # fluxes.  The original grouped photoCalibErr with only the
        # positive lobe; LocalDipoleDiffFluxErr groups its numerator
        # correctly and shows the intended pattern.
        return 0.5*np.sqrt(
            ((np.fabs(df[self.instFluxNegCol]) + np.fabs(df[self.instFluxPosCol]))
             * df[self.photoCalibErrCol])**2
            + (df[self.instFluxNegErrCol]**2 + df[self.instFluxPosErrCol]**2)
            * df[self.photoCalibCol]**2)
class LocalDipoleDiffFlux(LocalDipoleMeanFlux):
    """Compute the absolute difference of dipole fluxes.

    Value is (abs(pos) - abs(neg))

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    LocalDipoleMeanFlux
    LocalDipoleMeanFluxErr
    LocalDipoleDiffFlux
    LocalDipoleDiffFluxErr
    """

    @property
    def columns(self):
        return [self.instFluxPosCol,
                self.instFluxNegCol,
                self.photoCalibCol]

    @property
    def name(self):
        return f'dipDiffFlux_{self.instFluxPosCol}_{self.instFluxNegCol}'

    def _func(self, df):
        localCalib = df[self.photoCalibCol]
        posFlux = np.fabs(self.instFluxToNanojansky(df[self.instFluxPosCol], localCalib))
        negFlux = np.fabs(self.instFluxToNanojansky(df[self.instFluxNegCol], localCalib))
        return posFlux - negFlux
class LocalDipoleDiffFluxErr(LocalDipoleMeanFlux):
    """Compute the error on the absolute difference of dipole fluxes.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    LocalDipoleMeanFlux
    LocalDipoleMeanFluxErr
    LocalDipoleDiffFlux
    LocalDipoleDiffFluxErr
    """

    @property
    def columns(self):
        return [self.instFluxPosCol,
                self.instFluxNegCol,
                self.instFluxPosErrCol,
                self.instFluxNegErrCol,
                self.photoCalibCol,
                self.photoCalibErrCol]

    @property
    def name(self):
        return f'dipDiffFluxErr_{self.instFluxPosCol}_{self.instFluxNegCol}'

    def _func(self, df):
        # Propagate both the calibration error (scaling the flux
        # difference) and the lobe flux errors (scaled by the calibration).
        diffTerm = np.fabs(df[self.instFluxPosCol]) - np.fabs(df[self.instFluxNegCol])
        fluxVar = (df[self.instFluxPosErrCol]**2 + df[self.instFluxNegErrCol]**2) \
            * df[self.photoCalibCol]**2
        return np.sqrt((diffTerm * df[self.photoCalibErrCol])**2 + fluxVar)
class Ratio(Functor):
    """Base class for returning the ratio of 2 columns.

    Can be used to compute a Signal to Noise ratio for any input flux.

    Parameters
    ----------
    numerator : `str`
        Name of the column to use at the numerator in the ratio
    denominator : `str`
        Name of the column to use as the denominator in the ratio.
    """
    def __init__(self,
                 numerator,
                 denominator,
                 **kwargs):
        self.numerator = numerator
        self.denominator = denominator
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.numerator, self.denominator]

    @property
    def name(self):
        return f'ratio_{self.numerator}_{self.denominator}'

    def _func(self, df):
        # np.errstate replaces the ``np.warnings`` alias used previously
        # (an accidental re-export removed in NumPy 1.25); zero or NaN
        # denominators yield inf/NaN without emitting RuntimeWarnings.
        with np.errstate(invalid='ignore', divide='ignore'):
            return df[self.numerator] / df[self.denominator]
class Ebv(Functor):
    """Compute E(B-V) from dustmaps.sfd
    """
    _defaultDataset = 'ref'
    name = "E(B-V)"
    shortname = "ebv"

    def __init__(self, **kwargs):
        # import is only needed for Ebv
        from dustmaps.sfd import SFDQuery
        self._columns = ['coord_ra', 'coord_dec']
        self.sfd = SFDQuery()
        super().__init__(**kwargs)

    def _func(self, df):
        positions = SkyCoord(df['coord_ra']*u.rad, df['coord_dec']*u.rad)
        reddening = self.sfd(positions)
        # Double precision unnecessary scientifically
        # but currently needed for ingest to qserv
        return pd.Series(reddening, index=df.index).astype('float64')