Coverage for python/lsst/pipe/tasks/functors.py: 36%
809 statements
« prev ^ index » next coverage.py v7.2.3, created at 2023-04-27 03:36 -0700
« prev ^ index » next coverage.py v7.2.3, created at 2023-04-27 03:36 -0700
1# This file is part of pipe_tasks.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <https://www.gnu.org/licenses/>.
22__all__ = ["Functor", "CompositeFunctor", "CustomFunctor", "Column", "Index",
23 "IDColumn", "FootprintNPix", "CoordColumn", "RAColumn", "DecColumn",
24 "HtmIndex20", "Mag", "MagErr", "NanoMaggie", "MagDiff", "Color",
25 "Labeller", "StarGalaxyLabeller", "NumStarLabeller", "DeconvolvedMoments",
26 "SdssTraceSize", "PsfSdssTraceSizeDiff", "HsmTraceSize", "PsfHsmTraceSizeDiff",
27 "HsmFwhm", "E1", "E2", "RadiusFromQuadrupole", "LocalWcs", "ComputePixelScale",
28 "ConvertPixelToArcseconds", "ConvertPixelSqToArcsecondsSq", "ReferenceBand",
29 "Photometry", "NanoJansky", "NanoJanskyErr", "Magnitude", "MagnitudeErr",
30 "LocalPhotometry", "LocalNanojansky", "LocalNanojanskyErr",
31 "LocalMagnitude", "LocalMagnitudeErr", "LocalDipoleMeanFlux",
32 "LocalDipoleMeanFluxErr", "LocalDipoleDiffFlux", "LocalDipoleDiffFluxErr",
33 "Ratio", "Ebv"]
35import yaml
36import re
37from itertools import product
38import logging
39import os.path
41import pandas as pd
42import numpy as np
43import astropy.units as u
44from astropy.coordinates import SkyCoord
46from lsst.utils import doImport
47from lsst.utils.introspection import get_full_type_name
48from lsst.daf.butler import DeferredDatasetHandle
49from lsst.pipe.base import InMemoryDatasetHandle
50import lsst.geom as geom
51import lsst.sphgeom as sphgeom
54def init_fromDict(initDict, basePath='lsst.pipe.tasks.functors',
55 typeKey='functor', name=None):
56 """Initialize an object defined in a dictionary
58 The object needs to be importable as
59 f'{basePath}.{initDict[typeKey]}'
60 The positional and keyword arguments (if any) are contained in
61 "args" and "kwargs" entries in the dictionary, respectively.
62 This is used in `functors.CompositeFunctor.from_yaml` to initialize
63 a composite functor from a specification in a YAML file.
65 Parameters
66 ----------
67 initDict : dictionary
68 Dictionary describing object's initialization. Must contain
69 an entry keyed by ``typeKey`` that is the name of the object,
70 relative to ``basePath``.
71 basePath : str
72 Path relative to module in which ``initDict[typeKey]`` is defined.
73 typeKey : str
74 Key of ``initDict`` that is the name of the object
75 (relative to `basePath`).
76 """
77 initDict = initDict.copy()
78 # TO DO: DM-21956 We should be able to define functors outside this module
79 pythonType = doImport(f'{basePath}.{initDict.pop(typeKey)}')
80 args = []
81 if 'args' in initDict:
82 args = initDict.pop('args')
83 if isinstance(args, str):
84 args = [args]
85 try:
86 element = pythonType(*args, **initDict)
87 except Exception as e:
88 message = f'Error in constructing functor "{name}" of type {pythonType.__name__} with args: {args}'
89 raise type(e)(message, e.args)
90 return element
93class Functor(object):
94 """Define and execute a calculation on a DataFrame or Handle holding a DataFrame.
96 The `__call__` method accepts either a `DataFrame` object or a
97 `DeferredDatasetHandle` or `InMemoryDatasetHandle`, and returns the
98 result of the calculation as a single column. Each functor defines what
99 columns are needed for the calculation, and only these columns are read
100 from the dataset handle.
102 The action of `__call__` consists of two steps: first, loading the
103 necessary columns from disk into memory as a `pandas.DataFrame` object;
104 and second, performing the computation on this dataframe and returning the
105 result.
108 To define a new `Functor`, a subclass must define a `_func` method,
109 that takes a `pandas.DataFrame` and returns result in a `pandas.Series`.
110 In addition, it must define the following attributes
112 * `_columns`: The columns necessary to perform the calculation
113 * `name`: A name appropriate for a figure axis label
114 * `shortname`: A name appropriate for use as a dictionary key
116 On initialization, a `Functor` should declare what band (`filt` kwarg)
117 and dataset (e.g. `'ref'`, `'meas'`, `'forced_src'`) it is intended to be
118 applied to. This enables the `_get_data` method to extract the proper
119 columns from the underlying data. If not specified, the dataset will fall back
120 on the `_defaultDataset`attribute. If band is not specified and `dataset`
121 is anything other than `'ref'`, then an error will be raised when trying to
122 perform the calculation.
124 Originally, `Functor` was set up to expect
125 datasets formatted like the `deepCoadd_obj` dataset; that is, a
126 dataframe with a multi-level column index, with the levels of the
127 column index being `band`, `dataset`, and `column`.
128 It has since been generalized to apply to dataframes without mutli-level
129 indices and multi-level indices with just `dataset` and `column` levels.
130 In addition, the `_get_data` method that reads
131 the columns from the underlying data will return a dataframe with column
132 index levels defined by the `_dfLevels` attribute; by default, this is
133 `column`.
135 The `_dfLevels` attributes should generally not need to
136 be changed, unless `_func` needs columns from multiple filters or datasets
137 to do the calculation.
138 An example of this is the `lsst.pipe.tasks.functors.Color` functor, for
139 which `_dfLevels = ('band', 'column')`, and `_func` expects the dataframe
140 it gets to have those levels in the column index.
142 Parameters
143 ----------
144 filt : str
145 Filter upon which to do the calculation
147 dataset : str
148 Dataset upon which to do the calculation
149 (e.g., 'ref', 'meas', 'forced_src').
150 """
152 _defaultDataset = 'ref'
153 _dfLevels = ('column',)
154 _defaultNoDup = False
156 def __init__(self, filt=None, dataset=None, noDup=None):
157 self.filt = filt
158 self.dataset = dataset if dataset is not None else self._defaultDataset
159 self._noDup = noDup
160 self.log = logging.getLogger(type(self).__name__)
162 @property
163 def noDup(self):
164 if self._noDup is not None:
165 return self._noDup
166 else:
167 return self._defaultNoDup
169 @property
170 def columns(self):
171 """Columns required to perform calculation
172 """
173 if not hasattr(self, '_columns'):
174 raise NotImplementedError('Must define columns property or _columns attribute')
175 return self._columns
177 def _get_data_columnLevels(self, data, columnIndex=None):
178 """Gets the names of the column index levels
180 This should only be called in the context of a multilevel table.
182 Parameters
183 ----------
184 data : various
185 The data to be read, can be a `DeferredDatasetHandle` or
186 `InMemoryDatasetHandle`.
187 columnnIndex (optional): pandas `Index` object
188 If not passed, then it is read from the `DeferredDatasetHandle`
189 for `InMemoryDatasetHandle`.
190 """
191 if columnIndex is None:
192 columnIndex = data.get(component="columns")
193 return columnIndex.names
195 def _get_data_columnLevelNames(self, data, columnIndex=None):
196 """Gets the content of each of the column levels for a multilevel table.
197 """
198 if columnIndex is None:
199 columnIndex = data.get(component="columns")
201 columnLevels = columnIndex.names
202 columnLevelNames = {
203 level: list(np.unique(np.array([c for c in columnIndex])[:, i]))
204 for i, level in enumerate(columnLevels)
205 }
206 return columnLevelNames
208 def _colsFromDict(self, colDict, columnIndex=None):
209 """Converts dictionary column specficiation to a list of columns
210 """
211 new_colDict = {}
212 columnLevels = self._get_data_columnLevels(None, columnIndex=columnIndex)
214 for i, lev in enumerate(columnLevels):
215 if lev in colDict:
216 if isinstance(colDict[lev], str):
217 new_colDict[lev] = [colDict[lev]]
218 else:
219 new_colDict[lev] = colDict[lev]
220 else:
221 new_colDict[lev] = columnIndex.levels[i]
223 levelCols = [new_colDict[lev] for lev in columnLevels]
224 cols = list(product(*levelCols))
225 colsAvailable = [col for col in cols if col in columnIndex]
226 return colsAvailable
228 def multilevelColumns(self, data, columnIndex=None, returnTuple=False):
229 """Returns columns needed by functor from multilevel dataset
231 To access tables with multilevel column structure, the `DeferredDatasetHandle`
232 or `InMemoryDatasetHandle` need to be passed either a list of tuples or a
233 dictionary.
235 Parameters
236 ----------
237 data : various
238 The data as either `DeferredDatasetHandle`, or `InMemoryDatasetHandle`.
239 columnIndex (optional): pandas `Index` object
240 either passed or read in from `DeferredDatasetHandle`.
241 `returnTuple` : `bool`
242 If true, then return a list of tuples rather than the column dictionary
243 specification. This is set to `True` by `CompositeFunctor` in order to be able to
244 combine columns from the various component functors.
246 """
247 if not isinstance(data, (DeferredDatasetHandle, InMemoryDatasetHandle)):
248 raise RuntimeError(f"Unexpected data type. Got {get_full_type_name(data)}.")
250 if columnIndex is None:
251 columnIndex = data.get(component="columns")
253 # Confirm that the dataset has the column levels the functor is expecting it to have.
254 columnLevels = self._get_data_columnLevels(data, columnIndex)
256 columnDict = {'column': self.columns,
257 'dataset': self.dataset}
258 if self.filt is None:
259 columnLevelNames = self._get_data_columnLevelNames(data, columnIndex)
260 if "band" in columnLevels:
261 if self.dataset == "ref":
262 columnDict["band"] = columnLevelNames["band"][0]
263 else:
264 raise ValueError(f"'filt' not set for functor {self.name}"
265 f"(dataset {self.dataset}) "
266 "and DataFrame "
267 "contains multiple filters in column index. "
268 "Set 'filt' or set 'dataset' to 'ref'.")
269 else:
270 columnDict['band'] = self.filt
272 if returnTuple:
273 return self._colsFromDict(columnDict, columnIndex=columnIndex)
274 else:
275 return columnDict
277 def _func(self, df, dropna=True):
278 raise NotImplementedError('Must define calculation on dataframe')
280 def _get_columnIndex(self, data):
281 """Return columnIndex
282 """
284 if isinstance(data, (DeferredDatasetHandle, InMemoryDatasetHandle)):
285 return data.get(component="columns")
286 else:
287 return None
289 def _get_data(self, data):
290 """Retrieve dataframe necessary for calculation.
292 The data argument can be a `DataFrame`, a `DeferredDatasetHandle`, or an
293 `InMemoryDatasetHandle`.
295 Returns dataframe upon which `self._func` can act.
296 """
297 # We wrap a dataframe in a handle here to take advantage of the dataframe
298 # delegate dataframe column wrangling abilities.
299 if isinstance(data, pd.DataFrame):
300 _data = InMemoryDatasetHandle(data, storageClass="DataFrame")
301 elif isinstance(data, (DeferredDatasetHandle, InMemoryDatasetHandle)):
302 _data = data
303 else:
304 raise RuntimeError(f"Unexpected type provided for data. Got {get_full_type_name(data)}.")
306 # First thing to do: check to see if the data source has a multilevel column index or not.
307 columnIndex = self._get_columnIndex(_data)
308 is_multiLevel = isinstance(columnIndex, pd.MultiIndex)
310 # Get proper columns specification for this functor
311 if is_multiLevel:
312 columns = self.multilevelColumns(_data, columnIndex=columnIndex)
313 else:
314 columns = self.columns
316 # Load in-memory dataframe with appropriate columns the gen3 way
317 df = _data.get(parameters={"columns": columns})
319 # Drop unnecessary column levels
320 if is_multiLevel:
321 df = self._setLevels(df)
323 return df
325 def _setLevels(self, df):
326 levelsToDrop = [n for n in df.columns.names if n not in self._dfLevels]
327 df.columns = df.columns.droplevel(levelsToDrop)
328 return df
330 def _dropna(self, vals):
331 return vals.dropna()
333 def __call__(self, data, dropna=False):
334 df = self._get_data(data)
335 try:
336 vals = self._func(df)
337 except Exception as e:
338 self.log.error("Exception in %s call: %s: %s", self.name, type(e).__name__, e)
339 vals = self.fail(df)
340 if dropna:
341 vals = self._dropna(vals)
343 return vals
345 def difference(self, data1, data2, **kwargs):
346 """Computes difference between functor called on two different DataFrame/Handle objects
347 """
348 return self(data1, **kwargs) - self(data2, **kwargs)
350 def fail(self, df):
351 return pd.Series(np.full(len(df), np.nan), index=df.index)
353 @property
354 def name(self):
355 """Full name of functor (suitable for figure labels)
356 """
357 return NotImplementedError
359 @property
360 def shortname(self):
361 """Short name of functor (suitable for column name/dict key)
362 """
363 return self.name
366class CompositeFunctor(Functor):
367 """Perform multiple calculations at once on a catalog.
369 The role of a `CompositeFunctor` is to group together computations from
370 multiple functors. Instead of returning `pandas.Series` a
371 `CompositeFunctor` returns a `pandas.Dataframe`, with the column names
372 being the keys of `funcDict`.
374 The `columns` attribute of a `CompositeFunctor` is the union of all columns
375 in all the component functors.
377 A `CompositeFunctor` does not use a `_func` method itself; rather,
378 when a `CompositeFunctor` is called, all its columns are loaded
379 at once, and the resulting dataframe is passed to the `_func` method of each component
380 functor. This has the advantage of only doing I/O (reading from parquet file) once,
381 and works because each individual `_func` method of each component functor does not
382 care if there are *extra* columns in the dataframe being passed; only that it must contain
383 *at least* the `columns` it expects.
385 An important and useful class method is `from_yaml`, which takes as argument the path to a YAML
386 file specifying a collection of functors.
388 Parameters
389 ----------
390 funcs : `dict` or `list`
391 Dictionary or list of functors. If a list, then it will be converted
392 into a dictonary according to the `.shortname` attribute of each functor.
394 """
395 dataset = None
397 def __init__(self, funcs, **kwargs):
399 if type(funcs) == dict:
400 self.funcDict = funcs
401 else:
402 self.funcDict = {f.shortname: f for f in funcs}
404 self._filt = None
406 super().__init__(**kwargs)
408 @property
409 def filt(self):
410 return self._filt
412 @filt.setter
413 def filt(self, filt):
414 if filt is not None:
415 for _, f in self.funcDict.items():
416 f.filt = filt
417 self._filt = filt
419 def update(self, new):
420 if isinstance(new, dict):
421 self.funcDict.update(new)
422 elif isinstance(new, CompositeFunctor):
423 self.funcDict.update(new.funcDict)
424 else:
425 raise TypeError('Can only update with dictionary or CompositeFunctor.')
427 # Make sure new functors have the same 'filt' set
428 if self.filt is not None:
429 self.filt = self.filt
431 @property
432 def columns(self):
433 return list(set([x for y in [f.columns for f in self.funcDict.values()] for x in y]))
435 def multilevelColumns(self, data, **kwargs):
436 # Get the union of columns for all component functors. Note the need to have `returnTuple=True` here.
437 return list(
438 set(
439 [
440 x
441 for y in [
442 f.multilevelColumns(data, returnTuple=True, **kwargs) for f in self.funcDict.values()
443 ]
444 for x in y
445 ]
446 )
447 )
449 def __call__(self, data, **kwargs):
450 """Apply the functor to the data table
452 Parameters
453 ----------
454 data : various
455 The data represented as `lsst.daf.butler.DeferredDatasetHandle`,
456 `lsst.pipe.base.InMemoryDatasetHandle`,
457 or `pandas.DataFrame`.
458 The table or a pointer to a table on disk from which columns can
459 be accessed
460 """
461 if isinstance(data, pd.DataFrame):
462 _data = InMemoryDatasetHandle(data, storageClass="DataFrame")
463 elif isinstance(data, (DeferredDatasetHandle, InMemoryDatasetHandle)):
464 _data = data
465 else:
466 raise RuntimeError(f"Unexpected type provided for data. Got {get_full_type_name(data)}.")
468 columnIndex = self._get_columnIndex(_data)
470 if isinstance(columnIndex, pd.MultiIndex):
471 columns = self.multilevelColumns(_data, columnIndex=columnIndex)
472 df = _data.get(parameters={"columns": columns})
474 valDict = {}
475 for k, f in self.funcDict.items():
476 try:
477 subdf = f._setLevels(
478 df[f.multilevelColumns(_data, returnTuple=True, columnIndex=columnIndex)]
479 )
480 valDict[k] = f._func(subdf)
481 except Exception as e:
482 self.log.error("Exception in %s call: %s: %s", self.name, type(e).__name__, e)
483 try:
484 valDict[k] = f.fail(subdf)
485 except NameError:
486 raise e
488 else:
489 df = _data.get(parameters={"columns": self.columns})
491 valDict = {k: f._func(df) for k, f in self.funcDict.items()}
493 # Check that output columns are actually columns
494 for name, colVal in valDict.items():
495 if len(colVal.shape) != 1:
496 raise RuntimeError("Transformed column '%s' is not the shape of a column. "
497 "It is shaped %s and type %s." % (name, colVal.shape, type(colVal)))
499 try:
500 valDf = pd.concat(valDict, axis=1)
501 except TypeError:
502 print([(k, type(v)) for k, v in valDict.items()])
503 raise
505 if kwargs.get('dropna', False):
506 valDf = valDf.dropna(how='any')
508 return valDf
510 @classmethod
511 def renameCol(cls, col, renameRules):
512 if renameRules is None:
513 return col
514 for old, new in renameRules:
515 if col.startswith(old):
516 col = col.replace(old, new)
517 return col
519 @classmethod
520 def from_file(cls, filename, **kwargs):
521 # Allow environment variables in the filename.
522 filename = os.path.expandvars(filename)
523 with open(filename) as f:
524 translationDefinition = yaml.safe_load(f)
526 return cls.from_yaml(translationDefinition, **kwargs)
528 @classmethod
529 def from_yaml(cls, translationDefinition, **kwargs):
530 funcs = {}
531 for func, val in translationDefinition['funcs'].items():
532 funcs[func] = init_fromDict(val, name=func)
534 if 'flag_rename_rules' in translationDefinition:
535 renameRules = translationDefinition['flag_rename_rules']
536 else:
537 renameRules = None
539 if 'calexpFlags' in translationDefinition:
540 for flag in translationDefinition['calexpFlags']:
541 funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='calexp')
543 if 'refFlags' in translationDefinition:
544 for flag in translationDefinition['refFlags']:
545 funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='ref')
547 if 'forcedFlags' in translationDefinition:
548 for flag in translationDefinition['forcedFlags']:
549 funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='forced_src')
551 if 'flags' in translationDefinition:
552 for flag in translationDefinition['flags']:
553 funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='meas')
555 return cls(funcs, **kwargs)
558def mag_aware_eval(df, expr, log):
559 """Evaluate an expression on a DataFrame, knowing what the 'mag' function means
561 Builds on `pandas.DataFrame.eval`, which parses and executes math on dataframes.
563 Parameters
564 ----------
565 df : pandas.DataFrame
566 Dataframe on which to evaluate expression.
568 expr : str
569 Expression.
570 """
571 try:
572 expr_new = re.sub(r'mag\((\w+)\)', r'-2.5*log(\g<1>)/log(10)', expr)
573 val = df.eval(expr_new)
574 except Exception as e: # Should check what actually gets raised
575 log.error("Exception in mag_aware_eval: %s: %s", type(e).__name__, e)
576 expr_new = re.sub(r'mag\((\w+)\)', r'-2.5*log(\g<1>_instFlux)/log(10)', expr)
577 val = df.eval(expr_new)
578 return val
581class CustomFunctor(Functor):
582 """Arbitrary computation on a catalog
584 Column names (and thus the columns to be loaded from catalog) are found
585 by finding all words and trying to ignore all "math-y" words.
587 Parameters
588 ----------
589 expr : str
590 Expression to evaluate, to be parsed and executed by `mag_aware_eval`.
591 """
592 _ignore_words = ('mag', 'sin', 'cos', 'exp', 'log', 'sqrt')
594 def __init__(self, expr, **kwargs):
595 self.expr = expr
596 super().__init__(**kwargs)
598 @property
599 def name(self):
600 return self.expr
602 @property
603 def columns(self):
604 flux_cols = re.findall(r'mag\(\s*(\w+)\s*\)', self.expr)
606 cols = [c for c in re.findall(r'[a-zA-Z_]+', self.expr) if c not in self._ignore_words]
607 not_a_col = []
608 for c in flux_cols:
609 if not re.search('_instFlux$', c):
610 cols.append(f'{c}_instFlux')
611 not_a_col.append(c)
612 else:
613 cols.append(c)
615 return list(set([c for c in cols if c not in not_a_col]))
617 def _func(self, df):
618 return mag_aware_eval(df, self.expr, self.log)
621class Column(Functor):
622 """Get column with specified name
623 """
625 def __init__(self, col, **kwargs):
626 self.col = col
627 super().__init__(**kwargs)
629 @property
630 def name(self):
631 return self.col
633 @property
634 def columns(self):
635 return [self.col]
637 def _func(self, df):
638 return df[self.col]
641class Index(Functor):
642 """Return the value of the index for each object
643 """
645 columns = ['coord_ra'] # just a dummy; something has to be here
646 _defaultDataset = 'ref'
647 _defaultNoDup = True
649 def _func(self, df):
650 return pd.Series(df.index, index=df.index)
653class IDColumn(Column):
654 col = 'id'
655 _allow_difference = False
656 _defaultNoDup = True
658 def _func(self, df):
659 return pd.Series(df.index, index=df.index)
662class FootprintNPix(Column):
663 col = 'base_Footprint_nPix'
666class CoordColumn(Column):
667 """Base class for coordinate column, in degrees
668 """
669 _radians = True
671 def __init__(self, col, **kwargs):
672 super().__init__(col, **kwargs)
674 def _func(self, df):
675 # Must not modify original column in case that column is used by another functor
676 output = df[self.col] * 180 / np.pi if self._radians else df[self.col]
677 return output
680class RAColumn(CoordColumn):
681 """Right Ascension, in degrees
682 """
683 name = 'RA'
684 _defaultNoDup = True
686 def __init__(self, **kwargs):
687 super().__init__('coord_ra', **kwargs)
689 def __call__(self, catalog, **kwargs):
690 return super().__call__(catalog, **kwargs)
693class DecColumn(CoordColumn):
694 """Declination, in degrees
695 """
696 name = 'Dec'
697 _defaultNoDup = True
699 def __init__(self, **kwargs):
700 super().__init__('coord_dec', **kwargs)
702 def __call__(self, catalog, **kwargs):
703 return super().__call__(catalog, **kwargs)
706class HtmIndex20(Functor):
707 """Compute the level 20 HtmIndex for the catalog.
709 Notes
710 -----
711 This functor was implemented to satisfy requirements of old APDB interface
712 which required ``pixelId`` column in DiaObject with HTM20 index. APDB
713 interface had migrated to not need that information, but we keep this
714 class in case it may be useful for something else.
715 """
716 name = "Htm20"
717 htmLevel = 20
718 _radians = True
720 def __init__(self, ra, decl, **kwargs):
721 self.pixelator = sphgeom.HtmPixelization(self.htmLevel)
722 self.ra = ra
723 self.decl = decl
724 self._columns = [self.ra, self.decl]
725 super().__init__(**kwargs)
727 def _func(self, df):
729 def computePixel(row):
730 if self._radians:
731 sphPoint = geom.SpherePoint(row[self.ra],
732 row[self.decl],
733 geom.radians)
734 else:
735 sphPoint = geom.SpherePoint(row[self.ra],
736 row[self.decl],
737 geom.degrees)
738 return self.pixelator.index(sphPoint.getVector())
740 return df.apply(computePixel, axis=1, result_type='reduce').astype('int64')
743def fluxName(col):
744 if not col.endswith('_instFlux'):
745 col += '_instFlux'
746 return col
749def fluxErrName(col):
750 if not col.endswith('_instFluxErr'):
751 col += '_instFluxErr'
752 return col
755class Mag(Functor):
756 """Compute calibrated magnitude
758 Takes a `calib` argument, which returns the flux at mag=0
759 as `calib.getFluxMag0()`. If not provided, then the default
760 `fluxMag0` is 63095734448.0194, which is default for HSC.
761 This default should be removed in DM-21955
763 This calculation hides warnings about invalid values and dividing by zero.
765 As for all functors, a `dataset` and `filt` kwarg should be provided upon
766 initialization. Unlike the default `Functor`, however, the default dataset
767 for a `Mag` is `'meas'`, rather than `'ref'`.
769 Parameters
770 ----------
771 col : `str`
772 Name of flux column from which to compute magnitude. Can be parseable
773 by `lsst.pipe.tasks.functors.fluxName` function---that is, you can pass
774 `'modelfit_CModel'` instead of `'modelfit_CModel_instFlux'`) and it will
775 understand.
776 calib : `lsst.afw.image.calib.Calib` (optional)
777 Object that knows zero point.
778 """
779 _defaultDataset = 'meas'
781 def __init__(self, col, calib=None, **kwargs):
782 self.col = fluxName(col)
783 self.calib = calib
784 if calib is not None:
785 self.fluxMag0 = calib.getFluxMag0()[0]
786 else:
787 # TO DO: DM-21955 Replace hard coded photometic calibration values
788 self.fluxMag0 = 63095734448.0194
790 super().__init__(**kwargs)
792 @property
793 def columns(self):
794 return [self.col]
796 def _func(self, df):
797 with np.warnings.catch_warnings():
798 np.warnings.filterwarnings('ignore', r'invalid value encountered')
799 np.warnings.filterwarnings('ignore', r'divide by zero')
800 return -2.5*np.log10(df[self.col] / self.fluxMag0)
802 @property
803 def name(self):
804 return f'mag_{self.col}'
807class MagErr(Mag):
808 """Compute calibrated magnitude uncertainty
810 Takes the same `calib` object as `lsst.pipe.tasks.functors.Mag`.
812 Parameters
813 col : `str`
814 Name of flux column
815 calib : `lsst.afw.image.calib.Calib` (optional)
816 Object that knows zero point.
817 """
819 def __init__(self, *args, **kwargs):
820 super().__init__(*args, **kwargs)
821 if self.calib is not None:
822 self.fluxMag0Err = self.calib.getFluxMag0()[1]
823 else:
824 self.fluxMag0Err = 0.
826 @property
827 def columns(self):
828 return [self.col, self.col + 'Err']
830 def _func(self, df):
831 with np.warnings.catch_warnings():
832 np.warnings.filterwarnings('ignore', r'invalid value encountered')
833 np.warnings.filterwarnings('ignore', r'divide by zero')
834 fluxCol, fluxErrCol = self.columns
835 x = df[fluxErrCol] / df[fluxCol]
836 y = self.fluxMag0Err / self.fluxMag0
837 magErr = (2.5 / np.log(10.)) * np.sqrt(x*x + y*y)
838 return magErr
840 @property
841 def name(self):
842 return super().name + '_err'
845class NanoMaggie(Mag):
846 """
847 """
849 def _func(self, df):
850 return (df[self.col] / self.fluxMag0) * 1e9
853class MagDiff(Functor):
854 _defaultDataset = 'meas'
856 """Functor to calculate magnitude difference"""
858 def __init__(self, col1, col2, **kwargs):
859 self.col1 = fluxName(col1)
860 self.col2 = fluxName(col2)
861 super().__init__(**kwargs)
863 @property
864 def columns(self):
865 return [self.col1, self.col2]
867 def _func(self, df):
868 with np.warnings.catch_warnings():
869 np.warnings.filterwarnings('ignore', r'invalid value encountered')
870 np.warnings.filterwarnings('ignore', r'divide by zero')
871 return -2.5*np.log10(df[self.col1]/df[self.col2])
873 @property
874 def name(self):
875 return f'(mag_{self.col1} - mag_{self.col2})'
877 @property
878 def shortname(self):
879 return f'magDiff_{self.col1}_{self.col2}'
882class Color(Functor):
883 """Compute the color between two filters
885 Computes color by initializing two different `Mag`
886 functors based on the `col` and filters provided, and
887 then returning the difference.
889 This is enabled by the `_func` expecting a dataframe with a
890 multilevel column index, with both `'band'` and `'column'`,
891 instead of just `'column'`, which is the `Functor` default.
892 This is controlled by the `_dfLevels` attribute.
894 Also of note, the default dataset for `Color` is `forced_src'`,
895 whereas for `Mag` it is `'meas'`.
897 Parameters
898 ----------
899 col : str
900 Name of flux column from which to compute; same as would be passed to
901 `lsst.pipe.tasks.functors.Mag`.
903 filt2, filt1 : str
904 Filters from which to compute magnitude difference.
905 Color computed is `Mag(filt2) - Mag(filt1)`.
906 """
907 _defaultDataset = 'forced_src'
908 _dfLevels = ('band', 'column')
909 _defaultNoDup = True
911 def __init__(self, col, filt2, filt1, **kwargs):
912 self.col = fluxName(col)
913 if filt2 == filt1:
914 raise RuntimeError("Cannot compute Color for %s: %s - %s " % (col, filt2, filt1))
915 self.filt2 = filt2
916 self.filt1 = filt1
918 self.mag2 = Mag(col, filt=filt2, **kwargs)
919 self.mag1 = Mag(col, filt=filt1, **kwargs)
921 super().__init__(**kwargs)
923 @property
924 def filt(self):
925 return None
927 @filt.setter
928 def filt(self, filt):
929 pass
931 def _func(self, df):
932 mag2 = self.mag2._func(df[self.filt2])
933 mag1 = self.mag1._func(df[self.filt1])
934 return mag2 - mag1
936 @property
937 def columns(self):
938 return [self.mag1.col, self.mag2.col]
940 def multilevelColumns(self, parq, **kwargs):
941 return [(self.dataset, self.filt1, self.col), (self.dataset, self.filt2, self.col)]
943 @property
944 def name(self):
945 return f'{self.filt2} - {self.filt1} ({self.col})'
947 @property
948 def shortname(self):
949 return f"{self.col}_{self.filt2.replace('-', '')}m{self.filt1.replace('-', '')}"
952class Labeller(Functor):
953 """Main function of this subclass is to override the dropna=True
954 """
955 _null_label = 'null'
956 _allow_difference = False
957 name = 'label'
958 _force_str = False
960 def __call__(self, parq, dropna=False, **kwargs):
961 return super().__call__(parq, dropna=False, **kwargs)
964class StarGalaxyLabeller(Labeller):
965 _columns = ["base_ClassificationExtendedness_value"]
966 _column = "base_ClassificationExtendedness_value"
968 def _func(self, df):
969 x = df[self._columns][self._column]
970 mask = x.isnull()
971 test = (x < 0.5).astype(int)
972 test = test.mask(mask, 2)
974 # TODO: DM-21954 Look into veracity of inline comment below
975 # are these backwards?
976 categories = ['galaxy', 'star', self._null_label]
977 label = pd.Series(pd.Categorical.from_codes(test, categories=categories),
978 index=x.index, name='label')
979 if self._force_str:
980 label = label.astype(str)
981 return label
984class NumStarLabeller(Labeller):
985 _columns = ['numStarFlags']
986 labels = {"star": 0, "maybe": 1, "notStar": 2}
988 def _func(self, df):
989 x = df[self._columns][self._columns[0]]
991 # Number of filters
992 n = len(x.unique()) - 1
994 labels = ['noStar', 'maybe', 'star']
995 label = pd.Series(pd.cut(x, [-1, 0, n-1, n], labels=labels),
996 index=x.index, name='label')
998 if self._force_str:
999 label = label.astype(str)
1001 return label
1004class DeconvolvedMoments(Functor):
1005 name = 'Deconvolved Moments'
1006 shortname = 'deconvolvedMoments'
1007 _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
1008 "ext_shapeHSM_HsmSourceMoments_yy",
1009 "base_SdssShape_xx", "base_SdssShape_yy",
1010 "ext_shapeHSM_HsmPsfMoments_xx",
1011 "ext_shapeHSM_HsmPsfMoments_yy")
1013 def _func(self, df):
1014 """Calculate deconvolved moments"""
1015 if "ext_shapeHSM_HsmSourceMoments_xx" in df.columns: # _xx added by tdm
1016 hsm = df["ext_shapeHSM_HsmSourceMoments_xx"] + df["ext_shapeHSM_HsmSourceMoments_yy"]
1017 else:
1018 hsm = np.ones(len(df))*np.nan
1019 sdss = df["base_SdssShape_xx"] + df["base_SdssShape_yy"]
1020 if "ext_shapeHSM_HsmPsfMoments_xx" in df.columns:
1021 psf = df["ext_shapeHSM_HsmPsfMoments_xx"] + df["ext_shapeHSM_HsmPsfMoments_yy"]
1022 else:
1023 # LSST does not have shape.sdss.psf. Could instead add base_PsfShape to catalog using
1024 # exposure.getPsf().computeShape(s.getCentroid()).getIxx()
1025 # raise TaskError("No psf shape parameter found in catalog")
1026 raise RuntimeError('No psf shape parameter found in catalog')
1028 return hsm.where(np.isfinite(hsm), sdss) - psf
1031class SdssTraceSize(Functor):
1032 """Functor to calculate SDSS trace radius size for sources"""
1033 name = "SDSS Trace Size"
1034 shortname = 'sdssTrace'
1035 _columns = ("base_SdssShape_xx", "base_SdssShape_yy")
1037 def _func(self, df):
1038 srcSize = np.sqrt(0.5*(df["base_SdssShape_xx"] + df["base_SdssShape_yy"]))
1039 return srcSize
1042class PsfSdssTraceSizeDiff(Functor):
1043 """Functor to calculate SDSS trace radius size difference (%) between object and psf model"""
1044 name = "PSF - SDSS Trace Size"
1045 shortname = 'psf_sdssTrace'
1046 _columns = ("base_SdssShape_xx", "base_SdssShape_yy",
1047 "base_SdssShape_psf_xx", "base_SdssShape_psf_yy")
1049 def _func(self, df):
1050 srcSize = np.sqrt(0.5*(df["base_SdssShape_xx"] + df["base_SdssShape_yy"]))
1051 psfSize = np.sqrt(0.5*(df["base_SdssShape_psf_xx"] + df["base_SdssShape_psf_yy"]))
1052 sizeDiff = 100*(srcSize - psfSize)/(0.5*(srcSize + psfSize))
1053 return sizeDiff
1056class HsmTraceSize(Functor):
1057 """Functor to calculate HSM trace radius size for sources"""
1058 name = 'HSM Trace Size'
1059 shortname = 'hsmTrace'
1060 _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
1061 "ext_shapeHSM_HsmSourceMoments_yy")
1063 def _func(self, df):
1064 srcSize = np.sqrt(0.5*(df["ext_shapeHSM_HsmSourceMoments_xx"]
1065 + df["ext_shapeHSM_HsmSourceMoments_yy"]))
1066 return srcSize
1069class PsfHsmTraceSizeDiff(Functor):
1070 """Functor to calculate HSM trace radius size difference (%) between object and psf model"""
1071 name = 'PSF - HSM Trace Size'
1072 shortname = 'psf_HsmTrace'
1073 _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
1074 "ext_shapeHSM_HsmSourceMoments_yy",
1075 "ext_shapeHSM_HsmPsfMoments_xx",
1076 "ext_shapeHSM_HsmPsfMoments_yy")
1078 def _func(self, df):
1079 srcSize = np.sqrt(0.5*(df["ext_shapeHSM_HsmSourceMoments_xx"]
1080 + df["ext_shapeHSM_HsmSourceMoments_yy"]))
1081 psfSize = np.sqrt(0.5*(df["ext_shapeHSM_HsmPsfMoments_xx"]
1082 + df["ext_shapeHSM_HsmPsfMoments_yy"]))
1083 sizeDiff = 100*(srcSize - psfSize)/(0.5*(srcSize + psfSize))
1084 return sizeDiff
1087class HsmFwhm(Functor):
1088 name = 'HSM Psf FWHM'
1089 _columns = ('ext_shapeHSM_HsmPsfMoments_xx', 'ext_shapeHSM_HsmPsfMoments_yy')
1090 # TODO: DM-21403 pixel scale should be computed from the CD matrix or transform matrix
1091 pixelScale = 0.168
1092 SIGMA2FWHM = 2*np.sqrt(2*np.log(2))
1094 def _func(self, df):
1095 return self.pixelScale*self.SIGMA2FWHM*np.sqrt(
1096 0.5*(df['ext_shapeHSM_HsmPsfMoments_xx'] + df['ext_shapeHSM_HsmPsfMoments_yy']))
1099class E1(Functor):
1100 name = "Distortion Ellipticity (e1)"
1101 shortname = "Distortion"
1103 def __init__(self, colXX, colXY, colYY, **kwargs):
1104 self.colXX = colXX
1105 self.colXY = colXY
1106 self.colYY = colYY
1107 self._columns = [self.colXX, self.colXY, self.colYY]
1108 super().__init__(**kwargs)
1110 @property
1111 def columns(self):
1112 return [self.colXX, self.colXY, self.colYY]
1114 def _func(self, df):
1115 return df[self.colXX] - df[self.colYY] / (df[self.colXX] + df[self.colYY])
1118class E2(Functor):
1119 name = "Ellipticity e2"
1121 def __init__(self, colXX, colXY, colYY, **kwargs):
1122 self.colXX = colXX
1123 self.colXY = colXY
1124 self.colYY = colYY
1125 super().__init__(**kwargs)
1127 @property
1128 def columns(self):
1129 return [self.colXX, self.colXY, self.colYY]
1131 def _func(self, df):
1132 return 2*df[self.colXY] / (df[self.colXX] + df[self.colYY])
1135class RadiusFromQuadrupole(Functor):
1137 def __init__(self, colXX, colXY, colYY, **kwargs):
1138 self.colXX = colXX
1139 self.colXY = colXY
1140 self.colYY = colYY
1141 super().__init__(**kwargs)
1143 @property
1144 def columns(self):
1145 return [self.colXX, self.colXY, self.colYY]
1147 def _func(self, df):
1148 return (df[self.colXX]*df[self.colYY] - df[self.colXY]**2)**0.25
1151class LocalWcs(Functor):
1152 """Computations using the stored localWcs.
1153 """
1154 name = "LocalWcsOperations"
1156 def __init__(self,
1157 colCD_1_1,
1158 colCD_1_2,
1159 colCD_2_1,
1160 colCD_2_2,
1161 **kwargs):
1162 self.colCD_1_1 = colCD_1_1
1163 self.colCD_1_2 = colCD_1_2
1164 self.colCD_2_1 = colCD_2_1
1165 self.colCD_2_2 = colCD_2_2
1166 super().__init__(**kwargs)
1168 def computeDeltaRaDec(self, x, y, cd11, cd12, cd21, cd22):
1169 """Compute the distance on the sphere from x2, y1 to x1, y1.
1171 Parameters
1172 ----------
1173 x : `pandas.Series`
1174 X pixel coordinate.
1175 y : `pandas.Series`
1176 Y pixel coordinate.
1177 cd11 : `pandas.Series`
1178 [1, 1] element of the local Wcs affine transform.
1179 cd11 : `pandas.Series`
1180 [1, 1] element of the local Wcs affine transform.
1181 cd12 : `pandas.Series`
1182 [1, 2] element of the local Wcs affine transform.
1183 cd21 : `pandas.Series`
1184 [2, 1] element of the local Wcs affine transform.
1185 cd22 : `pandas.Series`
1186 [2, 2] element of the local Wcs affine transform.
1188 Returns
1189 -------
1190 raDecTuple : tuple
1191 RA and dec conversion of x and y given the local Wcs. Returned
1192 units are in radians.
1194 """
1195 return (x * cd11 + y * cd12, x * cd21 + y * cd22)
1197 def computeSkySeperation(self, ra1, dec1, ra2, dec2):
1198 """Compute the local pixel scale conversion.
1200 Parameters
1201 ----------
1202 ra1 : `pandas.Series`
1203 Ra of the first coordinate in radians.
1204 dec1 : `pandas.Series`
1205 Dec of the first coordinate in radians.
1206 ra2 : `pandas.Series`
1207 Ra of the second coordinate in radians.
1208 dec2 : `pandas.Series`
1209 Dec of the second coordinate in radians.
1211 Returns
1212 -------
1213 dist : `pandas.Series`
1214 Distance on the sphere in radians.
1215 """
1216 deltaDec = dec2 - dec1
1217 deltaRa = ra2 - ra1
1218 return 2 * np.arcsin(
1219 np.sqrt(
1220 np.sin(deltaDec / 2) ** 2
1221 + np.cos(dec2) * np.cos(dec1) * np.sin(deltaRa / 2) ** 2))
1223 def getSkySeperationFromPixel(self, x1, y1, x2, y2, cd11, cd12, cd21, cd22):
1224 """Compute the distance on the sphere from x2, y1 to x1, y1.
1226 Parameters
1227 ----------
1228 x1 : `pandas.Series`
1229 X pixel coordinate.
1230 y1 : `pandas.Series`
1231 Y pixel coordinate.
1232 x2 : `pandas.Series`
1233 X pixel coordinate.
1234 y2 : `pandas.Series`
1235 Y pixel coordinate.
1236 cd11 : `pandas.Series`
1237 [1, 1] element of the local Wcs affine transform.
1238 cd11 : `pandas.Series`
1239 [1, 1] element of the local Wcs affine transform.
1240 cd12 : `pandas.Series`
1241 [1, 2] element of the local Wcs affine transform.
1242 cd21 : `pandas.Series`
1243 [2, 1] element of the local Wcs affine transform.
1244 cd22 : `pandas.Series`
1245 [2, 2] element of the local Wcs affine transform.
1247 Returns
1248 -------
1249 Distance : `pandas.Series`
1250 Arcseconds per pixel at the location of the local WC
1251 """
1252 ra1, dec1 = self.computeDeltaRaDec(x1, y1, cd11, cd12, cd21, cd22)
1253 ra2, dec2 = self.computeDeltaRaDec(x2, y2, cd11, cd12, cd21, cd22)
1254 # Great circle distance for small separations.
1255 return self.computeSkySeperation(ra1, dec1, ra2, dec2)
1258class ComputePixelScale(LocalWcs):
1259 """Compute the local pixel scale from the stored CDMatrix.
1260 """
1261 name = "PixelScale"
1263 @property
1264 def columns(self):
1265 return [self.colCD_1_1,
1266 self.colCD_1_2,
1267 self.colCD_2_1,
1268 self.colCD_2_2]
1270 def pixelScaleArcseconds(self, cd11, cd12, cd21, cd22):
1271 """Compute the local pixel to scale conversion in arcseconds.
1273 Parameters
1274 ----------
1275 cd11 : `pandas.Series`
1276 [1, 1] element of the local Wcs affine transform in radians.
1277 cd11 : `pandas.Series`
1278 [1, 1] element of the local Wcs affine transform in radians.
1279 cd12 : `pandas.Series`
1280 [1, 2] element of the local Wcs affine transform in radians.
1281 cd21 : `pandas.Series`
1282 [2, 1] element of the local Wcs affine transform in radians.
1283 cd22 : `pandas.Series`
1284 [2, 2] element of the local Wcs affine transform in radians.
1286 Returns
1287 -------
1288 pixScale : `pandas.Series`
1289 Arcseconds per pixel at the location of the local WC
1290 """
1291 return 3600 * np.degrees(np.sqrt(np.fabs(cd11 * cd22 - cd12 * cd21)))
1293 def _func(self, df):
1294 return self.pixelScaleArcseconds(df[self.colCD_1_1],
1295 df[self.colCD_1_2],
1296 df[self.colCD_2_1],
1297 df[self.colCD_2_2])
1300class ConvertPixelToArcseconds(ComputePixelScale):
1301 """Convert a value in units pixels squared to units arcseconds squared.
1302 """
1304 def __init__(self,
1305 col,
1306 colCD_1_1,
1307 colCD_1_2,
1308 colCD_2_1,
1309 colCD_2_2,
1310 **kwargs):
1311 self.col = col
1312 super().__init__(colCD_1_1,
1313 colCD_1_2,
1314 colCD_2_1,
1315 colCD_2_2,
1316 **kwargs)
1318 @property
1319 def name(self):
1320 return f"{self.col}_asArcseconds"
1322 @property
1323 def columns(self):
1324 return [self.col,
1325 self.colCD_1_1,
1326 self.colCD_1_2,
1327 self.colCD_2_1,
1328 self.colCD_2_2]
1330 def _func(self, df):
1331 return df[self.col] * self.pixelScaleArcseconds(df[self.colCD_1_1],
1332 df[self.colCD_1_2],
1333 df[self.colCD_2_1],
1334 df[self.colCD_2_2])
1337class ConvertPixelSqToArcsecondsSq(ComputePixelScale):
1338 """Convert a value in units pixels to units arcseconds.
1339 """
1341 def __init__(self,
1342 col,
1343 colCD_1_1,
1344 colCD_1_2,
1345 colCD_2_1,
1346 colCD_2_2,
1347 **kwargs):
1348 self.col = col
1349 super().__init__(colCD_1_1,
1350 colCD_1_2,
1351 colCD_2_1,
1352 colCD_2_2,
1353 **kwargs)
1355 @property
1356 def name(self):
1357 return f"{self.col}_asArcsecondsSq"
1359 @property
1360 def columns(self):
1361 return [self.col,
1362 self.colCD_1_1,
1363 self.colCD_1_2,
1364 self.colCD_2_1,
1365 self.colCD_2_2]
1367 def _func(self, df):
1368 pixScale = self.pixelScaleArcseconds(df[self.colCD_1_1],
1369 df[self.colCD_1_2],
1370 df[self.colCD_2_1],
1371 df[self.colCD_2_2])
1372 return df[self.col] * pixScale * pixScale
1375class ReferenceBand(Functor):
1376 name = 'Reference Band'
1377 shortname = 'refBand'
1379 @property
1380 def columns(self):
1381 return ["merge_measurement_i",
1382 "merge_measurement_r",
1383 "merge_measurement_z",
1384 "merge_measurement_y",
1385 "merge_measurement_g",
1386 "merge_measurement_u"]
1388 def _func(self, df: pd.DataFrame) -> pd.Series:
1389 def getFilterAliasName(row):
1390 # get column name with the max value (True > False)
1391 colName = row.idxmax()
1392 return colName.replace('merge_measurement_', '')
1394 # Skip columns that are unavailable, because this functor requests the
1395 # superset of bands that could be included in the object table
1396 columns = [col for col in self.columns if col in df.columns]
1397 # Makes a Series of dtype object if df is empty
1398 return df[columns].apply(getFilterAliasName, axis=1,
1399 result_type='reduce').astype('object')
1402class Photometry(Functor):
1403 # AB to NanoJansky (3631 Jansky)
1404 AB_FLUX_SCALE = (0 * u.ABmag).to_value(u.nJy)
1405 LOG_AB_FLUX_SCALE = 12.56
1406 FIVE_OVER_2LOG10 = 1.085736204758129569
1407 # TO DO: DM-21955 Replace hard coded photometic calibration values
1408 COADD_ZP = 27
1410 def __init__(self, colFlux, colFluxErr=None, calib=None, **kwargs):
1411 self.vhypot = np.vectorize(self.hypot)
1412 self.col = colFlux
1413 self.colFluxErr = colFluxErr
1415 self.calib = calib
1416 if calib is not None:
1417 self.fluxMag0, self.fluxMag0Err = calib.getFluxMag0()
1418 else:
1419 self.fluxMag0 = 1./np.power(10, -0.4*self.COADD_ZP)
1420 self.fluxMag0Err = 0.
1422 super().__init__(**kwargs)
1424 @property
1425 def columns(self):
1426 return [self.col]
1428 @property
1429 def name(self):
1430 return f'mag_{self.col}'
1432 @classmethod
1433 def hypot(cls, a, b):
1434 if np.abs(a) < np.abs(b):
1435 a, b = b, a
1436 if a == 0.:
1437 return 0.
1438 q = b/a
1439 return np.abs(a) * np.sqrt(1. + q*q)
1441 def dn2flux(self, dn, fluxMag0):
1442 return self.AB_FLUX_SCALE * dn / fluxMag0
1444 def dn2mag(self, dn, fluxMag0):
1445 with np.warnings.catch_warnings():
1446 np.warnings.filterwarnings('ignore', r'invalid value encountered')
1447 np.warnings.filterwarnings('ignore', r'divide by zero')
1448 return -2.5 * np.log10(dn/fluxMag0)
1450 def dn2fluxErr(self, dn, dnErr, fluxMag0, fluxMag0Err):
1451 retVal = self.vhypot(dn * fluxMag0Err, dnErr * fluxMag0)
1452 retVal *= self.AB_FLUX_SCALE / fluxMag0 / fluxMag0
1453 return retVal
1455 def dn2MagErr(self, dn, dnErr, fluxMag0, fluxMag0Err):
1456 retVal = self.dn2fluxErr(dn, dnErr, fluxMag0, fluxMag0Err) / self.dn2flux(dn, fluxMag0)
1457 return self.FIVE_OVER_2LOG10 * retVal
1460class NanoJansky(Photometry):
1461 def _func(self, df):
1462 return self.dn2flux(df[self.col], self.fluxMag0)
1465class NanoJanskyErr(Photometry):
1466 @property
1467 def columns(self):
1468 return [self.col, self.colFluxErr]
1470 def _func(self, df):
1471 retArr = self.dn2fluxErr(df[self.col], df[self.colFluxErr], self.fluxMag0, self.fluxMag0Err)
1472 return pd.Series(retArr, index=df.index)
1475class Magnitude(Photometry):
1476 def _func(self, df):
1477 return self.dn2mag(df[self.col], self.fluxMag0)
1480class MagnitudeErr(Photometry):
1481 @property
1482 def columns(self):
1483 return [self.col, self.colFluxErr]
1485 def _func(self, df):
1486 retArr = self.dn2MagErr(df[self.col], df[self.colFluxErr], self.fluxMag0, self.fluxMag0Err)
1487 return pd.Series(retArr, index=df.index)
1490class LocalPhotometry(Functor):
1491 """Base class for calibrating the specified instrument flux column using
1492 the local photometric calibration.
1494 Parameters
1495 ----------
1496 instFluxCol : `str`
1497 Name of the instrument flux column.
1498 instFluxErrCol : `str`
1499 Name of the assocated error columns for ``instFluxCol``.
1500 photoCalibCol : `str`
1501 Name of local calibration column.
1502 photoCalibErrCol : `str`
1503 Error associated with ``photoCalibCol``
1505 See also
1506 --------
1507 LocalPhotometry
1508 LocalNanojansky
1509 LocalNanojanskyErr
1510 LocalMagnitude
1511 LocalMagnitudeErr
1512 """
1513 logNJanskyToAB = (1 * u.nJy).to_value(u.ABmag)
1515 def __init__(self,
1516 instFluxCol,
1517 instFluxErrCol,
1518 photoCalibCol,
1519 photoCalibErrCol,
1520 **kwargs):
1521 self.instFluxCol = instFluxCol
1522 self.instFluxErrCol = instFluxErrCol
1523 self.photoCalibCol = photoCalibCol
1524 self.photoCalibErrCol = photoCalibErrCol
1525 super().__init__(**kwargs)
1527 def instFluxToNanojansky(self, instFlux, localCalib):
1528 """Convert instrument flux to nanojanskys.
1530 Parameters
1531 ----------
1532 instFlux : `numpy.ndarray` or `pandas.Series`
1533 Array of instrument flux measurements
1534 localCalib : `numpy.ndarray` or `pandas.Series`
1535 Array of local photometric calibration estimates.
1537 Returns
1538 -------
1539 calibFlux : `numpy.ndarray` or `pandas.Series`
1540 Array of calibrated flux measurements.
1541 """
1542 return instFlux * localCalib
1544 def instFluxErrToNanojanskyErr(self, instFlux, instFluxErr, localCalib, localCalibErr):
1545 """Convert instrument flux to nanojanskys.
1547 Parameters
1548 ----------
1549 instFlux : `numpy.ndarray` or `pandas.Series`
1550 Array of instrument flux measurements
1551 instFluxErr : `numpy.ndarray` or `pandas.Series`
1552 Errors on associated ``instFlux`` values
1553 localCalib : `numpy.ndarray` or `pandas.Series`
1554 Array of local photometric calibration estimates.
1555 localCalibErr : `numpy.ndarray` or `pandas.Series`
1556 Errors on associated ``localCalib`` values
1558 Returns
1559 -------
1560 calibFluxErr : `numpy.ndarray` or `pandas.Series`
1561 Errors on calibrated flux measurements.
1562 """
1563 return np.hypot(instFluxErr * localCalib, instFlux * localCalibErr)
1565 def instFluxToMagnitude(self, instFlux, localCalib):
1566 """Convert instrument flux to nanojanskys.
1568 Parameters
1569 ----------
1570 instFlux : `numpy.ndarray` or `pandas.Series`
1571 Array of instrument flux measurements
1572 localCalib : `numpy.ndarray` or `pandas.Series`
1573 Array of local photometric calibration estimates.
1575 Returns
1576 -------
1577 calibMag : `numpy.ndarray` or `pandas.Series`
1578 Array of calibrated AB magnitudes.
1579 """
1580 return -2.5 * np.log10(self.instFluxToNanojansky(instFlux, localCalib)) + self.logNJanskyToAB
1582 def instFluxErrToMagnitudeErr(self, instFlux, instFluxErr, localCalib, localCalibErr):
1583 """Convert instrument flux err to nanojanskys.
1585 Parameters
1586 ----------
1587 instFlux : `numpy.ndarray` or `pandas.Series`
1588 Array of instrument flux measurements
1589 instFluxErr : `numpy.ndarray` or `pandas.Series`
1590 Errors on associated ``instFlux`` values
1591 localCalib : `numpy.ndarray` or `pandas.Series`
1592 Array of local photometric calibration estimates.
1593 localCalibErr : `numpy.ndarray` or `pandas.Series`
1594 Errors on associated ``localCalib`` values
1596 Returns
1597 -------
1598 calibMagErr: `numpy.ndarray` or `pandas.Series`
1599 Error on calibrated AB magnitudes.
1600 """
1601 err = self.instFluxErrToNanojanskyErr(instFlux, instFluxErr, localCalib, localCalibErr)
1602 return 2.5 / np.log(10) * err / self.instFluxToNanojansky(instFlux, instFluxErr)
1605class LocalNanojansky(LocalPhotometry):
1606 """Compute calibrated fluxes using the local calibration value.
1608 See also
1609 --------
1610 LocalNanojansky
1611 LocalNanojanskyErr
1612 LocalMagnitude
1613 LocalMagnitudeErr
1614 """
1616 @property
1617 def columns(self):
1618 return [self.instFluxCol, self.photoCalibCol]
1620 @property
1621 def name(self):
1622 return f'flux_{self.instFluxCol}'
1624 def _func(self, df):
1625 return self.instFluxToNanojansky(df[self.instFluxCol], df[self.photoCalibCol])
1628class LocalNanojanskyErr(LocalPhotometry):
1629 """Compute calibrated flux errors using the local calibration value.
1631 See also
1632 --------
1633 LocalNanojansky
1634 LocalNanojanskyErr
1635 LocalMagnitude
1636 LocalMagnitudeErr
1637 """
1639 @property
1640 def columns(self):
1641 return [self.instFluxCol, self.instFluxErrCol,
1642 self.photoCalibCol, self.photoCalibErrCol]
1644 @property
1645 def name(self):
1646 return f'fluxErr_{self.instFluxCol}'
1648 def _func(self, df):
1649 return self.instFluxErrToNanojanskyErr(df[self.instFluxCol], df[self.instFluxErrCol],
1650 df[self.photoCalibCol], df[self.photoCalibErrCol])
1653class LocalMagnitude(LocalPhotometry):
1654 """Compute calibrated AB magnitudes using the local calibration value.
1656 See also
1657 --------
1658 LocalNanojansky
1659 LocalNanojanskyErr
1660 LocalMagnitude
1661 LocalMagnitudeErr
1662 """
1664 @property
1665 def columns(self):
1666 return [self.instFluxCol, self.photoCalibCol]
1668 @property
1669 def name(self):
1670 return f'mag_{self.instFluxCol}'
1672 def _func(self, df):
1673 return self.instFluxToMagnitude(df[self.instFluxCol],
1674 df[self.photoCalibCol])
1677class LocalMagnitudeErr(LocalPhotometry):
1678 """Compute calibrated AB magnitude errors using the local calibration value.
1680 See also
1681 --------
1682 LocalNanojansky
1683 LocalNanojanskyErr
1684 LocalMagnitude
1685 LocalMagnitudeErr
1686 """
1688 @property
1689 def columns(self):
1690 return [self.instFluxCol, self.instFluxErrCol,
1691 self.photoCalibCol, self.photoCalibErrCol]
1693 @property
1694 def name(self):
1695 return f'magErr_{self.instFluxCol}'
1697 def _func(self, df):
1698 return self.instFluxErrToMagnitudeErr(df[self.instFluxCol],
1699 df[self.instFluxErrCol],
1700 df[self.photoCalibCol],
1701 df[self.photoCalibErrCol])
1704class LocalDipoleMeanFlux(LocalPhotometry):
1705 """Compute absolute mean of dipole fluxes.
1707 See also
1708 --------
1709 LocalNanojansky
1710 LocalNanojanskyErr
1711 LocalMagnitude
1712 LocalMagnitudeErr
1713 LocalDipoleMeanFlux
1714 LocalDipoleMeanFluxErr
1715 LocalDipoleDiffFlux
1716 LocalDipoleDiffFluxErr
1717 """
1718 def __init__(self,
1719 instFluxPosCol,
1720 instFluxNegCol,
1721 instFluxPosErrCol,
1722 instFluxNegErrCol,
1723 photoCalibCol,
1724 photoCalibErrCol,
1725 **kwargs):
1726 self.instFluxNegCol = instFluxNegCol
1727 self.instFluxPosCol = instFluxPosCol
1728 self.instFluxNegErrCol = instFluxNegErrCol
1729 self.instFluxPosErrCol = instFluxPosErrCol
1730 self.photoCalibCol = photoCalibCol
1731 self.photoCalibErrCol = photoCalibErrCol
1732 super().__init__(instFluxNegCol,
1733 instFluxNegErrCol,
1734 photoCalibCol,
1735 photoCalibErrCol,
1736 **kwargs)
1738 @property
1739 def columns(self):
1740 return [self.instFluxPosCol,
1741 self.instFluxNegCol,
1742 self.photoCalibCol]
1744 @property
1745 def name(self):
1746 return f'dipMeanFlux_{self.instFluxPosCol}_{self.instFluxNegCol}'
1748 def _func(self, df):
1749 return 0.5*(np.fabs(self.instFluxToNanojansky(df[self.instFluxNegCol], df[self.photoCalibCol]))
1750 + np.fabs(self.instFluxToNanojansky(df[self.instFluxPosCol], df[self.photoCalibCol])))
1753class LocalDipoleMeanFluxErr(LocalDipoleMeanFlux):
1754 """Compute the error on the absolute mean of dipole fluxes.
1756 See also
1757 --------
1758 LocalNanojansky
1759 LocalNanojanskyErr
1760 LocalMagnitude
1761 LocalMagnitudeErr
1762 LocalDipoleMeanFlux
1763 LocalDipoleMeanFluxErr
1764 LocalDipoleDiffFlux
1765 LocalDipoleDiffFluxErr
1766 """
1768 @property
1769 def columns(self):
1770 return [self.instFluxPosCol,
1771 self.instFluxNegCol,
1772 self.instFluxPosErrCol,
1773 self.instFluxNegErrCol,
1774 self.photoCalibCol,
1775 self.photoCalibErrCol]
1777 @property
1778 def name(self):
1779 return f'dipMeanFluxErr_{self.instFluxPosCol}_{self.instFluxNegCol}'
1781 def _func(self, df):
1782 return 0.5*np.sqrt(
1783 (np.fabs(df[self.instFluxNegCol]) + np.fabs(df[self.instFluxPosCol])
1784 * df[self.photoCalibErrCol])**2
1785 + (df[self.instFluxNegErrCol]**2 + df[self.instFluxPosErrCol]**2)
1786 * df[self.photoCalibCol]**2)
1789class LocalDipoleDiffFlux(LocalDipoleMeanFlux):
1790 """Compute the absolute difference of dipole fluxes.
1792 Value is (abs(pos) - abs(neg))
1794 See also
1795 --------
1796 LocalNanojansky
1797 LocalNanojanskyErr
1798 LocalMagnitude
1799 LocalMagnitudeErr
1800 LocalDipoleMeanFlux
1801 LocalDipoleMeanFluxErr
1802 LocalDipoleDiffFlux
1803 LocalDipoleDiffFluxErr
1804 """
1806 @property
1807 def columns(self):
1808 return [self.instFluxPosCol,
1809 self.instFluxNegCol,
1810 self.photoCalibCol]
1812 @property
1813 def name(self):
1814 return f'dipDiffFlux_{self.instFluxPosCol}_{self.instFluxNegCol}'
1816 def _func(self, df):
1817 return (np.fabs(self.instFluxToNanojansky(df[self.instFluxPosCol], df[self.photoCalibCol]))
1818 - np.fabs(self.instFluxToNanojansky(df[self.instFluxNegCol], df[self.photoCalibCol])))
1821class LocalDipoleDiffFluxErr(LocalDipoleMeanFlux):
1822 """Compute the error on the absolute difference of dipole fluxes.
1824 See also
1825 --------
1826 LocalNanojansky
1827 LocalNanojanskyErr
1828 LocalMagnitude
1829 LocalMagnitudeErr
1830 LocalDipoleMeanFlux
1831 LocalDipoleMeanFluxErr
1832 LocalDipoleDiffFlux
1833 LocalDipoleDiffFluxErr
1834 """
1836 @property
1837 def columns(self):
1838 return [self.instFluxPosCol,
1839 self.instFluxNegCol,
1840 self.instFluxPosErrCol,
1841 self.instFluxNegErrCol,
1842 self.photoCalibCol,
1843 self.photoCalibErrCol]
1845 @property
1846 def name(self):
1847 return f'dipDiffFluxErr_{self.instFluxPosCol}_{self.instFluxNegCol}'
1849 def _func(self, df):
1850 return np.sqrt(
1851 ((np.fabs(df[self.instFluxPosCol]) - np.fabs(df[self.instFluxNegCol]))
1852 * df[self.photoCalibErrCol])**2
1853 + (df[self.instFluxPosErrCol]**2 + df[self.instFluxNegErrCol]**2)
1854 * df[self.photoCalibCol]**2)
1857class Ratio(Functor):
1858 """Base class for returning the ratio of 2 columns.
1860 Can be used to compute a Signal to Noise ratio for any input flux.
1862 Parameters
1863 ----------
1864 numerator : `str`
1865 Name of the column to use at the numerator in the ratio
1866 denominator : `str`
1867 Name of the column to use as the denominator in the ratio.
1868 """
1869 def __init__(self,
1870 numerator,
1871 denominator,
1872 **kwargs):
1873 self.numerator = numerator
1874 self.denominator = denominator
1875 super().__init__(**kwargs)
1877 @property
1878 def columns(self):
1879 return [self.numerator, self.denominator]
1881 @property
1882 def name(self):
1883 return f'ratio_{self.numerator}_{self.denominator}'
1885 def _func(self, df):
1886 with np.warnings.catch_warnings():
1887 np.warnings.filterwarnings('ignore', r'invalid value encountered')
1888 np.warnings.filterwarnings('ignore', r'divide by zero')
1889 return df[self.numerator] / df[self.denominator]
1892class Ebv(Functor):
1893 """Compute E(B-V) from dustmaps.sfd
1894 """
1895 _defaultDataset = 'ref'
1896 name = "E(B-V)"
1897 shortname = "ebv"
1899 def __init__(self, **kwargs):
1900 # import is only needed for Ebv
1901 from dustmaps.sfd import SFDQuery
1902 self._columns = ['coord_ra', 'coord_dec']
1903 self.sfd = SFDQuery()
1904 super().__init__(**kwargs)
1906 def _func(self, df):
1907 coords = SkyCoord(df['coord_ra']*u.rad, df['coord_dec']*u.rad)
1908 ebv = self.sfd(coords)
1909 # Double precision unnecessary scientifically
1910 # but currently needed for ingest to qserv
1911 return pd.Series(ebv, index=df.index).astype('float64')