Coverage for python/lsst/pipe/tasks/functors.py: 42%
735 statements
« prev ^ index » next coverage.py v7.4.1, created at 2024-02-13 12:19 +0000
« prev ^ index » next coverage.py v7.4.1, created at 2024-02-13 12:19 +0000
1# This file is part of pipe_tasks.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <https://www.gnu.org/licenses/>.
# Names exported by ``from lsst.pipe.tasks.functors import *``; this is the
# module's public API.
# NOTE(review): RAErrColumn, DecErrColumn, and RADecCovColumn are defined
# below but are not listed here — confirm whether that omission is intended.
__all__ = ["init_fromDict", "Functor", "CompositeFunctor", "mag_aware_eval",
           "CustomFunctor", "Column", "Index", "CoordColumn", "RAColumn",
           "DecColumn", "HtmIndex20", "fluxName", "fluxErrName", "Mag",
           "MagErr", "MagDiff", "Color", "DeconvolvedMoments", "SdssTraceSize",
           "PsfSdssTraceSizeDiff", "HsmTraceSize", "PsfHsmTraceSizeDiff",
           "HsmFwhm", "E1", "E2", "RadiusFromQuadrupole", "LocalWcs",
           "ComputePixelScale", "ConvertPixelToArcseconds",
           "ConvertPixelSqToArcsecondsSq", "ReferenceBand", "Photometry",
           "NanoJansky", "NanoJanskyErr", "LocalPhotometry", "LocalNanojansky",
           "LocalNanojanskyErr", "LocalDipoleMeanFlux",
           "LocalDipoleMeanFluxErr", "LocalDipoleDiffFlux",
           "LocalDipoleDiffFluxErr", "Ebv",
           ]
36import yaml
37import re
38from itertools import product
39import logging
40import os.path
41import warnings
43import pandas as pd
44import numpy as np
45import astropy.units as u
46from astropy.coordinates import SkyCoord
48from lsst.utils import doImport
49from lsst.utils.introspection import get_full_type_name
50from lsst.daf.butler import DeferredDatasetHandle
51from lsst.pipe.base import InMemoryDatasetHandle
52import lsst.geom as geom
53import lsst.sphgeom as sphgeom
def init_fromDict(initDict, basePath='lsst.pipe.tasks.functors',
                  typeKey='functor', name=None):
    """Initialize an object defined in a dictionary.

    The object needs to be importable as f'{basePath}.{initDict[typeKey]}'.
    The positional and keyword arguments (if any) are contained in "args" and
    "kwargs" entries in the dictionary, respectively.
    This is used in `~lsst.pipe.tasks.functors.CompositeFunctor.from_yaml` to
    initialize a composite functor from a specification in a YAML file.

    Parameters
    ----------
    initDict : dictionary
        Dictionary describing object's initialization.
        Must contain an entry keyed by ``typeKey`` that is the name of the
        object, relative to ``basePath``.
    basePath : str
        Path relative to module in which ``initDict[typeKey]`` is defined.
    typeKey : str
        Key of ``initDict`` that is the name of the object (relative to
        ``basePath``).
    name : str, optional
        Label for the object being constructed; used only to make the error
        message more informative if construction fails.

    Returns
    -------
    element
        The constructed object.
    """
    # Copy so the pops below do not mutate the caller's dictionary.
    initDict = initDict.copy()
    # TO DO: DM-21956 We should be able to define functors outside this module
    pythonType = doImport(f'{basePath}.{initDict.pop(typeKey)}')
    args = []
    if 'args' in initDict:
        args = initDict.pop('args')
        # A bare string is treated as a single positional argument.
        if isinstance(args, str):
            args = [args]
    try:
        element = pythonType(*args, **initDict)
    except Exception as e:
        message = f'Error in constructing functor "{name}" of type {pythonType.__name__} with args: {args}'
        # Chain the original exception ('from e') so the underlying traceback
        # and cause are preserved for debugging, not just the message.
        raise type(e)(message, e.args) from e
    return element
class Functor(object):
    """Define and execute a calculation on a DataFrame or Handle holding a
    DataFrame.

    The `__call__` method accepts either a `~pandas.DataFrame` object or a
    `~lsst.daf.butler.DeferredDatasetHandle` or
    `~lsst.pipe.base.InMemoryDatasetHandle`, and returns the
    result of the calculation as a single column.
    Each functor defines what columns are needed for the calculation, and only
    these columns are read from the dataset handle.

    The action of `__call__` consists of two steps: first, loading the
    necessary columns from disk into memory as a `~pandas.DataFrame` object;
    and second, performing the computation on this DataFrame and returning the
    result.

    To define a new `Functor`, a subclass must define a `_func` method,
    that takes a `~pandas.DataFrame` and returns result in a `~pandas.Series`.
    In addition, it must define the following attributes:

    * `_columns`: The columns necessary to perform the calculation
    * `name`: A name appropriate for a figure axis label
    * `shortname`: A name appropriate for use as a dictionary key

    On initialization, a `Functor` should declare what band (``filt`` kwarg)
    and dataset (e.g. ``'ref'``, ``'meas'``, ``'forced_src'``) it is intended
    to be applied to.
    This enables the `_get_data` method to extract the proper columns from the
    underlying data.
    If not specified, the dataset will fall back on the `_defaultDataset`
    attribute.
    If band is not specified and ``dataset`` is anything other than ``'ref'``,
    then an error will be raised when trying to perform the calculation.

    Originally, `Functor` was set up to expect datasets formatted like the
    ``deepCoadd_obj`` dataset; that is, a DataFrame with a multi-level column
    index, with the levels of the column index being ``band``, ``dataset``, and
    ``column``.
    It has since been generalized to apply to DataFrames without multi-level
    indices and multi-level indices with just ``dataset`` and ``column``
    levels.
    In addition, the `_get_data` method that reads the columns from the
    underlying data will return a DataFrame with column index levels defined by
    the `_dfLevels` attribute; by default, this is ``column``.

    The `_dfLevels` attributes should generally not need to be changed, unless
    `_func` needs columns from multiple filters or datasets to do the
    calculation.
    An example of this is the `~lsst.pipe.tasks.functors.Color` functor, for
    which `_dfLevels = ('band', 'column')`, and `_func` expects the DataFrame
    it gets to have those levels in the column index.

    Parameters
    ----------
    filt : str
        Band upon which to do the calculation.

    dataset : str
        Dataset upon which to do the calculation (e.g., 'ref', 'meas',
        'forced_src').
    """

    # Dataset used when none is supplied at construction time.
    _defaultDataset = 'ref'
    # Column-index levels kept in the DataFrame handed to `_func`.
    _dfLevels = ('column',)
    # Fallback for the `noDup` property when `noDup` is not passed in.
    _defaultNoDup = False

    def __init__(self, filt=None, dataset=None, noDup=None):
        self.filt = filt
        self.dataset = dataset if dataset is not None else self._defaultDataset
        self._noDup = noDup
        # One logger per concrete functor class name.
        self.log = logging.getLogger(type(self).__name__)

    @property
    def noDup(self):
        """Do not explode by band if used on object table."""
        if self._noDup is not None:
            return self._noDup
        else:
            return self._defaultNoDup

    @property
    def columns(self):
        """Columns required to perform calculation."""
        # Subclasses either set `_columns` or override this property.
        if not hasattr(self, '_columns'):
            raise NotImplementedError('Must define columns property or _columns attribute')
        return self._columns

    def _get_data_columnLevels(self, data, columnIndex=None):
        """Gets the names of the column index levels.

        This should only be called in the context of a multilevel table.

        Parameters
        ----------
        data : various
            The data to be read, can be a
            `~lsst.daf.butler.DeferredDatasetHandle` or
            `~lsst.pipe.base.InMemoryDatasetHandle`.
        columnIndex (optional): pandas `~pandas.Index` object
            If not passed, then it is read from the
            `~lsst.daf.butler.DeferredDatasetHandle`
            for `~lsst.pipe.base.InMemoryDatasetHandle`.

        Returns
        -------
        names : `list` of `str`
            Names of the levels of the column index.
        """
        if columnIndex is None:
            columnIndex = data.get(component="columns")
        return columnIndex.names

    def _get_data_columnLevelNames(self, data, columnIndex=None):
        """Gets the content of each of the column levels for a multilevel
        table.

        Returns
        -------
        columnLevelNames : `dict`
            Maps each level name to the sorted unique values present at that
            level of the column index.
        """
        if columnIndex is None:
            columnIndex = data.get(component="columns")

        columnLevels = columnIndex.names
        # np.unique both deduplicates and sorts the values at each level.
        columnLevelNames = {
            level: list(np.unique(np.array([c for c in columnIndex])[:, i]))
            for i, level in enumerate(columnLevels)
        }
        return columnLevelNames

    def _colsFromDict(self, colDict, columnIndex=None):
        """Converts dictionary column specficiation to a list of columns.

        Returns
        -------
        colsAvailable : `list` of `tuple`
            The cartesian product of the per-level selections, restricted to
            tuples actually present in ``columnIndex``.
        """
        new_colDict = {}
        columnLevels = self._get_data_columnLevels(None, columnIndex=columnIndex)

        for i, lev in enumerate(columnLevels):
            if lev in colDict:
                # Normalize scalar strings to one-element lists.
                if isinstance(colDict[lev], str):
                    new_colDict[lev] = [colDict[lev]]
                else:
                    new_colDict[lev] = colDict[lev]
            else:
                # Level unspecified: select every value at that level.
                new_colDict[lev] = columnIndex.levels[i]

        levelCols = [new_colDict[lev] for lev in columnLevels]
        cols = list(product(*levelCols))
        # Drop combinations that do not exist in the actual column index.
        colsAvailable = [col for col in cols if col in columnIndex]
        return colsAvailable

    def multilevelColumns(self, data, columnIndex=None, returnTuple=False):
        """Returns columns needed by functor from multilevel dataset.

        To access tables with multilevel column structure, the
        `~lsst.daf.butler.DeferredDatasetHandle` or
        `~lsst.pipe.base.InMemoryDatasetHandle` needs to be passed
        either a list of tuples or a dictionary.

        Parameters
        ----------
        data : various
            The data as either `~lsst.daf.butler.DeferredDatasetHandle`, or
            `~lsst.pipe.base.InMemoryDatasetHandle`.
        columnIndex (optional): pandas `~pandas.Index` object
            Either passed or read in from
            `~lsst.daf.butler.DeferredDatasetHandle`.
        `returnTuple` : `bool`
            If true, then return a list of tuples rather than the column
            dictionary specification.
            This is set to `True` by `CompositeFunctor` in order to be able to
            combine columns from the various component functors.

        Raises
        ------
        RuntimeError
            Raised if ``data`` is not a supported handle type.
        ValueError
            Raised if no band is set and the dataset is not ``'ref'`` while
            the table has a ``band`` level.
        """
        if not isinstance(data, (DeferredDatasetHandle, InMemoryDatasetHandle)):
            raise RuntimeError(f"Unexpected data type. Got {get_full_type_name(data)}.")

        if columnIndex is None:
            columnIndex = data.get(component="columns")

        # Confirm that the dataset has the column levels the functor is
        # expecting it to have.
        columnLevels = self._get_data_columnLevels(data, columnIndex)

        columnDict = {'column': self.columns,
                      'dataset': self.dataset}
        if self.filt is None:
            columnLevelNames = self._get_data_columnLevelNames(data, columnIndex)
            if "band" in columnLevels:
                if self.dataset == "ref":
                    # 'ref' columns are band-independent; any band works, so
                    # take the first one present.
                    columnDict["band"] = columnLevelNames["band"][0]
                else:
                    raise ValueError(f"'filt' not set for functor {self.name}"
                                     f"(dataset {self.dataset}) "
                                     "and DataFrame "
                                     "contains multiple filters in column index. "
                                     "Set 'filt' or set 'dataset' to 'ref'.")
        else:
            columnDict['band'] = self.filt

        if returnTuple:
            return self._colsFromDict(columnDict, columnIndex=columnIndex)
        else:
            return columnDict

    def _func(self, df, dropna=True):
        # NOTE(review): `dropna` is accepted but unused here; dropping is
        # handled by `__call__` via `_dropna`. Subclasses override this.
        raise NotImplementedError('Must define calculation on DataFrame')

    def _get_columnIndex(self, data):
        """Return columnIndex."""
        # Only handles expose a "columns" component; plain DataFrames are
        # wrapped before this is called (see `_get_data`).
        if isinstance(data, (DeferredDatasetHandle, InMemoryDatasetHandle)):
            return data.get(component="columns")
        else:
            return None

    def _get_data(self, data):
        """Retrieve DataFrame necessary for calculation.

        The data argument can be a `~pandas.DataFrame`, a
        `~lsst.daf.butler.DeferredDatasetHandle`, or
        an `~lsst.pipe.base.InMemoryDatasetHandle`.

        Returns a DataFrame upon which `self._func` can act.
        """
        # We wrap a DataFrame in a handle here to take advantage of the
        # DataFrame delegate DataFrame column wrangling abilities.
        if isinstance(data, pd.DataFrame):
            _data = InMemoryDatasetHandle(data, storageClass="DataFrame")
        elif isinstance(data, (DeferredDatasetHandle, InMemoryDatasetHandle)):
            _data = data
        else:
            raise RuntimeError(f"Unexpected type provided for data. Got {get_full_type_name(data)}.")

        # First thing to do: check to see if the data source has a multilevel
        # column index or not.
        columnIndex = self._get_columnIndex(_data)
        is_multiLevel = isinstance(columnIndex, pd.MultiIndex)

        # Get proper columns specification for this functor.
        if is_multiLevel:
            columns = self.multilevelColumns(_data, columnIndex=columnIndex)
        else:
            columns = self.columns

        # Load in-memory DataFrame with appropriate columns the gen3 way.
        df = _data.get(parameters={"columns": columns})

        # Drop unnecessary column levels.
        if is_multiLevel:
            df = self._setLevels(df)

        return df

    def _setLevels(self, df):
        # Drop every column-index level not declared in `_dfLevels`, so
        # `_func` sees only the levels it expects.
        levelsToDrop = [n for n in df.columns.names if n not in self._dfLevels]
        df.columns = df.columns.droplevel(levelsToDrop)
        return df

    def _dropna(self, vals):
        return vals.dropna()

    def __call__(self, data, dropna=False):
        # Load only the needed columns, then compute; on any failure fall
        # back to an all-NaN column via `fail` rather than propagating.
        df = self._get_data(data)
        try:
            vals = self._func(df)
        except Exception as e:
            self.log.error("Exception in %s call: %s: %s", self.name, type(e).__name__, e)
            vals = self.fail(df)
        if dropna:
            vals = self._dropna(vals)

        return vals

    def difference(self, data1, data2, **kwargs):
        """Computes difference between functor called on two different
        DataFrame/Handle objects.
        """
        return self(data1, **kwargs) - self(data2, **kwargs)

    def fail(self, df):
        # All-NaN Series aligned with the input index; used as the result
        # when `_func` raises.
        return pd.Series(np.full(len(df), np.nan), index=df.index)

    @property
    def name(self):
        """Full name of functor (suitable for figure labels)."""
        # NOTE(review): this returns the NotImplementedError class object
        # rather than raising it. Error paths above interpolate `self.name`
        # into messages, so `name` must never raise; left as-is.
        return NotImplementedError

    @property
    def shortname(self):
        """Short name of functor (suitable for column name/dict key)."""
        return self.name
class CompositeFunctor(Functor):
    """Perform multiple calculations at once on a catalog.

    The role of a `CompositeFunctor` is to group together computations from
    multiple functors.
    Instead of returning `~pandas.Series` a `CompositeFunctor` returns a
    `~pandas.DataFrame`, with the column names being the keys of ``funcDict``.

    The `columns` attribute of a `CompositeFunctor` is the union of all columns
    in all the component functors.

    A `CompositeFunctor` does not use a `_func` method itself; rather, when a
    `CompositeFunctor` is called, all its columns are loaded at once, and the
    resulting DataFrame is passed to the `_func` method of each component
    functor.
    This has the advantage of only doing I/O (reading from parquet file) once,
    and works because each individual `_func` method of each component functor
    does not care if there are *extra* columns in the DataFrame being passed;
    only that it must contain *at least* the `columns` it expects.

    An important and useful class method is `from_yaml`, which takes as an
    argument the path to a YAML file specifying a collection of functors.

    Parameters
    ----------
    funcs : `dict` or `list`
        Dictionary or list of functors.
        If a list, then it will be converted into a dictonary according to the
        `.shortname` attribute of each functor.
    """
    dataset = None
    name = "CompositeFunctor"

    def __init__(self, funcs, **kwargs):
        # Use isinstance rather than an exact type check so dict subclasses
        # (e.g. OrderedDict) are treated as mappings, not as sequences of
        # functors.
        if isinstance(funcs, dict):
            self.funcDict = funcs
        else:
            self.funcDict = {f.shortname: f for f in funcs}

        self._filt = None

        super().__init__(**kwargs)

    @property
    def filt(self):
        return self._filt

    @filt.setter
    def filt(self, filt):
        # Propagate the band to every component functor.
        if filt is not None:
            for _, f in self.funcDict.items():
                f.filt = filt
        self._filt = filt

    def update(self, new):
        """Update the functor with new functors.

        Parameters
        ----------
        new : `dict` or `CompositeFunctor`
            Functors to merge into ``funcDict``.

        Raises
        ------
        TypeError
            Raised if ``new`` is neither a dict nor a `CompositeFunctor`.
        """
        if isinstance(new, dict):
            self.funcDict.update(new)
        elif isinstance(new, CompositeFunctor):
            self.funcDict.update(new.funcDict)
        else:
            raise TypeError('Can only update with dictionary or CompositeFunctor.')

        # Make sure new functors have the same 'filt' set.
        if self.filt is not None:
            self.filt = self.filt

    @property
    def columns(self):
        # Union of the columns needed by all component functors.
        return list(set([x for y in [f.columns for f in self.funcDict.values()] for x in y]))

    def multilevelColumns(self, data, **kwargs):
        # Get the union of columns for all component functors.
        # Note the need to have `returnTuple=True` here.
        return list(
            set(
                [
                    x
                    for y in [
                        f.multilevelColumns(data, returnTuple=True, **kwargs) for f in self.funcDict.values()
                    ]
                    for x in y
                ]
            )
        )

    def __call__(self, data, **kwargs):
        """Apply the functor to the data table.

        Parameters
        ----------
        data : various
            The data represented as `~lsst.daf.butler.DeferredDatasetHandle`,
            `~lsst.pipe.base.InMemoryDatasetHandle`, or `~pandas.DataFrame`.
            The table or a pointer to a table on disk from which columns can
            be accessed.

        Returns
        -------
        valDf : `~pandas.DataFrame`
            One output column per component functor, keyed by ``funcDict``.
        """
        if isinstance(data, pd.DataFrame):
            _data = InMemoryDatasetHandle(data, storageClass="DataFrame")
        elif isinstance(data, (DeferredDatasetHandle, InMemoryDatasetHandle)):
            _data = data
        else:
            raise RuntimeError(f"Unexpected type provided for data. Got {get_full_type_name(data)}.")

        columnIndex = self._get_columnIndex(_data)

        if isinstance(columnIndex, pd.MultiIndex):
            # Read all needed columns once, then let each component functor
            # act on its own sub-frame.
            columns = self.multilevelColumns(_data, columnIndex=columnIndex)
            df = _data.get(parameters={"columns": columns})

            valDict = {}
            for k, f in self.funcDict.items():
                try:
                    subdf = f._setLevels(
                        df[f.multilevelColumns(_data, returnTuple=True, columnIndex=columnIndex)]
                    )
                    valDict[k] = f._func(subdf)
                except Exception as e:
                    self.log.exception(
                        "Exception in %s (funcs: %s) call: %s",
                        self.name,
                        str(list(self.funcDict.keys())),
                        type(e).__name__,
                    )
                    try:
                        valDict[k] = f.fail(subdf)
                    except NameError:
                        # subdf was never assigned (the failure happened while
                        # selecting columns), so there is nothing to fall
                        # back on; re-raise the original error.
                        raise e
        else:
            df = _data.get(parameters={"columns": self.columns})

            valDict = {k: f._func(df) for k, f in self.funcDict.items()}

            # Check that output columns are actually columns.
            for name, colVal in valDict.items():
                if len(colVal.shape) != 1:
                    raise RuntimeError("Transformed column '%s' is not the shape of a column. "
                                       "It is shaped %s and type %s." % (name, colVal.shape, type(colVal)))

        try:
            valDf = pd.concat(valDict, axis=1)
        except TypeError:
            # Debugging aid: show which functor produced an unexpected type
            # before re-raising.
            print([(k, type(v)) for k, v in valDict.items()])
            raise

        if kwargs.get('dropna', False):
            valDf = valDf.dropna(how='any')

        return valDf

    @classmethod
    def renameCol(cls, col, renameRules):
        """Rename a column by applying every matching prefix rule.

        Each ``(old, new)`` pair whose ``old`` string prefixes ``col`` is
        applied in order via `str.replace`.
        """
        if renameRules is None:
            return col
        for old, new in renameRules:
            if col.startswith(old):
                col = col.replace(old, new)
        return col

    @classmethod
    def from_file(cls, filename, **kwargs):
        """Construct a `CompositeFunctor` from a YAML file on disk."""
        # Allow environment variables in the filename.
        filename = os.path.expandvars(filename)
        with open(filename) as f:
            translationDefinition = yaml.safe_load(f)

        return cls.from_yaml(translationDefinition, **kwargs)

    @classmethod
    def from_yaml(cls, translationDefinition, **kwargs):
        """Construct a `CompositeFunctor` from a parsed YAML specification.

        Recognized keys: ``funcs`` (required), ``flag_rename_rules``, and the
        flag lists ``calexpFlags``/``refFlags``/``forcedFlags``/``flags``,
        which become `Column` functors on the corresponding dataset.
        """
        funcs = {}
        for func, val in translationDefinition['funcs'].items():
            funcs[func] = init_fromDict(val, name=func)

        if 'flag_rename_rules' in translationDefinition:
            renameRules = translationDefinition['flag_rename_rules']
        else:
            renameRules = None

        if 'calexpFlags' in translationDefinition:
            for flag in translationDefinition['calexpFlags']:
                funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='calexp')

        if 'refFlags' in translationDefinition:
            for flag in translationDefinition['refFlags']:
                funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='ref')

        if 'forcedFlags' in translationDefinition:
            for flag in translationDefinition['forcedFlags']:
                funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='forced_src')

        if 'flags' in translationDefinition:
            for flag in translationDefinition['flags']:
                funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='meas')

        return cls(funcs, **kwargs)
def mag_aware_eval(df, expr, log):
    """Evaluate an expression on a DataFrame, knowing what the 'mag' function
    means.

    Builds on `pandas.DataFrame.eval`, which parses and executes math on
    DataFrames.

    Parameters
    ----------
    df : ~pandas.DataFrame
        DataFrame on which to evaluate expression.

    expr : str
        Expression.

    log : `logging.Logger`
        Logger used to report a failed first evaluation attempt.
    """
    def _rewrite(suffix):
        # Translate mag(col) into the log10-based expression understood by
        # DataFrame.eval, optionally appending a column-name suffix.
        return re.sub(r'mag\((\w+)\)', rf'-2.5*log(\g<1>{suffix})/log(10)', expr)

    try:
        result = df.eval(_rewrite(''))
    except Exception as e:  # Should check what actually gets raised
        log.error("Exception in mag_aware_eval: %s: %s", type(e).__name__, e)
        # Retry assuming bare names inside mag() refer to instFlux columns.
        result = df.eval(_rewrite('_instFlux'))
    return result
class CustomFunctor(Functor):
    """Arbitrary computation on a catalog.

    Column names (and thus the columns to be loaded from catalog) are found by
    finding all words and trying to ignore all "math-y" words.

    Parameters
    ----------
    expr : str
        Expression to evaluate, to be parsed and executed by
        `~lsst.pipe.tasks.functors.mag_aware_eval`.
    """
    # Tokens that look like identifiers but are functions, not columns.
    _ignore_words = ('mag', 'sin', 'cos', 'exp', 'log', 'sqrt')

    def __init__(self, expr, **kwargs):
        self.expr = expr
        super().__init__(**kwargs)

    @property
    def name(self):
        # The expression itself serves as the functor's name.
        return self.expr

    @property
    def columns(self):
        # Arguments of mag(...) calls may need an _instFlux suffix appended.
        flux_cols = re.findall(r'mag\(\s*(\w+)\s*\)', self.expr)

        # Every identifier-like token that is not a known math word is a
        # candidate column.
        candidates = [tok for tok in re.findall(r'[a-zA-Z_]+', self.expr)
                      if tok not in self._ignore_words]

        to_remove = []
        for flux_col in flux_cols:
            if re.search('_instFlux$', flux_col):
                candidates.append(flux_col)
            else:
                # A bare name inside mag() stands for its instFlux column;
                # the bare name itself is not a real column.
                candidates.append(f'{flux_col}_instFlux')
                to_remove.append(flux_col)

        return list({c for c in candidates if c not in to_remove})

    def _func(self, df):
        return mag_aware_eval(df, self.expr, self.log)
class Column(Functor):
    """Get column with a specified name."""

    def __init__(self, col, **kwargs):
        # Name of the column to select.
        self.col = col
        super().__init__(**kwargs)

    @property
    def name(self):
        # The column name doubles as the functor name.
        return self.col

    @property
    def columns(self):
        # Only the single requested column needs to be loaded.
        return [self.col]

    def _func(self, df):
        # Plain passthrough of the stored column.
        return df[self.col]
class Index(Functor):
    """Return the value of the index for each object."""

    # Dummy column: only the DataFrame index is needed, but the loading
    # machinery requires that at least one column be requested.
    columns = ['coord_ra']
    _defaultDataset = 'ref'
    _defaultNoDup = True

    def _func(self, df):
        # Materialize the index as a Series aligned with itself.
        return pd.Series(df.index, index=df.index)
class CoordColumn(Column):
    """Base class for coordinate column, in degrees."""

    # When True, the stored column is in radians and must be converted.
    _radians = True

    def __init__(self, col, **kwargs):
        super().__init__(col, **kwargs)

    def _func(self, df):
        # The multiplication produces a new Series, so the original column
        # (which may be shared with other functors) is never modified.
        values = df[self.col]
        if self._radians:
            return values * 180 / np.pi
        return values
class RAColumn(CoordColumn):
    """Right Ascension, in degrees."""

    name = 'RA'
    _defaultNoDup = True

    def __init__(self, **kwargs):
        # RA is always stored in the 'coord_ra' column.
        super().__init__('coord_ra', **kwargs)

    def __call__(self, catalog, **kwargs):
        # No extra behavior; delegate to CoordColumn's unit handling.
        return super().__call__(catalog, **kwargs)
class DecColumn(CoordColumn):
    """Declination, in degrees."""

    name = 'Dec'
    _defaultNoDup = True

    def __init__(self, **kwargs):
        # Declination is always stored in the 'coord_dec' column.
        super().__init__('coord_dec', **kwargs)

    def __call__(self, catalog, **kwargs):
        # No extra behavior; delegate to CoordColumn's unit handling.
        return super().__call__(catalog, **kwargs)
class RAErrColumn(CoordColumn):
    """Uncertainty in Right Ascension, in degrees."""

    name = 'RAErr'
    _defaultNoDup = True

    def __init__(self, **kwargs):
        # The RA uncertainty column; converted from radians by CoordColumn.
        super().__init__('coord_raErr', **kwargs)
class DecErrColumn(CoordColumn):
    """Uncertainty in declination, in degrees."""

    name = 'DecErr'
    _defaultNoDup = True

    def __init__(self, **kwargs):
        # The Dec uncertainty column; converted from radians by CoordColumn.
        super().__init__('coord_decErr', **kwargs)
class RADecCovColumn(Column):
    """Coordinate covariance column, in degrees."""

    # Stored value is in radians^2 when True.
    _radians = True
    name = 'RADecCov'
    _defaultNoDup = True

    def __init__(self, **kwargs):
        super().__init__('coord_ra_dec_Cov', **kwargs)

    def _func(self, df):
        # A covariance scales with the *square* of the unit conversion,
        # hence (180/pi)**2 instead of the single factor used for
        # coordinates. The multiplication yields a new Series, leaving the
        # original (possibly shared) column untouched.
        if self._radians:
            return df[self.col] * (180 / np.pi)**2
        return df[self.col]
class HtmIndex20(Functor):
    """Compute the level 20 HtmIndex for the catalog.

    Notes
    -----
    This functor was implemented to satisfy requirements of old APDB interface
    which required the ``pixelId`` column in DiaObject with HTM20 index.
    The APDB interface had migrated to not need that information, but we keep
    this class in case it may be useful for something else.
    """
    name = "Htm20"
    htmLevel = 20
    # Input coordinate columns are assumed to be in radians when True.
    _radians = True

    def __init__(self, ra, dec, **kwargs):
        self.pixelator = sphgeom.HtmPixelization(self.htmLevel)
        self.ra = ra
        self.dec = dec
        self._columns = [self.ra, self.dec]
        super().__init__(**kwargs)

    def _func(self, df):
        # Units of the stored coordinate columns.
        units = geom.radians if self._radians else geom.degrees

        def computePixel(row):
            sphPoint = geom.SpherePoint(row[self.ra], row[self.dec], units)
            return self.pixelator.index(sphPoint.getVector())

        return df.apply(computePixel, axis=1, result_type='reduce').astype('int64')
def fluxName(col):
    """Append _instFlux to the column name if it doesn't have it already."""
    suffix = '_instFlux'
    return col if col.endswith(suffix) else col + suffix
def fluxErrName(col):
    """Append _instFluxErr to the column name if it doesn't have it already."""
    suffix = '_instFluxErr'
    return col if col.endswith(suffix) else col + suffix
class Mag(Functor):
    """Compute calibrated magnitude.

    Returns the flux at mag=0.
    The default ``fluxMag0`` is 63095734448.0194, which is default for HSC.
    TO DO: This default should be made configurable in DM-21955.

    This calculation hides warnings about invalid values and dividing by zero.

    As with all functors, a ``dataset`` and ``filt`` kwarg should be provided
    upon initialization.
    Unlike the default `Functor`, however, the default dataset for a `Mag` is
    ``'meas'``, rather than ``'ref'``.

    Parameters
    ----------
    col : `str`
        Name of flux column from which to compute magnitude.
        Can be parseable by the `~lsst.pipe.tasks.functors.fluxName` function;
        that is, you can pass ``'modelfit_CModel'`` instead of
        ``'modelfit_CModel_instFlux'``, and it will understand.
    fluxMag0 : `float`, optional
        Flux (in instrumental units) of a zeroth-magnitude source.
        Defaults to the HSC value previously hard-coded here, so existing
        callers see identical behavior.
    """
    _defaultDataset = 'meas'

    def __init__(self, col, fluxMag0=63095734448.0194, **kwargs):
        self.col = fluxName(col)
        # TO DO: DM-21955 Replace hard coded photometic calibration values.
        # The zero point is now overridable per-instance via ``fluxMag0``.
        self.fluxMag0 = fluxMag0

        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.col]

    def _func(self, df):
        # Invalid-value / divide-by-zero warnings are expected for
        # non-detections (zero or negative fluxes); suppress them.
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', r'invalid value encountered')
            warnings.filterwarnings('ignore', r'divide by zero')
            return -2.5*np.log10(df[self.col] / self.fluxMag0)

    @property
    def name(self):
        return f'mag_{self.col}'
class MagErr(Mag):
    """Compute calibrated magnitude uncertainty.

    Parameters
    ----------
    col : `str`
        Name of the flux column.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # TO DO: DM-21955 Replace hard coded photometic calibration values.
        self.fluxMag0Err = 0.

    @property
    def columns(self):
        # Both the flux column and its matching error column are required.
        return [self.col, self.col + 'Err']

    def _func(self, df):
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', r'invalid value encountered')
            warnings.filterwarnings('ignore', r'divide by zero')
            fluxCol, fluxErrCol = self.columns
            # Standard propagation for -2.5*log10(flux/fluxMag0): combine the
            # relative errors of the flux and the zero point in quadrature.
            relFluxErr = df[fluxErrCol] / df[fluxCol]
            relMag0Err = self.fluxMag0Err / self.fluxMag0
            return (2.5 / np.log(10.)) * np.sqrt(relFluxErr*relFluxErr + relMag0Err*relMag0Err)

    @property
    def name(self):
        return super().name + '_err'
class MagDiff(Functor):
    """Functor to calculate magnitude difference."""

    _defaultDataset = 'meas'

    def __init__(self, col1, col2, **kwargs):
        # Normalize both names to full _instFlux column names.
        self.col1 = fluxName(col1)
        self.col2 = fluxName(col2)
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.col1, self.col2]

    def _func(self, df):
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', r'invalid value encountered')
            warnings.filterwarnings('ignore', r'divide by zero')
            # A flux ratio is a magnitude difference; the zero point cancels.
            return -2.5*np.log10(df[self.col1]/df[self.col2])

    @property
    def name(self):
        return f'(mag_{self.col1} - mag_{self.col2})'

    @property
    def shortname(self):
        return f'magDiff_{self.col1}_{self.col2}'
class Color(Functor):
    """Compute the color between two filters.

    Computes color by initializing two different `Mag` functors based on the
    ``col`` and filters provided, and then returning the difference.

    This is enabled by the `_func` method expecting a DataFrame with a
    multilevel column index, with both ``'band'`` and ``'column'``, instead of
    just ``'column'``, which is the `Functor` default.
    This is controlled by the `_dfLevels` attribute.

    Also of note, the default dataset for `Color` is ``forced_src'``, whereas
    for `Mag` it is ``'meas'``.

    Parameters
    ----------
    col : str
        Name of the flux column from which to compute; same as would be passed
        to `~lsst.pipe.tasks.functors.Mag`.

    filt2, filt1 : str
        Filters from which to compute magnitude difference.
        Color computed is ``Mag(filt2) - Mag(filt1)``.
    """
    _defaultDataset = 'forced_src'
    # _func needs both the band and column levels of the column index.
    _dfLevels = ('band', 'column')
    _defaultNoDup = True

    def __init__(self, col, filt2, filt1, **kwargs):
        self.col = fluxName(col)
        if filt2 == filt1:
            raise RuntimeError("Cannot compute Color for %s: %s - %s " % (col, filt2, filt1))
        self.filt2 = filt2
        self.filt1 = filt1

        # One magnitude functor per band; their difference is the color.
        self.mag2 = Mag(col, filt=filt2, **kwargs)
        self.mag1 = Mag(col, filt=filt1, **kwargs)

        super().__init__(**kwargs)

    @property
    def filt(self):
        # A color spans two bands, so a single 'filt' is undefined.
        return None

    @filt.setter
    def filt(self, filt):
        # Deliberately ignore assignments (e.g. from CompositeFunctor.filt);
        # the two bands are fixed at construction time.
        pass

    def _func(self, df):
        magBand2 = self.mag2._func(df[self.filt2])
        magBand1 = self.mag1._func(df[self.filt1])
        return magBand2 - magBand1

    @property
    def columns(self):
        return [self.mag1.col, self.mag2.col]

    def multilevelColumns(self, parq, **kwargs):
        # Explicit (dataset, band, column) tuples for the two bands.
        return [(self.dataset, self.filt1, self.col), (self.dataset, self.filt2, self.col)]

    @property
    def name(self):
        return f'{self.filt2} - {self.filt1} ({self.col})'

    @property
    def shortname(self):
        return f"{self.col}_{self.filt2.replace('-', '')}m{self.filt1.replace('-', '')}"
class DeconvolvedMoments(Functor):
    """This functor subtracts the trace of the PSF second moments from the
    trace of the second moments of the source.

    If the HsmShapeAlgorithm measurement is valid, then these will be used for
    the sources.
    Otherwise, the SdssShapeAlgorithm measurements will be used.
    """
    name = 'Deconvolved Moments'
    shortname = 'deconvolvedMoments'
    _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
                "ext_shapeHSM_HsmSourceMoments_yy",
                "base_SdssShape_xx", "base_SdssShape_yy",
                "ext_shapeHSM_HsmPsfMoments_xx",
                "ext_shapeHSM_HsmPsfMoments_yy")

    def _func(self, df):
        """Calculate deconvolved moments."""
        # Trace of HSM source moments when available; otherwise all-NaN so
        # the element-wise SDSS fallback below is used everywhere.
        if "ext_shapeHSM_HsmSourceMoments_xx" in df.columns:
            traceHsm = df["ext_shapeHSM_HsmSourceMoments_xx"] + df["ext_shapeHSM_HsmSourceMoments_yy"]
        else:
            traceHsm = np.ones(len(df))*np.nan
        traceSdss = df["base_SdssShape_xx"] + df["base_SdssShape_yy"]
        if "ext_shapeHSM_HsmPsfMoments_xx" in df.columns:
            tracePsf = df["ext_shapeHSM_HsmPsfMoments_xx"] + df["ext_shapeHSM_HsmPsfMoments_yy"]
        else:
            # LSST does not have shape.sdss.psf.
            # We could instead add base_PsfShape to the catalog using
            # exposure.getPsf().computeShape(s.getCentroid()).getIxx().
            raise RuntimeError('No psf shape parameter found in catalog')

        # Prefer HSM where finite, fall back to SDSS, then subtract the PSF
        # contribution.
        return traceHsm.where(np.isfinite(traceHsm), traceSdss) - tracePsf
class SdssTraceSize(Functor):
    """Functor to calculate the SDSS trace radius size for sources.

    The SDSS trace radius size is a measure of size equal to the square root
    of half of the trace of the second moments tensor measured with the
    SdssShapeAlgorithm plugin.
    This has units of pixels.
    """
    name = "SDSS Trace Size"
    shortname = 'sdssTrace'
    _columns = ("base_SdssShape_xx", "base_SdssShape_yy")

    def _func(self, df):
        # sqrt(T/2) with T = Ixx + Iyy, in pixels.
        trace = df["base_SdssShape_xx"] + df["base_SdssShape_yy"]
        return np.sqrt(0.5*trace)
class PsfSdssTraceSizeDiff(Functor):
    """Functor to calculate the SDSS trace radius size difference (%) between
    the object and the PSF model.

    See Also
    --------
    SdssTraceSize
    """
    name = "PSF - SDSS Trace Size"
    shortname = 'psf_sdssTrace'
    _columns = ("base_SdssShape_xx", "base_SdssShape_yy",
                "base_SdssShape_psf_xx", "base_SdssShape_psf_yy")

    def _func(self, df):
        # Trace radii (pixels) of the source and of the PSF model.
        srcSize = np.sqrt(0.5*(df["base_SdssShape_xx"] + df["base_SdssShape_yy"]))
        psfSize = np.sqrt(0.5*(df["base_SdssShape_psf_xx"] + df["base_SdssShape_psf_yy"]))
        # Percentage difference, normalized by the mean of the two sizes.
        return 100*(srcSize - psfSize)/(0.5*(srcSize + psfSize))
class HsmTraceSize(Functor):
    """Functor to calculate the HSM trace radius size for sources.

    The HSM trace radius size is a measure of size equal to the square root
    of half of the trace of the second moments tensor measured with the
    HsmShapeAlgorithm plugin.
    This has units of pixels.
    """
    name = 'HSM Trace Size'
    shortname = 'hsmTrace'
    _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
                "ext_shapeHSM_HsmSourceMoments_yy")

    def _func(self, df):
        # sqrt(T/2) with T = Ixx + Iyy, in pixels.
        trace = (df["ext_shapeHSM_HsmSourceMoments_xx"]
                 + df["ext_shapeHSM_HsmSourceMoments_yy"])
        return np.sqrt(0.5*trace)
class PsfHsmTraceSizeDiff(Functor):
    """Functor to calculate the HSM trace radius size difference (%) between
    the object and the PSF model.

    See Also
    --------
    HsmTraceSize
    """
    name = 'PSF - HSM Trace Size'
    shortname = 'psf_HsmTrace'
    _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
                "ext_shapeHSM_HsmSourceMoments_yy",
                "ext_shapeHSM_HsmPsfMoments_xx",
                "ext_shapeHSM_HsmPsfMoments_yy")

    def _func(self, df):
        # Trace radii (pixels) of the source and of the PSF model.
        srcSize = np.sqrt(0.5*(df["ext_shapeHSM_HsmSourceMoments_xx"]
                               + df["ext_shapeHSM_HsmSourceMoments_yy"]))
        psfSize = np.sqrt(0.5*(df["ext_shapeHSM_HsmPsfMoments_xx"]
                               + df["ext_shapeHSM_HsmPsfMoments_yy"]))
        # Percentage difference, normalized by the mean of the two sizes.
        return 100*(srcSize - psfSize)/(0.5*(srcSize + psfSize))
class HsmFwhm(Functor):
    """Functor to calculate the PSF FWHM with second moments measured from the
    HsmShapeAlgorithm plugin.

    This is in units of arcseconds, and assumes the hsc_rings_v1 skymap pixel
    scale of 0.168 arcseconds/pixel.

    Notes
    -----
    This conversion assumes the PSF is Gaussian, which is not always the case.
    """
    name = 'HSM Psf FWHM'
    _columns = ('ext_shapeHSM_HsmPsfMoments_xx', 'ext_shapeHSM_HsmPsfMoments_yy')
    # TODO: DM-21403 pixel scale should be computed from the CD matrix or transform matrix
    pixelScale = 0.168
    # FWHM of a Gaussian is 2*sqrt(2 ln 2) times its sigma.
    SIGMA2FWHM = 2*np.sqrt(2*np.log(2))

    def _func(self, df):
        # Trace radius of the PSF in pixels, converted to a FWHM in arcsec.
        traceRadius = np.sqrt(
            0.5*(df['ext_shapeHSM_HsmPsfMoments_xx'] + df['ext_shapeHSM_HsmPsfMoments_yy']))
        return self.pixelScale*self.SIGMA2FWHM*traceRadius
class E1(Functor):
    r"""Calculate :math:`e_1` ellipticity component for sources, defined as:

    .. math::
        e_1 &= (I_{xx}-I_{yy})/(I_{xx}+I_{yy})

    See Also
    --------
    E2
    """
    name = "Distortion Ellipticity (e1)"
    shortname = "Distortion"

    def __init__(self, colXX, colXY, colYY, **kwargs):
        self.colXX = colXX
        self.colXY = colXY
        self.colYY = colYY
        self._columns = [self.colXX, self.colXY, self.colYY]
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.colXX, self.colXY, self.colYY]

    def _func(self, df):
        # Bug fix: the numerator must be parenthesized.  Without the
        # parentheses this evaluated Ixx - Iyy/(Ixx + Iyy), disagreeing with
        # the documented definition e1 = (Ixx - Iyy)/(Ixx + Iyy) and with the
        # sibling E2 functor, which divides its full numerator by the trace.
        return (df[self.colXX] - df[self.colYY]) / (df[self.colXX] + df[self.colYY])
class E2(Functor):
    r"""Calculate :math:`e_2` ellipticity component for sources, defined as:

    .. math::
        e_2 &= 2I_{xy}/(I_{xx}+I_{yy})

    See Also
    --------
    E1
    """
    name = "Ellipticity e2"

    def __init__(self, colXX, colXY, colYY, **kwargs):
        self.colXX = colXX
        self.colXY = colXY
        self.colYY = colYY
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.colXX, self.colXY, self.colYY]

    def _func(self, df):
        # e2 = 2*Ixy / trace, with trace = Ixx + Iyy.
        trace = df[self.colXX] + df[self.colYY]
        return 2*df[self.colXY] / trace
class RadiusFromQuadrupole(Functor):
    """Calculate the radius from the quadrupole moments.

    This returns the fourth root of the determinant of the second moments
    tensor, which has units of pixels.

    See Also
    --------
    SdssTraceSize
    HsmTraceSize
    """

    def __init__(self, colXX, colXY, colYY, **kwargs):
        self.colXX = colXX
        self.colXY = colXY
        self.colYY = colYY
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.colXX, self.colXY, self.colYY]

    def _func(self, df):
        # |det Q|^(1/4) with Q the (Ixx, Ixy; Ixy, Iyy) moments tensor.
        determinant = df[self.colXX]*df[self.colYY] - df[self.colXY]**2
        return determinant**0.25
class LocalWcs(Functor):
    """Computations using the stored localWcs."""
    name = "LocalWcsOperations"

    def __init__(self,
                 colCD_1_1,
                 colCD_1_2,
                 colCD_2_1,
                 colCD_2_2,
                 **kwargs):
        self.colCD_1_1 = colCD_1_1
        self.colCD_1_2 = colCD_1_2
        self.colCD_2_1 = colCD_2_1
        self.colCD_2_2 = colCD_2_2
        super().__init__(**kwargs)

    def computeDeltaRaDec(self, x, y, cd11, cd12, cd21, cd22):
        """Convert a pixel offset to an RA/Dec offset using the local Wcs.

        Parameters
        ----------
        x : `~pandas.Series`
            X pixel coordinate.
        y : `~pandas.Series`
            Y pixel coordinate.
        cd11 : `~pandas.Series`
            [1, 1] element of the local Wcs affine transform.
        cd12 : `~pandas.Series`
            [1, 2] element of the local Wcs affine transform.
        cd21 : `~pandas.Series`
            [2, 1] element of the local Wcs affine transform.
        cd22 : `~pandas.Series`
            [2, 2] element of the local Wcs affine transform.

        Returns
        -------
        raDecTuple : tuple
            RA and dec conversion of x and y given the local Wcs.
            Returned units are in radians.
        """
        # Multiply the pixel offset vector by the local CD matrix.
        deltaRa = x * cd11 + y * cd12
        deltaDec = x * cd21 + y * cd22
        return (deltaRa, deltaDec)

    def computeSkySeparation(self, ra1, dec1, ra2, dec2):
        """Compute the great-circle separation of two sky positions.

        Parameters
        ----------
        ra1 : `~pandas.Series`
            Ra of the first coordinate in radians.
        dec1 : `~pandas.Series`
            Dec of the first coordinate in radians.
        ra2 : `~pandas.Series`
            Ra of the second coordinate in radians.
        dec2 : `~pandas.Series`
            Dec of the second coordinate in radians.

        Returns
        -------
        dist : `~pandas.Series`
            Distance on the sphere in radians.
        """
        # Haversine formula: numerically stable for small separations.
        halfDeltaDec = (dec2 - dec1) / 2
        halfDeltaRa = (ra2 - ra1) / 2
        haversine = (np.sin(halfDeltaDec) ** 2
                     + np.cos(dec2) * np.cos(dec1) * np.sin(halfDeltaRa) ** 2)
        return 2 * np.arcsin(np.sqrt(haversine))

    def getSkySeparationFromPixel(self, x1, y1, x2, y2, cd11, cd12, cd21, cd22):
        """Compute the on-sky separation between two pixel positions.

        Parameters
        ----------
        x1 : `~pandas.Series`
            X pixel coordinate.
        y1 : `~pandas.Series`
            Y pixel coordinate.
        x2 : `~pandas.Series`
            X pixel coordinate.
        y2 : `~pandas.Series`
            Y pixel coordinate.
        cd11 : `~pandas.Series`
            [1, 1] element of the local Wcs affine transform.
        cd12 : `~pandas.Series`
            [1, 2] element of the local Wcs affine transform.
        cd21 : `~pandas.Series`
            [2, 1] element of the local Wcs affine transform.
        cd22 : `~pandas.Series`
            [2, 2] element of the local Wcs affine transform.

        Returns
        -------
        Distance : `~pandas.Series`
            Distance on the sphere in radians.
        """
        ra1, dec1 = self.computeDeltaRaDec(x1, y1, cd11, cd12, cd21, cd22)
        ra2, dec2 = self.computeDeltaRaDec(x2, y2, cd11, cd12, cd21, cd22)
        # Great circle distance for small separations.
        return self.computeSkySeparation(ra1, dec1, ra2, dec2)
class ComputePixelScale(LocalWcs):
    """Compute the local pixel scale from the stored CDMatrix.
    """
    name = "PixelScale"

    @property
    def columns(self):
        return [self.colCD_1_1,
                self.colCD_1_2,
                self.colCD_2_1,
                self.colCD_2_2]

    def pixelScaleArcseconds(self, cd11, cd12, cd21, cd22):
        """Compute the local pixel to scale conversion in arcseconds.

        Parameters
        ----------
        cd11 : `~pandas.Series`
            [1, 1] element of the local Wcs affine transform in radians.
        cd12 : `~pandas.Series`
            [1, 2] element of the local Wcs affine transform in radians.
        cd21 : `~pandas.Series`
            [2, 1] element of the local Wcs affine transform in radians.
        cd22 : `~pandas.Series`
            [2, 2] element of the local Wcs affine transform in radians.

        Returns
        -------
        pixScale : `~pandas.Series`
            Arcseconds per pixel at the location of the local WC.
        """
        # The pixel area on the sky is |det CD| (in rad^2); its square root
        # is the linear scale, converted from degrees to arcseconds.
        return 3600 * np.degrees(np.sqrt(np.fabs(cd11 * cd22 - cd12 * cd21)))

    def _func(self, df):
        # ``columns`` is exactly the four CD-matrix columns, in order.
        return self.pixelScaleArcseconds(*(df[col] for col in self.columns))
class ConvertPixelToArcseconds(ComputePixelScale):
    """Convert a value in units of pixels to units of arcseconds."""

    def __init__(self,
                 col,
                 colCD_1_1,
                 colCD_1_2,
                 colCD_2_1,
                 colCD_2_2,
                 **kwargs):
        self.col = col
        super().__init__(colCD_1_1,
                         colCD_1_2,
                         colCD_2_1,
                         colCD_2_2,
                         **kwargs)

    @property
    def name(self):
        return f"{self.col}_asArcseconds"

    @property
    def columns(self):
        return [self.col,
                self.colCD_1_1,
                self.colCD_1_2,
                self.colCD_2_1,
                self.colCD_2_2]

    def _func(self, df):
        # Scale the pixel-valued column by the local arcsec/pixel scale.
        scale = self.pixelScaleArcseconds(df[self.colCD_1_1],
                                          df[self.colCD_1_2],
                                          df[self.colCD_2_1],
                                          df[self.colCD_2_2])
        return df[self.col] * scale
class ConvertPixelSqToArcsecondsSq(ComputePixelScale):
    """Convert a value in units of pixels squared to units of arcseconds
    squared.
    """

    def __init__(self,
                 col,
                 colCD_1_1,
                 colCD_1_2,
                 colCD_2_1,
                 colCD_2_2,
                 **kwargs):
        self.col = col
        super().__init__(colCD_1_1,
                         colCD_1_2,
                         colCD_2_1,
                         colCD_2_2,
                         **kwargs)

    @property
    def name(self):
        return f"{self.col}_asArcsecondsSq"

    @property
    def columns(self):
        return [self.col,
                self.colCD_1_1,
                self.colCD_1_2,
                self.colCD_2_1,
                self.colCD_2_2]

    def _func(self, df):
        # Areas scale with the square of the local arcsec/pixel scale.
        scale = self.pixelScaleArcseconds(df[self.colCD_1_1],
                                          df[self.colCD_1_2],
                                          df[self.colCD_2_1],
                                          df[self.colCD_2_2])
        return df[self.col] * scale * scale
class ReferenceBand(Functor):
    """Return the band used to seed multiband forced photometry.

    This functor is to be used on Object tables.
    It converts the boolean merge_measurements_{band} columns into a single
    string representing the first band for which merge_measurements_{band}
    is True.

    Assumes the default priority order of i, r, z, y, g, u.
    """
    name = 'Reference Band'
    shortname = 'refBand'

    @property
    def columns(self):
        # Order encodes the reference-band priority: i, r, z, y, g, u.
        return [f"merge_measurement_{band}" for band in ("i", "r", "z", "y", "g", "u")]

    def _func(self, df: pd.DataFrame) -> pd.Series:
        def getFilterAliasName(row):
            # idxmax returns the first column holding the max value
            # (True > False), i.e. the highest-priority measured band.
            colName = row.idxmax()
            return colName.replace('merge_measurement_', '')

        # Skip columns that are unavailable, because this functor requests the
        # superset of bands that could be included in the object table.
        available = [col for col in self.columns if col in df.columns]
        # Makes a Series of dtype object if df is empty.
        return df[available].apply(getFilterAliasName, axis=1,
                                   result_type='reduce').astype('object')
class Photometry(Functor):
    """Base class for Object table calibrated fluxes and magnitudes."""
    # AB to NanoJansky (3631 Jansky).
    AB_FLUX_SCALE = (0 * u.ABmag).to_value(u.nJy)
    LOG_AB_FLUX_SCALE = 12.56
    # 2.5/ln(10): converts a fractional flux error to a magnitude error.
    FIVE_OVER_2LOG10 = 1.085736204758129569
    # TO DO: DM-21955 Replace hard coded photometic calibration values.
    COADD_ZP = 27

    def __init__(self, colFlux, colFluxErr=None, **kwargs):
        # Elementwise hypot so scalar and array inputs both work.
        self.vhypot = np.vectorize(self.hypot)
        self.col = colFlux
        self.colFluxErr = colFluxErr

        # Instrumental flux of a zero-magnitude source for the assumed coadd
        # zeropoint; its uncertainty is taken to be zero.
        self.fluxMag0 = 1./np.power(10, -0.4*self.COADD_ZP)
        self.fluxMag0Err = 0.

        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.col]

    @property
    def name(self):
        return f'mag_{self.col}'

    @classmethod
    def hypot(cls, a, b):
        """Compute sqrt(a^2 + b^2) without under/overflow."""
        # Arrange |a| >= |b| so the ratio below cannot overflow.
        if np.abs(a) < np.abs(b):
            a, b = b, a
        if a == 0.:
            return 0.
        ratio = b/a
        return np.abs(a) * np.sqrt(1. + ratio*ratio)

    def dn2flux(self, dn, fluxMag0):
        """Convert instrumental flux to nanojanskys."""
        return self.AB_FLUX_SCALE * dn / fluxMag0

    def dn2mag(self, dn, fluxMag0):
        """Convert instrumental flux to AB magnitude."""
        # Non-positive fluxes yield NaN/inf magnitudes; suppress the benign
        # numpy warnings they would otherwise emit.
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', r'invalid value encountered')
            warnings.filterwarnings('ignore', r'divide by zero')
            return -2.5 * np.log10(dn/fluxMag0)

    def dn2fluxErr(self, dn, dnErr, fluxMag0, fluxMag0Err):
        """Convert instrumental flux error to nanojanskys."""
        # Quadrature sum of the calibration and measurement contributions.
        err = self.vhypot(dn * fluxMag0Err, dnErr * fluxMag0)
        factor = self.AB_FLUX_SCALE / fluxMag0 / fluxMag0
        return err * factor

    def dn2MagErr(self, dn, dnErr, fluxMag0, fluxMag0Err):
        """Convert instrumental flux error to AB magnitude error."""
        fracErr = self.dn2fluxErr(dn, dnErr, fluxMag0, fluxMag0Err) / self.dn2flux(dn, fluxMag0)
        return self.FIVE_OVER_2LOG10 * fracErr
class NanoJansky(Photometry):
    """Convert instrumental flux to nanojanskys."""
    def _func(self, df):
        # Delegate to the base-class conversion using the coadd zeropoint.
        return self.dn2flux(df[self.col], self.fluxMag0)
class NanoJanskyErr(Photometry):
    """Convert instrumental flux error to nanojanskys."""
    @property
    def columns(self):
        return [self.col, self.colFluxErr]

    def _func(self, df):
        fluxErr = self.dn2fluxErr(df[self.col], df[self.colFluxErr],
                                  self.fluxMag0, self.fluxMag0Err)
        # Re-wrap as a Series so the original row index is preserved.
        return pd.Series(fluxErr, index=df.index)
class LocalPhotometry(Functor):
    """Base class for calibrating the specified instrument flux column using
    the local photometric calibration.

    Parameters
    ----------
    instFluxCol : `str`
        Name of the instrument flux column.
    instFluxErrCol : `str`
        Name of the assocated error columns for ``instFluxCol``.
    photoCalibCol : `str`
        Name of local calibration column.
    photoCalibErrCol : `str`
        Error associated with ``photoCalibCol``

    See Also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    """
    logNJanskyToAB = (1 * u.nJy).to_value(u.ABmag)

    def __init__(self,
                 instFluxCol,
                 instFluxErrCol,
                 photoCalibCol,
                 photoCalibErrCol,
                 **kwargs):
        self.instFluxCol = instFluxCol
        self.instFluxErrCol = instFluxErrCol
        self.photoCalibCol = photoCalibCol
        self.photoCalibErrCol = photoCalibErrCol
        super().__init__(**kwargs)

    def instFluxToNanojansky(self, instFlux, localCalib):
        """Convert instrument flux to nanojanskys.

        Parameters
        ----------
        instFlux : `~numpy.ndarray` or `~pandas.Series`
            Array of instrument flux measurements.
        localCalib : `~numpy.ndarray` or `~pandas.Series`
            Array of local photometric calibration estimates.

        Returns
        -------
        calibFlux : `~numpy.ndarray` or `~pandas.Series`
            Array of calibrated flux measurements.
        """
        return instFlux * localCalib

    def instFluxErrToNanojanskyErr(self, instFlux, instFluxErr, localCalib, localCalibErr):
        """Convert instrument flux error to a nanojansky error.

        Parameters
        ----------
        instFlux : `~numpy.ndarray` or `~pandas.Series`
            Array of instrument flux measurements.
        instFluxErr : `~numpy.ndarray` or `~pandas.Series`
            Errors on associated ``instFlux`` values.
        localCalib : `~numpy.ndarray` or `~pandas.Series`
            Array of local photometric calibration estimates.
        localCalibErr : `~numpy.ndarray` or `~pandas.Series`
            Errors on associated ``localCalib`` values.

        Returns
        -------
        calibFluxErr : `~numpy.ndarray` or `~pandas.Series`
            Errors on calibrated flux measurements.
        """
        # Quadrature sum of the measurement and calibration contributions.
        return np.hypot(instFluxErr * localCalib, instFlux * localCalibErr)

    def instFluxToMagnitude(self, instFlux, localCalib):
        """Convert instrument flux to an AB magnitude.

        Parameters
        ----------
        instFlux : `~numpy.ndarray` or `~pandas.Series`
            Array of instrument flux measurements.
        localCalib : `~numpy.ndarray` or `~pandas.Series`
            Array of local photometric calibration estimates.

        Returns
        -------
        calibMag : `~numpy.ndarray` or `~pandas.Series`
            Array of calibrated AB magnitudes.
        """
        return -2.5 * np.log10(self.instFluxToNanojansky(instFlux, localCalib)) + self.logNJanskyToAB

    def instFluxErrToMagnitudeErr(self, instFlux, instFluxErr, localCalib, localCalibErr):
        """Convert instrument flux error to an AB magnitude error.

        Parameters
        ----------
        instFlux : `~numpy.ndarray` or `~pandas.Series`
            Array of instrument flux measurements.
        instFluxErr : `~numpy.ndarray` or `~pandas.Series`
            Errors on associated ``instFlux`` values.
        localCalib : `~numpy.ndarray` or `~pandas.Series`
            Array of local photometric calibration estimates.
        localCalibErr : `~numpy.ndarray` or `~pandas.Series`
            Errors on associated ``localCalib`` values.

        Returns
        -------
        calibMagErr: `~numpy.ndarray` or `~pandas.Series`
            Error on calibrated AB magnitudes.
        """
        err = self.instFluxErrToNanojanskyErr(instFlux, instFluxErr, localCalib, localCalibErr)
        # Bug fix: the denominator must be the calibrated flux
        # (instFlux * localCalib).  The previous code passed ``instFluxErr``
        # as the calibration argument, dividing by instFlux * instFluxErr and
        # returning a meaningless magnitude error.
        return 2.5 / np.log(10) * err / self.instFluxToNanojansky(instFlux, localCalib)
class LocalNanojansky(LocalPhotometry):
    """Compute calibrated fluxes using the local calibration value.

    This returns units of nanojanskys.
    """

    @property
    def columns(self):
        return [self.instFluxCol, self.photoCalibCol]

    @property
    def name(self):
        return f'flux_{self.instFluxCol}'

    def _func(self, df):
        # Calibrated flux = instrumental flux times the local calibration.
        return self.instFluxToNanojansky(df[self.instFluxCol], df[self.photoCalibCol])
class LocalNanojanskyErr(LocalPhotometry):
    """Compute calibrated flux errors using the local calibration value.

    This returns units of nanojanskys.
    """

    @property
    def columns(self):
        return [self.instFluxCol, self.instFluxErrCol,
                self.photoCalibCol, self.photoCalibErrCol]

    @property
    def name(self):
        return f'fluxErr_{self.instFluxCol}'

    def _func(self, df):
        # Propagate both the instrumental and calibration uncertainties.
        return self.instFluxErrToNanojanskyErr(
            df[self.instFluxCol], df[self.instFluxErrCol],
            df[self.photoCalibCol], df[self.photoCalibErrCol])
class LocalDipoleMeanFlux(LocalPhotometry):
    """Compute absolute mean of dipole fluxes.

    See Also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalDipoleMeanFluxErr
    LocalDipoleDiffFlux
    LocalDipoleDiffFluxErr
    """
    def __init__(self,
                 instFluxPosCol,
                 instFluxNegCol,
                 instFluxPosErrCol,
                 instFluxNegErrCol,
                 photoCalibCol,
                 photoCalibErrCol,
                 **kwargs):
        self.instFluxPosCol = instFluxPosCol
        self.instFluxNegCol = instFluxNegCol
        self.instFluxPosErrCol = instFluxPosErrCol
        self.instFluxNegErrCol = instFluxNegErrCol
        self.photoCalibCol = photoCalibCol
        self.photoCalibErrCol = photoCalibErrCol
        # Seed the base class with the negative-lobe columns; both lobes are
        # handled explicitly by the ``columns``/``_func`` overrides below.
        super().__init__(instFluxNegCol,
                         instFluxNegErrCol,
                         photoCalibCol,
                         photoCalibErrCol,
                         **kwargs)

    @property
    def columns(self):
        return [self.instFluxPosCol,
                self.instFluxNegCol,
                self.photoCalibCol]

    @property
    def name(self):
        return f'dipMeanFlux_{self.instFluxPosCol}_{self.instFluxNegCol}'

    def _func(self, df):
        # Mean of the absolute calibrated fluxes of the two dipole lobes.
        calib = df[self.photoCalibCol]
        negFlux = np.fabs(self.instFluxToNanojansky(df[self.instFluxNegCol], calib))
        posFlux = np.fabs(self.instFluxToNanojansky(df[self.instFluxPosCol], calib))
        return 0.5*(negFlux + posFlux)
class LocalDipoleMeanFluxErr(LocalDipoleMeanFlux):
    """Compute the error on the absolute mean of dipole fluxes.

    See Also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalDipoleMeanFlux
    LocalDipoleDiffFlux
    LocalDipoleDiffFluxErr
    """

    @property
    def columns(self):
        return [self.instFluxPosCol,
                self.instFluxNegCol,
                self.instFluxPosErrCol,
                self.instFluxNegErrCol,
                self.photoCalibCol,
                self.photoCalibErrCol]

    @property
    def name(self):
        return f'dipMeanFluxErr_{self.instFluxPosCol}_{self.instFluxNegCol}'

    def _func(self, df):
        # Error propagation for flux = 0.5*(|neg| + |pos|)*calib:
        # sigma = 0.5*sqrt(((|neg| + |pos|)*calibErr)^2
        #                  + (negErr^2 + posErr^2)*calib^2).
        # Bug fix: the summed absolute fluxes must be parenthesized before
        # multiplying by the calibration error; previously only the positive
        # lobe was scaled, adding a raw flux to a flux*error product (compare
        # the correctly-grouped expression in LocalDipoleDiffFluxErr).
        return 0.5*np.sqrt(
            ((np.fabs(df[self.instFluxNegCol]) + np.fabs(df[self.instFluxPosCol]))
             * df[self.photoCalibErrCol])**2
            + (df[self.instFluxNegErrCol]**2 + df[self.instFluxPosErrCol]**2)
            * df[self.photoCalibCol]**2)
class LocalDipoleDiffFlux(LocalDipoleMeanFlux):
    """Compute the absolute difference of dipole fluxes.

    Calculated value is (abs(pos) - abs(neg)).

    See Also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalDipoleMeanFlux
    LocalDipoleMeanFluxErr
    LocalDipoleDiffFluxErr
    """

    @property
    def columns(self):
        return [self.instFluxPosCol,
                self.instFluxNegCol,
                self.photoCalibCol]

    @property
    def name(self):
        return f'dipDiffFlux_{self.instFluxPosCol}_{self.instFluxNegCol}'

    def _func(self, df):
        # |pos| - |neg|, both calibrated with the same local calibration.
        calib = df[self.photoCalibCol]
        posFlux = np.fabs(self.instFluxToNanojansky(df[self.instFluxPosCol], calib))
        negFlux = np.fabs(self.instFluxToNanojansky(df[self.instFluxNegCol], calib))
        return posFlux - negFlux
class LocalDipoleDiffFluxErr(LocalDipoleMeanFlux):
    """Compute the error on the absolute difference of dipole fluxes.

    See Also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalDipoleMeanFlux
    LocalDipoleMeanFluxErr
    LocalDipoleDiffFlux
    """

    @property
    def columns(self):
        return [self.instFluxPosCol,
                self.instFluxNegCol,
                self.instFluxPosErrCol,
                self.instFluxNegErrCol,
                self.photoCalibCol,
                self.photoCalibErrCol]

    @property
    def name(self):
        return f'dipDiffFluxErr_{self.instFluxPosCol}_{self.instFluxNegCol}'

    def _func(self, df):
        # Error propagation for flux = (|pos| - |neg|)*calib.
        fluxDiff = np.fabs(df[self.instFluxPosCol]) - np.fabs(df[self.instFluxNegCol])
        calibTerm = (fluxDiff * df[self.photoCalibErrCol])**2
        instTerm = ((df[self.instFluxPosErrCol]**2 + df[self.instFluxNegErrCol]**2)
                    * df[self.photoCalibCol]**2)
        return np.sqrt(calibTerm + instTerm)
class Ebv(Functor):
    """Compute E(B-V) from dustmaps.sfd."""
    _defaultDataset = 'ref'
    name = "E(B-V)"
    shortname = "ebv"

    def __init__(self, **kwargs):
        # Import is only needed for Ebv.
        from dustmaps.sfd import SFDQuery
        self._columns = ['coord_ra', 'coord_dec']
        self.sfd = SFDQuery()
        super().__init__(**kwargs)

    def _func(self, df):
        # Catalog coordinates are stored in radians.
        coords = SkyCoord(df['coord_ra'].values * u.rad, df['coord_dec'].values * u.rad)
        reddening = self.sfd(coords)
        # Double precision unnecessary scientifically but currently needed for
        # ingest to qserv.
        return pd.Series(reddening, index=df.index).astype('float64')