Coverage for python/lsst/pipe/tasks/functors.py: 42%
739 statements
« prev ^ index » next coverage.py v7.5.1, created at 2024-05-15 02:18 -0700
1# This file is part of pipe_tasks.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <https://www.gnu.org/licenses/>.
# Public API of this module: functor classes plus the helpers used to build
# them from YAML/dict specifications.
# NOTE(review): RAErrColumn, DecErrColumn, and RADecCovColumn are defined
# below but not listed here — confirm whether that omission is intentional.
__all__ = ["init_fromDict", "Functor", "CompositeFunctor", "mag_aware_eval",
           "CustomFunctor", "Column", "Index", "CoordColumn", "RAColumn",
           "DecColumn", "HtmIndex20", "fluxName", "fluxErrName", "Mag",
           "MagErr", "MagDiff", "Color", "DeconvolvedMoments", "SdssTraceSize",
           "PsfSdssTraceSizeDiff", "HsmTraceSize", "PsfHsmTraceSizeDiff",
           "HsmFwhm", "E1", "E2", "RadiusFromQuadrupole", "LocalWcs",
           "ComputePixelScale", "ConvertPixelToArcseconds",
           "ConvertPixelSqToArcsecondsSq", "ReferenceBand", "Photometry",
           "NanoJansky", "NanoJanskyErr", "LocalPhotometry", "LocalNanojansky",
           "LocalNanojanskyErr", "LocalDipoleMeanFlux",
           "LocalDipoleMeanFluxErr", "LocalDipoleDiffFlux",
           "LocalDipoleDiffFluxErr", "Ebv",
           ]
36import logging
37import os
38import os.path
39import re
40import warnings
41from contextlib import redirect_stdout
42from itertools import product
44import astropy.units as u
45import lsst.geom as geom
46import lsst.sphgeom as sphgeom
47import numpy as np
48import pandas as pd
49import yaml
50from astropy.coordinates import SkyCoord
51from lsst.daf.butler import DeferredDatasetHandle
52from lsst.pipe.base import InMemoryDatasetHandle
53from lsst.utils import doImport
54from lsst.utils.introspection import get_full_type_name
def init_fromDict(initDict, basePath='lsst.pipe.tasks.functors',
                  typeKey='functor', name=None):
    """Initialize an object defined in a dictionary.

    The object needs to be importable as f'{basePath}.{initDict[typeKey]}'.
    The positional and keyword arguments (if any) are contained in "args" and
    "kwargs" entries in the dictionary, respectively.
    This is used in `~lsst.pipe.tasks.functors.CompositeFunctor.from_yaml` to
    initialize a composite functor from a specification in a YAML file.

    Parameters
    ----------
    initDict : dictionary
        Dictionary describing object's initialization.
        Must contain an entry keyed by ``typeKey`` that is the name of the
        object, relative to ``basePath``.
    basePath : str
        Path relative to module in which ``initDict[typeKey]`` is defined.
    typeKey : str
        Key of ``initDict`` that is the name of the object (relative to
        ``basePath``).
    name : str, optional
        Name of the object being constructed; used only to make the error
        message more informative if construction fails.

    Returns
    -------
    element
        The constructed object.

    Raises
    ------
    Exception
        Re-raises (with added context, chained to the original) whatever the
        object's constructor raises.
    """
    # Copy so that popping entries below does not mutate the caller's dict.
    initDict = initDict.copy()
    # TO DO: DM-21956 We should be able to define functors outside this module
    pythonType = doImport(f'{basePath}.{initDict.pop(typeKey)}')
    args = []
    if 'args' in initDict:
        args = initDict.pop('args')
        if isinstance(args, str):
            # A bare string is treated as a single positional argument.
            args = [args]
    try:
        element = pythonType(*args, **initDict)
    except Exception as e:
        message = f'Error in constructing functor "{name}" of type {pythonType.__name__} with args: {args}'
        # Chain the original exception so its traceback is not lost.
        raise type(e)(message, e.args) from e
    return element
class Functor(object):
    """Define and execute a calculation on a DataFrame or Handle holding a
    DataFrame.

    The `__call__` method accepts either a `~pandas.DataFrame` object or a
    `~lsst.daf.butler.DeferredDatasetHandle` or
    `~lsst.pipe.base.InMemoryDatasetHandle`, and returns the
    result of the calculation as a single column.
    Each functor defines what columns are needed for the calculation, and only
    these columns are read from the dataset handle.

    The action of `__call__` consists of two steps: first, loading the
    necessary columns from disk into memory as a `~pandas.DataFrame` object;
    and second, performing the computation on this DataFrame and returning the
    result.

    To define a new `Functor`, a subclass must define a `_func` method,
    that takes a `~pandas.DataFrame` and returns result in a `~pandas.Series`.
    In addition, it must define the following attributes:

    * `_columns`: The columns necessary to perform the calculation
    * `name`: A name appropriate for a figure axis label
    * `shortname`: A name appropriate for use as a dictionary key

    On initialization, a `Functor` should declare what band (``filt`` kwarg)
    and dataset (e.g. ``'ref'``, ``'meas'``, ``'forced_src'``) it is intended
    to be applied to.
    This enables the `_get_data` method to extract the proper columns from the
    underlying data.
    If not specified, the dataset will fall back on the `_defaultDataset`
    attribute.
    If band is not specified and ``dataset`` is anything other than ``'ref'``,
    then an error will be raised when trying to perform the calculation.

    Originally, `Functor` was set up to expect datasets formatted like the
    ``deepCoadd_obj`` dataset; that is, a DataFrame with a multi-level column
    index, with the levels of the column index being ``band``, ``dataset``,
    and ``column``.
    It has since been generalized to apply to DataFrames without multi-level
    indices and multi-level indices with just ``dataset`` and ``column``
    levels.
    In addition, the `_get_data` method that reads the columns from the
    underlying data will return a DataFrame with column index levels defined
    by the `_dfLevels` attribute; by default, this is ``column``.

    The `_dfLevels` attributes should generally not need to be changed, unless
    `_func` needs columns from multiple filters or datasets to do the
    calculation.
    An example of this is the `~lsst.pipe.tasks.functors.Color` functor, for
    which `_dfLevels = ('band', 'column')`, and `_func` expects the DataFrame
    it gets to have those levels in the column index.

    Parameters
    ----------
    filt : str
        Band upon which to do the calculation.
    dataset : str
        Dataset upon which to do the calculation (e.g., 'ref', 'meas',
        'forced_src').
    """

    # Dataset used when the caller does not specify one.
    _defaultDataset = 'ref'
    # Column-index levels kept in the DataFrame handed to `_func`.
    _dfLevels = ('column',)
    # Default for the `noDup` property when not set at construction.
    _defaultNoDup = False

    def __init__(self, filt=None, dataset=None, noDup=None):
        self.filt = filt
        self.dataset = dataset if dataset is not None else self._defaultDataset
        self._noDup = noDup
        # One logger per concrete functor class, for targeted error reporting.
        self.log = logging.getLogger(type(self).__name__)

    @property
    def noDup(self):
        """Do not explode by band if used on object table."""
        if self._noDup is not None:
            return self._noDup
        else:
            return self._defaultNoDup

    @property
    def columns(self):
        """Columns required to perform calculation."""
        if not hasattr(self, '_columns'):
            raise NotImplementedError('Must define columns property or _columns attribute')
        return self._columns

    def _get_data_columnLevels(self, data, columnIndex=None):
        """Gets the names of the column index levels.

        This should only be called in the context of a multilevel table.

        Parameters
        ----------
        data : various
            The data to be read, can be a
            `~lsst.daf.butler.DeferredDatasetHandle` or
            `~lsst.pipe.base.InMemoryDatasetHandle`.
        columnIndex (optional): pandas `~pandas.Index` object
            If not passed, then it is read from the
            `~lsst.daf.butler.DeferredDatasetHandle`
            for `~lsst.pipe.base.InMemoryDatasetHandle`.
        """
        if columnIndex is None:
            columnIndex = data.get(component="columns")
        return columnIndex.names

    def _get_data_columnLevelNames(self, data, columnIndex=None):
        """Gets the content of each of the column levels for a multilevel
        table.

        Returns a dict mapping each level name to the unique values present
        at that level of the column index.
        """
        if columnIndex is None:
            columnIndex = data.get(component="columns")

        columnLevels = columnIndex.names
        # np.unique also sorts the values within each level.
        columnLevelNames = {
            level: list(np.unique(np.array([c for c in columnIndex])[:, i]))
            for i, level in enumerate(columnLevels)
        }
        return columnLevelNames

    def _colsFromDict(self, colDict, columnIndex=None):
        """Converts dictionary column specficiation to a list of columns."""
        new_colDict = {}
        # data=None is safe: columnIndex is always supplied here, so
        # _get_data_columnLevels never dereferences data.
        columnLevels = self._get_data_columnLevels(None, columnIndex=columnIndex)

        for i, lev in enumerate(columnLevels):
            if lev in colDict:
                if isinstance(colDict[lev], str):
                    # Promote a bare string to a one-element list.
                    new_colDict[lev] = [colDict[lev]]
                else:
                    new_colDict[lev] = colDict[lev]
            else:
                # Unconstrained level: take every value present in the index.
                new_colDict[lev] = columnIndex.levels[i]

        # Cartesian product over levels, filtered to columns actually present.
        levelCols = [new_colDict[lev] for lev in columnLevels]
        cols = list(product(*levelCols))
        colsAvailable = [col for col in cols if col in columnIndex]
        return colsAvailable

    def multilevelColumns(self, data, columnIndex=None, returnTuple=False):
        """Returns columns needed by functor from multilevel dataset.

        To access tables with multilevel column structure, the
        `~lsst.daf.butler.DeferredDatasetHandle` or
        `~lsst.pipe.base.InMemoryDatasetHandle` needs to be passed
        either a list of tuples or a dictionary.

        Parameters
        ----------
        data : various
            The data as either `~lsst.daf.butler.DeferredDatasetHandle`, or
            `~lsst.pipe.base.InMemoryDatasetHandle`.
        columnIndex (optional): pandas `~pandas.Index` object
            Either passed or read in from
            `~lsst.daf.butler.DeferredDatasetHandle`.
        `returnTuple` : `bool`
            If true, then return a list of tuples rather than the column
            dictionary specification.
            This is set to `True` by `CompositeFunctor` in order to be able to
            combine columns from the various component functors.

        Raises
        ------
        RuntimeError
            If ``data`` is not a supported handle type.
        ValueError
            If no band is set, the index has a ``band`` level, and the
            dataset is not ``'ref'``.
        """
        if not isinstance(data, (DeferredDatasetHandle, InMemoryDatasetHandle)):
            raise RuntimeError(f"Unexpected data type. Got {get_full_type_name(data)}.")

        if columnIndex is None:
            columnIndex = data.get(component="columns")

        # Confirm that the dataset has the column levels the functor is
        # expecting it to have.
        columnLevels = self._get_data_columnLevels(data, columnIndex)

        columnDict = {'column': self.columns,
                      'dataset': self.dataset}
        if self.filt is None:
            columnLevelNames = self._get_data_columnLevelNames(data, columnIndex)
            if "band" in columnLevels:
                if self.dataset == "ref":
                    # 'ref' columns are band-independent, so any band will do;
                    # take the first one present.
                    columnDict["band"] = columnLevelNames["band"][0]
                else:
                    raise ValueError(f"'filt' not set for functor {self.name}"
                                     f"(dataset {self.dataset}) "
                                     "and DataFrame "
                                     "contains multiple filters in column index. "
                                     "Set 'filt' or set 'dataset' to 'ref'.")
        else:
            columnDict['band'] = self.filt

        if returnTuple:
            return self._colsFromDict(columnDict, columnIndex=columnIndex)
        else:
            return columnDict

    def _func(self, df, dropna=True):
        # Subclasses implement the actual computation here.
        # NOTE(review): callers invoke ``_func(df)`` and handle NaN-dropping
        # in ``__call__``, so the ``dropna`` parameter appears unused.
        raise NotImplementedError('Must define calculation on DataFrame')

    def _get_columnIndex(self, data):
        """Return columnIndex."""

        if isinstance(data, (DeferredDatasetHandle, InMemoryDatasetHandle)):
            return data.get(component="columns")
        else:
            # Plain DataFrames are wrapped elsewhere; no handle, no index.
            return None

    def _get_data(self, data):
        """Retrieve DataFrame necessary for calculation.

        The data argument can be a `~pandas.DataFrame`, a
        `~lsst.daf.butler.DeferredDatasetHandle`, or
        an `~lsst.pipe.base.InMemoryDatasetHandle`.

        Returns a DataFrame upon which `self._func` can act.
        """
        # We wrap a DataFrame in a handle here to take advantage of the
        # DataFrame delegate DataFrame column wrangling abilities.
        if isinstance(data, pd.DataFrame):
            _data = InMemoryDatasetHandle(data, storageClass="DataFrame")
        elif isinstance(data, (DeferredDatasetHandle, InMemoryDatasetHandle)):
            _data = data
        else:
            raise RuntimeError(f"Unexpected type provided for data. Got {get_full_type_name(data)}.")

        # First thing to do: check to see if the data source has a multilevel
        # column index or not.
        columnIndex = self._get_columnIndex(_data)
        is_multiLevel = isinstance(columnIndex, pd.MultiIndex)

        # Get proper columns specification for this functor.
        if is_multiLevel:
            columns = self.multilevelColumns(_data, columnIndex=columnIndex)
        else:
            columns = self.columns

        # Load in-memory DataFrame with appropriate columns the gen3 way.
        df = _data.get(parameters={"columns": columns})

        # Drop unnecessary column levels.
        if is_multiLevel:
            df = self._setLevels(df)

        return df

    def _setLevels(self, df):
        # Keep only the column-index levels named in `_dfLevels`.
        levelsToDrop = [n for n in df.columns.names if n not in self._dfLevels]
        df.columns = df.columns.droplevel(levelsToDrop)
        return df

    def _dropna(self, vals):
        # Thin wrapper so subclasses can customize NaN handling if needed.
        return vals.dropna()

    def __call__(self, data, dropna=False):
        # Load the needed columns, run the calculation, and optionally drop
        # missing values.  Any exception from `_func` is logged and converted
        # into an all-NaN result via `fail`.
        df = self._get_data(data)
        try:
            vals = self._func(df)
        except Exception as e:
            self.log.error("Exception in %s call: %s: %s", self.name, type(e).__name__, e)
            vals = self.fail(df)
        if dropna:
            vals = self._dropna(vals)

        return vals

    def difference(self, data1, data2, **kwargs):
        """Computes difference between functor called on two different
        DataFrame/Handle objects.
        """
        return self(data1, **kwargs) - self(data2, **kwargs)

    def fail(self, df):
        # Fallback result: a NaN-filled Series aligned with the input rows.
        return pd.Series(np.full(len(df), np.nan), index=df.index)

    @property
    def name(self):
        """Full name of functor (suitable for figure labels)."""
        # NOTE(review): this returns the NotImplementedError class object
        # rather than raising it; subclasses are expected to override `name`.
        return NotImplementedError

    @property
    def shortname(self):
        """Short name of functor (suitable for column name/dict key)."""
        return self.name
class CompositeFunctor(Functor):
    """Perform multiple calculations at once on a catalog.

    The role of a `CompositeFunctor` is to group together computations from
    multiple functors.
    Instead of returning `~pandas.Series` a `CompositeFunctor` returns a
    `~pandas.DataFrame`, with the column names being the keys of ``funcDict``.

    The `columns` attribute of a `CompositeFunctor` is the union of all
    columns in all the component functors.

    A `CompositeFunctor` does not use a `_func` method itself; rather, when a
    `CompositeFunctor` is called, all its columns are loaded at once, and the
    resulting DataFrame is passed to the `_func` method of each component
    functor.
    This has the advantage of only doing I/O (reading from parquet file) once,
    and works because each individual `_func` method of each component functor
    does not care if there are *extra* columns in the DataFrame being passed;
    only that it must contain *at least* the `columns` it expects.

    An important and useful class method is `from_yaml`, which takes as an
    argument the path to a YAML file specifying a collection of functors.

    Parameters
    ----------
    funcs : `dict` or `list`
        Dictionary or list of functors.
        If a list, then it will be converted into a dictonary according to the
        `.shortname` attribute of each functor.
    """
    dataset = None
    name = "CompositeFunctor"

    def __init__(self, funcs, **kwargs):

        # Accept any mapping (isinstance, not an exact ``type() == dict``
        # comparison) so that dict subclasses such as OrderedDict work too.
        if isinstance(funcs, dict):
            self.funcDict = funcs
        else:
            self.funcDict = {f.shortname: f for f in funcs}

        self._filt = None

        super().__init__(**kwargs)

    @property
    def filt(self):
        return self._filt

    @filt.setter
    def filt(self, filt):
        # Propagate the band to every component functor.
        if filt is not None:
            for _, f in self.funcDict.items():
                f.filt = filt
        self._filt = filt

    def update(self, new):
        """Update the functor with new functors.

        Parameters
        ----------
        new : `dict` or `CompositeFunctor`
            Functors to add, keyed by name.

        Raises
        ------
        TypeError
            If ``new`` is neither a dict nor a `CompositeFunctor`.
        """
        if isinstance(new, dict):
            self.funcDict.update(new)
        elif isinstance(new, CompositeFunctor):
            self.funcDict.update(new.funcDict)
        else:
            raise TypeError('Can only update with dictionary or CompositeFunctor.')

        # Make sure new functors have the same 'filt' set.
        if self.filt is not None:
            self.filt = self.filt

    @property
    def columns(self):
        # Union of the component functors' columns.
        return list(set([x for y in [f.columns for f in self.funcDict.values()] for x in y]))

    def multilevelColumns(self, data, **kwargs):
        # Get the union of columns for all component functors.
        # Note the need to have `returnTuple=True` here.
        return list(
            set(
                [
                    x
                    for y in [
                        f.multilevelColumns(data, returnTuple=True, **kwargs) for f in self.funcDict.values()
                    ]
                    for x in y
                ]
            )
        )

    def __call__(self, data, **kwargs):
        """Apply the functor to the data table.

        Parameters
        ----------
        data : various
            The data represented as `~lsst.daf.butler.DeferredDatasetHandle`,
            `~lsst.pipe.base.InMemoryDatasetHandle`, or `~pandas.DataFrame`.
            The table or a pointer to a table on disk from which columns can
            be accessed.

        Returns
        -------
        valDf : `~pandas.DataFrame`
            One column per component functor, named by ``funcDict`` key.
        """
        if isinstance(data, pd.DataFrame):
            _data = InMemoryDatasetHandle(data, storageClass="DataFrame")
        elif isinstance(data, (DeferredDatasetHandle, InMemoryDatasetHandle)):
            _data = data
        else:
            raise RuntimeError(f"Unexpected type provided for data. Got {get_full_type_name(data)}.")

        columnIndex = self._get_columnIndex(_data)

        if isinstance(columnIndex, pd.MultiIndex):
            # Load all needed columns in a single read, then hand each
            # component functor the sub-frame it asked for.
            columns = self.multilevelColumns(_data, columnIndex=columnIndex)
            df = _data.get(parameters={"columns": columns})

            valDict = {}
            for k, f in self.funcDict.items():
                try:
                    subdf = f._setLevels(
                        df[f.multilevelColumns(_data, returnTuple=True, columnIndex=columnIndex)]
                    )
                    valDict[k] = f._func(subdf)
                except Exception as e:
                    self.log.exception(
                        "Exception in %s (funcs: %s) call: %s",
                        self.name,
                        str(list(self.funcDict.keys())),
                        type(e).__name__,
                    )
                    try:
                        valDict[k] = f.fail(subdf)
                    except NameError:
                        # `subdf` was never assigned; nothing to fail over.
                        raise e

        else:
            df = _data.get(parameters={"columns": self.columns})

            valDict = {k: f._func(df) for k, f in self.funcDict.items()}

            # Check that output columns are actually columns.
            for name, colVal in valDict.items():
                if len(colVal.shape) != 1:
                    raise RuntimeError("Transformed column '%s' is not the shape of a column. "
                                       "It is shaped %s and type %s." % (name, colVal.shape, type(colVal)))

        try:
            valDf = pd.concat(valDict, axis=1)
        except TypeError:
            print([(k, type(v)) for k, v in valDict.items()])
            raise

        if kwargs.get('dropna', False):
            valDf = valDf.dropna(how='any')

        return valDf

    @classmethod
    def renameCol(cls, col, renameRules):
        """Rename a column according to prefix-replacement rules.

        Parameters
        ----------
        col : `str`
            Column name to (possibly) rename.
        renameRules : `list` of `tuple` or `None`
            ``(old, new)`` prefix pairs; each rule whose ``old`` prefix
            matches has its occurrences replaced.
        """
        if renameRules is None:
            return col
        for old, new in renameRules:
            if col.startswith(old):
                col = col.replace(old, new)
        return col

    @classmethod
    def from_file(cls, filename, **kwargs):
        """Construct a `CompositeFunctor` from a YAML specification on disk.

        Parameters
        ----------
        filename : `str`
            Path to the YAML file; environment variables are expanded.
        """
        # Allow environment variables in the filename.
        filename = os.path.expandvars(filename)
        with open(filename) as f:
            translationDefinition = yaml.safe_load(f)

        return cls.from_yaml(translationDefinition, **kwargs)

    @classmethod
    def from_yaml(cls, translationDefinition, **kwargs):
        """Construct a `CompositeFunctor` from a parsed YAML specification.

        Parameters
        ----------
        translationDefinition : `dict`
            Parsed specification with a ``funcs`` section and optional
            ``flag_rename_rules``, ``calexpFlags``, ``refFlags``,
            ``forcedFlags``, and ``flags`` sections.
        """
        funcs = {}
        for func, val in translationDefinition['funcs'].items():
            funcs[func] = init_fromDict(val, name=func)

        if 'flag_rename_rules' in translationDefinition:
            renameRules = translationDefinition['flag_rename_rules']
        else:
            renameRules = None

        # Flag columns are plain `Column` functors tied to a dataset.
        if 'calexpFlags' in translationDefinition:
            for flag in translationDefinition['calexpFlags']:
                funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='calexp')

        if 'refFlags' in translationDefinition:
            for flag in translationDefinition['refFlags']:
                funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='ref')

        if 'forcedFlags' in translationDefinition:
            for flag in translationDefinition['forcedFlags']:
                funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='forced_src')

        if 'flags' in translationDefinition:
            for flag in translationDefinition['flags']:
                funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='meas')

        return cls(funcs, **kwargs)
def mag_aware_eval(df, expr, log):
    """Evaluate an expression on a DataFrame, knowing what the 'mag' function
    means.

    Builds on `pandas.DataFrame.eval`, which parses and executes math on
    DataFrames.

    Parameters
    ----------
    df : ~pandas.DataFrame
        DataFrame on which to evaluate expression.
    expr : str
        Expression.
    log : `logging.Logger`
        Logger used to report a failed first evaluation attempt.
    """
    # Rewrite 'mag(x)' into the equivalent -2.5*log10(x) expression that
    # DataFrame.eval understands (eval provides natural log only).
    try:
        rewritten = re.sub(r'mag\((\w+)\)', r'-2.5*log(\g<1>)/log(10)', expr)
        result = df.eval(rewritten)
    except Exception as e:  # Should check what actually gets raised
        log.error("Exception in mag_aware_eval: %s: %s", type(e).__name__, e)
        # Retry assuming the named columns carry an '_instFlux' suffix.
        rewritten = re.sub(r'mag\((\w+)\)', r'-2.5*log(\g<1>_instFlux)/log(10)', expr)
        result = df.eval(rewritten)
    return result
class CustomFunctor(Functor):
    """Arbitrary computation on a catalog.

    Column names (and thus the columns to be loaded from catalog) are found by
    finding all words and trying to ignore all "math-y" words.

    Parameters
    ----------
    expr : str
        Expression to evaluate, to be parsed and executed by
        `~lsst.pipe.tasks.functors.mag_aware_eval`.
    """

    # Identifier-like tokens that are functions, not column names.
    _ignore_words = ('mag', 'sin', 'cos', 'exp', 'log', 'sqrt')

    def __init__(self, expr, **kwargs):
        self.expr = expr
        super().__init__(**kwargs)

    @property
    def name(self):
        # The expression itself doubles as the functor's name.
        return self.expr

    @property
    def columns(self):
        """Columns required by the expression, with ``mag()`` arguments
        mapped to their ``_instFlux`` columns.
        """
        mag_args = re.findall(r'mag\(\s*(\w+)\s*\)', self.expr)
        # Every identifier-like word that is not a known function name.
        candidates = [w for w in re.findall(r'[a-zA-Z_]+', self.expr)
                      if w not in self._ignore_words]
        pseudo_cols = []
        for arg in mag_args:
            if arg.endswith('_instFlux'):
                candidates.append(arg)
            else:
                # mag(x) actually reads the x_instFlux column, so the bare
                # name 'x' is not itself a column.
                candidates.append(f'{arg}_instFlux')
                pseudo_cols.append(arg)

        return list({c for c in candidates if c not in pseudo_cols})

    def _func(self, df):
        return mag_aware_eval(df, self.expr, self.log)
class Column(Functor):
    """Get column with a specified name.

    Parameters
    ----------
    col : `str`
        Name of the column to return.
    """

    def __init__(self, col, **kwargs):
        self.col = col
        super().__init__(**kwargs)

    @property
    def name(self):
        return self.col

    @property
    def columns(self):
        return [self.col]

    def _func(self, df):
        # The "calculation" is simply the column itself.
        return df[self.col]
class Index(Functor):
    """Return the value of the index for each object."""

    columns = ['coord_ra']  # Just a dummy; something has to be here.
    _defaultDataset = 'ref'
    _defaultNoDup = True

    def _func(self, df):
        # The row index itself, re-wrapped as a Series aligned to the frame.
        return pd.Series(df.index, index=df.index)
class CoordColumn(Column):
    """Base class for coordinate column, in degrees."""

    # When True, the stored column is converted from radians to degrees.
    _radians = True

    def __init__(self, col, **kwargs):
        super().__init__(col, **kwargs)

    def _func(self, df):
        # Must not modify original column in case that column is used by
        # another functor; the multiplication below produces a new Series.
        values = df[self.col]
        if self._radians:
            return values * 180 / np.pi
        return values
class RAColumn(CoordColumn):
    """Right Ascension, in degrees."""

    name = 'RA'
    _defaultNoDup = True

    def __init__(self, **kwargs):
        # Fixed to the standard 'coord_ra' column; converted from radians per
        # the CoordColumn._radians default.
        super().__init__('coord_ra', **kwargs)

    def __call__(self, catalog, **kwargs):
        # NOTE(review): this override only delegates to CoordColumn.__call__;
        # it appears removable without changing behavior.
        return super().__call__(catalog, **kwargs)
class DecColumn(CoordColumn):
    """Declination, in degrees."""

    name = 'Dec'
    _defaultNoDup = True

    def __init__(self, **kwargs):
        # Fixed to the standard 'coord_dec' column; converted from radians per
        # the CoordColumn._radians default.
        super().__init__('coord_dec', **kwargs)

    def __call__(self, catalog, **kwargs):
        # NOTE(review): this override only delegates to CoordColumn.__call__;
        # it appears removable without changing behavior.
        return super().__call__(catalog, **kwargs)
class RAErrColumn(CoordColumn):
    """Uncertainty in Right Ascension, in degrees."""

    name = 'RAErr'
    _defaultNoDup = True

    def __init__(self, **kwargs):
        # An uncertainty scales linearly, so the inherited rad->deg
        # conversion applies unchanged.
        super().__init__('coord_raErr', **kwargs)
class DecErrColumn(CoordColumn):
    """Uncertainty in declination, in degrees."""

    name = 'DecErr'
    _defaultNoDup = True

    def __init__(self, **kwargs):
        # An uncertainty scales linearly, so the inherited rad->deg
        # conversion applies unchanged.
        super().__init__('coord_decErr', **kwargs)
class RADecCovColumn(Column):
    """Coordinate covariance column, in degrees."""

    # The stored covariance is in radians^2; convert to degrees^2.
    _radians = True
    name = 'RADecCov'
    _defaultNoDup = True

    def __init__(self, **kwargs):
        super().__init__('coord_ra_dec_Cov', **kwargs)

    def _func(self, df):
        # Must not modify original column in case that column is used by
        # another functor; a covariance scales by the square of the
        # rad->deg factor.
        if self._radians:
            return df[self.col] * (180 / np.pi) ** 2
        return df[self.col]
class HtmIndex20(Functor):
    """Compute the level 20 HtmIndex for the catalog.

    Parameters
    ----------
    ra : `str`
        Name of the right-ascension column (interpreted as radians, given
        the ``_radians`` default).
    dec : `str`
        Name of the declination column.

    Notes
    -----
    This functor was implemented to satisfy requirements of old APDB interface
    which required the ``pixelId`` column in DiaObject with HTM20 index.
    The APDB interface had migrated to not need that information, but we keep
    this class in case it may be useful for something else.
    """
    name = "Htm20"
    htmLevel = 20
    # Input coordinates are interpreted as radians.
    _radians = True

    def __init__(self, ra, dec, **kwargs):
        self.pixelator = sphgeom.HtmPixelization(self.htmLevel)
        self.ra = ra
        self.dec = dec
        self._columns = [self.ra, self.dec]
        super().__init__(**kwargs)

    def _func(self, df):

        def computePixel(row):
            # Build a sphere point in the declared angular units...
            if self._radians:
                sphPoint = geom.SpherePoint(row[self.ra],
                                            row[self.dec],
                                            geom.radians)
            else:
                sphPoint = geom.SpherePoint(row[self.ra],
                                            row[self.dec],
                                            geom.degrees)
            # ...and look up its HTM index at the configured level.
            return self.pixelator.index(sphPoint.getVector())

        # result_type='reduce' asks pandas for a Series rather than a
        # DataFrame where possible.
        return df.apply(computePixel, axis=1, result_type='reduce').astype('int64')
def fluxName(col):
    """Append _instFlux to the column name if it doesn't have it already."""
    return col if col.endswith('_instFlux') else f'{col}_instFlux'
def fluxErrName(col):
    """Append _instFluxErr to the column name if it doesn't have it already."""
    return col if col.endswith('_instFluxErr') else f'{col}_instFluxErr'
class Mag(Functor):
    """Compute calibrated magnitude.

    Returns the flux at mag=0.
    The default ``fluxMag0`` is 63095734448.0194, which is default for HSC.
    TO DO: This default should be made configurable in DM-21955.

    This calculation hides warnings about invalid values and dividing by zero.

    As with all functors, a ``dataset`` and ``filt`` kwarg should be provided
    upon initialization.
    Unlike the default `Functor`, however, the default dataset for a `Mag` is
    ``'meas'``, rather than ``'ref'``.

    Parameters
    ----------
    col : `str`
        Name of flux column from which to compute magnitude.
        Can be parseable by the `~lsst.pipe.tasks.functors.fluxName` function;
        that is, you can pass ``'modelfit_CModel'`` instead of
        ``'modelfit_CModel_instFlux'``, and it will understand.
    fluxMag0 : `float`, optional
        Instrumental flux of a zeroth-magnitude source.
        Defaults to the HSC value previously hard-coded here, so existing
        callers are unaffected.
    """
    _defaultDataset = 'meas'

    def __init__(self, col, fluxMag0=63095734448.0194, **kwargs):
        self.col = fluxName(col)
        # TO DO: DM-21955 Replace hard coded photometic calibration values.
        self.fluxMag0 = fluxMag0

        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.col]

    def _func(self, df):
        # NaN or non-positive fluxes trigger numpy warnings from log10;
        # those are expected for faint/bad sources, so silence them.
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', r'invalid value encountered')
            warnings.filterwarnings('ignore', r'divide by zero')
            return -2.5*np.log10(df[self.col] / self.fluxMag0)

    @property
    def name(self):
        return f'mag_{self.col}'
class MagErr(Mag):
    """Compute calibrated magnitude uncertainty.

    Parameters
    ----------
    col : `str`
        Name of the flux column.
    fluxMag0Err : `float`, optional
        Uncertainty on ``fluxMag0``.
        Keyword-only; defaults to 0 (the value previously hard-coded here),
        so existing callers are unaffected.
    """

    def __init__(self, *args, fluxMag0Err=0., **kwargs):
        super().__init__(*args, **kwargs)
        # TO DO: DM-21955 Replace hard coded photometic calibration values.
        self.fluxMag0Err = fluxMag0Err

    @property
    def columns(self):
        # The flux column and its matching error column.
        return [self.col, self.col + 'Err']

    def _func(self, df):
        # Standard error propagation: combine the relative flux error with
        # the relative zero-point error in quadrature.
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', r'invalid value encountered')
            warnings.filterwarnings('ignore', r'divide by zero')
            fluxCol, fluxErrCol = self.columns
            x = df[fluxErrCol] / df[fluxCol]
            y = self.fluxMag0Err / self.fluxMag0
            magErr = (2.5 / np.log(10.)) * np.sqrt(x*x + y*y)
        return magErr

    @property
    def name(self):
        return super().name + '_err'
class MagDiff(Functor):
    """Functor to calculate magnitude difference."""

    _defaultDataset = 'meas'

    def __init__(self, col1, col2, **kwargs):
        # Both names are normalized to their '_instFlux' forms.
        self.col1 = fluxName(col1)
        self.col2 = fluxName(col2)
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.col1, self.col2]

    def _func(self, df):
        # -2.5*log10(f1/f2) equals mag(col1) - mag(col2); the zero points
        # cancel, so no calibration is needed.
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', r'invalid value encountered')
            warnings.filterwarnings('ignore', r'divide by zero')
            flux_ratio = df[self.col1] / df[self.col2]
            return -2.5 * np.log10(flux_ratio)

    @property
    def name(self):
        return f'(mag_{self.col1} - mag_{self.col2})'

    @property
    def shortname(self):
        return f'magDiff_{self.col1}_{self.col2}'
class Color(Functor):
    """Compute the color between two filters.

    Computes color by initializing two different `Mag` functors based on the
    ``col`` and filters provided, and then returning the difference.

    This is enabled by the `_func` method expecting a DataFrame with a
    multilevel column index, with both ``'band'`` and ``'column'``, instead of
    just ``'column'``, which is the `Functor` default.
    This is controlled by the `_dfLevels` attribute.

    Also of note, the default dataset for `Color` is ``forced_src'``, whereas
    for `Mag` it is ``'meas'``.

    Parameters
    ----------
    col : str
        Name of the flux column from which to compute; same as would be passed
        to `~lsst.pipe.tasks.functors.Mag`.

    filt2, filt1 : str
        Filters from which to compute magnitude difference.
        Color computed is ``Mag(filt2) - Mag(filt1)``.
    """
    _defaultDataset = 'forced_src'
    # _func needs both bands, so keep 'band' in the column index.
    _dfLevels = ('band', 'column')
    _defaultNoDup = True

    def __init__(self, col, filt2, filt1, **kwargs):
        self.col = fluxName(col)
        if filt2 == filt1:
            raise RuntimeError("Cannot compute Color for %s: %s - %s " % (col, filt2, filt1))
        self.filt2 = filt2
        self.filt1 = filt1

        # One Mag functor per band; their difference is the color.
        self.mag2 = Mag(col, filt=filt2, **kwargs)
        self.mag1 = Mag(col, filt=filt1, **kwargs)

        super().__init__(**kwargs)

    @property
    def filt(self):
        # A Color spans two bands, so it has no single band.
        return None

    @filt.setter
    def filt(self, filt):
        # Ignore assignments (including the one made by Functor.__init__);
        # the two bands are fixed at construction time.
        pass

    def _func(self, df):
        # Select each band's sub-frame from the 'band' column level.
        mag2 = self.mag2._func(df[self.filt2])
        mag1 = self.mag1._func(df[self.filt1])
        return mag2 - mag1

    @property
    def columns(self):
        return [self.mag1.col, self.mag2.col]

    def multilevelColumns(self, parq, **kwargs):
        # Explicit (dataset, band, column) tuples for the two bands.
        return [(self.dataset, self.filt1, self.col), (self.dataset, self.filt2, self.col)]

    @property
    def name(self):
        return f'{self.filt2} - {self.filt1} ({self.col})'

    @property
    def shortname(self):
        return f"{self.col}_{self.filt2.replace('-', '')}m{self.filt1.replace('-', '')}"
class DeconvolvedMoments(Functor):
    """This functor subtracts the trace of the PSF second moments from the
    trace of the second moments of the source.

    If the HsmShapeAlgorithm measurement is valid, then these will be used for
    the sources.
    Otherwise, the SdssShapeAlgorithm measurements will be used.
    """

    name = 'Deconvolved Moments'
    shortname = 'deconvolvedMoments'
    _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
                "ext_shapeHSM_HsmSourceMoments_yy",
                "base_SdssShape_xx", "base_SdssShape_yy",
                "ext_shapeHSM_HsmPsfMoments_xx",
                "ext_shapeHSM_HsmPsfMoments_yy")

    def _func(self, df):
        """Calculate deconvolved moments."""
        # Source trace from HSM where available, NaN placeholder otherwise.
        if "ext_shapeHSM_HsmSourceMoments_xx" in df.columns:
            hsm_trace = df["ext_shapeHSM_HsmSourceMoments_xx"] + df["ext_shapeHSM_HsmSourceMoments_yy"]
        else:
            hsm_trace = np.ones(len(df))*np.nan

        sdss_trace = df["base_SdssShape_xx"] + df["base_SdssShape_yy"]

        if "ext_shapeHSM_HsmPsfMoments_xx" not in df.columns:
            # LSST does not have shape.sdss.psf.
            # We could instead add base_PsfShape to the catalog using
            # exposure.getPsf().computeShape(s.getCentroid()).getIxx().
            raise RuntimeError('No psf shape parameter found in catalog')
        psf_trace = df["ext_shapeHSM_HsmPsfMoments_xx"] + df["ext_shapeHSM_HsmPsfMoments_yy"]

        # Use the SDSS trace wherever the HSM trace is not finite.
        return hsm_trace.where(np.isfinite(hsm_trace), sdss_trace) - psf_trace
class SdssTraceSize(Functor):
    """Functor to calculate the SDSS trace radius size for sources.

    The SDSS trace radius size is a measure of size equal to the square root of
    half of the trace of the second moments tensor measured with the
    SdssShapeAlgorithm plugin.
    This has units of pixels.
    """
    name = "SDSS Trace Size"
    shortname = 'sdssTrace'
    _columns = ("base_SdssShape_xx", "base_SdssShape_yy")

    def _func(self, df):
        # Trace radius: sqrt((Ixx + Iyy) / 2), in pixels.
        trace = df["base_SdssShape_xx"] + df["base_SdssShape_yy"]
        return np.sqrt(0.5*trace)
class PsfSdssTraceSizeDiff(Functor):
    """Functor to calculate the SDSS trace radius size difference (%) between
    the object and the PSF model.

    See Also
    --------
    SdssTraceSize
    """
    name = "PSF - SDSS Trace Size"
    shortname = 'psf_sdssTrace'
    _columns = ("base_SdssShape_xx", "base_SdssShape_yy",
                "base_SdssShape_psf_xx", "base_SdssShape_psf_yy")

    def _func(self, df):
        # Trace radii of the source and of the PSF model, in pixels.
        objSize = np.sqrt(0.5*(df["base_SdssShape_xx"] + df["base_SdssShape_yy"]))
        modelSize = np.sqrt(0.5*(df["base_SdssShape_psf_xx"] + df["base_SdssShape_psf_yy"]))
        # Percentage difference relative to the mean of the two sizes.
        return 100*(objSize - modelSize)/(0.5*(objSize + modelSize))
class HsmTraceSize(Functor):
    """Functor to calculate the HSM trace radius size for sources.

    The HSM trace radius size is a measure of size equal to the square root of
    half of the trace of the second moments tensor measured with the
    HsmShapeAlgorithm plugin.
    This has units of pixels.
    """
    name = 'HSM Trace Size'
    shortname = 'hsmTrace'
    _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
                "ext_shapeHSM_HsmSourceMoments_yy")

    def _func(self, df):
        # Trace radius: sqrt((Ixx + Iyy) / 2), in pixels.
        trace = (df["ext_shapeHSM_HsmSourceMoments_xx"]
                 + df["ext_shapeHSM_HsmSourceMoments_yy"])
        return np.sqrt(0.5*trace)
class PsfHsmTraceSizeDiff(Functor):
    """Functor to calculate the HSM trace radius size difference (%) between
    the object and the PSF model.

    See Also
    --------
    HsmTraceSize
    """
    name = 'PSF - HSM Trace Size'
    shortname = 'psf_HsmTrace'
    _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
                "ext_shapeHSM_HsmSourceMoments_yy",
                "ext_shapeHSM_HsmPsfMoments_xx",
                "ext_shapeHSM_HsmPsfMoments_yy")

    def _func(self, df):
        # Trace radii of the source and of the PSF model, in pixels.
        objSize = np.sqrt(0.5*(df["ext_shapeHSM_HsmSourceMoments_xx"]
                               + df["ext_shapeHSM_HsmSourceMoments_yy"]))
        modelSize = np.sqrt(0.5*(df["ext_shapeHSM_HsmPsfMoments_xx"]
                                 + df["ext_shapeHSM_HsmPsfMoments_yy"]))
        # Percentage difference relative to the mean of the two sizes.
        return 100*(objSize - modelSize)/(0.5*(objSize + modelSize))
class HsmFwhm(Functor):
    """Functor to calculate the PSF FWHM with second moments measured from the
    HsmShapeAlgorithm plugin.

    This is in units of arcseconds, and assumes the hsc_rings_v1 skymap pixel
    scale of 0.168 arcseconds/pixel.

    Notes
    -----
    This conversion assumes the PSF is Gaussian, which is not always the case.
    """
    name = 'HSM Psf FWHM'
    _columns = ('ext_shapeHSM_HsmPsfMoments_xx', 'ext_shapeHSM_HsmPsfMoments_yy')
    # TODO: DM-21403 pixel scale should be computed from the CD matrix or transform matrix
    pixelScale = 0.168
    SIGMA2FWHM = 2*np.sqrt(2*np.log(2))

    def _func(self, df):
        # Gaussian sigma from the PSF trace radius, scaled to a FWHM
        # in arcseconds.
        trace = df['ext_shapeHSM_HsmPsfMoments_xx'] + df['ext_shapeHSM_HsmPsfMoments_yy']
        return self.pixelScale*self.SIGMA2FWHM*np.sqrt(0.5*trace)
class E1(Functor):
    r"""Calculate :math:`e_1` ellipticity component for sources, defined as:

    .. math::
        e_1 &= (I_{xx}-I_{yy})/(I_{xx}+I_{yy})

    See Also
    --------
    E2
    """
    name = "Distortion Ellipticity (e1)"
    shortname = "Distortion"

    def __init__(self, colXX, colXY, colYY, **kwargs):
        self.colXX = colXX
        self.colXY = colXY
        self.colYY = colYY
        self._columns = [self.colXX, self.colXY, self.colYY]
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.colXX, self.colXY, self.colYY]

    def _func(self, df):
        # Parenthesize the numerator: without the parentheses, operator
        # precedence computed Ixx - Iyy/(Ixx + Iyy), which is not the e1
        # ellipticity given in the class docstring.
        return (df[self.colXX] - df[self.colYY]) / (df[self.colXX] + df[self.colYY])
class E2(Functor):
    r"""Calculate :math:`e_2` ellipticity component for sources, defined as:

    .. math::
        e_2 &= 2I_{xy}/(I_{xx}+I_{yy})

    See Also
    --------
    E1
    """
    name = "Ellipticity e2"

    def __init__(self, colXX, colXY, colYY, **kwargs):
        self.colXX = colXX
        self.colXY = colXY
        self.colYY = colYY
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.colXX, self.colXY, self.colYY]

    def _func(self, df):
        # e2 = 2*Ixy / trace of the second moments tensor.
        trace = df[self.colXX] + df[self.colYY]
        return 2*df[self.colXY] / trace
class RadiusFromQuadrupole(Functor):
    """Calculate the radius from the quadrupole moments.

    This returns the fourth root of the determinant of the second moments
    tensor, which has units of pixels.

    See Also
    --------
    SdssTraceSize
    HsmTraceSize
    """

    def __init__(self, colXX, colXY, colYY, **kwargs):
        self.colXX = colXX
        self.colXY = colXY
        self.colYY = colYY
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.colXX, self.colXY, self.colYY]

    def _func(self, df):
        # |Q|^(1/4), where |Q| = Ixx*Iyy - Ixy^2.
        determinant = df[self.colXX]*df[self.colYY] - df[self.colXY]**2
        return determinant**0.25
class LocalWcs(Functor):
    """Computations using the stored localWcs."""
    name = "LocalWcsOperations"

    def __init__(self,
                 colCD_1_1,
                 colCD_1_2,
                 colCD_2_1,
                 colCD_2_2,
                 **kwargs):
        self.colCD_1_1 = colCD_1_1
        self.colCD_1_2 = colCD_1_2
        self.colCD_2_1 = colCD_2_1
        self.colCD_2_2 = colCD_2_2
        super().__init__(**kwargs)

    def computeDeltaRaDec(self, x, y, cd11, cd12, cd21, cd22):
        """Apply the local Wcs affine (CD matrix) transform to a pixel
        offset (x, y), giving the corresponding offset in RA and dec.

        Parameters
        ----------
        x : `~pandas.Series`
            X pixel coordinate.
        y : `~pandas.Series`
            Y pixel coordinate.
        cd11 : `~pandas.Series`
            [1, 1] element of the local Wcs affine transform.
        cd12 : `~pandas.Series`
            [1, 2] element of the local Wcs affine transform.
        cd21 : `~pandas.Series`
            [2, 1] element of the local Wcs affine transform.
        cd22 : `~pandas.Series`
            [2, 2] element of the local Wcs affine transform.

        Returns
        -------
        raDecTuple : tuple
            RA and dec conversion of x and y given the local Wcs.
            Returned units are in radians.

        """
        return (x * cd11 + y * cd12, x * cd21 + y * cd22)

    def computeSkySeparation(self, ra1, dec1, ra2, dec2):
        """Compute the great-circle separation between two sky coordinates.

        Parameters
        ----------
        ra1 : `~pandas.Series`
            Ra of the first coordinate in radians.
        dec1 : `~pandas.Series`
            Dec of the first coordinate in radians.
        ra2 : `~pandas.Series`
            Ra of the second coordinate in radians.
        dec2 : `~pandas.Series`
            Dec of the second coordinate in radians.

        Returns
        -------
        dist : `~pandas.Series`
            Distance on the sphere in radians.
        """
        deltaDec = dec2 - dec1
        deltaRa = ra2 - ra1
        # Haversine formula: numerically stable for the small separations
        # produced by local pixel offsets.
        return 2 * np.arcsin(
            np.sqrt(
                np.sin(deltaDec / 2) ** 2
                + np.cos(dec2) * np.cos(dec1) * np.sin(deltaRa / 2) ** 2))

    def getSkySeparationFromPixel(self, x1, y1, x2, y2, cd11, cd12, cd21, cd22):
        """Compute the distance on the sphere between pixel positions
        (x1, y1) and (x2, y2), using the local Wcs affine transform.

        Parameters
        ----------
        x1 : `~pandas.Series`
            X pixel coordinate of the first position.
        y1 : `~pandas.Series`
            Y pixel coordinate of the first position.
        x2 : `~pandas.Series`
            X pixel coordinate of the second position.
        y2 : `~pandas.Series`
            Y pixel coordinate of the second position.
        cd11 : `~pandas.Series`
            [1, 1] element of the local Wcs affine transform.
        cd12 : `~pandas.Series`
            [1, 2] element of the local Wcs affine transform.
        cd21 : `~pandas.Series`
            [2, 1] element of the local Wcs affine transform.
        cd22 : `~pandas.Series`
            [2, 2] element of the local Wcs affine transform.

        Returns
        -------
        dist : `~pandas.Series`
            Separation on the sphere in radians.
        """
        ra1, dec1 = self.computeDeltaRaDec(x1, y1, cd11, cd12, cd21, cd22)
        ra2, dec2 = self.computeDeltaRaDec(x2, y2, cd11, cd12, cd21, cd22)
        # Great circle distance for small separations.
        return self.computeSkySeparation(ra1, dec1, ra2, dec2)
class ComputePixelScale(LocalWcs):
    """Compute the local pixel scale from the stored CDMatrix.
    """
    name = "PixelScale"

    @property
    def columns(self):
        return [self.colCD_1_1, self.colCD_1_2,
                self.colCD_2_1, self.colCD_2_2]

    def pixelScaleArcseconds(self, cd11, cd12, cd21, cd22):
        """Compute the local pixel-to-sky scale in arcseconds per pixel.

        Parameters
        ----------
        cd11 : `~pandas.Series`
            [1, 1] element of the local Wcs affine transform in radians.
        cd12 : `~pandas.Series`
            [1, 2] element of the local Wcs affine transform in radians.
        cd21 : `~pandas.Series`
            [2, 1] element of the local Wcs affine transform in radians.
        cd22 : `~pandas.Series`
            [2, 2] element of the local Wcs affine transform in radians.

        Returns
        -------
        pixScale : `~pandas.Series`
            Arcseconds per pixel at the location of the local WC.
        """
        # Pixel scale is sqrt(|det(CD)|), converted from radians to
        # arcseconds.
        determinant = cd11 * cd22 - cd12 * cd21
        return 3600 * np.degrees(np.sqrt(np.fabs(determinant)))

    def _func(self, df):
        cdElements = (df[self.colCD_1_1], df[self.colCD_1_2],
                      df[self.colCD_2_1], df[self.colCD_2_2])
        return self.pixelScaleArcseconds(*cdElements)
class ConvertPixelToArcseconds(ComputePixelScale):
    """Convert a value in units of pixels to units of arcseconds."""

    def __init__(self,
                 col,
                 colCD_1_1,
                 colCD_1_2,
                 colCD_2_1,
                 colCD_2_2,
                 **kwargs):
        # Column holding the pixel-valued quantity to convert.
        self.col = col
        super().__init__(colCD_1_1, colCD_1_2, colCD_2_1, colCD_2_2, **kwargs)

    @property
    def name(self):
        return f"{self.col}_asArcseconds"

    @property
    def columns(self):
        return [self.col,
                self.colCD_1_1, self.colCD_1_2,
                self.colCD_2_1, self.colCD_2_2]

    def _func(self, df):
        # Scale the pixel-valued column by the local pixel scale.
        scale = self.pixelScaleArcseconds(df[self.colCD_1_1],
                                          df[self.colCD_1_2],
                                          df[self.colCD_2_1],
                                          df[self.colCD_2_2])
        return df[self.col] * scale
class ConvertPixelSqToArcsecondsSq(ComputePixelScale):
    """Convert a value in units of pixels squared to units of arcseconds
    squared.
    """

    def __init__(self,
                 col,
                 colCD_1_1,
                 colCD_1_2,
                 colCD_2_1,
                 colCD_2_2,
                 **kwargs):
        # Column holding the pixel^2-valued quantity to convert.
        self.col = col
        super().__init__(colCD_1_1, colCD_1_2, colCD_2_1, colCD_2_2, **kwargs)

    @property
    def name(self):
        return f"{self.col}_asArcsecondsSq"

    @property
    def columns(self):
        return [self.col,
                self.colCD_1_1, self.colCD_1_2,
                self.colCD_2_1, self.colCD_2_2]

    def _func(self, df):
        # Apply the local pixel scale twice for a squared quantity.
        scale = self.pixelScaleArcseconds(df[self.colCD_1_1],
                                          df[self.colCD_1_2],
                                          df[self.colCD_2_1],
                                          df[self.colCD_2_2])
        return df[self.col] * scale * scale
class ReferenceBand(Functor):
    """Return the band used to seed multiband forced photometry.

    This functor is to be used on Object tables.
    It converts the boolean merge_measurements_{band} columns into a single
    string representing the first band for which merge_measurements_{band}
    is True.

    Assumes the default priority order of i, r, z, y, g, u.
    """
    name = 'Reference Band'
    shortname = 'refBand'

    @property
    def columns(self):
        # Bands listed in reference priority order.
        priorityBands = ("i", "r", "z", "y", "g", "u")
        return [f"merge_measurement_{band}" for band in priorityBands]

    def _func(self, df: pd.DataFrame) -> pd.Series:
        def takeReferenceBand(row):
            # idxmax returns the first column holding the max value
            # (True > False), i.e. the highest-priority measured band.
            return row.idxmax().replace('merge_measurement_', '')

        # Skip columns that are unavailable, because this functor requests the
        # superset of bands that could be included in the object table.
        available = [col for col in self.columns if col in df.columns]
        # Makes a Series of dtype object if df is empty.
        return df[available].apply(takeReferenceBand, axis=1,
                                   result_type='reduce').astype('object')
class Photometry(Functor):
    """Base class for Object table calibrated fluxes and magnitudes."""
    # AB to NanoJansky (3631 Jansky).
    AB_FLUX_SCALE = (0 * u.ABmag).to_value(u.nJy)
    # Approximate log10 of AB_FLUX_SCALE.
    LOG_AB_FLUX_SCALE = 12.56
    # 2.5 / ln(10): converts a relative flux error to a magnitude error.
    FIVE_OVER_2LOG10 = 1.085736204758129569
    # TO DO: DM-21955 Replace hard coded photometic calibration values.
    COADD_ZP = 27

    def __init__(self, colFlux, colFluxErr=None, **kwargs):
        # Element-wise, overflow-safe hypot for array inputs.
        self.vhypot = np.vectorize(self.hypot)
        self.col = colFlux
        self.colFluxErr = colFluxErr

        # Instrumental flux of a zero-magnitude source at COADD_ZP;
        # treated as exact (zero error).
        self.fluxMag0 = 1./np.power(10, -0.4*self.COADD_ZP)
        self.fluxMag0Err = 0.

        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.col]

    @property
    def name(self):
        return f'mag_{self.col}'

    @classmethod
    def hypot(cls, a, b):
        """Compute sqrt(a^2 + b^2) without under/overflow."""
        # Order so that |a| >= |b|, then factor out a so q = b/a <= 1.
        if np.abs(a) < np.abs(b):
            a, b = b, a
        if a == 0.:
            return 0.
        q = b/a
        return np.abs(a) * np.sqrt(1. + q*q)

    def dn2flux(self, dn, fluxMag0):
        """Convert instrumental flux to nanojanskys."""
        return self.AB_FLUX_SCALE * dn / fluxMag0

    def dn2mag(self, dn, fluxMag0):
        """Convert instrumental flux to AB magnitude."""
        with warnings.catch_warnings():
            # Non-positive fluxes yield NaN/inf magnitudes; silence the
            # resulting numpy warnings rather than masking the values.
            warnings.filterwarnings('ignore', r'invalid value encountered')
            warnings.filterwarnings('ignore', r'divide by zero')
            return -2.5 * np.log10(dn/fluxMag0)

    def dn2fluxErr(self, dn, dnErr, fluxMag0, fluxMag0Err):
        """Convert instrumental flux error to nanojanskys."""
        # Propagate the errors on dn and fluxMag0 in quadrature.
        retVal = self.vhypot(dn * fluxMag0Err, dnErr * fluxMag0)
        retVal *= self.AB_FLUX_SCALE / fluxMag0 / fluxMag0
        return retVal

    def dn2MagErr(self, dn, dnErr, fluxMag0, fluxMag0Err):
        """Convert instrumental flux error to AB magnitude error."""
        # sigma_m = (2.5 / ln 10) * sigma_F / F.
        retVal = self.dn2fluxErr(dn, dnErr, fluxMag0, fluxMag0Err) / self.dn2flux(dn, fluxMag0)
        return self.FIVE_OVER_2LOG10 * retVal
class NanoJansky(Photometry):
    """Convert instrumental flux to nanojanskys."""

    def _func(self, df):
        instFlux = df[self.col]
        return self.dn2flux(instFlux, self.fluxMag0)
class NanoJanskyErr(Photometry):
    """Convert instrumental flux error to nanojanskys."""

    @property
    def columns(self):
        return [self.col, self.colFluxErr]

    def _func(self, df):
        fluxErr = self.dn2fluxErr(df[self.col], df[self.colFluxErr],
                                  self.fluxMag0, self.fluxMag0Err)
        return pd.Series(fluxErr, index=df.index)
class LocalPhotometry(Functor):
    """Base class for calibrating the specified instrument flux column using
    the local photometric calibration.

    Parameters
    ----------
    instFluxCol : `str`
        Name of the instrument flux column.
    instFluxErrCol : `str`
        Name of the assocated error columns for ``instFluxCol``.
    photoCalibCol : `str`
        Name of local calibration column.
    photoCalibErrCol : `str`
        Error associated with ``photoCalibCol``

    See Also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    """
    # AB magnitude of a 1 nJy source; zero-point offset for nJy -> mag.
    logNJanskyToAB = (1 * u.nJy).to_value(u.ABmag)

    def __init__(self,
                 instFluxCol,
                 instFluxErrCol,
                 photoCalibCol,
                 photoCalibErrCol,
                 **kwargs):
        self.instFluxCol = instFluxCol
        self.instFluxErrCol = instFluxErrCol
        self.photoCalibCol = photoCalibCol
        self.photoCalibErrCol = photoCalibErrCol
        super().__init__(**kwargs)

    def instFluxToNanojansky(self, instFlux, localCalib):
        """Convert instrument flux to nanojanskys.

        Parameters
        ----------
        instFlux : `~numpy.ndarray` or `~pandas.Series`
            Array of instrument flux measurements.
        localCalib : `~numpy.ndarray` or `~pandas.Series`
            Array of local photometric calibration estimates.

        Returns
        -------
        calibFlux : `~numpy.ndarray` or `~pandas.Series`
            Array of calibrated flux measurements.
        """
        return instFlux * localCalib

    def instFluxErrToNanojanskyErr(self, instFlux, instFluxErr, localCalib, localCalibErr):
        """Convert instrument flux error to a nanojansky error.

        Parameters
        ----------
        instFlux : `~numpy.ndarray` or `~pandas.Series`
            Array of instrument flux measurements.
        instFluxErr : `~numpy.ndarray` or `~pandas.Series`
            Errors on associated ``instFlux`` values.
        localCalib : `~numpy.ndarray` or `~pandas.Series`
            Array of local photometric calibration estimates.
        localCalibErr : `~numpy.ndarray` or `~pandas.Series`
            Errors on associated ``localCalib`` values.

        Returns
        -------
        calibFluxErr : `~numpy.ndarray` or `~pandas.Series`
            Errors on calibrated flux measurements.
        """
        # Quadrature sum of the flux-error and calibration-error terms.
        return np.hypot(instFluxErr * localCalib, instFlux * localCalibErr)

    def instFluxToMagnitude(self, instFlux, localCalib):
        """Convert instrument flux to an AB magnitude.

        Parameters
        ----------
        instFlux : `~numpy.ndarray` or `~pandas.Series`
            Array of instrument flux measurements.
        localCalib : `~numpy.ndarray` or `~pandas.Series`
            Array of local photometric calibration estimates.

        Returns
        -------
        calibMag : `~numpy.ndarray` or `~pandas.Series`
            Array of calibrated AB magnitudes.
        """
        return -2.5 * np.log10(self.instFluxToNanojansky(instFlux, localCalib)) + self.logNJanskyToAB

    def instFluxErrToMagnitudeErr(self, instFlux, instFluxErr, localCalib, localCalibErr):
        """Convert instrument flux error to an AB magnitude error.

        Parameters
        ----------
        instFlux : `~numpy.ndarray` or `~pandas.Series`
            Array of instrument flux measurements.
        instFluxErr : `~numpy.ndarray` or `~pandas.Series`
            Errors on associated ``instFlux`` values.
        localCalib : `~numpy.ndarray` or `~pandas.Series`
            Array of local photometric calibration estimates.
        localCalibErr : `~numpy.ndarray` or `~pandas.Series`
            Errors on associated ``localCalib`` values.

        Returns
        -------
        calibMagErr: `~numpy.ndarray` or `~pandas.Series`
            Error on calibrated AB magnitudes.
        """
        err = self.instFluxErrToNanojanskyErr(instFlux, instFluxErr, localCalib, localCalibErr)
        # sigma_m = (2.5 / ln 10) * sigma_F / F, with F the calibrated flux
        # instFlux * localCalib.  Previously ``instFluxErr`` was passed as
        # the calibration argument, dividing by the wrong flux.
        return 2.5 / np.log(10) * err / self.instFluxToNanojansky(instFlux, localCalib)
class LocalNanojansky(LocalPhotometry):
    """Compute calibrated fluxes using the local calibration value.

    This returns units of nanojanskys.
    """

    @property
    def columns(self):
        return [self.instFluxCol, self.photoCalibCol]

    @property
    def name(self):
        return f'flux_{self.instFluxCol}'

    def _func(self, df):
        instFlux = df[self.instFluxCol]
        localCalib = df[self.photoCalibCol]
        return self.instFluxToNanojansky(instFlux, localCalib)
class LocalNanojanskyErr(LocalPhotometry):
    """Compute calibrated flux errors using the local calibration value.

    This returns units of nanojanskys.
    """

    @property
    def columns(self):
        return [self.instFluxCol, self.instFluxErrCol,
                self.photoCalibCol, self.photoCalibErrCol]

    @property
    def name(self):
        return f'fluxErr_{self.instFluxCol}'

    def _func(self, df):
        instFlux = df[self.instFluxCol]
        instFluxErr = df[self.instFluxErrCol]
        localCalib = df[self.photoCalibCol]
        localCalibErr = df[self.photoCalibErrCol]
        return self.instFluxErrToNanojanskyErr(instFlux, instFluxErr,
                                               localCalib, localCalibErr)
class LocalDipoleMeanFlux(LocalPhotometry):
    """Compute absolute mean of dipole fluxes.

    See Also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalDipoleMeanFluxErr
    LocalDipoleDiffFlux
    LocalDipoleDiffFluxErr
    """

    def __init__(self,
                 instFluxPosCol,
                 instFluxNegCol,
                 instFluxPosErrCol,
                 instFluxNegErrCol,
                 photoCalibCol,
                 photoCalibErrCol,
                 **kwargs):
        # Record both lobes; the base class is seeded with the negative
        # lobe's columns.
        self.instFluxNegCol = instFluxNegCol
        self.instFluxPosCol = instFluxPosCol
        self.instFluxNegErrCol = instFluxNegErrCol
        self.instFluxPosErrCol = instFluxPosErrCol
        self.photoCalibCol = photoCalibCol
        self.photoCalibErrCol = photoCalibErrCol
        super().__init__(instFluxNegCol,
                         instFluxNegErrCol,
                         photoCalibCol,
                         photoCalibErrCol,
                         **kwargs)

    @property
    def columns(self):
        return [self.instFluxPosCol,
                self.instFluxNegCol,
                self.photoCalibCol]

    @property
    def name(self):
        return f'dipMeanFlux_{self.instFluxPosCol}_{self.instFluxNegCol}'

    def _func(self, df):
        # Mean of the absolute calibrated fluxes of the two lobes.
        negFlux = np.fabs(self.instFluxToNanojansky(df[self.instFluxNegCol], df[self.photoCalibCol]))
        posFlux = np.fabs(self.instFluxToNanojansky(df[self.instFluxPosCol], df[self.photoCalibCol]))
        return 0.5*(negFlux + posFlux)
class LocalDipoleMeanFluxErr(LocalDipoleMeanFlux):
    """Compute the error on the absolute mean of dipole fluxes.

    See Also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalDipoleMeanFlux
    LocalDipoleDiffFlux
    LocalDipoleDiffFluxErr
    """

    @property
    def columns(self):
        return [self.instFluxPosCol,
                self.instFluxNegCol,
                self.instFluxPosErrCol,
                self.instFluxNegErrCol,
                self.photoCalibCol,
                self.photoCalibErrCol]

    @property
    def name(self):
        return f'dipMeanFluxErr_{self.instFluxPosCol}_{self.instFluxNegCol}'

    def _func(self, df):
        # Error propagation for F = 0.5*(|pos| + |neg|)*calib: the
        # calibration-error term must scale the *sum* of the absolute lobe
        # fluxes.  A misplaced parenthesis previously multiplied only the
        # positive lobe by photoCalibErr (cf. LocalDipoleDiffFluxErr, which
        # parenthesizes its lobe combination correctly).
        return 0.5*np.sqrt(
            ((np.fabs(df[self.instFluxNegCol]) + np.fabs(df[self.instFluxPosCol]))
             * df[self.photoCalibErrCol])**2
            + (df[self.instFluxNegErrCol]**2 + df[self.instFluxPosErrCol]**2)
            * df[self.photoCalibCol]**2)
class LocalDipoleDiffFlux(LocalDipoleMeanFlux):
    """Compute the absolute difference of dipole fluxes.

    Calculated value is (abs(pos) - abs(neg)).

    See Also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalDipoleMeanFlux
    LocalDipoleMeanFluxErr
    LocalDipoleDiffFluxErr
    """

    @property
    def columns(self):
        return [self.instFluxPosCol,
                self.instFluxNegCol,
                self.photoCalibCol]

    @property
    def name(self):
        return f'dipDiffFlux_{self.instFluxPosCol}_{self.instFluxNegCol}'

    def _func(self, df):
        # Difference of the absolute calibrated lobe fluxes, pos minus neg.
        calib = df[self.photoCalibCol]
        posFlux = np.fabs(self.instFluxToNanojansky(df[self.instFluxPosCol], calib))
        negFlux = np.fabs(self.instFluxToNanojansky(df[self.instFluxNegCol], calib))
        return posFlux - negFlux
class LocalDipoleDiffFluxErr(LocalDipoleMeanFlux):
    """Compute the error on the absolute difference of dipole fluxes.

    See Also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalDipoleMeanFlux
    LocalDipoleMeanFluxErr
    LocalDipoleDiffFlux
    """

    @property
    def columns(self):
        return [self.instFluxPosCol,
                self.instFluxNegCol,
                self.instFluxPosErrCol,
                self.instFluxNegErrCol,
                self.photoCalibCol,
                self.photoCalibErrCol]

    @property
    def name(self):
        return f'dipDiffFluxErr_{self.instFluxPosCol}_{self.instFluxNegCol}'

    def _func(self, df):
        # Propagate the calibration error on (|pos| - |neg|) together with
        # the lobe measurement errors scaled by the calibration.
        lobeDiff = np.fabs(df[self.instFluxPosCol]) - np.fabs(df[self.instFluxNegCol])
        calibTerm = (lobeDiff * df[self.photoCalibErrCol])**2
        fluxTerm = ((df[self.instFluxPosErrCol]**2 + df[self.instFluxNegErrCol]**2)
                    * df[self.photoCalibCol]**2)
        return np.sqrt(calibTerm + fluxTerm)
class Ebv(Functor):
    """Compute E(B-V) from dustmaps.sfd."""
    _defaultDataset = 'ref'
    name = "E(B-V)"
    shortname = "ebv"

    def __init__(self, **kwargs):
        # Import is only needed for Ebv.
        # Suppress unnecessary .dustmapsrc log message on import.
        with open(os.devnull, "w") as devnull, redirect_stdout(devnull):
            from dustmaps.sfd import SFDQuery
        self._columns = ['coord_ra', 'coord_dec']
        self.sfd = SFDQuery()
        super().__init__(**kwargs)

    def _func(self, df):
        # Catalog coordinates are stored in radians.
        coords = SkyCoord(df['coord_ra'].values * u.rad, df['coord_dec'].values * u.rad)
        # Double precision unnecessary scientifically but currently needed for
        # ingest to qserv.
        return pd.Series(self.sfd(coords), index=df.index).astype('float64')