Coverage for python/lsst/pipe/tasks/functors.py: 42%
745 statements
« prev ^ index » next coverage.py v7.3.1, created at 2023-09-17 10:06 +0000
1# This file is part of pipe_tasks.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <https://www.gnu.org/licenses/>.
# NOTE: RAErrColumn, DecErrColumn, and RADecCovColumn are defined in this
# module and are included here so `from ... import *` exports them.
__all__ = ["init_fromDict", "Functor", "CompositeFunctor", "mag_aware_eval",
           "CustomFunctor", "Column", "Index", "CoordColumn", "RAColumn",
           "DecColumn", "RAErrColumn", "DecErrColumn", "RADecCovColumn",
           "HtmIndex20", "fluxName", "fluxErrName", "Mag",
           "MagErr", "MagDiff", "Color", "DeconvolvedMoments", "SdssTraceSize",
           "PsfSdssTraceSizeDiff", "HsmTraceSize", "PsfHsmTraceSizeDiff",
           "HsmFwhm", "E1", "E2", "RadiusFromQuadrupole", "LocalWcs",
           "ComputePixelScale", "ConvertPixelToArcseconds",
           "ConvertPixelSqToArcsecondsSq", "ReferenceBand", "Photometry",
           "NanoJansky", "NanoJanskyErr", "LocalPhotometry", "LocalNanojansky",
           "LocalNanojanskyErr", "LocalDipoleMeanFlux",
           "LocalDipoleMeanFluxErr", "LocalDipoleDiffFlux",
           "LocalDipoleDiffFluxErr", "Ebv",
           ]
36import yaml
37import re
38from itertools import product
39import logging
40import os.path
41import warnings
43import pandas as pd
44import numpy as np
45import astropy.units as u
46from astropy.coordinates import SkyCoord
48from lsst.utils import doImport
49from lsst.utils.introspection import get_full_type_name
50from lsst.daf.butler import DeferredDatasetHandle
51from lsst.pipe.base import InMemoryDatasetHandle
52import lsst.geom as geom
53import lsst.sphgeom as sphgeom
def init_fromDict(initDict, basePath='lsst.pipe.tasks.functors',
                  typeKey='functor', name=None):
    """Initialize an object defined in a dictionary.

    The object needs to be importable as f'{basePath}.{initDict[typeKey]}'.
    The positional and keyword arguments (if any) are contained in "args" and
    "kwargs" entries in the dictionary, respectively.
    This is used in `~lsst.pipe.tasks.functors.CompositeFunctor.from_yaml` to
    initialize a composite functor from a specification in a YAML file.

    Parameters
    ----------
    initDict : dictionary
        Dictionary describing object's initialization.
        Must contain an entry keyed by ``typeKey`` that is the name of the
        object, relative to ``basePath``.
    basePath : str
        Path relative to module in which ``initDict[typeKey]`` is defined.
    typeKey : str
        Key of ``initDict`` that is the name of the object (relative to
        ``basePath``).
    name : str, optional
        Name of the object being constructed; used only to produce a more
        informative error message on failure.

    Returns
    -------
    element
        The constructed object.

    Raises
    ------
    Exception
        An exception of the same type as the underlying construction error,
        chained to the original so the traceback is preserved.
    """
    initDict = initDict.copy()
    # TO DO: DM-21956 We should be able to define functors outside this module
    pythonType = doImport(f'{basePath}.{initDict.pop(typeKey)}')
    args = []
    if 'args' in initDict:
        args = initDict.pop('args')
        if isinstance(args, str):
            args = [args]
    try:
        element = pythonType(*args, **initDict)
    except Exception as e:
        message = f'Error in constructing functor "{name}" of type {pythonType.__name__} with args: {args}'
        # Chain to the original exception so the underlying cause and its
        # traceback remain visible alongside the annotated message.
        raise type(e)(message, e.args) from e
    return element
class Functor(object):
    """Define and execute a calculation on a DataFrame or Handle holding a
    DataFrame.

    The `__call__` method accepts either a `~pandas.DataFrame` object or a
    `~lsst.daf.butler.DeferredDatasetHandle` or
    `~lsst.pipe.base.InMemoryDatasetHandle`, and returns the
    result of the calculation as a single column.
    Each functor defines what columns are needed for the calculation, and only
    these columns are read from the dataset handle.

    The action of `__call__` consists of two steps: first, loading the
    necessary columns from disk into memory as a `~pandas.DataFrame` object;
    and second, performing the computation on this DataFrame and returning the
    result.

    To define a new `Functor`, a subclass must define a `_func` method,
    that takes a `~pandas.DataFrame` and returns result in a `~pandas.Series`.
    In addition, it must define the following attributes:

    * `_columns`: The columns necessary to perform the calculation
    * `name`: A name appropriate for a figure axis label
    * `shortname`: A name appropriate for use as a dictionary key

    On initialization, a `Functor` should declare what band (``filt`` kwarg)
    and dataset (e.g. ``'ref'``, ``'meas'``, ``'forced_src'``) it is intended
    to be applied to.
    This enables the `_get_data` method to extract the proper columns from the
    underlying data.
    If not specified, the dataset will fall back on the `_defaultDataset`
    attribute.
    If band is not specified and ``dataset`` is anything other than ``'ref'``,
    then an error will be raised when trying to perform the calculation.

    Originally, `Functor` was set up to expect datasets formatted like the
    ``deepCoadd_obj`` dataset; that is, a DataFrame with a multi-level column
    index, with the levels of the column index being ``band``, ``dataset``, and
    ``column``.
    It has since been generalized to apply to DataFrames without multi-level
    indices and multi-level indices with just ``dataset`` and ``column``
    levels.
    In addition, the `_get_data` method that reads the columns from the
    underlying data will return a DataFrame with column index levels defined by
    the `_dfLevels` attribute; by default, this is ``column``.

    The `_dfLevels` attributes should generally not need to be changed, unless
    `_func` needs columns from multiple filters or datasets to do the
    calculation.
    An example of this is the `~lsst.pipe.tasks.functors.Color` functor, for
    which `_dfLevels = ('band', 'column')`, and `_func` expects the DataFrame
    it gets to have those levels in the column index.

    Parameters
    ----------
    filt : str
        Band upon which to do the calculation.

    dataset : str
        Dataset upon which to do the calculation (e.g., 'ref', 'meas',
        'forced_src').
    """
    # Dataset used when none is passed to __init__.
    _defaultDataset = 'ref'
    # Column-index levels kept in the DataFrame handed to _func.
    _dfLevels = ('column',)
    # Fallback for the noDup property when noDup=None at construction.
    _defaultNoDup = False

    def __init__(self, filt=None, dataset=None, noDup=None):
        self.filt = filt
        self.dataset = dataset if dataset is not None else self._defaultDataset
        self._noDup = noDup
        self.log = logging.getLogger(type(self).__name__)

    @property
    def noDup(self):
        """Do not explode by band if used on object table."""
        if self._noDup is not None:
            return self._noDup
        else:
            return self._defaultNoDup

    @property
    def columns(self):
        """Columns required to perform calculation."""
        if not hasattr(self, '_columns'):
            raise NotImplementedError('Must define columns property or _columns attribute')
        return self._columns

    def _get_data_columnLevels(self, data, columnIndex=None):
        """Gets the names of the column index levels.

        This should only be called in the context of a multilevel table.

        Parameters
        ----------
        data : various
            The data to be read, can be a
            `~lsst.daf.butler.DeferredDatasetHandle` or
            `~lsst.pipe.base.InMemoryDatasetHandle`.
        columnIndex (optional): pandas `~pandas.Index` object
            If not passed, then it is read from the
            `~lsst.daf.butler.DeferredDatasetHandle`
            for `~lsst.pipe.base.InMemoryDatasetHandle`.
        """
        if columnIndex is None:
            columnIndex = data.get(component="columns")
        return columnIndex.names

    def _get_data_columnLevelNames(self, data, columnIndex=None):
        """Gets the content of each of the column levels for a multilevel
        table.
        """
        if columnIndex is None:
            columnIndex = data.get(component="columns")

        columnLevels = columnIndex.names
        # For each level, collect the unique values observed at that position
        # across all of the column tuples in the index.
        columnLevelNames = {
            level: list(np.unique(np.array([c for c in columnIndex])[:, i]))
            for i, level in enumerate(columnLevels)
        }
        return columnLevelNames

    def _colsFromDict(self, colDict, columnIndex=None):
        """Converts dictionary column specficiation to a list of columns."""
        new_colDict = {}
        columnLevels = self._get_data_columnLevels(None, columnIndex=columnIndex)

        for i, lev in enumerate(columnLevels):
            if lev in colDict:
                # Normalize single strings into one-element lists.
                if isinstance(colDict[lev], str):
                    new_colDict[lev] = [colDict[lev]]
                else:
                    new_colDict[lev] = colDict[lev]
            else:
                # Level not constrained by colDict: take all values.
                new_colDict[lev] = columnIndex.levels[i]

        levelCols = [new_colDict[lev] for lev in columnLevels]
        # Cartesian product of per-level choices, then filter to the
        # combinations that actually exist in the column index.
        cols = list(product(*levelCols))
        colsAvailable = [col for col in cols if col in columnIndex]
        return colsAvailable

    def multilevelColumns(self, data, columnIndex=None, returnTuple=False):
        """Returns columns needed by functor from multilevel dataset.

        To access tables with multilevel column structure, the
        `~lsst.daf.butler.DeferredDatasetHandle` or
        `~lsst.pipe.base.InMemoryDatasetHandle` needs to be passed
        either a list of tuples or a dictionary.

        Parameters
        ----------
        data : various
            The data as either `~lsst.daf.butler.DeferredDatasetHandle`, or
            `~lsst.pipe.base.InMemoryDatasetHandle`.
        columnIndex (optional): pandas `~pandas.Index` object
            Either passed or read in from
            `~lsst.daf.butler.DeferredDatasetHandle`.
        `returnTuple` : `bool`
            If true, then return a list of tuples rather than the column
            dictionary specification.
            This is set to `True` by `CompositeFunctor` in order to be able to
            combine columns from the various component functors.

        """
        if not isinstance(data, (DeferredDatasetHandle, InMemoryDatasetHandle)):
            raise RuntimeError(f"Unexpected data type. Got {get_full_type_name(data)}.")

        if columnIndex is None:
            columnIndex = data.get(component="columns")

        # Confirm that the dataset has the column levels the functor is
        # expecting it to have.
        columnLevels = self._get_data_columnLevels(data, columnIndex)

        columnDict = {'column': self.columns,
                      'dataset': self.dataset}
        if self.filt is None:
            columnLevelNames = self._get_data_columnLevelNames(data, columnIndex)
            if "band" in columnLevels:
                if self.dataset == "ref":
                    # 'ref' is band-independent, so any band value works;
                    # take the first one present in the index.
                    columnDict["band"] = columnLevelNames["band"][0]
                else:
                    raise ValueError(f"'filt' not set for functor {self.name}"
                                     f"(dataset {self.dataset}) "
                                     "and DataFrame "
                                     "contains multiple filters in column index. "
                                     "Set 'filt' or set 'dataset' to 'ref'.")
        else:
            columnDict['band'] = self.filt

        if returnTuple:
            return self._colsFromDict(columnDict, columnIndex=columnIndex)
        else:
            return columnDict

    def _func(self, df, dropna=True):
        # Subclasses must override; this is the core calculation.
        raise NotImplementedError('Must define calculation on DataFrame')

    def _get_columnIndex(self, data):
        """Return columnIndex."""

        # Only handles expose a "columns" component; plain DataFrames have
        # already been wrapped by the time this matters.
        if isinstance(data, (DeferredDatasetHandle, InMemoryDatasetHandle)):
            return data.get(component="columns")
        else:
            return None

    def _get_data(self, data):
        """Retrieve DataFrame necessary for calculation.

        The data argument can be a `~pandas.DataFrame`, a
        `~lsst.daf.butler.DeferredDatasetHandle`, or
        an `~lsst.pipe.base.InMemoryDatasetHandle`.

        Returns a DataFrame upon which `self._func` can act.
        """
        # We wrap a DataFrame in a handle here to take advantage of the
        # DataFrame delegate DataFrame column wrangling abilities.
        if isinstance(data, pd.DataFrame):
            _data = InMemoryDatasetHandle(data, storageClass="DataFrame")
        elif isinstance(data, (DeferredDatasetHandle, InMemoryDatasetHandle)):
            _data = data
        else:
            raise RuntimeError(f"Unexpected type provided for data. Got {get_full_type_name(data)}.")

        # First thing to do: check to see if the data source has a multilevel
        # column index or not.
        columnIndex = self._get_columnIndex(_data)
        is_multiLevel = isinstance(columnIndex, pd.MultiIndex)

        # Get proper columns specification for this functor.
        if is_multiLevel:
            columns = self.multilevelColumns(_data, columnIndex=columnIndex)
        else:
            columns = self.columns

        # Load in-memory DataFrame with appropriate columns the gen3 way.
        df = _data.get(parameters={"columns": columns})

        # Drop unnecessary column levels.
        if is_multiLevel:
            df = self._setLevels(df)

        return df

    def _setLevels(self, df):
        # Drop every column-index level not named in _dfLevels, leaving the
        # DataFrame in the shape that _func expects.
        levelsToDrop = [n for n in df.columns.names if n not in self._dfLevels]
        df.columns = df.columns.droplevel(levelsToDrop)
        return df

    def _dropna(self, vals):
        return vals.dropna()

    def __call__(self, data, dropna=False):
        # Load only the needed columns, run the calculation, and fall back
        # to an all-NaN result (via self.fail) if _func raises.
        df = self._get_data(data)
        try:
            vals = self._func(df)
        except Exception as e:
            self.log.error("Exception in %s call: %s: %s", self.name, type(e).__name__, e)
            vals = self.fail(df)
        if dropna:
            vals = self._dropna(vals)

        return vals

    def difference(self, data1, data2, **kwargs):
        """Computes difference between functor called on two different
        DataFrame/Handle objects.
        """
        return self(data1, **kwargs) - self(data2, **kwargs)

    def fail(self, df):
        # Fallback result: an all-NaN Series aligned to df's index.
        return pd.Series(np.full(len(df), np.nan), index=df.index)

    @property
    def name(self):
        """Full name of functor (suitable for figure labels)."""
        # NOTE(review): returns (not raises) the NotImplementedError class;
        # presumably so error logging that formats self.name cannot itself
        # raise — confirm before changing.
        return NotImplementedError

    @property
    def shortname(self):
        """Short name of functor (suitable for column name/dict key)."""
        return self.name
class CompositeFunctor(Functor):
    """Perform multiple calculations at once on a catalog.

    The role of a `CompositeFunctor` is to group together computations from
    multiple functors.
    Instead of returning `~pandas.Series` a `CompositeFunctor` returns a
    `~pandas.DataFrame`, with the column names being the keys of ``funcDict``.

    The `columns` attribute of a `CompositeFunctor` is the union of all columns
    in all the component functors.

    A `CompositeFunctor` does not use a `_func` method itself; rather, when a
    `CompositeFunctor` is called, all its columns are loaded at once, and the
    resulting DataFrame is passed to the `_func` method of each component
    functor.
    This has the advantage of only doing I/O (reading from parquet file) once,
    and works because each individual `_func` method of each component functor
    does not care if there are *extra* columns in the DataFrame being passed;
    only that it must contain *at least* the `columns` it expects.

    An important and useful class method is `from_yaml`, which takes as an
    argument the path to a YAML file specifying a collection of functors.

    Parameters
    ----------
    funcs : `dict` or `list`
        Dictionary or list of functors.
        If a list, then it will be converted into a dictonary according to the
        `.shortname` attribute of each functor.
    """
    dataset = None
    name = "CompositeFunctor"

    def __init__(self, funcs, **kwargs):

        # Use isinstance rather than an exact type comparison so that dict
        # subclasses (e.g. OrderedDict) are accepted directly as well.
        if isinstance(funcs, dict):
            self.funcDict = funcs
        else:
            self.funcDict = {f.shortname: f for f in funcs}

        self._filt = None

        super().__init__(**kwargs)

    @property
    def filt(self):
        return self._filt

    @filt.setter
    def filt(self, filt):
        # Propagate a non-None band to every component functor.
        if filt is not None:
            for _, f in self.funcDict.items():
                f.filt = filt
        self._filt = filt

    def update(self, new):
        """Update the functor with new functors."""
        if isinstance(new, dict):
            self.funcDict.update(new)
        elif isinstance(new, CompositeFunctor):
            self.funcDict.update(new.funcDict)
        else:
            raise TypeError('Can only update with dictionary or CompositeFunctor.')

        # Make sure new functors have the same 'filt' set.
        if self.filt is not None:
            self.filt = self.filt

    @property
    def columns(self):
        """Union of columns required by all component functors."""
        return list(set([x for y in [f.columns for f in self.funcDict.values()] for x in y]))

    def multilevelColumns(self, data, **kwargs):
        # Get the union of columns for all component functors.
        # Note the need to have `returnTuple=True` here.
        return list(
            set(
                [
                    x
                    for y in [
                        f.multilevelColumns(data, returnTuple=True, **kwargs) for f in self.funcDict.values()
                    ]
                    for x in y
                ]
            )
        )

    def __call__(self, data, **kwargs):
        """Apply the functor to the data table.

        Parameters
        ----------
        data : various
            The data represented as `~lsst.daf.butler.DeferredDatasetHandle`,
            `~lsst.pipe.base.InMemoryDatasetHandle`, or `~pandas.DataFrame`.
            The table or a pointer to a table on disk from which columns can
            be accessed.
        """
        if isinstance(data, pd.DataFrame):
            _data = InMemoryDatasetHandle(data, storageClass="DataFrame")
        elif isinstance(data, (DeferredDatasetHandle, InMemoryDatasetHandle)):
            _data = data
        else:
            raise RuntimeError(f"Unexpected type provided for data. Got {get_full_type_name(data)}.")

        columnIndex = self._get_columnIndex(_data)

        if isinstance(columnIndex, pd.MultiIndex):
            columns = self.multilevelColumns(_data, columnIndex=columnIndex)
            df = _data.get(parameters={"columns": columns})

            valDict = {}
            for k, f in self.funcDict.items():
                try:
                    # Restrict the shared DataFrame to just the columns this
                    # component functor needs, re-leveled for its _func.
                    subdf = f._setLevels(
                        df[f.multilevelColumns(_data, returnTuple=True, columnIndex=columnIndex)]
                    )
                    valDict[k] = f._func(subdf)
                except Exception as e:
                    self.log.exception(
                        "Exception in %s (funcs: %s) call: %s",
                        self.name,
                        str(list(self.funcDict.keys())),
                        type(e).__name__,
                    )
                    try:
                        valDict[k] = f.fail(subdf)
                    except NameError:
                        # subdf was never assigned, so there is no frame to
                        # build a fallback from; re-raise the original error.
                        raise e

        else:
            df = _data.get(parameters={"columns": self.columns})

            valDict = {k: f._func(df) for k, f in self.funcDict.items()}

            # Check that output columns are actually columns.
            for name, colVal in valDict.items():
                if len(colVal.shape) != 1:
                    raise RuntimeError("Transformed column '%s' is not the shape of a column. "
                                       "It is shaped %s and type %s." % (name, colVal.shape, type(colVal)))

        try:
            valDf = pd.concat(valDict, axis=1)
        except TypeError:
            print([(k, type(v)) for k, v in valDict.items()])
            raise

        if kwargs.get('dropna', False):
            valDf = valDf.dropna(how='any')

        return valDf

    @classmethod
    def renameCol(cls, col, renameRules):
        """Apply every matching (old, new) prefix rule to a column name."""
        if renameRules is None:
            return col
        for old, new in renameRules:
            if col.startswith(old):
                col = col.replace(old, new)
        return col

    @classmethod
    def from_file(cls, filename, **kwargs):
        """Construct a CompositeFunctor from a YAML specification file."""
        # Allow environment variables in the filename.
        filename = os.path.expandvars(filename)
        with open(filename) as f:
            translationDefinition = yaml.safe_load(f)

        return cls.from_yaml(translationDefinition, **kwargs)

    @classmethod
    def from_yaml(cls, translationDefinition, **kwargs):
        """Construct a CompositeFunctor from a parsed YAML specification."""
        funcs = {}
        for func, val in translationDefinition['funcs'].items():
            funcs[func] = init_fromDict(val, name=func)

        if 'flag_rename_rules' in translationDefinition:
            renameRules = translationDefinition['flag_rename_rules']
        else:
            renameRules = None

        # Each flag section maps to plain Column functors on its dataset.
        if 'calexpFlags' in translationDefinition:
            for flag in translationDefinition['calexpFlags']:
                funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='calexp')

        if 'refFlags' in translationDefinition:
            for flag in translationDefinition['refFlags']:
                funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='ref')

        if 'forcedFlags' in translationDefinition:
            for flag in translationDefinition['forcedFlags']:
                funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='forced_src')

        if 'flags' in translationDefinition:
            for flag in translationDefinition['flags']:
                funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='meas')

        return cls(funcs, **kwargs)
def mag_aware_eval(df, expr, log):
    """Evaluate an expression on a DataFrame, knowing what the 'mag' function
    means.

    Builds on `pandas.DataFrame.eval`, which parses and executes math on
    DataFrames.

    Parameters
    ----------
    df : ~pandas.DataFrame
        DataFrame on which to evaluate expression.

    expr : str
        Expression.

    log : `logging.Logger`
        Logger to which a failure of the first evaluation attempt is
        reported before the fallback is tried.

    Returns
    -------
    val
        Result of evaluating the (rewritten) expression on ``df``.
    """
    try:
        # Rewrite mag(col) as -2.5*log10(col), expressed via natural logs
        # because DataFrame.eval provides `log` but not `log10`.
        expr_new = re.sub(r'mag\((\w+)\)', r'-2.5*log(\g<1>)/log(10)', expr)
        val = df.eval(expr_new)
    except Exception as e:  # Should check what actually gets raised
        log.error("Exception in mag_aware_eval: %s: %s", type(e).__name__, e)
        # Fallback: interpret bare names inside mag() as _instFlux columns.
        expr_new = re.sub(r'mag\((\w+)\)', r'-2.5*log(\g<1>_instFlux)/log(10)', expr)
        val = df.eval(expr_new)
    return val
class CustomFunctor(Functor):
    """Arbitrary computation on a catalog.

    Column names (and thus the columns to be loaded from catalog) are found by
    finding all words and trying to ignore all "math-y" words.

    Parameters
    ----------
    expr : str
        Expression to evaluate, to be parsed and executed by
        `~lsst.pipe.tasks.functors.mag_aware_eval`.
    """
    # Words that look like identifiers in the expression but are functions,
    # not column names.
    _ignore_words = ('mag', 'sin', 'cos', 'exp', 'log', 'sqrt')

    def __init__(self, expr, **kwargs):
        self.expr = expr
        super().__init__(**kwargs)

    @property
    def name(self):
        return self.expr

    @property
    def columns(self):
        """Columns required to evaluate the expression."""
        # Names appearing inside mag(...) refer to fluxes.
        flux_cols = re.findall(r'mag\(\s*(\w+)\s*\)', self.expr)

        cols = [c for c in re.findall(r'[a-zA-Z_]+', self.expr) if c not in self._ignore_words]
        not_a_col = []
        for c in flux_cols:
            if not c.endswith('_instFlux'):
                # A bare name inside mag() stands for its _instFlux column.
                cols.append(f'{c}_instFlux')
                not_a_col.append(c)
            else:
                cols.append(c)

        return list({c for c in cols if c not in not_a_col})

    def _func(self, df):
        return mag_aware_eval(df, self.expr, self.log)
class Column(Functor):
    """Return a single catalog column, unchanged, by name."""

    def __init__(self, col, **kwargs):
        self.col = col
        super().__init__(**kwargs)

    @property
    def name(self):
        # The column name doubles as the functor's display name.
        return self.col

    @property
    def columns(self):
        # Only the one named column is ever read.
        return [self.col]

    def _func(self, df):
        return df[self.col]
class Index(Functor):
    """Return the value of the DataFrame index for each object."""

    # A column must be requested even though only the index is used;
    # coord_ra serves as that placeholder.
    columns = ['coord_ra']
    _defaultDataset = 'ref'
    _defaultNoDup = True

    def _func(self, df):
        # Materialize the index as a Series aligned to itself.
        return pd.Series(df.index, index=df.index)
class CoordColumn(Column):
    """Base class for coordinate columns, returned in degrees."""
    # When True, the stored values are radians and are converted on output.
    _radians = True

    def __init__(self, col, **kwargs):
        super().__init__(col, **kwargs)

    def _func(self, df):
        # Build the result from a fresh expression so the source column is
        # never modified in place (it may be shared with other functors).
        if self._radians:
            return df[self.col] * 180 / np.pi
        return df[self.col]
class RAColumn(CoordColumn):
    """Right Ascension, in degrees."""
    name = 'RA'
    _defaultNoDup = True

    def __init__(self, **kwargs):
        super().__init__('coord_ra', **kwargs)

    def __call__(self, catalog, **kwargs):
        # Delegates directly to CoordColumn.__call__.
        return super().__call__(catalog, **kwargs)
class DecColumn(CoordColumn):
    """Declination, in degrees."""
    name = 'Dec'
    _defaultNoDup = True

    def __init__(self, **kwargs):
        super().__init__('coord_dec', **kwargs)

    def __call__(self, catalog, **kwargs):
        # Delegates directly to CoordColumn.__call__.
        return super().__call__(catalog, **kwargs)
class RAErrColumn(CoordColumn):
    """Uncertainty in Right Ascension, in degrees."""
    name = 'RAErr'
    _defaultNoDup = True

    def __init__(self, **kwargs):
        # Fixed source column; conversion handled by CoordColumn.
        super().__init__('coord_raErr', **kwargs)
class DecErrColumn(CoordColumn):
    """Uncertainty in declination, in degrees."""
    name = 'DecErr'
    _defaultNoDup = True

    def __init__(self, **kwargs):
        # Fixed source column; conversion handled by CoordColumn.
        super().__init__('coord_decErr', **kwargs)
class RADecCovColumn(Column):
    """RA/Dec covariance column, converted to square degrees."""
    # When True, the stored covariance is in radians squared.
    _radians = True
    name = 'RADecCov'
    _defaultNoDup = True

    def __init__(self, **kwargs):
        super().__init__('coord_ra_dec_Cov', **kwargs)

    def _func(self, df):
        # Build the result from a fresh expression so the source column is
        # never modified in place (it may be shared with other functors).
        if self._radians:
            return df[self.col]*(180/np.pi)**2
        return df[self.col]
class HtmIndex20(Functor):
    """Compute the level 20 HtmIndex for the catalog.

    Notes
    -----
    This functor was implemented to satisfy requirements of old APDB interface
    which required the ``pixelId`` column in DiaObject with HTM20 index.
    The APDB interface had migrated to not need that information, but we keep
    this class in case it may be useful for something else.
    """
    name = "Htm20"
    htmLevel = 20
    # When True, input coordinate columns are interpreted as radians.
    _radians = True

    def __init__(self, ra, dec, **kwargs):
        self.pixelator = sphgeom.HtmPixelization(self.htmLevel)
        self.ra = ra
        self.dec = dec
        self._columns = [self.ra, self.dec]
        super().__init__(**kwargs)

    def _func(self, df):
        # The unit choice is constant for the whole table, so resolve it
        # once rather than per row.
        units = geom.radians if self._radians else geom.degrees

        def computePixel(row):
            sphPoint = geom.SpherePoint(row[self.ra], row[self.dec], units)
            return self.pixelator.index(sphPoint.getVector())

        return df.apply(computePixel, axis=1, result_type='reduce').astype('int64')
def fluxName(col):
    """Append _instFlux to the column name if it doesn't have it already."""
    return col if col.endswith('_instFlux') else f'{col}_instFlux'
def fluxErrName(col):
    """Append _instFluxErr to the column name if it doesn't have it already."""
    return col if col.endswith('_instFluxErr') else f'{col}_instFluxErr'
class Mag(Functor):
    """Compute calibrated magnitude.

    ``fluxMag0`` is the flux at magnitude zero; the default value,
    63095734448.0194, is the HSC default.
    TO DO: This default should be made configurable in DM-21955.

    Warnings about invalid values and division by zero are suppressed
    during the calculation.

    As with all functors, ``dataset`` and ``filt`` kwargs should be provided
    upon initialization.
    Note that, unlike the base `Functor`, the default dataset for a `Mag`
    is ``'meas'`` rather than ``'ref'``.

    Parameters
    ----------
    col : `str`
        Name of flux column from which to compute magnitude.
        Can be parseable by the `~lsst.pipe.tasks.functors.fluxName` function;
        that is, you can pass ``'modelfit_CModel'`` instead of
        ``'modelfit_CModel_instFlux'``, and it will understand.
    """
    _defaultDataset = 'meas'

    def __init__(self, col, calib=None, **kwargs):
        self.col = fluxName(col)
        self.calib = calib
        if calib is None:
            # TO DO: DM-21955 Replace hard coded photometic calibration values.
            self.fluxMag0 = 63095734448.0194
        else:
            # TO DO: DM-39914 Remove deprecated calib argument in Mag functor.
            warnings.warn(
                "The 'calib' argument is deprecated, and will be removed after v26.",
                FutureWarning,
                stacklevel=2,
            )
            self.fluxMag0 = calib.getFluxMag0()[0]

        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.col]

    def _func(self, df):
        # Non-positive fluxes would trigger runtime warnings from log10;
        # silence them and let the NaN/inf magnitudes flow through.
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', r'invalid value encountered')
            warnings.filterwarnings('ignore', r'divide by zero')
            return -2.5*np.log10(df[self.col] / self.fluxMag0)

    @property
    def name(self):
        return f'mag_{self.col}'
class MagErr(Mag):
    """Compute calibrated magnitude uncertainty.

    Parameters
    ----------
    col : `str`
        Name of the flux column.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        if self.calib is None:
            # TO DO: DM-21955 Replace hard coded photometic calibration values.
            self.fluxMag0Err = 0.
        else:
            # TO DO: DM-39914 Remove deprecated calib argument in Mag functor.
            self.fluxMag0Err = self.calib.getFluxMag0()[1]
            warnings.warn(
                "The 'calib' argument is deprecated, and will be removed after v26.",
                FutureWarning,
                stacklevel=2,
            )

    @property
    def columns(self):
        # The flux column plus its matching error column.
        return [self.col, f'{self.col}Err']

    def _func(self, df):
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', r'invalid value encountered')
            warnings.filterwarnings('ignore', r'divide by zero')
            fluxCol, fluxErrCol = self.columns
            # Combine the relative flux error and the relative zeropoint
            # error in quadrature, then convert to magnitudes.
            relFluxErr = df[fluxErrCol] / df[fluxCol]
            relZpErr = self.fluxMag0Err / self.fluxMag0
            return (2.5 / np.log(10.)) * np.sqrt(relFluxErr*relFluxErr + relZpErr*relZpErr)

    @property
    def name(self):
        return super().name + '_err'
class MagDiff(Functor):
    """Functor to calculate magnitude difference."""
    _defaultDataset = 'meas'

    def __init__(self, col1, col2, **kwargs):
        # Both names are normalized to their _instFlux form.
        self.col1 = fluxName(col1)
        self.col2 = fluxName(col2)
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.col1, self.col2]

    def _func(self, df):
        # The flux ratio converts directly to a magnitude difference;
        # warnings from non-positive fluxes are suppressed.
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', r'invalid value encountered')
            warnings.filterwarnings('ignore', r'divide by zero')
            fluxRatio = df[self.col1]/df[self.col2]
            return -2.5*np.log10(fluxRatio)

    @property
    def name(self):
        return f'(mag_{self.col1} - mag_{self.col2})'

    @property
    def shortname(self):
        return f'magDiff_{self.col1}_{self.col2}'
class Color(Functor):
    """Compute the color between two filters.

    Computes color by initializing two different `Mag` functors based on the
    ``col`` and filters provided, and then returning the difference.

    This is enabled by the `_func` method expecting a DataFrame with a
    multilevel column index, with both ``'band'`` and ``'column'``, instead of
    just ``'column'``, which is the `Functor` default.
    This is controlled by the `_dfLevels` attribute.

    Also of note, the default dataset for `Color` is ``forced_src'``, whereas
    for `Mag` it is ``'meas'``.

    Parameters
    ----------
    col : str
        Name of the flux column from which to compute; same as would be passed
        to `~lsst.pipe.tasks.functors.Mag`.

    filt2, filt1 : str
        Filters from which to compute magnitude difference.
        Color computed is ``Mag(filt2) - Mag(filt1)``.
    """
    _defaultDataset = 'forced_src'
    _dfLevels = ('band', 'column')
    _defaultNoDup = True

    def __init__(self, col, filt2, filt1, **kwargs):
        self.col = fluxName(col)
        if filt2 == filt1:
            raise RuntimeError("Cannot compute Color for %s: %s - %s " % (col, filt2, filt1))
        self.filt2 = filt2
        self.filt1 = filt1

        # One Mag functor per band; their difference is the color.
        self.mag2 = Mag(col, filt=filt2, **kwargs)
        self.mag1 = Mag(col, filt=filt1, **kwargs)

        super().__init__(**kwargs)

    @property
    def filt(self):
        # A color spans two bands, so there is no single 'filt'.
        return None

    @filt.setter
    def filt(self, filt):
        # Deliberately ignore attempts to assign a single band.
        pass

    def _func(self, df):
        # df has a ('band', 'column') column index; select each band's
        # sub-frame and difference the resulting magnitudes.
        return self.mag2._func(df[self.filt2]) - self.mag1._func(df[self.filt1])

    @property
    def columns(self):
        return [self.mag1.col, self.mag2.col]

    def multilevelColumns(self, parq, **kwargs):
        # One (dataset, band, column) tuple per band.
        return [(self.dataset, self.filt1, self.col), (self.dataset, self.filt2, self.col)]

    @property
    def name(self):
        return f'{self.filt2} - {self.filt1} ({self.col})'

    @property
    def shortname(self):
        return f"{self.col}_{self.filt2.replace('-', '')}m{self.filt1.replace('-', '')}"
class DeconvolvedMoments(Functor):
    """This functor subtracts the trace of the PSF second moments from the
    trace of the second moments of the source.

    If the HsmShapeAlgorithm measurement is valid, then these will be used for
    the sources.
    Otherwise, the SdssShapeAlgorithm measurements will be used.
    """
    name = 'Deconvolved Moments'
    shortname = 'deconvolvedMoments'
    _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
                "ext_shapeHSM_HsmSourceMoments_yy",
                "base_SdssShape_xx", "base_SdssShape_yy",
                "ext_shapeHSM_HsmPsfMoments_xx",
                "ext_shapeHSM_HsmPsfMoments_yy")

    def _func(self, df):
        """Calculate deconvolved moments."""
        # Source trace: prefer HSM moments when present, otherwise NaN so
        # the SDSS fallback below takes over element-wise.
        if "ext_shapeHSM_HsmSourceMoments_xx" in df.columns:
            hsm = df["ext_shapeHSM_HsmSourceMoments_xx"] + df["ext_shapeHSM_HsmSourceMoments_yy"]
        else:
            hsm = np.ones(len(df))*np.nan
        sdss = df["base_SdssShape_xx"] + df["base_SdssShape_yy"]
        if "ext_shapeHSM_HsmPsfMoments_xx" not in df.columns:
            # LSST does not have shape.sdss.psf.
            # We could instead add base_PsfShape to the catalog using
            # exposure.getPsf().computeShape(s.getCentroid()).getIxx().
            raise RuntimeError('No psf shape parameter found in catalog')
        psf = df["ext_shapeHSM_HsmPsfMoments_xx"] + df["ext_shapeHSM_HsmPsfMoments_yy"]

        # Use SDSS values wherever the HSM trace is not finite.
        return hsm.where(np.isfinite(hsm), sdss) - psf
class SdssTraceSize(Functor):
    """Functor to calculate the SDSS trace radius size for sources.

    The trace radius is the square root of half of the trace of the
    second-moments tensor measured with the SdssShapeAlgorithm plugin,
    in units of pixels.
    """
    name = "SDSS Trace Size"
    shortname = 'sdssTrace'
    _columns = ("base_SdssShape_xx", "base_SdssShape_yy")

    def _func(self, df):
        # sqrt(0.5 * (Ixx + Iyy)), in pixels.
        trace = df["base_SdssShape_xx"] + df["base_SdssShape_yy"]
        return np.sqrt(0.5*trace)
class PsfSdssTraceSizeDiff(Functor):
    """Functor to calculate the SDSS trace radius size difference (%) between
    the object and the PSF model.

    See Also
    --------
    SdssTraceSize
    """
    name = "PSF - SDSS Trace Size"
    shortname = 'psf_sdssTrace'
    _columns = ("base_SdssShape_xx", "base_SdssShape_yy",
                "base_SdssShape_psf_xx", "base_SdssShape_psf_yy")

    def _func(self, df):
        # Trace radii (pixels) for the source and for the PSF model.
        src = np.sqrt(0.5*(df["base_SdssShape_xx"] + df["base_SdssShape_yy"]))
        psf = np.sqrt(0.5*(df["base_SdssShape_psf_xx"] + df["base_SdssShape_psf_yy"]))
        # Percentage difference, normalized by the mean of the two sizes.
        return 100*(src - psf)/(0.5*(src + psf))
class HsmTraceSize(Functor):
    """Functor to calculate the HSM trace radius size for sources.

    The trace radius is the square root of half of the trace of the
    second-moments tensor measured with the HsmShapeAlgorithm plugin,
    in units of pixels.
    """
    name = 'HSM Trace Size'
    shortname = 'hsmTrace'
    _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
                "ext_shapeHSM_HsmSourceMoments_yy")

    def _func(self, df):
        # sqrt(0.5 * (Ixx + Iyy)), in pixels.
        trace = (df["ext_shapeHSM_HsmSourceMoments_xx"]
                 + df["ext_shapeHSM_HsmSourceMoments_yy"])
        return np.sqrt(0.5*trace)
class PsfHsmTraceSizeDiff(Functor):
    """Functor to calculate the HSM trace radius size difference (%) between
    the object and the PSF model.

    See Also
    --------
    HsmTraceSize
    """
    name = 'PSF - HSM Trace Size'
    shortname = 'psf_HsmTrace'
    _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
                "ext_shapeHSM_HsmSourceMoments_yy",
                "ext_shapeHSM_HsmPsfMoments_xx",
                "ext_shapeHSM_HsmPsfMoments_yy")

    def _func(self, df):
        # HSM trace radii (pixels) for the source and for the PSF model.
        src = np.sqrt(0.5*(df["ext_shapeHSM_HsmSourceMoments_xx"]
                           + df["ext_shapeHSM_HsmSourceMoments_yy"]))
        psf = np.sqrt(0.5*(df["ext_shapeHSM_HsmPsfMoments_xx"]
                           + df["ext_shapeHSM_HsmPsfMoments_yy"]))
        # Percentage difference, normalized by the mean of the two sizes.
        return 100*(src - psf)/(0.5*(src + psf))
class HsmFwhm(Functor):
    """Functor to calculate the PSF FWHM with second moments measured from the
    HsmShapeAlgorithm plugin.

    This is in units of arcseconds, and assumes the hsc_rings_v1 skymap pixel
    scale of 0.168 arcseconds/pixel.

    Notes
    -----
    This conversion assumes the PSF is Gaussian, which is not always the case.
    """
    name = 'HSM Psf FWHM'
    _columns = ('ext_shapeHSM_HsmPsfMoments_xx', 'ext_shapeHSM_HsmPsfMoments_yy')
    # TODO: DM-21403 pixel scale should be computed from the CD matrix or transform matrix
    pixelScale = 0.168
    SIGMA2FWHM = 2*np.sqrt(2*np.log(2))

    def _func(self, df):
        # Gaussian sigma in pixels from the PSF second-moments trace,
        # converted to FWHM and scaled to arcseconds.
        sigma = np.sqrt(0.5*(df['ext_shapeHSM_HsmPsfMoments_xx']
                             + df['ext_shapeHSM_HsmPsfMoments_yy']))
        return self.pixelScale*self.SIGMA2FWHM*sigma
class E1(Functor):
    r"""Calculate :math:`e_1` ellipticity component for sources, defined as:

    .. math::
        e_1 &= (I_{xx}-I_{yy})/(I_{xx}+I_{yy})

    See Also
    --------
    E2
    """
    name = "Distortion Ellipticity (e1)"
    shortname = "Distortion"

    def __init__(self, colXX, colXY, colYY, **kwargs):
        """Store the second-moment column names.

        Parameters
        ----------
        colXX, colXY, colYY : str
            Names of the Ixx, Ixy and Iyy second-moment columns.
            ``colXY`` is requested but not used in the e1 computation.
        """
        self.colXX = colXX
        self.colXY = colXY
        self.colYY = colYY
        self._columns = [self.colXX, self.colXY, self.colYY]
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.colXX, self.colXY, self.colYY]

    def _func(self, df):
        # e1 = (Ixx - Iyy) / (Ixx + Iyy).  The numerator must be
        # parenthesized; previously only Iyy was divided by the trace,
        # contradicting the class docstring.
        return (df[self.colXX] - df[self.colYY]) / (df[self.colXX] + df[self.colYY])
class E2(Functor):
    r"""Calculate :math:`e_2` ellipticity component for sources, defined as:

    .. math::
        e_2 &= 2I_{xy}/(I_{xx}+I_{yy})

    See Also
    --------
    E1
    """
    name = "Ellipticity e2"

    def __init__(self, colXX, colXY, colYY, **kwargs):
        # Names of the Ixx, Ixy and Iyy second-moment columns.
        self.colXX = colXX
        self.colXY = colXY
        self.colYY = colYY
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.colXX, self.colXY, self.colYY]

    def _func(self, df):
        # e2 = 2*Ixy / (Ixx + Iyy).
        numerator = 2*df[self.colXY]
        trace = df[self.colXX] + df[self.colYY]
        return numerator / trace
class RadiusFromQuadrupole(Functor):
    """Calculate the radius from the quadrupole moments.

    This returns the fourth root of the determinant of the second moments
    tensor, which has units of pixels.

    See Also
    --------
    SdssTraceSize
    HsmTraceSize
    """

    def __init__(self, colXX, colXY, colYY, **kwargs):
        # Names of the Ixx, Ixy and Iyy second-moment columns.
        self.colXX = colXX
        self.colXY = colXY
        self.colYY = colYY
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.colXX, self.colXY, self.colYY]

    def _func(self, df):
        # Determinant of the second-moments tensor, then its fourth root.
        determinant = df[self.colXX]*df[self.colYY] - df[self.colXY]**2
        return determinant**0.25
class LocalWcs(Functor):
    """Computations using the stored localWcs."""
    name = "LocalWcsOperations"

    def __init__(self,
                 colCD_1_1,
                 colCD_1_2,
                 colCD_2_1,
                 colCD_2_2,
                 **kwargs):
        # Column names of the four elements of the local CD matrix.
        self.colCD_1_1 = colCD_1_1
        self.colCD_1_2 = colCD_1_2
        self.colCD_2_1 = colCD_2_1
        self.colCD_2_2 = colCD_2_2
        super().__init__(**kwargs)

    def computeDeltaRaDec(self, x, y, cd11, cd12, cd21, cd22):
        """Compute the sky offset (delta RA, delta Dec) corresponding to the
        pixel offset (x, y) under the local linear WCS approximation.

        Parameters
        ----------
        x : `~pandas.Series`
            X pixel coordinate.
        y : `~pandas.Series`
            Y pixel coordinate.
        cd11 : `~pandas.Series`
            [1, 1] element of the local Wcs affine transform.
        cd12 : `~pandas.Series`
            [1, 2] element of the local Wcs affine transform.
        cd21 : `~pandas.Series`
            [2, 1] element of the local Wcs affine transform.
        cd22 : `~pandas.Series`
            [2, 2] element of the local Wcs affine transform.

        Returns
        -------
        raDecTuple : tuple
            RA and dec conversion of x and y given the local Wcs.
            Returned units are in radians.

        """
        # Plain matrix-vector product: (dRA, dDec) = CD @ (x, y).
        return (x * cd11 + y * cd12, x * cd21 + y * cd22)

    def computeSkySeparation(self, ra1, dec1, ra2, dec2):
        """Compute the great-circle separation between two sky positions
        using the haversine formula.

        Parameters
        ----------
        ra1 : `~pandas.Series`
            Ra of the first coordinate in radians.
        dec1 : `~pandas.Series`
            Dec of the first coordinate in radians.
        ra2 : `~pandas.Series`
            Ra of the second coordinate in radians.
        dec2 : `~pandas.Series`
            Dec of the second coordinate in radians.

        Returns
        -------
        dist : `~pandas.Series`
            Distance on the sphere in radians.
        """
        deltaDec = dec2 - dec1
        deltaRa = ra2 - ra1
        # Haversine formula: numerically stable for small separations.
        return 2 * np.arcsin(
            np.sqrt(
                np.sin(deltaDec / 2) ** 2
                + np.cos(dec2) * np.cos(dec1) * np.sin(deltaRa / 2) ** 2))

    def getSkySeparationFromPixel(self, x1, y1, x2, y2, cd11, cd12, cd21, cd22):
        """Compute the distance on the sphere between the pixel positions
        (x1, y1) and (x2, y2), using the local linear WCS approximation.

        Parameters
        ----------
        x1 : `~pandas.Series`
            X pixel coordinate.
        y1 : `~pandas.Series`
            Y pixel coordinate.
        x2 : `~pandas.Series`
            X pixel coordinate.
        y2 : `~pandas.Series`
            Y pixel coordinate.
        cd11 : `~pandas.Series`
            [1, 1] element of the local Wcs affine transform.
        cd12 : `~pandas.Series`
            [1, 2] element of the local Wcs affine transform.
        cd21 : `~pandas.Series`
            [2, 1] element of the local Wcs affine transform.
        cd22 : `~pandas.Series`
            [2, 2] element of the local Wcs affine transform.

        Returns
        -------
        dist : `~pandas.Series`
            Distance on the sphere in radians.
        """
        ra1, dec1 = self.computeDeltaRaDec(x1, y1, cd11, cd12, cd21, cd22)
        ra2, dec2 = self.computeDeltaRaDec(x2, y2, cd11, cd12, cd21, cd22)
        # Great circle distance for small separations.
        return self.computeSkySeparation(ra1, dec1, ra2, dec2)
class ComputePixelScale(LocalWcs):
    """Compute the local pixel scale from the stored CDMatrix.
    """
    name = "PixelScale"

    @property
    def columns(self):
        return [self.colCD_1_1,
                self.colCD_1_2,
                self.colCD_2_1,
                self.colCD_2_2]

    def pixelScaleArcseconds(self, cd11, cd12, cd21, cd22):
        """Compute the local pixel-to-sky scale in arcseconds per pixel.

        Parameters
        ----------
        cd11 : `~pandas.Series`
            [1, 1] element of the local Wcs affine transform in radians.
        cd12 : `~pandas.Series`
            [1, 2] element of the local Wcs affine transform in radians.
        cd21 : `~pandas.Series`
            [2, 1] element of the local Wcs affine transform in radians.
        cd22 : `~pandas.Series`
            [2, 2] element of the local Wcs affine transform in radians.

        Returns
        -------
        pixScale : `~pandas.Series`
            Arcseconds per pixel at the location of the local WC.
        """
        # |det(CD)| is the pixel area in radians^2; its square root is the
        # linear scale, converted to arcseconds (3600 arcsec per degree).
        linearScale = np.sqrt(np.fabs(cd11 * cd22 - cd12 * cd21))
        return 3600 * np.degrees(linearScale)

    def _func(self, df):
        return self.pixelScaleArcseconds(df[self.colCD_1_1],
                                         df[self.colCD_1_2],
                                         df[self.colCD_2_1],
                                         df[self.colCD_2_2])
class ConvertPixelToArcseconds(ComputePixelScale):
    """Convert a value in units of pixels to units of arcseconds."""

    def __init__(self,
                 col,
                 colCD_1_1,
                 colCD_1_2,
                 colCD_2_1,
                 colCD_2_2,
                 **kwargs):
        # Name of the pixel-valued column to convert.
        self.col = col
        super().__init__(colCD_1_1, colCD_1_2, colCD_2_1, colCD_2_2, **kwargs)

    @property
    def name(self):
        return f"{self.col}_asArcseconds"

    @property
    def columns(self):
        # The value column plus the four local CD-matrix columns.
        return [self.col,
                self.colCD_1_1,
                self.colCD_1_2,
                self.colCD_2_1,
                self.colCD_2_2]

    def _func(self, df):
        # Multiply the pixel quantity by the local arcsec/pixel scale.
        pixScale = self.pixelScaleArcseconds(df[self.colCD_1_1],
                                             df[self.colCD_1_2],
                                             df[self.colCD_2_1],
                                             df[self.colCD_2_2])
        return df[self.col] * pixScale
class ConvertPixelSqToArcsecondsSq(ComputePixelScale):
    """Convert a value in units of pixels squared to units of arcseconds
    squared.
    """

    def __init__(self,
                 col,
                 colCD_1_1,
                 colCD_1_2,
                 colCD_2_1,
                 colCD_2_2,
                 **kwargs):
        # Name of the pixel^2-valued column to convert.
        self.col = col
        super().__init__(colCD_1_1, colCD_1_2, colCD_2_1, colCD_2_2, **kwargs)

    @property
    def name(self):
        return f"{self.col}_asArcsecondsSq"

    @property
    def columns(self):
        # The value column plus the four local CD-matrix columns.
        return [self.col,
                self.colCD_1_1,
                self.colCD_1_2,
                self.colCD_2_1,
                self.colCD_2_2]

    def _func(self, df):
        # Multiply the pixel^2 quantity by the squared arcsec/pixel scale.
        scale = self.pixelScaleArcseconds(df[self.colCD_1_1],
                                          df[self.colCD_1_2],
                                          df[self.colCD_2_1],
                                          df[self.colCD_2_2])
        return df[self.col] * scale * scale
class ReferenceBand(Functor):
    """Return the band used to seed multiband forced photometry.

    This functor is to be used on Object tables.
    It converts the boolean merge_measurements_{band} columns into a single
    string representing the first band for which merge_measurements_{band}
    is True.

    Assumes the default priority order of i, r, z, y, g, u.
    """
    name = 'Reference Band'
    shortname = 'refBand'

    @property
    def columns(self):
        # Listed in priority order: the first True flag wins below.
        return ["merge_measurement_i",
                "merge_measurement_r",
                "merge_measurement_z",
                "merge_measurement_y",
                "merge_measurement_g",
                "merge_measurement_u"]

    def _func(self, df: pd.DataFrame) -> pd.Series:
        def getFilterAliasName(row):
            # Get column name with the max value (True > False); idxmax
            # returns the first such column, honoring the priority order.
            colName = row.idxmax()
            return colName.replace('merge_measurement_', '')

        # Skip columns that are unavailable, because this functor requests the
        # superset of bands that could be included in the object table.
        columns = [col for col in self.columns if col in df.columns]
        # Makes a Series of dtype object if df is empty.
        return df[columns].apply(getFilterAliasName, axis=1,
                                 result_type='reduce').astype('object')
class Photometry(Functor):
    """Base class for Object table calibrated fluxes and magnitudes."""
    # AB to NanoJansky (3631 Jansky).
    AB_FLUX_SCALE = (0 * u.ABmag).to_value(u.nJy)
    LOG_AB_FLUX_SCALE = 12.56
    FIVE_OVER_2LOG10 = 1.085736204758129569
    # TO DO: DM-21955 Replace hard coded photometic calibration values.
    COADD_ZP = 27

    def __init__(self, colFlux, colFluxErr=None, calib=None, **kwargs):
        """Record column names and derive the flux zeropoint.

        Parameters
        ----------
        colFlux : str
            Name of the instrumental flux column.
        colFluxErr : str, optional
            Name of the instrumental flux error column.
        calib : optional
            Object providing ``getFluxMag0()``; when omitted, the
            hard-coded ``COADD_ZP`` zeropoint is used instead.
        """
        # Vectorized overflow-safe hypot for array arguments.
        self.vhypot = np.vectorize(self.hypot)
        self.col = colFlux
        self.colFluxErr = colFluxErr

        self.calib = calib
        if calib is None:
            # Fall back to the hard-coded coadd zeropoint.
            self.fluxMag0 = 1./np.power(10, -0.4*self.COADD_ZP)
            self.fluxMag0Err = 0.
        else:
            self.fluxMag0, self.fluxMag0Err = calib.getFluxMag0()

        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.col]

    @property
    def name(self):
        return f'mag_{self.col}'

    @classmethod
    def hypot(cls, a, b):
        """Compute sqrt(a^2 + b^2) without under/overflow."""
        # Order the arguments so |a| >= |b|; the ratio below is then <= 1.
        if np.abs(a) < np.abs(b):
            a, b = b, a
        if a == 0.:
            return 0.
        ratio = b/a
        return np.abs(a) * np.sqrt(1. + ratio*ratio)

    def dn2flux(self, dn, fluxMag0):
        """Convert instrumental flux to nanojanskys."""
        return self.AB_FLUX_SCALE * dn / fluxMag0

    def dn2mag(self, dn, fluxMag0):
        """Convert instrumental flux to AB magnitude."""
        # Non-positive fluxes yield NaN/inf magnitudes; suppress the
        # resulting numpy warnings rather than propagating them.
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', r'invalid value encountered')
            warnings.filterwarnings('ignore', r'divide by zero')
            return -2.5 * np.log10(dn/fluxMag0)

    def dn2fluxErr(self, dn, dnErr, fluxMag0, fluxMag0Err):
        """Convert instrumental flux error to nanojanskys."""
        # Combine the zeropoint error and the flux error in quadrature.
        err = self.vhypot(dn * fluxMag0Err, dnErr * fluxMag0)
        err *= self.AB_FLUX_SCALE / fluxMag0 / fluxMag0
        return err

    def dn2MagErr(self, dn, dnErr, fluxMag0, fluxMag0Err):
        """Convert instrumental flux error to AB magnitude error."""
        relativeErr = self.dn2fluxErr(dn, dnErr, fluxMag0, fluxMag0Err) / self.dn2flux(dn, fluxMag0)
        return self.FIVE_OVER_2LOG10 * relativeErr
class NanoJansky(Photometry):
    """Convert instrumental flux to nanojanskys."""

    def _func(self, df):
        """Return the calibrated flux in nanojansky for ``self.col``."""
        dn = df[self.col]
        return self.dn2flux(dn, self.fluxMag0)
class NanoJanskyErr(Photometry):
    """Convert instrumental flux error to nanojanskys."""

    @property
    def columns(self):
        return [self.col, self.colFluxErr]

    def _func(self, df):
        """Return calibrated flux errors as a Series aligned with ``df``."""
        fluxErr = self.dn2fluxErr(df[self.col], df[self.colFluxErr],
                                  self.fluxMag0, self.fluxMag0Err)
        # dn2fluxErr goes through np.vectorize and returns an ndarray;
        # wrap it back into a Series with the original index.
        return pd.Series(fluxErr, index=df.index)
class LocalPhotometry(Functor):
    """Base class for calibrating the specified instrument flux column using
    the local photometric calibration.

    Parameters
    ----------
    instFluxCol : `str`
        Name of the instrument flux column.
    instFluxErrCol : `str`
        Name of the associated error column for ``instFluxCol``.
    photoCalibCol : `str`
        Name of local calibration column.
    photoCalibErrCol : `str`
        Error associated with ``photoCalibCol``

    See Also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    """
    logNJanskyToAB = (1 * u.nJy).to_value(u.ABmag)

    def __init__(self,
                 instFluxCol,
                 instFluxErrCol,
                 photoCalibCol,
                 photoCalibErrCol,
                 **kwargs):
        self.instFluxCol = instFluxCol
        self.instFluxErrCol = instFluxErrCol
        self.photoCalibCol = photoCalibCol
        self.photoCalibErrCol = photoCalibErrCol
        super().__init__(**kwargs)

    def instFluxToNanojansky(self, instFlux, localCalib):
        """Convert instrument flux to nanojanskys.

        Parameters
        ----------
        instFlux : `~numpy.ndarray` or `~pandas.Series`
            Array of instrument flux measurements.
        localCalib : `~numpy.ndarray` or `~pandas.Series`
            Array of local photometric calibration estimates.

        Returns
        -------
        calibFlux : `~numpy.ndarray` or `~pandas.Series`
            Array of calibrated flux measurements.
        """
        return instFlux * localCalib

    def instFluxErrToNanojanskyErr(self, instFlux, instFluxErr, localCalib, localCalibErr):
        """Convert instrument flux error to nanojanskys.

        Parameters
        ----------
        instFlux : `~numpy.ndarray` or `~pandas.Series`
            Array of instrument flux measurements.
        instFluxErr : `~numpy.ndarray` or `~pandas.Series`
            Errors on associated ``instFlux`` values.
        localCalib : `~numpy.ndarray` or `~pandas.Series`
            Array of local photometric calibration estimates.
        localCalibErr : `~numpy.ndarray` or `~pandas.Series`
            Errors on associated ``localCalib`` values.

        Returns
        -------
        calibFluxErr : `~numpy.ndarray` or `~pandas.Series`
            Errors on calibrated flux measurements.
        """
        # Quadrature sum of the flux-error and calibration-error terms.
        return np.hypot(instFluxErr * localCalib, instFlux * localCalibErr)

    def instFluxToMagnitude(self, instFlux, localCalib):
        """Convert instrument flux to AB magnitude.

        Parameters
        ----------
        instFlux : `~numpy.ndarray` or `~pandas.Series`
            Array of instrument flux measurements.
        localCalib : `~numpy.ndarray` or `~pandas.Series`
            Array of local photometric calibration estimates.

        Returns
        -------
        calibMag : `~numpy.ndarray` or `~pandas.Series`
            Array of calibrated AB magnitudes.
        """
        return -2.5 * np.log10(self.instFluxToNanojansky(instFlux, localCalib)) + self.logNJanskyToAB

    def instFluxErrToMagnitudeErr(self, instFlux, instFluxErr, localCalib, localCalibErr):
        """Convert instrument flux error to AB magnitude error.

        Parameters
        ----------
        instFlux : `~numpy.ndarray` or `~pandas.Series`
            Array of instrument flux measurements.
        instFluxErr : `~numpy.ndarray` or `~pandas.Series`
            Errors on associated ``instFlux`` values.
        localCalib : `~numpy.ndarray` or `~pandas.Series`
            Array of local photometric calibration estimates.
        localCalibErr : `~numpy.ndarray` or `~pandas.Series`
            Errors on associated ``localCalib`` values.

        Returns
        -------
        calibMagErr: `~numpy.ndarray` or `~pandas.Series`
            Error on calibrated AB magnitudes.
        """
        err = self.instFluxErrToNanojanskyErr(instFlux, instFluxErr, localCalib, localCalibErr)
        # magErr = (2.5 / ln 10) * fluxErr / flux.  The denominator must be
        # the calibrated flux: previously ``instFluxErr`` was passed in
        # place of ``localCalib``, yielding a dimensionally wrong result.
        return 2.5 / np.log(10) * err / self.instFluxToNanojansky(instFlux, localCalib)
class LocalNanojansky(LocalPhotometry):
    """Compute calibrated fluxes using the local calibration value.

    This returns units of nanojanskys.
    """

    @property
    def columns(self):
        return [self.instFluxCol, self.photoCalibCol]

    @property
    def name(self):
        return f'flux_{self.instFluxCol}'

    def _func(self, df):
        # Instrument flux scaled by the local photometric calibration.
        flux = df[self.instFluxCol]
        calib = df[self.photoCalibCol]
        return self.instFluxToNanojansky(flux, calib)
class LocalNanojanskyErr(LocalPhotometry):
    """Compute calibrated flux errors using the local calibration value.

    This returns units of nanojanskys.
    """

    @property
    def columns(self):
        return [self.instFluxCol, self.instFluxErrCol,
                self.photoCalibCol, self.photoCalibErrCol]

    @property
    def name(self):
        return f'fluxErr_{self.instFluxCol}'

    def _func(self, df):
        # Propagate both the instrument flux error and the calibration error.
        return self.instFluxErrToNanojanskyErr(
            df[self.instFluxCol], df[self.instFluxErrCol],
            df[self.photoCalibCol], df[self.photoCalibErrCol])
class LocalDipoleMeanFlux(LocalPhotometry):
    """Compute absolute mean of dipole fluxes.

    See Also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalDipoleMeanFluxErr
    LocalDipoleDiffFlux
    LocalDipoleDiffFluxErr
    """

    def __init__(self,
                 instFluxPosCol,
                 instFluxNegCol,
                 instFluxPosErrCol,
                 instFluxNegErrCol,
                 photoCalibCol,
                 photoCalibErrCol,
                 **kwargs):
        # Column names for the positive and negative dipole lobes.
        self.instFluxPosCol = instFluxPosCol
        self.instFluxNegCol = instFluxNegCol
        self.instFluxPosErrCol = instFluxPosErrCol
        self.instFluxNegErrCol = instFluxNegErrCol
        self.photoCalibCol = photoCalibCol
        self.photoCalibErrCol = photoCalibErrCol
        # Seed the base class with the negative-lobe columns.
        super().__init__(instFluxNegCol,
                         instFluxNegErrCol,
                         photoCalibCol,
                         photoCalibErrCol,
                         **kwargs)

    @property
    def columns(self):
        return [self.instFluxPosCol,
                self.instFluxNegCol,
                self.photoCalibCol]

    @property
    def name(self):
        return f'dipMeanFlux_{self.instFluxPosCol}_{self.instFluxNegCol}'

    def _func(self, df):
        # Mean of the absolute calibrated fluxes of the two lobes.
        calib = df[self.photoCalibCol]
        neg = np.fabs(self.instFluxToNanojansky(df[self.instFluxNegCol], calib))
        pos = np.fabs(self.instFluxToNanojansky(df[self.instFluxPosCol], calib))
        return 0.5*(neg + pos)
class LocalDipoleMeanFluxErr(LocalDipoleMeanFlux):
    """Compute the error on the absolute mean of dipole fluxes.

    See Also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalDipoleMeanFlux
    LocalDipoleDiffFlux
    LocalDipoleDiffFluxErr
    """

    @property
    def columns(self):
        return [self.instFluxPosCol,
                self.instFluxNegCol,
                self.instFluxPosErrCol,
                self.instFluxNegErrCol,
                self.photoCalibCol,
                self.photoCalibErrCol]

    @property
    def name(self):
        return f'dipMeanFluxErr_{self.instFluxPosCol}_{self.instFluxNegCol}'

    def _func(self, df):
        """Propagate errors for 0.5*(|neg| + |pos|)*calib: a calibration
        error term scaled by the summed absolute fluxes, plus the flux
        error terms scaled by the calibration."""
        # Note the grouping: the calibration error multiplies the SUM of the
        # absolute lobe fluxes.  The previous form multiplied only |pos| by
        # photoCalibErr, adding raw counts to a calibrated quantity (compare
        # the correctly grouped LocalDipoleDiffFluxErr below).
        return 0.5*np.sqrt(
            ((np.fabs(df[self.instFluxNegCol]) + np.fabs(df[self.instFluxPosCol]))
             * df[self.photoCalibErrCol])**2
            + (df[self.instFluxNegErrCol]**2 + df[self.instFluxPosErrCol]**2)
            * df[self.photoCalibCol]**2)
class LocalDipoleDiffFlux(LocalDipoleMeanFlux):
    """Compute the absolute difference of dipole fluxes.

    Calculated value is (abs(pos) - abs(neg)).

    See Also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalDipoleMeanFlux
    LocalDipoleMeanFluxErr
    LocalDipoleDiffFluxErr
    """

    @property
    def columns(self):
        return [self.instFluxPosCol,
                self.instFluxNegCol,
                self.photoCalibCol]

    @property
    def name(self):
        return f'dipDiffFlux_{self.instFluxPosCol}_{self.instFluxNegCol}'

    def _func(self, df):
        # |pos| - |neg| of the calibrated lobe fluxes.
        calib = df[self.photoCalibCol]
        pos = np.fabs(self.instFluxToNanojansky(df[self.instFluxPosCol], calib))
        neg = np.fabs(self.instFluxToNanojansky(df[self.instFluxNegCol], calib))
        return pos - neg
class LocalDipoleDiffFluxErr(LocalDipoleMeanFlux):
    """Compute the error on the absolute difference of dipole fluxes.

    See Also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalDipoleMeanFlux
    LocalDipoleMeanFluxErr
    LocalDipoleDiffFlux
    """

    @property
    def columns(self):
        return [self.instFluxPosCol,
                self.instFluxNegCol,
                self.instFluxPosErrCol,
                self.instFluxNegErrCol,
                self.photoCalibCol,
                self.photoCalibErrCol]

    @property
    def name(self):
        return f'dipDiffFluxErr_{self.instFluxPosCol}_{self.instFluxNegCol}'

    def _func(self, df):
        # Error propagation for (|pos| - |neg|)*calib: a calibration-error
        # term scaled by the flux difference, plus flux-error terms scaled
        # by the calibration.
        diff = np.fabs(df[self.instFluxPosCol]) - np.fabs(df[self.instFluxNegCol])
        fluxVar = df[self.instFluxPosErrCol]**2 + df[self.instFluxNegErrCol]**2
        return np.sqrt((diff * df[self.photoCalibErrCol])**2
                       + fluxVar * df[self.photoCalibCol]**2)
class Ebv(Functor):
    """Compute E(B-V) from dustmaps.sfd."""
    _defaultDataset = 'ref'
    name = "E(B-V)"
    shortname = "ebv"

    def __init__(self, **kwargs):
        # Import is only needed for Ebv.
        from dustmaps.sfd import SFDQuery
        self._columns = ['coord_ra', 'coord_dec']
        self.sfd = SFDQuery()
        super().__init__(**kwargs)

    def _func(self, df):
        """Query the SFD dust map at each row's sky position."""
        ra = df['coord_ra'].values * u.rad
        dec = df['coord_dec'].values * u.rad
        ebv = self.sfd(SkyCoord(ra, dec))
        # Double precision unnecessary scientifically but currently needed for
        # ingest to qserv.
        return pd.Series(ebv, index=df.index).astype('float64')