Coverage for python/lsst/pipe/tasks/functors.py: 34%
830 statements
« prev ^ index » next coverage.py v6.5.0, created at 2023-04-15 03:16 -0700
1# This file is part of pipe_tasks.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <https://www.gnu.org/licenses/>.
# Public API of this module: the functor classes and helpers exported by
# `from lsst.pipe.tasks.functors import *`.
__all__ = ["Functor", "CompositeFunctor", "CustomFunctor", "Column", "Index",
           "IDColumn", "FootprintNPix", "CoordColumn", "RAColumn", "DecColumn",
           "HtmIndex20", "Mag", "MagErr", "NanoMaggie", "MagDiff", "Color",
           "Labeller", "StarGalaxyLabeller", "NumStarLabeller", "DeconvolvedMoments",
           "SdssTraceSize", "PsfSdssTraceSizeDiff", "HsmTraceSize", "PsfHsmTraceSizeDiff",
           "HsmFwhm", "E1", "E2", "RadiusFromQuadrupole", "LocalWcs", "ComputePixelScale",
           "ConvertPixelToArcseconds", "ConvertPixelSqToArcsecondsSq", "ReferenceBand",
           "Photometry", "NanoJansky", "NanoJanskyErr", "Magnitude", "MagnitudeErr",
           "LocalPhotometry", "LocalNanojansky", "LocalNanojanskyErr",
           "LocalMagnitude", "LocalMagnitudeErr", "LocalDipoleMeanFlux",
           "LocalDipoleMeanFluxErr", "LocalDipoleDiffFlux", "LocalDipoleDiffFluxErr",
           "Ratio", "Ebv"]
35import yaml
36import re
37from itertools import product
38import logging
39import os.path
41import pandas as pd
42import numpy as np
43import astropy.units as u
44from astropy.coordinates import SkyCoord
46from lsst.utils import doImport
47from lsst.utils.introspection import get_full_type_name
48from lsst.daf.butler import DeferredDatasetHandle
49from lsst.pipe.base import InMemoryDatasetHandle
50import lsst.geom as geom
51import lsst.sphgeom as sphgeom
53from .parquetTable import ParquetTable, MultilevelParquetTable
def init_fromDict(initDict, basePath='lsst.pipe.tasks.functors',
                  typeKey='functor', name=None):
    """Initialize an object defined in a dictionary

    The object needs to be importable as
    f'{basePath}.{initDict[typeKey]}'
    The positional and keyword arguments (if any) are contained in
    "args" and "kwargs" entries in the dictionary, respectively.
    This is used in `functors.CompositeFunctor.from_yaml` to initialize
    a composite functor from a specification in a YAML file.

    Parameters
    ----------
    initDict : dictionary
        Dictionary describing object's initialization. Must contain
        an entry keyed by ``typeKey`` that is the name of the object,
        relative to ``basePath``.
    basePath : str
        Path relative to module in which ``initDict[typeKey]`` is defined.
    typeKey : str
        Key of ``initDict`` that is the name of the object
        (relative to `basePath`).
    name : str, optional
        Label for the object being constructed; used only to make the
        error message more informative if construction fails.

    Returns
    -------
    element
        The newly constructed object.
    """
    initDict = initDict.copy()
    # TO DO: DM-21956 We should be able to define functors outside this module
    pythonType = doImport(f'{basePath}.{initDict.pop(typeKey)}')
    # 'args' may be given as a single string or a list of positional args.
    args = []
    if 'args' in initDict:
        args = initDict.pop('args')
        if isinstance(args, str):
            args = [args]
    try:
        element = pythonType(*args, **initDict)
    except Exception as e:
        message = f'Error in constructing functor "{name}" of type {pythonType.__name__} with args: {args}'
        # Chain the original exception so its traceback is preserved.
        raise type(e)(message, e.args) from e
    return element
class Functor(object):
    """Define and execute a calculation on a ParquetTable

    The `__call__` method accepts either a `ParquetTable` object or a
    `DeferredDatasetHandle` or `InMemoryDatasetHandle`, and returns the
    result of the calculation as a single column. Each functor defines what
    columns are needed for the calculation, and only these columns are read
    from the `ParquetTable`.

    The action of `__call__` consists of two steps: first, loading the
    necessary columns from disk into memory as a `pandas.DataFrame` object;
    and second, performing the computation on this dataframe and returning the
    result.

    To define a new `Functor`, a subclass must define a `_func` method,
    that takes a `pandas.DataFrame` and returns result in a `pandas.Series`.
    In addition, it must define the following attributes

    * `_columns`: The columns necessary to perform the calculation
    * `name`: A name appropriate for a figure axis label
    * `shortname`: A name appropriate for use as a dictionary key

    On initialization, a `Functor` should declare what band (`filt` kwarg)
    and dataset (e.g. `'ref'`, `'meas'`, `'forced_src'`) it is intended to be
    applied to. This enables the `_get_data` method to extract the proper
    columns from the parquet file. If not specified, the dataset will fall back
    on the `_defaultDataset` attribute. If band is not specified and `dataset`
    is anything other than `'ref'`, then an error will be raised when trying to
    perform the calculation.

    Originally, `Functor` was set up to expect
    datasets formatted like the `deepCoadd_obj` dataset; that is, a
    dataframe with a multi-level column index, with the levels of the
    column index being `band`, `dataset`, and `column`.
    It has since been generalized to apply to dataframes without multi-level
    indices and multi-level indices with just `dataset` and `column` levels.
    In addition, the `_get_data` method that reads
    the dataframe from the `ParquetTable` will return a dataframe with column
    index levels defined by the `_dfLevels` attribute; by default, this is
    `column`.

    The `_dfLevels` attributes should generally not need to
    be changed, unless `_func` needs columns from multiple filters or datasets
    to do the calculation.
    An example of this is the `lsst.pipe.tasks.functors.Color` functor, for
    which `_dfLevels = ('band', 'column')`, and `_func` expects the dataframe
    it gets to have those levels in the column index.

    Parameters
    ----------
    filt : str
        Filter upon which to do the calculation

    dataset : str
        Dataset upon which to do the calculation
        (e.g., 'ref', 'meas', 'forced_src').
    """

    _defaultDataset = 'ref'
    _dfLevels = ('column',)
    _defaultNoDup = False

    def __init__(self, filt=None, dataset=None, noDup=None):
        self.filt = filt
        # Fall back to the class-level default when no dataset is given.
        self.dataset = dataset if dataset is not None else self._defaultDataset
        self._noDup = noDup
        self.log = logging.getLogger(type(self).__name__)

    @property
    def noDup(self):
        # An explicit instance-level setting wins over the class default.
        if self._noDup is not None:
            return self._noDup
        else:
            return self._defaultNoDup

    @property
    def columns(self):
        """Columns required to perform calculation
        """
        if not hasattr(self, '_columns'):
            raise NotImplementedError('Must define columns property or _columns attribute')
        return self._columns

    def _get_data_columnLevels(self, data, columnIndex=None):
        """Gets the names of the column index levels

        This should only be called in the context of a multilevel table.
        The logic here is to enable this to work both with the gen2
        `MultilevelParquetTable` and with the gen3 `DeferredDatasetHandle`.

        Parameters
        ----------
        data : various
            The data to be read, can be a `MultilevelParquetTable`,
            `DeferredDatasetHandle`, or `InMemoryDatasetHandle`.
        columnIndex (optional): pandas `Index` object
            If not passed, then it is read from the `DeferredDatasetHandle`
            or `InMemoryDatasetHandle`.
        """
        if isinstance(data, (DeferredDatasetHandle, InMemoryDatasetHandle)):
            if columnIndex is None:
                columnIndex = data.get(component="columns")
        if columnIndex is not None:
            return columnIndex.names
        if isinstance(data, MultilevelParquetTable):
            return data.columnLevels
        else:
            raise TypeError(f"Unknown type for data: {type(data)}!")

    def _get_data_columnLevelNames(self, data, columnIndex=None):
        """Gets the content of each of the column levels for a multilevel table

        Similar to `_get_data_columnLevels`, this enables backward
        compatibility with gen2.

        Mirrors original gen2 implementation within
        `pipe.tasks.parquetTable.MultilevelParquetTable`
        """
        if isinstance(data, (DeferredDatasetHandle, InMemoryDatasetHandle)):
            if columnIndex is None:
                columnIndex = data.get(component="columns")
        if columnIndex is not None:
            # Group the unique values found at each position of the column
            # tuples by level name.
            columnLevels = columnIndex.names
            columnLevelNames = {
                level: list(np.unique(np.array([c for c in columnIndex])[:, i]))
                for i, level in enumerate(columnLevels)
            }
            return columnLevelNames
        if isinstance(data, MultilevelParquetTable):
            return data.columnLevelNames
        else:
            raise TypeError(f"Unknown type for data: {type(data)}!")

    def _colsFromDict(self, colDict, columnIndex=None):
        """Converts dictionary column specification to a list of columns

        This mirrors the original gen2 implementation within
        `pipe.tasks.parquetTable.MultilevelParquetTable`
        """
        new_colDict = {}
        columnLevels = self._get_data_columnLevels(None, columnIndex=columnIndex)

        for i, lev in enumerate(columnLevels):
            if lev in colDict:
                if isinstance(colDict[lev], str):
                    new_colDict[lev] = [colDict[lev]]
                else:
                    new_colDict[lev] = colDict[lev]
            else:
                # Level not constrained: take every value it can have.
                new_colDict[lev] = columnIndex.levels[i]

        levelCols = [new_colDict[lev] for lev in columnLevels]
        cols = list(product(*levelCols))
        # Only keep combinations that actually exist in the index.
        colsAvailable = [col for col in cols if col in columnIndex]
        return colsAvailable

    def multilevelColumns(self, data, columnIndex=None, returnTuple=False):
        """Returns columns needed by functor from multilevel dataset

        To access tables with multilevel column structure, the
        `MultilevelParquetTable` or `DeferredDatasetHandle` need to be passed
        either a list of tuples or a dictionary.

        Parameters
        ----------
        data : various
            The data as either `MultilevelParquetTable`,
            `DeferredDatasetHandle`, or `InMemoryDatasetHandle`.
        columnIndex (optional): pandas `Index` object
            either passed or read in from `DeferredDatasetHandle`.
        `returnTuple` : `bool`
            If true, then return a list of tuples rather than the column
            dictionary specification. This is set to `True` by
            `CompositeFunctor` in order to be able to combine columns from the
            various component functors.
        """
        if isinstance(data, (DeferredDatasetHandle, InMemoryDatasetHandle)) and columnIndex is None:
            columnIndex = data.get(component="columns")

        # Confirm that the dataset has the column levels the functor is expecting it to have.
        columnLevels = self._get_data_columnLevels(data, columnIndex)

        columnDict = {'column': self.columns,
                      'dataset': self.dataset}
        if self.filt is None:
            columnLevelNames = self._get_data_columnLevelNames(data, columnIndex)
            if "band" in columnLevels:
                if self.dataset == "ref":
                    # 'ref' is band-independent; any band will do.
                    columnDict["band"] = columnLevelNames["band"][0]
                else:
                    raise ValueError(f"'filt' not set for functor {self.name}"
                                     f"(dataset {self.dataset}) "
                                     "and ParquetTable "
                                     "contains multiple filters in column index. "
                                     "Set 'filt' or set 'dataset' to 'ref'.")
        else:
            columnDict['band'] = self.filt

        if isinstance(data, MultilevelParquetTable):
            return data._colsFromDict(columnDict)
        elif isinstance(data, (DeferredDatasetHandle, InMemoryDatasetHandle)):
            if returnTuple:
                return self._colsFromDict(columnDict, columnIndex=columnIndex)
            else:
                return columnDict
        # Fixed: previously interpolated the get_full_type_name function object
        # itself instead of calling it on `data` (cf. the call in _get_data).
        raise RuntimeError(f"Unexpected data type. Got {get_full_type_name(data)}.")

    def _func(self, df, dropna=True):
        raise NotImplementedError('Must define calculation on dataframe')

    def _get_columnIndex(self, data):
        """Return columnIndex
        """
        if isinstance(data, (DeferredDatasetHandle, InMemoryDatasetHandle)):
            return data.get(component="columns")
        else:
            return None

    def _get_data(self, data):
        """Retrieve dataframe necessary for calculation.

        The data argument can be a DataFrame, a ParquetTable instance, or a
        gen3 DeferredDatasetHandle

        Returns dataframe upon which `self._func` can act.

        N.B. while passing a raw pandas `DataFrame` *should* work here, it has
        not been tested.
        """
        if isinstance(data, pd.DataFrame):
            return data

        # First thing to do: check to see if the data source has a multilevel column index or not.
        columnIndex = self._get_columnIndex(data)
        is_multiLevel = isinstance(data, MultilevelParquetTable) or isinstance(columnIndex, pd.MultiIndex)

        # Simple single-level parquet table, gen2
        if isinstance(data, ParquetTable) and not is_multiLevel:
            columns = self.columns
            df = data.toDataFrame(columns=columns)
            return df

        # Get proper columns specification for this functor
        if is_multiLevel:
            columns = self.multilevelColumns(data, columnIndex=columnIndex)
        else:
            columns = self.columns

        if isinstance(data, MultilevelParquetTable):
            # Load in-memory dataframe with appropriate columns the gen2 way
            df = data.toDataFrame(columns=columns, droplevels=False)
        elif isinstance(data, (DeferredDatasetHandle, InMemoryDatasetHandle)):
            # Load in-memory dataframe with appropriate columns the gen3 way
            df = data.get(parameters={"columns": columns})
        else:
            raise RuntimeError(f"Unexpected type provided for data. Got {get_full_type_name(data)}.")

        # Drop unnecessary column levels
        if is_multiLevel:
            df = self._setLevels(df)

        return df

    def _setLevels(self, df):
        # Keep only the column-index levels listed in _dfLevels.
        levelsToDrop = [n for n in df.columns.names if n not in self._dfLevels]
        df.columns = df.columns.droplevel(levelsToDrop)
        return df

    def _dropna(self, vals):
        return vals.dropna()

    def __call__(self, data, dropna=False):
        df = self._get_data(data)
        try:
            vals = self._func(df)
        except Exception as e:
            # A failing functor yields all-NaN output rather than aborting.
            self.log.error("Exception in %s call: %s: %s", self.name, type(e).__name__, e)
            vals = self.fail(df)
        if dropna:
            vals = self._dropna(vals)

        return vals

    def difference(self, data1, data2, **kwargs):
        """Computes difference between functor called on two different ParquetTable objects
        """
        return self(data1, **kwargs) - self(data2, **kwargs)

    def fail(self, df):
        # All-NaN fallback column aligned with the input's index.
        return pd.Series(np.full(len(df), np.nan), index=df.index)

    @property
    def name(self):
        """Full name of functor (suitable for figure labels)
        """
        # NOTE: deliberately *returns* (not raises) NotImplementedError so
        # that e.g. logging f-strings on the base class do not themselves
        # raise; subclasses should override.
        return NotImplementedError

    @property
    def shortname(self):
        """Short name of functor (suitable for column name/dict key)
        """
        return self.name
class CompositeFunctor(Functor):
    """Perform multiple calculations at once on a catalog

    The role of a `CompositeFunctor` is to group together computations from
    multiple functors. Instead of returning `pandas.Series` a
    `CompositeFunctor` returns a `pandas.Dataframe`, with the column names
    being the keys of `funcDict`.

    The `columns` attribute of a `CompositeFunctor` is the union of all columns
    in all the component functors.

    A `CompositeFunctor` does not use a `_func` method itself; rather,
    when a `CompositeFunctor` is called, all its columns are loaded
    at once, and the resulting dataframe is passed to the `_func` method of
    each component functor. This has the advantage of only doing I/O (reading
    from parquet file) once, and works because each individual `_func` method
    of each component functor does not care if there are *extra* columns in
    the dataframe being passed; only that it must contain *at least* the
    `columns` it expects.

    An important and useful class method is `from_yaml`, which takes as
    argument the path to a YAML file specifying a collection of functors.

    Parameters
    ----------
    funcs : `dict` or `list`
        Dictionary or list of functors. If a list, then it will be converted
        into a dictonary according to the `.shortname` attribute of each functor.
    """
    dataset = None

    def __init__(self, funcs, **kwargs):
        # isinstance (rather than `type(funcs) == dict`) also accepts dict
        # subclasses such as OrderedDict.
        if isinstance(funcs, dict):
            self.funcDict = funcs
        else:
            self.funcDict = {f.shortname: f for f in funcs}

        self._filt = None

        super().__init__(**kwargs)

    @property
    def filt(self):
        return self._filt

    @filt.setter
    def filt(self, filt):
        # Propagate the filter setting to every component functor.
        if filt is not None:
            for _, f in self.funcDict.items():
                f.filt = filt
        self._filt = filt

    def update(self, new):
        """Add the functors from `new` (a dict or CompositeFunctor) to this one."""
        if isinstance(new, dict):
            self.funcDict.update(new)
        elif isinstance(new, CompositeFunctor):
            self.funcDict.update(new.funcDict)
        else:
            raise TypeError('Can only update with dictionary or CompositeFunctor.')

        # Make sure new functors have the same 'filt' set
        if self.filt is not None:
            self.filt = self.filt

    @property
    def columns(self):
        return list(set([x for y in [f.columns for f in self.funcDict.values()] for x in y]))

    def multilevelColumns(self, data, **kwargs):
        # Get the union of columns for all component functors. Note the need to have `returnTuple=True` here.
        return list(
            set(
                [
                    x
                    for y in [
                        f.multilevelColumns(data, returnTuple=True, **kwargs) for f in self.funcDict.values()
                    ]
                    for x in y
                ]
            )
        )

    def __call__(self, data, **kwargs):
        """Apply the functor to the data table

        Parameters
        ----------
        data : various
            The data represented as `lsst.daf.butler.DeferredDatasetHandle`,
            `lsst.pipe.tasks.parquetTable.MultilevelParquetTable`,
            `lsst.pipe.tasks.parquetTable.ParquetTable`,
            `lsst.pipe.base.InMemoryDatasetHandle`,
            or `pandas.DataFrame`.
            The table or a pointer to a table on disk from which columns can
            be accessed
        """
        columnIndex = self._get_columnIndex(data)

        # First, determine whether data has a multilevel index (either gen2 or gen3)
        is_multiLevel = isinstance(data, MultilevelParquetTable) or isinstance(columnIndex, pd.MultiIndex)

        # Multilevel index, gen2 or gen3
        if is_multiLevel:
            columns = self.multilevelColumns(data, columnIndex=columnIndex)

            if isinstance(data, MultilevelParquetTable):
                # Read data into memory the gen2 way
                df = data.toDataFrame(columns=columns, droplevels=False)
            elif isinstance(data, (DeferredDatasetHandle, InMemoryDatasetHandle)):
                # Read data into memory the gen3 way
                df = data.get(parameters={"columns": columns})

            valDict = {}
            for k, f in self.funcDict.items():
                try:
                    subdf = f._setLevels(
                        df[f.multilevelColumns(data, returnTuple=True, columnIndex=columnIndex)]
                    )
                    valDict[k] = f._func(subdf)
                except Exception as e:
                    self.log.error("Exception in %s call: %s: %s", self.name, type(e).__name__, e)
                    try:
                        valDict[k] = f.fail(subdf)
                    except NameError:
                        # subdf was never assigned; re-raise the original error.
                        raise e

        else:
            if isinstance(data, (DeferredDatasetHandle, InMemoryDatasetHandle)):
                # input if Gen3 deferLoad=True
                df = data.get(parameters={"columns": self.columns})
            elif isinstance(data, pd.DataFrame):
                # input if Gen3 deferLoad=False
                df = data
            else:
                # Original Gen2 input is type ParquetTable and the fallback
                df = data.toDataFrame(columns=self.columns)

            valDict = {k: f._func(df) for k, f in self.funcDict.items()}

        # Check that output columns are actually columns
        for name, colVal in valDict.items():
            if len(colVal.shape) != 1:
                raise RuntimeError("Transformed column '%s' is not the shape of a column. "
                                   "It is shaped %s and type %s." % (name, colVal.shape, type(colVal)))

        try:
            valDf = pd.concat(valDict, axis=1)
        except TypeError:
            print([(k, type(v)) for k, v in valDict.items()])
            raise

        if kwargs.get('dropna', False):
            valDf = valDf.dropna(how='any')

        return valDf

    @classmethod
    def renameCol(cls, col, renameRules):
        if renameRules is None:
            return col
        for old, new in renameRules:
            if col.startswith(old):
                col = col.replace(old, new)
        return col

    @classmethod
    def from_file(cls, filename, **kwargs):
        # Allow environment variables in the filename.
        filename = os.path.expandvars(filename)
        with open(filename) as f:
            translationDefinition = yaml.safe_load(f)

        return cls.from_yaml(translationDefinition, **kwargs)

    @classmethod
    def from_yaml(cls, translationDefinition, **kwargs):
        funcs = {}
        for func, val in translationDefinition['funcs'].items():
            funcs[func] = init_fromDict(val, name=func)

        if 'flag_rename_rules' in translationDefinition:
            renameRules = translationDefinition['flag_rename_rules']
        else:
            renameRules = None

        if 'calexpFlags' in translationDefinition:
            for flag in translationDefinition['calexpFlags']:
                funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='calexp')

        if 'refFlags' in translationDefinition:
            for flag in translationDefinition['refFlags']:
                funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='ref')

        if 'forcedFlags' in translationDefinition:
            for flag in translationDefinition['forcedFlags']:
                funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='forced_src')

        if 'flags' in translationDefinition:
            for flag in translationDefinition['flags']:
                funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='meas')

        return cls(funcs, **kwargs)
def mag_aware_eval(df, expr, log):
    """Evaluate an expression on a DataFrame, knowing what the 'mag' function means

    Builds on `pandas.DataFrame.eval`, which parses and executes math on
    dataframes.

    Parameters
    ----------
    df : pandas.DataFrame
        Dataframe on which to evaluate expression.

    expr : str
        Expression.

    log : `logging.Logger`
        Logger used to report a failed first evaluation attempt.
    """
    # First attempt: treat mag(x) as acting directly on column x.
    try:
        return df.eval(re.sub(r'mag\((\w+)\)', r'-2.5*log(\g<1>)/log(10)', expr))
    except Exception as e:  # Should check what actually gets raised
        log.error("Exception in mag_aware_eval: %s: %s", type(e).__name__, e)
        # Second attempt: assume mag() was given a flux-column prefix and
        # append the '_instFlux' suffix before evaluating.
        return df.eval(re.sub(r'mag\((\w+)\)', r'-2.5*log(\g<1>_instFlux)/log(10)', expr))
class CustomFunctor(Functor):
    """Arbitrary computation on a catalog

    Column names (and thus the columns to be loaded from catalog) are found
    by finding all words and trying to ignore all "math-y" words.

    Parameters
    ----------
    expr : str
        Expression to evaluate, to be parsed and executed by `mag_aware_eval`.
    """
    # Tokens that look like identifiers but are math functions, not columns.
    _ignore_words = ('mag', 'sin', 'cos', 'exp', 'log', 'sqrt')

    def __init__(self, expr, **kwargs):
        self.expr = expr
        super().__init__(**kwargs)

    @property
    def name(self):
        return self.expr

    @property
    def columns(self):
        """Set of columns referenced by the expression (flux args normalized)."""
        flux_args = re.findall(r'mag\(\s*(\w+)\s*\)', self.expr)
        identifiers = [word for word in re.findall(r'[a-zA-Z_]+', self.expr)
                       if word not in self._ignore_words]
        bogus = []
        for arg in flux_args:
            if re.search('_instFlux$', arg):
                identifiers.append(arg)
            else:
                # Bare mag() argument: the real column carries '_instFlux'.
                identifiers.append(f'{arg}_instFlux')
                bogus.append(arg)

        return list({name for name in identifiers if name not in bogus})

    def _func(self, df):
        return mag_aware_eval(df, self.expr, self.log)
class Column(Functor):
    """Get column with specified name

    Parameters
    ----------
    col : str
        Name of the column to read and return.
    """

    def __init__(self, col, **kwargs):
        self.col = col
        super().__init__(**kwargs)

    @property
    def name(self):
        return self.col

    @property
    def columns(self):
        return [self.col]

    def _func(self, df):
        return df[self.col]
class Index(Functor):
    """Return the value of the index for each object
    """
    columns = ['coord_ra']  # just a dummy; something has to be here
    _defaultDataset = 'ref'
    _defaultNoDup = True

    def _func(self, df):
        # The result is the dataframe's own index, repackaged as a Series.
        return pd.Series(df.index, index=df.index)
class IDColumn(Column):
    """Return the object ID, read from the dataframe index.

    The 'id' column is declared as a dependency, but `_func` does not read
    it: the value returned is the index itself.
    """
    col = 'id'
    _allow_difference = False
    _defaultNoDup = True

    def _func(self, df):
        return pd.Series(df.index, index=df.index)
class FootprintNPix(Column):
    """Return the `base_Footprint_nPix` column (footprint size in pixels)."""
    col = 'base_Footprint_nPix'
class CoordColumn(Column):
    """Base class for coordinate column, in degrees
    """
    # When True, the stored column is in radians and is converted to degrees.
    _radians = True

    def __init__(self, col, **kwargs):
        super().__init__(col, **kwargs)

    def _func(self, df):
        # Must not modify original column in case that column is used by another functor
        values = df[self.col]
        if self._radians:
            return values * 180 / np.pi
        return values
class RAColumn(CoordColumn):
    """Right Ascension, in degrees
    """
    name = 'RA'
    _defaultNoDup = True

    def __init__(self, **kwargs):
        super().__init__('coord_ra', **kwargs)

    def __call__(self, catalog, **kwargs):
        # NOTE(review): this override only delegates to the base class and
        # appears redundant; confirm before removing.
        return super().__call__(catalog, **kwargs)
class DecColumn(CoordColumn):
    """Declination, in degrees
    """
    name = 'Dec'
    _defaultNoDup = True

    def __init__(self, **kwargs):
        super().__init__('coord_dec', **kwargs)

    def __call__(self, catalog, **kwargs):
        # NOTE(review): this override only delegates to the base class and
        # appears redundant; confirm before removing.
        return super().__call__(catalog, **kwargs)
class HtmIndex20(Functor):
    """Compute the level 20 HtmIndex for the catalog.

    Notes
    -----
    This functor was implemented to satisfy requirements of old APDB interface
    which required ``pixelId`` column in DiaObject with HTM20 index. APDB
    interface had migrated to not need that information, but we keep this
    class in case it may be useful for something else.
    """
    name = "Htm20"
    htmLevel = 20
    # When True, input coordinates are interpreted as radians.
    _radians = True

    def __init__(self, ra, decl, **kwargs):
        # ra/decl are the *names* of the coordinate columns to read.
        self.pixelator = sphgeom.HtmPixelization(self.htmLevel)
        self.ra = ra
        self.decl = decl
        self._columns = [self.ra, self.decl]
        super().__init__(**kwargs)

    def _func(self, df):

        def computePixel(row):
            # Build a sphere point in the appropriate angular unit, then
            # look up its HTM pixel index.
            if self._radians:
                sphPoint = geom.SpherePoint(row[self.ra],
                                            row[self.decl],
                                            geom.radians)
            else:
                sphPoint = geom.SpherePoint(row[self.ra],
                                            row[self.decl],
                                            geom.degrees)
            return self.pixelator.index(sphPoint.getVector())

        # result_type='reduce' asks pandas to return a Series rather than a
        # DataFrame where possible.
        return df.apply(computePixel, axis=1, result_type='reduce').astype('int64')
def fluxName(col):
    """Append '_instFlux' to a column name unless it already carries it."""
    suffix = '_instFlux'
    return col if col.endswith(suffix) else col + suffix
def fluxErrName(col):
    """Append '_instFluxErr' to a column name unless it already carries it."""
    suffix = '_instFluxErr'
    return col if col.endswith(suffix) else col + suffix
class Mag(Functor):
    """Compute calibrated magnitude

    Takes a `calib` argument, which returns the flux at mag=0
    as `calib.getFluxMag0()`. If not provided, then the default
    `fluxMag0` is 63095734448.0194, which is default for HSC.
    This default should be removed in DM-21955

    This calculation hides warnings about invalid values and dividing by zero.

    As for all functors, a `dataset` and `filt` kwarg should be provided upon
    initialization. Unlike the default `Functor`, however, the default dataset
    for a `Mag` is `'meas'`, rather than `'ref'`.

    Parameters
    ----------
    col : `str`
        Name of flux column from which to compute magnitude. Can be parseable
        by `lsst.pipe.tasks.functors.fluxName` function---that is, you can pass
        `'modelfit_CModel'` instead of `'modelfit_CModel_instFlux'`) and it will
        understand.
    calib : `lsst.afw.image.calib.Calib` (optional)
        Object that knows zero point.
    """
    _defaultDataset = 'meas'

    def __init__(self, col, calib=None, **kwargs):
        self.col = fluxName(col)
        self.calib = calib
        if calib is not None:
            self.fluxMag0 = calib.getFluxMag0()[0]
        else:
            # TO DO: DM-21955 Replace hard coded photometic calibration values
            self.fluxMag0 = 63095734448.0194

        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.col]

    def _func(self, df):
        # np.errstate rather than np.warnings: the latter was an undocumented
        # alias for the warnings module and was removed in NumPy 1.25, so the
        # old code raises AttributeError on modern NumPy. errstate suppresses
        # the same invalid-value / divide-by-zero warnings for non-positive
        # fluxes.
        with np.errstate(invalid='ignore', divide='ignore'):
            return -2.5*np.log10(df[self.col] / self.fluxMag0)

    @property
    def name(self):
        return f'mag_{self.col}'
class MagErr(Mag):
    """Compute calibrated magnitude uncertainty

    Takes the same `calib` object as `lsst.pipe.tasks.functors.Mag`.

    Parameters
    ----------
    col : `str`
        Name of flux column
    calib : `lsst.afw.image.calib.Calib` (optional)
        Object that knows zero point.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        if self.calib is not None:
            self.fluxMag0Err = self.calib.getFluxMag0()[1]
        else:
            self.fluxMag0Err = 0.

    @property
    def columns(self):
        return [self.col, self.col + 'Err']

    def _func(self, df):
        # np.errstate rather than np.warnings (removed in NumPy 1.25); it
        # suppresses the same invalid-value / divide-by-zero warnings.
        with np.errstate(invalid='ignore', divide='ignore'):
            fluxCol, fluxErrCol = self.columns
            # Standard first-order error propagation for -2.5*log10(flux/f0).
            x = df[fluxErrCol] / df[fluxCol]
            y = self.fluxMag0Err / self.fluxMag0
            magErr = (2.5 / np.log(10.)) * np.sqrt(x*x + y*y)
            return magErr

    @property
    def name(self):
        return super().name + '_err'
class NanoMaggie(Mag):
    """Flux relative to the zero-point flux, scaled by 1e9.

    Given the class name, this is presumably the flux expressed in
    nanomaggies (1e-9 of the mag=0 flux) — confirm against callers.
    """

    def _func(self, df):
        return (df[self.col] / self.fluxMag0) * 1e9
class MagDiff(Functor):
    """Functor to calculate magnitude difference

    Parameters
    ----------
    col1, col2 : `str`
        Names of the two flux columns; the result is
        ``-2.5*log10(col1/col2)``. Both names are normalized with `fluxName`.
    """
    # NOTE: the docstring now precedes this attribute; previously the string
    # literal came second and so was not actually the class __doc__.
    _defaultDataset = 'meas'

    def __init__(self, col1, col2, **kwargs):
        self.col1 = fluxName(col1)
        self.col2 = fluxName(col2)
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.col1, self.col2]

    def _func(self, df):
        # np.errstate rather than np.warnings (removed in NumPy 1.25); it
        # suppresses the same invalid-value / divide-by-zero warnings.
        with np.errstate(invalid='ignore', divide='ignore'):
            return -2.5*np.log10(df[self.col1]/df[self.col2])

    @property
    def name(self):
        return f'(mag_{self.col1} - mag_{self.col2})'

    @property
    def shortname(self):
        return f'magDiff_{self.col1}_{self.col2}'
class Color(Functor):
    """Compute the color between two filters

    Computes color by initializing two different `Mag`
    functors based on the `col` and filters provided, and
    then returning the difference.

    This is enabled by the `_func` expecting a dataframe with a
    multilevel column index, with both `'band'` and `'column'`,
    instead of just `'column'`, which is the `Functor` default.
    This is controlled by the `_dfLevels` attribute.

    Also of note, the default dataset for `Color` is `forced_src'`,
    whereas for `Mag` it is `'meas'`.

    Parameters
    ----------
    col : str
        Name of flux column from which to compute; same as would be passed to
        `lsst.pipe.tasks.functors.Mag`.

    filt2, filt1 : str
        Filters from which to compute magnitude difference.
        Color computed is `Mag(filt2) - Mag(filt1)`.
    """
    _defaultDataset = 'forced_src'
    _dfLevels = ('band', 'column')
    _defaultNoDup = True

    def __init__(self, col, filt2, filt1, **kwargs):
        self.col = fluxName(col)
        if filt2 == filt1:
            raise RuntimeError("Cannot compute Color for %s: %s - %s " % (col, filt2, filt1))
        self.filt2 = filt2
        self.filt1 = filt1

        # Component magnitude functors, one per band.
        self.mag2 = Mag(col, filt=filt2, **kwargs)
        self.mag1 = Mag(col, filt=filt1, **kwargs)

        super().__init__(**kwargs)

    @property
    def filt(self):
        # A Color spans two bands, so it exposes no single filter.
        return None

    @filt.setter
    def filt(self, filt):
        # Silently ignore attempts to set a single filter (e.g. when a
        # CompositeFunctor propagates its filt to components).
        pass

    def _func(self, df):
        # df carries ('band', 'column') levels; select each band's sub-frame.
        mag2 = self.mag2._func(df[self.filt2])
        mag1 = self.mag1._func(df[self.filt1])
        return mag2 - mag1

    @property
    def columns(self):
        return [self.mag1.col, self.mag2.col]

    def multilevelColumns(self, parq, **kwargs):
        # Explicit (dataset, band, column) tuples, one per band.
        return [(self.dataset, self.filt1, self.col), (self.dataset, self.filt2, self.col)]

    @property
    def name(self):
        return f'{self.filt2} - {self.filt1} ({self.col})'

    @property
    def shortname(self):
        return f"{self.col}_{self.filt2.replace('-', '')}m{self.filt1.replace('-', '')}"
class Labeller(Functor):
    """Functor whose main purpose is to override ``dropna`` to always be
    `False`.

    Subclasses produce categorical labels; null inputs are mapped to the
    ``_null_label`` category rather than dropped.
    """
    _null_label = 'null'
    _allow_difference = False
    name = 'label'
    _force_str = False

    def __call__(self, parq, dropna=False, **kwargs):
        # Force dropna=False no matter what the caller requested.
        return super().__call__(parq, dropna=False, **kwargs)
class StarGalaxyLabeller(Labeller):
    """Label each source 'star' or 'galaxy' from its extendedness value.

    Rows whose extendedness is null receive the null label.
    """
    _columns = ["base_ClassificationExtendedness_value"]
    _column = "base_ClassificationExtendedness_value"

    def _func(self, df):
        extendedness = df[self._columns][self._column]
        nullMask = extendedness.isnull()
        # Code 1 where extendedness < 0.5, 0 otherwise; nulls become code 2.
        codes = (extendedness < 0.5).astype(int).mask(nullMask, 2)

        # TODO: DM-21954 Look into veracity of inline comment below
        # are these backwards?
        categories = ['galaxy', 'star', self._null_label]
        label = pd.Series(pd.Categorical.from_codes(codes, categories=categories),
                          index=extendedness.index, name='label')
        return label.astype(str) if self._force_str else label
class NumStarLabeller(Labeller):
    """Label sources by how many bands flagged them as a star.

    Bins the ``numStarFlags`` column into 'noStar' (0), 'maybe' (1..n-1)
    or 'star' (n), where n is the number of filters inferred from the data.
    """
    _columns = ['numStarFlags']
    # NOTE(review): this mapping ('star': 0, ..., 'notStar': 2) is unused by
    # _func and appears inverted relative to the labels _func produces
    # ('noStar' for low counts, 'star' for high) — confirm intended ordering.
    labels = {"star": 0, "maybe": 1, "notStar": 2}

    def _func(self, df):
        x = df[self._columns][self._columns[0]]

        # Number of filters
        n = len(x.unique()) - 1

        # Bin edges (-1, 0], (0, n-1], (n-1, n] map counts to the 3 labels.
        labels = ['noStar', 'maybe', 'star']
        label = pd.Series(pd.cut(x, [-1, 0, n-1, n], labels=labels),
                          index=x.index, name='label')

        if self._force_str:
            label = label.astype(str)

        return label
class DeconvolvedMoments(Functor):
    """Compute deconvolved moments: source second moments minus PSF moments.

    HSM source moments are preferred where finite; SdssShape moments are the
    fallback. PSF moments must come from HSM.
    """
    name = 'Deconvolved Moments'
    shortname = 'deconvolvedMoments'
    _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
                "ext_shapeHSM_HsmSourceMoments_yy",
                "base_SdssShape_xx", "base_SdssShape_yy",
                "ext_shapeHSM_HsmPsfMoments_xx",
                "ext_shapeHSM_HsmPsfMoments_yy")

    def _func(self, df):
        """Calculate deconvolved moments"""
        # Prefer HSM source moments when the columns exist.
        if "ext_shapeHSM_HsmSourceMoments_xx" in df.columns:  # _xx added by tdm
            hsm = df["ext_shapeHSM_HsmSourceMoments_xx"] + df["ext_shapeHSM_HsmSourceMoments_yy"]
        else:
            hsm = np.ones(len(df))*np.nan
        sdss = df["base_SdssShape_xx"] + df["base_SdssShape_yy"]

        if "ext_shapeHSM_HsmPsfMoments_xx" not in df.columns:
            # LSST does not have shape.sdss.psf. Could instead add base_PsfShape to catalog using
            # exposure.getPsf().computeShape(s.getCentroid()).getIxx()
            # raise TaskError("No psf shape parameter found in catalog")
            raise RuntimeError('No psf shape parameter found in catalog')
        psf = df["ext_shapeHSM_HsmPsfMoments_xx"] + df["ext_shapeHSM_HsmPsfMoments_yy"]

        # Use SdssShape wherever the HSM sum is not finite.
        return hsm.where(np.isfinite(hsm), sdss) - psf
class SdssTraceSize(Functor):
    """Functor to calculate SDSS trace radius size for sources.

    Trace radius is sqrt((Ixx + Iyy)/2) from the SdssShape moments.
    """
    name = "SDSS Trace Size"
    shortname = 'sdssTrace'
    _columns = ("base_SdssShape_xx", "base_SdssShape_yy")

    def _func(self, df):
        momentSum = df["base_SdssShape_xx"] + df["base_SdssShape_yy"]
        return np.sqrt(0.5*momentSum)
class PsfSdssTraceSizeDiff(Functor):
    """Functor to calculate SDSS trace radius size difference (%) between
    object and psf model."""
    name = "PSF - SDSS Trace Size"
    shortname = 'psf_sdssTrace'
    _columns = ("base_SdssShape_xx", "base_SdssShape_yy",
                "base_SdssShape_psf_xx", "base_SdssShape_psf_yy")

    def _func(self, df):
        src = np.sqrt(0.5*(df["base_SdssShape_xx"] + df["base_SdssShape_yy"]))
        psf = np.sqrt(0.5*(df["base_SdssShape_psf_xx"] + df["base_SdssShape_psf_yy"]))
        # Percent difference, normalized by the mean of the two sizes.
        return 100*(src - psf)/(0.5*(src + psf))
class HsmTraceSize(Functor):
    """Functor to calculate HSM trace radius size for sources.

    Trace radius is sqrt((Ixx + Iyy)/2) from the HSM source moments.
    """
    name = 'HSM Trace Size'
    shortname = 'hsmTrace'
    _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
                "ext_shapeHSM_HsmSourceMoments_yy")

    def _func(self, df):
        momentSum = (df["ext_shapeHSM_HsmSourceMoments_xx"]
                     + df["ext_shapeHSM_HsmSourceMoments_yy"])
        return np.sqrt(0.5*momentSum)
class PsfHsmTraceSizeDiff(Functor):
    """Functor to calculate HSM trace radius size difference (%) between
    object and psf model."""
    name = 'PSF - HSM Trace Size'
    shortname = 'psf_HsmTrace'
    _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
                "ext_shapeHSM_HsmSourceMoments_yy",
                "ext_shapeHSM_HsmPsfMoments_xx",
                "ext_shapeHSM_HsmPsfMoments_yy")

    def _func(self, df):
        src = np.sqrt(0.5*(df["ext_shapeHSM_HsmSourceMoments_xx"]
                           + df["ext_shapeHSM_HsmSourceMoments_yy"]))
        psf = np.sqrt(0.5*(df["ext_shapeHSM_HsmPsfMoments_xx"]
                           + df["ext_shapeHSM_HsmPsfMoments_yy"]))
        # Percent difference, normalized by the mean of the two sizes.
        return 100*(src - psf)/(0.5*(src + psf))
class HsmFwhm(Functor):
    """PSF FWHM in arcseconds, assuming a Gaussian profile, from the HSM
    PSF moments and a fixed pixel scale."""
    name = 'HSM Psf FWHM'
    _columns = ('ext_shapeHSM_HsmPsfMoments_xx', 'ext_shapeHSM_HsmPsfMoments_yy')
    # TODO: DM-21403 pixel scale should be computed from the CD matrix or transform matrix
    pixelScale = 0.168
    SIGMA2FWHM = 2*np.sqrt(2*np.log(2))

    def _func(self, df):
        sigma = np.sqrt(0.5*(df['ext_shapeHSM_HsmPsfMoments_xx']
                             + df['ext_shapeHSM_HsmPsfMoments_yy']))
        return self.pixelScale*self.SIGMA2FWHM*sigma
class E1(Functor):
    """Distortion ellipticity e1 = (Ixx - Iyy)/(Ixx + Iyy).

    Parameters
    ----------
    colXX, colXY, colYY : `str`
        Names of the second-moment columns (Ixy is requested for
        consistency with `E2` but is not used by e1 itself).
    """
    name = "Distortion Ellipticity (e1)"
    shortname = "Distortion"

    def __init__(self, colXX, colXY, colYY, **kwargs):
        self.colXX = colXX
        self.colXY = colXY
        self.colYY = colYY
        self._columns = [self.colXX, self.colXY, self.colYY]
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.colXX, self.colXY, self.colYY]

    def _func(self, df):
        # Bug fix: the previous expression lacked parentheses around the
        # numerator, so operator precedence made it Ixx - Iyy/(Ixx + Iyy)
        # rather than the distortion ellipticity (Ixx - Iyy)/(Ixx + Iyy)
        # (compare E2 = 2*Ixy/(Ixx + Iyy)).
        return (df[self.colXX] - df[self.colYY]) / (df[self.colXX] + df[self.colYY])
class E2(Functor):
    """Distortion ellipticity e2 = 2*Ixy/(Ixx + Iyy)."""
    name = "Ellipticity e2"

    def __init__(self, colXX, colXY, colYY, **kwargs):
        self.colXX = colXX
        self.colXY = colXY
        self.colYY = colYY
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.colXX, self.colXY, self.colYY]

    def _func(self, df):
        traceSum = df[self.colXX] + df[self.colYY]
        return 2*df[self.colXY] / traceSum
class RadiusFromQuadrupole(Functor):
    """Determinant radius from quadrupole moments:
    (Ixx*Iyy - Ixy**2)**(1/4)."""

    def __init__(self, colXX, colXY, colYY, **kwargs):
        self.colXX = colXX
        self.colXY = colXY
        self.colYY = colYY
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.colXX, self.colXY, self.colYY]

    def _func(self, df):
        determinant = df[self.colXX]*df[self.colYY] - df[self.colXY]**2
        return determinant**0.25
class LocalWcs(Functor):
    """Computations using the stored localWcs.
    """
    name = "LocalWcsOperations"

    def __init__(self,
                 colCD_1_1,
                 colCD_1_2,
                 colCD_2_1,
                 colCD_2_2,
                 **kwargs):
        # Column names holding the four elements of the local CD matrix.
        self.colCD_1_1 = colCD_1_1
        self.colCD_1_2 = colCD_1_2
        self.colCD_2_1 = colCD_2_1
        self.colCD_2_2 = colCD_2_2
        super().__init__(**kwargs)

    def computeDeltaRaDec(self, x, y, cd11, cd12, cd21, cd22):
        """Convert a pixel offset (x, y) into a (delta RA, delta Dec) offset
        by applying the local linear WCS (CD matrix) approximation.

        Parameters
        ----------
        x : `pandas.Series`
            X pixel coordinate.
        y : `pandas.Series`
            Y pixel coordinate.
        cd11 : `pandas.Series`
            [1, 1] element of the local Wcs affine transform.
        cd12 : `pandas.Series`
            [1, 2] element of the local Wcs affine transform.
        cd21 : `pandas.Series`
            [2, 1] element of the local Wcs affine transform.
        cd22 : `pandas.Series`
            [2, 2] element of the local Wcs affine transform.

        Returns
        -------
        raDecTuple : tuple
            RA and dec conversion of x and y given the local Wcs. Returned
            units are in radians.
        """
        return (x * cd11 + y * cd12, x * cd21 + y * cd22)

    def computeSkySeperation(self, ra1, dec1, ra2, dec2):
        """Compute the great-circle separation between two sky positions
        using the haversine formula.

        Parameters
        ----------
        ra1 : `pandas.Series`
            Ra of the first coordinate in radians.
        dec1 : `pandas.Series`
            Dec of the first coordinate in radians.
        ra2 : `pandas.Series`
            Ra of the second coordinate in radians.
        dec2 : `pandas.Series`
            Dec of the second coordinate in radians.

        Returns
        -------
        dist : `pandas.Series`
            Distance on the sphere in radians.
        """
        deltaDec = dec2 - dec1
        deltaRa = ra2 - ra1
        # Haversine formula: numerically stable for small separations.
        return 2 * np.arcsin(
            np.sqrt(
                np.sin(deltaDec / 2) ** 2
                + np.cos(dec2) * np.cos(dec1) * np.sin(deltaRa / 2) ** 2))

    def getSkySeperationFromPixel(self, x1, y1, x2, y2, cd11, cd12, cd21, cd22):
        """Compute the distance on the sphere from (x1, y1) to (x2, y2).

        Parameters
        ----------
        x1 : `pandas.Series`
            X pixel coordinate.
        y1 : `pandas.Series`
            Y pixel coordinate.
        x2 : `pandas.Series`
            X pixel coordinate.
        y2 : `pandas.Series`
            Y pixel coordinate.
        cd11 : `pandas.Series`
            [1, 1] element of the local Wcs affine transform.
        cd12 : `pandas.Series`
            [1, 2] element of the local Wcs affine transform.
        cd21 : `pandas.Series`
            [2, 1] element of the local Wcs affine transform.
        cd22 : `pandas.Series`
            [2, 2] element of the local Wcs affine transform.

        Returns
        -------
        Distance : `pandas.Series`
            Distance on the sphere, in radians, between the two pixel
            positions mapped through the local Wcs.
        """
        ra1, dec1 = self.computeDeltaRaDec(x1, y1, cd11, cd12, cd21, cd22)
        ra2, dec2 = self.computeDeltaRaDec(x2, y2, cd11, cd12, cd21, cd22)
        # Great circle distance for small separations.
        return self.computeSkySeperation(ra1, dec1, ra2, dec2)
class ComputePixelScale(LocalWcs):
    """Compute the local pixel scale from the stored CDMatrix.
    """
    name = "PixelScale"

    @property
    def columns(self):
        return [self.colCD_1_1,
                self.colCD_1_2,
                self.colCD_2_1,
                self.colCD_2_2]

    def pixelScaleArcseconds(self, cd11, cd12, cd21, cd22):
        """Compute the local pixel to scale conversion in arcseconds.

        Parameters
        ----------
        cd11 : `pandas.Series`
            [1, 1] element of the local Wcs affine transform in radians.
        cd12 : `pandas.Series`
            [1, 2] element of the local Wcs affine transform in radians.
        cd21 : `pandas.Series`
            [2, 1] element of the local Wcs affine transform in radians.
        cd22 : `pandas.Series`
            [2, 2] element of the local Wcs affine transform in radians.

        Returns
        -------
        pixScale : `pandas.Series`
            Arcseconds per pixel at the location of the local WC
        """
        # sqrt(|det CD|) is the linear scale (radians/pixel); convert to
        # arcseconds.
        return 3600 * np.degrees(np.sqrt(np.fabs(cd11 * cd22 - cd12 * cd21)))

    def _func(self, df):
        return self.pixelScaleArcseconds(df[self.colCD_1_1],
                                         df[self.colCD_1_2],
                                         df[self.colCD_2_1],
                                         df[self.colCD_2_2])
class ConvertPixelToArcseconds(ComputePixelScale):
    """Convert a value in units of pixels to units of arcseconds.

    Multiplies ``col`` by the local pixel scale once (linear quantity);
    the previous docstring ("pixels squared") described the sibling class
    ConvertPixelSqToArcsecondsSq.
    """

    def __init__(self,
                 col,
                 colCD_1_1,
                 colCD_1_2,
                 colCD_2_1,
                 colCD_2_2,
                 **kwargs):
        # Name of the pixel-valued column to convert.
        self.col = col
        super().__init__(colCD_1_1,
                         colCD_1_2,
                         colCD_2_1,
                         colCD_2_2,
                         **kwargs)

    @property
    def name(self):
        return f"{self.col}_asArcseconds"

    @property
    def columns(self):
        return [self.col,
                self.colCD_1_1,
                self.colCD_1_2,
                self.colCD_2_1,
                self.colCD_2_2]

    def _func(self, df):
        # Single factor of the pixel scale: linear pixels -> arcseconds.
        return df[self.col] * self.pixelScaleArcseconds(df[self.colCD_1_1],
                                                        df[self.colCD_1_2],
                                                        df[self.colCD_2_1],
                                                        df[self.colCD_2_2])
class ConvertPixelSqToArcsecondsSq(ComputePixelScale):
    """Convert a value in units of pixels squared to units of arcseconds
    squared.

    Multiplies ``col`` by the local pixel scale twice (area-like quantity);
    the previous docstring ("pixels to arcseconds") described the sibling
    class ConvertPixelToArcseconds.
    """

    def __init__(self,
                 col,
                 colCD_1_1,
                 colCD_1_2,
                 colCD_2_1,
                 colCD_2_2,
                 **kwargs):
        # Name of the pixel^2-valued column to convert.
        self.col = col
        super().__init__(colCD_1_1,
                         colCD_1_2,
                         colCD_2_1,
                         colCD_2_2,
                         **kwargs)

    @property
    def name(self):
        return f"{self.col}_asArcsecondsSq"

    @property
    def columns(self):
        return [self.col,
                self.colCD_1_1,
                self.colCD_1_2,
                self.colCD_2_1,
                self.colCD_2_2]

    def _func(self, df):
        pixScale = self.pixelScaleArcseconds(df[self.colCD_1_1],
                                             df[self.colCD_1_2],
                                             df[self.colCD_2_1],
                                             df[self.colCD_2_2])
        # Two factors of the pixel scale: pixel^2 -> arcsec^2.
        return df[self.col] * pixScale * pixScale
class ReferenceBand(Functor):
    """Report, per object, the band suffix of the merge_measurement_* column
    holding the maximum value (True > False)."""
    name = 'Reference Band'
    shortname = 'refBand'

    @property
    def columns(self):
        return ["merge_measurement_i",
                "merge_measurement_r",
                "merge_measurement_z",
                "merge_measurement_y",
                "merge_measurement_g",
                "merge_measurement_u"]

    def _func(self, df: pd.DataFrame) -> pd.Series:
        def bandOfRow(row):
            # idxmax picks the column name with the max value (True > False);
            # strip the common prefix to leave just the band label.
            return row.idxmax().replace('merge_measurement_', '')

        # Skip columns that are unavailable, because this functor requests the
        # superset of bands that could be included in the object table
        available = [col for col in self.columns if col in df.columns]
        # result_type='reduce' makes a Series of dtype object if df is empty
        return df[available].apply(bandOfRow, axis=1,
                                   result_type='reduce').astype('object')
class Photometry(Functor):
    """Base class converting DN fluxes to calibrated fluxes and magnitudes.

    Parameters
    ----------
    colFlux : `str`
        Name of the flux column (DN).
    colFluxErr : `str`, optional
        Name of the associated flux-error column (DN).
    calib : optional
        Calibration object providing ``getFluxMag0()``; if `None`, the flux
        zero point is derived from the hard-coded ``COADD_ZP``.
    """
    # AB to NanoJansky (3631 Jansky)
    AB_FLUX_SCALE = (0 * u.ABmag).to_value(u.nJy)
    LOG_AB_FLUX_SCALE = 12.56
    FIVE_OVER_2LOG10 = 1.085736204758129569
    # TO DO: DM-21955 Replace hard coded photometic calibration values
    COADD_ZP = 27

    def __init__(self, colFlux, colFluxErr=None, calib=None, **kwargs):
        # Vectorized overflow-safe hypot for the error propagation below.
        self.vhypot = np.vectorize(self.hypot)
        self.col = colFlux
        self.colFluxErr = colFluxErr

        self.calib = calib
        if calib is not None:
            self.fluxMag0, self.fluxMag0Err = calib.getFluxMag0()
        else:
            self.fluxMag0 = 1./np.power(10, -0.4*self.COADD_ZP)
            self.fluxMag0Err = 0.

        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.col]

    @property
    def name(self):
        return f'mag_{self.col}'

    @classmethod
    def hypot(cls, a, b):
        """Scalar hypot that avoids overflow by factoring out the larger
        magnitude."""
        if np.abs(a) < np.abs(b):
            a, b = b, a
        if a == 0.:
            return 0.
        q = b/a
        return np.abs(a) * np.sqrt(1. + q*q)

    def dn2flux(self, dn, fluxMag0):
        """Convert DN to flux in nanojanskys."""
        return self.AB_FLUX_SCALE * dn / fluxMag0

    def dn2mag(self, dn, fluxMag0):
        """Convert DN to AB magnitude, silencing log-of-zero/negative
        warnings."""
        # Bug fix: the `np.warnings` alias was removed in NumPy 1.25; use
        # the standard-library warnings module instead. Imported locally
        # because the file's import block is managed elsewhere.
        import warnings
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', r'invalid value encountered')
            warnings.filterwarnings('ignore', r'divide by zero')
            return -2.5 * np.log10(dn/fluxMag0)

    def dn2fluxErr(self, dn, dnErr, fluxMag0, fluxMag0Err):
        """Propagate DN and zero-point errors to a nanojansky flux error."""
        retVal = self.vhypot(dn * fluxMag0Err, dnErr * fluxMag0)
        retVal *= self.AB_FLUX_SCALE / fluxMag0 / fluxMag0
        return retVal

    def dn2MagErr(self, dn, dnErr, fluxMag0, fluxMag0Err):
        """Propagate DN and zero-point errors to an AB magnitude error."""
        retVal = self.dn2fluxErr(dn, dnErr, fluxMag0, fluxMag0Err) / self.dn2flux(dn, fluxMag0)
        return self.FIVE_OVER_2LOG10 * retVal
class NanoJansky(Photometry):
    """Calibrated flux in nanojanskys."""

    def _func(self, df):
        dn = df[self.col]
        return self.dn2flux(dn, self.fluxMag0)
class NanoJanskyErr(Photometry):
    """Error on the calibrated flux in nanojanskys."""

    @property
    def columns(self):
        return [self.col, self.colFluxErr]

    def _func(self, df):
        fluxErr = self.dn2fluxErr(df[self.col], df[self.colFluxErr],
                                  self.fluxMag0, self.fluxMag0Err)
        return pd.Series(fluxErr, index=df.index)
class Magnitude(Photometry):
    """Calibrated AB magnitude."""

    def _func(self, df):
        dn = df[self.col]
        return self.dn2mag(dn, self.fluxMag0)
class MagnitudeErr(Photometry):
    """Error on the calibrated AB magnitude."""

    @property
    def columns(self):
        return [self.col, self.colFluxErr]

    def _func(self, df):
        magErr = self.dn2MagErr(df[self.col], df[self.colFluxErr],
                                self.fluxMag0, self.fluxMag0Err)
        return pd.Series(magErr, index=df.index)
class LocalPhotometry(Functor):
    """Base class for calibrating the specified instrument flux column using
    the local photometric calibration.

    Parameters
    ----------
    instFluxCol : `str`
        Name of the instrument flux column.
    instFluxErrCol : `str`
        Name of the associated error column for ``instFluxCol``.
    photoCalibCol : `str`
        Name of local calibration column.
    photoCalibErrCol : `str`
        Error associated with ``photoCalibCol``

    See also
    --------
    LocalPhotometry
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    """
    # AB magnitude of a 1 nJy source; offset for converting nJy to mags.
    logNJanskyToAB = (1 * u.nJy).to_value(u.ABmag)

    def __init__(self,
                 instFluxCol,
                 instFluxErrCol,
                 photoCalibCol,
                 photoCalibErrCol,
                 **kwargs):
        self.instFluxCol = instFluxCol
        self.instFluxErrCol = instFluxErrCol
        self.photoCalibCol = photoCalibCol
        self.photoCalibErrCol = photoCalibErrCol
        super().__init__(**kwargs)

    def instFluxToNanojansky(self, instFlux, localCalib):
        """Convert instrument flux to nanojanskys.

        Parameters
        ----------
        instFlux : `numpy.ndarray` or `pandas.Series`
            Array of instrument flux measurements
        localCalib : `numpy.ndarray` or `pandas.Series`
            Array of local photometric calibration estimates.

        Returns
        -------
        calibFlux : `numpy.ndarray` or `pandas.Series`
            Array of calibrated flux measurements.
        """
        return instFlux * localCalib

    def instFluxErrToNanojanskyErr(self, instFlux, instFluxErr, localCalib, localCalibErr):
        """Convert instrument flux error to a nanojansky flux error.

        Parameters
        ----------
        instFlux : `numpy.ndarray` or `pandas.Series`
            Array of instrument flux measurements
        instFluxErr : `numpy.ndarray` or `pandas.Series`
            Errors on associated ``instFlux`` values
        localCalib : `numpy.ndarray` or `pandas.Series`
            Array of local photometric calibration estimates.
        localCalibErr : `numpy.ndarray` or `pandas.Series`
            Errors on associated ``localCalib`` values

        Returns
        -------
        calibFluxErr : `numpy.ndarray` or `pandas.Series`
            Errors on calibrated flux measurements.
        """
        return np.hypot(instFluxErr * localCalib, instFlux * localCalibErr)

    def instFluxToMagnitude(self, instFlux, localCalib):
        """Convert instrument flux to an AB magnitude.

        Parameters
        ----------
        instFlux : `numpy.ndarray` or `pandas.Series`
            Array of instrument flux measurements
        localCalib : `numpy.ndarray` or `pandas.Series`
            Array of local photometric calibration estimates.

        Returns
        -------
        calibMag : `numpy.ndarray` or `pandas.Series`
            Array of calibrated AB magnitudes.
        """
        return -2.5 * np.log10(self.instFluxToNanojansky(instFlux, localCalib)) + self.logNJanskyToAB

    def instFluxErrToMagnitudeErr(self, instFlux, instFluxErr, localCalib, localCalibErr):
        """Convert instrument flux error to an AB magnitude error.

        Parameters
        ----------
        instFlux : `numpy.ndarray` or `pandas.Series`
            Array of instrument flux measurements
        instFluxErr : `numpy.ndarray` or `pandas.Series`
            Errors on associated ``instFlux`` values
        localCalib : `numpy.ndarray` or `pandas.Series`
            Array of local photometric calibration estimates.
        localCalibErr : `numpy.ndarray` or `pandas.Series`
            Errors on associated ``localCalib`` values

        Returns
        -------
        calibMagErr: `numpy.ndarray` or `pandas.Series`
            Error on calibrated AB magnitudes.
        """
        err = self.instFluxErrToNanojanskyErr(instFlux, instFluxErr, localCalib, localCalibErr)
        # Bug fix: dMag = 2.5/ln(10) * dFlux/flux requires the *calibrated*
        # flux in the denominator. The previous code passed ``instFluxErr``
        # as the calibration argument, corrupting the magnitude error.
        return 2.5 / np.log(10) * err / self.instFluxToNanojansky(instFlux, localCalib)
class LocalNanojansky(LocalPhotometry):
    """Compute calibrated fluxes using the local calibration value.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    """

    @property
    def columns(self):
        return [self.instFluxCol, self.photoCalibCol]

    @property
    def name(self):
        return f'flux_{self.instFluxCol}'

    def _func(self, df):
        instFlux = df[self.instFluxCol]
        calib = df[self.photoCalibCol]
        return self.instFluxToNanojansky(instFlux, calib)
class LocalNanojanskyErr(LocalPhotometry):
    """Compute calibrated flux errors using the local calibration value.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    """

    @property
    def columns(self):
        return [self.instFluxCol, self.instFluxErrCol,
                self.photoCalibCol, self.photoCalibErrCol]

    @property
    def name(self):
        return f'fluxErr_{self.instFluxCol}'

    def _func(self, df):
        return self.instFluxErrToNanojanskyErr(
            df[self.instFluxCol], df[self.instFluxErrCol],
            df[self.photoCalibCol], df[self.photoCalibErrCol])
class LocalMagnitude(LocalPhotometry):
    """Compute calibrated AB magnitudes using the local calibration value.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    """

    @property
    def columns(self):
        return [self.instFluxCol, self.photoCalibCol]

    @property
    def name(self):
        return f'mag_{self.instFluxCol}'

    def _func(self, df):
        instFlux = df[self.instFluxCol]
        calib = df[self.photoCalibCol]
        return self.instFluxToMagnitude(instFlux, calib)
class LocalMagnitudeErr(LocalPhotometry):
    """Compute calibrated AB magnitude errors using the local calibration
    value.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    """

    @property
    def columns(self):
        return [self.instFluxCol, self.instFluxErrCol,
                self.photoCalibCol, self.photoCalibErrCol]

    @property
    def name(self):
        return f'magErr_{self.instFluxCol}'

    def _func(self, df):
        return self.instFluxErrToMagnitudeErr(
            df[self.instFluxCol], df[self.instFluxErrCol],
            df[self.photoCalibCol], df[self.photoCalibErrCol])
class LocalDipoleMeanFlux(LocalPhotometry):
    """Compute absolute mean of dipole fluxes.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    LocalDipoleMeanFlux
    LocalDipoleMeanFluxErr
    LocalDipoleDiffFlux
    LocalDipoleDiffFluxErr
    """
    def __init__(self,
                 instFluxPosCol,
                 instFluxNegCol,
                 instFluxPosErrCol,
                 instFluxNegErrCol,
                 photoCalibCol,
                 photoCalibErrCol,
                 **kwargs):
        self.instFluxNegCol = instFluxNegCol
        self.instFluxPosCol = instFluxPosCol
        self.instFluxNegErrCol = instFluxNegErrCol
        self.instFluxPosErrCol = instFluxPosErrCol
        self.photoCalibCol = photoCalibCol
        self.photoCalibErrCol = photoCalibErrCol
        # Base class only knows about one flux column; hand it the negative
        # lobe (the positive-lobe columns are kept on self above).
        super().__init__(instFluxNegCol,
                         instFluxNegErrCol,
                         photoCalibCol,
                         photoCalibErrCol,
                         **kwargs)

    @property
    def columns(self):
        return [self.instFluxPosCol,
                self.instFluxNegCol,
                self.photoCalibCol]

    @property
    def name(self):
        return f'dipMeanFlux_{self.instFluxPosCol}_{self.instFluxNegCol}'

    def _func(self, df):
        calib = df[self.photoCalibCol]
        negAbs = np.fabs(self.instFluxToNanojansky(df[self.instFluxNegCol], calib))
        posAbs = np.fabs(self.instFluxToNanojansky(df[self.instFluxPosCol], calib))
        return 0.5*(negAbs + posAbs)
class LocalDipoleMeanFluxErr(LocalDipoleMeanFlux):
    """Compute the error on the absolute mean of dipole fluxes.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    LocalDipoleMeanFlux
    LocalDipoleMeanFluxErr
    LocalDipoleDiffFlux
    LocalDipoleDiffFluxErr
    """

    @property
    def columns(self):
        return [self.instFluxPosCol,
                self.instFluxNegCol,
                self.instFluxPosErrCol,
                self.instFluxNegErrCol,
                self.photoCalibCol,
                self.photoCalibErrCol]

    @property
    def name(self):
        return f'dipMeanFluxErr_{self.instFluxPosCol}_{self.instFluxNegCol}'

    def _func(self, df):
        # Standard propagation for flux = 0.5*(|neg| + |pos|)*calib: the
        # calibration-error term must scale the *sum* of the absolute fluxes.
        # Bug fix: the previous expression lacked the inner parentheses, so
        # only |pos| was multiplied by the calibration error (compare
        # LocalDipoleDiffFluxErr, which parenthesizes its flux term).
        return 0.5*np.sqrt(
            ((np.fabs(df[self.instFluxNegCol]) + np.fabs(df[self.instFluxPosCol]))
             * df[self.photoCalibErrCol])**2
            + (df[self.instFluxNegErrCol]**2 + df[self.instFluxPosErrCol]**2)
            * df[self.photoCalibCol]**2)
class LocalDipoleDiffFlux(LocalDipoleMeanFlux):
    """Compute the absolute difference of dipole fluxes.

    Value is (abs(pos) - abs(neg))

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    LocalDipoleMeanFlux
    LocalDipoleMeanFluxErr
    LocalDipoleDiffFlux
    LocalDipoleDiffFluxErr
    """

    @property
    def columns(self):
        return [self.instFluxPosCol,
                self.instFluxNegCol,
                self.photoCalibCol]

    @property
    def name(self):
        return f'dipDiffFlux_{self.instFluxPosCol}_{self.instFluxNegCol}'

    def _func(self, df):
        calib = df[self.photoCalibCol]
        posAbs = np.fabs(self.instFluxToNanojansky(df[self.instFluxPosCol], calib))
        negAbs = np.fabs(self.instFluxToNanojansky(df[self.instFluxNegCol], calib))
        return posAbs - negAbs
class LocalDipoleDiffFluxErr(LocalDipoleMeanFlux):
    """Compute the error on the absolute difference of dipole fluxes.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    LocalDipoleMeanFlux
    LocalDipoleMeanFluxErr
    LocalDipoleDiffFlux
    LocalDipoleDiffFluxErr
    """

    @property
    def columns(self):
        return [self.instFluxPosCol,
                self.instFluxNegCol,
                self.instFluxPosErrCol,
                self.instFluxNegErrCol,
                self.photoCalibCol,
                self.photoCalibErrCol]

    @property
    def name(self):
        return f'dipDiffFluxErr_{self.instFluxPosCol}_{self.instFluxNegCol}'

    def _func(self, df):
        # Error propagation for (|pos| - |neg|)*calib, split into the
        # calibration-error and flux-error contributions.
        fluxDiff = np.fabs(df[self.instFluxPosCol]) - np.fabs(df[self.instFluxNegCol])
        calibTerm = (fluxDiff * df[self.photoCalibErrCol])**2
        fluxTerm = ((df[self.instFluxPosErrCol]**2 + df[self.instFluxNegErrCol]**2)
                    * df[self.photoCalibCol]**2)
        return np.sqrt(calibTerm + fluxTerm)
class Ratio(Functor):
    """Base class for returning the ratio of 2 columns.

    Can be used to compute a Signal to Noise ratio for any input flux.

    Parameters
    ----------
    numerator : `str`
        Name of the column to use as the numerator in the ratio.
    denominator : `str`
        Name of the column to use as the denominator in the ratio.
    """
    def __init__(self,
                 numerator,
                 denominator,
                 **kwargs):
        self.numerator = numerator
        self.denominator = denominator
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.numerator, self.denominator]

    @property
    def name(self):
        return f'ratio_{self.numerator}_{self.denominator}'

    def _func(self, df):
        # Bug fix: the `np.warnings` alias was removed in NumPy 1.25; use
        # the standard-library warnings module instead. Imported locally
        # because the file's import block is managed elsewhere.
        import warnings
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', r'invalid value encountered')
            warnings.filterwarnings('ignore', r'divide by zero')
            return df[self.numerator] / df[self.denominator]
class Ebv(Functor):
    """Compute E(B-V) from dustmaps.sfd
    """
    _defaultDataset = 'ref'
    name = "E(B-V)"
    shortname = "ebv"

    def __init__(self, **kwargs):
        # Import here so dustmaps is only required when Ebv is actually used.
        from dustmaps.sfd import SFDQuery
        self._columns = ['coord_ra', 'coord_dec']
        self.sfd = SFDQuery()
        super().__init__(**kwargs)

    def _func(self, df):
        coords = SkyCoord(df['coord_ra']*u.rad, df['coord_dec']*u.rad)
        # Double precision unnecessary scientifically
        # but currently needed for ingest to qserv
        return pd.Series(self.sfd(coords), index=df.index).astype('float64')