# Coverage report: python/lsst/pipe/tasks/functors.py: 34% of 820 statements
# (coverage.py v6.5.0, created at 2024-02-08 07:10 +0000)
# This file is part of pipe_tasks.
#
# LSST Data Management System
# This product includes software developed by the
# LSST Project (http://www.lsst.org/).
# See COPYRIGHT file at the top of the source tree.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the LSST License Statement and
# the GNU General Public License along with this program. If not,
# see <https://www.lsstcorp.org/LegalNotices/>.
#
import yaml
import re
from itertools import product
import os.path

import pandas as pd
import numpy as np
import astropy.units as u
from dustmaps.sfd import SFDQuery
from astropy.coordinates import SkyCoord

from lsst.daf.persistence import doImport
from lsst.daf.butler import DeferredDatasetHandle
import lsst.geom as geom
import lsst.sphgeom as sphgeom

from .parquetTable import ParquetTable, MultilevelParquetTable

def init_fromDict(initDict, basePath='lsst.pipe.tasks.functors',
                  typeKey='functor', name=None):
    """Initialize an object defined in a dictionary

    The object needs to be importable as
        f'{basePath}.{initDict[typeKey]}'
    The positional and keyword arguments (if any) are contained in
    "args" and "kwargs" entries in the dictionary, respectively.
    This is used in `functors.CompositeFunctor.from_yaml` to initialize
    a composite functor from a specification in a YAML file.

    Parameters
    ----------
    initDict : dictionary
        Dictionary describing object's initialization.  Must contain
        an entry keyed by ``typeKey`` that is the name of the object,
        relative to ``basePath``.
    basePath : str
        Path relative to module in which ``initDict[typeKey]`` is defined.
    typeKey : str
        Key of ``initDict`` that is the name of the object
        (relative to ``basePath``).
    """
    initDict = initDict.copy()
    # TO DO: DM-21956 We should be able to define functors outside this module
    pythonType = doImport(f'{basePath}.{initDict.pop(typeKey)}')
    args = []
    if 'args' in initDict:
        args = initDict.pop('args')
        if isinstance(args, str):
            args = [args]
    try:
        element = pythonType(*args, **initDict)
    except Exception as e:
        message = f'Error in constructing functor "{name}" of type {pythonType.__name__} with args: {args}'
        raise type(e)(message, e.args)
    return element
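
# Example (illustrative, not part of the module): a YAML entry such as
#     psfMag: {functor: Mag, args: base_PsfFlux, filt: g}
# arrives here as a dict; 'Mag' is resolved relative to ``basePath`` and the
# leftover entries become keyword arguments, so this call is equivalent to
# Mag('base_PsfFlux', filt='g'):
#
#     func = init_fromDict({'functor': 'Mag', 'args': 'base_PsfFlux', 'filt': 'g'},
#                          name='psfMag')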


class Functor(object):
    """Define and execute a calculation on a ParquetTable

    The `__call__` method accepts either a `ParquetTable` object or a
    `DeferredDatasetHandle`, and returns the
    result of the calculation as a single column.  Each functor defines what
    columns are needed for the calculation, and only these columns are read
    from the `ParquetTable`.

    The action of `__call__` consists of two steps: first, loading the
    necessary columns from disk into memory as a `pandas.DataFrame` object;
    and second, performing the computation on this dataframe and returning the
    result.

    To define a new `Functor`, a subclass must define a `_func` method,
    that takes a `pandas.DataFrame` and returns result in a `pandas.Series`.
    In addition, it must define the following attributes:

    * `_columns`: The columns necessary to perform the calculation
    * `name`: A name appropriate for a figure axis label
    * `shortname`: A name appropriate for use as a dictionary key

    On initialization, a `Functor` should declare what band (`filt` kwarg)
    and dataset (e.g. `'ref'`, `'meas'`, `'forced_src'`) it is intended to be
    applied to.  This enables the `_get_data` method to extract the proper
    columns from the parquet file.  If not specified, the dataset will fall back
    on the `_defaultDataset` attribute.  If band is not specified and `dataset`
    is anything other than `'ref'`, then an error will be raised when trying to
    perform the calculation.

    Originally, `Functor` was set up to expect
    datasets formatted like the `deepCoadd_obj` dataset; that is, a
    dataframe with a multi-level column index, with the levels of the
    column index being `band`, `dataset`, and `column`.
    It has since been generalized to apply to dataframes without multi-level
    indices and multi-level indices with just `dataset` and `column` levels.
    In addition, the `_get_data` method that reads
    the dataframe from the `ParquetTable` will return a dataframe with column
    index levels defined by the `_dfLevels` attribute; by default, this is
    `column`.

    The `_dfLevels` attribute should generally not need to
    be changed, unless `_func` needs columns from multiple filters or datasets
    to do the calculation.
    An example of this is the `lsst.pipe.tasks.functors.Color` functor, for
    which `_dfLevels = ('band', 'column')`, and `_func` expects the dataframe
    it gets to have those levels in the column index.

    Parameters
    ----------
    filt : str
        Filter upon which to do the calculation

    dataset : str
        Dataset upon which to do the calculation
        (e.g., 'ref', 'meas', 'forced_src').
    """

    _defaultDataset = 'ref'
    _dfLevels = ('column',)
    _defaultNoDup = False

    def __init__(self, filt=None, dataset=None, noDup=None):
        self.filt = filt
        self.dataset = dataset if dataset is not None else self._defaultDataset
        self._noDup = noDup

    @property
    def noDup(self):
        if self._noDup is not None:
            return self._noDup
        else:
            return self._defaultNoDup

    @property
    def columns(self):
        """Columns required to perform calculation
        """
        if not hasattr(self, '_columns'):
            raise NotImplementedError('Must define columns property or _columns attribute')
        return self._columns

    def _get_data_columnLevels(self, data, columnIndex=None):
        """Gets the names of the column index levels

        This should only be called in the context of a multilevel table.
        The logic here is to enable this to work both with the gen2 `MultilevelParquetTable`
        and with the gen3 `DeferredDatasetHandle`.

        Parameters
        ----------
        data : `MultilevelParquetTable` or `DeferredDatasetHandle`

        columnIndex (optional): pandas `Index` object
            If not passed, then it is read from the `DeferredDatasetHandle`.
        """
        if isinstance(data, DeferredDatasetHandle):
            if columnIndex is None:
                columnIndex = data.get(component="columns")
        if columnIndex is not None:
            return columnIndex.names

        if isinstance(data, MultilevelParquetTable):
            return data.columnLevels
        else:
            raise TypeError(f"Unknown type for data: {type(data)}!")

    def _get_data_columnLevelNames(self, data, columnIndex=None):
        """Gets the content of each of the column levels for a multilevel table

        Similar to `_get_data_columnLevels`, this enables backward compatibility with gen2.

        Mirrors the original gen2 implementation within `pipe.tasks.parquetTable.MultilevelParquetTable`
        """
        if isinstance(data, DeferredDatasetHandle):
            if columnIndex is None:
                columnIndex = data.get(component="columns")
        if columnIndex is not None:
            columnLevels = columnIndex.names
            columnLevelNames = {
                level: list(np.unique(np.array([c for c in columnIndex])[:, i]))
                for i, level in enumerate(columnLevels)
            }
            return columnLevelNames

        if isinstance(data, MultilevelParquetTable):
            return data.columnLevelNames
        else:
            raise TypeError(f"Unknown type for data: {type(data)}!")

    def _colsFromDict(self, colDict, columnIndex=None):
        """Converts dictionary column specification to a list of columns

        This mirrors the original gen2 implementation within `pipe.tasks.parquetTable.MultilevelParquetTable`
        """
        new_colDict = {}
        columnLevels = self._get_data_columnLevels(None, columnIndex=columnIndex)

        for i, lev in enumerate(columnLevels):
            if lev in colDict:
                if isinstance(colDict[lev], str):
                    new_colDict[lev] = [colDict[lev]]
                else:
                    new_colDict[lev] = colDict[lev]
            else:
                new_colDict[lev] = columnIndex.levels[i]

        levelCols = [new_colDict[lev] for lev in columnLevels]
        cols = list(product(*levelCols))
        colsAvailable = [col for col in cols if col in columnIndex]
        return colsAvailable
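
    # Example (illustrative only): with columnLevels ('dataset', 'band', 'column'),
    # a colDict of {'dataset': 'meas', 'band': ['g', 'r'], 'column': 'base_PsfFlux_instFlux'}
    # expands via itertools.product to the tuples
    #     ('meas', 'g', 'base_PsfFlux_instFlux'), ('meas', 'r', 'base_PsfFlux_instFlux'),
    # which are then filtered against the columns actually present in the index.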

    def multilevelColumns(self, data, columnIndex=None, returnTuple=False):
        """Returns columns needed by functor from multilevel dataset

        To access tables with multilevel column structure, the `MultilevelParquetTable`
        or `DeferredDatasetHandle` need to be passed either a list of tuples or a
        dictionary.

        Parameters
        ----------
        data : `MultilevelParquetTable` or `DeferredDatasetHandle`

        columnIndex (optional): pandas `Index` object
            Either passed or read in from the `DeferredDatasetHandle`.

        returnTuple : bool
            If true, then return a list of tuples rather than the column dictionary
            specification.  This is set to `True` by `CompositeFunctor` in order to be able to
            combine columns from the various component functors.

        """
        if isinstance(data, DeferredDatasetHandle) and columnIndex is None:
            columnIndex = data.get(component="columns")

        # Confirm that the dataset has the column levels the functor is expecting it to have.
        columnLevels = self._get_data_columnLevels(data, columnIndex)

        columnDict = {'column': self.columns,
                      'dataset': self.dataset}
        if self.filt is None:
            columnLevelNames = self._get_data_columnLevelNames(data, columnIndex)
            if "band" in columnLevels:
                if self.dataset == "ref":
                    columnDict["band"] = columnLevelNames["band"][0]
                else:
                    raise ValueError(f"'filt' not set for functor {self.name} "
                                     f"(dataset {self.dataset}) "
                                     "and ParquetTable "
                                     "contains multiple filters in column index. "
                                     "Set 'filt' or set 'dataset' to 'ref'.")
        else:
            columnDict['band'] = self.filt

        if isinstance(data, MultilevelParquetTable):
            return data._colsFromDict(columnDict)
        elif isinstance(data, DeferredDatasetHandle):
            if returnTuple:
                return self._colsFromDict(columnDict, columnIndex=columnIndex)
            else:
                return columnDict

    def _func(self, df, dropna=True):
        raise NotImplementedError('Must define calculation on dataframe')

    def _get_columnIndex(self, data):
        """Return columnIndex
        """

        if isinstance(data, DeferredDatasetHandle):
            return data.get(component="columns")
        else:
            return None

    def _get_data(self, data):
        """Retrieve dataframe necessary for calculation.

        The data argument can be a DataFrame, a ParquetTable instance, or a gen3 DeferredDatasetHandle

        Returns dataframe upon which `self._func` can act.

        N.B. while passing a raw pandas `DataFrame` *should* work here, it has not been tested.
        """
        if isinstance(data, pd.DataFrame):
            return data

        # First thing to do: check to see if the data source has a multilevel column index or not.
        columnIndex = self._get_columnIndex(data)
        is_multiLevel = isinstance(data, MultilevelParquetTable) or isinstance(columnIndex, pd.MultiIndex)

        # Simple single-level parquet table, gen2
        if isinstance(data, ParquetTable) and not is_multiLevel:
            columns = self.columns
            df = data.toDataFrame(columns=columns)
            return df

        # Get proper columns specification for this functor
        if is_multiLevel:
            columns = self.multilevelColumns(data, columnIndex=columnIndex)
        else:
            columns = self.columns

        if isinstance(data, MultilevelParquetTable):
            # Load in-memory dataframe with appropriate columns the gen2 way
            df = data.toDataFrame(columns=columns, droplevels=False)
        elif isinstance(data, DeferredDatasetHandle):
            # Load in-memory dataframe with appropriate columns the gen3 way
            df = data.get(parameters={"columns": columns})

        # Drop unnecessary column levels
        if is_multiLevel:
            df = self._setLevels(df)

        return df

    def _setLevels(self, df):
        levelsToDrop = [n for n in df.columns.names if n not in self._dfLevels]
        df.columns = df.columns.droplevel(levelsToDrop)
        return df

    def _dropna(self, vals):
        return vals.dropna()

    def __call__(self, data, dropna=False):
        # Load the data outside the try block; if loading fails there is no
        # dataframe to pass to `fail`, so the exception should propagate.
        df = self._get_data(data)
        try:
            vals = self._func(df)
        except Exception:
            vals = self.fail(df)
        if dropna:
            vals = self._dropna(vals)

        return vals

    def difference(self, data1, data2, **kwargs):
        """Computes difference between functor called on two different ParquetTable objects
        """
        return self(data1, **kwargs) - self(data2, **kwargs)

    def fail(self, df):
        return pd.Series(np.full(len(df), np.nan), index=df.index)

    @property
    def name(self):
        """Full name of functor (suitable for figure labels)
        """
        raise NotImplementedError

    @property
    def shortname(self):
        """Short name of functor (suitable for column name/dict key)
        """
        return self.name
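
# Example (illustrative sketch of the subclassing contract described above;
# the class and column names here are hypothetical):
#
#     class FluxRatio(Functor):
#         _columns = ['base_PsfFlux_instFlux', 'modelfit_CModel_instFlux']
#         name = 'PSF/CModel flux ratio'
#         shortname = 'fluxRatio'
#
#         def _func(self, df):
#             return df['base_PsfFlux_instFlux'] / df['modelfit_CModel_instFlux']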


class CompositeFunctor(Functor):
    """Perform multiple calculations at once on a catalog

    The role of a `CompositeFunctor` is to group together computations from
    multiple functors.  Instead of returning a `pandas.Series`, a
    `CompositeFunctor` returns a `pandas.DataFrame`, with the column names
    being the keys of `funcDict`.

    The `columns` attribute of a `CompositeFunctor` is the union of all columns
    in all the component functors.

    A `CompositeFunctor` does not use a `_func` method itself; rather,
    when a `CompositeFunctor` is called, all its columns are loaded
    at once, and the resulting dataframe is passed to the `_func` method of each component
    functor.  This has the advantage of only doing I/O (reading from parquet file) once,
    and works because each individual `_func` method of each component functor does not
    care if there are *extra* columns in the dataframe being passed; only that it must contain
    *at least* the `columns` it expects.

    An important and useful class method is `from_yaml`, which takes as argument the path to a YAML
    file specifying a collection of functors.

    Parameters
    ----------
    funcs : `dict` or `list`
        Dictionary or list of functors.  If a list, then it will be converted
        into a dictionary according to the `.shortname` attribute of each functor.
    """
    dataset = None

    def __init__(self, funcs, **kwargs):

        if isinstance(funcs, dict):
            self.funcDict = funcs
        else:
            self.funcDict = {f.shortname: f for f in funcs}

        self._filt = None

        super().__init__(**kwargs)

    @property
    def filt(self):
        return self._filt

    @filt.setter
    def filt(self, filt):
        if filt is not None:
            for _, f in self.funcDict.items():
                f.filt = filt
        self._filt = filt

    def update(self, new):
        if isinstance(new, dict):
            self.funcDict.update(new)
        elif isinstance(new, CompositeFunctor):
            self.funcDict.update(new.funcDict)
        else:
            raise TypeError('Can only update with dictionary or CompositeFunctor.')

        # Make sure new functors have the same 'filt' set
        if self.filt is not None:
            self.filt = self.filt

    @property
    def columns(self):
        return list(set([x for y in [f.columns for f in self.funcDict.values()] for x in y]))

    def multilevelColumns(self, data, **kwargs):
        # Get the union of columns for all component functors.  Note the need to have `returnTuple=True` here.
        return list(
            set(
                [
                    x
                    for y in [
                        f.multilevelColumns(data, returnTuple=True, **kwargs) for f in self.funcDict.values()
                    ]
                    for x in y
                ]
            )
        )

    def __call__(self, data, **kwargs):
        """Apply the functor to the data table

        Parameters
        ----------
        data : `lsst.daf.butler.DeferredDatasetHandle`,
               `lsst.pipe.tasks.parquetTable.MultilevelParquetTable`,
               `lsst.pipe.tasks.parquetTable.ParquetTable`,
               or `pandas.DataFrame`.
            The table or a pointer to a table on disk from which columns can
            be accessed
        """
        columnIndex = self._get_columnIndex(data)

        # First, determine whether data has a multilevel index (either gen2 or gen3)
        is_multiLevel = isinstance(data, MultilevelParquetTable) or isinstance(columnIndex, pd.MultiIndex)

        # Multilevel index, gen2 or gen3
        if is_multiLevel:
            columns = self.multilevelColumns(data, columnIndex=columnIndex)

            if isinstance(data, MultilevelParquetTable):
                # Read data into memory the gen2 way
                df = data.toDataFrame(columns=columns, droplevels=False)
            elif isinstance(data, DeferredDatasetHandle):
                # Read data into memory the gen3 way
                df = data.get(parameters={"columns": columns})

            valDict = {}
            for k, f in self.funcDict.items():
                try:
                    subdf = f._setLevels(
                        df[f.multilevelColumns(data, returnTuple=True, columnIndex=columnIndex)]
                    )
                    valDict[k] = f._func(subdf)
                except Exception as e:
                    try:
                        valDict[k] = f.fail(subdf)
                    except NameError:
                        raise e

        else:
            if isinstance(data, DeferredDatasetHandle):
                # input if Gen3 deferLoad=True
                df = data.get(parameters={"columns": self.columns})
            elif isinstance(data, pd.DataFrame):
                # input if Gen3 deferLoad=False
                df = data
            else:
                # Original Gen2 input is type ParquetTable and the fallback
                df = data.toDataFrame(columns=self.columns)

            valDict = {k: f._func(df) for k, f in self.funcDict.items()}

        # Check that output columns are actually columns
        for name, colVal in valDict.items():
            if len(colVal.shape) != 1:
                raise RuntimeError("Transformed column '%s' is not the shape of a column. "
                                   "It is shaped %s and type %s." % (name, colVal.shape, type(colVal)))

        try:
            valDf = pd.concat(valDict, axis=1)
        except TypeError:
            print([(k, type(v)) for k, v in valDict.items()])
            raise

        if kwargs.get('dropna', False):
            valDf = valDf.dropna(how='any')

        return valDf

    @classmethod
    def renameCol(cls, col, renameRules):
        if renameRules is None:
            return col
        for old, new in renameRules:
            if col.startswith(old):
                col = col.replace(old, new)
        return col

    @classmethod
    def from_file(cls, filename, **kwargs):
        # Allow environment variables in the filename.
        filename = os.path.expandvars(filename)
        with open(filename) as f:
            translationDefinition = yaml.safe_load(f)

        return cls.from_yaml(translationDefinition, **kwargs)

    @classmethod
    def from_yaml(cls, translationDefinition, **kwargs):
        funcs = {}
        for func, val in translationDefinition['funcs'].items():
            funcs[func] = init_fromDict(val, name=func)

        if 'flag_rename_rules' in translationDefinition:
            renameRules = translationDefinition['flag_rename_rules']
        else:
            renameRules = None

        if 'calexpFlags' in translationDefinition:
            for flag in translationDefinition['calexpFlags']:
                funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='calexp')

        if 'refFlags' in translationDefinition:
            for flag in translationDefinition['refFlags']:
                funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='ref')

        if 'forcedFlags' in translationDefinition:
            for flag in translationDefinition['forcedFlags']:
                funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='forced_src')

        if 'flags' in translationDefinition:
            for flag in translationDefinition['flags']:
                funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='meas')

        return cls(funcs, **kwargs)
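
# Example (illustrative): a YAML translation definition of the form
#
#     funcs:
#         psfMag: {functor: Mag, args: base_PsfFlux, filt: g}
#         gmr: {functor: Color, args: [base_PsfFlux, g, r]}
#     refFlags:
#         - detect_isPrimary
#
# can be loaded with CompositeFunctor.from_file('myFunctors.yaml'); the
# filename and entries here are hypothetical.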


def mag_aware_eval(df, expr):
    """Evaluate an expression on a DataFrame, knowing what the 'mag' function means

    Builds on `pandas.DataFrame.eval`, which parses and executes math on dataframes.

    Parameters
    ----------
    df : pandas.DataFrame
        Dataframe on which to evaluate expression.

    expr : str
        Expression.
    """
    try:
        expr_new = re.sub(r'mag\((\w+)\)', r'-2.5*log(\g<1>)/log(10)', expr)
        val = df.eval(expr_new)
    except Exception:  # Should check what actually gets raised
        expr_new = re.sub(r'mag\((\w+)\)', r'-2.5*log(\g<1>_instFlux)/log(10)', expr)
        val = df.eval(expr_new)
    return val
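
# Example (illustrative): the substitution rewrites
#     'mag(base_PsfFlux_instFlux) - mag(modelfit_CModel_instFlux)'
# into
#     '-2.5*log(base_PsfFlux_instFlux)/log(10) - -2.5*log(modelfit_CModel_instFlux)/log(10)'
# which `pandas.DataFrame.eval` can execute; if the bare column names fail,
# the fallback retries with '_instFlux' appended.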


class CustomFunctor(Functor):
    """Arbitrary computation on a catalog

    Column names (and thus the columns to be loaded from catalog) are found
    by finding all words and trying to ignore all "math-y" words.

    Parameters
    ----------
    expr : str
        Expression to evaluate, to be parsed and executed by `mag_aware_eval`.
    """
    _ignore_words = ('mag', 'sin', 'cos', 'exp', 'log', 'sqrt')

    def __init__(self, expr, **kwargs):
        self.expr = expr
        super().__init__(**kwargs)

    @property
    def name(self):
        return self.expr

    @property
    def columns(self):
        flux_cols = re.findall(r'mag\(\s*(\w+)\s*\)', self.expr)

        cols = [c for c in re.findall(r'[a-zA-Z_]+', self.expr) if c not in self._ignore_words]
        not_a_col = []
        for c in flux_cols:
            if not re.search('_instFlux$', c):
                cols.append(f'{c}_instFlux')
                not_a_col.append(c)
            else:
                cols.append(c)

        return list(set([c for c in cols if c not in not_a_col]))

    def _func(self, df):
        return mag_aware_eval(df, self.expr)


class Column(Functor):
    """Get column with specified name
    """

    def __init__(self, col, **kwargs):
        self.col = col
        super().__init__(**kwargs)

    @property
    def name(self):
        return self.col

    @property
    def columns(self):
        return [self.col]

    def _func(self, df):
        return df[self.col]


class Index(Functor):
    """Return the value of the index for each object
    """

    columns = ['coord_ra']  # just a dummy; something has to be here
    _defaultDataset = 'ref'
    _defaultNoDup = True

    def _func(self, df):
        return pd.Series(df.index, index=df.index)


class IDColumn(Column):
    col = 'id'
    _allow_difference = False
    _defaultNoDup = True

    def _func(self, df):
        return pd.Series(df.index, index=df.index)


class FootprintNPix(Column):
    col = 'base_Footprint_nPix'


class CoordColumn(Column):
    """Base class for coordinate column, in degrees
    """
    _radians = True

    def __init__(self, col, **kwargs):
        super().__init__(col, **kwargs)

    def _func(self, df):
        # Must not modify original column in case that column is used by another functor
        output = df[self.col] * 180 / np.pi if self._radians else df[self.col]
        return output


class RAColumn(CoordColumn):
    """Right Ascension, in degrees
    """
    name = 'RA'
    _defaultNoDup = True

    def __init__(self, **kwargs):
        super().__init__('coord_ra', **kwargs)

    def __call__(self, catalog, **kwargs):
        return super().__call__(catalog, **kwargs)


class DecColumn(CoordColumn):
    """Declination, in degrees
    """
    name = 'Dec'
    _defaultNoDup = True

    def __init__(self, **kwargs):
        super().__init__('coord_dec', **kwargs)

    def __call__(self, catalog, **kwargs):
        return super().__call__(catalog, **kwargs)


class HtmIndex20(Functor):
    """Compute the level 20 HtmIndex for the catalog.

    Notes
    -----
    This functor was implemented to satisfy requirements of the old APDB
    interface, which required a ``pixelId`` column in DiaObject with the HTM20
    index.  The APDB interface has since migrated away from needing that
    information, but we keep this class in case it may be useful for
    something else.
    """
    name = "Htm20"
    htmLevel = 20
    _radians = True

    def __init__(self, ra, decl, **kwargs):
        self.pixelator = sphgeom.HtmPixelization(self.htmLevel)
        self.ra = ra
        self.decl = decl
        self._columns = [self.ra, self.decl]
        super().__init__(**kwargs)

    def _func(self, df):

        def computePixel(row):
            if self._radians:
                sphPoint = geom.SpherePoint(row[self.ra],
                                            row[self.decl],
                                            geom.radians)
            else:
                sphPoint = geom.SpherePoint(row[self.ra],
                                            row[self.decl],
                                            geom.degrees)
            return self.pixelator.index(sphPoint.getVector())

        return df.apply(computePixel, axis=1, result_type='reduce').astype('int64')
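
# Example (illustrative): HtmIndex20('coord_ra', 'coord_dec') maps each row's
# radian coordinates to a level-20 HTM trixel ID, row by row, via
#     sphgeom.HtmPixelization(20).index(geom.SpherePoint(ra, dec, geom.radians).getVector())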


def fluxName(col):
    if not col.endswith('_instFlux'):
        col += '_instFlux'
    return col


def fluxErrName(col):
    if not col.endswith('_instFluxErr'):
        col += '_instFluxErr'
    return col


class Mag(Functor):
    """Compute calibrated magnitude

    Takes a `calib` argument, which returns the flux at mag=0
    as `calib.getFluxMag0()`.  If not provided, then the default
    `fluxMag0` is 63095734448.0194, which is the default for HSC.
    This default should be removed in DM-21955.

    This calculation hides warnings about invalid values and dividing by zero.

    As for all functors, a `dataset` and `filt` kwarg should be provided upon
    initialization.  Unlike the default `Functor`, however, the default dataset
    for a `Mag` is `'meas'`, rather than `'ref'`.

    Parameters
    ----------
    col : `str`
        Name of flux column from which to compute magnitude.  Can be parsed
        by the `lsst.pipe.tasks.functors.fluxName` function---that is, you can
        pass `'modelfit_CModel'` instead of `'modelfit_CModel_instFlux'` and
        it will understand.
    calib : `lsst.afw.image.calib.Calib` (optional)
        Object that knows the zero point.
    """
    _defaultDataset = 'meas'

    def __init__(self, col, calib=None, **kwargs):
        self.col = fluxName(col)
        self.calib = calib
        if calib is not None:
            self.fluxMag0 = calib.getFluxMag0()[0]
        else:
            # TO DO: DM-21955 Replace hard coded photometric calibration values
            self.fluxMag0 = 63095734448.0194

        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.col]

    def _func(self, df):
        with np.warnings.catch_warnings():
            np.warnings.filterwarnings('ignore', r'invalid value encountered')
            np.warnings.filterwarnings('ignore', r'divide by zero')
            return -2.5*np.log10(df[self.col] / self.fluxMag0)

    @property
    def name(self):
        return f'mag_{self.col}'


class MagErr(Mag):
    """Compute calibrated magnitude uncertainty

    Takes the same `calib` object as `lsst.pipe.tasks.functors.Mag`.

    Parameters
    ----------
    col : `str`
        Name of flux column
    calib : `lsst.afw.image.calib.Calib` (optional)
        Object that knows the zero point.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        if self.calib is not None:
            self.fluxMag0Err = self.calib.getFluxMag0()[1]
        else:
            self.fluxMag0Err = 0.

    @property
    def columns(self):
        return [self.col, self.col + 'Err']

    def _func(self, df):
        with np.warnings.catch_warnings():
            np.warnings.filterwarnings('ignore', r'invalid value encountered')
            np.warnings.filterwarnings('ignore', r'divide by zero')
            fluxCol, fluxErrCol = self.columns
            x = df[fluxErrCol] / df[fluxCol]
            y = self.fluxMag0Err / self.fluxMag0
            magErr = (2.5 / np.log(10.)) * np.sqrt(x*x + y*y)
            return magErr

    @property
    def name(self):
        return super().name + '_err'


class NanoMaggie(Mag):
    """Convert instrumental flux to nanomaggies (flux relative to the zero
    point, scaled by 1e9).
    """

    def _func(self, df):
        return (df[self.col] / self.fluxMag0) * 1e9


class MagDiff(Functor):
    """Functor to calculate magnitude difference"""
    _defaultDataset = 'meas'

    def __init__(self, col1, col2, **kwargs):
        self.col1 = fluxName(col1)
        self.col2 = fluxName(col2)
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.col1, self.col2]

    def _func(self, df):
        with np.warnings.catch_warnings():
            np.warnings.filterwarnings('ignore', r'invalid value encountered')
            np.warnings.filterwarnings('ignore', r'divide by zero')
            return -2.5*np.log10(df[self.col1]/df[self.col2])

    @property
    def name(self):
        return f'(mag_{self.col1} - mag_{self.col2})'

    @property
    def shortname(self):
        return f'magDiff_{self.col1}_{self.col2}'


class Color(Functor):
    """Compute the color between two filters

    Computes color by initializing two different `Mag`
    functors based on the `col` and filters provided, and
    then returning the difference.

    This is enabled by the `_func` expecting a dataframe with a
    multilevel column index, with both `'band'` and `'column'`,
    instead of just `'column'`, which is the `Functor` default.
    This is controlled by the `_dfLevels` attribute.

    Also of note, the default dataset for `Color` is `'forced_src'`,
    whereas for `Mag` it is `'meas'`.

    Parameters
    ----------
    col : str
        Name of flux column from which to compute; same as would be passed to
        `lsst.pipe.tasks.functors.Mag`.

    filt2, filt1 : str
        Filters from which to compute magnitude difference.
        Color computed is `Mag(filt2) - Mag(filt1)`.
    """
    _defaultDataset = 'forced_src'
    _dfLevels = ('band', 'column')
    _defaultNoDup = True

    def __init__(self, col, filt2, filt1, **kwargs):
        self.col = fluxName(col)
        if filt2 == filt1:
            raise RuntimeError("Cannot compute Color for %s: %s - %s " % (col, filt2, filt1))
        self.filt2 = filt2
        self.filt1 = filt1

        self.mag2 = Mag(col, filt=filt2, **kwargs)
        self.mag1 = Mag(col, filt=filt1, **kwargs)

        super().__init__(**kwargs)

    @property
    def filt(self):
        return None

    @filt.setter
    def filt(self, filt):
        pass

    def _func(self, df):
        mag2 = self.mag2._func(df[self.filt2])
        mag1 = self.mag1._func(df[self.filt1])
        return mag2 - mag1

    @property
    def columns(self):
        return [self.mag1.col, self.mag2.col]

    def multilevelColumns(self, parq, **kwargs):
        return [(self.dataset, self.filt1, self.col), (self.dataset, self.filt2, self.col)]

    @property
    def name(self):
        return f'{self.filt2} - {self.filt1} ({self.col})'

    @property
    def shortname(self):
        return f"{self.col}_{self.filt2.replace('-', '')}m{self.filt1.replace('-', '')}"
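
# Example (illustrative): Color('base_PsfFlux', 'g', 'r') computes the g-r
# color from PSF fluxes.  Because `_dfLevels = ('band', 'column')`, `_func`
# receives a dataframe whose top column level is the band, so df['g'] and
# df['r'] each hold the flux column for one filter.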


class Labeller(Functor):
    """Base class for labelling functors; overrides `__call__` to force
    ``dropna=False``, so that rows with null inputs keep a label.
    """
    _null_label = 'null'
    _allow_difference = False
    name = 'label'
    _force_str = False

    def __call__(self, parq, dropna=False, **kwargs):
        return super().__call__(parq, dropna=False, **kwargs)


class StarGalaxyLabeller(Labeller):
    _columns = ["base_ClassificationExtendedness_value"]
    _column = "base_ClassificationExtendedness_value"

    def _func(self, df):
        x = df[self._columns][self._column]
        mask = x.isnull()
        test = (x < 0.5).astype(int)
        test = test.mask(mask, 2)

        # TODO: DM-21954 Look into veracity of inline comment below
        # are these backwards?
        categories = ['galaxy', 'star', self._null_label]
        label = pd.Series(pd.Categorical.from_codes(test, categories=categories),
                          index=x.index, name='label')
        if self._force_str:
            label = label.astype(str)
        return label


class NumStarLabeller(Labeller):
    _columns = ['numStarFlags']
    labels = {"star": 0, "maybe": 1, "notStar": 2}

    def _func(self, df):
        x = df[self._columns][self._columns[0]]

        # Number of filters
        n = len(x.unique()) - 1

        labels = ['noStar', 'maybe', 'star']
        label = pd.Series(pd.cut(x, [-1, 0, n-1, n], labels=labels),
                          index=x.index, name='label')

        if self._force_str:
            label = label.astype(str)

        return label


class DeconvolvedMoments(Functor):
    name = 'Deconvolved Moments'
    shortname = 'deconvolvedMoments'
    _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
                "ext_shapeHSM_HsmSourceMoments_yy",
                "base_SdssShape_xx", "base_SdssShape_yy",
                "ext_shapeHSM_HsmPsfMoments_xx",
                "ext_shapeHSM_HsmPsfMoments_yy")

    def _func(self, df):
        """Calculate deconvolved moments"""
        if "ext_shapeHSM_HsmSourceMoments_xx" in df.columns:  # _xx added by tdm
            hsm = df["ext_shapeHSM_HsmSourceMoments_xx"] + df["ext_shapeHSM_HsmSourceMoments_yy"]
        else:
            hsm = np.ones(len(df))*np.nan
        sdss = df["base_SdssShape_xx"] + df["base_SdssShape_yy"]
        if "ext_shapeHSM_HsmPsfMoments_xx" in df.columns:
            psf = df["ext_shapeHSM_HsmPsfMoments_xx"] + df["ext_shapeHSM_HsmPsfMoments_yy"]
        else:
            # LSST does not have shape.sdss.psf.  Could instead add base_PsfShape to catalog using
            # exposure.getPsf().computeShape(s.getCentroid()).getIxx()
            # raise TaskError("No psf shape parameter found in catalog")
            raise RuntimeError('No psf shape parameter found in catalog')

        return hsm.where(np.isfinite(hsm), sdss) - psf


class SdssTraceSize(Functor):
    """Functor to calculate SDSS trace radius size for sources"""
    name = "SDSS Trace Size"
    shortname = 'sdssTrace'
    _columns = ("base_SdssShape_xx", "base_SdssShape_yy")

    def _func(self, df):
        srcSize = np.sqrt(0.5*(df["base_SdssShape_xx"] + df["base_SdssShape_yy"]))
        return srcSize


class PsfSdssTraceSizeDiff(Functor):
    """Functor to calculate SDSS trace radius size difference (%) between object and psf model"""
    name = "PSF - SDSS Trace Size"
    shortname = 'psf_sdssTrace'
    _columns = ("base_SdssShape_xx", "base_SdssShape_yy",
                "base_SdssShape_psf_xx", "base_SdssShape_psf_yy")

    def _func(self, df):
        srcSize = np.sqrt(0.5*(df["base_SdssShape_xx"] + df["base_SdssShape_yy"]))
        psfSize = np.sqrt(0.5*(df["base_SdssShape_psf_xx"] + df["base_SdssShape_psf_yy"]))
        sizeDiff = 100*(srcSize - psfSize)/(0.5*(srcSize + psfSize))
        return sizeDiff


class HsmTraceSize(Functor):
    """Functor to calculate HSM trace radius size for sources"""
    name = 'HSM Trace Size'
    shortname = 'hsmTrace'
    _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
                "ext_shapeHSM_HsmSourceMoments_yy")

    def _func(self, df):
        srcSize = np.sqrt(0.5*(df["ext_shapeHSM_HsmSourceMoments_xx"]
                               + df["ext_shapeHSM_HsmSourceMoments_yy"]))
        return srcSize


class PsfHsmTraceSizeDiff(Functor):
    """Functor to calculate HSM trace radius size difference (%) between object and psf model"""
    name = 'PSF - HSM Trace Size'
    shortname = 'psf_HsmTrace'
    _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
                "ext_shapeHSM_HsmSourceMoments_yy",
                "ext_shapeHSM_HsmPsfMoments_xx",
                "ext_shapeHSM_HsmPsfMoments_yy")

    def _func(self, df):
        srcSize = np.sqrt(0.5*(df["ext_shapeHSM_HsmSourceMoments_xx"]
                               + df["ext_shapeHSM_HsmSourceMoments_yy"]))
        psfSize = np.sqrt(0.5*(df["ext_shapeHSM_HsmPsfMoments_xx"]
                               + df["ext_shapeHSM_HsmPsfMoments_yy"]))
        sizeDiff = 100*(srcSize - psfSize)/(0.5*(srcSize + psfSize))
        return sizeDiff


class HsmFwhm(Functor):
    name = 'HSM Psf FWHM'
    _columns = ('ext_shapeHSM_HsmPsfMoments_xx', 'ext_shapeHSM_HsmPsfMoments_yy')
    # TODO: DM-21403 pixel scale should be computed from the CD matrix or transform matrix
    pixelScale = 0.168
    SIGMA2FWHM = 2*np.sqrt(2*np.log(2))

    def _func(self, df):
        return self.pixelScale*self.SIGMA2FWHM*np.sqrt(
            0.5*(df['ext_shapeHSM_HsmPsfMoments_xx'] + df['ext_shapeHSM_HsmPsfMoments_yy']))
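
# Example (illustrative): for a Gaussian PSF, FWHM = 2*sqrt(2*ln 2)*sigma
# ~ 2.355*sigma, with trace radius sigma = sqrt((Ixx + Iyy)/2) in pixels.
# With the hard-coded 0.168 arcsec/pixel scale, Ixx = Iyy = 4 px^2 gives
# sigma = 2 px and FWHM ~ 0.168 * 2.355 * 2 ~ 0.79 arcsec.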


class E1(Functor):
    name = "Distortion Ellipticity (e1)"
    shortname = "Distortion"

    def __init__(self, colXX, colXY, colYY, **kwargs):
        self.colXX = colXX
        self.colXY = colXY
        self.colYY = colYY
        self._columns = [self.colXX, self.colXY, self.colYY]
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.colXX, self.colXY, self.colYY]

    def _func(self, df):
        # e1 = (Ixx - Iyy) / (Ixx + Iyy); the numerator must be parenthesized
        # so the whole difference is divided by the trace.
        return (df[self.colXX] - df[self.colYY]) / (df[self.colXX] + df[self.colYY])


class E2(Functor):
    name = "Ellipticity e2"

    def __init__(self, colXX, colXY, colYY, **kwargs):
        self.colXX = colXX
        self.colXY = colXY
        self.colYY = colYY
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.colXX, self.colXY, self.colYY]

    def _func(self, df):
        return 2*df[self.colXY] / (df[self.colXX] + df[self.colYY])


class RadiusFromQuadrupole(Functor):

    def __init__(self, colXX, colXY, colYY, **kwargs):
        self.colXX = colXX
        self.colXY = colXY
        self.colYY = colYY
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.colXX, self.colXY, self.colYY]

    def _func(self, df):
        return (df[self.colXX]*df[self.colYY] - df[self.colXY]**2)**0.25


class LocalWcs(Functor):
    """Computations using the stored localWcs.
    """
    name = "LocalWcsOperations"

    def __init__(self,
                 colCD_1_1,
                 colCD_1_2,
                 colCD_2_1,
                 colCD_2_2,
                 **kwargs):
        self.colCD_1_1 = colCD_1_1
        self.colCD_1_2 = colCD_1_2
        self.colCD_2_1 = colCD_2_1
        self.colCD_2_2 = colCD_2_2
        super().__init__(**kwargs)

    def computeDeltaRaDec(self, x, y, cd11, cd12, cd21, cd22):
        """Compute the shift in RA and Dec for a pixel offset (x, y) using the
        local WCS affine transform.

        Parameters
        ----------
        x : `pandas.Series`
            X pixel coordinate.
        y : `pandas.Series`
            Y pixel coordinate.
        cd11 : `pandas.Series`
            [1, 1] element of the local Wcs affine transform.
        cd12 : `pandas.Series`
            [1, 2] element of the local Wcs affine transform.
        cd21 : `pandas.Series`
            [2, 1] element of the local Wcs affine transform.
        cd22 : `pandas.Series`
            [2, 2] element of the local Wcs affine transform.

        Returns
        -------
        raDecTuple : tuple
            RA and dec conversion of x and y given the local Wcs.  Returned
            units are in radians.

        """
        return (x * cd11 + y * cd12, x * cd21 + y * cd22)

    def computeSkySeperation(self, ra1, dec1, ra2, dec2):
        """Compute the great-circle distance between two sky coordinates,
        using the haversine formula.

        Parameters
        ----------
        ra1 : `pandas.Series`
            Ra of the first coordinate in radians.
        dec1 : `pandas.Series`
            Dec of the first coordinate in radians.
        ra2 : `pandas.Series`
            Ra of the second coordinate in radians.
        dec2 : `pandas.Series`
            Dec of the second coordinate in radians.

        Returns
        -------
        dist : `pandas.Series`
            Distance on the sphere in radians.
        """
        deltaDec = dec2 - dec1
        deltaRa = ra2 - ra1
        return 2 * np.arcsin(
            np.sqrt(
                np.sin(deltaDec / 2) ** 2
                + np.cos(dec2) * np.cos(dec1) * np.sin(deltaRa / 2) ** 2))

    def getSkySeperationFromPixel(self, x1, y1, x2, y2, cd11, cd12, cd21, cd22):
        """Compute the distance on the sphere between pixel positions
        (x1, y1) and (x2, y2).

        Parameters
        ----------
        x1 : `pandas.Series`
            X pixel coordinate.
        y1 : `pandas.Series`
            Y pixel coordinate.
        x2 : `pandas.Series`
            X pixel coordinate.
        y2 : `pandas.Series`
            Y pixel coordinate.
        cd11 : `pandas.Series`
            [1, 1] element of the local Wcs affine transform.
        cd12 : `pandas.Series`
            [1, 2] element of the local Wcs affine transform.
        cd21 : `pandas.Series`
            [2, 1] element of the local Wcs affine transform.
        cd22 : `pandas.Series`
            [2, 2] element of the local Wcs affine transform.

        Returns
        -------
        dist : `pandas.Series`
            Distance on the sphere in radians.
        """
        ra1, dec1 = self.computeDeltaRaDec(x1, y1, cd11, cd12, cd21, cd22)
        ra2, dec2 = self.computeDeltaRaDec(x2, y2, cd11, cd12, cd21, cd22)
        # Great circle distance for small separations.
        return self.computeSkySeperation(ra1, dec1, ra2, dec2)


class ComputePixelScale(LocalWcs):
    """Compute the local pixel scale from the stored CDMatrix.
    """
    name = "PixelScale"

    @property
    def columns(self):
        return [self.colCD_1_1,
                self.colCD_1_2,
                self.colCD_2_1,
                self.colCD_2_2]

    def pixelScaleArcseconds(self, cd11, cd12, cd21, cd22):
        """Compute the local pixel-to-arcsecond scale conversion.

        Parameters
        ----------
        cd11 : `pandas.Series`
            [1, 1] element of the local Wcs affine transform in radians.
        cd12 : `pandas.Series`
            [1, 2] element of the local Wcs affine transform in radians.
        cd21 : `pandas.Series`
            [2, 1] element of the local Wcs affine transform in radians.
        cd22 : `pandas.Series`
            [2, 2] element of the local Wcs affine transform in radians.

        Returns
        -------
        pixScale : `pandas.Series`
            Arcseconds per pixel at the location of the local WCS.
        """
        return 3600 * np.degrees(np.sqrt(np.fabs(cd11 * cd22 - cd12 * cd21)))

    def _func(self, df):
        return self.pixelScaleArcseconds(df[self.colCD_1_1],
                                         df[self.colCD_1_2],
                                         df[self.colCD_2_1],
                                         df[self.colCD_2_2])
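
# Example (illustrative): the pixel scale is sqrt(|det CD|), converted from
# radians to arcseconds.  For a diagonal CD matrix with
# cd11 = cd22 = (0.2 / 3600) * (pi / 180) rad/pixel and cd12 = cd21 = 0,
# pixelScaleArcseconds returns 0.2 arcsec/pixel.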


class ConvertPixelToArcseconds(ComputePixelScale):
    """Convert a value in units of pixels to units of arcseconds.
    """

    def __init__(self,
                 col,
                 colCD_1_1,
                 colCD_1_2,
                 colCD_2_1,
                 colCD_2_2,
                 **kwargs):
        self.col = col
        super().__init__(colCD_1_1,
                         colCD_1_2,
                         colCD_2_1,
                         colCD_2_2,
                         **kwargs)

    @property
    def name(self):
        return f"{self.col}_asArcseconds"

    @property
    def columns(self):
        return [self.col,
                self.colCD_1_1,
                self.colCD_1_2,
                self.colCD_2_1,
                self.colCD_2_2]

    def _func(self, df):
        return df[self.col] * self.pixelScaleArcseconds(df[self.colCD_1_1],
                                                        df[self.colCD_1_2],
                                                        df[self.colCD_2_1],
                                                        df[self.colCD_2_2])


class ConvertPixelSqToArcsecondsSq(ComputePixelScale):
    """Convert a value in units of pixels squared to units of arcseconds squared.
    """

    def __init__(self,
                 col,
                 colCD_1_1,
                 colCD_1_2,
                 colCD_2_1,
                 colCD_2_2,
                 **kwargs):
        self.col = col
        super().__init__(colCD_1_1,
                         colCD_1_2,
                         colCD_2_1,
                         colCD_2_2,
                         **kwargs)

    @property
    def name(self):
        return f"{self.col}_asArcsecondsSq"

    @property
    def columns(self):
        return [self.col,
                self.colCD_1_1,
                self.colCD_1_2,
                self.colCD_2_1,
                self.colCD_2_2]

    def _func(self, df):
        pixScale = self.pixelScaleArcseconds(df[self.colCD_1_1],
                                             df[self.colCD_1_2],
                                             df[self.colCD_2_1],
                                             df[self.colCD_2_2])
        return df[self.col] * pixScale * pixScale


class ReferenceBand(Functor):
    name = 'Reference Band'
    shortname = 'refBand'

    @property
    def columns(self):
        return ["merge_measurement_i",
                "merge_measurement_r",
                "merge_measurement_z",
                "merge_measurement_y",
                "merge_measurement_g",
                "merge_measurement_u"]

    def _func(self, df: pd.DataFrame) -> pd.Series:
        def getFilterAliasName(row):
            # get column name with the max value (True > False)
            colName = row.idxmax()
            return colName.replace('merge_measurement_', '')

        # Skip columns that are unavailable, because this functor requests the
        # superset of bands that could be included in the object table
        columns = [col for col in self.columns if col in df.columns]
        # Makes a Series of dtype object if df is empty
        return df[columns].apply(getFilterAliasName, axis=1,
                                 result_type='reduce').astype('object')


class Photometry(Functor):
    # AB to NanoJansky (3631 Jansky)
    AB_FLUX_SCALE = (0 * u.ABmag).to_value(u.nJy)
    LOG_AB_FLUX_SCALE = 12.56
    FIVE_OVER_2LOG10 = 1.085736204758129569
    # TO DO: DM-21955 Replace hard coded photometric calibration values
    COADD_ZP = 27

    def __init__(self, colFlux, colFluxErr=None, calib=None, **kwargs):
        self.vhypot = np.vectorize(self.hypot)
        self.col = colFlux
        self.colFluxErr = colFluxErr

        self.calib = calib
        if calib is not None:
            self.fluxMag0, self.fluxMag0Err = calib.getFluxMag0()
        else:
            self.fluxMag0 = 1./np.power(10, -0.4*self.COADD_ZP)
            self.fluxMag0Err = 0.

        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.col]

    @property
    def name(self):
        return f'mag_{self.col}'

    @classmethod
    def hypot(cls, a, b):
        if np.abs(a) < np.abs(b):
            a, b = b, a
        if a == 0.:
            return 0.
        q = b/a
        return np.abs(a) * np.sqrt(1. + q*q)

    def dn2flux(self, dn, fluxMag0):
        return self.AB_FLUX_SCALE * dn / fluxMag0

    def dn2mag(self, dn, fluxMag0):
        with np.warnings.catch_warnings():
            np.warnings.filterwarnings('ignore', r'invalid value encountered')
            np.warnings.filterwarnings('ignore', r'divide by zero')
            return -2.5 * np.log10(dn/fluxMag0)

    def dn2fluxErr(self, dn, dnErr, fluxMag0, fluxMag0Err):
        retVal = self.vhypot(dn * fluxMag0Err, dnErr * fluxMag0)
        retVal *= self.AB_FLUX_SCALE / fluxMag0 / fluxMag0
        return retVal

    def dn2MagErr(self, dn, dnErr, fluxMag0, fluxMag0Err):
        retVal = self.dn2fluxErr(dn, dnErr, fluxMag0, fluxMag0Err) / self.dn2flux(dn, fluxMag0)
        return self.FIVE_OVER_2LOG10 * retVal
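
# Example (illustrative): with the default COADD_ZP = 27,
# fluxMag0 = 10**(0.4*27) ~ 6.31e10 counts, so
# dn2flux(1, fluxMag0) ~ 3631e9 / 6.31e10 ~ 57.5 nJy; that is, one count
# corresponds to AB magnitude 27, since -2.5*log10(57.5e-9 Jy / 3631 Jy) ~ 27.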


class NanoJansky(Photometry):
    def _func(self, df):
        return self.dn2flux(df[self.col], self.fluxMag0)


class NanoJanskyErr(Photometry):
    @property
    def columns(self):
        return [self.col, self.colFluxErr]

    def _func(self, df):
        retArr = self.dn2fluxErr(df[self.col], df[self.colFluxErr], self.fluxMag0, self.fluxMag0Err)
        return pd.Series(retArr, index=df.index)


class Magnitude(Photometry):
    def _func(self, df):
        return self.dn2mag(df[self.col], self.fluxMag0)


class MagnitudeErr(Photometry):
    @property
    def columns(self):
        return [self.col, self.colFluxErr]

    def _func(self, df):
        retArr = self.dn2MagErr(df[self.col], df[self.colFluxErr], self.fluxMag0, self.fluxMag0Err)
        return pd.Series(retArr, index=df.index)


class LocalPhotometry(Functor):
    """Base class for calibrating the specified instrument flux column using
    the local photometric calibration.

    Parameters
    ----------
    instFluxCol : `str`
        Name of the instrument flux column.
    instFluxErrCol : `str`
        Name of the associated error columns for ``instFluxCol``.
    photoCalibCol : `str`
        Name of local calibration column.
    photoCalibErrCol : `str`
        Error associated with ``photoCalibCol``

    See also
    --------
    LocalPhotometry
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    """
    logNJanskyToAB = (1 * u.nJy).to_value(u.ABmag)

    def __init__(self,
                 instFluxCol,
                 instFluxErrCol,
                 photoCalibCol,
                 photoCalibErrCol,
                 **kwargs):
        self.instFluxCol = instFluxCol
        self.instFluxErrCol = instFluxErrCol
        self.photoCalibCol = photoCalibCol
        self.photoCalibErrCol = photoCalibErrCol
        super().__init__(**kwargs)

    def instFluxToNanojansky(self, instFlux, localCalib):
        """Convert instrument flux to nanojanskys.

        Parameters
        ----------
        instFlux : `numpy.ndarray` or `pandas.Series`
            Array of instrument flux measurements
        localCalib : `numpy.ndarray` or `pandas.Series`
            Array of local photometric calibration estimates.

        Returns
        -------
        calibFlux : `numpy.ndarray` or `pandas.Series`
            Array of calibrated flux measurements.
        """
        return instFlux * localCalib

    def instFluxErrToNanojanskyErr(self, instFlux, instFluxErr, localCalib, localCalibErr):
        """Convert instrument flux error to nanojansky error.

        Parameters
        ----------
        instFlux : `numpy.ndarray` or `pandas.Series`
            Array of instrument flux measurements
        instFluxErr : `numpy.ndarray` or `pandas.Series`
            Errors on associated ``instFlux`` values
        localCalib : `numpy.ndarray` or `pandas.Series`
            Array of local photometric calibration estimates.
        localCalibErr : `numpy.ndarray` or `pandas.Series`
            Errors on associated ``localCalib`` values

        Returns
        -------
        calibFluxErr : `numpy.ndarray` or `pandas.Series`
            Errors on calibrated flux measurements.
        """
        return np.hypot(instFluxErr * localCalib, instFlux * localCalibErr)

    def instFluxToMagnitude(self, instFlux, localCalib):
        """Convert instrument flux to an AB magnitude.

        Parameters
        ----------
        instFlux : `numpy.ndarray` or `pandas.Series`
            Array of instrument flux measurements
        localCalib : `numpy.ndarray` or `pandas.Series`
            Array of local photometric calibration estimates.

        Returns
        -------
        calibMag : `numpy.ndarray` or `pandas.Series`
            Array of calibrated AB magnitudes.
        """
        return -2.5 * np.log10(self.instFluxToNanojansky(instFlux, localCalib)) + self.logNJanskyToAB

    def instFluxErrToMagnitudeErr(self, instFlux, instFluxErr, localCalib, localCalibErr):
        """Convert instrument flux error to an AB magnitude error.

        Parameters
        ----------
        instFlux : `numpy.ndarray` or `pandas.Series`
            Array of instrument flux measurements
        instFluxErr : `numpy.ndarray` or `pandas.Series`
            Errors on associated ``instFlux`` values
        localCalib : `numpy.ndarray` or `pandas.Series`
            Array of local photometric calibration estimates.
        localCalibErr : `numpy.ndarray` or `pandas.Series`
            Errors on associated ``localCalib`` values

        Returns
        -------
        calibMagErr : `numpy.ndarray` or `pandas.Series`
            Error on calibrated AB magnitudes.
        """
        err = self.instFluxErrToNanojanskyErr(instFlux, instFluxErr, localCalib, localCalibErr)
        return 2.5 / np.log(10) * err / self.instFluxToNanojansky(instFlux, localCalib)
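
# Note (explanatory): standard error propagation underlies these helpers.
# For f = dn * C with independent errors, sigma_f = hypot(sigma_dn*C, dn*sigma_C),
# and the corresponding magnitude error is (2.5 / ln 10) * sigma_f / f.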


class LocalNanojansky(LocalPhotometry):
    """Compute calibrated fluxes using the local calibration value.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    """

    @property
    def columns(self):
        return [self.instFluxCol, self.photoCalibCol]

    @property
    def name(self):
        return f'flux_{self.instFluxCol}'

    def _func(self, df):
        return self.instFluxToNanojansky(df[self.instFluxCol], df[self.photoCalibCol])


class LocalNanojanskyErr(LocalPhotometry):
    """Compute calibrated flux errors using the local calibration value.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    """

    @property
    def columns(self):
        return [self.instFluxCol, self.instFluxErrCol,
                self.photoCalibCol, self.photoCalibErrCol]

    @property
    def name(self):
        return f'fluxErr_{self.instFluxCol}'

    def _func(self, df):
        return self.instFluxErrToNanojanskyErr(df[self.instFluxCol], df[self.instFluxErrCol],
                                               df[self.photoCalibCol], df[self.photoCalibErrCol])


class LocalMagnitude(LocalPhotometry):
    """Compute calibrated AB magnitudes using the local calibration value.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    """

    @property
    def columns(self):
        return [self.instFluxCol, self.photoCalibCol]

    @property
    def name(self):
        return f'mag_{self.instFluxCol}'

    def _func(self, df):
        return self.instFluxToMagnitude(df[self.instFluxCol],
                                        df[self.photoCalibCol])


class LocalMagnitudeErr(LocalPhotometry):
    """Compute calibrated AB magnitude errors using the local calibration value.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    """

    @property
    def columns(self):
        return [self.instFluxCol, self.instFluxErrCol,
                self.photoCalibCol, self.photoCalibErrCol]

    @property
    def name(self):
        return f'magErr_{self.instFluxCol}'

    def _func(self, df):
        return self.instFluxErrToMagnitudeErr(df[self.instFluxCol],
                                              df[self.instFluxErrCol],
                                              df[self.photoCalibCol],
                                              df[self.photoCalibErrCol])


class LocalDipoleMeanFlux(LocalPhotometry):
    """Compute absolute mean of dipole fluxes.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    LocalDipoleMeanFlux
    LocalDipoleMeanFluxErr
    LocalDipoleDiffFlux
    LocalDipoleDiffFluxErr
    """
    def __init__(self,
                 instFluxPosCol,
                 instFluxNegCol,
                 instFluxPosErrCol,
                 instFluxNegErrCol,
                 photoCalibCol,
                 photoCalibErrCol,
                 **kwargs):
        self.instFluxNegCol = instFluxNegCol
        self.instFluxPosCol = instFluxPosCol
        self.instFluxNegErrCol = instFluxNegErrCol
        self.instFluxPosErrCol = instFluxPosErrCol
        self.photoCalibCol = photoCalibCol
        self.photoCalibErrCol = photoCalibErrCol
        super().__init__(instFluxNegCol,
                         instFluxNegErrCol,
                         photoCalibCol,
                         photoCalibErrCol,
                         **kwargs)

    @property
    def columns(self):
        return [self.instFluxPosCol,
                self.instFluxNegCol,
                self.photoCalibCol]

    @property
    def name(self):
        return f'dipMeanFlux_{self.instFluxPosCol}_{self.instFluxNegCol}'

    def _func(self, df):
        return 0.5*(np.fabs(self.instFluxToNanojansky(df[self.instFluxNegCol], df[self.photoCalibCol]))
                    + np.fabs(self.instFluxToNanojansky(df[self.instFluxPosCol], df[self.photoCalibCol])))


class LocalDipoleMeanFluxErr(LocalDipoleMeanFlux):
    """Compute the error on the absolute mean of dipole fluxes.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    LocalDipoleMeanFlux
    LocalDipoleMeanFluxErr
    LocalDipoleDiffFlux
    LocalDipoleDiffFluxErr
    """

    @property
    def columns(self):
        return [self.instFluxPosCol,
                self.instFluxNegCol,
                self.instFluxPosErrCol,
                self.instFluxNegErrCol,
                self.photoCalibCol,
                self.photoCalibErrCol]

    @property
    def name(self):
        return f'dipMeanFluxErr_{self.instFluxPosCol}_{self.instFluxNegCol}'

    def _func(self, df):
        return 0.5*np.sqrt(
            ((np.fabs(df[self.instFluxNegCol]) + np.fabs(df[self.instFluxPosCol]))
             * df[self.photoCalibErrCol])**2
            + (df[self.instFluxNegErrCol]**2 + df[self.instFluxPosErrCol]**2)
            * df[self.photoCalibCol]**2)
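
# Error propagation note: for f = 0.5*(|neg| + |pos|)*C with independent errors,
#     sigma_f = 0.5*sqrt(((|neg| + |pos|)*sigma_C)**2 + (sigma_neg**2 + sigma_pos**2)*C**2),
# the same pattern used by LocalDipoleDiffFluxErr below.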


class LocalDipoleDiffFlux(LocalDipoleMeanFlux):
    """Compute the absolute difference of dipole fluxes.

    Value is (abs(pos) - abs(neg))

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    LocalDipoleMeanFlux
    LocalDipoleMeanFluxErr
    LocalDipoleDiffFlux
    LocalDipoleDiffFluxErr
    """

    @property
    def columns(self):
        return [self.instFluxPosCol,
                self.instFluxNegCol,
                self.photoCalibCol]

    @property
    def name(self):
        return f'dipDiffFlux_{self.instFluxPosCol}_{self.instFluxNegCol}'

    def _func(self, df):
        return (np.fabs(self.instFluxToNanojansky(df[self.instFluxPosCol], df[self.photoCalibCol]))
                - np.fabs(self.instFluxToNanojansky(df[self.instFluxNegCol], df[self.photoCalibCol])))


class LocalDipoleDiffFluxErr(LocalDipoleMeanFlux):
    """Compute the error on the absolute difference of dipole fluxes.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    LocalDipoleMeanFlux
    LocalDipoleMeanFluxErr
    LocalDipoleDiffFlux
    LocalDipoleDiffFluxErr
    """

    @property
    def columns(self):
        return [self.instFluxPosCol,
                self.instFluxNegCol,
                self.instFluxPosErrCol,
                self.instFluxNegErrCol,
                self.photoCalibCol,
                self.photoCalibErrCol]

    @property
    def name(self):
        return f'dipDiffFluxErr_{self.instFluxPosCol}_{self.instFluxNegCol}'

    def _func(self, df):
        return np.sqrt(
            ((np.fabs(df[self.instFluxPosCol]) - np.fabs(df[self.instFluxNegCol]))
             * df[self.photoCalibErrCol])**2
            + (df[self.instFluxPosErrCol]**2 + df[self.instFluxNegErrCol]**2)
            * df[self.photoCalibCol]**2)


class Ratio(Functor):
    """Base class for returning the ratio of two columns.

    Can be used to compute a signal-to-noise ratio for any input flux.

    Parameters
    ----------
    numerator : `str`
        Name of the column to use as the numerator in the ratio.
    denominator : `str`
        Name of the column to use as the denominator in the ratio.
    """
    def __init__(self,
                 numerator,
                 denominator,
                 **kwargs):
        self.numerator = numerator
        self.denominator = denominator
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.numerator, self.denominator]

    @property
    def name(self):
        return f'ratio_{self.numerator}_{self.denominator}'

    def _func(self, df):
        with np.warnings.catch_warnings():
            np.warnings.filterwarnings('ignore', r'invalid value encountered')
            np.warnings.filterwarnings('ignore', r'divide by zero')
            return df[self.numerator] / df[self.denominator]
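
# Example (illustrative): a per-source signal-to-noise ratio can be built as
#     Ratio('base_PsfFlux_instFlux', 'base_PsfFlux_instFluxErr')
# The column names here are hypothetical and depend on the catalog schema.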


class Ebv(Functor):
    """Compute E(B-V) from dustmaps.sfd
    """
    _defaultDataset = 'ref'
    name = "E(B-V)"
    shortname = "ebv"

    def __init__(self, **kwargs):
        self._columns = ['coord_ra', 'coord_dec']
        self.sfd = SFDQuery()
        super().__init__(**kwargs)

    def _func(self, df):
        coords = SkyCoord(df['coord_ra']*u.rad, df['coord_dec']*u.rad)
        ebv = self.sfd(coords)
        # Double precision unnecessary scientifically
        # but currently needed for ingest to qserv
        return pd.Series(ebv, index=df.index).astype('float64')