__all__ = ["Functor", "CompositeFunctor", "CustomFunctor", "Column", "Index",
           "IDColumn", "FootprintNPix", "CoordColumn", "RAColumn", "DecColumn",
           "HtmIndex20", "Mag", "MagErr", "NanoMaggie", "MagDiff", "Color",
           "Labeller", "StarGalaxyLabeller", "NumStarLabeller", "DeconvolvedMoments",
           "SdssTraceSize", "PsfSdssTraceSizeDiff", "HsmTraceSize", "PsfHsmTraceSizeDiff",
           "HsmFwhm", "E1", "E2", "RadiusFromQuadrupole", "LocalWcs", "ComputePixelScale",
           "ConvertPixelToArcseconds", "ConvertPixelSqToArcsecondsSq", "ReferenceBand",
           "Photometry", "NanoJansky", "NanoJanskyErr", "Magnitude", "MagnitudeErr",
           "LocalPhotometry", "LocalNanojansky", "LocalNanojanskyErr",
           "LocalMagnitude", "LocalMagnitudeErr", "LocalDipoleMeanFlux",
           "LocalDipoleMeanFluxErr", "LocalDipoleDiffFlux", "LocalDipoleDiffFluxErr"]
import logging
import os
import re
from itertools import product

import numpy as np
import pandas as pd
import yaml
import astropy.units as u
from astropy.coordinates import SkyCoord

import lsst.geom as geom
import lsst.sphgeom as sphgeom
from lsst.utils import doImport
from lsst.utils.introspection import get_full_type_name
from lsst.daf.butler import DeferredDatasetHandle
from lsst.pipe.base import InMemoryDatasetHandle

from .parquetTable import ParquetTable, MultilevelParquetTable
def init_fromDict(initDict, basePath='lsst.pipe.tasks.functors',
                  typeKey='functor', name=None):
    """Initialize an object defined in a dictionary.

    The object needs to be importable as f'{basePath}.{initDict[typeKey]}'.
    The positional and keyword arguments (if any) are contained in the
    "args" and "kwargs" entries of the dictionary, respectively.
    This is used in `functors.CompositeFunctor.from_yaml` to initialize
    a composite functor from a specification in a YAML file.

    Parameters
    ----------
    initDict : `dict`
        Dictionary describing the object's initialization. Must contain
        an entry keyed by ``typeKey`` that is the name of the object,
        relative to ``basePath``.
    basePath : `str`
        Path relative to module in which ``initDict[typeKey]`` is defined.
    typeKey : `str`
        Key of ``initDict`` that is the name of the object
        (relative to ``basePath``).
    """
    initDict = initDict.copy()
    pythonType = doImport(f'{basePath}.{initDict.pop(typeKey)}')
    args = []
    if 'args' in initDict:
        args = initDict.pop('args')
        if isinstance(args, str):
            args = [args]
    try:
        element = pythonType(*args, **initDict)
    except Exception as e:
        message = f'Error in constructing functor "{name}" of type {pythonType.__name__} with args: {args}'
        raise type(e)(message, e.args)
    return element
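
# A minimal usage sketch for `init_fromDict` (the column name below is
# hypothetical): a YAML-style dict naming a functor in this module becomes a
# live instance.
#
#     spec = {'functor': 'Column', 'args': 'base_PsfFlux_instFlux'}
#     func = init_fromDict(spec, name='psfFlux')  # -> Column('base_PsfFlux_instFlux')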
96 """Define and execute a calculation on a ParquetTable
98 The `__call__` method accepts either a `ParquetTable` object or a
99 `DeferredDatasetHandle`
or `InMemoryDatasetHandle`,
and returns the
100 result of the calculation
as a single column. Each functor defines what
101 columns are needed
for the calculation,
and only these columns are read
102 from the `ParquetTable`.
104 The action of `__call__` consists of two steps: first, loading the
105 necessary columns
from disk into memory
as a `pandas.DataFrame` object;
106 and second, performing the computation on this dataframe
and returning the
110 To define a new `Functor`, a subclass must define a `_func` method,
111 that takes a `pandas.DataFrame`
and returns result
in a `pandas.Series`.
112 In addition, it must define the following attributes
114 * `_columns`: The columns necessary to perform the calculation
115 * `name`: A name appropriate
for a figure axis label
116 * `shortname`: A name appropriate
for use
as a dictionary key
118 On initialization, a `Functor` should declare what band (`filt` kwarg)
119 and dataset (e.g. `
'ref'`, `
'meas'`, `
'forced_src'`) it
is intended to be
120 applied to. This enables the `_get_data` method to extract the proper
121 columns
from the parquet file. If
not specified, the dataset will fall back
122 on the `_defaultDataset`attribute. If band
is not specified
and `dataset`
123 is anything other than `
'ref'`, then an error will be raised when trying to
124 perform the calculation.
126 Originally, `Functor` was set up to expect
127 datasets formatted like the `deepCoadd_obj` dataset; that
is, a
128 dataframe
with a multi-level column index,
with the levels of the
129 column index being `band`, `dataset`,
and `column`.
130 It has since been generalized to apply to dataframes without mutli-level
131 indices
and multi-level indices
with just `dataset`
and `column` levels.
132 In addition, the `_get_data` method that reads
133 the dataframe
from the `ParquetTable` will
return a dataframe
with column
134 index levels defined by the `_dfLevels` attribute; by default, this
is
137 The `_dfLevels` attributes should generally
not need to
138 be changed, unless `_func` needs columns
from multiple filters
or datasets
139 to do the calculation.
141 which `_dfLevels = (
'band',
'column')`,
and `_func` expects the dataframe
142 it gets to have those levels
in the column index.
147 Filter upon which to do the calculation
150 Dataset upon which to do the calculation
151 (e.g.,
'ref',
'meas',
'forced_src').
155 _defaultDataset = 'ref'
156 _dfLevels = (
'column',)
157 _defaultNoDup =
False
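
    # A minimal subclass sketch (hypothetical column names) illustrating the
    # contract described in the docstring: declare `_columns` and implement
    # `_func`; the base class handles reading only those columns.
    #
    #     class FluxRatio(Functor):
    #         name = 'Flux ratio'
    #         shortname = 'fluxRatio'
    #         _columns = ('slot_PsfFlux_instFlux', 'slot_ApFlux_instFlux')
    #
    #         def _func(self, df):
    #             return df['slot_PsfFlux_instFlux'] / df['slot_ApFlux_instFlux']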
    def __init__(self, filt=None, dataset=None, noDup=None):
        self.filt = filt
        self.dataset = dataset if dataset is not None else self._defaultDataset
        self._noDup = noDup
        self.log = logging.getLogger(type(self).__name__)

    @property
    def noDup(self):
        if self._noDup is not None:
            return self._noDup
        else:
            return self._defaultNoDup

    @property
    def columns(self):
        """Columns required to perform calculation."""
        if not hasattr(self, '_columns'):
            raise NotImplementedError('Must define columns property or _columns attribute')
        return self._columns
    def _get_data_columnLevels(self, data, columnIndex=None):
        """Gets the names of the column index levels.

        This should only be called in the context of a multilevel table.
        The logic here is to enable this to work both with the gen2
        `MultilevelParquetTable` and with the gen3 `DeferredDatasetHandle`.

        Parameters
        ----------
        data : various
            The data to be read, can be a `MultilevelParquetTable`,
            `DeferredDatasetHandle`, or `InMemoryDatasetHandle`.
        columnIndex (optional): pandas `Index` object
            If not passed, then it is read from the `DeferredDatasetHandle`
            or `InMemoryDatasetHandle`.
        """
        if isinstance(data, (DeferredDatasetHandle, InMemoryDatasetHandle)):
            if columnIndex is None:
                columnIndex = data.get(component="columns")
        if columnIndex is not None:
            return columnIndex.names
        if isinstance(data, MultilevelParquetTable):
            return data.columnLevels

        raise TypeError(f"Unknown type for data: {type(data)}!")
    def _get_data_columnLevelNames(self, data, columnIndex=None):
        """Gets the content of each of the column levels for a multilevel
        table.

        Similar to `_get_data_columnLevels`, this enables backward
        compatibility with gen2.

        Mirrors the original gen2 implementation within
        `pipe.tasks.parquetTable.MultilevelParquetTable`.
        """
        if isinstance(data, (DeferredDatasetHandle, InMemoryDatasetHandle)):
            if columnIndex is None:
                columnIndex = data.get(component="columns")
        if columnIndex is not None:
            columnLevels = columnIndex.names
            columnLevelNames = {
                level: list(np.unique(np.array([c for c in columnIndex])[:, i]))
                for i, level in enumerate(columnLevels)
            }
            return columnLevelNames
        if isinstance(data, MultilevelParquetTable):
            return data.columnLevelNames

        raise TypeError(f"Unknown type for data: {type(data)}!")
    def _colsFromDict(self, colDict, columnIndex=None):
        """Converts dictionary column specification to a list of columns."""
        new_colDict = {}
        columnLevels = self._get_data_columnLevels(None, columnIndex=columnIndex)

        for i, lev in enumerate(columnLevels):
            if lev in colDict:
                if isinstance(colDict[lev], str):
                    new_colDict[lev] = [colDict[lev]]
                else:
                    new_colDict[lev] = colDict[lev]
            else:
                new_colDict[lev] = columnIndex.levels[i]

        levelCols = [new_colDict[lev] for lev in columnLevels]
        cols = list(product(*levelCols))
        colsAvailable = [col for col in cols if col in columnIndex]
        return colsAvailable
253 """Returns columns needed by functor from multilevel dataset
255 To access tables with multilevel column structure, the `MultilevelParquetTable`
256 or `DeferredDatasetHandle` need to be passed either a list of tuples
or a
262 The data
as either `MultilevelParquetTable`,
263 `DeferredDatasetHandle`,
or `InMemoryDatasetHandle`.
264 columnIndex (optional): pandas `Index` object
265 either passed
or read
in from `DeferredDatasetHandle`.
266 `returnTuple` : `bool`
267 If true, then
return a list of tuples rather than the column dictionary
268 specification. This
is set to `
True` by `CompositeFunctor`
in order to be able to
269 combine columns
from the various component functors.
272 if isinstance(data, (DeferredDatasetHandle, InMemoryDatasetHandle))
and columnIndex
is None:
273 columnIndex = data.get(component=
"columns")
278 columnDict = {
'column': self.
columns,
280 if self.
filt is None:
282 if "band" in columnLevels:
284 columnDict[
"band"] = columnLevelNames[
"band"][0]
286 raise ValueError(f
"'filt' not set for functor {self.name}"
287 f
"(dataset {self.dataset}) "
289 "contains multiple filters in column index. "
290 "Set 'filt' or set 'dataset' to 'ref'.")
292 columnDict[
'band'] = self.
filt
294 if isinstance(data, MultilevelParquetTable):
295 return data._colsFromDict(columnDict)
296 elif isinstance(data, (DeferredDatasetHandle, InMemoryDatasetHandle)):
298 return self.
_colsFromDict(columnDict, columnIndex=columnIndex)
301 raise RuntimeError(f
"Unexpected data type. Got {get_full_type_name}.")
    def _func(self, df, dropna=True):
        raise NotImplementedError('Must define calculation on dataframe')
    def _get_columnIndex(self, data):
        """Return columnIndex."""
        if isinstance(data, (DeferredDatasetHandle, InMemoryDatasetHandle)):
            return data.get(component="columns")
        else:
            return None
    def _get_data(self, data):
        """Retrieve dataframe necessary for calculation.

        The data argument can be a DataFrame, a ParquetTable instance, or a
        gen3 DeferredDatasetHandle.

        Returns dataframe upon which `self._func` can act.

        N.B. while passing a raw pandas `DataFrame` *should* work here, it
        has not been tested.
        """
        if isinstance(data, pd.DataFrame):
            return data

        # First thing to do: check to see if the data source has a multilevel
        # column index or not.
        columnIndex = self._get_columnIndex(data)
        is_multiLevel = isinstance(data, MultilevelParquetTable) or isinstance(columnIndex, pd.MultiIndex)

        # Simple single-level parquet table, gen2
        if isinstance(data, ParquetTable) and not is_multiLevel:
            columns = self.columns
            df = data.toDataFrame(columns=columns)
            return df

        # Get proper columns specification for this functor
        columns = self.multilevelColumns(data, columnIndex=columnIndex)

        if isinstance(data, MultilevelParquetTable):
            # Load in-memory dataframe with appropriate columns the gen2 way
            df = data.toDataFrame(columns=columns, droplevels=False)
        elif isinstance(data, (DeferredDatasetHandle, InMemoryDatasetHandle)):
            # Load in-memory dataframe with appropriate columns the gen3 way
            df = data.get(parameters={"columns": columns})
        else:
            raise RuntimeError(f"Unexpected type provided for data. Got {get_full_type_name(data)}.")

        # Drop unnecessary column levels
        df = self._setLevels(df)
        return df
    def _setLevels(self, df):
        levelsToDrop = [n for n in df.columns.names if n not in self._dfLevels]
        df.columns = df.columns.droplevel(levelsToDrop)
        return df
    def _dropna(self, vals):
        return vals.dropna()

    def __call__(self, data, dropna=False):
        try:
            df = self._get_data(data)
            vals = self._func(df)
        except Exception as e:
            self.log.error("Exception in %s call: %s: %s", self.name, type(e).__name__, e)
            vals = self.fail(df)
        if dropna:
            vals = self._dropna(vals)

        return vals
379 """Computes difference between functor called on two different ParquetTable objects
381 return self(data1, **kwargs) - self(data2, **kwargs)
384 return pd.Series(np.full(len(df), np.nan), index=df.index)
388 """Full name of functor (suitable for figure labels)
390 return NotImplementedError
394 """Short name of functor (suitable for column name/dict key)
400 """Perform multiple calculations at once on a catalog
402 The role of a `CompositeFunctor` is to group together computations
from
403 multiple functors. Instead of returning `pandas.Series` a
404 `CompositeFunctor` returns a `pandas.Dataframe`,
with the column names
405 being the keys of `funcDict`.
407 The `columns` attribute of a `CompositeFunctor`
is the union of all columns
408 in all the component functors.
410 A `CompositeFunctor` does
not use a `_func` method itself; rather,
411 when a `CompositeFunctor`
is called, all its columns are loaded
412 at once,
and the resulting dataframe
is passed to the `_func` method of each component
413 functor. This has the advantage of only doing I/O (reading
from parquet file) once,
414 and works because each individual `_func` method of each component functor does
not
415 care
if there are *extra* columns
in the dataframe being passed; only that it must contain
416 *at least* the `columns` it expects.
418 An important
and useful
class method is `from_yaml`, which takes
as argument the path to a YAML
419 file specifying a collection of functors.
423 funcs : `dict`
or `list`
424 Dictionary
or list of functors. If a list, then it will be converted
425 into a dictonary according to the `.shortname` attribute of each functor.
432 if type(funcs) == dict:
435 self.
funcDict = {f.shortname: f
for f
in funcs}
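
    # A small usage sketch: group two functors and evaluate both with a
    # single read of the underlying table (`parq` is any supported source).
    #
    #     funcs = CompositeFunctor({'ra': RAColumn(), 'dec': DecColumn()})
    #     df = funcs(parq)   # DataFrame with columns 'ra' and 'dec'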
    def update(self, new):
        """Update the functor dictionary with new functors."""
        if isinstance(new, dict):
            self.funcDict.update(new)
        elif isinstance(new, CompositeFunctor):
            self.funcDict.update(new.funcDict)
        else:
            raise TypeError('Can only update with dictionary or CompositeFunctor.')
    @property
    def columns(self):
        return list(set([x for y in [f.columns for f in self.funcDict.values()] for x in y]))

    def multilevelColumns(self, data, **kwargs):
        # Get the union of columns for all component functors. Note the need
        # for `returnTuple=True` so the per-functor specifications can be
        # combined.
        return list(set([x for y in [
            f.multilevelColumns(data, returnTuple=True, **kwargs) for f in self.funcDict.values()
        ] for x in y]))
483 """Apply the functor to the data table
488 The data represented as `lsst.daf.butler.DeferredDatasetHandle`,
491 `lsst.pipe.base.InMemoryDatasetHandle`,
492 or `pandas.DataFrame`.
493 The table
or a pointer to a table on disk
from which columns can
499 is_multiLevel = isinstance(data, MultilevelParquetTable)
or isinstance(columnIndex, pd.MultiIndex)
505 if isinstance(data, MultilevelParquetTable):
507 df = data.toDataFrame(columns=columns, droplevels=
False)
508 elif isinstance(data, (DeferredDatasetHandle, InMemoryDatasetHandle)):
510 df = data.get(parameters={
"columns": columns})
515 subdf = f._setLevels(
516 df[f.multilevelColumns(data, returnTuple=
True, columnIndex=columnIndex)]
518 valDict[k] = f._func(subdf)
519 except Exception
as e:
520 self.
log.error(
"Exception in %s call: %s: %s", self.
name, type(e).__name__, e)
522 valDict[k] = f.fail(subdf)
527 if isinstance(data, (DeferredDatasetHandle, InMemoryDatasetHandle)):
530 elif isinstance(data, pd.DataFrame):
537 valDict = {k: f._func(df)
for k, f
in self.
funcDict.items()}
540 for name, colVal
in valDict.items():
541 if len(colVal.shape) != 1:
542 raise RuntimeError(
"Transformed column '%s' is not the shape of a column. "
543 "It is shaped %s and type %s." % (name, colVal.shape, type(colVal)))
546 valDf = pd.concat(valDict, axis=1)
548 print([(k, type(v))
for k, v
in valDict.items()])
551 if kwargs.get(
'dropna',
False):
552 valDf = valDf.dropna(how=
'any')
    @classmethod
    def renameCol(cls, col, renameRules):
        if renameRules is None:
            return col
        for old, new in renameRules:
            if col.startswith(old):
                col = col.replace(old, new)
        return col
    @classmethod
    def from_file(cls, filename, **kwargs):
        filename = os.path.expandvars(filename)
        with open(filename) as f:
            translationDefinition = yaml.safe_load(f)

        return cls.from_yaml(translationDefinition, **kwargs)
    @classmethod
    def from_yaml(cls, translationDefinition, **kwargs):
        funcs = {}
        for func, val in translationDefinition['funcs'].items():
            funcs[func] = init_fromDict(val, name=func)

        if 'flag_rename_rules' in translationDefinition:
            renameRules = translationDefinition['flag_rename_rules']
        else:
            renameRules = None

        if 'calexpFlags' in translationDefinition:
            for flag in translationDefinition['calexpFlags']:
                funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='calexp')

        if 'refFlags' in translationDefinition:
            for flag in translationDefinition['refFlags']:
                funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='ref')

        if 'forcedFlags' in translationDefinition:
            for flag in translationDefinition['forcedFlags']:
                funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='forced_src')

        if 'flags' in translationDefinition:
            for flag in translationDefinition['flags']:
                funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='meas')

        return cls(funcs, **kwargs)
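
    # A sketch of the YAML layout consumed by `from_yaml`/`from_file`, based
    # on the keys handled above (functor and column names are hypothetical):
    #
    #     funcs:
    #         psfMag:
    #             functor: Mag
    #             args: base_PsfFlux
    #     flags:
    #         - base_PixelFlags_flag_saturated
    #     refFlags:
    #         - detect_isPrimary
    #     flag_rename_rules:
    #         - ['base_PixelFlags_flag', 'pixelFlags']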
605 """Evaluate an expression on a DataFrame, knowing what the 'mag' function means
607 Builds on `pandas.DataFrame.eval`, which parses and executes math on dataframes.
611 df : pandas.DataFrame
612 Dataframe on which to evaluate expression.
618 expr_new = re.sub(
r'mag\((\w+)\)',
r'-2.5*log(\g<1>)/log(10)', expr)
619 val = df.eval(expr_new)
620 except Exception
as e:
621 log.error(
"Exception in mag_aware_eval: %s: %s", type(e).__name__, e)
622 expr_new = re.sub(
r'mag\((\w+)\)',
r'-2.5*log(\g<1>_instFlux)/log(10)', expr)
623 val = df.eval(expr_new)
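
# Usage sketch (hypothetical column names): 'mag(base_PsfFlux)' is rewritten
# to '-2.5*log(base_PsfFlux)/log(10)'; if evaluation fails, the substitution
# is retried with the '_instFlux' suffix appended.
#
#     val = mag_aware_eval(df, 'mag(base_PsfFlux) - mag(base_GaussianFlux)', log)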
628 """Arbitrary computation on a catalog
630 Column names (and thus the columns to be loaded
from catalog) are found
631 by finding all words
and trying to ignore all
"math-y" words.
636 Expression to evaluate, to be parsed
and executed by `mag_aware_eval`.
638 _ignore_words = ('mag',
'sin',
'cos',
'exp',
'log',
'sqrt')
650 flux_cols = re.findall(
r'mag\(\s*(\w+)\s*\)', self.
expr)
652 cols = [c
for c
in re.findall(
r'[a-zA-Z_]+', self.
expr)
if c
not in self.
_ignore_words]
655 if not re.search(
'_instFlux$', c):
656 cols.append(f
'{c}_instFlux')
661 return list(set([c
for c
in cols
if c
not in not_a_col]))
668 """Get column with specified name
688 """Return the value of the index for each object
691 columns = ['coord_ra']
692 _defaultDataset =
'ref'
696 return pd.Series(df.index, index=df.index)
class IDColumn(Column):
    col = 'id'
    _allow_difference = False

    def _func(self, df):
        return pd.Series(df.index, index=df.index)


class FootprintNPix(Column):
    col = 'base_Footprint_nPix'
713 """Base class for coordinate column, in degrees
722 output = df[self.
col] * 180 / np.pi
if self.
_radians else df[self.
col]
727 """Right Ascension, in degrees
733 super().
__init__(
'coord_ra', **kwargs)
736 return super().
__call__(catalog, **kwargs)
740 """Declination, in degrees
746 super().
__init__(
'coord_dec', **kwargs)
749 return super().
__call__(catalog, **kwargs)
753 """Compute the level 20 HtmIndex for the catalog.
757 This functor was implemented to satisfy requirements of old APDB interface
758 which required ``pixelId`` column in DiaObject
with HTM20 index. APDB
759 interface had migrated to
not need that information, but we keep this
760 class in case it may be useful for something else.
775 def computePixel(row):
784 return self.
pixelator.index(sphPoint.getVector())
786 return df.apply(computePixel, axis=1, result_type=
'reduce').astype(
'int64')
def fluxName(col):
    if not col.endswith('_instFlux'):
        col += '_instFlux'
    return col


def fluxErrName(col):
    if not col.endswith('_instFluxErr'):
        col += '_instFluxErr'
    return col
802 """Compute calibrated magnitude
804 Takes a `calib` argument, which returns the flux at mag=0
805 as `calib.getFluxMag0()`. If
not provided, then the default
806 `fluxMag0`
is 63095734448.0194, which
is default
for HSC.
807 This default should be removed
in DM-21955
809 This calculation hides warnings about invalid values
and dividing by zero.
811 As
for all functors, a `dataset`
and `filt` kwarg should be provided upon
812 initialization. Unlike the default `Functor`, however, the default dataset
813 for a `Mag`
is `
'meas'`, rather than `
'ref'`.
818 Name of flux column
from which to compute magnitude. Can be parseable
819 by `lsst.pipe.tasks.functors.fluxName` function---that
is, you can
pass
820 `
'modelfit_CModel'` instead of `
'modelfit_CModel_instFlux'`)
and it will
822 calib : `lsst.afw.image.calib.Calib` (optional)
823 Object that knows zero point.
825 _defaultDataset = 'meas'
830 if calib
is not None:
843 with np.warnings.catch_warnings():
844 np.warnings.filterwarnings(
'ignore',
r'invalid value encountered')
845 np.warnings.filterwarnings(
'ignore',
r'divide by zero')
850 return f
'mag_{self.col}'
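
    # Worked check of the arithmetic above: with the default HSC fluxMag0 of
    # 63095734448.0194 counts, a source with instFlux = 63095.734448 counts
    # is 1e-6 of the zero-point flux, so -2.5*log10(1e-6) = 15.0 mag.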
854 """Compute calibrated magnitude uncertainty
861 calib : `lsst.afw.image.calib.Calib` (optional)
862 Object that knows zero point.
867 if self.
calib is not None:
874 return [self.
col, self.
col +
'Err']
877 with np.warnings.catch_warnings():
878 np.warnings.filterwarnings(
'ignore',
r'invalid value encountered')
879 np.warnings.filterwarnings(
'ignore',
r'divide by zero')
881 x = df[fluxErrCol] / df[fluxCol]
883 magErr = (2.5 / np.log(10.)) * np.sqrt(x*x + y*y)
888 return super().name +
'_err'
class MagDiff(Functor):
    """Functor to calculate magnitude difference."""
    _defaultDataset = 'meas'

    def __init__(self, col1, col2, **kwargs):
        self.col1 = fluxName(col1)
        self.col2 = fluxName(col2)
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.col1, self.col2]

    def _func(self, df):
        with np.warnings.catch_warnings():
            np.warnings.filterwarnings('ignore', r'invalid value encountered')
            np.warnings.filterwarnings('ignore', r'divide by zero')
            return -2.5*np.log10(df[self.col1]/df[self.col2])

    @property
    def name(self):
        return f'(mag_{self.col1} - mag_{self.col2})'

    @property
    def shortname(self):
        return f'magDiff_{self.col1}_{self.col2}'
929 """Compute the color between two filters
931 Computes color by initializing two different `Mag`
932 functors based on the `col` and filters provided,
and
933 then returning the difference.
935 This
is enabled by the `_func` expecting a dataframe
with a
936 multilevel column index,
with both `
'band'`
and `
'column'`,
937 instead of just `
'column'`, which
is the `Functor` default.
938 This
is controlled by the `_dfLevels` attribute.
940 Also of note, the default dataset
for `Color`
is `forced_src
'`,
941 whereas for `Mag` it
is `
'meas'`.
946 Name of flux column
from which to compute; same
as would be passed to
950 Filters
from which to compute magnitude difference.
951 Color computed
is `
Mag(filt2) -
Mag(filt1)`.
953 _defaultDataset = 'forced_src'
954 _dfLevels = (
'band',
'column')
960 raise RuntimeError(
"Cannot compute Color for %s: %s - %s " % (col, filt2, filt1))
978 mag2 = self.mag2._func(df[self.filt2])
979 mag1 = self.mag1._func(df[self.filt1])
984 return [self.
mag1.col, self.
mag2.col]
991 return f
'{self.filt2} - {self.filt1} ({self.col})'
995 return f
"{self.col}_{self.filt2.replace('-', '')}m{self.filt1.replace('-', '')}"
999 """Main function of this subclass is to override the dropna=True
1001 _null_label = 'null'
1002 _allow_difference =
False
1007 return super().
__call__(parq, dropna=
False, **kwargs)
1011 _columns = [
"base_ClassificationExtendedness_value"]
1012 _column =
"base_ClassificationExtendedness_value"
    def _func(self, df):
        x = df[self._column]
        mask = x.isnull()
        test = (x < 0.5).astype(int)
        test = test.mask(mask, 2)

        # Codes: 0 = extended (galaxy), 1 = compact (star), 2 = null.
        categories = ['galaxy', 'star', self._null_label]
        label = pd.Series(pd.Categorical.from_codes(test, categories=categories),
                          index=x.index, name='label')
        if self._force_str:
            label = label.astype(str)
        return label
class NumStarLabeller(Labeller):
    _columns = ['numStarFlags']
    labels = {"star": 0, "maybe": 1, "notStar": 2}

    def _func(self, df):
        x = df[self._columns[0]]

        # Number of filters
        n = len(x.unique()) - 1

        labels = ['noStar', 'maybe', 'star']
        label = pd.Series(pd.cut(x, [-1, 0, n-1, n], labels=labels),
                          index=x.index, name='label')

        if self._force_str:
            label = label.astype(str)

        return label
class DeconvolvedMoments(Functor):
    name = 'Deconvolved Moments'
    shortname = 'deconvolvedMoments'
    _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
                "ext_shapeHSM_HsmSourceMoments_yy",
                "base_SdssShape_xx", "base_SdssShape_yy",
                "ext_shapeHSM_HsmPsfMoments_xx",
                "ext_shapeHSM_HsmPsfMoments_yy")

    def _func(self, df):
        """Calculate deconvolved moments."""
        if "ext_shapeHSM_HsmSourceMoments_xx" in df.columns:
            hsm = df["ext_shapeHSM_HsmSourceMoments_xx"] + df["ext_shapeHSM_HsmSourceMoments_yy"]
        else:
            hsm = np.ones(len(df))*np.nan
        sdss = df["base_SdssShape_xx"] + df["base_SdssShape_yy"]
        if "ext_shapeHSM_HsmPsfMoments_xx" in df.columns:
            psf = df["ext_shapeHSM_HsmPsfMoments_xx"] + df["ext_shapeHSM_HsmPsfMoments_yy"]
        else:
            raise RuntimeError('No psf shape parameter found in catalog')

        return hsm.where(np.isfinite(hsm), sdss) - psf
1078 """Functor to calculate SDSS trace radius size for sources"""
1079 name =
"SDSS Trace Size"
1080 shortname =
'sdssTrace'
1081 _columns = (
"base_SdssShape_xx",
"base_SdssShape_yy")
1083 def _func(self, df):
1084 srcSize = np.sqrt(0.5*(df[
"base_SdssShape_xx"] + df[
"base_SdssShape_yy"]))
1089 """Functor to calculate SDSS trace radius size difference (%) between object and psf model"""
1090 name =
"PSF - SDSS Trace Size"
1091 shortname =
'psf_sdssTrace'
1092 _columns = (
"base_SdssShape_xx",
"base_SdssShape_yy",
1093 "base_SdssShape_psf_xx",
"base_SdssShape_psf_yy")
1095 def _func(self, df):
1096 srcSize = np.sqrt(0.5*(df[
"base_SdssShape_xx"] + df[
"base_SdssShape_yy"]))
1097 psfSize = np.sqrt(0.5*(df[
"base_SdssShape_psf_xx"] + df[
"base_SdssShape_psf_yy"]))
1098 sizeDiff = 100*(srcSize - psfSize)/(0.5*(srcSize + psfSize))
1103 """Functor to calculate HSM trace radius size for sources"""
1104 name =
'HSM Trace Size'
1105 shortname =
'hsmTrace'
1106 _columns = (
"ext_shapeHSM_HsmSourceMoments_xx",
1107 "ext_shapeHSM_HsmSourceMoments_yy")
1109 def _func(self, df):
1110 srcSize = np.sqrt(0.5*(df[
"ext_shapeHSM_HsmSourceMoments_xx"]
1111 + df[
"ext_shapeHSM_HsmSourceMoments_yy"]))
1116 """Functor to calculate HSM trace radius size difference (%) between object and psf model"""
1117 name =
'PSF - HSM Trace Size'
1118 shortname =
'psf_HsmTrace'
1119 _columns = (
"ext_shapeHSM_HsmSourceMoments_xx",
1120 "ext_shapeHSM_HsmSourceMoments_yy",
1121 "ext_shapeHSM_HsmPsfMoments_xx",
1122 "ext_shapeHSM_HsmPsfMoments_yy")
1124 def _func(self, df):
1125 srcSize = np.sqrt(0.5*(df[
"ext_shapeHSM_HsmSourceMoments_xx"]
1126 + df[
"ext_shapeHSM_HsmSourceMoments_yy"]))
1127 psfSize = np.sqrt(0.5*(df[
"ext_shapeHSM_HsmPsfMoments_xx"]
1128 + df[
"ext_shapeHSM_HsmPsfMoments_yy"]))
1129 sizeDiff = 100*(srcSize - psfSize)/(0.5*(srcSize + psfSize))
class HsmFwhm(Functor):
    name = 'HSM Psf FWHM'
    _columns = ('ext_shapeHSM_HsmPsfMoments_xx', 'ext_shapeHSM_HsmPsfMoments_yy')
    # Arcseconds per pixel and Gaussian sigma-to-FWHM conversion factor.
    pixelScale = 0.168
    SIGMA2FWHM = 2*np.sqrt(2*np.log(2))

    def _func(self, df):
        return self.pixelScale*self.SIGMA2FWHM*np.sqrt(
            0.5*(df['ext_shapeHSM_HsmPsfMoments_xx'] + df['ext_shapeHSM_HsmPsfMoments_yy']))
class E1(Functor):
    name = "Distortion Ellipticity (e1)"
    shortname = "Distortion"

    def __init__(self, colXX, colXY, colYY, **kwargs):
        self.colXX = colXX
        self.colXY = colXY
        self.colYY = colYY
        self._columns = [self.colXX, self.colXY, self.colYY]
        super().__init__(**kwargs)

    def _func(self, df):
        # Standard distortion definition: e1 = (Ixx - Iyy)/(Ixx + Iyy).
        return (df[self.colXX] - df[self.colYY]) / (df[self.colXX] + df[self.colYY])


class E2(Functor):
    name = "Ellipticity e2"

    def __init__(self, colXX, colXY, colYY, **kwargs):
        self.colXX = colXX
        self.colXY = colXY
        self.colYY = colYY
        self._columns = [self.colXX, self.colXY, self.colYY]
        super().__init__(**kwargs)

    def _func(self, df):
        # Standard distortion definition: e2 = 2*Ixy/(Ixx + Iyy).
        return 2*df[self.colXY] / (df[self.colXX] + df[self.colYY])


class RadiusFromQuadrupole(Functor):

    def __init__(self, colXX, colXY, colYY, **kwargs):
        self.colXX = colXX
        self.colXY = colXY
        self.colYY = colYY
        self._columns = [self.colXX, self.colXY, self.colYY]
        super().__init__(**kwargs)

    def _func(self, df):
        # Determinant radius: |Q|^(1/4) = (Ixx*Iyy - Ixy^2)^(1/4).
        return (df[self.colXX]*df[self.colYY] - df[self.colXY]**2)**0.25
1198 """Computations using the stored localWcs.
1200 name = "LocalWcsOperations"
1215 """Compute the distance on the sphere from x2, y1 to x1, y1.
1223 cd11 : `pandas.Series`
1224 [1, 1] element of the local Wcs affine transform.
1225 cd11 : `pandas.Series`
1226 [1, 1] element of the local Wcs affine transform.
1227 cd12 : `pandas.Series`
1228 [1, 2] element of the local Wcs affine transform.
1229 cd21 : `pandas.Series`
1230 [2, 1] element of the local Wcs affine transform.
1231 cd22 : `pandas.Series`
1232 [2, 2] element of the local Wcs affine transform.
1237 RA and dec conversion of x
and y given the local Wcs. Returned
1238 units are
in radians.
1241 return (x * cd11 + y * cd12, x * cd21 + y * cd22)
1244 """Compute the local pixel scale conversion.
1248 ra1 : `pandas.Series`
1249 Ra of the first coordinate in radians.
1250 dec1 : `pandas.Series`
1251 Dec of the first coordinate
in radians.
1252 ra2 : `pandas.Series`
1253 Ra of the second coordinate
in radians.
1254 dec2 : `pandas.Series`
1255 Dec of the second coordinate
in radians.
1259 dist : `pandas.Series`
1260 Distance on the sphere
in radians.
1262 deltaDec = dec2 - dec1
1264 return 2 * np.arcsin(
1266 np.sin(deltaDec / 2) ** 2
1267 + np.cos(dec2) * np.cos(dec1) * np.sin(deltaRa / 2) ** 2))
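
    # Note: this is the haversine formula, which is numerically stabler for
    # small separations than arccos of the dot product; e.g. two points one
    # arcsecond apart along a meridian (deltaRa = 0) return exactly
    # 2*arcsin(sin(0.5 arcsec)) = 1 arcsec.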
1270 """Compute the distance on the sphere from x2, y1 to x1, y1.
1274 x1 : `pandas.Series`
1276 y1 : `pandas.Series`
1278 x2 : `pandas.Series`
1280 y2 : `pandas.Series`
1282 cd11 : `pandas.Series`
1283 [1, 1] element of the local Wcs affine transform.
1284 cd11 : `pandas.Series`
1285 [1, 1] element of the local Wcs affine transform.
1286 cd12 : `pandas.Series`
1287 [1, 2] element of the local Wcs affine transform.
1288 cd21 : `pandas.Series`
1289 [2, 1] element of the local Wcs affine transform.
1290 cd22 : `pandas.Series`
1291 [2, 2] element of the local Wcs affine transform.
1295 Distance : `pandas.Series`
1296 Arcseconds per pixel at the location of the local WC
1305 """Compute the local pixel scale from the stored CDMatrix.
1317 """Compute the local pixel to scale conversion in arcseconds.
1321 cd11 : `pandas.Series`
1322 [1, 1] element of the local Wcs affine transform in radians.
1323 cd11 : `pandas.Series`
1324 [1, 1] element of the local Wcs affine transform
in radians.
1325 cd12 : `pandas.Series`
1326 [1, 2] element of the local Wcs affine transform
in radians.
1327 cd21 : `pandas.Series`
1328 [2, 1] element of the local Wcs affine transform
in radians.
1329 cd22 : `pandas.Series`
1330 [2, 2] element of the local Wcs affine transform
in radians.
1334 pixScale : `pandas.Series`
1335 Arcseconds per pixel at the location of the local WC
1337 return 3600 * np.degrees(np.sqrt(np.fabs(cd11 * cd22 - cd12 * cd21)))
1339 def _func(self, df):
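
    # Worked check of the determinant formula above (hypothetical CD values):
    # a diagonal CD matrix with cd11 = cd22 = 9.7e-7 rad/pixel and zero cross
    # terms gives 3600*degrees(9.7e-7) ≈ 0.2 arcsec/pixel.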
1347 """Convert a value in units pixels squared to units arcseconds squared.
1366 return f
"{self.col}_asArcseconds"
1376 def _func(self, df):
1384 """Convert a value in units pixels to units arcseconds.
1403 return f
"{self.col}_asArcsecondsSq"
1413 def _func(self, df):
1418 return df[self.
col] * pixScale * pixScale
class ReferenceBand(Functor):
    name = 'Reference Band'
    shortname = 'refBand'

    @property
    def columns(self):
        return ["merge_measurement_i",
                "merge_measurement_r",
                "merge_measurement_z",
                "merge_measurement_y",
                "merge_measurement_g",
                "merge_measurement_u"]

    def _func(self, df: pd.DataFrame) -> pd.Series:
        def getFilterAliasName(row):
            # Get column name with the max value (True > False).
            colName = row.idxmax()
            return colName.replace('merge_measurement_', '')

        # Skip columns that are unavailable, because this functor requests
        # the superset of bands that could be included in the object table.
        columns = [col for col in self.columns if col in df.columns]
        # Makes a Series of dtype object if df is empty.
        return df[columns].apply(getFilterAliasName, axis=1,
                                 result_type='reduce').astype('object')
class Photometry(Functor):
    AB_FLUX_SCALE = (0 * u.ABmag).to_value(u.nJy)
    LOG_AB_FLUX_SCALE = 12.56
    FIVE_OVER_2LOG10 = 1.085736204758129569
    # TO DO: DM-21955 Replace hard coded photometric calibration values
    COADD_ZP = 27

    def __init__(self, colFlux, colFluxErr=None, calib=None, **kwargs):
        self.vhypot = np.vectorize(self.hypot)
        self.col = colFlux
        self.colFluxErr = colFluxErr

        self.calib = calib
        if calib is not None:
            self.fluxMag0, self.fluxMag0Err = calib.getFluxMag0()
        else:
            self.fluxMag0 = 1./np.power(10, -0.4*self.COADD_ZP)
            self.fluxMag0Err = 0.

        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.col]

    @property
    def name(self):
        return f'mag_{self.col}'

    @classmethod
    def hypot(cls, a, b):
        if np.abs(a) < np.abs(b):
            a, b = b, a
        if a == 0.:
            return 0.
        q = b/a
        return np.abs(a) * np.sqrt(1. + q*q)

    def dn2flux(self, dn, fluxMag0):
        return self.AB_FLUX_SCALE * dn / fluxMag0

    def dn2mag(self, dn, fluxMag0):
        with np.warnings.catch_warnings():
            np.warnings.filterwarnings('ignore', r'invalid value encountered')
            np.warnings.filterwarnings('ignore', r'divide by zero')
            return -2.5 * np.log10(dn/fluxMag0)

    def dn2fluxErr(self, dn, dnErr, fluxMag0, fluxMag0Err):
        retVal = self.vhypot(dn * fluxMag0Err, dnErr * fluxMag0)
        retVal *= self.AB_FLUX_SCALE / fluxMag0 / fluxMag0
        return retVal

    def dn2MagErr(self, dn, dnErr, fluxMag0, fluxMag0Err):
        retVal = self.dn2fluxErr(dn, dnErr, fluxMag0, fluxMag0Err) / self.dn2flux(dn, fluxMag0)
        return self.FIVE_OVER_2LOG10 * retVal


class NanoJansky(Photometry):
    def _func(self, df):
        return self.dn2flux(df[self.col], self.fluxMag0)


class NanoJanskyErr(Photometry):
    @property
    def columns(self):
        return [self.col, self.colFluxErr]

    def _func(self, df):
        retArr = self.dn2fluxErr(df[self.col], df[self.colFluxErr], self.fluxMag0, self.fluxMag0Err)
        return pd.Series(retArr, index=df.index)


class Magnitude(Photometry):
    def _func(self, df):
        return self.dn2mag(df[self.col], self.fluxMag0)


class MagnitudeErr(Photometry):
    @property
    def columns(self):
        return [self.col, self.colFluxErr]

    def _func(self, df):
        retArr = self.dn2MagErr(df[self.col], df[self.colFluxErr], self.fluxMag0, self.fluxMag0Err)
        return pd.Series(retArr, index=df.index)
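
# Sanity check of the constants above: AB_FLUX_SCALE is the flux of a 0-mag
# AB source in nJy, (0*u.ABmag).to_value(u.nJy) ≈ 3.631e12, whose log10 is
# ≈ 12.56, matching LOG_AB_FLUX_SCALE; FIVE_OVER_2LOG10 is 2.5/ln(10).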
1537 """Base class for calibrating the specified instrument flux column using
1538 the local photometric calibration.
1543 Name of the instrument flux column.
1544 instFluxErrCol : `str`
1545 Name of the assocated error columns for ``instFluxCol``.
1546 photoCalibCol : `str`
1547 Name of local calibration column.
1548 photoCalibErrCol : `str`
1549 Error associated
with ``photoCalibCol``
1559 logNJanskyToAB = (1 * u.nJy).to_value(u.ABmag)
1574 """Convert instrument flux to nanojanskys.
1578 instFlux : `numpy.ndarray` or `pandas.Series`
1579 Array of instrument flux measurements
1580 localCalib : `numpy.ndarray`
or `pandas.Series`
1581 Array of local photometric calibration estimates.
1585 calibFlux : `numpy.ndarray`
or `pandas.Series`
1586 Array of calibrated flux measurements.
1588 return instFlux * localCalib
1591 """Convert instrument flux to nanojanskys.
1595 instFlux : `numpy.ndarray` or `pandas.Series`
1596 Array of instrument flux measurements
1597 instFluxErr : `numpy.ndarray`
or `pandas.Series`
1598 Errors on associated ``instFlux`` values
1599 localCalib : `numpy.ndarray`
or `pandas.Series`
1600 Array of local photometric calibration estimates.
1601 localCalibErr : `numpy.ndarray`
or `pandas.Series`
1602 Errors on associated ``localCalib`` values
1606 calibFluxErr : `numpy.ndarray`
or `pandas.Series`
1607 Errors on calibrated flux measurements.
1609 return np.hypot(instFluxErr * localCalib, instFlux * localCalibErr)
1612 """Convert instrument flux to nanojanskys.
1616 instFlux : `numpy.ndarray` or `pandas.Series`
1617 Array of instrument flux measurements
1618 localCalib : `numpy.ndarray`
or `pandas.Series`
1619 Array of local photometric calibration estimates.
1623 calibMag : `numpy.ndarray`
or `pandas.Series`
1624 Array of calibrated AB magnitudes.
1629 """Convert instrument flux err to nanojanskys.
1633 instFlux : `numpy.ndarray` or `pandas.Series`
1634 Array of instrument flux measurements
1635 instFluxErr : `numpy.ndarray`
or `pandas.Series`
1636 Errors on associated ``instFlux`` values
1637 localCalib : `numpy.ndarray`
or `pandas.Series`
1638 Array of local photometric calibration estimates.
1639 localCalibErr : `numpy.ndarray`
or `pandas.Series`
1640 Errors on associated ``localCalib`` values
1644 calibMagErr: `numpy.ndarray`
or `pandas.Series`
1645 Error on calibrated AB magnitudes.
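
    # Worked sketch of the conversions above (hypothetical values): a local
    # calibration of 0.5 nJy/count turns instFlux = 2000 counts into 1000 nJy,
    # i.e. -2.5*log10(1000) + logNJanskyToAB ≈ -7.5 + 31.4 = 23.9 AB mag.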
1652 """Compute calibrated fluxes using the local calibration value.
1668 return f
'flux_{self.instFluxCol}'
1670 def _func(self, df):
1675 """Compute calibrated flux errors using the local calibration value.
1692 return f
'fluxErr_{self.instFluxCol}'
1694 def _func(self, df):
1700 """Compute calibrated AB magnitudes using the local calibration value.
1716 return f
'mag_{self.instFluxCol}'
1718 def _func(self, df):
1724 """Compute calibrated AB magnitude errors using the local calibration value.
1741 return f
'magErr_{self.instFluxCol}'
1743 def _func(self, df):
1751 """Compute absolute mean of dipole fluxes.
1760 LocalDipoleMeanFluxErr
1762 LocalDipoleDiffFluxErr
1792 return f
'dipMeanFlux_{self.instFluxPosCol}_{self.instFluxNegCol}'
1794 def _func(self, df):
1800 """Compute the error on the absolute mean of dipole fluxes.
1809 LocalDipoleMeanFluxErr
1811 LocalDipoleDiffFluxErr
1825 return f
'dipMeanFluxErr_{self.instFluxPosCol}_{self.instFluxNegCol}'
1827 def _func(self, df):
1836 """Compute the absolute difference of dipole fluxes.
1838 Value is (abs(pos) - abs(neg))
1847 LocalDipoleMeanFluxErr
1849 LocalDipoleDiffFluxErr
1860 return f
'dipDiffFlux_{self.instFluxPosCol}_{self.instFluxNegCol}'
1862 def _func(self, df):
1868 """Compute the error on the absolute difference of dipole fluxes.
1877 LocalDipoleMeanFluxErr
1879 LocalDipoleDiffFluxErr
1893 return f
'dipDiffFluxErr_{self.instFluxPosCol}_{self.instFluxNegCol}'
1895 def _func(self, df):
1904 """Base class for returning the ratio of 2 columns.
1906 Can be used to compute a Signal to Noise ratio for any input flux.
1911 Name of the column to use at the numerator
in the ratio
1913 Name of the column to use
as the denominator
in the ratio.
1929 return f
'ratio_{self.numerator}_{self.denominator}'
1931 def _func(self, df):
1932 with np.warnings.catch_warnings():
1933 np.warnings.filterwarnings(
'ignore',
r'invalid value encountered')
1934 np.warnings.filterwarnings(
'ignore',
r'divide by zero')
1939 """Compute E(B-V) from dustmaps.sfd
1941 _defaultDataset = 'ref'
1947 from dustmaps.sfd
import SFDQuery
1948 self.
_columns = [
'coord_ra',
'coord_dec']
1952 def _func(self, df):
1953 coords = SkyCoord(df[
'coord_ra']*u.rad, df[
'coord_dec']*u.rad)
1954 ebv = self.
sfd(coords)
1957 return pd.Series(ebv, index=df.index).astype(
'float64')