lsst.pipe.tasks g167d89548d+3a17f43c43
Loading...
Searching...
No Matches
functors.py
Go to the documentation of this file.
1# This file is part of pipe_tasks.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <https://www.gnu.org/licenses/>.
21
22__all__ = ["Functor", "CompositeFunctor", "CustomFunctor", "Column", "Index",
23 "IDColumn", "FootprintNPix", "CoordColumn", "RAColumn", "DecColumn",
24 "HtmIndex20", "Mag", "MagErr", "NanoMaggie", "MagDiff", "Color",
25 "Labeller", "StarGalaxyLabeller", "NumStarLabeller", "DeconvolvedMoments",
26 "SdssTraceSize", "PsfSdssTraceSizeDiff", "HsmTraceSize", "PsfHsmTraceSizeDiff",
27 "HsmFwhm", "E1", "E2", "RadiusFromQuadrupole", "LocalWcs", "ComputePixelScale",
28 "ConvertPixelToArcseconds", "ConvertPixelSqToArcsecondsSq", "ReferenceBand",
29 "Photometry", "NanoJansky", "NanoJanskyErr", "Magnitude", "MagnitudeErr",
30 "LocalPhotometry", "LocalNanojansky", "LocalNanojanskyErr",
31 "LocalMagnitude", "LocalMagnitudeErr", "LocalDipoleMeanFlux",
32 "LocalDipoleMeanFluxErr", "LocalDipoleDiffFlux", "LocalDipoleDiffFluxErr",
33 "Ratio", "Ebv"]
34
import logging
import os.path
import re
import warnings
from itertools import product

import astropy.units as u
import numpy as np
import pandas as pd
import yaml
from astropy.coordinates import SkyCoord

import lsst.geom as geom
import lsst.sphgeom as sphgeom
from lsst.daf.butler import DeferredDatasetHandle
from lsst.utils import doImport

from .parquetTable import ParquetTable, MultilevelParquetTable
52
53
def init_fromDict(initDict, basePath='lsst.pipe.tasks.functors',
                  typeKey='functor', name=None):
    """Initialize an object defined in a dictionary

    The object needs to be importable as
        f'{basePath}.{initDict[typeKey]}'
    The positional and keyword arguments (if any) are contained in
    "args" and "kwargs" entries in the dictionary, respectively.
    This is used in `functors.CompositeFunctor.from_yaml` to initialize
    a composite functor from a specification in a YAML file.

    Parameters
    ----------
    initDict : dictionary
        Dictionary describing object's initialization. Must contain
        an entry keyed by ``typeKey`` that is the name of the object,
        relative to ``basePath``.
    basePath : str
        Path relative to module in which ``initDict[typeKey]`` is defined.
    typeKey : str
        Key of ``initDict`` that is the name of the object
        (relative to ``basePath``).
    name : str, optional
        Name of the functor being constructed; used only to make the error
        message more informative if construction fails.
    """
    initDict = initDict.copy()
    # TO DO: DM-21956 We should be able to define functors outside this module
    pythonType = doImport(f'{basePath}.{initDict.pop(typeKey)}')
    args = []
    if 'args' in initDict:
        args = initDict.pop('args')
        if isinstance(args, str):
            args = [args]
    try:
        # Everything left in initDict after popping typeKey/args is kwargs.
        element = pythonType(*args, **initDict)
    except Exception as e:
        message = f'Error in constructing functor "{name}" of type {pythonType.__name__} with args: {args}'
        # Chain the original exception so its traceback is preserved.
        raise type(e)(message, e.args) from e
    return element
91
92
class Functor(object):
    """Define and execute a calculation on a ParquetTable

    The `__call__` method accepts either a `ParquetTable` object or a
    `DeferredDatasetHandle`, and returns the
    result of the calculation as a single column. Each functor defines what
    columns are needed for the calculation, and only these columns are read
    from the `ParquetTable`.

    The action of `__call__` consists of two steps: first, loading the
    necessary columns from disk into memory as a `pandas.DataFrame` object;
    and second, performing the computation on this dataframe and returning the
    result.


    To define a new `Functor`, a subclass must define a `_func` method,
    that takes a `pandas.DataFrame` and returns result in a `pandas.Series`.
    In addition, it must define the following attributes

    * `_columns`: The columns necessary to perform the calculation
    * `name`: A name appropriate for a figure axis label
    * `shortname`: A name appropriate for use as a dictionary key

    On initialization, a `Functor` should declare what band (`filt` kwarg)
    and dataset (e.g. `'ref'`, `'meas'`, `'forced_src'`) it is intended to be
    applied to. This enables the `_get_data` method to extract the proper
    columns from the parquet file. If not specified, the dataset will fall back
    on the `_defaultDataset` attribute. If band is not specified and `dataset`
    is anything other than `'ref'`, then an error will be raised when trying to
    perform the calculation.

    Originally, `Functor` was set up to expect
    datasets formatted like the `deepCoadd_obj` dataset; that is, a
    dataframe with a multi-level column index, with the levels of the
    column index being `band`, `dataset`, and `column`.
    It has since been generalized to apply to dataframes without multi-level
    indices and multi-level indices with just `dataset` and `column` levels.
    In addition, the `_get_data` method that reads
    the dataframe from the `ParquetTable` will return a dataframe with column
    index levels defined by the `_dfLevels` attribute; by default, this is
    `column`.

    The `_dfLevels` attributes should generally not need to
    be changed, unless `_func` needs columns from multiple filters or datasets
    to do the calculation.
    An example of this is the `lsst.pipe.tasks.functors.Color` functor, for
    which `_dfLevels = ('band', 'column')`, and `_func` expects the dataframe
    it gets to have those levels in the column index.

    Parameters
    ----------
    filt : str
        Filter upon which to do the calculation

    dataset : str
        Dataset upon which to do the calculation
        (e.g., 'ref', 'meas', 'forced_src').

    """

    # Dataset used when the caller does not specify one.
    _defaultDataset = 'ref'
    # Column-index levels kept in the dataframe handed to `_func`.
    _dfLevels = ('column',)
    # Default for the `noDup` property when not set at construction.
    _defaultNoDup = False

    def __init__(self, filt=None, dataset=None, noDup=None):
        self.filt = filt
        self.dataset = dataset if dataset is not None else self._defaultDataset
        self._noDup = noDup
        self.log = logging.getLogger(type(self).__name__)

    @property
    def noDup(self):
        # Explicit constructor argument wins over the class default.
        if self._noDup is not None:
            return self._noDup
        else:
            return self._defaultNoDup

    @property
    def columns(self):
        """Columns required to perform calculation
        """
        # Subclasses either set `_columns` or override this property.
        if not hasattr(self, '_columns'):
            raise NotImplementedError('Must define columns property or _columns attribute')
        return self._columns

    def _get_data_columnLevels(self, data, columnIndex=None):
        """Gets the names of the column index levels

        This should only be called in the context of a multilevel table.
        The logic here is to enable this to work both with the gen2 `MultilevelParquetTable`
        and with the gen3 `DeferredDatasetHandle`.

        Parameters
        ----------
        data : `MultilevelParquetTable` or `DeferredDatasetHandle`

        columnIndex (optional): pandas `Index` object
            if not passed, then it is read from the `DeferredDatasetHandle`
        """
        if isinstance(data, DeferredDatasetHandle):
            if columnIndex is None:
                columnIndex = data.get(component="columns")
        # If we have a columnIndex (passed in, or just read), use its names directly.
        if columnIndex is not None:
            return columnIndex.names
        if isinstance(data, MultilevelParquetTable):
            return data.columnLevels
        else:
            raise TypeError(f"Unknown type for data: {type(data)}!")

    def _get_data_columnLevelNames(self, data, columnIndex=None):
        """Gets the content of each of the column levels for a multilevel table

        Similar to `_get_data_columnLevels`, this enables backward compatibility with gen2.

        Mirrors original gen2 implementation within `pipe.tasks.parquetTable.MultilevelParquetTable`
        """
        if isinstance(data, DeferredDatasetHandle):
            if columnIndex is None:
                columnIndex = data.get(component="columns")
        if columnIndex is not None:
            columnLevels = columnIndex.names
            # Map each level name to the sorted unique values it takes in the index.
            columnLevelNames = {
                level: list(np.unique(np.array([c for c in columnIndex])[:, i]))
                for i, level in enumerate(columnLevels)
            }
            return columnLevelNames
        if isinstance(data, MultilevelParquetTable):
            return data.columnLevelNames
        else:
            raise TypeError(f"Unknown type for data: {type(data)}!")

    def _colsFromDict(self, colDict, columnIndex=None):
        """Converts dictionary column specification to a list of columns

        This mirrors the original gen2 implementation within `pipe.tasks.parquetTable.MultilevelParquetTable`
        """
        new_colDict = {}
        # NOTE(review): `data=None` here relies on `columnIndex` being provided;
        # with both None, _get_data_columnLevels raises TypeError.
        columnLevels = self._get_data_columnLevels(None, columnIndex=columnIndex)

        for i, lev in enumerate(columnLevels):
            if lev in colDict:
                # Normalize a bare string to a one-element list.
                if isinstance(colDict[lev], str):
                    new_colDict[lev] = [colDict[lev]]
                else:
                    new_colDict[lev] = colDict[lev]
            else:
                # Level unspecified: take every value present in the index.
                new_colDict[lev] = columnIndex.levels[i]

        levelCols = [new_colDict[lev] for lev in columnLevels]
        # Cartesian product of the per-level selections, filtered to tuples
        # that actually exist in the column index.
        cols = list(product(*levelCols))
        colsAvailable = [col for col in cols if col in columnIndex]
        return colsAvailable

    def multilevelColumns(self, data, columnIndex=None, returnTuple=False):
        """Returns columns needed by functor from multilevel dataset

        To access tables with multilevel column structure, the `MultilevelParquetTable`
        or `DeferredDatasetHandle` need to be passed either a list of tuples or a
        dictionary.

        Parameters
        ----------
        data : `MultilevelParquetTable` or `DeferredDatasetHandle`

        columnIndex (optional): pandas `Index` object
            either passed or read in from `DeferredDatasetHandle`.

        `returnTuple` : bool
            If true, then return a list of tuples rather than the column dictionary
            specification. This is set to `True` by `CompositeFunctor` in order to be able to
            combine columns from the various component functors.

        """
        if isinstance(data, DeferredDatasetHandle) and columnIndex is None:
            columnIndex = data.get(component="columns")

        # Confirm that the dataset has the column levels the functor is expecting it to have.
        columnLevels = self._get_data_columnLevels(data, columnIndex)

        columnDict = {'column': self.columns,
                      'dataset': self.dataset}
        if self.filt is None:
            columnLevelNames = self._get_data_columnLevelNames(data, columnIndex)
            if "band" in columnLevels:
                if self.dataset == "ref":
                    # 'ref' values are band-independent, so any band will do.
                    columnDict["band"] = columnLevelNames["band"][0]
                else:
                    raise ValueError(f"'filt' not set for functor {self.name}"
                                     f"(dataset {self.dataset}) "
                                     "and ParquetTable "
                                     "contains multiple filters in column index. "
                                     "Set 'filt' or set 'dataset' to 'ref'.")
        else:
            columnDict['band'] = self.filt

        if isinstance(data, MultilevelParquetTable):
            # Gen2 table resolves the dictionary itself.
            return data._colsFromDict(columnDict)
        elif isinstance(data, DeferredDatasetHandle):
            if returnTuple:
                return self._colsFromDict(columnDict, columnIndex=columnIndex)
            else:
                return columnDict

    def _func(self, df, dropna=True):
        # Subclasses implement the actual computation here.
        # NOTE(review): `dropna` is unused in this base signature.
        raise NotImplementedError('Must define calculation on dataframe')

    def _get_columnIndex(self, data):
        """Return columnIndex
        """

        # Only a DeferredDatasetHandle can supply a column index component.
        if isinstance(data, DeferredDatasetHandle):
            return data.get(component="columns")
        else:
            return None

    def _get_data(self, data):
        """Retrieve dataframe necessary for calculation.

        The data argument can be a DataFrame, a ParquetTable instance, or a gen3 DeferredDatasetHandle

        Returns dataframe upon which `self._func` can act.

        N.B. while passing a raw pandas `DataFrame` *should* work here, it has not been tested.
        """
        if isinstance(data, pd.DataFrame):
            return data

        # First thing to do: check to see if the data source has a multilevel column index or not.
        columnIndex = self._get_columnIndex(data)
        is_multiLevel = isinstance(data, MultilevelParquetTable) or isinstance(columnIndex, pd.MultiIndex)

        # Simple single-level parquet table, gen2
        if isinstance(data, ParquetTable) and not is_multiLevel:
            columns = self.columns
            df = data.toDataFrame(columns=columns)
            return df

        # Get proper columns specification for this functor
        if is_multiLevel:
            columns = self.multilevelColumns(data, columnIndex=columnIndex)
        else:
            columns = self.columns

        if isinstance(data, MultilevelParquetTable):
            # Load in-memory dataframe with appropriate columns the gen2 way
            df = data.toDataFrame(columns=columns, droplevels=False)
        elif isinstance(data, DeferredDatasetHandle):
            # Load in-memory dataframe with appropriate columns the gen3 way
            df = data.get(parameters={"columns": columns})

        # Drop unnecessary column levels
        if is_multiLevel:
            df = self._setLevels(df)

        return df

    def _setLevels(self, df):
        # Drop every column-index level not listed in `_dfLevels`.
        levelsToDrop = [n for n in df.columns.names if n not in self._dfLevels]
        df.columns = df.columns.droplevel(levelsToDrop)
        return df

    def _dropna(self, vals):
        return vals.dropna()

    def __call__(self, data, dropna=False):
        try:
            df = self._get_data(data)
            vals = self._func(df)
        except Exception as e:
            self.log.error("Exception in %s call: %s: %s", self.name, type(e).__name__, e)
            # NOTE(review): if `_get_data` itself raised, `df` is unbound here
            # and this line raises NameError instead of returning fail values.
            vals = self.fail(df)
        if dropna:
            vals = self._dropna(vals)

        return vals

    def difference(self, data1, data2, **kwargs):
        """Computes difference between functor called on two different ParquetTable objects
        """
        return self(data1, **kwargs) - self(data2, **kwargs)

    def fail(self, df):
        # All-NaN series with the same index, used as the failure sentinel.
        return pd.Series(np.full(len(df), np.nan), index=df.index)

    @property
    def name(self):
        """Full name of functor (suitable for figure labels)
        """
        # NOTE(review): this *returns* NotImplementedError rather than raising it.
        return NotImplementedError

    @property
    def shortname(self):
        """Short name of functor (suitable for column name/dict key)
        """
        return self.name
388
389
391 """Perform multiple calculations at once on a catalog
392
393 The role of a `CompositeFunctor` is to group together computations from
394 multiple functors. Instead of returning `pandas.Series` a
395 `CompositeFunctor` returns a `pandas.Dataframe`, with the column names
396 being the keys of `funcDict`.
397
398 The `columns` attribute of a `CompositeFunctor` is the union of all columns
399 in all the component functors.
400
401 A `CompositeFunctor` does not use a `_func` method itself; rather,
402 when a `CompositeFunctor` is called, all its columns are loaded
403 at once, and the resulting dataframe is passed to the `_func` method of each component
404 functor. This has the advantage of only doing I/O (reading from parquet file) once,
405 and works because each individual `_func` method of each component functor does not
406 care if there are *extra* columns in the dataframe being passed; only that it must contain
407 *at least* the `columns` it expects.
408
409 An important and useful class method is `from_yaml`, which takes as argument the path to a YAML
410 file specifying a collection of functors.
411
412 Parameters
413 ----------
414 funcs : `dict` or `list`
415 Dictionary or list of functors. If a list, then it will be converted
416 into a dictonary according to the `.shortname` attribute of each functor.
417
418 """
419 dataset = None
420
421 def __init__(self, funcs, **kwargs):
422
423 if type(funcs) == dict:
424 self.funcDict = funcs
425 else:
426 self.funcDict = {f.shortname: f for f in funcs}
427
428 self._filt = None
429
430 super().__init__(**kwargs)
431
432 @property
433 def filt(self):
434 return self._filt
435
436 @filt.setter
437 def filt(self, filt):
438 if filt is not None:
439 for _, f in self.funcDict.items():
440 f.filt = filt
441 self._filt = filt
442
443 def update(self, new):
444 if isinstance(new, dict):
445 self.funcDict.update(new)
446 elif isinstance(new, CompositeFunctor):
447 self.funcDict.update(new.funcDict)
448 else:
449 raise TypeError('Can only update with dictionary or CompositeFunctor.')
450
451 # Make sure new functors have the same 'filt' set
452 if self.filtfiltfiltfilt is not None:
454
455 @property
456 def columns(self):
457 return list(set([x for y in [f.columns for f in self.funcDict.values()] for x in y]))
458
459 def multilevelColumns(self, data, **kwargs):
460 # Get the union of columns for all component functors. Note the need to have `returnTuple=True` here.
461 return list(
462 set(
463 [
464 x
465 for y in [
466 f.multilevelColumns(data, returnTuple=True, **kwargs) for f in self.funcDict.values()
467 ]
468 for x in y
469 ]
470 )
471 )
472
473 def __call__(self, data, **kwargs):
474 """Apply the functor to the data table
475
476 Parameters
477 ----------
478 data : `lsst.daf.butler.DeferredDatasetHandle`,
481 or `pandas.DataFrame`.
482 The table or a pointer to a table on disk from which columns can
483 be accessed
484 """
485 columnIndex = self._get_columnIndex(data)
486
487 # First, determine whether data has a multilevel index (either gen2 or gen3)
488 is_multiLevel = isinstance(data, MultilevelParquetTable) or isinstance(columnIndex, pd.MultiIndex)
489
490 # Multilevel index, gen2 or gen3
491 if is_multiLevel:
492 columns = self.multilevelColumnsmultilevelColumns(data, columnIndex=columnIndex)
493
494 if isinstance(data, MultilevelParquetTable):
495 # Read data into memory the gen2 way
496 df = data.toDataFrame(columns=columns, droplevels=False)
497 elif isinstance(data, DeferredDatasetHandle):
498 # Read data into memory the gen3 way
499 df = data.get(parameters={"columns": columns})
500
501 valDict = {}
502 for k, f in self.funcDict.items():
503 try:
504 subdf = f._setLevels(
505 df[f.multilevelColumns(data, returnTuple=True, columnIndex=columnIndex)]
506 )
507 valDict[k] = f._func(subdf)
508 except Exception as e:
509 self.log.error("Exception in %s call: %s: %s", self.name, type(e).__name__, e)
510 try:
511 valDict[k] = f.fail(subdf)
512 except NameError:
513 raise e
514
515 else:
516 if isinstance(data, DeferredDatasetHandle):
517 # input if Gen3 deferLoad=True
518 df = data.get(parameters={"columns": self.columnscolumns})
519 elif isinstance(data, pd.DataFrame):
520 # input if Gen3 deferLoad=False
521 df = data
522 else:
523 # Original Gen2 input is type ParquetTable and the fallback
524 df = data.toDataFrame(columns=self.columnscolumns)
525
526 valDict = {k: f._func(df) for k, f in self.funcDict.items()}
527
528 # Check that output columns are actually columns
529 for name, colVal in valDict.items():
530 if len(colVal.shape) != 1:
531 raise RuntimeError("Transformed column '%s' is not the shape of a column. "
532 "It is shaped %s and type %s." % (name, colVal.shape, type(colVal)))
533
534 try:
535 valDf = pd.concat(valDict, axis=1)
536 except TypeError:
537 print([(k, type(v)) for k, v in valDict.items()])
538 raise
539
540 if kwargs.get('dropna', False):
541 valDf = valDf.dropna(how='any')
542
543 return valDf
544
545 @classmethod
546 def renameCol(cls, col, renameRules):
547 if renameRules is None:
548 return col
549 for old, new in renameRules:
550 if col.startswith(old):
551 col = col.replace(old, new)
552 return col
553
554 @classmethod
555 def from_file(cls, filename, **kwargs):
556 # Allow environment variables in the filename.
557 filename = os.path.expandvars(filename)
558 with open(filename) as f:
559 translationDefinition = yaml.safe_load(f)
560
561 return cls.from_yaml(translationDefinition, **kwargs)
562
563 @classmethod
564 def from_yaml(cls, translationDefinition, **kwargs):
565 funcs = {}
566 for func, val in translationDefinition['funcs'].items():
567 funcs[func] = init_fromDict(val, name=func)
568
569 if 'flag_rename_rules' in translationDefinition:
570 renameRules = translationDefinition['flag_rename_rules']
571 else:
572 renameRules = None
573
574 if 'calexpFlags' in translationDefinition:
575 for flag in translationDefinition['calexpFlags']:
576 funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='calexp')
577
578 if 'refFlags' in translationDefinition:
579 for flag in translationDefinition['refFlags']:
580 funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='ref')
581
582 if 'forcedFlags' in translationDefinition:
583 for flag in translationDefinition['forcedFlags']:
584 funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='forced_src')
585
586 if 'flags' in translationDefinition:
587 for flag in translationDefinition['flags']:
588 funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='meas')
589
590 return cls(funcs, **kwargs)
591
592
def mag_aware_eval(df, expr, log):
    """Evaluate an expression on a DataFrame, knowing what the 'mag' function means

    Builds on `pandas.DataFrame.eval`, which parses and executes math on dataframes.

    Parameters
    ----------
    df : pandas.DataFrame
        Dataframe on which to evaluate expression.

    expr : str
        Expression.

    log : `logging.Logger`
        Logger that receives an error message if the first evaluation
        attempt fails and the '_instFlux' fallback is tried.
    """
    try:
        # Rewrite mag(x) as an explicit base-10 log so df.eval can handle it.
        expr_new = re.sub(r'mag\((\w+)\)', r'-2.5*log(\g<1>)/log(10)', expr)
        val = df.eval(expr_new)
    except Exception as e:  # Should check what actually gets raised
        log.error("Exception in mag_aware_eval: %s: %s", type(e).__name__, e)
        # Retry assuming the mag() argument needs the '_instFlux' suffix.
        expr_new = re.sub(r'mag\((\w+)\)', r'-2.5*log(\g<1>_instFlux)/log(10)', expr)
        val = df.eval(expr_new)
    return val
614
615
617 """Arbitrary computation on a catalog
618
619 Column names (and thus the columns to be loaded from catalog) are found
620 by finding all words and trying to ignore all "math-y" words.
621
622 Parameters
623 ----------
624 expr : str
625 Expression to evaluate, to be parsed and executed by `mag_aware_eval`.
626 """
627 _ignore_words = ('mag', 'sin', 'cos', 'exp', 'log', 'sqrt')
628
629 def __init__(self, expr, **kwargs):
630 self.expr = expr
631 super().__init__(**kwargs)
632
633 @property
634 def name(self):
635 return self.expr
636
637 @property
638 def columns(self):
639 flux_cols = re.findall(r'mag\‍(\s*(\w+)\s*\‍)', self.expr)
640
641 cols = [c for c in re.findall(r'[a-zA-Z_]+', self.expr) if c not in self._ignore_words]
642 not_a_col = []
643 for c in flux_cols:
644 if not re.search('_instFlux$', c):
645 cols.append(f'{c}_instFlux')
646 not_a_col.append(c)
647 else:
648 cols.append(c)
649
650 return list(set([c for c in cols if c not in not_a_col]))
651
652 def _func(self, df):
653 return mag_aware_eval(df, self.expr, self.log)
654
655
657 """Get column with specified name
658 """
659
660 def __init__(self, col, **kwargs):
661 self.col = col
662 super().__init__(**kwargs)
663
664 @property
665 def name(self):
666 return self.col
667
668 @property
669 def columns(self):
670 return [self.col]
671
672 def _func(self, df):
673 return df[self.col]
674
675
677 """Return the value of the index for each object
678 """
679
680 columns = ['coord_ra'] # just a dummy; something has to be here
681 _defaultDataset = 'ref'
682 _defaultNoDup = True
683
684 def _func(self, df):
685 return pd.Series(df.index, index=df.index)
686
687
class IDColumn(Column):
    """Return the 'id' index of the catalog as a column."""
    col = 'id'
    _allow_difference = False
    _defaultNoDup = True

    def _func(self, df):
        # The id lives in the dataframe index, not in a data column.
        return pd.Series(df.index, index=df.index)
695
696
698 col = 'base_Footprint_nPix'
699
700
702 """Base class for coordinate column, in degrees
703 """
704 _radians = True
705
706 def __init__(self, col, **kwargs):
707 super().__init__(col, **kwargs)
708
709 def _func(self, df):
710 # Must not modify original column in case that column is used by another functor
711 output = df[self.col] * 180 / np.pi if self._radians else df[self.col]
712 return output
713
714
716 """Right Ascension, in degrees
717 """
718 name = 'RA'
719 _defaultNoDup = True
720
721 def __init__(self, **kwargs):
722 super().__init__('coord_ra', **kwargs)
723
724 def __call__(self, catalog, **kwargs):
725 return super().__call__(catalog, **kwargs)
726
727
729 """Declination, in degrees
730 """
731 name = 'Dec'
732 _defaultNoDup = True
733
734 def __init__(self, **kwargs):
735 super().__init__('coord_dec', **kwargs)
736
737 def __call__(self, catalog, **kwargs):
738 return super().__call__(catalog, **kwargs)
739
740
742 """Compute the level 20 HtmIndex for the catalog.
743
744 Notes
745 -----
746 This functor was implemented to satisfy requirements of old APDB interface
747 which required ``pixelId`` column in DiaObject with HTM20 index. APDB
748 interface had migrated to not need that information, but we keep this
749 class in case it may be useful for something else.
750 """
751 name = "Htm20"
752 htmLevel = 20
753 _radians = True
754
755 def __init__(self, ra, decl, **kwargs):
757 self.ra = ra
758 self.decl = decl
759 self._columns = [self.ra, self.decl]
760 super().__init__(**kwargs)
761
762 def _func(self, df):
763
764 def computePixel(row):
765 if self._radians:
766 sphPoint = geom.SpherePoint(row[self.ra],
767 row[self.decl],
768 geom.radians)
769 else:
770 sphPoint = geom.SpherePoint(row[self.ra],
771 row[self.decl],
772 geom.degrees)
773 return self.pixelator.index(sphPoint.getVector())
774
775 return df.apply(computePixel, axis=1, result_type='reduce').astype('int64')
776
777
def fluxName(col):
    """Return ``col`` with the ``_instFlux`` suffix appended unless already present."""
    suffix = '_instFlux'
    return col if col.endswith(suffix) else col + suffix
782
783
def fluxErrName(col):
    """Return ``col`` with the ``_instFluxErr`` suffix appended unless already present."""
    suffix = '_instFluxErr'
    return col if col.endswith(suffix) else col + suffix
788
789
791 """Compute calibrated magnitude
792
793 Takes a `calib` argument, which returns the flux at mag=0
794 as `calib.getFluxMag0()`. If not provided, then the default
795 `fluxMag0` is 63095734448.0194, which is default for HSC.
796 This default should be removed in DM-21955
797
798 This calculation hides warnings about invalid values and dividing by zero.
799
800 As for all functors, a `dataset` and `filt` kwarg should be provided upon
801 initialization. Unlike the default `Functor`, however, the default dataset
802 for a `Mag` is `'meas'`, rather than `'ref'`.
803
804 Parameters
805 ----------
806 col : `str`
807 Name of flux column from which to compute magnitude. Can be parseable
808 by `lsst.pipe.tasks.functors.fluxName` function---that is, you can pass
809 `'modelfit_CModel'` instead of `'modelfit_CModel_instFlux'`) and it will
810 understand.
811 calib : `lsst.afw.image.calib.Calib` (optional)
812 Object that knows zero point.
813 """
814 _defaultDataset = 'meas'
815
816 def __init__(self, col, calib=None, **kwargs):
817 self.col = fluxName(col)
818 self.calib = calib
819 if calib is not None:
820 self.fluxMag0 = calib.getFluxMag0()[0]
821 else:
822 # TO DO: DM-21955 Replace hard coded photometic calibration values
823 self.fluxMag0 = 63095734448.0194
824
825 super().__init__(**kwargs)
826
827 @property
828 def columns(self):
829 return [self.col]
830
831 def _func(self, df):
832 with np.warnings.catch_warnings():
833 np.warnings.filterwarnings('ignore', r'invalid value encountered')
834 np.warnings.filterwarnings('ignore', r'divide by zero')
835 return -2.5*np.log10(df[self.col] / self.fluxMag0)
836
837 @property
838 def name(self):
839 return f'mag_{self.col}'
840
841
class MagErr(Mag):
    """Compute calibrated magnitude uncertainty

    Takes the same `calib` object as `lsst.pipe.tasks.functors.Mag`.

    Parameters
    ----------
    col : `str`
        Name of flux column
    calib : `lsst.afw.image.calib.Calib` (optional)
        Object that knows zero point.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        if self.calib is not None:
            self.fluxMag0Err = self.calib.getFluxMag0()[1]
        else:
            self.fluxMag0Err = 0.

    @property
    def columns(self):
        return [self.col, self.col + 'Err']

    def _func(self, df):
        # Use the stdlib warnings module: np.warnings was removed in NumPy 1.25.
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', r'invalid value encountered')
            warnings.filterwarnings('ignore', r'divide by zero')
            fluxCol, fluxErrCol = self.columns
            x = df[fluxErrCol] / df[fluxCol]
            y = self.fluxMag0Err / self.fluxMag0
            # Standard propagation of relative flux errors into magnitudes.
            magErr = (2.5 / np.log(10.)) * np.sqrt(x*x + y*y)
        return magErr

    @property
    def name(self):
        return super().name + '_err'
878
879
881 """
882 """
883
884 def _func(self, df):
885 return (df[self.col] / self.fluxMag0) * 1e9
886
887
class MagDiff(Functor):
    """Functor to calculate magnitude difference"""
    _defaultDataset = 'meas'

    def __init__(self, col1, col2, **kwargs):
        self.col1 = fluxName(col1)
        self.col2 = fluxName(col2)
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.col1, self.col2]

    def _func(self, df):
        # Use the stdlib warnings module: np.warnings was removed in NumPy 1.25.
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', r'invalid value encountered')
            warnings.filterwarnings('ignore', r'divide by zero')
            # mag1 - mag2 expressed directly as a flux ratio.
            return -2.5*np.log10(df[self.col1]/df[self.col2])

    @property
    def name(self):
        return f'(mag_{self.col1} - mag_{self.col2})'

    @property
    def shortname(self):
        return f'magDiff_{self.col1}_{self.col2}'
915
916
918 """Compute the color between two filters
919
920 Computes color by initializing two different `Mag`
921 functors based on the `col` and filters provided, and
922 then returning the difference.
923
924 This is enabled by the `_func` expecting a dataframe with a
925 multilevel column index, with both `'band'` and `'column'`,
926 instead of just `'column'`, which is the `Functor` default.
927 This is controlled by the `_dfLevels` attribute.
928
929 Also of note, the default dataset for `Color` is `forced_src'`,
930 whereas for `Mag` it is `'meas'`.
931
932 Parameters
933 ----------
934 col : str
935 Name of flux column from which to compute; same as would be passed to
937
938 filt2, filt1 : str
939 Filters from which to compute magnitude difference.
940 Color computed is `Mag(filt2) - Mag(filt1)`.
941 """
942 _defaultDataset = 'forced_src'
943 _dfLevels = ('band', 'column')
944 _defaultNoDup = True
945
946 def __init__(self, col, filt2, filt1, **kwargs):
947 self.col = fluxName(col)
948 if filt2 == filt1:
949 raise RuntimeError("Cannot compute Color for %s: %s - %s " % (col, filt2, filt1))
950 self.filt2 = filt2
951 self.filt1 = filt1
952
953 self.mag2 = Mag(col, filt=filt2, **kwargs)
954 self.mag1 = Mag(col, filt=filt1, **kwargs)
955
956 super().__init__(**kwargs)
957
958 @property
959 def filt(self):
960 return None
961
962 @filt.setter
963 def filt(self, filt):
964 pass
965
966 def _func(self, df):
967 mag2 = self.mag2._func(df[self.filt2])
968 mag1 = self.mag1._func(df[self.filt1])
969 return mag2 - mag1
970
971 @property
972 def columns(self):
973 return [self.mag1.col, self.mag2.col]
974
975 def multilevelColumns(self, parq, **kwargs):
976 return [(self.dataset, self.filt1, self.col), (self.dataset, self.filt2, self.col)]
977
978 @property
979 def name(self):
980 return f'{self.filt2} - {self.filt1} ({self.col})'
981
982 @property
983 def shortname(self):
984 return f"{self.col}_{self.filt2.replace('-', '')}m{self.filt1.replace('-', '')}"
985
986
988 """Main function of this subclass is to override the dropna=True
989 """
990 _null_label = 'null'
991 _allow_difference = False
992 name = 'label'
993 _force_str = False
994
995 def __call__(self, parq, dropna=False, **kwargs):
996 return super().__call__(parq, dropna=False, **kwargs)
997
998
class StarGalaxyLabeller(Labeller):
    """Label sources 'star' or 'galaxy' from their measured extendedness,
    with the null label where extendedness is missing.
    """
    _columns = ["base_ClassificationExtendedness_value"]
    _column = "base_ClassificationExtendedness_value"

    def _func(self, df):
        x = df[self._columns][self._column]
        mask = x.isnull()
        # Code 0/1 from the extendedness threshold; code 2 marks NaN rows.
        test = (x < 0.5).astype(int)
        test = test.mask(mask, 2)

        # TODO: DM-21954 Look into veracity of inline comment below
        # are these backwards?
        categories = ['galaxy', 'star', self._null_label]
        label = pd.Series(pd.Categorical.from_codes(test, categories=categories),
                          index=x.index, name='label')
        if self._force_str:
            label = label.astype(str)
        return label
1017
1018
class NumStarLabeller(Labeller):
    """Label sources by the number of filters in which they were flagged
    as a star.
    """
    _columns = ['numStarFlags']
    labels = {"star": 0, "maybe": 1, "notStar": 2}

    def _func(self, df):
        x = df[self._columns][self._columns[0]]

        # Number of filters
        n = len(x.unique()) - 1

        # Bins: 0 flags -> 'noStar', 1..n-1 -> 'maybe', n -> 'star'.
        labels = ['noStar', 'maybe', 'star']
        label = pd.Series(pd.cut(x, [-1, 0, n-1, n], labels=labels),
                          index=x.index, name='label')

        if self._force_str:
            label = label.astype(str)

        return label
1037
1038
class DeconvolvedMoments(Functor):
    """Source second moments (HSM, falling back to SdssShape where HSM is
    not finite) minus the PSF second moments: a deconvolved-size estimate.
    """
    name = 'Deconvolved Moments'
    shortname = 'deconvolvedMoments'
    _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
                "ext_shapeHSM_HsmSourceMoments_yy",
                "base_SdssShape_xx", "base_SdssShape_yy",
                "ext_shapeHSM_HsmPsfMoments_xx",
                "ext_shapeHSM_HsmPsfMoments_yy")

    def _func(self, df):
        """Calculate deconvolved moments"""
        if "ext_shapeHSM_HsmSourceMoments_xx" in df.columns:  # _xx added by tdm
            hsm = df["ext_shapeHSM_HsmSourceMoments_xx"] + df["ext_shapeHSM_HsmSourceMoments_yy"]
        else:
            hsm = np.ones(len(df))*np.nan
        sdss = df["base_SdssShape_xx"] + df["base_SdssShape_yy"]
        if "ext_shapeHSM_HsmPsfMoments_xx" in df.columns:
            psf = df["ext_shapeHSM_HsmPsfMoments_xx"] + df["ext_shapeHSM_HsmPsfMoments_yy"]
        else:
            # LSST does not have shape.sdss.psf. Could instead add base_PsfShape to catalog using
            # exposure.getPsf().computeShape(s.getCentroid()).getIxx()
            # raise TaskError("No psf shape parameter found in catalog")
            raise RuntimeError('No psf shape parameter found in catalog')

        # Prefer HSM source moments; fall back to SdssShape where HSM is NaN.
        return hsm.where(np.isfinite(hsm), sdss) - psf
1064
1065
class SdssTraceSize(Functor):
    """Functor to calculate SDSS trace radius size for sources"""
    name = "SDSS Trace Size"
    shortname = 'sdssTrace'
    _columns = ("base_SdssShape_xx", "base_SdssShape_yy")

    def _func(self, df):
        # Trace radius: sqrt of half the second-moment trace.
        srcSize = np.sqrt(0.5*(df["base_SdssShape_xx"] + df["base_SdssShape_yy"]))
        return srcSize
1075
1076
class PsfSdssTraceSizeDiff(Functor):
    """Functor to calculate SDSS trace radius size difference (%) between object and psf model"""
    name = "PSF - SDSS Trace Size"
    shortname = 'psf_sdssTrace'
    _columns = ("base_SdssShape_xx", "base_SdssShape_yy",
                "base_SdssShape_psf_xx", "base_SdssShape_psf_yy")

    def _func(self, df):
        srcSize = np.sqrt(0.5*(df["base_SdssShape_xx"] + df["base_SdssShape_yy"]))
        psfSize = np.sqrt(0.5*(df["base_SdssShape_psf_xx"] + df["base_SdssShape_psf_yy"]))
        # Percent difference, normalized by the mean of the two sizes.
        sizeDiff = 100*(srcSize - psfSize)/(0.5*(srcSize + psfSize))
        return sizeDiff
1089
1090
class HsmTraceSize(Functor):
    """Functor to calculate HSM trace radius size for sources"""
    name = 'HSM Trace Size'
    shortname = 'hsmTrace'
    _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
                "ext_shapeHSM_HsmSourceMoments_yy")

    def _func(self, df):
        srcSize = np.sqrt(0.5*(df["ext_shapeHSM_HsmSourceMoments_xx"]
                               + df["ext_shapeHSM_HsmSourceMoments_yy"]))
        return srcSize
1102
1103
class PsfHsmTraceSizeDiff(Functor):
    """Functor to calculate HSM trace radius size difference (%) between object and psf model"""
    name = 'PSF - HSM Trace Size'
    shortname = 'psf_HsmTrace'
    _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
                "ext_shapeHSM_HsmSourceMoments_yy",
                "ext_shapeHSM_HsmPsfMoments_xx",
                "ext_shapeHSM_HsmPsfMoments_yy")

    def _func(self, df):
        srcSize = np.sqrt(0.5*(df["ext_shapeHSM_HsmSourceMoments_xx"]
                               + df["ext_shapeHSM_HsmSourceMoments_yy"]))
        psfSize = np.sqrt(0.5*(df["ext_shapeHSM_HsmPsfMoments_xx"]
                               + df["ext_shapeHSM_HsmPsfMoments_yy"]))
        # Percent difference, normalized by the mean of the two sizes.
        sizeDiff = 100*(srcSize - psfSize)/(0.5*(srcSize + psfSize))
        return sizeDiff
1120
1121
class HsmFwhm(Functor):
    """PSF FWHM in arcseconds from the HSM PSF trace radius, assuming a
    Gaussian profile and a hard-coded pixel scale.
    """
    name = 'HSM Psf FWHM'
    _columns = ('ext_shapeHSM_HsmPsfMoments_xx', 'ext_shapeHSM_HsmPsfMoments_yy')
    # TODO: DM-21403 pixel scale should be computed from the CD matrix or transform matrix
    pixelScale = 0.168
    # Gaussian sigma -> FWHM conversion factor.
    SIGMA2FWHM = 2*np.sqrt(2*np.log(2))

    def _func(self, df):
        return self.pixelScale*self.SIGMA2FWHM*np.sqrt(
            0.5*(df['ext_shapeHSM_HsmPsfMoments_xx'] + df['ext_shapeHSM_HsmPsfMoments_yy']))
1132
1133
class E1(Functor):
    """Distortion ellipticity component e1 = (Ixx - Iyy) / (Ixx + Iyy)."""
    name = "Distortion Ellipticity (e1)"
    shortname = "Distortion"

    def __init__(self, colXX, colXY, colYY, **kwargs):
        self.colXX = colXX
        self.colXY = colXY
        self.colYY = colYY
        self._columns = [self.colXX, self.colXY, self.colYY]
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.colXX, self.colXY, self.colYY]

    def _func(self, df):
        # BUGFIX(review): parenthesize the numerator. The previous form
        # computed Ixx - (Iyy / (Ixx + Iyy)) by operator precedence, which
        # is not the distortion definition implied by `name`.
        return (df[self.colXX] - df[self.colYY]) / (df[self.colXX] + df[self.colYY])
1151
1152
class E2(Functor):
    """Distortion ellipticity component e2 = 2*Ixy / (Ixx + Iyy)."""
    name = "Ellipticity e2"

    def __init__(self, colXX, colXY, colYY, **kwargs):
        self.colXX = colXX
        self.colXY = colXY
        self.colYY = colYY
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.colXX, self.colXY, self.colYY]

    def _func(self, df):
        return 2*df[self.colXY] / (df[self.colXX] + df[self.colYY])
1168
1169
class RadiusFromQuadrupole(Functor):
    """Determinant radius from quadrupole moments:
    (Ixx*Iyy - Ixy**2)**(1/4).
    """

    def __init__(self, colXX, colXY, colYY, **kwargs):
        self.colXX = colXX
        self.colXY = colXY
        self.colYY = colYY
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.colXX, self.colXY, self.colYY]

    def _func(self, df):
        # Fourth root of the moment-matrix determinant.
        return (df[self.colXX]*df[self.colYY] - df[self.colXY]**2)**0.25
1184
1185
class LocalWcs(Functor):
    """Computations using the stored localWcs.
    """
    name = "LocalWcsOperations"

    def __init__(self,
                 colCD_1_1,
                 colCD_1_2,
                 colCD_2_1,
                 colCD_2_2,
                 **kwargs):
        self.colCD_1_1 = colCD_1_1
        self.colCD_1_2 = colCD_1_2
        self.colCD_2_1 = colCD_2_1
        self.colCD_2_2 = colCD_2_2
        super().__init__(**kwargs)

    def computeDeltaRaDec(self, x, y, cd11, cd12, cd21, cd22):
        """Compute the RA/Dec offset corresponding to a pixel offset (x, y)
        under the local affine Wcs approximation.

        Parameters
        ----------
        x : `pandas.Series`
            X pixel coordinate.
        y : `pandas.Series`
            Y pixel coordinate.
        cd11 : `pandas.Series`
            [1, 1] element of the local Wcs affine transform.
        cd12 : `pandas.Series`
            [1, 2] element of the local Wcs affine transform.
        cd21 : `pandas.Series`
            [2, 1] element of the local Wcs affine transform.
        cd22 : `pandas.Series`
            [2, 2] element of the local Wcs affine transform.

        Returns
        -------
        raDecTuple : tuple
            RA and dec conversion of x and y given the local Wcs. Returned
            units are in radians.
        """
        return (x * cd11 + y * cd12, x * cd21 + y * cd22)

    def computeSkySeperation(self, ra1, dec1, ra2, dec2):
        """Compute the great-circle distance between two sky positions
        using the haversine formula.

        Parameters
        ----------
        ra1 : `pandas.Series`
            Ra of the first coordinate in radians.
        dec1 : `pandas.Series`
            Dec of the first coordinate in radians.
        ra2 : `pandas.Series`
            Ra of the second coordinate in radians.
        dec2 : `pandas.Series`
            Dec of the second coordinate in radians.

        Returns
        -------
        dist : `pandas.Series`
            Distance on the sphere in radians.
        """
        deltaDec = dec2 - dec1
        deltaRa = ra2 - ra1
        return 2 * np.arcsin(
            np.sqrt(
                np.sin(deltaDec / 2) ** 2
                + np.cos(dec2) * np.cos(dec1) * np.sin(deltaRa / 2) ** 2))

    def getSkySeperationFromPixel(self, x1, y1, x2, y2, cd11, cd12, cd21, cd22):
        """Compute the distance on the sphere from (x1, y1) to (x2, y2).

        Parameters
        ----------
        x1 : `pandas.Series`
            X pixel coordinate.
        y1 : `pandas.Series`
            Y pixel coordinate.
        x2 : `pandas.Series`
            X pixel coordinate.
        y2 : `pandas.Series`
            Y pixel coordinate.
        cd11 : `pandas.Series`
            [1, 1] element of the local Wcs affine transform.
        cd12 : `pandas.Series`
            [1, 2] element of the local Wcs affine transform.
        cd21 : `pandas.Series`
            [2, 1] element of the local Wcs affine transform.
        cd22 : `pandas.Series`
            [2, 2] element of the local Wcs affine transform.

        Returns
        -------
        Distance : `pandas.Series`
            Distance on the sphere in radians.
        """
        ra1, dec1 = self.computeDeltaRaDec(x1, y1, cd11, cd12, cd21, cd22)
        ra2, dec2 = self.computeDeltaRaDec(x2, y2, cd11, cd12, cd21, cd22)
        # Great circle distance for small separations.
        return self.computeSkySeperation(ra1, dec1, ra2, dec2)
1291
1292
class ComputePixelScale(LocalWcs):
    """Compute the local pixel scale from the stored CDMatrix.
    """
    name = "PixelScale"

    @property
    def columns(self):
        return [self.colCD_1_1,
                self.colCD_1_2,
                self.colCD_2_1,
                self.colCD_2_2]

    def pixelScaleArcseconds(self, cd11, cd12, cd21, cd22):
        """Compute the local pixel to scale conversion in arcseconds.

        Parameters
        ----------
        cd11 : `pandas.Series`
            [1, 1] element of the local Wcs affine transform in radians.
        cd12 : `pandas.Series`
            [1, 2] element of the local Wcs affine transform in radians.
        cd21 : `pandas.Series`
            [2, 1] element of the local Wcs affine transform in radians.
        cd22 : `pandas.Series`
            [2, 2] element of the local Wcs affine transform in radians.

        Returns
        -------
        pixScale : `pandas.Series`
            Arcseconds per pixel at the location of the local WC
        """
        # sqrt(|det CD|) is the linear scale of the affine transform.
        return 3600 * np.degrees(np.sqrt(np.fabs(cd11 * cd22 - cd12 * cd21)))

    def _func(self, df):
        return self.pixelScaleArcseconds(df[self.colCD_1_1],
                                         df[self.colCD_1_2],
                                         df[self.colCD_2_1],
                                         df[self.colCD_2_2])
1333
1334
class ConvertPixelToArcseconds(ComputePixelScale):
    """Convert a value in units of pixels to units of arcseconds.

    NOTE(review): the docstring previously claimed a squared conversion,
    but ``name`` and the single multiplication in ``_func`` show this is
    the linear (pixels -> arcseconds) variant.
    """

    def __init__(self,
                 col,
                 colCD_1_1,
                 colCD_1_2,
                 colCD_2_1,
                 colCD_2_2,
                 **kwargs):
        self.col = col
        super().__init__(colCD_1_1,
                         colCD_1_2,
                         colCD_2_1,
                         colCD_2_2,
                         **kwargs)

    @property
    def name(self):
        return f"{self.col}_asArcseconds"

    @property
    def columns(self):
        return [self.col,
                self.colCD_1_1,
                self.colCD_1_2,
                self.colCD_2_1,
                self.colCD_2_2]

    def _func(self, df):
        return df[self.col] * self.pixelScaleArcseconds(df[self.colCD_1_1],
                                                        df[self.colCD_1_2],
                                                        df[self.colCD_2_1],
                                                        df[self.colCD_2_2])
1370
1371
class ConvertPixelSqToArcsecondsSq(ComputePixelScale):
    """Convert a value in units of pixels squared to units of arcseconds
    squared.

    NOTE(review): the docstring previously claimed the linear conversion,
    but ``name`` and the double multiplication in ``_func`` show this is
    the squared variant.
    """

    def __init__(self,
                 col,
                 colCD_1_1,
                 colCD_1_2,
                 colCD_2_1,
                 colCD_2_2,
                 **kwargs):
        self.col = col
        super().__init__(colCD_1_1,
                         colCD_1_2,
                         colCD_2_1,
                         colCD_2_2,
                         **kwargs)

    @property
    def name(self):
        return f"{self.col}_asArcsecondsSq"

    @property
    def columns(self):
        return [self.col,
                self.colCD_1_1,
                self.colCD_1_2,
                self.colCD_2_1,
                self.colCD_2_2]

    def _func(self, df):
        pixScale = self.pixelScaleArcseconds(df[self.colCD_1_1],
                                             df[self.colCD_1_2],
                                             df[self.colCD_2_1],
                                             df[self.colCD_2_2])
        return df[self.col] * pixScale * pixScale
1408
1409
class ReferenceBand(Functor):
    """Return the name of the band used as the reference measurement for
    each row, derived from the ``merge_measurement_*`` flag columns.
    """
    name = 'Reference Band'
    shortname = 'refBand'

    @property
    def columns(self):
        return ["merge_measurement_i",
                "merge_measurement_r",
                "merge_measurement_z",
                "merge_measurement_y",
                "merge_measurement_g",
                "merge_measurement_u"]

    def _func(self, df: pd.DataFrame) -> pd.Series:
        def getFilterAliasName(row):
            # get column name with the max value (True > False)
            colName = row.idxmax()
            return colName.replace('merge_measurement_', '')

        # Skip columns that are unavailable, because this functor requests the
        # superset of bands that could be included in the object table.
        # (Fixed extraction garble: was `self.columnscolumns`.)
        columns = [col for col in self.columns if col in df.columns]
        # Makes a Series of dtype object if df is empty
        return df[columns].apply(getFilterAliasName, axis=1,
                                 result_type='reduce').astype('object')
1435
1436
class Photometry(Functor):
    """Base class for photometric conversions from raw counts (DN).

    Parameters
    ----------
    colFlux : `str`
        Name of the instrument flux (DN) column.
    colFluxErr : `str`, optional
        Name of the associated flux error column.
    calib : optional
        Calibration object providing ``getFluxMag0()``; if None a
        hard-coded coadd zero point is used.
    """
    # AB to NanoJansky (3631 Jansky)
    AB_FLUX_SCALE = (0 * u.ABmag).to_value(u.nJy)
    LOG_AB_FLUX_SCALE = 12.56
    FIVE_OVER_2LOG10 = 1.085736204758129569
    # TO DO: DM-21955 Replace hard coded photometic calibration values
    COADD_ZP = 27

    def __init__(self, colFlux, colFluxErr=None, calib=None, **kwargs):
        self.vhypot = np.vectorize(self.hypot)
        self.col = colFlux
        self.colFluxErr = colFluxErr

        self.calib = calib
        if calib is not None:
            self.fluxMag0, self.fluxMag0Err = calib.getFluxMag0()
        else:
            self.fluxMag0 = 1./np.power(10, -0.4*self.COADD_ZP)
            self.fluxMag0Err = 0.

        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.col]

    @property
    def name(self):
        return f'mag_{self.col}'

    @classmethod
    def hypot(cls, a, b):
        """Numerically stable scalar hypot (avoids overflow in a**2 + b**2)."""
        if np.abs(a) < np.abs(b):
            a, b = b, a
        if a == 0.:
            return 0.
        q = b/a
        return np.abs(a) * np.sqrt(1. + q*q)

    def dn2flux(self, dn, fluxMag0):
        """Convert raw counts to flux in nanojansky."""
        return self.AB_FLUX_SCALE * dn / fluxMag0

    def dn2mag(self, dn, fluxMag0):
        """Convert raw counts to AB magnitude, silencing log-of-zero noise."""
        # BUGFIX: `np.warnings` was an accidental alias removed in
        # NumPy >= 1.25; use the standard-library module directly.
        import warnings
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', r'invalid value encountered')
            warnings.filterwarnings('ignore', r'divide by zero')
            return -2.5 * np.log10(dn/fluxMag0)

    def dn2fluxErr(self, dn, dnErr, fluxMag0, fluxMag0Err):
        """Propagate count and zero-point errors to a flux error in nJy."""
        retVal = self.vhypot(dn * fluxMag0Err, dnErr * fluxMag0)
        retVal *= self.AB_FLUX_SCALE / fluxMag0 / fluxMag0
        return retVal

    def dn2MagErr(self, dn, dnErr, fluxMag0, fluxMag0Err):
        """Propagate count and zero-point errors to a magnitude error."""
        retVal = self.dn2fluxErr(dn, dnErr, fluxMag0, fluxMag0Err) / self.dn2flux(dn, fluxMag0)
        return self.FIVE_OVER_2LOG10 * retVal
1493
1494
class NanoJansky(Photometry):
    """Instrument flux (DN) converted to calibrated flux in nanojansky."""
    def _func(self, df):
        return self.dn2flux(df[self.col], self.fluxMag0)
1498
1499
class NanoJanskyErr(Photometry):
    """Error on the calibrated nanojansky flux."""
    @property
    def columns(self):
        return [self.col, self.colFluxErr]

    def _func(self, df):
        retArr = self.dn2fluxErr(df[self.col], df[self.colFluxErr], self.fluxMag0, self.fluxMag0Err)
        return pd.Series(retArr, index=df.index)
1508
1509
class Magnitude(Photometry):
    """Instrument flux (DN) converted to an AB magnitude."""
    def _func(self, df):
        return self.dn2mag(df[self.col], self.fluxMag0)
1513
1514
class MagnitudeErr(Photometry):
    """Error on the AB magnitude."""
    @property
    def columns(self):
        return [self.col, self.colFluxErr]

    def _func(self, df):
        retArr = self.dn2MagErr(df[self.col], df[self.colFluxErr], self.fluxMag0, self.fluxMag0Err)
        return pd.Series(retArr, index=df.index)
1523
1524
class LocalPhotometry(Functor):
    """Base class for calibrating the specified instrument flux column using
    the local photometric calibration.

    Parameters
    ----------
    instFluxCol : `str`
        Name of the instrument flux column.
    instFluxErrCol : `str`
        Name of the assocated error columns for ``instFluxCol``.
    photoCalibCol : `str`
        Name of local calibration column.
    photoCalibErrCol : `str`
        Error associated with ``photoCalibCol``

    See also
    --------
    LocalPhotometry
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    """
    logNJanskyToAB = (1 * u.nJy).to_value(u.ABmag)

    def __init__(self,
                 instFluxCol,
                 instFluxErrCol,
                 photoCalibCol,
                 photoCalibErrCol,
                 **kwargs):
        self.instFluxCol = instFluxCol
        self.instFluxErrCol = instFluxErrCol
        self.photoCalibCol = photoCalibCol
        self.photoCalibErrCol = photoCalibErrCol
        super().__init__(**kwargs)

    def instFluxToNanojansky(self, instFlux, localCalib):
        """Convert instrument flux to nanojanskys.

        Parameters
        ----------
        instFlux : `numpy.ndarray` or `pandas.Series`
            Array of instrument flux measurements
        localCalib : `numpy.ndarray` or `pandas.Series`
            Array of local photometric calibration estimates.

        Returns
        -------
        calibFlux : `numpy.ndarray` or `pandas.Series`
            Array of calibrated flux measurements.
        """
        return instFlux * localCalib

    def instFluxErrToNanojanskyErr(self, instFlux, instFluxErr, localCalib, localCalibErr):
        """Convert instrument flux error to a nanojansky error.

        Parameters
        ----------
        instFlux : `numpy.ndarray` or `pandas.Series`
            Array of instrument flux measurements
        instFluxErr : `numpy.ndarray` or `pandas.Series`
            Errors on associated ``instFlux`` values
        localCalib : `numpy.ndarray` or `pandas.Series`
            Array of local photometric calibration estimates.
        localCalibErr : `numpy.ndarray` or `pandas.Series`
            Errors on associated ``localCalib`` values

        Returns
        -------
        calibFluxErr : `numpy.ndarray` or `pandas.Series`
            Errors on calibrated flux measurements.
        """
        return np.hypot(instFluxErr * localCalib, instFlux * localCalibErr)

    def instFluxToMagnitude(self, instFlux, localCalib):
        """Convert instrument flux to an AB magnitude.

        Parameters
        ----------
        instFlux : `numpy.ndarray` or `pandas.Series`
            Array of instrument flux measurements
        localCalib : `numpy.ndarray` or `pandas.Series`
            Array of local photometric calibration estimates.

        Returns
        -------
        calibMag : `numpy.ndarray` or `pandas.Series`
            Array of calibrated AB magnitudes.
        """
        return -2.5 * np.log10(self.instFluxToNanojansky(instFlux, localCalib)) + self.logNJanskyToAB

    def instFluxErrToMagnitudeErr(self, instFlux, instFluxErr, localCalib, localCalibErr):
        """Convert instrument flux error to an AB magnitude error.

        Parameters
        ----------
        instFlux : `numpy.ndarray` or `pandas.Series`
            Array of instrument flux measurements
        instFluxErr : `numpy.ndarray` or `pandas.Series`
            Errors on associated ``instFlux`` values
        localCalib : `numpy.ndarray` or `pandas.Series`
            Array of local photometric calibration estimates.
        localCalibErr : `numpy.ndarray` or `pandas.Series`
            Errors on associated ``localCalib`` values

        Returns
        -------
        calibMagErr: `numpy.ndarray` or `pandas.Series`
            Error on calibrated AB magnitudes.
        """
        err = self.instFluxErrToNanojanskyErr(instFlux, instFluxErr, localCalib, localCalibErr)
        # BUGFIX: the denominator must be the calibrated flux; the previous
        # code passed `instFluxErr` where `localCalib` belongs.
        return 2.5 / np.log(10) * err / self.instFluxToNanojansky(instFlux, localCalib)
1638
1639
class LocalNanojansky(LocalPhotometry):
    """Compute calibrated fluxes using the local calibration value.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    """

    @property
    def columns(self):
        return [self.instFluxCol, self.photoCalibCol]

    @property
    def name(self):
        return f'flux_{self.instFluxCol}'

    def _func(self, df):
        return self.instFluxToNanojansky(df[self.instFluxCol], df[self.photoCalibCol])
1661
1662
class LocalNanojanskyErr(LocalPhotometry):
    """Compute calibrated flux errors using the local calibration value.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    """

    @property
    def columns(self):
        # Restored list continuation dropped by extraction.
        return [self.instFluxCol, self.instFluxErrCol,
                self.photoCalibCol, self.photoCalibErrCol]

    @property
    def name(self):
        return f'fluxErr_{self.instFluxCol}'

    def _func(self, df):
        return self.instFluxErrToNanojanskyErr(df[self.instFluxCol], df[self.instFluxErrCol],
                                               df[self.photoCalibCol], df[self.photoCalibErrCol])
1686
1687
class LocalMagnitude(LocalPhotometry):
    """Compute calibrated AB magnitudes using the local calibration value.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    """

    @property
    def columns(self):
        return [self.instFluxCol, self.photoCalibCol]

    @property
    def name(self):
        return f'mag_{self.instFluxCol}'

    def _func(self, df):
        return self.instFluxToMagnitude(df[self.instFluxCol],
                                        df[self.photoCalibCol])
1710
1711
class LocalMagnitudeErr(LocalPhotometry):
    """Compute calibrated AB magnitude errors using the local calibration value.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    """

    @property
    def columns(self):
        # Restored list continuation dropped by extraction.
        return [self.instFluxCol, self.instFluxErrCol,
                self.photoCalibCol, self.photoCalibErrCol]

    @property
    def name(self):
        return f'magErr_{self.instFluxCol}'

    def _func(self, df):
        return self.instFluxErrToMagnitudeErr(df[self.instFluxCol],
                                              df[self.instFluxErrCol],
                                              df[self.photoCalibCol],
                                              df[self.photoCalibErrCol])
1737
1738
class LocalDipoleMeanFlux(LocalPhotometry):
    """Compute absolute mean of dipole fluxes.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    LocalDipoleMeanFlux
    LocalDipoleMeanFluxErr
    LocalDipoleDiffFlux
    LocalDipoleDiffFluxErr
    """
    def __init__(self,
                 instFluxPosCol,
                 instFluxNegCol,
                 instFluxPosErrCol,
                 instFluxNegErrCol,
                 photoCalibCol,
                 photoCalibErrCol,
                 **kwargs):
        self.instFluxNegCol = instFluxNegCol
        self.instFluxPosCol = instFluxPosCol
        self.instFluxNegErrCol = instFluxNegErrCol
        self.instFluxPosErrCol = instFluxPosErrCol
        # Fixed extraction garble: attribute names were fused
        # (`photoCalibColphotoCalibCol`).
        self.photoCalibCol = photoCalibCol
        self.photoCalibErrCol = photoCalibErrCol
        super().__init__(instFluxNegCol,
                         instFluxNegErrCol,
                         photoCalibCol,
                         photoCalibErrCol,
                         **kwargs)

    @property
    def columns(self):
        # Restored list continuation dropped by extraction.
        return [self.instFluxPosCol,
                self.instFluxNegCol,
                self.photoCalibCol]

    @property
    def name(self):
        return f'dipMeanFlux_{self.instFluxPosCol}_{self.instFluxNegCol}'

    def _func(self, df):
        return 0.5*(np.fabs(self.instFluxToNanojansky(df[self.instFluxNegCol], df[self.photoCalibCol]))
                    + np.fabs(self.instFluxToNanojansky(df[self.instFluxPosCol], df[self.photoCalibCol])))
1786
1787
class LocalDipoleMeanFluxErr(LocalDipoleMeanFlux):
    """Compute the error on the absolute mean of dipole fluxes.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    LocalDipoleMeanFlux
    LocalDipoleMeanFluxErr
    LocalDipoleDiffFlux
    LocalDipoleDiffFluxErr
    """

    @property
    def columns(self):
        # Restored list continuation dropped by extraction.
        return [self.instFluxPosCol,
                self.instFluxNegCol,
                self.instFluxPosErrCol,
                self.instFluxNegErrCol,
                self.photoCalibCol,
                self.photoCalibErrCol]

    @property
    def name(self):
        return f'dipMeanFluxErr_{self.instFluxPosCol}_{self.instFluxNegCol}'

    def _func(self, df):
        # NOTE(review): in the first term, `*` binds tighter than `+`, so
        # only |pos| is scaled by the calibration error before squaring;
        # confirm against the intended error model (((|neg|+|pos|)*err)**2).
        return 0.5*np.sqrt(
            (np.fabs(df[self.instFluxNegCol]) + np.fabs(df[self.instFluxPosCol])
             * df[self.photoCalibErrCol])**2
            + (df[self.instFluxNegErrCol]**2 + df[self.instFluxPosErrCol]**2)
            * df[self.photoCalibCol]**2)
1822
1823
class LocalDipoleDiffFlux(LocalDipoleMeanFlux):
    """Compute the absolute difference of dipole fluxes.

    Value is (abs(pos) - abs(neg))

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    LocalDipoleMeanFlux
    LocalDipoleMeanFluxErr
    LocalDipoleDiffFlux
    LocalDipoleDiffFluxErr
    """

    @property
    def columns(self):
        # Restored list continuation dropped by extraction.
        return [self.instFluxPosCol,
                self.instFluxNegCol,
                self.photoCalibCol]

    @property
    def name(self):
        return f'dipDiffFlux_{self.instFluxPosCol}_{self.instFluxNegCol}'

    def _func(self, df):
        # Fixed extraction garble: `photoCalibColphotoCalibCol`.
        return (np.fabs(self.instFluxToNanojansky(df[self.instFluxPosCol], df[self.photoCalibCol]))
                - np.fabs(self.instFluxToNanojansky(df[self.instFluxNegCol], df[self.photoCalibCol])))
1854
1855
class LocalDipoleDiffFluxErr(LocalDipoleMeanFlux):
    """Compute the error on the absolute difference of dipole fluxes.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    LocalDipoleMeanFlux
    LocalDipoleMeanFluxErr
    LocalDipoleDiffFlux
    LocalDipoleDiffFluxErr
    """

    @property
    def columns(self):
        # Restored list continuation dropped by extraction.
        return [self.instFluxPosCol,
                self.instFluxNegCol,
                self.instFluxPosErrCol,
                self.instFluxNegErrCol,
                self.photoCalibCol,
                self.photoCalibErrCol]

    @property
    def name(self):
        return f'dipDiffFluxErr_{self.instFluxPosCol}_{self.instFluxNegCol}'

    def _func(self, df):
        # Propagate calibration error on the flux difference and the
        # calibrated per-lobe flux errors in quadrature.
        return np.sqrt(
            ((np.fabs(df[self.instFluxPosCol]) - np.fabs(df[self.instFluxNegCol]))
             * df[self.photoCalibErrCol])**2
            + (df[self.instFluxPosErrCol]**2 + df[self.instFluxNegErrCol]**2)
            * df[self.photoCalibCol]**2)
1890
1891
class Ratio(Functor):
    """Base class for returning the ratio of 2 columns.

    Can be used to compute a Signal to Noise ratio for any input flux.

    Parameters
    ----------
    numerator : `str`
        Name of the column to use at the numerator in the ratio
    denominator : `str`
        Name of the column to use as the denominator in the ratio.
    """
    def __init__(self,
                 numerator,
                 denominator,
                 **kwargs):
        self.numerator = numerator
        self.denominator = denominator
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.numerator, self.denominator]

    @property
    def name(self):
        return f'ratio_{self.numerator}_{self.denominator}'

    def _func(self, df):
        # BUGFIX: `np.warnings` was removed in NumPy >= 1.25; use the
        # standard-library warnings module directly.
        import warnings
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', r'invalid value encountered')
            warnings.filterwarnings('ignore', r'divide by zero')
            return df[self.numerator] / df[self.denominator]
1925
1926
class Ebv(Functor):
    """Compute E(B-V) from dustmaps.sfd
    """
    _defaultDataset = 'ref'
    name = "E(B-V)"
    shortname = "ebv"

    def __init__(self, **kwargs):
        # Deferred import: dustmaps is only required when Ebv is used.
        from dustmaps.sfd import SFDQuery
        self._columns = ['coord_ra', 'coord_dec']
        self.sfd = SFDQuery()
        super().__init__(**kwargs)

    def _func(self, df):
        # Catalog coordinates are stored in radians.
        coords = SkyCoord(df['coord_ra']*u.rad, df['coord_dec']*u.rad)
        ebv = self.sfd(coords)
        # Double precision unnecessary scientifically
        # but currently needed for ingest to qserv
        return pd.Series(ebv, index=df.index).astype('float64')
def multilevelColumns(self, parq, **kwargs)
Definition: functors.py:975
def __init__(self, col, filt2, filt1, **kwargs)
Definition: functors.py:946
def __init__(self, col, **kwargs)
Definition: functors.py:660
def __init__(self, funcs, **kwargs)
Definition: functors.py:421
def __call__(self, data, **kwargs)
Definition: functors.py:473
def from_file(cls, filename, **kwargs)
Definition: functors.py:555
def from_yaml(cls, translationDefinition, **kwargs)
Definition: functors.py:564
def renameCol(cls, col, renameRules)
Definition: functors.py:546
def multilevelColumns(self, data, **kwargs)
Definition: functors.py:459
def pixelScaleArcseconds(self, cd11, cd12, cd21, cd22)
Definition: functors.py:1305
def __init__(self, col, colCD_1_1, colCD_1_2, colCD_2_1, colCD_2_2, **kwargs)
Definition: functors.py:1382
def __init__(self, col, colCD_1_1, colCD_1_2, colCD_2_1, colCD_2_2, **kwargs)
Definition: functors.py:1345
def __init__(self, col, **kwargs)
Definition: functors.py:706
def __init__(self, expr, **kwargs)
Definition: functors.py:629
def __init__(self, **kwargs)
Definition: functors.py:734
def __call__(self, catalog, **kwargs)
Definition: functors.py:737
def __init__(self, colXX, colXY, colYY, **kwargs)
Definition: functors.py:1138
def __init__(self, colXX, colXY, colYY, **kwargs)
Definition: functors.py:1156
def __init__(self, **kwargs)
Definition: functors.py:1934
def __call__(self, data, dropna=False)
Definition: functors.py:357
def _func(self, df, dropna=True)
Definition: functors.py:296
def multilevelColumns(self, data, columnIndex=None, returnTuple=False)
Definition: functors.py:246
def _get_data_columnLevelNames(self, data, columnIndex=None)
Definition: functors.py:202
def difference(self, data1, data2, **kwargs)
Definition: functors.py:369
def __init__(self, filt=None, dataset=None, noDup=None)
Definition: functors.py:157
def _get_columnIndex(self, data)
Definition: functors.py:299
def _colsFromDict(self, colDict, columnIndex=None)
Definition: functors.py:224
def _get_data_columnLevels(self, data, columnIndex=None)
Definition: functors.py:178
def __init__(self, ra, decl, **kwargs)
Definition: functors.py:755
def __call__(self, parq, dropna=False, **kwargs)
Definition: functors.py:995
def __init__(self, instFluxPosCol, instFluxNegCol, instFluxPosErrCol, instFluxNegErrCol, photoCalibCol, photoCalibErrCol, **kwargs)
Definition: functors.py:1760
def instFluxToNanojansky(self, instFlux, localCalib)
Definition: functors.py:1562
def instFluxErrToMagnitudeErr(self, instFlux, instFluxErr, localCalib, localCalibErr)
Definition: functors.py:1617
def __init__(self, instFluxCol, instFluxErrCol, photoCalibCol, photoCalibErrCol, **kwargs)
Definition: functors.py:1555
def instFluxErrToNanojanskyErr(self, instFlux, instFluxErr, localCalib, localCalibErr)
Definition: functors.py:1579
def instFluxToMagnitude(self, instFlux, localCalib)
Definition: functors.py:1600
def __init__(self, colCD_1_1, colCD_1_2, colCD_2_1, colCD_2_2, **kwargs)
Definition: functors.py:1196
def computeDeltaRaDec(self, x, y, cd11, cd12, cd21, cd22)
Definition: functors.py:1203
def computeSkySeperation(self, ra1, dec1, ra2, dec2)
Definition: functors.py:1232
def getSkySeperationFromPixel(self, x1, y1, x2, y2, cd11, cd12, cd21, cd22)
Definition: functors.py:1258
def __init__(self, col1, col2, **kwargs)
Definition: functors.py:893
def __init__(self, *args, **kwargs)
Definition: functors.py:854
def __init__(self, col, calib=None, **kwargs)
Definition: functors.py:816
def dn2mag(self, dn, fluxMag0)
Definition: functors.py:1479
def dn2flux(self, dn, fluxMag0)
Definition: functors.py:1476
def dn2fluxErr(self, dn, dnErr, fluxMag0, fluxMag0Err)
Definition: functors.py:1485
def dn2MagErr(self, dn, dnErr, fluxMag0, fluxMag0Err)
Definition: functors.py:1490
def __init__(self, colFlux, colFluxErr=None, calib=None, **kwargs)
Definition: functors.py:1445
def __call__(self, catalog, **kwargs)
Definition: functors.py:724
def __init__(self, **kwargs)
Definition: functors.py:721
def __init__(self, colXX, colXY, colYY, **kwargs)
Definition: functors.py:1172
def __init__(self, numerator, denominator, **kwargs)
Definition: functors.py:1907
def mag_aware_eval(df, expr, log)
Definition: functors.py:593
def init_fromDict(initDict, basePath='lsst.pipe.tasks.functors', typeKey='functor', name=None)
Definition: functors.py:55