# lsst.pipe.tasks 21.0.0-175-g7497abfa+f8b3f0027d
# functors.py
# This file is part of pipe_tasks.
#
# LSST Data Management System
# This product includes software developed by the
# LSST Project (http://www.lsst.org/).
# See COPYRIGHT file at the top of the source tree.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the LSST License Statement and
# the GNU General Public License along with this program.  If not,
# see <https://www.lsstcorp.org/LegalNotices/>.
#
import os.path
import re
import warnings
from itertools import product

import astropy.units as u
import numpy as np
import pandas as pd
import yaml

from lsst.daf.butler import DeferredDatasetHandle
from lsst.daf.persistence import doImport
import lsst.geom as geom
import lsst.sphgeom as sphgeom

from .parquetTable import ParquetTable, MultilevelParquetTable
37
38
39def init_fromDict(initDict, basePath='lsst.pipe.tasks.functors',
40 typeKey='functor', name=None):
41 """Initialize an object defined in a dictionary
42
43 The object needs to be importable as
44 f'{basePath}.{initDict[typeKey]}'
45 The positional and keyword arguments (if any) are contained in
46 "args" and "kwargs" entries in the dictionary, respectively.
47 This is used in `functors.CompositeFunctor.from_yaml` to initialize
48 a composite functor from a specification in a YAML file.
49
50 Parameters
51 ----------
52 initDict : dictionary
53 Dictionary describing object's initialization. Must contain
54 an entry keyed by ``typeKey`` that is the name of the object,
55 relative to ``basePath``.
56 basePath : str
57 Path relative to module in which ``initDict[typeKey]`` is defined.
58 typeKey : str
59 Key of ``initDict`` that is the name of the object
60 (relative to `basePath`).
61 """
62 initDict = initDict.copy()
63 # TO DO: DM-21956 We should be able to define functors outside this module
64 pythonType = doImport(f'{basePath}.{initDict.pop(typeKey)}')
65 args = []
66 if 'args' in initDict:
67 args = initDict.pop('args')
68 if isinstance(args, str):
69 args = [args]
70 try:
71 element = pythonType(*args, **initDict)
72 except Exception as e:
73 message = f'Error in constructing functor "{name}" of type {pythonType.__name__} with args: {args}'
74 raise type(e)(message, e.args)
75 return element
76
77
78class Functor(object):
79 """Define and execute a calculation on a ParquetTable
80
81 The `__call__` method accepts either a `ParquetTable` object or a
82 `DeferredDatasetHandle`, and returns the
83 result of the calculation as a single column. Each functor defines what
84 columns are needed for the calculation, and only these columns are read
85 from the `ParquetTable`.
86
87 The action of `__call__` consists of two steps: first, loading the
88 necessary columns from disk into memory as a `pandas.DataFrame` object;
89 and second, performing the computation on this dataframe and returning the
90 result.
91
92
93 To define a new `Functor`, a subclass must define a `_func` method,
94 that takes a `pandas.DataFrame` and returns result in a `pandas.Series`.
95 In addition, it must define the following attributes
96
97 * `_columns`: The columns necessary to perform the calculation
98 * `name`: A name appropriate for a figure axis label
99 * `shortname`: A name appropriate for use as a dictionary key
100
101 On initialization, a `Functor` should declare what band (`filt` kwarg)
102 and dataset (e.g. `'ref'`, `'meas'`, `'forced_src'`) it is intended to be
103 applied to. This enables the `_get_data` method to extract the proper
104 columns from the parquet file. If not specified, the dataset will fall back
105 on the `_defaultDataset`attribute. If band is not specified and `dataset`
106 is anything other than `'ref'`, then an error will be raised when trying to
107 perform the calculation.
108
109 Originally, `Functor` was set up to expect
110 datasets formatted like the `deepCoadd_obj` dataset; that is, a
111 dataframe with a multi-level column index, with the levels of the
112 column index being `band`, `dataset`, and `column`.
113 It has since been generalized to apply to dataframes without mutli-level
114 indices and multi-level indices with just `dataset` and `column` levels.
115 In addition, the `_get_data` method that reads
116 the dataframe from the `ParquetTable` will return a dataframe with column
117 index levels defined by the `_dfLevels` attribute; by default, this is
118 `column`.
119
120 The `_dfLevels` attributes should generally not need to
121 be changed, unless `_func` needs columns from multiple filters or datasets
122 to do the calculation.
123 An example of this is the `lsst.pipe.tasks.functors.Color` functor, for
124 which `_dfLevels = ('band', 'column')`, and `_func` expects the dataframe
125 it gets to have those levels in the column index.
126
127 Parameters
128 ----------
129 filt : str
130 Filter upon which to do the calculation
131
132 dataset : str
133 Dataset upon which to do the calculation
134 (e.g., 'ref', 'meas', 'forced_src').
135
136 """
137
138 _defaultDataset = 'ref'
139 _dfLevels = ('column',)
140 _defaultNoDup = False
141
142 def __init__(self, filt=None, dataset=None, noDup=None):
143 self.filtfilt = filt
144 self.datasetdataset = dataset if dataset is not None else self._defaultDataset_defaultDataset
145 self._noDup_noDup = noDup
146
147 @property
148 def noDup(self):
149 if self._noDup_noDup is not None:
150 return self._noDup_noDup
151 else:
152 return self._defaultNoDup_defaultNoDup
153
154 @property
155 def columns(self):
156 """Columns required to perform calculation
157 """
158 if not hasattr(self, '_columns'):
159 raise NotImplementedError('Must define columns property or _columns attribute')
160 return self._columns
161
162 def _get_data_columnLevels(self, data, columnIndex=None):
163 """Gets the names of the column index levels
164
165 This should only be called in the context of a multilevel table.
166 The logic here is to enable this to work both with the gen2 `MultilevelParquetTable`
167 and with the gen3 `DeferredDatasetHandle`.
168
169 Parameters
170 ----------
171 data : `MultilevelParquetTable` or `DeferredDatasetHandle`
172
173 columnnIndex (optional): pandas `Index` object
174 if not passed, then it is read from the `DeferredDatasetHandle`
175 """
176 if isinstance(data, DeferredDatasetHandle):
177 if columnIndex is None:
178 columnIndex = data.get(component="columns")
179 if columnIndex is not None:
180 return columnIndex.names
181 if isinstance(data, MultilevelParquetTable):
182 return data.columnLevels
183 else:
184 raise TypeError(f"Unknown type for data: {type(data)}!")
185
186 def _get_data_columnLevelNames(self, data, columnIndex=None):
187 """Gets the content of each of the column levels for a multilevel table
188
189 Similar to `_get_data_columnLevels`, this enables backward compatibility with gen2.
190
191 Mirrors original gen2 implementation within `pipe.tasks.parquetTable.MultilevelParquetTable`
192 """
193 if isinstance(data, DeferredDatasetHandle):
194 if columnIndex is None:
195 columnIndex = data.get(component="columns")
196 if columnIndex is not None:
197 columnLevels = columnIndex.names
198 columnLevelNames = {
199 level: list(np.unique(np.array([c for c in columnIndex])[:, i]))
200 for i, level in enumerate(columnLevels)
201 }
202 return columnLevelNames
203 if isinstance(data, MultilevelParquetTable):
204 return data.columnLevelNames
205 else:
206 raise TypeError(f"Unknown type for data: {type(data)}!")
207
208 def _colsFromDict(self, colDict, columnIndex=None):
209 """Converts dictionary column specficiation to a list of columns
210
211 This mirrors the original gen2 implementation within `pipe.tasks.parquetTable.MultilevelParquetTable`
212 """
213 new_colDict = {}
214 columnLevels = self._get_data_columnLevels_get_data_columnLevels(None, columnIndex=columnIndex)
215
216 for i, lev in enumerate(columnLevels):
217 if lev in colDict:
218 if isinstance(colDict[lev], str):
219 new_colDict[lev] = [colDict[lev]]
220 else:
221 new_colDict[lev] = colDict[lev]
222 else:
223 new_colDict[lev] = columnIndex.levels[i]
224
225 levelCols = [new_colDict[lev] for lev in columnLevels]
226 cols = product(*levelCols)
227 return list(cols)
228
229 def multilevelColumns(self, data, columnIndex=None, returnTuple=False):
230 """Returns columns needed by functor from multilevel dataset
231
232 To access tables with multilevel column structure, the `MultilevelParquetTable`
233 or `DeferredDatasetHandle` need to be passed either a list of tuples or a
234 dictionary.
235
236 Parameters
237 ----------
238 data : `MultilevelParquetTable` or `DeferredDatasetHandle`
239
240 columnIndex (optional): pandas `Index` object
241 either passed or read in from `DeferredDatasetHandle`.
242
243 `returnTuple` : bool
244 If true, then return a list of tuples rather than the column dictionary
245 specification. This is set to `True` by `CompositeFunctor` in order to be able to
246 combine columns from the various component functors.
247
248 """
249 if isinstance(data, DeferredDatasetHandle) and columnIndex is None:
250 columnIndex = data.get(component="columns")
251
252 # Confirm that the dataset has the column levels the functor is expecting it to have.
253 columnLevels = self._get_data_columnLevels_get_data_columnLevels(data, columnIndex)
254
255 columnDict = {'column': self.columnscolumns,
256 'dataset': self.datasetdataset}
257 if self.filtfilt is None:
258 columnLevelNames = self._get_data_columnLevelNames_get_data_columnLevelNames(data, columnIndex)
259 if "band" in columnLevels:
260 if self.datasetdataset == "ref":
261 columnDict["band"] = columnLevelNames["band"][0]
262 else:
263 raise ValueError(f"'filt' not set for functor {self.name}"
264 f"(dataset {self.dataset}) "
265 "and ParquetTable "
266 "contains multiple filters in column index. "
267 "Set 'filt' or set 'dataset' to 'ref'.")
268 else:
269 columnDict['band'] = self.filtfilt
270
271 if isinstance(data, MultilevelParquetTable):
272 return data._colsFromDict(columnDict)
273 elif isinstance(data, DeferredDatasetHandle):
274 if returnTuple:
275 return self._colsFromDict_colsFromDict(columnDict, columnIndex=columnIndex)
276 else:
277 return columnDict
278
279 def _func(self, df, dropna=True):
280 raise NotImplementedError('Must define calculation on dataframe')
281
282 def _get_columnIndex(self, data):
283 """Return columnIndex
284 """
285
286 if isinstance(data, DeferredDatasetHandle):
287 return data.get(component="columns")
288 else:
289 return None
290
291 def _get_data(self, data):
292 """Retrieve dataframe necessary for calculation.
293
294 The data argument can be a DataFrame, a ParquetTable instance, or a gen3 DeferredDatasetHandle
295
296 Returns dataframe upon which `self._func_func` can act.
297
298 N.B. while passing a raw pandas `DataFrame` *should* work here, it has not been tested.
299 """
300 if isinstance(data, pd.DataFrame):
301 return data
302
303 # First thing to do: check to see if the data source has a multilevel column index or not.
304 columnIndex = self._get_columnIndex_get_columnIndex(data)
305 is_multiLevel = isinstance(data, MultilevelParquetTable) or isinstance(columnIndex, pd.MultiIndex)
306
307 # Simple single-level parquet table, gen2
308 if isinstance(data, ParquetTable) and not is_multiLevel:
309 columns = self.columnscolumns
310 df = data.toDataFrame(columns=columns)
311 return df
312
313 # Get proper columns specification for this functor
314 if is_multiLevel:
315 columns = self.multilevelColumnsmultilevelColumns(data, columnIndex=columnIndex)
316 else:
317 columns = self.columnscolumns
318
319 if isinstance(data, MultilevelParquetTable):
320 # Load in-memory dataframe with appropriate columns the gen2 way
321 df = data.toDataFrame(columns=columns, droplevels=False)
322 elif isinstance(data, DeferredDatasetHandle):
323 # Load in-memory dataframe with appropriate columns the gen3 way
324 df = data.get(parameters={"columns": columns})
325
326 # Drop unnecessary column levels
327 if is_multiLevel:
328 df = self._setLevels_setLevels(df)
329
330 return df
331
332 def _setLevels(self, df):
333 levelsToDrop = [n for n in df.columns.names if n not in self._dfLevels_dfLevels]
334 df.columns = df.columns.droplevel(levelsToDrop)
335 return df
336
337 def _dropna(self, vals):
338 return vals.dropna()
339
340 def __call__(self, data, dropna=False):
341 try:
342 df = self._get_data_get_data(data)
343 vals = self._func_func(df)
344 except Exception:
345 vals = self.failfail(df)
346 if dropna:
347 vals = self._dropna_dropna(vals)
348
349 return vals
350
351 def difference(self, data1, data2, **kwargs):
352 """Computes difference between functor called on two different ParquetTable objects
353 """
354 return self(data1, **kwargs) - self(data2, **kwargs)
355
356 def fail(self, df):
357 return pd.Series(np.full(len(df), np.nan), index=df.index)
358
359 @property
360 def name(self):
361 """Full name of functor (suitable for figure labels)
362 """
363 return NotImplementedError
364
365 @property
366 def shortname(self):
367 """Short name of functor (suitable for column name/dict key)
368 """
369 return self.namename
370
371
373 """Perform multiple calculations at once on a catalog
374
375 The role of a `CompositeFunctor` is to group together computations from
376 multiple functors. Instead of returning `pandas.Series` a
377 `CompositeFunctor` returns a `pandas.Dataframe`, with the column names
378 being the keys of `funcDict`.
379
380 The `columns` attribute of a `CompositeFunctor` is the union of all columns
381 in all the component functors.
382
383 A `CompositeFunctor` does not use a `_func` method itself; rather,
384 when a `CompositeFunctor` is called, all its columns are loaded
385 at once, and the resulting dataframe is passed to the `_func` method of each component
386 functor. This has the advantage of only doing I/O (reading from parquet file) once,
387 and works because each individual `_func` method of each component functor does not
388 care if there are *extra* columns in the dataframe being passed; only that it must contain
389 *at least* the `columns` it expects.
390
391 An important and useful class method is `from_yaml`, which takes as argument the path to a YAML
392 file specifying a collection of functors.
393
394 Parameters
395 ----------
396 funcs : `dict` or `list`
397 Dictionary or list of functors. If a list, then it will be converted
398 into a dictonary according to the `.shortname` attribute of each functor.
399
400 """
401 dataset = None
402
403 def __init__(self, funcs, **kwargs):
404
405 if type(funcs) == dict:
406 self.funcDictfuncDict = funcs
407 else:
408 self.funcDictfuncDict = {f.shortname: f for f in funcs}
409
410 self._filt_filt = None
411
412 super().__init__(**kwargs)
413
414 @property
415 def filt(self):
416 return self._filt_filt
417
418 @filt.setter
419 def filt(self, filt):
420 if filt is not None:
421 for _, f in self.funcDictfuncDict.items():
422 f.filt = filt
423 self._filt_filt = filt
424
425 def update(self, new):
426 if isinstance(new, dict):
427 self.funcDictfuncDict.update(new)
428 elif isinstance(new, CompositeFunctor):
429 self.funcDictfuncDict.update(new.funcDict)
430 else:
431 raise TypeError('Can only update with dictionary or CompositeFunctor.')
432
433 # Make sure new functors have the same 'filt' set
434 if self.filtfiltfiltfiltfilt is not None:
436
437 @property
438 def columns(self):
439 return list(set([x for y in [f.columns for f in self.funcDictfuncDict.values()] for x in y]))
440
441 def multilevelColumns(self, data, **kwargs):
442 # Get the union of columns for all component functors. Note the need to have `returnTuple=True` here.
443 return list(
444 set(
445 [
446 x
447 for y in [
448 f.multilevelColumns(data, returnTuple=True, **kwargs) for f in self.funcDictfuncDict.values()
449 ]
450 for x in y
451 ]
452 )
453 )
454
455 def __call__(self, data, **kwargs):
456 """Apply the functor to the data table
457
458 Parameters
459 ----------
460 data : `lsst.daf.butler.DeferredDatasetHandle`,
463 or `pandas.DataFrame`.
464 The table or a pointer to a table on disk from which columns can
465 be accessed
466 """
467 columnIndex = self._get_columnIndex_get_columnIndex(data)
468
469 # First, determine whether data has a multilevel index (either gen2 or gen3)
470 is_multiLevel = isinstance(data, MultilevelParquetTable) or isinstance(columnIndex, pd.MultiIndex)
471
472 # Multilevel index, gen2 or gen3
473 if is_multiLevel:
474 columns = self.multilevelColumnsmultilevelColumnsmultilevelColumns(data, columnIndex=columnIndex)
475
476 if isinstance(data, MultilevelParquetTable):
477 # Read data into memory the gen2 way
478 df = data.toDataFrame(columns=columns, droplevels=False)
479 elif isinstance(data, DeferredDatasetHandle):
480 # Read data into memory the gen3 way
481 df = data.get(parameters={"columns": columns})
482
483 valDict = {}
484 for k, f in self.funcDictfuncDict.items():
485 try:
486 subdf = f._setLevels(
487 df[f.multilevelColumns(data, returnTuple=True, columnIndex=columnIndex)]
488 )
489 valDict[k] = f._func(subdf)
490 except Exception as e:
491 try:
492 valDict[k] = f.fail(subdf)
493 except NameError:
494 raise e
495
496 else:
497 if isinstance(data, DeferredDatasetHandle):
498 # input if Gen3 deferLoad=True
499 df = data.get(parameters={"columns": self.columnscolumnscolumns})
500 elif isinstance(data, pd.DataFrame):
501 # input if Gen3 deferLoad=False
502 df = data
503 else:
504 # Original Gen2 input is type ParquetTable and the fallback
505 df = data.toDataFrame(columns=self.columnscolumnscolumns)
506
507 valDict = {k: f._func(df) for k, f in self.funcDictfuncDict.items()}
508
509 # Check that output columns are actually columns
510 for name, colVal in valDict.items():
511 if len(colVal.shape) != 1:
512 raise RuntimeError("Transformed column '%s' is not the shape of a column. "
513 "It is shaped %s and type %s." % (name, colVal.shape, type(colVal)))
514
515 try:
516 valDf = pd.concat(valDict, axis=1)
517 except TypeError:
518 print([(k, type(v)) for k, v in valDict.items()])
519 raise
520
521 if kwargs.get('dropna', False):
522 valDf = valDf.dropna(how='any')
523
524 return valDf
525
526 @classmethod
527 def renameCol(cls, col, renameRules):
528 if renameRules is None:
529 return col
530 for old, new in renameRules:
531 if col.startswith(old):
532 col = col.replace(old, new)
533 return col
534
535 @classmethod
536 def from_file(cls, filename, **kwargs):
537 # Allow environment variables in the filename.
538 filename = os.path.expandvars(filename)
539 with open(filename) as f:
540 translationDefinition = yaml.safe_load(f)
541
542 return cls.from_yamlfrom_yaml(translationDefinition, **kwargs)
543
544 @classmethod
545 def from_yaml(cls, translationDefinition, **kwargs):
546 funcs = {}
547 for func, val in translationDefinition['funcs'].items():
548 funcs[func] = init_fromDict(val, name=func)
549
550 if 'flag_rename_rules' in translationDefinition:
551 renameRules = translationDefinition['flag_rename_rules']
552 else:
553 renameRules = None
554
555 if 'calexpFlags' in translationDefinition:
556 for flag in translationDefinition['calexpFlags']:
557 funcs[cls.renameColrenameCol(flag, renameRules)] = Column(flag, dataset='calexp')
558
559 if 'refFlags' in translationDefinition:
560 for flag in translationDefinition['refFlags']:
561 funcs[cls.renameColrenameCol(flag, renameRules)] = Column(flag, dataset='ref')
562
563 if 'forcedFlags' in translationDefinition:
564 for flag in translationDefinition['forcedFlags']:
565 funcs[cls.renameColrenameCol(flag, renameRules)] = Column(flag, dataset='forced_src')
566
567 if 'flags' in translationDefinition:
568 for flag in translationDefinition['flags']:
569 funcs[cls.renameColrenameCol(flag, renameRules)] = Column(flag, dataset='meas')
570
571 return cls(funcs, **kwargs)
572
573
574def mag_aware_eval(df, expr):
575 """Evaluate an expression on a DataFrame, knowing what the 'mag' function means
576
577 Builds on `pandas.DataFrame.eval`, which parses and executes math on dataframes.
578
579 Parameters
580 ----------
581 df : pandas.DataFrame
582 Dataframe on which to evaluate expression.
583
584 expr : str
585 Expression.
586 """
587 try:
588 expr_new = re.sub(r'mag\‍((\w+)\‍)', r'-2.5*log(\g<1>)/log(10)', expr)
589 val = df.eval(expr_new, truediv=True)
590 except Exception: # Should check what actually gets raised
591 expr_new = re.sub(r'mag\‍((\w+)\‍)', r'-2.5*log(\g<1>_instFlux)/log(10)', expr)
592 val = df.eval(expr_new, truediv=True)
593 return val
594
595
597 """Arbitrary computation on a catalog
598
599 Column names (and thus the columns to be loaded from catalog) are found
600 by finding all words and trying to ignore all "math-y" words.
601
602 Parameters
603 ----------
604 expr : str
605 Expression to evaluate, to be parsed and executed by `mag_aware_eval`.
606 """
607 _ignore_words = ('mag', 'sin', 'cos', 'exp', 'log', 'sqrt')
608
609 def __init__(self, expr, **kwargs):
610 self.exprexpr = expr
611 super().__init__(**kwargs)
612
613 @property
614 def name(self):
615 return self.exprexpr
616
617 @property
618 def columns(self):
619 flux_cols = re.findall(r'mag\‍(\s*(\w+)\s*\‍)', self.exprexpr)
620
621 cols = [c for c in re.findall(r'[a-zA-Z_]+', self.exprexpr) if c not in self._ignore_words_ignore_words]
622 not_a_col = []
623 for c in flux_cols:
624 if not re.search('_instFlux$', c):
625 cols.append(f'{c}_instFlux')
626 not_a_col.append(c)
627 else:
628 cols.append(c)
629
630 return list(set([c for c in cols if c not in not_a_col]))
631
632 def _func(self, df):
633 return mag_aware_eval(df, self.exprexpr)
634
635
637 """Get column with specified name
638 """
639
640 def __init__(self, col, **kwargs):
641 self.colcol = col
642 super().__init__(**kwargs)
643
644 @property
645 def name(self):
646 return self.colcol
647
648 @property
649 def columns(self):
650 return [self.colcol]
651
652 def _func(self, df):
653 return df[self.colcol]
654
655
657 """Return the value of the index for each object
658 """
659
660 columns = ['coord_ra'] # just a dummy; something has to be here
661 _defaultDataset = 'ref'
662 _defaultNoDup = True
663
664 def _func(self, df):
665 return pd.Series(df.index, index=df.index)
666
667
669 col = 'id'
670 _allow_difference = False
671 _defaultNoDup = True
672
673 def _func(self, df):
674 return pd.Series(df.index, index=df.index)
675
676
678 col = 'base_Footprint_nPix'
679
680
682 """Base class for coordinate column, in degrees
683 """
684 _radians = True
685
686 def __init__(self, col, **kwargs):
687 super().__init__(col, **kwargs)
688
689 def _func(self, df):
690 # Must not modify original column in case that column is used by another functor
691 output = df[self.colcol] * 180 / np.pi if self._radians_radians else df[self.colcol]
692 return output
693
694
696 """Right Ascension, in degrees
697 """
698 name = 'RA'
699 _defaultNoDup = True
700
701 def __init__(self, **kwargs):
702 super().__init__('coord_ra', **kwargs)
703
704 def __call__(self, catalog, **kwargs):
705 return super().__call__(catalog, **kwargs)
706
707
709 """Declination, in degrees
710 """
711 name = 'Dec'
712 _defaultNoDup = True
713
714 def __init__(self, **kwargs):
715 super().__init__('coord_dec', **kwargs)
716
717 def __call__(self, catalog, **kwargs):
718 return super().__call__(catalog, **kwargs)
719
720
722 """Compute the level 20 HtmIndex for the catalog.
723
724 Notes
725 -----
726 This functor was implemented to satisfy requirements of old APDB interface
727 which required ``pixelId`` column in DiaObject with HTM20 index. APDB
728 interface had migrated to not need that information, but we keep this
729 class in case it may be useful for something else.
730 """
731 name = "Htm20"
732 htmLevel = 20
733 _radians = True
734
735 def __init__(self, ra, decl, **kwargs):
736 self.pixelatorpixelator = sphgeom.HtmPixelization(self.htmLevelhtmLevel)
737 self.rara = ra
738 self.decldecl = decl
739 self._columns_columns = [self.rara, self.decldecl]
740 super().__init__(**kwargs)
741
742 def _func(self, df):
743
744 def computePixel(row):
745 if self._radians_radians:
746 sphPoint = geom.SpherePoint(row[self.rara],
747 row[self.decldecl],
748 geom.radians)
749 else:
750 sphPoint = geom.SpherePoint(row[self.rara],
751 row[self.decldecl],
752 geom.degrees)
753 return self.pixelatorpixelator.index(sphPoint.getVector())
754
755 return df.apply(computePixel, axis=1, result_type='reduce').astype('int64')
756
757
758def fluxName(col):
759 if not col.endswith('_instFlux'):
760 col += '_instFlux'
761 return col
762
763
764def fluxErrName(col):
765 if not col.endswith('_instFluxErr'):
766 col += '_instFluxErr'
767 return col
768
769
771 """Compute calibrated magnitude
772
773 Takes a `calib` argument, which returns the flux at mag=0
774 as `calib.getFluxMag0()`. If not provided, then the default
775 `fluxMag0` is 63095734448.0194, which is default for HSC.
776 This default should be removed in DM-21955
777
778 This calculation hides warnings about invalid values and dividing by zero.
779
780 As for all functors, a `dataset` and `filt` kwarg should be provided upon
781 initialization. Unlike the default `Functor`, however, the default dataset
782 for a `Mag` is `'meas'`, rather than `'ref'`.
783
784 Parameters
785 ----------
786 col : `str`
787 Name of flux column from which to compute magnitude. Can be parseable
788 by `lsst.pipe.tasks.functors.fluxName` function---that is, you can pass
789 `'modelfit_CModel'` instead of `'modelfit_CModel_instFlux'`) and it will
790 understand.
791 calib : `lsst.afw.image.calib.Calib` (optional)
792 Object that knows zero point.
793 """
794 _defaultDataset = 'meas'
795
796 def __init__(self, col, calib=None, **kwargs):
797 self.colcol = fluxName(col)
798 self.calibcalib = calib
799 if calib is not None:
800 self.fluxMag0fluxMag0 = calib.getFluxMag0()[0]
801 else:
802 # TO DO: DM-21955 Replace hard coded photometic calibration values
803 self.fluxMag0fluxMag0 = 63095734448.0194
804
805 super().__init__(**kwargs)
806
807 @property
808 def columns(self):
809 return [self.colcol]
810
811 def _func(self, df):
812 with np.warnings.catch_warnings():
813 np.warnings.filterwarnings('ignore', r'invalid value encountered')
814 np.warnings.filterwarnings('ignore', r'divide by zero')
815 return -2.5*np.log10(df[self.colcol] / self.fluxMag0fluxMag0)
816
817 @property
818 def name(self):
819 return f'mag_{self.col}'
820
821
822class MagErr(Mag):
823 """Compute calibrated magnitude uncertainty
824
825 Takes the same `calib` object as `lsst.pipe.tasks.functors.Mag`.
826
827 Parameters
828 col : `str`
829 Name of flux column
830 calib : `lsst.afw.image.calib.Calib` (optional)
831 Object that knows zero point.
832 """
833
834 def __init__(self, *args, **kwargs):
835 super().__init__(*args, **kwargs)
836 if self.calibcalib is not None:
837 self.fluxMag0ErrfluxMag0Err = self.calibcalib.getFluxMag0()[1]
838 else:
839 self.fluxMag0ErrfluxMag0Err = 0.
840
841 @property
842 def columns(self):
843 return [self.colcol, self.colcol + 'Err']
844
845 def _func(self, df):
846 with np.warnings.catch_warnings():
847 np.warnings.filterwarnings('ignore', r'invalid value encountered')
848 np.warnings.filterwarnings('ignore', r'divide by zero')
849 fluxCol, fluxErrCol = self.columnscolumnscolumnscolumns
850 x = df[fluxErrCol] / df[fluxCol]
851 y = self.fluxMag0ErrfluxMag0Err / self.fluxMag0fluxMag0
852 magErr = (2.5 / np.log(10.)) * np.sqrt(x*x + y*y)
853 return magErr
854
855 @property
856 def name(self):
857 return super().name + '_err'
858
859
861 """
862 """
863
864 def _func(self, df):
865 return (df[self.colcol] / self.fluxMag0fluxMag0) * 1e9
866
867
869 _defaultDataset = 'meas'
870
871 """Functor to calculate magnitude difference"""
872
873 def __init__(self, col1, col2, **kwargs):
874 self.col1col1 = fluxName(col1)
875 self.col2col2 = fluxName(col2)
876 super().__init__(**kwargs)
877
878 @property
879 def columns(self):
880 return [self.col1col1, self.col2col2]
881
882 def _func(self, df):
883 with np.warnings.catch_warnings():
884 np.warnings.filterwarnings('ignore', r'invalid value encountered')
885 np.warnings.filterwarnings('ignore', r'divide by zero')
886 return -2.5*np.log10(df[self.col1col1]/df[self.col2col2])
887
888 @property
889 def name(self):
890 return f'(mag_{self.col1} - mag_{self.col2})'
891
892 @property
893 def shortname(self):
894 return f'magDiff_{self.col1}_{self.col2}'
895
896
898 """Compute the color between two filters
899
900 Computes color by initializing two different `Mag`
901 functors based on the `col` and filters provided, and
902 then returning the difference.
903
904 This is enabled by the `_func` expecting a dataframe with a
905 multilevel column index, with both `'band'` and `'column'`,
906 instead of just `'column'`, which is the `Functor` default.
907 This is controlled by the `_dfLevels` attribute.
908
909 Also of note, the default dataset for `Color` is `forced_src'`,
910 whereas for `Mag` it is `'meas'`.
911
912 Parameters
913 ----------
914 col : str
915 Name of flux column from which to compute; same as would be passed to
917
918 filt2, filt1 : str
919 Filters from which to compute magnitude difference.
920 Color computed is `Mag(filt2) - Mag(filt1)`.
921 """
922 _defaultDataset = 'forced_src'
923 _dfLevels = ('band', 'column')
924 _defaultNoDup = True
925
926 def __init__(self, col, filt2, filt1, **kwargs):
927 self.colcol = fluxName(col)
928 if filt2 == filt1:
929 raise RuntimeError("Cannot compute Color for %s: %s - %s " % (col, filt2, filt1))
930 self.filt2filt2 = filt2
931 self.filt1filt1 = filt1
932
933 self.mag2mag2 = Mag(col, filt=filt2, **kwargs)
934 self.mag1mag1 = Mag(col, filt=filt1, **kwargs)
935
936 super().__init__(**kwargs)
937
938 @property
939 def filt(self):
940 return None
941
942 @filt.setter
943 def filt(self, filt):
944 pass
945
946 def _func(self, df):
947 mag2 = self.mag2._func(df[self.filt2])
948 mag1 = self.mag1._func(df[self.filt1])
949 return mag2 - mag1
950
951 @property
952 def columns(self):
953 return [self.mag1mag1.col, self.mag2mag2.col]
954
955 def multilevelColumns(self, parq, **kwargs):
956 return [(self.datasetdataset, self.filt1filt1, self.colcol), (self.datasetdataset, self.filt2filt2, self.colcol)]
957
958 @property
959 def name(self):
960 return f'{self.filt2} - {self.filt1} ({self.col})'
961
962 @property
963 def shortname(self):
964 return f"{self.col}_{self.filt2.replace('-', '')}m{self.filt1.replace('-', '')}"
965
966
968 """Main function of this subclass is to override the dropna=True
969 """
970 _null_label = 'null'
971 _allow_difference = False
972 name = 'label'
973 _force_str = False
974
975 def __call__(self, parq, dropna=False, **kwargs):
976 return super().__call__(parq, dropna=False, **kwargs)
977
978
980 _columns = ["base_ClassificationExtendedness_value"]
981 _column = "base_ClassificationExtendedness_value"
982
983 def _func(self, df):
984 x = df[self._columns_columns][self._column_column]
985 mask = x.isnull()
986 test = (x < 0.5).astype(int)
987 test = test.mask(mask, 2)
988
989 # TODO: DM-21954 Look into veracity of inline comment below
990 # are these backwards?
991 categories = ['galaxy', 'star', self._null_label_null_label]
992 label = pd.Series(pd.Categorical.from_codes(test, categories=categories),
993 index=x.index, name='label')
994 if self._force_str_force_str:
995 label = label.astype(str)
996 return label
997
998
_columns = ['numStarFlags']
labels = {"star": 0, "maybe": 1, "notStar": 2}

def _func(self, df):
    """Bin the per-object star-flag count into 'noStar'/'maybe'/'star'."""
    x = df[self._columns][self._columns[0]]

    # Number of filters (unique flag counts minus one).
    n = len(x.unique()) - 1

    labels = ['noStar', 'maybe', 'star']
    # Bins: 0 flags -> noStar, 1..n-1 -> maybe, n (all bands) -> star.
    label = pd.Series(pd.cut(x, [-1, 0, n-1, n], labels=labels),
                      index=x.index, name='label')

    if self._force_str:
        label = label.astype(str)

    return label
1017
1018
name = 'Deconvolved Moments'
shortname = 'deconvolvedMoments'
_columns = ("ext_shapeHSM_HsmSourceMoments_xx",
            "ext_shapeHSM_HsmSourceMoments_yy",
            "base_SdssShape_xx", "base_SdssShape_yy",
            "ext_shapeHSM_HsmPsfMoments_xx",
            "ext_shapeHSM_HsmPsfMoments_yy")

def _func(self, df):
    """Calculate deconvolved moments: source trace minus PSF trace.

    Prefers HSM source moments, falls back to SDSS shape where the HSM
    sum is not finite; raises if no PSF moments are present.
    """
    if "ext_shapeHSM_HsmSourceMoments_xx" in df.columns:  # _xx added by tdm
        hsm = df["ext_shapeHSM_HsmSourceMoments_xx"] + df["ext_shapeHSM_HsmSourceMoments_yy"]
    else:
        hsm = np.ones(len(df))*np.nan
    sdss = df["base_SdssShape_xx"] + df["base_SdssShape_yy"]
    if "ext_shapeHSM_HsmPsfMoments_xx" in df.columns:
        psf = df["ext_shapeHSM_HsmPsfMoments_xx"] + df["ext_shapeHSM_HsmPsfMoments_yy"]
    else:
        # LSST does not have shape.sdss.psf. Could instead add base_PsfShape to catalog using
        # exposure.getPsf().computeShape(s.getCentroid()).getIxx()
        raise RuntimeError('No psf shape parameter found in catalog')

    # Use the SDSS moments wherever the HSM sum is NaN/inf.
    return hsm.where(np.isfinite(hsm), sdss) - psf
1044
1045
1047 """Functor to calculate SDSS trace radius size for sources"""
1048 name = "SDSS Trace Size"
1049 shortname = 'sdssTrace'
1050 _columns = ("base_SdssShape_xx", "base_SdssShape_yy")
1051
1052 def _func(self, df):
1053 srcSize = np.sqrt(0.5*(df["base_SdssShape_xx"] + df["base_SdssShape_yy"]))
1054 return srcSize
1055
1056
1058 """Functor to calculate SDSS trace radius size difference (%) between object and psf model"""
1059 name = "PSF - SDSS Trace Size"
1060 shortname = 'psf_sdssTrace'
1061 _columns = ("base_SdssShape_xx", "base_SdssShape_yy",
1062 "base_SdssShape_psf_xx", "base_SdssShape_psf_yy")
1063
1064 def _func(self, df):
1065 srcSize = np.sqrt(0.5*(df["base_SdssShape_xx"] + df["base_SdssShape_yy"]))
1066 psfSize = np.sqrt(0.5*(df["base_SdssShape_psf_xx"] + df["base_SdssShape_psf_yy"]))
1067 sizeDiff = 100*(srcSize - psfSize)/(0.5*(srcSize + psfSize))
1068 return sizeDiff
1069
1070
1072 """Functor to calculate HSM trace radius size for sources"""
1073 name = 'HSM Trace Size'
1074 shortname = 'hsmTrace'
1075 _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
1076 "ext_shapeHSM_HsmSourceMoments_yy")
1077
1078 def _func(self, df):
1079 srcSize = np.sqrt(0.5*(df["ext_shapeHSM_HsmSourceMoments_xx"]
1080 + df["ext_shapeHSM_HsmSourceMoments_yy"]))
1081 return srcSize
1082
1083
1085 """Functor to calculate HSM trace radius size difference (%) between object and psf model"""
1086 name = 'PSF - HSM Trace Size'
1087 shortname = 'psf_HsmTrace'
1088 _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
1089 "ext_shapeHSM_HsmSourceMoments_yy",
1090 "ext_shapeHSM_HsmPsfMoments_xx",
1091 "ext_shapeHSM_HsmPsfMoments_yy")
1092
1093 def _func(self, df):
1094 srcSize = np.sqrt(0.5*(df["ext_shapeHSM_HsmSourceMoments_xx"]
1095 + df["ext_shapeHSM_HsmSourceMoments_yy"]))
1096 psfSize = np.sqrt(0.5*(df["ext_shapeHSM_HsmPsfMoments_xx"]
1097 + df["ext_shapeHSM_HsmPsfMoments_yy"]))
1098 sizeDiff = 100*(srcSize - psfSize)/(0.5*(srcSize + psfSize))
1099 return sizeDiff
1100
1101
name = 'HSM Psf FWHM'
_columns = ('ext_shapeHSM_HsmPsfMoments_xx', 'ext_shapeHSM_HsmPsfMoments_yy')
# TODO: DM-21403 pixel scale should be computed from the CD matrix or transform matrix
pixelScale = 0.168  # arcsec/pixel, hard-coded (HSC); see TODO above
SIGMA2FWHM = 2*np.sqrt(2*np.log(2))  # Gaussian sigma -> FWHM factor

def _func(self, df):
    # FWHM (arcsec) = pixelScale * SIGMA2FWHM * trace radius (pixels).
    return self.pixelScale*self.SIGMA2FWHM*np.sqrt(
        0.5*(df['ext_shapeHSM_HsmPsfMoments_xx'] + df['ext_shapeHSM_HsmPsfMoments_yy']))
1112
1113
name = "Distortion Ellipticity (e1)"
shortname = "Distortion"

def __init__(self, colXX, colXY, colYY, **kwargs):
    """Store the second-moment column names (Ixx, Ixy, Iyy)."""
    self.colXX = colXX
    self.colXY = colXY
    self.colYY = colYY
    self._columns = [self.colXX, self.colXY, self.colYY]
    super().__init__(**kwargs)

@property
def columns(self):
    return [self.colXX, self.colXY, self.colYY]

def _func(self, df):
    # e1 = (Ixx - Iyy) / (Ixx + Iyy).
    # NOTE(review): the original lacked parentheses around the numerator,
    # computing Ixx - (Iyy / (Ixx + Iyy)); parenthesized form matches the
    # distortion definition and the sibling e2 = 2*Ixy / (Ixx + Iyy).
    return (df[self.colXX] - df[self.colYY]) / (df[self.colXX] + df[self.colYY])
1131
1132
name = "Ellipticity e2"

def __init__(self, colXX, colXY, colYY, **kwargs):
    """Store the second-moment column names (Ixx, Ixy, Iyy)."""
    self.colXX = colXX
    self.colXY = colXY
    self.colYY = colYY
    super().__init__(**kwargs)

@property
def columns(self):
    return [self.colXX, self.colXY, self.colYY]

def _func(self, df):
    # e2 = 2 * Ixy / (Ixx + Iyy)
    return 2*df[self.colXY] / (df[self.colXX] + df[self.colYY])
1148
1149
def __init__(self, colXX, colXY, colYY, **kwargs):
    """Store the second-moment column names (Ixx, Ixy, Iyy)."""
    self.colXX = colXX
    self.colXY = colXY
    self.colYY = colYY
    super().__init__(**kwargs)

@property
def columns(self):
    return [self.colXX, self.colXY, self.colYY]

def _func(self, df):
    # Determinant radius: |Q|^(1/4) = (Ixx*Iyy - Ixy^2)^(1/4).
    return (df[self.colXX]*df[self.colYY] - df[self.colXY]**2)**0.25
1164
1165
1167 """Computations using the stored localWcs.
1168 """
1169 name = "LocalWcsOperations"
1170
1171 def __init__(self,
1172 colCD_1_1,
1173 colCD_1_2,
1174 colCD_2_1,
1175 colCD_2_2,
1176 **kwargs):
1177 self.colCD_1_1colCD_1_1 = colCD_1_1
1178 self.colCD_1_2colCD_1_2 = colCD_1_2
1179 self.colCD_2_1colCD_2_1 = colCD_2_1
1180 self.colCD_2_2colCD_2_2 = colCD_2_2
1181 super().__init__(**kwargs)
1182
1183 def computeDeltaRaDec(self, x, y, cd11, cd12, cd21, cd22):
1184 """Compute the distance on the sphere from x2, y1 to x1, y1.
1185
1186 Parameters
1187 ----------
1188 x : `pandas.Series`
1189 X pixel coordinate.
1190 y : `pandas.Series`
1191 Y pixel coordinate.
1192 cd11 : `pandas.Series`
1193 [1, 1] element of the local Wcs affine transform.
1194 cd11 : `pandas.Series`
1195 [1, 1] element of the local Wcs affine transform.
1196 cd12 : `pandas.Series`
1197 [1, 2] element of the local Wcs affine transform.
1198 cd21 : `pandas.Series`
1199 [2, 1] element of the local Wcs affine transform.
1200 cd22 : `pandas.Series`
1201 [2, 2] element of the local Wcs affine transform.
1202
1203 Returns
1204 -------
1205 raDecTuple : tuple
1206 RA and dec conversion of x and y given the local Wcs. Returned
1207 units are in radians.
1208
1209 """
1210 return (x * cd11 + y * cd12, x * cd21 + y * cd22)
1211
1212 def computeSkySeperation(self, ra1, dec1, ra2, dec2):
1213 """Compute the local pixel scale conversion.
1214
1215 Parameters
1216 ----------
1217 ra1 : `pandas.Series`
1218 Ra of the first coordinate in radians.
1219 dec1 : `pandas.Series`
1220 Dec of the first coordinate in radians.
1221 ra2 : `pandas.Series`
1222 Ra of the second coordinate in radians.
1223 dec2 : `pandas.Series`
1224 Dec of the second coordinate in radians.
1225
1226 Returns
1227 -------
1228 dist : `pandas.Series`
1229 Distance on the sphere in radians.
1230 """
1231 deltaDec = dec2 - dec1
1232 deltaRa = ra2 - ra1
1233 return 2 * np.arcsin(
1234 np.sqrt(
1235 np.sin(deltaDec / 2) ** 2
1236 + np.cos(dec2) * np.cos(dec1) * np.sin(deltaRa / 2) ** 2))
1237
1238 def getSkySeperationFromPixel(self, x1, y1, x2, y2, cd11, cd12, cd21, cd22):
1239 """Compute the distance on the sphere from x2, y1 to x1, y1.
1240
1241 Parameters
1242 ----------
1243 x1 : `pandas.Series`
1244 X pixel coordinate.
1245 y1 : `pandas.Series`
1246 Y pixel coordinate.
1247 x2 : `pandas.Series`
1248 X pixel coordinate.
1249 y2 : `pandas.Series`
1250 Y pixel coordinate.
1251 cd11 : `pandas.Series`
1252 [1, 1] element of the local Wcs affine transform.
1253 cd11 : `pandas.Series`
1254 [1, 1] element of the local Wcs affine transform.
1255 cd12 : `pandas.Series`
1256 [1, 2] element of the local Wcs affine transform.
1257 cd21 : `pandas.Series`
1258 [2, 1] element of the local Wcs affine transform.
1259 cd22 : `pandas.Series`
1260 [2, 2] element of the local Wcs affine transform.
1261
1262 Returns
1263 -------
1264 Distance : `pandas.Series`
1265 Arcseconds per pixel at the location of the local WC
1266 """
1267 ra1, dec1 = self.computeDeltaRaDeccomputeDeltaRaDec(x1, y1, cd11, cd12, cd21, cd22)
1268 ra2, dec2 = self.computeDeltaRaDeccomputeDeltaRaDec(x2, y2, cd11, cd12, cd21, cd22)
1269 # Great circle distance for small separations.
1270 return self.computeSkySeperationcomputeSkySeperation(ra1, dec1, ra2, dec2)
1271
1272
1274 """Compute the local pixel scale from the stored CDMatrix.
1275 """
1276 name = "PixelScale"
1277
1278 @property
1279 def columns(self):
1280 return [self.colCD_1_1colCD_1_1,
1281 self.colCD_1_2colCD_1_2,
1282 self.colCD_2_1colCD_2_1,
1283 self.colCD_2_2colCD_2_2]
1284
1285 def pixelScaleArcseconds(self, cd11, cd12, cd21, cd22):
1286 """Compute the local pixel to scale conversion in arcseconds.
1287
1288 Parameters
1289 ----------
1290 cd11 : `pandas.Series`
1291 [1, 1] element of the local Wcs affine transform in radians.
1292 cd11 : `pandas.Series`
1293 [1, 1] element of the local Wcs affine transform in radians.
1294 cd12 : `pandas.Series`
1295 [1, 2] element of the local Wcs affine transform in radians.
1296 cd21 : `pandas.Series`
1297 [2, 1] element of the local Wcs affine transform in radians.
1298 cd22 : `pandas.Series`
1299 [2, 2] element of the local Wcs affine transform in radians.
1300
1301 Returns
1302 -------
1303 pixScale : `pandas.Series`
1304 Arcseconds per pixel at the location of the local WC
1305 """
1306 return 3600 * np.degrees(np.sqrt(np.fabs(cd11 * cd22 - cd12 * cd21)))
1307
1308 def _func(self, df):
1309 return self.pixelScaleArcsecondspixelScaleArcseconds(df[self.colCD_1_1colCD_1_1],
1310 df[self.colCD_1_2colCD_1_2],
1311 df[self.colCD_2_1colCD_2_1],
1312 df[self.colCD_2_2colCD_2_2])
1313
1314
1316 """Convert a value in units pixels squared to units arcseconds squared.
1317 """
1318
1319 def __init__(self,
1320 col,
1321 colCD_1_1,
1322 colCD_1_2,
1323 colCD_2_1,
1324 colCD_2_2,
1325 **kwargs):
1326 self.colcol = col
1327 super().__init__(colCD_1_1,
1328 colCD_1_2,
1329 colCD_2_1,
1330 colCD_2_2,
1331 **kwargs)
1332
1333 @property
1334 def name(self):
1335 return f"{self.col}_asArcseconds"
1336
1337 @property
1338 def columns(self):
1339 return [self.colcol,
1340 self.colCD_1_1colCD_1_1,
1341 self.colCD_1_2colCD_1_2,
1342 self.colCD_2_1colCD_2_1,
1343 self.colCD_2_2colCD_2_2]
1344
1345 def _func(self, df):
1346 return df[self.colcol] * self.pixelScaleArcsecondspixelScaleArcseconds(df[self.colCD_1_1colCD_1_1],
1347 df[self.colCD_1_2colCD_1_2],
1348 df[self.colCD_2_1colCD_2_1],
1349 df[self.colCD_2_2colCD_2_2])
1350
1351
1353 """Convert a value in units pixels to units arcseconds.
1354 """
1355
1356 def __init__(self,
1357 col,
1358 colCD_1_1,
1359 colCD_1_2,
1360 colCD_2_1,
1361 colCD_2_2,
1362 **kwargs):
1363 self.colcol = col
1364 super().__init__(colCD_1_1,
1365 colCD_1_2,
1366 colCD_2_1,
1367 colCD_2_2,
1368 **kwargs)
1369
1370 @property
1371 def name(self):
1372 return f"{self.col}_asArcsecondsSq"
1373
1374 @property
1375 def columns(self):
1376 return [self.colcol,
1377 self.colCD_1_1colCD_1_1,
1378 self.colCD_1_2colCD_1_2,
1379 self.colCD_2_1colCD_2_1,
1380 self.colCD_2_2colCD_2_2]
1381
1382 def _func(self, df):
1383 pixScale = self.pixelScaleArcsecondspixelScaleArcseconds(df[self.colCD_1_1colCD_1_1],
1384 df[self.colCD_1_2colCD_1_2],
1385 df[self.colCD_2_1colCD_2_1],
1386 df[self.colCD_2_2colCD_2_2])
1387 return df[self.colcol] * pixScale * pixScale
1388
1389
name = 'Reference Band'
shortname = 'refBand'

@property
def columns(self):
    # Merge-measurement flag columns, in priority order i, r, z, y, g, u.
    return ["merge_measurement_i",
            "merge_measurement_r",
            "merge_measurement_z",
            "merge_measurement_y",
            "merge_measurement_g",
            "merge_measurement_u"]

def _func(self, df: pd.DataFrame) -> pd.Series:
    def getFilterAliasName(row):
        # get column name with the max value (True > False)
        colName = row.idxmax()
        return colName.replace('merge_measurement_', '')

    # Makes a Series of dtype object if df is empty
    return df[self.columns].apply(getFilterAliasName, axis=1,
                                  result_type='reduce').astype('object')
1412
1413
# AB to NanoJansky (3631 Jansky)
AB_FLUX_SCALE = (0 * u.ABmag).to_value(u.nJy)
LOG_AB_FLUX_SCALE = 12.56
FIVE_OVER_2LOG10 = 1.085736204758129569  # 2.5 / ln(10)
# TO DO: DM-21955 Replace hard coded photometric calibration values
COADD_ZP = 27

def __init__(self, colFlux, colFluxErr=None, calib=None, **kwargs):
    """Store flux/error column names and derive fluxMag0 from ``calib``,
    falling back to the hard-coded coadd zeropoint when no calib is given.
    """
    self.vhypot = np.vectorize(self.hypot)
    self.col = colFlux
    self.colFluxErr = colFluxErr

    self.calib = calib
    if calib is not None:
        self.fluxMag0, self.fluxMag0Err = calib.getFluxMag0()
    else:
        # Zeropoint flux for the hard-coded coadd ZP; error unknown -> 0.
        self.fluxMag0 = 1./np.power(10, -0.4*self.COADD_ZP)
        self.fluxMag0Err = 0.

    super().__init__(**kwargs)

@property
def columns(self):
    return [self.col]

@property
def name(self):
    return f'mag_{self.col}'

@classmethod
def hypot(cls, a, b):
    """Overflow-safe scalar hypotenuse of a and b."""
    if np.abs(a) < np.abs(b):
        a, b = b, a
    if a == 0.:
        return 0.
    q = b/a
    return np.abs(a) * np.sqrt(1. + q*q)

def dn2flux(self, dn, fluxMag0):
    """Convert raw counts to flux in nanojansky."""
    return self.AB_FLUX_SCALE * dn / fluxMag0

def dn2mag(self, dn, fluxMag0):
    """Convert raw counts to AB magnitude, silencing log-of-zero warnings."""
    # `np.warnings` (an alias of the stdlib module) was removed from NumPy;
    # use the warnings module directly.
    import warnings
    with warnings.catch_warnings():
        warnings.filterwarnings('ignore', r'invalid value encountered')
        warnings.filterwarnings('ignore', r'divide by zero')
        return -2.5 * np.log10(dn/fluxMag0)

def dn2fluxErr(self, dn, dnErr, fluxMag0, fluxMag0Err):
    """Propagate count and zeropoint errors to a flux error in nanojansky."""
    retVal = self.vhypot(dn * fluxMag0Err, dnErr * fluxMag0)
    retVal *= self.AB_FLUX_SCALE / fluxMag0 / fluxMag0
    return retVal

def dn2MagErr(self, dn, dnErr, fluxMag0, fluxMag0Err):
    """Propagate count and zeropoint errors to a magnitude error."""
    retVal = self.dn2fluxErr(dn, dnErr, fluxMag0, fluxMag0Err) / self.dn2flux(dn, fluxMag0)
    return self.FIVE_OVER_2LOG10 * retVal
1470
1471
def _func(self, df):
    # Calibrated flux in nanojansky from the raw flux column and fluxMag0.
    return self.dn2flux(df[self.col], self.fluxMag0)
1476
@property
def columns(self):
    return [self.col, self.colFluxErr]

def _func(self, df):
    # Propagate flux and zeropoint errors; wrap as a Series to keep the index.
    retArr = self.dn2fluxErr(df[self.col], df[self.colFluxErr],
                             self.fluxMag0, self.fluxMag0Err)
    return pd.Series(retArr, index=df.index)
1485
1486
def _func(self, df):
    # AB magnitude from the raw flux column and fluxMag0.
    return self.dn2mag(df[self.col], self.fluxMag0)
1490
1491
@property
def columns(self):
    return [self.col, self.colFluxErr]

def _func(self, df):
    # Propagate flux and zeropoint errors to a magnitude error Series.
    retArr = self.dn2MagErr(df[self.col], df[self.colFluxErr],
                            self.fluxMag0, self.fluxMag0Err)
    return pd.Series(retArr, index=df.index)
1500
1501
1503 """Base class for calibrating the specified instrument flux column using
1504 the local photometric calibration.
1505
1506 Parameters
1507 ----------
1508 instFluxCol : `str`
1509 Name of the instrument flux column.
1510 instFluxErrCol : `str`
1511 Name of the assocated error columns for ``instFluxCol``.
1512 photoCalibCol : `str`
1513 Name of local calibration column.
1514 photoCalibErrCol : `str`
1515 Error associated with ``photoCalibCol``
1516
1517 See also
1518 --------
1519 LocalPhotometry
1520 LocalNanojansky
1521 LocalNanojanskyErr
1522 LocalMagnitude
1523 LocalMagnitudeErr
1524 """
1525 logNJanskyToAB = (1 * u.nJy).to_value(u.ABmag)
1526
1527 def __init__(self,
1528 instFluxCol,
1529 instFluxErrCol,
1530 photoCalibCol,
1531 photoCalibErrCol,
1532 **kwargs):
1533 self.instFluxColinstFluxCol = instFluxCol
1534 self.instFluxErrColinstFluxErrCol = instFluxErrCol
1535 self.photoCalibColphotoCalibCol = photoCalibCol
1536 self.photoCalibErrColphotoCalibErrCol = photoCalibErrCol
1537 super().__init__(**kwargs)
1538
1539 def instFluxToNanojansky(self, instFlux, localCalib):
1540 """Convert instrument flux to nanojanskys.
1541
1542 Parameters
1543 ----------
1544 instFlux : `numpy.ndarray` or `pandas.Series`
1545 Array of instrument flux measurements
1546 localCalib : `numpy.ndarray` or `pandas.Series`
1547 Array of local photometric calibration estimates.
1548
1549 Returns
1550 -------
1551 calibFlux : `numpy.ndarray` or `pandas.Series`
1552 Array of calibrated flux measurements.
1553 """
1554 return instFlux * localCalib
1555
1556 def instFluxErrToNanojanskyErr(self, instFlux, instFluxErr, localCalib, localCalibErr):
1557 """Convert instrument flux to nanojanskys.
1558
1559 Parameters
1560 ----------
1561 instFlux : `numpy.ndarray` or `pandas.Series`
1562 Array of instrument flux measurements
1563 instFluxErr : `numpy.ndarray` or `pandas.Series`
1564 Errors on associated ``instFlux`` values
1565 localCalib : `numpy.ndarray` or `pandas.Series`
1566 Array of local photometric calibration estimates.
1567 localCalibErr : `numpy.ndarray` or `pandas.Series`
1568 Errors on associated ``localCalib`` values
1569
1570 Returns
1571 -------
1572 calibFluxErr : `numpy.ndarray` or `pandas.Series`
1573 Errors on calibrated flux measurements.
1574 """
1575 return np.hypot(instFluxErr * localCalib, instFlux * localCalibErr)
1576
1577 def instFluxToMagnitude(self, instFlux, localCalib):
1578 """Convert instrument flux to nanojanskys.
1579
1580 Parameters
1581 ----------
1582 instFlux : `numpy.ndarray` or `pandas.Series`
1583 Array of instrument flux measurements
1584 localCalib : `numpy.ndarray` or `pandas.Series`
1585 Array of local photometric calibration estimates.
1586
1587 Returns
1588 -------
1589 calibMag : `numpy.ndarray` or `pandas.Series`
1590 Array of calibrated AB magnitudes.
1591 """
1592 return -2.5 * np.log10(self.instFluxToNanojanskyinstFluxToNanojansky(instFlux, localCalib)) + self.logNJanskyToABlogNJanskyToAB
1593
1594 def instFluxErrToMagnitudeErr(self, instFlux, instFluxErr, localCalib, localCalibErr):
1595 """Convert instrument flux err to nanojanskys.
1596
1597 Parameters
1598 ----------
1599 instFlux : `numpy.ndarray` or `pandas.Series`
1600 Array of instrument flux measurements
1601 instFluxErr : `numpy.ndarray` or `pandas.Series`
1602 Errors on associated ``instFlux`` values
1603 localCalib : `numpy.ndarray` or `pandas.Series`
1604 Array of local photometric calibration estimates.
1605 localCalibErr : `numpy.ndarray` or `pandas.Series`
1606 Errors on associated ``localCalib`` values
1607
1608 Returns
1609 -------
1610 calibMagErr: `numpy.ndarray` or `pandas.Series`
1611 Error on calibrated AB magnitudes.
1612 """
1613 err = self.instFluxErrToNanojanskyErrinstFluxErrToNanojanskyErr(instFlux, instFluxErr, localCalib, localCalibErr)
1614 return 2.5 / np.log(10) * err / self.instFluxToNanojanskyinstFluxToNanojansky(instFlux, instFluxErr)
1615
1616
1618 """Compute calibrated fluxes using the local calibration value.
1619
1620 See also
1621 --------
1622 LocalNanojansky
1623 LocalNanojanskyErr
1624 LocalMagnitude
1625 LocalMagnitudeErr
1626 """
1627
1628 @property
1629 def columns(self):
1630 return [self.instFluxColinstFluxCol, self.photoCalibColphotoCalibCol]
1631
1632 @property
1633 def name(self):
1634 return f'flux_{self.instFluxCol}'
1635
1636 def _func(self, df):
1637 return self.instFluxToNanojanskyinstFluxToNanojansky(df[self.instFluxColinstFluxCol], df[self.photoCalibColphotoCalibCol])
1638
1639
1641 """Compute calibrated flux errors using the local calibration value.
1642
1643 See also
1644 --------
1645 LocalNanojansky
1646 LocalNanojanskyErr
1647 LocalMagnitude
1648 LocalMagnitudeErr
1649 """
1650
1651 @property
1652 def columns(self):
1653 return [self.instFluxColinstFluxCol, self.instFluxErrColinstFluxErrCol,
1654 self.photoCalibColphotoCalibCol, self.photoCalibErrColphotoCalibErrCol]
1655
1656 @property
1657 def name(self):
1658 return f'fluxErr_{self.instFluxCol}'
1659
1660 def _func(self, df):
1661 return self.instFluxErrToNanojanskyErrinstFluxErrToNanojanskyErr(df[self.instFluxColinstFluxCol], df[self.instFluxErrColinstFluxErrCol],
1662 df[self.photoCalibColphotoCalibCol], df[self.photoCalibErrColphotoCalibErrCol])
1663
1664
1666 """Compute calibrated AB magnitudes using the local calibration value.
1667
1668 See also
1669 --------
1670 LocalNanojansky
1671 LocalNanojanskyErr
1672 LocalMagnitude
1673 LocalMagnitudeErr
1674 """
1675
1676 @property
1677 def columns(self):
1678 return [self.instFluxColinstFluxCol, self.photoCalibColphotoCalibCol]
1679
1680 @property
1681 def name(self):
1682 return f'mag_{self.instFluxCol}'
1683
1684 def _func(self, df):
1685 return self.instFluxToMagnitudeinstFluxToMagnitude(df[self.instFluxColinstFluxCol],
1686 df[self.photoCalibColphotoCalibCol])
1687
1688
1690 """Compute calibrated AB magnitude errors using the local calibration value.
1691
1692 See also
1693 --------
1694 LocalNanojansky
1695 LocalNanojanskyErr
1696 LocalMagnitude
1697 LocalMagnitudeErr
1698 """
1699
1700 @property
1701 def columns(self):
1702 return [self.instFluxColinstFluxCol, self.instFluxErrColinstFluxErrCol,
1703 self.photoCalibColphotoCalibCol, self.photoCalibErrColphotoCalibErrCol]
1704
1705 @property
1706 def name(self):
1707 return f'magErr_{self.instFluxCol}'
1708
1709 def _func(self, df):
1710 return self.instFluxErrToMagnitudeErrinstFluxErrToMagnitudeErr(df[self.instFluxColinstFluxCol],
1711 df[self.instFluxErrColinstFluxErrCol],
1712 df[self.photoCalibColphotoCalibCol],
1713 df[self.photoCalibErrColphotoCalibErrCol])
1714
1715
1717 """Compute absolute mean of dipole fluxes.
1718
1719 See also
1720 --------
1721 LocalNanojansky
1722 LocalNanojanskyErr
1723 LocalMagnitude
1724 LocalMagnitudeErr
1725 LocalDipoleMeanFlux
1726 LocalDipoleMeanFluxErr
1727 LocalDipoleDiffFlux
1728 LocalDipoleDiffFluxErr
1729 """
1730 def __init__(self,
1731 instFluxPosCol,
1732 instFluxNegCol,
1733 instFluxPosErrCol,
1734 instFluxNegErrCol,
1735 photoCalibCol,
1736 photoCalibErrCol,
1737 **kwargs):
1738 self.instFluxNegColinstFluxNegCol = instFluxNegCol
1739 self.instFluxPosColinstFluxPosCol = instFluxPosCol
1740 self.instFluxNegErrColinstFluxNegErrCol = instFluxNegErrCol
1741 self.instFluxPosErrColinstFluxPosErrCol = instFluxPosErrCol
1742 self.photoCalibColphotoCalibColphotoCalibCol = photoCalibCol
1743 self.photoCalibErrColphotoCalibErrColphotoCalibErrCol = photoCalibErrCol
1744 super().__init__(instFluxNegCol,
1745 instFluxNegErrCol,
1746 photoCalibCol,
1747 photoCalibErrCol,
1748 **kwargs)
1749
1750 @property
1751 def columns(self):
1752 return [self.instFluxPosColinstFluxPosCol,
1753 self.instFluxNegColinstFluxNegCol,
1754 self.photoCalibColphotoCalibColphotoCalibCol]
1755
1756 @property
1757 def name(self):
1758 return f'dipMeanFlux_{self.instFluxPosCol}_{self.instFluxNegCol}'
1759
1760 def _func(self, df):
1761 return 0.5*(np.fabs(self.instFluxToNanojanskyinstFluxToNanojansky(df[self.instFluxNegColinstFluxNegCol], df[self.photoCalibColphotoCalibColphotoCalibCol]))
1762 + np.fabs(self.instFluxToNanojanskyinstFluxToNanojansky(df[self.instFluxPosColinstFluxPosCol], df[self.photoCalibColphotoCalibColphotoCalibCol])))
1763
1764
1766 """Compute the error on the absolute mean of dipole fluxes.
1767
1768 See also
1769 --------
1770 LocalNanojansky
1771 LocalNanojanskyErr
1772 LocalMagnitude
1773 LocalMagnitudeErr
1774 LocalDipoleMeanFlux
1775 LocalDipoleMeanFluxErr
1776 LocalDipoleDiffFlux
1777 LocalDipoleDiffFluxErr
1778 """
1779
1780 @property
1781 def columns(self):
1782 return [self.instFluxPosColinstFluxPosCol,
1783 self.instFluxNegColinstFluxNegCol,
1784 self.instFluxPosErrColinstFluxPosErrCol,
1785 self.instFluxNegErrColinstFluxNegErrCol,
1786 self.photoCalibColphotoCalibColphotoCalibCol,
1787 self.photoCalibErrColphotoCalibErrColphotoCalibErrCol]
1788
1789 @property
1790 def name(self):
1791 return f'dipMeanFluxErr_{self.instFluxPosCol}_{self.instFluxNegCol}'
1792
1793 def _func(self, df):
1794 return 0.5*np.sqrt(
1795 (np.fabs(df[self.instFluxNegColinstFluxNegCol]) + np.fabs(df[self.instFluxPosColinstFluxPosCol])
1796 * df[self.photoCalibErrColphotoCalibErrColphotoCalibErrCol])**2
1797 + (df[self.instFluxNegErrColinstFluxNegErrCol]**2 + df[self.instFluxPosErrColinstFluxPosErrCol]**2)
1798 * df[self.photoCalibColphotoCalibColphotoCalibCol]**2)
1799
1800
1802 """Compute the absolute difference of dipole fluxes.
1803
1804 Value is (abs(pos) - abs(neg))
1805
1806 See also
1807 --------
1808 LocalNanojansky
1809 LocalNanojanskyErr
1810 LocalMagnitude
1811 LocalMagnitudeErr
1812 LocalDipoleMeanFlux
1813 LocalDipoleMeanFluxErr
1814 LocalDipoleDiffFlux
1815 LocalDipoleDiffFluxErr
1816 """
1817
1818 @property
1819 def columns(self):
1820 return [self.instFluxPosColinstFluxPosCol,
1821 self.instFluxNegColinstFluxNegCol,
1822 self.photoCalibColphotoCalibColphotoCalibCol]
1823
1824 @property
1825 def name(self):
1826 return f'dipDiffFlux_{self.instFluxPosCol}_{self.instFluxNegCol}'
1827
1828 def _func(self, df):
1829 return (np.fabs(self.instFluxToNanojanskyinstFluxToNanojansky(df[self.instFluxPosColinstFluxPosCol], df[self.photoCalibColphotoCalibColphotoCalibCol]))
1830 - np.fabs(self.instFluxToNanojanskyinstFluxToNanojansky(df[self.instFluxNegColinstFluxNegCol], df[self.photoCalibColphotoCalibColphotoCalibCol])))
1831
1832
1834 """Compute the error on the absolute difference of dipole fluxes.
1835
1836 See also
1837 --------
1838 LocalNanojansky
1839 LocalNanojanskyErr
1840 LocalMagnitude
1841 LocalMagnitudeErr
1842 LocalDipoleMeanFlux
1843 LocalDipoleMeanFluxErr
1844 LocalDipoleDiffFlux
1845 LocalDipoleDiffFluxErr
1846 """
1847
1848 @property
1849 def columns(self):
1850 return [self.instFluxPosColinstFluxPosCol,
1851 self.instFluxNegColinstFluxNegCol,
1852 self.instFluxPosErrColinstFluxPosErrCol,
1853 self.instFluxNegErrColinstFluxNegErrCol,
1854 self.photoCalibColphotoCalibColphotoCalibCol,
1855 self.photoCalibErrColphotoCalibErrColphotoCalibErrCol]
1856
1857 @property
1858 def name(self):
1859 return f'dipDiffFluxErr_{self.instFluxPosCol}_{self.instFluxNegCol}'
1860
1861 def _func(self, df):
1862 return np.sqrt(
1863 ((np.fabs(df[self.instFluxPosColinstFluxPosCol]) - np.fabs(df[self.instFluxNegColinstFluxNegCol]))
1864 * df[self.photoCalibErrColphotoCalibErrColphotoCalibErrCol])**2
1865 + (df[self.instFluxPosErrColinstFluxPosErrCol]**2 + df[self.instFluxNegErrColinstFluxNegErrCol]**2)
1866 * df[self.photoCalibColphotoCalibColphotoCalibCol]**2)
1867
1868
1870 """Base class for returning the ratio of 2 columns.
1871
1872 Can be used to compute a Signal to Noise ratio for any input flux.
1873
1874 Parameters
1875 ----------
1876 numerator : `str`
1877 Name of the column to use at the numerator in the ratio
1878 denominator : `str`
1879 Name of the column to use as the denominator in the ratio.
1880 """
1881 def __init__(self,
1882 numerator,
1883 denominator,
1884 **kwargs):
1885 self.numeratornumerator = numerator
1886 self.denominatordenominator = denominator
1887 super().__init__(**kwargs)
1888
1889 @property
1890 def columns(self):
1891 return [self.numeratornumerator, self.denominatordenominator]
1892
1893 @property
1894 def name(self):
1895 return f'ratio_{self.numerator}_{self.denominator}'
1896
1897 def _func(self, df):
1898 with np.warnings.catch_warnings():
1899 np.warnings.filterwarnings('ignore', r'invalid value encountered')
1900 np.warnings.filterwarnings('ignore', r'divide by zero')
1901 return df[self.numeratornumerator] / df[self.denominatordenominator]
def multilevelColumns(self, parq, **kwargs)
Definition: functors.py:955
def __init__(self, col, filt2, filt1, **kwargs)
Definition: functors.py:926
def __init__(self, col, **kwargs)
Definition: functors.py:640
def __init__(self, funcs, **kwargs)
Definition: functors.py:403
def __call__(self, data, **kwargs)
Definition: functors.py:455
def from_file(cls, filename, **kwargs)
Definition: functors.py:536
def from_yaml(cls, translationDefinition, **kwargs)
Definition: functors.py:545
def renameCol(cls, col, renameRules)
Definition: functors.py:527
def multilevelColumns(self, data, **kwargs)
Definition: functors.py:441
def pixelScaleArcseconds(self, cd11, cd12, cd21, cd22)
Definition: functors.py:1285
def __init__(self, col, colCD_1_1, colCD_1_2, colCD_2_1, colCD_2_2, **kwargs)
Definition: functors.py:1362
def __init__(self, col, colCD_1_1, colCD_1_2, colCD_2_1, colCD_2_2, **kwargs)
Definition: functors.py:1325
def __init__(self, col, **kwargs)
Definition: functors.py:686
def __init__(self, expr, **kwargs)
Definition: functors.py:609
def __init__(self, **kwargs)
Definition: functors.py:714
def __call__(self, catalog, **kwargs)
Definition: functors.py:717
def __init__(self, colXX, colXY, colYY, **kwargs)
Definition: functors.py:1118
def __init__(self, colXX, colXY, colYY, **kwargs)
Definition: functors.py:1136
def __call__(self, data, dropna=False)
Definition: functors.py:340
def _func(self, df, dropna=True)
Definition: functors.py:279
def multilevelColumns(self, data, columnIndex=None, returnTuple=False)
Definition: functors.py:229
def _get_data_columnLevelNames(self, data, columnIndex=None)
Definition: functors.py:186
def difference(self, data1, data2, **kwargs)
Definition: functors.py:351
def __init__(self, filt=None, dataset=None, noDup=None)
Definition: functors.py:142
def _get_columnIndex(self, data)
Definition: functors.py:282
def _colsFromDict(self, colDict, columnIndex=None)
Definition: functors.py:208
def _get_data_columnLevels(self, data, columnIndex=None)
Definition: functors.py:162
def __init__(self, ra, decl, **kwargs)
Definition: functors.py:735
def __call__(self, parq, dropna=False, **kwargs)
Definition: functors.py:975
def __init__(self, instFluxPosCol, instFluxNegCol, instFluxPosErrCol, instFluxNegErrCol, photoCalibCol, photoCalibErrCol, **kwargs)
Definition: functors.py:1737
def instFluxToNanojansky(self, instFlux, localCalib)
Definition: functors.py:1539
def instFluxErrToMagnitudeErr(self, instFlux, instFluxErr, localCalib, localCalibErr)
Definition: functors.py:1594
def __init__(self, instFluxCol, instFluxErrCol, photoCalibCol, photoCalibErrCol, **kwargs)
Definition: functors.py:1532
def instFluxErrToNanojanskyErr(self, instFlux, instFluxErr, localCalib, localCalibErr)
Definition: functors.py:1556
def instFluxToMagnitude(self, instFlux, localCalib)
Definition: functors.py:1577
def __init__(self, colCD_1_1, colCD_1_2, colCD_2_1, colCD_2_2, **kwargs)
Definition: functors.py:1176
def computeDeltaRaDec(self, x, y, cd11, cd12, cd21, cd22)
Definition: functors.py:1183
def computeSkySeperation(self, ra1, dec1, ra2, dec2)
Definition: functors.py:1212
def getSkySeperationFromPixel(self, x1, y1, x2, y2, cd11, cd12, cd21, cd22)
Definition: functors.py:1238
def __init__(self, col1, col2, **kwargs)
Definition: functors.py:873
def __init__(self, *args, **kwargs)
Definition: functors.py:834
def __init__(self, col, calib=None, **kwargs)
Definition: functors.py:796
def dn2mag(self, dn, fluxMag0)
Definition: functors.py:1456
def dn2flux(self, dn, fluxMag0)
Definition: functors.py:1453
def dn2fluxErr(self, dn, dnErr, fluxMag0, fluxMag0Err)
Definition: functors.py:1462
def dn2MagErr(self, dn, dnErr, fluxMag0, fluxMag0Err)
Definition: functors.py:1467
def __init__(self, colFlux, colFluxErr=None, calib=None, **kwargs)
Definition: functors.py:1422
def __call__(self, catalog, **kwargs)
Definition: functors.py:704
def __init__(self, **kwargs)
Definition: functors.py:701
def __init__(self, colXX, colXY, colYY, **kwargs)
Definition: functors.py:1152
def __init__(self, numerator, denominator, **kwargs)
Definition: functors.py:1884
def mag_aware_eval(df, expr)
Definition: functors.py:574
def init_fromDict(initDict, basePath='lsst.pipe.tasks.functors', typeKey='functor', name=None)
Definition: functors.py:40