# lsst.pipe.tasks 23.0.0+f2fbba1123 -- functors.py
1# This file is part of pipe_tasks.
2#
3# LSST Data Management System
4# This product includes software developed by the
5# LSST Project (http://www.lsst.org/).
6# See COPYRIGHT file at the top of the source tree.
7#
8# This program is free software: you can redistribute it and/or modify
9# it under the terms of the GNU General Public License as published by
10# the Free Software Foundation, either version 3 of the License, or
11# (at your option) any later version.
12#
13# This program is distributed in the hope that it will be useful,
14# but WITHOUT ANY WARRANTY; without even the implied warranty of
15# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16# GNU General Public License for more details.
17#
18# You should have received a copy of the LSST License Statement and
19# the GNU General Public License along with this program. If not,
20# see <https://www.lsstcorp.org/LegalNotices/>.
21#
import os.path
import re
import warnings
from itertools import product

import astropy.units as u
import numpy as np
import pandas as pd
import yaml

from lsst.daf.butler import DeferredDatasetHandle
from lsst.daf.persistence import doImport
import lsst.geom as geom
import lsst.sphgeom as sphgeom

from .parquetTable import ParquetTable, MultilevelParquetTable
37
38
def init_fromDict(initDict, basePath='lsst.pipe.tasks.functors',
                  typeKey='functor', name=None):
    """Initialize an object defined in a dictionary

    The object needs to be importable as
    f'{basePath}.{initDict[typeKey]}'
    The positional and keyword arguments (if any) are contained in
    "args" and "kwargs" entries in the dictionary, respectively.
    This is used in `functors.CompositeFunctor.from_yaml` to initialize
    a composite functor from a specification in a YAML file.

    Parameters
    ----------
    initDict : dictionary
        Dictionary describing object's initialization. Must contain
        an entry keyed by ``typeKey`` that is the name of the object,
        relative to ``basePath``.
    basePath : str
        Path relative to module in which ``initDict[typeKey]`` is defined.
    typeKey : str
        Key of ``initDict`` that is the name of the object
        (relative to `basePath`).
    name : str, optional
        Label for this object; used only to make error messages
        more informative if construction fails.
    """
    initDict = initDict.copy()
    # TO DO: DM-21956 We should be able to define functors outside this module
    pythonType = doImport(f'{basePath}.{initDict.pop(typeKey)}')
    # "args" may be absent, a single string, or a list of positional args.
    args = initDict.pop('args', [])
    if isinstance(args, str):
        args = [args]
    try:
        # Remaining entries of initDict are passed through as kwargs.
        element = pythonType(*args, **initDict)
    except Exception as e:
        message = f'Error in constructing functor "{name}" of type {pythonType.__name__} with args: {args}'
        # Re-raise as the same exception type, with construction context added.
        raise type(e)(message, e.args)
    return element
76
77
class Functor(object):
    """Define and execute a calculation on a ParquetTable

    The `__call__` method accepts either a `ParquetTable` object or a
    `DeferredDatasetHandle`, and returns the
    result of the calculation as a single column. Each functor defines what
    columns are needed for the calculation, and only these columns are read
    from the `ParquetTable`.

    The action of `__call__` consists of two steps: first, loading the
    necessary columns from disk into memory as a `pandas.DataFrame` object;
    and second, performing the computation on this dataframe and returning the
    result.

    To define a new `Functor`, a subclass must define a `_func` method,
    that takes a `pandas.DataFrame` and returns result in a `pandas.Series`.
    In addition, it must define the following attributes

    * `_columns`: The columns necessary to perform the calculation
    * `name`: A name appropriate for a figure axis label
    * `shortname`: A name appropriate for use as a dictionary key

    On initialization, a `Functor` should declare what band (`filt` kwarg)
    and dataset (e.g. `'ref'`, `'meas'`, `'forced_src'`) it is intended to be
    applied to. This enables the `_get_data` method to extract the proper
    columns from the parquet file. If not specified, the dataset will fall back
    on the `_defaultDataset` attribute. If band is not specified and `dataset`
    is anything other than `'ref'`, then an error will be raised when trying to
    perform the calculation.

    Originally, `Functor` was set up to expect
    datasets formatted like the `deepCoadd_obj` dataset; that is, a
    dataframe with a multi-level column index, with the levels of the
    column index being `band`, `dataset`, and `column`.
    It has since been generalized to apply to dataframes without multi-level
    indices and multi-level indices with just `dataset` and `column` levels.
    In addition, the `_get_data` method that reads
    the dataframe from the `ParquetTable` will return a dataframe with column
    index levels defined by the `_dfLevels` attribute; by default, this is
    `column`.

    The `_dfLevels` attributes should generally not need to
    be changed, unless `_func` needs columns from multiple filters or datasets
    to do the calculation.
    An example of this is the `lsst.pipe.tasks.functors.Color` functor, for
    which `_dfLevels = ('band', 'column')`, and `_func` expects the dataframe
    it gets to have those levels in the column index.

    Parameters
    ----------
    filt : str
        Filter upon which to do the calculation

    dataset : str
        Dataset upon which to do the calculation
        (e.g., 'ref', 'meas', 'forced_src').
    """

    _defaultDataset = 'ref'
    _dfLevels = ('column',)
    _defaultNoDup = False

    def __init__(self, filt=None, dataset=None, noDup=None):
        self.filt = filt
        self.dataset = dataset if dataset is not None else self._defaultDataset
        self._noDup = noDup

    @property
    def noDup(self):
        # An explicit per-instance setting wins; otherwise use the class default.
        if self._noDup is not None:
            return self._noDup
        else:
            return self._defaultNoDup

    @property
    def columns(self):
        """Columns required to perform calculation
        """
        if not hasattr(self, '_columns'):
            raise NotImplementedError('Must define columns property or _columns attribute')
        return self._columns

    def _get_data_columnLevels(self, data, columnIndex=None):
        """Gets the names of the column index levels

        This should only be called in the context of a multilevel table.
        The logic here is to enable this to work both with the gen2 `MultilevelParquetTable`
        and with the gen3 `DeferredDatasetHandle`.

        Parameters
        ----------
        data : `MultilevelParquetTable` or `DeferredDatasetHandle`

        columnIndex (optional): pandas `Index` object
            if not passed, then it is read from the `DeferredDatasetHandle`
        """
        if isinstance(data, DeferredDatasetHandle):
            if columnIndex is None:
                columnIndex = data.get(component="columns")
        # A passed-in (or just-read) columnIndex takes precedence over data type.
        if columnIndex is not None:
            return columnIndex.names
        if isinstance(data, MultilevelParquetTable):
            return data.columnLevels
        else:
            raise TypeError(f"Unknown type for data: {type(data)}!")

    def _get_data_columnLevelNames(self, data, columnIndex=None):
        """Gets the content of each of the column levels for a multilevel table

        Similar to `_get_data_columnLevels`, this enables backward compatibility with gen2.

        Mirrors original gen2 implementation within `pipe.tasks.parquetTable.MultilevelParquetTable`
        """
        if isinstance(data, DeferredDatasetHandle):
            if columnIndex is None:
                columnIndex = data.get(component="columns")
        if columnIndex is not None:
            # Unfold the multi-index tuples into {level name: unique values}.
            columnLevels = columnIndex.names
            columnLevelNames = {
                level: list(np.unique(np.array([c for c in columnIndex])[:, i]))
                for i, level in enumerate(columnLevels)
            }
            return columnLevelNames
        if isinstance(data, MultilevelParquetTable):
            return data.columnLevelNames
        else:
            raise TypeError(f"Unknown type for data: {type(data)}!")

    def _colsFromDict(self, colDict, columnIndex=None):
        """Converts dictionary column specification to a list of columns

        This mirrors the original gen2 implementation within `pipe.tasks.parquetTable.MultilevelParquetTable`
        """
        new_colDict = {}
        columnLevels = self._get_data_columnLevels(None, columnIndex=columnIndex)

        for i, lev in enumerate(columnLevels):
            if lev in colDict:
                if isinstance(colDict[lev], str):
                    new_colDict[lev] = [colDict[lev]]
                else:
                    new_colDict[lev] = colDict[lev]
            else:
                # Level not constrained: take every value present in the index.
                new_colDict[lev] = columnIndex.levels[i]

        levelCols = [new_colDict[lev] for lev in columnLevels]
        # Cartesian product of the per-level selections gives the full tuples.
        cols = product(*levelCols)
        return list(cols)

    def multilevelColumns(self, data, columnIndex=None, returnTuple=False):
        """Returns columns needed by functor from multilevel dataset

        To access tables with multilevel column structure, the `MultilevelParquetTable`
        or `DeferredDatasetHandle` need to be passed either a list of tuples or a
        dictionary.

        Parameters
        ----------
        data : `MultilevelParquetTable` or `DeferredDatasetHandle`

        columnIndex (optional): pandas `Index` object
            either passed or read in from `DeferredDatasetHandle`.

        `returnTuple` : bool
            If true, then return a list of tuples rather than the column dictionary
            specification. This is set to `True` by `CompositeFunctor` in order to be able to
            combine columns from the various component functors.

        """
        if isinstance(data, DeferredDatasetHandle) and columnIndex is None:
            columnIndex = data.get(component="columns")

        # Confirm that the dataset has the column levels the functor is expecting it to have.
        columnLevels = self._get_data_columnLevels(data, columnIndex)

        columnDict = {'column': self.columns,
                      'dataset': self.dataset}
        if self.filt is None:
            columnLevelNames = self._get_data_columnLevelNames(data, columnIndex)
            if "band" in columnLevels:
                if self.dataset == "ref":
                    # 'ref' values are band-independent; any band will do.
                    columnDict["band"] = columnLevelNames["band"][0]
                else:
                    raise ValueError(f"'filt' not set for functor {self.name}"
                                     f"(dataset {self.dataset}) "
                                     "and ParquetTable "
                                     "contains multiple filters in column index. "
                                     "Set 'filt' or set 'dataset' to 'ref'.")
        else:
            columnDict['band'] = self.filt

        if isinstance(data, MultilevelParquetTable):
            return data._colsFromDict(columnDict)
        elif isinstance(data, DeferredDatasetHandle):
            if returnTuple:
                return self._colsFromDict(columnDict, columnIndex=columnIndex)
            else:
                return columnDict

    def _func(self, df, dropna=True):
        # Subclasses must implement the actual computation on the dataframe.
        raise NotImplementedError('Must define calculation on dataframe')

    def _get_columnIndex(self, data):
        """Return columnIndex
        """

        if isinstance(data, DeferredDatasetHandle):
            return data.get(component="columns")
        else:
            return None

    def _get_data(self, data):
        """Retrieve dataframe necessary for calculation.

        The data argument can be a DataFrame, a ParquetTable instance, or a gen3 DeferredDatasetHandle

        Returns dataframe upon which `self._func` can act.

        N.B. while passing a raw pandas `DataFrame` *should* work here, it has not been tested.
        """
        if isinstance(data, pd.DataFrame):
            return data

        # First thing to do: check to see if the data source has a multilevel column index or not.
        columnIndex = self._get_columnIndex(data)
        is_multiLevel = isinstance(data, MultilevelParquetTable) or isinstance(columnIndex, pd.MultiIndex)

        # Simple single-level parquet table, gen2
        if isinstance(data, ParquetTable) and not is_multiLevel:
            columns = self.columns
            df = data.toDataFrame(columns=columns)
            return df

        # Get proper columns specification for this functor
        if is_multiLevel:
            columns = self.multilevelColumns(data, columnIndex=columnIndex)
        else:
            columns = self.columns

        if isinstance(data, MultilevelParquetTable):
            # Load in-memory dataframe with appropriate columns the gen2 way
            df = data.toDataFrame(columns=columns, droplevels=False)
        elif isinstance(data, DeferredDatasetHandle):
            # Load in-memory dataframe with appropriate columns the gen3 way
            df = data.get(parameters={"columns": columns})

        # Drop unnecessary column levels
        if is_multiLevel:
            df = self._setLevels(df)

        return df

    def _setLevels(self, df):
        # Keep only the column-index levels named in _dfLevels.
        levelsToDrop = [n for n in df.columns.names if n not in self._dfLevels]
        df.columns = df.columns.droplevel(levelsToDrop)
        return df

    def _dropna(self, vals):
        return vals.dropna()

    def __call__(self, data, dropna=False):
        try:
            df = self._get_data(data)
            vals = self._func(df)
        except Exception:
            # On any failure, return a column of NaN so downstream
            # concatenation in CompositeFunctor still works.
            vals = self.fail(df)
        if dropna:
            vals = self._dropna(vals)

        return vals

    def difference(self, data1, data2, **kwargs):
        """Computes difference between functor called on two different ParquetTable objects
        """
        return self(data1, **kwargs) - self(data2, **kwargs)

    def fail(self, df):
        # NaN column aligned with the input dataframe's index.
        return pd.Series(np.full(len(df), np.nan), index=df.index)

    @property
    def name(self):
        """Full name of functor (suitable for figure labels)
        """
        # NOTE(review): returns (not raises) NotImplementedError, matching the
        # historical behavior relied upon by error-message formatting above.
        return NotImplementedError

    @property
    def shortname(self):
        """Short name of functor (suitable for column name/dict key)
        """
        return self.name
370
371
class CompositeFunctor(Functor):
    """Perform multiple calculations at once on a catalog

    The role of a `CompositeFunctor` is to group together computations from
    multiple functors. Instead of returning `pandas.Series` a
    `CompositeFunctor` returns a `pandas.Dataframe`, with the column names
    being the keys of `funcDict`.

    The `columns` attribute of a `CompositeFunctor` is the union of all columns
    in all the component functors.

    A `CompositeFunctor` does not use a `_func` method itself; rather,
    when a `CompositeFunctor` is called, all its columns are loaded
    at once, and the resulting dataframe is passed to the `_func` method of each component
    functor. This has the advantage of only doing I/O (reading from parquet file) once,
    and works because each individual `_func` method of each component functor does not
    care if there are *extra* columns in the dataframe being passed; only that it must contain
    *at least* the `columns` it expects.

    An important and useful class method is `from_yaml`, which takes as argument the path to a YAML
    file specifying a collection of functors.

    Parameters
    ----------
    funcs : `dict` or `list`
        Dictionary or list of functors. If a list, then it will be converted
        into a dictionary according to the `.shortname` attribute of each functor.

    """
    dataset = None

    def __init__(self, funcs, **kwargs):

        # Accept a ready-made mapping, or build one keyed by shortname.
        if isinstance(funcs, dict):
            self.funcDict = funcs
        else:
            self.funcDict = {f.shortname: f for f in funcs}

        self._filt = None

        super().__init__(**kwargs)

    @property
    def filt(self):
        return self._filt

    @filt.setter
    def filt(self, filt):
        # Propagate the filter setting to every component functor.
        if filt is not None:
            for _, f in self.funcDict.items():
                f.filt = filt
        self._filt = filt

    def update(self, new):
        """Add the functors from another dict or CompositeFunctor."""
        if isinstance(new, dict):
            self.funcDict.update(new)
        elif isinstance(new, CompositeFunctor):
            self.funcDict.update(new.funcDict)
        else:
            raise TypeError('Can only update with dictionary or CompositeFunctor.')

        # Make sure new functors have the same 'filt' set
        if self.filt is not None:
            # Re-assigning triggers the setter, which pushes filt to all members.
            self.filt = self.filt

    @property
    def columns(self):
        # Union of all columns required by the component functors.
        return list(set([x for y in [f.columns for f in self.funcDict.values()] for x in y]))

    def multilevelColumns(self, data, **kwargs):
        # Get the union of columns for all component functors. Note the need to have `returnTuple=True` here.
        return list(
            set(
                [
                    x
                    for y in [
                        f.multilevelColumns(data, returnTuple=True, **kwargs) for f in self.funcDict.values()
                    ]
                    for x in y
                ]
            )
        )

    def __call__(self, data, **kwargs):
        """Apply the functor to the data table

        Parameters
        ----------
        data : `lsst.daf.butler.DeferredDatasetHandle`,
            `lsst.pipe.tasks.parquetTable.MultilevelParquetTable`,
            or `pandas.DataFrame`.
            The table or a pointer to a table on disk from which columns can
            be accessed
        """
        columnIndex = self._get_columnIndex(data)

        # First, determine whether data has a multilevel index (either gen2 or gen3)
        is_multiLevel = isinstance(data, MultilevelParquetTable) or isinstance(columnIndex, pd.MultiIndex)

        # Multilevel index, gen2 or gen3
        if is_multiLevel:
            columns = self.multilevelColumns(data, columnIndex=columnIndex)

            if isinstance(data, MultilevelParquetTable):
                # Read data into memory the gen2 way
                df = data.toDataFrame(columns=columns, droplevels=False)
            elif isinstance(data, DeferredDatasetHandle):
                # Read data into memory the gen3 way
                df = data.get(parameters={"columns": columns})

            valDict = {}
            for k, f in self.funcDict.items():
                try:
                    # Each functor only sees (and flattens) its own columns.
                    subdf = f._setLevels(
                        df[f.multilevelColumns(data, returnTuple=True, columnIndex=columnIndex)]
                    )
                    valDict[k] = f._func(subdf)
                except Exception as e:
                    try:
                        valDict[k] = f.fail(subdf)
                    except NameError:
                        # subdf was never assigned; re-raise the original error.
                        raise e

        else:
            if isinstance(data, DeferredDatasetHandle):
                # input if Gen3 deferLoad=True
                df = data.get(parameters={"columns": self.columns})
            elif isinstance(data, pd.DataFrame):
                # input if Gen3 deferLoad=False
                df = data
            else:
                # Original Gen2 input is type ParquetTable and the fallback
                df = data.toDataFrame(columns=self.columns)

            valDict = {k: f._func(df) for k, f in self.funcDict.items()}

        # Check that output columns are actually columns
        for name, colVal in valDict.items():
            if len(colVal.shape) != 1:
                raise RuntimeError("Transformed column '%s' is not the shape of a column. "
                                   "It is shaped %s and type %s." % (name, colVal.shape, type(colVal)))

        try:
            valDf = pd.concat(valDict, axis=1)
        except TypeError:
            print([(k, type(v)) for k, v in valDict.items()])
            raise

        if kwargs.get('dropna', False):
            valDf = valDf.dropna(how='any')

        return valDf

    @classmethod
    def renameCol(cls, col, renameRules):
        """Apply prefix-rename rules (list of (old, new) pairs) to a column name."""
        if renameRules is None:
            return col
        for old, new in renameRules:
            if col.startswith(old):
                col = col.replace(old, new)
        return col

    @classmethod
    def from_file(cls, filename, **kwargs):
        """Build a CompositeFunctor from a YAML specification file."""
        # Allow environment variables in the filename.
        filename = os.path.expandvars(filename)
        with open(filename) as f:
            translationDefinition = yaml.safe_load(f)

        return cls.from_yaml(translationDefinition, **kwargs)

    @classmethod
    def from_yaml(cls, translationDefinition, **kwargs):
        """Build a CompositeFunctor from a parsed YAML specification dict."""
        funcs = {}
        for func, val in translationDefinition['funcs'].items():
            funcs[func] = init_fromDict(val, name=func)

        if 'flag_rename_rules' in translationDefinition:
            renameRules = translationDefinition['flag_rename_rules']
        else:
            renameRules = None

        # Plain flag columns are mapped to Column functors on their dataset.
        if 'calexpFlags' in translationDefinition:
            for flag in translationDefinition['calexpFlags']:
                funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='calexp')

        if 'refFlags' in translationDefinition:
            for flag in translationDefinition['refFlags']:
                funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='ref')

        if 'forcedFlags' in translationDefinition:
            for flag in translationDefinition['forcedFlags']:
                funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='forced_src')

        if 'flags' in translationDefinition:
            for flag in translationDefinition['flags']:
                funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='meas')

        return cls(funcs, **kwargs)
572
573
def mag_aware_eval(df, expr):
    """Evaluate an expression on a DataFrame, knowing what the 'mag' function means

    Builds on `pandas.DataFrame.eval`, which parses and executes math on dataframes.

    Parameters
    ----------
    df : pandas.DataFrame
        Dataframe on which to evaluate expression.

    expr : str
        Expression.
    """
    try:
        # Expand mag(col) into the instrumental-magnitude expression.
        expr_new = re.sub(r'mag\((\w+)\)', r'-2.5*log(\g<1>)/log(10)', expr)
        val = df.eval(expr_new)
    except Exception:  # Should check what actually gets raised
        # Retry assuming mag() arguments name "<col>_instFlux" columns.
        expr_new = re.sub(r'mag\((\w+)\)', r'-2.5*log(\g<1>_instFlux)/log(10)', expr)
        val = df.eval(expr_new)
    return val
594
595
class CustomFunctor(Functor):
    """Arbitrary computation on a catalog

    Column names (and thus the columns to be loaded from catalog) are found
    by finding all words and trying to ignore all "math-y" words.

    Parameters
    ----------
    expr : str
        Expression to evaluate, to be parsed and executed by `mag_aware_eval`.
    """
    # Words in the expression that are math functions, not column names.
    _ignore_words = ('mag', 'sin', 'cos', 'exp', 'log', 'sqrt')

    def __init__(self, expr, **kwargs):
        self.expr = expr
        super().__init__(**kwargs)

    @property
    def name(self):
        return self.expr

    @property
    def columns(self):
        """Columns required: every identifier in the expression that is not
        a recognized math word, with bare ``mag(...)`` arguments expanded to
        their ``_instFlux`` columns.
        """
        flux_cols = re.findall(r'mag\(\s*(\w+)\s*\)', self.expr)

        cols = [c for c in re.findall(r'[a-zA-Z_]+', self.expr) if c not in self._ignore_words]
        not_a_col = []
        for c in flux_cols:
            if not re.search('_instFlux$', c):
                # The bare name is shorthand for its _instFlux column.
                cols.append(f'{c}_instFlux')
                not_a_col.append(c)
            else:
                cols.append(c)

        return list(set([c for c in cols if c not in not_a_col]))

    def _func(self, df):
        return mag_aware_eval(df, self.expr)
634
635
class Column(Functor):
    """Get column with specified name
    """

    def __init__(self, col, **kwargs):
        self.col = col
        super().__init__(**kwargs)

    @property
    def name(self):
        return self.col

    @property
    def columns(self):
        return [self.col]

    def _func(self, df):
        return df[self.col]
654
655
class Index(Column):
    """Return the value of the index for each object
    """

    columns = ['coord_ra']  # just a dummy; something has to be here
    _defaultDataset = 'ref'
    _defaultNoDup = True

    def _func(self, df):
        # The "value" is the dataframe index itself, re-expressed as a column.
        return pd.Series(df.index, index=df.index)
666
667
class IDColumn(Column):
    """Return the object ID (the dataframe index) as a column."""
    col = 'id'
    _allow_difference = False
    _defaultNoDup = True

    def _func(self, df):
        # The ID lives in the index, not in an actual 'id' column.
        return pd.Series(df.index, index=df.index)
675
676
class FootprintNPix(Column):
    """Number of pixels in the source footprint."""
    col = 'base_Footprint_nPix'
679
680
class CoordColumn(Column):
    """Base class for coordinate column, in degrees
    """
    # Whether the underlying catalog column is stored in radians.
    _radians = True

    def __init__(self, col, **kwargs):
        super().__init__(col, **kwargs)

    def _func(self, df):
        # Must not modify original column in case that column is used by another functor
        output = df[self.col] * 180 / np.pi if self._radians else df[self.col]
        return output
693
694
class RAColumn(CoordColumn):
    """Right Ascension, in degrees
    """
    name = 'RA'
    _defaultNoDup = True

    def __init__(self, **kwargs):
        super().__init__('coord_ra', **kwargs)

    def __call__(self, catalog, **kwargs):
        return super().__call__(catalog, **kwargs)
706
707
class DecColumn(CoordColumn):
    """Declination, in degrees
    """
    name = 'Dec'
    _defaultNoDup = True

    def __init__(self, **kwargs):
        super().__init__('coord_dec', **kwargs)

    def __call__(self, catalog, **kwargs):
        return super().__call__(catalog, **kwargs)
719
720
class HtmIndex20(Functor):
    """Compute the level 20 HtmIndex for the catalog.

    Parameters
    ----------
    ra : str
        Name of the right-ascension column (radians by default).
    decl : str
        Name of the declination column (radians by default).
    """
    name = "Htm20"
    htmLevel = 20
    _radians = True

    def __init__(self, ra, decl, **kwargs):
        self.pixelator = sphgeom.HtmPixelization(self.htmLevel)
        self.ra = ra
        self.decl = decl
        self._columns = [self.ra, self.decl]
        super().__init__(**kwargs)

    def _func(self, df):

        def computePixel(row):
            # Build a SpherePoint in the declared angular units, then pixelate.
            if self._radians:
                sphPoint = geom.SpherePoint(row[self.ra],
                                            row[self.decl],
                                            geom.radians)
            else:
                sphPoint = geom.SpherePoint(row[self.ra],
                                            row[self.decl],
                                            geom.degrees)
            return self.pixelator.index(sphPoint.getVector())

        return df.apply(computePixel, axis=1, result_type='reduce').astype('int64')
749
750
def fluxName(col):
    """Return ``col`` with the ``_instFlux`` suffix appended if it is missing."""
    suffix = '_instFlux'
    return col if col.endswith(suffix) else col + suffix
755
756
def fluxErrName(col):
    """Return ``col`` with the ``_instFluxErr`` suffix appended if it is missing."""
    suffix = '_instFluxErr'
    return col if col.endswith(suffix) else col + suffix
761
762
class Mag(Functor):
    """Compute calibrated magnitude

    Takes a `calib` argument, which returns the flux at mag=0
    as `calib.getFluxMag0()`. If not provided, then the default
    `fluxMag0` is 63095734448.0194, which is default for HSC.
    This default should be removed in DM-21955

    This calculation hides warnings about invalid values and dividing by zero.

    As for all functors, a `dataset` and `filt` kwarg should be provided upon
    initialization. Unlike the default `Functor`, however, the default dataset
    for a `Mag` is `'meas'`, rather than `'ref'`.

    Parameters
    ----------
    col : `str`
        Name of flux column from which to compute magnitude. Can be parseable
        by `lsst.pipe.tasks.functors.fluxName` function---that is, you can pass
        `'modelfit_CModel'` instead of `'modelfit_CModel_instFlux'`) and it will
        understand.
    calib : `lsst.afw.image.calib.Calib` (optional)
        Object that knows zero point.
    """
    _defaultDataset = 'meas'

    def __init__(self, col, calib=None, **kwargs):
        self.col = fluxName(col)
        self.calib = calib
        if calib is not None:
            self.fluxMag0 = calib.getFluxMag0()[0]
        else:
            # TO DO: DM-21955 Replace hard coded photometic calibration values
            self.fluxMag0 = 63095734448.0194

        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.col]

    def _func(self, df):
        # Zero/negative fluxes produce NaN/inf magnitudes; suppress the noise.
        # (Uses the stdlib warnings module; np.warnings was removed in NumPy 2.)
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', r'invalid value encountered')
            warnings.filterwarnings('ignore', r'divide by zero')
            return -2.5*np.log10(df[self.col] / self.fluxMag0)

    @property
    def name(self):
        return f'mag_{self.col}'
813
814
class MagErr(Mag):
    """Compute calibrated magnitude uncertainty

    Takes the same `calib` object as `lsst.pipe.tasks.functors.Mag`.

    Parameters
    ----------
    col : `str`
        Name of flux column
    calib : `lsst.afw.image.calib.Calib` (optional)
        Object that knows zero point.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        if self.calib is not None:
            self.fluxMag0Err = self.calib.getFluxMag0()[1]
        else:
            self.fluxMag0Err = 0.

    @property
    def columns(self):
        # Needs both the flux and its uncertainty.
        return [self.col, self.col + 'Err']

    def _func(self, df):
        # (Uses the stdlib warnings module; np.warnings was removed in NumPy 2.)
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', r'invalid value encountered')
            warnings.filterwarnings('ignore', r'divide by zero')
            fluxCol, fluxErrCol = self.columns
            # Standard error propagation for -2.5*log10(flux/fluxMag0).
            x = df[fluxErrCol] / df[fluxCol]
            y = self.fluxMag0Err / self.fluxMag0
            magErr = (2.5 / np.log(10.)) * np.sqrt(x*x + y*y)
            return magErr

    @property
    def name(self):
        return super().name + '_err'
851
852
class NanoMaggy(Mag):
    """Calibrated flux in units of nanomaggies (flux / fluxMag0 * 1e9)."""

    def _func(self, df):
        return (df[self.col] / self.fluxMag0) * 1e9
859
860
class MagDiff(Functor):
    """Functor to calculate magnitude difference between two flux columns."""
    _defaultDataset = 'meas'

    def __init__(self, col1, col2, **kwargs):
        self.col1 = fluxName(col1)
        self.col2 = fluxName(col2)
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.col1, self.col2]

    def _func(self, df):
        # (Uses the stdlib warnings module; np.warnings was removed in NumPy 2.)
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', r'invalid value encountered')
            warnings.filterwarnings('ignore', r'divide by zero')
            # mag1 - mag2 expressed as a single flux-ratio logarithm.
            return -2.5*np.log10(df[self.col1]/df[self.col2])

    @property
    def name(self):
        return f'(mag_{self.col1} - mag_{self.col2})'

    @property
    def shortname(self):
        return f'magDiff_{self.col1}_{self.col2}'
888
889
class Color(Functor):
    """Compute the color between two filters

    Computes color by initializing two different `Mag`
    functors based on the `col` and filters provided, and
    then returning the difference.

    This is enabled by the `_func` expecting a dataframe with a
    multilevel column index, with both `'band'` and `'column'`,
    instead of just `'column'`, which is the `Functor` default.
    This is controlled by the `_dfLevels` attribute.

    Also of note, the default dataset for `Color` is `forced_src'`,
    whereas for `Mag` it is `'meas'`.

    Parameters
    ----------
    col : str
        Name of flux column from which to compute; same as would be passed to
        `lsst.pipe.tasks.functors.Mag`.

    filt2, filt1 : str
        Filters from which to compute magnitude difference.
        Color computed is `Mag(filt2) - Mag(filt1)`.
    """
    _defaultDataset = 'forced_src'
    _dfLevels = ('band', 'column')
    _defaultNoDup = True

    def __init__(self, col, filt2, filt1, **kwargs):
        self.col = fluxName(col)
        if filt2 == filt1:
            raise RuntimeError("Cannot compute Color for %s: %s - %s " % (col, filt2, filt1))
        self.filt2 = filt2
        self.filt1 = filt1

        # Delegate the per-band magnitude computation to two Mag functors.
        self.mag2 = Mag(col, filt=filt2, **kwargs)
        self.mag1 = Mag(col, filt=filt1, **kwargs)

        super().__init__(**kwargs)

    @property
    def filt(self):
        # A Color spans two bands, so it has no single filter.
        return None

    @filt.setter
    def filt(self, filt):
        # Ignore filter assignment (e.g. from CompositeFunctor propagation).
        pass

    def _func(self, df):
        # df has a ('band', 'column') column index; select each band's slice.
        mag2 = self.mag2._func(df[self.filt2])
        mag1 = self.mag1._func(df[self.filt1])
        return mag2 - mag1

    @property
    def columns(self):
        return [self.mag1.col, self.mag2.col]

    def multilevelColumns(self, parq, **kwargs):
        return [(self.dataset, self.filt1, self.col), (self.dataset, self.filt2, self.col)]

    @property
    def name(self):
        return f'{self.filt2} - {self.filt1} ({self.col})'

    @property
    def shortname(self):
        return f"{self.col}_{self.filt2.replace('-', '')}m{self.filt1.replace('-', '')}"
958
959
class Labeller(Functor):
    """Main function of this subclass is to override the dropna=True
    """
    _null_label = 'null'
    _allow_difference = False
    name = 'label'
    _force_str = False

    def __call__(self, parq, dropna=False, **kwargs):
        # Labels for null rows are meaningful, so never drop NaN rows.
        return super().__call__(parq, dropna=False, **kwargs)
970
971
class StarGalaxyLabeller(Labeller):
    """Label each object 'star', 'galaxy', or null from extendedness."""
    _columns = ["base_ClassificationExtendedness_value"]
    _column = "base_ClassificationExtendedness_value"

    def _func(self, df):
        x = df[self._columns][self._column]
        mask = x.isnull()
        # Code 1 where extendedness < 0.5, 0 otherwise; 2 marks null rows.
        test = (x < 0.5).astype(int)
        test = test.mask(mask, 2)

        # TODO: DM-21954 Look into veracity of inline comment below
        # are these backwards?
        categories = ['galaxy', 'star', self._null_label]
        label = pd.Series(pd.Categorical.from_codes(test, categories=categories),
                          index=x.index, name='label')
        if self._force_str:
            label = label.astype(str)
        return label
990
991
class NumStarLabeller(Labeller):
    """Label objects by how many bands flag them as a star."""
    _columns = ['numStarFlags']
    labels = {"star": 0, "maybe": 1, "notStar": 2}

    def _func(self, df):
        x = df[self._columns][self._columns[0]]

        # Number of filters
        n = len(x.unique()) - 1

        # Bin: 0 flags -> 'noStar', 1..n-1 -> 'maybe', n (all bands) -> 'star'.
        labels = ['noStar', 'maybe', 'star']
        label = pd.Series(pd.cut(x, [-1, 0, n-1, n], labels=labels),
                          index=x.index, name='label')

        if self._force_str:
            label = label.astype(str)

        return label
1010
1011
1013 name = 'Deconvolved Moments'
1014 shortname = 'deconvolvedMoments'
1015 _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
1016 "ext_shapeHSM_HsmSourceMoments_yy",
1017 "base_SdssShape_xx", "base_SdssShape_yy",
1018 "ext_shapeHSM_HsmPsfMoments_xx",
1019 "ext_shapeHSM_HsmPsfMoments_yy")
1020
1021 def _func(self, df):
1022 """Calculate deconvolved moments"""
1023 if "ext_shapeHSM_HsmSourceMoments_xx" in df.columns: # _xx added by tdm
1024 hsm = df["ext_shapeHSM_HsmSourceMoments_xx"] + df["ext_shapeHSM_HsmSourceMoments_yy"]
1025 else:
1026 hsm = np.ones(len(df))*np.nan
1027 sdss = df["base_SdssShape_xx"] + df["base_SdssShape_yy"]
1028 if "ext_shapeHSM_HsmPsfMoments_xx" in df.columns:
1029 psf = df["ext_shapeHSM_HsmPsfMoments_xx"] + df["ext_shapeHSM_HsmPsfMoments_yy"]
1030 else:
1031 # LSST does not have shape.sdss.psf. Could instead add base_PsfShape to catalog using
1032 # exposure.getPsf().computeShape(s.getCentroid()).getIxx()
1033 # raise TaskError("No psf shape parameter found in catalog")
1034 raise RuntimeError('No psf shape parameter found in catalog')
1035
1036 return hsm.where(np.isfinite(hsm), sdss) - psf
1037
1038
1040 """Functor to calculate SDSS trace radius size for sources"""
1041 name = "SDSS Trace Size"
1042 shortname = 'sdssTrace'
1043 _columns = ("base_SdssShape_xx", "base_SdssShape_yy")
1044
1045 def _func(self, df):
1046 srcSize = np.sqrt(0.5*(df["base_SdssShape_xx"] + df["base_SdssShape_yy"]))
1047 return srcSize
1048
1049
1051 """Functor to calculate SDSS trace radius size difference (%) between object and psf model"""
1052 name = "PSF - SDSS Trace Size"
1053 shortname = 'psf_sdssTrace'
1054 _columns = ("base_SdssShape_xx", "base_SdssShape_yy",
1055 "base_SdssShape_psf_xx", "base_SdssShape_psf_yy")
1056
1057 def _func(self, df):
1058 srcSize = np.sqrt(0.5*(df["base_SdssShape_xx"] + df["base_SdssShape_yy"]))
1059 psfSize = np.sqrt(0.5*(df["base_SdssShape_psf_xx"] + df["base_SdssShape_psf_yy"]))
1060 sizeDiff = 100*(srcSize - psfSize)/(0.5*(srcSize + psfSize))
1061 return sizeDiff
1062
1063
1065 """Functor to calculate HSM trace radius size for sources"""
1066 name = 'HSM Trace Size'
1067 shortname = 'hsmTrace'
1068 _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
1069 "ext_shapeHSM_HsmSourceMoments_yy")
1070
1071 def _func(self, df):
1072 srcSize = np.sqrt(0.5*(df["ext_shapeHSM_HsmSourceMoments_xx"]
1073 + df["ext_shapeHSM_HsmSourceMoments_yy"]))
1074 return srcSize
1075
1076
1078 """Functor to calculate HSM trace radius size difference (%) between object and psf model"""
1079 name = 'PSF - HSM Trace Size'
1080 shortname = 'psf_HsmTrace'
1081 _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
1082 "ext_shapeHSM_HsmSourceMoments_yy",
1083 "ext_shapeHSM_HsmPsfMoments_xx",
1084 "ext_shapeHSM_HsmPsfMoments_yy")
1085
1086 def _func(self, df):
1087 srcSize = np.sqrt(0.5*(df["ext_shapeHSM_HsmSourceMoments_xx"]
1088 + df["ext_shapeHSM_HsmSourceMoments_yy"]))
1089 psfSize = np.sqrt(0.5*(df["ext_shapeHSM_HsmPsfMoments_xx"]
1090 + df["ext_shapeHSM_HsmPsfMoments_yy"]))
1091 sizeDiff = 100*(srcSize - psfSize)/(0.5*(srcSize + psfSize))
1092 return sizeDiff
1093
1094
1096 name = 'HSM Psf FWHM'
1097 _columns = ('ext_shapeHSM_HsmPsfMoments_xx', 'ext_shapeHSM_HsmPsfMoments_yy')
1098 # TODO: DM-21403 pixel scale should be computed from the CD matrix or transform matrix
1099 pixelScale = 0.168
1100 SIGMA2FWHM = 2*np.sqrt(2*np.log(2))
1101
1102 def _func(self, df):
1103 return self.pixelScalepixelScale*self.SIGMA2FWHMSIGMA2FWHM*np.sqrt(
1104 0.5*(df['ext_shapeHSM_HsmPsfMoments_xx'] + df['ext_shapeHSM_HsmPsfMoments_yy']))
1105
1106
1108 name = "Distortion Ellipticity (e1)"
1109 shortname = "Distortion"
1110
1111 def __init__(self, colXX, colXY, colYY, **kwargs):
1112 self.colXXcolXX = colXX
1113 self.colXYcolXY = colXY
1114 self.colYYcolYY = colYY
1115 self._columns_columns = [self.colXXcolXX, self.colXYcolXY, self.colYYcolYY]
1116 super().__init__(**kwargs)
1117
1118 @property
1119 def columns(self):
1120 return [self.colXXcolXX, self.colXYcolXY, self.colYYcolYY]
1121
1122 def _func(self, df):
1123 return df[self.colXXcolXX] - df[self.colYYcolYY] / (df[self.colXXcolXX] + df[self.colYYcolYY])
1124
1125
1127 name = "Ellipticity e2"
1128
1129 def __init__(self, colXX, colXY, colYY, **kwargs):
1130 self.colXXcolXX = colXX
1131 self.colXYcolXY = colXY
1132 self.colYYcolYY = colYY
1133 super().__init__(**kwargs)
1134
1135 @property
1136 def columns(self):
1137 return [self.colXXcolXX, self.colXYcolXY, self.colYYcolYY]
1138
1139 def _func(self, df):
1140 return 2*df[self.colXYcolXY] / (df[self.colXXcolXX] + df[self.colYYcolYY])
1141
1142
1144
1145 def __init__(self, colXX, colXY, colYY, **kwargs):
1146 self.colXXcolXX = colXX
1147 self.colXYcolXY = colXY
1148 self.colYYcolYY = colYY
1149 super().__init__(**kwargs)
1150
1151 @property
1152 def columns(self):
1153 return [self.colXXcolXX, self.colXYcolXY, self.colYYcolYY]
1154
1155 def _func(self, df):
1156 return (df[self.colXXcolXX]*df[self.colYYcolYY] - df[self.colXYcolXY]**2)**0.25
1157
1158
1160 """Computations using the stored localWcs.
1161 """
1162 name = "LocalWcsOperations"
1163
1164 def __init__(self,
1165 colCD_1_1,
1166 colCD_1_2,
1167 colCD_2_1,
1168 colCD_2_2,
1169 **kwargs):
1170 self.colCD_1_1colCD_1_1 = colCD_1_1
1171 self.colCD_1_2colCD_1_2 = colCD_1_2
1172 self.colCD_2_1colCD_2_1 = colCD_2_1
1173 self.colCD_2_2colCD_2_2 = colCD_2_2
1174 super().__init__(**kwargs)
1175
1176 def computeDeltaRaDec(self, x, y, cd11, cd12, cd21, cd22):
1177 """Compute the distance on the sphere from x2, y1 to x1, y1.
1178
1179 Parameters
1180 ----------
1181 x : `pandas.Series`
1182 X pixel coordinate.
1183 y : `pandas.Series`
1184 Y pixel coordinate.
1185 cd11 : `pandas.Series`
1186 [1, 1] element of the local Wcs affine transform.
1187 cd11 : `pandas.Series`
1188 [1, 1] element of the local Wcs affine transform.
1189 cd12 : `pandas.Series`
1190 [1, 2] element of the local Wcs affine transform.
1191 cd21 : `pandas.Series`
1192 [2, 1] element of the local Wcs affine transform.
1193 cd22 : `pandas.Series`
1194 [2, 2] element of the local Wcs affine transform.
1195
1196 Returns
1197 -------
1198 raDecTuple : tuple
1199 RA and dec conversion of x and y given the local Wcs. Returned
1200 units are in radians.
1201
1202 """
1203 return (x * cd11 + y * cd12, x * cd21 + y * cd22)
1204
1205 def computeSkySeperation(self, ra1, dec1, ra2, dec2):
1206 """Compute the local pixel scale conversion.
1207
1208 Parameters
1209 ----------
1210 ra1 : `pandas.Series`
1211 Ra of the first coordinate in radians.
1212 dec1 : `pandas.Series`
1213 Dec of the first coordinate in radians.
1214 ra2 : `pandas.Series`
1215 Ra of the second coordinate in radians.
1216 dec2 : `pandas.Series`
1217 Dec of the second coordinate in radians.
1218
1219 Returns
1220 -------
1221 dist : `pandas.Series`
1222 Distance on the sphere in radians.
1223 """
1224 deltaDec = dec2 - dec1
1225 deltaRa = ra2 - ra1
1226 return 2 * np.arcsin(
1227 np.sqrt(
1228 np.sin(deltaDec / 2) ** 2
1229 + np.cos(dec2) * np.cos(dec1) * np.sin(deltaRa / 2) ** 2))
1230
1231 def getSkySeperationFromPixel(self, x1, y1, x2, y2, cd11, cd12, cd21, cd22):
1232 """Compute the distance on the sphere from x2, y1 to x1, y1.
1233
1234 Parameters
1235 ----------
1236 x1 : `pandas.Series`
1237 X pixel coordinate.
1238 y1 : `pandas.Series`
1239 Y pixel coordinate.
1240 x2 : `pandas.Series`
1241 X pixel coordinate.
1242 y2 : `pandas.Series`
1243 Y pixel coordinate.
1244 cd11 : `pandas.Series`
1245 [1, 1] element of the local Wcs affine transform.
1246 cd11 : `pandas.Series`
1247 [1, 1] element of the local Wcs affine transform.
1248 cd12 : `pandas.Series`
1249 [1, 2] element of the local Wcs affine transform.
1250 cd21 : `pandas.Series`
1251 [2, 1] element of the local Wcs affine transform.
1252 cd22 : `pandas.Series`
1253 [2, 2] element of the local Wcs affine transform.
1254
1255 Returns
1256 -------
1257 Distance : `pandas.Series`
1258 Arcseconds per pixel at the location of the local WC
1259 """
1260 ra1, dec1 = self.computeDeltaRaDeccomputeDeltaRaDec(x1, y1, cd11, cd12, cd21, cd22)
1261 ra2, dec2 = self.computeDeltaRaDeccomputeDeltaRaDec(x2, y2, cd11, cd12, cd21, cd22)
1262 # Great circle distance for small separations.
1263 return self.computeSkySeperationcomputeSkySeperation(ra1, dec1, ra2, dec2)
1264
1265
1267 """Compute the local pixel scale from the stored CDMatrix.
1268 """
1269 name = "PixelScale"
1270
1271 @property
1272 def columns(self):
1273 return [self.colCD_1_1colCD_1_1,
1274 self.colCD_1_2colCD_1_2,
1275 self.colCD_2_1colCD_2_1,
1276 self.colCD_2_2colCD_2_2]
1277
1278 def pixelScaleArcseconds(self, cd11, cd12, cd21, cd22):
1279 """Compute the local pixel to scale conversion in arcseconds.
1280
1281 Parameters
1282 ----------
1283 cd11 : `pandas.Series`
1284 [1, 1] element of the local Wcs affine transform in radians.
1285 cd11 : `pandas.Series`
1286 [1, 1] element of the local Wcs affine transform in radians.
1287 cd12 : `pandas.Series`
1288 [1, 2] element of the local Wcs affine transform in radians.
1289 cd21 : `pandas.Series`
1290 [2, 1] element of the local Wcs affine transform in radians.
1291 cd22 : `pandas.Series`
1292 [2, 2] element of the local Wcs affine transform in radians.
1293
1294 Returns
1295 -------
1296 pixScale : `pandas.Series`
1297 Arcseconds per pixel at the location of the local WC
1298 """
1299 return 3600 * np.degrees(np.sqrt(np.fabs(cd11 * cd22 - cd12 * cd21)))
1300
1301 def _func(self, df):
1302 return self.pixelScaleArcsecondspixelScaleArcseconds(df[self.colCD_1_1colCD_1_1],
1303 df[self.colCD_1_2colCD_1_2],
1304 df[self.colCD_2_1colCD_2_1],
1305 df[self.colCD_2_2colCD_2_2])
1306
1307
1309 """Convert a value in units pixels squared to units arcseconds squared.
1310 """
1311
1312 def __init__(self,
1313 col,
1314 colCD_1_1,
1315 colCD_1_2,
1316 colCD_2_1,
1317 colCD_2_2,
1318 **kwargs):
1319 self.colcol = col
1320 super().__init__(colCD_1_1,
1321 colCD_1_2,
1322 colCD_2_1,
1323 colCD_2_2,
1324 **kwargs)
1325
1326 @property
1327 def name(self):
1328 return f"{self.col}_asArcseconds"
1329
1330 @property
1331 def columns(self):
1332 return [self.colcol,
1333 self.colCD_1_1colCD_1_1,
1334 self.colCD_1_2colCD_1_2,
1335 self.colCD_2_1colCD_2_1,
1336 self.colCD_2_2colCD_2_2]
1337
1338 def _func(self, df):
1339 return df[self.colcol] * self.pixelScaleArcsecondspixelScaleArcseconds(df[self.colCD_1_1colCD_1_1],
1340 df[self.colCD_1_2colCD_1_2],
1341 df[self.colCD_2_1colCD_2_1],
1342 df[self.colCD_2_2colCD_2_2])
1343
1344
1346 """Convert a value in units pixels to units arcseconds.
1347 """
1348
1349 def __init__(self,
1350 col,
1351 colCD_1_1,
1352 colCD_1_2,
1353 colCD_2_1,
1354 colCD_2_2,
1355 **kwargs):
1356 self.colcol = col
1357 super().__init__(colCD_1_1,
1358 colCD_1_2,
1359 colCD_2_1,
1360 colCD_2_2,
1361 **kwargs)
1362
1363 @property
1364 def name(self):
1365 return f"{self.col}_asArcsecondsSq"
1366
1367 @property
1368 def columns(self):
1369 return [self.colcol,
1370 self.colCD_1_1colCD_1_1,
1371 self.colCD_1_2colCD_1_2,
1372 self.colCD_2_1colCD_2_1,
1373 self.colCD_2_2colCD_2_2]
1374
1375 def _func(self, df):
1376 pixScale = self.pixelScaleArcsecondspixelScaleArcseconds(df[self.colCD_1_1colCD_1_1],
1377 df[self.colCD_1_2colCD_1_2],
1378 df[self.colCD_2_1colCD_2_1],
1379 df[self.colCD_2_2colCD_2_2])
1380 return df[self.colcol] * pixScale * pixScale
1381
1382
1384 name = 'Reference Band'
1385 shortname = 'refBand'
1386
1387 @property
1388 def columns(self):
1389 return ["merge_measurement_i",
1390 "merge_measurement_r",
1391 "merge_measurement_z",
1392 "merge_measurement_y",
1393 "merge_measurement_g",
1394 "merge_measurement_u"]
1395
1396 def _func(self, df: pd.DataFrame) -> pd.Series:
1397 def getFilterAliasName(row):
1398 # get column name with the max value (True > False)
1399 colName = row.idxmax()
1400 return colName.replace('merge_measurement_', '')
1401
1402 # Makes a Series of dtype object if df is empty
1403 return df[self.columnscolumnscolumns].apply(getFilterAliasName, axis=1,
1404 result_type='reduce').astype('object')
1405
1406
1408 # AB to NanoJansky (3631 Jansky)
1409 AB_FLUX_SCALE = (0 * u.ABmag).to_value(u.nJy)
1410 LOG_AB_FLUX_SCALE = 12.56
1411 FIVE_OVER_2LOG10 = 1.085736204758129569
1412 # TO DO: DM-21955 Replace hard coded photometic calibration values
1413 COADD_ZP = 27
1414
1415 def __init__(self, colFlux, colFluxErr=None, calib=None, **kwargs):
1416 self.vhypotvhypot = np.vectorize(self.hypothypot)
1417 self.colcol = colFlux
1418 self.colFluxErrcolFluxErr = colFluxErr
1419
1420 self.calibcalib = calib
1421 if calib is not None:
1422 self.fluxMag0fluxMag0, self.fluxMag0ErrfluxMag0Err = calib.getFluxMag0()
1423 else:
1424 self.fluxMag0fluxMag0 = 1./np.power(10, -0.4*self.COADD_ZPCOADD_ZP)
1425 self.fluxMag0ErrfluxMag0Err = 0.
1426
1427 super().__init__(**kwargs)
1428
1429 @property
1430 def columns(self):
1431 return [self.colcol]
1432
1433 @property
1434 def name(self):
1435 return f'mag_{self.col}'
1436
1437 @classmethod
1438 def hypot(cls, a, b):
1439 if np.abs(a) < np.abs(b):
1440 a, b = b, a
1441 if a == 0.:
1442 return 0.
1443 q = b/a
1444 return np.abs(a) * np.sqrt(1. + q*q)
1445
1446 def dn2flux(self, dn, fluxMag0):
1447 return self.AB_FLUX_SCALEAB_FLUX_SCALE * dn / fluxMag0
1448
1449 def dn2mag(self, dn, fluxMag0):
1450 with np.warnings.catch_warnings():
1451 np.warnings.filterwarnings('ignore', r'invalid value encountered')
1452 np.warnings.filterwarnings('ignore', r'divide by zero')
1453 return -2.5 * np.log10(dn/fluxMag0)
1454
1455 def dn2fluxErr(self, dn, dnErr, fluxMag0, fluxMag0Err):
1456 retVal = self.vhypotvhypot(dn * fluxMag0Err, dnErr * fluxMag0)
1457 retVal *= self.AB_FLUX_SCALEAB_FLUX_SCALE / fluxMag0 / fluxMag0
1458 return retVal
1459
1460 def dn2MagErr(self, dn, dnErr, fluxMag0, fluxMag0Err):
1461 retVal = self.dn2fluxErrdn2fluxErr(dn, dnErr, fluxMag0, fluxMag0Err) / self.dn2fluxdn2flux(dn, fluxMag0)
1462 return self.FIVE_OVER_2LOG10FIVE_OVER_2LOG10 * retVal
1463
1464
1466 def _func(self, df):
1467 return self.dn2fluxdn2flux(df[self.colcol], self.fluxMag0fluxMag0)
1468
1469
1471 @property
1472 def columns(self):
1473 return [self.colcol, self.colFluxErrcolFluxErr]
1474
1475 def _func(self, df):
1476 retArr = self.dn2fluxErrdn2fluxErr(df[self.colcol], df[self.colFluxErrcolFluxErr], self.fluxMag0fluxMag0, self.fluxMag0ErrfluxMag0Err)
1477 return pd.Series(retArr, index=df.index)
1478
1479
1481 def _func(self, df):
1482 return self.dn2magdn2mag(df[self.colcol], self.fluxMag0fluxMag0)
1483
1484
1486 @property
1487 def columns(self):
1488 return [self.colcol, self.colFluxErrcolFluxErr]
1489
1490 def _func(self, df):
1491 retArr = self.dn2MagErrdn2MagErr(df[self.colcol], df[self.colFluxErrcolFluxErr], self.fluxMag0fluxMag0, self.fluxMag0ErrfluxMag0Err)
1492 return pd.Series(retArr, index=df.index)
1493
1494
1496 """Base class for calibrating the specified instrument flux column using
1497 the local photometric calibration.
1498
1499 Parameters
1500 ----------
1501 instFluxCol : `str`
1502 Name of the instrument flux column.
1503 instFluxErrCol : `str`
1504 Name of the assocated error columns for ``instFluxCol``.
1505 photoCalibCol : `str`
1506 Name of local calibration column.
1507 photoCalibErrCol : `str`
1508 Error associated with ``photoCalibCol``
1509
1510 See also
1511 --------
1512 LocalPhotometry
1513 LocalNanojansky
1514 LocalNanojanskyErr
1515 LocalMagnitude
1516 LocalMagnitudeErr
1517 """
1518 logNJanskyToAB = (1 * u.nJy).to_value(u.ABmag)
1519
1520 def __init__(self,
1521 instFluxCol,
1522 instFluxErrCol,
1523 photoCalibCol,
1524 photoCalibErrCol,
1525 **kwargs):
1526 self.instFluxColinstFluxCol = instFluxCol
1527 self.instFluxErrColinstFluxErrCol = instFluxErrCol
1528 self.photoCalibColphotoCalibCol = photoCalibCol
1529 self.photoCalibErrColphotoCalibErrCol = photoCalibErrCol
1530 super().__init__(**kwargs)
1531
1532 def instFluxToNanojansky(self, instFlux, localCalib):
1533 """Convert instrument flux to nanojanskys.
1534
1535 Parameters
1536 ----------
1537 instFlux : `numpy.ndarray` or `pandas.Series`
1538 Array of instrument flux measurements
1539 localCalib : `numpy.ndarray` or `pandas.Series`
1540 Array of local photometric calibration estimates.
1541
1542 Returns
1543 -------
1544 calibFlux : `numpy.ndarray` or `pandas.Series`
1545 Array of calibrated flux measurements.
1546 """
1547 return instFlux * localCalib
1548
1549 def instFluxErrToNanojanskyErr(self, instFlux, instFluxErr, localCalib, localCalibErr):
1550 """Convert instrument flux to nanojanskys.
1551
1552 Parameters
1553 ----------
1554 instFlux : `numpy.ndarray` or `pandas.Series`
1555 Array of instrument flux measurements
1556 instFluxErr : `numpy.ndarray` or `pandas.Series`
1557 Errors on associated ``instFlux`` values
1558 localCalib : `numpy.ndarray` or `pandas.Series`
1559 Array of local photometric calibration estimates.
1560 localCalibErr : `numpy.ndarray` or `pandas.Series`
1561 Errors on associated ``localCalib`` values
1562
1563 Returns
1564 -------
1565 calibFluxErr : `numpy.ndarray` or `pandas.Series`
1566 Errors on calibrated flux measurements.
1567 """
1568 return np.hypot(instFluxErr * localCalib, instFlux * localCalibErr)
1569
1570 def instFluxToMagnitude(self, instFlux, localCalib):
1571 """Convert instrument flux to nanojanskys.
1572
1573 Parameters
1574 ----------
1575 instFlux : `numpy.ndarray` or `pandas.Series`
1576 Array of instrument flux measurements
1577 localCalib : `numpy.ndarray` or `pandas.Series`
1578 Array of local photometric calibration estimates.
1579
1580 Returns
1581 -------
1582 calibMag : `numpy.ndarray` or `pandas.Series`
1583 Array of calibrated AB magnitudes.
1584 """
1585 return -2.5 * np.log10(self.instFluxToNanojanskyinstFluxToNanojansky(instFlux, localCalib)) + self.logNJanskyToABlogNJanskyToAB
1586
1587 def instFluxErrToMagnitudeErr(self, instFlux, instFluxErr, localCalib, localCalibErr):
1588 """Convert instrument flux err to nanojanskys.
1589
1590 Parameters
1591 ----------
1592 instFlux : `numpy.ndarray` or `pandas.Series`
1593 Array of instrument flux measurements
1594 instFluxErr : `numpy.ndarray` or `pandas.Series`
1595 Errors on associated ``instFlux`` values
1596 localCalib : `numpy.ndarray` or `pandas.Series`
1597 Array of local photometric calibration estimates.
1598 localCalibErr : `numpy.ndarray` or `pandas.Series`
1599 Errors on associated ``localCalib`` values
1600
1601 Returns
1602 -------
1603 calibMagErr: `numpy.ndarray` or `pandas.Series`
1604 Error on calibrated AB magnitudes.
1605 """
1606 err = self.instFluxErrToNanojanskyErrinstFluxErrToNanojanskyErr(instFlux, instFluxErr, localCalib, localCalibErr)
1607 return 2.5 / np.log(10) * err / self.instFluxToNanojanskyinstFluxToNanojansky(instFlux, instFluxErr)
1608
1609
1611 """Compute calibrated fluxes using the local calibration value.
1612
1613 See also
1614 --------
1615 LocalNanojansky
1616 LocalNanojanskyErr
1617 LocalMagnitude
1618 LocalMagnitudeErr
1619 """
1620
1621 @property
1622 def columns(self):
1623 return [self.instFluxColinstFluxCol, self.photoCalibColphotoCalibCol]
1624
1625 @property
1626 def name(self):
1627 return f'flux_{self.instFluxCol}'
1628
1629 def _func(self, df):
1630 return self.instFluxToNanojanskyinstFluxToNanojansky(df[self.instFluxColinstFluxCol], df[self.photoCalibColphotoCalibCol])
1631
1632
1634 """Compute calibrated flux errors using the local calibration value.
1635
1636 See also
1637 --------
1638 LocalNanojansky
1639 LocalNanojanskyErr
1640 LocalMagnitude
1641 LocalMagnitudeErr
1642 """
1643
1644 @property
1645 def columns(self):
1646 return [self.instFluxColinstFluxCol, self.instFluxErrColinstFluxErrCol,
1647 self.photoCalibColphotoCalibCol, self.photoCalibErrColphotoCalibErrCol]
1648
1649 @property
1650 def name(self):
1651 return f'fluxErr_{self.instFluxCol}'
1652
1653 def _func(self, df):
1654 return self.instFluxErrToNanojanskyErrinstFluxErrToNanojanskyErr(df[self.instFluxColinstFluxCol], df[self.instFluxErrColinstFluxErrCol],
1655 df[self.photoCalibColphotoCalibCol], df[self.photoCalibErrColphotoCalibErrCol])
1656
1657
1659 """Compute calibrated AB magnitudes using the local calibration value.
1660
1661 See also
1662 --------
1663 LocalNanojansky
1664 LocalNanojanskyErr
1665 LocalMagnitude
1666 LocalMagnitudeErr
1667 """
1668
1669 @property
1670 def columns(self):
1671 return [self.instFluxColinstFluxCol, self.photoCalibColphotoCalibCol]
1672
1673 @property
1674 def name(self):
1675 return f'mag_{self.instFluxCol}'
1676
1677 def _func(self, df):
1678 return self.instFluxToMagnitudeinstFluxToMagnitude(df[self.instFluxColinstFluxCol],
1679 df[self.photoCalibColphotoCalibCol])
1680
1681
1683 """Compute calibrated AB magnitude errors using the local calibration value.
1684
1685 See also
1686 --------
1687 LocalNanojansky
1688 LocalNanojanskyErr
1689 LocalMagnitude
1690 LocalMagnitudeErr
1691 """
1692
1693 @property
1694 def columns(self):
1695 return [self.instFluxColinstFluxCol, self.instFluxErrColinstFluxErrCol,
1696 self.photoCalibColphotoCalibCol, self.photoCalibErrColphotoCalibErrCol]
1697
1698 @property
1699 def name(self):
1700 return f'magErr_{self.instFluxCol}'
1701
1702 def _func(self, df):
1703 return self.instFluxErrToMagnitudeErrinstFluxErrToMagnitudeErr(df[self.instFluxColinstFluxCol],
1704 df[self.instFluxErrColinstFluxErrCol],
1705 df[self.photoCalibColphotoCalibCol],
1706 df[self.photoCalibErrColphotoCalibErrCol])
1707
1708
1710 """Compute absolute mean of dipole fluxes.
1711
1712 See also
1713 --------
1714 LocalNanojansky
1715 LocalNanojanskyErr
1716 LocalMagnitude
1717 LocalMagnitudeErr
1718 LocalDipoleMeanFlux
1719 LocalDipoleMeanFluxErr
1720 LocalDipoleDiffFlux
1721 LocalDipoleDiffFluxErr
1722 """
1723 def __init__(self,
1724 instFluxPosCol,
1725 instFluxNegCol,
1726 instFluxPosErrCol,
1727 instFluxNegErrCol,
1728 photoCalibCol,
1729 photoCalibErrCol,
1730 **kwargs):
1731 self.instFluxNegColinstFluxNegCol = instFluxNegCol
1732 self.instFluxPosColinstFluxPosCol = instFluxPosCol
1733 self.instFluxNegErrColinstFluxNegErrCol = instFluxNegErrCol
1734 self.instFluxPosErrColinstFluxPosErrCol = instFluxPosErrCol
1735 self.photoCalibColphotoCalibColphotoCalibCol = photoCalibCol
1736 self.photoCalibErrColphotoCalibErrColphotoCalibErrCol = photoCalibErrCol
1737 super().__init__(instFluxNegCol,
1738 instFluxNegErrCol,
1739 photoCalibCol,
1740 photoCalibErrCol,
1741 **kwargs)
1742
1743 @property
1744 def columns(self):
1745 return [self.instFluxPosColinstFluxPosCol,
1746 self.instFluxNegColinstFluxNegCol,
1747 self.photoCalibColphotoCalibColphotoCalibCol]
1748
1749 @property
1750 def name(self):
1751 return f'dipMeanFlux_{self.instFluxPosCol}_{self.instFluxNegCol}'
1752
1753 def _func(self, df):
1754 return 0.5*(np.fabs(self.instFluxToNanojanskyinstFluxToNanojansky(df[self.instFluxNegColinstFluxNegCol], df[self.photoCalibColphotoCalibColphotoCalibCol]))
1755 + np.fabs(self.instFluxToNanojanskyinstFluxToNanojansky(df[self.instFluxPosColinstFluxPosCol], df[self.photoCalibColphotoCalibColphotoCalibCol])))
1756
1757
1759 """Compute the error on the absolute mean of dipole fluxes.
1760
1761 See also
1762 --------
1763 LocalNanojansky
1764 LocalNanojanskyErr
1765 LocalMagnitude
1766 LocalMagnitudeErr
1767 LocalDipoleMeanFlux
1768 LocalDipoleMeanFluxErr
1769 LocalDipoleDiffFlux
1770 LocalDipoleDiffFluxErr
1771 """
1772
1773 @property
1774 def columns(self):
1775 return [self.instFluxPosColinstFluxPosCol,
1776 self.instFluxNegColinstFluxNegCol,
1777 self.instFluxPosErrColinstFluxPosErrCol,
1778 self.instFluxNegErrColinstFluxNegErrCol,
1779 self.photoCalibColphotoCalibColphotoCalibCol,
1780 self.photoCalibErrColphotoCalibErrColphotoCalibErrCol]
1781
1782 @property
1783 def name(self):
1784 return f'dipMeanFluxErr_{self.instFluxPosCol}_{self.instFluxNegCol}'
1785
1786 def _func(self, df):
1787 return 0.5*np.sqrt(
1788 (np.fabs(df[self.instFluxNegColinstFluxNegCol]) + np.fabs(df[self.instFluxPosColinstFluxPosCol])
1789 * df[self.photoCalibErrColphotoCalibErrColphotoCalibErrCol])**2
1790 + (df[self.instFluxNegErrColinstFluxNegErrCol]**2 + df[self.instFluxPosErrColinstFluxPosErrCol]**2)
1791 * df[self.photoCalibColphotoCalibColphotoCalibCol]**2)
1792
1793
1795 """Compute the absolute difference of dipole fluxes.
1796
1797 Value is (abs(pos) - abs(neg))
1798
1799 See also
1800 --------
1801 LocalNanojansky
1802 LocalNanojanskyErr
1803 LocalMagnitude
1804 LocalMagnitudeErr
1805 LocalDipoleMeanFlux
1806 LocalDipoleMeanFluxErr
1807 LocalDipoleDiffFlux
1808 LocalDipoleDiffFluxErr
1809 """
1810
1811 @property
1812 def columns(self):
1813 return [self.instFluxPosColinstFluxPosCol,
1814 self.instFluxNegColinstFluxNegCol,
1815 self.photoCalibColphotoCalibColphotoCalibCol]
1816
1817 @property
1818 def name(self):
1819 return f'dipDiffFlux_{self.instFluxPosCol}_{self.instFluxNegCol}'
1820
1821 def _func(self, df):
1822 return (np.fabs(self.instFluxToNanojanskyinstFluxToNanojansky(df[self.instFluxPosColinstFluxPosCol], df[self.photoCalibColphotoCalibColphotoCalibCol]))
1823 - np.fabs(self.instFluxToNanojanskyinstFluxToNanojansky(df[self.instFluxNegColinstFluxNegCol], df[self.photoCalibColphotoCalibColphotoCalibCol])))
1824
1825
1827 """Compute the error on the absolute difference of dipole fluxes.
1828
1829 See also
1830 --------
1831 LocalNanojansky
1832 LocalNanojanskyErr
1833 LocalMagnitude
1834 LocalMagnitudeErr
1835 LocalDipoleMeanFlux
1836 LocalDipoleMeanFluxErr
1837 LocalDipoleDiffFlux
1838 LocalDipoleDiffFluxErr
1839 """
1840
1841 @property
1842 def columns(self):
1843 return [self.instFluxPosColinstFluxPosCol,
1844 self.instFluxNegColinstFluxNegCol,
1845 self.instFluxPosErrColinstFluxPosErrCol,
1846 self.instFluxNegErrColinstFluxNegErrCol,
1847 self.photoCalibColphotoCalibColphotoCalibCol,
1848 self.photoCalibErrColphotoCalibErrColphotoCalibErrCol]
1849
1850 @property
1851 def name(self):
1852 return f'dipDiffFluxErr_{self.instFluxPosCol}_{self.instFluxNegCol}'
1853
1854 def _func(self, df):
1855 return np.sqrt(
1856 ((np.fabs(df[self.instFluxPosColinstFluxPosCol]) - np.fabs(df[self.instFluxNegColinstFluxNegCol]))
1857 * df[self.photoCalibErrColphotoCalibErrColphotoCalibErrCol])**2
1858 + (df[self.instFluxPosErrColinstFluxPosErrCol]**2 + df[self.instFluxNegErrColinstFluxNegErrCol]**2)
1859 * df[self.photoCalibColphotoCalibColphotoCalibCol]**2)
1860
1861
1863 """Base class for returning the ratio of 2 columns.
1864
1865 Can be used to compute a Signal to Noise ratio for any input flux.
1866
1867 Parameters
1868 ----------
1869 numerator : `str`
1870 Name of the column to use at the numerator in the ratio
1871 denominator : `str`
1872 Name of the column to use as the denominator in the ratio.
1873 """
1874 def __init__(self,
1875 numerator,
1876 denominator,
1877 **kwargs):
1878 self.numeratornumerator = numerator
1879 self.denominatordenominator = denominator
1880 super().__init__(**kwargs)
1881
1882 @property
1883 def columns(self):
1884 return [self.numeratornumerator, self.denominatordenominator]
1885
1886 @property
1887 def name(self):
1888 return f'ratio_{self.numerator}_{self.denominator}'
1889
1890 def _func(self, df):
1891 with np.warnings.catch_warnings():
1892 np.warnings.filterwarnings('ignore', r'invalid value encountered')
1893 np.warnings.filterwarnings('ignore', r'divide by zero')
1894 return df[self.numeratornumerator] / df[self.denominatordenominator]
def multilevelColumns(self, parq, **kwargs)
Definition: functors.py:948
def __init__(self, col, filt2, filt1, **kwargs)
Definition: functors.py:919
def __init__(self, col, **kwargs)
Definition: functors.py:640
def __init__(self, funcs, **kwargs)
Definition: functors.py:403
def __call__(self, data, **kwargs)
Definition: functors.py:455
def from_file(cls, filename, **kwargs)
Definition: functors.py:536
def from_yaml(cls, translationDefinition, **kwargs)
Definition: functors.py:545
def renameCol(cls, col, renameRules)
Definition: functors.py:527
def multilevelColumns(self, data, **kwargs)
Definition: functors.py:441
def pixelScaleArcseconds(self, cd11, cd12, cd21, cd22)
Definition: functors.py:1278
def __init__(self, col, colCD_1_1, colCD_1_2, colCD_2_1, colCD_2_2, **kwargs)
Definition: functors.py:1355
def __init__(self, col, colCD_1_1, colCD_1_2, colCD_2_1, colCD_2_2, **kwargs)
Definition: functors.py:1318
def __init__(self, col, **kwargs)
Definition: functors.py:686
def __init__(self, expr, **kwargs)
Definition: functors.py:609
def __init__(self, **kwargs)
Definition: functors.py:714
def __call__(self, catalog, **kwargs)
Definition: functors.py:717
def __init__(self, colXX, colXY, colYY, **kwargs)
Definition: functors.py:1111
def __init__(self, colXX, colXY, colYY, **kwargs)
Definition: functors.py:1129
def __call__(self, data, dropna=False)
Definition: functors.py:340
def _func(self, df, dropna=True)
Definition: functors.py:279
def multilevelColumns(self, data, columnIndex=None, returnTuple=False)
Definition: functors.py:229
def _get_data_columnLevelNames(self, data, columnIndex=None)
Definition: functors.py:186
def difference(self, data1, data2, **kwargs)
Definition: functors.py:351
def __init__(self, filt=None, dataset=None, noDup=None)
Definition: functors.py:142
def _get_columnIndex(self, data)
Definition: functors.py:282
def _colsFromDict(self, colDict, columnIndex=None)
Definition: functors.py:208
def _get_data_columnLevels(self, data, columnIndex=None)
Definition: functors.py:162
def __init__(self, ra, decl, **kwargs)
Definition: functors.py:728
def __call__(self, parq, dropna=False, **kwargs)
Definition: functors.py:968
def __init__(self, instFluxPosCol, instFluxNegCol, instFluxPosErrCol, instFluxNegErrCol, photoCalibCol, photoCalibErrCol, **kwargs)
Definition: functors.py:1730
def instFluxToNanojansky(self, instFlux, localCalib)
Definition: functors.py:1532
def instFluxErrToMagnitudeErr(self, instFlux, instFluxErr, localCalib, localCalibErr)
Definition: functors.py:1587
def __init__(self, instFluxCol, instFluxErrCol, photoCalibCol, photoCalibErrCol, **kwargs)
Definition: functors.py:1525
def instFluxErrToNanojanskyErr(self, instFlux, instFluxErr, localCalib, localCalibErr)
Definition: functors.py:1549
def instFluxToMagnitude(self, instFlux, localCalib)
Definition: functors.py:1570
def __init__(self, colCD_1_1, colCD_1_2, colCD_2_1, colCD_2_2, **kwargs)
Definition: functors.py:1169
def computeDeltaRaDec(self, x, y, cd11, cd12, cd21, cd22)
Definition: functors.py:1176
def computeSkySeperation(self, ra1, dec1, ra2, dec2)
Definition: functors.py:1205
def getSkySeperationFromPixel(self, x1, y1, x2, y2, cd11, cd12, cd21, cd22)
Definition: functors.py:1231
def __init__(self, col1, col2, **kwargs)
Definition: functors.py:866
def __init__(self, *args, **kwargs)
Definition: functors.py:827
def __init__(self, col, calib=None, **kwargs)
Definition: functors.py:789
def dn2mag(self, dn, fluxMag0)
Definition: functors.py:1449
def dn2flux(self, dn, fluxMag0)
Definition: functors.py:1446
def dn2fluxErr(self, dn, dnErr, fluxMag0, fluxMag0Err)
Definition: functors.py:1455
def dn2MagErr(self, dn, dnErr, fluxMag0, fluxMag0Err)
Definition: functors.py:1460
def __init__(self, colFlux, colFluxErr=None, calib=None, **kwargs)
Definition: functors.py:1415
def __call__(self, catalog, **kwargs)
Definition: functors.py:704
def __init__(self, **kwargs)
Definition: functors.py:701
def __init__(self, colXX, colXY, colYY, **kwargs)
Definition: functors.py:1145
def __init__(self, numerator, denominator, **kwargs)
Definition: functors.py:1877
def mag_aware_eval(df, expr)
Definition: functors.py:574
def init_fromDict(initDict, basePath='lsst.pipe.tasks.functors', typeKey='functor', name=None)
Definition: functors.py:40