lsst.pipe.tasks gbbe75d3d81+f128b91f83
Loading...
Searching...
No Matches
functors.py
Go to the documentation of this file.
1# This file is part of pipe_tasks.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <https://www.gnu.org/licenses/>.
21
22__all__ = ["Functor", "CompositeFunctor", "CustomFunctor", "Column", "Index",
23 "IDColumn", "FootprintNPix", "CoordColumn", "RAColumn", "DecColumn",
24 "HtmIndex20", "Mag", "MagErr", "NanoMaggie", "MagDiff", "Color",
25 "Labeller", "StarGalaxyLabeller", "NumStarLabeller", "DeconvolvedMoments",
26 "SdssTraceSize", "PsfSdssTraceSizeDiff", "HsmTraceSize", "PsfHsmTraceSizeDiff",
27 "HsmFwhm", "E1", "E2", "RadiusFromQuadrupole", "LocalWcs", "ComputePixelScale",
28 "ConvertPixelToArcseconds", "ConvertPixelSqToArcsecondsSq", "ReferenceBand",
29 "Photometry", "NanoJansky", "NanoJanskyErr", "Magnitude", "MagnitudeErr",
30 "LocalPhotometry", "LocalNanojansky", "LocalNanojanskyErr",
31 "LocalMagnitude", "LocalMagnitudeErr", "LocalDipoleMeanFlux",
32 "LocalDipoleMeanFluxErr", "LocalDipoleDiffFlux", "LocalDipoleDiffFluxErr",
33 "Ratio", "Ebv"]
34
import logging
import os.path
import re
import warnings
from itertools import product

import yaml

import astropy.units as u
import numpy as np
import pandas as pd
from astropy.coordinates import SkyCoord

import lsst.geom as geom
import lsst.sphgeom as sphgeom
from lsst.daf.butler import DeferredDatasetHandle
from lsst.pipe.base import InMemoryDatasetHandle
from lsst.utils import doImport
from lsst.utils.introspection import get_full_type_name

from .parquetTable import ParquetTable, MultilevelParquetTable
54
55
def init_fromDict(initDict, basePath='lsst.pipe.tasks.functors',
                  typeKey='functor', name=None):
    """Initialize an object defined in a dictionary.

    The object needs to be importable as f'{basePath}.{initDict[typeKey]}'.
    The positional and keyword arguments (if any) are contained in "args" and
    "kwargs" entries in the dictionary, respectively.  This is used in
    `functors.CompositeFunctor.from_yaml` to initialize a composite functor
    from a specification in a YAML file.

    Parameters
    ----------
    initDict : dictionary
        Dictionary describing object's initialization.  Must contain an
        entry keyed by ``typeKey`` that is the name of the object, relative
        to ``basePath``.
    basePath : str
        Path relative to module in which ``initDict[typeKey]`` is defined.
    typeKey : str
        Key of ``initDict`` that is the name of the object
        (relative to ``basePath``).
    name : str, optional
        Name of the functor being built; used only to produce a more
        helpful error message on failure.
    """
    initDict = initDict.copy()
    # TO DO: DM-21956 We should be able to define functors outside this module
    pythonType = doImport(f'{basePath}.{initDict.pop(typeKey)}')
    args = []
    if 'args' in initDict:
        args = initDict.pop('args')
        if isinstance(args, str):
            args = [args]
    try:
        element = pythonType(*args, **initDict)
    except Exception as e:
        message = f'Error in constructing functor "{name}" of type {pythonType.__name__} with args: {args}'
        # Chain the original exception so the root cause is preserved.
        raise type(e)(message, e.args) from e
    return element
93
94
class Functor(object):
    """Define and execute a calculation on a ParquetTable.

    The `__call__` method accepts either a `ParquetTable` object or a
    `DeferredDatasetHandle` or `InMemoryDatasetHandle`, and returns the
    result of the calculation as a single column.  Each functor defines what
    columns are needed for the calculation, and only these columns are read
    from the `ParquetTable`.

    The action of `__call__` consists of two steps: first, loading the
    necessary columns from disk into memory as a `pandas.DataFrame` object;
    and second, performing the computation on this dataframe and returning
    the result.

    To define a new `Functor`, a subclass must define a `_func` method,
    that takes a `pandas.DataFrame` and returns result in a `pandas.Series`.
    In addition, it must define the following attributes:

    * `_columns`: The columns necessary to perform the calculation
    * `name`: A name appropriate for a figure axis label
    * `shortname`: A name appropriate for use as a dictionary key

    On initialization, a `Functor` should declare what band (`filt` kwarg)
    and dataset (e.g. `'ref'`, `'meas'`, `'forced_src'`) it is intended to
    be applied to.  This enables the `_get_data` method to extract the
    proper columns from the parquet file.  If not specified, the dataset
    will fall back on the `_defaultDataset` attribute.  If band is not
    specified and `dataset` is anything other than `'ref'`, then an error
    will be raised when trying to perform the calculation.

    Originally, `Functor` was set up to expect datasets formatted like the
    `deepCoadd_obj` dataset; that is, a dataframe with a multi-level column
    index, with the levels of the column index being `band`, `dataset`, and
    `column`.  It has since been generalized to apply to dataframes without
    multi-level indices and multi-level indices with just `dataset` and
    `column` levels.  In addition, the `_get_data` method that reads the
    dataframe from the `ParquetTable` will return a dataframe with column
    index levels defined by the `_dfLevels` attribute; by default, this is
    `column`.

    The `_dfLevels` attributes should generally not need to be changed,
    unless `_func` needs columns from multiple filters or datasets to do
    the calculation.  An example of this is the
    `lsst.pipe.tasks.functors.Color` functor, for which
    `_dfLevels = ('band', 'column')`, and `_func` expects the dataframe it
    gets to have those levels in the column index.

    Parameters
    ----------
    filt : str
        Filter upon which to do the calculation.

    dataset : str
        Dataset upon which to do the calculation
        (e.g., 'ref', 'meas', 'forced_src').

    noDup : bool, optional
        If provided, overrides the class-level `_defaultNoDup`.
    """

    _defaultDataset = 'ref'
    _dfLevels = ('column',)
    _defaultNoDup = False

    def __init__(self, filt=None, dataset=None, noDup=None):
        self.filt = filt
        self.dataset = dataset if dataset is not None else self._defaultDataset
        self._noDup = noDup
        self.log = logging.getLogger(type(self).__name__)

    @property
    def noDup(self):
        # Explicit per-instance setting wins over the class default.
        if self._noDup is not None:
            return self._noDup
        else:
            return self._defaultNoDup

    @property
    def columns(self):
        """Columns required to perform calculation."""
        if not hasattr(self, '_columns'):
            raise NotImplementedError('Must define columns property or _columns attribute')
        return self._columns

    def _get_data_columnLevels(self, data, columnIndex=None):
        """Gets the names of the column index levels.

        This should only be called in the context of a multilevel table.
        The logic here is to enable this to work both with the gen2
        `MultilevelParquetTable` and with the gen3 `DeferredDatasetHandle`.

        Parameters
        ----------
        data : various
            The data to be read, can be a `MultilevelParquetTable`,
            `DeferredDatasetHandle`, or `InMemoryDatasetHandle`.
        columnIndex (optional): pandas `Index` object
            If not passed, then it is read from the `DeferredDatasetHandle`
            or `InMemoryDatasetHandle`.
        """
        if isinstance(data, (DeferredDatasetHandle, InMemoryDatasetHandle)):
            if columnIndex is None:
                columnIndex = data.get(component="columns")
        if columnIndex is not None:
            return columnIndex.names
        if isinstance(data, MultilevelParquetTable):
            return data.columnLevels
        else:
            raise TypeError(f"Unknown type for data: {type(data)}!")

    def _get_data_columnLevelNames(self, data, columnIndex=None):
        """Gets the content of each of the column levels for a multilevel table.

        Similar to `_get_data_columnLevels`, this enables backward
        compatibility with gen2.

        Mirrors original gen2 implementation within
        `pipe.tasks.parquetTable.MultilevelParquetTable`.
        """
        if isinstance(data, (DeferredDatasetHandle, InMemoryDatasetHandle)):
            if columnIndex is None:
                columnIndex = data.get(component="columns")
        if columnIndex is not None:
            columnLevels = columnIndex.names
            # Unique values present at each level of the column MultiIndex.
            columnLevelNames = {
                level: list(np.unique(np.array([c for c in columnIndex])[:, i]))
                for i, level in enumerate(columnLevels)
            }
            return columnLevelNames
        if isinstance(data, MultilevelParquetTable):
            return data.columnLevelNames
        else:
            raise TypeError(f"Unknown type for data: {type(data)}!")

    def _colsFromDict(self, colDict, columnIndex=None):
        """Converts dictionary column specification to a list of columns.

        This mirrors the original gen2 implementation within
        `pipe.tasks.parquetTable.MultilevelParquetTable`.
        """
        new_colDict = {}
        columnLevels = self._get_data_columnLevels(None, columnIndex=columnIndex)

        for i, lev in enumerate(columnLevels):
            if lev in colDict:
                if isinstance(colDict[lev], str):
                    new_colDict[lev] = [colDict[lev]]
                else:
                    new_colDict[lev] = colDict[lev]
            else:
                # Level not constrained: accept every value it takes.
                new_colDict[lev] = columnIndex.levels[i]

        levelCols = [new_colDict[lev] for lev in columnLevels]
        cols = list(product(*levelCols))
        # Only return combinations that actually exist in the index.
        colsAvailable = [col for col in cols if col in columnIndex]
        return colsAvailable

    def multilevelColumns(self, data, columnIndex=None, returnTuple=False):
        """Returns columns needed by functor from multilevel dataset.

        To access tables with multilevel column structure, the
        `MultilevelParquetTable` or `DeferredDatasetHandle` need to be
        passed either a list of tuples or a dictionary.

        Parameters
        ----------
        data : various
            The data as either `MultilevelParquetTable`,
            `DeferredDatasetHandle`, or `InMemoryDatasetHandle`.
        columnIndex (optional): pandas `Index` object
            Either passed or read in from `DeferredDatasetHandle`.
        `returnTuple` : `bool`
            If true, then return a list of tuples rather than the column
            dictionary specification.  This is set to `True` by
            `CompositeFunctor` in order to be able to combine columns from
            the various component functors.
        """
        if isinstance(data, (DeferredDatasetHandle, InMemoryDatasetHandle)) and columnIndex is None:
            columnIndex = data.get(component="columns")

        # Confirm that the dataset has the column levels the functor is expecting it to have.
        columnLevels = self._get_data_columnLevels(data, columnIndex)

        columnDict = {'column': self.columns,
                      'dataset': self.dataset}
        if self.filt is None:
            columnLevelNames = self._get_data_columnLevelNames(data, columnIndex)
            if "band" in columnLevels:
                if self.dataset == "ref":
                    # 'ref' is band-independent; any one band will do.
                    columnDict["band"] = columnLevelNames["band"][0]
                else:
                    raise ValueError(f"'filt' not set for functor {self.name}"
                                     f"(dataset {self.dataset}) "
                                     "and ParquetTable "
                                     "contains multiple filters in column index. "
                                     "Set 'filt' or set 'dataset' to 'ref'.")
        else:
            columnDict['band'] = self.filt

        if isinstance(data, MultilevelParquetTable):
            return data._colsFromDict(columnDict)
        elif isinstance(data, (DeferredDatasetHandle, InMemoryDatasetHandle)):
            if returnTuple:
                return self._colsFromDict(columnDict, columnIndex=columnIndex)
            else:
                return columnDict
        # Fixed: the original f-string formatted the function object itself
        # instead of calling get_full_type_name(data).
        raise RuntimeError(f"Unexpected data type. Got {get_full_type_name(data)}.")

    def _func(self, df, dropna=True):
        raise NotImplementedError('Must define calculation on dataframe')

    def _get_columnIndex(self, data):
        """Return columnIndex, or None for types that do not expose one."""
        if isinstance(data, (DeferredDatasetHandle, InMemoryDatasetHandle)):
            return data.get(component="columns")
        else:
            return None

    def _get_data(self, data):
        """Retrieve dataframe necessary for calculation.

        The data argument can be a DataFrame, a ParquetTable instance, or a
        gen3 DeferredDatasetHandle.

        Returns dataframe upon which `self._func` can act.

        N.B. while passing a raw pandas `DataFrame` *should* work here, it
        has not been tested.
        """
        if isinstance(data, pd.DataFrame):
            return data

        # First thing to do: check to see if the data source has a multilevel column index or not.
        columnIndex = self._get_columnIndex(data)
        is_multiLevel = isinstance(data, MultilevelParquetTable) or isinstance(columnIndex, pd.MultiIndex)

        # Simple single-level parquet table, gen2
        if isinstance(data, ParquetTable) and not is_multiLevel:
            columns = self.columns
            df = data.toDataFrame(columns=columns)
            return df

        # Get proper columns specification for this functor
        if is_multiLevel:
            columns = self.multilevelColumns(data, columnIndex=columnIndex)
        else:
            columns = self.columns

        if isinstance(data, MultilevelParquetTable):
            # Load in-memory dataframe with appropriate columns the gen2 way
            df = data.toDataFrame(columns=columns, droplevels=False)
        elif isinstance(data, (DeferredDatasetHandle, InMemoryDatasetHandle)):
            # Load in-memory dataframe with appropriate columns the gen3 way
            df = data.get(parameters={"columns": columns})
        else:
            raise RuntimeError(f"Unexpected type provided for data. Got {get_full_type_name(data)}.")

        # Drop unnecessary column levels
        if is_multiLevel:
            df = self._setLevels(df)

        return df

    def _setLevels(self, df):
        # Keep only the levels listed in _dfLevels.
        levelsToDrop = [n for n in df.columns.names if n not in self._dfLevels]
        df.columns = df.columns.droplevel(levelsToDrop)
        return df

    def _dropna(self, vals):
        return vals.dropna()

    def __call__(self, data, dropna=False):
        df = self._get_data(data)
        try:
            vals = self._func(df)
        except Exception as e:
            self.log.error("Exception in %s call: %s: %s", self.name, type(e).__name__, e)
            # On failure return an all-NaN column of the right length.
            vals = self.fail(df)
        if dropna:
            vals = self._dropna(vals)

        return vals

    def difference(self, data1, data2, **kwargs):
        """Computes difference between functor called on two different
        ParquetTable objects."""
        return self(data1, **kwargs) - self(data2, **kwargs)

    def fail(self, df):
        return pd.Series(np.full(len(df), np.nan), index=df.index)

    @property
    def name(self):
        """Full name of functor (suitable for figure labels)."""
        return NotImplementedError

    @property
    def shortname(self):
        """Short name of functor (suitable for column name/dict key)."""
        return self.name
397
398
400 """Perform multiple calculations at once on a catalog
401
402 The role of a `CompositeFunctor` is to group together computations from
403 multiple functors. Instead of returning `pandas.Series` a
404 `CompositeFunctor` returns a `pandas.Dataframe`, with the column names
405 being the keys of `funcDict`.
406
407 The `columns` attribute of a `CompositeFunctor` is the union of all columns
408 in all the component functors.
409
410 A `CompositeFunctor` does not use a `_func` method itself; rather,
411 when a `CompositeFunctor` is called, all its columns are loaded
412 at once, and the resulting dataframe is passed to the `_func` method of each component
413 functor. This has the advantage of only doing I/O (reading from parquet file) once,
414 and works because each individual `_func` method of each component functor does not
415 care if there are *extra* columns in the dataframe being passed; only that it must contain
416 *at least* the `columns` it expects.
417
418 An important and useful class method is `from_yaml`, which takes as argument the path to a YAML
419 file specifying a collection of functors.
420
421 Parameters
422 ----------
423 funcs : `dict` or `list`
424 Dictionary or list of functors. If a list, then it will be converted
425 into a dictonary according to the `.shortname` attribute of each functor.
426
427 """
428 dataset = None
429
430 def __init__(self, funcs, **kwargs):
431
432 if type(funcs) == dict:
433 self.funcDict = funcs
434 else:
435 self.funcDict = {f.shortname: f for f in funcs}
436
437 self._filt = None
438
439 super().__init__(**kwargs)
440
441 @property
442 def filt(self):
443 return self._filt
444
445 @filt.setter
446 def filt(self, filt):
447 if filt is not None:
448 for _, f in self.funcDict.items():
449 f.filt = filt
450 self._filt = filt
451
452 def update(self, new):
453 if isinstance(new, dict):
454 self.funcDict.update(new)
455 elif isinstance(new, CompositeFunctor):
456 self.funcDict.update(new.funcDict)
457 else:
458 raise TypeError('Can only update with dictionary or CompositeFunctor.')
459
460 # Make sure new functors have the same 'filt' set
461 if self.filtfiltfiltfilt is not None:
463
464 @property
465 def columns(self):
466 return list(set([x for y in [f.columns for f in self.funcDict.values()] for x in y]))
467
468 def multilevelColumns(self, data, **kwargs):
469 # Get the union of columns for all component functors. Note the need to have `returnTuple=True` here.
470 return list(
471 set(
472 [
473 x
474 for y in [
475 f.multilevelColumns(data, returnTuple=True, **kwargs) for f in self.funcDict.values()
476 ]
477 for x in y
478 ]
479 )
480 )
481
482 def __call__(self, data, **kwargs):
483 """Apply the functor to the data table
484
485 Parameters
486 ----------
487 data : various
488 The data represented as `lsst.daf.butler.DeferredDatasetHandle`,
491 `lsst.pipe.base.InMemoryDatasetHandle`,
492 or `pandas.DataFrame`.
493 The table or a pointer to a table on disk from which columns can
494 be accessed
495 """
496 columnIndex = self._get_columnIndex(data)
497
498 # First, determine whether data has a multilevel index (either gen2 or gen3)
499 is_multiLevel = isinstance(data, MultilevelParquetTable) or isinstance(columnIndex, pd.MultiIndex)
500
501 # Multilevel index, gen2 or gen3
502 if is_multiLevel:
503 columns = self.multilevelColumnsmultilevelColumns(data, columnIndex=columnIndex)
504
505 if isinstance(data, MultilevelParquetTable):
506 # Read data into memory the gen2 way
507 df = data.toDataFrame(columns=columns, droplevels=False)
508 elif isinstance(data, (DeferredDatasetHandle, InMemoryDatasetHandle)):
509 # Read data into memory the gen3 way
510 df = data.get(parameters={"columns": columns})
511
512 valDict = {}
513 for k, f in self.funcDict.items():
514 try:
515 subdf = f._setLevels(
516 df[f.multilevelColumns(data, returnTuple=True, columnIndex=columnIndex)]
517 )
518 valDict[k] = f._func(subdf)
519 except Exception as e:
520 self.log.error("Exception in %s call: %s: %s", self.name, type(e).__name__, e)
521 try:
522 valDict[k] = f.fail(subdf)
523 except NameError:
524 raise e
525
526 else:
527 if isinstance(data, (DeferredDatasetHandle, InMemoryDatasetHandle)):
528 # input if Gen3 deferLoad=True
529 df = data.get(parameters={"columns": self.columnscolumns})
530 elif isinstance(data, pd.DataFrame):
531 # input if Gen3 deferLoad=False
532 df = data
533 else:
534 # Original Gen2 input is type ParquetTable and the fallback
535 df = data.toDataFrame(columns=self.columnscolumns)
536
537 valDict = {k: f._func(df) for k, f in self.funcDict.items()}
538
539 # Check that output columns are actually columns
540 for name, colVal in valDict.items():
541 if len(colVal.shape) != 1:
542 raise RuntimeError("Transformed column '%s' is not the shape of a column. "
543 "It is shaped %s and type %s." % (name, colVal.shape, type(colVal)))
544
545 try:
546 valDf = pd.concat(valDict, axis=1)
547 except TypeError:
548 print([(k, type(v)) for k, v in valDict.items()])
549 raise
550
551 if kwargs.get('dropna', False):
552 valDf = valDf.dropna(how='any')
553
554 return valDf
555
556 @classmethod
557 def renameCol(cls, col, renameRules):
558 if renameRules is None:
559 return col
560 for old, new in renameRules:
561 if col.startswith(old):
562 col = col.replace(old, new)
563 return col
564
565 @classmethod
566 def from_file(cls, filename, **kwargs):
567 # Allow environment variables in the filename.
568 filename = os.path.expandvars(filename)
569 with open(filename) as f:
570 translationDefinition = yaml.safe_load(f)
571
572 return cls.from_yaml(translationDefinition, **kwargs)
573
574 @classmethod
575 def from_yaml(cls, translationDefinition, **kwargs):
576 funcs = {}
577 for func, val in translationDefinition['funcs'].items():
578 funcs[func] = init_fromDict(val, name=func)
579
580 if 'flag_rename_rules' in translationDefinition:
581 renameRules = translationDefinition['flag_rename_rules']
582 else:
583 renameRules = None
584
585 if 'calexpFlags' in translationDefinition:
586 for flag in translationDefinition['calexpFlags']:
587 funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='calexp')
588
589 if 'refFlags' in translationDefinition:
590 for flag in translationDefinition['refFlags']:
591 funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='ref')
592
593 if 'forcedFlags' in translationDefinition:
594 for flag in translationDefinition['forcedFlags']:
595 funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='forced_src')
596
597 if 'flags' in translationDefinition:
598 for flag in translationDefinition['flags']:
599 funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='meas')
600
601 return cls(funcs, **kwargs)
602
603
def mag_aware_eval(df, expr, log):
    """Evaluate an expression on a DataFrame, knowing what the 'mag' function means.

    Builds on `pandas.DataFrame.eval`, which parses and executes math on
    dataframes.

    Parameters
    ----------
    df : pandas.DataFrame
        Dataframe on which to evaluate expression.

    expr : str
        Expression.

    log : `logging.Logger`
        Logger that records a failed first-pass evaluation before the
        '_instFlux'-suffixed retry.
    """
    try:
        # Expand mag(col) into the equivalent log10 expression.
        # (Fixed: the scraped source had zero-width characters inside the
        # regex escapes, which broke the pattern.)
        expr_new = re.sub(r'mag\((\w+)\)', r'-2.5*log(\g<1>)/log(10)', expr)
        val = df.eval(expr_new)
    except Exception as e:  # Should check what actually gets raised
        log.error("Exception in mag_aware_eval: %s: %s", type(e).__name__, e)
        # Retry assuming the column needs the '_instFlux' suffix.
        expr_new = re.sub(r'mag\((\w+)\)', r'-2.5*log(\g<1>_instFlux)/log(10)', expr)
        val = df.eval(expr_new)
    return val
625
626
628 """Arbitrary computation on a catalog
629
630 Column names (and thus the columns to be loaded from catalog) are found
631 by finding all words and trying to ignore all "math-y" words.
632
633 Parameters
634 ----------
635 expr : str
636 Expression to evaluate, to be parsed and executed by `mag_aware_eval`.
637 """
638 _ignore_words = ('mag', 'sin', 'cos', 'exp', 'log', 'sqrt')
639
640 def __init__(self, expr, **kwargs):
641 self.expr = expr
642 super().__init__(**kwargs)
643
644 @property
645 def name(self):
646 return self.expr
647
648 @property
649 def columns(self):
650 flux_cols = re.findall(r'mag\‍(\s*(\w+)\s*\‍)', self.expr)
651
652 cols = [c for c in re.findall(r'[a-zA-Z_]+', self.expr) if c not in self._ignore_words]
653 not_a_col = []
654 for c in flux_cols:
655 if not re.search('_instFlux$', c):
656 cols.append(f'{c}_instFlux')
657 not_a_col.append(c)
658 else:
659 cols.append(c)
660
661 return list(set([c for c in cols if c not in not_a_col]))
662
663 def _func(self, df):
664 return mag_aware_eval(df, self.expr, self.log)
665
666
668 """Get column with specified name
669 """
670
671 def __init__(self, col, **kwargs):
672 self.col = col
673 super().__init__(**kwargs)
674
675 @property
676 def name(self):
677 return self.col
678
679 @property
680 def columns(self):
681 return [self.col]
682
683 def _func(self, df):
684 return df[self.col]
685
686
688 """Return the value of the index for each object
689 """
690
691 columns = ['coord_ra'] # just a dummy; something has to be here
692 _defaultDataset = 'ref'
693 _defaultNoDup = True
694
695 def _func(self, df):
696 return pd.Series(df.index, index=df.index)
697
698
class IDColumn(Column):
    """Return the object ID (the dataframe index) as a column."""

    col = 'id'
    _allow_difference = False
    _defaultNoDup = True

    def _func(self, df):
        return pd.Series(df.index, index=df.index)
706
707
class FootprintNPix(Column):
    """Return the number of pixels in the source footprint."""

    col = 'base_Footprint_nPix'
710
711
713 """Base class for coordinate column, in degrees
714 """
715 _radians = True
716
717 def __init__(self, col, **kwargs):
718 super().__init__(col, **kwargs)
719
720 def _func(self, df):
721 # Must not modify original column in case that column is used by another functor
722 output = df[self.col] * 180 / np.pi if self._radians else df[self.col]
723 return output
724
725
727 """Right Ascension, in degrees
728 """
729 name = 'RA'
730 _defaultNoDup = True
731
732 def __init__(self, **kwargs):
733 super().__init__('coord_ra', **kwargs)
734
735 def __call__(self, catalog, **kwargs):
736 return super().__call__(catalog, **kwargs)
737
738
740 """Declination, in degrees
741 """
742 name = 'Dec'
743 _defaultNoDup = True
744
745 def __init__(self, **kwargs):
746 super().__init__('coord_dec', **kwargs)
747
748 def __call__(self, catalog, **kwargs):
749 return super().__call__(catalog, **kwargs)
750
751
753 """Compute the level 20 HtmIndex for the catalog.
754
755 Notes
756 -----
757 This functor was implemented to satisfy requirements of old APDB interface
758 which required ``pixelId`` column in DiaObject with HTM20 index. APDB
759 interface had migrated to not need that information, but we keep this
760 class in case it may be useful for something else.
761 """
762 name = "Htm20"
763 htmLevel = 20
764 _radians = True
765
766 def __init__(self, ra, decl, **kwargs):
768 self.ra = ra
769 self.decl = decl
770 self._columns = [self.ra, self.decl]
771 super().__init__(**kwargs)
772
773 def _func(self, df):
774
775 def computePixel(row):
776 if self._radians:
777 sphPoint = geom.SpherePoint(row[self.ra],
778 row[self.decl],
779 geom.radians)
780 else:
781 sphPoint = geom.SpherePoint(row[self.ra],
782 row[self.decl],
783 geom.degrees)
784 return self.pixelator.index(sphPoint.getVector())
785
786 return df.apply(computePixel, axis=1, result_type='reduce').astype('int64')
787
788
def fluxName(col):
    """Return ``col`` with an '_instFlux' suffix, adding it if absent."""
    suffix = '_instFlux'
    return col if col.endswith(suffix) else col + suffix
793
794
def fluxErrName(col):
    """Return ``col`` with an '_instFluxErr' suffix, adding it if absent."""
    suffix = '_instFluxErr'
    return col if col.endswith(suffix) else col + suffix
799
800
802 """Compute calibrated magnitude
803
804 Takes a `calib` argument, which returns the flux at mag=0
805 as `calib.getFluxMag0()`. If not provided, then the default
806 `fluxMag0` is 63095734448.0194, which is default for HSC.
807 This default should be removed in DM-21955
808
809 This calculation hides warnings about invalid values and dividing by zero.
810
811 As for all functors, a `dataset` and `filt` kwarg should be provided upon
812 initialization. Unlike the default `Functor`, however, the default dataset
813 for a `Mag` is `'meas'`, rather than `'ref'`.
814
815 Parameters
816 ----------
817 col : `str`
818 Name of flux column from which to compute magnitude. Can be parseable
819 by `lsst.pipe.tasks.functors.fluxName` function---that is, you can pass
820 `'modelfit_CModel'` instead of `'modelfit_CModel_instFlux'`) and it will
821 understand.
822 calib : `lsst.afw.image.calib.Calib` (optional)
823 Object that knows zero point.
824 """
825 _defaultDataset = 'meas'
826
827 def __init__(self, col, calib=None, **kwargs):
828 self.col = fluxName(col)
829 self.calib = calib
830 if calib is not None:
831 self.fluxMag0 = calib.getFluxMag0()[0]
832 else:
833 # TO DO: DM-21955 Replace hard coded photometic calibration values
834 self.fluxMag0 = 63095734448.0194
835
836 super().__init__(**kwargs)
837
838 @property
839 def columns(self):
840 return [self.col]
841
842 def _func(self, df):
843 with np.warnings.catch_warnings():
844 np.warnings.filterwarnings('ignore', r'invalid value encountered')
845 np.warnings.filterwarnings('ignore', r'divide by zero')
846 return -2.5*np.log10(df[self.col] / self.fluxMag0)
847
848 @property
849 def name(self):
850 return f'mag_{self.col}'
851
852
class MagErr(Mag):
    """Compute calibrated magnitude uncertainty.

    Takes the same `calib` object as `lsst.pipe.tasks.functors.Mag`.

    Parameters
    ----------
    col : `str`
        Name of flux column.
    calib : `lsst.afw.image.calib.Calib` (optional)
        Object that knows zero point.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        if self.calib is not None:
            self.fluxMag0Err = self.calib.getFluxMag0()[1]
        else:
            self.fluxMag0Err = 0.

    @property
    def columns(self):
        return [self.col, self.col + 'Err']

    def _func(self, df):
        # np.warnings was removed in NumPy >= 1.25; use the stdlib module.
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', r'invalid value encountered')
            warnings.filterwarnings('ignore', r'divide by zero')
            # Fixed: the scraped source had a garbled attribute name here
            # ('self.columnscolumnscolumns').
            fluxCol, fluxErrCol = self.columns
            x = df[fluxErrCol] / df[fluxCol]
            y = self.fluxMag0Err / self.fluxMag0
            magErr = (2.5 / np.log(10.)) * np.sqrt(x*x + y*y)
            return magErr

    @property
    def name(self):
        return super().name + '_err'
889
890
892 """
893 """
894
895 def _func(self, df):
896 return (df[self.col] / self.fluxMag0) * 1e9
897
898
class MagDiff(Functor):
    """Functor to calculate magnitude difference between two flux columns.

    Parameters
    ----------
    col1, col2 : `str`
        Names of flux columns; parseable by `fluxName` (the '_instFlux'
        suffix may be omitted).  The result is ``mag(col1) - mag(col2)``.
    """
    _defaultDataset = 'meas'

    def __init__(self, col1, col2, **kwargs):
        self.col1 = fluxName(col1)
        self.col2 = fluxName(col2)
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.col1, self.col2]

    def _func(self, df):
        # np.warnings was removed in NumPy >= 1.25; use the stdlib module.
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', r'invalid value encountered')
            warnings.filterwarnings('ignore', r'divide by zero')
            return -2.5*np.log10(df[self.col1]/df[self.col2])

    @property
    def name(self):
        return f'(mag_{self.col1} - mag_{self.col2})'

    @property
    def shortname(self):
        return f'magDiff_{self.col1}_{self.col2}'
926
927
929 """Compute the color between two filters
930
931 Computes color by initializing two different `Mag`
932 functors based on the `col` and filters provided, and
933 then returning the difference.
934
935 This is enabled by the `_func` expecting a dataframe with a
936 multilevel column index, with both `'band'` and `'column'`,
937 instead of just `'column'`, which is the `Functor` default.
938 This is controlled by the `_dfLevels` attribute.
939
940 Also of note, the default dataset for `Color` is `forced_src'`,
941 whereas for `Mag` it is `'meas'`.
942
943 Parameters
944 ----------
945 col : str
946 Name of flux column from which to compute; same as would be passed to
948
949 filt2, filt1 : str
950 Filters from which to compute magnitude difference.
951 Color computed is `Mag(filt2) - Mag(filt1)`.
952 """
953 _defaultDataset = 'forced_src'
954 _dfLevels = ('band', 'column')
955 _defaultNoDup = True
956
957 def __init__(self, col, filt2, filt1, **kwargs):
958 self.col = fluxName(col)
959 if filt2 == filt1:
960 raise RuntimeError("Cannot compute Color for %s: %s - %s " % (col, filt2, filt1))
961 self.filt2 = filt2
962 self.filt1 = filt1
963
964 self.mag2 = Mag(col, filt=filt2, **kwargs)
965 self.mag1 = Mag(col, filt=filt1, **kwargs)
966
967 super().__init__(**kwargs)
968
    @property
    def filt(self):
        # A color spans two bands, so there is no single filter; always None.
        return None

    @filt.setter
    def filt(self, filt):
        # Deliberately ignore attempts to set a single band.
        pass

    def _func(self, df):
        # `df` carries a 'band' outer column level (see `_dfLevels`); take the
        # per-band magnitude difference.
        mag2 = self.mag2._func(df[self.filt2])
        mag1 = self.mag1._func(df[self.filt1])
        return mag2 - mag1

    @property
    def columns(self):
        # The same flux column is read for both bands.
        return [self.mag1.col, self.mag2.col]

    def multilevelColumns(self, parq, **kwargs):
        # (dataset, band, column) tuples for the two bands.
        return [(self.dataset, self.filt1, self.col), (self.dataset, self.filt2, self.col)]

    @property
    def name(self):
        # Human-readable, e.g. "g - r (base_PsfFlux)".
        return f'{self.filt2} - {self.filt1} ({self.col})'

    @property
    def shortname(self):
        # Column-name-safe, e.g. "base_PsfFlux_gmr".
        return f"{self.col}_{self.filt2.replace('-', '')}m{self.filt1.replace('-', '')}"
996
997
999 """Main function of this subclass is to override the dropna=True
1000 """
1001 _null_label = 'null'
1002 _allow_difference = False
1003 name = 'label'
1004 _force_str = False
1005
1006 def __call__(self, parq, dropna=False, **kwargs):
1007 return super().__call__(parq, dropna=False, **kwargs)
1008
1009
1011 _columns = ["base_ClassificationExtendedness_value"]
1012 _column = "base_ClassificationExtendedness_value"
1013
1014 def _func(self, df):
1015 x = df[self._columns][self._column]
1016 mask = x.isnull()
1017 test = (x < 0.5).astype(int)
1018 test = test.mask(mask, 2)
1019
1020 # TODO: DM-21954 Look into veracity of inline comment below
1021 # are these backwards?
1022 categories = ['galaxy', 'star', self._null_label]
1023 label = pd.Series(pd.Categorical.from_codes(test, categories=categories),
1024 index=x.index, name='label')
1025 if self._force_str:
1026 label = label.astype(str)
1027 return label
1028
1029
    _columns = ['numStarFlags']
    # NOTE(review): this mapping is not used by `_func` below, which builds
    # its own label list; confirm which spelling is authoritative.
    labels = {"star": 0, "maybe": 1, "notStar": 2}

    def _func(self, df):
        """Label each source by how many bands flagged it as a star.

        0 flags -> 'noStar', 1..n-1 -> 'maybe', n (all bands) -> 'star'.
        """
        x = df[self._columns][self._columns[0]]

        # Number of filters
        n = len(x.unique()) - 1

        labels = ['noStar', 'maybe', 'star']
        label = pd.Series(pd.cut(x, [-1, 0, n-1, n], labels=labels),
                          index=x.index, name='label')

        if self._force_str:
            label = label.astype(str)

        return label
1048
1049
    name = 'Deconvolved Moments'
    shortname = 'deconvolvedMoments'
    # Source second moments (HSM preferred, SDSS fallback) and HSM PSF moments.
    _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
                "ext_shapeHSM_HsmSourceMoments_yy",
                "base_SdssShape_xx", "base_SdssShape_yy",
                "ext_shapeHSM_HsmPsfMoments_xx",
                "ext_shapeHSM_HsmPsfMoments_yy")
1058
1059 def _func(self, df):
1060 """Calculate deconvolved moments"""
1061 if "ext_shapeHSM_HsmSourceMoments_xx" in df.columns: # _xx added by tdm
1062 hsm = df["ext_shapeHSM_HsmSourceMoments_xx"] + df["ext_shapeHSM_HsmSourceMoments_yy"]
1063 else:
1064 hsm = np.ones(len(df))*np.nan
1065 sdss = df["base_SdssShape_xx"] + df["base_SdssShape_yy"]
1066 if "ext_shapeHSM_HsmPsfMoments_xx" in df.columns:
1067 psf = df["ext_shapeHSM_HsmPsfMoments_xx"] + df["ext_shapeHSM_HsmPsfMoments_yy"]
1068 else:
1069 # LSST does not have shape.sdss.psf. Could instead add base_PsfShape to catalog using
1070 # exposure.getPsf().computeShape(s.getCentroid()).getIxx()
1071 # raise TaskError("No psf shape parameter found in catalog")
1072 raise RuntimeError('No psf shape parameter found in catalog')
1073
1074 return hsm.where(np.isfinite(hsm), sdss) - psf
1075
1076
1078 """Functor to calculate SDSS trace radius size for sources"""
1079 name = "SDSS Trace Size"
1080 shortname = 'sdssTrace'
1081 _columns = ("base_SdssShape_xx", "base_SdssShape_yy")
1082
1083 def _func(self, df):
1084 srcSize = np.sqrt(0.5*(df["base_SdssShape_xx"] + df["base_SdssShape_yy"]))
1085 return srcSize
1086
1087
1089 """Functor to calculate SDSS trace radius size difference (%) between object and psf model"""
1090 name = "PSF - SDSS Trace Size"
1091 shortname = 'psf_sdssTrace'
1092 _columns = ("base_SdssShape_xx", "base_SdssShape_yy",
1093 "base_SdssShape_psf_xx", "base_SdssShape_psf_yy")
1094
1095 def _func(self, df):
1096 srcSize = np.sqrt(0.5*(df["base_SdssShape_xx"] + df["base_SdssShape_yy"]))
1097 psfSize = np.sqrt(0.5*(df["base_SdssShape_psf_xx"] + df["base_SdssShape_psf_yy"]))
1098 sizeDiff = 100*(srcSize - psfSize)/(0.5*(srcSize + psfSize))
1099 return sizeDiff
1100
1101
1103 """Functor to calculate HSM trace radius size for sources"""
1104 name = 'HSM Trace Size'
1105 shortname = 'hsmTrace'
1106 _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
1107 "ext_shapeHSM_HsmSourceMoments_yy")
1108
1109 def _func(self, df):
1110 srcSize = np.sqrt(0.5*(df["ext_shapeHSM_HsmSourceMoments_xx"]
1111 + df["ext_shapeHSM_HsmSourceMoments_yy"]))
1112 return srcSize
1113
1114
1116 """Functor to calculate HSM trace radius size difference (%) between object and psf model"""
1117 name = 'PSF - HSM Trace Size'
1118 shortname = 'psf_HsmTrace'
1119 _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
1120 "ext_shapeHSM_HsmSourceMoments_yy",
1121 "ext_shapeHSM_HsmPsfMoments_xx",
1122 "ext_shapeHSM_HsmPsfMoments_yy")
1123
1124 def _func(self, df):
1125 srcSize = np.sqrt(0.5*(df["ext_shapeHSM_HsmSourceMoments_xx"]
1126 + df["ext_shapeHSM_HsmSourceMoments_yy"]))
1127 psfSize = np.sqrt(0.5*(df["ext_shapeHSM_HsmPsfMoments_xx"]
1128 + df["ext_shapeHSM_HsmPsfMoments_yy"]))
1129 sizeDiff = 100*(srcSize - psfSize)/(0.5*(srcSize + psfSize))
1130 return sizeDiff
1131
1132
    name = 'HSM Psf FWHM'
    _columns = ('ext_shapeHSM_HsmPsfMoments_xx', 'ext_shapeHSM_HsmPsfMoments_yy')
    # TODO: DM-21403 pixel scale should be computed from the CD matrix or transform matrix
    pixelScale = 0.168  # hard-coded arcsec/pixel; see TODO above
    SIGMA2FWHM = 2*np.sqrt(2*np.log(2))  # Gaussian sigma -> FWHM conversion factor

    def _func(self, df):
        """Return the PSF FWHM in arcseconds from the HSM PSF trace radius."""
        return self.pixelScale*self.SIGMA2FWHM*np.sqrt(
            0.5*(df['ext_shapeHSM_HsmPsfMoments_xx'] + df['ext_shapeHSM_HsmPsfMoments_yy']))
1143
1144
1146 name = "Distortion Ellipticity (e1)"
1147 shortname = "Distortion"
1148
1149 def __init__(self, colXX, colXY, colYY, **kwargs):
1150 self.colXX = colXX
1151 self.colXY = colXY
1152 self.colYY = colYY
1153 self._columns = [self.colXX, self.colXY, self.colYY]
1154 super().__init__(**kwargs)
1155
1156 @property
1157 def columns(self):
1158 return [self.colXX, self.colXY, self.colYY]
1159
1160 def _func(self, df):
1161 return df[self.colXX] - df[self.colYY] / (df[self.colXX] + df[self.colYY])
1162
1163
1165 name = "Ellipticity e2"
1166
1167 def __init__(self, colXX, colXY, colYY, **kwargs):
1168 self.colXX = colXX
1169 self.colXY = colXY
1170 self.colYY = colYY
1171 super().__init__(**kwargs)
1172
1173 @property
1174 def columns(self):
1175 return [self.colXX, self.colXY, self.colYY]
1176
1177 def _func(self, df):
1178 return 2*df[self.colXY] / (df[self.colXX] + df[self.colYY])
1179
1180
1182
    def __init__(self, colXX, colXY, colYY, **kwargs):
        """Store the three second-moment column names (Ixx, Ixy, Iyy)."""
        self.colXX = colXX
        self.colXY = colXY
        self.colYY = colYY
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.colXX, self.colXY, self.colYY]
1192
1193 def _func(self, df):
1194 return (df[self.colXX]*df[self.colYY] - df[self.colXY]**2)**0.25
1195
1196
1198 """Computations using the stored localWcs.
1199 """
1200 name = "LocalWcsOperations"
1201
1202 def __init__(self,
1203 colCD_1_1,
1204 colCD_1_2,
1205 colCD_2_1,
1206 colCD_2_2,
1207 **kwargs):
1208 self.colCD_1_1 = colCD_1_1
1209 self.colCD_1_2 = colCD_1_2
1210 self.colCD_2_1 = colCD_2_1
1211 self.colCD_2_2 = colCD_2_2
1212 super().__init__(**kwargs)
1213
    def computeDeltaRaDec(self, x, y, cd11, cd12, cd21, cd22):
        """Convert a pixel offset (x, y) into a (dRA, dDec) offset by
        applying the local CD matrix.

        Parameters
        ----------
        x : `pandas.Series`
            X pixel coordinate offset.
        y : `pandas.Series`
            Y pixel coordinate offset.
        cd11 : `pandas.Series`
            [1, 1] element of the local Wcs affine transform.
        cd12 : `pandas.Series`
            [1, 2] element of the local Wcs affine transform.
        cd21 : `pandas.Series`
            [2, 1] element of the local Wcs affine transform.
        cd22 : `pandas.Series`
            [2, 2] element of the local Wcs affine transform.

        Returns
        -------
        raDecTuple : tuple
            RA and dec conversion of x and y given the local Wcs. Returned
            units are in radians.

        """
        return (x * cd11 + y * cd12, x * cd21 + y * cd22)
1242
1243 def computeSkySeperation(self, ra1, dec1, ra2, dec2):
1244 """Compute the local pixel scale conversion.
1245
1246 Parameters
1247 ----------
1248 ra1 : `pandas.Series`
1249 Ra of the first coordinate in radians.
1250 dec1 : `pandas.Series`
1251 Dec of the first coordinate in radians.
1252 ra2 : `pandas.Series`
1253 Ra of the second coordinate in radians.
1254 dec2 : `pandas.Series`
1255 Dec of the second coordinate in radians.
1256
1257 Returns
1258 -------
1259 dist : `pandas.Series`
1260 Distance on the sphere in radians.
1261 """
1262 deltaDec = dec2 - dec1
1263 deltaRa = ra2 - ra1
1264 return 2 * np.arcsin(
1265 np.sqrt(
1266 np.sin(deltaDec / 2) ** 2
1267 + np.cos(dec2) * np.cos(dec1) * np.sin(deltaRa / 2) ** 2))
1268
    def getSkySeperationFromPixel(self, x1, y1, x2, y2, cd11, cd12, cd21, cd22):
        """Compute the distance on the sphere between two pixel offsets,
        converting each to a sky offset via the local CD matrix first.

        Parameters
        ----------
        x1 : `pandas.Series`
            X pixel coordinate.
        y1 : `pandas.Series`
            Y pixel coordinate.
        x2 : `pandas.Series`
            X pixel coordinate.
        y2 : `pandas.Series`
            Y pixel coordinate.
        cd11 : `pandas.Series`
            [1, 1] element of the local Wcs affine transform.
        cd12 : `pandas.Series`
            [1, 2] element of the local Wcs affine transform.
        cd21 : `pandas.Series`
            [2, 1] element of the local Wcs affine transform.
        cd22 : `pandas.Series`
            [2, 2] element of the local Wcs affine transform.

        Returns
        -------
        dist : `pandas.Series`
            Distance on the sphere in radians.
        """
        ra1, dec1 = self.computeDeltaRaDec(x1, y1, cd11, cd12, cd21, cd22)
        ra2, dec2 = self.computeDeltaRaDec(x2, y2, cd11, cd12, cd21, cd22)
        # Great circle distance for small separations.
        return self.computeSkySeperation(ra1, dec1, ra2, dec2)
1302
1303
1305 """Compute the local pixel scale from the stored CDMatrix.
1306 """
1307 name = "PixelScale"
1308
1309 @property
1310 def columns(self):
1311 return [self.colCD_1_1,
1312 self.colCD_1_2,
1313 self.colCD_2_1,
1314 self.colCD_2_2]
1315
1316 def pixelScaleArcseconds(self, cd11, cd12, cd21, cd22):
1317 """Compute the local pixel to scale conversion in arcseconds.
1318
1319 Parameters
1320 ----------
1321 cd11 : `pandas.Series`
1322 [1, 1] element of the local Wcs affine transform in radians.
1323 cd11 : `pandas.Series`
1324 [1, 1] element of the local Wcs affine transform in radians.
1325 cd12 : `pandas.Series`
1326 [1, 2] element of the local Wcs affine transform in radians.
1327 cd21 : `pandas.Series`
1328 [2, 1] element of the local Wcs affine transform in radians.
1329 cd22 : `pandas.Series`
1330 [2, 2] element of the local Wcs affine transform in radians.
1331
1332 Returns
1333 -------
1334 pixScale : `pandas.Series`
1335 Arcseconds per pixel at the location of the local WC
1336 """
1337 return 3600 * np.degrees(np.sqrt(np.fabs(cd11 * cd22 - cd12 * cd21)))
1338
    def _func(self, df):
        # Evaluate the per-row pixel scale from the CD-matrix columns.
        return self.pixelScaleArcseconds(df[self.colCD_1_1],
                                         df[self.colCD_1_2],
                                         df[self.colCD_2_1],
                                         df[self.colCD_2_2])
1344
1345
1347 """Convert a value in units pixels squared to units arcseconds squared.
1348 """
1349
1350 def __init__(self,
1351 col,
1352 colCD_1_1,
1353 colCD_1_2,
1354 colCD_2_1,
1355 colCD_2_2,
1356 **kwargs):
1357 self.col = col
1358 super().__init__(colCD_1_1,
1359 colCD_1_2,
1360 colCD_2_1,
1361 colCD_2_2,
1362 **kwargs)
1363
1364 @property
1365 def name(self):
1366 return f"{self.col}_asArcseconds"
1367
1368 @property
1369 def columns(self):
1370 return [self.col,
1371 self.colCD_1_1,
1372 self.colCD_1_2,
1373 self.colCD_2_1,
1374 self.colCD_2_2]
1375
1376 def _func(self, df):
1377 return df[self.col] * self.pixelScaleArcseconds(df[self.colCD_1_1],
1378 df[self.colCD_1_2],
1379 df[self.colCD_2_1],
1380 df[self.colCD_2_2])
1381
1382
1384 """Convert a value in units pixels to units arcseconds.
1385 """
1386
1387 def __init__(self,
1388 col,
1389 colCD_1_1,
1390 colCD_1_2,
1391 colCD_2_1,
1392 colCD_2_2,
1393 **kwargs):
1394 self.col = col
1395 super().__init__(colCD_1_1,
1396 colCD_1_2,
1397 colCD_2_1,
1398 colCD_2_2,
1399 **kwargs)
1400
1401 @property
1402 def name(self):
1403 return f"{self.col}_asArcsecondsSq"
1404
1405 @property
1406 def columns(self):
1407 return [self.col,
1408 self.colCD_1_1,
1409 self.colCD_1_2,
1410 self.colCD_2_1,
1411 self.colCD_2_2]
1412
1413 def _func(self, df):
1414 pixScale = self.pixelScaleArcseconds(df[self.colCD_1_1],
1415 df[self.colCD_1_2],
1416 df[self.colCD_2_1],
1417 df[self.colCD_2_2])
1418 return df[self.col] * pixScale * pixScale
1419
1420
    name = 'Reference Band'
    shortname = 'refBand'

    @property
    def columns(self):
        # Superset of per-band merge-measurement flag columns; _func drops
        # any that are absent from the input table.
        return ["merge_measurement_i",
                "merge_measurement_r",
                "merge_measurement_z",
                "merge_measurement_y",
                "merge_measurement_g",
                "merge_measurement_u"]
1433
1434 def _func(self, df: pd.DataFrame) -> pd.Series:
1435 def getFilterAliasName(row):
1436 # get column name with the max value (True > False)
1437 colName = row.idxmax()
1438 return colName.replace('merge_measurement_', '')
1439
1440 # Skip columns that are unavailable, because this functor requests the
1441 # superset of bands that could be included in the object table
1442 columns = [col for col in self.columnscolumns if col in df.columns]
1443 # Makes a Series of dtype object if df is empty
1444 return df[columns].apply(getFilterAliasName, axis=1,
1445 result_type='reduce').astype('object')
1446
1447
    # AB to NanoJansky (3631 Jansky)
    AB_FLUX_SCALE = (0 * u.ABmag).to_value(u.nJy)
    LOG_AB_FLUX_SCALE = 12.56  # ~log10(AB_FLUX_SCALE)
    FIVE_OVER_2LOG10 = 1.085736204758129569  # 2.5/ln(10), for magnitude-error propagation
    # TO DO: DM-21955 Replace hard coded photometric calibration values
    COADD_ZP = 27  # zeropoint assumed when no calib object is supplied

    def __init__(self, colFlux, colFluxErr=None, calib=None, **kwargs):
        """Store flux/error column names and derive fluxMag0 either from
        ``calib`` or from the hard-coded COADD_ZP when calib is None.
        """
        self.vhypot = np.vectorize(self.hypot)  # element-wise overflow-safe hypot
        self.col = colFlux
        self.colFluxErr = colFluxErr

        self.calib = calib
        if calib is not None:
            self.fluxMag0, self.fluxMag0Err = calib.getFluxMag0()
        else:
            self.fluxMag0 = 1./np.power(10, -0.4*self.COADD_ZP)
            self.fluxMag0Err = 0.

        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.col]

    @property
    def name(self):
        return f'mag_{self.col}'
1477
    @classmethod
    def hypot(cls, a, b):
        """Scalar hypotenuse computed as |a|*sqrt(1 + (b/a)**2) with the
        larger magnitude in the denominator, avoiding overflow."""
        if np.abs(a) < np.abs(b):
            a, b = b, a
        if a == 0.:
            return 0.
        q = b/a
        return np.abs(a) * np.sqrt(1. + q*q)
1486
1487 def dn2flux(self, dn, fluxMag0):
1488 return self.AB_FLUX_SCALE * dn / fluxMag0
1489
1490 def dn2mag(self, dn, fluxMag0):
1491 with np.warnings.catch_warnings():
1492 np.warnings.filterwarnings('ignore', r'invalid value encountered')
1493 np.warnings.filterwarnings('ignore', r'divide by zero')
1494 return -2.5 * np.log10(dn/fluxMag0)
1495
    def dn2fluxErr(self, dn, dnErr, fluxMag0, fluxMag0Err):
        # Combine count error and zeropoint error in quadrature, then scale
        # to nanojansky (divide by fluxMag0 twice: once per error term).
        retVal = self.vhypot(dn * fluxMag0Err, dnErr * fluxMag0)
        retVal *= self.AB_FLUX_SCALE / fluxMag0 / fluxMag0
        return retVal

    def dn2MagErr(self, dn, dnErr, fluxMag0, fluxMag0Err):
        # magErr = (2.5/ln 10) * fluxErr / flux
        retVal = self.dn2fluxErr(dn, dnErr, fluxMag0, fluxMag0Err) / self.dn2flux(dn, fluxMag0)
        return self.FIVE_OVER_2LOG10 * retVal
1504
1505
    def _func(self, df):
        # Calibrated flux in nanojansky for the configured flux column.
        return self.dn2flux(df[self.col], self.fluxMag0)
1509
1510
    @property
    def columns(self):
        # Both the flux column and its error column are required.
        return [self.col, self.colFluxErr]

    def _func(self, df):
        # dn2fluxErr is vectorized via np.vectorize, so re-wrap the ndarray
        # result into a Series aligned with the input index.
        retArr = self.dn2fluxErr(df[self.col], df[self.colFluxErr], self.fluxMag0, self.fluxMag0Err)
        return pd.Series(retArr, index=df.index)
1519
1520
    def _func(self, df):
        # AB magnitude for the configured flux column.
        return self.dn2mag(df[self.col], self.fluxMag0)
1524
1525
    @property
    def columns(self):
        # Both the flux column and its error column are required.
        return [self.col, self.colFluxErr]

    def _func(self, df):
        # dn2MagErr uses the vectorized hypot, so re-wrap the ndarray result
        # into a Series aligned with the input index.
        retArr = self.dn2MagErr(df[self.col], df[self.colFluxErr], self.fluxMag0, self.fluxMag0Err)
        return pd.Series(retArr, index=df.index)
1534
1535
1537 """Base class for calibrating the specified instrument flux column using
1538 the local photometric calibration.
1539
1540 Parameters
1541 ----------
1542 instFluxCol : `str`
1543 Name of the instrument flux column.
1544 instFluxErrCol : `str`
1545 Name of the assocated error columns for ``instFluxCol``.
1546 photoCalibCol : `str`
1547 Name of local calibration column.
1548 photoCalibErrCol : `str`
1549 Error associated with ``photoCalibCol``
1550
1551 See also
1552 --------
1553 LocalPhotometry
1554 LocalNanojansky
1555 LocalNanojanskyErr
1556 LocalMagnitude
1557 LocalMagnitudeErr
1558 """
1559 logNJanskyToAB = (1 * u.nJy).to_value(u.ABmag)
1560
1561 def __init__(self,
1562 instFluxCol,
1563 instFluxErrCol,
1564 photoCalibCol,
1565 photoCalibErrCol,
1566 **kwargs):
1567 self.instFluxCol = instFluxCol
1568 self.instFluxErrCol = instFluxErrCol
1569 self.photoCalibCol = photoCalibCol
1570 self.photoCalibErrCol = photoCalibErrCol
1571 super().__init__(**kwargs)
1572
    def instFluxToNanojansky(self, instFlux, localCalib):
        """Convert instrument flux to nanojanskys.

        Parameters
        ----------
        instFlux : `numpy.ndarray` or `pandas.Series`
            Array of instrument flux measurements
        localCalib : `numpy.ndarray` or `pandas.Series`
            Array of local photometric calibration estimates.

        Returns
        -------
        calibFlux : `numpy.ndarray` or `pandas.Series`
            Array of calibrated flux measurements.
        """
        return instFlux * localCalib
1589
    def instFluxErrToNanojanskyErr(self, instFlux, instFluxErr, localCalib, localCalibErr):
        """Convert instrument flux error to a nanojansky error, combining
        the flux and calibration errors in quadrature.

        Parameters
        ----------
        instFlux : `numpy.ndarray` or `pandas.Series`
            Array of instrument flux measurements
        instFluxErr : `numpy.ndarray` or `pandas.Series`
            Errors on associated ``instFlux`` values
        localCalib : `numpy.ndarray` or `pandas.Series`
            Array of local photometric calibration estimates.
        localCalibErr : `numpy.ndarray` or `pandas.Series`
            Errors on associated ``localCalib`` values

        Returns
        -------
        calibFluxErr : `numpy.ndarray` or `pandas.Series`
            Errors on calibrated flux measurements.
        """
        return np.hypot(instFluxErr * localCalib, instFlux * localCalibErr)
1610
    def instFluxToMagnitude(self, instFlux, localCalib):
        """Convert instrument flux to an AB magnitude.

        Parameters
        ----------
        instFlux : `numpy.ndarray` or `pandas.Series`
            Array of instrument flux measurements
        localCalib : `numpy.ndarray` or `pandas.Series`
            Array of local photometric calibration estimates.

        Returns
        -------
        calibMag : `numpy.ndarray` or `pandas.Series`
            Array of calibrated AB magnitudes.
        """
        return -2.5 * np.log10(self.instFluxToNanojansky(instFlux, localCalib)) + self.logNJanskyToAB
1627
1628 def instFluxErrToMagnitudeErr(self, instFlux, instFluxErr, localCalib, localCalibErr):
1629 """Convert instrument flux err to nanojanskys.
1630
1631 Parameters
1632 ----------
1633 instFlux : `numpy.ndarray` or `pandas.Series`
1634 Array of instrument flux measurements
1635 instFluxErr : `numpy.ndarray` or `pandas.Series`
1636 Errors on associated ``instFlux`` values
1637 localCalib : `numpy.ndarray` or `pandas.Series`
1638 Array of local photometric calibration estimates.
1639 localCalibErr : `numpy.ndarray` or `pandas.Series`
1640 Errors on associated ``localCalib`` values
1641
1642 Returns
1643 -------
1644 calibMagErr: `numpy.ndarray` or `pandas.Series`
1645 Error on calibrated AB magnitudes.
1646 """
1647 err = self.instFluxErrToNanojanskyErr(instFlux, instFluxErr, localCalib, localCalibErr)
1648 return 2.5 / np.log(10) * err / self.instFluxToNanojansky(instFlux, instFluxErr)
1649
1650
1652 """Compute calibrated fluxes using the local calibration value.
1653
1654 See also
1655 --------
1656 LocalNanojansky
1657 LocalNanojanskyErr
1658 LocalMagnitude
1659 LocalMagnitudeErr
1660 """
1661
1662 @property
1663 def columns(self):
1664 return [self.instFluxCol, self.photoCalibCol]
1665
1666 @property
1667 def name(self):
1668 return f'flux_{self.instFluxCol}'
1669
1670 def _func(self, df):
1671 return self.instFluxToNanojansky(df[self.instFluxCol], df[self.photoCalibCol])
1672
1673
1675 """Compute calibrated flux errors using the local calibration value.
1676
1677 See also
1678 --------
1679 LocalNanojansky
1680 LocalNanojanskyErr
1681 LocalMagnitude
1682 LocalMagnitudeErr
1683 """
1684
1685 @property
1686 def columns(self):
1687 return [self.instFluxCol, self.instFluxErrCol,
1689
1690 @property
1691 def name(self):
1692 return f'fluxErr_{self.instFluxCol}'
1693
1694 def _func(self, df):
1695 return self.instFluxErrToNanojanskyErr(df[self.instFluxCol], df[self.instFluxErrCol],
1696 df[self.photoCalibCol], df[self.photoCalibErrCol])
1697
1698
1700 """Compute calibrated AB magnitudes using the local calibration value.
1701
1702 See also
1703 --------
1704 LocalNanojansky
1705 LocalNanojanskyErr
1706 LocalMagnitude
1707 LocalMagnitudeErr
1708 """
1709
1710 @property
1711 def columns(self):
1712 return [self.instFluxCol, self.photoCalibCol]
1713
1714 @property
1715 def name(self):
1716 return f'mag_{self.instFluxCol}'
1717
1718 def _func(self, df):
1719 return self.instFluxToMagnitude(df[self.instFluxCol],
1720 df[self.photoCalibCol])
1721
1722
1724 """Compute calibrated AB magnitude errors using the local calibration value.
1725
1726 See also
1727 --------
1728 LocalNanojansky
1729 LocalNanojanskyErr
1730 LocalMagnitude
1731 LocalMagnitudeErr
1732 """
1733
1734 @property
1735 def columns(self):
1736 return [self.instFluxCol, self.instFluxErrCol,
1738
1739 @property
1740 def name(self):
1741 return f'magErr_{self.instFluxCol}'
1742
1743 def _func(self, df):
1744 return self.instFluxErrToMagnitudeErr(df[self.instFluxCol],
1745 df[self.instFluxErrCol],
1746 df[self.photoCalibCol],
1747 df[self.photoCalibErrCol])
1748
1749
1751 """Compute absolute mean of dipole fluxes.
1752
1753 See also
1754 --------
1755 LocalNanojansky
1756 LocalNanojanskyErr
1757 LocalMagnitude
1758 LocalMagnitudeErr
1759 LocalDipoleMeanFlux
1760 LocalDipoleMeanFluxErr
1761 LocalDipoleDiffFlux
1762 LocalDipoleDiffFluxErr
1763 """
1764 def __init__(self,
1765 instFluxPosCol,
1766 instFluxNegCol,
1767 instFluxPosErrCol,
1768 instFluxNegErrCol,
1769 photoCalibCol,
1770 photoCalibErrCol,
1771 **kwargs):
1772 self.instFluxNegCol = instFluxNegCol
1773 self.instFluxPosCol = instFluxPosCol
1774 self.instFluxNegErrCol = instFluxNegErrCol
1775 self.instFluxPosErrCol = instFluxPosErrCol
1776 self.photoCalibColphotoCalibCol = photoCalibCol
1777 self.photoCalibErrColphotoCalibErrCol = photoCalibErrCol
1778 super().__init__(instFluxNegCol,
1779 instFluxNegErrCol,
1780 photoCalibCol,
1781 photoCalibErrCol,
1782 **kwargs)
1783
1784 @property
1785 def columns(self):
1786 return [self.instFluxPosCol,
1787 self.instFluxNegCol,
1789
    @property
    def name(self):
        # e.g. "dipMeanFlux_<posCol>_<negCol>"
        return f'dipMeanFlux_{self.instFluxPosCol}_{self.instFluxNegCol}'
1793
1794 def _func(self, df):
1795 return 0.5*(np.fabs(self.instFluxToNanojansky(df[self.instFluxNegCol], df[self.photoCalibColphotoCalibCol]))
1796 + np.fabs(self.instFluxToNanojansky(df[self.instFluxPosCol], df[self.photoCalibColphotoCalibCol])))
1797
1798
1800 """Compute the error on the absolute mean of dipole fluxes.
1801
1802 See also
1803 --------
1804 LocalNanojansky
1805 LocalNanojanskyErr
1806 LocalMagnitude
1807 LocalMagnitudeErr
1808 LocalDipoleMeanFlux
1809 LocalDipoleMeanFluxErr
1810 LocalDipoleDiffFlux
1811 LocalDipoleDiffFluxErr
1812 """
1813
1814 @property
1815 def columns(self):
1816 return [self.instFluxPosCol,
1817 self.instFluxNegCol,
1818 self.instFluxPosErrCol,
1819 self.instFluxNegErrCol,
1822
    @property
    def name(self):
        # e.g. "dipMeanFluxErr_<posCol>_<negCol>"
        return f'dipMeanFluxErr_{self.instFluxPosCol}_{self.instFluxNegCol}'
1826
1827 def _func(self, df):
1828 return 0.5*np.sqrt(
1829 (np.fabs(df[self.instFluxNegCol]) + np.fabs(df[self.instFluxPosCol])
1831 + (df[self.instFluxNegErrCol]**2 + df[self.instFluxPosErrCol]**2)
1832 * df[self.photoCalibColphotoCalibCol]**2)
1833
1834
1836 """Compute the absolute difference of dipole fluxes.
1837
1838 Value is (abs(pos) - abs(neg))
1839
1840 See also
1841 --------
1842 LocalNanojansky
1843 LocalNanojanskyErr
1844 LocalMagnitude
1845 LocalMagnitudeErr
1846 LocalDipoleMeanFlux
1847 LocalDipoleMeanFluxErr
1848 LocalDipoleDiffFlux
1849 LocalDipoleDiffFluxErr
1850 """
1851
1852 @property
1853 def columns(self):
1854 return [self.instFluxPosCol,
1855 self.instFluxNegCol,
1857
    @property
    def name(self):
        # e.g. "dipDiffFlux_<posCol>_<negCol>"
        return f'dipDiffFlux_{self.instFluxPosCol}_{self.instFluxNegCol}'
1861
1862 def _func(self, df):
1863 return (np.fabs(self.instFluxToNanojansky(df[self.instFluxPosCol], df[self.photoCalibColphotoCalibCol]))
1864 - np.fabs(self.instFluxToNanojansky(df[self.instFluxNegCol], df[self.photoCalibColphotoCalibCol])))
1865
1866
1868 """Compute the error on the absolute difference of dipole fluxes.
1869
1870 See also
1871 --------
1872 LocalNanojansky
1873 LocalNanojanskyErr
1874 LocalMagnitude
1875 LocalMagnitudeErr
1876 LocalDipoleMeanFlux
1877 LocalDipoleMeanFluxErr
1878 LocalDipoleDiffFlux
1879 LocalDipoleDiffFluxErr
1880 """
1881
1882 @property
1883 def columns(self):
1884 return [self.instFluxPosCol,
1885 self.instFluxNegCol,
1886 self.instFluxPosErrCol,
1887 self.instFluxNegErrCol,
1890
    @property
    def name(self):
        # e.g. "dipDiffFluxErr_<posCol>_<negCol>"
        return f'dipDiffFluxErr_{self.instFluxPosCol}_{self.instFluxNegCol}'
1894
1895 def _func(self, df):
1896 return np.sqrt(
1897 ((np.fabs(df[self.instFluxPosCol]) - np.fabs(df[self.instFluxNegCol]))
1899 + (df[self.instFluxPosErrCol]**2 + df[self.instFluxNegErrCol]**2)
1900 * df[self.photoCalibColphotoCalibCol]**2)
1901
1902
1904 """Base class for returning the ratio of 2 columns.
1905
1906 Can be used to compute a Signal to Noise ratio for any input flux.
1907
1908 Parameters
1909 ----------
1910 numerator : `str`
1911 Name of the column to use at the numerator in the ratio
1912 denominator : `str`
1913 Name of the column to use as the denominator in the ratio.
1914 """
1915 def __init__(self,
1916 numerator,
1917 denominator,
1918 **kwargs):
1919 self.numerator = numerator
1920 self.denominator = denominator
1921 super().__init__(**kwargs)
1922
1923 @property
1924 def columns(self):
1925 return [self.numerator, self.denominator]
1926
1927 @property
1928 def name(self):
1929 return f'ratio_{self.numerator}_{self.denominator}'
1930
1931 def _func(self, df):
1932 with np.warnings.catch_warnings():
1933 np.warnings.filterwarnings('ignore', r'invalid value encountered')
1934 np.warnings.filterwarnings('ignore', r'divide by zero')
1935 return df[self.numerator] / df[self.denominator]
1936
1937
1939 """Compute E(B-V) from dustmaps.sfd
1940 """
1941 _defaultDataset = 'ref'
1942 name = "E(B-V)"
1943 shortname = "ebv"
1944
1945 def __init__(self, **kwargs):
1946 # import is only needed for Ebv
1947 from dustmaps.sfd import SFDQuery
1948 self._columns = ['coord_ra', 'coord_dec']
1949 self.sfd = SFDQuery()
1950 super().__init__(**kwargs)
1951
1952 def _func(self, df):
1953 coords = SkyCoord(df['coord_ra']*u.rad, df['coord_dec']*u.rad)
1954 ebv = self.sfd(coords)
1955 # Double precision unnecessary scientifically
1956 # but currently needed for ingest to qserv
1957 return pd.Series(ebv, index=df.index).astype('float64')
def multilevelColumns(self, parq, **kwargs)
Definition: functors.py:986
def __init__(self, col, filt2, filt1, **kwargs)
Definition: functors.py:957
def __init__(self, col, **kwargs)
Definition: functors.py:671
def __init__(self, funcs, **kwargs)
Definition: functors.py:430
def __call__(self, data, **kwargs)
Definition: functors.py:482
def from_file(cls, filename, **kwargs)
Definition: functors.py:566
def from_yaml(cls, translationDefinition, **kwargs)
Definition: functors.py:575
def renameCol(cls, col, renameRules)
Definition: functors.py:557
def multilevelColumns(self, data, **kwargs)
Definition: functors.py:468
def pixelScaleArcseconds(self, cd11, cd12, cd21, cd22)
Definition: functors.py:1316
def __init__(self, col, colCD_1_1, colCD_1_2, colCD_2_1, colCD_2_2, **kwargs)
Definition: functors.py:1393
def __init__(self, col, colCD_1_1, colCD_1_2, colCD_2_1, colCD_2_2, **kwargs)
Definition: functors.py:1356
def __init__(self, col, **kwargs)
Definition: functors.py:717
def __init__(self, expr, **kwargs)
Definition: functors.py:640
def __init__(self, **kwargs)
Definition: functors.py:745
def __call__(self, catalog, **kwargs)
Definition: functors.py:748
def __init__(self, colXX, colXY, colYY, **kwargs)
Definition: functors.py:1149
def __init__(self, colXX, colXY, colYY, **kwargs)
Definition: functors.py:1167
def __init__(self, **kwargs)
Definition: functors.py:1945
def __call__(self, data, dropna=False)
Definition: functors.py:366
def _func(self, df, dropna=True)
Definition: functors.py:303
def multilevelColumns(self, data, columnIndex=None, returnTuple=False)
Definition: functors.py:252
def _get_data_columnLevelNames(self, data, columnIndex=None)
Definition: functors.py:206
def difference(self, data1, data2, **kwargs)
Definition: functors.py:378
def __init__(self, filt=None, dataset=None, noDup=None)
Definition: functors.py:159
def _get_columnIndex(self, data)
Definition: functors.py:306
def _colsFromDict(self, colDict, columnIndex=None)
Definition: functors.py:230
def _get_data_columnLevels(self, data, columnIndex=None)
Definition: functors.py:180
def __init__(self, ra, decl, **kwargs)
Definition: functors.py:766
def __call__(self, parq, dropna=False, **kwargs)
Definition: functors.py:1006
def __init__(self, instFluxPosCol, instFluxNegCol, instFluxPosErrCol, instFluxNegErrCol, photoCalibCol, photoCalibErrCol, **kwargs)
Definition: functors.py:1771
def instFluxToNanojansky(self, instFlux, localCalib)
Definition: functors.py:1573
def instFluxErrToMagnitudeErr(self, instFlux, instFluxErr, localCalib, localCalibErr)
Definition: functors.py:1628
def __init__(self, instFluxCol, instFluxErrCol, photoCalibCol, photoCalibErrCol, **kwargs)
Definition: functors.py:1566
def instFluxErrToNanojanskyErr(self, instFlux, instFluxErr, localCalib, localCalibErr)
Definition: functors.py:1590
def instFluxToMagnitude(self, instFlux, localCalib)
Definition: functors.py:1611
def __init__(self, colCD_1_1, colCD_1_2, colCD_2_1, colCD_2_2, **kwargs)
Definition: functors.py:1207
def computeDeltaRaDec(self, x, y, cd11, cd12, cd21, cd22)
Definition: functors.py:1214
def computeSkySeperation(self, ra1, dec1, ra2, dec2)
Definition: functors.py:1243
def getSkySeperationFromPixel(self, x1, y1, x2, y2, cd11, cd12, cd21, cd22)
Definition: functors.py:1269
def __init__(self, col1, col2, **kwargs)
Definition: functors.py:904
def __init__(self, *args, **kwargs)
Definition: functors.py:865
def __init__(self, col, calib=None, **kwargs)
Definition: functors.py:827
def dn2mag(self, dn, fluxMag0)
Definition: functors.py:1490
def dn2flux(self, dn, fluxMag0)
Definition: functors.py:1487
def dn2fluxErr(self, dn, dnErr, fluxMag0, fluxMag0Err)
Definition: functors.py:1496
def dn2MagErr(self, dn, dnErr, fluxMag0, fluxMag0Err)
Definition: functors.py:1501
def __init__(self, colFlux, colFluxErr=None, calib=None, **kwargs)
Definition: functors.py:1456
def __call__(self, catalog, **kwargs)
Definition: functors.py:735
def __init__(self, **kwargs)
Definition: functors.py:732
def __init__(self, colXX, colXY, colYY, **kwargs)
Definition: functors.py:1183
def __init__(self, numerator, denominator, **kwargs)
Definition: functors.py:1918
def mag_aware_eval(df, expr, log)
Definition: functors.py:604
def init_fromDict(initDict, basePath='lsst.pipe.tasks.functors', typeKey='functor', name=None)
Definition: functors.py:57