lsst.pipe.tasks g57dfbfe8b2+08e3ebb819
functors.py
Go to the documentation of this file.
1# This file is part of pipe_tasks.
2#
3# LSST Data Management System
4# This product includes software developed by the
5# LSST Project (http://www.lsst.org/).
6# See COPYRIGHT file at the top of the source tree.
7#
8# This program is free software: you can redistribute it and/or modify
9# it under the terms of the GNU General Public License as published by
10# the Free Software Foundation, either version 3 of the License, or
11# (at your option) any later version.
12#
13# This program is distributed in the hope that it will be useful,
14# but WITHOUT ANY WARRANTY; without even the implied warranty of
15# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16# GNU General Public License for more details.
17#
18# You should have received a copy of the LSST License Statement and
19# the GNU General Public License along with this program. If not,
20# see <https://www.lsstcorp.org/LegalNotices/>.
21#
22import yaml
23import re
24from itertools import product
25import logging
26import os.path
27
28import pandas as pd
29import numpy as np
30import astropy.units as u
31from dustmaps.sfd import SFDQuery
32from astropy.coordinates import SkyCoord
33
34from lsst.daf.persistence import doImport
35from lsst.daf.butler import DeferredDatasetHandle
36import lsst.geom as geom
37import lsst.sphgeom as sphgeom
38
39from .parquetTable import ParquetTable, MultilevelParquetTable
40
41
def init_fromDict(initDict, basePath='lsst.pipe.tasks.functors',
                  typeKey='functor', name=None):
    """Initialize an object defined in a dictionary

    The object needs to be importable as
        f'{basePath}.{initDict[typeKey]}'
    Positional arguments (if any) are taken from the "args" entry of the
    dictionary; every entry that remains after ``typeKey`` and "args" have
    been removed is passed to the constructor as a keyword argument.
    This is used in `functors.CompositeFunctor.from_yaml` to initialize
    a composite functor from a specification in a YAML file.

    Parameters
    ----------
    initDict : dictionary
        Dictionary describing object's initialization.  Must contain
        an entry keyed by ``typeKey`` that is the name of the object,
        relative to ``basePath``.  The dictionary itself is not modified.
    basePath : str
        Path relative to module in which ``initDict[typeKey]`` is defined.
    typeKey : str
        Key of ``initDict`` that is the name of the object
        (relative to `basePath`).
    name : str, optional
        Label of the object being built; only used in the error message.

    Returns
    -------
    element
        The constructed object.

    Raises
    ------
    Exception
        An exception of the same type the constructor raised, carrying a
        message that identifies ``name``, the type and the positional args.
    """
    # Work on a copy so that popping entries does not alter the caller's dict.
    initDict = initDict.copy()
    # TO DO: DM-21956 We should be able to define functors outside this module
    pythonType = doImport(f'{basePath}.{initDict.pop(typeKey)}')
    args = initDict.pop('args', [])
    if isinstance(args, str):
        # A bare string means "one positional argument", not a sequence of characters.
        args = [args]
    try:
        element = pythonType(*args, **initDict)
    except Exception as e:
        message = f'Error in constructing functor "{name}" of type {pythonType.__name__} with args: {args}'
        # Chain explicitly so the original traceback stays attached.
        raise type(e)(message, e.args) from e
    return element
79
80
class Functor(object):
    """Define and execute a calculation on a ParquetTable

    The `__call__` method accepts either a `ParquetTable` object or a
    `DeferredDatasetHandle`, and returns the
    result of the calculation as a single column.  Each functor defines what
    columns are needed for the calculation, and only these columns are read
    from the `ParquetTable`.

    The action of  `__call__` consists of two steps: first, loading the
    necessary columns from disk into memory as a `pandas.DataFrame` object;
    and second, performing the computation on this dataframe and returning the
    result.


    To define a new `Functor`, a subclass must define a `_func` method,
    that takes a `pandas.DataFrame` and returns result in a `pandas.Series`.
    In addition, it must define the following attributes

    * `_columns`: The columns necessary to perform the calculation
    * `name`: A name appropriate for a figure axis label
    * `shortname`: A name appropriate for use as a dictionary key

    On initialization, a `Functor` should declare what band (`filt` kwarg)
    and dataset (e.g. `'ref'`, `'meas'`, `'forced_src'`) it is intended to be
    applied to. This enables the `_get_data` method to extract the proper
    columns from the parquet file. If not specified, the dataset will fall back
    on the `_defaultDataset` attribute. If band is not specified and `dataset`
    is anything other than `'ref'`, then an error will be raised when trying to
    perform the calculation.

    Originally, `Functor` was set up to expect
    datasets formatted like the `deepCoadd_obj` dataset; that is, a
    dataframe with a multi-level column index, with the levels of the
    column index being `band`, `dataset`, and `column`.
    It has since been generalized to apply to dataframes without multi-level
    indices and multi-level indices with just `dataset` and `column` levels.
    In addition, the `_get_data` method that reads
    the dataframe from the `ParquetTable` will return a dataframe with column
    index levels defined by the `_dfLevels` attribute; by default, this is
    `column`.

    The `_dfLevels` attributes should generally not need to
    be changed, unless `_func` needs columns from multiple filters or datasets
    to do the calculation.
    An example of this is the `lsst.pipe.tasks.functors.Color` functor, for
    which `_dfLevels = ('band', 'column')`, and `_func` expects the dataframe
    it gets to have those levels in the column index.

    Parameters
    ----------
    filt : str
        Filter upon which to do the calculation

    dataset : str
        Dataset upon which to do the calculation
        (e.g., 'ref', 'meas', 'forced_src').

    noDup : bool, optional
        Overrides the class-level `_defaultNoDup` when not `None`.

    """

    _defaultDataset = 'ref'
    _dfLevels = ('column',)
    _defaultNoDup = False

    def __init__(self, filt=None, dataset=None, noDup=None):
        self.filt = filt
        self.dataset = dataset if dataset is not None else self._defaultDataset
        self._noDup = noDup
        self.log = logging.getLogger(type(self).__name__)

    @property
    def noDup(self):
        """Value given at construction, else the class default `_defaultNoDup`
        """
        if self._noDup is not None:
            return self._noDup
        return self._defaultNoDup

    @property
    def columns(self):
        """Columns required to perform calculation
        """
        if not hasattr(self, '_columns'):
            raise NotImplementedError('Must define columns property or _columns attribute')
        return self._columns

    def _get_data_columnLevels(self, data, columnIndex=None):
        """Gets the names of the column index levels

        This should only be called in the context of a multilevel table.
        The logic here is to enable this to work both with the gen2 `MultilevelParquetTable`
        and with the gen3 `DeferredDatasetHandle`.

        Parameters
        ----------
        data : `MultilevelParquetTable` or `DeferredDatasetHandle`

        columnIndex (optional): pandas `Index` object
            if not passed, then it is read from the `DeferredDatasetHandle`

        Raises
        ------
        TypeError
            If no ``columnIndex`` is available and ``data`` is not a
            `MultilevelParquetTable`.
        """
        if isinstance(data, DeferredDatasetHandle):
            if columnIndex is None:
                columnIndex = data.get(component="columns")
        # An explicitly passed index wins regardless of the type of `data`
        # (`_colsFromDict` relies on this by passing `data=None`).
        if columnIndex is not None:
            return columnIndex.names
        if isinstance(data, MultilevelParquetTable):
            return data.columnLevels
        else:
            raise TypeError(f"Unknown type for data: {type(data)}!")

    def _get_data_columnLevelNames(self, data, columnIndex=None):
        """Gets the content of each of the column levels for a multilevel table

        Similar to `_get_data_columnLevels`, this enables backward compatibility with gen2.

        Mirrors original gen2 implementation within `pipe.tasks.parquetTable.MultilevelParquetTable`

        Returns
        -------
        columnLevelNames : `dict`
            Maps each level name to the list of unique entries in that level.
        """
        if isinstance(data, DeferredDatasetHandle):
            if columnIndex is None:
                columnIndex = data.get(component="columns")
        if columnIndex is not None:
            columnLevels = columnIndex.names
            # Each element of the index is a tuple; stack them so that
            # column i of the array holds the entries of level i.
            columnLevelNames = {
                level: list(np.unique(np.array([c for c in columnIndex])[:, i]))
                for i, level in enumerate(columnLevels)
            }
            return columnLevelNames
        if isinstance(data, MultilevelParquetTable):
            return data.columnLevelNames
        else:
            raise TypeError(f"Unknown type for data: {type(data)}!")

    def _colsFromDict(self, colDict, columnIndex=None):
        """Converts dictionary column specification to a list of columns

        This mirrors the original gen2 implementation within `pipe.tasks.parquetTable.MultilevelParquetTable`

        Parameters
        ----------
        colDict : `dict`
            Maps level name to a single entry (`str`) or a list of entries.
            Levels that are absent select every entry of that level.
        columnIndex : pandas `Index` object
            Column index of the table being queried.

        Returns
        -------
        colsAvailable : `list` of `tuple`
            Requested combinations that actually exist in ``columnIndex``.
        """
        new_colDict = {}
        columnLevels = self._get_data_columnLevels(None, columnIndex=columnIndex)

        for i, lev in enumerate(columnLevels):
            if lev in colDict:
                if isinstance(colDict[lev], str):
                    new_colDict[lev] = [colDict[lev]]
                else:
                    new_colDict[lev] = colDict[lev]
            else:
                new_colDict[lev] = columnIndex.levels[i]

        levelCols = [new_colDict[lev] for lev in columnLevels]
        cols = list(product(*levelCols))
        colsAvailable = [col for col in cols if col in columnIndex]
        return colsAvailable

    def multilevelColumns(self, data, columnIndex=None, returnTuple=False):
        """Returns columns needed by functor from multilevel dataset

        To access tables with multilevel column structure, the `MultilevelParquetTable`
        or `DeferredDatasetHandle` need to be passed either a list of tuples or a
        dictionary.

        Parameters
        ----------
        data : `MultilevelParquetTable` or `DeferredDatasetHandle`

        columnIndex (optional): pandas `Index` object
            either passed or read in from `DeferredDatasetHandle`.

        `returnTuple` : bool
            If true, then return a list of tuples rather than the column dictionary
            specification.  This is set to `True` by `CompositeFunctor` in order to be able to
            combine columns from the various component functors.

        Raises
        ------
        ValueError
            If no `filt` is set, the table has a ``band`` level, and the
            dataset is not ``'ref'``.
        """
        if isinstance(data, DeferredDatasetHandle) and columnIndex is None:
            columnIndex = data.get(component="columns")

        # Confirm that the dataset has the column levels the functor is expecting it to have.
        columnLevels = self._get_data_columnLevels(data, columnIndex)

        columnDict = {'column': self.columns,
                      'dataset': self.dataset}
        if self.filt is None:
            columnLevelNames = self._get_data_columnLevelNames(data, columnIndex)
            if "band" in columnLevels:
                if self.dataset == "ref":
                    # No band requested: fall back on the first band listed.
                    columnDict["band"] = columnLevelNames["band"][0]
                else:
                    raise ValueError(f"'filt' not set for functor {self.name}"
                                     f"(dataset {self.dataset}) "
                                     "and ParquetTable "
                                     "contains multiple filters in column index. "
                                     "Set 'filt' or set 'dataset' to 'ref'.")
        else:
            columnDict['band'] = self.filt

        if isinstance(data, MultilevelParquetTable):
            return data._colsFromDict(columnDict)
        elif isinstance(data, DeferredDatasetHandle):
            if returnTuple:
                return self._colsFromDict(columnDict, columnIndex=columnIndex)
            else:
                return columnDict

    def _func(self, df, dropna=True):
        raise NotImplementedError('Must define calculation on dataframe')

    def _get_columnIndex(self, data):
        """Return columnIndex

        Only a `DeferredDatasetHandle` is queried; `None` is returned for
        any other input.
        """

        if isinstance(data, DeferredDatasetHandle):
            return data.get(component="columns")
        else:
            return None

    def _get_data(self, data):
        """Retrieve dataframe necessary for calculation.

        The data argument can be a DataFrame, a ParquetTable instance, or a gen3 DeferredDatasetHandle

        Returns dataframe upon which `self._func` can act.

        N.B. while passing a raw pandas `DataFrame` *should* work here, it has not been tested.

        Raises
        ------
        TypeError
            If ``data`` is none of the supported types.
        """
        if isinstance(data, pd.DataFrame):
            return data

        # First thing to do: check to see if the data source has a multilevel column index or not.
        columnIndex = self._get_columnIndex(data)
        is_multiLevel = isinstance(data, MultilevelParquetTable) or isinstance(columnIndex, pd.MultiIndex)

        # Simple single-level parquet table, gen2
        if isinstance(data, ParquetTable) and not is_multiLevel:
            columns = self.columns
            df = data.toDataFrame(columns=columns)
            return df

        # Get proper columns specification for this functor
        if is_multiLevel:
            columns = self.multilevelColumns(data, columnIndex=columnIndex)
        else:
            columns = self.columns

        if isinstance(data, MultilevelParquetTable):
            # Load in-memory dataframe with appropriate columns the gen2 way
            df = data.toDataFrame(columns=columns, droplevels=False)
        elif isinstance(data, DeferredDatasetHandle):
            # Load in-memory dataframe with appropriate columns the gen3 way
            df = data.get(parameters={"columns": columns})
        else:
            # Without this branch `df` would be unbound below.
            raise TypeError(f"Unknown type for data: {type(data)}!")

        # Drop unnecessary column levels
        if is_multiLevel:
            df = self._setLevels(df)

        return df

    def _setLevels(self, df):
        """Drop every column-index level not listed in `_dfLevels`

        The column index of ``df`` is replaced in place and ``df`` is returned.
        """
        levelsToDrop = [n for n in df.columns.names if n not in self._dfLevels]
        df.columns = df.columns.droplevel(levelsToDrop)
        return df

    def _dropna(self, vals):
        return vals.dropna()

    def __call__(self, data, dropna=False):
        """Load the required columns and apply `_func` to them

        Errors raised while loading the data propagate to the caller.  An
        error raised by `_func` is logged and replaced by the all-NaN result
        of `fail`.

        Parameters
        ----------
        data : `pandas.DataFrame`, `ParquetTable` or `DeferredDatasetHandle`
            Table to run the calculation on.
        dropna : bool
            If true, null entries are removed from the result.
        """
        # Loading stays outside the try block: `fail` needs the dataframe,
        # so a load failure cannot be turned into a NaN column anyway.
        df = self._get_data(data)
        try:
            vals = self._func(df)
        except Exception as e:
            self.log.error("Exception in %s call: %s: %s", self.name, type(e).__name__, e)
            vals = self.fail(df)
        if dropna:
            vals = self._dropna(vals)

        return vals

    def difference(self, data1, data2, **kwargs):
        """Computes difference between functor called on two different ParquetTable objects
        """
        return self(data1, **kwargs) - self(data2, **kwargs)

    def fail(self, df):
        """Return an all-NaN series sharing the index of ``df``
        """
        return pd.Series(np.full(len(df), np.nan), index=df.index)

    @property
    def name(self):
        """Full name of functor (suitable for figure labels)
        """
        # NOTE(review): this returns the exception class rather than raising
        # it.  Left as is because the error logging in `__call__` reads
        # `self.name` and must not raise -- confirm before changing.
        return NotImplementedError

    @property
    def shortname(self):
        """Short name of functor (suitable for column name/dict key)
        """
        return self.name
376
377
class CompositeFunctor(Functor):
    """Perform multiple calculations at once on a catalog

    The role of a `CompositeFunctor` is to group together computations from
    multiple functors.  Instead of returning `pandas.Series` a
    `CompositeFunctor` returns a `pandas.DataFrame`, with the column names
    being the keys of `funcDict`.

    The `columns` attribute of a `CompositeFunctor` is the union of all columns
    in all the component functors.

    A `CompositeFunctor` does not use a `_func` method itself; rather,
    when a `CompositeFunctor` is called, all its columns are loaded
    at once, and the resulting dataframe is passed to the `_func` method of each component
    functor.  This has the advantage of only doing I/O (reading from parquet file) once,
    and works because each individual `_func` method of each component functor does not
    care if there are *extra* columns in the dataframe being passed; only that it must contain
    *at least* the `columns` it expects.

    An important and useful class method is `from_yaml`, which takes as argument the path to a YAML
    file specifying a collection of functors.

    Parameters
    ----------
    funcs : `dict` or `list`
        Dictionary or list of functors.  If a list, then it will be converted
        into a dictionary according to the `.shortname` attribute of each functor.

    """
    dataset = None

    def __init__(self, funcs, **kwargs):

        # isinstance (rather than an exact type match) so that dict
        # subclasses are used as mappings instead of being iterated as a list.
        if isinstance(funcs, dict):
            self.funcDict = funcs
        else:
            self.funcDict = {f.shortname: f for f in funcs}

        # Must exist before the base-class __init__ assigns `self.filt`,
        # because that assignment goes through the setter below.
        self._filt = None

        super().__init__(**kwargs)

    @property
    def filt(self):
        return self._filt

    @filt.setter
    def filt(self, filt):
        # A non-None filter is pushed down to every component functor.
        if filt is not None:
            for _, f in self.funcDict.items():
                f.filt = filt
        self._filt = filt

    def update(self, new):
        """Add functors from a dictionary or from another `CompositeFunctor`

        Raises
        ------
        TypeError
            If ``new`` is neither a `dict` nor a `CompositeFunctor`.
        """
        if isinstance(new, dict):
            self.funcDict.update(new)
        elif isinstance(new, CompositeFunctor):
            self.funcDict.update(new.funcDict)
        else:
            raise TypeError('Can only update with dictionary or CompositeFunctor.')

        # Make sure new functors have the same 'filt' set
        if self.filt is not None:
            # Re-assigning runs the setter, which propagates the filter
            # to the newly added functors.
            self.filt = self.filt

    @property
    def columns(self):
        return list({x for f in self.funcDict.values() for x in f.columns})

    def multilevelColumns(self, data, **kwargs):
        # Get the union of columns for all component functors.  Note the need to have `returnTuple=True` here.
        return list(
            {
                x
                for f in self.funcDict.values()
                for x in f.multilevelColumns(data, returnTuple=True, **kwargs)
            }
        )

    def __call__(self, data, **kwargs):
        """Apply the functor to the data table

        Parameters
        ----------
        data : `lsst.daf.butler.DeferredDatasetHandle`,
               `MultilevelParquetTable`, `ParquetTable`,
               or `pandas.DataFrame`.
            The table or a pointer to a table on disk from which columns can
            be accessed
        dropna : bool, optional
            Keyword only.  If true, rows containing any null value are
            dropped from the result.

        Returns
        -------
        valDf : `pandas.DataFrame`
            One column per component functor, named by its key in `funcDict`.

        Raises
        ------
        RuntimeError
            If a component functor returns something that is not
            one-dimensional.
        """
        columnIndex = self._get_columnIndex(data)

        # First, determine whether data has a multilevel index (either gen2 or gen3)
        is_multiLevel = isinstance(data, MultilevelParquetTable) or isinstance(columnIndex, pd.MultiIndex)

        # Multilevel index, gen2 or gen3
        if is_multiLevel:
            columns = self.multilevelColumns(data, columnIndex=columnIndex)

            if isinstance(data, MultilevelParquetTable):
                # Read data into memory the gen2 way
                df = data.toDataFrame(columns=columns, droplevels=False)
            elif isinstance(data, DeferredDatasetHandle):
                # Read data into memory the gen3 way
                df = data.get(parameters={"columns": columns})

            valDict = {}
            for k, f in self.funcDict.items():
                try:
                    subdf = f._setLevels(
                        df[f.multilevelColumns(data, returnTuple=True, columnIndex=columnIndex)]
                    )
                    valDict[k] = f._func(subdf)
                except Exception as e:
                    self.log.error("Exception in %s call: %s: %s", self.name, type(e).__name__, e)
                    try:
                        valDict[k] = f.fail(subdf)
                    except NameError:
                        # `subdf` was never bound, so there is nothing to
                        # build a NaN column from: surface the real error.
                        raise e

        else:
            if isinstance(data, DeferredDatasetHandle):
                # input if Gen3 deferLoad=True
                df = data.get(parameters={"columns": self.columns})
            elif isinstance(data, pd.DataFrame):
                # input if Gen3 deferLoad=False
                df = data
            else:
                # Original Gen2 input is type ParquetTable and the fallback
                df = data.toDataFrame(columns=self.columns)

            valDict = {k: f._func(df) for k, f in self.funcDict.items()}

            # Check that output columns are actually columns
            for name, colVal in valDict.items():
                if len(colVal.shape) != 1:
                    raise RuntimeError("Transformed column '%s' is not the shape of a column. "
                                       "It is shaped %s and type %s." % (name, colVal.shape, type(colVal)))

        try:
            valDf = pd.concat(valDict, axis=1)
        except TypeError:
            print([(k, type(v)) for k, v in valDict.items()])
            raise

        if kwargs.get('dropna', False):
            valDf = valDf.dropna(how='any')

        return valDf

    @classmethod
    def renameCol(cls, col, renameRules):
        """Apply prefix rename rules to a column name

        Parameters
        ----------
        col : str
            Column name.
        renameRules : iterable of (old, new) pairs, or `None`
            Each rule whose ``old`` is a prefix of the current name replaces
            that prefix by ``new``; rules are applied in order.
        """
        if renameRules is None:
            return col
        for old, new in renameRules:
            if col.startswith(old):
                # count=1: rewrite only the matched prefix, not later
                # repetitions of the same text inside the name.
                col = col.replace(old, new, 1)
        return col

    @classmethod
    def from_file(cls, filename, **kwargs):
        """Build a composite functor from a YAML file

        Environment variables in ``filename`` are expanded; the parsed
        content is handed to `from_yaml`.
        """
        # Allow environment variables in the filename.
        filename = os.path.expandvars(filename)
        with open(filename) as f:
            translationDefinition = yaml.safe_load(f)

        return cls.from_yaml(translationDefinition, **kwargs)

    @classmethod
    def from_yaml(cls, translationDefinition, **kwargs):
        """Build a composite functor from an already parsed specification

        Parameters
        ----------
        translationDefinition : `dict`
            Must contain ``'funcs'``, mapping an output name to a dictionary
            understood by `init_fromDict`.  May contain ``'calexpFlags'``,
            ``'refFlags'``, ``'forcedFlags'`` and ``'flags'`` (lists of
            column names read from the ``calexp``, ``ref``, ``forced_src``
            and ``meas`` datasets) and ``'flag_rename_rules'``.
        """
        funcs = {}
        for func, val in translationDefinition['funcs'].items():
            funcs[func] = init_fromDict(val, name=func)

        renameRules = translationDefinition.get('flag_rename_rules')

        # Order matters: a later entry overrides an earlier one that maps
        # to the same output name.
        flagDatasets = (('calexpFlags', 'calexp'),
                        ('refFlags', 'ref'),
                        ('forcedFlags', 'forced_src'),
                        ('flags', 'meas'))
        for key, dataset in flagDatasets:
            if key in translationDefinition:
                for flag in translationDefinition[key]:
                    funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset=dataset)

        return cls(funcs, **kwargs)
579
580
def mag_aware_eval(df, expr, log):
    """Evaluate an expression on a DataFrame, knowing what the 'mag' function means

    Builds on `pandas.DataFrame.eval`, which parses and executes math on dataframes.

    Every ``mag(x)`` in the expression is rewritten to ``-2.5*log(x)/log(10)``
    before evaluation.  If that fails, the failure is logged and the
    expression is evaluated again with ``x`` replaced by ``x_instFlux``.

    Parameters
    ----------
    df : pandas.DataFrame
        Dataframe on which to evaluate expression.

    expr : str
        Expression.

    log : logging.Logger
        Logger that receives the message when the first attempt fails.
    """
    # Whitespace inside the parentheses is tolerated, matching the pattern
    # used to discover the columns of an expression.
    magPattern = r'mag\(\s*(\w+)\s*\)'
    try:
        expr_new = re.sub(magPattern, r'-2.5*log(\g<1>)/log(10)', expr)
        val = df.eval(expr_new)
    except Exception as e:  # Should check what actually gets raised
        log.error("Exception in mag_aware_eval: %s: %s", type(e).__name__, e)
        expr_new = re.sub(magPattern, r'-2.5*log(\g<1>_instFlux)/log(10)', expr)
        val = df.eval(expr_new)
    return val
602
603
605 """Arbitrary computation on a catalog
606
607 Column names (and thus the columns to be loaded from catalog) are found
608 by finding all words and trying to ignore all "math-y" words.
609
610 Parameters
611 ----------
612 expr : str
613 Expression to evaluate, to be parsed and executed by `mag_aware_eval`.
614 """
615 _ignore_words = ('mag', 'sin', 'cos', 'exp', 'log', 'sqrt')
616
617 def __init__(self, expr, **kwargs):
618 self.expr = expr
619 super().__init__(**kwargs)
620
621 @property
622 def name(self):
623 return self.expr
624
625 @property
626 def columns(self):
627 flux_cols = re.findall(r'mag\‍(\s*(\w+)\s*\‍)', self.expr)
628
629 cols = [c for c in re.findall(r'[a-zA-Z_]+', self.expr) if c not in self._ignore_words]
630 not_a_col = []
631 for c in flux_cols:
632 if not re.search('_instFlux$', c):
633 cols.append(f'{c}_instFlux')
634 not_a_col.append(c)
635 else:
636 cols.append(c)
637
638 return list(set([c for c in cols if c not in not_a_col]))
639
640 def _func(self, df):
641 return mag_aware_eval(df, self.expr, self.log)
642
643
class Column(Functor):
    """Get column with specified name

    Parameters
    ----------
    col : str
        Name of the column to return.
    """

    def __init__(self, col, **kwargs):
        self.col = col
        super().__init__(**kwargs)

    @property
    def name(self):
        return self.col

    @property
    def columns(self):
        return [self.col]

    def _func(self, df):
        return df[self.col]
662
663
665 """Return the value of the index for each object
666 """
667
668 columns = ['coord_ra'] # just a dummy; something has to be here
669 _defaultDataset = 'ref'
670 _defaultNoDup = True
671
672 def _func(self, df):
673 return pd.Series(df.index, index=df.index)
674
675
677 col = 'id'
678 _allow_difference = False
679 _defaultNoDup = True
680
681 def _func(self, df):
682 return pd.Series(df.index, index=df.index)
683
684
686 col = 'base_Footprint_nPix'
687
688
690 """Base class for coordinate column, in degrees
691 """
692 _radians = True
693
694 def __init__(self, col, **kwargs):
695 super().__init__(col, **kwargs)
696
697 def _func(self, df):
698 # Must not modify original column in case that column is used by another functor
699 output = df[self.col] * 180 / np.pi if self._radians else df[self.col]
700 return output
701
702
704 """Right Ascension, in degrees
705 """
706 name = 'RA'
707 _defaultNoDup = True
708
709 def __init__(self, **kwargs):
710 super().__init__('coord_ra', **kwargs)
711
712 def __call__(self, catalog, **kwargs):
713 return super().__call__(catalog, **kwargs)
714
715
717 """Declination, in degrees
718 """
719 name = 'Dec'
720 _defaultNoDup = True
721
722 def __init__(self, **kwargs):
723 super().__init__('coord_dec', **kwargs)
724
725 def __call__(self, catalog, **kwargs):
726 return super().__call__(catalog, **kwargs)
727
728
730 """Compute the level 20 HtmIndex for the catalog.
731
732 Notes
733 -----
734 This functor was implemented to satisfy requirements of old APDB interface
735 which required ``pixelId`` column in DiaObject with HTM20 index. APDB
736 interface had migrated to not need that information, but we keep this
737 class in case it may be useful for something else.
738 """
739 name = "Htm20"
740 htmLevel = 20
741 _radians = True
742
743 def __init__(self, ra, decl, **kwargs):
745 self.ra = ra
746 self.decl = decl
747 self._columns = [self.ra, self.decl]
748 super().__init__(**kwargs)
749
750 def _func(self, df):
751
752 def computePixel(row):
753 if self._radians:
754 sphPoint = geom.SpherePoint(row[self.ra],
755 row[self.decl],
756 geom.radians)
757 else:
758 sphPoint = geom.SpherePoint(row[self.ra],
759 row[self.decl],
760 geom.degrees)
761 return self.pixelator.index(sphPoint.getVector())
762
763 return df.apply(computePixel, axis=1, result_type='reduce').astype('int64')
764
765
def fluxName(col):
    """Return ``col`` with an ``_instFlux`` suffix, adding it only if absent."""
    if not col.endswith('_instFlux'):
        col += '_instFlux'
    return col
770
771
def fluxErrName(col):
    """Return ``col`` with an ``_instFluxErr`` suffix, adding it only if absent."""
    if not col.endswith('_instFluxErr'):
        col += '_instFluxErr'
    return col
776
777
class Mag(Functor):
    """Compute calibrated magnitude

    Takes a `calib` argument, which returns the flux at mag=0
    as `calib.getFluxMag0()`.  If not provided, then the default
    `fluxMag0` is 63095734448.0194, which is default for HSC.
    This default should be removed in DM-21955

    This calculation hides warnings about invalid values and dividing by zero.

    As for all functors, a `dataset` and `filt` kwarg should be provided upon
    initialization.  Unlike the default `Functor`, however, the default dataset
    for a `Mag` is `'meas'`, rather than `'ref'`.

    Parameters
    ----------
    col : `str`
        Name of flux column from which to compute magnitude.  Can be parseable
        by `lsst.pipe.tasks.functors.fluxName` function---that is, you can pass
        `'modelfit_CModel'` instead of `'modelfit_CModel_instFlux'`) and it will
        understand.
    calib : `lsst.afw.image.calib.Calib` (optional)
        Object that knows zero point.
    """
    _defaultDataset = 'meas'

    def __init__(self, col, calib=None, **kwargs):
        self.col = fluxName(col)
        self.calib = calib
        if calib is not None:
            self.fluxMag0 = calib.getFluxMag0()[0]
        else:
            # TO DO: DM-21955 Replace hard coded photometic calibration values
            self.fluxMag0 = 63095734448.0194

        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.col]

    def _func(self, df):
        # np.errstate silences the "invalid value" / "divide by zero"
        # floating-point warnings; the `np.warnings` alias used previously
        # is no longer available in current NumPy releases.
        with np.errstate(invalid='ignore', divide='ignore'):
            return -2.5*np.log10(df[self.col] / self.fluxMag0)

    @property
    def name(self):
        return f'mag_{self.col}'
828
829
class MagErr(Mag):
    """Compute calibrated magnitude uncertainty

    Takes the same `calib` object as `lsst.pipe.tasks.functors.Mag`.

    The flux error is read from the column named like the flux column with
    ``Err`` appended.

    Parameters
    ----------
    col : `str`
        Name of flux column
    calib : `lsst.afw.image.calib.Calib` (optional)
        Object that knows zero point.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        if self.calib is not None:
            self.fluxMag0Err = self.calib.getFluxMag0()[1]
        else:
            self.fluxMag0Err = 0.

    @property
    def columns(self):
        return [self.col, self.col + 'Err']

    def _func(self, df):
        # np.errstate silences the "invalid value" / "divide by zero"
        # floating-point warnings; the `np.warnings` alias used previously
        # is no longer available in current NumPy releases.
        with np.errstate(invalid='ignore', divide='ignore'):
            fluxCol, fluxErrCol = self.columns
            # Relative flux error and relative zero-point error, added in quadrature.
            x = df[fluxErrCol] / df[fluxCol]
            y = self.fluxMag0Err / self.fluxMag0
            magErr = (2.5 / np.log(10.)) * np.sqrt(x*x + y*y)
            return magErr

    @property
    def name(self):
        return super().name + '_err'
866
867
869 """
870 """
871
872 def _func(self, df):
873 return (df[self.col] / self.fluxMag0) * 1e9
874
875
877 _defaultDataset = 'meas'
878
879 """Functor to calculate magnitude difference"""
880
881 def __init__(self, col1, col2, **kwargs):
882 self.col1 = fluxName(col1)
883 self.col2 = fluxName(col2)
884 super().__init__(**kwargs)
885
886 @property
887 def columns(self):
888 return [self.col1, self.col2]
889
890 def _func(self, df):
891 with np.warnings.catch_warnings():
892 np.warnings.filterwarnings('ignore', r'invalid value encountered')
893 np.warnings.filterwarnings('ignore', r'divide by zero')
894 return -2.5*np.log10(df[self.col1]/df[self.col2])
895
896 @property
897 def name(self):
898 return f'(mag_{self.col1} - mag_{self.col2})'
899
900 @property
901 def shortname(self):
902 return f'magDiff_{self.col1}_{self.col2}'
903
904
class Color(Functor):
    """Compute the color between two filters

    Computes color by initializing two different `Mag`
    functors based on the `col` and filters provided, and
    then returning the difference.

    This is enabled by the `_func` expecting a dataframe with a
    multilevel column index, with both `'band'` and `'column'`,
    instead of just `'column'`, which is the `Functor` default.
    This is controlled by the `_dfLevels` attribute.

    Also of note, the default dataset for `Color` is `'forced_src'`,
    whereas for `Mag` it is `'meas'`.

    Parameters
    ----------
    col : str
        Name of flux column from which to compute; same as would be passed to
        `Mag`.

    filt2, filt1 : str
        Filters from which to compute magnitude difference.
        Color computed is `Mag(filt2) - Mag(filt1)`.

    Raises
    ------
    RuntimeError
        If ``filt2`` and ``filt1`` are equal.
    """
    _defaultDataset = 'forced_src'
    _dfLevels = ('band', 'column')
    _defaultNoDup = True

    def __init__(self, col, filt2, filt1, **kwargs):
        self.col = fluxName(col)
        if filt2 == filt1:
            raise RuntimeError("Cannot compute Color for %s: %s - %s " % (col, filt2, filt1))
        self.filt2 = filt2
        self.filt1 = filt1

        self.mag2 = Mag(col, filt=filt2, **kwargs)
        self.mag1 = Mag(col, filt=filt1, **kwargs)

        super().__init__(**kwargs)

    @property
    def filt(self):
        # A color spans two bands, so it never reports a single filter.
        return None

    @filt.setter
    def filt(self, filt):
        # Deliberately ignored: the bands are fixed by filt1 and filt2.
        pass

    def _func(self, df):
        # `df` keeps the band level, so selecting a band yields the
        # single-level frame each `Mag` expects.
        mag2 = self.mag2._func(df[self.filt2])
        mag1 = self.mag1._func(df[self.filt1])
        return mag2 - mag1

    @property
    def columns(self):
        return [self.mag1.col, self.mag2.col]

    def multilevelColumns(self, parq, **kwargs):
        return [(self.dataset, self.filt1, self.col), (self.dataset, self.filt2, self.col)]

    @property
    def name(self):
        return f'{self.filt2} - {self.filt1} ({self.col})'

    @property
    def shortname(self):
        return f"{self.col}_{self.filt2.replace('-', '')}m{self.filt1.replace('-', '')}"
973
974
976 """Main function of this subclass is to override the dropna=True
977 """
978 _null_label = 'null'
979 _allow_difference = False
980 name = 'label'
981 _force_str = False
982
983 def __call__(self, parq, dropna=False, **kwargs):
984 return super().__call__(parq, dropna=False, **kwargs)
985
986
988 _columns = ["base_ClassificationExtendedness_value"]
989 _column = "base_ClassificationExtendedness_value"
990
991 def _func(self, df):
992 x = df[self._columns][self._column]
993 mask = x.isnull()
994 test = (x < 0.5).astype(int)
995 test = test.mask(mask, 2)
996
997 # TODO: DM-21954 Look into veracity of inline comment below
998 # are these backwards?
999 categories = ['galaxy', 'star', self._null_label]
1000 label = pd.Series(pd.Categorical.from_codes(test, categories=categories),
1001 index=x.index, name='label')
1002 if self._force_str:
1003 label = label.astype(str)
1004 return label
1005
1006
1008 _columns = ['numStarFlags']
1009 labels = {"star": 0, "maybe": 1, "notStar": 2}
1010
1011 def _func(self, df):
1012 x = df[self._columns][self._columns[0]]
1013
1014 # Number of filters
1015 n = len(x.unique()) - 1
1016
1017 labels = ['noStar', 'maybe', 'star']
1018 label = pd.Series(pd.cut(x, [-1, 0, n-1, n], labels=labels),
1019 index=x.index, name='label')
1020
1021 if self._force_str:
1022 label = label.astype(str)
1023
1024 return label
1025
1026
1028 name = 'Deconvolved Moments'
1029 shortname = 'deconvolvedMoments'
1030 _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
1031 "ext_shapeHSM_HsmSourceMoments_yy",
1032 "base_SdssShape_xx", "base_SdssShape_yy",
1033 "ext_shapeHSM_HsmPsfMoments_xx",
1034 "ext_shapeHSM_HsmPsfMoments_yy")
1035
def _func(self, df):
    """Calculate deconvolved moments.

    Uses the HSM source trace (xx + yy) where it is finite and falls back
    to the SDSS-shape trace otherwise, then subtracts the HSM PSF trace.

    Raises
    ------
    RuntimeError
        If the HSM PSF moment columns are not present in ``df``.
    """
    if "ext_shapeHSM_HsmSourceMoments_xx" in df.columns:  # _xx added by tdm
        hsm = df["ext_shapeHSM_HsmSourceMoments_xx"] + df["ext_shapeHSM_HsmSourceMoments_yy"]
    else:
        # Must be a Series (not a bare ndarray) so that ``.where`` below
        # exists and the result stays aligned with ``df.index``.
        hsm = pd.Series(np.nan, index=df.index)
    sdss = df["base_SdssShape_xx"] + df["base_SdssShape_yy"]
    if "ext_shapeHSM_HsmPsfMoments_xx" in df.columns:
        psf = df["ext_shapeHSM_HsmPsfMoments_xx"] + df["ext_shapeHSM_HsmPsfMoments_yy"]
    else:
        # LSST does not have shape.sdss.psf.  Could instead add base_PsfShape to catalog using
        # exposure.getPsf().computeShape(s.getCentroid()).getIxx()
        raise RuntimeError('No psf shape parameter found in catalog')

    return hsm.where(np.isfinite(hsm), sdss) - psf
1052
1053
1055 """Functor to calculate SDSS trace radius size for sources"""
1056 name = "SDSS Trace Size"
1057 shortname = 'sdssTrace'
1058 _columns = ("base_SdssShape_xx", "base_SdssShape_yy")
1059
def _func(self, df):
    """Return ``sqrt(0.5 * (xx + yy))`` of the SDSS-shape moments."""
    srcSize = np.sqrt(0.5*(df["base_SdssShape_xx"] + df["base_SdssShape_yy"]))
    return srcSize
1063
1064
1066 """Functor to calculate SDSS trace radius size difference (%) between object and psf model"""
1067 name = "PSF - SDSS Trace Size"
1068 shortname = 'psf_sdssTrace'
1069 _columns = ("base_SdssShape_xx", "base_SdssShape_yy",
1070 "base_SdssShape_psf_xx", "base_SdssShape_psf_yy")
1071
def _func(self, df):
    """Return the percentage difference between source and PSF trace sizes.

    Both sizes are ``sqrt(0.5 * (xx + yy))`` of the SDSS-shape moments; the
    difference is normalised by the mean of the two sizes.
    """
    srcSize = np.sqrt(0.5*(df["base_SdssShape_xx"] + df["base_SdssShape_yy"]))
    psfSize = np.sqrt(0.5*(df["base_SdssShape_psf_xx"] + df["base_SdssShape_psf_yy"]))
    sizeDiff = 100*(srcSize - psfSize)/(0.5*(srcSize + psfSize))
    return sizeDiff
1077
1078
1080 """Functor to calculate HSM trace radius size for sources"""
1081 name = 'HSM Trace Size'
1082 shortname = 'hsmTrace'
1083 _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
1084 "ext_shapeHSM_HsmSourceMoments_yy")
1085
def _func(self, df):
    """Return ``sqrt(0.5 * (xx + yy))`` of the HSM source moments."""
    srcSize = np.sqrt(0.5*(df["ext_shapeHSM_HsmSourceMoments_xx"]
                           + df["ext_shapeHSM_HsmSourceMoments_yy"]))
    return srcSize
1090
1091
1093 """Functor to calculate HSM trace radius size difference (%) between object and psf model"""
1094 name = 'PSF - HSM Trace Size'
1095 shortname = 'psf_HsmTrace'
1096 _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
1097 "ext_shapeHSM_HsmSourceMoments_yy",
1098 "ext_shapeHSM_HsmPsfMoments_xx",
1099 "ext_shapeHSM_HsmPsfMoments_yy")
1100
def _func(self, df):
    """Return the percentage difference between HSM source and PSF trace sizes.

    Both sizes are ``sqrt(0.5 * (xx + yy))``; the difference is normalised
    by the mean of the two sizes.
    """
    srcSize = np.sqrt(0.5*(df["ext_shapeHSM_HsmSourceMoments_xx"]
                           + df["ext_shapeHSM_HsmSourceMoments_yy"]))
    psfSize = np.sqrt(0.5*(df["ext_shapeHSM_HsmPsfMoments_xx"]
                           + df["ext_shapeHSM_HsmPsfMoments_yy"]))
    sizeDiff = 100*(srcSize - psfSize)/(0.5*(srcSize + psfSize))
    return sizeDiff
1108
1109
1111 name = 'HSM Psf FWHM'
1112 _columns = ('ext_shapeHSM_HsmPsfMoments_xx', 'ext_shapeHSM_HsmPsfMoments_yy')
1113 # TODO: DM-21403 pixel scale should be computed from the CD matrix or transform matrix
1114 pixelScale = 0.168
1115 SIGMA2FWHM = 2*np.sqrt(2*np.log(2))
1116
def _func(self, df):
    """Return the HSM PSF trace radius scaled by ``pixelScale * SIGMA2FWHM``."""
    return self.pixelScale*self.SIGMA2FWHM*np.sqrt(
        0.5*(df['ext_shapeHSM_HsmPsfMoments_xx'] + df['ext_shapeHSM_HsmPsfMoments_yy']))
1120
1121
1123 name = "Distortion Ellipticity (e1)"
1124 shortname = "Distortion"
1125
def __init__(self, colXX, colXY, colYY, **kwargs):
    """Store the names of the xx, xy and yy second-moment columns.

    Remaining keyword arguments are forwarded to the parent initialiser.
    """
    self.colXX = colXX
    self.colXY = colXY
    self.colYY = colYY
    self._columns = [self.colXX, self.colXY, self.colYY]
    super().__init__(**kwargs)
1132
@property
def columns(self):
    """The xx, xy and yy moment column names, in that order."""
    return [self.colXX, self.colXY, self.colYY]
1136
def _func(self, df):
    """Return the distortion ellipticity ``e1 = (xx - yy) / (xx + yy)``."""
    # The numerator must be parenthesised: without it, division binds
    # tighter and the result is xx - yy / (xx + yy).
    return (df[self.colXX] - df[self.colYY]) / (df[self.colXX] + df[self.colYY])
1139
1140
1142 name = "Ellipticity e2"
1143
def __init__(self, colXX, colXY, colYY, **kwargs):
    """Store the names of the xx, xy and yy second-moment columns.

    Remaining keyword arguments are forwarded to the parent initialiser.
    """
    self.colXX = colXX
    self.colXY = colXY
    self.colYY = colYY
    super().__init__(**kwargs)
1149
@property
def columns(self):
    """The xx, xy and yy moment column names, in that order."""
    return [self.colXX, self.colXY, self.colYY]
1153
def _func(self, df):
    """Return the ellipticity ``e2 = 2 * xy / (xx + yy)``."""
    return 2*df[self.colXY] / (df[self.colXX] + df[self.colYY])
1156
1157
1159
def __init__(self, colXX, colXY, colYY, **kwargs):
    """Store the names of the xx, xy and yy second-moment columns.

    Remaining keyword arguments are forwarded to the parent initialiser.
    """
    self.colXX = colXX
    self.colXY = colXY
    self.colYY = colYY
    super().__init__(**kwargs)
1165
@property
def columns(self):
    """The xx, xy and yy moment column names, in that order."""
    return [self.colXX, self.colXY, self.colYY]
1169
def _func(self, df):
    """Return ``(xx * yy - xy**2) ** 0.25`` of the quadrupole moments."""
    return (df[self.colXX]*df[self.colYY] - df[self.colXY]**2)**0.25
1172
1173
1175 """Computations using the stored localWcs.
1176 """
1177 name = "LocalWcsOperations"
1178
def __init__(self,
             colCD_1_1,
             colCD_1_2,
             colCD_2_1,
             colCD_2_2,
             **kwargs):
    """Store the column names of the four local CD-matrix elements.

    Remaining keyword arguments are forwarded to the parent initialiser.
    """
    self.colCD_1_1 = colCD_1_1
    self.colCD_1_2 = colCD_1_2
    self.colCD_2_1 = colCD_2_1
    self.colCD_2_2 = colCD_2_2
    super().__init__(**kwargs)
1190
def computeDeltaRaDec(self, x, y, cd11, cd12, cd21, cd22):
    """Apply the local Wcs affine transform to pixel coordinates.

    Parameters
    ----------
    x : `pandas.Series`
        X pixel coordinate.
    y : `pandas.Series`
        Y pixel coordinate.
    cd11 : `pandas.Series`
        [1, 1] element of the local Wcs affine transform.
    cd12 : `pandas.Series`
        [1, 2] element of the local Wcs affine transform.
    cd21 : `pandas.Series`
        [2, 1] element of the local Wcs affine transform.
    cd22 : `pandas.Series`
        [2, 2] element of the local Wcs affine transform.

    Returns
    -------
    raDecTuple : tuple
        RA and dec conversion of x and y given the local Wcs, i.e.
        ``(x*cd11 + y*cd12, x*cd21 + y*cd22)``. Returned units are in
        radians.
    """
    return (x * cd11 + y * cd12, x * cd21 + y * cd22)
1219
def computeSkySeperation(self, ra1, dec1, ra2, dec2):
    """Compute the angular separation between two sky positions.

    Uses the haversine formula.

    Parameters
    ----------
    ra1 : `pandas.Series`
        Ra of the first coordinate in radians.
    dec1 : `pandas.Series`
        Dec of the first coordinate in radians.
    ra2 : `pandas.Series`
        Ra of the second coordinate in radians.
    dec2 : `pandas.Series`
        Dec of the second coordinate in radians.

    Returns
    -------
    dist : `pandas.Series`
        Distance on the sphere in radians.
    """
    deltaDec = dec2 - dec1
    deltaRa = ra2 - ra1
    return 2 * np.arcsin(
        np.sqrt(
            np.sin(deltaDec / 2) ** 2
            + np.cos(dec2) * np.cos(dec1) * np.sin(deltaRa / 2) ** 2))
1245
def getSkySeperationFromPixel(self, x1, y1, x2, y2, cd11, cd12, cd21, cd22):
    """Compute the distance on the sphere from x2, y2 to x1, y1.

    Both pixel positions are converted with ``computeDeltaRaDec`` and the
    separation of the results is taken with ``computeSkySeperation``.

    Parameters
    ----------
    x1 : `pandas.Series`
        X pixel coordinate of the first position.
    y1 : `pandas.Series`
        Y pixel coordinate of the first position.
    x2 : `pandas.Series`
        X pixel coordinate of the second position.
    y2 : `pandas.Series`
        Y pixel coordinate of the second position.
    cd11 : `pandas.Series`
        [1, 1] element of the local Wcs affine transform.
    cd12 : `pandas.Series`
        [1, 2] element of the local Wcs affine transform.
    cd21 : `pandas.Series`
        [2, 1] element of the local Wcs affine transform.
    cd22 : `pandas.Series`
        [2, 2] element of the local Wcs affine transform.

    Returns
    -------
    Distance : `pandas.Series`
        Separation between the two positions, as returned by
        ``computeSkySeperation``.
    """
    ra1, dec1 = self.computeDeltaRaDec(x1, y1, cd11, cd12, cd21, cd22)
    ra2, dec2 = self.computeDeltaRaDec(x2, y2, cd11, cd12, cd21, cd22)
    # Great circle distance for small separations.
    return self.computeSkySeperation(ra1, dec1, ra2, dec2)
1279
1280
1282 """Compute the local pixel scale from the stored CDMatrix.
1283 """
1284 name = "PixelScale"
1285
@property
def columns(self):
    """Column names of the four CD-matrix elements."""
    return [self.colCD_1_1,
            self.colCD_1_2,
            self.colCD_2_1,
            self.colCD_2_2]
1292
def pixelScaleArcseconds(self, cd11, cd12, cd21, cd22):
    """Compute the local pixel to scale conversion in arcseconds.

    The scale is the square root of the absolute determinant of the CD
    matrix, converted from radians to arcseconds.

    Parameters
    ----------
    cd11 : `pandas.Series`
        [1, 1] element of the local Wcs affine transform in radians.
    cd12 : `pandas.Series`
        [1, 2] element of the local Wcs affine transform in radians.
    cd21 : `pandas.Series`
        [2, 1] element of the local Wcs affine transform in radians.
    cd22 : `pandas.Series`
        [2, 2] element of the local Wcs affine transform in radians.

    Returns
    -------
    pixScale : `pandas.Series`
        Arcseconds per pixel at the location of the local WCS.
    """
    return 3600 * np.degrees(np.sqrt(np.fabs(cd11 * cd22 - cd12 * cd21)))
1315
def _func(self, df):
    """Return the pixel scale computed from the CD-matrix columns of ``df``."""
    return self.pixelScaleArcseconds(df[self.colCD_1_1],
                                     df[self.colCD_1_2],
                                     df[self.colCD_2_1],
                                     df[self.colCD_2_2])
1321
1322
1324 """Convert a value in units pixels to units arcseconds.
1325 """
1326
def __init__(self,
             col,
             colCD_1_1,
             colCD_1_2,
             colCD_2_1,
             colCD_2_2,
             **kwargs):
    """Store the value column and forward the CD-matrix columns to the parent."""
    self.col = col
    super().__init__(colCD_1_1,
                     colCD_1_2,
                     colCD_2_1,
                     colCD_2_2,
                     **kwargs)
1340
@property
def name(self):
    """Output name: the input column name suffixed with ``_asArcseconds``."""
    return f"{self.col}_asArcseconds"
1344
@property
def columns(self):
    """The value column followed by the four CD-matrix columns."""
    return [self.col,
            self.colCD_1_1,
            self.colCD_1_2,
            self.colCD_2_1,
            self.colCD_2_2]
1352
def _func(self, df):
    """Multiply the value column by the local pixel scale."""
    return df[self.col] * self.pixelScaleArcseconds(df[self.colCD_1_1],
                                                    df[self.colCD_1_2],
                                                    df[self.colCD_2_1],
                                                    df[self.colCD_2_2])
1358
1359
1361 """Convert a value in units pixels squared to units arcseconds squared.
1362 """
1363
def __init__(self,
             col,
             colCD_1_1,
             colCD_1_2,
             colCD_2_1,
             colCD_2_2,
             **kwargs):
    """Store the value column and forward the CD-matrix columns to the parent."""
    self.col = col
    super().__init__(colCD_1_1,
                     colCD_1_2,
                     colCD_2_1,
                     colCD_2_2,
                     **kwargs)
1377
@property
def name(self):
    """Output name: the input column name suffixed with ``_asArcsecondsSq``."""
    return f"{self.col}_asArcsecondsSq"
1381
@property
def columns(self):
    """The value column followed by the four CD-matrix columns."""
    return [self.col,
            self.colCD_1_1,
            self.colCD_1_2,
            self.colCD_2_1,
            self.colCD_2_2]
1389
def _func(self, df):
    """Multiply the value column by the square of the local pixel scale."""
    pixScale = self.pixelScaleArcseconds(df[self.colCD_1_1],
                                         df[self.colCD_1_2],
                                         df[self.colCD_2_1],
                                         df[self.colCD_2_2])
    return df[self.col] * pixScale * pixScale
1396
1397
1399 name = 'Reference Band'
1400 shortname = 'refBand'
1401
@property
def columns(self):
    """The ``merge_measurement_<band>`` flag columns, in i, r, z, y, g, u order."""
    return ["merge_measurement_i",
            "merge_measurement_r",
            "merge_measurement_z",
            "merge_measurement_y",
            "merge_measurement_g",
            "merge_measurement_u"]
1410
def _func(self, df: pd.DataFrame) -> pd.Series:
    """Return, per row, the band whose ``merge_measurement_<band>`` flag is set.

    For each row the first column holding the maximum value is selected
    and its ``merge_measurement_`` prefix is stripped.
    """
    def getFilterAliasName(row):
        # get column name with the max value (True > False)
        colName = row.idxmax()
        return colName.replace('merge_measurement_', '')

    # Skip columns that are unavailable, because this functor requests the
    # superset of bands that could be included in the object table
    columns = [col for col in self.columns if col in df.columns]
    # Makes a Series of dtype object if df is empty
    return df[columns].apply(getFilterAliasName, axis=1,
                             result_type='reduce').astype('object')
1423
1424
1426 # AB to NanoJansky (3631 Jansky)
1427 AB_FLUX_SCALE = (0 * u.ABmag).to_value(u.nJy)
1428 LOG_AB_FLUX_SCALE = 12.56
1429 FIVE_OVER_2LOG10 = 1.085736204758129569
1430 # TO DO: DM-21955 Replace hard coded photometic calibration values
1431 COADD_ZP = 27
1432
def __init__(self, colFlux, colFluxErr=None, calib=None, **kwargs):
    """Store the flux column names and the zero-point flux.

    When ``calib`` is given, the zero-point flux and its error come from
    ``calib.getFluxMag0()``; otherwise the zero-point is derived from
    ``COADD_ZP`` with zero error.
    """
    self.vhypot = np.vectorize(self.hypot)
    self.col = colFlux
    self.colFluxErr = colFluxErr

    self.calib = calib
    if calib is not None:
        self.fluxMag0, self.fluxMag0Err = calib.getFluxMag0()
    else:
        self.fluxMag0 = 1./np.power(10, -0.4*self.COADD_ZP)
        self.fluxMag0Err = 0.

    super().__init__(**kwargs)
1446
@property
def columns(self):
    """The single flux column this functor reads."""
    return [self.col]
1450
@property
def name(self):
    """Output name: ``mag_`` followed by the flux column name."""
    return f'mag_{self.col}'
1454
@classmethod
def hypot(cls, a, b):
    """Return ``sqrt(a**2 + b**2)`` for two scalars.

    The larger-magnitude argument is factored out before squaring, and
    ``0.`` is returned when both arguments are zero.
    """
    if np.abs(a) < np.abs(b):
        a, b = b, a
    if a == 0.:
        return 0.
    q = b/a
    return np.abs(a) * np.sqrt(1. + q*q)
1463
def dn2flux(self, dn, fluxMag0):
    """Scale ``dn`` by ``AB_FLUX_SCALE / fluxMag0``."""
    return self.AB_FLUX_SCALE * dn / fluxMag0
1466
def dn2mag(self, dn, fluxMag0):
    """Return ``-2.5 * log10(dn / fluxMag0)``.

    Non-positive ratios yield ``inf``/``nan``; the corresponding NumPy
    floating-point warnings are suppressed.
    """
    # ``np.warnings`` was removed in NumPy 1.24; ``np.errstate`` silences
    # the same "invalid value" / "divide by zero" conditions directly.
    with np.errstate(invalid='ignore', divide='ignore'):
        return -2.5 * np.log10(dn/fluxMag0)
1472
def dn2fluxErr(self, dn, dnErr, fluxMag0, fluxMag0Err):
    """Propagate the count and zero-point errors into a flux error.

    Combines ``dn * fluxMag0Err`` and ``dnErr * fluxMag0`` in quadrature
    and scales the result by ``AB_FLUX_SCALE / fluxMag0**2``.
    """
    retVal = self.vhypot(dn * fluxMag0Err, dnErr * fluxMag0)
    retVal *= self.AB_FLUX_SCALE / fluxMag0 / fluxMag0
    return retVal
1477
def dn2MagErr(self, dn, dnErr, fluxMag0, fluxMag0Err):
    """Return the magnitude error: the fractional flux error times ``FIVE_OVER_2LOG10``."""
    retVal = self.dn2fluxErr(dn, dnErr, fluxMag0, fluxMag0Err) / self.dn2flux(dn, fluxMag0)
    return self.FIVE_OVER_2LOG10 * retVal
1481
1482
def _func(self, df):
    """Convert the flux column to calibrated flux via ``dn2flux``."""
    return self.dn2flux(df[self.col], self.fluxMag0)
1486
1487
@property
def columns(self):
    """The flux column and its error column."""
    return [self.col, self.colFluxErr]
1492
def _func(self, df):
    """Return the calibrated flux error as a Series indexed like ``df``."""
    retArr = self.dn2fluxErr(df[self.col], df[self.colFluxErr], self.fluxMag0, self.fluxMag0Err)
    return pd.Series(retArr, index=df.index)
1496
1497
def _func(self, df):
    """Convert the flux column to a magnitude via ``dn2mag``."""
    return self.dn2mag(df[self.col], self.fluxMag0)
1501
1502
@property
def columns(self):
    """The flux column and its error column."""
    return [self.col, self.colFluxErr]
1507
def _func(self, df):
    """Return the magnitude error as a Series indexed like ``df``."""
    retArr = self.dn2MagErr(df[self.col], df[self.colFluxErr], self.fluxMag0, self.fluxMag0Err)
    return pd.Series(retArr, index=df.index)
1511
1512
1514 """Base class for calibrating the specified instrument flux column using
1515 the local photometric calibration.
1516
1517 Parameters
1518 ----------
1519 instFluxCol : `str`
1520 Name of the instrument flux column.
1521 instFluxErrCol : `str`
1522 Name of the assocated error columns for ``instFluxCol``.
1523 photoCalibCol : `str`
1524 Name of local calibration column.
1525 photoCalibErrCol : `str`
1526 Error associated with ``photoCalibCol``
1527
1528 See also
1529 --------
1530 LocalPhotometry
1531 LocalNanojansky
1532 LocalNanojanskyErr
1533 LocalMagnitude
1534 LocalMagnitudeErr
1535 """
1536 logNJanskyToAB = (1 * u.nJy).to_value(u.ABmag)
1537
def __init__(self,
             instFluxCol,
             instFluxErrCol,
             photoCalibCol,
             photoCalibErrCol,
             **kwargs):
    """Store the flux, flux-error, calibration and calibration-error column names.

    Remaining keyword arguments are forwarded to the parent initialiser.
    """
    self.instFluxCol = instFluxCol
    self.instFluxErrCol = instFluxErrCol
    self.photoCalibCol = photoCalibCol
    self.photoCalibErrCol = photoCalibErrCol
    super().__init__(**kwargs)
1549
def instFluxToNanojansky(self, instFlux, localCalib):
    """Convert instrument flux to nanojanskys.

    Parameters
    ----------
    instFlux : `numpy.ndarray` or `pandas.Series`
        Array of instrument flux measurements
    localCalib : `numpy.ndarray` or `pandas.Series`
        Array of local photometric calibration estimates.

    Returns
    -------
    calibFlux : `numpy.ndarray` or `pandas.Series`
        Array of calibrated flux measurements (``instFlux * localCalib``).
    """
    return instFlux * localCalib
1566
def instFluxErrToNanojanskyErr(self, instFlux, instFluxErr, localCalib, localCalibErr):
    """Convert instrument flux error to a calibrated flux error.

    The flux and calibration error terms are combined in quadrature.

    Parameters
    ----------
    instFlux : `numpy.ndarray` or `pandas.Series`
        Array of instrument flux measurements
    instFluxErr : `numpy.ndarray` or `pandas.Series`
        Errors on associated ``instFlux`` values
    localCalib : `numpy.ndarray` or `pandas.Series`
        Array of local photometric calibration estimates.
    localCalibErr : `numpy.ndarray` or `pandas.Series`
        Errors on associated ``localCalib`` values

    Returns
    -------
    calibFluxErr : `numpy.ndarray` or `pandas.Series`
        Errors on calibrated flux measurements.
    """
    return np.hypot(instFluxErr * localCalib, instFlux * localCalibErr)
1587
def instFluxToMagnitude(self, instFlux, localCalib):
    """Convert instrument flux to AB magnitudes.

    The flux is first calibrated with ``instFluxToNanojansky``, then
    ``-2.5 * log10`` of it is offset by ``logNJanskyToAB``.

    Parameters
    ----------
    instFlux : `numpy.ndarray` or `pandas.Series`
        Array of instrument flux measurements
    localCalib : `numpy.ndarray` or `pandas.Series`
        Array of local photometric calibration estimates.

    Returns
    -------
    calibMag : `numpy.ndarray` or `pandas.Series`
        Array of calibrated AB magnitudes.
    """
    return -2.5 * np.log10(self.instFluxToNanojansky(instFlux, localCalib)) + self.logNJanskyToAB
1604
def instFluxErrToMagnitudeErr(self, instFlux, instFluxErr, localCalib, localCalibErr):
    """Convert instrument flux error to an AB magnitude error.

    Parameters
    ----------
    instFlux : `numpy.ndarray` or `pandas.Series`
        Array of instrument flux measurements
    instFluxErr : `numpy.ndarray` or `pandas.Series`
        Errors on associated ``instFlux`` values
    localCalib : `numpy.ndarray` or `pandas.Series`
        Array of local photometric calibration estimates.
    localCalibErr : `numpy.ndarray` or `pandas.Series`
        Errors on associated ``localCalib`` values

    Returns
    -------
    calibMagErr: `numpy.ndarray` or `pandas.Series`
        Error on calibrated AB magnitudes.
    """
    err = self.instFluxErrToNanojanskyErr(instFlux, instFluxErr, localCalib, localCalibErr)
    # The fractional error is taken against the calibrated flux, so the
    # flux must be scaled by ``localCalib`` (not by ``instFluxErr``).
    return 2.5 / np.log(10) * err / self.instFluxToNanojansky(instFlux, localCalib)
1626
1627
1629 """Compute calibrated fluxes using the local calibration value.
1630
1631 See also
1632 --------
1633 LocalNanojansky
1634 LocalNanojanskyErr
1635 LocalMagnitude
1636 LocalMagnitudeErr
1637 """
1638
@property
def columns(self):
    """The instrument-flux column and the local calibration column."""
    return [self.instFluxCol, self.photoCalibCol]
1642
@property
def name(self):
    """Output name: ``flux_`` followed by the instrument-flux column name."""
    return f'flux_{self.instFluxCol}'
1646
def _func(self, df):
    """Calibrate the instrument-flux column with the local calibration column."""
    return self.instFluxToNanojansky(df[self.instFluxCol], df[self.photoCalibCol])
1649
1650
1652 """Compute calibrated flux errors using the local calibration value.
1653
1654 See also
1655 --------
1656 LocalNanojansky
1657 LocalNanojanskyErr
1658 LocalMagnitude
1659 LocalMagnitudeErr
1660 """
1661
@property
def columns(self):
    """Flux, flux-error, calibration and calibration-error columns."""
    return [self.instFluxCol, self.instFluxErrCol,
            self.photoCalibCol, self.photoCalibErrCol]
1666
@property
def name(self):
    """Output name: ``fluxErr_`` followed by the instrument-flux column name."""
    return f'fluxErr_{self.instFluxCol}'
1670
def _func(self, df):
    """Return the calibrated flux error from the four input columns."""
    return self.instFluxErrToNanojanskyErr(df[self.instFluxCol], df[self.instFluxErrCol],
                                           df[self.photoCalibCol], df[self.photoCalibErrCol])
1674
1675
1677 """Compute calibrated AB magnitudes using the local calibration value.
1678
1679 See also
1680 --------
1681 LocalNanojansky
1682 LocalNanojanskyErr
1683 LocalMagnitude
1684 LocalMagnitudeErr
1685 """
1686
@property
def columns(self):
    """The instrument-flux column and the local calibration column."""
    return [self.instFluxCol, self.photoCalibCol]
1690
@property
def name(self):
    """Output name: ``mag_`` followed by the instrument-flux column name."""
    return f'mag_{self.instFluxCol}'
1694
def _func(self, df):
    """Return the calibrated magnitude of the instrument-flux column."""
    return self.instFluxToMagnitude(df[self.instFluxCol],
                                    df[self.photoCalibCol])
1698
1699
1701 """Compute calibrated AB magnitude errors using the local calibration value.
1702
1703 See also
1704 --------
1705 LocalNanojansky
1706 LocalNanojanskyErr
1707 LocalMagnitude
1708 LocalMagnitudeErr
1709 """
1710
@property
def columns(self):
    """Flux, flux-error, calibration and calibration-error columns."""
    return [self.instFluxCol, self.instFluxErrCol,
            self.photoCalibCol, self.photoCalibErrCol]
1715
@property
def name(self):
    """Output name: ``magErr_`` followed by the instrument-flux column name."""
    return f'magErr_{self.instFluxCol}'
1719
def _func(self, df):
    """Return the calibrated magnitude error from the four input columns."""
    return self.instFluxErrToMagnitudeErr(df[self.instFluxCol],
                                          df[self.instFluxErrCol],
                                          df[self.photoCalibCol],
                                          df[self.photoCalibErrCol])
1725
1726
1728 """Compute absolute mean of dipole fluxes.
1729
1730 See also
1731 --------
1732 LocalNanojansky
1733 LocalNanojanskyErr
1734 LocalMagnitude
1735 LocalMagnitudeErr
1736 LocalDipoleMeanFlux
1737 LocalDipoleMeanFluxErr
1738 LocalDipoleDiffFlux
1739 LocalDipoleDiffFluxErr
1740 """
def __init__(self,
             instFluxPosCol,
             instFluxNegCol,
             instFluxPosErrCol,
             instFluxNegErrCol,
             photoCalibCol,
             photoCalibErrCol,
             **kwargs):
    """Store the positive/negative lobe flux and error column names.

    The negative-lobe flux and error columns are the ones forwarded to the
    parent initialiser, together with the calibration columns.
    """
    self.instFluxNegCol = instFluxNegCol
    self.instFluxPosCol = instFluxPosCol
    self.instFluxNegErrCol = instFluxNegErrCol
    self.instFluxPosErrCol = instFluxPosErrCol
    self.photoCalibCol = photoCalibCol
    self.photoCalibErrCol = photoCalibErrCol
    super().__init__(instFluxNegCol,
                     instFluxNegErrCol,
                     photoCalibCol,
                     photoCalibErrCol,
                     **kwargs)
1760
@property
def columns(self):
    """Positive and negative lobe flux columns plus the calibration column."""
    return [self.instFluxPosCol,
            self.instFluxNegCol,
            self.photoCalibCol]
1766
@property
def name(self):
    """Output name built from the positive and negative lobe column names."""
    return f'dipMeanFlux_{self.instFluxPosCol}_{self.instFluxNegCol}'
1770
def _func(self, df):
    """Return the mean of the absolute calibrated fluxes of the two lobes."""
    return 0.5*(np.fabs(self.instFluxToNanojansky(df[self.instFluxNegCol], df[self.photoCalibCol]))
                + np.fabs(self.instFluxToNanojansky(df[self.instFluxPosCol], df[self.photoCalibCol])))
1774
1775
1777 """Compute the error on the absolute mean of dipole fluxes.
1778
1779 See also
1780 --------
1781 LocalNanojansky
1782 LocalNanojanskyErr
1783 LocalMagnitude
1784 LocalMagnitudeErr
1785 LocalDipoleMeanFlux
1786 LocalDipoleMeanFluxErr
1787 LocalDipoleDiffFlux
1788 LocalDipoleDiffFluxErr
1789 """
1790
@property
def columns(self):
    """Lobe flux and flux-error columns plus the calibration and its error."""
    return [self.instFluxPosCol,
            self.instFluxNegCol,
            self.instFluxPosErrCol,
            self.instFluxNegErrCol,
            self.photoCalibCol,
            self.photoCalibErrCol]
1799
@property
def name(self):
    """Output name built from the positive and negative lobe column names."""
    return f'dipMeanFluxErr_{self.instFluxPosCol}_{self.instFluxNegCol}'
1803
def _func(self, df):
    """Return the error on the mean absolute dipole flux.

    Propagates the calibration error and the two lobe flux errors through
    ``0.5 * (|neg| + |pos|) * calib``.
    """
    # NOTE(review): the calibration-error factor was lost from the listing
    # and is reconstructed here; the sum of absolute fluxes is grouped
    # before scaling, matching the form of the dipole-difference error.
    # Confirm against the upstream file.
    return 0.5*np.sqrt(
        ((np.fabs(df[self.instFluxNegCol]) + np.fabs(df[self.instFluxPosCol]))
         * df[self.photoCalibErrCol])**2
        + (df[self.instFluxNegErrCol]**2 + df[self.instFluxPosErrCol]**2)
        * df[self.photoCalibCol]**2)
1810
1811
1813 """Compute the absolute difference of dipole fluxes.
1814
1815 Value is (abs(pos) - abs(neg))
1816
1817 See also
1818 --------
1819 LocalNanojansky
1820 LocalNanojanskyErr
1821 LocalMagnitude
1822 LocalMagnitudeErr
1823 LocalDipoleMeanFlux
1824 LocalDipoleMeanFluxErr
1825 LocalDipoleDiffFlux
1826 LocalDipoleDiffFluxErr
1827 """
1828
@property
def columns(self):
    """Positive and negative lobe flux columns plus the calibration column."""
    return [self.instFluxPosCol,
            self.instFluxNegCol,
            self.photoCalibCol]
1834
@property
def name(self):
    """Output name built from the positive and negative lobe column names."""
    return f'dipDiffFlux_{self.instFluxPosCol}_{self.instFluxNegCol}'
1838
def _func(self, df):
    """Return ``|pos| - |neg|`` of the calibrated lobe fluxes."""
    return (np.fabs(self.instFluxToNanojansky(df[self.instFluxPosCol], df[self.photoCalibCol]))
            - np.fabs(self.instFluxToNanojansky(df[self.instFluxNegCol], df[self.photoCalibCol])))
1842
1843
1845 """Compute the error on the absolute difference of dipole fluxes.
1846
1847 See also
1848 --------
1849 LocalNanojansky
1850 LocalNanojanskyErr
1851 LocalMagnitude
1852 LocalMagnitudeErr
1853 LocalDipoleMeanFlux
1854 LocalDipoleMeanFluxErr
1855 LocalDipoleDiffFlux
1856 LocalDipoleDiffFluxErr
1857 """
1858
@property
def columns(self):
    """Lobe flux and flux-error columns plus the calibration and its error."""
    return [self.instFluxPosCol,
            self.instFluxNegCol,
            self.instFluxPosErrCol,
            self.instFluxNegErrCol,
            self.photoCalibCol,
            self.photoCalibErrCol]
1867
@property
def name(self):
    """Output name built from the positive and negative lobe column names."""
    return f'dipDiffFluxErr_{self.instFluxPosCol}_{self.instFluxNegCol}'
1871
def _func(self, df):
    """Return the error on the absolute dipole flux difference.

    Propagates the calibration error and the two lobe flux errors through
    ``(|pos| - |neg|) * calib``.
    """
    # NOTE(review): the calibration-error factor was lost from the listing
    # and is reconstructed here; confirm against the upstream file.
    return np.sqrt(
        ((np.fabs(df[self.instFluxPosCol]) - np.fabs(df[self.instFluxNegCol]))
         * df[self.photoCalibErrCol])**2
        + (df[self.instFluxPosErrCol]**2 + df[self.instFluxNegErrCol]**2)
        * df[self.photoCalibCol]**2)
1878
1879
1881 """Base class for returning the ratio of 2 columns.
1882
1883 Can be used to compute a Signal to Noise ratio for any input flux.
1884
1885 Parameters
1886 ----------
1887 numerator : `str`
1888 Name of the column to use at the numerator in the ratio
1889 denominator : `str`
1890 Name of the column to use as the denominator in the ratio.
1891 """
def __init__(self,
             numerator,
             denominator,
             **kwargs):
    """Store the numerator and denominator column names.

    Remaining keyword arguments are forwarded to the parent initialiser.
    """
    self.numerator = numerator
    self.denominator = denominator
    super().__init__(**kwargs)
1899
@property
def columns(self):
    """The numerator and denominator column names."""
    return [self.numerator, self.denominator]
1903
@property
def name(self):
    """Output name of the form ``ratio_<numerator>_<denominator>``."""
    return f'ratio_{self.numerator}_{self.denominator}'
1907
def _func(self, df):
    """Return the element-wise ratio of the numerator and denominator columns.

    Division by zero and invalid operations yield ``inf``/``nan``; the
    corresponding NumPy floating-point warnings are suppressed.
    """
    # ``np.warnings`` was removed in NumPy 1.24; ``np.errstate`` silences
    # the same "invalid value" / "divide by zero" conditions directly.
    with np.errstate(invalid='ignore', divide='ignore'):
        return df[self.numerator] / df[self.denominator]
1913
1914
1916 """Compute E(B-V) from dustmaps.sfd
1917 """
1918 _defaultDataset = 'ref'
1919 name = "E(B-V)"
1920 shortname = "ebv"
1921
def __init__(self, **kwargs):
    """Set the coordinate columns and create the ``SFDQuery`` instance.

    All keyword arguments are forwarded to the parent initialiser.
    """
    self._columns = ['coord_ra', 'coord_dec']
    self.sfd = SFDQuery()
    super().__init__(**kwargs)
1926
def _func(self, df):
    """Query ``self.sfd`` at each row's coordinates and return float64 values.

    ``coord_ra`` and ``coord_dec`` are interpreted as radians.
    """
    coords = SkyCoord(df['coord_ra']*u.rad, df['coord_dec']*u.rad)
    ebv = self.sfd(coords)
    # Double precision unnecessary scientifically
    # but currently needed for ingest to qserv
    return pd.Series(ebv, index=df.index).astype('float64')
def multilevelColumns(self, parq, **kwargs)
Definition: functors.py:963
def __init__(self, col, filt2, filt1, **kwargs)
Definition: functors.py:934
def __init__(self, col, **kwargs)
Definition: functors.py:648
def __init__(self, funcs, **kwargs)
Definition: functors.py:409
def __call__(self, data, **kwargs)
Definition: functors.py:461
def from_file(cls, filename, **kwargs)
Definition: functors.py:543
def from_yaml(cls, translationDefinition, **kwargs)
Definition: functors.py:552
def renameCol(cls, col, renameRules)
Definition: functors.py:534
def multilevelColumns(self, data, **kwargs)
Definition: functors.py:447
def pixelScaleArcseconds(self, cd11, cd12, cd21, cd22)
Definition: functors.py:1293
def __init__(self, col, colCD_1_1, colCD_1_2, colCD_2_1, colCD_2_2, **kwargs)
Definition: functors.py:1370
def __init__(self, col, colCD_1_1, colCD_1_2, colCD_2_1, colCD_2_2, **kwargs)
Definition: functors.py:1333
def __init__(self, col, **kwargs)
Definition: functors.py:694
def __init__(self, expr, **kwargs)
Definition: functors.py:617
def __init__(self, **kwargs)
Definition: functors.py:722
def __call__(self, catalog, **kwargs)
Definition: functors.py:725
def __init__(self, colXX, colXY, colYY, **kwargs)
Definition: functors.py:1126
def __init__(self, colXX, colXY, colYY, **kwargs)
Definition: functors.py:1144
def __init__(self, **kwargs)
Definition: functors.py:1922
def __call__(self, data, dropna=False)
Definition: functors.py:345
def _func(self, df, dropna=True)
Definition: functors.py:284
def multilevelColumns(self, data, columnIndex=None, returnTuple=False)
Definition: functors.py:234
def _get_data_columnLevelNames(self, data, columnIndex=None)
Definition: functors.py:190
def difference(self, data1, data2, **kwargs)
Definition: functors.py:357
def __init__(self, filt=None, dataset=None, noDup=None)
Definition: functors.py:145
def _get_columnIndex(self, data)
Definition: functors.py:287
def _colsFromDict(self, colDict, columnIndex=None)
Definition: functors.py:212
def _get_data_columnLevels(self, data, columnIndex=None)
Definition: functors.py:166
def __init__(self, ra, decl, **kwargs)
Definition: functors.py:743
def __call__(self, parq, dropna=False, **kwargs)
Definition: functors.py:983
def __init__(self, instFluxPosCol, instFluxNegCol, instFluxPosErrCol, instFluxNegErrCol, photoCalibCol, photoCalibErrCol, **kwargs)
Definition: functors.py:1748
def instFluxToNanojansky(self, instFlux, localCalib)
Definition: functors.py:1550
def instFluxErrToMagnitudeErr(self, instFlux, instFluxErr, localCalib, localCalibErr)
Definition: functors.py:1605
def __init__(self, instFluxCol, instFluxErrCol, photoCalibCol, photoCalibErrCol, **kwargs)
Definition: functors.py:1543
def instFluxErrToNanojanskyErr(self, instFlux, instFluxErr, localCalib, localCalibErr)
Definition: functors.py:1567
def instFluxToMagnitude(self, instFlux, localCalib)
Definition: functors.py:1588
def __init__(self, colCD_1_1, colCD_1_2, colCD_2_1, colCD_2_2, **kwargs)
Definition: functors.py:1184
def computeDeltaRaDec(self, x, y, cd11, cd12, cd21, cd22)
Definition: functors.py:1191
def computeSkySeperation(self, ra1, dec1, ra2, dec2)
Definition: functors.py:1220
def getSkySeperationFromPixel(self, x1, y1, x2, y2, cd11, cd12, cd21, cd22)
Definition: functors.py:1246
def __init__(self, col1, col2, **kwargs)
Definition: functors.py:881
def __init__(self, *args, **kwargs)
Definition: functors.py:842
def __init__(self, col, calib=None, **kwargs)
Definition: functors.py:804
def dn2mag(self, dn, fluxMag0)
Definition: functors.py:1467
def dn2flux(self, dn, fluxMag0)
Definition: functors.py:1464
def dn2fluxErr(self, dn, dnErr, fluxMag0, fluxMag0Err)
Definition: functors.py:1473
def dn2MagErr(self, dn, dnErr, fluxMag0, fluxMag0Err)
Definition: functors.py:1478
def __init__(self, colFlux, colFluxErr=None, calib=None, **kwargs)
Definition: functors.py:1433
def __call__(self, catalog, **kwargs)
Definition: functors.py:712
def __init__(self, **kwargs)
Definition: functors.py:709
def __init__(self, colXX, colXY, colYY, **kwargs)
Definition: functors.py:1160
def __init__(self, numerator, denominator, **kwargs)
Definition: functors.py:1895
def mag_aware_eval(df, expr, log)
Definition: functors.py:581
def init_fromDict(initDict, basePath='lsst.pipe.tasks.functors', typeKey='functor', name=None)
Definition: functors.py:43