Coverage for python/lsst/pipe/tasks/functors.py: 34%

725 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-06-22 10:58 +0000

1# This file is part of pipe_tasks. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

22__all__ = ["Functor", "CompositeFunctor", "CustomFunctor", "Column", "Index", 

23 "CoordColumn", "RAColumn", "DecColumn", "HtmIndex20", "Mag", 

24 "MagErr", "MagDiff", "Color", "DeconvolvedMoments", "SdssTraceSize", 

25 "PsfSdssTraceSizeDiff", "HsmTraceSize", "PsfHsmTraceSizeDiff", 

26 "HsmFwhm", "E1", "E2", "RadiusFromQuadrupole", "LocalWcs", 

27 "ComputePixelScale", "ConvertPixelToArcseconds", 

28 "ConvertPixelSqToArcsecondsSq", "ReferenceBand", "Photometry", 

29 "NanoJansky", "NanoJanskyErr", "LocalPhotometry", "LocalNanojansky", 

30 "LocalNanojanskyErr", "LocalDipoleMeanFlux", 

31 "LocalDipoleMeanFluxErr", "LocalDipoleDiffFlux", 

32 "LocalDipoleDiffFluxErr", "Ebv", 

33 ] 

34 

35import yaml 

36import re 

37from itertools import product 

38import logging 

39import os.path 

40import warnings 

41 

42import pandas as pd 

43import numpy as np 

44import astropy.units as u 

45from astropy.coordinates import SkyCoord 

46 

47from lsst.utils import doImport 

48from lsst.utils.introspection import get_full_type_name 

49from lsst.daf.butler import DeferredDatasetHandle 

50from lsst.pipe.base import InMemoryDatasetHandle 

51import lsst.geom as geom 

52import lsst.sphgeom as sphgeom 

53 

54 

55def init_fromDict(initDict, basePath='lsst.pipe.tasks.functors', 

56 typeKey='functor', name=None): 

57 """Initialize an object defined in a dictionary 

58 

59 The object needs to be importable as 

60 f'{basePath}.{initDict[typeKey]}' 

61 The positional and keyword arguments (if any) are contained in 

62 "args" and "kwargs" entries in the dictionary, respectively. 

63 This is used in `functors.CompositeFunctor.from_yaml` to initialize 

64 a composite functor from a specification in a YAML file. 

65 

66 Parameters 

67 ---------- 

68 initDict : dictionary 

69 Dictionary describing object's initialization. Must contain 

70 an entry keyed by ``typeKey`` that is the name of the object, 

71 relative to ``basePath``. 

72 basePath : str 

73 Path relative to module in which ``initDict[typeKey]`` is defined. 

74 typeKey : str 

75 Key of ``initDict`` that is the name of the object 

76 (relative to `basePath`). 

77 """ 

78 initDict = initDict.copy() 

79 # TO DO: DM-21956 We should be able to define functors outside this module 

80 pythonType = doImport(f'{basePath}.{initDict.pop(typeKey)}') 

81 args = [] 

82 if 'args' in initDict: 

83 args = initDict.pop('args') 

84 if isinstance(args, str): 

85 args = [args] 

86 try: 

87 element = pythonType(*args, **initDict) 

88 except Exception as e: 

89 message = f'Error in constructing functor "{name}" of type {pythonType.__name__} with args: {args}' 

90 raise type(e)(message, e.args) 

91 return element 

92 

93 

94class Functor(object): 

95 """Define and execute a calculation on a DataFrame or Handle holding a DataFrame. 

96 

97 The `__call__` method accepts either a `DataFrame` object or a 

98 `DeferredDatasetHandle` or `InMemoryDatasetHandle`, and returns the 

99 result of the calculation as a single column. Each functor defines what 

100 columns are needed for the calculation, and only these columns are read 

101 from the dataset handle. 

102 

103 The action of `__call__` consists of two steps: first, loading the 

104 necessary columns from disk into memory as a `pandas.DataFrame` object; 

105 and second, performing the computation on this dataframe and returning the 

106 result. 

107 

108 

109 To define a new `Functor`, a subclass must define a `_func` method, 

110 that takes a `pandas.DataFrame` and returns result in a `pandas.Series`. 

111 In addition, it must define the following attributes 

112 

113 * `_columns`: The columns necessary to perform the calculation 

114 * `name`: A name appropriate for a figure axis label 

115 * `shortname`: A name appropriate for use as a dictionary key 

116 

117 On initialization, a `Functor` should declare what band (`filt` kwarg) 

118 and dataset (e.g. `'ref'`, `'meas'`, `'forced_src'`) it is intended to be 

119 applied to. This enables the `_get_data` method to extract the proper 

120 columns from the underlying data. If not specified, the dataset will fall back 

121 on the `_defaultDataset`attribute. If band is not specified and `dataset` 

122 is anything other than `'ref'`, then an error will be raised when trying to 

123 perform the calculation. 

124 

125 Originally, `Functor` was set up to expect 

126 datasets formatted like the `deepCoadd_obj` dataset; that is, a 

127 dataframe with a multi-level column index, with the levels of the 

128 column index being `band`, `dataset`, and `column`. 

129 It has since been generalized to apply to dataframes without mutli-level 

130 indices and multi-level indices with just `dataset` and `column` levels. 

131 In addition, the `_get_data` method that reads 

132 the columns from the underlying data will return a dataframe with column 

133 index levels defined by the `_dfLevels` attribute; by default, this is 

134 `column`. 

135 

136 The `_dfLevels` attributes should generally not need to 

137 be changed, unless `_func` needs columns from multiple filters or datasets 

138 to do the calculation. 

139 An example of this is the `lsst.pipe.tasks.functors.Color` functor, for 

140 which `_dfLevels = ('band', 'column')`, and `_func` expects the dataframe 

141 it gets to have those levels in the column index. 

142 

143 Parameters 

144 ---------- 

145 filt : str 

146 Filter upon which to do the calculation 

147 

148 dataset : str 

149 Dataset upon which to do the calculation 

150 (e.g., 'ref', 'meas', 'forced_src'). 

151 """ 

152 

153 _defaultDataset = 'ref' 

154 _dfLevels = ('column',) 

155 _defaultNoDup = False 

156 

157 def __init__(self, filt=None, dataset=None, noDup=None): 

158 self.filt = filt 

159 self.dataset = dataset if dataset is not None else self._defaultDataset 

160 self._noDup = noDup 

161 self.log = logging.getLogger(type(self).__name__) 

162 

163 @property 

164 def noDup(self): 

165 if self._noDup is not None: 

166 return self._noDup 

167 else: 

168 return self._defaultNoDup 

169 

170 @property 

171 def columns(self): 

172 """Columns required to perform calculation 

173 """ 

174 if not hasattr(self, '_columns'): 

175 raise NotImplementedError('Must define columns property or _columns attribute') 

176 return self._columns 

177 

178 def _get_data_columnLevels(self, data, columnIndex=None): 

179 """Gets the names of the column index levels 

180 

181 This should only be called in the context of a multilevel table. 

182 

183 Parameters 

184 ---------- 

185 data : various 

186 The data to be read, can be a `DeferredDatasetHandle` or 

187 `InMemoryDatasetHandle`. 

188 columnnIndex (optional): pandas `Index` object 

189 If not passed, then it is read from the `DeferredDatasetHandle` 

190 for `InMemoryDatasetHandle`. 

191 """ 

192 if columnIndex is None: 

193 columnIndex = data.get(component="columns") 

194 return columnIndex.names 

195 

196 def _get_data_columnLevelNames(self, data, columnIndex=None): 

197 """Gets the content of each of the column levels for a multilevel table. 

198 """ 

199 if columnIndex is None: 

200 columnIndex = data.get(component="columns") 

201 

202 columnLevels = columnIndex.names 

203 columnLevelNames = { 

204 level: list(np.unique(np.array([c for c in columnIndex])[:, i])) 

205 for i, level in enumerate(columnLevels) 

206 } 

207 return columnLevelNames 

208 

209 def _colsFromDict(self, colDict, columnIndex=None): 

210 """Converts dictionary column specficiation to a list of columns 

211 """ 

212 new_colDict = {} 

213 columnLevels = self._get_data_columnLevels(None, columnIndex=columnIndex) 

214 

215 for i, lev in enumerate(columnLevels): 

216 if lev in colDict: 

217 if isinstance(colDict[lev], str): 

218 new_colDict[lev] = [colDict[lev]] 

219 else: 

220 new_colDict[lev] = colDict[lev] 

221 else: 

222 new_colDict[lev] = columnIndex.levels[i] 

223 

224 levelCols = [new_colDict[lev] for lev in columnLevels] 

225 cols = list(product(*levelCols)) 

226 colsAvailable = [col for col in cols if col in columnIndex] 

227 return colsAvailable 

228 

229 def multilevelColumns(self, data, columnIndex=None, returnTuple=False): 

230 """Returns columns needed by functor from multilevel dataset 

231 

232 To access tables with multilevel column structure, the `DeferredDatasetHandle` 

233 or `InMemoryDatasetHandle` need to be passed either a list of tuples or a 

234 dictionary. 

235 

236 Parameters 

237 ---------- 

238 data : various 

239 The data as either `DeferredDatasetHandle`, or `InMemoryDatasetHandle`. 

240 columnIndex (optional): pandas `Index` object 

241 either passed or read in from `DeferredDatasetHandle`. 

242 `returnTuple` : `bool` 

243 If true, then return a list of tuples rather than the column dictionary 

244 specification. This is set to `True` by `CompositeFunctor` in order to be able to 

245 combine columns from the various component functors. 

246 

247 """ 

248 if not isinstance(data, (DeferredDatasetHandle, InMemoryDatasetHandle)): 

249 raise RuntimeError(f"Unexpected data type. Got {get_full_type_name(data)}.") 

250 

251 if columnIndex is None: 

252 columnIndex = data.get(component="columns") 

253 

254 # Confirm that the dataset has the column levels the functor is expecting it to have. 

255 columnLevels = self._get_data_columnLevels(data, columnIndex) 

256 

257 columnDict = {'column': self.columns, 

258 'dataset': self.dataset} 

259 if self.filt is None: 

260 columnLevelNames = self._get_data_columnLevelNames(data, columnIndex) 

261 if "band" in columnLevels: 

262 if self.dataset == "ref": 

263 columnDict["band"] = columnLevelNames["band"][0] 

264 else: 

265 raise ValueError(f"'filt' not set for functor {self.name}" 

266 f"(dataset {self.dataset}) " 

267 "and DataFrame " 

268 "contains multiple filters in column index. " 

269 "Set 'filt' or set 'dataset' to 'ref'.") 

270 else: 

271 columnDict['band'] = self.filt 

272 

273 if returnTuple: 

274 return self._colsFromDict(columnDict, columnIndex=columnIndex) 

275 else: 

276 return columnDict 

277 

278 def _func(self, df, dropna=True): 

279 raise NotImplementedError('Must define calculation on dataframe') 

280 

281 def _get_columnIndex(self, data): 

282 """Return columnIndex 

283 """ 

284 

285 if isinstance(data, (DeferredDatasetHandle, InMemoryDatasetHandle)): 

286 return data.get(component="columns") 

287 else: 

288 return None 

289 

290 def _get_data(self, data): 

291 """Retrieve dataframe necessary for calculation. 

292 

293 The data argument can be a `DataFrame`, a `DeferredDatasetHandle`, or an 

294 `InMemoryDatasetHandle`. 

295 

296 Returns dataframe upon which `self._func` can act. 

297 """ 

298 # We wrap a dataframe in a handle here to take advantage of the dataframe 

299 # delegate dataframe column wrangling abilities. 

300 if isinstance(data, pd.DataFrame): 

301 _data = InMemoryDatasetHandle(data, storageClass="DataFrame") 

302 elif isinstance(data, (DeferredDatasetHandle, InMemoryDatasetHandle)): 

303 _data = data 

304 else: 

305 raise RuntimeError(f"Unexpected type provided for data. Got {get_full_type_name(data)}.") 

306 

307 # First thing to do: check to see if the data source has a multilevel column index or not. 

308 columnIndex = self._get_columnIndex(_data) 

309 is_multiLevel = isinstance(columnIndex, pd.MultiIndex) 

310 

311 # Get proper columns specification for this functor 

312 if is_multiLevel: 

313 columns = self.multilevelColumns(_data, columnIndex=columnIndex) 

314 else: 

315 columns = self.columns 

316 

317 # Load in-memory dataframe with appropriate columns the gen3 way 

318 df = _data.get(parameters={"columns": columns}) 

319 

320 # Drop unnecessary column levels 

321 if is_multiLevel: 

322 df = self._setLevels(df) 

323 

324 return df 

325 

326 def _setLevels(self, df): 

327 levelsToDrop = [n for n in df.columns.names if n not in self._dfLevels] 

328 df.columns = df.columns.droplevel(levelsToDrop) 

329 return df 

330 

331 def _dropna(self, vals): 

332 return vals.dropna() 

333 

334 def __call__(self, data, dropna=False): 

335 df = self._get_data(data) 

336 try: 

337 vals = self._func(df) 

338 except Exception as e: 

339 self.log.error("Exception in %s call: %s: %s", self.name, type(e).__name__, e) 

340 vals = self.fail(df) 

341 if dropna: 

342 vals = self._dropna(vals) 

343 

344 return vals 

345 

346 def difference(self, data1, data2, **kwargs): 

347 """Computes difference between functor called on two different DataFrame/Handle objects 

348 """ 

349 return self(data1, **kwargs) - self(data2, **kwargs) 

350 

351 def fail(self, df): 

352 return pd.Series(np.full(len(df), np.nan), index=df.index) 

353 

354 @property 

355 def name(self): 

356 """Full name of functor (suitable for figure labels) 

357 """ 

358 return NotImplementedError 

359 

360 @property 

361 def shortname(self): 

362 """Short name of functor (suitable for column name/dict key) 

363 """ 

364 return self.name 

365 

366 

367class CompositeFunctor(Functor): 

368 """Perform multiple calculations at once on a catalog. 

369 

370 The role of a `CompositeFunctor` is to group together computations from 

371 multiple functors. Instead of returning `pandas.Series` a 

372 `CompositeFunctor` returns a `pandas.Dataframe`, with the column names 

373 being the keys of `funcDict`. 

374 

375 The `columns` attribute of a `CompositeFunctor` is the union of all columns 

376 in all the component functors. 

377 

378 A `CompositeFunctor` does not use a `_func` method itself; rather, 

379 when a `CompositeFunctor` is called, all its columns are loaded 

380 at once, and the resulting dataframe is passed to the `_func` method of each component 

381 functor. This has the advantage of only doing I/O (reading from parquet file) once, 

382 and works because each individual `_func` method of each component functor does not 

383 care if there are *extra* columns in the dataframe being passed; only that it must contain 

384 *at least* the `columns` it expects. 

385 

386 An important and useful class method is `from_yaml`, which takes as argument the path to a YAML 

387 file specifying a collection of functors. 

388 

389 Parameters 

390 ---------- 

391 funcs : `dict` or `list` 

392 Dictionary or list of functors. If a list, then it will be converted 

393 into a dictonary according to the `.shortname` attribute of each functor. 

394 

395 """ 

396 dataset = None 

397 name = "CompositeFunctor" 

398 

399 def __init__(self, funcs, **kwargs): 

400 

401 if type(funcs) == dict: 

402 self.funcDict = funcs 

403 else: 

404 self.funcDict = {f.shortname: f for f in funcs} 

405 

406 self._filt = None 

407 

408 super().__init__(**kwargs) 

409 

410 @property 

411 def filt(self): 

412 return self._filt 

413 

414 @filt.setter 

415 def filt(self, filt): 

416 if filt is not None: 

417 for _, f in self.funcDict.items(): 

418 f.filt = filt 

419 self._filt = filt 

420 

421 def update(self, new): 

422 if isinstance(new, dict): 

423 self.funcDict.update(new) 

424 elif isinstance(new, CompositeFunctor): 

425 self.funcDict.update(new.funcDict) 

426 else: 

427 raise TypeError('Can only update with dictionary or CompositeFunctor.') 

428 

429 # Make sure new functors have the same 'filt' set 

430 if self.filt is not None: 

431 self.filt = self.filt 

432 

433 @property 

434 def columns(self): 

435 return list(set([x for y in [f.columns for f in self.funcDict.values()] for x in y])) 

436 

437 def multilevelColumns(self, data, **kwargs): 

438 # Get the union of columns for all component functors. Note the need to have `returnTuple=True` here. 

439 return list( 

440 set( 

441 [ 

442 x 

443 for y in [ 

444 f.multilevelColumns(data, returnTuple=True, **kwargs) for f in self.funcDict.values() 

445 ] 

446 for x in y 

447 ] 

448 ) 

449 ) 

450 

451 def __call__(self, data, **kwargs): 

452 """Apply the functor to the data table 

453 

454 Parameters 

455 ---------- 

456 data : various 

457 The data represented as `lsst.daf.butler.DeferredDatasetHandle`, 

458 `lsst.pipe.base.InMemoryDatasetHandle`, 

459 or `pandas.DataFrame`. 

460 The table or a pointer to a table on disk from which columns can 

461 be accessed 

462 """ 

463 if isinstance(data, pd.DataFrame): 

464 _data = InMemoryDatasetHandle(data, storageClass="DataFrame") 

465 elif isinstance(data, (DeferredDatasetHandle, InMemoryDatasetHandle)): 

466 _data = data 

467 else: 

468 raise RuntimeError(f"Unexpected type provided for data. Got {get_full_type_name(data)}.") 

469 

470 columnIndex = self._get_columnIndex(_data) 

471 

472 if isinstance(columnIndex, pd.MultiIndex): 

473 columns = self.multilevelColumns(_data, columnIndex=columnIndex) 

474 df = _data.get(parameters={"columns": columns}) 

475 

476 valDict = {} 

477 for k, f in self.funcDict.items(): 

478 try: 

479 subdf = f._setLevels( 

480 df[f.multilevelColumns(_data, returnTuple=True, columnIndex=columnIndex)] 

481 ) 

482 valDict[k] = f._func(subdf) 

483 except Exception as e: 

484 self.log.exception( 

485 "Exception in %s (funcs: %s) call: %s", 

486 self.name, 

487 str(list(self.funcDict.keys())), 

488 type(e).__name__, 

489 ) 

490 try: 

491 valDict[k] = f.fail(subdf) 

492 except NameError: 

493 raise e 

494 

495 else: 

496 df = _data.get(parameters={"columns": self.columns}) 

497 

498 valDict = {k: f._func(df) for k, f in self.funcDict.items()} 

499 

500 # Check that output columns are actually columns 

501 for name, colVal in valDict.items(): 

502 if len(colVal.shape) != 1: 

503 raise RuntimeError("Transformed column '%s' is not the shape of a column. " 

504 "It is shaped %s and type %s." % (name, colVal.shape, type(colVal))) 

505 

506 try: 

507 valDf = pd.concat(valDict, axis=1) 

508 except TypeError: 

509 print([(k, type(v)) for k, v in valDict.items()]) 

510 raise 

511 

512 if kwargs.get('dropna', False): 

513 valDf = valDf.dropna(how='any') 

514 

515 return valDf 

516 

517 @classmethod 

518 def renameCol(cls, col, renameRules): 

519 if renameRules is None: 

520 return col 

521 for old, new in renameRules: 

522 if col.startswith(old): 

523 col = col.replace(old, new) 

524 return col 

525 

526 @classmethod 

527 def from_file(cls, filename, **kwargs): 

528 # Allow environment variables in the filename. 

529 filename = os.path.expandvars(filename) 

530 with open(filename) as f: 

531 translationDefinition = yaml.safe_load(f) 

532 

533 return cls.from_yaml(translationDefinition, **kwargs) 

534 

535 @classmethod 

536 def from_yaml(cls, translationDefinition, **kwargs): 

537 funcs = {} 

538 for func, val in translationDefinition['funcs'].items(): 

539 funcs[func] = init_fromDict(val, name=func) 

540 

541 if 'flag_rename_rules' in translationDefinition: 

542 renameRules = translationDefinition['flag_rename_rules'] 

543 else: 

544 renameRules = None 

545 

546 if 'calexpFlags' in translationDefinition: 

547 for flag in translationDefinition['calexpFlags']: 

548 funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='calexp') 

549 

550 if 'refFlags' in translationDefinition: 

551 for flag in translationDefinition['refFlags']: 

552 funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='ref') 

553 

554 if 'forcedFlags' in translationDefinition: 

555 for flag in translationDefinition['forcedFlags']: 

556 funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='forced_src') 

557 

558 if 'flags' in translationDefinition: 

559 for flag in translationDefinition['flags']: 

560 funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='meas') 

561 

562 return cls(funcs, **kwargs) 

563 

564 

565def mag_aware_eval(df, expr, log): 

566 """Evaluate an expression on a DataFrame, knowing what the 'mag' function means 

567 

568 Builds on `pandas.DataFrame.eval`, which parses and executes math on dataframes. 

569 

570 Parameters 

571 ---------- 

572 df : pandas.DataFrame 

573 Dataframe on which to evaluate expression. 

574 

575 expr : str 

576 Expression. 

577 """ 

578 try: 

579 expr_new = re.sub(r'mag\((\w+)\)', r'-2.5*log(\g<1>)/log(10)', expr) 

580 val = df.eval(expr_new) 

581 except Exception as e: # Should check what actually gets raised 

582 log.error("Exception in mag_aware_eval: %s: %s", type(e).__name__, e) 

583 expr_new = re.sub(r'mag\((\w+)\)', r'-2.5*log(\g<1>_instFlux)/log(10)', expr) 

584 val = df.eval(expr_new) 

585 return val 

586 

587 

588class CustomFunctor(Functor): 

589 """Arbitrary computation on a catalog 

590 

591 Column names (and thus the columns to be loaded from catalog) are found 

592 by finding all words and trying to ignore all "math-y" words. 

593 

594 Parameters 

595 ---------- 

596 expr : str 

597 Expression to evaluate, to be parsed and executed by `mag_aware_eval`. 

598 """ 

599 _ignore_words = ('mag', 'sin', 'cos', 'exp', 'log', 'sqrt') 

600 

601 def __init__(self, expr, **kwargs): 

602 self.expr = expr 

603 super().__init__(**kwargs) 

604 

605 @property 

606 def name(self): 

607 return self.expr 

608 

609 @property 

610 def columns(self): 

611 flux_cols = re.findall(r'mag\(\s*(\w+)\s*\)', self.expr) 

612 

613 cols = [c for c in re.findall(r'[a-zA-Z_]+', self.expr) if c not in self._ignore_words] 

614 not_a_col = [] 

615 for c in flux_cols: 

616 if not re.search('_instFlux$', c): 

617 cols.append(f'{c}_instFlux') 

618 not_a_col.append(c) 

619 else: 

620 cols.append(c) 

621 

622 return list(set([c for c in cols if c not in not_a_col])) 

623 

624 def _func(self, df): 

625 return mag_aware_eval(df, self.expr, self.log) 

626 

627 

628class Column(Functor): 

629 """Get column with specified name 

630 """ 

631 

632 def __init__(self, col, **kwargs): 

633 self.col = col 

634 super().__init__(**kwargs) 

635 

636 @property 

637 def name(self): 

638 return self.col 

639 

640 @property 

641 def columns(self): 

642 return [self.col] 

643 

644 def _func(self, df): 

645 return df[self.col] 

646 

647 

648class Index(Functor): 

649 """Return the value of the index for each object 

650 """ 

651 

652 columns = ['coord_ra'] # just a dummy; something has to be here 

653 _defaultDataset = 'ref' 

654 _defaultNoDup = True 

655 

656 def _func(self, df): 

657 return pd.Series(df.index, index=df.index) 

658 

659 

660class CoordColumn(Column): 

661 """Base class for coordinate column, in degrees 

662 """ 

663 _radians = True 

664 

665 def __init__(self, col, **kwargs): 

666 super().__init__(col, **kwargs) 

667 

668 def _func(self, df): 

669 # Must not modify original column in case that column is used by another functor 

670 output = df[self.col] * 180 / np.pi if self._radians else df[self.col] 

671 return output 

672 

673 

674class RAColumn(CoordColumn): 

675 """Right Ascension, in degrees 

676 """ 

677 name = 'RA' 

678 _defaultNoDup = True 

679 

680 def __init__(self, **kwargs): 

681 super().__init__('coord_ra', **kwargs) 

682 

683 def __call__(self, catalog, **kwargs): 

684 return super().__call__(catalog, **kwargs) 

685 

686 

687class DecColumn(CoordColumn): 

688 """Declination, in degrees 

689 """ 

690 name = 'Dec' 

691 _defaultNoDup = True 

692 

693 def __init__(self, **kwargs): 

694 super().__init__('coord_dec', **kwargs) 

695 

696 def __call__(self, catalog, **kwargs): 

697 return super().__call__(catalog, **kwargs) 

698 

699 

700class HtmIndex20(Functor): 

701 """Compute the level 20 HtmIndex for the catalog. 

702 

703 Notes 

704 ----- 

705 This functor was implemented to satisfy requirements of old APDB interface 

706 which required ``pixelId`` column in DiaObject with HTM20 index. APDB 

707 interface had migrated to not need that information, but we keep this 

708 class in case it may be useful for something else. 

709 """ 

710 name = "Htm20" 

711 htmLevel = 20 

712 _radians = True 

713 

714 def __init__(self, ra, dec, **kwargs): 

715 self.pixelator = sphgeom.HtmPixelization(self.htmLevel) 

716 self.ra = ra 

717 self.dec = dec 

718 self._columns = [self.ra, self.dec] 

719 super().__init__(**kwargs) 

720 

721 def _func(self, df): 

722 

723 def computePixel(row): 

724 if self._radians: 

725 sphPoint = geom.SpherePoint(row[self.ra], 

726 row[self.dec], 

727 geom.radians) 

728 else: 

729 sphPoint = geom.SpherePoint(row[self.ra], 

730 row[self.dec], 

731 geom.degrees) 

732 return self.pixelator.index(sphPoint.getVector()) 

733 

734 return df.apply(computePixel, axis=1, result_type='reduce').astype('int64') 

735 

736 

737def fluxName(col): 

738 if not col.endswith('_instFlux'): 

739 col += '_instFlux' 

740 return col 

741 

742 

743def fluxErrName(col): 

744 if not col.endswith('_instFluxErr'): 

745 col += '_instFluxErr' 

746 return col 

747 

748 

749class Mag(Functor): 

750 """Compute calibrated magnitude 

751 

752 Takes a `calib` argument, which returns the flux at mag=0 

753 as `calib.getFluxMag0()`. If not provided, then the default 

754 `fluxMag0` is 63095734448.0194, which is default for HSC. 

755 This default should be removed in DM-21955 

756 

757 This calculation hides warnings about invalid values and dividing by zero. 

758 

759 As for all functors, a `dataset` and `filt` kwarg should be provided upon 

760 initialization. Unlike the default `Functor`, however, the default dataset 

761 for a `Mag` is `'meas'`, rather than `'ref'`. 

762 

763 Parameters 

764 ---------- 

765 col : `str` 

766 Name of flux column from which to compute magnitude. Can be parseable 

767 by `lsst.pipe.tasks.functors.fluxName` function---that is, you can pass 

768 `'modelfit_CModel'` instead of `'modelfit_CModel_instFlux'`) and it will 

769 understand. 

770 calib : `lsst.afw.image.calib.Calib` (optional) 

771 Object that knows zero point. 

772 """ 

773 _defaultDataset = 'meas' 

774 

775 def __init__(self, col, calib=None, **kwargs): 

776 self.col = fluxName(col) 

777 self.calib = calib 

778 if calib is not None: 

779 self.fluxMag0 = calib.getFluxMag0()[0] 

780 else: 

781 # TO DO: DM-21955 Replace hard coded photometic calibration values 

782 self.fluxMag0 = 63095734448.0194 

783 

784 super().__init__(**kwargs) 

785 

786 @property 

787 def columns(self): 

788 return [self.col] 

789 

790 def _func(self, df): 

791 with warnings.catch_warnings(): 

792 warnings.filterwarnings('ignore', r'invalid value encountered') 

793 warnings.filterwarnings('ignore', r'divide by zero') 

794 return -2.5*np.log10(df[self.col] / self.fluxMag0) 

795 

796 @property 

797 def name(self): 

798 return f'mag_{self.col}' 

799 

800 

801class MagErr(Mag): 

802 """Compute calibrated magnitude uncertainty 

803 

804 Takes the same `calib` object as `lsst.pipe.tasks.functors.Mag`. 

805 

806 Parameters 

807 col : `str` 

808 Name of flux column 

809 calib : `lsst.afw.image.calib.Calib` (optional) 

810 Object that knows zero point. 

811 """ 

812 

813 def __init__(self, *args, **kwargs): 

814 super().__init__(*args, **kwargs) 

815 if self.calib is not None: 

816 self.fluxMag0Err = self.calib.getFluxMag0()[1] 

817 else: 

818 self.fluxMag0Err = 0. 

819 

820 @property 

821 def columns(self): 

822 return [self.col, self.col + 'Err'] 

823 

824 def _func(self, df): 

825 with warnings.catch_warnings(): 

826 warnings.filterwarnings('ignore', r'invalid value encountered') 

827 warnings.filterwarnings('ignore', r'divide by zero') 

828 fluxCol, fluxErrCol = self.columns 

829 x = df[fluxErrCol] / df[fluxCol] 

830 y = self.fluxMag0Err / self.fluxMag0 

831 magErr = (2.5 / np.log(10.)) * np.sqrt(x*x + y*y) 

832 return magErr 

833 

834 @property 

835 def name(self): 

836 return super().name + '_err' 

837 

838 

839class MagDiff(Functor): 

840 _defaultDataset = 'meas' 

841 

842 """Functor to calculate magnitude difference""" 

843 

844 def __init__(self, col1, col2, **kwargs): 

845 self.col1 = fluxName(col1) 

846 self.col2 = fluxName(col2) 

847 super().__init__(**kwargs) 

848 

849 @property 

850 def columns(self): 

851 return [self.col1, self.col2] 

852 

853 def _func(self, df): 

854 with warnings.catch_warnings(): 

855 warnings.filterwarnings('ignore', r'invalid value encountered') 

856 warnings.filterwarnings('ignore', r'divide by zero') 

857 return -2.5*np.log10(df[self.col1]/df[self.col2]) 

858 

859 @property 

860 def name(self): 

861 return f'(mag_{self.col1} - mag_{self.col2})' 

862 

863 @property 

864 def shortname(self): 

865 return f'magDiff_{self.col1}_{self.col2}' 

866 

867 

868class Color(Functor): 

869 """Compute the color between two filters 

870 

871 Computes color by initializing two different `Mag` 

872 functors based on the `col` and filters provided, and 

873 then returning the difference. 

874 

875 This is enabled by the `_func` expecting a dataframe with a 

876 multilevel column index, with both `'band'` and `'column'`, 

877 instead of just `'column'`, which is the `Functor` default. 

878 This is controlled by the `_dfLevels` attribute. 

879 

880 Also of note, the default dataset for `Color` is `forced_src'`, 

881 whereas for `Mag` it is `'meas'`. 

882 

883 Parameters 

884 ---------- 

885 col : str 

886 Name of flux column from which to compute; same as would be passed to 

887 `lsst.pipe.tasks.functors.Mag`. 

888 

889 filt2, filt1 : str 

890 Filters from which to compute magnitude difference. 

891 Color computed is `Mag(filt2) - Mag(filt1)`. 

892 """ 

893 _defaultDataset = 'forced_src' 

894 _dfLevels = ('band', 'column') 

895 _defaultNoDup = True 

896 

897 def __init__(self, col, filt2, filt1, **kwargs): 

898 self.col = fluxName(col) 

899 if filt2 == filt1: 

900 raise RuntimeError("Cannot compute Color for %s: %s - %s " % (col, filt2, filt1)) 

901 self.filt2 = filt2 

902 self.filt1 = filt1 

903 

904 self.mag2 = Mag(col, filt=filt2, **kwargs) 

905 self.mag1 = Mag(col, filt=filt1, **kwargs) 

906 

907 super().__init__(**kwargs) 

908 

909 @property 

910 def filt(self): 

911 return None 

912 

913 @filt.setter 

914 def filt(self, filt): 

915 pass 

916 

917 def _func(self, df): 

918 mag2 = self.mag2._func(df[self.filt2]) 

919 mag1 = self.mag1._func(df[self.filt1]) 

920 return mag2 - mag1 

921 

922 @property 

923 def columns(self): 

924 return [self.mag1.col, self.mag2.col] 

925 

926 def multilevelColumns(self, parq, **kwargs): 

927 return [(self.dataset, self.filt1, self.col), (self.dataset, self.filt2, self.col)] 

928 

929 @property 

930 def name(self): 

931 return f'{self.filt2} - {self.filt1} ({self.col})' 

932 

933 @property 

934 def shortname(self): 

935 return f"{self.col}_{self.filt2.replace('-', '')}m{self.filt1.replace('-', '')}" 

936 

937 

938class DeconvolvedMoments(Functor): 

939 name = 'Deconvolved Moments' 

940 shortname = 'deconvolvedMoments' 

941 _columns = ("ext_shapeHSM_HsmSourceMoments_xx", 

942 "ext_shapeHSM_HsmSourceMoments_yy", 

943 "base_SdssShape_xx", "base_SdssShape_yy", 

944 "ext_shapeHSM_HsmPsfMoments_xx", 

945 "ext_shapeHSM_HsmPsfMoments_yy") 

946 

947 def _func(self, df): 

948 """Calculate deconvolved moments""" 

949 if "ext_shapeHSM_HsmSourceMoments_xx" in df.columns: # _xx added by tdm 

950 hsm = df["ext_shapeHSM_HsmSourceMoments_xx"] + df["ext_shapeHSM_HsmSourceMoments_yy"] 

951 else: 

952 hsm = np.ones(len(df))*np.nan 

953 sdss = df["base_SdssShape_xx"] + df["base_SdssShape_yy"] 

954 if "ext_shapeHSM_HsmPsfMoments_xx" in df.columns: 

955 psf = df["ext_shapeHSM_HsmPsfMoments_xx"] + df["ext_shapeHSM_HsmPsfMoments_yy"] 

956 else: 

957 # LSST does not have shape.sdss.psf. Could instead add base_PsfShape to catalog using 

958 # exposure.getPsf().computeShape(s.getCentroid()).getIxx() 

959 # raise TaskError("No psf shape parameter found in catalog") 

960 raise RuntimeError('No psf shape parameter found in catalog') 

961 

962 return hsm.where(np.isfinite(hsm), sdss) - psf 

963 

964 

965class SdssTraceSize(Functor): 

966 """Functor to calculate SDSS trace radius size for sources""" 

967 name = "SDSS Trace Size" 

968 shortname = 'sdssTrace' 

969 _columns = ("base_SdssShape_xx", "base_SdssShape_yy") 

970 

971 def _func(self, df): 

972 srcSize = np.sqrt(0.5*(df["base_SdssShape_xx"] + df["base_SdssShape_yy"])) 

973 return srcSize 

974 

975 

976class PsfSdssTraceSizeDiff(Functor): 

977 """Functor to calculate SDSS trace radius size difference (%) between object and psf model""" 

978 name = "PSF - SDSS Trace Size" 

979 shortname = 'psf_sdssTrace' 

980 _columns = ("base_SdssShape_xx", "base_SdssShape_yy", 

981 "base_SdssShape_psf_xx", "base_SdssShape_psf_yy") 

982 

983 def _func(self, df): 

984 srcSize = np.sqrt(0.5*(df["base_SdssShape_xx"] + df["base_SdssShape_yy"])) 

985 psfSize = np.sqrt(0.5*(df["base_SdssShape_psf_xx"] + df["base_SdssShape_psf_yy"])) 

986 sizeDiff = 100*(srcSize - psfSize)/(0.5*(srcSize + psfSize)) 

987 return sizeDiff 

988 

989 

990class HsmTraceSize(Functor): 

991 """Functor to calculate HSM trace radius size for sources""" 

992 name = 'HSM Trace Size' 

993 shortname = 'hsmTrace' 

994 _columns = ("ext_shapeHSM_HsmSourceMoments_xx", 

995 "ext_shapeHSM_HsmSourceMoments_yy") 

996 

997 def _func(self, df): 

998 srcSize = np.sqrt(0.5*(df["ext_shapeHSM_HsmSourceMoments_xx"] 

999 + df["ext_shapeHSM_HsmSourceMoments_yy"])) 

1000 return srcSize 

1001 

1002 

1003class PsfHsmTraceSizeDiff(Functor): 

1004 """Functor to calculate HSM trace radius size difference (%) between object and psf model""" 

1005 name = 'PSF - HSM Trace Size' 

1006 shortname = 'psf_HsmTrace' 

1007 _columns = ("ext_shapeHSM_HsmSourceMoments_xx", 

1008 "ext_shapeHSM_HsmSourceMoments_yy", 

1009 "ext_shapeHSM_HsmPsfMoments_xx", 

1010 "ext_shapeHSM_HsmPsfMoments_yy") 

1011 

1012 def _func(self, df): 

1013 srcSize = np.sqrt(0.5*(df["ext_shapeHSM_HsmSourceMoments_xx"] 

1014 + df["ext_shapeHSM_HsmSourceMoments_yy"])) 

1015 psfSize = np.sqrt(0.5*(df["ext_shapeHSM_HsmPsfMoments_xx"] 

1016 + df["ext_shapeHSM_HsmPsfMoments_yy"])) 

1017 sizeDiff = 100*(srcSize - psfSize)/(0.5*(srcSize + psfSize)) 

1018 return sizeDiff 

1019 

1020 

1021class HsmFwhm(Functor): 

1022 name = 'HSM Psf FWHM' 

1023 _columns = ('ext_shapeHSM_HsmPsfMoments_xx', 'ext_shapeHSM_HsmPsfMoments_yy') 

1024 # TODO: DM-21403 pixel scale should be computed from the CD matrix or transform matrix 

1025 pixelScale = 0.168 

1026 SIGMA2FWHM = 2*np.sqrt(2*np.log(2)) 

1027 

1028 def _func(self, df): 

1029 return self.pixelScale*self.SIGMA2FWHM*np.sqrt( 

1030 0.5*(df['ext_shapeHSM_HsmPsfMoments_xx'] + df['ext_shapeHSM_HsmPsfMoments_yy'])) 

1031 

1032 

1033class E1(Functor): 

1034 name = "Distortion Ellipticity (e1)" 

1035 shortname = "Distortion" 

1036 

1037 def __init__(self, colXX, colXY, colYY, **kwargs): 

1038 self.colXX = colXX 

1039 self.colXY = colXY 

1040 self.colYY = colYY 

1041 self._columns = [self.colXX, self.colXY, self.colYY] 

1042 super().__init__(**kwargs) 

1043 

1044 @property 

1045 def columns(self): 

1046 return [self.colXX, self.colXY, self.colYY] 

1047 

1048 def _func(self, df): 

1049 return df[self.colXX] - df[self.colYY] / (df[self.colXX] + df[self.colYY]) 

1050 

1051 

1052class E2(Functor): 

1053 name = "Ellipticity e2" 

1054 

1055 def __init__(self, colXX, colXY, colYY, **kwargs): 

1056 self.colXX = colXX 

1057 self.colXY = colXY 

1058 self.colYY = colYY 

1059 super().__init__(**kwargs) 

1060 

1061 @property 

1062 def columns(self): 

1063 return [self.colXX, self.colXY, self.colYY] 

1064 

1065 def _func(self, df): 

1066 return 2*df[self.colXY] / (df[self.colXX] + df[self.colYY]) 

1067 

1068 

1069class RadiusFromQuadrupole(Functor): 

1070 

1071 def __init__(self, colXX, colXY, colYY, **kwargs): 

1072 self.colXX = colXX 

1073 self.colXY = colXY 

1074 self.colYY = colYY 

1075 super().__init__(**kwargs) 

1076 

1077 @property 

1078 def columns(self): 

1079 return [self.colXX, self.colXY, self.colYY] 

1080 

1081 def _func(self, df): 

1082 return (df[self.colXX]*df[self.colYY] - df[self.colXY]**2)**0.25 

1083 

1084 

1085class LocalWcs(Functor): 

1086 """Computations using the stored localWcs. 

1087 """ 

1088 name = "LocalWcsOperations" 

1089 

1090 def __init__(self, 

1091 colCD_1_1, 

1092 colCD_1_2, 

1093 colCD_2_1, 

1094 colCD_2_2, 

1095 **kwargs): 

1096 self.colCD_1_1 = colCD_1_1 

1097 self.colCD_1_2 = colCD_1_2 

1098 self.colCD_2_1 = colCD_2_1 

1099 self.colCD_2_2 = colCD_2_2 

1100 super().__init__(**kwargs) 

1101 

1102 def computeDeltaRaDec(self, x, y, cd11, cd12, cd21, cd22): 

1103 """Compute the distance on the sphere from x2, y1 to x1, y1. 

1104 

1105 Parameters 

1106 ---------- 

1107 x : `pandas.Series` 

1108 X pixel coordinate. 

1109 y : `pandas.Series` 

1110 Y pixel coordinate. 

1111 cd11 : `pandas.Series` 

1112 [1, 1] element of the local Wcs affine transform. 

1113 cd11 : `pandas.Series` 

1114 [1, 1] element of the local Wcs affine transform. 

1115 cd12 : `pandas.Series` 

1116 [1, 2] element of the local Wcs affine transform. 

1117 cd21 : `pandas.Series` 

1118 [2, 1] element of the local Wcs affine transform. 

1119 cd22 : `pandas.Series` 

1120 [2, 2] element of the local Wcs affine transform. 

1121 

1122 Returns 

1123 ------- 

1124 raDecTuple : tuple 

1125 RA and dec conversion of x and y given the local Wcs. Returned 

1126 units are in radians. 

1127 

1128 """ 

1129 return (x * cd11 + y * cd12, x * cd21 + y * cd22) 

1130 

1131 def computeSkySeparation(self, ra1, dec1, ra2, dec2): 

1132 """Compute the local pixel scale conversion. 

1133 

1134 Parameters 

1135 ---------- 

1136 ra1 : `pandas.Series` 

1137 Ra of the first coordinate in radians. 

1138 dec1 : `pandas.Series` 

1139 Dec of the first coordinate in radians. 

1140 ra2 : `pandas.Series` 

1141 Ra of the second coordinate in radians. 

1142 dec2 : `pandas.Series` 

1143 Dec of the second coordinate in radians. 

1144 

1145 Returns 

1146 ------- 

1147 dist : `pandas.Series` 

1148 Distance on the sphere in radians. 

1149 """ 

1150 deltaDec = dec2 - dec1 

1151 deltaRa = ra2 - ra1 

1152 return 2 * np.arcsin( 

1153 np.sqrt( 

1154 np.sin(deltaDec / 2) ** 2 

1155 + np.cos(dec2) * np.cos(dec1) * np.sin(deltaRa / 2) ** 2)) 

1156 

1157 def getSkySeparationFromPixel(self, x1, y1, x2, y2, cd11, cd12, cd21, cd22): 

1158 """Compute the distance on the sphere from x2, y1 to x1, y1. 

1159 

1160 Parameters 

1161 ---------- 

1162 x1 : `pandas.Series` 

1163 X pixel coordinate. 

1164 y1 : `pandas.Series` 

1165 Y pixel coordinate. 

1166 x2 : `pandas.Series` 

1167 X pixel coordinate. 

1168 y2 : `pandas.Series` 

1169 Y pixel coordinate. 

1170 cd11 : `pandas.Series` 

1171 [1, 1] element of the local Wcs affine transform. 

1172 cd11 : `pandas.Series` 

1173 [1, 1] element of the local Wcs affine transform. 

1174 cd12 : `pandas.Series` 

1175 [1, 2] element of the local Wcs affine transform. 

1176 cd21 : `pandas.Series` 

1177 [2, 1] element of the local Wcs affine transform. 

1178 cd22 : `pandas.Series` 

1179 [2, 2] element of the local Wcs affine transform. 

1180 

1181 Returns 

1182 ------- 

1183 Distance : `pandas.Series` 

1184 Arcseconds per pixel at the location of the local WC 

1185 """ 

1186 ra1, dec1 = self.computeDeltaRaDec(x1, y1, cd11, cd12, cd21, cd22) 

1187 ra2, dec2 = self.computeDeltaRaDec(x2, y2, cd11, cd12, cd21, cd22) 

1188 # Great circle distance for small separations. 

1189 return self.computeSkySeparation(ra1, dec1, ra2, dec2) 

1190 

1191 

1192class ComputePixelScale(LocalWcs): 

1193 """Compute the local pixel scale from the stored CDMatrix. 

1194 """ 

1195 name = "PixelScale" 

1196 

1197 @property 

1198 def columns(self): 

1199 return [self.colCD_1_1, 

1200 self.colCD_1_2, 

1201 self.colCD_2_1, 

1202 self.colCD_2_2] 

1203 

1204 def pixelScaleArcseconds(self, cd11, cd12, cd21, cd22): 

1205 """Compute the local pixel to scale conversion in arcseconds. 

1206 

1207 Parameters 

1208 ---------- 

1209 cd11 : `pandas.Series` 

1210 [1, 1] element of the local Wcs affine transform in radians. 

1211 cd11 : `pandas.Series` 

1212 [1, 1] element of the local Wcs affine transform in radians. 

1213 cd12 : `pandas.Series` 

1214 [1, 2] element of the local Wcs affine transform in radians. 

1215 cd21 : `pandas.Series` 

1216 [2, 1] element of the local Wcs affine transform in radians. 

1217 cd22 : `pandas.Series` 

1218 [2, 2] element of the local Wcs affine transform in radians. 

1219 

1220 Returns 

1221 ------- 

1222 pixScale : `pandas.Series` 

1223 Arcseconds per pixel at the location of the local WC 

1224 """ 

1225 return 3600 * np.degrees(np.sqrt(np.fabs(cd11 * cd22 - cd12 * cd21))) 

1226 

1227 def _func(self, df): 

1228 return self.pixelScaleArcseconds(df[self.colCD_1_1], 

1229 df[self.colCD_1_2], 

1230 df[self.colCD_2_1], 

1231 df[self.colCD_2_2]) 

1232 

1233 

1234class ConvertPixelToArcseconds(ComputePixelScale): 

1235 """Convert a value in units pixels to units arcseconds. 

1236 """ 

1237 

1238 def __init__(self, 

1239 col, 

1240 colCD_1_1, 

1241 colCD_1_2, 

1242 colCD_2_1, 

1243 colCD_2_2, 

1244 **kwargs): 

1245 self.col = col 

1246 super().__init__(colCD_1_1, 

1247 colCD_1_2, 

1248 colCD_2_1, 

1249 colCD_2_2, 

1250 **kwargs) 

1251 

1252 @property 

1253 def name(self): 

1254 return f"{self.col}_asArcseconds" 

1255 

1256 @property 

1257 def columns(self): 

1258 return [self.col, 

1259 self.colCD_1_1, 

1260 self.colCD_1_2, 

1261 self.colCD_2_1, 

1262 self.colCD_2_2] 

1263 

1264 def _func(self, df): 

1265 return df[self.col] * self.pixelScaleArcseconds(df[self.colCD_1_1], 

1266 df[self.colCD_1_2], 

1267 df[self.colCD_2_1], 

1268 df[self.colCD_2_2]) 

1269 

1270 

1271class ConvertPixelSqToArcsecondsSq(ComputePixelScale): 

1272 """Convert a value in units pixels squared to units arcseconds squared. 

1273 """ 

1274 

1275 def __init__(self, 

1276 col, 

1277 colCD_1_1, 

1278 colCD_1_2, 

1279 colCD_2_1, 

1280 colCD_2_2, 

1281 **kwargs): 

1282 self.col = col 

1283 super().__init__(colCD_1_1, 

1284 colCD_1_2, 

1285 colCD_2_1, 

1286 colCD_2_2, 

1287 **kwargs) 

1288 

1289 @property 

1290 def name(self): 

1291 return f"{self.col}_asArcsecondsSq" 

1292 

1293 @property 

1294 def columns(self): 

1295 return [self.col, 

1296 self.colCD_1_1, 

1297 self.colCD_1_2, 

1298 self.colCD_2_1, 

1299 self.colCD_2_2] 

1300 

1301 def _func(self, df): 

1302 pixScale = self.pixelScaleArcseconds(df[self.colCD_1_1], 

1303 df[self.colCD_1_2], 

1304 df[self.colCD_2_1], 

1305 df[self.colCD_2_2]) 

1306 return df[self.col] * pixScale * pixScale 

1307 

1308 

1309class ReferenceBand(Functor): 

1310 name = 'Reference Band' 

1311 shortname = 'refBand' 

1312 

1313 @property 

1314 def columns(self): 

1315 return ["merge_measurement_i", 

1316 "merge_measurement_r", 

1317 "merge_measurement_z", 

1318 "merge_measurement_y", 

1319 "merge_measurement_g", 

1320 "merge_measurement_u"] 

1321 

1322 def _func(self, df: pd.DataFrame) -> pd.Series: 

1323 def getFilterAliasName(row): 

1324 # get column name with the max value (True > False) 

1325 colName = row.idxmax() 

1326 return colName.replace('merge_measurement_', '') 

1327 

1328 # Skip columns that are unavailable, because this functor requests the 

1329 # superset of bands that could be included in the object table 

1330 columns = [col for col in self.columns if col in df.columns] 

1331 # Makes a Series of dtype object if df is empty 

1332 return df[columns].apply(getFilterAliasName, axis=1, 

1333 result_type='reduce').astype('object') 

1334 

1335 

1336class Photometry(Functor): 

1337 # AB to NanoJansky (3631 Jansky) 

1338 AB_FLUX_SCALE = (0 * u.ABmag).to_value(u.nJy) 

1339 LOG_AB_FLUX_SCALE = 12.56 

1340 FIVE_OVER_2LOG10 = 1.085736204758129569 

1341 # TO DO: DM-21955 Replace hard coded photometic calibration values 

1342 COADD_ZP = 27 

1343 

1344 def __init__(self, colFlux, colFluxErr=None, calib=None, **kwargs): 

1345 self.vhypot = np.vectorize(self.hypot) 

1346 self.col = colFlux 

1347 self.colFluxErr = colFluxErr 

1348 

1349 self.calib = calib 

1350 if calib is not None: 

1351 self.fluxMag0, self.fluxMag0Err = calib.getFluxMag0() 

1352 else: 

1353 self.fluxMag0 = 1./np.power(10, -0.4*self.COADD_ZP) 

1354 self.fluxMag0Err = 0. 

1355 

1356 super().__init__(**kwargs) 

1357 

1358 @property 

1359 def columns(self): 

1360 return [self.col] 

1361 

1362 @property 

1363 def name(self): 

1364 return f'mag_{self.col}' 

1365 

1366 @classmethod 

1367 def hypot(cls, a, b): 

1368 if np.abs(a) < np.abs(b): 

1369 a, b = b, a 

1370 if a == 0.: 

1371 return 0. 

1372 q = b/a 

1373 return np.abs(a) * np.sqrt(1. + q*q) 

1374 

1375 def dn2flux(self, dn, fluxMag0): 

1376 return self.AB_FLUX_SCALE * dn / fluxMag0 

1377 

1378 def dn2mag(self, dn, fluxMag0): 

1379 with warnings.catch_warnings(): 

1380 warnings.filterwarnings('ignore', r'invalid value encountered') 

1381 warnings.filterwarnings('ignore', r'divide by zero') 

1382 return -2.5 * np.log10(dn/fluxMag0) 

1383 

1384 def dn2fluxErr(self, dn, dnErr, fluxMag0, fluxMag0Err): 

1385 retVal = self.vhypot(dn * fluxMag0Err, dnErr * fluxMag0) 

1386 retVal *= self.AB_FLUX_SCALE / fluxMag0 / fluxMag0 

1387 return retVal 

1388 

1389 def dn2MagErr(self, dn, dnErr, fluxMag0, fluxMag0Err): 

1390 retVal = self.dn2fluxErr(dn, dnErr, fluxMag0, fluxMag0Err) / self.dn2flux(dn, fluxMag0) 

1391 return self.FIVE_OVER_2LOG10 * retVal 

1392 

1393 

1394class NanoJansky(Photometry): 

1395 def _func(self, df): 

1396 return self.dn2flux(df[self.col], self.fluxMag0) 

1397 

1398 

1399class NanoJanskyErr(Photometry): 

1400 @property 

1401 def columns(self): 

1402 return [self.col, self.colFluxErr] 

1403 

1404 def _func(self, df): 

1405 retArr = self.dn2fluxErr(df[self.col], df[self.colFluxErr], self.fluxMag0, self.fluxMag0Err) 

1406 return pd.Series(retArr, index=df.index) 

1407 

1408 

1409class LocalPhotometry(Functor): 

1410 """Base class for calibrating the specified instrument flux column using 

1411 the local photometric calibration. 

1412 

1413 Parameters 

1414 ---------- 

1415 instFluxCol : `str` 

1416 Name of the instrument flux column. 

1417 instFluxErrCol : `str` 

1418 Name of the assocated error columns for ``instFluxCol``. 

1419 photoCalibCol : `str` 

1420 Name of local calibration column. 

1421 photoCalibErrCol : `str` 

1422 Error associated with ``photoCalibCol`` 

1423 

1424 See also 

1425 -------- 

1426 LocalNanojansky 

1427 LocalNanojanskyErr 

1428 """ 

1429 logNJanskyToAB = (1 * u.nJy).to_value(u.ABmag) 

1430 

1431 def __init__(self, 

1432 instFluxCol, 

1433 instFluxErrCol, 

1434 photoCalibCol, 

1435 photoCalibErrCol, 

1436 **kwargs): 

1437 self.instFluxCol = instFluxCol 

1438 self.instFluxErrCol = instFluxErrCol 

1439 self.photoCalibCol = photoCalibCol 

1440 self.photoCalibErrCol = photoCalibErrCol 

1441 super().__init__(**kwargs) 

1442 

1443 def instFluxToNanojansky(self, instFlux, localCalib): 

1444 """Convert instrument flux to nanojanskys. 

1445 

1446 Parameters 

1447 ---------- 

1448 instFlux : `numpy.ndarray` or `pandas.Series` 

1449 Array of instrument flux measurements 

1450 localCalib : `numpy.ndarray` or `pandas.Series` 

1451 Array of local photometric calibration estimates. 

1452 

1453 Returns 

1454 ------- 

1455 calibFlux : `numpy.ndarray` or `pandas.Series` 

1456 Array of calibrated flux measurements. 

1457 """ 

1458 return instFlux * localCalib 

1459 

1460 def instFluxErrToNanojanskyErr(self, instFlux, instFluxErr, localCalib, localCalibErr): 

1461 """Convert instrument flux to nanojanskys. 

1462 

1463 Parameters 

1464 ---------- 

1465 instFlux : `numpy.ndarray` or `pandas.Series` 

1466 Array of instrument flux measurements 

1467 instFluxErr : `numpy.ndarray` or `pandas.Series` 

1468 Errors on associated ``instFlux`` values 

1469 localCalib : `numpy.ndarray` or `pandas.Series` 

1470 Array of local photometric calibration estimates. 

1471 localCalibErr : `numpy.ndarray` or `pandas.Series` 

1472 Errors on associated ``localCalib`` values 

1473 

1474 Returns 

1475 ------- 

1476 calibFluxErr : `numpy.ndarray` or `pandas.Series` 

1477 Errors on calibrated flux measurements. 

1478 """ 

1479 return np.hypot(instFluxErr * localCalib, instFlux * localCalibErr) 

1480 

1481 def instFluxToMagnitude(self, instFlux, localCalib): 

1482 """Convert instrument flux to nanojanskys. 

1483 

1484 Parameters 

1485 ---------- 

1486 instFlux : `numpy.ndarray` or `pandas.Series` 

1487 Array of instrument flux measurements 

1488 localCalib : `numpy.ndarray` or `pandas.Series` 

1489 Array of local photometric calibration estimates. 

1490 

1491 Returns 

1492 ------- 

1493 calibMag : `numpy.ndarray` or `pandas.Series` 

1494 Array of calibrated AB magnitudes. 

1495 """ 

1496 return -2.5 * np.log10(self.instFluxToNanojansky(instFlux, localCalib)) + self.logNJanskyToAB 

1497 

1498 def instFluxErrToMagnitudeErr(self, instFlux, instFluxErr, localCalib, localCalibErr): 

1499 """Convert instrument flux err to nanojanskys. 

1500 

1501 Parameters 

1502 ---------- 

1503 instFlux : `numpy.ndarray` or `pandas.Series` 

1504 Array of instrument flux measurements 

1505 instFluxErr : `numpy.ndarray` or `pandas.Series` 

1506 Errors on associated ``instFlux`` values 

1507 localCalib : `numpy.ndarray` or `pandas.Series` 

1508 Array of local photometric calibration estimates. 

1509 localCalibErr : `numpy.ndarray` or `pandas.Series` 

1510 Errors on associated ``localCalib`` values 

1511 

1512 Returns 

1513 ------- 

1514 calibMagErr: `numpy.ndarray` or `pandas.Series` 

1515 Error on calibrated AB magnitudes. 

1516 """ 

1517 err = self.instFluxErrToNanojanskyErr(instFlux, instFluxErr, localCalib, localCalibErr) 

1518 return 2.5 / np.log(10) * err / self.instFluxToNanojansky(instFlux, instFluxErr) 

1519 

1520 

1521class LocalNanojansky(LocalPhotometry): 

1522 """Compute calibrated fluxes using the local calibration value.""" 

1523 

1524 @property 

1525 def columns(self): 

1526 return [self.instFluxCol, self.photoCalibCol] 

1527 

1528 @property 

1529 def name(self): 

1530 return f'flux_{self.instFluxCol}' 

1531 

1532 def _func(self, df): 

1533 return self.instFluxToNanojansky(df[self.instFluxCol], df[self.photoCalibCol]) 

1534 

1535 

1536class LocalNanojanskyErr(LocalPhotometry): 

1537 """Compute calibrated flux errors using the local calibration value.""" 

1538 

1539 @property 

1540 def columns(self): 

1541 return [self.instFluxCol, self.instFluxErrCol, 

1542 self.photoCalibCol, self.photoCalibErrCol] 

1543 

1544 @property 

1545 def name(self): 

1546 return f'fluxErr_{self.instFluxCol}' 

1547 

1548 def _func(self, df): 

1549 return self.instFluxErrToNanojanskyErr(df[self.instFluxCol], df[self.instFluxErrCol], 

1550 df[self.photoCalibCol], df[self.photoCalibErrCol]) 

1551 

1552 

1553class LocalDipoleMeanFlux(LocalPhotometry): 

1554 """Compute absolute mean of dipole fluxes. 

1555 

1556 See also 

1557 -------- 

1558 LocalNanojansky 

1559 LocalNanojanskyErr 

1560 LocalDipoleMeanFluxErr 

1561 LocalDipoleDiffFlux 

1562 LocalDipoleDiffFluxErr 

1563 """ 

1564 def __init__(self, 

1565 instFluxPosCol, 

1566 instFluxNegCol, 

1567 instFluxPosErrCol, 

1568 instFluxNegErrCol, 

1569 photoCalibCol, 

1570 photoCalibErrCol, 

1571 **kwargs): 

1572 self.instFluxNegCol = instFluxNegCol 

1573 self.instFluxPosCol = instFluxPosCol 

1574 self.instFluxNegErrCol = instFluxNegErrCol 

1575 self.instFluxPosErrCol = instFluxPosErrCol 

1576 self.photoCalibCol = photoCalibCol 

1577 self.photoCalibErrCol = photoCalibErrCol 

1578 super().__init__(instFluxNegCol, 

1579 instFluxNegErrCol, 

1580 photoCalibCol, 

1581 photoCalibErrCol, 

1582 **kwargs) 

1583 

1584 @property 

1585 def columns(self): 

1586 return [self.instFluxPosCol, 

1587 self.instFluxNegCol, 

1588 self.photoCalibCol] 

1589 

1590 @property 

1591 def name(self): 

1592 return f'dipMeanFlux_{self.instFluxPosCol}_{self.instFluxNegCol}' 

1593 

1594 def _func(self, df): 

1595 return 0.5*(np.fabs(self.instFluxToNanojansky(df[self.instFluxNegCol], df[self.photoCalibCol])) 

1596 + np.fabs(self.instFluxToNanojansky(df[self.instFluxPosCol], df[self.photoCalibCol]))) 

1597 

1598 

1599class LocalDipoleMeanFluxErr(LocalDipoleMeanFlux): 

1600 """Compute the error on the absolute mean of dipole fluxes. 

1601 

1602 See also 

1603 -------- 

1604 LocalNanojansky 

1605 LocalNanojanskyErr 

1606 LocalDipoleMeanFlux 

1607 LocalDipoleDiffFlux 

1608 LocalDipoleDiffFluxErr 

1609 """ 

1610 

1611 @property 

1612 def columns(self): 

1613 return [self.instFluxPosCol, 

1614 self.instFluxNegCol, 

1615 self.instFluxPosErrCol, 

1616 self.instFluxNegErrCol, 

1617 self.photoCalibCol, 

1618 self.photoCalibErrCol] 

1619 

1620 @property 

1621 def name(self): 

1622 return f'dipMeanFluxErr_{self.instFluxPosCol}_{self.instFluxNegCol}' 

1623 

1624 def _func(self, df): 

1625 return 0.5*np.sqrt( 

1626 (np.fabs(df[self.instFluxNegCol]) + np.fabs(df[self.instFluxPosCol]) 

1627 * df[self.photoCalibErrCol])**2 

1628 + (df[self.instFluxNegErrCol]**2 + df[self.instFluxPosErrCol]**2) 

1629 * df[self.photoCalibCol]**2) 

1630 

1631 

1632class LocalDipoleDiffFlux(LocalDipoleMeanFlux): 

1633 """Compute the absolute difference of dipole fluxes. 

1634 

1635 Value is (abs(pos) - abs(neg)) 

1636 

1637 See also 

1638 -------- 

1639 LocalNanojansky 

1640 LocalNanojanskyErr 

1641 LocalDipoleMeanFlux 

1642 LocalDipoleMeanFluxErr 

1643 LocalDipoleDiffFluxErr 

1644 """ 

1645 

1646 @property 

1647 def columns(self): 

1648 return [self.instFluxPosCol, 

1649 self.instFluxNegCol, 

1650 self.photoCalibCol] 

1651 

1652 @property 

1653 def name(self): 

1654 return f'dipDiffFlux_{self.instFluxPosCol}_{self.instFluxNegCol}' 

1655 

1656 def _func(self, df): 

1657 return (np.fabs(self.instFluxToNanojansky(df[self.instFluxPosCol], df[self.photoCalibCol])) 

1658 - np.fabs(self.instFluxToNanojansky(df[self.instFluxNegCol], df[self.photoCalibCol]))) 

1659 

1660 

1661class LocalDipoleDiffFluxErr(LocalDipoleMeanFlux): 

1662 """Compute the error on the absolute difference of dipole fluxes. 

1663 

1664 See also 

1665 -------- 

1666 LocalNanojansky 

1667 LocalNanojanskyErr 

1668 LocalDipoleMeanFlux 

1669 LocalDipoleMeanFluxErr 

1670 LocalDipoleDiffFlux 

1671 """ 

1672 

1673 @property 

1674 def columns(self): 

1675 return [self.instFluxPosCol, 

1676 self.instFluxNegCol, 

1677 self.instFluxPosErrCol, 

1678 self.instFluxNegErrCol, 

1679 self.photoCalibCol, 

1680 self.photoCalibErrCol] 

1681 

1682 @property 

1683 def name(self): 

1684 return f'dipDiffFluxErr_{self.instFluxPosCol}_{self.instFluxNegCol}' 

1685 

1686 def _func(self, df): 

1687 return np.sqrt( 

1688 ((np.fabs(df[self.instFluxPosCol]) - np.fabs(df[self.instFluxNegCol])) 

1689 * df[self.photoCalibErrCol])**2 

1690 + (df[self.instFluxPosErrCol]**2 + df[self.instFluxNegErrCol]**2) 

1691 * df[self.photoCalibCol]**2) 

1692 

1693 

1694class Ebv(Functor): 

1695 """Compute E(B-V) from dustmaps.sfd 

1696 """ 

1697 _defaultDataset = 'ref' 

1698 name = "E(B-V)" 

1699 shortname = "ebv" 

1700 

1701 def __init__(self, **kwargs): 

1702 # import is only needed for Ebv 

1703 from dustmaps.sfd import SFDQuery 

1704 self._columns = ['coord_ra', 'coord_dec'] 

1705 self.sfd = SFDQuery() 

1706 super().__init__(**kwargs) 

1707 

1708 def _func(self, df): 

1709 coords = SkyCoord(df['coord_ra'].values * u.rad, df['coord_dec'].values * u.rad) 

1710 ebv = self.sfd(coords) 

1711 # Double precision unnecessary scientifically 

1712 # but currently needed for ingest to qserv 

1713 return pd.Series(ebv, index=df.index).astype('float64')