Coverage for python/lsst/pipe/tasks/functors.py: 34%

724 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2023-06-07 12:50 +0000

1# This file is part of pipe_tasks. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

22__all__ = ["Functor", "CompositeFunctor", "CustomFunctor", "Column", "Index", 

23 "CoordColumn", "RAColumn", "DecColumn", "HtmIndex20", "Mag", 

24 "MagErr", "MagDiff", "Color", "DeconvolvedMoments", "SdssTraceSize", 

25 "PsfSdssTraceSizeDiff", "HsmTraceSize", "PsfHsmTraceSizeDiff", 

26 "HsmFwhm", "E1", "E2", "RadiusFromQuadrupole", "LocalWcs", 

27 "ComputePixelScale", "ConvertPixelToArcseconds", 

28 "ConvertPixelSqToArcsecondsSq", "ReferenceBand", "Photometry", 

29 "NanoJansky", "NanoJanskyErr", "LocalPhotometry", "LocalNanojansky", 

30 "LocalNanojanskyErr", "LocalDipoleMeanFlux", 

31 "LocalDipoleMeanFluxErr", "LocalDipoleDiffFlux", 

32 "LocalDipoleDiffFluxErr", "Ebv", 

33 ] 

34 

import logging
import os.path
import re
import warnings
from itertools import product

import astropy.units as u
import numpy as np
import pandas as pd
import yaml
from astropy.coordinates import SkyCoord

import lsst.geom as geom
import lsst.sphgeom as sphgeom
from lsst.daf.butler import DeferredDatasetHandle
from lsst.pipe.base import InMemoryDatasetHandle
from lsst.utils import doImport
from lsst.utils.introspection import get_full_type_name

52 

53 

def init_fromDict(initDict, basePath='lsst.pipe.tasks.functors',
                  typeKey='functor', name=None):
    """Initialize an object defined in a dictionary.

    The object needs to be importable as f'{basePath}.{initDict[typeKey]}'.
    The positional and keyword arguments (if any) are contained in "args" and
    "kwargs" entries in the dictionary, respectively.
    This is used in `functors.CompositeFunctor.from_yaml` to initialize
    a composite functor from a specification in a YAML file.

    Parameters
    ----------
    initDict : dictionary
        Dictionary describing object's initialization. Must contain an entry
        keyed by ``typeKey`` that is the name of the object, relative to
        ``basePath``.
    basePath : str
        Path relative to module in which ``initDict[typeKey]`` is defined.
    typeKey : str
        Key of ``initDict`` that is the name of the object
        (relative to ``basePath``).
    name : str, optional
        Name of the functor being constructed; used only in the error
        message when construction fails.
    """
    # Work on a copy: the remaining entries are consumed as kwargs below and
    # the caller's dictionary must not be mutated.
    initDict = initDict.copy()
    # TO DO: DM-21956 We should be able to define functors outside this module
    pythonType = doImport(f'{basePath}.{initDict.pop(typeKey)}')
    # A bare string in "args" means a single positional argument.
    args = initDict.pop('args', [])
    if isinstance(args, str):
        args = [args]
    try:
        element = pythonType(*args, **initDict)
    except Exception as e:
        message = f'Error in constructing functor "{name}" of type {pythonType.__name__} with args: {args}'
        # Chain the original exception so the root-cause traceback is kept.
        raise type(e)(message, e.args) from e
    return element

91 

92 

class Functor(object):
    """Define and execute a calculation on a DataFrame or Handle holding a DataFrame.

    The `__call__` method accepts either a `DataFrame` object or a
    `DeferredDatasetHandle` or `InMemoryDatasetHandle`, and returns the
    result of the calculation as a single column. Each functor defines what
    columns are needed for the calculation, and only these columns are read
    from the dataset handle.

    The action of `__call__` consists of two steps: first, loading the
    necessary columns from disk into memory as a `pandas.DataFrame` object;
    and second, performing the computation on this dataframe and returning
    the result.

    To define a new `Functor`, a subclass must define a `_func` method,
    that takes a `pandas.DataFrame` and returns result in a `pandas.Series`.
    In addition, it must define the following attributes:

    * `_columns`: The columns necessary to perform the calculation
    * `name`: A name appropriate for a figure axis label
    * `shortname`: A name appropriate for use as a dictionary key

    On initialization, a `Functor` should declare what band (`filt` kwarg)
    and dataset (e.g. `'ref'`, `'meas'`, `'forced_src'`) it is intended to be
    applied to. This enables the `_get_data` method to extract the proper
    columns from the underlying data. If not specified, the dataset will
    fall back on the `_defaultDataset` attribute. If band is not specified
    and `dataset` is anything other than `'ref'`, then an error will be
    raised when trying to perform the calculation.

    Originally, `Functor` was set up to expect datasets formatted like the
    `deepCoadd_obj` dataset; that is, a dataframe with a multi-level column
    index, with the levels of the column index being `band`, `dataset`, and
    `column`. It has since been generalized to apply to dataframes without
    multi-level indices and multi-level indices with just `dataset` and
    `column` levels. In addition, the `_get_data` method that reads the
    columns from the underlying data will return a dataframe with column
    index levels defined by the `_dfLevels` attribute; by default, this is
    `column`.

    The `_dfLevels` attributes should generally not need to be changed,
    unless `_func` needs columns from multiple filters or datasets to do the
    calculation. An example of this is the `lsst.pipe.tasks.functors.Color`
    functor, for which `_dfLevels = ('band', 'column')`, and `_func` expects
    the dataframe it gets to have those levels in the column index.

    Parameters
    ----------
    filt : str
        Filter upon which to do the calculation.

    dataset : str
        Dataset upon which to do the calculation
        (e.g., 'ref', 'meas', 'forced_src').
    """

    _defaultDataset = 'ref'
    _dfLevels = ('column',)
    _defaultNoDup = False

    def __init__(self, filt=None, dataset=None, noDup=None):
        self.filt = filt
        self.dataset = dataset if dataset is not None else self._defaultDataset
        self._noDup = noDup
        # One logger per concrete functor type, named after the subclass.
        self.log = logging.getLogger(type(self).__name__)

    @property
    def noDup(self):
        # An explicit per-instance setting wins over the class default.
        if self._noDup is not None:
            return self._noDup
        else:
            return self._defaultNoDup

    @property
    def columns(self):
        """Columns required to perform calculation.
        """
        if not hasattr(self, '_columns'):
            raise NotImplementedError('Must define columns property or _columns attribute')
        return self._columns

    def _get_data_columnLevels(self, data, columnIndex=None):
        """Gets the names of the column index levels.

        This should only be called in the context of a multilevel table.

        Parameters
        ----------
        data : various
            The data to be read, can be a `DeferredDatasetHandle` or
            `InMemoryDatasetHandle`.
        columnIndex (optional): pandas `Index` object
            If not passed, then it is read from the `DeferredDatasetHandle`
            or `InMemoryDatasetHandle`.
        """
        if columnIndex is None:
            columnIndex = data.get(component="columns")
        return columnIndex.names

    def _get_data_columnLevelNames(self, data, columnIndex=None):
        """Gets the content of each of the column levels for a multilevel table.
        """
        if columnIndex is None:
            columnIndex = data.get(component="columns")

        columnLevels = columnIndex.names
        # Map each level name to the unique values present at that position
        # of the column tuples (np.unique also sorts them).
        columnLevelNames = {
            level: list(np.unique(np.array([c for c in columnIndex])[:, i]))
            for i, level in enumerate(columnLevels)
        }
        return columnLevelNames

    def _colsFromDict(self, colDict, columnIndex=None):
        """Converts dictionary column specification to a list of columns.
        """
        new_colDict = {}
        columnLevels = self._get_data_columnLevels(None, columnIndex=columnIndex)

        for i, lev in enumerate(columnLevels):
            if lev in colDict:
                # Normalize a bare string to a single-element list.
                if isinstance(colDict[lev], str):
                    new_colDict[lev] = [colDict[lev]]
                else:
                    new_colDict[lev] = colDict[lev]
            else:
                # Level not constrained: take every value at that level.
                new_colDict[lev] = columnIndex.levels[i]

        levelCols = [new_colDict[lev] for lev in columnLevels]
        cols = list(product(*levelCols))
        # The Cartesian product may include combinations that do not exist in
        # the table; keep only those actually present in the column index.
        colsAvailable = [col for col in cols if col in columnIndex]
        return colsAvailable

    def multilevelColumns(self, data, columnIndex=None, returnTuple=False):
        """Returns columns needed by functor from multilevel dataset.

        To access tables with multilevel column structure, the
        `DeferredDatasetHandle` or `InMemoryDatasetHandle` need to be passed
        either a list of tuples or a dictionary.

        Parameters
        ----------
        data : various
            The data as either `DeferredDatasetHandle`, or
            `InMemoryDatasetHandle`.
        columnIndex (optional): pandas `Index` object
            Either passed or read in from `DeferredDatasetHandle`.
        `returnTuple` : `bool`
            If true, then return a list of tuples rather than the column
            dictionary specification. This is set to `True` by
            `CompositeFunctor` in order to be able to combine columns from
            the various component functors.
        """
        if not isinstance(data, (DeferredDatasetHandle, InMemoryDatasetHandle)):
            raise RuntimeError(f"Unexpected data type. Got {get_full_type_name(data)}.")

        if columnIndex is None:
            columnIndex = data.get(component="columns")

        # Confirm that the dataset has the column levels the functor is expecting it to have.
        columnLevels = self._get_data_columnLevels(data, columnIndex)

        columnDict = {'column': self.columns,
                      'dataset': self.dataset}
        if self.filt is None:
            columnLevelNames = self._get_data_columnLevelNames(data, columnIndex)
            if "band" in columnLevels:
                if self.dataset == "ref":
                    # 'ref' quantities are band-independent, so any one band
                    # present in the table will do.
                    columnDict["band"] = columnLevelNames["band"][0]
                else:
                    raise ValueError(f"'filt' not set for functor {self.name}"
                                     f"(dataset {self.dataset}) "
                                     "and DataFrame "
                                     "contains multiple filters in column index. "
                                     "Set 'filt' or set 'dataset' to 'ref'.")
        else:
            columnDict['band'] = self.filt

        if returnTuple:
            return self._colsFromDict(columnDict, columnIndex=columnIndex)
        else:
            return columnDict

    def _func(self, df, dropna=True):
        # Subclasses must implement the actual computation here.
        raise NotImplementedError('Must define calculation on dataframe')

    def _get_columnIndex(self, data):
        """Return columnIndex, or None if data is not a handle.
        """

        if isinstance(data, (DeferredDatasetHandle, InMemoryDatasetHandle)):
            return data.get(component="columns")
        else:
            return None

    def _get_data(self, data):
        """Retrieve dataframe necessary for calculation.

        The data argument can be a `DataFrame`, a `DeferredDatasetHandle`,
        or an `InMemoryDatasetHandle`.

        Returns dataframe upon which `self._func` can act.
        """
        # We wrap a dataframe in a handle here to take advantage of the
        # dataframe delegate dataframe column wrangling abilities.
        if isinstance(data, pd.DataFrame):
            _data = InMemoryDatasetHandle(data, storageClass="DataFrame")
        elif isinstance(data, (DeferredDatasetHandle, InMemoryDatasetHandle)):
            _data = data
        else:
            raise RuntimeError(f"Unexpected type provided for data. Got {get_full_type_name(data)}.")

        # First thing to do: check to see if the data source has a multilevel column index or not.
        columnIndex = self._get_columnIndex(_data)
        is_multiLevel = isinstance(columnIndex, pd.MultiIndex)

        # Get proper columns specification for this functor.
        if is_multiLevel:
            columns = self.multilevelColumns(_data, columnIndex=columnIndex)
        else:
            columns = self.columns

        # Load in-memory dataframe with appropriate columns the gen3 way.
        df = _data.get(parameters={"columns": columns})

        # Drop unnecessary column levels.
        if is_multiLevel:
            df = self._setLevels(df)

        return df

    def _setLevels(self, df):
        # Keep only the column-index levels listed in _dfLevels.
        levelsToDrop = [n for n in df.columns.names if n not in self._dfLevels]
        df.columns = df.columns.droplevel(levelsToDrop)
        return df

    def _dropna(self, vals):
        return vals.dropna()

    def __call__(self, data, dropna=False):
        df = self._get_data(data)
        try:
            vals = self._func(df)
        except Exception as e:
            # Log and fall back to an all-NaN column so a single failing
            # functor does not abort a larger computation.
            self.log.error("Exception in %s call: %s: %s", self.name, type(e).__name__, e)
            vals = self.fail(df)
        if dropna:
            vals = self._dropna(vals)

        return vals

    def difference(self, data1, data2, **kwargs):
        """Computes difference between functor called on two different DataFrame/Handle objects.
        """
        return self(data1, **kwargs) - self(data2, **kwargs)

    def fail(self, df):
        # Fallback result: an all-NaN Series aligned with the input's index.
        return pd.Series(np.full(len(df), np.nan), index=df.index)

    @property
    def name(self):
        """Full name of functor (suitable for figure labels).
        """
        # NOTE(review): this returns the NotImplementedError class rather
        # than raising it, so `self.name` is always usable in log/error
        # messages even for subclasses that forget to override it — confirm
        # this is intentional before changing.
        return NotImplementedError

    @property
    def shortname(self):
        """Short name of functor (suitable for column name/dict key).
        """
        return self.name

364 

365 

class CompositeFunctor(Functor):
    """Perform multiple calculations at once on a catalog.

    The role of a `CompositeFunctor` is to group together computations from
    multiple functors. Instead of returning `pandas.Series` a
    `CompositeFunctor` returns a `pandas.Dataframe`, with the column names
    being the keys of `funcDict`.

    The `columns` attribute of a `CompositeFunctor` is the union of all
    columns in all the component functors.

    A `CompositeFunctor` does not use a `_func` method itself; rather, when
    a `CompositeFunctor` is called, all its columns are loaded at once, and
    the resulting dataframe is passed to the `_func` method of each
    component functor. This has the advantage of only doing I/O (reading
    from parquet file) once, and works because each individual `_func`
    method of each component functor does not care if there are *extra*
    columns in the dataframe being passed; only that it must contain
    *at least* the `columns` it expects.

    An important and useful class method is `from_yaml`, which takes as
    argument the path to a YAML file specifying a collection of functors.

    Parameters
    ----------
    funcs : `dict` or `list`
        Dictionary or list of functors. If a list, then it will be
        converted into a dictionary according to the `.shortname` attribute
        of each functor.
    """
    dataset = None
    name = "CompositeFunctor"

    def __init__(self, funcs, **kwargs):

        # isinstance rather than an exact type comparison, so dict
        # subclasses (e.g. OrderedDict) are accepted as well.
        if isinstance(funcs, dict):
            self.funcDict = funcs
        else:
            self.funcDict = {f.shortname: f for f in funcs}

        self._filt = None

        super().__init__(**kwargs)

    @property
    def filt(self):
        return self._filt

    @filt.setter
    def filt(self, filt):
        # Propagate the filter to every component functor.
        if filt is not None:
            for f in self.funcDict.values():
                f.filt = filt
        self._filt = filt

    def update(self, new):
        """Add functors from a dict or from another CompositeFunctor.

        Parameters
        ----------
        new : `dict` or `CompositeFunctor`
            Functors to merge into this one, keyed by output column name.
        """
        if isinstance(new, dict):
            self.funcDict.update(new)
        elif isinstance(new, CompositeFunctor):
            self.funcDict.update(new.funcDict)
        else:
            raise TypeError('Can only update with dictionary or CompositeFunctor.')

        # Make sure new functors have the same 'filt' set. Re-assigning
        # triggers the property setter above, which propagates the filter to
        # the newly added component functors.
        if self.filt is not None:
            self.filt = self.filt

    @property
    def columns(self):
        # Union of the columns required by every component functor.
        return list(set([x for y in [f.columns for f in self.funcDict.values()] for x in y]))

    def multilevelColumns(self, data, **kwargs):
        # Get the union of columns for all component functors. Note the need
        # to have `returnTuple=True` here so the results can be combined.
        return list(
            set(
                [
                    x
                    for y in [
                        f.multilevelColumns(data, returnTuple=True, **kwargs) for f in self.funcDict.values()
                    ]
                    for x in y
                ]
            )
        )

    def __call__(self, data, **kwargs):
        """Apply the functor to the data table.

        Parameters
        ----------
        data : various
            The data represented as `lsst.daf.butler.DeferredDatasetHandle`,
            `lsst.pipe.base.InMemoryDatasetHandle`,
            or `pandas.DataFrame`.
            The table or a pointer to a table on disk from which columns can
            be accessed.
        """
        if isinstance(data, pd.DataFrame):
            _data = InMemoryDatasetHandle(data, storageClass="DataFrame")
        elif isinstance(data, (DeferredDatasetHandle, InMemoryDatasetHandle)):
            _data = data
        else:
            raise RuntimeError(f"Unexpected type provided for data. Got {get_full_type_name(data)}.")

        columnIndex = self._get_columnIndex(_data)

        if isinstance(columnIndex, pd.MultiIndex):
            # Load the union of all needed columns once, then let each
            # component functor operate on its own sub-frame.
            columns = self.multilevelColumns(_data, columnIndex=columnIndex)
            df = _data.get(parameters={"columns": columns})

            valDict = {}
            for k, f in self.funcDict.items():
                try:
                    subdf = f._setLevels(
                        df[f.multilevelColumns(_data, returnTuple=True, columnIndex=columnIndex)]
                    )
                    valDict[k] = f._func(subdf)
                except Exception as e:
                    self.log.exception(
                        "Exception in %s (funcs: %s) call: %s",
                        self.name,
                        str(list(self.funcDict.keys())),
                        type(e).__name__,
                    )
                    try:
                        valDict[k] = f.fail(subdf)
                    except NameError:
                        # subdf was never assigned: the failure happened
                        # while selecting columns, so there is nothing to
                        # fall back on — re-raise the original error.
                        raise e

        else:
            df = _data.get(parameters={"columns": self.columns})

            valDict = {k: f._func(df) for k, f in self.funcDict.items()}

        # Check that output columns are actually columns.
        for name, colVal in valDict.items():
            if len(colVal.shape) != 1:
                raise RuntimeError("Transformed column '%s' is not the shape of a column. "
                                   "It is shaped %s and type %s." % (name, colVal.shape, type(colVal)))

        try:
            valDf = pd.concat(valDict, axis=1)
        except TypeError:
            print([(k, type(v)) for k, v in valDict.items()])
            raise

        if kwargs.get('dropna', False):
            valDf = valDf.dropna(how='any')

        return valDf

    @classmethod
    def renameCol(cls, col, renameRules):
        """Rename a column name by applying every matching prefix rule."""
        if renameRules is None:
            return col
        for old, new in renameRules:
            if col.startswith(old):
                col = col.replace(old, new)
        return col

    @classmethod
    def from_file(cls, filename, **kwargs):
        """Build a CompositeFunctor from a YAML specification file on disk."""
        # Allow environment variables in the filename.
        filename = os.path.expandvars(filename)
        with open(filename) as f:
            translationDefinition = yaml.safe_load(f)

        return cls.from_yaml(translationDefinition, **kwargs)

    @classmethod
    def from_yaml(cls, translationDefinition, **kwargs):
        """Build a CompositeFunctor from a parsed YAML specification dict."""
        funcs = {}
        for func, val in translationDefinition['funcs'].items():
            funcs[func] = init_fromDict(val, name=func)

        if 'flag_rename_rules' in translationDefinition:
            renameRules = translationDefinition['flag_rename_rules']
        else:
            renameRules = None

        # Plain flag columns are passed through unchanged, keyed by their
        # (possibly renamed) name and tagged with the appropriate dataset.
        if 'calexpFlags' in translationDefinition:
            for flag in translationDefinition['calexpFlags']:
                funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='calexp')

        if 'refFlags' in translationDefinition:
            for flag in translationDefinition['refFlags']:
                funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='ref')

        if 'forcedFlags' in translationDefinition:
            for flag in translationDefinition['forcedFlags']:
                funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='forced_src')

        if 'flags' in translationDefinition:
            for flag in translationDefinition['flags']:
                funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='meas')

        return cls(funcs, **kwargs)

562 

563 

def mag_aware_eval(df, expr, log):
    """Evaluate an expression on a DataFrame, knowing what the 'mag' function means.

    Builds on `pandas.DataFrame.eval`, which parses and executes math on
    dataframes.

    Parameters
    ----------
    df : pandas.DataFrame
        Dataframe on which to evaluate expression.

    expr : str
        Expression.
    """
    mag_call = r'mag\((\w+)\)'
    try:
        # Translate mag(col) into the equivalent log expression and evaluate.
        translated = re.sub(mag_call, r'-2.5*log(\g<1>)/log(10)', expr)
        result = df.eval(translated)
    except Exception as e:  # Should check what actually gets raised
        log.error("Exception in mag_aware_eval: %s: %s", type(e).__name__, e)
        # Retry assuming the mag() argument was a bare flux-column prefix.
        translated = re.sub(mag_call, r'-2.5*log(\g<1>_instFlux)/log(10)', expr)
        result = df.eval(translated)
    return result

585 

586 

class CustomFunctor(Functor):
    """Arbitrary computation on a catalog.

    Column names (and thus the columns to be loaded from catalog) are found
    by finding all words and trying to ignore all "math-y" words.

    Parameters
    ----------
    expr : str
        Expression to evaluate, to be parsed and executed by
        `mag_aware_eval`.
    """
    # Function names that look like identifiers but are not column names.
    _ignore_words = ('mag', 'sin', 'cos', 'exp', 'log', 'sqrt')

    def __init__(self, expr, **kwargs):
        self.expr = expr
        super().__init__(**kwargs)

    @property
    def name(self):
        return self.expr

    @property
    def columns(self):
        # Arguments of mag(...) calls need special handling below.
        mag_args = re.findall(r'mag\(\s*(\w+)\s*\)', self.expr)

        words = re.findall(r'[a-zA-Z_]+', self.expr)
        candidates = [w for w in words if w not in self._ignore_words]
        excluded = []
        for arg in mag_args:
            if re.search('_instFlux$', arg):
                candidates.append(arg)
            else:
                # mag(x) really reads x_instFlux; x itself is not a column.
                candidates.append(f'{arg}_instFlux')
                excluded.append(arg)

        return list(set([c for c in candidates if c not in excluded]))

    def _func(self, df):
        return mag_aware_eval(df, self.expr, self.log)

625 

626 

class Column(Functor):
    """Return a single named column from the catalog, unmodified.

    Parameters
    ----------
    col : str
        Name of the column to extract.
    """

    def __init__(self, col, **kwargs):
        self.col = col
        super().__init__(**kwargs)

    @property
    def name(self):
        # The column name doubles as the functor name.
        return self.col

    @property
    def columns(self):
        # Only the one column is ever read.
        return [self.col]

    def _func(self, df):
        return df[self.col]

645 

646 

class Index(Functor):
    """Return the value of the index for each object.
    """

    # Placeholder: some column must be requested to get a dataframe back,
    # even though only its index is used.
    columns = ['coord_ra']
    _defaultDataset = 'ref'
    _defaultNoDup = True

    def _func(self, df):
        # Materialize the index as a Series aligned with itself.
        return pd.Series(df.index, index=df.index)

657 

658 

class CoordColumn(Column):
    """Base class for coordinate column, in degrees.
    """
    # When True, the stored values are radians and are converted on read.
    _radians = True

    def __init__(self, col, **kwargs):
        super().__init__(col, **kwargs)

    def _func(self, df):
        # Must not modify original column in case that column is used by
        # another functor; multiplication produces a new Series.
        values = df[self.col]
        if self._radians:
            return values * 180 / np.pi
        return values

671 

672 

class RAColumn(CoordColumn):
    """Right Ascension, in degrees.

    Reads the ``coord_ra`` column (stored in radians) and converts it via
    the `CoordColumn` machinery.
    """
    name = 'RA'
    _defaultNoDup = True

    def __init__(self, **kwargs):
        super().__init__('coord_ra', **kwargs)

    # The previous no-op ``__call__`` override (which only forwarded to
    # ``super().__call__``) has been removed; the inherited behavior is
    # identical.

684 

685 

class DecColumn(CoordColumn):
    """Declination, in degrees.

    Reads the ``coord_dec`` column (stored in radians) and converts it via
    the `CoordColumn` machinery.
    """
    name = 'Dec'
    _defaultNoDup = True

    def __init__(self, **kwargs):
        super().__init__('coord_dec', **kwargs)

    # The previous no-op ``__call__`` override (which only forwarded to
    # ``super().__call__``) has been removed; the inherited behavior is
    # identical.

697 

698 

class HtmIndex20(Functor):
    """Compute the level 20 HtmIndex for the catalog.

    Notes
    -----
    This functor was implemented to satisfy requirements of old APDB
    interface which required ``pixelId`` column in DiaObject with HTM20
    index. APDB interface had migrated to not need that information, but we
    keep this class in case it may be useful for something else.
    """
    name = "Htm20"
    htmLevel = 20
    # When True, the ra/dec columns are interpreted as radians.
    _radians = True

    def __init__(self, ra, dec, **kwargs):
        self.pixelator = sphgeom.HtmPixelization(self.htmLevel)
        self.ra = ra
        self.dec = dec
        self._columns = [self.ra, self.dec]
        super().__init__(**kwargs)

    def _func(self, df):
        # _radians is fixed for the lifetime of the functor, so the angular
        # unit can be chosen once rather than per row.
        units = geom.radians if self._radians else geom.degrees

        def computePixel(row):
            sphPoint = geom.SpherePoint(row[self.ra], row[self.dec], units)
            return self.pixelator.index(sphPoint.getVector())

        return df.apply(computePixel, axis=1, result_type='reduce').astype('int64')

734 

735 

def fluxName(col):
    """Return *col* with the ``_instFlux`` suffix appended if missing."""
    suffix = '_instFlux'
    return col if col.endswith(suffix) else col + suffix

740 

741 

def fluxErrName(col):
    """Return *col* with the ``_instFluxErr`` suffix appended if missing."""
    suffix = '_instFluxErr'
    return col if col.endswith(suffix) else col + suffix

746 

747 

class Mag(Functor):
    """Compute calibrated magnitude.

    Takes a `calib` argument, which returns the flux at mag=0 as
    `calib.getFluxMag0()`. If not provided, then the default `fluxMag0` is
    63095734448.0194, which is default for HSC.
    This default should be removed in DM-21955.

    This calculation hides warnings about invalid values and dividing by
    zero.

    As for all functors, a `dataset` and `filt` kwarg should be provided
    upon initialization. Unlike the default `Functor`, however, the default
    dataset for a `Mag` is `'meas'`, rather than `'ref'`.

    Parameters
    ----------
    col : `str`
        Name of flux column from which to compute magnitude. Can be
        parseable by `lsst.pipe.tasks.functors.fluxName` function---that is,
        you can pass `'modelfit_CModel'` instead of
        `'modelfit_CModel_instFlux'`) and it will understand.
    calib : `lsst.afw.image.calib.Calib` (optional)
        Object that knows zero point.
    """
    _defaultDataset = 'meas'

    def __init__(self, col, calib=None, **kwargs):
        self.col = fluxName(col)
        self.calib = calib
        if calib is not None:
            self.fluxMag0 = calib.getFluxMag0()[0]
        else:
            # TO DO: DM-21955 Replace hard coded photometic calibration values
            self.fluxMag0 = 63095734448.0194

        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.col]

    def _func(self, df):
        # Use the stdlib `warnings` module directly: the `np.warnings` alias
        # used here previously was removed in NumPy 1.25 and was only ever a
        # re-export of `warnings`.
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', r'invalid value encountered')
            warnings.filterwarnings('ignore', r'divide by zero')
            return -2.5*np.log10(df[self.col] / self.fluxMag0)

    @property
    def name(self):
        return f'mag_{self.col}'

798 

799 

class MagErr(Mag):
    """Compute calibrated magnitude uncertainty.

    Takes the same `calib` object as `lsst.pipe.tasks.functors.Mag`.

    Parameters
    ----------
    col : `str`
        Name of flux column.
    calib : `lsst.afw.image.calib.Calib` (optional)
        Object that knows zero point.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        if self.calib is not None:
            self.fluxMag0Err = self.calib.getFluxMag0()[1]
        else:
            self.fluxMag0Err = 0.

    @property
    def columns(self):
        # Flux column plus its matching error column.
        return [self.col, self.col + 'Err']

    def _func(self, df):
        # Use the stdlib `warnings` module directly: the `np.warnings` alias
        # used here previously was removed in NumPy 1.25 and was only ever a
        # re-export of `warnings`.
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', r'invalid value encountered')
            warnings.filterwarnings('ignore', r'divide by zero')
            fluxCol, fluxErrCol = self.columns
            x = df[fluxErrCol] / df[fluxCol]
            y = self.fluxMag0Err / self.fluxMag0
            # First-order error propagation for mag = -2.5*log10(flux/fluxMag0).
            magErr = (2.5 / np.log(10.)) * np.sqrt(x*x + y*y)
            return magErr

    @property
    def name(self):
        return super().name + '_err'

836 

837 

class MagDiff(Functor):
    """Functor to calculate magnitude difference between two flux columns.

    Parameters
    ----------
    col1, col2 : `str`
        Names of the flux columns; passed through
        `lsst.pipe.tasks.functors.fluxName`, so the ``_instFlux`` suffix may
        be omitted. The result is ``mag(col1) - mag(col2)``.
    """
    # NOTE: the class docstring was previously placed after
    # `_defaultDataset`, where Python does not treat it as a docstring.
    _defaultDataset = 'meas'

    def __init__(self, col1, col2, **kwargs):
        self.col1 = fluxName(col1)
        self.col2 = fluxName(col2)
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.col1, self.col2]

    def _func(self, df):
        # Use the stdlib `warnings` module directly: the `np.warnings` alias
        # used here previously was removed in NumPy 1.25 and was only ever a
        # re-export of `warnings`.
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', r'invalid value encountered')
            warnings.filterwarnings('ignore', r'divide by zero')
            return -2.5*np.log10(df[self.col1]/df[self.col2])

    @property
    def name(self):
        return f'(mag_{self.col1} - mag_{self.col2})'

    @property
    def shortname(self):
        return f'magDiff_{self.col1}_{self.col2}'

865 

866 

class Color(Functor):
    """Compute the color between two filters.

    Computes color by initializing two different `Mag` functors based on the
    `col` and filters provided, and then returning the difference.

    This is enabled by the `_func` expecting a dataframe with a multilevel
    column index, with both `'band'` and `'column'`, instead of just
    `'column'`, which is the `Functor` default. This is controlled by the
    `_dfLevels` attribute.

    Also of note, the default dataset for `Color` is `forced_src'`, whereas
    for `Mag` it is `'meas'`.

    Parameters
    ----------
    col : str
        Name of flux column from which to compute; same as would be passed
        to `lsst.pipe.tasks.functors.Mag`.

    filt2, filt1 : str
        Filters from which to compute magnitude difference.
        Color computed is `Mag(filt2) - Mag(filt1)`.
    """
    _defaultDataset = 'forced_src'
    _dfLevels = ('band', 'column')
    _defaultNoDup = True

    def __init__(self, col, filt2, filt1, **kwargs):
        self.col = fluxName(col)
        if filt2 == filt1:
            raise RuntimeError("Cannot compute Color for %s: %s - %s " % (col, filt2, filt1))
        self.filt2 = filt2
        self.filt1 = filt1

        # One Mag functor per band; their difference is the color.
        self.mag2 = Mag(col, filt=filt2, **kwargs)
        self.mag1 = Mag(col, filt=filt1, **kwargs)

        super().__init__(**kwargs)

    @property
    def filt(self):
        # A color spans two bands, so it has no single filter.
        return None

    @filt.setter
    def filt(self, filt):
        # Deliberately ignore attempts to set a single filter (e.g. by
        # CompositeFunctor); the two bands are fixed at construction time.
        pass

    def _func(self, df):
        # df carries a ('band', 'column') column index; select each band's
        # sub-frame and delegate to the per-band Mag functors.
        mag2 = self.mag2._func(df[self.filt2])
        mag1 = self.mag1._func(df[self.filt1])
        return mag2 - mag1

    @property
    def columns(self):
        return [self.mag1.col, self.mag2.col]

    def multilevelColumns(self, parq, **kwargs):
        # Column tuples for both bands. NOTE(review): the tuple order here
        # is (dataset, band, column) — confirm it matches the table's level
        # order.
        return [(self.dataset, self.filt1, self.col), (self.dataset, self.filt2, self.col)]

    @property
    def name(self):
        return f'{self.filt2} - {self.filt1} ({self.col})'

    @property
    def shortname(self):
        return f"{self.col}_{self.filt2.replace('-', '')}m{self.filt1.replace('-', '')}"

935 

936 

class DeconvolvedMoments(Functor):
    """Source trace moments with the PSF contribution subtracted.

    Prefers the HSM source moments, falling back to the SDSS moments
    wherever the HSM trace is not finite; the HSM PSF trace is then
    subtracted.
    """
    name = 'Deconvolved Moments'
    shortname = 'deconvolvedMoments'
    _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
                "ext_shapeHSM_HsmSourceMoments_yy",
                "base_SdssShape_xx", "base_SdssShape_yy",
                "ext_shapeHSM_HsmPsfMoments_xx",
                "ext_shapeHSM_HsmPsfMoments_yy")

    def _func(self, df):
        """Calculate deconvolved moments."""
        if "ext_shapeHSM_HsmSourceMoments_xx" in df.columns:
            hsm = df["ext_shapeHSM_HsmSourceMoments_xx"] + df["ext_shapeHSM_HsmSourceMoments_yy"]
        else:
            hsm = np.full(len(df), np.nan)
        sdss = df["base_SdssShape_xx"] + df["base_SdssShape_yy"]
        if "ext_shapeHSM_HsmPsfMoments_xx" not in df.columns:
            # LSST does not have shape.sdss.psf. base_PsfShape could be added
            # to the catalog via
            # exposure.getPsf().computeShape(s.getCentroid()).getIxx().
            raise RuntimeError('No psf shape parameter found in catalog')
        psf = df["ext_shapeHSM_HsmPsfMoments_xx"] + df["ext_shapeHSM_HsmPsfMoments_yy"]
        # Use the SDSS trace wherever the HSM trace is not finite.
        return hsm.where(np.isfinite(hsm), sdss) - psf

962 

963 

class SdssTraceSize(Functor):
    """Functor to calculate SDSS trace radius size for sources."""
    name = "SDSS Trace Size"
    shortname = 'sdssTrace'
    _columns = ("base_SdssShape_xx", "base_SdssShape_yy")

    def _func(self, df):
        # Trace radius: sqrt of the mean of the two diagonal second moments.
        trace = 0.5*(df["base_SdssShape_xx"] + df["base_SdssShape_yy"])
        return np.sqrt(trace)

973 

974 

class PsfSdssTraceSizeDiff(Functor):
    """Percent difference between the SDSS trace radius of a source and
    that of its PSF model.
    """
    name = "PSF - SDSS Trace Size"
    shortname = 'psf_sdssTrace'
    _columns = ("base_SdssShape_xx", "base_SdssShape_yy",
                "base_SdssShape_psf_xx", "base_SdssShape_psf_yy")

    def _func(self, df):
        src = np.sqrt(0.5*(df["base_SdssShape_xx"] + df["base_SdssShape_yy"]))
        psf = np.sqrt(0.5*(df["base_SdssShape_psf_xx"] + df["base_SdssShape_psf_yy"]))
        # Percent difference, normalized by the mean of the two sizes.
        return 100*(src - psf)/(0.5*(src + psf))

987 

988 

class HsmTraceSize(Functor):
    """Functor to calculate HSM trace radius size for sources."""
    name = 'HSM Trace Size'
    shortname = 'hsmTrace'
    _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
                "ext_shapeHSM_HsmSourceMoments_yy")

    def _func(self, df):
        # Trace radius: sqrt of the mean of the two diagonal second moments.
        trace = 0.5*(df["ext_shapeHSM_HsmSourceMoments_xx"]
                     + df["ext_shapeHSM_HsmSourceMoments_yy"])
        return np.sqrt(trace)

1000 

1001 

class PsfHsmTraceSizeDiff(Functor):
    """Percent difference between the HSM trace radius of a source and
    that of its PSF model.
    """
    name = 'PSF - HSM Trace Size'
    shortname = 'psf_HsmTrace'
    _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
                "ext_shapeHSM_HsmSourceMoments_yy",
                "ext_shapeHSM_HsmPsfMoments_xx",
                "ext_shapeHSM_HsmPsfMoments_yy")

    def _func(self, df):
        src = np.sqrt(0.5*(df["ext_shapeHSM_HsmSourceMoments_xx"]
                           + df["ext_shapeHSM_HsmSourceMoments_yy"]))
        psf = np.sqrt(0.5*(df["ext_shapeHSM_HsmPsfMoments_xx"]
                           + df["ext_shapeHSM_HsmPsfMoments_yy"]))
        # Percent difference, normalized by the mean of the two sizes.
        return 100*(src - psf)/(0.5*(src + psf))

1018 

1019 

class HsmFwhm(Functor):
    """PSF FWHM in arcseconds derived from the HSM PSF trace radius."""
    name = 'HSM Psf FWHM'
    _columns = ('ext_shapeHSM_HsmPsfMoments_xx', 'ext_shapeHSM_HsmPsfMoments_yy')
    # TODO: DM-21403 pixel scale should be computed from the CD matrix or transform matrix
    pixelScale = 0.168
    # Conversion factor from Gaussian sigma to FWHM.
    SIGMA2FWHM = 2*np.sqrt(2*np.log(2))

    def _func(self, df):
        trace = 0.5*(df['ext_shapeHSM_HsmPsfMoments_xx'] + df['ext_shapeHSM_HsmPsfMoments_yy'])
        return self.pixelScale*self.SIGMA2FWHM*np.sqrt(trace)

1030 

1031 

class E1(Functor):
    """Distortion ellipticity component e1 = (Ixx - Iyy) / (Ixx + Iyy).

    Parameters
    ----------
    colXX : str
        Name of the Ixx second-moment column.
    colXY : str
        Name of the Ixy second-moment column. Not used in the e1
        calculation itself, but kept for interface symmetry with `E2`.
    colYY : str
        Name of the Iyy second-moment column.
    """
    name = "Distortion Ellipticity (e1)"
    shortname = "Distortion"

    def __init__(self, colXX, colXY, colYY, **kwargs):
        self.colXX = colXX
        self.colXY = colXY
        self.colYY = colYY
        self._columns = [self.colXX, self.colXY, self.colYY]
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.colXX, self.colXY, self.colYY]

    def _func(self, df):
        # Bug fix: the numerator must be parenthesized. The original
        # expression `Ixx - Iyy / (Ixx + Iyy)` divided only Iyy by the
        # trace instead of computing (Ixx - Iyy) / (Ixx + Iyy).
        return (df[self.colXX] - df[self.colYY]) / (df[self.colXX] + df[self.colYY])

1049 

1050 

class E2(Functor):
    """Distortion ellipticity component e2 = 2*Ixy / (Ixx + Iyy)."""
    name = "Ellipticity e2"

    def __init__(self, colXX, colXY, colYY, **kwargs):
        # Names of the three second-moment columns.
        self.colXX, self.colXY, self.colYY = colXX, colXY, colYY
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.colXX, self.colXY, self.colYY]

    def _func(self, df):
        trace = df[self.colXX] + df[self.colYY]
        return 2*df[self.colXY] / trace

1066 

1067 

class RadiusFromQuadrupole(Functor):
    """Determinant radius from quadrupole moments: |Q|**(1/4)."""

    def __init__(self, colXX, colXY, colYY, **kwargs):
        # Names of the three second-moment columns.
        self.colXX, self.colXY, self.colYY = colXX, colXY, colYY
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.colXX, self.colXY, self.colYY]

    def _func(self, df):
        determinant = df[self.colXX]*df[self.colYY] - df[self.colXY]**2
        return determinant**0.25

1082 

1083 

class LocalWcs(Functor):
    """Computations using the stored localWcs.

    Parameters
    ----------
    colCD_1_1 : `str`
        Name of the column holding the [1, 1] element of the local WCS
        CD matrix.
    colCD_1_2 : `str`
        Name of the column holding the [1, 2] element.
    colCD_2_1 : `str`
        Name of the column holding the [2, 1] element.
    colCD_2_2 : `str`
        Name of the column holding the [2, 2] element.
    """
    name = "LocalWcsOperations"

    def __init__(self,
                 colCD_1_1,
                 colCD_1_2,
                 colCD_2_1,
                 colCD_2_2,
                 **kwargs):
        self.colCD_1_1 = colCD_1_1
        self.colCD_1_2 = colCD_1_2
        self.colCD_2_1 = colCD_2_1
        self.colCD_2_2 = colCD_2_2
        super().__init__(**kwargs)

    def computeDeltaRaDec(self, x, y, cd11, cd12, cd21, cd22):
        """Convert a pixel offset (x, y) into a (RA, Dec) offset by applying
        the local WCS affine transform (CD matrix).

        Parameters
        ----------
        x : `pandas.Series`
            X pixel coordinate.
        y : `pandas.Series`
            Y pixel coordinate.
        cd11 : `pandas.Series`
            [1, 1] element of the local Wcs affine transform.
        cd12 : `pandas.Series`
            [1, 2] element of the local Wcs affine transform.
        cd21 : `pandas.Series`
            [2, 1] element of the local Wcs affine transform.
        cd22 : `pandas.Series`
            [2, 2] element of the local Wcs affine transform.

        Returns
        -------
        raDecTuple : tuple
            RA and dec conversion of x and y given the local Wcs. Returned
            units are in radians.
        """
        # (dRA, dDec) = CD @ (x, y).
        return (x * cd11 + y * cd12, x * cd21 + y * cd22)

    def computeSkySeparation(self, ra1, dec1, ra2, dec2):
        """Compute the great-circle separation between two sky positions
        using the haversine formula.

        Parameters
        ----------
        ra1 : `pandas.Series`
            Ra of the first coordinate in radians.
        dec1 : `pandas.Series`
            Dec of the first coordinate in radians.
        ra2 : `pandas.Series`
            Ra of the second coordinate in radians.
        dec2 : `pandas.Series`
            Dec of the second coordinate in radians.

        Returns
        -------
        dist : `pandas.Series`
            Distance on the sphere in radians.
        """
        deltaDec = dec2 - dec1
        deltaRa = ra2 - ra1
        return 2 * np.arcsin(
            np.sqrt(
                np.sin(deltaDec / 2) ** 2
                + np.cos(dec2) * np.cos(dec1) * np.sin(deltaRa / 2) ** 2))

    def getSkySeparationFromPixel(self, x1, y1, x2, y2, cd11, cd12, cd21, cd22):
        """Compute the on-sky separation between pixel positions
        (x1, y1) and (x2, y2).

        Parameters
        ----------
        x1 : `pandas.Series`
            X pixel coordinate.
        y1 : `pandas.Series`
            Y pixel coordinate.
        x2 : `pandas.Series`
            X pixel coordinate.
        y2 : `pandas.Series`
            Y pixel coordinate.
        cd11 : `pandas.Series`
            [1, 1] element of the local Wcs affine transform.
        cd12 : `pandas.Series`
            [1, 2] element of the local Wcs affine transform.
        cd21 : `pandas.Series`
            [2, 1] element of the local Wcs affine transform.
        cd22 : `pandas.Series`
            [2, 2] element of the local Wcs affine transform.

        Returns
        -------
        Distance : `pandas.Series`
            Separation on the sky in radians.
        """
        ra1, dec1 = self.computeDeltaRaDec(x1, y1, cd11, cd12, cd21, cd22)
        ra2, dec2 = self.computeDeltaRaDec(x2, y2, cd11, cd12, cd21, cd22)
        # Great circle distance for small separations.
        return self.computeSkySeparation(ra1, dec1, ra2, dec2)

1189 

1190 

class ComputePixelScale(LocalWcs):
    """Compute the local pixel scale from the stored CDMatrix.
    """
    name = "PixelScale"

    @property
    def columns(self):
        return [self.colCD_1_1,
                self.colCD_1_2,
                self.colCD_2_1,
                self.colCD_2_2]

    def pixelScaleArcseconds(self, cd11, cd12, cd21, cd22):
        """Compute the local pixel scale conversion in arcseconds.

        Parameters
        ----------
        cd11 : `pandas.Series`
            [1, 1] element of the local Wcs affine transform in radians.
        cd12 : `pandas.Series`
            [1, 2] element of the local Wcs affine transform in radians.
        cd21 : `pandas.Series`
            [2, 1] element of the local Wcs affine transform in radians.
        cd22 : `pandas.Series`
            [2, 2] element of the local Wcs affine transform in radians.

        Returns
        -------
        pixScale : `pandas.Series`
            Arcseconds per pixel at the location of the local WCS.
        """
        # sqrt(|det CD|) is the linear pixel scale in radians; convert
        # to degrees and then to arcseconds.
        return 3600 * np.degrees(np.sqrt(np.fabs(cd11 * cd22 - cd12 * cd21)))

    def _func(self, df):
        cdCols = (self.colCD_1_1, self.colCD_1_2, self.colCD_2_1, self.colCD_2_2)
        return self.pixelScaleArcseconds(*(df[col] for col in cdCols))

1231 

1232 

class ConvertPixelToArcseconds(ComputePixelScale):
    """Convert a value in units pixels to units arcseconds.
    """

    def __init__(self,
                 col,
                 colCD_1_1,
                 colCD_1_2,
                 colCD_2_1,
                 colCD_2_2,
                 **kwargs):
        # Name of the pixel-valued column to convert.
        self.col = col
        super().__init__(colCD_1_1, colCD_1_2, colCD_2_1, colCD_2_2, **kwargs)

    @property
    def name(self):
        return f"{self.col}_asArcseconds"

    @property
    def columns(self):
        # The value column plus the four CD-matrix columns.
        return [self.col,
                self.colCD_1_1,
                self.colCD_1_2,
                self.colCD_2_1,
                self.colCD_2_2]

    def _func(self, df):
        scale = self.pixelScaleArcseconds(df[self.colCD_1_1],
                                          df[self.colCD_1_2],
                                          df[self.colCD_2_1],
                                          df[self.colCD_2_2])
        return df[self.col] * scale

1268 

1269 

class ConvertPixelSqToArcsecondsSq(ComputePixelScale):
    """Convert a value in units pixels squared to units arcseconds squared.
    """

    def __init__(self,
                 col,
                 colCD_1_1,
                 colCD_1_2,
                 colCD_2_1,
                 colCD_2_2,
                 **kwargs):
        # Name of the pixel-squared-valued column to convert.
        self.col = col
        super().__init__(colCD_1_1, colCD_1_2, colCD_2_1, colCD_2_2, **kwargs)

    @property
    def name(self):
        return f"{self.col}_asArcsecondsSq"

    @property
    def columns(self):
        # The value column plus the four CD-matrix columns.
        return [self.col,
                self.colCD_1_1,
                self.colCD_1_2,
                self.colCD_2_1,
                self.colCD_2_2]

    def _func(self, df):
        scale = self.pixelScaleArcseconds(df[self.colCD_1_1],
                                          df[self.colCD_1_2],
                                          df[self.colCD_2_1],
                                          df[self.colCD_2_2])
        # Apply the scale twice: pixel^2 -> arcsec^2.
        return df[self.col] * scale * scale

1306 

1307 

class ReferenceBand(Functor):
    """Return the reference band name for each object, taken from the first
    set ``merge_measurement_<band>`` flag in priority order i, r, z, y, g, u.
    """
    name = 'Reference Band'
    shortname = 'refBand'

    @property
    def columns(self):
        return ["merge_measurement_i",
                "merge_measurement_r",
                "merge_measurement_z",
                "merge_measurement_y",
                "merge_measurement_g",
                "merge_measurement_u"]

    def _func(self, df: pd.DataFrame) -> pd.Series:
        def bandOfRow(row):
            # idxmax picks the first column holding the max value (True > False).
            return row.idxmax().replace('merge_measurement_', '')

        # Restrict to bands actually present; this functor requests the
        # superset of bands that could appear in the object table.
        present = [col for col in self.columns if col in df.columns]
        # astype('object') keeps the dtype stable even when df is empty.
        return df[present].apply(bandOfRow, axis=1,
                                 result_type='reduce').astype('object')

1333 

1334 

class Photometry(Functor):
    """Base class for converting instrumental fluxes to calibrated fluxes
    (nanojansky) and AB magnitudes using a fluxMag0 zeropoint.

    Parameters
    ----------
    colFlux : `str`
        Name of the instrument flux column.
    colFluxErr : `str`, optional
        Name of the associated error column for ``colFlux``.
    calib : optional
        Calibration object providing ``getFluxMag0()``; if `None`, the
        hard-coded ``COADD_ZP`` zeropoint is used.
    """
    # AB to NanoJansky (3631 Jansky)
    AB_FLUX_SCALE = (0 * u.ABmag).to_value(u.nJy)
    LOG_AB_FLUX_SCALE = 12.56
    FIVE_OVER_2LOG10 = 1.085736204758129569
    # TO DO: DM-21955 Replace hard coded photometic calibration values
    COADD_ZP = 27

    def __init__(self, colFlux, colFluxErr=None, calib=None, **kwargs):
        # Vectorized numerically-stable hypot for error propagation.
        self.vhypot = np.vectorize(self.hypot)
        self.col = colFlux
        self.colFluxErr = colFluxErr

        self.calib = calib
        if calib is not None:
            self.fluxMag0, self.fluxMag0Err = calib.getFluxMag0()
        else:
            # Fall back to the hard-coded coadd zeropoint (see DM-21955).
            self.fluxMag0 = 1./np.power(10, -0.4*self.COADD_ZP)
            self.fluxMag0Err = 0.

        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.col]

    @property
    def name(self):
        return f'mag_{self.col}'

    @classmethod
    def hypot(cls, a, b):
        """Numerically stable scalar hypot (avoids overflow in a**2 + b**2)."""
        if np.abs(a) < np.abs(b):
            a, b = b, a
        if a == 0.:
            return 0.
        q = b/a
        return np.abs(a) * np.sqrt(1. + q*q)

    def dn2flux(self, dn, fluxMag0):
        """Convert instrumental counts to flux in nanojansky."""
        return self.AB_FLUX_SCALE * dn / fluxMag0

    def dn2mag(self, dn, fluxMag0):
        """Convert instrumental counts to AB magnitude, silencing the
        expected warnings for non-positive counts."""
        # Bug fix: the accidental `np.warnings` alias was removed in
        # NumPy 1.25; use the standard-library warnings module instead.
        # Imported locally because the file's top-level imports do not
        # include it.
        import warnings
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', r'invalid value encountered')
            warnings.filterwarnings('ignore', r'divide by zero')
            return -2.5 * np.log10(dn/fluxMag0)

    def dn2fluxErr(self, dn, dnErr, fluxMag0, fluxMag0Err):
        """Propagate count and zeropoint errors to a flux error in nanojansky."""
        retVal = self.vhypot(dn * fluxMag0Err, dnErr * fluxMag0)
        retVal *= self.AB_FLUX_SCALE / fluxMag0 / fluxMag0
        return retVal

    def dn2MagErr(self, dn, dnErr, fluxMag0, fluxMag0Err):
        """Propagate count and zeropoint errors to an AB magnitude error."""
        retVal = self.dn2fluxErr(dn, dnErr, fluxMag0, fluxMag0Err) / self.dn2flux(dn, fluxMag0)
        return self.FIVE_OVER_2LOG10 * retVal

1391 

1392 

class NanoJansky(Photometry):
    """Calibrated flux in nanojansky from the configured flux column."""

    def _func(self, df):
        counts = df[self.col]
        return self.dn2flux(counts, self.fluxMag0)

1396 

1397 

class NanoJanskyErr(Photometry):
    """Calibrated flux error in nanojansky."""

    @property
    def columns(self):
        return [self.col, self.colFluxErr]

    def _func(self, df):
        errArr = self.dn2fluxErr(df[self.col], df[self.colFluxErr],
                                 self.fluxMag0, self.fluxMag0Err)
        # dn2fluxErr returns a bare array (np.vectorize); re-attach the index.
        return pd.Series(errArr, index=df.index)

1406 

1407 

class LocalPhotometry(Functor):
    """Base class for calibrating the specified instrument flux column using
    the local photometric calibration.

    Parameters
    ----------
    instFluxCol : `str`
        Name of the instrument flux column.
    instFluxErrCol : `str`
        Name of the associated error column for ``instFluxCol``.
    photoCalibCol : `str`
        Name of local calibration column.
    photoCalibErrCol : `str`
        Error associated with ``photoCalibCol``

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    """
    # AB magnitude of a 1 nJy source.
    logNJanskyToAB = (1 * u.nJy).to_value(u.ABmag)

    def __init__(self,
                 instFluxCol,
                 instFluxErrCol,
                 photoCalibCol,
                 photoCalibErrCol,
                 **kwargs):
        self.instFluxCol = instFluxCol
        self.instFluxErrCol = instFluxErrCol
        self.photoCalibCol = photoCalibCol
        self.photoCalibErrCol = photoCalibErrCol
        super().__init__(**kwargs)

    def instFluxToNanojansky(self, instFlux, localCalib):
        """Convert instrument flux to nanojanskys.

        Parameters
        ----------
        instFlux : `numpy.ndarray` or `pandas.Series`
            Array of instrument flux measurements
        localCalib : `numpy.ndarray` or `pandas.Series`
            Array of local photometric calibration estimates.

        Returns
        -------
        calibFlux : `numpy.ndarray` or `pandas.Series`
            Array of calibrated flux measurements.
        """
        return instFlux * localCalib

    def instFluxErrToNanojanskyErr(self, instFlux, instFluxErr, localCalib, localCalibErr):
        """Convert instrument flux error to a nanojansky error.

        Parameters
        ----------
        instFlux : `numpy.ndarray` or `pandas.Series`
            Array of instrument flux measurements
        instFluxErr : `numpy.ndarray` or `pandas.Series`
            Errors on associated ``instFlux`` values
        localCalib : `numpy.ndarray` or `pandas.Series`
            Array of local photometric calibration estimates.
        localCalibErr : `numpy.ndarray` or `pandas.Series`
            Errors on associated ``localCalib`` values

        Returns
        -------
        calibFluxErr : `numpy.ndarray` or `pandas.Series`
            Errors on calibrated flux measurements.
        """
        return np.hypot(instFluxErr * localCalib, instFlux * localCalibErr)

    def instFluxToMagnitude(self, instFlux, localCalib):
        """Convert instrument flux to an AB magnitude.

        Parameters
        ----------
        instFlux : `numpy.ndarray` or `pandas.Series`
            Array of instrument flux measurements
        localCalib : `numpy.ndarray` or `pandas.Series`
            Array of local photometric calibration estimates.

        Returns
        -------
        calibMag : `numpy.ndarray` or `pandas.Series`
            Array of calibrated AB magnitudes.
        """
        return -2.5 * np.log10(self.instFluxToNanojansky(instFlux, localCalib)) + self.logNJanskyToAB

    def instFluxErrToMagnitudeErr(self, instFlux, instFluxErr, localCalib, localCalibErr):
        """Convert instrument flux error to an AB magnitude error.

        Parameters
        ----------
        instFlux : `numpy.ndarray` or `pandas.Series`
            Array of instrument flux measurements
        instFluxErr : `numpy.ndarray` or `pandas.Series`
            Errors on associated ``instFlux`` values
        localCalib : `numpy.ndarray` or `pandas.Series`
            Array of local photometric calibration estimates.
        localCalibErr : `numpy.ndarray` or `pandas.Series`
            Errors on associated ``localCalib`` values

        Returns
        -------
        calibMagErr: `numpy.ndarray` or `pandas.Series`
            Error on calibrated AB magnitudes.
        """
        err = self.instFluxErrToNanojanskyErr(instFlux, instFluxErr, localCalib, localCalibErr)
        # Bug fix: the calibrated flux in the denominator is
        # instFlux * localCalib; the original mistakenly passed
        # instFluxErr as the calibration.
        return 2.5 / np.log(10) * err / self.instFluxToNanojansky(instFlux, localCalib)

1518 

1519 

class LocalNanojansky(LocalPhotometry):
    """Compute calibrated fluxes using the local calibration value."""

    @property
    def columns(self):
        return [self.instFluxCol, self.photoCalibCol]

    @property
    def name(self):
        return f'flux_{self.instFluxCol}'

    def _func(self, df):
        flux, calib = df[self.instFluxCol], df[self.photoCalibCol]
        return self.instFluxToNanojansky(flux, calib)

1533 

1534 

class LocalNanojanskyErr(LocalPhotometry):
    """Compute calibrated flux errors using the local calibration value."""

    @property
    def columns(self):
        return [self.instFluxCol, self.instFluxErrCol,
                self.photoCalibCol, self.photoCalibErrCol]

    @property
    def name(self):
        return f'fluxErr_{self.instFluxCol}'

    def _func(self, df):
        return self.instFluxErrToNanojanskyErr(
            df[self.instFluxCol], df[self.instFluxErrCol],
            df[self.photoCalibCol], df[self.photoCalibErrCol])

1550 

1551 

class LocalDipoleMeanFlux(LocalPhotometry):
    """Compute absolute mean of dipole fluxes.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalDipoleMeanFluxErr
    LocalDipoleDiffFlux
    LocalDipoleDiffFluxErr
    """
    def __init__(self,
                 instFluxPosCol,
                 instFluxNegCol,
                 instFluxPosErrCol,
                 instFluxNegErrCol,
                 photoCalibCol,
                 photoCalibErrCol,
                 **kwargs):
        # Track both dipole lobes plus the calibration columns.
        self.instFluxNegCol = instFluxNegCol
        self.instFluxPosCol = instFluxPosCol
        self.instFluxNegErrCol = instFluxNegErrCol
        self.instFluxPosErrCol = instFluxPosErrCol
        self.photoCalibCol = photoCalibCol
        self.photoCalibErrCol = photoCalibErrCol
        super().__init__(instFluxNegCol,
                         instFluxNegErrCol,
                         photoCalibCol,
                         photoCalibErrCol,
                         **kwargs)

    @property
    def columns(self):
        return [self.instFluxPosCol,
                self.instFluxNegCol,
                self.photoCalibCol]

    @property
    def name(self):
        return f'dipMeanFlux_{self.instFluxPosCol}_{self.instFluxNegCol}'

    def _func(self, df):
        calib = df[self.photoCalibCol]
        neg = np.fabs(self.instFluxToNanojansky(df[self.instFluxNegCol], calib))
        pos = np.fabs(self.instFluxToNanojansky(df[self.instFluxPosCol], calib))
        return 0.5*(neg + pos)

1596 

1597 

class LocalDipoleMeanFluxErr(LocalDipoleMeanFlux):
    """Compute the error on the absolute mean of dipole fluxes.

    The mean flux is 0.5*(|pos| + |neg|)*calib, so error propagation gives
    0.5*sqrt(((|pos| + |neg|)*calibErr)**2 + (posErr**2 + negErr**2)*calib**2).

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalDipoleMeanFlux
    LocalDipoleDiffFlux
    LocalDipoleDiffFluxErr
    """

    @property
    def columns(self):
        return [self.instFluxPosCol,
                self.instFluxNegCol,
                self.instFluxPosErrCol,
                self.instFluxNegErrCol,
                self.photoCalibCol,
                self.photoCalibErrCol]

    @property
    def name(self):
        return f'dipMeanFluxErr_{self.instFluxPosCol}_{self.instFluxNegCol}'

    def _func(self, df):
        # Bug fix: (|neg| + |pos|) must be parenthesized before multiplying
        # by the calibration error; the original multiplied only |pos| by it.
        # This now matches the correctly grouped LocalDipoleDiffFluxErr.
        return 0.5*np.sqrt(
            ((np.fabs(df[self.instFluxNegCol]) + np.fabs(df[self.instFluxPosCol]))
             * df[self.photoCalibErrCol])**2
            + (df[self.instFluxNegErrCol]**2 + df[self.instFluxPosErrCol]**2)
            * df[self.photoCalibCol]**2)

1629 

1630 

class LocalDipoleDiffFlux(LocalDipoleMeanFlux):
    """Compute the absolute difference of dipole fluxes.

    Value is (abs(pos) - abs(neg))

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalDipoleMeanFlux
    LocalDipoleMeanFluxErr
    LocalDipoleDiffFluxErr
    """

    @property
    def columns(self):
        return [self.instFluxPosCol,
                self.instFluxNegCol,
                self.photoCalibCol]

    @property
    def name(self):
        return f'dipDiffFlux_{self.instFluxPosCol}_{self.instFluxNegCol}'

    def _func(self, df):
        calib = df[self.photoCalibCol]
        pos = np.fabs(self.instFluxToNanojansky(df[self.instFluxPosCol], calib))
        neg = np.fabs(self.instFluxToNanojansky(df[self.instFluxNegCol], calib))
        return pos - neg

1658 

1659 

class LocalDipoleDiffFluxErr(LocalDipoleMeanFlux):
    """Compute the error on the absolute difference of dipole fluxes.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalDipoleMeanFlux
    LocalDipoleMeanFluxErr
    LocalDipoleDiffFlux
    """

    @property
    def columns(self):
        return [self.instFluxPosCol,
                self.instFluxNegCol,
                self.instFluxPosErrCol,
                self.instFluxNegErrCol,
                self.photoCalibCol,
                self.photoCalibErrCol]

    @property
    def name(self):
        return f'dipDiffFluxErr_{self.instFluxPosCol}_{self.instFluxNegCol}'

    def _func(self, df):
        # Propagate the calibration error through (|pos| - |neg|)*calib.
        calibTerm = ((np.fabs(df[self.instFluxPosCol]) - np.fabs(df[self.instFluxNegCol]))
                     * df[self.photoCalibErrCol])**2
        fluxTerm = ((df[self.instFluxPosErrCol]**2 + df[self.instFluxNegErrCol]**2)
                    * df[self.photoCalibCol]**2)
        return np.sqrt(calibTerm + fluxTerm)

1691 

1692 

class Ebv(Functor):
    """Compute E(B-V) from dustmaps.sfd
    """
    _defaultDataset = 'ref'
    name = "E(B-V)"
    shortname = "ebv"

    def __init__(self, **kwargs):
        # Imported here so dustmaps is only required when Ebv is used.
        from dustmaps.sfd import SFDQuery
        self._columns = ['coord_ra', 'coord_dec']
        self.sfd = SFDQuery()
        super().__init__(**kwargs)

    def _func(self, df):
        coords = SkyCoord(df['coord_ra'].values * u.rad, df['coord_dec'].values * u.rad)
        # Double precision is unnecessary scientifically, but is currently
        # required for ingest to qserv.
        return pd.Series(self.sfd(coords), index=df.index).astype('float64')