Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

import re
import warnings
from itertools import product

import yaml
import pandas as pd
import numpy as np
import astropy.units as u

from lsst.daf.persistence import doImport
from lsst.daf.butler import DeferredDatasetHandle
from .parquetTable import ParquetTable, MultilevelParquetTable

12 

13 

def init_fromDict(initDict, basePath='lsst.pipe.tasks.functors',
                  typeKey='functor', name=None):
    """Initialize an object defined in a dictionary

    The object needs to be importable as
    f'{basePath}.{initDict[typeKey]}'
    The positional and keyword arguments (if any) are contained in
    "args" and "kwargs" entries in the dictionary, respectively.
    This is used in `functors.CompositeFunctor.from_yaml` to initialize
    a composite functor from a specification in a YAML file.

    Parameters
    ----------
    initDict : dictionary
        Dictionary describing object's initialization. Must contain
        an entry keyed by ``typeKey`` that is the name of the object,
        relative to ``basePath``.
    basePath : str
        Path relative to module in which ``initDict[typeKey]`` is defined.
    typeKey : str
        Key of ``initDict`` that is the name of the object
        (relative to ``basePath``).
    name : str, optional
        Label used in the error message if construction fails.
    """
    # Copy so that popping entries does not mutate the caller's dict.
    initDict = initDict.copy()
    # TO DO: DM-21956 We should be able to define functors outside this module
    pythonType = doImport(f'{basePath}.{initDict.pop(typeKey)}')
    # "args" may be absent, a single string, or a list of positional args.
    args = initDict.pop('args', [])
    if isinstance(args, str):
        args = [args]
    try:
        # Remaining entries of initDict are passed through as kwargs.
        element = pythonType(*args, **initDict)
    except Exception as e:
        message = f'Error in constructing functor "{name}" of type {pythonType.__name__} with args: {args}'
        # Chain the original exception so its traceback is not lost.
        raise type(e)(message, e.args) from e
    return element

51 

52 

class Functor(object):
    """Define and execute a calculation on a ParquetTable

    The `__call__` method accepts either a `ParquetTable` object or a
    `DeferredDatasetHandle`, and returns the result of the calculation as a
    single column.  Each functor defines what columns are needed for the
    calculation, and only these columns are read from the `ParquetTable`.

    The action of `__call__` consists of two steps: first, loading the
    necessary columns from disk into memory as a `pandas.DataFrame` object;
    and second, performing the computation on this dataframe and returning
    the result.

    To define a new `Functor`, a subclass must define a `_func` method,
    that takes a `pandas.DataFrame` and returns result in a `pandas.Series`.
    In addition, it must define the following attributes

    * `_columns`: The columns necessary to perform the calculation
    * `name`: A name appropriate for a figure axis label
    * `shortname`: A name appropriate for use as a dictionary key

    On initialization, a `Functor` should declare what band (`filt` kwarg)
    and dataset (e.g. `'ref'`, `'meas'`, `'forced_src'`) it is intended to be
    applied to. This enables the `_get_data` method to extract the proper
    columns from the parquet file. If not specified, the dataset will fall
    back on the `_defaultDataset` attribute. If band is not specified and
    `dataset` is anything other than `'ref'`, then an error will be raised
    when trying to perform the calculation.

    As currently implemented, `Functor` is only set up to expect a
    dataset of the format of the `deepCoadd_obj` dataset; that is, a
    dataframe with a multi-level column index, with the levels of the
    column index being `band`, `dataset`, and `column`. This is defined in
    the `_columnLevels` attribute, as well as being implicit in the role of
    the `filt` and `dataset` attributes defined at initialization. In
    addition, the `_get_data` method that reads the dataframe from the
    `ParquetTable` will return a dataframe with column index levels defined
    by the `_dfLevels` attribute; by default, this is `column`.

    The `_columnLevels` and `_dfLevels` attributes should generally not need
    to be changed, unless `_func` needs columns from multiple filters or
    datasets to do the calculation.
    An example of this is the `lsst.pipe.tasks.functors.Color` functor, for
    which `_dfLevels = ('band', 'column')`, and `_func` expects the dataframe
    it gets to have those levels in the column index.

    Parameters
    ----------
    filt : str
        Filter upon which to do the calculation

    dataset : str
        Dataset upon which to do the calculation
        (e.g., 'ref', 'meas', 'forced_src').
    """

    _defaultDataset = 'ref'
    _columnLevels = ('band', 'dataset', 'column')
    _dfLevels = ('column',)
    _defaultNoDup = False

    def __init__(self, filt=None, dataset=None, noDup=None):
        self.filt = filt
        self.dataset = dataset if dataset is not None else self._defaultDataset
        self._noDup = noDup

    @property
    def noDup(self):
        # An explicit per-instance setting wins; otherwise the class default.
        if self._noDup is not None:
            return self._noDup
        else:
            return self._defaultNoDup

    @property
    def columns(self):
        """Columns required to perform calculation
        """
        if not hasattr(self, '_columns'):
            raise NotImplementedError('Must define columns property or _columns attribute')
        return self._columns

    def _get_data_columnLevels(self, data, columnIndex=None):
        """Gets the names of the column index levels

        This should only be called in the context of a multilevel table.
        The logic here is to enable this to work both with the gen2
        `MultilevelParquetTable` and with the gen3 `DeferredDatasetHandle`.

        Parameters
        ----------
        data : `MultilevelParquetTable` or `DeferredDatasetHandle`

        columnIndex (optional): pandas `Index` object
            if not passed, then it is read from the `DeferredDatasetHandle`
        """
        if isinstance(data, DeferredDatasetHandle):
            if columnIndex is None:
                columnIndex = data.get(component="columns")
        if columnIndex is not None:
            # Either passed in, or just read from the handle above.
            return columnIndex.names
        if isinstance(data, MultilevelParquetTable):
            return data.columnLevels
        else:
            raise TypeError(f"Unknown type for data: {type(data)}!")

    def _get_data_columnLevelNames(self, data, columnIndex=None):
        """Gets the content of each of the column levels for a multilevel table

        Similar to `_get_data_columnLevels`, this enables backward
        compatibility with gen2.

        Mirrors original gen2 implementation within
        `pipe.tasks.parquetTable.MultilevelParquetTable`
        """
        if isinstance(data, DeferredDatasetHandle):
            if columnIndex is None:
                columnIndex = data.get(component="columns")
        if columnIndex is not None:
            columnLevels = columnIndex.names
            # Map each level name to the sorted unique values at that level.
            columnLevelNames = {
                level: list(np.unique(np.array([c for c in columnIndex])[:, i]))
                for i, level in enumerate(columnLevels)
            }
            return columnLevelNames
        if isinstance(data, MultilevelParquetTable):
            return data.columnLevelNames
        else:
            raise TypeError(f"Unknown type for data: {type(data)}!")

    def _colsFromDict(self, colDict, columnIndex=None):
        """Converts dictionary column specification to a list of columns

        This mirrors the original gen2 implementation within
        `pipe.tasks.parquetTable.MultilevelParquetTable`
        """
        new_colDict = {}
        columnLevels = self._get_data_columnLevels(None, columnIndex=columnIndex)

        for i, lev in enumerate(columnLevels):
            if lev in colDict:
                if isinstance(colDict[lev], str):
                    new_colDict[lev] = [colDict[lev]]
                else:
                    new_colDict[lev] = colDict[lev]
            else:
                # Level not constrained: take every value it can have.
                new_colDict[lev] = columnIndex.levels[i]

        levelCols = [new_colDict[lev] for lev in columnLevels]
        # Cartesian product over levels yields the full tuple specification.
        cols = product(*levelCols)
        return list(cols)

    def multilevelColumns(self, data, columnIndex=None, returnTuple=False):
        """Returns columns needed by functor from multilevel dataset

        To access tables with multilevel column structure, the
        `MultilevelParquetTable` or `DeferredDatasetHandle` need to be
        passed either a list of tuples or a dictionary.

        Parameters
        ----------
        data : `MultilevelParquetTable` or `DeferredDatasetHandle`

        columnIndex (optional): pandas `Index` object
            either passed or read in from `DeferredDatasetHandle`.

        returnTuple : bool
            If true, then return a list of tuples rather than the column
            dictionary specification.  This is set to `True` by
            `CompositeFunctor` in order to be able to combine columns from
            the various component functors.
        """
        if isinstance(data, DeferredDatasetHandle) and columnIndex is None:
            columnIndex = data.get(component="columns")

        # Confirm that the dataset has the column levels the functor is
        # expecting it to have.
        columnLevels = self._get_data_columnLevels(data, columnIndex)

        if not set(columnLevels) == set(self._columnLevels):
            raise ValueError(
                "ParquetTable does not have the expected column levels. "
                f"Got {columnLevels}; expected {self._columnLevels}."
            )

        columnDict = {'column': self.columns,
                      'dataset': self.dataset}
        if self.filt is None:
            columnLevelNames = self._get_data_columnLevelNames(data, columnIndex)
            if "band" in columnLevels:
                if self.dataset == "ref":
                    # 'ref' is band-independent; pick an arbitrary band.
                    columnDict["band"] = columnLevelNames["band"][0]
                else:
                    raise ValueError(f"'filt' not set for functor {self.name}"
                                     f"(dataset {self.dataset}) "
                                     "and ParquetTable "
                                     "contains multiple filters in column index. "
                                     "Set 'filt' or set 'dataset' to 'ref'.")
        else:
            columnDict['band'] = self.filt

        if isinstance(data, MultilevelParquetTable):
            return data._colsFromDict(columnDict)
        elif isinstance(data, DeferredDatasetHandle):
            if returnTuple:
                return self._colsFromDict(columnDict, columnIndex=columnIndex)
            else:
                return columnDict

    def _func(self, df, dropna=True):
        raise NotImplementedError('Must define calculation on dataframe')

    def _get_columnIndex(self, data):
        """Return columnIndex (gen3 only; `None` for anything else).
        """

        if isinstance(data, DeferredDatasetHandle):
            return data.get(component="columns")
        else:
            return None

    def _get_data(self, data):
        """Retrieve dataframe necessary for calculation.

        The data argument can be a DataFrame, a ParquetTable instance,
        or a gen3 DeferredDatasetHandle

        Returns dataframe upon which `self._func` can act.

        N.B. while passing a raw pandas `DataFrame` *should* work here,
        it has not been tested.
        """
        if isinstance(data, pd.DataFrame):
            return data

        # First thing to do: check to see if the data source has a
        # multilevel column index or not.
        columnIndex = self._get_columnIndex(data)
        is_multiLevel = isinstance(data, MultilevelParquetTable) or isinstance(columnIndex, pd.MultiIndex)

        # Simple single-level parquet table, gen2
        if isinstance(data, ParquetTable) and not is_multiLevel:
            columns = self.columns
            df = data.toDataFrame(columns=columns)
            return df

        # Get proper columns specification for this functor
        if is_multiLevel:
            columns = self.multilevelColumns(data, columnIndex=columnIndex)
        else:
            columns = self.columns

        if isinstance(data, MultilevelParquetTable):
            # Load in-memory dataframe with appropriate columns the gen2 way
            df = data.toDataFrame(columns=columns, droplevels=False)
        elif isinstance(data, DeferredDatasetHandle):
            # Load in-memory dataframe with appropriate columns the gen3 way
            df = data.get(parameters={"columns": columns})

        # Drop unnecessary column levels
        if is_multiLevel:
            df = self._setLevels(df)

        return df

    def _setLevels(self, df):
        # Drop every column-index level not listed in `_dfLevels`.
        levelsToDrop = [n for n in df.columns.names if n not in self._dfLevels]
        df.columns = df.columns.droplevel(levelsToDrop)
        return df

    def _dropna(self, vals):
        return vals.dropna()

    def __call__(self, data, dropna=False):
        """Load the needed columns from ``data`` and apply `_func`.

        A failure inside `_func` is converted to an all-NaN result via
        `fail`; exceptions raised while *loading* the data propagate.
        """
        # Load the data outside the try block.  Previously the load was
        # inside it, so a loading failure left `df` unbound and
        # `self.fail(df)` raised a masking NameError.
        df = self._get_data(data)
        try:
            vals = self._func(df)
        except Exception:
            vals = self.fail(df)
        if dropna:
            vals = self._dropna(vals)

        return vals

    def difference(self, data1, data2, **kwargs):
        """Computes difference between functor called on two different
        ParquetTable objects
        """
        return self(data1, **kwargs) - self(data2, **kwargs)

    def fail(self, df):
        # All-NaN fallback result, aligned to the input's index.
        return pd.Series(np.full(len(df), np.nan), index=df.index)

    @property
    def name(self):
        """Full name of functor (suitable for figure labels)
        """
        # NOTE(review): this *returns* the NotImplementedError class rather
        # than raising it; left unchanged because error messages interpolate
        # `self.name` into f-strings, which would break on a raise.
        return NotImplementedError

    @property
    def shortname(self):
        """Short name of functor (suitable for column name/dict key)
        """
        return self.name

352 

353 

class CompositeFunctor(Functor):
    """Perform multiple calculations at once on a catalog

    The role of a `CompositeFunctor` is to group together computations from
    multiple functors. Instead of returning `pandas.Series` a
    `CompositeFunctor` returns a `pandas.Dataframe`, with the column names
    being the keys of `funcDict`.

    The `columns` attribute of a `CompositeFunctor` is the union of all
    columns in all the component functors.

    A `CompositeFunctor` does not use a `_func` method itself; rather, when
    a `CompositeFunctor` is called, all its columns are loaded at once, and
    the resulting dataframe is passed to the `_func` method of each
    component functor.  This has the advantage of only doing I/O (reading
    from parquet file) once, and works because each individual `_func`
    method of each component functor does not care if there are *extra*
    columns in the dataframe being passed; only that it must contain *at
    least* the `columns` it expects.

    An important and useful class method is `from_yaml`, which takes as
    argument the path to a YAML file specifying a collection of functors.

    Parameters
    ----------
    funcs : `dict` or `list`
        Dictionary or list of functors.  If a list, then it will be
        converted into a dictonary according to the `.shortname` attribute
        of each functor.
    """
    dataset = None

    def __init__(self, funcs, **kwargs):
        # isinstance (rather than `type(funcs) == dict`) also accepts
        # dict subclasses such as OrderedDict.
        if isinstance(funcs, dict):
            self.funcDict = funcs
        else:
            self.funcDict = {f.shortname: f for f in funcs}

        self._filt = None

        super().__init__(**kwargs)

    @property
    def filt(self):
        return self._filt

    @filt.setter
    def filt(self, filt):
        # Propagate the filter down to every component functor.
        if filt is not None:
            for _, f in self.funcDict.items():
                f.filt = filt
        self._filt = filt

    def update(self, new):
        """Merge another `dict` or `CompositeFunctor` into `funcDict`."""
        if isinstance(new, dict):
            self.funcDict.update(new)
        elif isinstance(new, CompositeFunctor):
            self.funcDict.update(new.funcDict)
        else:
            raise TypeError('Can only update with dictionary or CompositeFunctor.')

        # Make sure new functors have the same 'filt' set
        if self.filt is not None:
            self.filt = self.filt

    @property
    def columns(self):
        """Union of the columns required by all component functors."""
        return list(set([x for y in [f.columns for f in self.funcDict.values()] for x in y]))

    def multilevelColumns(self, data, **kwargs):
        # Get the union of columns for all component functors.
        # Note the need to have `returnTuple=True` here.
        return list(
            set(
                [
                    x
                    for y in [
                        f.multilevelColumns(data, returnTuple=True, **kwargs)
                        for f in self.funcDict.values()
                    ]
                    for x in y
                ]
            )
        )

    def __call__(self, data, **kwargs):
        """Apply the functor to the data table

        Parameters
        ----------
        data : `lsst.daf.butler.DeferredDatasetHandle`,
               `lsst.pipe.tasks.parquetTable.MultilevelParquetTable`,
               `lsst.pipe.tasks.parquetTable.ParquetTable`,
               or `pandas.DataFrame`.
            The table or a pointer to a table on disk from which columns can
            be accessed
        """
        columnIndex = self._get_columnIndex(data)

        # First, determine whether data has a multilevel index
        # (either gen2 or gen3)
        is_multiLevel = isinstance(data, MultilevelParquetTable) or isinstance(columnIndex, pd.MultiIndex)

        # Multilevel index, gen2 or gen3
        if is_multiLevel:
            columns = self.multilevelColumns(data, columnIndex=columnIndex)

            if isinstance(data, MultilevelParquetTable):
                # Read data into memory the gen2 way
                df = data.toDataFrame(columns=columns, droplevels=False)
            elif isinstance(data, DeferredDatasetHandle):
                # Read data into memory the gen3 way
                df = data.get(parameters={"columns": columns})

            valDict = {}
            for k, f in self.funcDict.items():
                # Track subdf explicitly so the except clause never hits an
                # unbound name when column selection itself fails.
                subdf = None
                try:
                    subdf = f._setLevels(
                        df[f.multilevelColumns(data, returnTuple=True, columnIndex=columnIndex)]
                    )
                    valDict[k] = f._func(subdf)
                except Exception:
                    valDict[k] = f.fail(subdf if subdf is not None else df)

        else:
            if isinstance(data, DeferredDatasetHandle):
                # input if Gen3 deferLoad=True
                df = data.get(columns=self.columns)
            elif isinstance(data, pd.DataFrame):
                # input if Gen3 deferLoad=False
                df = data
            else:
                # Original Gen2 input is type ParquetTable and the fallback
                df = data.toDataFrame(columns=self.columns)

            valDict = {k: f._func(df) for k, f in self.funcDict.items()}

        try:
            valDf = pd.concat(valDict, axis=1)
        except TypeError:
            # Surface which functor produced an unconcatenatable result.
            print([(k, type(v)) for k, v in valDict.items()])
            raise

        if kwargs.get('dropna', False):
            valDf = valDf.dropna(how='any')

        return valDf

    @classmethod
    def renameCol(cls, col, renameRules):
        """Apply the first matching prefix rename rule to ``col``."""
        if renameRules is None:
            return col
        for old, new in renameRules:
            if col.startswith(old):
                col = col.replace(old, new)
        return col

    @classmethod
    def from_file(cls, filename, **kwargs):
        """Build a `CompositeFunctor` from a YAML specification file."""
        with open(filename) as f:
            translationDefinition = yaml.safe_load(f)

        return cls.from_yaml(translationDefinition, **kwargs)

    @classmethod
    def from_yaml(cls, translationDefinition, **kwargs):
        """Build a `CompositeFunctor` from a parsed YAML specification."""
        funcs = {}
        for func, val in translationDefinition['funcs'].items():
            funcs[func] = init_fromDict(val, name=func)

        if 'flag_rename_rules' in translationDefinition:
            renameRules = translationDefinition['flag_rename_rules']
        else:
            renameRules = None

        if 'refFlags' in translationDefinition:
            for flag in translationDefinition['refFlags']:
                funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='ref')

        if 'flags' in translationDefinition:
            for flag in translationDefinition['flags']:
                funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='meas')

        return cls(funcs, **kwargs)

535 

536 

def mag_aware_eval(df, expr):
    """Evaluate an expression on a DataFrame, knowing what the 'mag' function means

    Builds on `pandas.DataFrame.eval`, which parses and executes math on
    dataframes.  Any ``mag(col)`` call in the expression is rewritten to
    ``-2.5*log(col)/log(10)`` before evaluation; if that fails (typically
    because ``col`` is not a column), the column is retried as
    ``col_instFlux``.

    Parameters
    ----------
    df : pandas.DataFrame
        Dataframe on which to evaluate expression.

    expr : str
        Expression.
    """
    try:
        expr_new = re.sub(r'mag\((\w+)\)', r'-2.5*log(\g<1>)/log(10)', expr)
        # The former `truediv=True` keyword was removed from pandas' eval
        # (true division is the only Python 3 behavior anyway); passing it
        # made *every* call raise TypeError on current pandas.
        val = df.eval(expr_new)
    except Exception:  # Should check what actually gets raised
        expr_new = re.sub(r'mag\((\w+)\)', r'-2.5*log(\g<1>_instFlux)/log(10)', expr)
        val = df.eval(expr_new)
    return val

557 

558 

class CustomFunctor(Functor):
    """Arbitrary computation on a catalog

    Column names (and thus the columns to be loaded from catalog) are found
    by finding all words and trying to ignore all "math-y" words.

    Parameters
    ----------
    expr : str
        Expression to evaluate, to be parsed and executed by
        `mag_aware_eval`.
    """
    # Words that look like identifiers but are math functions, not columns.
    _ignore_words = ('mag', 'sin', 'cos', 'exp', 'log', 'sqrt')

    def __init__(self, expr, **kwargs):
        self.expr = expr
        super().__init__(**kwargs)

    @property
    def name(self):
        return self.expr

    @property
    def columns(self):
        # Arguments of mag(...) calls; these refer to fluxes, not columns.
        flux_names = re.findall(r'mag\(\s*(\w+)\s*\)', self.expr)

        # Every identifier-like word that isn't a known math function.
        candidates = [word for word in re.findall(r'[a-zA-Z_]+', self.expr)
                      if word not in self._ignore_words]

        bogus = []
        for name in flux_names:
            if name.endswith('_instFlux'):
                candidates.append(name)
            else:
                # mag(x) really reads x_instFlux; x itself is not a column.
                candidates.append(f'{name}_instFlux')
                bogus.append(name)

        return list({c for c in candidates if c not in bogus})

    def _func(self, df):
        return mag_aware_eval(df, self.expr)

597 

598 

class Column(Functor):
    """Return a single named column of the catalog, unchanged.
    """

    def __init__(self, col, **kwargs):
        self.col = col
        super().__init__(**kwargs)

    @property
    def name(self):
        # The column name doubles as the functor's display name.
        return self.col

    @property
    def columns(self):
        return [self.col]

    def _func(self, df):
        return df[self.col]

617 

618 

class Index(Functor):
    """Return the value of the index for each object
    """

    # just a dummy; something has to be here so a column can be loaded
    columns = ['coord_ra']
    _defaultDataset = 'ref'
    _defaultNoDup = True

    def _func(self, df):
        idx = df.index
        return pd.Series(idx, index=idx)

629 

630 

class IDColumn(Column):
    """Return the index (source id) as a column named 'id'."""
    col = 'id'
    _allow_difference = False
    _defaultNoDup = True

    def _func(self, df):
        # The id lives in the index, not in a data column.
        idx = df.index
        return pd.Series(idx, index=idx)

638 

639 

class FootprintNPix(Column):
    """Functor returning the ``base_Footprint_nPix`` column."""
    col = 'base_Footprint_nPix'

642 

643 

class CoordColumn(Column):
    """Base class for coordinate column, in degrees
    """
    # When True, the stored column is in radians and is converted on read.
    _radians = True

    def __init__(self, col, **kwargs):
        super().__init__(col, **kwargs)

    def _func(self, df):
        # Must not modify the original column in place: another functor may
        # also read it.  The arithmetic below returns a new Series.
        if self._radians:
            return df[self.col] * 180 / np.pi
        return df[self.col]

656 

657 

class RAColumn(CoordColumn):
    """Right Ascension, in degrees
    """
    name = 'RA'
    _defaultNoDup = True

    def __init__(self, **kwargs):
        # Hard-wire the column; RA always comes from 'coord_ra'.
        super().__init__('coord_ra', **kwargs)

    def __call__(self, catalog, **kwargs):
        # Redundant delegation kept for interface stability.
        return super().__call__(catalog, **kwargs)

669 

670 

class DecColumn(CoordColumn):
    """Declination, in degrees
    """
    name = 'Dec'
    _defaultNoDup = True

    def __init__(self, **kwargs):
        # Hard-wire the column; Dec always comes from 'coord_dec'.
        super().__init__('coord_dec', **kwargs)

    def __call__(self, catalog, **kwargs):
        # Redundant delegation kept for interface stability.
        return super().__call__(catalog, **kwargs)

682 

683 

def fluxName(col):
    """Append ``'_instFlux'`` to *col* unless it already ends with it."""
    suffix = '_instFlux'
    return col if col.endswith(suffix) else col + suffix

688 

689 

def fluxErrName(col):
    """Append ``'_instFluxErr'`` to *col* unless it already ends with it."""
    suffix = '_instFluxErr'
    return col if col.endswith(suffix) else col + suffix

694 

695 

class Mag(Functor):
    """Compute calibrated magnitude

    Takes a `calib` argument, which returns the flux at mag=0
    as `calib.getFluxMag0()`. If not provided, then the default
    `fluxMag0` is 63095734448.0194, which is default for HSC.
    This default should be removed in DM-21955

    This calculation hides warnings about invalid values and dividing by
    zero.

    As for all functors, a `dataset` and `filt` kwarg should be provided
    upon initialization. Unlike the default `Functor`, however, the default
    dataset for a `Mag` is `'meas'`, rather than `'ref'`.

    Parameters
    ----------
    col : `str`
        Name of flux column from which to compute magnitude. Can be
        parseable by `lsst.pipe.tasks.functors.fluxName` function---that is,
        you can pass `'modelfit_CModel'` instead of
        `'modelfit_CModel_instFlux'`) and it will understand.
    calib : `lsst.afw.image.calib.Calib` (optional)
        Object that knows zero point.
    """
    _defaultDataset = 'meas'

    def __init__(self, col, calib=None, **kwargs):
        self.col = fluxName(col)
        self.calib = calib
        if calib is not None:
            self.fluxMag0 = calib.getFluxMag0()[0]
        else:
            # TO DO: DM-21955 Replace hard coded photometic calibration values
            self.fluxMag0 = 63095734448.0194

        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.col]

    def _func(self, df):
        # Use the stdlib warnings module: the `np.warnings` alias used here
        # previously was removed in NumPy 1.25.
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', r'invalid value encountered')
            warnings.filterwarnings('ignore', r'divide by zero')
            return -2.5*np.log10(df[self.col] / self.fluxMag0)

    @property
    def name(self):
        return f'mag_{self.col}'

746 

747 

class MagErr(Mag):
    """Compute calibrated magnitude uncertainty

    Takes the same `calib` object as `lsst.pipe.tasks.functors.Mag`.

    Parameters
    ----------
    col : `str`
        Name of flux column
    calib : `lsst.afw.image.calib.Calib` (optional)
        Object that knows zero point.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        if self.calib is not None:
            self.fluxMag0Err = self.calib.getFluxMag0()[1]
        else:
            self.fluxMag0Err = 0.

    @property
    def columns(self):
        return [self.col, self.col + 'Err']

    def _func(self, df):
        # Use the stdlib warnings module: the `np.warnings` alias used here
        # previously was removed in NumPy 1.25.
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', r'invalid value encountered')
            warnings.filterwarnings('ignore', r'divide by zero')
            fluxCol, fluxErrCol = self.columns
            x = df[fluxErrCol] / df[fluxCol]
            y = self.fluxMag0Err / self.fluxMag0
            # Standard propagation of errors through -2.5*log10(flux/flux0).
            magErr = (2.5 / np.log(10.)) * np.sqrt(x*x + y*y)
            return magErr

    @property
    def name(self):
        return super().name + '_err'

784 

785 

class NanoMaggie(Mag):
    """Flux scaled by the zero-point flux ``fluxMag0``, times 1e9.
    """

    def _func(self, df):
        # Same expression shape as the original to preserve float rounding.
        return (df[self.col] / self.fluxMag0) * 1e9

792 

793 

class MagDiff(Functor):
    """Functor to calculate magnitude difference"""
    # NOTE: the docstring above was previously placed *after*
    # `_defaultDataset`, where Python does not treat it as a docstring.
    _defaultDataset = 'meas'

    def __init__(self, col1, col2, **kwargs):
        self.col1 = fluxName(col1)
        self.col2 = fluxName(col2)
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.col1, self.col2]

    def _func(self, df):
        # Use the stdlib warnings module: the `np.warnings` alias used here
        # previously was removed in NumPy 1.25.
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', r'invalid value encountered')
            warnings.filterwarnings('ignore', r'divide by zero')
            # mag1 - mag2 expressed as a single log of the flux ratio.
            return -2.5*np.log10(df[self.col1]/df[self.col2])

    @property
    def name(self):
        return f'(mag_{self.col1} - mag_{self.col2})'

    @property
    def shortname(self):
        return f'magDiff_{self.col1}_{self.col2}'

822 

class Color(Functor):
    """Compute the color between two filters

    Computes color by initializing two different `Mag`
    functors based on the `col` and filters provided, and
    then returning the difference.

    This is enabled by the `_func` expecting a dataframe with a
    multilevel column index, with both `'band'` and `'column'`,
    instead of just `'column'`, which is the `Functor` default.
    This is controlled by the `_dfLevels` attribute.

    Also of note, the default dataset for `Color` is `forced_src'`,
    whereas for `Mag` it is `'meas'`.

    Parameters
    ----------
    col : str
        Name of flux column from which to compute; same as would be passed to
        `lsst.pipe.tasks.functors.Mag`.

    filt2, filt1 : str
        Filters from which to compute magnitude difference.
        Color computed is `Mag(filt2) - Mag(filt1)`.
    """
    _defaultDataset = 'forced_src'
    _dfLevels = ('band', 'column')
    _defaultNoDup = True

    def __init__(self, col, filt2, filt1, **kwargs):
        self.col = fluxName(col)
        # A color of a band with itself is identically zero; reject it.
        if filt2 == filt1:
            raise RuntimeError("Cannot compute Color for %s: %s - %s " % (col, filt2, filt1))
        self.filt2 = filt2
        self.filt1 = filt1

        # Delegate the per-band magnitude calculation to two Mag functors.
        self.mag2 = Mag(col, filt=filt2, **kwargs)
        self.mag1 = Mag(col, filt=filt1, **kwargs)

        super().__init__(**kwargs)

    @property
    def filt(self):
        # A Color spans two bands, so it has no single filter; the band
        # selection lives in self.filt1/self.filt2 instead.
        return None

    @filt.setter
    def filt(self, filt):
        # Deliberately ignore attempts to set a single filter
        # (e.g. by CompositeFunctor's filt setter).
        pass

    def _func(self, df):
        # df carries ('band', 'column') levels; select each band's sub-frame
        # and difference the magnitudes.
        mag2 = self.mag2._func(df[self.filt2])
        mag1 = self.mag1._func(df[self.filt1])
        return mag2 - mag1

    @property
    def columns(self):
        return [self.mag1.col, self.mag2.col]

    def multilevelColumns(self, parq, **kwargs):
        # NOTE(review): tuple order here is (dataset, band, column) while
        # `_columnLevels` declares ('band', 'dataset', 'column') — confirm
        # against the actual column-index ordering of the input table.
        return [(self.dataset, self.filt1, self.col), (self.dataset, self.filt2, self.col)]

    @property
    def name(self):
        return f'{self.filt2} - {self.filt1} ({self.col})'

    @property
    def shortname(self):
        return f"{self.col}_{self.filt2.replace('-', '')}m{self.filt1.replace('-', '')}"

891 

892 

class Labeller(Functor):
    """Base class for functors that attach categorical labels.

    Its main job is to force ``dropna=False`` on every call, so a label is
    produced for every row regardless of what the caller requests.
    """
    _null_label = 'null'
    _allow_difference = False
    name = 'label'
    _force_str = False

    def __call__(self, parq, dropna=False, **kwargs):
        # Deliberately ignore the caller's dropna value.
        return super().__call__(parq, dropna=False, **kwargs)

903 

904 

class StarGalaxyLabeller(Labeller):
    """Label each source from its ClassificationExtendedness value."""
    _columns = ["base_ClassificationExtendedness_value"]
    _column = "base_ClassificationExtendedness_value"

    def _func(self, df):
        extendedness = df[self._columns][self._column]
        missing = extendedness.isnull()
        # Code 0/1 from the 0.5 threshold; code 2 where the value is null.
        codes = (extendedness < 0.5).astype(int).mask(missing, 2)

        # TODO: DM-21954 Look into veracity of inline comment below
        # are these backwards?
        categories = ['galaxy', 'star', self._null_label]
        label = pd.Series(pd.Categorical.from_codes(codes, categories=categories),
                          index=extendedness.index, name='label')
        return label.astype(str) if self._force_str else label

923 

924 

class NumStarLabeller(Labeller):
    """Label sources by the value of their ``numStarFlags`` count."""
    _columns = ['numStarFlags']
    # NOTE(review): this mapping is not used by `_func` below, and its keys
    # ('star'/'maybe'/'notStar') do not match the labels list there
    # ('noStar'/'maybe'/'star') — confirm which set is authoritative.
    labels = {"star": 0, "maybe": 1, "notStar": 2}

    def _func(self, df):
        x = df[self._columns][self._columns[0]]

        # Number of filters
        n = len(x.unique()) - 1

        # Bin the counts: <=0 -> 'noStar', 1..n-1 -> 'maybe', n -> 'star'.
        labels = ['noStar', 'maybe', 'star']
        label = pd.Series(pd.cut(x, [-1, 0, n-1, n], labels=labels),
                          index=x.index, name='label')

        if self._force_str:
            label = label.astype(str)

        return label

943 

944 

class DeconvolvedMoments(Functor):
    """Deconvolved second moments: source trace minus PSF trace."""
    name = 'Deconvolved Moments'
    shortname = 'deconvolvedMoments'
    _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
                "ext_shapeHSM_HsmSourceMoments_yy",
                "base_SdssShape_xx", "base_SdssShape_yy",
                "ext_shapeHSM_HsmPsfMoments_xx",
                "ext_shapeHSM_HsmPsfMoments_yy")

    def _func(self, df):
        """Calculate deconvolved moments"""
        if "ext_shapeHSM_HsmSourceMoments_xx" in df.columns:  # _xx added by tdm
            hsm = df["ext_shapeHSM_HsmSourceMoments_xx"] + df["ext_shapeHSM_HsmSourceMoments_yy"]
        else:
            hsm = np.ones(len(df))*np.nan

        sdss = df["base_SdssShape_xx"] + df["base_SdssShape_yy"]

        if "ext_shapeHSM_HsmPsfMoments_xx" not in df.columns:
            # LSST does not have shape.sdss.psf. Could instead add
            # base_PsfShape to the catalog using
            # exposure.getPsf().computeShape(s.getCentroid()).getIxx()
            # raise TaskError("No psf shape parameter found in catalog")
            raise RuntimeError('No psf shape parameter found in catalog')
        psf = df["ext_shapeHSM_HsmPsfMoments_xx"] + df["ext_shapeHSM_HsmPsfMoments_yy"]

        # Prefer HSM source moments where finite, falling back to SDSS.
        return hsm.where(np.isfinite(hsm), sdss) - psf

970 

971 

class SdssTraceSize(Functor):
    """Functor to calculate SDSS trace radius size for sources"""
    name = "SDSS Trace Size"
    shortname = 'sdssTrace'
    _columns = ("base_SdssShape_xx", "base_SdssShape_yy")

    def _func(self, df):
        # Trace radius: sqrt of the mean of the two diagonal second moments.
        traceSum = df["base_SdssShape_xx"] + df["base_SdssShape_yy"]
        return np.sqrt(0.5*traceSum)

981 

982 

class PsfSdssTraceSizeDiff(Functor):
    """Percent difference of SDSS trace radius between object and PSF model."""
    name = "PSF - SDSS Trace Size"
    shortname = 'psf_sdssTrace'
    _columns = ("base_SdssShape_xx", "base_SdssShape_yy",
                "base_SdssShape_psf_xx", "base_SdssShape_psf_yy")

    def _func(self, df):
        objSize = np.sqrt(0.5*(df["base_SdssShape_xx"] + df["base_SdssShape_yy"]))
        psfSize = np.sqrt(0.5*(df["base_SdssShape_psf_xx"] + df["base_SdssShape_psf_yy"]))
        # Percent difference, normalized by the mean of the two sizes.
        return 100*(objSize - psfSize)/(0.5*(objSize + psfSize))

995 

996 

class HsmTraceSize(Functor):
    """Functor to calculate HSM trace radius size for sources"""
    name = 'HSM Trace Size'
    shortname = 'hsmTrace'
    _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
                "ext_shapeHSM_HsmSourceMoments_yy")

    def _func(self, df):
        # Trace radius: sqrt of the mean of the two diagonal second moments.
        traceSum = (df["ext_shapeHSM_HsmSourceMoments_xx"]
                    + df["ext_shapeHSM_HsmSourceMoments_yy"])
        return np.sqrt(0.5*traceSum)

1008 

1009 

class PsfHsmTraceSizeDiff(Functor):
    """Percent difference of HSM trace radius between object and PSF model."""
    name = 'PSF - HSM Trace Size'
    shortname = 'psf_HsmTrace'
    _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
                "ext_shapeHSM_HsmSourceMoments_yy",
                "ext_shapeHSM_HsmPsfMoments_xx",
                "ext_shapeHSM_HsmPsfMoments_yy")

    def _func(self, df):
        objSize = np.sqrt(0.5*(df["ext_shapeHSM_HsmSourceMoments_xx"]
                               + df["ext_shapeHSM_HsmSourceMoments_yy"]))
        psfSize = np.sqrt(0.5*(df["ext_shapeHSM_HsmPsfMoments_xx"]
                               + df["ext_shapeHSM_HsmPsfMoments_yy"]))
        # Percent difference, normalized by the mean of the two sizes.
        return 100*(objSize - psfSize)/(0.5*(objSize + psfSize))

1026 

1027 

class HsmFwhm(Functor):
    """PSF FWHM in arcseconds from HSM PSF moments, using a fixed pixel scale."""
    name = 'HSM Psf FWHM'
    _columns = ('ext_shapeHSM_HsmPsfMoments_xx', 'ext_shapeHSM_HsmPsfMoments_yy')
    # TODO: DM-21403 pixel scale should be computed from the CD matrix or transform matrix
    pixelScale = 0.168
    # Gaussian sigma -> FWHM conversion factor.
    SIGMA2FWHM = 2*np.sqrt(2*np.log(2))

    def _func(self, df):
        sigma = np.sqrt(0.5*(df['ext_shapeHSM_HsmPsfMoments_xx']
                             + df['ext_shapeHSM_HsmPsfMoments_yy']))
        return self.pixelScale*self.SIGMA2FWHM*sigma

1038 

1039 

class E1(Functor):
    """Distortion-style ellipticity component e1 = (Ixx - Iyy) / (Ixx + Iyy).

    Parameters
    ----------
    colXX : `str`
        Name of the xx second-moment column.
    colXY : `str`
        Name of the xy second-moment column.
    colYY : `str`
        Name of the yy second-moment column.
    """
    name = "Distortion Ellipticity (e1)"
    shortname = "Distortion"

    def __init__(self, colXX, colXY, colYY, **kwargs):
        self.colXX = colXX
        self.colXY = colXY
        self.colYY = colYY
        self._columns = [self.colXX, self.colXY, self.colYY]
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.colXX, self.colXY, self.colYY]

    def _func(self, df):
        # Bug fix: the numerator must be parenthesized.  The previous code
        # computed Ixx - Iyy/(Ixx + Iyy) due to operator precedence, which is
        # not the e1 ellipticity (compare E2 = 2*Ixy/(Ixx + Iyy)).
        return (df[self.colXX] - df[self.colYY]) / (df[self.colXX] + df[self.colYY])

1057 

1058 

class E2(Functor):
    """Ellipticity component e2 = 2*Ixy / (Ixx + Iyy).

    Parameters
    ----------
    colXX : `str`
        Name of the xx second-moment column.
    colXY : `str`
        Name of the xy second-moment column.
    colYY : `str`
        Name of the yy second-moment column.
    """
    name = "Ellipticity e2"

    def __init__(self, colXX, colXY, colYY, **kwargs):
        self.colXX = colXX
        self.colXY = colXY
        self.colYY = colYY
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.colXX, self.colXY, self.colYY]

    def _func(self, df):
        traceSum = df[self.colXX] + df[self.colYY]
        return 2*df[self.colXY] / traceSum

1074 

1075 

class RadiusFromQuadrupole(Functor):
    """Determinant radius from quadrupole moments: (Ixx*Iyy - Ixy**2)**(1/4)."""

    def __init__(self, colXX, colXY, colYY, **kwargs):
        self.colXX = colXX
        self.colXY = colXY
        self.colYY = colYY
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.colXX, self.colXY, self.colYY]

    def _func(self, df):
        determinant = df[self.colXX]*df[self.colYY] - df[self.colXY]**2
        # Fourth root of the quadrupole determinant.
        return determinant**0.25

1090 

1091 

class LocalWcs(Functor):
    """Computations using the stored localWcs.
    """
    name = "LocalWcsOperations"

    def __init__(self,
                 colCD_1_1,
                 colCD_1_2,
                 colCD_2_1,
                 colCD_2_2,
                 **kwargs):
        # Column names holding the four elements of the local CD matrix.
        self.colCD_1_1 = colCD_1_1
        self.colCD_1_2 = colCD_1_2
        self.colCD_2_1 = colCD_2_1
        self.colCD_2_2 = colCD_2_2
        super().__init__(**kwargs)

    def computeDeltaRaDec(self, x, y, cd11, cd12, cd21, cd22):
        """Apply the local Wcs affine transform to a pixel offset (x, y),
        giving the corresponding (RA, Dec) offset.

        Parameters
        ----------
        x : `pandas.Series`
            X pixel coordinate.
        y : `pandas.Series`
            Y pixel coordinate.
        cd11 : `pandas.Series`
            [1, 1] element of the local Wcs affine transform.
        cd12 : `pandas.Series`
            [1, 2] element of the local Wcs affine transform.
        cd21 : `pandas.Series`
            [2, 1] element of the local Wcs affine transform.
        cd22 : `pandas.Series`
            [2, 2] element of the local Wcs affine transform.

        Returns
        -------
        raDecTuple : tuple
            RA and dec conversion of x and y given the local Wcs. Returned
            units are in radians.

        """
        return (x * cd11 + y * cd12, x * cd21 + y * cd22)

    def computeSkySeperation(self, ra1, dec1, ra2, dec2):
        """Compute the great-circle distance between two sky coordinates.

        Parameters
        ----------
        ra1 : `pandas.Series`
            Ra of the first coordinate in radians.
        dec1 : `pandas.Series`
            Dec of the first coordinate in radians.
        ra2 : `pandas.Series`
            Ra of the second coordinate in radians.
        dec2 : `pandas.Series`
            Dec of the second coordinate in radians.

        Returns
        -------
        dist : `pandas.Series`
            Distance on the sphere in radians.
        """
        deltaDec = dec2 - dec1
        deltaRa = ra2 - ra1
        # Haversine formula: numerically stable for small separations.
        return 2 * np.arcsin(
            np.sqrt(
                np.sin(deltaDec / 2) ** 2
                + np.cos(dec2) * np.cos(dec1) * np.sin(deltaRa / 2) ** 2))

    def getSkySeperationFromPixel(self, x1, y1, x2, y2, cd11, cd12, cd21, cd22):
        """Compute the distance on the sphere from (x1, y1) to (x2, y2).

        Parameters
        ----------
        x1 : `pandas.Series`
            X pixel coordinate.
        y1 : `pandas.Series`
            Y pixel coordinate.
        x2 : `pandas.Series`
            X pixel coordinate.
        y2 : `pandas.Series`
            Y pixel coordinate.
        cd11 : `pandas.Series`
            [1, 1] element of the local Wcs affine transform.
        cd12 : `pandas.Series`
            [1, 2] element of the local Wcs affine transform.
        cd21 : `pandas.Series`
            [2, 1] element of the local Wcs affine transform.
        cd22 : `pandas.Series`
            [2, 2] element of the local Wcs affine transform.

        Returns
        -------
        Distance : `pandas.Series`
            Distance on the sphere in radians.
        """
        ra1, dec1 = self.computeDeltaRaDec(x1, y1, cd11, cd12, cd21, cd22)
        ra2, dec2 = self.computeDeltaRaDec(x2, y2, cd11, cd12, cd21, cd22)
        # Great circle distance for small separations.
        return self.computeSkySeperation(ra1, dec1, ra2, dec2)

1197 

1198 

class ComputePixelScale(LocalWcs):
    """Compute the local pixel scale from the stored CDMatrix.
    """
    name = "PixelScale"

    @property
    def columns(self):
        return [self.colCD_1_1,
                self.colCD_1_2,
                self.colCD_2_1,
                self.colCD_2_2]

    def pixelScaleArcseconds(self, cd11, cd12, cd21, cd22):
        """Compute the local pixel scale in arcseconds per pixel.

        Parameters
        ----------
        cd11 : `pandas.Series`
            [1, 1] element of the local Wcs affine transform in radians.
        cd12 : `pandas.Series`
            [1, 2] element of the local Wcs affine transform in radians.
        cd21 : `pandas.Series`
            [2, 1] element of the local Wcs affine transform in radians.
        cd22 : `pandas.Series`
            [2, 2] element of the local Wcs affine transform in radians.

        Returns
        -------
        pixScale : `pandas.Series`
            Arcseconds per pixel at the location of the local WC
        """
        # sqrt(|det CD|) is the linear scale; convert radians -> arcseconds.
        determinant = cd11 * cd22 - cd12 * cd21
        return 3600 * np.degrees(np.sqrt(np.fabs(determinant)))

    def _func(self, df):
        cdElements = [df[colName] for colName in self.columns]
        return self.pixelScaleArcseconds(*cdElements)

1239 

1240 

class ConvertPixelToArcseconds(ComputePixelScale):
    """Convert a value in units of pixels to units of arcseconds.

    The input column is multiplied once by the local pixel scale
    (arcseconds per pixel) derived from the stored CD matrix.
    """

    def __init__(self,
                 col,
                 colCD_1_1,
                 colCD_1_2,
                 colCD_2_1,
                 colCD_2_2,
                 **kwargs):
        # Column (in pixel units) to convert to arcseconds.
        self.col = col
        super().__init__(colCD_1_1,
                         colCD_1_2,
                         colCD_2_1,
                         colCD_2_2,
                         **kwargs)

    @property
    def name(self):
        return f"{self.col}_asArcseconds"

    @property
    def columns(self):
        return [self.col,
                self.colCD_1_1,
                self.colCD_1_2,
                self.colCD_2_1,
                self.colCD_2_2]

    def _func(self, df):
        # One factor of the pixel scale: pixels -> arcseconds.
        return df[self.col] * self.pixelScaleArcseconds(df[self.colCD_1_1],
                                                        df[self.colCD_1_2],
                                                        df[self.colCD_2_1],
                                                        df[self.colCD_2_2])

1276 

1277 

class ConvertPixelSqToArcsecondsSq(ComputePixelScale):
    """Convert a value in units of pixels squared to units of arcseconds
    squared.

    The input column is multiplied twice by the local pixel scale
    (arcseconds per pixel) derived from the stored CD matrix.
    """

    def __init__(self,
                 col,
                 colCD_1_1,
                 colCD_1_2,
                 colCD_2_1,
                 colCD_2_2,
                 **kwargs):
        # Column (in pixel**2 units) to convert to arcseconds**2.
        self.col = col
        super().__init__(colCD_1_1,
                         colCD_1_2,
                         colCD_2_1,
                         colCD_2_2,
                         **kwargs)

    @property
    def name(self):
        return f"{self.col}_asArcsecondsSq"

    @property
    def columns(self):
        return [self.col,
                self.colCD_1_1,
                self.colCD_1_2,
                self.colCD_2_1,
                self.colCD_2_2]

    def _func(self, df):
        pixScale = self.pixelScaleArcseconds(df[self.colCD_1_1],
                                             df[self.colCD_1_2],
                                             df[self.colCD_2_1],
                                             df[self.colCD_2_2])
        # Two factors of the pixel scale: pixels**2 -> arcseconds**2.
        return df[self.col] * pixScale * pixScale

1314 

1315 

class ReferenceBand(Functor):
    """Short band name in which each object's reference measurement was made."""
    name = 'Reference Band'
    shortname = 'refBand'

    @property
    def columns(self):
        return ["merge_measurement_i",
                "merge_measurement_r",
                "merge_measurement_z",
                "merge_measurement_y",
                "merge_measurement_g"]

    def _func(self, df):
        def bandOfRow(row):
            # idxmax yields the column with the max value (True > False),
            # i.e. the first flag column that is set.
            return row.idxmax().replace('merge_measurement_', '')

        return df[self.columns].apply(bandOfRow, axis=1)

1335 

1336 

class Photometry(Functor):
    """Base class converting raw (DN) fluxes to calibrated fluxes/magnitudes.

    Parameters
    ----------
    colFlux : `str`
        Name of the flux column to calibrate.
    colFluxErr : `str`, optional
        Name of the associated flux-error column.
    calib : optional
        Object providing ``getFluxMag0()``; when `None`, the hard-coded
        coadd zero point ``COADD_ZP`` is used.
    """
    # AB to NanoJansky (3631 Jansky)
    AB_FLUX_SCALE = (0 * u.ABmag).to_value(u.nJy)
    LOG_AB_FLUX_SCALE = 12.56
    FIVE_OVER_2LOG10 = 1.085736204758129569
    # TO DO: DM-21955 Replace hard coded photometic calibration values
    COADD_ZP = 27

    def __init__(self, colFlux, colFluxErr=None, calib=None, **kwargs):
        self.vhypot = np.vectorize(self.hypot)
        self.col = colFlux
        self.colFluxErr = colFluxErr

        self.calib = calib
        if calib is not None:
            self.fluxMag0, self.fluxMag0Err = calib.getFluxMag0()
        else:
            self.fluxMag0 = 1./np.power(10, -0.4*self.COADD_ZP)
            self.fluxMag0Err = 0.

        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.col]

    @property
    def name(self):
        return f'mag_{self.col}'

    @classmethod
    def hypot(cls, a, b):
        # Overflow-safe hypotenuse: factor out the larger magnitude.
        if np.abs(a) < np.abs(b):
            a, b = b, a
        if a == 0.:
            return 0.
        q = b/a
        return np.abs(a) * np.sqrt(1. + q*q)

    def dn2flux(self, dn, fluxMag0):
        """Convert raw counts to flux in nanojansky."""
        return self.AB_FLUX_SCALE * dn / fluxMag0

    def dn2mag(self, dn, fluxMag0):
        """Convert raw counts to an AB magnitude.

        Non-positive fluxes produce NaN/inf without emitting warnings.
        """
        # Bug fix: ``np.warnings`` was a private alias removed in NumPy 1.25;
        # np.errstate is the supported way to silence invalid/divide-by-zero
        # floating-point warnings from log10 of non-positive values.
        with np.errstate(invalid='ignore', divide='ignore'):
            return -2.5 * np.log10(dn/fluxMag0)

    def dn2fluxErr(self, dn, dnErr, fluxMag0, fluxMag0Err):
        """Propagate count and zero-point errors to a flux error."""
        retVal = self.vhypot(dn * fluxMag0Err, dnErr * fluxMag0)
        retVal *= self.AB_FLUX_SCALE / fluxMag0 / fluxMag0
        return retVal

    def dn2MagErr(self, dn, dnErr, fluxMag0, fluxMag0Err):
        """Propagate count and zero-point errors to a magnitude error."""
        retVal = self.dn2fluxErr(dn, dnErr, fluxMag0, fluxMag0Err) / self.dn2flux(dn, fluxMag0)
        return self.FIVE_OVER_2LOG10 * retVal

1393 

1394 

class NanoJansky(Photometry):
    """Calibrated flux in nanojansky for the configured flux column."""
    def _func(self, df):
        rawFlux = df[self.col]
        return self.dn2flux(rawFlux, self.fluxMag0)

1398 

1399 

class NanoJanskyErr(Photometry):
    """Error on the calibrated flux in nanojansky."""
    @property
    def columns(self):
        return [self.col, self.colFluxErr]

    def _func(self, df):
        fluxErr = self.dn2fluxErr(df[self.col], df[self.colFluxErr],
                                  self.fluxMag0, self.fluxMag0Err)
        # dn2fluxErr goes through np.vectorize, so re-wrap as a Series.
        return pd.Series(fluxErr, index=df.index)

1408 

1409 

class Magnitude(Photometry):
    """Calibrated AB magnitude for the configured flux column."""
    def _func(self, df):
        rawFlux = df[self.col]
        return self.dn2mag(rawFlux, self.fluxMag0)

1413 

1414 

class MagnitudeErr(Photometry):
    """Error on the calibrated AB magnitude."""
    @property
    def columns(self):
        return [self.col, self.colFluxErr]

    def _func(self, df):
        magErr = self.dn2MagErr(df[self.col], df[self.colFluxErr],
                                self.fluxMag0, self.fluxMag0Err)
        # dn2MagErr goes through np.vectorize, so re-wrap as a Series.
        return pd.Series(magErr, index=df.index)

1423 

1424 

class LocalPhotometry(Functor):
    """Base class for calibrating the specified instrument flux column using
    the local photometric calibration.

    Parameters
    ----------
    instFluxCol : `str`
        Name of the instrument flux column.
    instFluxErrCol : `str`
        Name of the assocated error columns for ``instFluxCol``.
    photoCalibCol : `str`
        Name of local calibration column.
    photoCalibErrCol : `str`
        Error associated with ``photoCalibCol``

    See also
    --------
    LocalPhotometry
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    """
    logNJanskyToAB = (1 * u.nJy).to_value(u.ABmag)

    def __init__(self,
                 instFluxCol,
                 instFluxErrCol,
                 photoCalibCol,
                 photoCalibErrCol,
                 **kwargs):
        self.instFluxCol = instFluxCol
        self.instFluxErrCol = instFluxErrCol
        self.photoCalibCol = photoCalibCol
        self.photoCalibErrCol = photoCalibErrCol
        super().__init__(**kwargs)

    def instFluxToNanojansky(self, instFlux, localCalib):
        """Convert instrument flux to nanojanskys.

        Parameters
        ----------
        instFlux : `numpy.ndarray` or `pandas.Series`
            Array of instrument flux measurements
        localCalib : `numpy.ndarray` or `pandas.Series`
            Array of local photometric calibration estimates.

        Returns
        -------
        calibFlux : `numpy.ndarray` or `pandas.Series`
            Array of calibrated flux measurements.
        """
        return instFlux * localCalib

    def instFluxErrToNanojanskyErr(self, instFlux, instFluxErr, localCalib, localCalibErr):
        """Convert instrument flux error to a nanojansky error.

        Parameters
        ----------
        instFlux : `numpy.ndarray` or `pandas.Series`
            Array of instrument flux measurements
        instFluxErr : `numpy.ndarray` or `pandas.Series`
            Errors on associated ``instFlux`` values
        localCalib : `numpy.ndarray` or `pandas.Series`
            Array of local photometric calibration estimates.
        localCalibErr : `numpy.ndarray` or `pandas.Series`
            Errors on associated ``localCalib`` values

        Returns
        -------
        calibFluxErr : `numpy.ndarray` or `pandas.Series`
            Errors on calibrated flux measurements.
        """
        return np.hypot(instFluxErr * localCalib, instFlux * localCalibErr)

    def instFluxToMagnitude(self, instFlux, localCalib):
        """Convert instrument flux to an AB magnitude.

        Parameters
        ----------
        instFlux : `numpy.ndarray` or `pandas.Series`
            Array of instrument flux measurements
        localCalib : `numpy.ndarray` or `pandas.Series`
            Array of local photometric calibration estimates.

        Returns
        -------
        calibMag : `numpy.ndarray` or `pandas.Series`
            Array of calibrated AB magnitudes.
        """
        return -2.5 * np.log10(self.instFluxToNanojansky(instFlux, localCalib)) + self.logNJanskyToAB

    def instFluxErrToMagnitudeErr(self, instFlux, instFluxErr, localCalib, localCalibErr):
        """Convert instrument flux error to an AB magnitude error.

        Parameters
        ----------
        instFlux : `numpy.ndarray` or `pandas.Series`
            Array of instrument flux measurements
        instFluxErr : `numpy.ndarray` or `pandas.Series`
            Errors on associated ``instFlux`` values
        localCalib : `numpy.ndarray` or `pandas.Series`
            Array of local photometric calibration estimates.
        localCalibErr : `numpy.ndarray` or `pandas.Series`
            Errors on associated ``localCalib`` values

        Returns
        -------
        calibMagErr: `numpy.ndarray` or `pandas.Series`
            Error on calibrated AB magnitudes.
        """
        err = self.instFluxErrToNanojanskyErr(instFlux, instFluxErr, localCalib, localCalibErr)
        # Bug fix: the flux in the denominator must be calibrated with
        # ``localCalib``; previously ``instFluxErr`` was passed as the
        # calibration by mistake, yielding a wrong magnitude error.
        return 2.5 / np.log(10) * err / self.instFluxToNanojansky(instFlux, localCalib)

1538 

1539 

class LocalNanojansky(LocalPhotometry):
    """Compute calibrated fluxes using the local calibration value.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    """

    @property
    def columns(self):
        return [self.instFluxCol, self.photoCalibCol]

    @property
    def name(self):
        return f'flux_{self.instFluxCol}'

    def _func(self, df):
        instFlux = df[self.instFluxCol]
        localCalib = df[self.photoCalibCol]
        return self.instFluxToNanojansky(instFlux, localCalib)

1561 

1562 

class LocalNanojanskyErr(LocalPhotometry):
    """Compute calibrated flux errors using the local calibration value.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    """

    @property
    def columns(self):
        return [self.instFluxCol, self.instFluxErrCol,
                self.photoCalibCol, self.photoCalibErrCol]

    @property
    def name(self):
        return f'fluxErr_{self.instFluxCol}'

    def _func(self, df):
        instFlux = df[self.instFluxCol]
        instFluxErr = df[self.instFluxErrCol]
        localCalib = df[self.photoCalibCol]
        localCalibErr = df[self.photoCalibErrCol]
        return self.instFluxErrToNanojanskyErr(instFlux, instFluxErr,
                                               localCalib, localCalibErr)

1586 

1587 

class LocalMagnitude(LocalPhotometry):
    """Compute calibrated AB magnitudes using the local calibration value.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    """

    @property
    def columns(self):
        return [self.instFluxCol, self.photoCalibCol]

    @property
    def name(self):
        return f'mag_{self.instFluxCol}'

    def _func(self, df):
        instFlux = df[self.instFluxCol]
        localCalib = df[self.photoCalibCol]
        return self.instFluxToMagnitude(instFlux, localCalib)

1610 

1611 

class LocalMagnitudeErr(LocalPhotometry):
    """Compute calibrated AB magnitude errors using the local calibration value.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    """

    @property
    def columns(self):
        return [self.instFluxCol, self.instFluxErrCol,
                self.photoCalibCol, self.photoCalibErrCol]

    @property
    def name(self):
        return f'magErr_{self.instFluxCol}'

    def _func(self, df):
        instFlux = df[self.instFluxCol]
        instFluxErr = df[self.instFluxErrCol]
        localCalib = df[self.photoCalibCol]
        localCalibErr = df[self.photoCalibErrCol]
        return self.instFluxErrToMagnitudeErr(instFlux, instFluxErr,
                                              localCalib, localCalibErr)

1637 

1638 

class LocalDipoleMeanFlux(LocalPhotometry):
    """Compute absolute mean of dipole fluxes.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    LocalDipoleMeanFlux
    LocalDipoleMeanFluxErr
    LocalDipoleDiffFlux
    LocalDipoleDiffFluxErr
    """
    def __init__(self,
                 instFluxPosCol,
                 instFluxNegCol,
                 instFluxPosErrCol,
                 instFluxNegErrCol,
                 photoCalibCol,
                 photoCalibErrCol,
                 **kwargs):
        self.instFluxNegCol = instFluxNegCol
        self.instFluxPosCol = instFluxPosCol
        self.instFluxNegErrCol = instFluxNegErrCol
        self.instFluxPosErrCol = instFluxPosErrCol
        self.photoCalibCol = photoCalibCol
        self.photoCalibErrCol = photoCalibErrCol
        super().__init__(instFluxNegCol,
                         instFluxNegErrCol,
                         photoCalibCol,
                         photoCalibErrCol,
                         **kwargs)

    @property
    def columns(self):
        return [self.instFluxPosCol,
                self.instFluxNegCol,
                self.photoCalibCol]

    @property
    def name(self):
        return f'dipMeanFlux_{self.instFluxPosCol}_{self.instFluxNegCol}'

    def _func(self, df):
        localCalib = df[self.photoCalibCol]
        negFlux = np.fabs(self.instFluxToNanojansky(df[self.instFluxNegCol], localCalib))
        posFlux = np.fabs(self.instFluxToNanojansky(df[self.instFluxPosCol], localCalib))
        # Mean of the absolute calibrated lobe fluxes.
        return 0.5*(negFlux + posFlux)

1686 

1687 

class LocalDipoleMeanFluxErr(LocalDipoleMeanFlux):
    """Compute the error on the absolute mean of dipole fluxes.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    LocalDipoleMeanFlux
    LocalDipoleMeanFluxErr
    LocalDipoleDiffFlux
    LocalDipoleDiffFluxErr
    """

    @property
    def columns(self):
        return [self.instFluxPosCol,
                self.instFluxNegCol,
                self.instFluxPosErrCol,
                self.instFluxNegErrCol,
                self.photoCalibCol,
                self.photoCalibErrCol]

    @property
    def name(self):
        return f'dipMeanFluxErr_{self.instFluxPosCol}_{self.instFluxNegCol}'

    def _func(self, df):
        # Standard error propagation for 0.5*(|neg| + |pos|)*calib.
        # Bug fix: the calibration-error term must multiply the full
        # (|neg| + |pos|) sum; the previous code read
        # (|neg| + |pos|*calibErr)**2 due to missing parentheses (compare the
        # correctly parenthesized LocalDipoleDiffFluxErr).
        return 0.5*np.sqrt(
            ((np.fabs(df[self.instFluxNegCol]) + np.fabs(df[self.instFluxPosCol]))
             * df[self.photoCalibErrCol])**2
            + (df[self.instFluxNegErrCol]**2 + df[self.instFluxPosErrCol]**2)
            * df[self.photoCalibCol]**2)

1722 

1723 

class LocalDipoleDiffFlux(LocalDipoleMeanFlux):
    """Compute the absolute difference of dipole fluxes.

    Value is (abs(pos) - abs(neg))

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    LocalDipoleMeanFlux
    LocalDipoleMeanFluxErr
    LocalDipoleDiffFlux
    LocalDipoleDiffFluxErr
    """

    @property
    def columns(self):
        return [self.instFluxPosCol,
                self.instFluxNegCol,
                self.photoCalibCol]

    @property
    def name(self):
        return f'dipDiffFlux_{self.instFluxPosCol}_{self.instFluxNegCol}'

    def _func(self, df):
        localCalib = df[self.photoCalibCol]
        posFlux = np.fabs(self.instFluxToNanojansky(df[self.instFluxPosCol], localCalib))
        negFlux = np.fabs(self.instFluxToNanojansky(df[self.instFluxNegCol], localCalib))
        return posFlux - negFlux

1754 

1755 

class LocalDipoleDiffFluxErr(LocalDipoleMeanFlux):
    """Compute the error on the absolute difference of dipole fluxes.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    LocalDipoleMeanFlux
    LocalDipoleMeanFluxErr
    LocalDipoleDiffFlux
    LocalDipoleDiffFluxErr
    """

    @property
    def columns(self):
        return [self.instFluxPosCol,
                self.instFluxNegCol,
                self.instFluxPosErrCol,
                self.instFluxNegErrCol,
                self.photoCalibCol,
                self.photoCalibErrCol]

    @property
    def name(self):
        return f'dipDiffFluxErr_{self.instFluxPosCol}_{self.instFluxNegCol}'

    def _func(self, df):
        # Standard error propagation for (|pos| - |neg|)*calib.
        absDiff = np.fabs(df[self.instFluxPosCol]) - np.fabs(df[self.instFluxNegCol])
        calibTerm = (absDiff * df[self.photoCalibErrCol])**2
        fluxTerm = ((df[self.instFluxPosErrCol]**2 + df[self.instFluxNegErrCol]**2)
                    * df[self.photoCalibCol]**2)
        return np.sqrt(calibTerm + fluxTerm)

1790 

1791 

class Ratio(Functor):
    """Base class for returning the ratio of 2 columns.

    Can be used to compute a Signal to Noise ratio for any input flux.

    Parameters
    ----------
    numerator : `str`
        Name of the column to use at the numerator in the ratio
    denominator : `str`
        Name of the column to use as the denominator in the ratio.
    """
    def __init__(self,
                 numerator,
                 denominator,
                 **kwargs):
        self.numerator = numerator
        self.denominator = denominator
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.numerator, self.denominator]

    @property
    def name(self):
        return f'ratio_{self.numerator}_{self.denominator}'

    def _func(self, df):
        # Bug fix: ``np.warnings`` was a private alias removed in NumPy 1.25;
        # np.errstate is the supported way to silence invalid/divide-by-zero
        # floating-point warnings from the element-wise division.
        with np.errstate(invalid='ignore', divide='ignore'):
            return df[self.numerator] / df[self.denominator]