Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

import re
import warnings

import astropy.units as u
import numpy as np
import pandas as pd
import yaml

from lsst.daf.persistence import doImport

from .parquetTable import MultilevelParquetTable

10 

11 

def init_fromDict(initDict, basePath='lsst.pipe.tasks.functors',
                  typeKey='functor', name=None):
    """Initialize an object defined in a dictionary

    The object needs to be importable as
    '{0}.{1}'.format(basePath, initDict[typeKey])
    The positional and keyword arguments (if any) are contained in
    "args" and "kwargs" entries in the dictionary, respectively.
    This is used in `functors.CompositeFunctor.from_yaml` to initialize
    a composite functor from a specification in a YAML file.

    Parameters
    ----------
    initDict : dictionary
        Dictionary describing object's initialization. Must contain
        an entry keyed by ``typeKey`` that is the name of the object,
        relative to ``basePath``.
    basePath : str
        Path relative to module in which ``initDict[typeKey]`` is defined.
    typeKey : str
        Key of ``initDict`` that is the name of the object
        (relative to `basePath`).
    name : str, optional
        Label used in the error message if construction fails.

    Returns
    -------
    The newly constructed object.
    """
    initDict = initDict.copy()
    # TO DO: DM-21956 We should be able to define functors outside this module
    pythonType = doImport('{0}.{1}'.format(basePath, initDict.pop(typeKey)))
    args = []
    if 'args' in initDict:
        args = initDict.pop('args')
        if isinstance(args, str):
            args = [args]
    try:
        element = pythonType(*args, **initDict)
    except Exception as e:
        message = f'Error in constructing functor "{name}" of type {pythonType.__name__} with args: {args}'
        # Chain the original exception so its traceback and message are
        # preserved (previously the cause was dropped).
        raise type(e)(message, e.args) from e
    return element

49 

50 

class Functor(object):
    """Define and execute a calculation on a ParquetTable

    The `__call__` method accepts a `ParquetTable` object, and returns the
    result of the calculation as a single column. Each functor defines what
    columns are needed for the calculation, and only these columns are read
    from the `ParquetTable`.

    The action of `__call__` consists of two steps: first, loading the
    necessary columns from disk into memory as a `pandas.DataFrame` object;
    and second, performing the computation on this dataframe and returning the
    result.

    To define a new `Functor`, a subclass must define a `_func` method,
    that takes a `pandas.DataFrame` and returns result in a `pandas.Series`.
    In addition, it must define the following attributes

    * `_columns`: The columns necessary to perform the calculation
    * `name`: A name appropriate for a figure axis label
    * `shortname`: A name appropriate for use as a dictionary key

    On initialization, a `Functor` should declare what filter (`filt` kwarg)
    and dataset (e.g. `'ref'`, `'meas'`, `'forced_src'`) it is intended to be
    applied to. This enables the `_get_cols` method to extract the proper
    columns from the parquet file. If not specified, the dataset will fall back
    on the `_defaultDataset` attribute. If filter is not specified and `dataset`
    is anything other than `'ref'`, then an error will be raised when trying to
    perform the calculation.

    As currently implemented, `Functor` is only set up to expect a
    `ParquetTable` of the format of the `deepCoadd_obj` dataset; that is, a
    `MultilevelParquetTable` with the levels of the column index being `filter`,
    `dataset`, and `column`. This is defined in the `_columnLevels` attribute,
    as well as being implicit in the role of the `filt` and `dataset` attributes
    defined at initialization. In addition, the `_get_cols` method that reads
    the dataframe from the `ParquetTable` will return a dataframe with column
    index levels defined by the `_dfLevels` attribute; by default, this is
    `column`.

    The `_columnLevels` and `_dfLevels` attributes should generally not need to
    be changed, unless `_func` needs columns from multiple filters or datasets
    to do the calculation.
    An example of this is the `lsst.pipe.tasks.functors.Color` functor, for
    which `_dfLevels = ('filter', 'column')`, and `_func` expects the dataframe
    it gets to have those levels in the column index.

    Parameters
    ----------
    filt : str
        Filter upon which to do the calculation

    dataset : str
        Dataset upon which to do the calculation
        (e.g., 'ref', 'meas', 'forced_src').
    """

    _defaultDataset = 'ref'
    _columnLevels = ('filter', 'dataset', 'column')
    _dfLevels = ('column',)
    _defaultNoDup = False

    def __init__(self, filt=None, dataset=None, noDup=None):
        self.filt = filt
        self.dataset = dataset if dataset is not None else self._defaultDataset
        self._noDup = noDup

    @property
    def noDup(self):
        # Explicit per-instance setting wins over the class default.
        if self._noDup is not None:
            return self._noDup
        else:
            return self._defaultNoDup

    @property
    def columns(self):
        """Columns required to perform calculation
        """
        if not hasattr(self, '_columns'):
            raise NotImplementedError('Must define columns property or _columns attribute')
        return self._columns

    def multilevelColumns(self, parq):
        """Return the multilevel column tuples needed from ``parq``.

        Raises
        ------
        ValueError
            If ``parq`` does not have the expected column levels, or if
            `filt` is unset while the table contains multiple filters.
        """
        if not set(parq.columnLevels) == set(self._columnLevels):
            raise ValueError('ParquetTable does not have the expected column levels. ' +
                             'Got {0}; expected {1}.'.format(parq.columnLevels, self._columnLevels))

        columnDict = {'column': self.columns,
                      'dataset': self.dataset}
        if self.filt is None:
            if 'filter' in parq.columnLevels:
                if self.dataset == 'ref':
                    # 'ref' columns are filter-independent; any filter works.
                    columnDict['filter'] = parq.columnLevelNames['filter'][0]
                else:
                    raise ValueError("'filt' not set for functor {}".format(self.name) +
                                     "(dataset {}) ".format(self.dataset) +
                                     "and ParquetTable " +
                                     "contains multiple filters in column index. " +
                                     "Set 'filt' or set 'dataset' to 'ref'.")
        else:
            columnDict['filter'] = self.filt

        return parq._colsFromDict(columnDict)

    def _func(self, df, dropna=True):
        raise NotImplementedError('Must define calculation on dataframe')

    def _get_cols(self, parq):
        """Retrieve dataframe necessary for calculation.

        Returns dataframe upon which `self._func` can act.
        """
        if isinstance(parq, MultilevelParquetTable):
            columns = self.multilevelColumns(parq)
            df = parq.toDataFrame(columns=columns, droplevels=False)
            df = self._setLevels(df)
        else:
            columns = self.columns
            df = parq.toDataFrame(columns=columns)

        return df

    def _setLevels(self, df):
        # Drop any column-index levels not requested by this functor.
        levelsToDrop = [n for n in df.columns.names if n not in self._dfLevels]
        df.columns = df.columns.droplevel(levelsToDrop)
        return df

    def _dropna(self, vals):
        return vals.dropna()

    def __call__(self, parq, dropna=False):
        # Load columns outside the try block: if loading fails there is no
        # dataframe to fall back on, so the exception should propagate.
        # (Previously a failure in _get_cols left ``df`` unbound and the
        # except clause raised a confusing NameError instead.)
        df = self._get_cols(parq)
        try:
            vals = self._func(df)
        except Exception:
            vals = self.fail(df)
        if dropna:
            vals = self._dropna(vals)

        return vals

    def fail(self, df):
        """Return an all-NaN series matching ``df``'s index."""
        return pd.Series(np.full(len(df), np.nan), index=df.index)

    @property
    def name(self):
        """Full name of functor (suitable for figure labels)
        """
        # Was ``return NotImplementedError``, which silently handed back the
        # exception class instead of signalling the missing override.
        raise NotImplementedError('Subclasses must define name')

    @property
    def shortname(self):
        """Short name of functor (suitable for column name/dict key)
        """
        return self.name

207 

208 

class CompositeFunctor(Functor):
    """Perform multiple calculations at once on a catalog

    The role of a `CompositeFunctor` is to group together computations from
    multiple functors. Instead of returning `pandas.Series` a
    `CompositeFunctor` returns a `pandas.Dataframe`, with the column names
    being the keys of `funcDict`.

    The `columns` attribute of a `CompositeFunctor` is the union of all columns
    in all the component functors.

    A `CompositeFunctor` does not use a `_func` method itself; rather,
    when a `CompositeFunctor` is called, all its columns are loaded
    at once, and the resulting dataframe is passed to the `_func` method of
    each component functor. This has the advantage of only doing I/O (reading
    from parquet file) once, and works because each individual `_func` method
    of each component functor does not care if there are *extra* columns in
    the dataframe being passed; only that it must contain *at least* the
    `columns` it expects.

    An important and useful class method is `from_yaml`, which takes as
    argument the path to a YAML file specifying a collection of functors.

    Parameters
    ----------
    funcs : `dict` or `list`
        Dictionary or list of functors. If a list, then it will be converted
        into a dictonary according to the `.shortname` attribute of each
        functor.
    """
    dataset = None

    def __init__(self, funcs, **kwargs):

        if isinstance(funcs, dict):
            self.funcDict = funcs
        else:
            self.funcDict = {f.shortname: f for f in funcs}

        self._filt = None

        super().__init__(**kwargs)

    @property
    def filt(self):
        return self._filt

    @filt.setter
    def filt(self, filt):
        # Propagate the filter to every component functor.
        if filt is not None:
            for _, f in self.funcDict.items():
                f.filt = filt
        self._filt = filt

    def update(self, new):
        """Add functors from ``new`` (a dict or another CompositeFunctor)."""
        if isinstance(new, dict):
            self.funcDict.update(new)
        elif isinstance(new, CompositeFunctor):
            self.funcDict.update(new.funcDict)
        else:
            raise TypeError('Can only update with dictionary or CompositeFunctor.')

        # Make sure new functors have the same 'filt' set
        if self.filt is not None:
            self.filt = self.filt

    @property
    def columns(self):
        return list(set([x for y in [f.columns for f in self.funcDict.values()] for x in y]))

    def multilevelColumns(self, parq):
        return list(set([x for y in [f.multilevelColumns(parq)
                                     for f in self.funcDict.values()] for x in y]))

    def __call__(self, parq, **kwargs):
        """Apply all component functors to ``parq`` and collect the results.

        Returns a `pandas.DataFrame` keyed by the funcDict keys.
        """
        if isinstance(parq, MultilevelParquetTable):
            columns = self.multilevelColumns(parq)
            df = parq.toDataFrame(columns=columns, droplevels=False)
            valDict = {}
            for k, f in self.funcDict.items():
                try:
                    subdf = f._setLevels(df[f.multilevelColumns(parq)])
                except Exception:
                    # Column selection failed before ``subdf`` was bound;
                    # previously this path raised NameError (or reused a
                    # stale subdf from the previous iteration).
                    valDict[k] = f.fail(df)
                    continue
                try:
                    valDict[k] = f._func(subdf)
                except Exception:
                    valDict[k] = f.fail(subdf)
        else:
            columns = self.columns
            df = parq.toDataFrame(columns=columns)
            valDict = {k: f._func(df) for k, f in self.funcDict.items()}

        try:
            valDf = pd.concat(valDict, axis=1)
        except TypeError:
            # Aid debugging of non-Series results before re-raising.
            print([(k, type(v)) for k, v in valDict.items()])
            raise

        if kwargs.get('dropna', False):
            valDf = valDf.dropna(how='any')

        return valDf

    @classmethod
    def renameCol(cls, col, renameRules):
        """Apply (old, new) prefix-replacement rules to a column name."""
        if renameRules is None:
            return col
        for old, new in renameRules:
            if col.startswith(old):
                col = col.replace(old, new)
        return col

    @classmethod
    def from_file(cls, filename, **kwargs):
        """Construct from a YAML specification file."""
        with open(filename) as f:
            translationDefinition = yaml.safe_load(f)

        return cls.from_yaml(translationDefinition, **kwargs)

    @classmethod
    def from_yaml(cls, translationDefinition, **kwargs):
        """Construct from an already-parsed YAML specification dict."""
        funcs = {}
        for func, val in translationDefinition['funcs'].items():
            funcs[func] = init_fromDict(val, name=func)

        if 'flag_rename_rules' in translationDefinition:
            renameRules = translationDefinition['flag_rename_rules']
        else:
            renameRules = None

        if 'refFlags' in translationDefinition:
            for flag in translationDefinition['refFlags']:
                funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='ref')

        if 'flags' in translationDefinition:
            for flag in translationDefinition['flags']:
                funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='meas')

        return cls(funcs, **kwargs)

345 

346 

def mag_aware_eval(df, expr):
    """Evaluate an expression on a DataFrame, knowing what the 'mag' function means

    Builds on `pandas.DataFrame.eval`, which parses and executes math on
    dataframes.  ``mag(col)`` is translated to ``-2.5*log10(col)``; if the
    named column is missing, the expression is retried with an
    ``_instFlux`` suffix appended to the argument.

    Parameters
    ----------
    df : pandas.DataFrame
        Dataframe on which to evaluate expression.

    expr : str
        Expression.

    Returns
    -------
    val : `pandas.Series`
        Result of the evaluated expression.
    """
    # NOTE: the ``truediv`` keyword was removed from DataFrame.eval (it was
    # deprecated, and true division is the only behavior on Python 3), so it
    # is no longer passed here.
    try:
        expr_new = re.sub(r'mag\((\w+)\)', r'-2.5*log(\g<1>)/log(10)', expr)
        val = df.eval(expr_new)
    except Exception:  # Should check what actually gets raised
        expr_new = re.sub(r'mag\((\w+)\)', r'-2.5*log(\g<1>_instFlux)/log(10)', expr)
        val = df.eval(expr_new)
    return val

367 

368 

class CustomFunctor(Functor):
    """Arbitrary computation on a catalog

    Column names (and thus the columns to be loaded from catalog) are found
    by extracting every word from the expression and discarding the known
    "math-y" words.

    Parameters
    ----------
    expr : str
        Expression to evaluate, to be parsed and executed by `mag_aware_eval`.
    """
    _ignore_words = ('mag', 'sin', 'cos', 'exp', 'log', 'sqrt')

    def __init__(self, expr, **kwargs):
        self.expr = expr
        super().__init__(**kwargs)

    @property
    def name(self):
        return self.expr

    @property
    def columns(self):
        flux_cols = re.findall(r'mag\(\s*(\w+)\s*\)', self.expr)

        cols = [word for word in re.findall(r'[a-zA-Z_]+', self.expr)
                if word not in self._ignore_words]
        not_a_col = []
        for flux_col in flux_cols:
            if re.search('_instFlux$', flux_col):
                cols.append(flux_col)
            else:
                # A bare name inside mag() actually refers to its
                # corresponding _instFlux column.
                cols.append('{}_instFlux'.format(flux_col))
                not_a_col.append(flux_col)

        return list(set(c for c in cols if c not in not_a_col))

    def _func(self, df):
        return mag_aware_eval(df, self.expr)

407 

408 

class Column(Functor):
    """Get column with specified name
    """

    def __init__(self, col, **kwargs):
        self.col = col
        super().__init__(**kwargs)

    @property
    def name(self):
        # The column name doubles as the display name.
        return self.col

    @property
    def columns(self):
        return [self.col]

    def _func(self, df):
        # Simply select the requested column from the loaded dataframe.
        return df[self.col]

427 

428 

class Index(Functor):
    """Return the value of the index for each object
    """

    # A column must always be requested; coord_ra is a placeholder that every
    # catalog contains.
    columns = ['coord_ra']
    _defaultDataset = 'ref'
    _defaultNoDup = True

    def _func(self, df):
        # The result is the index itself, re-wrapped as a Series.
        return pd.Series(df.index, index=df.index)

439 

440 

class IDColumn(Column):
    """Return the object id, which is stored in the dataframe index."""
    col = 'id'
    _allow_difference = False
    _defaultNoDup = True

    def _func(self, df):
        # The id lives in the index rather than in an 'id' column.
        return pd.Series(df.index, index=df.index)

448 

449 

class FootprintNPix(Column):
    """Return the ``base_Footprint_nPix`` column (source footprint size)."""
    col = 'base_Footprint_nPix'

452 

453 

class CoordColumn(Column):
    """Base class for coordinate column, in degrees
    """
    # If True, the stored column is in radians and is converted on read.
    _radians = True

    def __init__(self, col, **kwargs):
        super().__init__(col, **kwargs)

    def _func(self, df):
        # Build a new series rather than modifying the original column,
        # which may be used by another functor.
        if self._radians:
            return df[self.col] * 180 / np.pi
        return df[self.col]

466 

467 

class RAColumn(CoordColumn):
    """Right Ascension, in degrees
    """
    name = 'RA'
    _defaultNoDup = True

    def __init__(self, **kwargs):
        # Always reads the radian-valued coord_ra column.
        super().__init__('coord_ra', **kwargs)

    def __call__(self, catalog, **kwargs):
        # Delegates entirely to the base implementation.
        return super().__call__(catalog, **kwargs)

479 

480 

class DecColumn(CoordColumn):
    """Declination, in degrees
    """
    name = 'Dec'
    _defaultNoDup = True

    def __init__(self, **kwargs):
        # Always reads the radian-valued coord_dec column.
        super().__init__('coord_dec', **kwargs)

    def __call__(self, catalog, **kwargs):
        # Delegates entirely to the base implementation.
        return super().__call__(catalog, **kwargs)

492 

493 

def fluxName(col):
    """Return ``col`` with an ``_instFlux`` suffix, adding it if absent."""
    return col if col.endswith('_instFlux') else col + '_instFlux'

498 

499 

def fluxErrName(col):
    """Return ``col`` with an ``_instFluxErr`` suffix, adding it if absent."""
    return col if col.endswith('_instFluxErr') else col + '_instFluxErr'

504 

505 

class Mag(Functor):
    """Compute calibrated magnitude

    Takes a `calib` argument, which returns the flux at mag=0
    as `calib.getFluxMag0()`. If not provided, then the default
    `fluxMag0` is 63095734448.0194, which is default for HSC.
    This default should be removed in DM-21955

    This calculation hides warnings about invalid values and dividing by zero.

    As for all functors, a `dataset` and `filt` kwarg should be provided upon
    initialization. Unlike the default `Functor`, however, the default dataset
    for a `Mag` is `'meas'`, rather than `'ref'`.

    Parameters
    ----------
    col : `str`
        Name of flux column from which to compute magnitude. Can be parseable
        by `lsst.pipe.tasks.functors.fluxName` function---that is, you can pass
        `'modelfit_CModel'` instead of `'modelfit_CModel_instFlux'`) and it will
        understand.
    calib : `lsst.afw.image.calib.Calib` (optional)
        Object that knows zero point.
    """
    _defaultDataset = 'meas'

    def __init__(self, col, calib=None, **kwargs):
        self.col = fluxName(col)
        self.calib = calib
        if calib is not None:
            self.fluxMag0 = calib.getFluxMag0()[0]
        else:
            # TO DO: DM-21955 Replace hard coded photometic calibration values
            self.fluxMag0 = 63095734448.0194

        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.col]

    def _func(self, df):
        # Use the stdlib warnings module directly: ``np.warnings`` was a
        # deprecated alias removed in recent NumPy releases.
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', r'invalid value encountered')
            warnings.filterwarnings('ignore', r'divide by zero')
            return -2.5*np.log10(df[self.col] / self.fluxMag0)

    @property
    def name(self):
        return 'mag_{0}'.format(self.col)

557 

class MagErr(Mag):
    """Compute calibrated magnitude uncertainty

    Takes the same `calib` object as `lsst.pipe.tasks.functors.Mag`.

    Parameters
    ----------
    col : `str`
        Name of flux column
    calib : `lsst.afw.image.calib.Calib` (optional)
        Object that knows zero point.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        if self.calib is not None:
            self.fluxMag0Err = self.calib.getFluxMag0()[1]
        else:
            self.fluxMag0Err = 0.

    @property
    def columns(self):
        return [self.col, self.col + 'Err']

    def _func(self, df):
        # Use the stdlib warnings module directly: ``np.warnings`` was a
        # deprecated alias removed in recent NumPy releases.
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', r'invalid value encountered')
            warnings.filterwarnings('ignore', r'divide by zero')
            fluxCol, fluxErrCol = self.columns
            x = df[fluxErrCol] / df[fluxCol]
            y = self.fluxMag0Err / self.fluxMag0
            # Standard error propagation for -2.5*log10(flux/fluxMag0).
            magErr = (2.5 / np.log(10.)) * np.sqrt(x*x + y*y)
            return magErr

    @property
    def name(self):
        return super().name + '_err'

594 

595 

class NanoMaggie(Mag):
    """Instrumental flux scaled to nanomaggies.

    The returned value is ``flux / fluxMag0 * 1e9``, i.e. the flux relative
    to the zero point, in units of 1e-9 maggie.
    """

    def _func(self, df):
        return (df[self.col] / self.fluxMag0) * 1e9

602 

603 

class MagDiff(Functor):
    """Functor to calculate magnitude difference

    Parameters
    ----------
    col1, col2 : `str`
        Flux column names (parseable by `fluxName`); the result is
        ``mag(col1) - mag(col2)``.
    """
    # NOTE: the class docstring was previously placed after this attribute,
    # where it was an inert string rather than documentation.
    _defaultDataset = 'meas'

    def __init__(self, col1, col2, **kwargs):
        self.col1 = fluxName(col1)
        self.col2 = fluxName(col2)
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.col1, self.col2]

    def _func(self, df):
        # Use the stdlib warnings module directly: ``np.warnings`` was a
        # deprecated alias removed in recent NumPy releases.
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', r'invalid value encountered')
            warnings.filterwarnings('ignore', r'divide by zero')
            return -2.5*np.log10(df[self.col1]/df[self.col2])

    @property
    def name(self):
        return '(mag_{0} - mag_{1})'.format(self.col1, self.col2)

    @property
    def shortname(self):
        return 'magDiff_{0}_{1}'.format(self.col1, self.col2)

631 

632 

class Color(Functor):
    """Compute the color between two filters

    Computes color by initializing two different `Mag`
    functors based on the `col` and filters provided, and
    then returning the difference.

    This is enabled by the `_func` expecting a dataframe with a
    multilevel column index, with both `'filter'` and `'column'`,
    instead of just `'column'`, which is the `Functor` default.
    This is controlled by the `_dfLevels` attribute.

    Also of note, the default dataset for `Color` is `forced_src'`,
    whereas for `Mag` it is `'meas'`.

    Parameters
    ----------
    col : str
        Name of flux column from which to compute; same as would be passed to
        `lsst.pipe.tasks.functors.Mag`.

    filt2, filt1 : str
        Filters from which to compute magnitude difference.
        Color computed is `Mag(filt2) - Mag(filt1)`.
    """
    _defaultDataset = 'forced_src'
    # Keep the 'filter' level in the loaded dataframe: _func needs columns
    # from both filters at once.
    _dfLevels = ('filter', 'column')
    _defaultNoDup = True

    def __init__(self, col, filt2, filt1, **kwargs):
        self.col = fluxName(col)
        if filt2 == filt1:
            raise RuntimeError("Cannot compute Color for %s: %s - %s " % (col, filt2, filt1))
        self.filt2 = filt2
        self.filt1 = filt1

        # Delegate the actual magnitude computation to two Mag functors,
        # one per filter.
        self.mag2 = Mag(col, filt=filt2, **kwargs)
        self.mag1 = Mag(col, filt=filt1, **kwargs)

        super().__init__(**kwargs)

    @property
    def filt(self):
        # A Color spans two filters, so a single 'filt' is meaningless.
        return None

    @filt.setter
    def filt(self, filt):
        # Deliberately ignore assignments (including the one made by
        # Functor.__init__); filt1/filt2 fully determine the filters.
        pass

    def _func(self, df):
        # df has a 'filter' column level (see _dfLevels), so selecting by
        # filter yields a per-filter dataframe for each Mag functor.
        mag2 = self.mag2._func(df[self.filt2])
        mag1 = self.mag1._func(df[self.filt1])
        return mag2 - mag1

    @property
    def columns(self):
        return [self.mag1.col, self.mag2.col]

    def multilevelColumns(self, parq):
        # NOTE(review): tuples are built as (dataset, filter, column); this
        # assumes that ordering matches the table's column-index levels —
        # confirm against MultilevelParquetTable.
        return [(self.dataset, self.filt1, self.col),
                (self.dataset, self.filt2, self.col)]

    @property
    def name(self):
        return '{0} - {1} ({2})'.format(self.filt2, self.filt1, self.col)

    @property
    def shortname(self):
        return '{0}_{1}m{2}'.format(self.col, self.filt2.replace('-', ''),
                                    self.filt1.replace('-', ''))

704 

class Labeller(Functor):
    """Base class for functors that attach a label to each object.

    Its main purpose is to override `__call__` so that ``dropna`` is always
    forced to `False`: label columns should keep one entry per row.
    """
    _null_label = 'null'
    _allow_difference = False
    name = 'label'
    _force_str = False

    def __call__(self, parq, dropna=False, **kwargs):
        # Ignore any caller-supplied dropna; labels are never dropped.
        return super().__call__(parq, dropna=False, **kwargs)

715 

716 

class StarGalaxyLabeller(Labeller):
    """Label each object 'star' or 'galaxy' from its extendedness value."""
    _columns = ["base_ClassificationExtendedness_value"]
    _column = "base_ClassificationExtendedness_value"

    def _func(self, df):
        x = df[self._columns][self._column]
        mask = x.isnull()
        # Code 1 where extendedness < 0.5, 0 otherwise...
        test = (x < 0.5).astype(int)
        # ...and code 2 where extendedness is missing.
        test = test.mask(mask, 2)

        # TODO: DM-21954 Look into veracity of inline comment below
        # are these backwards?
        categories = ['galaxy', 'star', self._null_label]
        label = pd.Series(pd.Categorical.from_codes(test, categories=categories),
                          index=x.index, name='label')
        if self._force_str:
            label = label.astype(str)
        return label

735 

736 

class NumStarLabeller(Labeller):
    """Label objects by how often they were flagged as stars across filters."""
    _columns = ['numStarFlags']
    # Mapping of label -> code; kept for reference by callers.
    labels = {"star": 0, "maybe": 1, "notStar": 2}

    def _func(self, df):
        x = df[self._columns][self._columns[0]]

        # Number of filters
        n = len(x.unique()) - 1

        # Bin the star-flag count: 0 -> 'noStar', 1..n-1 -> 'maybe',
        # n -> 'star' (flagged in every filter).
        labels = ['noStar', 'maybe', 'star']
        label = pd.Series(pd.cut(x, [-1, 0, n-1, n], labels=labels),
                          index=x.index, name='label')

        if self._force_str:
            label = label.astype(str)

        return label

755 

756 

class DeconvolvedMoments(Functor):
    """Compute deconvolved second moments: source trace minus PSF trace."""
    name = 'Deconvolved Moments'
    shortname = 'deconvolvedMoments'
    _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
                "ext_shapeHSM_HsmSourceMoments_yy",
                "base_SdssShape_xx", "base_SdssShape_yy",
                "ext_shapeHSM_HsmPsfMoments_xx",
                "ext_shapeHSM_HsmPsfMoments_yy")

    def _func(self, df):
        """Calculate deconvolved moments"""
        # Prefer HSM source moments; fall back to SdssShape (per-row, via
        # the hsm.where(...) below) when HSM values are not finite.
        if "ext_shapeHSM_HsmSourceMoments_xx" in df.columns:  # _xx added by tdm
            hsm = df["ext_shapeHSM_HsmSourceMoments_xx"] + df["ext_shapeHSM_HsmSourceMoments_yy"]
        else:
            hsm = np.ones(len(df))*np.nan
        sdss = df["base_SdssShape_xx"] + df["base_SdssShape_yy"]
        if "ext_shapeHSM_HsmPsfMoments_xx" in df.columns:
            psf = df["ext_shapeHSM_HsmPsfMoments_xx"] + df["ext_shapeHSM_HsmPsfMoments_yy"]
        else:
            # LSST does not have shape.sdss.psf. Could instead add base_PsfShape to catalog using
            # exposure.getPsf().computeShape(s.getCentroid()).getIxx()
            # raise TaskError("No psf shape parameter found in catalog")
            raise RuntimeError('No psf shape parameter found in catalog')

        return hsm.where(np.isfinite(hsm), sdss) - psf

782 

783 

class SdssTraceSize(Functor):
    """Functor to calculate SDSS trace radius size for sources"""
    name = "SDSS Trace Size"
    shortname = 'sdssTrace'
    _columns = ("base_SdssShape_xx", "base_SdssShape_yy")

    def _func(self, df):
        # Trace radius: sqrt((Ixx + Iyy) / 2).
        return np.sqrt(0.5*(df["base_SdssShape_xx"] + df["base_SdssShape_yy"]))

793 

794 

class PsfSdssTraceSizeDiff(Functor):
    """Functor to calculate SDSS trace radius size difference (%) between object and psf model"""
    name = "PSF - SDSS Trace Size"
    shortname = 'psf_sdssTrace'
    _columns = ("base_SdssShape_xx", "base_SdssShape_yy",
                "base_SdssShape_psf_xx", "base_SdssShape_psf_yy")

    def _func(self, df):
        srcTrace = np.sqrt(0.5*(df["base_SdssShape_xx"] + df["base_SdssShape_yy"]))
        psfTrace = np.sqrt(0.5*(df["base_SdssShape_psf_xx"] + df["base_SdssShape_psf_yy"]))
        # Percent difference relative to the mean of the two sizes.
        return 100*(srcTrace - psfTrace)/(0.5*(srcTrace + psfTrace))

807 

808 

class HsmTraceSize(Functor):
    """Functor to calculate HSM trace radius size for sources"""
    name = 'HSM Trace Size'
    shortname = 'hsmTrace'
    _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
                "ext_shapeHSM_HsmSourceMoments_yy")

    def _func(self, df):
        # Trace radius: sqrt((Ixx + Iyy) / 2).
        return np.sqrt(0.5*(df["ext_shapeHSM_HsmSourceMoments_xx"] +
                            df["ext_shapeHSM_HsmSourceMoments_yy"]))

820 

821 

class PsfHsmTraceSizeDiff(Functor):
    """Functor to calculate HSM trace radius size difference (%) between object and psf model"""
    name = 'PSF - HSM Trace Size'
    shortname = 'psf_HsmTrace'
    _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
                "ext_shapeHSM_HsmSourceMoments_yy",
                "ext_shapeHSM_HsmPsfMoments_xx",
                "ext_shapeHSM_HsmPsfMoments_yy")

    def _func(self, df):
        srcTrace = np.sqrt(0.5*(df["ext_shapeHSM_HsmSourceMoments_xx"] +
                                df["ext_shapeHSM_HsmSourceMoments_yy"]))
        psfTrace = np.sqrt(0.5*(df["ext_shapeHSM_HsmPsfMoments_xx"] +
                                df["ext_shapeHSM_HsmPsfMoments_yy"]))
        # Percent difference relative to the mean of the two sizes.
        return 100*(srcTrace - psfTrace)/(0.5*(srcTrace + psfTrace))

838 

839 

class HsmFwhm(Functor):
    """PSF FWHM in arcseconds, from HSM PSF moments."""
    name = 'HSM Psf FWHM'
    _columns = ('ext_shapeHSM_HsmPsfMoments_xx', 'ext_shapeHSM_HsmPsfMoments_yy')
    # TODO: DM-21403 pixel scale should be computed from the CD matrix or transform matrix
    pixelScale = 0.168
    # Gaussian sigma-to-FWHM conversion factor.
    SIGMA2FWHM = 2*np.sqrt(2*np.log(2))

    def _func(self, df):
        trace = 0.5*(df['ext_shapeHSM_HsmPsfMoments_xx'] + df['ext_shapeHSM_HsmPsfMoments_yy'])
        return self.pixelScale*self.SIGMA2FWHM*np.sqrt(trace)

850 

851 

class E1(Functor):
    """Distortion ellipticity e1 = (Ixx - Iyy) / (Ixx + Iyy)."""
    name = "Distortion Ellipticity (e1)"
    shortname = "Distortion"

    def __init__(self, colXX, colXY, colYY, **kwargs):
        self.colXX = colXX
        self.colXY = colXY
        self.colYY = colYY
        self._columns = [self.colXX, self.colXY, self.colYY]
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.colXX, self.colXY, self.colYY]

    def _func(self, df):
        # BUG FIX: the numerator was missing parentheses, so the previous
        # code computed Ixx - Iyy/(Ixx + Iyy) rather than the distortion
        # definition (Ixx - Iyy)/(Ixx + Iyy) (consistent with E2 below,
        # which is 2*Ixy/(Ixx + Iyy)).
        return (df[self.colXX] - df[self.colYY]) / (df[self.colXX] + df[self.colYY])

869 

870 

class E2(Functor):
    """Distortion ellipticity e2 = 2*Ixy / (Ixx + Iyy)."""
    name = "Ellipticity e2"

    def __init__(self, colXX, colXY, colYY, **kwargs):
        self.colXX = colXX
        self.colXY = colXY
        self.colYY = colYY
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.colXX, self.colXY, self.colYY]

    def _func(self, df):
        trace = df[self.colXX] + df[self.colYY]
        return 2*df[self.colXY] / trace

887 

class RadiusFromQuadrupole(Functor):
    """Determinant radius from quadrupole moments: |Q|**(1/4)."""

    def __init__(self, colXX, colXY, colYY, **kwargs):
        self.colXX = colXX
        self.colXY = colXY
        self.colYY = colYY
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.colXX, self.colXY, self.colYY]

    def _func(self, df):
        # Determinant of the 2x2 moments matrix, then its fourth root.
        detQ = df[self.colXX]*df[self.colYY] - df[self.colXY]**2
        return detQ**0.25

903 

class ComputePixelScale(Functor):
    """Compute the local pixel scale from the stored CDMatrix.
    """
    name = "Pixel Scale"

    def __init__(self,
                 colCD_1_1,
                 colCD_1_2,
                 colCD_2_1,
                 colCD_2_2,
                 **kwargs):
        self.colCD_1_1 = colCD_1_1
        self.colCD_1_2 = colCD_1_2
        self.colCD_2_1 = colCD_2_1
        self.colCD_2_2 = colCD_2_2
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.colCD_1_1, self.colCD_1_2,
                self.colCD_2_1, self.colCD_2_2]

    def pixelScale(self, cd11, cd12, cd21, cd22):
        """Compute the local pixel scale conversion.

        Parameters
        ----------
        cd11 : `pandas.Series`
            [1, 1] element of the local CDMatricies.
        cd12 : `pandas.Series`
            [1, 2] element of the local CDMatricies.
        cd21 : `pandas.Series`
            [2, 1] element of the local CDMatricies.
        cd22 : `pandas.Series`
            [2, 2] element of the local CDMatricies.

        Returns
        -------
        pixScale : `pandas.Series`
            Arcseconds per pixel at the location of the local WCS.
        """
        # sqrt(|det CD|) is degrees/pixel; multiply by 3600 for arcsec/pixel.
        return 3600 * np.sqrt(np.fabs(cd11 * cd22 - cd12 * cd21))

    def _func(self, df):
        return self.pixelScale(df[self.colCD_1_1], df[self.colCD_1_2],
                               df[self.colCD_2_1], df[self.colCD_2_2])

951 

class ConvertPixelToArcseconds(ComputePixelScale):
    """Convert a value in units pixels to units arcseconds.
    """
    name = "Pixel scale converter"

    def __init__(self, col, colCD_1_1, colCD_1_2, colCD_2_1, colCD_2_2,
                 **kwargs):
        # The column to convert; the CD-matrix columns are handled by the
        # base class.
        self.col = col
        super().__init__(colCD_1_1, colCD_1_2, colCD_2_1, colCD_2_2, **kwargs)

    @property
    def name(self):
        return f"{self.col}_asArcseconds"

    @property
    def columns(self):
        return [self.col,
                self.colCD_1_1, self.colCD_1_2,
                self.colCD_2_1, self.colCD_2_2]

    def _func(self, df):
        scale = self.pixelScale(df[self.colCD_1_1], df[self.colCD_1_2],
                                df[self.colCD_2_1], df[self.colCD_2_2])
        return df[self.col] * scale

979 

980 

class ReferenceBand(Functor):
    """Return the name of the band used as the reference measurement."""
    name = 'Reference Band'
    shortname = 'refBand'

    @property
    def columns(self):
        return ["merge_measurement_i",
                "merge_measurement_r",
                "merge_measurement_z",
                "merge_measurement_y",
                "merge_measurement_g"]

    def _func(self, df):
        def bandOf(row):
            # idxmax picks the first column holding the maximum value;
            # with boolean flags, True > False.
            return row.idxmax().replace('merge_measurement_', '')

        return df[self.columns].apply(bandOf, axis=1)

1000 

1001 

class Photometry(Functor):
    """Base class converting instrumental counts (DN) to calibrated fluxes
    (nJy) or AB magnitudes.

    Parameters
    ----------
    colFlux : `str`
        Name of the flux column to calibrate.
    colFluxErr : `str`, optional
        Name of the error column associated with ``colFlux``.
    calib : optional
        Calibration object providing ``getFluxMag0()``; when `None`, the
        flux zero point is derived from the hard-coded ``COADD_ZP``.
    """
    # AB to NanoJansky (3631 Jansky)
    AB_FLUX_SCALE = (0 * u.ABmag).to_value(u.nJy)
    LOG_AB_FLUX_SCALE = 12.56
    # 2.5 / ln(10): converts relative flux error to magnitude error.
    FIVE_OVER_2LOG10 = 1.085736204758129569
    # TO DO: DM-21955 Replace hard coded photometic calibration values
    COADD_ZP = 27

    def __init__(self, colFlux, colFluxErr=None, calib=None, **kwargs):
        # Vectorized overflow-safe hypot used for error propagation.
        self.vhypot = np.vectorize(self.hypot)
        self.col = colFlux
        self.colFluxErr = colFluxErr

        self.calib = calib
        if calib is not None:
            self.fluxMag0, self.fluxMag0Err = calib.getFluxMag0()
        else:
            self.fluxMag0 = 1./np.power(10, -0.4*self.COADD_ZP)
            self.fluxMag0Err = 0.

        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.col]

    @property
    def name(self):
        return 'mag_{0}'.format(self.col)

    @classmethod
    def hypot(cls, a, b):
        """Return sqrt(a**2 + b**2), avoiding intermediate overflow."""
        if np.abs(a) < np.abs(b):
            a, b = b, a
        if a == 0.:
            return 0.
        q = b/a
        return np.abs(a) * np.sqrt(1. + q*q)

    def dn2flux(self, dn, fluxMag0):
        """Convert instrumental counts to calibrated flux in nJy."""
        return self.AB_FLUX_SCALE * dn / fluxMag0

    def dn2mag(self, dn, fluxMag0):
        """Convert instrumental counts to AB magnitude."""
        # Fix: ``np.warnings`` was a deprecated alias removed in
        # NumPy >= 1.24; use the stdlib ``warnings`` module directly.
        import warnings
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', r'invalid value encountered')
            warnings.filterwarnings('ignore', r'divide by zero')
            return -2.5 * np.log10(dn/fluxMag0)

    def dn2fluxErr(self, dn, dnErr, fluxMag0, fluxMag0Err):
        """Propagate count and zero-point errors to a flux error in nJy."""
        retVal = self.vhypot(dn * fluxMag0Err, dnErr * fluxMag0)
        retVal *= self.AB_FLUX_SCALE / fluxMag0 / fluxMag0
        return retVal

    def dn2MagErr(self, dn, dnErr, fluxMag0, fluxMag0Err):
        """Propagate count and zero-point errors to a magnitude error."""
        retVal = self.dn2fluxErr(dn, dnErr, fluxMag0, fluxMag0Err) / self.dn2flux(dn, fluxMag0)
        return self.FIVE_OVER_2LOG10 * retVal

1058 

1059 

class NanoJansky(Photometry):
    """Calibrated flux in nanojanskys for the configured flux column."""

    def _func(self, df):
        counts = df[self.col]
        return self.dn2flux(counts, self.fluxMag0)

1063 

1064 

class NanoJanskyErr(Photometry):
    """Error on the calibrated flux in nanojanskys."""

    @property
    def columns(self):
        return [self.col, self.colFluxErr]

    def _func(self, df):
        fluxErr = self.dn2fluxErr(df[self.col], df[self.colFluxErr],
                                  self.fluxMag0, self.fluxMag0Err)
        # dn2fluxErr returns a bare array; re-wrap with the input index.
        return pd.Series(fluxErr, index=df.index)

1073 

1074 

class Magnitude(Photometry):
    """Calibrated AB magnitude for the configured flux column."""

    def _func(self, df):
        counts = df[self.col]
        return self.dn2mag(counts, self.fluxMag0)

1078 

1079 

class MagnitudeErr(Photometry):
    """Error on the calibrated AB magnitude."""

    @property
    def columns(self):
        return [self.col, self.colFluxErr]

    def _func(self, df):
        magErr = self.dn2MagErr(df[self.col], df[self.colFluxErr],
                                self.fluxMag0, self.fluxMag0Err)
        # dn2MagErr returns a bare array; re-wrap with the input index.
        return pd.Series(magErr, index=df.index)

1088 

1089 

class LocalPhotometry(Functor):
    """Base class for calibrating the specified instrument flux column using
    the local photometric calibration.

    Parameters
    ----------
    instFluxCol : `str`
        Name of the instrument flux column.
    instFluxErrCol : `str`
        Name of the associated error columns for ``instFluxCol``.
    photoCalibCol : `str`
        Name of local calibration column.
    photoCalibErrCol : `str`
        Error associated with ``photoCalibCol``

    See also
    --------
    LocalPhotometry
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    """
    # AB magnitude of a 1 nJy source; offset applied when converting
    # calibrated fluxes to magnitudes.
    logNJanskyToAB = (1 * u.nJy).to_value(u.ABmag)

    def __init__(self,
                 instFluxCol,
                 instFluxErrCol,
                 photoCalibCol,
                 photoCalibErrCol,
                 **kwargs):
        self.instFluxCol = instFluxCol
        self.instFluxErrCol = instFluxErrCol
        self.photoCalibCol = photoCalibCol
        self.photoCalibErrCol = photoCalibErrCol
        super().__init__(**kwargs)

    def instFluxToNanojansky(self, instFlux, localCalib):
        """Convert instrument flux to nanojanskys.

        Parameters
        ----------
        instFlux : `numpy.ndarray` or `pandas.Series`
            Array of instrument flux measurements
        localCalib : `numpy.ndarray` or `pandas.Series`
            Array of local photometric calibration estimates.

        Returns
        -------
        calibFlux : `numpy.ndarray` or `pandas.Series`
            Array of calibrated flux measurements.
        """
        return instFlux * localCalib

    def instFluxErrToNanojanskyErr(self, instFlux, instFluxErr, localCalib, localCalibErr):
        """Convert instrument flux error to a nanojansky error.

        Parameters
        ----------
        instFlux : `numpy.ndarray` or `pandas.Series`
            Array of instrument flux measurements
        instFluxErr : `numpy.ndarray` or `pandas.Series`
            Errors on associated ``instFlux`` values
        localCalib : `numpy.ndarray` or `pandas.Series`
            Array of local photometric calibration estimates.
        localCalibErr : `numpy.ndarray` or `pandas.Series`
            Errors on associated ``localCalib`` values

        Returns
        -------
        calibFluxErr : `numpy.ndarray` or `pandas.Series`
            Errors on calibrated flux measurements.
        """
        return np.hypot(instFluxErr * localCalib, instFlux * localCalibErr)

    def instFluxToMagnitude(self, instFlux, localCalib):
        """Convert instrument flux to an AB magnitude.

        Parameters
        ----------
        instFlux : `numpy.ndarray` or `pandas.Series`
            Array of instrument flux measurements
        localCalib : `numpy.ndarray` or `pandas.Series`
            Array of local photometric calibration estimates.

        Returns
        -------
        calibMag : `numpy.ndarray` or `pandas.Series`
            Array of calibrated AB magnitudes.
        """
        return -2.5 * np.log10(self.instFluxToNanojansky(instFlux, localCalib)) + self.logNJanskyToAB

    def instFluxErrToMagnitudeErr(self, instFlux, instFluxErr, localCalib, localCalibErr):
        """Convert instrument flux error to an AB magnitude error.

        Parameters
        ----------
        instFlux : `numpy.ndarray` or `pandas.Series`
            Array of instrument flux measurements
        instFluxErr : `numpy.ndarray` or `pandas.Series`
            Errors on associated ``instFlux`` values
        localCalib : `numpy.ndarray` or `pandas.Series`
            Array of local photometric calibration estimates.
        localCalibErr : `numpy.ndarray` or `pandas.Series`
            Errors on associated ``localCalib`` values

        Returns
        -------
        calibMagErr: `numpy.ndarray` or `pandas.Series`
            Error on calibrated AB magnitudes.
        """
        err = self.instFluxErrToNanojanskyErr(instFlux, instFluxErr, localCalib, localCalibErr)
        # Bug fix: the denominator flux must be calibrated with localCalib;
        # the original passed instFluxErr in place of the calibration.
        return 2.5 / np.log(10) * err / self.instFluxToNanojansky(instFlux, localCalib)

1203 

1204 

class LocalNanojansky(LocalPhotometry):
    """Compute calibrated fluxes using the local calibration value.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    """

    @property
    def columns(self):
        return [self.instFluxCol, self.photoCalibCol]

    @property
    def name(self):
        return f'flux_{self.instFluxCol}'

    def _func(self, df):
        instFlux = df[self.instFluxCol]
        localCalib = df[self.photoCalibCol]
        return self.instFluxToNanojansky(instFlux, localCalib)

1226 

1227 

class LocalNanojanskyErr(LocalPhotometry):
    """Compute calibrated flux errors using the local calibration value.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    """

    @property
    def columns(self):
        return [self.instFluxCol, self.instFluxErrCol,
                self.photoCalibCol, self.photoCalibErrCol]

    @property
    def name(self):
        return f'fluxErr_{self.instFluxCol}'

    def _func(self, df):
        instFlux = df[self.instFluxCol]
        instFluxErr = df[self.instFluxErrCol]
        localCalib = df[self.photoCalibCol]
        localCalibErr = df[self.photoCalibErrCol]
        return self.instFluxErrToNanojanskyErr(instFlux, instFluxErr,
                                               localCalib, localCalibErr)

1251 

1252 

class LocalMagnitude(LocalPhotometry):
    """Compute calibrated AB magnitudes using the local calibration value.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    """

    @property
    def columns(self):
        return [self.instFluxCol, self.photoCalibCol]

    @property
    def name(self):
        return f'mag_{self.instFluxCol}'

    def _func(self, df):
        instFlux = df[self.instFluxCol]
        localCalib = df[self.photoCalibCol]
        return self.instFluxToMagnitude(instFlux, localCalib)

1275 

1276 

class LocalMagnitudeErr(LocalPhotometry):
    """Compute calibrated AB magnitude errors using the local calibration value.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    """

    @property
    def columns(self):
        return [self.instFluxCol, self.instFluxErrCol,
                self.photoCalibCol, self.photoCalibErrCol]

    @property
    def name(self):
        return f'magErr_{self.instFluxCol}'

    def _func(self, df):
        instFlux = df[self.instFluxCol]
        instFluxErr = df[self.instFluxErrCol]
        localCalib = df[self.photoCalibCol]
        localCalibErr = df[self.photoCalibErrCol]
        return self.instFluxErrToMagnitudeErr(instFlux, instFluxErr,
                                              localCalib, localCalibErr)