Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

import re
import warnings

import yaml
import pandas as pd
import numpy as np
import astropy.units as u

from lsst.daf.persistence import doImport
from .parquetTable import MultilevelParquetTable

10 

11 

def init_fromDict(initDict, basePath='lsst.pipe.tasks.functors', typeKey='functor'):
    """Initialize an object defined in a dictionary.

    The object must be importable as
    '{0}.{1}'.format(basePath, initDict[typeKey]).
    Positional and keyword arguments (if any) are carried by the "args" and
    "kwargs" entries of the dictionary, respectively.  Used by
    `functors.CompositeFunctor.from_yaml` to build a composite functor from
    a specification in a YAML file.

    Parameters
    ----------
    initDict : dictionary
        Dictionary describing the object's initialization.  Must contain an
        entry keyed by ``typeKey`` that names the object, relative to
        ``basePath``.
    basePath : str
        Path relative to module in which ``initDict[typeKey]`` is defined.
    typeKey : str
        Key of ``initDict`` that is the name of the object
        (relative to `basePath`).
    """
    # Work on a copy so the caller's dictionary is not consumed.
    spec = dict(initDict)
    # TO DO: DM-21956 We should be able to define functors outside this module
    pythonType = doImport('{0}.{1}'.format(basePath, spec.pop(typeKey)))
    posArgs = spec.pop('args', [])
    # A lone string is treated as a single positional argument.
    if isinstance(posArgs, str):
        posArgs = [posArgs]
    return pythonType(*posArgs, **spec)

44 

45 

class Functor(object):
    """Define and execute a calculation on a ParquetTable

    The `__call__` method accepts a `ParquetTable` object, and returns the
    result of the calculation as a single column.  Each functor defines what
    columns are needed for the calculation, and only these columns are read
    from the `ParquetTable`.

    The action of `__call__` consists of two steps: first, loading the
    necessary columns from disk into memory as a `pandas.DataFrame` object;
    and second, performing the computation on this dataframe and returning the
    result.

    To define a new `Functor`, a subclass must define a `_func` method,
    that takes a `pandas.DataFrame` and returns result in a `pandas.Series`.
    In addition, it must define the following attributes

    * `_columns`: The columns necessary to perform the calculation
    * `name`: A name appropriate for a figure axis label
    * `shortname`: A name appropriate for use as a dictionary key

    On initialization, a `Functor` should declare what filter (`filt` kwarg)
    and dataset (e.g. `'ref'`, `'meas'`, `'forced_src'`) it is intended to be
    applied to.  This enables the `_get_cols` method to extract the proper
    columns from the parquet file.  If not specified, the dataset will fall
    back on the `_defaultDataset` attribute.  If filter is not specified and
    `dataset` is anything other than `'ref'`, then an error will be raised
    when trying to perform the calculation.

    As currently implemented, `Functor` is only set up to expect a
    `ParquetTable` of the format of the `deepCoadd_obj` dataset; that is, a
    `MultilevelParquetTable` with the levels of the column index being
    `filter`, `dataset`, and `column`.  This is defined in the
    `_columnLevels` attribute, as well as being implicit in the role of the
    `filt` and `dataset` attributes defined at initialization.  In addition,
    the `_get_cols` method that reads the dataframe from the `ParquetTable`
    will return a dataframe with column index levels defined by the
    `_dfLevels` attribute; by default, this is `column`.

    The `_columnLevels` and `_dfLevels` attributes should generally not need
    to be changed, unless `_func` needs columns from multiple filters or
    datasets to do the calculation.  An example of this is the
    `lsst.pipe.tasks.functors.Color` functor, for which
    `_dfLevels = ('filter', 'column')`, and `_func` expects the dataframe it
    gets to have those levels in the column index.

    Parameters
    ----------
    filt : str
        Filter upon which to do the calculation

    dataset : str
        Dataset upon which to do the calculation
        (e.g., 'ref', 'meas', 'forced_src').
    """

    _defaultDataset = 'ref'
    _columnLevels = ('filter', 'dataset', 'column')
    _dfLevels = ('column',)
    _defaultNoDup = False

    def __init__(self, filt=None, dataset=None, noDup=None):
        self.filt = filt
        self.dataset = dataset if dataset is not None else self._defaultDataset
        self._noDup = noDup

    @property
    def noDup(self):
        # Explicit per-instance setting wins; otherwise the class default.
        if self._noDup is not None:
            return self._noDup
        else:
            return self._defaultNoDup

    @property
    def columns(self):
        """Columns required to perform calculation
        """
        if not hasattr(self, '_columns'):
            raise NotImplementedError('Must define columns property or _columns attribute')
        return self._columns

    def multilevelColumns(self, parq):
        """Return the column specifications to read from a multilevel table."""
        if not set(parq.columnLevels) == set(self._columnLevels):
            raise ValueError('ParquetTable does not have the expected column levels. ' +
                             'Got {0}; expected {1}.'.format(parq.columnLevels, self._columnLevels))

        columnDict = {'column': self.columns,
                      'dataset': self.dataset}
        if self.filt is None:
            if 'filter' in parq.columnLevels:
                if self.dataset == 'ref':
                    # 'ref' columns are filter-independent; any filter works.
                    columnDict['filter'] = parq.columnLevelNames['filter'][0]
                else:
                    raise ValueError("'filt' not set for functor {}".format(self.name) +
                                     "(dataset {}) ".format(self.dataset) +
                                     "and ParquetTable " +
                                     "contains multiple filters in column index. " +
                                     "Set 'filt' or set 'dataset' to 'ref'.")
        else:
            columnDict['filter'] = self.filt

        return parq._colsFromDict(columnDict)

    def _func(self, df, dropna=True):
        raise NotImplementedError('Must define calculation on dataframe')

    def _get_cols(self, parq):
        """Retrieve dataframe necessary for calculation.

        Returns dataframe upon which `self._func` can act.
        """
        if isinstance(parq, MultilevelParquetTable):
            columns = self.multilevelColumns(parq)
            df = parq.toDataFrame(columns=columns, droplevels=False)
            df = self._setLevels(df)
        else:
            columns = self.columns
            df = parq.toDataFrame(columns=columns)

        return df

    def _setLevels(self, df):
        # Strip all column-index levels not named in _dfLevels.
        levelsToDrop = [n for n in df.columns.names if n not in self._dfLevels]
        df.columns = df.columns.droplevel(levelsToDrop)
        return df

    def _dropna(self, vals):
        return vals.dropna()

    def __call__(self, parq, dropna=False):
        # Load columns OUTSIDE the try block: if the read itself fails there
        # is no dataframe to build a fallback result from, and the previous
        # code raised a confusing UnboundLocalError on ``df`` instead of the
        # real error.  Only failures of the computation fall back to NaNs.
        df = self._get_cols(parq)
        try:
            vals = self._func(df)
        except Exception:
            vals = self.fail(df)
        if dropna:
            vals = self._dropna(vals)

        return vals

    def fail(self, df):
        """Return an all-NaN series aligned with ``df``'s index."""
        return pd.Series(np.full(len(df), np.nan), index=df.index)

    @property
    def name(self):
        """Full name of functor (suitable for figure labels)
        """
        # NOTE(review): this returns (not raises) the exception class;
        # preserved as-is because callers may format it into messages.
        return NotImplementedError

    @property
    def shortname(self):
        """Short name of functor (suitable for column name/dict key)
        """
        return self.name

202 

203 

class CompositeFunctor(Functor):
    """Perform multiple calculations at once on a catalog

    The role of a `CompositeFunctor` is to group together computations from
    multiple functors.  Instead of returning `pandas.Series` a
    `CompositeFunctor` returns a `pandas.Dataframe`, with the column names
    being the keys of `funcDict`.

    The `columns` attribute of a `CompositeFunctor` is the union of all
    columns in all the component functors.

    A `CompositeFunctor` does not use a `_func` method itself; rather, when a
    `CompositeFunctor` is called, all its columns are loaded at once, and the
    resulting dataframe is passed to the `_func` method of each component
    functor.  This has the advantage of only doing I/O (reading from parquet
    file) once, and works because each individual `_func` method of each
    component functor does not care if there are *extra* columns in the
    dataframe being passed; only that it must contain *at least* the
    `columns` it expects.

    An important and useful class method is `from_yaml`, which takes as
    argument the path to a YAML file specifying a collection of functors.

    Parameters
    ----------
    funcs : `dict` or `list`
        Dictionary or list of functors.  If a list, then it will be converted
        into a dictonary according to the `.shortname` attribute of each
        functor.
    """
    dataset = None

    def __init__(self, funcs, **kwargs):

        # isinstance (rather than ``type(funcs) == dict``) also accepts
        # dict subclasses such as OrderedDict.
        if isinstance(funcs, dict):
            self.funcDict = funcs
        else:
            self.funcDict = {f.shortname: f for f in funcs}

        self._filt = None

        super().__init__(**kwargs)

    @property
    def filt(self):
        return self._filt

    @filt.setter
    def filt(self, filt):
        # Propagate the filter down to every component functor.
        if filt is not None:
            for _, f in self.funcDict.items():
                f.filt = filt
        self._filt = filt

    def update(self, new):
        """Add the functors of ``new`` (dict or CompositeFunctor) to this one."""
        if isinstance(new, dict):
            self.funcDict.update(new)
        elif isinstance(new, CompositeFunctor):
            self.funcDict.update(new.funcDict)
        else:
            raise TypeError('Can only update with dictionary or CompositeFunctor.')

        # Make sure new functors have the same 'filt' set
        if self.filt is not None:
            self.filt = self.filt

    @property
    def columns(self):
        return list(set([x for y in [f.columns for f in self.funcDict.values()] for x in y]))

    def multilevelColumns(self, parq):
        return list(set([x for y in [f.multilevelColumns(parq)
                                     for f in self.funcDict.values()] for x in y]))

    def __call__(self, parq, **kwargs):
        if isinstance(parq, MultilevelParquetTable):
            columns = self.multilevelColumns(parq)
            df = parq.toDataFrame(columns=columns, droplevels=False)
            valDict = {}
            for k, f in self.funcDict.items():
                try:
                    subdf = f._setLevels(df[f.multilevelColumns(parq)])
                    valDict[k] = f._func(subdf)
                except Exception:
                    # Fall back on ``df`` rather than ``subdf``: if the column
                    # selection itself raised, ``subdf`` is unbound (or stale
                    # from a previous iteration).  ``df`` shares the same row
                    # index, so the NaN fallback series aligns correctly.
                    valDict[k] = f.fail(df)
        else:
            columns = self.columns
            df = parq.toDataFrame(columns=columns)
            valDict = {k: f._func(df) for k, f in self.funcDict.items()}

        try:
            valDf = pd.concat(valDict, axis=1)
        except TypeError:
            print([(k, type(v)) for k, v in valDict.items()])
            raise

        if kwargs.get('dropna', False):
            valDf = valDf.dropna(how='any')

        return valDf

    @classmethod
    def renameCol(cls, col, renameRules):
        """Apply each matching (old, new) prefix rename rule to ``col`` in turn."""
        if renameRules is None:
            return col
        for old, new in renameRules:
            if col.startswith(old):
                col = col.replace(old, new)
        return col

    @classmethod
    def from_file(cls, filename, **kwargs):
        """Build a CompositeFunctor from a YAML specification file on disk."""
        with open(filename) as f:
            translationDefinition = yaml.safe_load(f)

        return cls.from_yaml(translationDefinition, **kwargs)

    @classmethod
    def from_yaml(cls, translationDefinition, **kwargs):
        """Build a CompositeFunctor from a parsed YAML specification dict."""
        funcs = {}
        for func, val in translationDefinition['funcs'].items():
            funcs[func] = init_fromDict(val)

        if 'flag_rename_rules' in translationDefinition:
            renameRules = translationDefinition['flag_rename_rules']
        else:
            renameRules = None

        # Plain flag columns: 'refFlags' from the ref dataset, 'flags' from meas.
        if 'refFlags' in translationDefinition:
            for flag in translationDefinition['refFlags']:
                funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='ref')

        if 'flags' in translationDefinition:
            for flag in translationDefinition['flags']:
                funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='meas')

        return cls(funcs, **kwargs)

340 

341 

def mag_aware_eval(df, expr):
    """Evaluate an expression on a DataFrame, knowing what the 'mag' function means

    Builds on `pandas.DataFrame.eval`, which parses and executes math on
    dataframes.  ``mag(col)`` is rewritten to ``-2.5*log(col)/log(10)``; if
    that evaluation fails (e.g. ``col`` is not present), ``mag(col)`` is
    retried as ``-2.5*log(col_instFlux)/log(10)``.

    Parameters
    ----------
    df : pandas.DataFrame
        Dataframe on which to evaluate expression.

    expr : str
        Expression.
    """
    try:
        expr_new = re.sub(r'mag\((\w+)\)', r'-2.5*log(\g<1>)/log(10)', expr)
        # The ``truediv`` keyword was deprecated and then removed from
        # DataFrame.eval (true division is the default), so it is no longer
        # passed explicitly.
        val = df.eval(expr_new)
    except Exception:  # Should check what actually gets raised
        expr_new = re.sub(r'mag\((\w+)\)', r'-2.5*log(\g<1>_instFlux)/log(10)', expr)
        val = df.eval(expr_new)
    return val

362 

363 

class CustomFunctor(Functor):
    """Arbitrary computation on a catalog

    Column names (and thus the columns to be loaded from the catalog) are
    found by collecting every word in the expression and discarding the known
    "math-y" words.

    Parameters
    ----------
    expr : str
        Expression to evaluate, to be parsed and executed by `mag_aware_eval`.
    """
    _ignore_words = ('mag', 'sin', 'cos', 'exp', 'log', 'sqrt')

    def __init__(self, expr, **kwargs):
        self.expr = expr
        super().__init__(**kwargs)

    @property
    def name(self):
        return self.expr

    @property
    def columns(self):
        # Arguments of mag(...) calls get special treatment below.
        fluxNames = re.findall(r'mag\(\s*(\w+)\s*\)', self.expr)

        colNames = [word for word in re.findall(r'[a-zA-Z_]+', self.expr)
                    if word not in self._ignore_words]
        bareNames = []
        for name in fluxNames:
            if re.search('_instFlux$', name):
                colNames.append(name)
            else:
                # A bare name inside mag() stands for its _instFlux column;
                # remember the bare form so it can be filtered out below.
                colNames.append('{}_instFlux'.format(name))
                bareNames.append(name)

        return list({c for c in colNames if c not in bareNames})

    def _func(self, df):
        return mag_aware_eval(df, self.expr)

402 

403 

class Column(Functor):
    """Get column with specified name

    Parameters
    ----------
    col : str
        Name of the column to return.
    """

    def __init__(self, col, **kwargs):
        self.col = col
        super().__init__(**kwargs)

    @property
    def name(self):
        return self.col

    @property
    def columns(self):
        # Only the one named column needs to be read.
        return [self.col]

    def _func(self, df):
        return df[self.col]

422 

423 

class Index(Functor):
    """Return the value of the index for each object
    """

    columns = ['coord_ra']  # just a dummy; something has to be here
    _defaultDataset = 'ref'
    _defaultNoDup = True

    def _func(self, df):
        # Re-emit the row index itself as a column-shaped Series.
        return pd.Series(df.index, index=df.index)

434 

435 

class IDColumn(Column):
    # NOTE(review): `_func` returns the row index rather than the 'id'
    # column named here -- presumably the index holds the object id;
    # confirm against the table schema.
    col = 'id'
    _allow_difference = False
    _defaultNoDup = True

    def _func(self, df):
        return pd.Series(df.index, index=df.index)

443 

444 

class FootprintNPix(Column):
    """Shortcut for the `base_Footprint_nPix` column."""
    col = 'base_Footprint_nPix'

447 

448 

class CoordColumn(Column):
    """Base class for coordinate column, in degrees

    If `_radians` is True (the default), the stored column value is assumed
    to be in radians and is converted to degrees on output.
    """
    _radians = True

    def __init__(self, col, **kwargs):
        super().__init__(col, **kwargs)

    def _func(self, df):
        res = df[self.col]
        if self._radians:
            # Build a NEW series instead of ``res *= ...``: the original
            # in-place multiply scaled the dataframe's own column data, so a
            # second call on the same dataframe returned wrong values.
            res = res * (180 / np.pi)
        return res

462 

463 

class RAColumn(CoordColumn):
    """Right Ascension, in degrees
    """
    name = 'RA'
    _defaultNoDup = True

    def __init__(self, **kwargs):
        # Always reads the 'coord_ra' column.
        super().__init__('coord_ra', **kwargs)

    def __call__(self, catalog, **kwargs):
        # Pure pass-through to the base implementation.
        return super().__call__(catalog, **kwargs)

475 

476 

class DecColumn(CoordColumn):
    """Declination, in degrees
    """
    name = 'Dec'
    _defaultNoDup = True

    def __init__(self, **kwargs):
        # Always reads the 'coord_dec' column.
        super().__init__('coord_dec', **kwargs)

    def __call__(self, catalog, **kwargs):
        # Pure pass-through to the base implementation.
        return super().__call__(catalog, **kwargs)

488 

489 

def fluxName(col):
    """Return ``col`` with an '_instFlux' suffix, adding it only if missing."""
    suffix = '_instFlux'
    return col if col.endswith(suffix) else col + suffix

494 

495 

def fluxErrName(col):
    """Return ``col`` with an '_instFluxErr' suffix, adding it only if missing."""
    suffix = '_instFluxErr'
    return col if col.endswith(suffix) else col + suffix

500 

501 

class Mag(Functor):
    """Compute calibrated magnitude

    Takes a `calib` argument, which returns the flux at mag=0
    as `calib.getFluxMag0()`.  If not provided, then the default
    `fluxMag0` is 63095734448.0194, which is default for HSC.
    This default should be removed in DM-21955

    This calculation hides warnings about invalid values and dividing by
    zero.

    As for all functors, a `dataset` and `filt` kwarg should be provided upon
    initialization.  Unlike the default `Functor`, however, the default
    dataset for a `Mag` is `'meas'`, rather than `'ref'`.

    Parameters
    ----------
    col : `str`
        Name of flux column from which to compute magnitude.  Can be
        parseable by `lsst.pipe.tasks.functors.fluxName` function---that is,
        you can pass `'modelfit_CModel'` instead of
        `'modelfit_CModel_instFlux'`) and it will understand.
    calib : `lsst.afw.image.calib.Calib` (optional)
        Object that knows zero point.
    """
    _defaultDataset = 'meas'

    def __init__(self, col, calib=None, **kwargs):
        self.col = fluxName(col)
        self.calib = calib
        if calib is not None:
            self.fluxMag0 = calib.getFluxMag0()[0]
        else:
            # TO DO: DM-21955 Replace hard coded photometic calibration values
            self.fluxMag0 = 63095734448.0194

        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.col]

    def _func(self, df):
        # Use the stdlib ``warnings`` module: the ``np.warnings`` alias used
        # previously was removed from modern numpy.
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', r'invalid value encountered')
            warnings.filterwarnings('ignore', r'divide by zero')
            return -2.5*np.log10(df[self.col] / self.fluxMag0)

    @property
    def name(self):
        return 'mag_{0}'.format(self.col)

552 

553 

class MagErr(Mag):
    """Compute calibrated magnitude uncertainty

    Takes the same `calib` object as `lsst.pipe.tasks.functors.Mag`.

    Parameters
    ----------
    col : `str`
        Name of flux column
    calib : `lsst.afw.image.calib.Calib` (optional)
        Object that knows zero point.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        if self.calib is not None:
            self.fluxMag0Err = self.calib.getFluxMag0()[1]
        else:
            self.fluxMag0Err = 0.

    @property
    def columns(self):
        # Both the flux column and its associated error column are needed.
        return [self.col, self.col + 'Err']

    def _func(self, df):
        # Use the stdlib ``warnings`` module: the ``np.warnings`` alias used
        # previously was removed from modern numpy.
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', r'invalid value encountered')
            warnings.filterwarnings('ignore', r'divide by zero')
            fluxCol, fluxErrCol = self.columns
            x = df[fluxErrCol] / df[fluxCol]
            y = self.fluxMag0Err / self.fluxMag0
            # Standard error propagation for mag = -2.5*log10(flux/fluxMag0).
            magErr = (2.5 / np.log(10.)) * np.sqrt(x*x + y*y)
            return magErr

    @property
    def name(self):
        return super().name + '_err'

590 

591 

class NanoMaggie(Mag):
    """Flux relative to the zero-point flux, scaled by 1e9.
    """

    def _func(self, df):
        return (df[self.col] / self.fluxMag0) * 1e9

598 

599 

class MagDiff(Functor):
    """Functor to calculate magnitude difference

    Parameters
    ----------
    col1, col2 : str
        Flux column names (run through `fluxName`); the result is
        -2.5*log10(col1/col2).
    """
    # NOTE: the docstring above previously appeared AFTER this assignment,
    # where it was a no-op string expression rather than the class docstring.
    _defaultDataset = 'meas'

    def __init__(self, col1, col2, **kwargs):
        self.col1 = fluxName(col1)
        self.col2 = fluxName(col2)
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.col1, self.col2]

    def _func(self, df):
        # Use the stdlib ``warnings`` module: the ``np.warnings`` alias used
        # previously was removed from modern numpy.
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', r'invalid value encountered')
            warnings.filterwarnings('ignore', r'divide by zero')
            return -2.5*np.log10(df[self.col1]/df[self.col2])

    @property
    def name(self):
        return '(mag_{0} - mag_{1})'.format(self.col1, self.col2)

    @property
    def shortname(self):
        return 'magDiff_{0}_{1}'.format(self.col1, self.col2)

627 

628 

class Color(Functor):
    """Compute the color between two filters

    Computes color by initializing two different `Mag`
    functors based on the `col` and filters provided, and
    then returning the difference.

    This is enabled by the `_func` expecting a dataframe with a
    multilevel column index, with both `'filter'` and `'column'`,
    instead of just `'column'`, which is the `Functor` default.
    This is controlled by the `_dfLevels` attribute.

    Also of note, the default dataset for `Color` is `forced_src'`,
    whereas for `Mag` it is `'meas'`.

    Parameters
    ----------
    col : str
        Name of flux column from which to compute; same as would be passed
        to `lsst.pipe.tasks.functors.Mag`.

    filt2, filt1 : str
        Filters from which to compute magnitude difference.
        Color computed is `Mag(filt2) - Mag(filt1)`.
    """
    _defaultDataset = 'forced_src'
    _dfLevels = ('filter', 'column')
    _defaultNoDup = True

    def __init__(self, col, filt2, filt1, **kwargs):
        self.col = fluxName(col)
        if filt2 == filt1:
            raise RuntimeError("Cannot compute Color for %s: %s - %s " % (col, filt2, filt1))
        self.filt2 = filt2
        self.filt1 = filt1

        # One Mag functor per filter; their difference is the color.
        self.mag2 = Mag(col, filt=filt2, **kwargs)
        self.mag1 = Mag(col, filt=filt1, **kwargs)

        super().__init__(**kwargs)

    @property
    def filt(self):
        # A Color spans two filters, so it has no single 'filt'.
        return None

    @filt.setter
    def filt(self, filt):
        # Deliberately ignore attempts to set 'filt' (e.g. from
        # CompositeFunctor): the two filters are fixed at construction.
        pass

    def _func(self, df):
        # df has a 'filter' column level (see _dfLevels), so indexing by
        # filter name selects that filter's columns.
        mag2 = self.mag2._func(df[self.filt2])
        mag1 = self.mag1._func(df[self.filt1])
        return mag2 - mag1

    @property
    def columns(self):
        return [self.mag1.col, self.mag2.col]

    def multilevelColumns(self, parq):
        # One (dataset, filter, column) triple per filter.
        return [(self.dataset, self.filt1, self.col),
                (self.dataset, self.filt2, self.col)]

    @property
    def name(self):
        return '{0} - {1} ({2})'.format(self.filt2, self.filt1, self.col)

    @property
    def shortname(self):
        return '{0}_{1}m{2}'.format(self.col, self.filt2.replace('-', ''),
                                    self.filt1.replace('-', ''))

699 

700 

class Labeller(Functor):
    """Main function of this subclass is to override the dropna=True
    """
    _null_label = 'null'
    _allow_difference = False
    name = 'label'
    _force_str = False

    def __call__(self, parq, dropna=False, **kwargs):
        # Deliberately force dropna=False regardless of what the caller
        # passed, so that rows with null labels are kept in the output.
        return super().__call__(parq, dropna=False, **kwargs)

711 

712 

class StarGalaxyLabeller(Labeller):
    """Label objects 'star' or 'galaxy' from the extendedness column."""
    _columns = ["base_ClassificationExtendedness_value"]
    _column = "base_ClassificationExtendedness_value"

    def _func(self, df):
        x = df[self._columns][self._column]
        mask = x.isnull()
        # Code 1 where extendedness < 0.5, else 0; null values become code 2.
        test = (x < 0.5).astype(int)
        test = test.mask(mask, 2)

        # TODO: DM-21954 Look into veracity of inline comment below
        # are these backwards?
        categories = ['galaxy', 'star', self._null_label]
        label = pd.Series(pd.Categorical.from_codes(test, categories=categories),
                          index=x.index, name='label')
        if self._force_str:
            label = label.astype(str)
        return label

731 

732 

class NumStarLabeller(Labeller):
    """Label objects by how many filters flagged them as a star."""
    _columns = ['numStarFlags']
    # NOTE(review): this mapping is not used by `_func` below, which bins
    # with the labels ['noStar', 'maybe', 'star'] instead -- confirm which
    # naming is intended.
    labels = {"star": 0, "maybe": 1, "notStar": 2}

    def _func(self, df):
        x = df[self._columns][self._columns[0]]

        # Number of filters
        n = len(x.unique()) - 1

        # Bin counts: 0 -> 'noStar', 1..n-1 -> 'maybe', n -> 'star'.
        labels = ['noStar', 'maybe', 'star']
        label = pd.Series(pd.cut(x, [-1, 0, n-1, n], labels=labels),
                          index=x.index, name='label')

        if self._force_str:
            label = label.astype(str)

        return label

751 

752 

class DeconvolvedMoments(Functor):
    """Source second moments with the PSF moments subtracted.

    Prefers HSM source moments where the columns are present, falling back
    on SdssShape moments row-wise where the HSM values are not finite.
    """
    name = 'Deconvolved Moments'
    shortname = 'deconvolvedMoments'
    _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
                "ext_shapeHSM_HsmSourceMoments_yy",
                "base_SdssShape_xx", "base_SdssShape_yy",
                "ext_shapeHSM_HsmPsfMoments_xx",
                "ext_shapeHSM_HsmPsfMoments_yy")

    def _func(self, df):
        """Calculate deconvolved moments"""
        if "ext_shapeHSM_HsmSourceMoments_xx" in df.columns:  # _xx added by tdm
            hsm = df["ext_shapeHSM_HsmSourceMoments_xx"] + df["ext_shapeHSM_HsmSourceMoments_yy"]
        else:
            # No HSM columns at all: all-NaN so the SDSS fallback is used.
            hsm = np.ones(len(df))*np.nan
        sdss = df["base_SdssShape_xx"] + df["base_SdssShape_yy"]
        if "ext_shapeHSM_HsmPsfMoments_xx" in df.columns:
            psf = df["ext_shapeHSM_HsmPsfMoments_xx"] + df["ext_shapeHSM_HsmPsfMoments_yy"]
        else:
            # LSST does not have shape.sdss.psf. Could instead add base_PsfShape to catalog using
            # exposure.getPsf().computeShape(s.getCentroid()).getIxx()
            # raise TaskError("No psf shape parameter found in catalog")
            raise RuntimeError('No psf shape parameter found in catalog')

        # HSM where finite, SDSS elsewhere, minus the PSF moments.
        return hsm.where(np.isfinite(hsm), sdss) - psf

778 

779 

class SdssTraceSize(Functor):
    """Functor to calculate SDSS trace radius size for sources"""
    name = "SDSS Trace Size"
    shortname = 'sdssTrace'
    _columns = ("base_SdssShape_xx", "base_SdssShape_yy")

    def _func(self, df):
        # Trace radius: sqrt of half the sum of the two second moments.
        traceSq = 0.5*(df["base_SdssShape_xx"] + df["base_SdssShape_yy"])
        return np.sqrt(traceSq)

789 

790 

class PsfSdssTraceSizeDiff(Functor):
    """Functor to calculate SDSS trace radius size difference (%) between object and psf model"""
    name = "PSF - SDSS Trace Size"
    shortname = 'psf_sdssTrace'
    _columns = ("base_SdssShape_xx", "base_SdssShape_yy",
                "base_SdssShape_psf_xx", "base_SdssShape_psf_yy")

    def _func(self, df):
        # Trace radii of the source and of its PSF model.
        objSize = np.sqrt(0.5*(df["base_SdssShape_xx"] + df["base_SdssShape_yy"]))
        modelSize = np.sqrt(0.5*(df["base_SdssShape_psf_xx"] + df["base_SdssShape_psf_yy"]))
        # Percentage difference, normalized by the mean of the two sizes.
        return 100*(objSize - modelSize)/(0.5*(objSize + modelSize))

803 

804 

class HsmTraceSize(Functor):
    """Functor to calculate HSM trace radius size for sources"""
    name = 'HSM Trace Size'
    shortname = 'hsmTrace'
    _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
                "ext_shapeHSM_HsmSourceMoments_yy")

    def _func(self, df):
        # Trace radius: sqrt of half the sum of the two second moments.
        traceSq = 0.5*(df["ext_shapeHSM_HsmSourceMoments_xx"]
                       + df["ext_shapeHSM_HsmSourceMoments_yy"])
        return np.sqrt(traceSq)

817 

class PsfHsmTraceSizeDiff(Functor):
    """Functor to calculate HSM trace radius size difference (%) between object and psf model"""
    name = 'PSF - HSM Trace Size'
    shortname = 'psf_HsmTrace'
    _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
                "ext_shapeHSM_HsmSourceMoments_yy",
                "ext_shapeHSM_HsmPsfMoments_xx",
                "ext_shapeHSM_HsmPsfMoments_yy")

    def _func(self, df):
        # Trace radii of the source and of its PSF model.
        objSize = np.sqrt(0.5*(df["ext_shapeHSM_HsmSourceMoments_xx"]
                               + df["ext_shapeHSM_HsmSourceMoments_yy"]))
        modelSize = np.sqrt(0.5*(df["ext_shapeHSM_HsmPsfMoments_xx"]
                                 + df["ext_shapeHSM_HsmPsfMoments_yy"]))
        # Percentage difference, normalized by the mean of the two sizes.
        return 100*(objSize - modelSize)/(0.5*(objSize + modelSize))

834 

835 

class HsmFwhm(Functor):
    """PSF FWHM computed from HSM PSF moments, scaled by `pixelScale`."""
    name = 'HSM Psf FWHM'
    _columns = ('ext_shapeHSM_HsmPsfMoments_xx', 'ext_shapeHSM_HsmPsfMoments_yy')
    # TODO: DM-21403 pixel scale should be computed from the CD matrix or transform matrix
    pixelScale = 0.168
    SIGMA2FWHM = 2*np.sqrt(2*np.log(2))

    def _func(self, df):
        # Gaussian sigma from the trace of the PSF moments, then convert
        # sigma -> FWHM and pixels -> pixelScale units.
        sigma = np.sqrt(0.5*(df['ext_shapeHSM_HsmPsfMoments_xx']
                             + df['ext_shapeHSM_HsmPsfMoments_yy']))
        return self.pixelScale*self.SIGMA2FWHM*sigma

846 

847 

class E1(Functor):
    """Distortion ellipticity component e1 = (Ixx - Iyy) / (Ixx + Iyy)."""
    name = "Distortion Ellipticity (e1)"
    shortname = "Distortion"

    def __init__(self, colXX, colXY, colYY, **kwargs):
        self.colXX = colXX
        self.colXY = colXY
        self.colYY = colYY
        self._columns = [self.colXX, self.colXY, self.colYY]
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.colXX, self.colXY, self.colYY]

    def _func(self, df):
        # e1 = (Ixx - Iyy) / (Ixx + Iyy).  The previous implementation was
        # missing the parentheses around the numerator, so operator
        # precedence computed Ixx - Iyy/(Ixx + Iyy) instead (compare the
        # correctly-parenthesized sibling functor E2).
        return (df[self.colXX] - df[self.colYY]) / (df[self.colXX] + df[self.colYY])

865 

866 

class E2(Functor):
    """Distortion ellipticity component e2 = 2*Ixy / (Ixx + Iyy)."""
    name = "Ellipticity e2"

    def __init__(self, colXX, colXY, colYY, **kwargs):
        self.colXX = colXX
        self.colXY = colXY
        self.colYY = colYY
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.colXX, self.colXY, self.colYY]

    def _func(self, df):
        denom = df[self.colXX] + df[self.colYY]
        return 2*df[self.colXY] / denom

883 

class RadiusFromQuadrupole(Functor):
    """Characteristic radius from quadrupole moments: determinant**(1/4)."""

    def __init__(self, colXX, colXY, colYY, **kwargs):
        self.colXX = colXX
        self.colXY = colXY
        self.colYY = colYY
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.colXX, self.colXY, self.colYY]

    def _func(self, df):
        # Fourth root of the determinant of the moment matrix.
        determinant = df[self.colXX]*df[self.colYY] - df[self.colXY]**2
        return determinant**0.25

899 

class ReferenceBand(Functor):
    """Name of the reference band, from the merge_measurement_* flags."""
    name = 'Reference Band'
    shortname = 'refBand'

    @property
    def columns(self):
        return ["merge_measurement_i",
                "merge_measurement_r",
                "merge_measurement_z",
                "merge_measurement_y",
                "merge_measurement_g"]

    def _func(self, df):
        # idxmax picks the first column whose flag is set (True > False);
        # strip the prefix to leave just the band letter.
        def bandOf(row):
            return row.idxmax().replace('merge_measurement_', '')

        return df[self.columns].apply(bandOf, axis=1)

919 

920 

class Photometry(Functor):
    """Base class for converting instrument fluxes (DN) to calibrated units.

    Conversions use either a supplied `calib` (via `getFluxMag0`) or a
    hard-coded coadd zero point of `COADD_ZP` magnitudes.
    """
    # AB to NanoJansky (3631 Jansky)
    AB_FLUX_SCALE = (0 * u.ABmag).to_value(u.nJy)
    LOG_AB_FLUX_SCALE = 12.56
    FIVE_OVER_2LOG10 = 1.085736204758129569
    # TO DO: DM-21955 Replace hard coded photometic calibration values
    COADD_ZP = 27

    def __init__(self, colFlux, colFluxErr=None, calib=None, **kwargs):
        # Elementwise overflow-safe hypot for the error propagation below.
        self.vhypot = np.vectorize(self.hypot)
        self.col = colFlux
        self.colFluxErr = colFluxErr

        self.calib = calib
        if calib is not None:
            self.fluxMag0, self.fluxMag0Err = calib.getFluxMag0()
        else:
            self.fluxMag0 = 1./np.power(10, -0.4*self.COADD_ZP)
            self.fluxMag0Err = 0.

        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.col]

    @property
    def name(self):
        return 'mag_{0}'.format(self.col)

    @classmethod
    def hypot(cls, a, b):
        """Overflow-safe sqrt(a**2 + b**2) for scalars."""
        if np.abs(a) < np.abs(b):
            a, b = b, a
        if a == 0.:
            return 0.
        q = b/a
        return np.abs(a) * np.sqrt(1. + q*q)

    def dn2flux(self, dn, fluxMag0):
        """Convert instrument flux to nanojansky."""
        return self.AB_FLUX_SCALE * dn / fluxMag0

    def dn2mag(self, dn, fluxMag0):
        """Convert instrument flux to AB magnitude."""
        # Use the stdlib ``warnings`` module: the ``np.warnings`` alias used
        # previously was removed from modern numpy.
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', r'invalid value encountered')
            warnings.filterwarnings('ignore', r'divide by zero')
            return -2.5 * np.log10(dn/fluxMag0)

    def dn2fluxErr(self, dn, dnErr, fluxMag0, fluxMag0Err):
        """Propagate flux and zero-point errors to a nanojansky flux error."""
        retVal = self.vhypot(dn * fluxMag0Err, dnErr * fluxMag0)
        retVal *= self.AB_FLUX_SCALE / fluxMag0 / fluxMag0
        return retVal

    def dn2MagErr(self, dn, dnErr, fluxMag0, fluxMag0Err):
        """Propagate flux and zero-point errors to a magnitude error."""
        retVal = self.dn2fluxErr(dn, dnErr, fluxMag0, fluxMag0Err) / self.dn2flux(dn, fluxMag0)
        return self.FIVE_OVER_2LOG10 * retVal

978 

class NanoJansky(Photometry):
    """Instrument flux converted to nanojansky (see `Photometry.dn2flux`)."""
    def _func(self, df):
        return self.dn2flux(df[self.col], self.fluxMag0)

983 

class NanoJanskyErr(Photometry):
    """Error on the calibrated nanojansky flux, propagated from the raw
    flux error and the zero-point error.
    """

    @property
    def columns(self):
        # Needs both the flux column and its error column.
        return [self.col, self.colFluxErr]

    def _func(self, df):
        fluxErr = self.dn2fluxErr(df[self.col], df[self.colFluxErr],
                                  self.fluxMag0, self.fluxMag0Err)
        return pd.Series(fluxErr, index=df.index)

992 

993 

class Magnitude(Photometry):
    """AB magnitude computed from the configured flux column."""

    def _func(self, df):
        rawFlux = df[self.col]
        return self.dn2mag(rawFlux, self.fluxMag0)

997 

998 

class MagnitudeErr(Photometry):
    """Error on the AB magnitude, propagated from the raw flux error and
    the zero-point error.
    """

    @property
    def columns(self):
        # Needs both the flux column and its error column.
        return [self.col, self.colFluxErr]

    def _func(self, df):
        magErr = self.dn2MagErr(df[self.col], df[self.colFluxErr],
                                self.fluxMag0, self.fluxMag0Err)
        return pd.Series(magErr, index=df.index)

1007 

1008 

class LocalPhotometry(Functor):
    """Base class for calibrating the specified instrument flux column using
    the local photometric calibration.

    Parameters
    ----------
    instFluxCol : `str`
        Name of the instrument flux column.
    instFluxErrCol : `str`
        Name of the associated error columns for ``instFluxCol``.
    photoCalibCol : `str`
        Name of local calibration column.
    photoCalibErrCol : `str`
        Error associated with ``photoCalibCol``

    See also
    --------
    LocalPhotometry
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    """
    # Magnitude of a 1 nJy source in the AB system; offset applied when
    # converting calibrated fluxes to AB magnitudes.
    logNJanskyToAB = (1 * u.nJy).to_value(u.ABmag)

    def __init__(self,
                 instFluxCol,
                 instFluxErrCol,
                 photoCalibCol,
                 photoCalibErrCol,
                 **kwargs):
        self.instFluxCol = instFluxCol
        self.instFluxErrCol = instFluxErrCol
        self.photoCalibCol = photoCalibCol
        self.photoCalibErrCol = photoCalibErrCol
        super().__init__(**kwargs)

    def instFluxToNanojansky(self, instFlux, localCalib):
        """Convert instrument flux to nanojanskys.

        Parameters
        ----------
        instFlux : `numpy.ndarray` or `pandas.Series`
            Array of instrument flux measurements
        localCalib : `numpy.ndarray` or `pandas.Series`
            Array of local photometric calibration estimates.

        Returns
        -------
        calibFlux : `numpy.ndarray` or `pandas.Series`
            Array of calibrated flux measurements.
        """
        return instFlux * localCalib

    def instFluxErrToNanojanskyErr(self, instFlux, instFluxErr, localCalib, localCalibErr):
        """Convert instrument flux error to nanojanskys.

        Parameters
        ----------
        instFlux : `numpy.ndarray` or `pandas.Series`
            Array of instrument flux measurements
        instFluxErr : `numpy.ndarray` or `pandas.Series`
            Errors on associated ``instFlux`` values
        localCalib : `numpy.ndarray` or `pandas.Series`
            Array of local photometric calibration estimates.
        localCalibErr : `numpy.ndarray` or `pandas.Series`
            Errors on associated ``localCalib`` values

        Returns
        -------
        calibFluxErr : `numpy.ndarray` or `pandas.Series`
            Errors on calibrated flux measurements.
        """
        return np.hypot(instFluxErr * localCalib, instFlux * localCalibErr)

    def instFluxToMagnitude(self, instFlux, localCalib):
        """Convert instrument flux to AB magnitudes.

        Parameters
        ----------
        instFlux : `numpy.ndarray` or `pandas.Series`
            Array of instrument flux measurements
        localCalib : `numpy.ndarray` or `pandas.Series`
            Array of local photometric calibration estimates.

        Returns
        -------
        calibMag : `numpy.ndarray` or `pandas.Series`
            Array of calibrated AB magnitudes.
        """
        return -2.5 * np.log10(self.instFluxToNanojansky(instFlux, localCalib)) + self.logNJanskyToAB

    def instFluxErrToMagnitudeErr(self, instFlux, instFluxErr, localCalib, localCalibErr):
        """Convert instrument flux error to an AB magnitude error.

        Parameters
        ----------
        instFlux : `numpy.ndarray` or `pandas.Series`
            Array of instrument flux measurements
        instFluxErr : `numpy.ndarray` or `pandas.Series`
            Errors on associated ``instFlux`` values
        localCalib : `numpy.ndarray` or `pandas.Series`
            Array of local photometric calibration estimates.
        localCalibErr : `numpy.ndarray` or `pandas.Series`
            Errors on associated ``localCalib`` values

        Returns
        -------
        calibMagErr: `numpy.ndarray` or `pandas.Series`
            Error on calibrated AB magnitudes.
        """
        err = self.instFluxErrToNanojanskyErr(instFlux, instFluxErr, localCalib, localCalibErr)
        # dMag = (2.5 / ln 10) * dFlux / flux, with flux = instFlux * localCalib.
        # BUGFIX: previously passed `instFluxErr` as the calibration, dividing
        # by instFlux * instFluxErr instead of the calibrated flux.
        return 2.5 / np.log(10) * err / self.instFluxToNanojansky(instFlux, localCalib)

1122 

1123 

class LocalNanojansky(LocalPhotometry):
    """Compute calibrated fluxes using the local calibration value.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    """

    @property
    def columns(self):
        # Instrument flux plus its local calibration.
        return [self.instFluxCol, self.photoCalibCol]

    @property
    def name(self):
        return 'flux_{}'.format(self.instFluxCol)

    def _func(self, df):
        instFlux = df[self.instFluxCol]
        localCalib = df[self.photoCalibCol]
        return self.instFluxToNanojansky(instFlux, localCalib)

1145 

1146 

class LocalNanojanskyErr(LocalPhotometry):
    """Compute calibrated flux errors using the local calibration value.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    """

    @property
    def columns(self):
        # Flux, flux error, calibration, and calibration error columns.
        return [self.instFluxCol, self.instFluxErrCol,
                self.photoCalibCol, self.photoCalibErrCol]

    @property
    def name(self):
        return 'fluxErr_{}'.format(self.instFluxCol)

    def _func(self, df):
        instFlux, instFluxErr = df[self.instFluxCol], df[self.instFluxErrCol]
        localCalib, localCalibErr = df[self.photoCalibCol], df[self.photoCalibErrCol]
        return self.instFluxErrToNanojanskyErr(instFlux, instFluxErr,
                                               localCalib, localCalibErr)

1170 

1171 

class LocalMagnitude(LocalPhotometry):
    """Compute calibrated AB magnitudes using the local calibration value.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    """

    @property
    def columns(self):
        # Instrument flux plus its local calibration.
        return [self.instFluxCol, self.photoCalibCol]

    @property
    def name(self):
        return 'mag_{}'.format(self.instFluxCol)

    def _func(self, df):
        instFlux = df[self.instFluxCol]
        localCalib = df[self.photoCalibCol]
        return self.instFluxToMagnitude(instFlux, localCalib)

1194 

1195 

class LocalMagnitudeErr(LocalPhotometry):
    """Compute calibrated AB magnitude errors using the local calibration value.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    """

    @property
    def columns(self):
        # Flux, flux error, calibration, and calibration error columns.
        return [self.instFluxCol, self.instFluxErrCol,
                self.photoCalibCol, self.photoCalibErrCol]

    @property
    def name(self):
        return 'magErr_{}'.format(self.instFluxCol)

    def _func(self, df):
        instFlux, instFluxErr = df[self.instFluxCol], df[self.instFluxErrCol]
        localCalib, localCalibErr = df[self.photoCalibCol], df[self.photoCalibErrCol]
        return self.instFluxErrToMagnitudeErr(instFlux, instFluxErr,
                                              localCalib, localCalibErr)