Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of pipe_tasks. 

2# 

3# LSST Data Management System 

4# This product includes software developed by the 

5# LSST Project (http://www.lsst.org/). 

6# See COPYRIGHT file at the top of the source tree. 

7# 

8# This program is free software: you can redistribute it and/or modify 

9# it under the terms of the GNU General Public License as published by 

10# the Free Software Foundation, either version 3 of the License, or 

11# (at your option) any later version. 

12# 

13# This program is distributed in the hope that it will be useful, 

14# but WITHOUT ANY WARRANTY; without even the implied warranty of 

15# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

16# GNU General Public License for more details. 

17# 

18# You should have received a copy of the LSST License Statement and 

19# the GNU General Public License along with this program. If not, 

20# see <https://www.lsstcorp.org/LegalNotices/>. 

21# 

import os.path
import re
import warnings
from itertools import product

import astropy.units as u
import numpy as np
import pandas as pd
import yaml

from lsst.daf.persistence import doImport
from lsst.daf.butler import DeferredDatasetHandle
from .parquetTable import ParquetTable, MultilevelParquetTable

34 

35 

def init_fromDict(initDict, basePath='lsst.pipe.tasks.functors',
                  typeKey='functor', name=None):
    """Initialize an object defined in a dictionary

    The object needs to be importable as
    f'{basePath}.{initDict[typeKey]}'
    The positional and keyword arguments (if any) are contained in
    "args" and "kwargs" entries in the dictionary, respectively.
    This is used in `functors.CompositeFunctor.from_yaml` to initialize
    a composite functor from a specification in a YAML file.

    Parameters
    ----------
    initDict : dictionary
        Dictionary describing object's initialization. Must contain
        an entry keyed by ``typeKey`` that is the name of the object,
        relative to ``basePath``.
    basePath : str
        Path relative to module in which ``initDict[typeKey]`` is defined.
    typeKey : str
        Key of ``initDict`` that is the name of the object
        (relative to ``basePath``).
    name : str, optional
        Label used only to make construction error messages more helpful.

    Raises
    ------
    Exception
        Re-raises (as the same type) any error from the object's
        constructor, with a message identifying the failing functor.
    """
    # Work on a copy so the caller's dictionary is not mutated by the pops.
    initDict = initDict.copy()
    # TO DO: DM-21956 We should be able to define functors outside this module
    pythonType = doImport(f'{basePath}.{initDict.pop(typeKey)}')
    args = []
    if 'args' in initDict:
        args = initDict.pop('args')
        if isinstance(args, str):
            args = [args]
    try:
        element = pythonType(*args, **initDict)
    except Exception as e:
        message = f'Error in constructing functor "{name}" of type {pythonType.__name__} with args: {args}'
        # Chain the original exception so its traceback is preserved.
        raise type(e)(message, e.args) from e
    return element

73 

74 

class Functor(object):
    """Define and execute a calculation on a ParquetTable

    The `__call__` method accepts either a `ParquetTable` object or a
    `DeferredDatasetHandle`, and returns the
    result of the calculation as a single column. Each functor defines what
    columns are needed for the calculation, and only these columns are read
    from the `ParquetTable`.

    The action of `__call__` consists of two steps: first, loading the
    necessary columns from disk into memory as a `pandas.DataFrame` object;
    and second, performing the computation on this dataframe and returning the
    result.

    To define a new `Functor`, a subclass must define a `_func` method,
    that takes a `pandas.DataFrame` and returns result in a `pandas.Series`.
    In addition, it must define the following attributes

    * `_columns`: The columns necessary to perform the calculation
    * `name`: A name appropriate for a figure axis label
    * `shortname`: A name appropriate for use as a dictionary key

    On initialization, a `Functor` should declare what band (`filt` kwarg)
    and dataset (e.g. `'ref'`, `'meas'`, `'forced_src'`) it is intended to be
    applied to. This enables the `_get_data` method to extract the proper
    columns from the parquet file. If not specified, the dataset will fall back
    on the `_defaultDataset` attribute. If band is not specified and `dataset`
    is anything other than `'ref'`, then an error will be raised when trying to
    perform the calculation.

    Originally, `Functor` was set up to expect
    datasets formatted like the `deepCoadd_obj` dataset; that is, a
    dataframe with a multi-level column index, with the levels of the
    column index being `band`, `dataset`, and `column`.
    It has since been generalized to apply to dataframes without multi-level
    indices and multi-level indices with just `dataset` and `column` levels.
    In addition, the `_get_data` method that reads
    the dataframe from the `ParquetTable` will return a dataframe with column
    index levels defined by the `_dfLevels` attribute; by default, this is
    `column`.

    The `_dfLevels` attributes should generally not need to
    be changed, unless `_func` needs columns from multiple filters or datasets
    to do the calculation.
    An example of this is the `lsst.pipe.tasks.functors.Color` functor, for
    which `_dfLevels = ('band', 'column')`, and `_func` expects the dataframe
    it gets to have those levels in the column index.

    Parameters
    ----------
    filt : str
        Filter upon which to do the calculation

    dataset : str
        Dataset upon which to do the calculation
        (e.g., 'ref', 'meas', 'forced_src').
    """

    _defaultDataset = 'ref'
    _dfLevels = ('column',)
    _defaultNoDup = False

    def __init__(self, filt=None, dataset=None, noDup=None):
        self.filt = filt
        self.dataset = dataset if dataset is not None else self._defaultDataset
        self._noDup = noDup

    @property
    def noDup(self):
        # An explicit per-instance setting wins over the class default.
        if self._noDup is not None:
            return self._noDup
        else:
            return self._defaultNoDup

    @property
    def columns(self):
        """Columns required to perform calculation
        """
        if not hasattr(self, '_columns'):
            raise NotImplementedError('Must define columns property or _columns attribute')
        return self._columns

    def _get_data_columnLevels(self, data, columnIndex=None):
        """Gets the names of the column index levels

        This should only be called in the context of a multilevel table.
        The logic here is to enable this to work both with the gen2
        `MultilevelParquetTable` and with the gen3 `DeferredDatasetHandle`.

        Parameters
        ----------
        data : `MultilevelParquetTable` or `DeferredDatasetHandle`

        columnIndex (optional): pandas `Index` object
            if not passed, then it is read from the `DeferredDatasetHandle`
        """
        if isinstance(data, DeferredDatasetHandle):
            if columnIndex is None:
                columnIndex = data.get(component="columns")
        # A supplied (or freshly read) columnIndex takes precedence over
        # asking the data object itself.
        if columnIndex is not None:
            return columnIndex.names
        if isinstance(data, MultilevelParquetTable):
            return data.columnLevels
        else:
            raise TypeError(f"Unknown type for data: {type(data)}!")

    def _get_data_columnLevelNames(self, data, columnIndex=None):
        """Gets the content of each of the column levels for a multilevel table

        Similar to `_get_data_columnLevels`, this enables backward
        compatibility with gen2.

        Mirrors original gen2 implementation within
        `pipe.tasks.parquetTable.MultilevelParquetTable`
        """
        if isinstance(data, DeferredDatasetHandle):
            if columnIndex is None:
                columnIndex = data.get(component="columns")
        if columnIndex is not None:
            # Unique values of each level, computed from the flat list of
            # column tuples.
            columnLevels = columnIndex.names
            columnLevelNames = {
                level: list(np.unique(np.array([c for c in columnIndex])[:, i]))
                for i, level in enumerate(columnLevels)
            }
            return columnLevelNames
        if isinstance(data, MultilevelParquetTable):
            return data.columnLevelNames
        else:
            raise TypeError(f"Unknown type for data: {type(data)}!")

    def _colsFromDict(self, colDict, columnIndex=None):
        """Converts dictionary column specification to a list of columns

        This mirrors the original gen2 implementation within
        `pipe.tasks.parquetTable.MultilevelParquetTable`
        """
        new_colDict = {}
        # Passing data=None is safe here because columnIndex is provided.
        columnLevels = self._get_data_columnLevels(None, columnIndex=columnIndex)

        for i, lev in enumerate(columnLevels):
            if lev in colDict:
                if isinstance(colDict[lev], str):
                    new_colDict[lev] = [colDict[lev]]
                else:
                    new_colDict[lev] = colDict[lev]
            else:
                # Level not constrained: take every value present.
                new_colDict[lev] = columnIndex.levels[i]

        levelCols = [new_colDict[lev] for lev in columnLevels]
        cols = product(*levelCols)
        return list(cols)

    def multilevelColumns(self, data, columnIndex=None, returnTuple=False):
        """Returns columns needed by functor from multilevel dataset

        To access tables with multilevel column structure, the
        `MultilevelParquetTable` or `DeferredDatasetHandle` need to be
        passed either a list of tuples or a dictionary.

        Parameters
        ----------
        data : `MultilevelParquetTable` or `DeferredDatasetHandle`

        columnIndex (optional): pandas `Index` object
            either passed or read in from `DeferredDatasetHandle`.

        `returnTuple` : bool
            If true, then return a list of tuples rather than the column
            dictionary specification. This is set to `True` by
            `CompositeFunctor` in order to be able to combine columns from
            the various component functors.
        """
        if isinstance(data, DeferredDatasetHandle) and columnIndex is None:
            columnIndex = data.get(component="columns")

        # Confirm that the dataset has the column levels the functor is
        # expecting it to have.
        columnLevels = self._get_data_columnLevels(data, columnIndex)

        columnDict = {'column': self.columns,
                      'dataset': self.dataset}
        if self.filt is None:
            columnLevelNames = self._get_data_columnLevelNames(data, columnIndex)
            if "band" in columnLevels:
                if self.dataset == "ref":
                    # 'ref' columns are identical across bands; any one works.
                    columnDict["band"] = columnLevelNames["band"][0]
                else:
                    raise ValueError(f"'filt' not set for functor {self.name}"
                                     f"(dataset {self.dataset}) "
                                     "and ParquetTable "
                                     "contains multiple filters in column index. "
                                     "Set 'filt' or set 'dataset' to 'ref'.")
        else:
            columnDict['band'] = self.filt

        if isinstance(data, MultilevelParquetTable):
            return data._colsFromDict(columnDict)
        elif isinstance(data, DeferredDatasetHandle):
            if returnTuple:
                return self._colsFromDict(columnDict, columnIndex=columnIndex)
            else:
                return columnDict

    def _func(self, df, dropna=True):
        raise NotImplementedError('Must define calculation on dataframe')

    def _get_columnIndex(self, data):
        """Return columnIndex
        """

        if isinstance(data, DeferredDatasetHandle):
            return data.get(component="columns")
        else:
            return None

    def _get_data(self, data):
        """Retrieve dataframe necessary for calculation.

        The data argument can be a DataFrame, a ParquetTable instance, or a
        gen3 DeferredDatasetHandle

        Returns dataframe upon which `self._func` can act.

        N.B. while passing a raw pandas `DataFrame` *should* work here, it
        has not been tested.
        """
        if isinstance(data, pd.DataFrame):
            return data

        # First thing to do: check to see if the data source has a
        # multilevel column index or not.
        columnIndex = self._get_columnIndex(data)
        is_multiLevel = isinstance(data, MultilevelParquetTable) or isinstance(columnIndex, pd.MultiIndex)

        # Simple single-level parquet table, gen2
        if isinstance(data, ParquetTable) and not is_multiLevel:
            columns = self.columns
            df = data.toDataFrame(columns=columns)
            return df

        # Get proper columns specification for this functor
        if is_multiLevel:
            columns = self.multilevelColumns(data, columnIndex=columnIndex)
        else:
            columns = self.columns

        # NOTE(review): any type other than MultilevelParquetTable or
        # DeferredDatasetHandle falls through with `df` unset here — verify
        # callers never pass anything else.
        if isinstance(data, MultilevelParquetTable):
            # Load in-memory dataframe with appropriate columns the gen2 way
            df = data.toDataFrame(columns=columns, droplevels=False)
        elif isinstance(data, DeferredDatasetHandle):
            # Load in-memory dataframe with appropriate columns the gen3 way
            df = data.get(parameters={"columns": columns})

        # Drop unnecessary column levels
        if is_multiLevel:
            df = self._setLevels(df)

        return df

    def _setLevels(self, df):
        # Keep only the column-index levels named in `_dfLevels`.
        levelsToDrop = [n for n in df.columns.names if n not in self._dfLevels]
        df.columns = df.columns.droplevel(levelsToDrop)
        return df

    def _dropna(self, vals):
        return vals.dropna()

    def __call__(self, data, dropna=False):
        # Load the columns first; a failure here should propagate as-is.
        # (Previously this call sat inside the ``try`` below, so an error
        # in ``_get_data`` surfaced as a confusing NameError on ``df``
        # instead of the real exception.)
        df = self._get_data(data)
        try:
            vals = self._func(df)
        except Exception:
            # Computation failed: return an all-NaN series of matching shape.
            vals = self.fail(df)
        if dropna:
            vals = self._dropna(vals)

        return vals

    def difference(self, data1, data2, **kwargs):
        """Computes difference between functor called on two different
        ParquetTable objects
        """
        return self(data1, **kwargs) - self(data2, **kwargs)

    def fail(self, df):
        # All-NaN fallback result, aligned with the input's index.
        return pd.Series(np.full(len(df), np.nan), index=df.index)

    @property
    def name(self):
        """Full name of functor (suitable for figure labels)
        """
        # NOTE(review): this *returns* the exception class rather than
        # raising it; left unchanged because error-message formatting
        # (e.g. in multilevelColumns) interpolates self.name.
        return NotImplementedError

    @property
    def shortname(self):
        """Short name of functor (suitable for column name/dict key)
        """
        return self.name

367 

368 

class CompositeFunctor(Functor):
    """Perform multiple calculations at once on a catalog

    The role of a `CompositeFunctor` is to group together computations from
    multiple functors. Instead of returning `pandas.Series` a
    `CompositeFunctor` returns a `pandas.Dataframe`, with the column names
    being the keys of `funcDict`.

    The `columns` attribute of a `CompositeFunctor` is the union of all
    columns in all the component functors.

    A `CompositeFunctor` does not use a `_func` method itself; rather,
    when a `CompositeFunctor` is called, all its columns are loaded
    at once, and the resulting dataframe is passed to the `_func` method of
    each component functor. This has the advantage of only doing I/O
    (reading from parquet file) once, and works because each individual
    `_func` method of each component functor does not care if there are
    *extra* columns in the dataframe being passed; only that it must contain
    *at least* the `columns` it expects.

    An important and useful class method is `from_yaml`, which takes as
    argument the path to a YAML file specifying a collection of functors.

    Parameters
    ----------
    funcs : `dict` or `list`
        Dictionary or list of functors. If a list, then it will be converted
        into a dictionary according to the `.shortname` attribute of each
        functor.
    """
    dataset = None

    def __init__(self, funcs, **kwargs):

        # Accept any dict (including subclasses such as OrderedDict);
        # the previous `type(funcs) == dict` check misrouted dict
        # subclasses into the list branch.
        if isinstance(funcs, dict):
            self.funcDict = funcs
        else:
            self.funcDict = {f.shortname: f for f in funcs}

        self._filt = None

        super().__init__(**kwargs)

    @property
    def filt(self):
        return self._filt

    @filt.setter
    def filt(self, filt):
        # Setting the filter propagates it to every component functor.
        if filt is not None:
            for _, f in self.funcDict.items():
                f.filt = filt
        self._filt = filt

    def update(self, new):
        """Add the functors from ``new`` (a dict or another
        `CompositeFunctor`) to this one."""
        if isinstance(new, dict):
            self.funcDict.update(new)
        elif isinstance(new, CompositeFunctor):
            self.funcDict.update(new.funcDict)
        else:
            raise TypeError('Can only update with dictionary or CompositeFunctor.')

        # Make sure new functors have the same 'filt' set
        if self.filt is not None:
            self.filt = self.filt

    @property
    def columns(self):
        # Union of the columns of every component functor.
        return list(set([x for y in [f.columns for f in self.funcDict.values()] for x in y]))

    def multilevelColumns(self, data, **kwargs):
        # Get the union of columns for all component functors.
        # Note the need to have `returnTuple=True` here.
        return list(
            set(
                [
                    x
                    for y in [
                        f.multilevelColumns(data, returnTuple=True, **kwargs) for f in self.funcDict.values()
                    ]
                    for x in y
                ]
            )
        )

    def __call__(self, data, **kwargs):
        """Apply the functor to the data table

        Parameters
        ----------
        data : `lsst.daf.butler.DeferredDatasetHandle`,
               `lsst.pipe.tasks.parquetTable.MultilevelParquetTable`,
               `lsst.pipe.tasks.parquetTable.ParquetTable`,
               or `pandas.DataFrame`.
            The table or a pointer to a table on disk from which columns can
            be accessed
        """
        columnIndex = self._get_columnIndex(data)

        # First, determine whether data has a multilevel index
        # (either gen2 or gen3)
        is_multiLevel = isinstance(data, MultilevelParquetTable) or isinstance(columnIndex, pd.MultiIndex)

        # Multilevel index, gen2 or gen3
        if is_multiLevel:
            columns = self.multilevelColumns(data, columnIndex=columnIndex)

            if isinstance(data, MultilevelParquetTable):
                # Read data into memory the gen2 way
                df = data.toDataFrame(columns=columns, droplevels=False)
            elif isinstance(data, DeferredDatasetHandle):
                # Read data into memory the gen3 way
                df = data.get(parameters={"columns": columns})

            valDict = {}
            for k, f in self.funcDict.items():
                # Select this functor's columns outside the ``try``: if the
                # selection itself fails, the error should propagate.
                # (Previously a selection failure left ``subdf`` unbound and
                # the handler raised a confusing NameError.)
                subdf = f._setLevels(
                    df[f.multilevelColumns(data, returnTuple=True, columnIndex=columnIndex)]
                )
                try:
                    valDict[k] = f._func(subdf)
                except Exception:
                    valDict[k] = f.fail(subdf)

        else:
            if isinstance(data, DeferredDatasetHandle):
                # input if Gen3 deferLoad=True
                df = data.get(parameters={"columns": self.columns})
            elif isinstance(data, pd.DataFrame):
                # input if Gen3 deferLoad=False
                df = data
            else:
                # Original Gen2 input is type ParquetTable and the fallback
                df = data.toDataFrame(columns=self.columns)

            valDict = {k: f._func(df) for k, f in self.funcDict.items()}

        try:
            valDf = pd.concat(valDict, axis=1)
        except TypeError:
            # Show which functor produced a non-concatenatable result
            # before re-raising.
            print([(k, type(v)) for k, v in valDict.items()])
            raise

        if kwargs.get('dropna', False):
            valDf = valDf.dropna(how='any')

        return valDf

    @classmethod
    def renameCol(cls, col, renameRules):
        """Apply the first matching prefix-rename rule to ``col``."""
        if renameRules is None:
            return col
        for old, new in renameRules:
            if col.startswith(old):
                col = col.replace(old, new)
        return col

    @classmethod
    def from_file(cls, filename, **kwargs):
        """Construct a `CompositeFunctor` from a YAML file on disk."""
        # Allow environment variables in the filename.
        filename = os.path.expandvars(filename)
        with open(filename) as f:
            translationDefinition = yaml.safe_load(f)

        return cls.from_yaml(translationDefinition, **kwargs)

    @classmethod
    def from_yaml(cls, translationDefinition, **kwargs):
        """Construct a `CompositeFunctor` from a parsed YAML specification."""
        funcs = {}
        for func, val in translationDefinition['funcs'].items():
            funcs[func] = init_fromDict(val, name=func)

        if 'flag_rename_rules' in translationDefinition:
            renameRules = translationDefinition['flag_rename_rules']
        else:
            renameRules = None

        # Plain flag columns are mapped to `Column` functors on the
        # appropriate dataset.
        if 'calexpFlags' in translationDefinition:
            for flag in translationDefinition['calexpFlags']:
                funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='calexp')

        if 'refFlags' in translationDefinition:
            for flag in translationDefinition['refFlags']:
                funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='ref')

        if 'forcedFlags' in translationDefinition:
            for flag in translationDefinition['forcedFlags']:
                funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='forced_src')

        if 'flags' in translationDefinition:
            for flag in translationDefinition['flags']:
                funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='meas')

        return cls(funcs, **kwargs)

560 

561 

def mag_aware_eval(df, expr):
    """Evaluate an expression on a DataFrame, knowing what the 'mag' function means

    Builds on `pandas.DataFrame.eval`, which parses and executes math on
    dataframes.

    ``mag(col)`` in the expression is rewritten into the magnitude formula
    ``-2.5*log(col)/log(10)`` before evaluation. If evaluation then fails
    (typically because ``col`` is a bare flux name), the rewrite is retried
    with ``_instFlux`` appended to the column name.

    Parameters
    ----------
    df : pandas.DataFrame
        Dataframe on which to evaluate expression.

    expr : str
        Expression.
    """
    # NOTE: the former explicit ``truediv=True`` argument was dropped —
    # true division is always used under Python 3, and the keyword was
    # removed from DataFrame.eval in pandas 2.0.
    try:
        expr_new = re.sub(r'mag\((\w+)\)', r'-2.5*log(\g<1>)/log(10)', expr)
        val = df.eval(expr_new)
    except Exception:  # Should check what actually gets raised
        expr_new = re.sub(r'mag\((\w+)\)', r'-2.5*log(\g<1>_instFlux)/log(10)', expr)
        val = df.eval(expr_new)
    return val

582 

583 

class CustomFunctor(Functor):
    """Evaluate an arbitrary user-supplied expression on a catalog.

    Column names (and thus the columns to be loaded from the catalog) are
    discovered by collecting all identifiers in the expression and
    discarding known "math-y" words (`_ignore_words`).

    Parameters
    ----------
    expr : str
        Expression to evaluate, parsed and executed by `mag_aware_eval`.
    """
    # Identifiers that are function names, not catalog columns.
    _ignore_words = ('mag', 'sin', 'cos', 'exp', 'log', 'sqrt')

    def __init__(self, expr, **kwargs):
        self.expr = expr
        super().__init__(**kwargs)

    @property
    def name(self):
        return self.expr

    @property
    def columns(self):
        """Catalog columns referenced by the expression."""
        flux_args = re.findall(r'mag\(\s*(\w+)\s*\)', self.expr)
        identifiers = re.findall(r'[a-zA-Z_]+', self.expr)
        needed = [word for word in identifiers if word not in self._ignore_words]

        # Arguments of mag() that are bare flux names must be loaded with
        # the '_instFlux' suffix; the bare spelling is then excluded.
        bare_names = []
        for arg in flux_args:
            if re.search('_instFlux$', arg):
                needed.append(arg)
            else:
                needed.append(f'{arg}_instFlux')
                bare_names.append(arg)

        return list({word for word in needed if word not in bare_names})

    def _func(self, df):
        return mag_aware_eval(df, self.expr)

622 

623 

class Column(Functor):
    """Return a single named column, unchanged.

    Parameters
    ----------
    col : str
        Name of the column to extract.
    """

    def __init__(self, col, **kwargs):
        self.col = col
        super().__init__(**kwargs)

    @property
    def name(self):
        return self.col

    @property
    def columns(self):
        # Only the one requested column is needed.
        return [self.col]

    def _func(self, df):
        return df[self.col]

642 

643 

class Index(Functor):
    """Return the value of the dataframe index for each object."""

    # A real column must be requested so a dataframe is loaded at all;
    # 'coord_ra' is just a placeholder.
    columns = ['coord_ra']
    _defaultDataset = 'ref'
    _defaultNoDup = True

    def _func(self, df):
        # Expose the index itself as a Series aligned on that same index.
        return pd.Series(df.index, index=df.index)

654 

655 

class IDColumn(Column):
    """Object id, taken from the dataframe index."""
    col = 'id'
    _allow_difference = False
    _defaultNoDup = True

    def _func(self, df):
        # The id is the index, not a stored column.
        return pd.Series(df.index, index=df.index)

663 

664 

class FootprintNPix(Column):
    """Number of pixels in the source footprint (`base_Footprint_nPix`)."""
    col = 'base_Footprint_nPix'

667 

668 

class CoordColumn(Column):
    """Base class for a coordinate column, returned in degrees."""

    # Set False in a subclass if the underlying column is already in degrees.
    _radians = True

    def __init__(self, col, **kwargs):
        super().__init__(col, **kwargs)

    def _func(self, df):
        # Work on a fresh Series: the source column may be shared with other
        # functors, so it must never be converted in place.
        values = df[self.col]
        if self._radians:
            values = values * 180 / np.pi
        return values

681 

682 

class RAColumn(CoordColumn):
    """Right Ascension of each object, in degrees (from ``coord_ra``)."""
    name = 'RA'
    _defaultNoDup = True

    def __init__(self, **kwargs):
        super().__init__('coord_ra', **kwargs)

    def __call__(self, catalog, **kwargs):
        # Delegates straight to CoordColumn; kept for interface stability.
        return super().__call__(catalog, **kwargs)

694 

695 

class DecColumn(CoordColumn):
    """Declination of each object, in degrees (from ``coord_dec``)."""
    name = 'Dec'
    _defaultNoDup = True

    def __init__(self, **kwargs):
        super().__init__('coord_dec', **kwargs)

    def __call__(self, catalog, **kwargs):
        # Delegates straight to CoordColumn; kept for interface stability.
        return super().__call__(catalog, **kwargs)

707 

708 

def fluxName(col):
    """Return *col* with the ``_instFlux`` suffix, adding it if missing."""
    suffix = '_instFlux'
    return col if col.endswith(suffix) else col + suffix

713 

714 

def fluxErrName(col):
    """Return *col* with the ``_instFluxErr`` suffix, adding it if missing."""
    suffix = '_instFluxErr'
    return col if col.endswith(suffix) else col + suffix

719 

720 

class Mag(Functor):
    """Compute calibrated magnitude

    Takes a `calib` argument, which returns the flux at mag=0
    as `calib.getFluxMag0()`. If not provided, then the default
    `fluxMag0` is 63095734448.0194, which is default for HSC.
    This default should be removed in DM-21955

    This calculation hides warnings about invalid values and dividing by
    zero.

    As for all functors, a `dataset` and `filt` kwarg should be provided
    upon initialization. Unlike the default `Functor`, however, the default
    dataset for a `Mag` is `'meas'`, rather than `'ref'`.

    Parameters
    ----------
    col : `str`
        Name of flux column from which to compute magnitude. Can be
        parseable by `lsst.pipe.tasks.functors.fluxName` function---that is,
        you can pass `'modelfit_CModel'` instead of
        `'modelfit_CModel_instFlux'`) and it will understand.
    calib : `lsst.afw.image.calib.Calib` (optional)
        Object that knows zero point.
    """
    _defaultDataset = 'meas'

    def __init__(self, col, calib=None, **kwargs):
        self.col = fluxName(col)
        self.calib = calib
        if calib is not None:
            self.fluxMag0 = calib.getFluxMag0()[0]
        else:
            # TO DO: DM-21955 Replace hard coded photometric calibration values
            self.fluxMag0 = 63095734448.0194

        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.col]

    def _func(self, df):
        # Use the stdlib `warnings` module directly: the `np.warnings`
        # alias used previously was removed in NumPy 1.24.
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', r'invalid value encountered')
            warnings.filterwarnings('ignore', r'divide by zero')
            return -2.5*np.log10(df[self.col] / self.fluxMag0)

    @property
    def name(self):
        return f'mag_{self.col}'

771 

772 

class MagErr(Mag):
    """Compute calibrated magnitude uncertainty

    Takes the same `calib` object as `lsst.pipe.tasks.functors.Mag`.

    Parameters
    ----------
    col : `str`
        Name of flux column
    calib : `lsst.afw.image.calib.Calib` (optional)
        Object that knows zero point.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        if self.calib is not None:
            self.fluxMag0Err = self.calib.getFluxMag0()[1]
        else:
            # No calib: assume a perfectly known zero point.
            self.fluxMag0Err = 0.

    @property
    def columns(self):
        # Both the flux and its uncertainty are required.
        return [self.col, self.col + 'Err']

    def _func(self, df):
        # Use the stdlib `warnings` module directly: the `np.warnings`
        # alias used previously was removed in NumPy 1.24.
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', r'invalid value encountered')
            warnings.filterwarnings('ignore', r'divide by zero')
            fluxCol, fluxErrCol = self.columns
            # Standard error propagation for mag = -2.5 log10(flux/fluxMag0).
            x = df[fluxErrCol] / df[fluxCol]
            y = self.fluxMag0Err / self.fluxMag0
            magErr = (2.5 / np.log(10.)) * np.sqrt(x*x + y*y)
            return magErr

    @property
    def name(self):
        return super().name + '_err'

809 

810 

class NanoMaggie(Mag):
    """Calibrated flux expressed in nanomaggies.

    NOTE(review): scales the instrumental flux by the zero-point flux
    ``fluxMag0`` (giving maggies) and multiplies by 1e9 — confirm downstream
    consumers expect nanomaggies rather than nanojansky.
    """

    def _func(self, df):
        return (df[self.col] / self.fluxMag0) * 1e9

817 

818 

class MagDiff(Functor):
    """Functor to calculate magnitude difference between two flux columns.

    Both column names are passed through `fluxName`, so the ``_instFlux``
    suffix may be omitted. The result is ``-2.5*log10(col1/col2)``, i.e.
    ``mag(col1) - mag(col2)``.

    Parameters
    ----------
    col1, col2 : `str`
        Names of the flux columns to difference.
    """
    # NOTE: the class description above was previously a stray string
    # statement placed *after* this attribute, so it never became __doc__.
    _defaultDataset = 'meas'

    def __init__(self, col1, col2, **kwargs):
        self.col1 = fluxName(col1)
        self.col2 = fluxName(col2)
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.col1, self.col2]

    def _func(self, df):
        # Use the stdlib `warnings` module directly: the `np.warnings`
        # alias used previously was removed in NumPy 1.24.
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', r'invalid value encountered')
            warnings.filterwarnings('ignore', r'divide by zero')
            return -2.5*np.log10(df[self.col1]/df[self.col2])

    @property
    def name(self):
        return f'(mag_{self.col1} - mag_{self.col2})'

    @property
    def shortname(self):
        return f'magDiff_{self.col1}_{self.col2}'

846 

847 

class Color(Functor):
    """Compute the color between two filters

    Computes color by initializing two different `Mag`
    functors based on the `col` and filters provided, and
    then returning the difference.

    This is enabled by the `_func` expecting a dataframe with a
    multilevel column index, with both `'band'` and `'column'`,
    instead of just `'column'`, which is the `Functor` default.
    This is controlled by the `_dfLevels` attribute.

    Also of note, the default dataset for `Color` is `forced_src'`,
    whereas for `Mag` it is `'meas'`.

    Parameters
    ----------
    col : str
        Name of flux column from which to compute; same as would be passed
        to `lsst.pipe.tasks.functors.Mag`.

    filt2, filt1 : str
        Filters from which to compute magnitude difference.
        Color computed is `Mag(filt2) - Mag(filt1)`.
    """
    _defaultDataset = 'forced_src'
    # Both the band and column levels are needed by _func (see class docs).
    _dfLevels = ('band', 'column')
    _defaultNoDup = True

    def __init__(self, col, filt2, filt1, **kwargs):
        self.col = fluxName(col)
        # A color of a band with itself is identically zero; reject it.
        if filt2 == filt1:
            raise RuntimeError("Cannot compute Color for %s: %s - %s " % (col, filt2, filt1))
        self.filt2 = filt2
        self.filt1 = filt1

        # One Mag functor per band; their difference is the color.
        self.mag2 = Mag(col, filt=filt2, **kwargs)
        self.mag1 = Mag(col, filt=filt1, **kwargs)

        super().__init__(**kwargs)

    @property
    def filt(self):
        # A Color spans two filters, so it has no single 'filt'.
        return None

    @filt.setter
    def filt(self, filt):
        # Deliberately a no-op: the per-band Mag functors carry the filters.
        pass

    def _func(self, df):
        # df has a 'band' column level; select each band's sub-frame and
        # let the per-band Mag functors do the magnitude computation.
        mag2 = self.mag2._func(df[self.filt2])
        mag1 = self.mag1._func(df[self.filt1])
        return mag2 - mag1

    @property
    def columns(self):
        return [self.mag1.col, self.mag2.col]

    def multilevelColumns(self, parq, **kwargs):
        # The same flux column is needed in both bands.
        return [(self.dataset, self.filt1, self.col), (self.dataset, self.filt2, self.col)]

    @property
    def name(self):
        return f'{self.filt2} - {self.filt1} ({self.col})'

    @property
    def shortname(self):
        return f"{self.col}_{self.filt2.replace('-', '')}m{self.filt1.replace('-', '')}"

916 

917 

class Labeller(Functor):
    """Main function of this subclass is to override the dropna=True

    Base class for functors that assign a categorical label to each row;
    labels must be produced for every row, so NaN-dropping is disabled.
    """
    # Label used for rows whose classification cannot be determined.
    _null_label = 'null'
    _allow_difference = False
    name = 'label'
    # If True, subclasses cast the categorical result to plain strings.
    _force_str = False

    def __call__(self, parq, dropna=False, **kwargs):
        # ``dropna`` is accepted for interface compatibility but is
        # deliberately forced to False.
        return super().__call__(parq, dropna=False, **kwargs)

928 

929 

930class StarGalaxyLabeller(Labeller): 

931 _columns = ["base_ClassificationExtendedness_value"] 

932 _column = "base_ClassificationExtendedness_value" 

933 

934 def _func(self, df): 

935 x = df[self._columns][self._column] 

936 mask = x.isnull() 

937 test = (x < 0.5).astype(int) 

938 test = test.mask(mask, 2) 

939 

940 # TODO: DM-21954 Look into veracity of inline comment below 

941 # are these backwards? 

942 categories = ['galaxy', 'star', self._null_label] 

943 label = pd.Series(pd.Categorical.from_codes(test, categories=categories), 

944 index=x.index, name='label') 

945 if self._force_str: 

946 label = label.astype(str) 

947 return label 

948 

949 

class NumStarLabeller(Labeller):
    """Label rows by how many bands flagged the source as a star.

    Bins the ``numStarFlags`` count into 'noStar' (0), 'maybe'
    (1..n-1), or 'star' (all n bands).
    """
    _columns = ['numStarFlags']
    # NOTE(review): this class-level mapping is not used by _func below.
    labels = {"star": 0, "maybe": 1, "notStar": 2}

    def _func(self, df):
        starCount = df[self._columns][self._columns[0]]

        # Number of filters
        nBands = len(starCount.unique()) - 1

        binNames = ['noStar', 'maybe', 'star']
        label = pd.Series(pd.cut(starCount, [-1, 0, nBands - 1, nBands], labels=binNames),
                          index=starCount.index, name='label')

        return label.astype(str) if self._force_str else label

968 

969 

class DeconvolvedMoments(Functor):
    # Deconvolved second moments: source trace (Ixx + Iyy) minus the PSF
    # trace.  Prefers HSM source moments, falling back to SdssShape where
    # HSM is non-finite; HSM PSF moments are required.
    name = 'Deconvolved Moments'
    shortname = 'deconvolvedMoments'
    _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
                "ext_shapeHSM_HsmSourceMoments_yy",
                "base_SdssShape_xx", "base_SdssShape_yy",
                "ext_shapeHSM_HsmPsfMoments_xx",
                "ext_shapeHSM_HsmPsfMoments_yy")

    def _func(self, df):
        """Calculate deconvolved moments: (source Ixx+Iyy) - (PSF Ixx+Iyy)."""
        if "ext_shapeHSM_HsmSourceMoments_xx" in df.columns:  # _xx added by tdm
            hsm = df["ext_shapeHSM_HsmSourceMoments_xx"] + df["ext_shapeHSM_HsmSourceMoments_yy"]
        else:
            # No HSM source moments available: all-NaN so the SdssShape
            # fallback below is used everywhere.
            hsm = np.ones(len(df))*np.nan
        sdss = df["base_SdssShape_xx"] + df["base_SdssShape_yy"]
        if "ext_shapeHSM_HsmPsfMoments_xx" in df.columns:
            psf = df["ext_shapeHSM_HsmPsfMoments_xx"] + df["ext_shapeHSM_HsmPsfMoments_yy"]
        else:
            # LSST does not have shape.sdss.psf. Could instead add base_PsfShape to catalog using
            # exposure.getPsf().computeShape(s.getCentroid()).getIxx()
            # raise TaskError("No psf shape parameter found in catalog")
            raise RuntimeError('No psf shape parameter found in catalog')

        # Use SdssShape wherever the HSM source moments are not finite.
        return hsm.where(np.isfinite(hsm), sdss) - psf

995 

996 

class SdssTraceSize(Functor):
    """Functor to calculate SDSS trace radius size for sources"""
    name = "SDSS Trace Size"
    shortname = 'sdssTrace'
    _columns = ("base_SdssShape_xx", "base_SdssShape_yy")

    def _func(self, df):
        # Trace radius: sqrt of the mean of the two diagonal moments.
        return np.sqrt(0.5*(df["base_SdssShape_xx"] + df["base_SdssShape_yy"]))

1006 

1007 

class PsfSdssTraceSizeDiff(Functor):
    """Functor to calculate SDSS trace radius size difference (%) between object and psf model"""
    name = "PSF - SDSS Trace Size"
    shortname = 'psf_sdssTrace'
    _columns = ("base_SdssShape_xx", "base_SdssShape_yy",
                "base_SdssShape_psf_xx", "base_SdssShape_psf_yy")

    def _func(self, df):
        sizeSrc = np.sqrt(0.5*(df["base_SdssShape_xx"] + df["base_SdssShape_yy"]))
        sizePsf = np.sqrt(0.5*(df["base_SdssShape_psf_xx"] + df["base_SdssShape_psf_yy"]))
        # Percent difference, normalized by the mean of the two sizes.
        return 100*(sizeSrc - sizePsf)/(0.5*(sizeSrc + sizePsf))

1020 

1021 

class HsmTraceSize(Functor):
    """Functor to calculate HSM trace radius size for sources"""
    name = 'HSM Trace Size'
    shortname = 'hsmTrace'
    _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
                "ext_shapeHSM_HsmSourceMoments_yy")

    def _func(self, df):
        # Trace radius: sqrt of the mean of the two diagonal moments.
        trace = df["ext_shapeHSM_HsmSourceMoments_xx"] + df["ext_shapeHSM_HsmSourceMoments_yy"]
        return np.sqrt(0.5*trace)

1033 

1034 

class PsfHsmTraceSizeDiff(Functor):
    """Functor to calculate HSM trace radius size difference (%) between object and psf model"""
    name = 'PSF - HSM Trace Size'
    shortname = 'psf_HsmTrace'
    _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
                "ext_shapeHSM_HsmSourceMoments_yy",
                "ext_shapeHSM_HsmPsfMoments_xx",
                "ext_shapeHSM_HsmPsfMoments_yy")

    def _func(self, df):
        srcTrace = (df["ext_shapeHSM_HsmSourceMoments_xx"]
                    + df["ext_shapeHSM_HsmSourceMoments_yy"])
        psfTrace = (df["ext_shapeHSM_HsmPsfMoments_xx"]
                    + df["ext_shapeHSM_HsmPsfMoments_yy"])
        sizeSrc = np.sqrt(0.5*srcTrace)
        sizePsf = np.sqrt(0.5*psfTrace)
        # Percent difference, normalized by the mean of the two sizes.
        return 100*(sizeSrc - sizePsf)/(0.5*(sizeSrc + sizePsf))

1051 

1052 

class HsmFwhm(Functor):
    """PSF FWHM in arcseconds derived from the HSM PSF moments,
    assuming a Gaussian profile and a fixed pixel scale."""
    name = 'HSM Psf FWHM'
    _columns = ('ext_shapeHSM_HsmPsfMoments_xx', 'ext_shapeHSM_HsmPsfMoments_yy')
    # TODO: DM-21403 pixel scale should be computed from the CD matrix or transform matrix
    pixelScale = 0.168
    SIGMA2FWHM = 2*np.sqrt(2*np.log(2))

    def _func(self, df):
        trace = 0.5*(df['ext_shapeHSM_HsmPsfMoments_xx']
                     + df['ext_shapeHSM_HsmPsfMoments_yy'])
        return self.pixelScale*self.SIGMA2FWHM*np.sqrt(trace)

1063 

1064 

class E1(Functor):
    """Distortion-definition ellipticity component
    e1 = (Ixx - Iyy) / (Ixx + Iyy).

    Parameters
    ----------
    colXX, colXY, colYY : `str`
        Names of the second-moment columns.
    """
    name = "Distortion Ellipticity (e1)"
    shortname = "Distortion"

    def __init__(self, colXX, colXY, colYY, **kwargs):
        self.colXX = colXX
        self.colXY = colXY
        self.colYY = colYY
        self._columns = [self.colXX, self.colXY, self.colYY]
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.colXX, self.colXY, self.colYY]

    def _func(self, df):
        # Bug fix: the numerator must be parenthesized.  The previous
        # expression ``xx - yy / (xx + yy)`` evaluated as
        # ``xx - (yy / (xx + yy))`` due to operator precedence, which is
        # not the distortion ellipticity.
        return (df[self.colXX] - df[self.colYY]) / (df[self.colXX] + df[self.colYY])

1082 

1083 

class E2(Functor):
    """Distortion-definition ellipticity component
    e2 = 2*Ixy / (Ixx + Iyy)."""
    name = "Ellipticity e2"

    def __init__(self, colXX, colXY, colYY, **kwargs):
        self.colXX = colXX
        self.colXY = colXY
        self.colYY = colYY
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.colXX, self.colXY, self.colYY]

    def _func(self, df):
        trace = df[self.colXX] + df[self.colYY]
        return 2*df[self.colXY] / trace

1099 

1100 

class RadiusFromQuadrupole(Functor):
    """Determinant radius from quadrupole moments:
    (Ixx*Iyy - Ixy**2) ** 0.25."""

    def __init__(self, colXX, colXY, colYY, **kwargs):
        self.colXX = colXX
        self.colXY = colXY
        self.colYY = colYY
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.colXX, self.colXY, self.colYY]

    def _func(self, df):
        determinant = df[self.colXX]*df[self.colYY] - df[self.colXY]**2
        return determinant**0.25

1115 

1116 

class LocalWcs(Functor):
    """Computations using the stored localWcs.
    """
    name = "LocalWcsOperations"

    def __init__(self,
                 colCD_1_1,
                 colCD_1_2,
                 colCD_2_1,
                 colCD_2_2,
                 **kwargs):
        self.colCD_1_1 = colCD_1_1
        self.colCD_1_2 = colCD_1_2
        self.colCD_2_1 = colCD_2_1
        self.colCD_2_2 = colCD_2_2
        super().__init__(**kwargs)

    def computeDeltaRaDec(self, x, y, cd11, cd12, cd21, cd22):
        """Convert a pixel offset (x, y) into a (dRA, dDec) offset by
        applying the local CD matrix.

        Parameters
        ----------
        x : `pandas.Series`
            X pixel coordinate.
        y : `pandas.Series`
            Y pixel coordinate.
        cd11 : `pandas.Series`
            [1, 1] element of the local Wcs affine transform.
        cd12 : `pandas.Series`
            [1, 2] element of the local Wcs affine transform.
        cd21 : `pandas.Series`
            [2, 1] element of the local Wcs affine transform.
        cd22 : `pandas.Series`
            [2, 2] element of the local Wcs affine transform.

        Returns
        -------
        raDecTuple : tuple
            RA and dec conversion of x and y given the local Wcs. Returned
            units are in radians.

        """
        return (x * cd11 + y * cd12, x * cd21 + y * cd22)

    def computeSkySeperation(self, ra1, dec1, ra2, dec2):
        """Compute the great-circle distance between two sky positions
        using the haversine formula.

        (The misspelled method name is retained for backward compatibility.)

        Parameters
        ----------
        ra1 : `pandas.Series`
            Ra of the first coordinate in radians.
        dec1 : `pandas.Series`
            Dec of the first coordinate in radians.
        ra2 : `pandas.Series`
            Ra of the second coordinate in radians.
        dec2 : `pandas.Series`
            Dec of the second coordinate in radians.

        Returns
        -------
        dist : `pandas.Series`
            Distance on the sphere in radians.
        """
        deltaDec = dec2 - dec1
        deltaRa = ra2 - ra1
        # Haversine formula: numerically stable for small separations.
        return 2 * np.arcsin(
            np.sqrt(
                np.sin(deltaDec / 2) ** 2
                + np.cos(dec2) * np.cos(dec1) * np.sin(deltaRa / 2) ** 2))

    def getSkySeperationFromPixel(self, x1, y1, x2, y2, cd11, cd12, cd21, cd22):
        """Compute the distance on the sphere between pixel positions
        (x1, y1) and (x2, y2), using the local CD matrix.

        Parameters
        ----------
        x1 : `pandas.Series`
            X pixel coordinate.
        y1 : `pandas.Series`
            Y pixel coordinate.
        x2 : `pandas.Series`
            X pixel coordinate.
        y2 : `pandas.Series`
            Y pixel coordinate.
        cd11 : `pandas.Series`
            [1, 1] element of the local Wcs affine transform.
        cd12 : `pandas.Series`
            [1, 2] element of the local Wcs affine transform.
        cd21 : `pandas.Series`
            [2, 1] element of the local Wcs affine transform.
        cd22 : `pandas.Series`
            [2, 2] element of the local Wcs affine transform.

        Returns
        -------
        Distance : `pandas.Series`
            Distance on the sphere in radians.
        """
        ra1, dec1 = self.computeDeltaRaDec(x1, y1, cd11, cd12, cd21, cd22)
        ra2, dec2 = self.computeDeltaRaDec(x2, y2, cd11, cd12, cd21, cd22)
        # Great circle distance for small separations.
        return self.computeSkySeperation(ra1, dec1, ra2, dec2)

1222 

1223 

class ComputePixelScale(LocalWcs):
    """Compute the local pixel scale from the stored CDMatrix.
    """
    name = "PixelScale"

    @property
    def columns(self):
        return [self.colCD_1_1,
                self.colCD_1_2,
                self.colCD_2_1,
                self.colCD_2_2]

    def pixelScaleArcseconds(self, cd11, cd12, cd21, cd22):
        """Compute the local pixel to scale conversion in arcseconds.

        Parameters
        ----------
        cd11 : `pandas.Series`
            [1, 1] element of the local Wcs affine transform in radians.
        cd12 : `pandas.Series`
            [1, 2] element of the local Wcs affine transform in radians.
        cd21 : `pandas.Series`
            [2, 1] element of the local Wcs affine transform in radians.
        cd22 : `pandas.Series`
            [2, 2] element of the local Wcs affine transform in radians.

        Returns
        -------
        pixScale : `pandas.Series`
            Arcseconds per pixel at the location of the local WCS.
        """
        # Pixel scale is sqrt(|det CD|), converted from radians to arcsec.
        return 3600 * np.degrees(np.sqrt(np.fabs(cd11 * cd22 - cd12 * cd21)))

    def _func(self, df):
        return self.pixelScaleArcseconds(df[self.colCD_1_1],
                                         df[self.colCD_1_2],
                                         df[self.colCD_2_1],
                                         df[self.colCD_2_2])

1264 

1265 

class ConvertPixelToArcseconds(ComputePixelScale):
    """Convert a value in units of pixels to units of arcseconds.

    (Multiplies ``col`` by the local pixel scale once; see
    ``_func`` below.)
    """

    def __init__(self,
                 col,
                 colCD_1_1,
                 colCD_1_2,
                 colCD_2_1,
                 colCD_2_2,
                 **kwargs):
        # Column holding the pixel-unit quantity to convert.
        self.col = col
        super().__init__(colCD_1_1,
                         colCD_1_2,
                         colCD_2_1,
                         colCD_2_2,
                         **kwargs)

    @property
    def name(self):
        return f"{self.col}_asArcseconds"

    @property
    def columns(self):
        return [self.col,
                self.colCD_1_1,
                self.colCD_1_2,
                self.colCD_2_1,
                self.colCD_2_2]

    def _func(self, df):
        return df[self.col] * self.pixelScaleArcseconds(df[self.colCD_1_1],
                                                        df[self.colCD_1_2],
                                                        df[self.colCD_2_1],
                                                        df[self.colCD_2_2])

1301 

1302 

class ConvertPixelSqToArcsecondsSq(ComputePixelScale):
    """Convert a value in units of pixels squared to units of
    arcseconds squared.

    (Multiplies ``col`` by the local pixel scale twice; see
    ``_func`` below.)
    """

    def __init__(self,
                 col,
                 colCD_1_1,
                 colCD_1_2,
                 colCD_2_1,
                 colCD_2_2,
                 **kwargs):
        # Column holding the pixel-squared quantity to convert.
        self.col = col
        super().__init__(colCD_1_1,
                         colCD_1_2,
                         colCD_2_1,
                         colCD_2_2,
                         **kwargs)

    @property
    def name(self):
        return f"{self.col}_asArcsecondsSq"

    @property
    def columns(self):
        return [self.col,
                self.colCD_1_1,
                self.colCD_1_2,
                self.colCD_2_1,
                self.colCD_2_2]

    def _func(self, df):
        pixScale = self.pixelScaleArcseconds(df[self.colCD_1_1],
                                             df[self.colCD_1_2],
                                             df[self.colCD_2_1],
                                             df[self.colCD_2_2])
        return df[self.col] * pixScale * pixScale

1339 

1340 

class ReferenceBand(Functor):
    """Report which band's measurement is flagged as the merge reference
    for each row."""
    name = 'Reference Band'
    shortname = 'refBand'

    @property
    def columns(self):
        return ["merge_measurement_i",
                "merge_measurement_r",
                "merge_measurement_z",
                "merge_measurement_y",
                "merge_measurement_g"]

    def _func(self, df):
        def bandOfRow(row):
            # idxmax over booleans returns the first column holding the
            # maximum (True > False); strip the prefix to get the band.
            flaggedCol = row.idxmax()
            return flaggedCol.replace('merge_measurement_', '')

        return df[self.columns].apply(bandOfRow, axis=1)

1360 

1361 

class Photometry(Functor):
    """Base class for converting instrument counts (DN) into calibrated
    fluxes (nanojansky) and AB magnitudes.

    Parameters
    ----------
    colFlux : `str`
        Name of the flux column to calibrate.
    colFluxErr : `str`, optional
        Name of the associated flux error column.
    calib : optional
        Calibration object providing ``getFluxMag0``; when absent the
        hard-coded ``COADD_ZP`` zeropoint is used.
    """
    # AB to NanoJansky (3631 Jansky)
    AB_FLUX_SCALE = (0 * u.ABmag).to_value(u.nJy)
    LOG_AB_FLUX_SCALE = 12.56
    FIVE_OVER_2LOG10 = 1.085736204758129569
    # TO DO: DM-21955 Replace hard coded photometic calibration values
    COADD_ZP = 27

    def __init__(self, colFlux, colFluxErr=None, calib=None, **kwargs):
        # Vectorized overflow-safe hypot for elementwise error propagation.
        self.vhypot = np.vectorize(self.hypot)
        self.col = colFlux
        self.colFluxErr = colFluxErr

        self.calib = calib
        if calib is not None:
            self.fluxMag0, self.fluxMag0Err = calib.getFluxMag0()
        else:
            # Fall back to the hard-coded coadd zeropoint.
            self.fluxMag0 = 1./np.power(10, -0.4*self.COADD_ZP)
            self.fluxMag0Err = 0.

        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.col]

    @property
    def name(self):
        return f'mag_{self.col}'

    @classmethod
    def hypot(cls, a, b):
        """Overflow-safe scalar hypotenuse sqrt(a**2 + b**2)."""
        if np.abs(a) < np.abs(b):
            a, b = b, a
        if a == 0.:
            return 0.
        q = b/a
        return np.abs(a) * np.sqrt(1. + q*q)

    def dn2flux(self, dn, fluxMag0):
        """Convert DN to flux in nanojansky."""
        return self.AB_FLUX_SCALE * dn / fluxMag0

    def dn2mag(self, dn, fluxMag0):
        """Convert DN to AB magnitude, silencing log/divide warnings."""
        # Fix: use the stdlib ``warnings`` module directly.  ``np.warnings``
        # was an undocumented alias for it, removed in NumPy >= 1.25.
        import warnings
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', r'invalid value encountered')
            warnings.filterwarnings('ignore', r'divide by zero')
            return -2.5 * np.log10(dn/fluxMag0)

    def dn2fluxErr(self, dn, dnErr, fluxMag0, fluxMag0Err):
        """Propagate DN and zeropoint errors into a nanojansky flux error."""
        retVal = self.vhypot(dn * fluxMag0Err, dnErr * fluxMag0)
        retVal *= self.AB_FLUX_SCALE / fluxMag0 / fluxMag0
        return retVal

    def dn2MagErr(self, dn, dnErr, fluxMag0, fluxMag0Err):
        """Propagate DN and zeropoint errors into an AB magnitude error."""
        retVal = self.dn2fluxErr(dn, dnErr, fluxMag0, fluxMag0Err) / self.dn2flux(dn, fluxMag0)
        return self.FIVE_OVER_2LOG10 * retVal

1418 

1419 

class NanoJansky(Photometry):
    """Calibrated flux in nanojansky for the configured flux column."""

    def _func(self, df):
        return self.dn2flux(df[self.col], self.fluxMag0)

1423 

1424 

class NanoJanskyErr(Photometry):
    """Error on the calibrated nanojansky flux for the configured
    flux column."""

    @property
    def columns(self):
        return [self.col, self.colFluxErr]

    def _func(self, df):
        fluxErr = self.dn2fluxErr(df[self.col], df[self.colFluxErr],
                                  self.fluxMag0, self.fluxMag0Err)
        return pd.Series(fluxErr, index=df.index)

1433 

1434 

class Magnitude(Photometry):
    """AB magnitude for the configured flux column."""

    def _func(self, df):
        return self.dn2mag(df[self.col], self.fluxMag0)

1438 

1439 

class MagnitudeErr(Photometry):
    """Error on the AB magnitude for the configured flux column."""

    @property
    def columns(self):
        return [self.col, self.colFluxErr]

    def _func(self, df):
        magErr = self.dn2MagErr(df[self.col], df[self.colFluxErr],
                                self.fluxMag0, self.fluxMag0Err)
        return pd.Series(magErr, index=df.index)

1448 

1449 

class LocalPhotometry(Functor):
    """Base class for calibrating the specified instrument flux column using
    the local photometric calibration.

    Parameters
    ----------
    instFluxCol : `str`
        Name of the instrument flux column.
    instFluxErrCol : `str`
        Name of the assocated error columns for ``instFluxCol``.
    photoCalibCol : `str`
        Name of local calibration column.
    photoCalibErrCol : `str`
        Error associated with ``photoCalibCol``

    See also
    --------
    LocalPhotometry
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    """
    # AB magnitude of a 1 nJy source.
    logNJanskyToAB = (1 * u.nJy).to_value(u.ABmag)

    def __init__(self,
                 instFluxCol,
                 instFluxErrCol,
                 photoCalibCol,
                 photoCalibErrCol,
                 **kwargs):
        self.instFluxCol = instFluxCol
        self.instFluxErrCol = instFluxErrCol
        self.photoCalibCol = photoCalibCol
        self.photoCalibErrCol = photoCalibErrCol
        super().__init__(**kwargs)

    def instFluxToNanojansky(self, instFlux, localCalib):
        """Convert instrument flux to nanojanskys.

        Parameters
        ----------
        instFlux : `numpy.ndarray` or `pandas.Series`
            Array of instrument flux measurements
        localCalib : `numpy.ndarray` or `pandas.Series`
            Array of local photometric calibration estimates.

        Returns
        -------
        calibFlux : `numpy.ndarray` or `pandas.Series`
            Array of calibrated flux measurements.
        """
        return instFlux * localCalib

    def instFluxErrToNanojanskyErr(self, instFlux, instFluxErr, localCalib, localCalibErr):
        """Convert instrument flux error to a nanojansky error.

        Parameters
        ----------
        instFlux : `numpy.ndarray` or `pandas.Series`
            Array of instrument flux measurements
        instFluxErr : `numpy.ndarray` or `pandas.Series`
            Errors on associated ``instFlux`` values
        localCalib : `numpy.ndarray` or `pandas.Series`
            Array of local photometric calibration estimates.
        localCalibErr : `numpy.ndarray` or `pandas.Series`
            Errors on associated ``localCalib`` values

        Returns
        -------
        calibFluxErr : `numpy.ndarray` or `pandas.Series`
            Errors on calibrated flux measurements.
        """
        return np.hypot(instFluxErr * localCalib, instFlux * localCalibErr)

    def instFluxToMagnitude(self, instFlux, localCalib):
        """Convert instrument flux to an AB magnitude.

        Parameters
        ----------
        instFlux : `numpy.ndarray` or `pandas.Series`
            Array of instrument flux measurements
        localCalib : `numpy.ndarray` or `pandas.Series`
            Array of local photometric calibration estimates.

        Returns
        -------
        calibMag : `numpy.ndarray` or `pandas.Series`
            Array of calibrated AB magnitudes.
        """
        return -2.5 * np.log10(self.instFluxToNanojansky(instFlux, localCalib)) + self.logNJanskyToAB

    def instFluxErrToMagnitudeErr(self, instFlux, instFluxErr, localCalib, localCalibErr):
        """Convert instrument flux error to an AB magnitude error.

        Parameters
        ----------
        instFlux : `numpy.ndarray` or `pandas.Series`
            Array of instrument flux measurements
        instFluxErr : `numpy.ndarray` or `pandas.Series`
            Errors on associated ``instFlux`` values
        localCalib : `numpy.ndarray` or `pandas.Series`
            Array of local photometric calibration estimates.
        localCalibErr : `numpy.ndarray` or `pandas.Series`
            Errors on associated ``localCalib`` values

        Returns
        -------
        calibMagErr: `numpy.ndarray` or `pandas.Series`
            Error on calibrated AB magnitudes.
        """
        err = self.instFluxErrToNanojanskyErr(instFlux, instFluxErr, localCalib, localCalibErr)
        # Bug fix: the denominator must be the calibrated flux.  The
        # previous code passed ``instFluxErr`` as the calibration argument,
        # i.e. instFluxToNanojansky(instFlux, instFluxErr), producing a
        # wrong magnitude error.
        return 2.5 / np.log(10) * err / self.instFluxToNanojansky(instFlux, localCalib)

1563 

1564 

class LocalNanojansky(LocalPhotometry):
    """Compute calibrated fluxes using the local calibration value.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    """

    @property
    def columns(self):
        return [self.instFluxCol, self.photoCalibCol]

    @property
    def name(self):
        return f'flux_{self.instFluxCol}'

    def _func(self, df):
        instFlux = df[self.instFluxCol]
        localCalib = df[self.photoCalibCol]
        return self.instFluxToNanojansky(instFlux, localCalib)

1586 

1587 

class LocalNanojanskyErr(LocalPhotometry):
    """Compute calibrated flux errors using the local calibration value.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    """

    @property
    def columns(self):
        return [self.instFluxCol, self.instFluxErrCol,
                self.photoCalibCol, self.photoCalibErrCol]

    @property
    def name(self):
        return f'fluxErr_{self.instFluxCol}'

    def _func(self, df):
        instFlux = df[self.instFluxCol]
        instFluxErr = df[self.instFluxErrCol]
        localCalib = df[self.photoCalibCol]
        localCalibErr = df[self.photoCalibErrCol]
        return self.instFluxErrToNanojanskyErr(instFlux, instFluxErr,
                                               localCalib, localCalibErr)

1611 

1612 

class LocalMagnitude(LocalPhotometry):
    """Compute calibrated AB magnitudes using the local calibration value.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    """

    @property
    def columns(self):
        return [self.instFluxCol, self.photoCalibCol]

    @property
    def name(self):
        return f'mag_{self.instFluxCol}'

    def _func(self, df):
        instFlux = df[self.instFluxCol]
        localCalib = df[self.photoCalibCol]
        return self.instFluxToMagnitude(instFlux, localCalib)

1635 

1636 

class LocalMagnitudeErr(LocalPhotometry):
    """Compute calibrated AB magnitude errors using the local calibration value.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    """

    @property
    def columns(self):
        return [self.instFluxCol, self.instFluxErrCol,
                self.photoCalibCol, self.photoCalibErrCol]

    @property
    def name(self):
        return f'magErr_{self.instFluxCol}'

    def _func(self, df):
        instFlux = df[self.instFluxCol]
        instFluxErr = df[self.instFluxErrCol]
        localCalib = df[self.photoCalibCol]
        localCalibErr = df[self.photoCalibErrCol]
        return self.instFluxErrToMagnitudeErr(instFlux, instFluxErr,
                                              localCalib, localCalibErr)

1662 

1663 

class LocalDipoleMeanFlux(LocalPhotometry):
    """Compute absolute mean of dipole fluxes.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    LocalDipoleMeanFlux
    LocalDipoleMeanFluxErr
    LocalDipoleDiffFlux
    LocalDipoleDiffFluxErr
    """
    def __init__(self,
                 instFluxPosCol,
                 instFluxNegCol,
                 instFluxPosErrCol,
                 instFluxNegErrCol,
                 photoCalibCol,
                 photoCalibErrCol,
                 **kwargs):
        # Keep both lobes' columns; the base class is initialized with the
        # negative lobe's columns (base-class attrs are re-set here too).
        self.instFluxPosCol = instFluxPosCol
        self.instFluxNegCol = instFluxNegCol
        self.instFluxPosErrCol = instFluxPosErrCol
        self.instFluxNegErrCol = instFluxNegErrCol
        self.photoCalibCol = photoCalibCol
        self.photoCalibErrCol = photoCalibErrCol
        super().__init__(instFluxNegCol,
                         instFluxNegErrCol,
                         photoCalibCol,
                         photoCalibErrCol,
                         **kwargs)

    @property
    def columns(self):
        return [self.instFluxPosCol,
                self.instFluxNegCol,
                self.photoCalibCol]

    @property
    def name(self):
        return f'dipMeanFlux_{self.instFluxPosCol}_{self.instFluxNegCol}'

    def _func(self, df):
        localCalib = df[self.photoCalibCol]
        negFlux = np.fabs(self.instFluxToNanojansky(df[self.instFluxNegCol], localCalib))
        posFlux = np.fabs(self.instFluxToNanojansky(df[self.instFluxPosCol], localCalib))
        return 0.5*(negFlux + posFlux)

1711 

1712 

class LocalDipoleMeanFluxErr(LocalDipoleMeanFlux):
    """Compute the error on the absolute mean of dipole fluxes.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    LocalDipoleMeanFlux
    LocalDipoleMeanFluxErr
    LocalDipoleDiffFlux
    LocalDipoleDiffFluxErr
    """

    @property
    def columns(self):
        return [self.instFluxPosCol,
                self.instFluxNegCol,
                self.instFluxPosErrCol,
                self.instFluxNegErrCol,
                self.photoCalibCol,
                self.photoCalibErrCol]

    @property
    def name(self):
        return f'dipMeanFluxErr_{self.instFluxPosCol}_{self.instFluxNegCol}'

    def _func(self, df):
        # Bug fix: the calibration-error term must scale the full
        # (|neg| + |pos|) sum.  Previously the expression read
        # (|neg| + |pos| * calibErr)**2, scaling only the positive lobe —
        # compare LocalDipoleDiffFluxErr, which groups correctly.
        return 0.5*np.sqrt(
            ((np.fabs(df[self.instFluxNegCol]) + np.fabs(df[self.instFluxPosCol]))
             * df[self.photoCalibErrCol])**2
            + (df[self.instFluxNegErrCol]**2 + df[self.instFluxPosErrCol]**2)
            * df[self.photoCalibCol]**2)

1747 

1748 

class LocalDipoleDiffFlux(LocalDipoleMeanFlux):
    """Compute the absolute difference of dipole fluxes.

    Value is (abs(pos) - abs(neg))

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    LocalDipoleMeanFlux
    LocalDipoleMeanFluxErr
    LocalDipoleDiffFlux
    LocalDipoleDiffFluxErr
    """

    @property
    def columns(self):
        return [self.instFluxPosCol,
                self.instFluxNegCol,
                self.photoCalibCol]

    @property
    def name(self):
        return f'dipDiffFlux_{self.instFluxPosCol}_{self.instFluxNegCol}'

    def _func(self, df):
        localCalib = df[self.photoCalibCol]
        posFlux = np.fabs(self.instFluxToNanojansky(df[self.instFluxPosCol], localCalib))
        negFlux = np.fabs(self.instFluxToNanojansky(df[self.instFluxNegCol], localCalib))
        return posFlux - negFlux

1779 

1780 

class LocalDipoleDiffFluxErr(LocalDipoleMeanFlux):
    """Compute the error on the absolute difference of dipole fluxes.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    LocalDipoleMeanFlux
    LocalDipoleMeanFluxErr
    LocalDipoleDiffFlux
    LocalDipoleDiffFluxErr
    """

    @property
    def columns(self):
        return [self.instFluxPosCol,
                self.instFluxNegCol,
                self.instFluxPosErrCol,
                self.instFluxNegErrCol,
                self.photoCalibCol,
                self.photoCalibErrCol]

    @property
    def name(self):
        return f'dipDiffFluxErr_{self.instFluxPosCol}_{self.instFluxNegCol}'

    def _func(self, df):
        # Propagate the calibration error through the flux difference and
        # the flux errors through the calibration, in quadrature.
        absDiff = np.fabs(df[self.instFluxPosCol]) - np.fabs(df[self.instFluxNegCol])
        calibTerm = (absDiff * df[self.photoCalibErrCol])**2
        fluxTerm = ((df[self.instFluxPosErrCol]**2 + df[self.instFluxNegErrCol]**2)
                    * df[self.photoCalibCol]**2)
        return np.sqrt(calibTerm + fluxTerm)

1815 

1816 

class Ratio(Functor):
    """Base class for returning the ratio of 2 columns.

    Can be used to compute a Signal to Noise ratio for any input flux.

    Parameters
    ----------
    numerator : `str`
        Name of the column to use at the numerator in the ratio
    denominator : `str`
        Name of the column to use as the denominator in the ratio.
    """
    def __init__(self,
                 numerator,
                 denominator,
                 **kwargs):
        self.numerator = numerator
        self.denominator = denominator
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.numerator, self.denominator]

    @property
    def name(self):
        return f'ratio_{self.numerator}_{self.denominator}'

    def _func(self, df):
        # Fix: use the stdlib ``warnings`` module directly.  ``np.warnings``
        # was an undocumented alias for it, removed in NumPy >= 1.25.
        import warnings
        with warnings.catch_warnings():
            # Division by zero / NaN denominators are expected; suppress
            # the resulting numpy runtime warnings.
            warnings.filterwarnings('ignore', r'invalid value encountered')
            warnings.filterwarnings('ignore', r'divide by zero')
            return df[self.numerator] / df[self.denominator]