Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of pipe_tasks. 

2# 

3# LSST Data Management System 

4# This product includes software developed by the 

5# LSST Project (http://www.lsst.org/). 

6# See COPYRIGHT file at the top of the source tree. 

7# 

8# This program is free software: you can redistribute it and/or modify 

9# it under the terms of the GNU General Public License as published by 

10# the Free Software Foundation, either version 3 of the License, or 

11# (at your option) any later version. 

12# 

13# This program is distributed in the hope that it will be useful, 

14# but WITHOUT ANY WARRANTY; without even the implied warranty of 

15# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

16# GNU General Public License for more details. 

17# 

18# You should have received a copy of the LSST License Statement and 

19# the GNU General Public License along with this program. If not, 

20# see <https://www.lsstcorp.org/LegalNotices/>. 

21# 

import os.path
import re
import warnings
from itertools import product

import astropy.units as u
import numpy as np
import pandas as pd
import yaml

from lsst.daf.butler import DeferredDatasetHandle
from lsst.daf.persistence import doImport

from .parquetTable import ParquetTable, MultilevelParquetTable

34 

35 

def init_fromDict(initDict, basePath='lsst.pipe.tasks.functors',
                  typeKey='functor', name=None):
    """Initialize an object defined in a dictionary

    The object needs to be importable as f'{basePath}.{initDict[typeKey]}'.
    The positional and keyword arguments (if any) are contained in "args"
    and "kwargs" entries in the dictionary, respectively.
    This is used in `functors.CompositeFunctor.from_yaml` to initialize
    a composite functor from a specification in a YAML file.

    Parameters
    ----------
    initDict : dictionary
        Dictionary describing object's initialization.  Must contain
        an entry keyed by ``typeKey`` that is the name of the object,
        relative to ``basePath``.
    basePath : str
        Path relative to module in which ``initDict[typeKey]`` is defined.
    typeKey : str
        Key of ``initDict`` that is the name of the object
        (relative to ``basePath``).
    name : str, optional
        Name of the functor being constructed; used only to make the
        error message more informative on failure.

    Returns
    -------
    The constructed object.
    """
    initDict = initDict.copy()
    # TO DO: DM-21956 We should be able to define functors outside this module
    pythonType = doImport(f'{basePath}.{initDict.pop(typeKey)}')
    args = []
    if 'args' in initDict:
        args = initDict.pop('args')
        if isinstance(args, str):
            args = [args]
    try:
        element = pythonType(*args, **initDict)
    except Exception as e:
        message = f'Error in constructing functor "{name}" of type {pythonType.__name__} with args: {args}'
        # Chain with `from e` so the original traceback is preserved instead
        # of being discarded by the re-raise.
        raise type(e)(message, e.args) from e
    return element

73 

74 

class Functor(object):
    """Define and execute a calculation on a ParquetTable

    The `__call__` method accepts either a `ParquetTable` object or a
    `DeferredDatasetHandle`, and returns the
    result of the calculation as a single column.  Each functor defines what
    columns are needed for the calculation, and only these columns are read
    from the `ParquetTable`.

    The action of `__call__` consists of two steps: first, loading the
    necessary columns from disk into memory as a `pandas.DataFrame` object;
    and second, performing the computation on this dataframe and returning the
    result.

    To define a new `Functor`, a subclass must define a `_func` method,
    that takes a `pandas.DataFrame` and returns result in a `pandas.Series`.
    In addition, it must define the following attributes

    * `_columns`: The columns necessary to perform the calculation
    * `name`: A name appropriate for a figure axis label
    * `shortname`: A name appropriate for use as a dictionary key

    On initialization, a `Functor` should declare what band (`filt` kwarg)
    and dataset (e.g. `'ref'`, `'meas'`, `'forced_src'`) it is intended to be
    applied to.  This enables the `_get_data` method to extract the proper
    columns from the parquet file.  If not specified, the dataset will fall
    back on the `_defaultDataset` attribute.  If band is not specified and
    `dataset` is anything other than `'ref'`, then an error will be raised
    when trying to perform the calculation.

    As currently implemented, `Functor` is only set up to expect a
    dataset of the format of the `deepCoadd_obj` dataset; that is, a
    dataframe with a multi-level column index, with the levels of the
    column index being `band`, `dataset`, and `column`.  This is defined
    in the `_columnLevels` attribute, as well as being implicit in the role
    of the `filt` and `dataset` attributes defined at initialization.  In
    addition, the `_get_data` method that reads the dataframe from the
    `ParquetTable` will return a dataframe with column index levels defined
    by the `_dfLevels` attribute; by default, this is `column`.

    The `_columnLevels` and `_dfLevels` attributes should generally not need
    to be changed, unless `_func` needs columns from multiple filters or
    datasets to do the calculation.  An example of this is the
    `lsst.pipe.tasks.functors.Color` functor, for which
    `_dfLevels = ('band', 'column')`, and `_func` expects the dataframe it
    gets to have those levels in the column index.

    Parameters
    ----------
    filt : str
        Filter upon which to do the calculation

    dataset : str
        Dataset upon which to do the calculation
        (e.g., 'ref', 'meas', 'forced_src').
    """

    _defaultDataset = 'ref'
    _columnLevels = ('band', 'dataset', 'column')
    _dfLevels = ('column',)
    _defaultNoDup = False

    def __init__(self, filt=None, dataset=None, noDup=None):
        self.filt = filt
        self.dataset = dataset if dataset is not None else self._defaultDataset
        self._noDup = noDup

    @property
    def noDup(self):
        # Explicit per-instance setting wins over the class default.
        if self._noDup is not None:
            return self._noDup
        else:
            return self._defaultNoDup

    @property
    def columns(self):
        """Columns required to perform calculation
        """
        if not hasattr(self, '_columns'):
            raise NotImplementedError('Must define columns property or _columns attribute')
        return self._columns

    def _get_data_columnLevels(self, data, columnIndex=None):
        """Gets the names of the column index levels

        This should only be called in the context of a multilevel table.
        The logic here is to enable this to work both with the gen2
        `MultilevelParquetTable` and with the gen3 `DeferredDatasetHandle`.

        Parameters
        ----------
        data : `MultilevelParquetTable` or `DeferredDatasetHandle`

        columnIndex (optional): pandas `Index` object
            if not passed, then it is read from the `DeferredDatasetHandle`
        """
        if isinstance(data, DeferredDatasetHandle):
            if columnIndex is None:
                columnIndex = data.get(component="columns")
        if columnIndex is not None:
            return columnIndex.names
        if isinstance(data, MultilevelParquetTable):
            return data.columnLevels
        else:
            raise TypeError(f"Unknown type for data: {type(data)}!")

    def _get_data_columnLevelNames(self, data, columnIndex=None):
        """Gets the content of each of the column levels for a multilevel table

        Similar to `_get_data_columnLevels`, this enables backward
        compatibility with gen2.

        Mirrors original gen2 implementation within
        `pipe.tasks.parquetTable.MultilevelParquetTable`
        """
        if isinstance(data, DeferredDatasetHandle):
            if columnIndex is None:
                columnIndex = data.get(component="columns")
        if columnIndex is not None:
            columnLevels = columnIndex.names
            columnLevelNames = {
                level: list(np.unique(np.array([c for c in columnIndex])[:, i]))
                for i, level in enumerate(columnLevels)
            }
            return columnLevelNames
        if isinstance(data, MultilevelParquetTable):
            return data.columnLevelNames
        else:
            raise TypeError(f"Unknown type for data: {type(data)}!")

    def _colsFromDict(self, colDict, columnIndex=None):
        """Converts dictionary column specification to a list of columns

        This mirrors the original gen2 implementation within
        `pipe.tasks.parquetTable.MultilevelParquetTable`
        """
        new_colDict = {}
        columnLevels = self._get_data_columnLevels(None, columnIndex=columnIndex)

        for i, lev in enumerate(columnLevels):
            if lev in colDict:
                if isinstance(colDict[lev], str):
                    new_colDict[lev] = [colDict[lev]]
                else:
                    new_colDict[lev] = colDict[lev]
            else:
                # Level not constrained: take every value present in the index.
                new_colDict[lev] = columnIndex.levels[i]

        levelCols = [new_colDict[lev] for lev in columnLevels]
        cols = product(*levelCols)
        return list(cols)

    def multilevelColumns(self, data, columnIndex=None, returnTuple=False):
        """Returns columns needed by functor from multilevel dataset

        To access tables with multilevel column structure, the
        `MultilevelParquetTable` or `DeferredDatasetHandle` need to be
        passed either a list of tuples or a dictionary.

        Parameters
        ----------
        data : `MultilevelParquetTable` or `DeferredDatasetHandle`

        columnIndex (optional): pandas `Index` object
            either passed or read in from `DeferredDatasetHandle`.

        `returnTuple` : bool
            If true, then return a list of tuples rather than the column
            dictionary specification.  This is set to `True` by
            `CompositeFunctor` in order to be able to combine columns from
            the various component functors.
        """
        if isinstance(data, DeferredDatasetHandle) and columnIndex is None:
            columnIndex = data.get(component="columns")

        # Confirm that the dataset has the column levels the functor is
        # expecting it to have.
        columnLevels = self._get_data_columnLevels(data, columnIndex)

        if not set(columnLevels) == set(self._columnLevels):
            raise ValueError(
                "ParquetTable does not have the expected column levels. "
                f"Got {columnLevels}; expected {self._columnLevels}."
            )

        columnDict = {'column': self.columns,
                      'dataset': self.dataset}
        if self.filt is None:
            columnLevelNames = self._get_data_columnLevelNames(data, columnIndex)
            if "band" in columnLevels:
                if self.dataset == "ref":
                    # For the reference dataset any band works; take the first.
                    columnDict["band"] = columnLevelNames["band"][0]
                else:
                    raise ValueError(f"'filt' not set for functor {self.name}"
                                     f"(dataset {self.dataset}) "
                                     "and ParquetTable "
                                     "contains multiple filters in column index. "
                                     "Set 'filt' or set 'dataset' to 'ref'.")
        else:
            columnDict['band'] = self.filt

        if isinstance(data, MultilevelParquetTable):
            return data._colsFromDict(columnDict)
        elif isinstance(data, DeferredDatasetHandle):
            if returnTuple:
                return self._colsFromDict(columnDict, columnIndex=columnIndex)
            else:
                return columnDict

    def _func(self, df, dropna=True):
        raise NotImplementedError('Must define calculation on dataframe')

    def _get_columnIndex(self, data):
        """Return columnIndex
        """
        if isinstance(data, DeferredDatasetHandle):
            return data.get(component="columns")
        else:
            return None

    def _get_data(self, data):
        """Retrieve dataframe necessary for calculation.

        The data argument can be a DataFrame, a ParquetTable instance, or a
        gen3 DeferredDatasetHandle

        Returns dataframe upon which `self._func` can act.

        N.B. while passing a raw pandas `DataFrame` *should* work here, it
        has not been tested.
        """
        if isinstance(data, pd.DataFrame):
            return data

        # First thing to do: check to see if the data source has a multilevel
        # column index or not.
        columnIndex = self._get_columnIndex(data)
        is_multiLevel = isinstance(data, MultilevelParquetTable) or isinstance(columnIndex, pd.MultiIndex)

        # Simple single-level parquet table, gen2
        if isinstance(data, ParquetTable) and not is_multiLevel:
            columns = self.columns
            df = data.toDataFrame(columns=columns)
            return df

        # Get proper columns specification for this functor
        if is_multiLevel:
            columns = self.multilevelColumns(data, columnIndex=columnIndex)
        else:
            columns = self.columns

        if isinstance(data, MultilevelParquetTable):
            # Load in-memory dataframe with appropriate columns the gen2 way
            df = data.toDataFrame(columns=columns, droplevels=False)
        elif isinstance(data, DeferredDatasetHandle):
            # Load in-memory dataframe with appropriate columns the gen3 way
            df = data.get(parameters={"columns": columns})

        # Drop unnecessary column levels
        if is_multiLevel:
            df = self._setLevels(df)

        return df

    def _setLevels(self, df):
        # Keep only the column-index levels named in `_dfLevels`.
        levelsToDrop = [n for n in df.columns.names if n not in self._dfLevels]
        df.columns = df.columns.droplevel(levelsToDrop)
        return df

    def _dropna(self, vals):
        return vals.dropna()

    def __call__(self, data, dropna=False):
        # NOTE: previously `_get_data` was inside the try block, so a failure
        # there left `df` unbound and `self.fail(df)` raised a confusing
        # UnboundLocalError.  Data-loading errors now propagate directly;
        # only failures of the calculation itself fall back to `fail`.
        df = self._get_data(data)
        try:
            vals = self._func(df)
        except Exception:
            vals = self.fail(df)
        if dropna:
            vals = self._dropna(vals)

        return vals

    def difference(self, data1, data2, **kwargs):
        """Computes difference between functor called on two different
        ParquetTable objects
        """
        return self(data1, **kwargs) - self(data2, **kwargs)

    def fail(self, df):
        # All-NaN series over the same index, used when `_func` fails.
        return pd.Series(np.full(len(df), np.nan), index=df.index)

    @property
    def name(self):
        """Full name of functor (suitable for figure labels)
        """
        # NOTE(review): returns (does not raise) NotImplementedError; the
        # error path in `multilevelColumns` interpolates `self.name`, so
        # raising here would change behavior -- confirm before changing.
        return NotImplementedError

    @property
    def shortname(self):
        """Short name of functor (suitable for column name/dict key)
        """
        return self.name

374 

375 

class CompositeFunctor(Functor):
    """Perform multiple calculations at once on a catalog

    The role of a `CompositeFunctor` is to group together computations from
    multiple functors.  Instead of returning `pandas.Series` a
    `CompositeFunctor` returns a `pandas.Dataframe`, with the column names
    being the keys of `funcDict`.

    The `columns` attribute of a `CompositeFunctor` is the union of all
    columns in all the component functors.

    A `CompositeFunctor` does not use a `_func` method itself; rather,
    when a `CompositeFunctor` is called, all its columns are loaded
    at once, and the resulting dataframe is passed to the `_func` method of
    each component functor.  This has the advantage of only doing I/O
    (reading from parquet file) once, and works because each individual
    `_func` method of each component functor does not care if there are
    *extra* columns in the dataframe being passed; only that it must contain
    *at least* the `columns` it expects.

    An important and useful class method is `from_yaml`, which takes as
    argument the path to a YAML file specifying a collection of functors.

    Parameters
    ----------
    funcs : `dict` or `list`
        Dictionary or list of functors.  If a list, then it will be converted
        into a dictonary according to the `.shortname` attribute of each
        functor.
    """
    dataset = None

    def __init__(self, funcs, **kwargs):

        # `isinstance` rather than `type(...) == dict` so dict subclasses
        # (e.g. OrderedDict) are accepted too.
        if isinstance(funcs, dict):
            self.funcDict = funcs
        else:
            self.funcDict = {f.shortname: f for f in funcs}

        self._filt = None

        super().__init__(**kwargs)

    @property
    def filt(self):
        return self._filt

    @filt.setter
    def filt(self, filt):
        # Propagate the filter to every component functor.
        if filt is not None:
            for _, f in self.funcDict.items():
                f.filt = filt
        self._filt = filt

    def update(self, new):
        """Add functors from a dictionary or another `CompositeFunctor`."""
        if isinstance(new, dict):
            self.funcDict.update(new)
        elif isinstance(new, CompositeFunctor):
            self.funcDict.update(new.funcDict)
        else:
            raise TypeError('Can only update with dictionary or CompositeFunctor.')

        # Make sure new functors have the same 'filt' set
        if self.filt is not None:
            self.filt = self.filt

    @property
    def columns(self):
        # Union of the columns of all component functors.
        return list(set([x for y in [f.columns for f in self.funcDict.values()] for x in y]))

    def multilevelColumns(self, data, **kwargs):
        # Get the union of columns for all component functors.
        # Note the need to have `returnTuple=True` here.
        return list(
            set(
                [
                    x
                    for y in [
                        f.multilevelColumns(data, returnTuple=True, **kwargs) for f in self.funcDict.values()
                    ]
                    for x in y
                ]
            )
        )

    def __call__(self, data, **kwargs):
        """Apply the functor to the data table

        Parameters
        ----------
        data : `lsst.daf.butler.DeferredDatasetHandle`,
            `lsst.pipe.tasks.parquetTable.MultilevelParquetTable`,
            `lsst.pipe.tasks.parquetTable.ParquetTable`,
            or `pandas.DataFrame`.
            The table or a pointer to a table on disk from which columns can
            be accessed
        """
        columnIndex = self._get_columnIndex(data)

        # First, determine whether data has a multilevel index
        # (either gen2 or gen3)
        is_multiLevel = isinstance(data, MultilevelParquetTable) or isinstance(columnIndex, pd.MultiIndex)

        # Multilevel index, gen2 or gen3
        if is_multiLevel:
            columns = self.multilevelColumns(data, columnIndex=columnIndex)

            if isinstance(data, MultilevelParquetTable):
                # Read data into memory the gen2 way
                df = data.toDataFrame(columns=columns, droplevels=False)
            elif isinstance(data, DeferredDatasetHandle):
                # Read data into memory the gen3 way
                df = data.get(parameters={"columns": columns})

            valDict = {}
            for k, f in self.funcDict.items():
                # NOTE: `subdf` is reset each iteration; previously a failure
                # in the column selection could reference an unbound (or
                # stale, from an earlier iteration) `subdf` in the except
                # branch.
                subdf = None
                try:
                    subdf = f._setLevels(
                        df[f.multilevelColumns(data, returnTuple=True, columnIndex=columnIndex)]
                    )
                    valDict[k] = f._func(subdf)
                except Exception:
                    # Fall back to the full dataframe if the failure happened
                    # before `subdf` was assigned, so `fail` can still build
                    # an all-NaN column with the right index.
                    valDict[k] = f.fail(subdf if subdf is not None else df)

        else:
            if isinstance(data, DeferredDatasetHandle):
                # input if Gen3 deferLoad=True
                df = data.get(parameters={"columns": self.columns})
            elif isinstance(data, pd.DataFrame):
                # input if Gen3 deferLoad=False
                df = data
            else:
                # Original Gen2 input is type ParquetTable and the fallback
                df = data.toDataFrame(columns=self.columns)

            valDict = {k: f._func(df) for k, f in self.funcDict.items()}

        try:
            valDf = pd.concat(valDict, axis=1)
        except TypeError:
            print([(k, type(v)) for k, v in valDict.items()])
            raise

        if kwargs.get('dropna', False):
            valDf = valDf.dropna(how='any')

        return valDf

    @classmethod
    def renameCol(cls, col, renameRules):
        """Rename a column according to (old-prefix, new-prefix) rules."""
        if renameRules is None:
            return col
        for old, new in renameRules:
            if col.startswith(old):
                col = col.replace(old, new)
        return col

    @classmethod
    def from_file(cls, filename, **kwargs):
        """Construct a `CompositeFunctor` from a YAML specification file."""
        # Allow environment variables in the filename.
        filename = os.path.expandvars(filename)
        with open(filename) as f:
            translationDefinition = yaml.safe_load(f)

        return cls.from_yaml(translationDefinition, **kwargs)

    @classmethod
    def from_yaml(cls, translationDefinition, **kwargs):
        """Construct a `CompositeFunctor` from a parsed YAML specification."""
        funcs = {}
        for func, val in translationDefinition['funcs'].items():
            funcs[func] = init_fromDict(val, name=func)

        if 'flag_rename_rules' in translationDefinition:
            renameRules = translationDefinition['flag_rename_rules']
        else:
            renameRules = None

        # Plain flag columns are mapped onto `Column` functors for the
        # appropriate dataset.
        if 'refFlags' in translationDefinition:
            for flag in translationDefinition['refFlags']:
                funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='ref')

        if 'forcedFlags' in translationDefinition:
            for flag in translationDefinition['forcedFlags']:
                funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='forced_src')

        if 'flags' in translationDefinition:
            for flag in translationDefinition['flags']:
                funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='meas')

        return cls(funcs, **kwargs)

563 

564 

def mag_aware_eval(df, expr):
    """Evaluate an expression on a DataFrame, knowing what the 'mag' function means

    Builds on `pandas.DataFrame.eval`, which parses and executes math on
    dataframes.

    Parameters
    ----------
    df : pandas.DataFrame
        Dataframe on which to evaluate expression.

    expr : str
        Expression.

    Returns
    -------
    val : pandas.Series
        Result of evaluating the (rewritten) expression on ``df``.
    """
    # The `truediv=True` keyword formerly passed to `DataFrame.eval` was
    # deprecated in pandas 1.0 and removed in pandas 2.0; true division is
    # the default in Python 3, so the keyword is simply dropped.
    try:
        expr_new = re.sub(r'mag\((\w+)\)', r'-2.5*log(\g<1>)/log(10)', expr)
        val = df.eval(expr_new)
    except Exception:  # Should check what actually gets raised
        # Retry assuming bare names refer to `_instFlux` columns.
        expr_new = re.sub(r'mag\((\w+)\)', r'-2.5*log(\g<1>_instFlux)/log(10)', expr)
        val = df.eval(expr_new)
    return val

585 

586 

class CustomFunctor(Functor):
    """Arbitrary computation on a catalog

    Column names (and thus the columns to be loaded from catalog) are found
    by finding all words and trying to ignore all "math-y" words.

    Parameters
    ----------
    expr : str
        Expression to evaluate, to be parsed and executed by `mag_aware_eval`.
    """
    _ignore_words = ('mag', 'sin', 'cos', 'exp', 'log', 'sqrt')

    def __init__(self, expr, **kwargs):
        self.expr = expr
        super().__init__(**kwargs)

    @property
    def name(self):
        return self.expr

    @property
    def columns(self):
        """Catalog columns referenced by the expression."""
        # Arguments of mag() calls are flux columns and may need the
        # `_instFlux` suffix appended.
        flux_args = re.findall(r'mag\(\s*(\w+)\s*\)', self.expr)

        words = re.findall(r'[a-zA-Z_]+', self.expr)
        cols = [w for w in words if w not in self._ignore_words]
        bare_flux_names = []
        for arg in flux_args:
            if re.search('_instFlux$', arg):
                cols.append(arg)
            else:
                cols.append(f'{arg}_instFlux')
                bare_flux_names.append(arg)

        # The bare mag() arguments themselves are not real columns.
        return list({c for c in cols if c not in bare_flux_names})

    def _func(self, df):
        return mag_aware_eval(df, self.expr)

625 

626 

class Column(Functor):
    """Functor that simply returns the column of the given name."""

    def __init__(self, col, **kwargs):
        self.col = col
        super().__init__(**kwargs)

    @property
    def name(self):
        # The column name doubles as the functor name.
        return self.col

    @property
    def columns(self):
        return [self.col]

    def _func(self, df):
        return df[self.col]

645 

646 

class Index(Functor):
    """Return the value of the index for each object."""

    # Dummy column request: something must be read from the table even though
    # only the index is actually used.
    columns = ['coord_ra']
    _defaultDataset = 'ref'
    _defaultNoDup = True

    def _func(self, df):
        return pd.Series(df.index, index=df.index)

657 

658 

class IDColumn(Column):
    """Return the object ID, taken from the dataframe index."""
    col = 'id'
    _allow_difference = False
    _defaultNoDup = True

    def _func(self, df):
        # Ignore the 'id' column content; the index holds the IDs.
        return pd.Series(df.index, index=df.index)

666 

667 

class FootprintNPix(Column):
    """Return the `base_Footprint_nPix` column (footprint size in pixels)."""
    col = 'base_Footprint_nPix'

670 

671 

class CoordColumn(Column):
    """Base class for coordinate column, in degrees."""
    # Stored coordinate values are in radians; convert on read.
    _radians = True

    def __init__(self, col, **kwargs):
        super().__init__(col, **kwargs)

    def _func(self, df):
        # Build a new series rather than modifying the original column,
        # in case that column is used by another functor.
        values = df[self.col]
        if self._radians:
            return values * 180 / np.pi
        return values

684 

685 

class RAColumn(CoordColumn):
    """Right Ascension, in degrees

    Reads the `coord_ra` column and converts it from radians to degrees
    (conversion performed by `CoordColumn`).
    """
    name = 'RA'
    _defaultNoDup = True

    def __init__(self, **kwargs):
        super().__init__('coord_ra', **kwargs)

    # NOTE: a `__call__` override that only forwarded to `super().__call__`
    # was removed; the inherited implementation is identical.

697 

698 

class DecColumn(CoordColumn):
    """Declination, in degrees

    Reads the `coord_dec` column and converts it from radians to degrees
    (conversion performed by `CoordColumn`).
    """
    name = 'Dec'
    _defaultNoDup = True

    def __init__(self, **kwargs):
        super().__init__('coord_dec', **kwargs)

    # NOTE: a `__call__` override that only forwarded to `super().__call__`
    # was removed; the inherited implementation is identical.

710 

711 

def fluxName(col):
    """Append '_instFlux' to a column name unless it already ends with it."""
    suffix = '_instFlux'
    return col if col.endswith(suffix) else col + suffix

716 

717 

def fluxErrName(col):
    """Append '_instFluxErr' to a column name unless it already ends with it."""
    suffix = '_instFluxErr'
    return col if col.endswith(suffix) else col + suffix

722 

723 

class Mag(Functor):
    """Compute calibrated magnitude

    Takes a `calib` argument, which returns the flux at mag=0
    as `calib.getFluxMag0()`.  If not provided, then the default
    `fluxMag0` is 63095734448.0194, which is default for HSC.
    This default should be removed in DM-21955

    This calculation hides warnings about invalid values and dividing by zero.

    As for all functors, a `dataset` and `filt` kwarg should be provided upon
    initialization.  Unlike the default `Functor`, however, the default
    dataset for a `Mag` is `'meas'`, rather than `'ref'`.

    Parameters
    ----------
    col : `str`
        Name of flux column from which to compute magnitude.  Can be
        parseable by `lsst.pipe.tasks.functors.fluxName` function---that is,
        you can pass `'modelfit_CModel'` instead of
        `'modelfit_CModel_instFlux'`) and it will understand.
    calib : `lsst.afw.image.calib.Calib` (optional)
        Object that knows zero point.
    """
    _defaultDataset = 'meas'

    def __init__(self, col, calib=None, **kwargs):
        self.col = fluxName(col)
        self.calib = calib
        if calib is not None:
            self.fluxMag0 = calib.getFluxMag0()[0]
        else:
            # TO DO: DM-21955 Replace hard coded photometic calibration values
            self.fluxMag0 = 63095734448.0194

        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.col]

    def _func(self, df):
        # `np.warnings` was an undocumented alias removed in numpy >= 1.24;
        # use the standard-library `warnings` module instead.
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', r'invalid value encountered')
            warnings.filterwarnings('ignore', r'divide by zero')
            return -2.5*np.log10(df[self.col] / self.fluxMag0)

    @property
    def name(self):
        return f'mag_{self.col}'

774 

775 

class MagErr(Mag):
    """Compute calibrated magnitude uncertainty

    Takes the same `calib` object as `lsst.pipe.tasks.functors.Mag`.

    Parameters
    ----------
    col : `str`
        Name of flux column
    calib : `lsst.afw.image.calib.Calib` (optional)
        Object that knows zero point.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        if self.calib is not None:
            self.fluxMag0Err = self.calib.getFluxMag0()[1]
        else:
            self.fluxMag0Err = 0.

    @property
    def columns(self):
        return [self.col, self.col + 'Err']

    def _func(self, df):
        # `np.warnings` was removed in numpy >= 1.24; use the stdlib
        # `warnings` module instead.
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', r'invalid value encountered')
            warnings.filterwarnings('ignore', r'divide by zero')
            fluxCol, fluxErrCol = self.columns
            # Standard error propagation for -2.5*log10(flux/fluxMag0).
            x = df[fluxErrCol] / df[fluxCol]
            y = self.fluxMag0Err / self.fluxMag0
            magErr = (2.5 / np.log(10.)) * np.sqrt(x*x + y*y)
            return magErr

    @property
    def name(self):
        return super().name + '_err'

812 

813 

class NanoMaggie(Mag):
    """Flux scaled by the zero-point flux and expressed in units of 1e-9.

    NOTE(review): the original docstring was empty; the conversion below
    assumes `fluxMag0` is the flux of a magnitude-zero source -- confirm.
    """

    def _func(self, df):
        return (df[self.col] / self.fluxMag0) * 1e9

820 

821 

class MagDiff(Functor):
    """Functor to calculate magnitude difference between two flux columns."""
    # NOTE: the docstring was previously placed *after* `_defaultDataset`,
    # where it was a no-op string statement rather than the class docstring.
    _defaultDataset = 'meas'

    def __init__(self, col1, col2, **kwargs):
        self.col1 = fluxName(col1)
        self.col2 = fluxName(col2)
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.col1, self.col2]

    def _func(self, df):
        # `np.warnings` was removed in numpy >= 1.24; use the stdlib
        # `warnings` module instead.
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', r'invalid value encountered')
            warnings.filterwarnings('ignore', r'divide by zero')
            return -2.5*np.log10(df[self.col1]/df[self.col2])

    @property
    def name(self):
        return f'(mag_{self.col1} - mag_{self.col2})'

    @property
    def shortname(self):
        return f'magDiff_{self.col1}_{self.col2}'

849 

850 

class Color(Functor):
    """Compute the color between two filters

    Computes color by initializing two different `Mag`
    functors based on the `col` and filters provided, and
    then returning the difference.

    This is enabled by the `_func` expecting a dataframe with a
    multilevel column index, with both `'band'` and `'column'`,
    instead of just `'column'`, which is the `Functor` default.
    This is controlled by the `_dfLevels` attribute.

    Also of note, the default dataset for `Color` is `forced_src'`,
    whereas for `Mag` it is `'meas'`.

    Parameters
    ----------
    col : str
        Name of flux column from which to compute; same as would be passed to
        `lsst.pipe.tasks.functors.Mag`.

    filt2, filt1 : str
        Filters from which to compute magnitude difference.
        Color computed is `Mag(filt2) - Mag(filt1)`.
    """
    _defaultDataset = 'forced_src'
    # Keep the band level so `_func` can select each filter's columns.
    _dfLevels = ('band', 'column')
    _defaultNoDup = True

    def __init__(self, col, filt2, filt1, **kwargs):
        self.col = fluxName(col)
        if filt2 == filt1:
            raise RuntimeError("Cannot compute Color for %s: %s - %s " % (col, filt2, filt1))
        self.filt2 = filt2
        self.filt1 = filt1

        # Component magnitude functors, one per filter.
        self.mag2 = Mag(col, filt=filt2, **kwargs)
        self.mag1 = Mag(col, filt=filt1, **kwargs)

        super().__init__(**kwargs)

    @property
    def filt(self):
        # A Color spans two filters, so it reports no single filter.
        return None

    @filt.setter
    def filt(self, filt):
        # Deliberate no-op: `filt` must remain None (see property above);
        # super().__init__ assigns self.filt, which lands here harmlessly.
        pass

    def _func(self, df):
        # `df` has a ('band', 'column') column index; select each band's
        # sub-frame and delegate to the per-filter Mag functors.
        mag2 = self.mag2._func(df[self.filt2])
        mag1 = self.mag1._func(df[self.filt1])
        return mag2 - mag1

    @property
    def columns(self):
        return [self.mag1.col, self.mag2.col]

    def multilevelColumns(self, parq, **kwargs):
        # NOTE(review): tuple order here is (dataset, band, column) while
        # `Functor._columnLevels` is ('band', 'dataset', 'column') -- confirm
        # this matches the actual level order of the table being accessed.
        return [(self.dataset, self.filt1, self.col), (self.dataset, self.filt2, self.col)]

    @property
    def name(self):
        return f'{self.filt2} - {self.filt1} ({self.col})'

    @property
    def shortname(self):
        return f"{self.col}_{self.filt2.replace('-', '')}m{self.filt1.replace('-', '')}"

919 

920 

class Labeller(Functor):
    """Main function of this subclass is to override the dropna=True
    """
    _null_label = 'null'
    _allow_difference = False
    name = 'label'
    _force_str = False

    def __call__(self, parq, dropna=False, **kwargs):
        # A label must be produced for every row, so NaN-dropping is always
        # disabled regardless of the `dropna` value supplied by the caller.
        return super().__call__(parq, dropna=False, **kwargs)

931 

932 

class StarGalaxyLabeller(Labeller):
    """Label each object 'star', 'galaxy', or 'null' from extendedness."""
    _columns = ["base_ClassificationExtendedness_value"]
    _column = "base_ClassificationExtendedness_value"

    def _func(self, df):
        extendedness = df[self._columns][self._column]
        missing = extendedness.isnull()
        # Code 0/1 via the 0.5 extendedness threshold; code 2 marks nulls.
        codes = (extendedness < 0.5).astype(int)
        codes = codes.mask(missing, 2)

        # TODO: DM-21954 Look into veracity of inline comment below
        # are these backwards?
        categories = ['galaxy', 'star', self._null_label]
        label = pd.Series(pd.Categorical.from_codes(codes, categories=categories),
                          index=extendedness.index, name='label')
        if self._force_str:
            label = label.astype(str)
        return label

951 

952 

class NumStarLabeller(Labeller):
    """Label rows by how many bands flagged the object as a star."""
    _columns = ['numStarFlags']
    # NOTE(review): this class-level mapping is shadowed by the local
    # ``labels`` list inside _func and appears unused — confirm before use.
    labels = {"star": 0, "maybe": 1, "notStar": 2}

    def _func(self, df):
        x = df[self._columns][self._columns[0]]

        # Number of filters
        n = len(x.unique()) - 1

        # Bin the star-flag count: 0 -> 'noStar', 1..n-1 -> 'maybe', n -> 'star'.
        labels = ['noStar', 'maybe', 'star']
        label = pd.Series(pd.cut(x, [-1, 0, n-1, n], labels=labels),
                          index=x.index, name='label')

        if self._force_str:
            # Plain strings instead of a pandas Categorical.
            label = label.astype(str)

        return label

971 

972 

class DeconvolvedMoments(Functor):
    """Source trace moments minus PSF trace moments (Ixx + Iyy).

    Prefers HSM source moments, falling back row-wise to SDSS adaptive
    moments where HSM is not finite; HSM PSF moment columns are required.
    """
    name = 'Deconvolved Moments'
    shortname = 'deconvolvedMoments'
    _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
                "ext_shapeHSM_HsmSourceMoments_yy",
                "base_SdssShape_xx", "base_SdssShape_yy",
                "ext_shapeHSM_HsmPsfMoments_xx",
                "ext_shapeHSM_HsmPsfMoments_yy")

    def _func(self, df):
        """Calculate deconvolved moments"""
        if "ext_shapeHSM_HsmSourceMoments_xx" in df.columns:  # _xx added by tdm
            hsm = df["ext_shapeHSM_HsmSourceMoments_xx"] + df["ext_shapeHSM_HsmSourceMoments_yy"]
        else:
            # All-NaN placeholder so the SDSS fallback below is taken.
            # NOTE(review): a bare ndarray has no ``.where`` — this branch
            # would fail at the final line; confirm it is ever reached.
            hsm = np.ones(len(df))*np.nan
        sdss = df["base_SdssShape_xx"] + df["base_SdssShape_yy"]
        if "ext_shapeHSM_HsmPsfMoments_xx" in df.columns:
            psf = df["ext_shapeHSM_HsmPsfMoments_xx"] + df["ext_shapeHSM_HsmPsfMoments_yy"]
        else:
            # LSST does not have shape.sdss.psf. Could instead add base_PsfShape to catalog using
            # exposure.getPsf().computeShape(s.getCentroid()).getIxx()
            # raise TaskError("No psf shape parameter found in catalog")
            raise RuntimeError('No psf shape parameter found in catalog')

        # Use HSM where finite, SDSS otherwise, then subtract the PSF trace.
        return hsm.where(np.isfinite(hsm), sdss) - psf

998 

999 

class SdssTraceSize(Functor):
    """Trace radius, sqrt((Ixx + Iyy)/2), from SDSS adaptive moments (pixels)."""
    name = "SDSS Trace Size"
    shortname = 'sdssTrace'
    _columns = ("base_SdssShape_xx", "base_SdssShape_yy")

    def _func(self, df):
        momentSum = df["base_SdssShape_xx"] + df["base_SdssShape_yy"]
        return np.sqrt(0.5*momentSum)

1009 

1010 

class PsfSdssTraceSizeDiff(Functor):
    """Percent difference between source and PSF-model SDSS trace radii.

    Computed as 100*(src - psf) / mean(src, psf).
    """
    name = "PSF - SDSS Trace Size"
    shortname = 'psf_sdssTrace'
    _columns = ("base_SdssShape_xx", "base_SdssShape_yy",
                "base_SdssShape_psf_xx", "base_SdssShape_psf_yy")

    def _func(self, df):
        obj = np.sqrt(0.5*(df["base_SdssShape_xx"] + df["base_SdssShape_yy"]))
        model = np.sqrt(0.5*(df["base_SdssShape_psf_xx"] + df["base_SdssShape_psf_yy"]))
        return 100*(obj - model)/(0.5*(obj + model))

1023 

1024 

class HsmTraceSize(Functor):
    """Trace radius, sqrt((Ixx + Iyy)/2), from HSM source moments (pixels)."""
    name = 'HSM Trace Size'
    shortname = 'hsmTrace'
    _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
                "ext_shapeHSM_HsmSourceMoments_yy")

    def _func(self, df):
        momentSum = (df["ext_shapeHSM_HsmSourceMoments_xx"]
                     + df["ext_shapeHSM_HsmSourceMoments_yy"])
        return np.sqrt(0.5*momentSum)

1036 

1037 

class PsfHsmTraceSizeDiff(Functor):
    """Percent difference between source and PSF-model HSM trace radii.

    Computed as 100*(src - psf) / mean(src, psf).
    """
    name = 'PSF - HSM Trace Size'
    shortname = 'psf_HsmTrace'
    _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
                "ext_shapeHSM_HsmSourceMoments_yy",
                "ext_shapeHSM_HsmPsfMoments_xx",
                "ext_shapeHSM_HsmPsfMoments_yy")

    def _func(self, df):
        obj = np.sqrt(0.5*(df["ext_shapeHSM_HsmSourceMoments_xx"]
                           + df["ext_shapeHSM_HsmSourceMoments_yy"]))
        model = np.sqrt(0.5*(df["ext_shapeHSM_HsmPsfMoments_xx"]
                             + df["ext_shapeHSM_HsmPsfMoments_yy"]))
        return 100*(obj - model)/(0.5*(obj + model))

1054 

1055 

class HsmFwhm(Functor):
    """Gaussian FWHM, in arcseconds, derived from the HSM PSF trace radius."""
    name = 'HSM Psf FWHM'
    _columns = ('ext_shapeHSM_HsmPsfMoments_xx', 'ext_shapeHSM_HsmPsfMoments_yy')
    # TODO: DM-21403 pixel scale should be computed from the CD matrix or transform matrix
    pixelScale = 0.168
    SIGMA2FWHM = 2*np.sqrt(2*np.log(2))

    def _func(self, df):
        traceRadius = np.sqrt(0.5*(df['ext_shapeHSM_HsmPsfMoments_xx']
                                   + df['ext_shapeHSM_HsmPsfMoments_yy']))
        return self.pixelScale*self.SIGMA2FWHM*traceRadius

1066 

1067 

class E1(Functor):
    """Distortion ellipticity component e1 = (Ixx - Iyy) / (Ixx + Iyy).

    Parameters
    ----------
    colXX, colXY, colYY : `str`
        Names of the second-moment columns. ``colXY`` does not enter the
        computation but is kept in ``columns`` for interface symmetry
        with `E2` and `RadiusFromQuadrupole`.
    """
    name = "Distortion Ellipticity (e1)"
    shortname = "Distortion"

    def __init__(self, colXX, colXY, colYY, **kwargs):
        self.colXX = colXX
        self.colXY = colXY
        self.colYY = colYY
        self._columns = [self.colXX, self.colXY, self.colYY]
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.colXX, self.colXY, self.colYY]

    def _func(self, df):
        # Bug fix: operator precedence. The original evaluated
        # Ixx - (Iyy / (Ixx + Iyy)); the distortion definition (matching
        # sibling E2 = 2*Ixy/(Ixx+Iyy)) is (Ixx - Iyy) / (Ixx + Iyy).
        return (df[self.colXX] - df[self.colYY]) / (df[self.colXX] + df[self.colYY])

1085 

1086 

class E2(Functor):
    """Distortion ellipticity component e2 = 2*Ixy / (Ixx + Iyy)."""
    name = "Ellipticity e2"

    def __init__(self, colXX, colXY, colYY, **kwargs):
        self.colXX = colXX
        self.colXY = colXY
        self.colYY = colYY
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.colXX, self.colXY, self.colYY]

    def _func(self, df):
        trace = df[self.colXX] + df[self.colYY]
        return 2*df[self.colXY] / trace

1102 

1103 

class RadiusFromQuadrupole(Functor):
    """Determinant radius from quadrupole moments:
    r = (Ixx*Iyy - Ixy**2)**(1/4), i.e. the quarter root of det(Q).
    """

    def __init__(self, colXX, colXY, colYY, **kwargs):
        self.colXX = colXX
        self.colXY = colXY
        self.colYY = colYY
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.colXX, self.colXY, self.colYY]

    def _func(self, df):
        return (df[self.colXX]*df[self.colYY] - df[self.colXY]**2)**0.25

1118 

1119 

class LocalWcs(Functor):
    """Computations using the stored localWcs.
    """
    name = "LocalWcsOperations"

    def __init__(self,
                 colCD_1_1,
                 colCD_1_2,
                 colCD_2_1,
                 colCD_2_2,
                 **kwargs):
        self.colCD_1_1 = colCD_1_1
        self.colCD_1_2 = colCD_1_2
        self.colCD_2_1 = colCD_2_1
        self.colCD_2_2 = colCD_2_2
        super().__init__(**kwargs)

    def computeDeltaRaDec(self, x, y, cd11, cd12, cd21, cd22):
        """Apply the local CD matrix to a pixel offset to get (dRA, dDec).

        Parameters
        ----------
        x : `pandas.Series`
            X pixel coordinate.
        y : `pandas.Series`
            Y pixel coordinate.
        cd11 : `pandas.Series`
            [1, 1] element of the local Wcs affine transform.
        cd12 : `pandas.Series`
            [1, 2] element of the local Wcs affine transform.
        cd21 : `pandas.Series`
            [2, 1] element of the local Wcs affine transform.
        cd22 : `pandas.Series`
            [2, 2] element of the local Wcs affine transform.

        Returns
        -------
        raDecTuple : tuple
            RA and dec conversion of x and y given the local Wcs. Returned
            units are in radians.

        """
        return (x * cd11 + y * cd12, x * cd21 + y * cd22)

    def computeSkySeperation(self, ra1, dec1, ra2, dec2):
        """Compute the great-circle distance between two sky positions
        using the haversine formula.

        Parameters
        ----------
        ra1 : `pandas.Series`
            Ra of the first coordinate in radians.
        dec1 : `pandas.Series`
            Dec of the first coordinate in radians.
        ra2 : `pandas.Series`
            Ra of the second coordinate in radians.
        dec2 : `pandas.Series`
            Dec of the second coordinate in radians.

        Returns
        -------
        dist : `pandas.Series`
            Distance on the sphere in radians.
        """
        deltaDec = dec2 - dec1
        deltaRa = ra2 - ra1
        # Haversine formula: numerically stable for small separations.
        return 2 * np.arcsin(
            np.sqrt(
                np.sin(deltaDec / 2) ** 2
                + np.cos(dec2) * np.cos(dec1) * np.sin(deltaRa / 2) ** 2))

    def getSkySeperationFromPixel(self, x1, y1, x2, y2, cd11, cd12, cd21, cd22):
        """Compute the on-sky distance between two pixel positions using
        the local Wcs.

        Parameters
        ----------
        x1 : `pandas.Series`
            X pixel coordinate.
        y1 : `pandas.Series`
            Y pixel coordinate.
        x2 : `pandas.Series`
            X pixel coordinate.
        y2 : `pandas.Series`
            Y pixel coordinate.
        cd11 : `pandas.Series`
            [1, 1] element of the local Wcs affine transform.
        cd12 : `pandas.Series`
            [1, 2] element of the local Wcs affine transform.
        cd21 : `pandas.Series`
            [2, 1] element of the local Wcs affine transform.
        cd22 : `pandas.Series`
            [2, 2] element of the local Wcs affine transform.

        Returns
        -------
        Distance : `pandas.Series`
            Distance on the sphere in radians.
        """
        ra1, dec1 = self.computeDeltaRaDec(x1, y1, cd11, cd12, cd21, cd22)
        ra2, dec2 = self.computeDeltaRaDec(x2, y2, cd11, cd12, cd21, cd22)
        # Great circle distance for small separations.
        return self.computeSkySeperation(ra1, dec1, ra2, dec2)

1225 

1226 

class ComputePixelScale(LocalWcs):
    """Compute the local pixel scale from the stored CDMatrix.
    """
    name = "PixelScale"

    @property
    def columns(self):
        return [self.colCD_1_1,
                self.colCD_1_2,
                self.colCD_2_1,
                self.colCD_2_2]

    def pixelScaleArcseconds(self, cd11, cd12, cd21, cd22):
        """Compute the local pixel to scale conversion in arcseconds.

        Parameters
        ----------
        cd11 : `pandas.Series`
            [1, 1] element of the local Wcs affine transform in radians.
        cd12 : `pandas.Series`
            [1, 2] element of the local Wcs affine transform in radians.
        cd21 : `pandas.Series`
            [2, 1] element of the local Wcs affine transform in radians.
        cd22 : `pandas.Series`
            [2, 2] element of the local Wcs affine transform in radians.

        Returns
        -------
        pixScale : `pandas.Series`
            Arcseconds per pixel at the location of the local WCS.
        """
        # sqrt(|det CD|) is the linear scale (radians/pixel); convert to arcsec.
        return 3600 * np.degrees(np.sqrt(np.fabs(cd11 * cd22 - cd12 * cd21)))

    def _func(self, df):
        return self.pixelScaleArcseconds(df[self.colCD_1_1],
                                         df[self.colCD_1_2],
                                         df[self.colCD_2_1],
                                         df[self.colCD_2_2])

1267 

1268 

class ConvertPixelToArcseconds(ComputePixelScale):
    """Convert a value in units of pixels to units of arcseconds.

    Note: multiplies ``col`` by the local pixel scale once (linear, not
    squared); the original docstring described the squared conversion.
    """

    def __init__(self,
                 col,
                 colCD_1_1,
                 colCD_1_2,
                 colCD_2_1,
                 colCD_2_2,
                 **kwargs):
        self.col = col
        super().__init__(colCD_1_1,
                         colCD_1_2,
                         colCD_2_1,
                         colCD_2_2,
                         **kwargs)

    @property
    def name(self):
        return f"{self.col}_asArcseconds"

    @property
    def columns(self):
        return [self.col,
                self.colCD_1_1,
                self.colCD_1_2,
                self.colCD_2_1,
                self.colCD_2_2]

    def _func(self, df):
        return df[self.col] * self.pixelScaleArcseconds(df[self.colCD_1_1],
                                                        df[self.colCD_1_2],
                                                        df[self.colCD_2_1],
                                                        df[self.colCD_2_2])

1304 

1305 

class ConvertPixelSqToArcsecondsSq(ComputePixelScale):
    """Convert a value in units of pixels squared to units of arcseconds
    squared.

    Note: multiplies ``col`` by the local pixel scale twice; the original
    docstring described the linear (unsquared) conversion.
    """

    def __init__(self,
                 col,
                 colCD_1_1,
                 colCD_1_2,
                 colCD_2_1,
                 colCD_2_2,
                 **kwargs):
        self.col = col
        super().__init__(colCD_1_1,
                         colCD_1_2,
                         colCD_2_1,
                         colCD_2_2,
                         **kwargs)

    @property
    def name(self):
        return f"{self.col}_asArcsecondsSq"

    @property
    def columns(self):
        return [self.col,
                self.colCD_1_1,
                self.colCD_1_2,
                self.colCD_2_1,
                self.colCD_2_2]

    def _func(self, df):
        pixScale = self.pixelScaleArcseconds(df[self.colCD_1_1],
                                             df[self.colCD_1_2],
                                             df[self.colCD_2_1],
                                             df[self.colCD_2_2])
        return df[self.col] * pixScale * pixScale

1342 

1343 

class ReferenceBand(Functor):
    """Return the name of the band used as the reference for measurement.

    ``idxmax`` returns the first column holding the maximum (True > False),
    so ties resolve in the column order below: i, r, z, y, g.
    """
    name = 'Reference Band'
    shortname = 'refBand'

    @property
    def columns(self):
        return ["merge_measurement_i",
                "merge_measurement_r",
                "merge_measurement_z",
                "merge_measurement_y",
                "merge_measurement_g"]

    def _func(self, df):
        def getFilterAliasName(row):
            # get column name with the max value (True > False)
            colName = row.idxmax()
            return colName.replace('merge_measurement_', '')

        return df[self.columns].apply(getFilterAliasName, axis=1)

1363 

1364 

class Photometry(Functor):
    """Base class converting instrumental (DN) fluxes to calibrated units.

    Parameters
    ----------
    colFlux : `str`
        Name of the instrumental flux column to calibrate.
    colFluxErr : `str`, optional
        Name of the associated flux-error column.
    calib : optional
        Calibration object providing ``getFluxMag0()``; when None, the
        hard-coded coadd zeropoint ``COADD_ZP`` is assumed.
    """
    # AB to NanoJansky (3631 Jansky)
    AB_FLUX_SCALE = (0 * u.ABmag).to_value(u.nJy)
    LOG_AB_FLUX_SCALE = 12.56
    FIVE_OVER_2LOG10 = 1.085736204758129569
    # TO DO: DM-21955 Replace hard coded photometic calibration values
    COADD_ZP = 27

    def __init__(self, colFlux, colFluxErr=None, calib=None, **kwargs):
        # Vectorized overflow-safe hypot for elementwise error propagation.
        self.vhypot = np.vectorize(self.hypot)
        self.col = colFlux
        self.colFluxErr = colFluxErr

        self.calib = calib
        if calib is not None:
            self.fluxMag0, self.fluxMag0Err = calib.getFluxMag0()
        else:
            self.fluxMag0 = 1./np.power(10, -0.4*self.COADD_ZP)
            self.fluxMag0Err = 0.

        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.col]

    @property
    def name(self):
        return f'mag_{self.col}'

    @classmethod
    def hypot(cls, a, b):
        """Overflow-safe sqrt(a**2 + b**2) for scalars."""
        if np.abs(a) < np.abs(b):
            a, b = b, a
        if a == 0.:
            return 0.
        q = b/a
        return np.abs(a) * np.sqrt(1. + q*q)

    def dn2flux(self, dn, fluxMag0):
        """Convert instrumental flux (DN) to nanojansky."""
        return self.AB_FLUX_SCALE * dn / fluxMag0

    def dn2mag(self, dn, fluxMag0):
        """Convert instrumental flux (DN) to AB magnitude.

        Non-positive fluxes yield NaN/inf without emitting runtime
        warnings.  ``np.errstate`` replaces the old ``np.warnings``
        alias, which was removed in NumPy >= 1.24.
        """
        with np.errstate(invalid='ignore', divide='ignore'):
            return -2.5 * np.log10(dn/fluxMag0)

    def dn2fluxErr(self, dn, dnErr, fluxMag0, fluxMag0Err):
        """Propagate flux and zeropoint errors to a nanojansky error."""
        retVal = self.vhypot(dn * fluxMag0Err, dnErr * fluxMag0)
        retVal *= self.AB_FLUX_SCALE / fluxMag0 / fluxMag0
        return retVal

    def dn2MagErr(self, dn, dnErr, fluxMag0, fluxMag0Err):
        """Propagate flux and zeropoint errors to an AB magnitude error."""
        retVal = self.dn2fluxErr(dn, dnErr, fluxMag0, fluxMag0Err) / self.dn2flux(dn, fluxMag0)
        return self.FIVE_OVER_2LOG10 * retVal

1421 

1422 

class NanoJansky(Photometry):
    """Calibrated flux, in nanojansky, of the configured flux column."""

    def _func(self, df):
        counts = df[self.col]
        return self.dn2flux(counts, self.fluxMag0)

1426 

1427 

class NanoJanskyErr(Photometry):
    """Error on the calibrated nanojansky flux of the configured column."""

    @property
    def columns(self):
        return [self.col, self.colFluxErr]

    def _func(self, df):
        counts = df[self.col]
        countsErr = df[self.colFluxErr]
        fluxErr = self.dn2fluxErr(counts, countsErr, self.fluxMag0, self.fluxMag0Err)
        return pd.Series(fluxErr, index=df.index)

1436 

1437 

class Magnitude(Photometry):
    """Calibrated AB magnitude of the configured flux column."""

    def _func(self, df):
        counts = df[self.col]
        return self.dn2mag(counts, self.fluxMag0)

1441 

1442 

class MagnitudeErr(Photometry):
    """Error on the calibrated AB magnitude of the configured column."""

    @property
    def columns(self):
        return [self.col, self.colFluxErr]

    def _func(self, df):
        counts = df[self.col]
        countsErr = df[self.colFluxErr]
        magErr = self.dn2MagErr(counts, countsErr, self.fluxMag0, self.fluxMag0Err)
        return pd.Series(magErr, index=df.index)

1451 

1452 

class LocalPhotometry(Functor):
    """Base class for calibrating the specified instrument flux column using
    the local photometric calibration.

    Parameters
    ----------
    instFluxCol : `str`
        Name of the instrument flux column.
    instFluxErrCol : `str`
        Name of the associated error column for ``instFluxCol``.
    photoCalibCol : `str`
        Name of local calibration column.
    photoCalibErrCol : `str`
        Error associated with ``photoCalibCol``

    See also
    --------
    LocalPhotometry
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    """
    # AB magnitude of a 1 nJy source; offset to convert nJy fluxes to mags.
    logNJanskyToAB = (1 * u.nJy).to_value(u.ABmag)

    def __init__(self,
                 instFluxCol,
                 instFluxErrCol,
                 photoCalibCol,
                 photoCalibErrCol,
                 **kwargs):
        self.instFluxCol = instFluxCol
        self.instFluxErrCol = instFluxErrCol
        self.photoCalibCol = photoCalibCol
        self.photoCalibErrCol = photoCalibErrCol
        super().__init__(**kwargs)

    def instFluxToNanojansky(self, instFlux, localCalib):
        """Convert instrument flux to nanojanskys.

        Parameters
        ----------
        instFlux : `numpy.ndarray` or `pandas.Series`
            Array of instrument flux measurements
        localCalib : `numpy.ndarray` or `pandas.Series`
            Array of local photometric calibration estimates.

        Returns
        -------
        calibFlux : `numpy.ndarray` or `pandas.Series`
            Array of calibrated flux measurements.
        """
        return instFlux * localCalib

    def instFluxErrToNanojanskyErr(self, instFlux, instFluxErr, localCalib, localCalibErr):
        """Convert instrument flux error to nanojansky error.

        Parameters
        ----------
        instFlux : `numpy.ndarray` or `pandas.Series`
            Array of instrument flux measurements
        instFluxErr : `numpy.ndarray` or `pandas.Series`
            Errors on associated ``instFlux`` values
        localCalib : `numpy.ndarray` or `pandas.Series`
            Array of local photometric calibration estimates.
        localCalibErr : `numpy.ndarray` or `pandas.Series`
            Errors on associated ``localCalib`` values

        Returns
        -------
        calibFluxErr : `numpy.ndarray` or `pandas.Series`
            Errors on calibrated flux measurements.
        """
        return np.hypot(instFluxErr * localCalib, instFlux * localCalibErr)

    def instFluxToMagnitude(self, instFlux, localCalib):
        """Convert instrument flux to AB magnitudes.

        Parameters
        ----------
        instFlux : `numpy.ndarray` or `pandas.Series`
            Array of instrument flux measurements
        localCalib : `numpy.ndarray` or `pandas.Series`
            Array of local photometric calibration estimates.

        Returns
        -------
        calibMag : `numpy.ndarray` or `pandas.Series`
            Array of calibrated AB magnitudes.
        """
        return -2.5 * np.log10(self.instFluxToNanojansky(instFlux, localCalib)) + self.logNJanskyToAB

    def instFluxErrToMagnitudeErr(self, instFlux, instFluxErr, localCalib, localCalibErr):
        """Convert instrument flux error to an AB magnitude error.

        Parameters
        ----------
        instFlux : `numpy.ndarray` or `pandas.Series`
            Array of instrument flux measurements
        instFluxErr : `numpy.ndarray` or `pandas.Series`
            Errors on associated ``instFlux`` values
        localCalib : `numpy.ndarray` or `pandas.Series`
            Array of local photometric calibration estimates.
        localCalibErr : `numpy.ndarray` or `pandas.Series`
            Errors on associated ``localCalib`` values

        Returns
        -------
        calibMagErr: `numpy.ndarray` or `pandas.Series`
            Error on calibrated AB magnitudes.
        """
        err = self.instFluxErrToNanojanskyErr(instFlux, instFluxErr, localCalib, localCalibErr)
        # magErr = (2.5/ln10) * fluxErr / flux, with flux = instFlux*localCalib.
        # Bug fix: the original passed ``instFluxErr`` in place of
        # ``localCalib`` to instFluxToNanojansky, mis-scaling the error.
        return 2.5 / np.log(10) * err / self.instFluxToNanojansky(instFlux, localCalib)

1566 

1567 

class LocalNanojansky(LocalPhotometry):
    """Compute calibrated fluxes using the local calibration value.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    """

    @property
    def columns(self):
        return [self.instFluxCol, self.photoCalibCol]

    @property
    def name(self):
        return f'flux_{self.instFluxCol}'

    def _func(self, df):
        instFlux = df[self.instFluxCol]
        localCalib = df[self.photoCalibCol]
        return self.instFluxToNanojansky(instFlux, localCalib)

1589 

1590 

class LocalNanojanskyErr(LocalPhotometry):
    """Compute calibrated flux errors using the local calibration value.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    """

    @property
    def columns(self):
        return [self.instFluxCol, self.instFluxErrCol,
                self.photoCalibCol, self.photoCalibErrCol]

    @property
    def name(self):
        return f'fluxErr_{self.instFluxCol}'

    def _func(self, df):
        instFlux = df[self.instFluxCol]
        instFluxErr = df[self.instFluxErrCol]
        localCalib = df[self.photoCalibCol]
        localCalibErr = df[self.photoCalibErrCol]
        return self.instFluxErrToNanojanskyErr(instFlux, instFluxErr,
                                               localCalib, localCalibErr)

1614 

1615 

class LocalMagnitude(LocalPhotometry):
    """Compute calibrated AB magnitudes using the local calibration value.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    """

    @property
    def columns(self):
        return [self.instFluxCol, self.photoCalibCol]

    @property
    def name(self):
        return f'mag_{self.instFluxCol}'

    def _func(self, df):
        instFlux = df[self.instFluxCol]
        localCalib = df[self.photoCalibCol]
        return self.instFluxToMagnitude(instFlux, localCalib)

1638 

1639 

class LocalMagnitudeErr(LocalPhotometry):
    """Compute calibrated AB magnitude errors using the local calibration value.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    """

    @property
    def columns(self):
        return [self.instFluxCol, self.instFluxErrCol,
                self.photoCalibCol, self.photoCalibErrCol]

    @property
    def name(self):
        return f'magErr_{self.instFluxCol}'

    def _func(self, df):
        instFlux = df[self.instFluxCol]
        instFluxErr = df[self.instFluxErrCol]
        localCalib = df[self.photoCalibCol]
        localCalibErr = df[self.photoCalibErrCol]
        return self.instFluxErrToMagnitudeErr(instFlux, instFluxErr,
                                              localCalib, localCalibErr)

1665 

1666 

class LocalDipoleMeanFlux(LocalPhotometry):
    """Compute absolute mean of dipole fluxes.

    Parameters
    ----------
    instFluxPosCol, instFluxNegCol : `str`
        Positive/negative lobe instrument flux columns.
    instFluxPosErrCol, instFluxNegErrCol : `str`
        Errors associated with the lobe flux columns.
    photoCalibCol, photoCalibErrCol : `str`
        Local photometric calibration column and its error.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    LocalDipoleMeanFlux
    LocalDipoleMeanFluxErr
    LocalDipoleDiffFlux
    LocalDipoleDiffFluxErr
    """
    def __init__(self,
                 instFluxPosCol,
                 instFluxNegCol,
                 instFluxPosErrCol,
                 instFluxNegErrCol,
                 photoCalibCol,
                 photoCalibErrCol,
                 **kwargs):
        self.instFluxNegCol = instFluxNegCol
        self.instFluxPosCol = instFluxPosCol
        self.instFluxNegErrCol = instFluxNegErrCol
        self.instFluxPosErrCol = instFluxPosErrCol
        self.photoCalibCol = photoCalibCol
        self.photoCalibErrCol = photoCalibErrCol
        # The base class only stores one flux column; the negative lobe is
        # passed arbitrarily (``columns``/``_func`` use both lobes directly).
        super().__init__(instFluxNegCol,
                         instFluxNegErrCol,
                         photoCalibCol,
                         photoCalibErrCol,
                         **kwargs)

    @property
    def columns(self):
        return [self.instFluxPosCol,
                self.instFluxNegCol,
                self.photoCalibCol]

    @property
    def name(self):
        return f'dipMeanFlux_{self.instFluxPosCol}_{self.instFluxNegCol}'

    def _func(self, df):
        # Mean of the absolute calibrated lobe fluxes: 0.5*(|neg| + |pos|).
        return 0.5*(np.fabs(self.instFluxToNanojansky(df[self.instFluxNegCol], df[self.photoCalibCol]))
                    + np.fabs(self.instFluxToNanojansky(df[self.instFluxPosCol], df[self.photoCalibCol])))

1714 

1715 

class LocalDipoleMeanFluxErr(LocalDipoleMeanFlux):
    """Compute the error on the absolute mean of dipole fluxes.

    Propagates errors for ``0.5*(|neg| + |pos|)*calib``:
    ``0.5*sqrt(((|neg| + |pos|)*calibErr)**2 + (negErr**2 + posErr**2)*calib**2)``.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    LocalDipoleMeanFlux
    LocalDipoleMeanFluxErr
    LocalDipoleDiffFlux
    LocalDipoleDiffFluxErr
    """

    @property
    def columns(self):
        return [self.instFluxPosCol,
                self.instFluxNegCol,
                self.instFluxPosErrCol,
                self.instFluxNegErrCol,
                self.photoCalibCol,
                self.photoCalibErrCol]

    @property
    def name(self):
        return f'dipMeanFluxErr_{self.instFluxPosCol}_{self.instFluxNegCol}'

    def _func(self, df):
        # Bug fix: the calibration-error term must scale the *sum* of the
        # absolute lobe fluxes. The original omitted the inner parentheses,
        # multiplying only |pos| by the calibration error (contrast the
        # correctly parenthesized LocalDipoleDiffFluxErr).
        return 0.5*np.sqrt(
            ((np.fabs(df[self.instFluxNegCol]) + np.fabs(df[self.instFluxPosCol]))
             * df[self.photoCalibErrCol])**2
            + (df[self.instFluxNegErrCol]**2 + df[self.instFluxPosErrCol]**2)
            * df[self.photoCalibCol]**2)

1750 

1751 

class LocalDipoleDiffFlux(LocalDipoleMeanFlux):
    """Compute the absolute difference of dipole fluxes.

    Value is (abs(pos) - abs(neg))

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    LocalDipoleMeanFlux
    LocalDipoleMeanFluxErr
    LocalDipoleDiffFlux
    LocalDipoleDiffFluxErr
    """

    @property
    def columns(self):
        return [self.instFluxPosCol,
                self.instFluxNegCol,
                self.photoCalibCol]

    @property
    def name(self):
        return f'dipDiffFlux_{self.instFluxPosCol}_{self.instFluxNegCol}'

    def _func(self, df):
        calib = df[self.photoCalibCol]
        posFlux = np.fabs(self.instFluxToNanojansky(df[self.instFluxPosCol], calib))
        negFlux = np.fabs(self.instFluxToNanojansky(df[self.instFluxNegCol], calib))
        return posFlux - negFlux

1782 

1783 

class LocalDipoleDiffFluxErr(LocalDipoleMeanFlux):
    """Compute the error on the absolute difference of dipole fluxes.

    Propagates errors for ``(|pos| - |neg|)*calib``:
    ``sqrt(((|pos| - |neg|)*calibErr)**2 + (posErr**2 + negErr**2)*calib**2)``.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    LocalDipoleMeanFlux
    LocalDipoleMeanFluxErr
    LocalDipoleDiffFlux
    LocalDipoleDiffFluxErr
    """

    @property
    def columns(self):
        return [self.instFluxPosCol,
                self.instFluxNegCol,
                self.instFluxPosErrCol,
                self.instFluxNegErrCol,
                self.photoCalibCol,
                self.photoCalibErrCol]

    @property
    def name(self):
        return f'dipDiffFluxErr_{self.instFluxPosCol}_{self.instFluxNegCol}'

    def _func(self, df):
        # Calibration-error term scales the lobe difference; flux-error
        # terms scale with the calibration itself.
        return np.sqrt(
            ((np.fabs(df[self.instFluxPosCol]) - np.fabs(df[self.instFluxNegCol]))
             * df[self.photoCalibErrCol])**2
            + (df[self.instFluxPosErrCol]**2 + df[self.instFluxNegErrCol]**2)
            * df[self.photoCalibCol]**2)

1818 

1819 

class Ratio(Functor):
    """Base class for returning the ratio of 2 columns.

    Can be used to compute a Signal to Noise ratio for any input flux.

    Parameters
    ----------
    numerator : `str`
        Name of the column to use as the numerator in the ratio.
    denominator : `str`
        Name of the column to use as the denominator in the ratio.
    """
    def __init__(self,
                 numerator,
                 denominator,
                 **kwargs):
        self.numerator = numerator
        self.denominator = denominator
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.numerator, self.denominator]

    @property
    def name(self):
        return f'ratio_{self.numerator}_{self.denominator}'

    def _func(self, df):
        # Suppress divide-by-zero / invalid-value runtime warnings while
        # dividing. ``np.errstate`` replaces the old ``np.warnings`` alias,
        # which was removed in NumPy >= 1.24.
        with np.errstate(invalid='ignore', divide='ignore'):
            return df[self.numerator] / df[self.denominator]