Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of pipe_tasks. 

2# 

3# LSST Data Management System 

4# This product includes software developed by the 

5# LSST Project (http://www.lsst.org/). 

6# See COPYRIGHT file at the top of the source tree. 

7# 

8# This program is free software: you can redistribute it and/or modify 

9# it under the terms of the GNU General Public License as published by 

10# the Free Software Foundation, either version 3 of the License, or 

11# (at your option) any later version. 

12# 

13# This program is distributed in the hope that it will be useful, 

14# but WITHOUT ANY WARRANTY; without even the implied warranty of 

15# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

16# GNU General Public License for more details. 

17# 

18# You should have received a copy of the LSST License Statement and 

19# the GNU General Public License along with this program. If not, 

20# see <https://www.lsstcorp.org/LegalNotices/>. 

21# 

22import yaml 

23import re 

24from itertools import product 

25import os.path 

26 

27import pandas as pd 

28import numpy as np 

29import astropy.units as u 

30 

31from lsst.daf.persistence import doImport 

32from lsst.daf.butler import DeferredDatasetHandle 

33import lsst.geom as geom 

34import lsst.sphgeom as sphgeom 

35 

36from .parquetTable import ParquetTable, MultilevelParquetTable 

37 

38 

39def init_fromDict(initDict, basePath='lsst.pipe.tasks.functors', 

40 typeKey='functor', name=None): 

41 """Initialize an object defined in a dictionary 

42 

43 The object needs to be importable as 

44 f'{basePath}.{initDict[typeKey]}' 

45 The positional and keyword arguments (if any) are contained in 

46 "args" and "kwargs" entries in the dictionary, respectively. 

47 This is used in `functors.CompositeFunctor.from_yaml` to initialize 

48 a composite functor from a specification in a YAML file. 

49 

50 Parameters 

51 ---------- 

52 initDict : dictionary 

53 Dictionary describing object's initialization. Must contain 

54 an entry keyed by ``typeKey`` that is the name of the object, 

55 relative to ``basePath``. 

56 basePath : str 

57 Path relative to module in which ``initDict[typeKey]`` is defined. 

58 typeKey : str 

59 Key of ``initDict`` that is the name of the object 

60 (relative to `basePath`). 

61 """ 

62 initDict = initDict.copy() 

63 # TO DO: DM-21956 We should be able to define functors outside this module 

64 pythonType = doImport(f'{basePath}.{initDict.pop(typeKey)}') 

65 args = [] 

66 if 'args' in initDict: 

67 args = initDict.pop('args') 

68 if isinstance(args, str): 

69 args = [args] 

70 try: 

71 element = pythonType(*args, **initDict) 

72 except Exception as e: 

73 message = f'Error in constructing functor "{name}" of type {pythonType.__name__} with args: {args}' 

74 raise type(e)(message, e.args) 

75 return element 

76 

77 

78class Functor(object): 

79 """Define and execute a calculation on a ParquetTable 

80 

81 The `__call__` method accepts either a `ParquetTable` object or a 

82 `DeferredDatasetHandle`, and returns the 

83 result of the calculation as a single column. Each functor defines what 

84 columns are needed for the calculation, and only these columns are read 

85 from the `ParquetTable`. 

86 

87 The action of `__call__` consists of two steps: first, loading the 

88 necessary columns from disk into memory as a `pandas.DataFrame` object; 

89 and second, performing the computation on this dataframe and returning the 

90 result. 

91 

92 

93 To define a new `Functor`, a subclass must define a `_func` method, 

94 that takes a `pandas.DataFrame` and returns result in a `pandas.Series`. 

95 In addition, it must define the following attributes 

96 

97 * `_columns`: The columns necessary to perform the calculation 

98 * `name`: A name appropriate for a figure axis label 

99 * `shortname`: A name appropriate for use as a dictionary key 

100 

101 On initialization, a `Functor` should declare what band (`filt` kwarg) 

102 and dataset (e.g. `'ref'`, `'meas'`, `'forced_src'`) it is intended to be 

103 applied to. This enables the `_get_data` method to extract the proper 

104 columns from the parquet file. If not specified, the dataset will fall back 

105 on the `_defaultDataset`attribute. If band is not specified and `dataset` 

106 is anything other than `'ref'`, then an error will be raised when trying to 

107 perform the calculation. 

108 

109 Originally, `Functor` was set up to expect 

110 datasets formatted like the `deepCoadd_obj` dataset; that is, a 

111 dataframe with a multi-level column index, with the levels of the 

112 column index being `band`, `dataset`, and `column`. 

113 It has since been generalized to apply to dataframes without mutli-level 

114 indices and multi-level indices with just `dataset` and `column` levels. 

115 In addition, the `_get_data` method that reads 

116 the dataframe from the `ParquetTable` will return a dataframe with column 

117 index levels defined by the `_dfLevels` attribute; by default, this is 

118 `column`. 

119 

120 The `_dfLevels` attributes should generally not need to 

121 be changed, unless `_func` needs columns from multiple filters or datasets 

122 to do the calculation. 

123 An example of this is the `lsst.pipe.tasks.functors.Color` functor, for 

124 which `_dfLevels = ('band', 'column')`, and `_func` expects the dataframe 

125 it gets to have those levels in the column index. 

126 

127 Parameters 

128 ---------- 

129 filt : str 

130 Filter upon which to do the calculation 

131 

132 dataset : str 

133 Dataset upon which to do the calculation 

134 (e.g., 'ref', 'meas', 'forced_src'). 

135 

136 """ 

137 

138 _defaultDataset = 'ref' 

139 _dfLevels = ('column',) 

140 _defaultNoDup = False 

141 

142 def __init__(self, filt=None, dataset=None, noDup=None): 

143 self.filt = filt 

144 self.dataset = dataset if dataset is not None else self._defaultDataset 

145 self._noDup = noDup 

146 

147 @property 

148 def noDup(self): 

149 if self._noDup is not None: 

150 return self._noDup 

151 else: 

152 return self._defaultNoDup 

153 

154 @property 

155 def columns(self): 

156 """Columns required to perform calculation 

157 """ 

158 if not hasattr(self, '_columns'): 

159 raise NotImplementedError('Must define columns property or _columns attribute') 

160 return self._columns 

161 

162 def _get_data_columnLevels(self, data, columnIndex=None): 

163 """Gets the names of the column index levels 

164 

165 This should only be called in the context of a multilevel table. 

166 The logic here is to enable this to work both with the gen2 `MultilevelParquetTable` 

167 and with the gen3 `DeferredDatasetHandle`. 

168 

169 Parameters 

170 ---------- 

171 data : `MultilevelParquetTable` or `DeferredDatasetHandle` 

172 

173 columnnIndex (optional): pandas `Index` object 

174 if not passed, then it is read from the `DeferredDatasetHandle` 

175 """ 

176 if isinstance(data, DeferredDatasetHandle): 

177 if columnIndex is None: 

178 columnIndex = data.get(component="columns") 

179 if columnIndex is not None: 

180 return columnIndex.names 

181 if isinstance(data, MultilevelParquetTable): 

182 return data.columnLevels 

183 else: 

184 raise TypeError(f"Unknown type for data: {type(data)}!") 

185 

186 def _get_data_columnLevelNames(self, data, columnIndex=None): 

187 """Gets the content of each of the column levels for a multilevel table 

188 

189 Similar to `_get_data_columnLevels`, this enables backward compatibility with gen2. 

190 

191 Mirrors original gen2 implementation within `pipe.tasks.parquetTable.MultilevelParquetTable` 

192 """ 

193 if isinstance(data, DeferredDatasetHandle): 

194 if columnIndex is None: 

195 columnIndex = data.get(component="columns") 

196 if columnIndex is not None: 

197 columnLevels = columnIndex.names 

198 columnLevelNames = { 

199 level: list(np.unique(np.array([c for c in columnIndex])[:, i])) 

200 for i, level in enumerate(columnLevels) 

201 } 

202 return columnLevelNames 

203 if isinstance(data, MultilevelParquetTable): 

204 return data.columnLevelNames 

205 else: 

206 raise TypeError(f"Unknown type for data: {type(data)}!") 

207 

208 def _colsFromDict(self, colDict, columnIndex=None): 

209 """Converts dictionary column specficiation to a list of columns 

210 

211 This mirrors the original gen2 implementation within `pipe.tasks.parquetTable.MultilevelParquetTable` 

212 """ 

213 new_colDict = {} 

214 columnLevels = self._get_data_columnLevels(None, columnIndex=columnIndex) 

215 

216 for i, lev in enumerate(columnLevels): 

217 if lev in colDict: 

218 if isinstance(colDict[lev], str): 

219 new_colDict[lev] = [colDict[lev]] 

220 else: 

221 new_colDict[lev] = colDict[lev] 

222 else: 

223 new_colDict[lev] = columnIndex.levels[i] 

224 

225 levelCols = [new_colDict[lev] for lev in columnLevels] 

226 cols = product(*levelCols) 

227 return list(cols) 

228 

229 def multilevelColumns(self, data, columnIndex=None, returnTuple=False): 

230 """Returns columns needed by functor from multilevel dataset 

231 

232 To access tables with multilevel column structure, the `MultilevelParquetTable` 

233 or `DeferredDatasetHandle` need to be passed either a list of tuples or a 

234 dictionary. 

235 

236 Parameters 

237 ---------- 

238 data : `MultilevelParquetTable` or `DeferredDatasetHandle` 

239 

240 columnIndex (optional): pandas `Index` object 

241 either passed or read in from `DeferredDatasetHandle`. 

242 

243 `returnTuple` : bool 

244 If true, then return a list of tuples rather than the column dictionary 

245 specification. This is set to `True` by `CompositeFunctor` in order to be able to 

246 combine columns from the various component functors. 

247 

248 """ 

249 if isinstance(data, DeferredDatasetHandle) and columnIndex is None: 

250 columnIndex = data.get(component="columns") 

251 

252 # Confirm that the dataset has the column levels the functor is expecting it to have. 

253 columnLevels = self._get_data_columnLevels(data, columnIndex) 

254 

255 columnDict = {'column': self.columns, 

256 'dataset': self.dataset} 

257 if self.filt is None: 

258 columnLevelNames = self._get_data_columnLevelNames(data, columnIndex) 

259 if "band" in columnLevels: 

260 if self.dataset == "ref": 

261 columnDict["band"] = columnLevelNames["band"][0] 

262 else: 

263 raise ValueError(f"'filt' not set for functor {self.name}" 

264 f"(dataset {self.dataset}) " 

265 "and ParquetTable " 

266 "contains multiple filters in column index. " 

267 "Set 'filt' or set 'dataset' to 'ref'.") 

268 else: 

269 columnDict['band'] = self.filt 

270 

271 if isinstance(data, MultilevelParquetTable): 

272 return data._colsFromDict(columnDict) 

273 elif isinstance(data, DeferredDatasetHandle): 

274 if returnTuple: 

275 return self._colsFromDict(columnDict, columnIndex=columnIndex) 

276 else: 

277 return columnDict 

278 

279 def _func(self, df, dropna=True): 

280 raise NotImplementedError('Must define calculation on dataframe') 

281 

282 def _get_columnIndex(self, data): 

283 """Return columnIndex 

284 """ 

285 

286 if isinstance(data, DeferredDatasetHandle): 

287 return data.get(component="columns") 

288 else: 

289 return None 

290 

291 def _get_data(self, data): 

292 """Retrieve dataframe necessary for calculation. 

293 

294 The data argument can be a DataFrame, a ParquetTable instance, or a gen3 DeferredDatasetHandle 

295 

296 Returns dataframe upon which `self._func` can act. 

297 

298 N.B. while passing a raw pandas `DataFrame` *should* work here, it has not been tested. 

299 """ 

300 if isinstance(data, pd.DataFrame): 

301 return data 

302 

303 # First thing to do: check to see if the data source has a multilevel column index or not. 

304 columnIndex = self._get_columnIndex(data) 

305 is_multiLevel = isinstance(data, MultilevelParquetTable) or isinstance(columnIndex, pd.MultiIndex) 

306 

307 # Simple single-level parquet table, gen2 

308 if isinstance(data, ParquetTable) and not is_multiLevel: 

309 columns = self.columns 

310 df = data.toDataFrame(columns=columns) 

311 return df 

312 

313 # Get proper columns specification for this functor 

314 if is_multiLevel: 

315 columns = self.multilevelColumns(data, columnIndex=columnIndex) 

316 else: 

317 columns = self.columns 

318 

319 if isinstance(data, MultilevelParquetTable): 

320 # Load in-memory dataframe with appropriate columns the gen2 way 

321 df = data.toDataFrame(columns=columns, droplevels=False) 

322 elif isinstance(data, DeferredDatasetHandle): 

323 # Load in-memory dataframe with appropriate columns the gen3 way 

324 df = data.get(parameters={"columns": columns}) 

325 

326 # Drop unnecessary column levels 

327 if is_multiLevel: 

328 df = self._setLevels(df) 

329 

330 return df 

331 

332 def _setLevels(self, df): 

333 levelsToDrop = [n for n in df.columns.names if n not in self._dfLevels] 

334 df.columns = df.columns.droplevel(levelsToDrop) 

335 return df 

336 

337 def _dropna(self, vals): 

338 return vals.dropna() 

339 

340 def __call__(self, data, dropna=False): 

341 try: 

342 df = self._get_data(data) 

343 vals = self._func(df) 

344 except Exception: 

345 vals = self.fail(df) 

346 if dropna: 

347 vals = self._dropna(vals) 

348 

349 return vals 

350 

351 def difference(self, data1, data2, **kwargs): 

352 """Computes difference between functor called on two different ParquetTable objects 

353 """ 

354 return self(data1, **kwargs) - self(data2, **kwargs) 

355 

356 def fail(self, df): 

357 return pd.Series(np.full(len(df), np.nan), index=df.index) 

358 

359 @property 

360 def name(self): 

361 """Full name of functor (suitable for figure labels) 

362 """ 

363 return NotImplementedError 

364 

365 @property 

366 def shortname(self): 

367 """Short name of functor (suitable for column name/dict key) 

368 """ 

369 return self.name 

370 

371 

372class CompositeFunctor(Functor): 

373 """Perform multiple calculations at once on a catalog 

374 

375 The role of a `CompositeFunctor` is to group together computations from 

376 multiple functors. Instead of returning `pandas.Series` a 

377 `CompositeFunctor` returns a `pandas.Dataframe`, with the column names 

378 being the keys of `funcDict`. 

379 

380 The `columns` attribute of a `CompositeFunctor` is the union of all columns 

381 in all the component functors. 

382 

383 A `CompositeFunctor` does not use a `_func` method itself; rather, 

384 when a `CompositeFunctor` is called, all its columns are loaded 

385 at once, and the resulting dataframe is passed to the `_func` method of each component 

386 functor. This has the advantage of only doing I/O (reading from parquet file) once, 

387 and works because each individual `_func` method of each component functor does not 

388 care if there are *extra* columns in the dataframe being passed; only that it must contain 

389 *at least* the `columns` it expects. 

390 

391 An important and useful class method is `from_yaml`, which takes as argument the path to a YAML 

392 file specifying a collection of functors. 

393 

394 Parameters 

395 ---------- 

396 funcs : `dict` or `list` 

397 Dictionary or list of functors. If a list, then it will be converted 

398 into a dictonary according to the `.shortname` attribute of each functor. 

399 

400 """ 

401 dataset = None 

402 

403 def __init__(self, funcs, **kwargs): 

404 

405 if type(funcs) == dict: 

406 self.funcDict = funcs 

407 else: 

408 self.funcDict = {f.shortname: f for f in funcs} 

409 

410 self._filt = None 

411 

412 super().__init__(**kwargs) 

413 

414 @property 

415 def filt(self): 

416 return self._filt 

417 

418 @filt.setter 

419 def filt(self, filt): 

420 if filt is not None: 

421 for _, f in self.funcDict.items(): 

422 f.filt = filt 

423 self._filt = filt 

424 

425 def update(self, new): 

426 if isinstance(new, dict): 

427 self.funcDict.update(new) 

428 elif isinstance(new, CompositeFunctor): 

429 self.funcDict.update(new.funcDict) 

430 else: 

431 raise TypeError('Can only update with dictionary or CompositeFunctor.') 

432 

433 # Make sure new functors have the same 'filt' set 

434 if self.filt is not None: 

435 self.filt = self.filt 

436 

437 @property 

438 def columns(self): 

439 return list(set([x for y in [f.columns for f in self.funcDict.values()] for x in y])) 

440 

441 def multilevelColumns(self, data, **kwargs): 

442 # Get the union of columns for all component functors. Note the need to have `returnTuple=True` here. 

443 return list( 

444 set( 

445 [ 

446 x 

447 for y in [ 

448 f.multilevelColumns(data, returnTuple=True, **kwargs) for f in self.funcDict.values() 

449 ] 

450 for x in y 

451 ] 

452 ) 

453 ) 

454 

455 def __call__(self, data, **kwargs): 

456 """Apply the functor to the data table 

457 

458 Parameters 

459 ---------- 

460 data : `lsst.daf.butler.DeferredDatasetHandle`, 

461 `lsst.pipe.tasks.parquetTable.MultilevelParquetTable`, 

462 `lsst.pipe.tasks.parquetTable.ParquetTable`, 

463 or `pandas.DataFrame`. 

464 The table or a pointer to a table on disk from which columns can 

465 be accessed 

466 """ 

467 columnIndex = self._get_columnIndex(data) 

468 

469 # First, determine whether data has a multilevel index (either gen2 or gen3) 

470 is_multiLevel = isinstance(data, MultilevelParquetTable) or isinstance(columnIndex, pd.MultiIndex) 

471 

472 # Multilevel index, gen2 or gen3 

473 if is_multiLevel: 

474 columns = self.multilevelColumns(data, columnIndex=columnIndex) 

475 

476 if isinstance(data, MultilevelParquetTable): 

477 # Read data into memory the gen2 way 

478 df = data.toDataFrame(columns=columns, droplevels=False) 

479 elif isinstance(data, DeferredDatasetHandle): 

480 # Read data into memory the gen3 way 

481 df = data.get(parameters={"columns": columns}) 

482 

483 valDict = {} 

484 for k, f in self.funcDict.items(): 

485 try: 

486 subdf = f._setLevels( 

487 df[f.multilevelColumns(data, returnTuple=True, columnIndex=columnIndex)] 

488 ) 

489 valDict[k] = f._func(subdf) 

490 except Exception as e: 

491 try: 

492 valDict[k] = f.fail(subdf) 

493 except NameError: 

494 raise e 

495 

496 else: 

497 if isinstance(data, DeferredDatasetHandle): 

498 # input if Gen3 deferLoad=True 

499 df = data.get(parameters={"columns": self.columns}) 

500 elif isinstance(data, pd.DataFrame): 

501 # input if Gen3 deferLoad=False 

502 df = data 

503 else: 

504 # Original Gen2 input is type ParquetTable and the fallback 

505 df = data.toDataFrame(columns=self.columns) 

506 

507 valDict = {k: f._func(df) for k, f in self.funcDict.items()} 

508 

509 try: 

510 valDf = pd.concat(valDict, axis=1) 

511 except TypeError: 

512 print([(k, type(v)) for k, v in valDict.items()]) 

513 raise 

514 

515 if kwargs.get('dropna', False): 

516 valDf = valDf.dropna(how='any') 

517 

518 return valDf 

519 

520 @classmethod 

521 def renameCol(cls, col, renameRules): 

522 if renameRules is None: 

523 return col 

524 for old, new in renameRules: 

525 if col.startswith(old): 

526 col = col.replace(old, new) 

527 return col 

528 

529 @classmethod 

530 def from_file(cls, filename, **kwargs): 

531 # Allow environment variables in the filename. 

532 filename = os.path.expandvars(filename) 

533 with open(filename) as f: 

534 translationDefinition = yaml.safe_load(f) 

535 

536 return cls.from_yaml(translationDefinition, **kwargs) 

537 

538 @classmethod 

539 def from_yaml(cls, translationDefinition, **kwargs): 

540 funcs = {} 

541 for func, val in translationDefinition['funcs'].items(): 

542 funcs[func] = init_fromDict(val, name=func) 

543 

544 if 'flag_rename_rules' in translationDefinition: 

545 renameRules = translationDefinition['flag_rename_rules'] 

546 else: 

547 renameRules = None 

548 

549 if 'calexpFlags' in translationDefinition: 

550 for flag in translationDefinition['calexpFlags']: 

551 funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='calexp') 

552 

553 if 'refFlags' in translationDefinition: 

554 for flag in translationDefinition['refFlags']: 

555 funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='ref') 

556 

557 if 'forcedFlags' in translationDefinition: 

558 for flag in translationDefinition['forcedFlags']: 

559 funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='forced_src') 

560 

561 if 'flags' in translationDefinition: 

562 for flag in translationDefinition['flags']: 

563 funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='meas') 

564 

565 return cls(funcs, **kwargs) 

566 

567 

568def mag_aware_eval(df, expr): 

569 """Evaluate an expression on a DataFrame, knowing what the 'mag' function means 

570 

571 Builds on `pandas.DataFrame.eval`, which parses and executes math on dataframes. 

572 

573 Parameters 

574 ---------- 

575 df : pandas.DataFrame 

576 Dataframe on which to evaluate expression. 

577 

578 expr : str 

579 Expression. 

580 """ 

581 try: 

582 expr_new = re.sub(r'mag\((\w+)\)', r'-2.5*log(\g<1>)/log(10)', expr) 

583 val = df.eval(expr_new, truediv=True) 

584 except Exception: # Should check what actually gets raised 

585 expr_new = re.sub(r'mag\((\w+)\)', r'-2.5*log(\g<1>_instFlux)/log(10)', expr) 

586 val = df.eval(expr_new, truediv=True) 

587 return val 

588 

589 

590class CustomFunctor(Functor): 

591 """Arbitrary computation on a catalog 

592 

593 Column names (and thus the columns to be loaded from catalog) are found 

594 by finding all words and trying to ignore all "math-y" words. 

595 

596 Parameters 

597 ---------- 

598 expr : str 

599 Expression to evaluate, to be parsed and executed by `mag_aware_eval`. 

600 """ 

601 _ignore_words = ('mag', 'sin', 'cos', 'exp', 'log', 'sqrt') 

602 

603 def __init__(self, expr, **kwargs): 

604 self.expr = expr 

605 super().__init__(**kwargs) 

606 

607 @property 

608 def name(self): 

609 return self.expr 

610 

611 @property 

612 def columns(self): 

613 flux_cols = re.findall(r'mag\(\s*(\w+)\s*\)', self.expr) 

614 

615 cols = [c for c in re.findall(r'[a-zA-Z_]+', self.expr) if c not in self._ignore_words] 

616 not_a_col = [] 

617 for c in flux_cols: 

618 if not re.search('_instFlux$', c): 

619 cols.append(f'{c}_instFlux') 

620 not_a_col.append(c) 

621 else: 

622 cols.append(c) 

623 

624 return list(set([c for c in cols if c not in not_a_col])) 

625 

626 def _func(self, df): 

627 return mag_aware_eval(df, self.expr) 

628 

629 

630class Column(Functor): 

631 """Get column with specified name 

632 """ 

633 

634 def __init__(self, col, **kwargs): 

635 self.col = col 

636 super().__init__(**kwargs) 

637 

638 @property 

639 def name(self): 

640 return self.col 

641 

642 @property 

643 def columns(self): 

644 return [self.col] 

645 

646 def _func(self, df): 

647 return df[self.col] 

648 

649 

650class Index(Functor): 

651 """Return the value of the index for each object 

652 """ 

653 

654 columns = ['coord_ra'] # just a dummy; something has to be here 

655 _defaultDataset = 'ref' 

656 _defaultNoDup = True 

657 

658 def _func(self, df): 

659 return pd.Series(df.index, index=df.index) 

660 

661 

662class IDColumn(Column): 

663 col = 'id' 

664 _allow_difference = False 

665 _defaultNoDup = True 

666 

667 def _func(self, df): 

668 return pd.Series(df.index, index=df.index) 

669 

670 

671class FootprintNPix(Column): 

672 col = 'base_Footprint_nPix' 

673 

674 

675class CoordColumn(Column): 

676 """Base class for coordinate column, in degrees 

677 """ 

678 _radians = True 

679 

680 def __init__(self, col, **kwargs): 

681 super().__init__(col, **kwargs) 

682 

683 def _func(self, df): 

684 # Must not modify original column in case that column is used by another functor 

685 output = df[self.col] * 180 / np.pi if self._radians else df[self.col] 

686 return output 

687 

688 

689class RAColumn(CoordColumn): 

690 """Right Ascension, in degrees 

691 """ 

692 name = 'RA' 

693 _defaultNoDup = True 

694 

695 def __init__(self, **kwargs): 

696 super().__init__('coord_ra', **kwargs) 

697 

698 def __call__(self, catalog, **kwargs): 

699 return super().__call__(catalog, **kwargs) 

700 

701 

702class DecColumn(CoordColumn): 

703 """Declination, in degrees 

704 """ 

705 name = 'Dec' 

706 _defaultNoDup = True 

707 

708 def __init__(self, **kwargs): 

709 super().__init__('coord_dec', **kwargs) 

710 

711 def __call__(self, catalog, **kwargs): 

712 return super().__call__(catalog, **kwargs) 

713 

714 

715class HtmIndex20(Functor): 

716 """Compute the level 20 HtmIndex for the catalog. 

717 

718 Notes 

719 ----- 

720 This functor was implemented to satisfy requirements of old APDB interface 

721 which required ``pixelId`` column in DiaObject with HTM20 index. APDB 

722 interface had migrated to not need that information, but we keep this 

723 class in case it may be useful for something else. 

724 """ 

725 name = "Htm20" 

726 htmLevel = 20 

727 _radians = True 

728 

729 def __init__(self, ra, decl, **kwargs): 

730 self.pixelator = sphgeom.HtmPixelization(self.htmLevel) 

731 self.ra = ra 

732 self.decl = decl 

733 self._columns = [self.ra, self.decl] 

734 super().__init__(**kwargs) 

735 

736 def _func(self, df): 

737 

738 def computePixel(row): 

739 if self._radians: 

740 sphPoint = geom.SpherePoint(row[self.ra], 

741 row[self.decl], 

742 geom.radians) 

743 else: 

744 sphPoint = geom.SpherePoint(row[self.ra], 

745 row[self.decl], 

746 geom.degrees) 

747 return self.pixelator.index(sphPoint.getVector()) 

748 

749 return df.apply(computePixel, axis=1) 

750 

751 

752def fluxName(col): 

753 if not col.endswith('_instFlux'): 

754 col += '_instFlux' 

755 return col 

756 

757 

758def fluxErrName(col): 

759 if not col.endswith('_instFluxErr'): 

760 col += '_instFluxErr' 

761 return col 

762 

763 

764class Mag(Functor): 

765 """Compute calibrated magnitude 

766 

767 Takes a `calib` argument, which returns the flux at mag=0 

768 as `calib.getFluxMag0()`. If not provided, then the default 

769 `fluxMag0` is 63095734448.0194, which is default for HSC. 

770 This default should be removed in DM-21955 

771 

772 This calculation hides warnings about invalid values and dividing by zero. 

773 

774 As for all functors, a `dataset` and `filt` kwarg should be provided upon 

775 initialization. Unlike the default `Functor`, however, the default dataset 

776 for a `Mag` is `'meas'`, rather than `'ref'`. 

777 

778 Parameters 

779 ---------- 

780 col : `str` 

781 Name of flux column from which to compute magnitude. Can be parseable 

782 by `lsst.pipe.tasks.functors.fluxName` function---that is, you can pass 

783 `'modelfit_CModel'` instead of `'modelfit_CModel_instFlux'`) and it will 

784 understand. 

785 calib : `lsst.afw.image.calib.Calib` (optional) 

786 Object that knows zero point. 

787 """ 

788 _defaultDataset = 'meas' 

789 

790 def __init__(self, col, calib=None, **kwargs): 

791 self.col = fluxName(col) 

792 self.calib = calib 

793 if calib is not None: 

794 self.fluxMag0 = calib.getFluxMag0()[0] 

795 else: 

796 # TO DO: DM-21955 Replace hard coded photometic calibration values 

797 self.fluxMag0 = 63095734448.0194 

798 

799 super().__init__(**kwargs) 

800 

801 @property 

802 def columns(self): 

803 return [self.col] 

804 

805 def _func(self, df): 

806 with np.warnings.catch_warnings(): 

807 np.warnings.filterwarnings('ignore', r'invalid value encountered') 

808 np.warnings.filterwarnings('ignore', r'divide by zero') 

809 return -2.5*np.log10(df[self.col] / self.fluxMag0) 

810 

811 @property 

812 def name(self): 

813 return f'mag_{self.col}' 

814 

815 

816class MagErr(Mag): 

817 """Compute calibrated magnitude uncertainty 

818 

819 Takes the same `calib` object as `lsst.pipe.tasks.functors.Mag`. 

820 

821 Parameters 

822 col : `str` 

823 Name of flux column 

824 calib : `lsst.afw.image.calib.Calib` (optional) 

825 Object that knows zero point. 

826 """ 

827 

828 def __init__(self, *args, **kwargs): 

829 super().__init__(*args, **kwargs) 

830 if self.calib is not None: 

831 self.fluxMag0Err = self.calib.getFluxMag0()[1] 

832 else: 

833 self.fluxMag0Err = 0. 

834 

835 @property 

836 def columns(self): 

837 return [self.col, self.col + 'Err'] 

838 

839 def _func(self, df): 

840 with np.warnings.catch_warnings(): 

841 np.warnings.filterwarnings('ignore', r'invalid value encountered') 

842 np.warnings.filterwarnings('ignore', r'divide by zero') 

843 fluxCol, fluxErrCol = self.columns 

844 x = df[fluxErrCol] / df[fluxCol] 

845 y = self.fluxMag0Err / self.fluxMag0 

846 magErr = (2.5 / np.log(10.)) * np.sqrt(x*x + y*y) 

847 return magErr 

848 

849 @property 

850 def name(self): 

851 return super().name + '_err' 

852 

853 

854class NanoMaggie(Mag): 

855 """ 

856 """ 

857 

858 def _func(self, df): 

859 return (df[self.col] / self.fluxMag0) * 1e9 

860 

861 

862class MagDiff(Functor): 

863 _defaultDataset = 'meas' 

864 

865 """Functor to calculate magnitude difference""" 

866 

867 def __init__(self, col1, col2, **kwargs): 

868 self.col1 = fluxName(col1) 

869 self.col2 = fluxName(col2) 

870 super().__init__(**kwargs) 

871 

872 @property 

873 def columns(self): 

874 return [self.col1, self.col2] 

875 

876 def _func(self, df): 

877 with np.warnings.catch_warnings(): 

878 np.warnings.filterwarnings('ignore', r'invalid value encountered') 

879 np.warnings.filterwarnings('ignore', r'divide by zero') 

880 return -2.5*np.log10(df[self.col1]/df[self.col2]) 

881 

882 @property 

883 def name(self): 

884 return f'(mag_{self.col1} - mag_{self.col2})' 

885 

886 @property 

887 def shortname(self): 

888 return f'magDiff_{self.col1}_{self.col2}' 

889 

890 

891class Color(Functor): 

892 """Compute the color between two filters 

893 

894 Computes color by initializing two different `Mag` 

895 functors based on the `col` and filters provided, and 

896 then returning the difference. 

897 

898 This is enabled by the `_func` expecting a dataframe with a 

899 multilevel column index, with both `'band'` and `'column'`, 

900 instead of just `'column'`, which is the `Functor` default. 

901 This is controlled by the `_dfLevels` attribute. 

902 

903 Also of note, the default dataset for `Color` is `forced_src'`, 

904 whereas for `Mag` it is `'meas'`. 

905 

906 Parameters 

907 ---------- 

908 col : str 

909 Name of flux column from which to compute; same as would be passed to 

910 `lsst.pipe.tasks.functors.Mag`. 

911 

912 filt2, filt1 : str 

913 Filters from which to compute magnitude difference. 

914 Color computed is `Mag(filt2) - Mag(filt1)`. 

915 """ 

916 _defaultDataset = 'forced_src' 

917 _dfLevels = ('band', 'column') 

918 _defaultNoDup = True 

919 

920 def __init__(self, col, filt2, filt1, **kwargs): 

921 self.col = fluxName(col) 

922 if filt2 == filt1: 

923 raise RuntimeError("Cannot compute Color for %s: %s - %s " % (col, filt2, filt1)) 

924 self.filt2 = filt2 

925 self.filt1 = filt1 

926 

927 self.mag2 = Mag(col, filt=filt2, **kwargs) 

928 self.mag1 = Mag(col, filt=filt1, **kwargs) 

929 

930 super().__init__(**kwargs) 

931 

932 @property 

933 def filt(self): 

934 return None 

935 

936 @filt.setter 

937 def filt(self, filt): 

938 pass 

939 

940 def _func(self, df): 

941 mag2 = self.mag2._func(df[self.filt2]) 

942 mag1 = self.mag1._func(df[self.filt1]) 

943 return mag2 - mag1 

944 

945 @property 

946 def columns(self): 

947 return [self.mag1.col, self.mag2.col] 

948 

949 def multilevelColumns(self, parq, **kwargs): 

950 return [(self.dataset, self.filt1, self.col), (self.dataset, self.filt2, self.col)] 

951 

952 @property 

953 def name(self): 

954 return f'{self.filt2} - {self.filt1} ({self.col})' 

955 

956 @property 

957 def shortname(self): 

958 return f"{self.col}_{self.filt2.replace('-', '')}m{self.filt1.replace('-', '')}" 

959 

960 

961class Labeller(Functor): 

962 """Main function of this subclass is to override the dropna=True 

963 """ 

964 _null_label = 'null' 

965 _allow_difference = False 

966 name = 'label' 

967 _force_str = False 

968 

969 def __call__(self, parq, dropna=False, **kwargs): 

970 return super().__call__(parq, dropna=False, **kwargs) 

971 

972 

973class StarGalaxyLabeller(Labeller): 

974 _columns = ["base_ClassificationExtendedness_value"] 

975 _column = "base_ClassificationExtendedness_value" 

976 

977 def _func(self, df): 

978 x = df[self._columns][self._column] 

979 mask = x.isnull() 

980 test = (x < 0.5).astype(int) 

981 test = test.mask(mask, 2) 

982 

983 # TODO: DM-21954 Look into veracity of inline comment below 

984 # are these backwards? 

985 categories = ['galaxy', 'star', self._null_label] 

986 label = pd.Series(pd.Categorical.from_codes(test, categories=categories), 

987 index=x.index, name='label') 

988 if self._force_str: 

989 label = label.astype(str) 

990 return label 

991 

992 

993class NumStarLabeller(Labeller): 

994 _columns = ['numStarFlags'] 

995 labels = {"star": 0, "maybe": 1, "notStar": 2} 

996 

997 def _func(self, df): 

998 x = df[self._columns][self._columns[0]] 

999 

1000 # Number of filters 

1001 n = len(x.unique()) - 1 

1002 

1003 labels = ['noStar', 'maybe', 'star'] 

1004 label = pd.Series(pd.cut(x, [-1, 0, n-1, n], labels=labels), 

1005 index=x.index, name='label') 

1006 

1007 if self._force_str: 

1008 label = label.astype(str) 

1009 

1010 return label 

1011 

1012 

1013class DeconvolvedMoments(Functor): 

1014 name = 'Deconvolved Moments' 

1015 shortname = 'deconvolvedMoments' 

1016 _columns = ("ext_shapeHSM_HsmSourceMoments_xx", 

1017 "ext_shapeHSM_HsmSourceMoments_yy", 

1018 "base_SdssShape_xx", "base_SdssShape_yy", 

1019 "ext_shapeHSM_HsmPsfMoments_xx", 

1020 "ext_shapeHSM_HsmPsfMoments_yy") 

1021 

1022 def _func(self, df): 

1023 """Calculate deconvolved moments""" 

1024 if "ext_shapeHSM_HsmSourceMoments_xx" in df.columns: # _xx added by tdm 

1025 hsm = df["ext_shapeHSM_HsmSourceMoments_xx"] + df["ext_shapeHSM_HsmSourceMoments_yy"] 

1026 else: 

1027 hsm = np.ones(len(df))*np.nan 

1028 sdss = df["base_SdssShape_xx"] + df["base_SdssShape_yy"] 

1029 if "ext_shapeHSM_HsmPsfMoments_xx" in df.columns: 

1030 psf = df["ext_shapeHSM_HsmPsfMoments_xx"] + df["ext_shapeHSM_HsmPsfMoments_yy"] 

1031 else: 

1032 # LSST does not have shape.sdss.psf. Could instead add base_PsfShape to catalog using 

1033 # exposure.getPsf().computeShape(s.getCentroid()).getIxx() 

1034 # raise TaskError("No psf shape parameter found in catalog") 

1035 raise RuntimeError('No psf shape parameter found in catalog') 

1036 

1037 return hsm.where(np.isfinite(hsm), sdss) - psf 

1038 

1039 

1040class SdssTraceSize(Functor): 

1041 """Functor to calculate SDSS trace radius size for sources""" 

1042 name = "SDSS Trace Size" 

1043 shortname = 'sdssTrace' 

1044 _columns = ("base_SdssShape_xx", "base_SdssShape_yy") 

1045 

1046 def _func(self, df): 

1047 srcSize = np.sqrt(0.5*(df["base_SdssShape_xx"] + df["base_SdssShape_yy"])) 

1048 return srcSize 

1049 

1050 

1051class PsfSdssTraceSizeDiff(Functor): 

1052 """Functor to calculate SDSS trace radius size difference (%) between object and psf model""" 

1053 name = "PSF - SDSS Trace Size" 

1054 shortname = 'psf_sdssTrace' 

1055 _columns = ("base_SdssShape_xx", "base_SdssShape_yy", 

1056 "base_SdssShape_psf_xx", "base_SdssShape_psf_yy") 

1057 

1058 def _func(self, df): 

1059 srcSize = np.sqrt(0.5*(df["base_SdssShape_xx"] + df["base_SdssShape_yy"])) 

1060 psfSize = np.sqrt(0.5*(df["base_SdssShape_psf_xx"] + df["base_SdssShape_psf_yy"])) 

1061 sizeDiff = 100*(srcSize - psfSize)/(0.5*(srcSize + psfSize)) 

1062 return sizeDiff 

1063 

1064 

1065class HsmTraceSize(Functor): 

1066 """Functor to calculate HSM trace radius size for sources""" 

1067 name = 'HSM Trace Size' 

1068 shortname = 'hsmTrace' 

1069 _columns = ("ext_shapeHSM_HsmSourceMoments_xx", 

1070 "ext_shapeHSM_HsmSourceMoments_yy") 

1071 

1072 def _func(self, df): 

1073 srcSize = np.sqrt(0.5*(df["ext_shapeHSM_HsmSourceMoments_xx"] 

1074 + df["ext_shapeHSM_HsmSourceMoments_yy"])) 

1075 return srcSize 

1076 

1077 

1078class PsfHsmTraceSizeDiff(Functor): 

1079 """Functor to calculate HSM trace radius size difference (%) between object and psf model""" 

1080 name = 'PSF - HSM Trace Size' 

1081 shortname = 'psf_HsmTrace' 

1082 _columns = ("ext_shapeHSM_HsmSourceMoments_xx", 

1083 "ext_shapeHSM_HsmSourceMoments_yy", 

1084 "ext_shapeHSM_HsmPsfMoments_xx", 

1085 "ext_shapeHSM_HsmPsfMoments_yy") 

1086 

1087 def _func(self, df): 

1088 srcSize = np.sqrt(0.5*(df["ext_shapeHSM_HsmSourceMoments_xx"] 

1089 + df["ext_shapeHSM_HsmSourceMoments_yy"])) 

1090 psfSize = np.sqrt(0.5*(df["ext_shapeHSM_HsmPsfMoments_xx"] 

1091 + df["ext_shapeHSM_HsmPsfMoments_yy"])) 

1092 sizeDiff = 100*(srcSize - psfSize)/(0.5*(srcSize + psfSize)) 

1093 return sizeDiff 

1094 

1095 

1096class HsmFwhm(Functor): 

1097 name = 'HSM Psf FWHM' 

1098 _columns = ('ext_shapeHSM_HsmPsfMoments_xx', 'ext_shapeHSM_HsmPsfMoments_yy') 

1099 # TODO: DM-21403 pixel scale should be computed from the CD matrix or transform matrix 

1100 pixelScale = 0.168 

1101 SIGMA2FWHM = 2*np.sqrt(2*np.log(2)) 

1102 

1103 def _func(self, df): 

1104 return self.pixelScale*self.SIGMA2FWHM*np.sqrt( 

1105 0.5*(df['ext_shapeHSM_HsmPsfMoments_xx'] + df['ext_shapeHSM_HsmPsfMoments_yy'])) 

1106 

1107 

1108class E1(Functor): 

1109 name = "Distortion Ellipticity (e1)" 

1110 shortname = "Distortion" 

1111 

1112 def __init__(self, colXX, colXY, colYY, **kwargs): 

1113 self.colXX = colXX 

1114 self.colXY = colXY 

1115 self.colYY = colYY 

1116 self._columns = [self.colXX, self.colXY, self.colYY] 

1117 super().__init__(**kwargs) 

1118 

1119 @property 

1120 def columns(self): 

1121 return [self.colXX, self.colXY, self.colYY] 

1122 

1123 def _func(self, df): 

1124 return df[self.colXX] - df[self.colYY] / (df[self.colXX] + df[self.colYY]) 

1125 

1126 

1127class E2(Functor): 

1128 name = "Ellipticity e2" 

1129 

1130 def __init__(self, colXX, colXY, colYY, **kwargs): 

1131 self.colXX = colXX 

1132 self.colXY = colXY 

1133 self.colYY = colYY 

1134 super().__init__(**kwargs) 

1135 

1136 @property 

1137 def columns(self): 

1138 return [self.colXX, self.colXY, self.colYY] 

1139 

1140 def _func(self, df): 

1141 return 2*df[self.colXY] / (df[self.colXX] + df[self.colYY]) 

1142 

1143 

1144class RadiusFromQuadrupole(Functor): 

1145 

1146 def __init__(self, colXX, colXY, colYY, **kwargs): 

1147 self.colXX = colXX 

1148 self.colXY = colXY 

1149 self.colYY = colYY 

1150 super().__init__(**kwargs) 

1151 

1152 @property 

1153 def columns(self): 

1154 return [self.colXX, self.colXY, self.colYY] 

1155 

1156 def _func(self, df): 

1157 return (df[self.colXX]*df[self.colYY] - df[self.colXY]**2)**0.25 

1158 

1159 

1160class LocalWcs(Functor): 

1161 """Computations using the stored localWcs. 

1162 """ 

1163 name = "LocalWcsOperations" 

1164 

1165 def __init__(self, 

1166 colCD_1_1, 

1167 colCD_1_2, 

1168 colCD_2_1, 

1169 colCD_2_2, 

1170 **kwargs): 

1171 self.colCD_1_1 = colCD_1_1 

1172 self.colCD_1_2 = colCD_1_2 

1173 self.colCD_2_1 = colCD_2_1 

1174 self.colCD_2_2 = colCD_2_2 

1175 super().__init__(**kwargs) 

1176 

1177 def computeDeltaRaDec(self, x, y, cd11, cd12, cd21, cd22): 

1178 """Compute the distance on the sphere from x2, y1 to x1, y1. 

1179 

1180 Parameters 

1181 ---------- 

1182 x : `pandas.Series` 

1183 X pixel coordinate. 

1184 y : `pandas.Series` 

1185 Y pixel coordinate. 

1186 cd11 : `pandas.Series` 

1187 [1, 1] element of the local Wcs affine transform. 

1188 cd11 : `pandas.Series` 

1189 [1, 1] element of the local Wcs affine transform. 

1190 cd12 : `pandas.Series` 

1191 [1, 2] element of the local Wcs affine transform. 

1192 cd21 : `pandas.Series` 

1193 [2, 1] element of the local Wcs affine transform. 

1194 cd22 : `pandas.Series` 

1195 [2, 2] element of the local Wcs affine transform. 

1196 

1197 Returns 

1198 ------- 

1199 raDecTuple : tuple 

1200 RA and dec conversion of x and y given the local Wcs. Returned 

1201 units are in radians. 

1202 

1203 """ 

1204 return (x * cd11 + y * cd12, x * cd21 + y * cd22) 

1205 

1206 def computeSkySeperation(self, ra1, dec1, ra2, dec2): 

1207 """Compute the local pixel scale conversion. 

1208 

1209 Parameters 

1210 ---------- 

1211 ra1 : `pandas.Series` 

1212 Ra of the first coordinate in radians. 

1213 dec1 : `pandas.Series` 

1214 Dec of the first coordinate in radians. 

1215 ra2 : `pandas.Series` 

1216 Ra of the second coordinate in radians. 

1217 dec2 : `pandas.Series` 

1218 Dec of the second coordinate in radians. 

1219 

1220 Returns 

1221 ------- 

1222 dist : `pandas.Series` 

1223 Distance on the sphere in radians. 

1224 """ 

1225 deltaDec = dec2 - dec1 

1226 deltaRa = ra2 - ra1 

1227 return 2 * np.arcsin( 

1228 np.sqrt( 

1229 np.sin(deltaDec / 2) ** 2 

1230 + np.cos(dec2) * np.cos(dec1) * np.sin(deltaRa / 2) ** 2)) 

1231 

1232 def getSkySeperationFromPixel(self, x1, y1, x2, y2, cd11, cd12, cd21, cd22): 

1233 """Compute the distance on the sphere from x2, y1 to x1, y1. 

1234 

1235 Parameters 

1236 ---------- 

1237 x1 : `pandas.Series` 

1238 X pixel coordinate. 

1239 y1 : `pandas.Series` 

1240 Y pixel coordinate. 

1241 x2 : `pandas.Series` 

1242 X pixel coordinate. 

1243 y2 : `pandas.Series` 

1244 Y pixel coordinate. 

1245 cd11 : `pandas.Series` 

1246 [1, 1] element of the local Wcs affine transform. 

1247 cd11 : `pandas.Series` 

1248 [1, 1] element of the local Wcs affine transform. 

1249 cd12 : `pandas.Series` 

1250 [1, 2] element of the local Wcs affine transform. 

1251 cd21 : `pandas.Series` 

1252 [2, 1] element of the local Wcs affine transform. 

1253 cd22 : `pandas.Series` 

1254 [2, 2] element of the local Wcs affine transform. 

1255 

1256 Returns 

1257 ------- 

1258 Distance : `pandas.Series` 

1259 Arcseconds per pixel at the location of the local WC 

1260 """ 

1261 ra1, dec1 = self.computeDeltaRaDec(x1, y1, cd11, cd12, cd21, cd22) 

1262 ra2, dec2 = self.computeDeltaRaDec(x2, y2, cd11, cd12, cd21, cd22) 

1263 # Great circle distance for small separations. 

1264 return self.computeSkySeperation(ra1, dec1, ra2, dec2) 

1265 

1266 

1267class ComputePixelScale(LocalWcs): 

1268 """Compute the local pixel scale from the stored CDMatrix. 

1269 """ 

1270 name = "PixelScale" 

1271 

1272 @property 

1273 def columns(self): 

1274 return [self.colCD_1_1, 

1275 self.colCD_1_2, 

1276 self.colCD_2_1, 

1277 self.colCD_2_2] 

1278 

1279 def pixelScaleArcseconds(self, cd11, cd12, cd21, cd22): 

1280 """Compute the local pixel to scale conversion in arcseconds. 

1281 

1282 Parameters 

1283 ---------- 

1284 cd11 : `pandas.Series` 

1285 [1, 1] element of the local Wcs affine transform in radians. 

1286 cd11 : `pandas.Series` 

1287 [1, 1] element of the local Wcs affine transform in radians. 

1288 cd12 : `pandas.Series` 

1289 [1, 2] element of the local Wcs affine transform in radians. 

1290 cd21 : `pandas.Series` 

1291 [2, 1] element of the local Wcs affine transform in radians. 

1292 cd22 : `pandas.Series` 

1293 [2, 2] element of the local Wcs affine transform in radians. 

1294 

1295 Returns 

1296 ------- 

1297 pixScale : `pandas.Series` 

1298 Arcseconds per pixel at the location of the local WC 

1299 """ 

1300 return 3600 * np.degrees(np.sqrt(np.fabs(cd11 * cd22 - cd12 * cd21))) 

1301 

1302 def _func(self, df): 

1303 return self.pixelScaleArcseconds(df[self.colCD_1_1], 

1304 df[self.colCD_1_2], 

1305 df[self.colCD_2_1], 

1306 df[self.colCD_2_2]) 

1307 

1308 

1309class ConvertPixelToArcseconds(ComputePixelScale): 

1310 """Convert a value in units pixels squared to units arcseconds squared. 

1311 """ 

1312 

1313 def __init__(self, 

1314 col, 

1315 colCD_1_1, 

1316 colCD_1_2, 

1317 colCD_2_1, 

1318 colCD_2_2, 

1319 **kwargs): 

1320 self.col = col 

1321 super().__init__(colCD_1_1, 

1322 colCD_1_2, 

1323 colCD_2_1, 

1324 colCD_2_2, 

1325 **kwargs) 

1326 

1327 @property 

1328 def name(self): 

1329 return f"{self.col}_asArcseconds" 

1330 

1331 @property 

1332 def columns(self): 

1333 return [self.col, 

1334 self.colCD_1_1, 

1335 self.colCD_1_2, 

1336 self.colCD_2_1, 

1337 self.colCD_2_2] 

1338 

1339 def _func(self, df): 

1340 return df[self.col] * self.pixelScaleArcseconds(df[self.colCD_1_1], 

1341 df[self.colCD_1_2], 

1342 df[self.colCD_2_1], 

1343 df[self.colCD_2_2]) 

1344 

1345 

1346class ConvertPixelSqToArcsecondsSq(ComputePixelScale): 

1347 """Convert a value in units pixels to units arcseconds. 

1348 """ 

1349 

1350 def __init__(self, 

1351 col, 

1352 colCD_1_1, 

1353 colCD_1_2, 

1354 colCD_2_1, 

1355 colCD_2_2, 

1356 **kwargs): 

1357 self.col = col 

1358 super().__init__(colCD_1_1, 

1359 colCD_1_2, 

1360 colCD_2_1, 

1361 colCD_2_2, 

1362 **kwargs) 

1363 

1364 @property 

1365 def name(self): 

1366 return f"{self.col}_asArcsecondsSq" 

1367 

1368 @property 

1369 def columns(self): 

1370 return [self.col, 

1371 self.colCD_1_1, 

1372 self.colCD_1_2, 

1373 self.colCD_2_1, 

1374 self.colCD_2_2] 

1375 

1376 def _func(self, df): 

1377 pixScale = self.pixelScaleArcseconds(df[self.colCD_1_1], 

1378 df[self.colCD_1_2], 

1379 df[self.colCD_2_1], 

1380 df[self.colCD_2_2]) 

1381 return df[self.col] * pixScale * pixScale 

1382 

1383 

1384class ReferenceBand(Functor): 

1385 name = 'Reference Band' 

1386 shortname = 'refBand' 

1387 

1388 @property 

1389 def columns(self): 

1390 return ["merge_measurement_i", 

1391 "merge_measurement_r", 

1392 "merge_measurement_z", 

1393 "merge_measurement_y", 

1394 "merge_measurement_g"] 

1395 

1396 def _func(self, df): 

1397 def getFilterAliasName(row): 

1398 # get column name with the max value (True > False) 

1399 colName = row.idxmax() 

1400 return colName.replace('merge_measurement_', '') 

1401 

1402 return df[self.columns].apply(getFilterAliasName, axis=1) 

1403 

1404 

1405class Photometry(Functor): 

1406 # AB to NanoJansky (3631 Jansky) 

1407 AB_FLUX_SCALE = (0 * u.ABmag).to_value(u.nJy) 

1408 LOG_AB_FLUX_SCALE = 12.56 

1409 FIVE_OVER_2LOG10 = 1.085736204758129569 

1410 # TO DO: DM-21955 Replace hard coded photometic calibration values 

1411 COADD_ZP = 27 

1412 

1413 def __init__(self, colFlux, colFluxErr=None, calib=None, **kwargs): 

1414 self.vhypot = np.vectorize(self.hypot) 

1415 self.col = colFlux 

1416 self.colFluxErr = colFluxErr 

1417 

1418 self.calib = calib 

1419 if calib is not None: 

1420 self.fluxMag0, self.fluxMag0Err = calib.getFluxMag0() 

1421 else: 

1422 self.fluxMag0 = 1./np.power(10, -0.4*self.COADD_ZP) 

1423 self.fluxMag0Err = 0. 

1424 

1425 super().__init__(**kwargs) 

1426 

1427 @property 

1428 def columns(self): 

1429 return [self.col] 

1430 

1431 @property 

1432 def name(self): 

1433 return f'mag_{self.col}' 

1434 

1435 @classmethod 

1436 def hypot(cls, a, b): 

1437 if np.abs(a) < np.abs(b): 

1438 a, b = b, a 

1439 if a == 0.: 

1440 return 0. 

1441 q = b/a 

1442 return np.abs(a) * np.sqrt(1. + q*q) 

1443 

1444 def dn2flux(self, dn, fluxMag0): 

1445 return self.AB_FLUX_SCALE * dn / fluxMag0 

1446 

1447 def dn2mag(self, dn, fluxMag0): 

1448 with np.warnings.catch_warnings(): 

1449 np.warnings.filterwarnings('ignore', r'invalid value encountered') 

1450 np.warnings.filterwarnings('ignore', r'divide by zero') 

1451 return -2.5 * np.log10(dn/fluxMag0) 

1452 

1453 def dn2fluxErr(self, dn, dnErr, fluxMag0, fluxMag0Err): 

1454 retVal = self.vhypot(dn * fluxMag0Err, dnErr * fluxMag0) 

1455 retVal *= self.AB_FLUX_SCALE / fluxMag0 / fluxMag0 

1456 return retVal 

1457 

1458 def dn2MagErr(self, dn, dnErr, fluxMag0, fluxMag0Err): 

1459 retVal = self.dn2fluxErr(dn, dnErr, fluxMag0, fluxMag0Err) / self.dn2flux(dn, fluxMag0) 

1460 return self.FIVE_OVER_2LOG10 * retVal 

1461 

1462 

1463class NanoJansky(Photometry): 

1464 def _func(self, df): 

1465 return self.dn2flux(df[self.col], self.fluxMag0) 

1466 

1467 

1468class NanoJanskyErr(Photometry): 

1469 @property 

1470 def columns(self): 

1471 return [self.col, self.colFluxErr] 

1472 

1473 def _func(self, df): 

1474 retArr = self.dn2fluxErr(df[self.col], df[self.colFluxErr], self.fluxMag0, self.fluxMag0Err) 

1475 return pd.Series(retArr, index=df.index) 

1476 

1477 

1478class Magnitude(Photometry): 

1479 def _func(self, df): 

1480 return self.dn2mag(df[self.col], self.fluxMag0) 

1481 

1482 

1483class MagnitudeErr(Photometry): 

1484 @property 

1485 def columns(self): 

1486 return [self.col, self.colFluxErr] 

1487 

1488 def _func(self, df): 

1489 retArr = self.dn2MagErr(df[self.col], df[self.colFluxErr], self.fluxMag0, self.fluxMag0Err) 

1490 return pd.Series(retArr, index=df.index) 

1491 

1492 

1493class LocalPhotometry(Functor): 

1494 """Base class for calibrating the specified instrument flux column using 

1495 the local photometric calibration. 

1496 

1497 Parameters 

1498 ---------- 

1499 instFluxCol : `str` 

1500 Name of the instrument flux column. 

1501 instFluxErrCol : `str` 

1502 Name of the assocated error columns for ``instFluxCol``. 

1503 photoCalibCol : `str` 

1504 Name of local calibration column. 

1505 photoCalibErrCol : `str` 

1506 Error associated with ``photoCalibCol`` 

1507 

1508 See also 

1509 -------- 

1510 LocalPhotometry 

1511 LocalNanojansky 

1512 LocalNanojanskyErr 

1513 LocalMagnitude 

1514 LocalMagnitudeErr 

1515 """ 

1516 logNJanskyToAB = (1 * u.nJy).to_value(u.ABmag) 

1517 

1518 def __init__(self, 

1519 instFluxCol, 

1520 instFluxErrCol, 

1521 photoCalibCol, 

1522 photoCalibErrCol, 

1523 **kwargs): 

1524 self.instFluxCol = instFluxCol 

1525 self.instFluxErrCol = instFluxErrCol 

1526 self.photoCalibCol = photoCalibCol 

1527 self.photoCalibErrCol = photoCalibErrCol 

1528 super().__init__(**kwargs) 

1529 

1530 def instFluxToNanojansky(self, instFlux, localCalib): 

1531 """Convert instrument flux to nanojanskys. 

1532 

1533 Parameters 

1534 ---------- 

1535 instFlux : `numpy.ndarray` or `pandas.Series` 

1536 Array of instrument flux measurements 

1537 localCalib : `numpy.ndarray` or `pandas.Series` 

1538 Array of local photometric calibration estimates. 

1539 

1540 Returns 

1541 ------- 

1542 calibFlux : `numpy.ndarray` or `pandas.Series` 

1543 Array of calibrated flux measurements. 

1544 """ 

1545 return instFlux * localCalib 

1546 

1547 def instFluxErrToNanojanskyErr(self, instFlux, instFluxErr, localCalib, localCalibErr): 

1548 """Convert instrument flux to nanojanskys. 

1549 

1550 Parameters 

1551 ---------- 

1552 instFlux : `numpy.ndarray` or `pandas.Series` 

1553 Array of instrument flux measurements 

1554 instFluxErr : `numpy.ndarray` or `pandas.Series` 

1555 Errors on associated ``instFlux`` values 

1556 localCalib : `numpy.ndarray` or `pandas.Series` 

1557 Array of local photometric calibration estimates. 

1558 localCalibErr : `numpy.ndarray` or `pandas.Series` 

1559 Errors on associated ``localCalib`` values 

1560 

1561 Returns 

1562 ------- 

1563 calibFluxErr : `numpy.ndarray` or `pandas.Series` 

1564 Errors on calibrated flux measurements. 

1565 """ 

1566 return np.hypot(instFluxErr * localCalib, instFlux * localCalibErr) 

1567 

1568 def instFluxToMagnitude(self, instFlux, localCalib): 

1569 """Convert instrument flux to nanojanskys. 

1570 

1571 Parameters 

1572 ---------- 

1573 instFlux : `numpy.ndarray` or `pandas.Series` 

1574 Array of instrument flux measurements 

1575 localCalib : `numpy.ndarray` or `pandas.Series` 

1576 Array of local photometric calibration estimates. 

1577 

1578 Returns 

1579 ------- 

1580 calibMag : `numpy.ndarray` or `pandas.Series` 

1581 Array of calibrated AB magnitudes. 

1582 """ 

1583 return -2.5 * np.log10(self.instFluxToNanojansky(instFlux, localCalib)) + self.logNJanskyToAB 

1584 

1585 def instFluxErrToMagnitudeErr(self, instFlux, instFluxErr, localCalib, localCalibErr): 

1586 """Convert instrument flux err to nanojanskys. 

1587 

1588 Parameters 

1589 ---------- 

1590 instFlux : `numpy.ndarray` or `pandas.Series` 

1591 Array of instrument flux measurements 

1592 instFluxErr : `numpy.ndarray` or `pandas.Series` 

1593 Errors on associated ``instFlux`` values 

1594 localCalib : `numpy.ndarray` or `pandas.Series` 

1595 Array of local photometric calibration estimates. 

1596 localCalibErr : `numpy.ndarray` or `pandas.Series` 

1597 Errors on associated ``localCalib`` values 

1598 

1599 Returns 

1600 ------- 

1601 calibMagErr: `numpy.ndarray` or `pandas.Series` 

1602 Error on calibrated AB magnitudes. 

1603 """ 

1604 err = self.instFluxErrToNanojanskyErr(instFlux, instFluxErr, localCalib, localCalibErr) 

1605 return 2.5 / np.log(10) * err / self.instFluxToNanojansky(instFlux, instFluxErr) 

1606 

1607 

1608class LocalNanojansky(LocalPhotometry): 

1609 """Compute calibrated fluxes using the local calibration value. 

1610 

1611 See also 

1612 -------- 

1613 LocalNanojansky 

1614 LocalNanojanskyErr 

1615 LocalMagnitude 

1616 LocalMagnitudeErr 

1617 """ 

1618 

1619 @property 

1620 def columns(self): 

1621 return [self.instFluxCol, self.photoCalibCol] 

1622 

1623 @property 

1624 def name(self): 

1625 return f'flux_{self.instFluxCol}' 

1626 

1627 def _func(self, df): 

1628 return self.instFluxToNanojansky(df[self.instFluxCol], df[self.photoCalibCol]) 

1629 

1630 

1631class LocalNanojanskyErr(LocalPhotometry): 

1632 """Compute calibrated flux errors using the local calibration value. 

1633 

1634 See also 

1635 -------- 

1636 LocalNanojansky 

1637 LocalNanojanskyErr 

1638 LocalMagnitude 

1639 LocalMagnitudeErr 

1640 """ 

1641 

1642 @property 

1643 def columns(self): 

1644 return [self.instFluxCol, self.instFluxErrCol, 

1645 self.photoCalibCol, self.photoCalibErrCol] 

1646 

1647 @property 

1648 def name(self): 

1649 return f'fluxErr_{self.instFluxCol}' 

1650 

1651 def _func(self, df): 

1652 return self.instFluxErrToNanojanskyErr(df[self.instFluxCol], df[self.instFluxErrCol], 

1653 df[self.photoCalibCol], df[self.photoCalibErrCol]) 

1654 

1655 

1656class LocalMagnitude(LocalPhotometry): 

1657 """Compute calibrated AB magnitudes using the local calibration value. 

1658 

1659 See also 

1660 -------- 

1661 LocalNanojansky 

1662 LocalNanojanskyErr 

1663 LocalMagnitude 

1664 LocalMagnitudeErr 

1665 """ 

1666 

1667 @property 

1668 def columns(self): 

1669 return [self.instFluxCol, self.photoCalibCol] 

1670 

1671 @property 

1672 def name(self): 

1673 return f'mag_{self.instFluxCol}' 

1674 

1675 def _func(self, df): 

1676 return self.instFluxToMagnitude(df[self.instFluxCol], 

1677 df[self.photoCalibCol]) 

1678 

1679 

1680class LocalMagnitudeErr(LocalPhotometry): 

1681 """Compute calibrated AB magnitude errors using the local calibration value. 

1682 

1683 See also 

1684 -------- 

1685 LocalNanojansky 

1686 LocalNanojanskyErr 

1687 LocalMagnitude 

1688 LocalMagnitudeErr 

1689 """ 

1690 

1691 @property 

1692 def columns(self): 

1693 return [self.instFluxCol, self.instFluxErrCol, 

1694 self.photoCalibCol, self.photoCalibErrCol] 

1695 

1696 @property 

1697 def name(self): 

1698 return f'magErr_{self.instFluxCol}' 

1699 

1700 def _func(self, df): 

1701 return self.instFluxErrToMagnitudeErr(df[self.instFluxCol], 

1702 df[self.instFluxErrCol], 

1703 df[self.photoCalibCol], 

1704 df[self.photoCalibErrCol]) 

1705 

1706 

1707class LocalDipoleMeanFlux(LocalPhotometry): 

1708 """Compute absolute mean of dipole fluxes. 

1709 

1710 See also 

1711 -------- 

1712 LocalNanojansky 

1713 LocalNanojanskyErr 

1714 LocalMagnitude 

1715 LocalMagnitudeErr 

1716 LocalDipoleMeanFlux 

1717 LocalDipoleMeanFluxErr 

1718 LocalDipoleDiffFlux 

1719 LocalDipoleDiffFluxErr 

1720 """ 

1721 def __init__(self, 

1722 instFluxPosCol, 

1723 instFluxNegCol, 

1724 instFluxPosErrCol, 

1725 instFluxNegErrCol, 

1726 photoCalibCol, 

1727 photoCalibErrCol, 

1728 **kwargs): 

1729 self.instFluxNegCol = instFluxNegCol 

1730 self.instFluxPosCol = instFluxPosCol 

1731 self.instFluxNegErrCol = instFluxNegErrCol 

1732 self.instFluxPosErrCol = instFluxPosErrCol 

1733 self.photoCalibCol = photoCalibCol 

1734 self.photoCalibErrCol = photoCalibErrCol 

1735 super().__init__(instFluxNegCol, 

1736 instFluxNegErrCol, 

1737 photoCalibCol, 

1738 photoCalibErrCol, 

1739 **kwargs) 

1740 

1741 @property 

1742 def columns(self): 

1743 return [self.instFluxPosCol, 

1744 self.instFluxNegCol, 

1745 self.photoCalibCol] 

1746 

1747 @property 

1748 def name(self): 

1749 return f'dipMeanFlux_{self.instFluxPosCol}_{self.instFluxNegCol}' 

1750 

1751 def _func(self, df): 

1752 return 0.5*(np.fabs(self.instFluxToNanojansky(df[self.instFluxNegCol], df[self.photoCalibCol])) 

1753 + np.fabs(self.instFluxToNanojansky(df[self.instFluxPosCol], df[self.photoCalibCol]))) 

1754 

1755 

1756class LocalDipoleMeanFluxErr(LocalDipoleMeanFlux): 

1757 """Compute the error on the absolute mean of dipole fluxes. 

1758 

1759 See also 

1760 -------- 

1761 LocalNanojansky 

1762 LocalNanojanskyErr 

1763 LocalMagnitude 

1764 LocalMagnitudeErr 

1765 LocalDipoleMeanFlux 

1766 LocalDipoleMeanFluxErr 

1767 LocalDipoleDiffFlux 

1768 LocalDipoleDiffFluxErr 

1769 """ 

1770 

1771 @property 

1772 def columns(self): 

1773 return [self.instFluxPosCol, 

1774 self.instFluxNegCol, 

1775 self.instFluxPosErrCol, 

1776 self.instFluxNegErrCol, 

1777 self.photoCalibCol, 

1778 self.photoCalibErrCol] 

1779 

1780 @property 

1781 def name(self): 

1782 return f'dipMeanFluxErr_{self.instFluxPosCol}_{self.instFluxNegCol}' 

1783 

1784 def _func(self, df): 

1785 return 0.5*np.sqrt( 

1786 (np.fabs(df[self.instFluxNegCol]) + np.fabs(df[self.instFluxPosCol]) 

1787 * df[self.photoCalibErrCol])**2 

1788 + (df[self.instFluxNegErrCol]**2 + df[self.instFluxPosErrCol]**2) 

1789 * df[self.photoCalibCol]**2) 

1790 

1791 

1792class LocalDipoleDiffFlux(LocalDipoleMeanFlux): 

1793 """Compute the absolute difference of dipole fluxes. 

1794 

1795 Value is (abs(pos) - abs(neg)) 

1796 

1797 See also 

1798 -------- 

1799 LocalNanojansky 

1800 LocalNanojanskyErr 

1801 LocalMagnitude 

1802 LocalMagnitudeErr 

1803 LocalDipoleMeanFlux 

1804 LocalDipoleMeanFluxErr 

1805 LocalDipoleDiffFlux 

1806 LocalDipoleDiffFluxErr 

1807 """ 

1808 

1809 @property 

1810 def columns(self): 

1811 return [self.instFluxPosCol, 

1812 self.instFluxNegCol, 

1813 self.photoCalibCol] 

1814 

1815 @property 

1816 def name(self): 

1817 return f'dipDiffFlux_{self.instFluxPosCol}_{self.instFluxNegCol}' 

1818 

1819 def _func(self, df): 

1820 return (np.fabs(self.instFluxToNanojansky(df[self.instFluxPosCol], df[self.photoCalibCol])) 

1821 - np.fabs(self.instFluxToNanojansky(df[self.instFluxNegCol], df[self.photoCalibCol]))) 

1822 

1823 

1824class LocalDipoleDiffFluxErr(LocalDipoleMeanFlux): 

1825 """Compute the error on the absolute difference of dipole fluxes. 

1826 

1827 See also 

1828 -------- 

1829 LocalNanojansky 

1830 LocalNanojanskyErr 

1831 LocalMagnitude 

1832 LocalMagnitudeErr 

1833 LocalDipoleMeanFlux 

1834 LocalDipoleMeanFluxErr 

1835 LocalDipoleDiffFlux 

1836 LocalDipoleDiffFluxErr 

1837 """ 

1838 

1839 @property 

1840 def columns(self): 

1841 return [self.instFluxPosCol, 

1842 self.instFluxNegCol, 

1843 self.instFluxPosErrCol, 

1844 self.instFluxNegErrCol, 

1845 self.photoCalibCol, 

1846 self.photoCalibErrCol] 

1847 

1848 @property 

1849 def name(self): 

1850 return f'dipDiffFluxErr_{self.instFluxPosCol}_{self.instFluxNegCol}' 

1851 

1852 def _func(self, df): 

1853 return np.sqrt( 

1854 ((np.fabs(df[self.instFluxPosCol]) - np.fabs(df[self.instFluxNegCol])) 

1855 * df[self.photoCalibErrCol])**2 

1856 + (df[self.instFluxPosErrCol]**2 + df[self.instFluxNegErrCol]**2) 

1857 * df[self.photoCalibCol]**2) 

1858 

1859 

1860class Ratio(Functor): 

1861 """Base class for returning the ratio of 2 columns. 

1862 

1863 Can be used to compute a Signal to Noise ratio for any input flux. 

1864 

1865 Parameters 

1866 ---------- 

1867 numerator : `str` 

1868 Name of the column to use at the numerator in the ratio 

1869 denominator : `str` 

1870 Name of the column to use as the denominator in the ratio. 

1871 """ 

1872 def __init__(self, 

1873 numerator, 

1874 denominator, 

1875 **kwargs): 

1876 self.numerator = numerator 

1877 self.denominator = denominator 

1878 super().__init__(**kwargs) 

1879 

1880 @property 

1881 def columns(self): 

1882 return [self.numerator, self.denominator] 

1883 

1884 @property 

1885 def name(self): 

1886 return f'ratio_{self.numerator}_{self.denominator}' 

1887 

1888 def _func(self, df): 

1889 with np.warnings.catch_warnings(): 

1890 np.warnings.filterwarnings('ignore', r'invalid value encountered') 

1891 np.warnings.filterwarnings('ignore', r'divide by zero') 

1892 return df[self.numerator] / df[self.denominator]