Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of pipe_tasks. 

2# 

3# LSST Data Management System 

4# This product includes software developed by the 

5# LSST Project (http://www.lsst.org/). 

6# See COPYRIGHT file at the top of the source tree. 

7# 

8# This program is free software: you can redistribute it and/or modify 

9# it under the terms of the GNU General Public License as published by 

10# the Free Software Foundation, either version 3 of the License, or 

11# (at your option) any later version. 

12# 

13# This program is distributed in the hope that it will be useful, 

14# but WITHOUT ANY WARRANTY; without even the implied warranty of 

15# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

16# GNU General Public License for more details. 

17# 

18# You should have received a copy of the LSST License Statement and 

19# the GNU General Public License along with this program. If not, 

20# see <https://www.lsstcorp.org/LegalNotices/>. 

21# 

import os.path
import re
import warnings
from itertools import product

import yaml
import numpy as np
import pandas as pd
import astropy.units as u

from lsst.daf.persistence import doImport
from lsst.daf.butler import DeferredDatasetHandle
import lsst.geom as geom
import lsst.sphgeom as sphgeom

from .parquetTable import ParquetTable, MultilevelParquetTable

37 

38 

def init_fromDict(initDict, basePath='lsst.pipe.tasks.functors',
                  typeKey='functor', name=None):
    """Initialize an object defined in a dictionary

    The object needs to be importable as
        f'{basePath}.{initDict[typeKey]}'
    The positional and keyword arguments (if any) are contained in
    "args" and "kwargs" entries in the dictionary, respectively.
    This is used in `functors.CompositeFunctor.from_yaml` to initialize
    a composite functor from a specification in a YAML file.

    Parameters
    ----------
    initDict : dictionary
        Dictionary describing object's initialization.  Must contain
        an entry keyed by ``typeKey`` that is the name of the object,
        relative to ``basePath``.  An optional ``'args'`` entry supplies
        positional arguments (a bare string is treated as one argument);
        every remaining entry is passed as a keyword argument.
    basePath : str
        Path relative to module in which ``initDict[typeKey]`` is defined.
    typeKey : str
        Key of ``initDict`` that is the name of the object
        (relative to `basePath`).
    name : str, optional
        Label used only in the error message if construction fails.

    Returns
    -------
    element
        The constructed object.

    Raises
    ------
    Exception
        An exception of the same type as the one raised by the constructor,
        carrying a message that identifies the functor, chained to the
        original exception.
    """
    # Work on a copy so the caller's dictionary is not consumed by the pops.
    initDict = initDict.copy()
    # TO DO: DM-21956 We should be able to define functors outside this module
    pythonType = doImport(f'{basePath}.{initDict.pop(typeKey)}')
    args = initDict.pop('args', [])
    if isinstance(args, str):
        # A single string would otherwise be unpacked character by character.
        args = [args]
    try:
        element = pythonType(*args, **initDict)
    except Exception as e:
        message = f'Error in constructing functor "{name}" of type {pythonType.__name__} with args: {args}'
        # Chain explicitly so the original failure is reported as the cause.
        raise type(e)(message, e.args) from e
    return element

76 

77 

class Functor:
    """Define and execute a calculation on a ParquetTable

    The `__call__` method accepts either a `ParquetTable` object or a
    `DeferredDatasetHandle`, and returns the
    result of the calculation as a single column.  Each functor defines what
    columns are needed for the calculation, and only these columns are read
    from the `ParquetTable`.

    The action of  `__call__` consists of two steps: first, loading the
    necessary columns from disk into memory as a `pandas.DataFrame` object;
    and second, performing the computation on this dataframe and returning the
    result.


    To define a new `Functor`, a subclass must define a `_func` method,
    that takes a `pandas.DataFrame` and returns result in a `pandas.Series`.
    In addition, it must define the following attributes

    * `_columns`: The columns necessary to perform the calculation
    * `name`: A name appropriate for a figure axis label
    * `shortname`: A name appropriate for use as a dictionary key

    On initialization, a `Functor` should declare what band (`filt` kwarg)
    and dataset (e.g. `'ref'`, `'meas'`, `'forced_src'`) it is intended to be
    applied to. This enables the `_get_data` method to extract the proper
    columns from the parquet file. If not specified, the dataset will fall back
    on the `_defaultDataset` attribute. If band is not specified and `dataset`
    is anything other than `'ref'`, then an error will be raised when trying to
    perform the calculation.

    Originally, `Functor` was set up to expect
    datasets formatted like the `deepCoadd_obj` dataset; that is, a
    dataframe with a multi-level column index, with the levels of the
    column index being `band`, `dataset`, and `column`.
    It has since been generalized to apply to dataframes without multi-level
    indices and multi-level indices with just `dataset` and `column` levels.
    In addition, the `_get_data` method that reads
    the dataframe from the `ParquetTable` will return a dataframe with column
    index levels defined by the `_dfLevels` attribute; by default, this is
    `column`.

    The `_dfLevels` attributes should generally not need to
    be changed, unless `_func` needs columns from multiple filters or datasets
    to do the calculation.
    An example of this is the `lsst.pipe.tasks.functors.Color` functor, for
    which `_dfLevels = ('band', 'column')`, and `_func` expects the dataframe
    it gets to have those levels in the column index.

    Parameters
    ----------
    filt : str
        Filter upon which to do the calculation

    dataset : str
        Dataset upon which to do the calculation
        (e.g., 'ref', 'meas', 'forced_src').

    noDup : bool, optional
        Overrides the class-level `_defaultNoDup` when not `None`.

    """

    _defaultDataset = 'ref'
    _dfLevels = ('column',)
    _defaultNoDup = False

    def __init__(self, filt=None, dataset=None, noDup=None):
        self.filt = filt
        self.dataset = dataset if dataset is not None else self._defaultDataset
        self._noDup = noDup

    @property
    def noDup(self):
        """Value passed as ``noDup`` at construction, else `_defaultNoDup`
        """
        if self._noDup is not None:
            return self._noDup
        else:
            return self._defaultNoDup

    @property
    def columns(self):
        """Columns required to perform calculation
        """
        if not hasattr(self, '_columns'):
            raise NotImplementedError('Must define columns property or _columns attribute')
        return self._columns

    def _get_data_columnLevels(self, data, columnIndex=None):
        """Gets the names of the column index levels

        This should only be called in the context of a multilevel table.
        The logic here is to enable this to work both with the gen2 `MultilevelParquetTable`
        and with the gen3 `DeferredDatasetHandle`.

        Parameters
        ----------
        data : `MultilevelParquetTable` or `DeferredDatasetHandle`

        columnIndex (optional): pandas `Index` object
            if not passed, then it is read from the `DeferredDatasetHandle`

        Raises
        ------
        TypeError
            If no column index is available and ``data`` is not a
            `MultilevelParquetTable`.
        """
        if isinstance(data, DeferredDatasetHandle):
            if columnIndex is None:
                columnIndex = data.get(component="columns")
        # An explicitly supplied index wins regardless of the type of `data`
        # (`_colsFromDict` calls this with ``data=None``).
        if columnIndex is not None:
            return columnIndex.names
        if isinstance(data, MultilevelParquetTable):
            return data.columnLevels
        else:
            raise TypeError(f"Unknown type for data: {type(data)}!")

    def _get_data_columnLevelNames(self, data, columnIndex=None):
        """Gets the content of each of the column levels for a multilevel table

        Similar to `_get_data_columnLevels`, this enables backward compatibility with gen2.

        Mirrors original gen2 implementation within `pipe.tasks.parquetTable.MultilevelParquetTable`

        Returns
        -------
        columnLevelNames : `dict`
            Maps each level name to the list of unique values in that level.
        """
        if isinstance(data, DeferredDatasetHandle):
            if columnIndex is None:
                columnIndex = data.get(component="columns")
        if columnIndex is not None:
            # Materialize the index entries once instead of once per level.
            indexArray = np.array([c for c in columnIndex])
            return {
                level: list(np.unique(indexArray[:, i]))
                for i, level in enumerate(columnIndex.names)
            }
        if isinstance(data, MultilevelParquetTable):
            return data.columnLevelNames
        else:
            raise TypeError(f"Unknown type for data: {type(data)}!")

    def _colsFromDict(self, colDict, columnIndex=None):
        """Converts dictionary column specification to a list of columns

        This mirrors the original gen2 implementation within `pipe.tasks.parquetTable.MultilevelParquetTable`

        Levels missing from ``colDict`` are expanded to every value of that
        level in ``columnIndex``; the result is the cartesian product over
        all levels, as a list of tuples.
        """
        new_colDict = {}
        columnLevels = self._get_data_columnLevels(None, columnIndex=columnIndex)

        for i, lev in enumerate(columnLevels):
            if lev in colDict:
                if isinstance(colDict[lev], str):
                    # Wrap a bare string so product() does not iterate its characters.
                    new_colDict[lev] = [colDict[lev]]
                else:
                    new_colDict[lev] = colDict[lev]
            else:
                new_colDict[lev] = columnIndex.levels[i]

        levelCols = [new_colDict[lev] for lev in columnLevels]
        cols = product(*levelCols)
        return list(cols)

    def multilevelColumns(self, data, columnIndex=None, returnTuple=False):
        """Returns columns needed by functor from multilevel dataset

        To access tables with multilevel column structure, the `MultilevelParquetTable`
        or `DeferredDatasetHandle` need to be passed either a list of tuples or a
        dictionary.

        Parameters
        ----------
        data : `MultilevelParquetTable` or `DeferredDatasetHandle`

        columnIndex (optional): pandas `Index` object
            either passed or read in from `DeferredDatasetHandle`.

        `returnTuple` : bool
            If true, then return a list of tuples rather than the column dictionary
            specification.  This is set to `True` by `CompositeFunctor` in order to be able to
            combine columns from the various component functors.

        Raises
        ------
        ValueError
            If ``filt`` is unset, the table has a ``band`` level, and the
            dataset is not ``'ref'``.
        """
        if isinstance(data, DeferredDatasetHandle) and columnIndex is None:
            columnIndex = data.get(component="columns")

        # Confirm that the dataset has the column levels the functor is expecting it to have.
        columnLevels = self._get_data_columnLevels(data, columnIndex)

        columnDict = {'column': self.columns,
                      'dataset': self.dataset}
        if self.filt is None:
            columnLevelNames = self._get_data_columnLevelNames(data, columnIndex)
            if "band" in columnLevels:
                if self.dataset == "ref":
                    # With no filter requested, 'ref' falls back to the first band listed.
                    columnDict["band"] = columnLevelNames["band"][0]
                else:
                    raise ValueError(f"'filt' not set for functor {self.name}"
                                     f"(dataset {self.dataset}) "
                                     "and ParquetTable "
                                     "contains multiple filters in column index. "
                                     "Set 'filt' or set 'dataset' to 'ref'.")
        else:
            columnDict['band'] = self.filt

        if isinstance(data, MultilevelParquetTable):
            return data._colsFromDict(columnDict)
        elif isinstance(data, DeferredDatasetHandle):
            if returnTuple:
                return self._colsFromDict(columnDict, columnIndex=columnIndex)
            else:
                return columnDict

    def _func(self, df, dropna=True):
        raise NotImplementedError('Must define calculation on dataframe')

    def _get_columnIndex(self, data):
        """Return columnIndex

        Read from the handle for a `DeferredDatasetHandle`; `None` otherwise.
        """

        if isinstance(data, DeferredDatasetHandle):
            return data.get(component="columns")
        else:
            return None

    def _get_data(self, data):
        """Retrieve dataframe necessary for calculation.

        The data argument can be a DataFrame, a ParquetTable instance, or a gen3 DeferredDatasetHandle

        Returns dataframe upon which `self._func` can act.

        N.B. while passing a raw pandas `DataFrame` *should* work here, it has not been tested.

        Raises
        ------
        TypeError
            If ``data`` is none of the supported types.
        """
        if isinstance(data, pd.DataFrame):
            return data

        # First thing to do: check to see if the data source has a multilevel column index or not.
        columnIndex = self._get_columnIndex(data)
        is_multiLevel = isinstance(data, MultilevelParquetTable) or isinstance(columnIndex, pd.MultiIndex)

        # Simple single-level parquet table, gen2
        if isinstance(data, ParquetTable) and not is_multiLevel:
            columns = self.columns
            df = data.toDataFrame(columns=columns)
            return df

        # Get proper columns specification for this functor
        if is_multiLevel:
            columns = self.multilevelColumns(data, columnIndex=columnIndex)
        else:
            columns = self.columns

        if isinstance(data, MultilevelParquetTable):
            # Load in-memory dataframe with appropriate columns the gen2 way
            df = data.toDataFrame(columns=columns, droplevels=False)
        elif isinstance(data, DeferredDatasetHandle):
            # Load in-memory dataframe with appropriate columns the gen3 way
            df = data.get(parameters={"columns": columns})
        else:
            # Fail with a clear message instead of an unbound-local error below.
            raise TypeError(f"Unknown type for data: {type(data)}!")

        # Drop unnecessary column levels
        if is_multiLevel:
            df = self._setLevels(df)

        return df

    def _setLevels(self, df):
        """Drop column-index levels not listed in `_dfLevels`

        Reassigns ``df.columns`` on the passed frame and returns that frame.
        """
        levelsToDrop = [n for n in df.columns.names if n not in self._dfLevels]
        df.columns = df.columns.droplevel(levelsToDrop)
        return df

    def _dropna(self, vals):
        return vals.dropna()

    def __call__(self, data, dropna=False):
        """Load the needed columns from ``data`` and run the calculation

        A failure inside `_func` yields the all-NaN result of `fail`.
        A failure while loading the data propagates to the caller: there is
        no dataframe from which to build a placeholder result.
        """
        # Loading stays outside the try block: `fail` needs `df`, so a load
        # error could otherwise only surface as an unbound-variable error.
        df = self._get_data(data)
        try:
            vals = self._func(df)
        except Exception:
            vals = self.fail(df)
        if dropna:
            vals = self._dropna(vals)

        return vals

    def difference(self, data1, data2, **kwargs):
        """Computes difference between functor called on two different ParquetTable objects
        """
        return self(data1, **kwargs) - self(data2, **kwargs)

    def fail(self, df):
        """Return an all-NaN series sharing the index of ``df``
        """
        return pd.Series(np.full(len(df), np.nan), index=df.index)

    @property
    def name(self):
        """Full name of functor (suitable for figure labels)
        """
        # NOTE(review): this returns the exception class rather than raising
        # it; left unchanged because `shortname` and error messages read
        # `name` on functors that never define one.
        return NotImplementedError

    @property
    def shortname(self):
        """Short name of functor (suitable for column name/dict key)
        """
        return self.name

370 

371 

class CompositeFunctor(Functor):
    """Perform multiple calculations at once on a catalog

    The role of a `CompositeFunctor` is to group together computations from
    multiple functors.  Instead of returning `pandas.Series` a
    `CompositeFunctor` returns a `pandas.Dataframe`, with the column names
    being the keys of `funcDict`.

    The `columns` attribute of a `CompositeFunctor` is the union of all columns
    in all the component functors.

    A `CompositeFunctor` does not use a `_func` method itself; rather,
    when a `CompositeFunctor` is called, all its columns are loaded
    at once, and the resulting dataframe is passed to the `_func` method of each component
    functor.  This has the advantage of only doing I/O (reading from parquet file) once,
    and works because each individual `_func` method of each component functor does not
    care if there are *extra* columns in the dataframe being passed; only that it must contain
    *at least* the `columns` it expects.

    An important and useful class method is `from_yaml`, which takes as argument the path to a YAML
    file specifying a collection of functors.

    Parameters
    ----------
    funcs : `dict` or `list`
        Dictionary or list of functors.  If a list, then it will be converted
        into a dictionary according to the `.shortname` attribute of each functor.

    """
    dataset = None

    def __init__(self, funcs, **kwargs):

        # isinstance (not an exact type match) so dict subclasses are kept as
        # mappings instead of being iterated as a list of functors.
        if isinstance(funcs, dict):
            self.funcDict = funcs
        else:
            self.funcDict = {f.shortname: f for f in funcs}

        self._filt = None

        # funcDict must exist before this call: the base initializer assigns
        # `filt`, whose setter walks funcDict.
        super().__init__(**kwargs)

    @property
    def filt(self):
        return self._filt

    @filt.setter
    def filt(self, filt):
        # Propagate a non-None filter to every component functor.
        if filt is not None:
            for f in self.funcDict.values():
                f.filt = filt
        self._filt = filt

    def update(self, new):
        """Add functors from a dictionary or another `CompositeFunctor`

        Raises
        ------
        TypeError
            If ``new`` is neither a `dict` nor a `CompositeFunctor`.
        """
        if isinstance(new, dict):
            self.funcDict.update(new)
        elif isinstance(new, CompositeFunctor):
            self.funcDict.update(new.funcDict)
        else:
            raise TypeError('Can only update with dictionary or CompositeFunctor.')

        # Make sure new functors have the same 'filt' set
        if self.filt is not None:
            self.filt = self.filt

    @property
    def columns(self):
        """Union of the columns of all component functors (order not defined)
        """
        return list({col for f in self.funcDict.values() for col in f.columns})

    def multilevelColumns(self, data, **kwargs):
        # Get the union of columns for all component functors.  Note the need to have `returnTuple=True` here.
        return list({
            col
            for f in self.funcDict.values()
            for col in f.multilevelColumns(data, returnTuple=True, **kwargs)
        })

    def __call__(self, data, **kwargs):
        """Apply the functor to the data table

        Parameters
        ----------
        data : `lsst.daf.butler.DeferredDatasetHandle`,
               `lsst.pipe.tasks.parquetTable.MultilevelParquetTable`,
               `lsst.pipe.tasks.parquetTable.ParquetTable`,
               or `pandas.DataFrame`.
            The table or a pointer to a table on disk from which columns can
            be accessed
        **kwargs
            Only ``dropna`` is read: if true, rows containing any NaN are
            dropped from the result.

        Returns
        -------
        valDf : `pandas.DataFrame`
            One column per entry of `funcDict`.

        Raises
        ------
        RuntimeError
            If a component functor returns something that is not 1-d.
        """
        columnIndex = self._get_columnIndex(data)

        # First, determine whether data has a multilevel index (either gen2 or gen3)
        is_multiLevel = isinstance(data, MultilevelParquetTable) or isinstance(columnIndex, pd.MultiIndex)

        # Multilevel index, gen2 or gen3
        if is_multiLevel:
            columns = self.multilevelColumns(data, columnIndex=columnIndex)

            if isinstance(data, MultilevelParquetTable):
                # Read data into memory the gen2 way
                df = data.toDataFrame(columns=columns, droplevels=False)
            elif isinstance(data, DeferredDatasetHandle):
                # Read data into memory the gen3 way
                df = data.get(parameters={"columns": columns})

            valDict = {}
            for k, f in self.funcDict.items():
                # Reset per functor so a frame left over from a previous
                # iteration is never used for this one's fallback.
                subdf = None
                try:
                    subdf = f._setLevels(
                        df[f.multilevelColumns(data, returnTuple=True, columnIndex=columnIndex)]
                    )
                    valDict[k] = f._func(subdf)
                except Exception:
                    if subdf is None:
                        # The functor's columns could not even be selected;
                        # report that instead of substituting NaNs.
                        raise
                    valDict[k] = f.fail(subdf)

        else:
            if isinstance(data, DeferredDatasetHandle):
                # input if Gen3 deferLoad=True
                df = data.get(parameters={"columns": self.columns})
            elif isinstance(data, pd.DataFrame):
                # input if Gen3 deferLoad=False
                df = data
            else:
                # Original Gen2 input is type ParquetTable and the fallback
                df = data.toDataFrame(columns=self.columns)

            valDict = {k: f._func(df) for k, f in self.funcDict.items()}

        # Check that output columns are actually columns
        for name, colVal in valDict.items():
            if len(colVal.shape) != 1:
                raise RuntimeError("Transformed column '%s' is not the shape of a column. "
                                   "It is shaped %s and type %s." % (name, colVal.shape, type(colVal)))

        try:
            valDf = pd.concat(valDict, axis=1)
        except TypeError:
            # Show which outputs had unexpected types before re-raising.
            print([(k, type(v)) for k, v in valDict.items()])
            raise

        if kwargs.get('dropna', False):
            valDf = valDf.dropna(how='any')

        return valDf

    @classmethod
    def renameCol(cls, col, renameRules):
        """Apply prefix rename rules to a column name

        Parameters
        ----------
        col : `str`
            Column name.
        renameRules : iterable of (old, new) pairs, or `None`
            Each rule whose ``old`` is a prefix of the (possibly already
            renamed) name replaces that prefix with ``new``.

        Returns
        -------
        col : `str`
            The renamed column.
        """
        if renameRules is None:
            return col
        for old, new in renameRules:
            if col.startswith(old):
                # count=1: only the matched prefix is rewritten, not later
                # repeats of the same text inside the name.
                col = col.replace(old, new, 1)
        return col

    @classmethod
    def from_file(cls, filename, **kwargs):
        """Build a composite functor from a YAML file

        Environment variables in ``filename`` are expanded; remaining keyword
        arguments are forwarded to `from_yaml`.
        """
        # Allow environment variables in the filename.
        filename = os.path.expandvars(filename)
        with open(filename) as f:
            translationDefinition = yaml.safe_load(f)

        return cls.from_yaml(translationDefinition, **kwargs)

    @classmethod
    def from_yaml(cls, translationDefinition, **kwargs):
        """Build a composite functor from an already-parsed YAML definition

        ``translationDefinition['funcs']`` maps output names to functor
        specifications consumed by `init_fromDict`.  The optional lists
        ``calexpFlags``, ``refFlags``, ``forcedFlags`` and ``flags`` each add
        one `Column` functor per entry, keyed by the entry after applying
        the optional ``flag_rename_rules``.
        """
        funcs = {}
        for func, val in translationDefinition['funcs'].items():
            funcs[func] = init_fromDict(val, name=func)

        renameRules = translationDefinition.get('flag_rename_rules')

        # Processed in this order; a later list overrides an earlier one when
        # two flags map to the same output name.
        flagSources = (
            ('calexpFlags', 'calexp'),
            ('refFlags', 'ref'),
            ('forcedFlags', 'forced_src'),
            ('flags', 'meas'),
        )
        for key, dataset in flagSources:
            if key in translationDefinition:
                for flag in translationDefinition[key]:
                    funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset=dataset)

        return cls(funcs, **kwargs)

572 

573 

def mag_aware_eval(df, expr):
    """Evaluate an expression on a DataFrame, knowing what the 'mag' function means

    Builds on `pandas.DataFrame.eval`, which parses and executes math on dataframes.
    Each ``mag(x)`` is rewritten to ``-2.5*log(x)/log(10)``.  If evaluating
    that fails, the expression is retried with ``x`` replaced by
    ``x_instFlux``.

    Parameters
    ----------
    df : pandas.DataFrame
        Dataframe on which to evaluate expression.

    expr : str
        Expression.

    Returns
    -------
    val
        Whatever `pandas.DataFrame.eval` returns for the rewritten expression.
    """
    # Whitespace inside the parentheses is tolerated, matching the pattern
    # `CustomFunctor.columns` uses to discover flux columns.
    magPattern = r'mag\(\s*(\w+)\s*\)'
    # No `truediv` keyword: it was deprecated in pandas 1.0 and removed in
    # 2.0, and `/` is already true division.
    try:
        expr_new = re.sub(magPattern, r'-2.5*log(\g<1>)/log(10)', expr)
        val = df.eval(expr_new)
    except Exception:  # Should check what actually gets raised
        expr_new = re.sub(magPattern, r'-2.5*log(\g<1>_instFlux)/log(10)', expr)
        val = df.eval(expr_new)
    return val

594 

595 

class CustomFunctor(Functor):
    """Arbitrary computation on a catalog

    Column names (and thus the columns to be loaded from catalog) are found
    by finding all words and trying to ignore all "math-y" words.

    Parameters
    ----------
    expr : str
        Expression to evaluate, to be parsed and executed by `mag_aware_eval`.
    """
    _ignore_words = ('mag', 'sin', 'cos', 'exp', 'log', 'sqrt')

    def __init__(self, expr, **kwargs):
        self.expr = expr
        super().__init__(**kwargs)

    @property
    def name(self):
        return self.expr

    @property
    def columns(self):
        """Column names referenced by the expression (order not defined)

        An argument of ``mag(...)`` that does not already end in
        ``_instFlux`` is listed with that suffix appended, and the bare
        name is dropped from the result.
        """
        flux_cols = re.findall(r'mag\(\s*(\w+)\s*\)', self.expr)

        # Match whole identifiers: digits are allowed after the first
        # character, and the leading \b keeps the exponent letter of numeric
        # literals such as ``1e9`` from being read as a column.
        words = re.findall(r'\b[a-zA-Z_]\w*', self.expr)
        cols = [c for c in words if c not in self._ignore_words]
        not_a_col = []
        for c in flux_cols:
            if not re.search('_instFlux$', c):
                cols.append(f'{c}_instFlux')
                not_a_col.append(c)
            else:
                cols.append(c)

        return list({c for c in cols if c not in not_a_col})

    def _func(self, df):
        return mag_aware_eval(df, self.expr)

634 

635 

class Column(Functor):
    """Get column with specified name

    Parameters
    ----------
    col : `str`
        Name of the column to return.
    **kwargs
        Forwarded to the `Functor` initializer.
    """

    def __init__(self, col, **kwargs):
        # Stored before the base initializer runs, as in every other functor here.
        self.col = col
        super().__init__(**kwargs)

    @property
    def columns(self):
        """Single-element list holding the requested column name
        """
        return [self.col]

    @property
    def name(self):
        """The column name itself
        """
        return self.col

    def _func(self, df):
        selected = df[self.col]
        return selected

654 

655 

class Index(Functor):
    """Return the value of the index for each object

    The result is a series whose values and index are both the index of
    the input dataframe.
    """

    # just a dummy; something has to be here
    columns = ['coord_ra']
    _defaultDataset = 'ref'
    _defaultNoDup = True

    def _func(self, df):
        rowIndex = df.index
        return pd.Series(rowIndex, index=rowIndex)

666 

667 

class IDColumn(Column):
    """Return the dataframe index as a series indexed by itself

    The calculation ignores the loaded column contents.
    """
    # NOTE(review): `Column.__init__` requires a `col` argument and assigns
    # it on the instance, so this class-level default looks unreachable
    # through normal construction -- confirm intent.
    col = 'id'
    _allow_difference = False
    _defaultNoDup = True

    def _func(self, df):
        ids = df.index
        return pd.Series(ids, index=ids)

675 

676 

class FootprintNPix(Column):
    """`Column` functor with a class-level column name of ``base_Footprint_nPix``
    """
    # NOTE(review): `Column.__init__` still requires an explicit `col`
    # argument, which overrides this class attribute -- confirm intent.
    col = 'base_Footprint_nPix'

679 

680 

class CoordColumn(Column):
    """Base class for coordinate column, in degrees

    When `_radians` is true (the default) the stored values are multiplied
    by ``180 / pi``; otherwise they are returned as stored.
    """
    _radians = True

    def __init__(self, col, **kwargs):
        super().__init__(col, **kwargs)

    def _func(self, df):
        # Must not modify original column in case that column is used by another functor
        if self._radians:
            return df[self.col] * 180 / np.pi
        return df[self.col]

693 

694 

class RAColumn(CoordColumn):
    """Right Ascension, in degrees

    Reads the ``coord_ra`` column.
    """
    name = 'RA'
    _defaultNoDup = True

    def __init__(self, **kwargs):
        # The column name is fixed; only keyword options are configurable.
        super().__init__('coord_ra', **kwargs)

    def __call__(self, catalog, **kwargs):
        # Pure pass-through to the inherited implementation.
        result = super().__call__(catalog, **kwargs)
        return result

706 

707 

class DecColumn(CoordColumn):
    """Declination, in degrees

    Reads the ``coord_dec`` column.
    """
    name = 'Dec'
    _defaultNoDup = True

    def __init__(self, **kwargs):
        # The column name is fixed; only keyword options are configurable.
        super().__init__('coord_dec', **kwargs)

    def __call__(self, catalog, **kwargs):
        # Pure pass-through to the inherited implementation.
        result = super().__call__(catalog, **kwargs)
        return result

719 

720 

class HtmIndex20(Functor):
    """Compute the level 20 HtmIndex for the catalog.

    Parameters
    ----------
    ra : `str`
        Name of the column holding right ascension.
    decl : `str`
        Name of the column holding declination.
    **kwargs
        Forwarded to the `Functor` initializer.

    Notes
    -----
    This functor was implemented to satisfy requirements of old APDB interface
    which required ``pixelId`` column in DiaObject with HTM20 index. APDB
    interface had migrated to not need that information, but we keep this
    class in case it may be useful for something else.
    """
    name = "Htm20"
    htmLevel = 20
    _radians = True

    def __init__(self, ra, decl, **kwargs):
        self.pixelator = sphgeom.HtmPixelization(self.htmLevel)
        self.ra = ra
        self.decl = decl
        self._columns = [self.ra, self.decl]
        super().__init__(**kwargs)

    def _func(self, df):
        # The angular unit depends only on the functor, not on the row.
        angleUnit = geom.radians if self._radians else geom.degrees

        def computePixel(row):
            point = geom.SpherePoint(row[self.ra], row[self.decl], angleUnit)
            return self.pixelator.index(point.getVector())

        # Evaluated row by row; the result is cast to 64-bit integers.
        pixelIds = df.apply(computePixel, axis=1, result_type='reduce')
        return pixelIds.astype('int64')

756 

757 

def fluxName(col):
    """Return ``col`` with an ``_instFlux`` suffix, adding it if absent
    """
    suffix = '_instFlux'
    return col if col.endswith(suffix) else col + suffix

762 

763 

def fluxErrName(col):
    """Return ``col`` with an ``_instFluxErr`` suffix, adding it if absent

    The full suffix is appended to anything not already ending in it,
    including names that end in ``_instFlux``.
    """
    suffix = '_instFluxErr'
    return col if col.endswith(suffix) else col + suffix

768 

769 

class Mag(Functor):
    """Compute calibrated magnitude

    Takes a `calib` argument, which returns the flux at mag=0
    as `calib.getFluxMag0()`.  If not provided, then the default
    `fluxMag0` is 63095734448.0194, which is default for HSC.
    This default should be removed in DM-21955

    This calculation hides warnings about invalid values and dividing by zero.

    As for all functors, a `dataset` and `filt` kwarg should be provided upon
    initialization.  Unlike the default `Functor`, however, the default dataset
    for a `Mag` is `'meas'`, rather than `'ref'`.

    Parameters
    ----------
    col : `str`
        Name of flux column from which to compute magnitude.  Can be parseable
        by `lsst.pipe.tasks.functors.fluxName` function---that is, you can pass
        `'modelfit_CModel'` instead of `'modelfit_CModel_instFlux'`) and it will
        understand.
    calib : `lsst.afw.image.calib.Calib` (optional)
        Object that knows zero point.
    """
    _defaultDataset = 'meas'

    def __init__(self, col, calib=None, **kwargs):
        self.col = fluxName(col)
        self.calib = calib
        if calib is not None:
            # getFluxMag0() is indexed as (value, error); the value is used here.
            self.fluxMag0 = calib.getFluxMag0()[0]
        else:
            # TO DO: DM-21955 Replace hard coded photometic calibration values
            self.fluxMag0 = 63095734448.0194

        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.col]

    def _func(self, df):
        # Use the stdlib warnings module directly: `np.warnings` was an
        # unintended alias that NumPy removed in 1.24.
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', r'invalid value encountered')
            warnings.filterwarnings('ignore', r'divide by zero')
            return -2.5*np.log10(df[self.col] / self.fluxMag0)

    @property
    def name(self):
        return f'mag_{self.col}'

820 

821 

class MagErr(Mag):
    """Compute calibrated magnitude uncertainty

    Takes the same `calib` object as `lsst.pipe.tasks.functors.Mag`.
    Without a `calib`, the zero-point uncertainty is taken to be 0.

    Parameters
    ----------
    col : `str`
        Name of flux column
    calib : `lsst.afw.image.calib.Calib` (optional)
        Object that knows zero point.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        if self.calib is not None:
            # Second element of getFluxMag0() is used as the zero-point error.
            self.fluxMag0Err = self.calib.getFluxMag0()[1]
        else:
            self.fluxMag0Err = 0.

    @property
    def columns(self):
        """Flux column followed by the same name with ``Err`` appended
        """
        return [self.col, self.col + 'Err']

    def _func(self, df):
        # Use the stdlib warnings module directly: `np.warnings` was an
        # unintended alias that NumPy removed in 1.24.
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', r'invalid value encountered')
            warnings.filterwarnings('ignore', r'divide by zero')
            fluxCol, fluxErrCol = self.columns
            # Relative flux error and relative zero-point error, added in quadrature.
            x = df[fluxErrCol] / df[fluxCol]
            y = self.fluxMag0Err / self.fluxMag0
            magErr = (2.5 / np.log(10.)) * np.sqrt(x*x + y*y)
            return magErr

    @property
    def name(self):
        return super().name + '_err'

858 

859 

class NanoMaggie(Mag):
    """Flux column divided by `fluxMag0` and multiplied by 1e9

    Presumably this expresses the flux in nanomaggies, per the class name.
    """

    def _func(self, df):
        relativeFlux = df[self.col] / self.fluxMag0
        return relativeFlux * 1e9

866 

867 

class MagDiff(Functor):
    """Functor to calculate magnitude difference

    Computes ``-2.5*log10(col1/col2)`` from two flux columns.  Warnings
    about invalid values and dividing by zero are suppressed.

    Parameters
    ----------
    col1, col2 : `str`
        Names of the flux columns; each is passed through
        `lsst.pipe.tasks.functors.fluxName`.
    """
    _defaultDataset = 'meas'

    def __init__(self, col1, col2, **kwargs):
        self.col1 = fluxName(col1)
        self.col2 = fluxName(col2)
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.col1, self.col2]

    def _func(self, df):
        # Use the stdlib warnings module directly: `np.warnings` was an
        # unintended alias that NumPy removed in 1.24.
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', r'invalid value encountered')
            warnings.filterwarnings('ignore', r'divide by zero')
            return -2.5*np.log10(df[self.col1]/df[self.col2])

    @property
    def name(self):
        return f'(mag_{self.col1} - mag_{self.col2})'

    @property
    def shortname(self):
        return f'magDiff_{self.col1}_{self.col2}'

895 

896 

class Color(Functor):
    """Compute the color between two filters

    Computes color by initializing two different `Mag`
    functors based on the `col` and filters provided, and
    then returning the difference.

    This is enabled by the `_func` expecting a dataframe with a
    multilevel column index, with both `'band'` and `'column'`,
    instead of just `'column'`, which is the `Functor` default.
    This is controlled by the `_dfLevels` attribute.

    Also of note, the default dataset for `Color` is `'forced_src'`,
    whereas for `Mag` it is `'meas'`.

    Parameters
    ----------
    col : str
        Name of flux column from which to compute; same as would be passed to
        `lsst.pipe.tasks.functors.Mag`.

    filt2, filt1 : str
        Filters from which to compute magnitude difference.
        Color computed is `Mag(filt2) - Mag(filt1)`.

    Raises
    ------
    RuntimeError
        If ``filt2`` and ``filt1`` are equal.
    """
    _defaultDataset = 'forced_src'
    _dfLevels = ('band', 'column')
    _defaultNoDup = True

    def __init__(self, col, filt2, filt1, **kwargs):
        self.col = fluxName(col)
        if filt2 == filt1:
            raise RuntimeError("Cannot compute Color for %s: %s - %s " % (col, filt2, filt1))
        self.filt2, self.filt1 = filt2, filt1

        # One magnitude functor per band; both receive the same extra kwargs.
        self.mag2 = Mag(col, filt=filt2, **kwargs)
        self.mag1 = Mag(col, filt=filt1, **kwargs)

        super().__init__(**kwargs)

    @property
    def filt(self):
        # Always None: the two bands live in `filt1` and `filt2`.
        return None

    @filt.setter
    def filt(self, filt):
        # Assignments (e.g. from a CompositeFunctor) are deliberately ignored.
        pass

    def _func(self, df):
        # Select each band's sub-frame and run the matching Mag calculation.
        secondMag = self.mag2._func(df[self.filt2])
        firstMag = self.mag1._func(df[self.filt1])
        return secondMag - firstMag

    @property
    def columns(self):
        return [self.mag1.col, self.mag2.col]

    def multilevelColumns(self, parq, **kwargs):
        # Tuples are built as (dataset, band, column).
        return [(self.dataset, band, self.col) for band in (self.filt1, self.filt2)]

    @property
    def name(self):
        return f'{self.filt2} - {self.filt1} ({self.col})'

    @property
    def shortname(self):
        return f"{self.col}_{self.filt2.replace('-', '')}m{self.filt1.replace('-', '')}"

965 

966 

class Labeller(Functor):
    """Main function of this subclass is to override the dropna=True
    """
    _null_label = 'null'
    _allow_difference = False
    name = 'label'
    _force_str = False

    def __call__(self, parq, dropna=False, **kwargs):
        # Whatever ``dropna`` the caller supplied is ignored on purpose:
        # the parent is always invoked with ``dropna=False``.
        forwarded = dict(kwargs, dropna=False)
        return super().__call__(parq, **forwarded)

977 

978 

class StarGalaxyLabeller(Labeller):
    """Label rows as 'star', 'galaxy' or the null label.

    The decision is taken from the ``base_ClassificationExtendedness_value``
    column: values below 0.5 map to 'star', other non-null values to
    'galaxy', and null values to ``_null_label``.
    """
    _columns = ["base_ClassificationExtendedness_value"]
    _column = "base_ClassificationExtendedness_value"

    def _func(self, df):
        extendedness = df[self._columns][self._column]
        isMissing = extendedness.isnull()
        # Category codes: 1 where extendedness < 0.5, otherwise 0; rows
        # with a null extendedness are overwritten with code 2.
        codes = (extendedness < 0.5).astype(int).mask(isMissing, 2)

        # TODO: DM-21954 Look into veracity of inline comment below
        # are these backwards?
        categories = ['galaxy', 'star', self._null_label]
        categorical = pd.Categorical.from_codes(codes, categories=categories)
        label = pd.Series(categorical, index=extendedness.index, name='label')
        if self._force_str:
            label = label.astype(str)
        return label

997 

998 

class NumStarLabeller(Labeller):
    """Bin the ``numStarFlags`` column into 'noStar', 'maybe' and 'star'."""
    _columns = ['numStarFlags']
    labels = {"star": 0, "maybe": 1, "notStar": 2}

    def _func(self, df):
        flagCounts = df[self._columns][self._columns[0]]

        # Number of filters, taken as one less than the number of distinct
        # values present in the column.
        nFilters = len(flagCounts.unique()) - 1

        binEdges = [-1, 0, nFilters - 1, nFilters]
        binNames = ['noStar', 'maybe', 'star']
        binned = pd.cut(flagCounts, binEdges, labels=binNames)
        label = pd.Series(binned, index=flagCounts.index, name='label')

        if self._force_str:
            label = label.astype(str)

        return label

1017 

1018 

class DeconvolvedMoments(Functor):
    """Trace of the source moments minus the trace of the PSF moments.

    The HSM source moments are used where they are finite; otherwise the
    SDSS shape moments are substituted.
    """
    name = 'Deconvolved Moments'
    shortname = 'deconvolvedMoments'
    _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
                "ext_shapeHSM_HsmSourceMoments_yy",
                "base_SdssShape_xx", "base_SdssShape_yy",
                "ext_shapeHSM_HsmPsfMoments_xx",
                "ext_shapeHSM_HsmPsfMoments_yy")

    def _func(self, df):
        """Calculate deconvolved moments

        Parameters
        ----------
        df : `pandas.DataFrame`
            Table with the SDSS shape columns, the HSM PSF moment columns
            and, optionally, the HSM source moment columns.

        Returns
        -------
        moments : `pandas.Series`
            Source trace (HSM, falling back to SDSS where HSM is not
            finite) minus the HSM PSF trace.

        Raises
        ------
        RuntimeError
            Raised if the HSM PSF moment columns are absent.
        """
        if "ext_shapeHSM_HsmSourceMoments_xx" in df.columns:  # _xx added by tdm
            hsm = df["ext_shapeHSM_HsmSourceMoments_xx"] + df["ext_shapeHSM_HsmSourceMoments_yy"]
        else:
            # Must be an index-aligned Series, not a bare ndarray: the
            # ``hsm.where(cond, other)`` call below is a pandas method.
            hsm = pd.Series(np.nan, index=df.index, dtype=float)
        sdss = df["base_SdssShape_xx"] + df["base_SdssShape_yy"]
        if "ext_shapeHSM_HsmPsfMoments_xx" in df.columns:
            psf = df["ext_shapeHSM_HsmPsfMoments_xx"] + df["ext_shapeHSM_HsmPsfMoments_yy"]
        else:
            # LSST does not have shape.sdss.psf.  Could instead add base_PsfShape to catalog using
            # exposure.getPsf().computeShape(s.getCentroid()).getIxx()
            raise RuntimeError('No psf shape parameter found in catalog')

        return hsm.where(np.isfinite(hsm), sdss) - psf

1044 

1045 

class SdssTraceSize(Functor):
    """Functor to calculate SDSS trace radius size for sources"""
    name = "SDSS Trace Size"
    shortname = 'sdssTrace'
    _columns = ("base_SdssShape_xx", "base_SdssShape_yy")

    def _func(self, df):
        # Trace radius: square root of half the sum of xx and yy moments.
        trace = df["base_SdssShape_xx"] + df["base_SdssShape_yy"]
        return np.sqrt(0.5*trace)

1055 

1056 

class PsfSdssTraceSizeDiff(Functor):
    """Functor to calculate SDSS trace radius size difference (%) between object and psf model"""
    name = "PSF - SDSS Trace Size"
    shortname = 'psf_sdssTrace'
    _columns = ("base_SdssShape_xx", "base_SdssShape_yy",
                "base_SdssShape_psf_xx", "base_SdssShape_psf_yy")

    def _func(self, df):
        srcTrace = df["base_SdssShape_xx"] + df["base_SdssShape_yy"]
        srcSize = np.sqrt(0.5*srcTrace)
        psfTrace = df["base_SdssShape_psf_xx"] + df["base_SdssShape_psf_yy"]
        psfSize = np.sqrt(0.5*psfTrace)
        # Percentage difference relative to the mean of the two sizes.
        meanSize = 0.5*(srcSize + psfSize)
        return 100*(srcSize - psfSize)/meanSize

1069 

1070 

class HsmTraceSize(Functor):
    """Functor to calculate HSM trace radius size for sources"""
    name = 'HSM Trace Size'
    shortname = 'hsmTrace'
    _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
                "ext_shapeHSM_HsmSourceMoments_yy")

    def _func(self, df):
        # Trace radius: square root of half the sum of xx and yy moments.
        trace = (df["ext_shapeHSM_HsmSourceMoments_xx"]
                 + df["ext_shapeHSM_HsmSourceMoments_yy"])
        return np.sqrt(0.5*trace)

1082 

1083 

class PsfHsmTraceSizeDiff(Functor):
    """Functor to calculate HSM trace radius size difference (%) between object and psf model"""
    name = 'PSF - HSM Trace Size'
    shortname = 'psf_HsmTrace'
    _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
                "ext_shapeHSM_HsmSourceMoments_yy",
                "ext_shapeHSM_HsmPsfMoments_xx",
                "ext_shapeHSM_HsmPsfMoments_yy")

    def _func(self, df):
        srcTrace = (df["ext_shapeHSM_HsmSourceMoments_xx"]
                    + df["ext_shapeHSM_HsmSourceMoments_yy"])
        srcSize = np.sqrt(0.5*srcTrace)
        psfTrace = (df["ext_shapeHSM_HsmPsfMoments_xx"]
                    + df["ext_shapeHSM_HsmPsfMoments_yy"])
        psfSize = np.sqrt(0.5*psfTrace)
        # Percentage difference relative to the mean of the two sizes.
        meanSize = 0.5*(srcSize + psfSize)
        return 100*(srcSize - psfSize)/meanSize

1100 

1101 

class HsmFwhm(Functor):
    """PSF FWHM from the HSM PSF moments, scaled by a fixed pixel scale."""
    name = 'HSM Psf FWHM'
    _columns = ('ext_shapeHSM_HsmPsfMoments_xx', 'ext_shapeHSM_HsmPsfMoments_yy')
    # TODO: DM-21403 pixel scale should be computed from the CD matrix or transform matrix
    pixelScale = 0.168
    # Conversion factor from a Gaussian sigma to its FWHM.
    SIGMA2FWHM = 2*np.sqrt(2*np.log(2))

    def _func(self, df):
        conversion = self.pixelScale*self.SIGMA2FWHM
        trace = df['ext_shapeHSM_HsmPsfMoments_xx'] + df['ext_shapeHSM_HsmPsfMoments_yy']
        return conversion*np.sqrt(0.5*trace)

1112 

1113 

class E1(Functor):
    """Distortion ellipticity component e1 from second moments.

    Computes ``(xx - yy) / (xx + yy)``.

    Parameters
    ----------
    colXX : `str`
        Name of the column holding the xx moment.
    colXY : `str`
        Name of the column holding the xy moment (requested, but not used
        in the e1 expression).
    colYY : `str`
        Name of the column holding the yy moment.
    """
    name = "Distortion Ellipticity (e1)"
    shortname = "Distortion"

    def __init__(self, colXX, colXY, colYY, **kwargs):
        self.colXX = colXX
        self.colXY = colXY
        self.colYY = colYY
        self._columns = [self.colXX, self.colXY, self.colYY]
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.colXX, self.colXY, self.colYY]

    def _func(self, df):
        # The difference must be parenthesised: without it the division
        # binds tighter and the result is ``xx - yy/(xx + yy)``.
        return (df[self.colXX] - df[self.colYY]) / (df[self.colXX] + df[self.colYY])

1131 

1132 

class E2(Functor):
    """Ellipticity component e2 from second moments.

    Computes ``2*xy / (xx + yy)`` from the three named moment columns.
    """
    name = "Ellipticity e2"

    def __init__(self, colXX, colXY, colYY, **kwargs):
        self.colXX, self.colXY, self.colYY = colXX, colXY, colYY
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.colXX, self.colXY, self.colYY]

    def _func(self, df):
        crossTerm = 2*df[self.colXY]
        trace = df[self.colXX] + df[self.colYY]
        return crossTerm / trace

1148 

1149 

class RadiusFromQuadrupole(Functor):
    """Radius from second moments: fourth root of ``xx*yy - xy**2``."""

    def __init__(self, colXX, colXY, colYY, **kwargs):
        self.colXX, self.colXY, self.colYY = colXX, colXY, colYY
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.colXX, self.colXY, self.colYY]

    def _func(self, df):
        determinant = df[self.colXX]*df[self.colYY] - df[self.colXY]**2
        return determinant**0.25

1164 

1165 

class LocalWcs(Functor):
    """Computations using the stored localWcs.

    Parameters
    ----------
    colCD_1_1, colCD_1_2, colCD_2_1, colCD_2_2 : `str`
        Names of the columns holding the four elements of the local Wcs
        affine transform.
    """
    name = "LocalWcsOperations"

    def __init__(self,
                 colCD_1_1,
                 colCD_1_2,
                 colCD_2_1,
                 colCD_2_2,
                 **kwargs):
        self.colCD_1_1, self.colCD_1_2 = colCD_1_1, colCD_1_2
        self.colCD_2_1, self.colCD_2_2 = colCD_2_1, colCD_2_2
        super().__init__(**kwargs)

    def computeDeltaRaDec(self, x, y, cd11, cd12, cd21, cd22):
        """Apply the local Wcs affine transform to a pixel coordinate.

        Parameters
        ----------
        x : `pandas.Series`
            X pixel coordinate.
        y : `pandas.Series`
            Y pixel coordinate.
        cd11 : `pandas.Series`
            [1, 1] element of the local Wcs affine transform.
        cd12 : `pandas.Series`
            [1, 2] element of the local Wcs affine transform.
        cd21 : `pandas.Series`
            [2, 1] element of the local Wcs affine transform.
        cd22 : `pandas.Series`
            [2, 2] element of the local Wcs affine transform.

        Returns
        -------
        raDecTuple : tuple
            RA and dec conversion of x and y given the local Wcs. Returned
            units are in radians.

        """
        deltaRa = x * cd11 + y * cd12
        deltaDec = x * cd21 + y * cd22
        return (deltaRa, deltaDec)

    def computeSkySeperation(self, ra1, dec1, ra2, dec2):
        """Compute the angular separation between two sky coordinates.

        Parameters
        ----------
        ra1 : `pandas.Series`
            Ra of the first coordinate in radians.
        dec1 : `pandas.Series`
            Dec of the first coordinate in radians.
        ra2 : `pandas.Series`
            Ra of the second coordinate in radians.
        dec2 : `pandas.Series`
            Dec of the second coordinate in radians.

        Returns
        -------
        dist : `pandas.Series`
            Distance on the sphere in radians.
        """
        sinHalfDeltaDec = np.sin((dec2 - dec1) / 2)
        sinHalfDeltaRa = np.sin((ra2 - ra1) / 2)
        haversine = (sinHalfDeltaDec ** 2
                     + np.cos(dec2) * np.cos(dec1) * sinHalfDeltaRa ** 2)
        return 2 * np.arcsin(np.sqrt(haversine))

    def getSkySeperationFromPixel(self, x1, y1, x2, y2, cd11, cd12, cd21, cd22):
        """Compute the distance on the sphere from x2, y2 to x1, y1.

        Parameters
        ----------
        x1 : `pandas.Series`
            X pixel coordinate of the first point.
        y1 : `pandas.Series`
            Y pixel coordinate of the first point.
        x2 : `pandas.Series`
            X pixel coordinate of the second point.
        y2 : `pandas.Series`
            Y pixel coordinate of the second point.
        cd11 : `pandas.Series`
            [1, 1] element of the local Wcs affine transform.
        cd12 : `pandas.Series`
            [1, 2] element of the local Wcs affine transform.
        cd21 : `pandas.Series`
            [2, 1] element of the local Wcs affine transform.
        cd22 : `pandas.Series`
            [2, 2] element of the local Wcs affine transform.

        Returns
        -------
        Distance : `pandas.Series`
            Result of `computeSkySeperation` for the two transformed
            points.
        """
        transform = (cd11, cd12, cd21, cd22)
        ra1, dec1 = self.computeDeltaRaDec(x1, y1, *transform)
        ra2, dec2 = self.computeDeltaRaDec(x2, y2, *transform)
        # Great circle distance for small separations.
        return self.computeSkySeperation(ra1, dec1, ra2, dec2)

1271 

1272 

class ComputePixelScale(LocalWcs):
    """Compute the local pixel scale from the stored CDMatrix.
    """
    name = "PixelScale"

    @property
    def columns(self):
        return [self.colCD_1_1,
                self.colCD_1_2,
                self.colCD_2_1,
                self.colCD_2_2]

    def pixelScaleArcseconds(self, cd11, cd12, cd21, cd22):
        """Compute the local pixel to scale conversion in arcseconds.

        Parameters
        ----------
        cd11 : `pandas.Series`
            [1, 1] element of the local Wcs affine transform in radians.
        cd12 : `pandas.Series`
            [1, 2] element of the local Wcs affine transform in radians.
        cd21 : `pandas.Series`
            [2, 1] element of the local Wcs affine transform in radians.
        cd22 : `pandas.Series`
            [2, 2] element of the local Wcs affine transform in radians.

        Returns
        -------
        pixScale : `pandas.Series`
            Arcseconds per pixel at the location of the local WC
        """
        # Square root of the absolute determinant, converted from radians
        # to degrees and then to arcseconds.
        determinant = cd11 * cd22 - cd12 * cd21
        return 3600 * np.degrees(np.sqrt(np.fabs(determinant)))

    def _func(self, df):
        cd11 = df[self.colCD_1_1]
        cd12 = df[self.colCD_1_2]
        cd21 = df[self.colCD_2_1]
        cd22 = df[self.colCD_2_2]
        return self.pixelScaleArcseconds(cd11, cd12, cd21, cd22)

1313 

1314 

class ConvertPixelToArcseconds(ComputePixelScale):
    """Convert a value in units pixels to units arcseconds.

    The value column is multiplied once by the local pixel scale.
    """

    def __init__(self,
                 col,
                 colCD_1_1,
                 colCD_1_2,
                 colCD_2_1,
                 colCD_2_2,
                 **kwargs):
        self.col = col
        cdColumns = (colCD_1_1, colCD_1_2, colCD_2_1, colCD_2_2)
        super().__init__(*cdColumns, **kwargs)

    @property
    def name(self):
        return f"{self.col}_asArcseconds"

    @property
    def columns(self):
        return [self.col,
                self.colCD_1_1,
                self.colCD_1_2,
                self.colCD_2_1,
                self.colCD_2_2]

    def _func(self, df):
        value = df[self.col]
        pixScale = self.pixelScaleArcseconds(df[self.colCD_1_1],
                                             df[self.colCD_1_2],
                                             df[self.colCD_2_1],
                                             df[self.colCD_2_2])
        return value * pixScale

1350 

1351 

class ConvertPixelSqToArcsecondsSq(ComputePixelScale):
    """Convert a value in units pixels squared to units arcseconds squared.

    The value column is multiplied twice by the local pixel scale.
    """

    def __init__(self,
                 col,
                 colCD_1_1,
                 colCD_1_2,
                 colCD_2_1,
                 colCD_2_2,
                 **kwargs):
        self.col = col
        cdColumns = (colCD_1_1, colCD_1_2, colCD_2_1, colCD_2_2)
        super().__init__(*cdColumns, **kwargs)

    @property
    def name(self):
        return f"{self.col}_asArcsecondsSq"

    @property
    def columns(self):
        return [self.col,
                self.colCD_1_1,
                self.colCD_1_2,
                self.colCD_2_1,
                self.colCD_2_2]

    def _func(self, df):
        cdElements = (df[self.colCD_1_1],
                      df[self.colCD_1_2],
                      df[self.colCD_2_1],
                      df[self.colCD_2_2])
        pixScale = self.pixelScaleArcseconds(*cdElements)
        return df[self.col] * pixScale * pixScale

1388 

1389 

class ReferenceBand(Functor):
    """Name the band whose ``merge_measurement_*`` column is largest per row."""
    name = 'Reference Band'
    shortname = 'refBand'

    @property
    def columns(self):
        return ["merge_measurement_i",
                "merge_measurement_r",
                "merge_measurement_z",
                "merge_measurement_y",
                "merge_measurement_g",
                "merge_measurement_u"]

    def _func(self, df: pd.DataFrame) -> pd.Series:
        def bandOfMaximum(row):
            # get column name with the max value (True > False)
            winner = row.idxmax()
            return winner.replace('merge_measurement_', '')

        flags = df[self.columns]
        bands = flags.apply(bandOfMaximum, axis=1, result_type='reduce')
        # Makes a Series of dtype object if df is empty
        return bands.astype('object')

1412 

1413 

class Photometry(Functor):
    """Base class for converting raw counts to fluxes and magnitudes.

    Parameters
    ----------
    colFlux : `str`
        Name of the column holding the raw flux (counts).
    colFluxErr : `str`, optional
        Name of the column holding the error on ``colFlux``.
    calib : optional
        Object providing ``getFluxMag0()``, which must return the pair
        ``(fluxMag0, fluxMag0Err)``.  If `None`, the zero point is derived
        from ``COADD_ZP`` and its error is taken to be zero.
    """
    # AB to NanoJansky (3631 Jansky)
    AB_FLUX_SCALE = (0 * u.ABmag).to_value(u.nJy)
    LOG_AB_FLUX_SCALE = 12.56
    FIVE_OVER_2LOG10 = 1.085736204758129569
    # TO DO: DM-21955 Replace hard coded photometic calibration values
    COADD_ZP = 27

    def __init__(self, colFlux, colFluxErr=None, calib=None, **kwargs):
        # Element-wise version of `hypot` usable on arrays and Series.
        self.vhypot = np.vectorize(self.hypot)
        self.col = colFlux
        self.colFluxErr = colFluxErr

        self.calib = calib
        if calib is not None:
            self.fluxMag0, self.fluxMag0Err = calib.getFluxMag0()
        else:
            self.fluxMag0 = 1./np.power(10, -0.4*self.COADD_ZP)
            self.fluxMag0Err = 0.

        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.col]

    @property
    def name(self):
        return f'mag_{self.col}'

    @classmethod
    def hypot(cls, a, b):
        """Return ``sqrt(a**2 + b**2)`` for two scalars.

        The larger-magnitude argument is factored out before squaring;
        zero is returned when both arguments are zero.
        """
        if np.abs(a) < np.abs(b):
            a, b = b, a
        if a == 0.:
            return 0.
        q = b/a
        return np.abs(a) * np.sqrt(1. + q*q)

    def dn2flux(self, dn, fluxMag0):
        """Scale counts ``dn`` by ``AB_FLUX_SCALE / fluxMag0``."""
        return self.AB_FLUX_SCALE * dn / fluxMag0

    def dn2mag(self, dn, fluxMag0):
        """Return ``-2.5*log10(dn/fluxMag0)``, silencing log/divide warnings."""
        # ``np.warnings`` was only an accidental re-export of the standard
        # library module and no longer exists in recent NumPy releases, so
        # the standard library module is used directly.
        import warnings

        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', r'invalid value encountered')
            warnings.filterwarnings('ignore', r'divide by zero')
            return -2.5 * np.log10(dn/fluxMag0)

    def dn2fluxErr(self, dn, dnErr, fluxMag0, fluxMag0Err):
        """Propagate count and zero-point errors into a flux error."""
        retVal = self.vhypot(dn * fluxMag0Err, dnErr * fluxMag0)
        retVal *= self.AB_FLUX_SCALE / fluxMag0 / fluxMag0
        return retVal

    def dn2MagErr(self, dn, dnErr, fluxMag0, fluxMag0Err):
        """Return the magnitude error: fractional flux error times 2.5/ln(10)."""
        retVal = self.dn2fluxErr(dn, dnErr, fluxMag0, fluxMag0Err) / self.dn2flux(dn, fluxMag0)
        return self.FIVE_OVER_2LOG10 * retVal

1470 

1471 

class NanoJansky(Photometry):
    """Flux obtained by passing the flux column through ``dn2flux``."""

    def _func(self, df):
        counts = df[self.col]
        return self.dn2flux(counts, self.fluxMag0)

1475 

1476 

class NanoJanskyErr(Photometry):
    """Flux error obtained by passing the flux columns through ``dn2fluxErr``."""

    @property
    def columns(self):
        return [self.col, self.colFluxErr]

    def _func(self, df):
        counts = df[self.col]
        countsErr = df[self.colFluxErr]
        fluxErr = self.dn2fluxErr(counts, countsErr, self.fluxMag0, self.fluxMag0Err)
        # Re-wrap so that the result carries the input table's index.
        return pd.Series(fluxErr, index=df.index)

1485 

1486 

class Magnitude(Photometry):
    """Magnitude obtained by passing the flux column through ``dn2mag``."""

    def _func(self, df):
        counts = df[self.col]
        return self.dn2mag(counts, self.fluxMag0)

1490 

1491 

class MagnitudeErr(Photometry):
    """Magnitude error obtained by passing the flux columns through ``dn2MagErr``."""

    @property
    def columns(self):
        return [self.col, self.colFluxErr]

    def _func(self, df):
        counts = df[self.col]
        countsErr = df[self.colFluxErr]
        magErr = self.dn2MagErr(counts, countsErr, self.fluxMag0, self.fluxMag0Err)
        # Re-wrap so that the result carries the input table's index.
        return pd.Series(magErr, index=df.index)

1500 

1501 

class LocalPhotometry(Functor):
    """Base class for calibrating the specified instrument flux column using
    the local photometric calibration.

    Parameters
    ----------
    instFluxCol : `str`
        Name of the instrument flux column.
    instFluxErrCol : `str`
        Name of the assocated error columns for ``instFluxCol``.
    photoCalibCol : `str`
        Name of local calibration column.
    photoCalibErrCol : `str`
        Error associated with ``photoCalibCol``

    See also
    --------
    LocalPhotometry
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    """
    # AB magnitude corresponding to a flux of 1 nJy.
    logNJanskyToAB = (1 * u.nJy).to_value(u.ABmag)

    def __init__(self,
                 instFluxCol,
                 instFluxErrCol,
                 photoCalibCol,
                 photoCalibErrCol,
                 **kwargs):
        self.instFluxCol = instFluxCol
        self.instFluxErrCol = instFluxErrCol
        self.photoCalibCol = photoCalibCol
        self.photoCalibErrCol = photoCalibErrCol
        super().__init__(**kwargs)

    def instFluxToNanojansky(self, instFlux, localCalib):
        """Convert instrument flux to nanojanskys.

        Parameters
        ----------
        instFlux : `numpy.ndarray` or `pandas.Series`
            Array of instrument flux measurements
        localCalib : `numpy.ndarray` or `pandas.Series`
            Array of local photometric calibration estimates.

        Returns
        -------
        calibFlux : `numpy.ndarray` or `pandas.Series`
            Array of calibrated flux measurements.
        """
        return instFlux * localCalib

    def instFluxErrToNanojanskyErr(self, instFlux, instFluxErr, localCalib, localCalibErr):
        """Convert instrument flux error to a nanojansky flux error.

        Parameters
        ----------
        instFlux : `numpy.ndarray` or `pandas.Series`
            Array of instrument flux measurements
        instFluxErr : `numpy.ndarray` or `pandas.Series`
            Errors on associated ``instFlux`` values
        localCalib : `numpy.ndarray` or `pandas.Series`
            Array of local photometric calibration estimates.
        localCalibErr : `numpy.ndarray` or `pandas.Series`
            Errors on associated ``localCalib`` values

        Returns
        -------
        calibFluxErr : `numpy.ndarray` or `pandas.Series`
            Errors on calibrated flux measurements.
        """
        # Quadrature sum of the two error contributions to flux*calib.
        return np.hypot(instFluxErr * localCalib, instFlux * localCalibErr)

    def instFluxToMagnitude(self, instFlux, localCalib):
        """Convert instrument flux to an AB magnitude.

        Parameters
        ----------
        instFlux : `numpy.ndarray` or `pandas.Series`
            Array of instrument flux measurements
        localCalib : `numpy.ndarray` or `pandas.Series`
            Array of local photometric calibration estimates.

        Returns
        -------
        calibMag : `numpy.ndarray` or `pandas.Series`
            Array of calibrated AB magnitudes.
        """
        return -2.5 * np.log10(self.instFluxToNanojansky(instFlux, localCalib)) + self.logNJanskyToAB

    def instFluxErrToMagnitudeErr(self, instFlux, instFluxErr, localCalib, localCalibErr):
        """Convert instrument flux error to an AB magnitude error.

        Parameters
        ----------
        instFlux : `numpy.ndarray` or `pandas.Series`
            Array of instrument flux measurements
        instFluxErr : `numpy.ndarray` or `pandas.Series`
            Errors on associated ``instFlux`` values
        localCalib : `numpy.ndarray` or `pandas.Series`
            Array of local photometric calibration estimates.
        localCalibErr : `numpy.ndarray` or `pandas.Series`
            Errors on associated ``localCalib`` values

        Returns
        -------
        calibMagErr: `numpy.ndarray` or `pandas.Series`
            Error on calibrated AB magnitudes.
        """
        err = self.instFluxErrToNanojanskyErr(instFlux, instFluxErr, localCalib, localCalibErr)
        # The denominator is the calibrated flux, i.e. instFlux*localCalib;
        # the magnitude error is 2.5/ln(10) times the fractional flux error.
        return 2.5 / np.log(10) * err / self.instFluxToNanojansky(instFlux, localCalib)

1615 

1616 

class LocalNanojansky(LocalPhotometry):
    """Compute calibrated fluxes using the local calibration value.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    """

    @property
    def columns(self):
        return [self.instFluxCol, self.photoCalibCol]

    @property
    def name(self):
        return f'flux_{self.instFluxCol}'

    def _func(self, df):
        instFlux = df[self.instFluxCol]
        localCalib = df[self.photoCalibCol]
        return self.instFluxToNanojansky(instFlux, localCalib)

1638 

1639 

class LocalNanojanskyErr(LocalPhotometry):
    """Compute calibrated flux errors using the local calibration value.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    """

    @property
    def columns(self):
        return [self.instFluxCol, self.instFluxErrCol,
                self.photoCalibCol, self.photoCalibErrCol]

    @property
    def name(self):
        return f'fluxErr_{self.instFluxCol}'

    def _func(self, df):
        instFlux = df[self.instFluxCol]
        instFluxErr = df[self.instFluxErrCol]
        localCalib = df[self.photoCalibCol]
        localCalibErr = df[self.photoCalibErrCol]
        return self.instFluxErrToNanojanskyErr(instFlux, instFluxErr,
                                               localCalib, localCalibErr)

1663 

1664 

class LocalMagnitude(LocalPhotometry):
    """Compute calibrated AB magnitudes using the local calibration value.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    """

    @property
    def columns(self):
        return [self.instFluxCol, self.photoCalibCol]

    @property
    def name(self):
        return f'mag_{self.instFluxCol}'

    def _func(self, df):
        instFlux = df[self.instFluxCol]
        localCalib = df[self.photoCalibCol]
        return self.instFluxToMagnitude(instFlux, localCalib)

1687 

1688 

class LocalMagnitudeErr(LocalPhotometry):
    """Compute calibrated AB magnitude errors using the local calibration value.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    """

    @property
    def columns(self):
        return [self.instFluxCol, self.instFluxErrCol,
                self.photoCalibCol, self.photoCalibErrCol]

    @property
    def name(self):
        return f'magErr_{self.instFluxCol}'

    def _func(self, df):
        instFlux = df[self.instFluxCol]
        instFluxErr = df[self.instFluxErrCol]
        localCalib = df[self.photoCalibCol]
        localCalibErr = df[self.photoCalibErrCol]
        return self.instFluxErrToMagnitudeErr(instFlux, instFluxErr,
                                              localCalib, localCalibErr)

1714 

1715 

class LocalDipoleMeanFlux(LocalPhotometry):
    """Compute absolute mean of dipole fluxes.

    The result is half the sum of the absolute calibrated positive and
    negative lobe fluxes.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    LocalDipoleMeanFlux
    LocalDipoleMeanFluxErr
    LocalDipoleDiffFlux
    LocalDipoleDiffFluxErr
    """
    def __init__(self,
                 instFluxPosCol,
                 instFluxNegCol,
                 instFluxPosErrCol,
                 instFluxNegErrCol,
                 photoCalibCol,
                 photoCalibErrCol,
                 **kwargs):
        self.instFluxNegCol, self.instFluxPosCol = instFluxNegCol, instFluxPosCol
        self.instFluxNegErrCol, self.instFluxPosErrCol = instFluxNegErrCol, instFluxPosErrCol
        self.photoCalibCol, self.photoCalibErrCol = photoCalibCol, photoCalibErrCol
        # The parent is initialised with the negative-lobe columns as its
        # generic instrument flux / error columns.
        super().__init__(instFluxNegCol,
                         instFluxNegErrCol,
                         photoCalibCol,
                         photoCalibErrCol,
                         **kwargs)

    @property
    def columns(self):
        return [self.instFluxPosCol,
                self.instFluxNegCol,
                self.photoCalibCol]

    @property
    def name(self):
        return f'dipMeanFlux_{self.instFluxPosCol}_{self.instFluxNegCol}'

    def _func(self, df):
        negFlux = np.fabs(self.instFluxToNanojansky(df[self.instFluxNegCol], df[self.photoCalibCol]))
        posFlux = np.fabs(self.instFluxToNanojansky(df[self.instFluxPosCol], df[self.photoCalibCol]))
        return 0.5*(negFlux + posFlux)

1763 

1764 

class LocalDipoleMeanFluxErr(LocalDipoleMeanFlux):
    """Compute the error on the absolute mean of dipole fluxes.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    LocalDipoleMeanFlux
    LocalDipoleMeanFluxErr
    LocalDipoleDiffFlux
    LocalDipoleDiffFluxErr
    """

    @property
    def columns(self):
        return [self.instFluxPosCol,
                self.instFluxNegCol,
                self.instFluxPosErrCol,
                self.instFluxNegErrCol,
                self.photoCalibCol,
                self.photoCalibErrCol]

    @property
    def name(self):
        return f'dipMeanFluxErr_{self.instFluxPosCol}_{self.instFluxNegCol}'

    def _func(self, df):
        # Error on 0.5*(|neg| + |pos|)*calib: the calibration error scales
        # the *summed* absolute instrument fluxes (hence the inner
        # parentheses), and the two lobe errors add in quadrature before
        # being scaled by the calibration.
        return 0.5*np.sqrt(
            ((np.fabs(df[self.instFluxNegCol]) + np.fabs(df[self.instFluxPosCol]))
             * df[self.photoCalibErrCol])**2
            + (df[self.instFluxNegErrCol]**2 + df[self.instFluxPosErrCol]**2)
            * df[self.photoCalibCol]**2)

1799 

1800 

class LocalDipoleDiffFlux(LocalDipoleMeanFlux):
    """Compute the absolute difference of dipole fluxes.

    Value is (abs(pos) - abs(neg))

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    LocalDipoleMeanFlux
    LocalDipoleMeanFluxErr
    LocalDipoleDiffFlux
    LocalDipoleDiffFluxErr
    """

    @property
    def columns(self):
        return [self.instFluxPosCol,
                self.instFluxNegCol,
                self.photoCalibCol]

    @property
    def name(self):
        return f'dipDiffFlux_{self.instFluxPosCol}_{self.instFluxNegCol}'

    def _func(self, df):
        posFlux = np.fabs(self.instFluxToNanojansky(df[self.instFluxPosCol], df[self.photoCalibCol]))
        negFlux = np.fabs(self.instFluxToNanojansky(df[self.instFluxNegCol], df[self.photoCalibCol]))
        return posFlux - negFlux

1831 

1832 

class LocalDipoleDiffFluxErr(LocalDipoleMeanFlux):
    """Compute the error on the absolute difference of dipole fluxes.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    LocalDipoleMeanFlux
    LocalDipoleMeanFluxErr
    LocalDipoleDiffFlux
    LocalDipoleDiffFluxErr
    """

    @property
    def columns(self):
        return [self.instFluxPosCol,
                self.instFluxNegCol,
                self.instFluxPosErrCol,
                self.instFluxNegErrCol,
                self.photoCalibCol,
                self.photoCalibErrCol]

    @property
    def name(self):
        return f'dipDiffFluxErr_{self.instFluxPosCol}_{self.instFluxNegCol}'

    def _func(self, df):
        # Contribution of the calibration error, scaled by the difference
        # of the absolute instrument fluxes.
        absDiff = np.fabs(df[self.instFluxPosCol]) - np.fabs(df[self.instFluxNegCol])
        calibTerm = (absDiff * df[self.photoCalibErrCol])**2
        # Contribution of the two lobe errors, scaled by the calibration.
        lobeVariance = df[self.instFluxPosErrCol]**2 + df[self.instFluxNegErrCol]**2
        fluxTerm = lobeVariance * df[self.photoCalibCol]**2
        return np.sqrt(calibTerm + fluxTerm)

1867 

1868 

class Ratio(Functor):
    """Base class for returning the ratio of 2 columns.

    Can be used to compute a Signal to Noise ratio for any input flux.

    Parameters
    ----------
    numerator : `str`
        Name of the column to use at the numerator in the ratio
    denominator : `str`
        Name of the column to use as the denominator in the ratio.
    """
    def __init__(self,
                 numerator,
                 denominator,
                 **kwargs):
        self.numerator = numerator
        self.denominator = denominator
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.numerator, self.denominator]

    @property
    def name(self):
        return f'ratio_{self.numerator}_{self.denominator}'

    def _func(self, df):
        """Return ``numerator / denominator`` with division warnings silenced."""
        # ``np.warnings`` was only an accidental re-export of the standard
        # library module and no longer exists in recent NumPy releases, so
        # the standard library module is used directly.
        import warnings

        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', r'invalid value encountered')
            warnings.filterwarnings('ignore', r'divide by zero')
            return df[self.numerator] / df[self.denominator]