Coverage for python/lsst/pipe/tasks/functors.py: 41%

803 statements  

« prev     ^ index     » next       coverage.py v6.4.2, created at 2022-07-17 08:57 +0000

1# This file is part of pipe_tasks. 

2# 

3# LSST Data Management System 

4# This product includes software developed by the 

5# LSST Project (http://www.lsst.org/). 

6# See COPYRIGHT file at the top of the source tree. 

7# 

8# This program is free software: you can redistribute it and/or modify 

9# it under the terms of the GNU General Public License as published by 

10# the Free Software Foundation, either version 3 of the License, or 

11# (at your option) any later version. 

12# 

13# This program is distributed in the hope that it will be useful, 

14# but WITHOUT ANY WARRANTY; without even the implied warranty of 

15# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

16# GNU General Public License for more details. 

17# 

18# You should have received a copy of the LSST License Statement and 

19# the GNU General Public License along with this program. If not, 

20# see <https://www.lsstcorp.org/LegalNotices/>. 

21# 

22import yaml 

23import re 

24from itertools import product 

25import os.path 

26 

27import pandas as pd 

28import numpy as np 

29import astropy.units as u 

30 

31from lsst.daf.persistence import doImport 

32from lsst.daf.butler import DeferredDatasetHandle 

33import lsst.geom as geom 

34import lsst.sphgeom as sphgeom 

35 

36from .parquetTable import ParquetTable, MultilevelParquetTable 

37 

38 

def init_fromDict(initDict, basePath='lsst.pipe.tasks.functors',
                  typeKey='functor', name=None):
    """Initialize an object defined in a dictionary

    The object needs to be importable as
    f'{basePath}.{initDict[typeKey]}'
    The positional and keyword arguments (if any) are contained in
    "args" and "kwargs" entries in the dictionary, respectively.
    This is used in `functors.CompositeFunctor.from_yaml` to initialize
    a composite functor from a specification in a YAML file.

    Parameters
    ----------
    initDict : dictionary
        Dictionary describing object's initialization. Must contain
        an entry keyed by ``typeKey`` that is the name of the object,
        relative to ``basePath``.
    basePath : str
        Path relative to module in which ``initDict[typeKey]`` is defined.
    typeKey : str
        Key of ``initDict`` that is the name of the object
        (relative to `basePath`).
    name : str, optional
        Label used in the error message should construction fail.

    Returns
    -------
    object
        The constructed instance.
    """
    # Work on a copy: the pops below must not mutate the caller's dict.
    initDict = initDict.copy()
    # TO DO: DM-21956 We should be able to define functors outside this module
    pythonType = doImport(f'{basePath}.{initDict.pop(typeKey)}')
    args = []
    if 'args' in initDict:
        args = initDict.pop('args')
        if isinstance(args, str):
            args = [args]
    try:
        element = pythonType(*args, **initDict)
    except Exception as e:
        message = f'Error in constructing functor "{name}" of type {pythonType.__name__} with args: {args}'
        # Chain with `from e` so the original traceback is preserved instead
        # of being discarded by the re-raise.
        raise type(e)(message, e.args) from e
    return element

76 

77 

class Functor(object):
    """Define and execute a calculation on a ParquetTable

    The `__call__` method accepts either a `ParquetTable` object or a
    `DeferredDatasetHandle`, and returns the
    result of the calculation as a single column. Each functor defines what
    columns are needed for the calculation, and only these columns are read
    from the `ParquetTable`.

    The action of `__call__` consists of two steps: first, loading the
    necessary columns from disk into memory as a `pandas.DataFrame` object;
    and second, performing the computation on this dataframe and returning the
    result.

    To define a new `Functor`, a subclass must define a `_func` method,
    that takes a `pandas.DataFrame` and returns result in a `pandas.Series`.
    In addition, it must define the following attributes

    * `_columns`: The columns necessary to perform the calculation
    * `name`: A name appropriate for a figure axis label
    * `shortname`: A name appropriate for use as a dictionary key

    On initialization, a `Functor` should declare what band (`filt` kwarg)
    and dataset (e.g. `'ref'`, `'meas'`, `'forced_src'`) it is intended to be
    applied to. This enables the `_get_data` method to extract the proper
    columns from the parquet file. If not specified, the dataset will fall back
    on the `_defaultDataset` attribute. If band is not specified and `dataset`
    is anything other than `'ref'`, then an error will be raised when trying to
    perform the calculation.

    Originally, `Functor` was set up to expect
    datasets formatted like the `deepCoadd_obj` dataset; that is, a
    dataframe with a multi-level column index, with the levels of the
    column index being `band`, `dataset`, and `column`.
    It has since been generalized to apply to dataframes without multi-level
    indices and multi-level indices with just `dataset` and `column` levels.
    In addition, the `_get_data` method that reads
    the dataframe from the `ParquetTable` will return a dataframe with column
    index levels defined by the `_dfLevels` attribute; by default, this is
    `column`.

    The `_dfLevels` attributes should generally not need to
    be changed, unless `_func` needs columns from multiple filters or datasets
    to do the calculation.
    An example of this is the `lsst.pipe.tasks.functors.Color` functor, for
    which `_dfLevels = ('band', 'column')`, and `_func` expects the dataframe
    it gets to have those levels in the column index.

    Parameters
    ----------
    filt : str
        Filter upon which to do the calculation

    dataset : str
        Dataset upon which to do the calculation
        (e.g., 'ref', 'meas', 'forced_src').

    noDup : bool, optional
        If provided, overrides the class-level `_defaultNoDup` returned by
        the `noDup` property.
    """

    _defaultDataset = 'ref'
    _dfLevels = ('column',)
    _defaultNoDup = False

    def __init__(self, filt=None, dataset=None, noDup=None):
        self.filt = filt
        self.dataset = dataset if dataset is not None else self._defaultDataset
        self._noDup = noDup

    @property
    def noDup(self):
        # An explicit per-instance setting wins; otherwise use the class default.
        if self._noDup is not None:
            return self._noDup
        else:
            return self._defaultNoDup

    @property
    def columns(self):
        """Columns required to perform calculation
        """
        if not hasattr(self, '_columns'):
            raise NotImplementedError('Must define columns property or _columns attribute')
        return self._columns

    def _get_data_columnLevels(self, data, columnIndex=None):
        """Gets the names of the column index levels

        This should only be called in the context of a multilevel table.
        The logic here is to enable this to work both with the gen2
        `MultilevelParquetTable` and with the gen3 `DeferredDatasetHandle`.

        Parameters
        ----------
        data : `MultilevelParquetTable` or `DeferredDatasetHandle`

        columnIndex (optional): pandas `Index` object
            if not passed, then it is read from the `DeferredDatasetHandle`
        """
        if isinstance(data, DeferredDatasetHandle):
            if columnIndex is None:
                columnIndex = data.get(component="columns")
        if columnIndex is not None:
            return columnIndex.names
        if isinstance(data, MultilevelParquetTable):
            return data.columnLevels
        else:
            raise TypeError(f"Unknown type for data: {type(data)}!")

    def _get_data_columnLevelNames(self, data, columnIndex=None):
        """Gets the content of each of the column levels for a multilevel table

        Similar to `_get_data_columnLevels`, this enables backward
        compatibility with gen2.

        Mirrors original gen2 implementation within
        `pipe.tasks.parquetTable.MultilevelParquetTable`
        """
        if isinstance(data, DeferredDatasetHandle):
            if columnIndex is None:
                columnIndex = data.get(component="columns")
        if columnIndex is not None:
            columnLevels = columnIndex.names
            # Map each level name to the sorted unique values found at that
            # position in the column tuples.
            columnLevelNames = {
                level: list(np.unique(np.array([c for c in columnIndex])[:, i]))
                for i, level in enumerate(columnLevels)
            }
            return columnLevelNames
        if isinstance(data, MultilevelParquetTable):
            return data.columnLevelNames
        else:
            raise TypeError(f"Unknown type for data: {type(data)}!")

    def _colsFromDict(self, colDict, columnIndex=None):
        """Converts dictionary column specification to a list of columns

        This mirrors the original gen2 implementation within
        `pipe.tasks.parquetTable.MultilevelParquetTable`
        """
        new_colDict = {}
        columnLevels = self._get_data_columnLevels(None, columnIndex=columnIndex)

        for i, lev in enumerate(columnLevels):
            if lev in colDict:
                if isinstance(colDict[lev], str):
                    new_colDict[lev] = [colDict[lev]]
                else:
                    new_colDict[lev] = colDict[lev]
            else:
                # Level not constrained by the caller: take every value.
                new_colDict[lev] = columnIndex.levels[i]

        # Cartesian product over all levels gives the full tuple list.
        levelCols = [new_colDict[lev] for lev in columnLevels]
        cols = product(*levelCols)
        return list(cols)

    def multilevelColumns(self, data, columnIndex=None, returnTuple=False):
        """Returns columns needed by functor from multilevel dataset

        To access tables with multilevel column structure, the
        `MultilevelParquetTable` or `DeferredDatasetHandle` need to be passed
        either a list of tuples or a dictionary.

        Parameters
        ----------
        data : `MultilevelParquetTable` or `DeferredDatasetHandle`

        columnIndex (optional): pandas `Index` object
            either passed or read in from `DeferredDatasetHandle`.

        `returnTuple` : bool
            If true, then return a list of tuples rather than the column
            dictionary specification. This is set to `True` by
            `CompositeFunctor` in order to be able to combine columns from the
            various component functors.
        """
        if isinstance(data, DeferredDatasetHandle) and columnIndex is None:
            columnIndex = data.get(component="columns")

        # Confirm that the dataset has the column levels the functor is
        # expecting it to have.
        columnLevels = self._get_data_columnLevels(data, columnIndex)

        columnDict = {'column': self.columns,
                      'dataset': self.dataset}
        if self.filt is None:
            columnLevelNames = self._get_data_columnLevelNames(data, columnIndex)
            if "band" in columnLevels:
                if self.dataset == "ref":
                    # 'ref' values are band-independent; any band will do.
                    columnDict["band"] = columnLevelNames["band"][0]
                else:
                    raise ValueError(f"'filt' not set for functor {self.name}"
                                     f"(dataset {self.dataset}) "
                                     "and ParquetTable "
                                     "contains multiple filters in column index. "
                                     "Set 'filt' or set 'dataset' to 'ref'.")
        else:
            columnDict['band'] = self.filt

        if isinstance(data, MultilevelParquetTable):
            return data._colsFromDict(columnDict)
        elif isinstance(data, DeferredDatasetHandle):
            if returnTuple:
                return self._colsFromDict(columnDict, columnIndex=columnIndex)
            else:
                return columnDict

    def _func(self, df, dropna=True):
        raise NotImplementedError('Must define calculation on dataframe')

    def _get_columnIndex(self, data):
        """Return columnIndex, or `None` when `data` is not a gen3 handle.
        """

        if isinstance(data, DeferredDatasetHandle):
            return data.get(component="columns")
        else:
            return None

    def _get_data(self, data):
        """Retrieve dataframe necessary for calculation.

        The data argument can be a DataFrame, a ParquetTable instance, or a
        gen3 DeferredDatasetHandle

        Returns dataframe upon which `self._func` can act.

        N.B. while passing a raw pandas `DataFrame` *should* work here, it has
        not been tested.
        """
        if isinstance(data, pd.DataFrame):
            return data

        # First thing to do: check to see if the data source has a multilevel
        # column index or not.
        columnIndex = self._get_columnIndex(data)
        is_multiLevel = isinstance(data, MultilevelParquetTable) or isinstance(columnIndex, pd.MultiIndex)

        # Simple single-level parquet table, gen2
        if isinstance(data, ParquetTable) and not is_multiLevel:
            columns = self.columns
            df = data.toDataFrame(columns=columns)
            return df

        # Get proper columns specification for this functor
        if is_multiLevel:
            columns = self.multilevelColumns(data, columnIndex=columnIndex)
        else:
            columns = self.columns

        if isinstance(data, MultilevelParquetTable):
            # Load in-memory dataframe with appropriate columns the gen2 way
            df = data.toDataFrame(columns=columns, droplevels=False)
        elif isinstance(data, DeferredDatasetHandle):
            # Load in-memory dataframe with appropriate columns the gen3 way
            df = data.get(parameters={"columns": columns})

        # Drop unnecessary column levels
        if is_multiLevel:
            df = self._setLevels(df)

        return df

    def _setLevels(self, df):
        # Keep only the levels named in _dfLevels; drop the rest.
        levelsToDrop = [n for n in df.columns.names if n not in self._dfLevels]
        df.columns = df.columns.droplevel(levelsToDrop)
        return df

    def _dropna(self, vals):
        return vals.dropna()

    def __call__(self, data, dropna=False):
        """Load the required columns and evaluate the functor.

        Returns the `_func` result, or a NaN-filled Series (via `fail`) when
        the calculation itself raises.
        """
        # Load outside the try block: a failure to *read* the data should
        # propagate directly.  Previously a read failure left `df` unbound and
        # the fallback `self.fail(df)` raised a masking NameError instead.
        df = self._get_data(data)
        try:
            vals = self._func(df)
        except Exception:
            vals = self.fail(df)
        if dropna:
            vals = self._dropna(vals)

        return vals

    def difference(self, data1, data2, **kwargs):
        """Computes difference between functor called on two different
        ParquetTable objects
        """
        return self(data1, **kwargs) - self(data2, **kwargs)

    def fail(self, df):
        # All-NaN fallback column aligned with the input's index.
        return pd.Series(np.full(len(df), np.nan), index=df.index)

    @property
    def name(self):
        """Full name of functor (suitable for figure labels)
        """
        # NOTE(review): this returns (not raises) NotImplementedError, so
        # f-strings interpolating `self.name` still work for subclasses that
        # never define a name.  Kept as-is for backward compatibility.
        return NotImplementedError

    @property
    def shortname(self):
        """Short name of functor (suitable for column name/dict key)
        """
        return self.name

370 

371 

class CompositeFunctor(Functor):
    """Perform multiple calculations at once on a catalog

    The role of a `CompositeFunctor` is to group together computations from
    multiple functors. Instead of returning `pandas.Series` a
    `CompositeFunctor` returns a `pandas.Dataframe`, with the column names
    being the keys of `funcDict`.

    The `columns` attribute of a `CompositeFunctor` is the union of all columns
    in all the component functors.

    A `CompositeFunctor` does not use a `_func` method itself; rather,
    when a `CompositeFunctor` is called, all its columns are loaded
    at once, and the resulting dataframe is passed to the `_func` method of
    each component functor. This has the advantage of only doing I/O (reading
    from parquet file) once, and works because each individual `_func` method
    of each component functor does not care if there are *extra* columns in
    the dataframe being passed; only that it must contain *at least* the
    `columns` it expects.

    An important and useful class method is `from_yaml`, which takes as
    argument the path to a YAML file specifying a collection of functors.

    Parameters
    ----------
    funcs : `dict` or `list`
        Dictionary or list of functors. If a list, then it will be converted
        into a dictionary according to the `.shortname` attribute of each
        functor.
    """
    dataset = None

    def __init__(self, funcs, **kwargs):

        # isinstance (rather than an exact type check) also accepts dict
        # subclasses such as OrderedDict.
        if isinstance(funcs, dict):
            self.funcDict = funcs
        else:
            self.funcDict = {f.shortname: f for f in funcs}

        self._filt = None

        super().__init__(**kwargs)

    @property
    def filt(self):
        return self._filt

    @filt.setter
    def filt(self, filt):
        # Propagate the filter down to every component functor.
        if filt is not None:
            for _, f in self.funcDict.items():
                f.filt = filt
        self._filt = filt

    def update(self, new):
        """Add the functors from a dict or another `CompositeFunctor`."""
        if isinstance(new, dict):
            self.funcDict.update(new)
        elif isinstance(new, CompositeFunctor):
            self.funcDict.update(new.funcDict)
        else:
            raise TypeError('Can only update with dictionary or CompositeFunctor.')

        # Make sure new functors have the same 'filt' set
        if self.filt is not None:
            self.filt = self.filt

    @property
    def columns(self):
        """Union of the columns required by all component functors."""
        return list(set([x for y in [f.columns for f in self.funcDict.values()] for x in y]))

    def multilevelColumns(self, data, **kwargs):
        # Get the union of columns for all component functors. Note the need
        # to have `returnTuple=True` here.
        return list(
            set(
                [
                    x
                    for y in [
                        f.multilevelColumns(data, returnTuple=True, **kwargs) for f in self.funcDict.values()
                    ]
                    for x in y
                ]
            )
        )

    def __call__(self, data, **kwargs):
        """Apply the functor to the data table

        Parameters
        ----------
        data : `lsst.daf.butler.DeferredDatasetHandle`,
            `lsst.pipe.tasks.parquetTable.MultilevelParquetTable`,
            `lsst.pipe.tasks.parquetTable.ParquetTable`,
            or `pandas.DataFrame`.
            The table or a pointer to a table on disk from which columns can
            be accessed
        """
        columnIndex = self._get_columnIndex(data)

        # First, determine whether data has a multilevel index (either gen2 or gen3)
        is_multiLevel = isinstance(data, MultilevelParquetTable) or isinstance(columnIndex, pd.MultiIndex)

        # Multilevel index, gen2 or gen3
        if is_multiLevel:
            columns = self.multilevelColumns(data, columnIndex=columnIndex)

            if isinstance(data, MultilevelParquetTable):
                # Read data into memory the gen2 way
                df = data.toDataFrame(columns=columns, droplevels=False)
            elif isinstance(data, DeferredDatasetHandle):
                # Read data into memory the gen3 way
                df = data.get(parameters={"columns": columns})

            valDict = {}
            for k, f in self.funcDict.items():
                try:
                    subdf = f._setLevels(
                        df[f.multilevelColumns(data, returnTuple=True, columnIndex=columnIndex)]
                    )
                    valDict[k] = f._func(subdf)
                except Exception as e:
                    try:
                        valDict[k] = f.fail(subdf)
                    except NameError:
                        # `subdf` was never bound, so the failure happened
                        # while selecting columns; re-raise the original.
                        raise e

        else:
            if isinstance(data, DeferredDatasetHandle):
                # input if Gen3 deferLoad=True
                df = data.get(parameters={"columns": self.columns})
            elif isinstance(data, pd.DataFrame):
                # input if Gen3 deferLoad=False
                df = data
            else:
                # Original Gen2 input is type ParquetTable and the fallback
                df = data.toDataFrame(columns=self.columns)

            valDict = {k: f._func(df) for k, f in self.funcDict.items()}

        # Check that output columns are actually columns
        for name, colVal in valDict.items():
            if len(colVal.shape) != 1:
                raise RuntimeError("Transformed column '%s' is not the shape of a column. "
                                   "It is shaped %s and type %s." % (name, colVal.shape, type(colVal)))

        try:
            valDf = pd.concat(valDict, axis=1)
        except TypeError:
            print([(k, type(v)) for k, v in valDict.items()])
            raise

        if kwargs.get('dropna', False):
            valDf = valDf.dropna(how='any')

        return valDf

    @classmethod
    def renameCol(cls, col, renameRules):
        """Rename a column per a list of ``(oldPrefix, newPrefix)`` rules."""
        if renameRules is None:
            return col
        for old, new in renameRules:
            if col.startswith(old):
                col = col.replace(old, new)
        return col

    @classmethod
    def from_file(cls, filename, **kwargs):
        """Construct a `CompositeFunctor` from a YAML specification file."""
        # Allow environment variables in the filename.
        filename = os.path.expandvars(filename)
        with open(filename) as f:
            translationDefinition = yaml.safe_load(f)

        return cls.from_yaml(translationDefinition, **kwargs)

    @classmethod
    def from_yaml(cls, translationDefinition, **kwargs):
        """Construct a `CompositeFunctor` from a parsed YAML definition."""
        funcs = {}
        for func, val in translationDefinition['funcs'].items():
            funcs[func] = init_fromDict(val, name=func)

        if 'flag_rename_rules' in translationDefinition:
            renameRules = translationDefinition['flag_rename_rules']
        else:
            renameRules = None

        # Each flag list maps to plain Column functors on a fixed dataset.
        if 'calexpFlags' in translationDefinition:
            for flag in translationDefinition['calexpFlags']:
                funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='calexp')

        if 'refFlags' in translationDefinition:
            for flag in translationDefinition['refFlags']:
                funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='ref')

        if 'forcedFlags' in translationDefinition:
            for flag in translationDefinition['forcedFlags']:
                funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='forced_src')

        if 'flags' in translationDefinition:
            for flag in translationDefinition['flags']:
                funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='meas')

        return cls(funcs, **kwargs)

572 

573 

def mag_aware_eval(df, expr):
    """Evaluate an expression on a DataFrame, knowing what the 'mag' function means

    Builds on `pandas.DataFrame.eval`, which parses and executes math on
    dataframes.

    ``mag(X)`` in the expression is rewritten to ``-2.5*log(X)/log(10)``;
    if that fails (e.g. because ``X`` is not a column), it is retried with
    ``X_instFlux`` substituted for ``X``.

    Parameters
    ----------
    df : pandas.DataFrame
        Dataframe on which to evaluate expression.

    expr : str
        Expression.
    """
    # Note: the `truediv` keyword formerly passed to `DataFrame.eval` was
    # deprecated and then removed from pandas (true division is the only
    # behavior anyway), so it is no longer passed here.
    try:
        expr_new = re.sub(r'mag\((\w+)\)', r'-2.5*log(\g<1>)/log(10)', expr)
        val = df.eval(expr_new)
    except Exception:  # Should check what actually gets raised
        # Fall back on interpreting mag() arguments as instFlux columns.
        expr_new = re.sub(r'mag\((\w+)\)', r'-2.5*log(\g<1>_instFlux)/log(10)', expr)
        val = df.eval(expr_new)
    return val

594 

595 

class CustomFunctor(Functor):
    """Arbitrary computation on a catalog

    Column names (and thus the columns to be loaded from catalog) are found
    by finding all words and trying to ignore all "math-y" words.

    Parameters
    ----------
    expr : str
        Expression to evaluate, to be parsed and executed by `mag_aware_eval`.
    """
    # Identifier-like tokens that are functions, not column names.
    _ignore_words = ('mag', 'sin', 'cos', 'exp', 'log', 'sqrt')

    def __init__(self, expr, **kwargs):
        self.expr = expr
        super().__init__(**kwargs)

    @property
    def name(self):
        return self.expr

    @property
    def columns(self):
        """Columns required: every identifier in the expression, with each
        ``mag(X)`` argument normalized to its ``X_instFlux`` column.
        """
        flux_cols = re.findall(r'mag\(\s*(\w+)\s*\)', self.expr)

        cols = [c for c in re.findall(r'[a-zA-Z_]+', self.expr) if c not in self._ignore_words]
        not_a_col = []
        for c in flux_cols:
            # str.endswith is the idiomatic (and cheaper) form of the former
            # re.search('_instFlux$', c).
            if not c.endswith('_instFlux'):
                cols.append(f'{c}_instFlux')
                not_a_col.append(c)
            else:
                cols.append(c)

        # Deduplicate, dropping the bare names that were expanded above.
        return list(set([c for c in cols if c not in not_a_col]))

    def _func(self, df):
        return mag_aware_eval(df, self.expr)

634 

635 

class Column(Functor):
    """Return a single named column from the catalog, unmodified.

    Parameters
    ----------
    col : str
        Name of the column to extract.
    """

    def __init__(self, col, **kwargs):
        self.col = col
        super().__init__(**kwargs)

    @property
    def name(self):
        return self.col

    @property
    def columns(self):
        return [self.col]

    def _func(self, df):
        # Pass the column through untouched.
        return df[self.col]

654 

655 

class Index(Functor):
    """Return the value of the catalog index for each object."""

    # A dummy column: something must be requested for the read to happen.
    columns = ['coord_ra']
    _defaultDataset = 'ref'
    _defaultNoDup = True

    def _func(self, df):
        idx = df.index
        return pd.Series(idx, index=idx)

666 

667 

class IDColumn(Column):
    """Object ``id`` column, taken from the dataframe index."""
    col = 'id'
    _allow_difference = False
    _defaultNoDup = True

    def _func(self, df):
        idx = df.index
        return pd.Series(idx, index=idx)

675 

676 

class FootprintNPix(Column):
    """Number of pixels in the source footprint (`base_Footprint_nPix`)."""
    col = 'base_Footprint_nPix'

679 

680 

class CoordColumn(Column):
    """Base class for a spherical-coordinate column, returned in degrees.

    Parameters
    ----------
    col : str
        Name of the coordinate column (stored in radians when
        ``_radians`` is True).
    """
    _radians = True

    def __init__(self, col, **kwargs):
        super().__init__(col, **kwargs)

    def _func(self, df):
        # Never modify the original column in place: another functor may
        # also be reading it.
        values = df[self.col]
        if self._radians:
            return values * 180 / np.pi
        return values

693 

694 

class RAColumn(CoordColumn):
    """Right Ascension, in degrees (read from `coord_ra`).
    """
    name = 'RA'
    _defaultNoDup = True

    def __init__(self, **kwargs):
        super().__init__('coord_ra', **kwargs)

    # The former `__call__` override was a pure pass-through to
    # `super().__call__` and has been removed; inheritance provides
    # identical behavior.

706 

707 

class DecColumn(CoordColumn):
    """Declination, in degrees (read from `coord_dec`).
    """
    name = 'Dec'
    _defaultNoDup = True

    def __init__(self, **kwargs):
        super().__init__('coord_dec', **kwargs)

    # The former `__call__` override was a pure pass-through to
    # `super().__call__` and has been removed; inheritance provides
    # identical behavior.

719 

720 

class HtmIndex20(Functor):
    """Compute the level 20 HtmIndex for the catalog.

    Parameters
    ----------
    ra : str
        Name of the right-ascension column (in radians, per ``_radians``).
    decl : str
        Name of the declination column (same units as ``ra``).

    Notes
    -----
    The computation is applied row by row (not vectorized), so it can be
    slow on large catalogs.
    """
    name = "Htm20"
    htmLevel = 20
    # Input coordinate columns are interpreted as radians when True.
    _radians = True

    def __init__(self, ra, decl, **kwargs):
        self.pixelator = sphgeom.HtmPixelization(self.htmLevel)
        self.ra = ra
        self.decl = decl
        self._columns = [self.ra, self.decl]
        super().__init__(**kwargs)

    def _func(self, df):

        def computePixel(row):
            # Build the sphere point in whichever units the columns use.
            if self._radians:
                sphPoint = geom.SpherePoint(row[self.ra],
                                            row[self.decl],
                                            geom.radians)
            else:
                sphPoint = geom.SpherePoint(row[self.ra],
                                            row[self.decl],
                                            geom.degrees)
            return self.pixelator.index(sphPoint.getVector())

        # result_type='reduce' keeps the output a Series; int64 gives a
        # stable integer pixel id.
        return df.apply(computePixel, axis=1, result_type='reduce').astype('int64')

749 

750 

def fluxName(col):
    """Append the ``_instFlux`` suffix to a column name when missing."""
    suffix = '_instFlux'
    return col if col.endswith(suffix) else col + suffix

755 

756 

def fluxErrName(col):
    """Append the ``_instFluxErr`` suffix to a column name when missing."""
    suffix = '_instFluxErr'
    return col if col.endswith(suffix) else col + suffix

761 

762 

class Mag(Functor):
    """Compute calibrated magnitude

    Takes a `calib` argument, which returns the flux at mag=0
    as `calib.getFluxMag0()`. If not provided, then the default
    `fluxMag0` is 63095734448.0194, which is default for HSC.
    This default should be removed in DM-21955

    This calculation hides warnings about invalid values and dividing by zero.

    As for all functors, a `dataset` and `filt` kwarg should be provided upon
    initialization. Unlike the default `Functor`, however, the default dataset
    for a `Mag` is `'meas'`, rather than `'ref'`.

    Parameters
    ----------
    col : `str`
        Name of flux column from which to compute magnitude. Can be parseable
        by `lsst.pipe.tasks.functors.fluxName` function---that is, you can pass
        `'modelfit_CModel'` instead of `'modelfit_CModel_instFlux'`) and it will
        understand.
    calib : `lsst.afw.image.calib.Calib` (optional)
        Object that knows zero point.
    """
    _defaultDataset = 'meas'

    def __init__(self, col, calib=None, **kwargs):
        self.col = fluxName(col)
        self.calib = calib
        if calib is not None:
            self.fluxMag0 = calib.getFluxMag0()[0]
        else:
            # TO DO: DM-21955 Replace hard coded photometic calibration values
            self.fluxMag0 = 63095734448.0194

        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.col]

    def _func(self, df):
        # `np.warnings` (an accidental alias) was removed from NumPy;
        # `np.errstate` suppresses the invalid-value / divide-by-zero
        # warnings from log10 of non-positive fluxes at the source.
        with np.errstate(invalid='ignore', divide='ignore'):
            return -2.5*np.log10(df[self.col] / self.fluxMag0)

    @property
    def name(self):
        return f'mag_{self.col}'

813 

814 

class MagErr(Mag):
    """Compute calibrated magnitude uncertainty

    Takes the same `calib` object as `lsst.pipe.tasks.functors.Mag`.

    Parameters
    ----------
    col : `str`
        Name of flux column
    calib : `lsst.afw.image.calib.Calib` (optional)
        Object that knows zero point.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        if self.calib is not None:
            self.fluxMag0Err = self.calib.getFluxMag0()[1]
        else:
            self.fluxMag0Err = 0.

    @property
    def columns(self):
        # Flux column plus its error column.
        return [self.col, self.col + 'Err']

    def _func(self, df):
        # `np.warnings` (an accidental alias) was removed from NumPy;
        # `np.errstate` suppresses invalid-value / divide-by-zero warnings
        # from bad fluxes at the source.
        with np.errstate(invalid='ignore', divide='ignore'):
            fluxCol, fluxErrCol = self.columns
            # Propagate relative errors of the flux and the zero point in
            # quadrature.
            x = df[fluxErrCol] / df[fluxCol]
            y = self.fluxMag0Err / self.fluxMag0
            magErr = (2.5 / np.log(10.)) * np.sqrt(x*x + y*y)
            return magErr

    @property
    def name(self):
        return super().name + '_err'

851 

852 

class NanoMaggie(Mag):
    """Flux rescaled by the zero-point flux, in units of 1e-9 of `fluxMag0`.
    """

    def _func(self, df):
        ratio = df[self.col] / self.fluxMag0
        return ratio * 1e9

859 

860 

class MagDiff(Functor):
    """Functor to calculate magnitude difference between two flux columns.

    The docstring is now the first statement in the class body (it was
    previously placed after an assignment, making it a no-op string).

    Parameters
    ----------
    col1, col2 : `str`
        Flux column names (``_instFlux`` suffix optional); the result is
        ``mag(col1) - mag(col2)``.
    """
    _defaultDataset = 'meas'

    def __init__(self, col1, col2, **kwargs):
        self.col1 = fluxName(col1)
        self.col2 = fluxName(col2)
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.col1, self.col2]

    def _func(self, df):
        # `np.warnings` (an accidental alias) was removed from NumPy;
        # `np.errstate` suppresses invalid-value / divide-by-zero warnings
        # from non-positive fluxes at the source.
        with np.errstate(invalid='ignore', divide='ignore'):
            return -2.5*np.log10(df[self.col1]/df[self.col2])

    @property
    def name(self):
        return f'(mag_{self.col1} - mag_{self.col2})'

    @property
    def shortname(self):
        return f'magDiff_{self.col1}_{self.col2}'

888 

889 

class Color(Functor):
    """Compute the color between two filters

    Computes color by initializing two different `Mag`
    functors based on the `col` and filters provided, and
    then returning the difference.

    This is enabled by the `_func` expecting a dataframe with a
    multilevel column index, with both `'band'` and `'column'`,
    instead of just `'column'`, which is the `Functor` default.
    This is controlled by the `_dfLevels` attribute.

    Also of note, the default dataset for `Color` is `forced_src'`,
    whereas for `Mag` it is `'meas'`.

    Parameters
    ----------
    col : str
        Name of flux column from which to compute; same as would be passed to
        `lsst.pipe.tasks.functors.Mag`.

    filt2, filt1 : str
        Filters from which to compute magnitude difference.
        Color computed is `Mag(filt2) - Mag(filt1)`.
    """
    _defaultDataset = 'forced_src'
    _dfLevels = ('band', 'column')
    _defaultNoDup = True

    def __init__(self, col, filt2, filt1, **kwargs):
        self.col = fluxName(col)
        if filt2 == filt1:
            raise RuntimeError("Cannot compute Color for %s: %s - %s " % (col, filt2, filt1))
        self.filt2 = filt2
        self.filt1 = filt1

        # Component magnitude functors: _func evaluates each one on its
        # band's sub-frame and differences the results.
        self.mag2 = Mag(col, filt=filt2, **kwargs)
        self.mag1 = Mag(col, filt=filt1, **kwargs)

        # Functor.__init__ assigns self.filt, which is routed through the
        # no-op setter below, so a Color never holds a single filter.
        super().__init__(**kwargs)

    @property
    def filt(self):
        # A color spans two filters, so there is no single `filt`.
        return None

    @filt.setter
    def filt(self, filt):
        # Deliberate no-op: prevents Functor.__init__ (and CompositeFunctor's
        # filter propagation) from overriding the two-filter configuration.
        pass

    def _func(self, df):
        # df carries ('band', 'column') column levels; indexing by band
        # selects that band's sub-frame for each component Mag.
        mag2 = self.mag2._func(df[self.filt2])
        mag1 = self.mag1._func(df[self.filt1])
        return mag2 - mag1

    @property
    def columns(self):
        return [self.mag1.col, self.mag2.col]

    def multilevelColumns(self, parq, **kwargs):
        # Explicit (dataset, band, column) tuples, one per filter.
        return [(self.dataset, self.filt1, self.col), (self.dataset, self.filt2, self.col)]

    @property
    def name(self):
        return f'{self.filt2} - {self.filt1} ({self.col})'

    @property
    def shortname(self):
        return f"{self.col}_{self.filt2.replace('-', '')}m{self.filt1.replace('-', '')}"

958 

959 

class Labeller(Functor):
    """Base class for functors that attach a categorical label to each row.

    Its main purpose is to force ``dropna=False`` on every call so that a
    label (possibly the null label) is produced for every row.
    """
    _null_label = 'null'
    _allow_difference = False
    name = 'label'
    _force_str = False

    def __call__(self, parq, dropna=False, **kwargs):
        # Ignore whatever `dropna` the caller passed; labellers never drop rows.
        return super().__call__(parq, dropna=False, **kwargs)

970 

971 

class StarGalaxyLabeller(Labeller):
    """Label each source 'star' or 'galaxy' from the extendedness column.

    Sources with extendedness < 0.5 get code 1, others code 0, and rows
    with a null extendedness get code 2 (the null label).
    """
    _columns = ["base_ClassificationExtendedness_value"]
    _column = "base_ClassificationExtendedness_value"

    def _func(self, df):
        extendedness = df[self._columns][self._column]
        # Codes: 0 where extendedness >= 0.5, 1 where < 0.5, 2 where null.
        codes = (extendedness < 0.5).astype(int).mask(extendedness.isnull(), 2)

        # TODO: DM-21954 Look into veracity of inline comment below
        # are these backwards?
        categories = ['galaxy', 'star', self._null_label]
        labels = pd.Series(pd.Categorical.from_codes(codes, categories=categories),
                           index=extendedness.index, name='label')
        return labels.astype(str) if self._force_str else labels

990 

991 

class NumStarLabeller(Labeller):
    """Label each source by how many bands flagged it as a star.

    Bins the per-row star-flag count into 'noStar' (0), 'maybe'
    (intermediate), or 'star' (all bands).
    """
    _columns = ['numStarFlags']
    # NOTE(review): this mapping is not referenced by _func below;
    # apparently retained for external consumers.
    labels = {"star": 0, "maybe": 1, "notStar": 2}

    def _func(self, df):
        starCounts = df[self._columns][self._columns[0]]

        # Number of filters
        n = len(starCounts.unique()) - 1

        bins = [-1, 0, n - 1, n]
        label = pd.Series(pd.cut(starCounts, bins, labels=['noStar', 'maybe', 'star']),
                          index=starCounts.index, name='label')
        return label.astype(str) if self._force_str else label

1010 

1011 

class DeconvolvedMoments(Functor):
    """Compute deconvolved moments: source trace (HSM, falling back to SDSS)
    minus the PSF trace.
    """
    name = 'Deconvolved Moments'
    shortname = 'deconvolvedMoments'
    _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
                "ext_shapeHSM_HsmSourceMoments_yy",
                "base_SdssShape_xx", "base_SdssShape_yy",
                "ext_shapeHSM_HsmPsfMoments_xx",
                "ext_shapeHSM_HsmPsfMoments_yy")

    def _func(self, df):
        """Calculate deconvolved moments"""
        # Presence of the _xx column is taken to imply the _yy column too.
        if "ext_shapeHSM_HsmSourceMoments_xx" in df.columns:
            hsm = df["ext_shapeHSM_HsmSourceMoments_xx"] + df["ext_shapeHSM_HsmSourceMoments_yy"]
        else:
            # Must be a pandas Series (not a bare ndarray): the .where()
            # call below is a Series method, so the original ndarray
            # fallback would raise AttributeError.
            hsm = pd.Series(np.nan, index=df.index)
        sdss = df["base_SdssShape_xx"] + df["base_SdssShape_yy"]
        if "ext_shapeHSM_HsmPsfMoments_xx" in df.columns:
            psf = df["ext_shapeHSM_HsmPsfMoments_xx"] + df["ext_shapeHSM_HsmPsfMoments_yy"]
        else:
            # LSST does not have shape.sdss.psf. Could instead add base_PsfShape to catalog using
            # exposure.getPsf().computeShape(s.getCentroid()).getIxx()
            # raise TaskError("No psf shape parameter found in catalog")
            raise RuntimeError('No psf shape parameter found in catalog')

        # Use the HSM trace where it is finite, otherwise the SDSS trace.
        return hsm.where(np.isfinite(hsm), sdss) - psf

1037 

1038 

class SdssTraceSize(Functor):
    """Functor to calculate SDSS trace radius size for sources"""
    name = "SDSS Trace Size"
    shortname = 'sdssTrace'
    _columns = ("base_SdssShape_xx", "base_SdssShape_yy")

    def _func(self, df):
        # Trace radius: sqrt of half the sum of the second moments.
        trace = df["base_SdssShape_xx"] + df["base_SdssShape_yy"]
        return np.sqrt(0.5*trace)

1048 

1049 

class PsfSdssTraceSizeDiff(Functor):
    """Functor to calculate SDSS trace radius size difference (%) between object and psf model"""
    name = "PSF - SDSS Trace Size"
    shortname = 'psf_sdssTrace'
    _columns = ("base_SdssShape_xx", "base_SdssShape_yy",
                "base_SdssShape_psf_xx", "base_SdssShape_psf_yy")

    def _func(self, df):
        # Trace radii of the source and of the PSF model at the source.
        srcSize = np.sqrt(0.5*(df["base_SdssShape_xx"] + df["base_SdssShape_yy"]))
        psfSize = np.sqrt(0.5*(df["base_SdssShape_psf_xx"] + df["base_SdssShape_psf_yy"]))
        # Percent difference relative to the mean of the two sizes.
        return 100*(srcSize - psfSize)/(0.5*(srcSize + psfSize))

1062 

1063 

class HsmTraceSize(Functor):
    """Functor to calculate HSM trace radius size for sources"""
    name = 'HSM Trace Size'
    shortname = 'hsmTrace'
    _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
                "ext_shapeHSM_HsmSourceMoments_yy")

    def _func(self, df):
        # Trace radius: sqrt of half the sum of the HSM second moments.
        trace = (df["ext_shapeHSM_HsmSourceMoments_xx"]
                 + df["ext_shapeHSM_HsmSourceMoments_yy"])
        return np.sqrt(0.5*trace)

1075 

1076 

class PsfHsmTraceSizeDiff(Functor):
    """Functor to calculate HSM trace radius size difference (%) between object and psf model"""
    name = 'PSF - HSM Trace Size'
    shortname = 'psf_HsmTrace'
    _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
                "ext_shapeHSM_HsmSourceMoments_yy",
                "ext_shapeHSM_HsmPsfMoments_xx",
                "ext_shapeHSM_HsmPsfMoments_yy")

    def _func(self, df):
        # HSM trace radii of the source and of the PSF model.
        srcSize = np.sqrt(0.5*(df["ext_shapeHSM_HsmSourceMoments_xx"]
                               + df["ext_shapeHSM_HsmSourceMoments_yy"]))
        psfSize = np.sqrt(0.5*(df["ext_shapeHSM_HsmPsfMoments_xx"]
                               + df["ext_shapeHSM_HsmPsfMoments_yy"]))
        # Percent difference relative to the mean of the two sizes.
        return 100*(srcSize - psfSize)/(0.5*(srcSize + psfSize))

1093 

1094 

class HsmFwhm(Functor):
    """PSF FWHM in arcseconds derived from the HSM PSF moments."""
    name = 'HSM Psf FWHM'
    _columns = ('ext_shapeHSM_HsmPsfMoments_xx', 'ext_shapeHSM_HsmPsfMoments_yy')
    # TODO: DM-21403 pixel scale should be computed from the CD matrix or transform matrix
    pixelScale = 0.168
    SIGMA2FWHM = 2*np.sqrt(2*np.log(2))

    def _func(self, df):
        # Trace radius squared (pixels^2), then sigma -> FWHM -> arcseconds.
        traceSq = 0.5*(df['ext_shapeHSM_HsmPsfMoments_xx'] + df['ext_shapeHSM_HsmPsfMoments_yy'])
        return self.pixelScale*self.SIGMA2FWHM*np.sqrt(traceSq)

1105 

1106 

class E1(Functor):
    """Distortion-definition ellipticity component e1.

    e1 = (Ixx - Iyy) / (Ixx + Iyy)

    Parameters
    ----------
    colXX, colXY, colYY : `str`
        Names of the columns holding the second-moment components.
    """
    name = "Distortion Ellipticity (e1)"
    shortname = "Distortion"

    def __init__(self, colXX, colXY, colYY, **kwargs):
        self.colXX = colXX
        self.colXY = colXY
        self.colYY = colYY
        self._columns = [self.colXX, self.colXY, self.colYY]
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.colXX, self.colXY, self.colYY]

    def _func(self, df):
        # Parenthesize the numerator: the original expression
        # `xx - yy / (xx + yy)` evaluated as `xx - (yy / (xx + yy))` due to
        # operator precedence, which is not the distortion ellipticity
        # (compare E2 = 2*xy / (xx + yy)).
        return (df[self.colXX] - df[self.colYY]) / (df[self.colXX] + df[self.colYY])

1124 

1125 

class E2(Functor):
    """Distortion-definition ellipticity component e2.

    e2 = 2*Ixy / (Ixx + Iyy)
    """
    name = "Ellipticity e2"

    def __init__(self, colXX, colXY, colYY, **kwargs):
        self.colXX = colXX
        self.colXY = colXY
        self.colYY = colYY
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.colXX, self.colXY, self.colYY]

    def _func(self, df):
        trace = df[self.colXX] + df[self.colYY]
        return 2*df[self.colXY] / trace

1141 

1142 

class RadiusFromQuadrupole(Functor):
    """Determinant radius from quadrupole moments: (Ixx*Iyy - Ixy^2)^(1/4)."""

    def __init__(self, colXX, colXY, colYY, **kwargs):
        self.colXX = colXX
        self.colXY = colXY
        self.colYY = colYY
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.colXX, self.colXY, self.colYY]

    def _func(self, df):
        # Fourth root of the determinant of the quadrupole-moment matrix.
        determinant = df[self.colXX]*df[self.colYY] - df[self.colXY]**2
        return determinant**0.25

1157 

1158 

class LocalWcs(Functor):
    """Computations using the stored localWcs.

    Parameters
    ----------
    colCD_1_1, colCD_1_2, colCD_2_1, colCD_2_2 : `str`
        Names of the columns holding the [1, 1], [1, 2], [2, 1] and [2, 2]
        elements of the local WCS affine transform (CD matrix).
    """
    name = "LocalWcsOperations"

    def __init__(self,
                 colCD_1_1,
                 colCD_1_2,
                 colCD_2_1,
                 colCD_2_2,
                 **kwargs):
        self.colCD_1_1 = colCD_1_1
        self.colCD_1_2 = colCD_1_2
        self.colCD_2_1 = colCD_2_1
        self.colCD_2_2 = colCD_2_2
        super().__init__(**kwargs)

    def computeDeltaRaDec(self, x, y, cd11, cd12, cd21, cd22):
        """Apply the local CD matrix to pixel offsets, giving sky offsets.

        Parameters
        ----------
        x : `pandas.Series`
            X pixel coordinate.
        y : `pandas.Series`
            Y pixel coordinate.
        cd11 : `pandas.Series`
            [1, 1] element of the local Wcs affine transform.
        cd12 : `pandas.Series`
            [1, 2] element of the local Wcs affine transform.
        cd21 : `pandas.Series`
            [2, 1] element of the local Wcs affine transform.
        cd22 : `pandas.Series`
            [2, 2] element of the local Wcs affine transform.

        Returns
        -------
        raDecTuple : tuple
            RA and dec conversion of x and y given the local Wcs. Returned
            units are in radians.
        """
        # Plain 2x2 affine transform of the pixel offsets.
        return (x * cd11 + y * cd12, x * cd21 + y * cd22)

    def computeSkySeperation(self, ra1, dec1, ra2, dec2):
        """Compute the great-circle distance between two sky coordinates.

        Parameters
        ----------
        ra1 : `pandas.Series`
            Ra of the first coordinate in radians.
        dec1 : `pandas.Series`
            Dec of the first coordinate in radians.
        ra2 : `pandas.Series`
            Ra of the second coordinate in radians.
        dec2 : `pandas.Series`
            Dec of the second coordinate in radians.

        Returns
        -------
        dist : `pandas.Series`
            Distance on the sphere in radians.
        """
        deltaDec = dec2 - dec1
        deltaRa = ra2 - ra1
        # Haversine formula: numerically stable for small separations.
        return 2 * np.arcsin(
            np.sqrt(
                np.sin(deltaDec / 2) ** 2
                + np.cos(dec2) * np.cos(dec1) * np.sin(deltaRa / 2) ** 2))

    def getSkySeperationFromPixel(self, x1, y1, x2, y2, cd11, cd12, cd21, cd22):
        """Compute the distance on the sphere from (x1, y1) to (x2, y2).

        Parameters
        ----------
        x1 : `pandas.Series`
            X pixel coordinate.
        y1 : `pandas.Series`
            Y pixel coordinate.
        x2 : `pandas.Series`
            X pixel coordinate.
        y2 : `pandas.Series`
            Y pixel coordinate.
        cd11 : `pandas.Series`
            [1, 1] element of the local Wcs affine transform.
        cd12 : `pandas.Series`
            [1, 2] element of the local Wcs affine transform.
        cd21 : `pandas.Series`
            [2, 1] element of the local Wcs affine transform.
        cd22 : `pandas.Series`
            [2, 2] element of the local Wcs affine transform.

        Returns
        -------
        Distance : `pandas.Series`
            Distance on the sphere in radians.
        """
        ra1, dec1 = self.computeDeltaRaDec(x1, y1, cd11, cd12, cd21, cd22)
        ra2, dec2 = self.computeDeltaRaDec(x2, y2, cd11, cd12, cd21, cd22)
        # Great circle distance for small separations.
        return self.computeSkySeperation(ra1, dec1, ra2, dec2)

1264 

1265 

class ComputePixelScale(LocalWcs):
    """Compute the local pixel scale from the stored CDMatrix.
    """
    name = "PixelScale"

    @property
    def columns(self):
        return [self.colCD_1_1,
                self.colCD_1_2,
                self.colCD_2_1,
                self.colCD_2_2]

    def pixelScaleArcseconds(self, cd11, cd12, cd21, cd22):
        """Compute the local pixel to scale conversion in arcseconds.

        Parameters
        ----------
        cd11 : `pandas.Series`
            [1, 1] element of the local Wcs affine transform in radians.
        cd12 : `pandas.Series`
            [1, 2] element of the local Wcs affine transform in radians.
        cd21 : `pandas.Series`
            [2, 1] element of the local Wcs affine transform in radians.
        cd22 : `pandas.Series`
            [2, 2] element of the local Wcs affine transform in radians.

        Returns
        -------
        pixScale : `pandas.Series`
            Arcseconds per pixel at the location of the local WC
        """
        # Pixel scale is the sqrt of the |determinant| of the CD matrix,
        # converted from radians to arcseconds.
        return 3600 * np.degrees(np.sqrt(np.fabs(cd11 * cd22 - cd12 * cd21)))

    def _func(self, df):
        return self.pixelScaleArcseconds(df[self.colCD_1_1],
                                         df[self.colCD_1_2],
                                         df[self.colCD_2_1],
                                         df[self.colCD_2_2])

1306 

1307 

class ConvertPixelToArcseconds(ComputePixelScale):
    """Convert a value in units of pixels to units of arcseconds.

    Note: the original docstring was swapped with that of
    `ConvertPixelSqToArcsecondsSq`; `_func` below multiplies by the pixel
    scale exactly once, i.e. a first-power (linear) conversion.
    """

    def __init__(self,
                 col,
                 colCD_1_1,
                 colCD_1_2,
                 colCD_2_1,
                 colCD_2_2,
                 **kwargs):
        self.col = col
        super().__init__(colCD_1_1,
                         colCD_1_2,
                         colCD_2_1,
                         colCD_2_2,
                         **kwargs)

    @property
    def name(self):
        return f"{self.col}_asArcseconds"

    @property
    def columns(self):
        return [self.col,
                self.colCD_1_1,
                self.colCD_1_2,
                self.colCD_2_1,
                self.colCD_2_2]

    def _func(self, df):
        # Single multiplication by the pixel scale: pixels -> arcseconds.
        return df[self.col] * self.pixelScaleArcseconds(df[self.colCD_1_1],
                                                        df[self.colCD_1_2],
                                                        df[self.colCD_2_1],
                                                        df[self.colCD_2_2])

1343 

1344 

class ConvertPixelSqToArcsecondsSq(ComputePixelScale):
    """Convert a value in units of pixels squared to units of arcseconds squared.

    Note: the original docstring was swapped with that of
    `ConvertPixelToArcseconds`; `_func` below multiplies by the pixel scale
    twice, i.e. a squared conversion.
    """

    def __init__(self,
                 col,
                 colCD_1_1,
                 colCD_1_2,
                 colCD_2_1,
                 colCD_2_2,
                 **kwargs):
        self.col = col
        super().__init__(colCD_1_1,
                         colCD_1_2,
                         colCD_2_1,
                         colCD_2_2,
                         **kwargs)

    @property
    def name(self):
        return f"{self.col}_asArcsecondsSq"

    @property
    def columns(self):
        return [self.col,
                self.colCD_1_1,
                self.colCD_1_2,
                self.colCD_2_1,
                self.colCD_2_2]

    def _func(self, df):
        pixScale = self.pixelScaleArcseconds(df[self.colCD_1_1],
                                             df[self.colCD_1_2],
                                             df[self.colCD_2_1],
                                             df[self.colCD_2_2])
        # Squared quantity: apply the pixel scale twice.
        return df[self.col] * pixScale * pixScale

1381 

1382 

class ReferenceBand(Functor):
    """Name of the band used as the reference for merged measurements.

    Picks, per row, the band whose ``merge_measurement_*`` flag is set
    (True > False under idxmax) and strips the column prefix.
    """
    name = 'Reference Band'
    shortname = 'refBand'

    @property
    def columns(self):
        return ["merge_measurement_i",
                "merge_measurement_r",
                "merge_measurement_z",
                "merge_measurement_y",
                "merge_measurement_g",
                "merge_measurement_u"]

    def _func(self, df: pd.DataFrame) -> pd.Series:
        def _bandFromRow(row):
            # idxmax returns the column name holding the max value
            # (True > False), i.e. the set flag.
            return row.idxmax().replace('merge_measurement_', '')

        # astype('object') guarantees an object-dtype Series even when
        # df is empty.
        return df[self.columns].apply(_bandFromRow, axis=1,
                                      result_type='reduce').astype('object')

1405 

1406 

class Photometry(Functor):
    """Base class converting instrumental counts (dn) to calibrated
    fluxes (nJy) and AB magnitudes, with error propagation.

    Parameters
    ----------
    colFlux : `str`
        Name of the instrumental flux column.
    colFluxErr : `str`, optional
        Name of the associated error column.
    calib : optional
        Calibration object providing ``getFluxMag0()``; when `None`, the
        hard-coded ``COADD_ZP`` zeropoint is used.
    """
    # AB to NanoJansky (3631 Jansky)
    AB_FLUX_SCALE = (0 * u.ABmag).to_value(u.nJy)
    LOG_AB_FLUX_SCALE = 12.56
    FIVE_OVER_2LOG10 = 1.085736204758129569
    # TO DO: DM-21955 Replace hard coded photometic calibration values
    COADD_ZP = 27

    def __init__(self, colFlux, colFluxErr=None, calib=None, **kwargs):
        self.vhypot = np.vectorize(self.hypot)
        self.col = colFlux
        self.colFluxErr = colFluxErr

        self.calib = calib
        if calib is not None:
            self.fluxMag0, self.fluxMag0Err = calib.getFluxMag0()
        else:
            # No calib supplied: derive fluxMag0 from the coadd zeropoint.
            self.fluxMag0 = 1./np.power(10, -0.4*self.COADD_ZP)
            self.fluxMag0Err = 0.

        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.col]

    @property
    def name(self):
        return f'mag_{self.col}'

    @classmethod
    def hypot(cls, a, b):
        """Overflow-safe scalar hypot: |a|*sqrt(1 + (b/a)^2) with |a| >= |b|."""
        if np.abs(a) < np.abs(b):
            a, b = b, a
        if a == 0.:
            return 0.
        q = b/a
        return np.abs(a) * np.sqrt(1. + q*q)

    def dn2flux(self, dn, fluxMag0):
        """Convert instrumental counts to flux in nanojanskys."""
        return self.AB_FLUX_SCALE * dn / fluxMag0

    def dn2mag(self, dn, fluxMag0):
        """Convert instrumental counts to AB magnitude.

        Negative or zero counts produce NaN/inf silently.
        """
        # Use the stdlib warnings module directly: the `np.warnings` alias
        # used originally was removed in NumPy >= 1.24.
        import warnings
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', r'invalid value encountered')
            warnings.filterwarnings('ignore', r'divide by zero')
            return -2.5 * np.log10(dn/fluxMag0)

    def dn2fluxErr(self, dn, dnErr, fluxMag0, fluxMag0Err):
        """Propagate count and zeropoint errors to a flux error in nJy."""
        retVal = self.vhypot(dn * fluxMag0Err, dnErr * fluxMag0)
        retVal *= self.AB_FLUX_SCALE / fluxMag0 / fluxMag0
        return retVal

    def dn2MagErr(self, dn, dnErr, fluxMag0, fluxMag0Err):
        """Propagate count and zeropoint errors to a magnitude error."""
        retVal = self.dn2fluxErr(dn, dnErr, fluxMag0, fluxMag0Err) / self.dn2flux(dn, fluxMag0)
        return self.FIVE_OVER_2LOG10 * retVal

1463 

1464 

class NanoJansky(Photometry):
    """Calibrated flux in nanojanskys."""

    def _func(self, df):
        counts = df[self.col]
        return self.dn2flux(counts, self.fluxMag0)

1468 

1469 

class NanoJanskyErr(Photometry):
    """Error on the calibrated flux in nanojanskys."""

    @property
    def columns(self):
        return [self.col, self.colFluxErr]

    def _func(self, df):
        fluxErr = self.dn2fluxErr(df[self.col], df[self.colFluxErr],
                                  self.fluxMag0, self.fluxMag0Err)
        # dn2fluxErr goes through np.vectorize, so re-wrap as a Series.
        return pd.Series(fluxErr, index=df.index)

1478 

1479 

class Magnitude(Photometry):
    """Calibrated AB magnitude."""

    def _func(self, df):
        counts = df[self.col]
        return self.dn2mag(counts, self.fluxMag0)

1483 

1484 

class MagnitudeErr(Photometry):
    """Error on the calibrated AB magnitude."""

    @property
    def columns(self):
        return [self.col, self.colFluxErr]

    def _func(self, df):
        magErr = self.dn2MagErr(df[self.col], df[self.colFluxErr],
                                self.fluxMag0, self.fluxMag0Err)
        # dn2MagErr goes through np.vectorize, so re-wrap as a Series.
        return pd.Series(magErr, index=df.index)

1493 

1494 

class LocalPhotometry(Functor):
    """Base class for calibrating the specified instrument flux column using
    the local photometric calibration.

    Parameters
    ----------
    instFluxCol : `str`
        Name of the instrument flux column.
    instFluxErrCol : `str`
        Name of the assocated error columns for ``instFluxCol``.
    photoCalibCol : `str`
        Name of local calibration column.
    photoCalibErrCol : `str`
        Error associated with ``photoCalibCol``

    See also
    --------
    LocalPhotometry
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    """
    logNJanskyToAB = (1 * u.nJy).to_value(u.ABmag)

    def __init__(self,
                 instFluxCol,
                 instFluxErrCol,
                 photoCalibCol,
                 photoCalibErrCol,
                 **kwargs):
        self.instFluxCol = instFluxCol
        self.instFluxErrCol = instFluxErrCol
        self.photoCalibCol = photoCalibCol
        self.photoCalibErrCol = photoCalibErrCol
        super().__init__(**kwargs)

    def instFluxToNanojansky(self, instFlux, localCalib):
        """Convert instrument flux to nanojanskys.

        Parameters
        ----------
        instFlux : `numpy.ndarray` or `pandas.Series`
            Array of instrument flux measurements
        localCalib : `numpy.ndarray` or `pandas.Series`
            Array of local photometric calibration estimates.

        Returns
        -------
        calibFlux : `numpy.ndarray` or `pandas.Series`
            Array of calibrated flux measurements.
        """
        return instFlux * localCalib

    def instFluxErrToNanojanskyErr(self, instFlux, instFluxErr, localCalib, localCalibErr):
        """Convert instrument flux error to a nanojansky error.

        Parameters
        ----------
        instFlux : `numpy.ndarray` or `pandas.Series`
            Array of instrument flux measurements
        instFluxErr : `numpy.ndarray` or `pandas.Series`
            Errors on associated ``instFlux`` values
        localCalib : `numpy.ndarray` or `pandas.Series`
            Array of local photometric calibration estimates.
        localCalibErr : `numpy.ndarray` or `pandas.Series`
            Errors on associated ``localCalib`` values

        Returns
        -------
        calibFluxErr : `numpy.ndarray` or `pandas.Series`
            Errors on calibrated flux measurements.
        """
        # Standard propagation for flux = instFlux * localCalib.
        return np.hypot(instFluxErr * localCalib, instFlux * localCalibErr)

    def instFluxToMagnitude(self, instFlux, localCalib):
        """Convert instrument flux to an AB magnitude.

        Parameters
        ----------
        instFlux : `numpy.ndarray` or `pandas.Series`
            Array of instrument flux measurements
        localCalib : `numpy.ndarray` or `pandas.Series`
            Array of local photometric calibration estimates.

        Returns
        -------
        calibMag : `numpy.ndarray` or `pandas.Series`
            Array of calibrated AB magnitudes.
        """
        return -2.5 * np.log10(self.instFluxToNanojansky(instFlux, localCalib)) + self.logNJanskyToAB

    def instFluxErrToMagnitudeErr(self, instFlux, instFluxErr, localCalib, localCalibErr):
        """Convert instrument flux error to an AB magnitude error.

        Parameters
        ----------
        instFlux : `numpy.ndarray` or `pandas.Series`
            Array of instrument flux measurements
        instFluxErr : `numpy.ndarray` or `pandas.Series`
            Errors on associated ``instFlux`` values
        localCalib : `numpy.ndarray` or `pandas.Series`
            Array of local photometric calibration estimates.
        localCalibErr : `numpy.ndarray` or `pandas.Series`
            Errors on associated ``localCalib`` values

        Returns
        -------
        calibMagErr: `numpy.ndarray` or `pandas.Series`
            Error on calibrated AB magnitudes.
        """
        err = self.instFluxErrToNanojanskyErr(instFlux, instFluxErr, localCalib, localCalibErr)
        # magErr = (2.5/ln10) * fluxErr / flux. The denominator must be the
        # calibrated flux, i.e. instFluxToNanojansky(instFlux, localCalib);
        # the original passed instFluxErr as the calibration by mistake.
        return 2.5 / np.log(10) * err / self.instFluxToNanojansky(instFlux, localCalib)

1608 

1609 

class LocalNanojansky(LocalPhotometry):
    """Calibrated flux in nanojanskys from the local calibration column.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    """

    @property
    def columns(self):
        return [self.instFluxCol, self.photoCalibCol]

    @property
    def name(self):
        return f'flux_{self.instFluxCol}'

    def _func(self, df):
        instFlux = df[self.instFluxCol]
        localCalib = df[self.photoCalibCol]
        return self.instFluxToNanojansky(instFlux, localCalib)

1631 

1632 

class LocalNanojanskyErr(LocalPhotometry):
    """Error on the calibrated flux from the local calibration column.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    """

    @property
    def columns(self):
        return [self.instFluxCol, self.instFluxErrCol,
                self.photoCalibCol, self.photoCalibErrCol]

    @property
    def name(self):
        return f'fluxErr_{self.instFluxCol}'

    def _func(self, df):
        instFlux = df[self.instFluxCol]
        instFluxErr = df[self.instFluxErrCol]
        localCalib = df[self.photoCalibCol]
        localCalibErr = df[self.photoCalibErrCol]
        return self.instFluxErrToNanojanskyErr(instFlux, instFluxErr,
                                               localCalib, localCalibErr)

1656 

1657 

class LocalMagnitude(LocalPhotometry):
    """Calibrated AB magnitude from the local calibration column.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    """

    @property
    def columns(self):
        return [self.instFluxCol, self.photoCalibCol]

    @property
    def name(self):
        return f'mag_{self.instFluxCol}'

    def _func(self, df):
        instFlux = df[self.instFluxCol]
        localCalib = df[self.photoCalibCol]
        return self.instFluxToMagnitude(instFlux, localCalib)

1680 

1681 

class LocalMagnitudeErr(LocalPhotometry):
    """Error on the calibrated AB magnitude from the local calibration column.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    """

    @property
    def columns(self):
        return [self.instFluxCol, self.instFluxErrCol,
                self.photoCalibCol, self.photoCalibErrCol]

    @property
    def name(self):
        return f'magErr_{self.instFluxCol}'

    def _func(self, df):
        instFlux = df[self.instFluxCol]
        instFluxErr = df[self.instFluxErrCol]
        localCalib = df[self.photoCalibCol]
        localCalibErr = df[self.photoCalibErrCol]
        return self.instFluxErrToMagnitudeErr(instFlux, instFluxErr,
                                              localCalib, localCalibErr)

1707 

1708 

class LocalDipoleMeanFlux(LocalPhotometry):
    """Mean of the absolute values of the two dipole lobe fluxes,
    calibrated with the local photometric calibration.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    LocalDipoleMeanFlux
    LocalDipoleMeanFluxErr
    LocalDipoleDiffFlux
    LocalDipoleDiffFluxErr
    """
    def __init__(self,
                 instFluxPosCol,
                 instFluxNegCol,
                 instFluxPosErrCol,
                 instFluxNegErrCol,
                 photoCalibCol,
                 photoCalibErrCol,
                 **kwargs):
        self.instFluxNegCol = instFluxNegCol
        self.instFluxPosCol = instFluxPosCol
        self.instFluxNegErrCol = instFluxNegErrCol
        self.instFluxPosErrCol = instFluxPosErrCol
        self.photoCalibCol = photoCalibCol
        self.photoCalibErrCol = photoCalibErrCol
        # The negative-lobe columns are handed to the base class as the
        # nominal flux columns; `columns`/`name` are overridden below anyway.
        super().__init__(instFluxNegCol,
                         instFluxNegErrCol,
                         photoCalibCol,
                         photoCalibErrCol,
                         **kwargs)

    @property
    def columns(self):
        return [self.instFluxPosCol,
                self.instFluxNegCol,
                self.photoCalibCol]

    @property
    def name(self):
        return f'dipMeanFlux_{self.instFluxPosCol}_{self.instFluxNegCol}'

    def _func(self, df):
        localCalib = df[self.photoCalibCol]
        negFlux = np.fabs(self.instFluxToNanojansky(df[self.instFluxNegCol], localCalib))
        posFlux = np.fabs(self.instFluxToNanojansky(df[self.instFluxPosCol], localCalib))
        return 0.5*(negFlux + posFlux)

1756 

1757 

class LocalDipoleMeanFluxErr(LocalDipoleMeanFlux):
    """Compute the error on the absolute mean of dipole fluxes.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    LocalDipoleMeanFlux
    LocalDipoleMeanFluxErr
    LocalDipoleDiffFlux
    LocalDipoleDiffFluxErr
    """

    @property
    def columns(self):
        return [self.instFluxPosCol,
                self.instFluxNegCol,
                self.instFluxPosErrCol,
                self.instFluxNegErrCol,
                self.photoCalibCol,
                self.photoCalibErrCol]

    @property
    def name(self):
        return f'dipMeanFluxErr_{self.instFluxPosCol}_{self.instFluxNegCol}'

    def _func(self, df):
        # Error propagation for flux = 0.5*(|neg| + |pos|)*calib:
        # the calibration-error term must multiply the *sum* of the two
        # lobes. The original misplaced the closing parenthesis so that
        # photoCalibErr multiplied only the positive lobe (compare the
        # correctly-parenthesized LocalDipoleDiffFluxErr below).
        return 0.5*np.sqrt(
            ((np.fabs(df[self.instFluxNegCol]) + np.fabs(df[self.instFluxPosCol]))
             * df[self.photoCalibErrCol])**2
            + (df[self.instFluxNegErrCol]**2 + df[self.instFluxPosErrCol]**2)
            * df[self.photoCalibCol]**2)

1792 

1793 

class LocalDipoleDiffFlux(LocalDipoleMeanFlux):
    """Difference of the absolute values of the two dipole lobe fluxes.

    Value is (abs(pos) - abs(neg))

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    LocalDipoleMeanFlux
    LocalDipoleMeanFluxErr
    LocalDipoleDiffFlux
    LocalDipoleDiffFluxErr
    """

    @property
    def columns(self):
        return [self.instFluxPosCol,
                self.instFluxNegCol,
                self.photoCalibCol]

    @property
    def name(self):
        return f'dipDiffFlux_{self.instFluxPosCol}_{self.instFluxNegCol}'

    def _func(self, df):
        localCalib = df[self.photoCalibCol]
        posFlux = np.fabs(self.instFluxToNanojansky(df[self.instFluxPosCol], localCalib))
        negFlux = np.fabs(self.instFluxToNanojansky(df[self.instFluxNegCol], localCalib))
        return posFlux - negFlux

1824 

1825 

class LocalDipoleDiffFluxErr(LocalDipoleMeanFlux):
    """Compute the error on the absolute difference of dipole fluxes.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    LocalDipoleMeanFlux
    LocalDipoleMeanFluxErr
    LocalDipoleDiffFlux
    LocalDipoleDiffFluxErr
    """

    @property
    def columns(self):
        return [self.instFluxPosCol,
                self.instFluxNegCol,
                self.instFluxPosErrCol,
                self.instFluxNegErrCol,
                self.photoCalibCol,
                self.photoCalibErrCol]

    @property
    def name(self):
        return f'dipDiffFluxErr_{self.instFluxPosCol}_{self.instFluxNegCol}'

    def _func(self, df):
        # Variance contribution from the calibration error, applied to the
        # lobe difference.
        calibVarTerm = ((np.fabs(df[self.instFluxPosCol]) - np.fabs(df[self.instFluxNegCol]))
                        * df[self.photoCalibErrCol])**2
        # Variance contribution from the instrumental flux errors.
        fluxVarTerm = ((df[self.instFluxPosErrCol]**2 + df[self.instFluxNegErrCol]**2)
                       * df[self.photoCalibCol]**2)
        return np.sqrt(calibVarTerm + fluxVarTerm)

1860 

1861 

class Ratio(Functor):
    """Base class for returning the ratio of 2 columns.

    Can be used to compute a Signal to Noise ratio for any input flux.

    Division warnings (divide-by-zero, invalid value) are suppressed;
    such rows yield inf/NaN.

    Parameters
    ----------
    numerator : `str`
        Name of the column to use at the numerator in the ratio
    denominator : `str`
        Name of the column to use as the denominator in the ratio.
    """
    def __init__(self,
                 numerator,
                 denominator,
                 **kwargs):
        self.numerator = numerator
        self.denominator = denominator
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.numerator, self.denominator]

    @property
    def name(self):
        return f'ratio_{self.numerator}_{self.denominator}'

    def _func(self, df):
        # Use the stdlib warnings module directly: the `np.warnings` alias
        # used originally was removed in NumPy >= 1.24.
        import warnings
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', r'invalid value encountered')
            warnings.filterwarnings('ignore', r'divide by zero')
            return df[self.numerator] / df[self.denominator]