Coverage for python/lsst/pipe/tasks/functors.py: 40%

820 statements  

« prev     ^ index     » next       coverage.py v6.4.1, created at 2022-06-24 10:54 +0000

1# This file is part of pipe_tasks. 

2# 

3# LSST Data Management System 

4# This product includes software developed by the 

5# LSST Project (http://www.lsst.org/). 

6# See COPYRIGHT file at the top of the source tree. 

7# 

8# This program is free software: you can redistribute it and/or modify 

9# it under the terms of the GNU General Public License as published by 

10# the Free Software Foundation, either version 3 of the License, or 

11# (at your option) any later version. 

12# 

13# This program is distributed in the hope that it will be useful, 

14# but WITHOUT ANY WARRANTY; without even the implied warranty of 

15# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

16# GNU General Public License for more details. 

17# 

18# You should have received a copy of the LSST License Statement and 

19# the GNU General Public License along with this program. If not, 

20# see <https://www.lsstcorp.org/LegalNotices/>. 

21# 

import os.path
import re
import warnings
from itertools import product

import astropy.units as u
import numpy as np
import pandas as pd
import yaml
from astropy.coordinates import SkyCoord
from dustmaps.sfd import SFDQuery

from lsst.daf.persistence import doImport
from lsst.daf.butler import DeferredDatasetHandle
import lsst.geom as geom
import lsst.sphgeom as sphgeom

from .parquetTable import ParquetTable, MultilevelParquetTable

39 

40 

def init_fromDict(initDict, basePath='lsst.pipe.tasks.functors',
                  typeKey='functor', name=None):
    """Initialize an object defined in a dictionary

    The object needs to be importable as
        f'{basePath}.{initDict[typeKey]}'
    Positional arguments (if any) are taken from the "args" entry of the
    dictionary; a bare string there is treated as a single positional
    argument. Every other remaining entry of the dictionary is passed to
    the constructor as a keyword argument.
    This is used in `functors.CompositeFunctor.from_yaml` to initialize
    a composite functor from a specification in a YAML file.

    Parameters
    ----------
    initDict : dictionary
        Dictionary describing object's initialization.  Must contain
        an entry keyed by ``typeKey`` that is the name of the object,
        relative to ``basePath``.  The dictionary itself is not modified;
        a shallow copy is consumed instead.
    basePath : str
        Path relative to module in which ``initDict[typeKey]`` is defined.
    typeKey : str
        Key of ``initDict`` that is the name of the object
        (relative to `basePath`).
    name : str, optional
        Label of the object being built; only used in the error message
        emitted when construction fails.

    Returns
    -------
    element
        The constructed object.

    Raises
    ------
    Exception
        If the constructor raises, an exception of the same type is raised
        with a message naming the functor, chained to the original error.
    """
    initDict = initDict.copy()
    # TO DO: DM-21956 We should be able to define functors outside this module
    pythonType = doImport(f'{basePath}.{initDict.pop(typeKey)}')
    args = initDict.pop('args', [])
    if isinstance(args, str):
        # A lone string is one argument, not a sequence of characters.
        args = [args]
    try:
        element = pythonType(*args, **initDict)
    except Exception as e:
        message = f'Error in constructing functor "{name}" of type {pythonType.__name__} with args: {args}'
        # Chain explicitly so the original traceback is reported as the cause.
        raise type(e)(message, e.args) from e
    return element

78 

79 

class Functor(object):
    """Define and execute a calculation on a ParquetTable

    The `__call__` method accepts either a `ParquetTable` object or a
    `DeferredDatasetHandle`, and returns the
    result of the calculation as a single column.  Each functor defines what
    columns are needed for the calculation, and only these columns are read
    from the `ParquetTable`.

    The action of  `__call__` consists of two steps: first, loading the
    necessary columns from disk into memory as a `pandas.DataFrame` object;
    and second, performing the computation on this dataframe and returning the
    result.

    To define a new `Functor`, a subclass must define a `_func` method,
    that takes a `pandas.DataFrame` and returns result in a `pandas.Series`.
    In addition, it must define the following attributes

    * `_columns`: The columns necessary to perform the calculation
    * `name`: A name appropriate for a figure axis label
    * `shortname`: A name appropriate for use as a dictionary key

    On initialization, a `Functor` should declare what band (`filt` kwarg)
    and dataset (e.g. `'ref'`, `'meas'`, `'forced_src'`) it is intended to be
    applied to. This enables the `_get_data` method to extract the proper
    columns from the parquet file. If not specified, the dataset will fall back
    on the `_defaultDataset` attribute. If band is not specified and `dataset`
    is anything other than `'ref'`, then an error will be raised when trying to
    perform the calculation.

    Originally, `Functor` was set up to expect
    datasets formatted like the `deepCoadd_obj` dataset; that is, a
    dataframe with a multi-level column index, with the levels of the
    column index being `band`, `dataset`, and `column`.
    It has since been generalized to apply to dataframes without multi-level
    indices and multi-level indices with just `dataset` and `column` levels.
    In addition, the `_get_data` method that reads
    the dataframe from the `ParquetTable` will return a dataframe with column
    index levels defined by the `_dfLevels` attribute; by default, this is
    `column`.

    The `_dfLevels` attributes should generally not need to
    be changed, unless `_func` needs columns from multiple filters or datasets
    to do the calculation.
    An example of this is the `lsst.pipe.tasks.functors.Color` functor, for
    which `_dfLevels = ('band', 'column')`, and `_func` expects the dataframe
    it gets to have those levels in the column index.

    Parameters
    ----------
    filt : str
        Filter upon which to do the calculation

    dataset : str
        Dataset upon which to do the calculation
        (e.g., 'ref', 'meas', 'forced_src').

    noDup : bool, optional
        Stored and exposed through the `noDup` property; when `None`, the
        property falls back on the class-level `_defaultNoDup`.
    """

    _defaultDataset = 'ref'
    _dfLevels = ('column',)
    _defaultNoDup = False

    def __init__(self, filt=None, dataset=None, noDup=None):
        self.filt = filt
        self.dataset = dataset if dataset is not None else self._defaultDataset
        self._noDup = noDup

    @property
    def noDup(self):
        """Explicit ``noDup`` setting if one was given, else the class default
        """
        if self._noDup is not None:
            return self._noDup
        else:
            return self._defaultNoDup

    @property
    def columns(self):
        """Columns required to perform calculation
        """
        if not hasattr(self, '_columns'):
            raise NotImplementedError('Must define columns property or _columns attribute')
        return self._columns

    def _get_data_columnLevels(self, data, columnIndex=None):
        """Gets the names of the column index levels

        This should only be called in the context of a multilevel table.
        The logic here is to enable this to work both with the gen2 `MultilevelParquetTable`
        and with the gen3 `DeferredDatasetHandle`.

        Parameters
        ----------
        data : `MultilevelParquetTable` or `DeferredDatasetHandle`

        columnIndex (optional): pandas `Index` object
            if not passed, then it is read from the `DeferredDatasetHandle`

        Raises
        ------
        TypeError
            If no column index is available and ``data`` is not a
            `MultilevelParquetTable`.
        """
        if isinstance(data, DeferredDatasetHandle):
            if columnIndex is None:
                columnIndex = data.get(component="columns")
        # An explicit column index wins regardless of the type of ``data``
        # (``_colsFromDict`` relies on this by passing ``data=None``).
        if columnIndex is not None:
            return columnIndex.names
        if isinstance(data, MultilevelParquetTable):
            return data.columnLevels
        else:
            raise TypeError(f"Unknown type for data: {type(data)}!")

    def _get_data_columnLevelNames(self, data, columnIndex=None):
        """Gets the content of each of the column levels for a multilevel table

        Similar to `_get_data_columnLevels`, this enables backward compatibility with gen2.

        Mirrors original gen2 implementation within `pipe.tasks.parquetTable.MultilevelParquetTable`

        Returns
        -------
        columnLevelNames : `dict`
            Maps each level name to the list of unique values found at that
            level of the column index.
        """
        if isinstance(data, DeferredDatasetHandle):
            if columnIndex is None:
                columnIndex = data.get(component="columns")
        if columnIndex is not None:
            columnLevels = columnIndex.names
            columnLevelNames = {
                level: list(np.unique(np.array([c for c in columnIndex])[:, i]))
                for i, level in enumerate(columnLevels)
            }
            return columnLevelNames
        if isinstance(data, MultilevelParquetTable):
            return data.columnLevelNames
        else:
            raise TypeError(f"Unknown type for data: {type(data)}!")

    def _colsFromDict(self, colDict, columnIndex=None):
        """Converts dictionary column specification to a list of columns

        This mirrors the original gen2 implementation within `pipe.tasks.parquetTable.MultilevelParquetTable`

        Levels missing from ``colDict`` are expanded to every value of that
        level in ``columnIndex``; only combinations actually present in
        ``columnIndex`` are returned.
        """
        new_colDict = {}
        columnLevels = self._get_data_columnLevels(None, columnIndex=columnIndex)

        for i, lev in enumerate(columnLevels):
            if lev in colDict:
                if isinstance(colDict[lev], str):
                    new_colDict[lev] = [colDict[lev]]
                else:
                    new_colDict[lev] = colDict[lev]
            else:
                new_colDict[lev] = columnIndex.levels[i]

        levelCols = [new_colDict[lev] for lev in columnLevels]
        cols = list(product(*levelCols))
        colsAvailable = [col for col in cols if col in columnIndex]
        return colsAvailable

    def multilevelColumns(self, data, columnIndex=None, returnTuple=False):
        """Returns columns needed by functor from multilevel dataset

        To access tables with multilevel column structure, the `MultilevelParquetTable`
        or `DeferredDatasetHandle` need to be passed either a list of tuples or a
        dictionary.

        Parameters
        ----------
        data : `MultilevelParquetTable` or `DeferredDatasetHandle`

        columnIndex (optional): pandas `Index` object
            either passed or read in from `DeferredDatasetHandle`.

        `returnTuple` : bool
            If true, then return a list of tuples rather than the column dictionary
            specification.  This is set to `True` by `CompositeFunctor` in order to be able to
            combine columns from the various component functors.

        Raises
        ------
        ValueError
            If ``filt`` is unset, the table has a ``band`` level, and the
            dataset is not ``'ref'``.
        """
        if isinstance(data, DeferredDatasetHandle) and columnIndex is None:
            columnIndex = data.get(component="columns")

        # Confirm that the dataset has the column levels the functor is expecting it to have.
        columnLevels = self._get_data_columnLevels(data, columnIndex)

        columnDict = {'column': self.columns,
                      'dataset': self.dataset}
        if self.filt is None:
            columnLevelNames = self._get_data_columnLevelNames(data, columnIndex)
            if "band" in columnLevels:
                if self.dataset == "ref":
                    # With no band requested, the 'ref' dataset is read from
                    # the first band listed in the column index.
                    columnDict["band"] = columnLevelNames["band"][0]
                else:
                    raise ValueError(f"'filt' not set for functor {self.name}"
                                     f"(dataset {self.dataset}) "
                                     "and ParquetTable "
                                     "contains multiple filters in column index. "
                                     "Set 'filt' or set 'dataset' to 'ref'.")
        else:
            columnDict['band'] = self.filt

        if isinstance(data, MultilevelParquetTable):
            return data._colsFromDict(columnDict)
        elif isinstance(data, DeferredDatasetHandle):
            if returnTuple:
                return self._colsFromDict(columnDict, columnIndex=columnIndex)
            else:
                return columnDict

    def _func(self, df, dropna=True):
        """Perform the calculation on a dataframe; subclasses must override
        """
        raise NotImplementedError('Must define calculation on dataframe')

    def _get_columnIndex(self, data):
        """Return columnIndex

        Only a `DeferredDatasetHandle` is queried; any other input gives `None`.
        """

        if isinstance(data, DeferredDatasetHandle):
            return data.get(component="columns")
        else:
            return None

    def _get_data(self, data):
        """Retrieve dataframe necessary for calculation.

        The data argument can be a DataFrame, a ParquetTable instance, or a gen3 DeferredDatasetHandle

        Returns dataframe upon which `self._func` can act.

        N.B. while passing a raw pandas `DataFrame` *should* work here, it has not been tested.
        """
        if isinstance(data, pd.DataFrame):
            return data

        # First thing to do: check to see if the data source has a multilevel column index or not.
        columnIndex = self._get_columnIndex(data)
        is_multiLevel = isinstance(data, MultilevelParquetTable) or isinstance(columnIndex, pd.MultiIndex)

        # Simple single-level parquet table, gen2
        if isinstance(data, ParquetTable) and not is_multiLevel:
            columns = self.columns
            df = data.toDataFrame(columns=columns)
            return df

        # Get proper columns specification for this functor
        if is_multiLevel:
            columns = self.multilevelColumns(data, columnIndex=columnIndex)
        else:
            columns = self.columns

        if isinstance(data, MultilevelParquetTable):
            # Load in-memory dataframe with appropriate columns the gen2 way
            df = data.toDataFrame(columns=columns, droplevels=False)
        elif isinstance(data, DeferredDatasetHandle):
            # Load in-memory dataframe with appropriate columns the gen3 way
            df = data.get(parameters={"columns": columns})

        # Drop unnecessary column levels
        if is_multiLevel:
            df = self._setLevels(df)

        return df

    def _setLevels(self, df):
        """Drop every column-index level not listed in `_dfLevels`

        The column index of ``df`` is replaced in place and ``df`` is returned.
        """
        levelsToDrop = [n for n in df.columns.names if n not in self._dfLevels]
        df.columns = df.columns.droplevel(levelsToDrop)
        return df

    def _dropna(self, vals):
        """Return ``vals`` without its null entries
        """
        return vals.dropna()

    def __call__(self, data, dropna=False):
        """Load the required columns from ``data`` and run the calculation

        Errors raised while loading the data propagate to the caller.  Errors
        raised by `_func` are swallowed and replaced by the all-NaN result of
        `fail`.

        Parameters
        ----------
        data : `pandas.DataFrame`, `ParquetTable` or `DeferredDatasetHandle`
            Source of the columns.
        dropna : bool
            If true, drop null entries from the result.
        """
        # Load outside the ``try``: if loading fails there is no dataframe to
        # build a NaN fallback from, so the real error must surface instead
        # of an unbound-variable error from ``self.fail(df)``.
        df = self._get_data(data)
        try:
            vals = self._func(df)
        except Exception:
            vals = self.fail(df)
        if dropna:
            vals = self._dropna(vals)

        return vals

    def difference(self, data1, data2, **kwargs):
        """Computes difference between functor called on two different ParquetTable objects
        """
        return self(data1, **kwargs) - self(data2, **kwargs)

    def fail(self, df):
        """Return an all-NaN `pandas.Series` sharing the index of ``df``
        """
        return pd.Series(np.full(len(df), np.nan), index=df.index)

    @property
    def name(self):
        """Full name of functor (suitable for figure labels)
        """
        # NOTE(review): this returns the exception class rather than raising
        # it; left unchanged because `shortname` and error messages read it.
        return NotImplementedError

    @property
    def shortname(self):
        """Short name of functor (suitable for column name/dict key)
        """
        return self.name

373 

374 

class CompositeFunctor(Functor):
    """Perform multiple calculations at once on a catalog

    The role of a `CompositeFunctor` is to group together computations from
    multiple functors.  Instead of returning `pandas.Series` a
    `CompositeFunctor` returns a `pandas.Dataframe`, with the column names
    being the keys of `funcDict`.

    The `columns` attribute of a `CompositeFunctor` is the union of all columns
    in all the component functors.

    A `CompositeFunctor` does not use a `_func` method itself; rather,
    when a `CompositeFunctor` is called, all its columns are loaded
    at once, and the resulting dataframe is passed to the `_func` method of each component
    functor.  This has the advantage of only doing I/O (reading from parquet file) once,
    and works because each individual `_func` method of each component functor does not
    care if there are *extra* columns in the dataframe being passed; only that it must contain
    *at least* the `columns` it expects.

    An important and useful class method is `from_yaml`, which takes as argument an
    already-loaded YAML specification of a collection of functors; `from_file` loads
    that specification from a path first.

    Parameters
    ----------
    funcs : `dict` or `list`
        Dictionary or list of functors.  If a list, then it will be converted
        into a dictionary according to the `.shortname` attribute of each functor.
        Any `dict` subclass is accepted as a dictionary.
    """
    dataset = None

    def __init__(self, funcs, **kwargs):

        # isinstance (not an exact type comparison) so that dict subclasses
        # such as OrderedDict are not mistaken for a list of functors.
        if isinstance(funcs, dict):
            self.funcDict = funcs
        else:
            self.funcDict = {f.shortname: f for f in funcs}

        # Must exist before the base initializer assigns ``self.filt``,
        # which goes through the property setter below.
        self._filt = None

        super().__init__(**kwargs)

    @property
    def filt(self):
        """Filter applied to every component functor (`None` if unset)
        """
        return self._filt

    @filt.setter
    def filt(self, filt):
        if filt is not None:
            for f in self.funcDict.values():
                f.filt = filt
        self._filt = filt

    def update(self, new):
        """Add functors from a dictionary or another `CompositeFunctor`

        Raises
        ------
        TypeError
            If ``new`` is neither a `dict` nor a `CompositeFunctor`.
        """
        if isinstance(new, dict):
            self.funcDict.update(new)
        elif isinstance(new, CompositeFunctor):
            self.funcDict.update(new.funcDict)
        else:
            raise TypeError('Can only update with dictionary or CompositeFunctor.')

        # Make sure new functors have the same 'filt' set
        if self.filt is not None:
            self.filt = self.filt

    @property
    def columns(self):
        """Union of the columns required by all component functors
        """
        return list({col for f in self.funcDict.values() for col in f.columns})

    def multilevelColumns(self, data, **kwargs):
        """Union of the multilevel column tuples of all component functors
        """
        # Get the union of columns for all component functors.  Note the need to have `returnTuple=True` here.
        return list(
            {
                col
                for f in self.funcDict.values()
                for col in f.multilevelColumns(data, returnTuple=True, **kwargs)
            }
        )

    def __call__(self, data, **kwargs):
        """Apply the functor to the data table

        Parameters
        ----------
        data : `lsst.daf.butler.DeferredDatasetHandle`,
               `lsst.pipe.tasks.parquetTable.MultilevelParquetTable`,
               `lsst.pipe.tasks.parquetTable.ParquetTable`,
               or `pandas.DataFrame`.
            The table or a pointer to a table on disk from which columns can
            be accessed

        Returns
        -------
        valDf : `pandas.DataFrame`
            One column per entry of `funcDict`.  If ``dropna=True`` is passed
            as a keyword argument, rows containing any null value are removed.

        Raises
        ------
        RuntimeError
            If a component functor returns something that is not
            one-dimensional.
        """
        columnIndex = self._get_columnIndex(data)

        # First, determine whether data has a multilevel index (either gen2 or gen3)
        is_multiLevel = isinstance(data, MultilevelParquetTable) or isinstance(columnIndex, pd.MultiIndex)

        # Multilevel index, gen2 or gen3
        if is_multiLevel:
            columns = self.multilevelColumns(data, columnIndex=columnIndex)

            if isinstance(data, MultilevelParquetTable):
                # Read data into memory the gen2 way
                df = data.toDataFrame(columns=columns, droplevels=False)
            elif isinstance(data, DeferredDatasetHandle):
                # Read data into memory the gen3 way
                df = data.get(parameters={"columns": columns})

            valDict = {}
            for k, f in self.funcDict.items():
                try:
                    subdf = f._setLevels(
                        df[f.multilevelColumns(data, returnTuple=True, columnIndex=columnIndex)]
                    )
                    valDict[k] = f._func(subdf)
                except Exception as e:
                    # Fall back to the functor's failure value.  ``subdf`` may
                    # be left over from an earlier iteration; if it was never
                    # bound at all, the original error is re-raised.
                    try:
                        valDict[k] = f.fail(subdf)
                    except NameError:
                        raise e

        else:
            if isinstance(data, DeferredDatasetHandle):
                # input if Gen3 deferLoad=True
                df = data.get(parameters={"columns": self.columns})
            elif isinstance(data, pd.DataFrame):
                # input if Gen3 deferLoad=False
                df = data
            else:
                # Original Gen2 input is type ParquetTable and the fallback
                df = data.toDataFrame(columns=self.columns)

            valDict = {k: f._func(df) for k, f in self.funcDict.items()}

            # Check that output columns are actually columns
            for name, colVal in valDict.items():
                if len(colVal.shape) != 1:
                    raise RuntimeError("Transformed column '%s' is not the shape of a column. "
                                       "It is shaped %s and type %s." % (name, colVal.shape, type(colVal)))

        try:
            valDf = pd.concat(valDict, axis=1)
        except TypeError:
            # Show what each functor produced before letting the error surface.
            print([(k, type(v)) for k, v in valDict.items()])
            raise

        if kwargs.get('dropna', False):
            valDf = valDf.dropna(how='any')

        return valDf

    @classmethod
    def renameCol(cls, col, renameRules):
        """Apply ``(old, new)`` rename rules to a column name

        Each rule is applied in turn when the (possibly already renamed)
        column name starts with ``old``.  Note that `str.replace` substitutes
        every occurrence of ``old``, not only the leading one.
        """
        if renameRules is None:
            return col
        for old, new in renameRules:
            if col.startswith(old):
                col = col.replace(old, new)
        return col

    @classmethod
    def from_file(cls, filename, **kwargs):
        """Build a `CompositeFunctor` from a YAML file on disk
        """
        # Allow environment variables in the filename.
        filename = os.path.expandvars(filename)
        with open(filename) as f:
            translationDefinition = yaml.safe_load(f)

        return cls.from_yaml(translationDefinition, **kwargs)

    @classmethod
    def from_yaml(cls, translationDefinition, **kwargs):
        """Build a `CompositeFunctor` from an already-loaded YAML specification

        The ``funcs`` entry is required; each of its values is passed to
        `init_fromDict`.  The optional ``calexpFlags``, ``refFlags``,
        ``forcedFlags`` and ``flags`` entries each add one `Column` functor
        per listed flag, reading from the ``calexp``, ``ref``, ``forced_src``
        and ``meas`` datasets respectively, keyed by the flag name after the
        optional ``flag_rename_rules`` have been applied.
        """
        funcs = {}
        for func, val in translationDefinition['funcs'].items():
            funcs[func] = init_fromDict(val, name=func)

        renameRules = translationDefinition.get('flag_rename_rules')

        if 'calexpFlags' in translationDefinition:
            for flag in translationDefinition['calexpFlags']:
                funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='calexp')

        if 'refFlags' in translationDefinition:
            for flag in translationDefinition['refFlags']:
                funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='ref')

        if 'forcedFlags' in translationDefinition:
            for flag in translationDefinition['forcedFlags']:
                funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='forced_src')

        if 'flags' in translationDefinition:
            for flag in translationDefinition['flags']:
                funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='meas')

        return cls(funcs, **kwargs)

575 

576 

def mag_aware_eval(df, expr):
    """Evaluate an expression on a DataFrame, knowing what the 'mag' function means

    Builds on `pandas.DataFrame.eval`, which parses and executes math on dataframes.
    Every ``mag(word)`` in the expression is rewritten as
    ``-2.5*log(word)/log(10)``.  If evaluating that fails for any reason, the
    expression is rewritten again with ``_instFlux`` appended to the argument
    of each ``mag`` call and evaluated once more.

    Parameters
    ----------
    df : pandas.DataFrame
        Dataframe on which to evaluate expression.

    expr : str
        Expression.
    """
    mag_call = r'mag\((\w+)\)'
    try:
        return df.eval(re.sub(mag_call, r'-2.5*log(\g<1>)/log(10)', expr))
    except Exception:  # Should check what actually gets raised
        with_suffix = re.sub(mag_call, r'-2.5*log(\g<1>_instFlux)/log(10)', expr)
        return df.eval(with_suffix)

597 

598 

class CustomFunctor(Functor):
    """Arbitrary computation on a catalog

    Column names (and thus the columns to be loaded from catalog) are found
    by finding all words and trying to ignore all "math-y" words.

    Parameters
    ----------
    expr : str
        Expression to evaluate, to be parsed and executed by `mag_aware_eval`.
    """
    _ignore_words = ('mag', 'sin', 'cos', 'exp', 'log', 'sqrt')

    def __init__(self, expr, **kwargs):
        self.expr = expr
        super().__init__(**kwargs)

    @property
    def name(self):
        """The expression itself serves as the functor name
        """
        return self.expr

    @property
    def columns(self):
        """Columns referenced by the expression

        Words made of letters and underscores that are not in `_ignore_words`
        are taken as column names.  An argument of ``mag(...)`` that does not
        already end in ``_instFlux`` is replaced by its ``_instFlux`` variant.
        """
        mag_args = re.findall(r'mag\(\s*(\w+)\s*\)', self.expr)
        words = re.findall(r'[a-zA-Z_]+', self.expr)

        candidates = [w for w in words if w not in self._ignore_words]
        bare_names = []
        for arg in mag_args:
            if re.search('_instFlux$', arg):
                candidates.append(arg)
            else:
                # The bare name is shorthand, not a real column.
                candidates.append(f'{arg}_instFlux')
                bare_names.append(arg)

        return list(set([c for c in candidates if c not in bare_names]))

    def _func(self, df):
        return mag_aware_eval(df, self.expr)

637 

638 

class Column(Functor):
    """Get column with specified name

    Parameters
    ----------
    col : str
        Name of the column to return; also used as the functor name.
    """

    def __init__(self, col, **kwargs):
        self.col = col
        super().__init__(**kwargs)

    @property
    def name(self):
        """Name of the functor, identical to the column name
        """
        return self.col

    @property
    def columns(self):
        """Single-element list holding the requested column
        """
        return [self.col]

    def _func(self, df):
        # Plain lookup; no transformation is applied.
        return df[self.col]

657 

658 

class Index(Functor):
    """Return the value of the index for each object
    """

    columns = ['coord_ra']  # just a dummy; something has to be here
    _defaultDataset = 'ref'
    _defaultNoDup = True

    def _func(self, df):
        # The result is the dataframe index, indexed by itself.
        row_index = df.index
        return pd.Series(row_index, index=row_index)

669 

670 

class IDColumn(Column):
    """Return the dataframe index as the value of the ``id`` column
    """
    col = 'id'
    _allow_difference = False
    _defaultNoDup = True

    def _func(self, df):
        # The values come from the index of the dataframe, not from a column.
        row_index = df.index
        return pd.Series(row_index, index=row_index)

678 

679 

class FootprintNPix(Column):
    """`Column` with ``base_Footprint_nPix`` as its class-level column name
    """
    col = 'base_Footprint_nPix'

682 

683 

class CoordColumn(Column):
    """Base class for coordinate column, in degrees

    When `_radians` is true (the default) the stored values are multiplied
    by ``180 / pi``; otherwise they are returned unchanged.
    """
    _radians = True

    def _func(self, df):
        # Must not modify original column in case that column is used by another functor
        if self._radians:
            return df[self.col] * 180 / np.pi
        return df[self.col]

696 

697 

class RAColumn(CoordColumn):
    """Right Ascension, in degrees

    Reads the ``coord_ra`` column.
    """
    name = 'RA'
    _defaultNoDup = True

    def __init__(self, **kwargs):
        super().__init__('coord_ra', **kwargs)

    def __call__(self, catalog, **kwargs):
        # Pure pass-through to the parent implementation.
        return super().__call__(catalog, **kwargs)

709 

710 

class DecColumn(CoordColumn):
    """Declination, in degrees

    Reads the ``coord_dec`` column.
    """
    name = 'Dec'
    _defaultNoDup = True

    def __init__(self, **kwargs):
        super().__init__('coord_dec', **kwargs)

    def __call__(self, catalog, **kwargs):
        # Pure pass-through to the parent implementation.
        return super().__call__(catalog, **kwargs)

722 

723 

class HtmIndex20(Functor):
    """Compute the level 20 HtmIndex for the catalog.

    Parameters
    ----------
    ra : str
        Name of the column holding the first coordinate passed to
        `lsst.geom.SpherePoint`.
    decl : str
        Name of the column holding the second coordinate passed to
        `lsst.geom.SpherePoint`.

    Notes
    -----
    This functor was implemented to satisfy requirements of old APDB interface
    which required ``pixelId`` column in DiaObject with HTM20 index. APDB
    interface had migrated to not need that information, but we keep this
    class in case it may be useful for something else.
    """
    name = "Htm20"
    htmLevel = 20
    _radians = True

    def __init__(self, ra, decl, **kwargs):
        self.pixelator = sphgeom.HtmPixelization(self.htmLevel)
        self.ra = ra
        self.decl = decl
        self._columns = [self.ra, self.decl]
        super().__init__(**kwargs)

    def _func(self, df):
        # Angular unit in which the stored coordinates are interpreted.
        angleUnit = geom.radians if self._radians else geom.degrees

        def computePixel(row):
            sphPoint = geom.SpherePoint(row[self.ra], row[self.decl], angleUnit)
            return self.pixelator.index(sphPoint.getVector())

        pixels = df.apply(computePixel, axis=1, result_type='reduce')
        return pixels.astype('int64')

759 

760 

def fluxName(col):
    """Return ``col`` with ``_instFlux`` appended unless it already ends with it
    """
    suffix = '_instFlux'
    return col if col.endswith(suffix) else col + suffix

765 

766 

def fluxErrName(col):
    """Return ``col`` with ``_instFluxErr`` appended unless it already ends with it
    """
    suffix = '_instFluxErr'
    return col if col.endswith(suffix) else col + suffix

771 

772 

class Mag(Functor):
    """Compute calibrated magnitude

    Takes a `calib` argument, which returns the flux at mag=0
    as the first element of `calib.getFluxMag0()`.  If not provided, then the default
    `fluxMag0` is 63095734448.0194, which is default for HSC.
    This default should be removed in DM-21955

    This calculation hides warnings about invalid values and dividing by zero.

    As for all functors, a `dataset` and `filt` kwarg should be provided upon
    initialization.  Unlike the default `Functor`, however, the default dataset
    for a `Mag` is `'meas'`, rather than `'ref'`.

    Parameters
    ----------
    col : `str`
        Name of flux column from which to compute magnitude.  Can be parseable
        by `lsst.pipe.tasks.functors.fluxName` function---that is, you can pass
        `'modelfit_CModel'` instead of `'modelfit_CModel_instFlux'`) and it will
        understand.
    calib : `lsst.afw.image.calib.Calib` (optional)
        Object that knows zero point.
    """
    _defaultDataset = 'meas'

    def __init__(self, col, calib=None, **kwargs):
        self.col = fluxName(col)
        self.calib = calib
        if calib is not None:
            self.fluxMag0 = calib.getFluxMag0()[0]
        else:
            # TO DO: DM-21955 Replace hard coded photometic calibration values
            self.fluxMag0 = 63095734448.0194

        super().__init__(**kwargs)

    @property
    def columns(self):
        """Single-element list holding the flux column
        """
        return [self.col]

    def _func(self, df):
        # Use the standard-library ``warnings`` module directly: ``np.warnings``
        # was only an accidental alias of it and is absent from recent NumPy.
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', r'invalid value encountered')
            warnings.filterwarnings('ignore', r'divide by zero')
            return -2.5*np.log10(df[self.col] / self.fluxMag0)

    @property
    def name(self):
        """Functor name, ``mag_`` followed by the flux column name
        """
        return f'mag_{self.col}'

823 

824 

class MagErr(Mag):
    """Compute calibrated magnitude uncertainty

    Takes the same `calib` object as `lsst.pipe.tasks.functors.Mag`.  The
    uncertainty on the zero-point flux is the second element of
    `calib.getFluxMag0()`, or zero when no `calib` is given.

    This calculation hides warnings about invalid values and dividing by zero.

    Parameters
    ----------
    col : `str`
        Name of flux column
    calib : `lsst.afw.image.calib.Calib` (optional)
        Object that knows zero point.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        if self.calib is not None:
            self.fluxMag0Err = self.calib.getFluxMag0()[1]
        else:
            self.fluxMag0Err = 0.

    @property
    def columns(self):
        """The flux column followed by its ``Err`` counterpart
        """
        return [self.col, self.col + 'Err']

    def _func(self, df):
        # Use the standard-library ``warnings`` module directly: ``np.warnings``
        # was only an accidental alias of it and is absent from recent NumPy.
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', r'invalid value encountered')
            warnings.filterwarnings('ignore', r'divide by zero')
            fluxCol, fluxErrCol = self.columns
            # Relative errors of the flux and of the zero-point flux,
            # combined in quadrature.
            x = df[fluxErrCol] / df[fluxCol]
            y = self.fluxMag0Err / self.fluxMag0
            magErr = (2.5 / np.log(10.)) * np.sqrt(x*x + y*y)
            return magErr

    @property
    def name(self):
        """Name of the parent `Mag` functor with ``_err`` appended
        """
        return super().name + '_err'

861 

862 

class NanoMaggie(Mag):
    """Return the flux column divided by `fluxMag0` and multiplied by 1e9
    """

    def _func(self, df):
        flux_ratio = df[self.col] / self.fluxMag0
        return flux_ratio * 1e9

869 

870 

class MagDiff(Functor):
    """Functor to calculate magnitude difference

    Computes ``-2.5*log10(flux1/flux2)`` from two flux columns, hiding
    warnings about invalid values and dividing by zero.

    Parameters
    ----------
    col1, col2 : `str`
        Names of the two flux columns; each is passed through
        `lsst.pipe.tasks.functors.fluxName`.
    """
    _defaultDataset = 'meas'

    def __init__(self, col1, col2, **kwargs):
        self.col1 = fluxName(col1)
        self.col2 = fluxName(col2)
        super().__init__(**kwargs)

    @property
    def columns(self):
        """The two flux columns, in the order given at construction
        """
        return [self.col1, self.col2]

    def _func(self, df):
        # Use the standard-library ``warnings`` module directly: ``np.warnings``
        # was only an accidental alias of it and is absent from recent NumPy.
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', r'invalid value encountered')
            warnings.filterwarnings('ignore', r'divide by zero')
            return -2.5*np.log10(df[self.col1]/df[self.col2])

    @property
    def name(self):
        return f'(mag_{self.col1} - mag_{self.col2})'

    @property
    def shortname(self):
        return f'magDiff_{self.col1}_{self.col2}'

898 

899 

class Color(Functor):
    """Compute the color between two filters

    Computes color by initializing two different `Mag`
    functors based on the `col` and filters provided, and
    then returning the difference.

    This is enabled by the `_func` expecting a dataframe with a
    multilevel column index, with both `'band'` and `'column'`,
    instead of just `'column'`, which is the `Functor` default.
    This is controlled by the `_dfLevels` attribute.

    Also of note, the default dataset for `Color` is `'forced_src'`,
    whereas for `Mag` it is `'meas'`.

    Parameters
    ----------
    col : str
        Name of flux column from which to compute; same as would be passed to
        `lsst.pipe.tasks.functors.Mag`.

    filt2, filt1 : str
        Filters from which to compute magnitude difference.
        Color computed is `Mag(filt2) - Mag(filt1)`.

    Raises
    ------
    RuntimeError
        If ``filt2`` and ``filt1`` are equal.
    """
    _defaultDataset = 'forced_src'
    _dfLevels = ('band', 'column')
    _defaultNoDup = True

    def __init__(self, col, filt2, filt1, **kwargs):
        self.col = fluxName(col)
        if filt2 == filt1:
            raise RuntimeError("Cannot compute Color for %s: %s - %s " % (col, filt2, filt1))
        self.filt2, self.filt1 = filt2, filt1

        # One magnitude functor per band; both share the remaining kwargs.
        self.mag2 = Mag(col, filt=filt2, **kwargs)
        self.mag1 = Mag(col, filt=filt1, **kwargs)

        super().__init__(**kwargs)

    @property
    def filt(self):
        """Always `None`; the two bands are held in `filt1` and `filt2`
        """
        return None

    @filt.setter
    def filt(self, filt):
        # Assignments are deliberately ignored.
        pass

    def _func(self, df):
        # ``df`` is indexed by band first, so each selection yields the
        # single-band frame the corresponding `Mag` functor expects.
        second = self.mag2._func(df[self.filt2])
        first = self.mag1._func(df[self.filt1])
        return second - first

    @property
    def columns(self):
        """Flux column of each of the two `Mag` functors
        """
        return [self.mag1.col, self.mag2.col]

    def multilevelColumns(self, parq, **kwargs):
        """Return the ``(dataset, band, column)`` tuple for each of the two bands
        """
        return [(self.dataset, band, self.col) for band in (self.filt1, self.filt2)]

    @property
    def name(self):
        return f'{self.filt2} - {self.filt1} ({self.col})'

    @property
    def shortname(self):
        # Dashes are stripped from the band names.
        return f"{self.col}_{self.filt2.replace('-', '')}m{self.filt1.replace('-', '')}"

968 

969 

class Labeller(Functor):
    """Base class for labelling functors; its calls never drop null rows.

    ``__call__`` always forwards ``dropna=False`` to the parent
    implementation, whatever value the caller supplied.
    """
    name = 'label'
    _null_label = 'null'
    _allow_difference = False
    _force_str = False

    def __call__(self, parq, dropna=False, **kwargs):
        # ``dropna`` is accepted for signature compatibility but ignored.
        kwargs['dropna'] = False
        return super().__call__(parq, **kwargs)

980 

981 

class StarGalaxyLabeller(Labeller):
    """Label rows as ``'star'``, ``'galaxy'`` or the null label from the
    ``base_ClassificationExtendedness_value`` column.
    """
    _columns = ["base_ClassificationExtendedness_value"]
    _column = "base_ClassificationExtendedness_value"

    def _func(self, df):
        """Return a categorical ``'label'`` series indexed like the input.

        Values below 0.5 get code 1 (``'star'``), other non-null values get
        code 0 (``'galaxy'``) and null values get code 2 (the null label).
        """
        extendedness = df[self._columns][self._column]
        codes = (extendedness < 0.5).astype(int).mask(extendedness.isnull(), 2)

        # TODO: DM-21954 Look into veracity of inline comment below
        # are these backwards?
        categories = ['galaxy', 'star', self._null_label]
        categorical = pd.Categorical.from_codes(codes, categories=categories)
        label = pd.Series(categorical, index=extendedness.index, name='label')
        if not self._force_str:
            return label
        return label.astype(str)

1000 

1001 

class NumStarLabeller(Labeller):
    """Bin the ``numStarFlags`` column into ``'noStar'``, ``'maybe'`` and
    ``'star'`` labels.
    """
    _columns = ['numStarFlags']
    labels = {"star": 0, "maybe": 1, "notStar": 2}

    def _func(self, df):
        """Return a ``'label'`` series obtained by cutting the flag counts."""
        flags = df[self._columns][self._columns[0]]

        # One less than the number of distinct values; the original code
        # treats this as the number of filters.
        nFilt = len(flags.unique()) - 1

        binEdges = [-1, 0, nFilt - 1, nFilt]
        binned = pd.cut(flags, binEdges, labels=['noStar', 'maybe', 'star'])
        label = pd.Series(binned, index=flags.index, name='label')

        if not self._force_str:
            return label
        return label.astype(str)

1020 

1021 

class DeconvolvedMoments(Functor):
    """Source trace (``xx + yy``) minus the PSF trace.

    The HSM source moments are used where they are finite; the SDSS shape
    moments are substituted elsewhere.
    """
    name = 'Deconvolved Moments'
    shortname = 'deconvolvedMoments'
    _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
                "ext_shapeHSM_HsmSourceMoments_yy",
                "base_SdssShape_xx", "base_SdssShape_yy",
                "ext_shapeHSM_HsmPsfMoments_xx",
                "ext_shapeHSM_HsmPsfMoments_yy")

    def _func(self, df):
        """Calculate deconvolved moments.

        Parameters
        ----------
        df : `pandas.DataFrame`
            Table with the SDSS shape columns and the HSM PSF moment
            columns; the HSM source moment columns are optional.

        Returns
        -------
        moments : `pandas.Series`
            Source trace minus PSF trace.

        Raises
        ------
        RuntimeError
            Raised if the HSM PSF moment columns are not in ``df``.
        """
        if "ext_shapeHSM_HsmSourceMoments_xx" in df.columns:  # _xx added by tdm
            hsm = df["ext_shapeHSM_HsmSourceMoments_xx"] + df["ext_shapeHSM_HsmSourceMoments_yy"]
        else:
            # Must be a Series, not a bare ndarray: ``.where`` below is a
            # pandas method, and the index has to line up with ``sdss``.
            hsm = pd.Series(np.full(len(df), np.nan), index=df.index)
        sdss = df["base_SdssShape_xx"] + df["base_SdssShape_yy"]
        if "ext_shapeHSM_HsmPsfMoments_xx" in df.columns:
            psf = df["ext_shapeHSM_HsmPsfMoments_xx"] + df["ext_shapeHSM_HsmPsfMoments_yy"]
        else:
            # LSST does not have shape.sdss.psf.  Could instead add base_PsfShape to catalog using
            # exposure.getPsf().computeShape(s.getCentroid()).getIxx()
            raise RuntimeError('No psf shape parameter found in catalog')

        # Keep HSM where finite, fall back to SDSS otherwise.
        return hsm.where(np.isfinite(hsm), sdss) - psf

1047 

1048 

class SdssTraceSize(Functor):
    """Functor to calculate SDSS trace radius size for sources."""
    name = "SDSS Trace Size"
    shortname = 'sdssTrace'
    _columns = ("base_SdssShape_xx", "base_SdssShape_yy")

    def _func(self, df):
        """Return ``sqrt(0.5*(xx + yy))`` of the SDSS shape moments."""
        trace = df["base_SdssShape_xx"] + df["base_SdssShape_yy"]
        return np.sqrt(0.5*trace)

1058 

1059 

class PsfSdssTraceSizeDiff(Functor):
    """Functor to calculate SDSS trace radius size difference (%) between
    object and psf model.
    """
    name = "PSF - SDSS Trace Size"
    shortname = 'psf_sdssTrace'
    _columns = ("base_SdssShape_xx", "base_SdssShape_yy",
                "base_SdssShape_psf_xx", "base_SdssShape_psf_yy")

    def _func(self, df):
        """Return ``100*(src - psf)/mean(src, psf)`` of the trace radii."""
        srcTrace = df["base_SdssShape_xx"] + df["base_SdssShape_yy"]
        srcRadius = np.sqrt(0.5*srcTrace)
        psfTrace = df["base_SdssShape_psf_xx"] + df["base_SdssShape_psf_yy"]
        psfRadius = np.sqrt(0.5*psfTrace)
        return 100*(srcRadius - psfRadius)/(0.5*(srcRadius + psfRadius))

1072 

1073 

class HsmTraceSize(Functor):
    """Functor to calculate HSM trace radius size for sources."""
    name = 'HSM Trace Size'
    shortname = 'hsmTrace'
    _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
                "ext_shapeHSM_HsmSourceMoments_yy")

    def _func(self, df):
        """Return ``sqrt(0.5*(xx + yy))`` of the HSM source moments."""
        trace = (df["ext_shapeHSM_HsmSourceMoments_xx"]
                 + df["ext_shapeHSM_HsmSourceMoments_yy"])
        return np.sqrt(0.5*trace)

1085 

1086 

class PsfHsmTraceSizeDiff(Functor):
    """Functor to calculate HSM trace radius size difference (%) between
    object and psf model.
    """
    name = 'PSF - HSM Trace Size'
    shortname = 'psf_HsmTrace'
    _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
                "ext_shapeHSM_HsmSourceMoments_yy",
                "ext_shapeHSM_HsmPsfMoments_xx",
                "ext_shapeHSM_HsmPsfMoments_yy")

    def _func(self, df):
        """Return ``100*(src - psf)/mean(src, psf)`` of the trace radii."""
        srcTrace = (df["ext_shapeHSM_HsmSourceMoments_xx"]
                    + df["ext_shapeHSM_HsmSourceMoments_yy"])
        srcRadius = np.sqrt(0.5*srcTrace)
        psfTrace = (df["ext_shapeHSM_HsmPsfMoments_xx"]
                    + df["ext_shapeHSM_HsmPsfMoments_yy"])
        psfRadius = np.sqrt(0.5*psfTrace)
        return 100*(srcRadius - psfRadius)/(0.5*(srcRadius + psfRadius))

1103 

1104 

class HsmFwhm(Functor):
    """FWHM derived from the HSM PSF moments, scaled by a fixed pixel scale."""
    name = 'HSM Psf FWHM'
    _columns = ('ext_shapeHSM_HsmPsfMoments_xx', 'ext_shapeHSM_HsmPsfMoments_yy')
    # TODO: DM-21403 pixel scale should be computed from the CD matrix or transform matrix
    pixelScale = 0.168
    # Factor between a Gaussian sigma and its full width at half maximum.
    SIGMA2FWHM = 2*np.sqrt(2*np.log(2))

    def _func(self, df):
        """Return ``pixelScale * SIGMA2FWHM * sqrt(0.5*(xx + yy))``."""
        sigma = np.sqrt(
            0.5*(df['ext_shapeHSM_HsmPsfMoments_xx'] + df['ext_shapeHSM_HsmPsfMoments_yy']))
        return self.pixelScale*self.SIGMA2FWHM*sigma

1115 

1116 

class E1(Functor):
    """Distortion ellipticity component ``e1 = (xx - yy)/(xx + yy)``.

    Parameters
    ----------
    colXX, colXY, colYY : `str`
        Names of the columns holding the xx, xy and yy second moments.
    """
    name = "Distortion Ellipticity (e1)"
    shortname = "Distortion"

    def __init__(self, colXX, colXY, colYY, **kwargs):
        self.colXX = colXX
        self.colXY = colXY
        self.colYY = colYY
        self._columns = [self.colXX, self.colXY, self.colYY]
        super().__init__(**kwargs)

    @property
    def columns(self):
        """The three moment columns, in xx, xy, yy order."""
        return [self.colXX, self.colXY, self.colYY]

    def _func(self, df):
        """Return ``(xx - yy)/(xx + yy)`` element-wise."""
        # The numerator must be parenthesised: without it, division binds
        # tighter and the result is ``xx - yy/(xx + yy)``, which is not an
        # ellipticity.
        return (df[self.colXX] - df[self.colYY]) / (df[self.colXX] + df[self.colYY])

1134 

1135 

class E2(Functor):
    """Ellipticity component ``e2 = 2*xy/(xx + yy)``.

    Parameters
    ----------
    colXX, colXY, colYY : `str`
        Names of the columns holding the xx, xy and yy second moments.
    """
    name = "Ellipticity e2"

    def __init__(self, colXX, colXY, colYY, **kwargs):
        self.colXX, self.colXY, self.colYY = colXX, colXY, colYY
        super().__init__(**kwargs)

    @property
    def columns(self):
        """The three moment columns, in xx, xy, yy order."""
        return [self.colXX, self.colXY, self.colYY]

    def _func(self, df):
        """Return ``2*xy/(xx + yy)`` element-wise."""
        crossTerm = 2*df[self.colXY]
        trace = df[self.colXX] + df[self.colYY]
        return crossTerm / trace

1151 

1152 

class RadiusFromQuadrupole(Functor):
    """Radius from second moments: ``(xx*yy - xy**2)**0.25``.

    Parameters
    ----------
    colXX, colXY, colYY : `str`
        Names of the columns holding the xx, xy and yy second moments.
    """

    def __init__(self, colXX, colXY, colYY, **kwargs):
        self.colXX, self.colXY, self.colYY = colXX, colXY, colYY
        super().__init__(**kwargs)

    @property
    def columns(self):
        """The three moment columns, in xx, xy, yy order."""
        return [self.colXX, self.colXY, self.colYY]

    def _func(self, df):
        """Return the fourth root of the moment-matrix determinant."""
        determinant = df[self.colXX]*df[self.colYY] - df[self.colXY]**2
        return determinant**0.25

1167 

1168 

class LocalWcs(Functor):
    """Computations using the stored localWcs.

    Parameters
    ----------
    colCD_1_1, colCD_1_2, colCD_2_1, colCD_2_2 : `str`
        Names of the columns holding the [1, 1], [1, 2], [2, 1] and [2, 2]
        elements of the local Wcs affine transform.
    """
    name = "LocalWcsOperations"

    def __init__(self,
                 colCD_1_1,
                 colCD_1_2,
                 colCD_2_1,
                 colCD_2_2,
                 **kwargs):
        self.colCD_1_1, self.colCD_1_2 = colCD_1_1, colCD_1_2
        self.colCD_2_1, self.colCD_2_2 = colCD_2_1, colCD_2_2
        super().__init__(**kwargs)

    def computeDeltaRaDec(self, x, y, cd11, cd12, cd21, cd22):
        """Apply the local Wcs affine transform to a pixel offset.

        Parameters
        ----------
        x : `pandas.Series`
            X pixel coordinate.
        y : `pandas.Series`
            Y pixel coordinate.
        cd11 : `pandas.Series`
            [1, 1] element of the local Wcs affine transform.
        cd12 : `pandas.Series`
            [1, 2] element of the local Wcs affine transform.
        cd21 : `pandas.Series`
            [2, 1] element of the local Wcs affine transform.
        cd22 : `pandas.Series`
            [2, 2] element of the local Wcs affine transform.

        Returns
        -------
        raDecTuple : tuple
            ``(x*cd11 + y*cd12, x*cd21 + y*cd22)``: the RA and dec
            conversion of x and y given the local Wcs.  Units are those of
            the transform elements (radians per the original
            documentation).
        """
        deltaRa = x * cd11 + y * cd12
        deltaDec = x * cd21 + y * cd22
        return (deltaRa, deltaDec)

    def computeSkySeperation(self, ra1, dec1, ra2, dec2):
        """Compute the angular separation between two sky positions.

        Uses the haversine formula.

        Parameters
        ----------
        ra1 : `pandas.Series`
            Ra of the first coordinate in radians.
        dec1 : `pandas.Series`
            Dec of the first coordinate in radians.
        ra2 : `pandas.Series`
            Ra of the second coordinate in radians.
        dec2 : `pandas.Series`
            Dec of the second coordinate in radians.

        Returns
        -------
        dist : `pandas.Series`
            Distance on the sphere in radians.
        """
        deltaDec = dec2 - dec1
        deltaRa = ra2 - ra1
        decTerm = np.sin(deltaDec / 2) ** 2
        raTerm = np.cos(dec2) * np.cos(dec1) * np.sin(deltaRa / 2) ** 2
        return 2 * np.arcsin(np.sqrt(decTerm + raTerm))

    def getSkySeperationFromPixel(self, x1, y1, x2, y2, cd11, cd12, cd21, cd22):
        """Compute the distance on the sphere from (x1, y1) to (x2, y2).

        Both pixel positions are converted with `computeDeltaRaDec` and the
        results are passed to `computeSkySeperation`.

        Parameters
        ----------
        x1 : `pandas.Series`
            X pixel coordinate of the first position.
        y1 : `pandas.Series`
            Y pixel coordinate of the first position.
        x2 : `pandas.Series`
            X pixel coordinate of the second position.
        y2 : `pandas.Series`
            Y pixel coordinate of the second position.
        cd11 : `pandas.Series`
            [1, 1] element of the local Wcs affine transform.
        cd12 : `pandas.Series`
            [1, 2] element of the local Wcs affine transform.
        cd21 : `pandas.Series`
            [2, 1] element of the local Wcs affine transform.
        cd22 : `pandas.Series`
            [2, 2] element of the local Wcs affine transform.

        Returns
        -------
        Distance : `pandas.Series`
            Separation as returned by `computeSkySeperation`.
        """
        first = self.computeDeltaRaDec(x1, y1, cd11, cd12, cd21, cd22)
        second = self.computeDeltaRaDec(x2, y2, cd11, cd12, cd21, cd22)
        # Great circle distance for small separations.
        return self.computeSkySeperation(first[0], first[1], second[0], second[1])

1274 

1275 

class ComputePixelScale(LocalWcs):
    """Compute the local pixel scale from the stored CDMatrix."""
    name = "PixelScale"

    @property
    def columns(self):
        """The four CD matrix columns, in [1,1], [1,2], [2,1], [2,2] order."""
        return [self.colCD_1_1, self.colCD_1_2, self.colCD_2_1, self.colCD_2_2]

    def pixelScaleArcseconds(self, cd11, cd12, cd21, cd22):
        """Compute the local pixel to scale conversion in arcseconds.

        The scale is the square root of the absolute determinant of the
        transform, converted from radians to arcseconds.

        Parameters
        ----------
        cd11 : `pandas.Series`
            [1, 1] element of the local Wcs affine transform in radians.
        cd12 : `pandas.Series`
            [1, 2] element of the local Wcs affine transform in radians.
        cd21 : `pandas.Series`
            [2, 1] element of the local Wcs affine transform in radians.
        cd22 : `pandas.Series`
            [2, 2] element of the local Wcs affine transform in radians.

        Returns
        -------
        pixScale : `pandas.Series`
            Arcseconds per pixel at the location of the local WCS.
        """
        determinant = cd11 * cd22 - cd12 * cd21
        return 3600 * np.degrees(np.sqrt(np.fabs(determinant)))

    def _func(self, df):
        """Return the pixel scale for every row of ``df``."""
        cd11 = df[self.colCD_1_1]
        cd12 = df[self.colCD_1_2]
        cd21 = df[self.colCD_2_1]
        cd22 = df[self.colCD_2_2]
        return self.pixelScaleArcseconds(cd11, cd12, cd21, cd22)

1316 

1317 

class ConvertPixelToArcseconds(ComputePixelScale):
    """Convert a value in units pixels to units arcseconds.

    The value is multiplied once by the local pixel scale.

    Parameters
    ----------
    col : `str`
        Name of the column to convert.
    colCD_1_1, colCD_1_2, colCD_2_1, colCD_2_2 : `str`
        Names of the columns holding the local Wcs affine transform.
    """

    def __init__(self,
                 col,
                 colCD_1_1,
                 colCD_1_2,
                 colCD_2_1,
                 colCD_2_2,
                 **kwargs):
        self.col = col
        super().__init__(colCD_1_1, colCD_1_2, colCD_2_1, colCD_2_2, **kwargs)

    @property
    def name(self):
        """Label of the form ``<col>_asArcseconds``."""
        return f"{self.col}_asArcseconds"

    @property
    def columns(self):
        """The value column followed by the four CD matrix columns."""
        return [self.col, self.colCD_1_1, self.colCD_1_2, self.colCD_2_1, self.colCD_2_2]

    def _func(self, df):
        """Return ``df[col]`` multiplied by the local pixel scale."""
        pixelValues = df[self.col]
        pixScale = self.pixelScaleArcseconds(df[self.colCD_1_1],
                                             df[self.colCD_1_2],
                                             df[self.colCD_2_1],
                                             df[self.colCD_2_2])
        return pixelValues * pixScale

1353 

1354 

class ConvertPixelSqToArcsecondsSq(ComputePixelScale):
    """Convert a value in units pixels squared to units arcseconds squared.

    The value is multiplied twice by the local pixel scale.

    Parameters
    ----------
    col : `str`
        Name of the column to convert.
    colCD_1_1, colCD_1_2, colCD_2_1, colCD_2_2 : `str`
        Names of the columns holding the local Wcs affine transform.
    """

    def __init__(self,
                 col,
                 colCD_1_1,
                 colCD_1_2,
                 colCD_2_1,
                 colCD_2_2,
                 **kwargs):
        self.col = col
        super().__init__(colCD_1_1, colCD_1_2, colCD_2_1, colCD_2_2, **kwargs)

    @property
    def name(self):
        """Label of the form ``<col>_asArcsecondsSq``."""
        return f"{self.col}_asArcsecondsSq"

    @property
    def columns(self):
        """The value column followed by the four CD matrix columns."""
        return [self.col, self.colCD_1_1, self.colCD_1_2, self.colCD_2_1, self.colCD_2_2]

    def _func(self, df):
        """Return ``df[col]`` multiplied by the squared local pixel scale."""
        cdElements = (df[self.colCD_1_1],
                      df[self.colCD_1_2],
                      df[self.colCD_2_1],
                      df[self.colCD_2_2])
        pixScale = self.pixelScaleArcseconds(*cdElements)
        return df[self.col] * pixScale * pixScale

1391 

1392 

class ReferenceBand(Functor):
    """Name the band whose ``merge_measurement_<band>`` flag is set."""
    name = 'Reference Band'
    shortname = 'refBand'

    @property
    def columns(self):
        """All ``merge_measurement_<band>`` columns that may be requested."""
        return ["merge_measurement_i",
                "merge_measurement_r",
                "merge_measurement_z",
                "merge_measurement_y",
                "merge_measurement_g",
                "merge_measurement_u"]

    def _func(self, df: pd.DataFrame) -> pd.Series:
        """Return, per row, the band suffix of the column with the largest value."""
        def bandOfLargest(row):
            # idxmax picks the column with the max value (True > False);
            # strip the common prefix to leave the band name.
            return row.idxmax().replace('merge_measurement_', '')

        # This functor requests the superset of bands that could be in the
        # object table, so only keep the columns actually present.
        available = [colName for colName in self.columns if colName in df.columns]
        perRow = df[available].apply(bandOfLargest, axis=1, result_type='reduce')
        # The cast makes a Series of dtype object even if df is empty.
        return perRow.astype('object')

1418 

1419 

class Photometry(Functor):
    """Base class for converting instrumental counts to fluxes and
    magnitudes using a single zero point.

    Parameters
    ----------
    colFlux : `str`
        Name of the flux column to convert.
    colFluxErr : `str`, optional
        Name of the matching flux error column.
    calib : optional
        Object whose ``getFluxMag0()`` returns ``(fluxMag0, fluxMag0Err)``.
        When `None`, the zero point is derived from ``COADD_ZP`` and its
        error is set to zero.
    """
    # AB to NanoJansky (3631 Jansky)
    AB_FLUX_SCALE = (0 * u.ABmag).to_value(u.nJy)
    LOG_AB_FLUX_SCALE = 12.56
    # Numerically 2.5/ln(10): turns a relative flux error into magnitudes.
    FIVE_OVER_2LOG10 = 1.085736204758129569
    # TO DO: DM-21955 Replace hard coded photometic calibration values
    COADD_ZP = 27

    def __init__(self, colFlux, colFluxErr=None, calib=None, **kwargs):
        self.vhypot = np.vectorize(self.hypot)
        self.col = colFlux
        self.colFluxErr = colFluxErr

        self.calib = calib
        if calib is not None:
            self.fluxMag0, self.fluxMag0Err = calib.getFluxMag0()
        else:
            self.fluxMag0 = 1./np.power(10, -0.4*self.COADD_ZP)
            self.fluxMag0Err = 0.

        super().__init__(**kwargs)

    @property
    def columns(self):
        """Only the flux column is needed by default."""
        return [self.col]

    @property
    def name(self):
        """Label of the form ``mag_<col>``."""
        return f'mag_{self.col}'

    @classmethod
    def hypot(cls, a, b):
        """Return ``sqrt(a**2 + b**2)`` for scalars.

        The larger magnitude is factored out before squaring; a zero result
        is returned directly when both inputs are zero.
        """
        if np.abs(a) < np.abs(b):
            a, b = b, a
        if a == 0.:
            return 0.
        q = b/a
        return np.abs(a) * np.sqrt(1. + q*q)

    def dn2flux(self, dn, fluxMag0):
        """Convert counts to flux: ``AB_FLUX_SCALE * dn / fluxMag0``."""
        return self.AB_FLUX_SCALE * dn / fluxMag0

    def dn2mag(self, dn, fluxMag0):
        """Convert counts to magnitude: ``-2.5*log10(dn/fluxMag0)``."""
        # ``np.warnings`` was only an accidental alias of the
        # standard-library module and is missing from recent NumPy
        # releases, so import the real module.
        import warnings

        with warnings.catch_warnings():
            # Non-positive counts give NaN/inf; silence those warnings.
            warnings.filterwarnings('ignore', r'invalid value encountered')
            warnings.filterwarnings('ignore', r'divide by zero')
            return -2.5 * np.log10(dn/fluxMag0)

    def dn2fluxErr(self, dn, dnErr, fluxMag0, fluxMag0Err):
        """Propagate count and zero-point errors into a flux error."""
        retVal = self.vhypot(dn * fluxMag0Err, dnErr * fluxMag0)
        retVal *= self.AB_FLUX_SCALE / fluxMag0 / fluxMag0
        return retVal

    def dn2MagErr(self, dn, dnErr, fluxMag0, fluxMag0Err):
        """Return the magnitude error: relative flux error times 2.5/ln(10)."""
        retVal = self.dn2fluxErr(dn, dnErr, fluxMag0, fluxMag0Err) / self.dn2flux(dn, fluxMag0)
        return self.FIVE_OVER_2LOG10 * retVal

1476 

1477 

class NanoJansky(Photometry):
    """Flux of the configured column, converted with `Photometry.dn2flux`."""

    def _func(self, df):
        counts = df[self.col]
        return self.dn2flux(counts, self.fluxMag0)

1481 

1482 

class NanoJanskyErr(Photometry):
    """Flux error of the configured column, from `Photometry.dn2fluxErr`."""

    @property
    def columns(self):
        """The flux column and its error column."""
        return [self.col, self.colFluxErr]

    def _func(self, df):
        """Return the flux errors as a series indexed like ``df``."""
        counts = df[self.col]
        countErrs = df[self.colFluxErr]
        fluxErrs = self.dn2fluxErr(counts, countErrs, self.fluxMag0, self.fluxMag0Err)
        return pd.Series(fluxErrs, index=df.index)

1491 

1492 

class Magnitude(Photometry):
    """Magnitude of the configured column, from `Photometry.dn2mag`."""

    def _func(self, df):
        counts = df[self.col]
        return self.dn2mag(counts, self.fluxMag0)

1496 

1497 

class MagnitudeErr(Photometry):
    """Magnitude error of the configured column, from `Photometry.dn2MagErr`."""

    @property
    def columns(self):
        """The flux column and its error column."""
        return [self.col, self.colFluxErr]

    def _func(self, df):
        """Return the magnitude errors as a series indexed like ``df``."""
        counts = df[self.col]
        countErrs = df[self.colFluxErr]
        magErrs = self.dn2MagErr(counts, countErrs, self.fluxMag0, self.fluxMag0Err)
        return pd.Series(magErrs, index=df.index)

1506 

1507 

class LocalPhotometry(Functor):
    """Base class for calibrating the specified instrument flux column using
    the local photometric calibration.

    Parameters
    ----------
    instFluxCol : `str`
        Name of the instrument flux column.
    instFluxErrCol : `str`
        Name of the associated error column for ``instFluxCol``.
    photoCalibCol : `str`
        Name of local calibration column.
    photoCalibErrCol : `str`
        Error associated with ``photoCalibCol``

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    """
    # AB magnitude of a 1 nJy source; the additive offset used when turning
    # a flux in nanojanskys into an AB magnitude.
    logNJanskyToAB = (1 * u.nJy).to_value(u.ABmag)

    def __init__(self,
                 instFluxCol,
                 instFluxErrCol,
                 photoCalibCol,
                 photoCalibErrCol,
                 **kwargs):
        self.instFluxCol = instFluxCol
        self.instFluxErrCol = instFluxErrCol
        self.photoCalibCol = photoCalibCol
        self.photoCalibErrCol = photoCalibErrCol
        super().__init__(**kwargs)

    def instFluxToNanojansky(self, instFlux, localCalib):
        """Convert instrument flux to nanojanskys.

        Parameters
        ----------
        instFlux : `numpy.ndarray` or `pandas.Series`
            Array of instrument flux measurements
        localCalib : `numpy.ndarray` or `pandas.Series`
            Array of local photometric calibration estimates.

        Returns
        -------
        calibFlux : `numpy.ndarray` or `pandas.Series`
            Array of calibrated flux measurements.
        """
        return instFlux * localCalib

    def instFluxErrToNanojanskyErr(self, instFlux, instFluxErr, localCalib, localCalibErr):
        """Convert instrument flux error to nanojansky error.

        Parameters
        ----------
        instFlux : `numpy.ndarray` or `pandas.Series`
            Array of instrument flux measurements
        instFluxErr : `numpy.ndarray` or `pandas.Series`
            Errors on associated ``instFlux`` values
        localCalib : `numpy.ndarray` or `pandas.Series`
            Array of local photometric calibration estimates.
        localCalibErr : `numpy.ndarray` or `pandas.Series`
            Errors on associated ``localCalib`` values

        Returns
        -------
        calibFluxErr : `numpy.ndarray` or `pandas.Series`
            Errors on calibrated flux measurements.
        """
        # Quadrature sum of the two error contributions to the product
        # ``instFlux * localCalib``.
        return np.hypot(instFluxErr * localCalib, instFlux * localCalibErr)

    def instFluxToMagnitude(self, instFlux, localCalib):
        """Convert instrument flux to AB magnitudes.

        Parameters
        ----------
        instFlux : `numpy.ndarray` or `pandas.Series`
            Array of instrument flux measurements
        localCalib : `numpy.ndarray` or `pandas.Series`
            Array of local photometric calibration estimates.

        Returns
        -------
        calibMag : `numpy.ndarray` or `pandas.Series`
            Array of calibrated AB magnitudes.
        """
        return -2.5 * np.log10(self.instFluxToNanojansky(instFlux, localCalib)) + self.logNJanskyToAB

    def instFluxErrToMagnitudeErr(self, instFlux, instFluxErr, localCalib, localCalibErr):
        """Convert instrument flux error to AB magnitude error.

        Parameters
        ----------
        instFlux : `numpy.ndarray` or `pandas.Series`
            Array of instrument flux measurements
        instFluxErr : `numpy.ndarray` or `pandas.Series`
            Errors on associated ``instFlux`` values
        localCalib : `numpy.ndarray` or `pandas.Series`
            Array of local photometric calibration estimates.
        localCalibErr : `numpy.ndarray` or `pandas.Series`
            Errors on associated ``localCalib`` values

        Returns
        -------
        calibMagErr: `numpy.ndarray` or `pandas.Series`
            Error on calibrated AB magnitudes.
        """
        err = self.instFluxErrToNanojanskyErr(instFlux, instFluxErr, localCalib, localCalibErr)
        # The magnitude error is 2.5/ln(10) times the relative error of the
        # calibrated flux, so the denominator must be the calibrated flux
        # itself (``instFlux * localCalib``), not ``instFlux * instFluxErr``.
        return 2.5 / np.log(10) * err / self.instFluxToNanojansky(instFlux, localCalib)

1621 

1622 

class LocalNanojansky(LocalPhotometry):
    """Compute calibrated fluxes using the local calibration value.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    """

    @property
    def columns(self):
        """The instrument flux column and the local calibration column."""
        return [self.instFluxCol, self.photoCalibCol]

    @property
    def name(self):
        """Label of the form ``flux_<instFluxCol>``."""
        return f'flux_{self.instFluxCol}'

    def _func(self, df):
        """Return the instrument flux scaled by the local calibration."""
        instFlux = df[self.instFluxCol]
        localCalib = df[self.photoCalibCol]
        return self.instFluxToNanojansky(instFlux, localCalib)

1644 

1645 

class LocalNanojanskyErr(LocalPhotometry):
    """Compute calibrated flux errors using the local calibration value.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    """

    @property
    def columns(self):
        """Flux, flux error, calibration and calibration error columns."""
        return [self.instFluxCol, self.instFluxErrCol,
                self.photoCalibCol, self.photoCalibErrCol]

    @property
    def name(self):
        """Label of the form ``fluxErr_<instFluxCol>``."""
        return f'fluxErr_{self.instFluxCol}'

    def _func(self, df):
        """Return the error on the calibrated flux for every row."""
        instFlux = df[self.instFluxCol]
        instFluxErr = df[self.instFluxErrCol]
        localCalib = df[self.photoCalibCol]
        localCalibErr = df[self.photoCalibErrCol]
        return self.instFluxErrToNanojanskyErr(instFlux, instFluxErr,
                                               localCalib, localCalibErr)

1669 

1670 

class LocalMagnitude(LocalPhotometry):
    """Compute calibrated AB magnitudes using the local calibration value.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    """

    @property
    def columns(self):
        """The instrument flux column and the local calibration column."""
        return [self.instFluxCol, self.photoCalibCol]

    @property
    def name(self):
        """Label of the form ``mag_<instFluxCol>``."""
        return f'mag_{self.instFluxCol}'

    def _func(self, df):
        """Return the calibrated AB magnitude for every row."""
        instFlux = df[self.instFluxCol]
        localCalib = df[self.photoCalibCol]
        return self.instFluxToMagnitude(instFlux, localCalib)

1693 

1694 

class LocalMagnitudeErr(LocalPhotometry):
    """Compute calibrated AB magnitude errors using the local calibration value.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    """

    @property
    def columns(self):
        """Flux, flux error, calibration and calibration error columns."""
        return [self.instFluxCol, self.instFluxErrCol,
                self.photoCalibCol, self.photoCalibErrCol]

    @property
    def name(self):
        """Label of the form ``magErr_<instFluxCol>``."""
        return f'magErr_{self.instFluxCol}'

    def _func(self, df):
        """Return the error on the calibrated magnitude for every row."""
        instFlux = df[self.instFluxCol]
        instFluxErr = df[self.instFluxErrCol]
        localCalib = df[self.photoCalibCol]
        localCalibErr = df[self.photoCalibErrCol]
        return self.instFluxErrToMagnitudeErr(instFlux, instFluxErr,
                                              localCalib, localCalibErr)

1720 

1721 

class LocalDipoleMeanFlux(LocalPhotometry):
    """Compute absolute mean of dipole fluxes.

    Parameters
    ----------
    instFluxPosCol, instFluxNegCol : `str`
        Names of the positive and negative lobe instrument flux columns.
    instFluxPosErrCol, instFluxNegErrCol : `str`
        Names of the matching error columns.
    photoCalibCol : `str`
        Name of local calibration column.
    photoCalibErrCol : `str`
        Error associated with ``photoCalibCol``

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    LocalDipoleMeanFlux
    LocalDipoleMeanFluxErr
    LocalDipoleDiffFlux
    LocalDipoleDiffFluxErr
    """
    def __init__(self,
                 instFluxPosCol,
                 instFluxNegCol,
                 instFluxPosErrCol,
                 instFluxNegErrCol,
                 photoCalibCol,
                 photoCalibErrCol,
                 **kwargs):
        self.instFluxNegCol, self.instFluxPosCol = instFluxNegCol, instFluxPosCol
        self.instFluxNegErrCol, self.instFluxPosErrCol = instFluxNegErrCol, instFluxPosErrCol
        self.photoCalibCol = photoCalibCol
        self.photoCalibErrCol = photoCalibErrCol
        # The parent is initialised with the negative-lobe columns as its
        # single flux / flux error pair.
        super().__init__(instFluxNegCol,
                         instFluxNegErrCol,
                         photoCalibCol,
                         photoCalibErrCol,
                         **kwargs)

    @property
    def columns(self):
        """Positive flux, negative flux and calibration columns."""
        return [self.instFluxPosCol, self.instFluxNegCol, self.photoCalibCol]

    @property
    def name(self):
        """Label of the form ``dipMeanFlux_<pos>_<neg>``."""
        return f'dipMeanFlux_{self.instFluxPosCol}_{self.instFluxNegCol}'

    def _func(self, df):
        """Return the mean of the absolute calibrated lobe fluxes."""
        absNeg = np.fabs(self.instFluxToNanojansky(df[self.instFluxNegCol], df[self.photoCalibCol]))
        absPos = np.fabs(self.instFluxToNanojansky(df[self.instFluxPosCol], df[self.photoCalibCol]))
        return 0.5*(absNeg + absPos)

1769 

1770 

class LocalDipoleMeanFluxErr(LocalDipoleMeanFlux):
    """Compute the error on the absolute mean of dipole fluxes.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    LocalDipoleMeanFlux
    LocalDipoleMeanFluxErr
    LocalDipoleDiffFlux
    LocalDipoleDiffFluxErr
    """

    @property
    def columns(self):
        """Both lobe fluxes, their errors, the calibration and its error."""
        return [self.instFluxPosCol,
                self.instFluxNegCol,
                self.instFluxPosErrCol,
                self.instFluxNegErrCol,
                self.photoCalibCol,
                self.photoCalibErrCol]

    @property
    def name(self):
        """Label of the form ``dipMeanFluxErr_<pos>_<neg>``."""
        return f'dipMeanFluxErr_{self.instFluxPosCol}_{self.instFluxNegCol}'

    def _func(self, df):
        """Return the propagated error on ``0.5*(|neg| + |pos|)*calib``.

        The variance is the sum of a calibration term,
        ``((|neg| + |pos|) * calibErr)**2``, and a flux term,
        ``(negErr**2 + posErr**2) * calib**2``.
        """
        # The calibration error multiplies the SUM of the absolute fluxes;
        # the sum must be parenthesised, as the difference is in
        # LocalDipoleDiffFluxErr, or only the positive lobe gets scaled.
        absFluxSum = np.fabs(df[self.instFluxNegCol]) + np.fabs(df[self.instFluxPosCol])
        calibTerm = (absFluxSum * df[self.photoCalibErrCol])**2
        fluxTerm = ((df[self.instFluxNegErrCol]**2 + df[self.instFluxPosErrCol]**2)
                    * df[self.photoCalibCol]**2)
        return 0.5*np.sqrt(calibTerm + fluxTerm)

1805 

1806 

class LocalDipoleDiffFlux(LocalDipoleMeanFlux):
    """Compute the absolute difference of dipole fluxes.

    Value is (abs(pos) - abs(neg))

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    LocalDipoleMeanFlux
    LocalDipoleMeanFluxErr
    LocalDipoleDiffFlux
    LocalDipoleDiffFluxErr
    """

    @property
    def columns(self):
        """Positive flux, negative flux and calibration columns."""
        return [self.instFluxPosCol, self.instFluxNegCol, self.photoCalibCol]

    @property
    def name(self):
        """Label of the form ``dipDiffFlux_<pos>_<neg>``."""
        return f'dipDiffFlux_{self.instFluxPosCol}_{self.instFluxNegCol}'

    def _func(self, df):
        """Return ``|pos| - |neg|`` of the calibrated lobe fluxes."""
        absPos = np.fabs(self.instFluxToNanojansky(df[self.instFluxPosCol], df[self.photoCalibCol]))
        absNeg = np.fabs(self.instFluxToNanojansky(df[self.instFluxNegCol], df[self.photoCalibCol]))
        return absPos - absNeg

1837 

1838 

class LocalDipoleDiffFluxErr(LocalDipoleMeanFlux):
    """Compute the error on the absolute difference of dipole fluxes.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    LocalDipoleMeanFlux
    LocalDipoleMeanFluxErr
    LocalDipoleDiffFlux
    LocalDipoleDiffFluxErr
    """

    @property
    def columns(self):
        """Both lobe fluxes, their errors, the calibration and its error."""
        return [self.instFluxPosCol,
                self.instFluxNegCol,
                self.instFluxPosErrCol,
                self.instFluxNegErrCol,
                self.photoCalibCol,
                self.photoCalibErrCol]

    @property
    def name(self):
        """Label of the form ``dipDiffFluxErr_<pos>_<neg>``."""
        return f'dipDiffFluxErr_{self.instFluxPosCol}_{self.instFluxNegCol}'

    def _func(self, df):
        """Return the propagated error on ``(|pos| - |neg|)*calib``."""
        absFluxDiff = np.fabs(df[self.instFluxPosCol]) - np.fabs(df[self.instFluxNegCol])
        calibTerm = (absFluxDiff * df[self.photoCalibErrCol])**2
        fluxTerm = ((df[self.instFluxPosErrCol]**2 + df[self.instFluxNegErrCol]**2)
                    * df[self.photoCalibCol]**2)
        return np.sqrt(calibTerm + fluxTerm)

1873 

1874 

class Ratio(Functor):
    """Base class for returning the ratio of 2 columns.

    Can be used to compute a Signal to Noise ratio for any input flux.

    Parameters
    ----------
    numerator : `str`
        Name of the column to use at the numerator in the ratio
    denominator : `str`
        Name of the column to use as the denominator in the ratio.
    """
    def __init__(self,
                 numerator,
                 denominator,
                 **kwargs):
        self.numerator = numerator
        self.denominator = denominator
        super().__init__(**kwargs)

    @property
    def columns(self):
        """The numerator column followed by the denominator column."""
        return [self.numerator, self.denominator]

    @property
    def name(self):
        """Label of the form ``ratio_<numerator>_<denominator>``."""
        return f'ratio_{self.numerator}_{self.denominator}'

    def _func(self, df):
        """Return ``numerator / denominator`` element-wise."""
        # ``np.warnings`` was only an accidental alias of the
        # standard-library module and is missing from recent NumPy
        # releases, so import the real module.
        import warnings

        with warnings.catch_warnings():
            # Zero or invalid denominators give inf/NaN; silence those
            # warnings instead of reporting them.
            warnings.filterwarnings('ignore', r'invalid value encountered')
            warnings.filterwarnings('ignore', r'divide by zero')
            return df[self.numerator] / df[self.denominator]

1908 

1909 

class Ebv(Functor):
    """Compute E(B-V) from dustmaps.sfd."""
    _defaultDataset = 'ref'
    name = "E(B-V)"
    shortname = "ebv"

    def __init__(self, **kwargs):
        self._columns = ['coord_ra', 'coord_dec']
        # Query object that is called with sky coordinates in ``_func``.
        self.sfd = SFDQuery()
        super().__init__(**kwargs)

    def _func(self, df):
        """Return the queried E(B-V) per row as a float64 series."""
        ra = df['coord_ra']*u.rad
        dec = df['coord_dec']*u.rad
        reddening = self.sfd(SkyCoord(ra, dec))
        # Double precision unnecessary scientifically
        # but currently needed for ingest to qserv
        result = pd.Series(reddening, index=df.index)
        return result.astype('float64')