Coverage for python/lsst/pipe/tasks/functors.py: 40%

825 statements  

« prev     ^ index     » next       coverage.py v6.4.4, created at 2022-09-07 10:57 +0000

1# This file is part of pipe_tasks. 

2# 

3# LSST Data Management System 

4# This product includes software developed by the 

5# LSST Project (http://www.lsst.org/). 

6# See COPYRIGHT file at the top of the source tree. 

7# 

8# This program is free software: you can redistribute it and/or modify 

9# it under the terms of the GNU General Public License as published by 

10# the Free Software Foundation, either version 3 of the License, or 

11# (at your option) any later version. 

12# 

13# This program is distributed in the hope that it will be useful, 

14# but WITHOUT ANY WARRANTY; without even the implied warranty of 

15# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

16# GNU General Public License for more details. 

17# 

18# You should have received a copy of the LSST License Statement and 

19# the GNU General Public License along with this program. If not, 

20# see <https://www.lsstcorp.org/LegalNotices/>. 

21# 

22import yaml 

23import re 

24from itertools import product 

25import logging 

26import os.path 

27 

28import pandas as pd 

29import numpy as np 

30import astropy.units as u 

31from dustmaps.sfd import SFDQuery 

32from astropy.coordinates import SkyCoord 

33 

34from lsst.utils import doImport 

35from lsst.daf.butler import DeferredDatasetHandle 

36import lsst.geom as geom 

37import lsst.sphgeom as sphgeom 

38 

39from .parquetTable import ParquetTable, MultilevelParquetTable 

40 

41 

def init_fromDict(initDict, basePath='lsst.pipe.tasks.functors',
                  typeKey='functor', name=None):
    """Initialize an object defined in a dictionary

    The object needs to be importable as
        f'{basePath}.{initDict[typeKey]}'
    The positional arguments (if any) are contained in the "args" entry of
    the dictionary; every other remaining entry is passed to the constructor
    as a keyword argument.
    This is used in `functors.CompositeFunctor.from_yaml` to initialize
    a composite functor from a specification in a YAML file.

    Parameters
    ----------
    initDict : dictionary
        Dictionary describing object's initialization.  Must contain
        an entry keyed by ``typeKey`` that is the name of the object,
        relative to ``basePath``.  It is copied, so the caller's dictionary
        is not modified.
    basePath : str
        Path relative to module in which ``initDict[typeKey]`` is defined.
    typeKey : str
        Key of ``initDict`` that is the name of the object
        (relative to `basePath`).
    name : str, optional
        Label for the object being built; only used in the error message
        if construction fails.

    Returns
    -------
    element
        The constructed object.

    Raises
    ------
    Exception
        If construction fails, an exception of the same type as the original
        is raised with a descriptive message; the original exception is
        attached as its ``__cause__``.
    """
    initDict = initDict.copy()
    # TO DO: DM-21956 We should be able to define functors outside this module
    pythonType = doImport(f'{basePath}.{initDict.pop(typeKey)}')
    args = initDict.pop('args', [])
    # A lone string is a single positional argument, not a sequence of characters.
    if isinstance(args, str):
        args = [args]
    try:
        element = pythonType(*args, **initDict)
    except Exception as e:
        message = f'Error in constructing functor "{name}" of type {pythonType.__name__} with args: {args}'
        # Chain explicitly so the original traceback is preserved.
        raise type(e)(message, e.args) from e
    return element

79 

80 

class Functor(object):
    """Define and execute a calculation on a ParquetTable

    The `__call__` method accepts either a `ParquetTable` object or a
    `DeferredDatasetHandle`, and returns the
    result of the calculation as a single column.  Each functor defines what
    columns are needed for the calculation, and only these columns are read
    from the `ParquetTable`.

    The action of  `__call__` consists of two steps: first, loading the
    necessary columns from disk into memory as a `pandas.DataFrame` object;
    and second, performing the computation on this dataframe and returning the
    result.


    To define a new `Functor`, a subclass must define a `_func` method,
    that takes a `pandas.DataFrame` and returns result in a `pandas.Series`.
    In addition, it must define the following attributes

    * `_columns`: The columns necessary to perform the calculation
    * `name`: A name appropriate for a figure axis label
    * `shortname`: A name appropriate for use as a dictionary key

    On initialization, a `Functor` should declare what band (`filt` kwarg)
    and dataset (e.g. `'ref'`, `'meas'`, `'forced_src'`) it is intended to be
    applied to. This enables the `_get_data` method to extract the proper
    columns from the parquet file. If not specified, the dataset will fall back
    on the `_defaultDataset` attribute. If band is not specified and `dataset`
    is anything other than `'ref'`, then an error will be raised when trying to
    perform the calculation.

    Originally, `Functor` was set up to expect
    datasets formatted like the `deepCoadd_obj` dataset; that is, a
    dataframe with a multi-level column index, with the levels of the
    column index being `band`, `dataset`, and `column`.
    It has since been generalized to apply to dataframes without multi-level
    indices and multi-level indices with just `dataset` and `column` levels.
    In addition, the `_get_data` method that reads
    the dataframe from the `ParquetTable` will return a dataframe with column
    index levels defined by the `_dfLevels` attribute; by default, this is
    `column`.

    The `_dfLevels` attributes should generally not need to
    be changed, unless `_func` needs columns from multiple filters or datasets
    to do the calculation.
    An example of this is the `lsst.pipe.tasks.functors.Color` functor, for
    which `_dfLevels = ('band', 'column')`, and `_func` expects the dataframe
    it gets to have those levels in the column index.

    Parameters
    ----------
    filt : str
        Filter upon which to do the calculation

    dataset : str
        Dataset upon which to do the calculation
        (e.g., 'ref', 'meas', 'forced_src').

    noDup : bool, optional
        Overrides the class-level `_defaultNoDup` value reported by the
        `noDup` property.
    """

    _defaultDataset = 'ref'
    _dfLevels = ('column',)
    _defaultNoDup = False

    def __init__(self, filt=None, dataset=None, noDup=None):
        self.filt = filt
        self.dataset = dataset if dataset is not None else self._defaultDataset
        self._noDup = noDup
        self.log = logging.getLogger(type(self).__name__)

    @property
    def noDup(self):
        """Explicit ``noDup`` setting, or the class default if unset
        """
        if self._noDup is not None:
            return self._noDup
        else:
            return self._defaultNoDup

    @property
    def columns(self):
        """Columns required to perform calculation
        """
        if not hasattr(self, '_columns'):
            raise NotImplementedError('Must define columns property or _columns attribute')
        return self._columns

    def _get_data_columnLevels(self, data, columnIndex=None):
        """Gets the names of the column index levels

        This should only be called in the context of a multilevel table.
        The logic here is to enable this to work both with the gen2 `MultilevelParquetTable`
        and with the gen3 `DeferredDatasetHandle`.

        Parameters
        ----------
        data : `MultilevelParquetTable` or `DeferredDatasetHandle`

        columnIndex (optional): pandas `Index` object
            if not passed, then it is read from the `DeferredDatasetHandle`

        Raises
        ------
        TypeError
            If no column index is available and ``data`` is not a
            `MultilevelParquetTable`.
        """
        if isinstance(data, DeferredDatasetHandle):
            if columnIndex is None:
                columnIndex = data.get(component="columns")
        if columnIndex is not None:
            return columnIndex.names
        if isinstance(data, MultilevelParquetTable):
            return data.columnLevels
        else:
            raise TypeError(f"Unknown type for data: {type(data)}!")

    def _get_data_columnLevelNames(self, data, columnIndex=None):
        """Gets the content of each of the column levels for a multilevel table

        Similar to `_get_data_columnLevels`, this enables backward compatibility with gen2.

        Mirrors original gen2 implementation within `pipe.tasks.parquetTable.MultilevelParquetTable`
        """
        if isinstance(data, DeferredDatasetHandle):
            if columnIndex is None:
                columnIndex = data.get(component="columns")
        if columnIndex is not None:
            columnLevels = columnIndex.names
            # Map each level name to the sorted unique values found at that level.
            columnLevelNames = {
                level: list(np.unique(np.array([c for c in columnIndex])[:, i]))
                for i, level in enumerate(columnLevels)
            }
            return columnLevelNames
        if isinstance(data, MultilevelParquetTable):
            return data.columnLevelNames
        else:
            raise TypeError(f"Unknown type for data: {type(data)}!")

    def _colsFromDict(self, colDict, columnIndex=None):
        """Converts dictionary column specification to a list of columns

        This mirrors the original gen2 implementation within `pipe.tasks.parquetTable.MultilevelParquetTable`
        """
        new_colDict = {}
        columnLevels = self._get_data_columnLevels(None, columnIndex=columnIndex)

        for i, lev in enumerate(columnLevels):
            if lev in colDict:
                if isinstance(colDict[lev], str):
                    new_colDict[lev] = [colDict[lev]]
                else:
                    new_colDict[lev] = colDict[lev]
            else:
                # Level not constrained by the caller: accept every value it has.
                new_colDict[lev] = columnIndex.levels[i]

        levelCols = [new_colDict[lev] for lev in columnLevels]
        cols = list(product(*levelCols))
        # Keep only the combinations that actually exist in the table.
        colsAvailable = [col for col in cols if col in columnIndex]
        return colsAvailable

    def multilevelColumns(self, data, columnIndex=None, returnTuple=False):
        """Returns columns needed by functor from multilevel dataset

        To access tables with multilevel column structure, the `MultilevelParquetTable`
        or `DeferredDatasetHandle` need to be passed either a list of tuples or a
        dictionary.

        Parameters
        ----------
        data : `MultilevelParquetTable` or `DeferredDatasetHandle`

        columnIndex (optional): pandas `Index` object
            either passed or read in from `DeferredDatasetHandle`.

        `returnTuple` : bool
            If true, then return a list of tuples rather than the column dictionary
            specification.  This is set to `True` by `CompositeFunctor` in order to be able to
            combine columns from the various component functors.

        Raises
        ------
        ValueError
            If ``filt`` is unset, the table has a ``band`` level and the
            dataset is not ``'ref'``.
        """
        if isinstance(data, DeferredDatasetHandle) and columnIndex is None:
            columnIndex = data.get(component="columns")

        # Confirm that the dataset has the column levels the functor is expecting it to have.
        columnLevels = self._get_data_columnLevels(data, columnIndex)

        columnDict = {'column': self.columns,
                      'dataset': self.dataset}
        if self.filt is None:
            columnLevelNames = self._get_data_columnLevelNames(data, columnIndex)
            if "band" in columnLevels:
                if self.dataset == "ref":
                    # With no band requested, 'ref' falls back to the first band available.
                    columnDict["band"] = columnLevelNames["band"][0]
                else:
                    raise ValueError(f"'filt' not set for functor {self.name} "
                                     f"(dataset {self.dataset}) "
                                     "and ParquetTable "
                                     "contains multiple filters in column index. "
                                     "Set 'filt' or set 'dataset' to 'ref'.")
        else:
            columnDict['band'] = self.filt

        if isinstance(data, MultilevelParquetTable):
            return data._colsFromDict(columnDict)
        elif isinstance(data, DeferredDatasetHandle):
            if returnTuple:
                return self._colsFromDict(columnDict, columnIndex=columnIndex)
            else:
                return columnDict

    def _func(self, df, dropna=True):
        raise NotImplementedError('Must define calculation on dataframe')

    def _get_columnIndex(self, data):
        """Return columnIndex

        Only a `DeferredDatasetHandle` provides one; `None` is returned for
        every other input type.
        """

        if isinstance(data, DeferredDatasetHandle):
            return data.get(component="columns")
        else:
            return None

    def _get_data(self, data):
        """Retrieve dataframe necessary for calculation.

        The data argument can be a DataFrame, a ParquetTable instance, or a gen3 DeferredDatasetHandle

        Returns dataframe upon which `self._func` can act.

        N.B. while passing a raw pandas `DataFrame` *should* work here, it has not been tested.

        Raises
        ------
        TypeError
            If ``data`` is none of the supported types.
        """
        if isinstance(data, pd.DataFrame):
            return data

        # First thing to do: check to see if the data source has a multilevel column index or not.
        columnIndex = self._get_columnIndex(data)
        is_multiLevel = isinstance(data, MultilevelParquetTable) or isinstance(columnIndex, pd.MultiIndex)

        # Simple single-level parquet table, gen2
        if isinstance(data, ParquetTable) and not is_multiLevel:
            columns = self.columns
            df = data.toDataFrame(columns=columns)
            return df

        # Get proper columns specification for this functor
        if is_multiLevel:
            columns = self.multilevelColumns(data, columnIndex=columnIndex)
        else:
            columns = self.columns

        if isinstance(data, MultilevelParquetTable):
            # Load in-memory dataframe with appropriate columns the gen2 way
            df = data.toDataFrame(columns=columns, droplevels=False)
        elif isinstance(data, DeferredDatasetHandle):
            # Load in-memory dataframe with appropriate columns the gen3 way
            df = data.get(parameters={"columns": columns})
        else:
            # Fail with a clear message instead of an unbound-variable error below.
            raise TypeError(f"Unknown type for data: {type(data)}!")

        # Drop unnecessary column levels
        if is_multiLevel:
            df = self._setLevels(df)

        return df

    def _setLevels(self, df):
        """Drop every column-index level not listed in `_dfLevels`

        The column index of ``df`` is replaced in place and ``df`` is returned.
        """
        levelsToDrop = [n for n in df.columns.names if n not in self._dfLevels]
        df.columns = df.columns.droplevel(levelsToDrop)
        return df

    def _dropna(self, vals):
        return vals.dropna()

    def __call__(self, data, dropna=False):
        """Load the required columns from ``data`` and run the calculation

        A failure while loading the data propagates to the caller.  A failure
        inside `_func` is logged and replaced by the all-NaN result of `fail`.

        Parameters
        ----------
        data : `pandas.DataFrame`, `ParquetTable` or `DeferredDatasetHandle`
            Source of the columns.
        dropna : bool
            If true, null entries are removed from the result.
        """
        # Loading is kept outside the try block: `fail` needs the dataframe,
        # so there is no fallback if the dataframe itself is unavailable.
        df = self._get_data(data)
        try:
            vals = self._func(df)
        except Exception as e:
            self.log.error("Exception in %s call: %s: %s", self.name, type(e).__name__, e)
            vals = self.fail(df)
        if dropna:
            vals = self._dropna(vals)

        return vals

    def difference(self, data1, data2, **kwargs):
        """Computes difference between functor called on two different ParquetTable objects
        """
        return self(data1, **kwargs) - self(data2, **kwargs)

    def fail(self, df):
        """Return an all-NaN series sharing the index of ``df``
        """
        return pd.Series(np.full(len(df), np.nan), index=df.index)

    @property
    def name(self):
        """Full name of functor (suitable for figure labels)
        """
        # NOTE(review): this returns the exception class rather than raising it.
        # Left unchanged because the error logging in `__call__` reads
        # `self.name` and would itself fail if this raised.
        return NotImplementedError

    @property
    def shortname(self):
        """Short name of functor (suitable for column name/dict key)
        """
        return self.name

376 

377 

class CompositeFunctor(Functor):
    """Perform multiple calculations at once on a catalog

    The role of a `CompositeFunctor` is to group together computations from
    multiple functors.  Instead of returning `pandas.Series` a
    `CompositeFunctor` returns a `pandas.Dataframe`, with the column names
    being the keys of `funcDict`.

    The `columns` attribute of a `CompositeFunctor` is the union of all columns
    in all the component functors.

    A `CompositeFunctor` does not use a `_func` method itself; rather,
    when a `CompositeFunctor` is called, all its columns are loaded
    at once, and the resulting dataframe is passed to the `_func` method of each component
    functor.  This has the advantage of only doing I/O (reading from parquet file) once,
    and works because each individual `_func` method of each component functor does not
    care if there are *extra* columns in the dataframe being passed; only that it must contain
    *at least* the `columns` it expects.

    An important and useful class method is `from_yaml`, which takes as argument a
    translation definition (as loaded from a YAML file by `from_file`) specifying a
    collection of functors.

    Parameters
    ----------
    funcs : `dict` or `list`
        Dictionary or list of functors.  If a list, then it will be converted
        into a dictionary according to the `.shortname` attribute of each functor.

    """
    dataset = None

    def __init__(self, funcs, **kwargs):

        # isinstance (not an exact type match) so dict subclasses are kept as mappings.
        if isinstance(funcs, dict):
            self.funcDict = funcs
        else:
            self.funcDict = {f.shortname: f for f in funcs}

        # Must exist before the base constructor assigns `filt` through the setter.
        self._filt = None

        super().__init__(**kwargs)

    @property
    def filt(self):
        return self._filt

    @filt.setter
    def filt(self, filt):
        # A non-null filter is pushed down to every component functor.
        if filt is not None:
            for f in self.funcDict.values():
                f.filt = filt
        self._filt = filt

    def update(self, new):
        """Add functors from a dictionary or from another `CompositeFunctor`

        Raises
        ------
        TypeError
            If ``new`` is neither a dictionary nor a `CompositeFunctor`.
        """
        if isinstance(new, dict):
            self.funcDict.update(new)
        elif isinstance(new, CompositeFunctor):
            self.funcDict.update(new.funcDict)
        else:
            raise TypeError('Can only update with dictionary or CompositeFunctor.')

        # Make sure new functors have the same 'filt' set
        if self.filt is not None:
            # Re-assigning triggers the setter, which propagates the filter.
            self.filt = self.filt

    @property
    def columns(self):
        """Union of the columns of all component functors (unordered)
        """
        return list({col for f in self.funcDict.values() for col in f.columns})

    def multilevelColumns(self, data, **kwargs):
        # Get the union of columns for all component functors.  Note the need to have `returnTuple=True` here.
        return list(
            {
                col
                for f in self.funcDict.values()
                for col in f.multilevelColumns(data, returnTuple=True, **kwargs)
            }
        )

    def __call__(self, data, **kwargs):
        """Apply the functor to the data table

        Parameters
        ----------
        data : `lsst.daf.butler.DeferredDatasetHandle`,
               `lsst.pipe.tasks.parquetTable.MultilevelParquetTable`,
               `lsst.pipe.tasks.parquetTable.ParquetTable`,
               or `pandas.DataFrame`.
            The table or a pointer to a table on disk from which columns can
            be accessed
        **kwargs
            Only ``dropna`` is read: if true, rows containing any null value
            are removed from the result.

        Returns
        -------
        valDf : `pandas.DataFrame`
            One column per entry of `funcDict`.

        Raises
        ------
        RuntimeError
            If a component functor returns something that is not
            one-dimensional.
        """
        columnIndex = self._get_columnIndex(data)

        # First, determine whether data has a multilevel index (either gen2 or gen3)
        is_multiLevel = isinstance(data, MultilevelParquetTable) or isinstance(columnIndex, pd.MultiIndex)

        # Multilevel index, gen2 or gen3
        if is_multiLevel:
            columns = self.multilevelColumns(data, columnIndex=columnIndex)

            if isinstance(data, MultilevelParquetTable):
                # Read data into memory the gen2 way
                df = data.toDataFrame(columns=columns, droplevels=False)
            elif isinstance(data, DeferredDatasetHandle):
                # Read data into memory the gen3 way
                df = data.get(parameters={"columns": columns})

            valDict = {}
            for k, f in self.funcDict.items():
                try:
                    subdf = f._setLevels(
                        df[f.multilevelColumns(data, returnTuple=True, columnIndex=columnIndex)]
                    )
                    valDict[k] = f._func(subdf)
                except Exception as e:
                    # Report the failing component; the composite defines no name of its own.
                    self.log.error("Exception in %s call: %s: %s", f.name, type(e).__name__, e)
                    try:
                        valDict[k] = f.fail(subdf)
                    except NameError:
                        # No sub-dataframe was ever built, so there is nothing to fall back on.
                        raise e

        else:
            if isinstance(data, DeferredDatasetHandle):
                # input if Gen3 deferLoad=True
                df = data.get(parameters={"columns": self.columns})
            elif isinstance(data, pd.DataFrame):
                # input if Gen3 deferLoad=False
                df = data
            else:
                # Original Gen2 input is type ParquetTable and the fallback
                df = data.toDataFrame(columns=self.columns)

            valDict = {k: f._func(df) for k, f in self.funcDict.items()}

        # Check that output columns are actually columns
        for name, colVal in valDict.items():
            if len(colVal.shape) != 1:
                raise RuntimeError("Transformed column '%s' is not the shape of a column. "
                                   "It is shaped %s and type %s." % (name, colVal.shape, type(colVal)))

        try:
            valDf = pd.concat(valDict, axis=1)
        except TypeError:
            # Record what each functor produced before re-raising, to aid debugging.
            self.log.error("Could not concatenate outputs: %s",
                           [(k, type(v)) for k, v in valDict.items()])
            raise

        if kwargs.get('dropna', False):
            valDf = valDf.dropna(how='any')

        return valDf

    @classmethod
    def renameCol(cls, col, renameRules):
        """Apply ``(old, new)`` rename rules to a column name

        Each rule whose ``old`` string is a prefix of the current name is
        applied in turn with `str.replace`.  With no rules the name is
        returned unchanged.
        """
        if renameRules is None:
            return col
        for old, new in renameRules:
            if col.startswith(old):
                col = col.replace(old, new)
        return col

    @classmethod
    def from_file(cls, filename, **kwargs):
        """Build a `CompositeFunctor` from a YAML file

        Environment variables in ``filename`` are expanded.
        """
        # Allow environment variables in the filename.
        filename = os.path.expandvars(filename)
        with open(filename) as f:
            translationDefinition = yaml.safe_load(f)

        return cls.from_yaml(translationDefinition, **kwargs)

    @classmethod
    def from_yaml(cls, translationDefinition, **kwargs):
        """Build a `CompositeFunctor` from an already-loaded definition

        The ``funcs`` entry is required; each of its values is built with
        `init_fromDict`.  The optional ``calexpFlags``, ``refFlags``,
        ``forcedFlags`` and ``flags`` lists add plain `Column` functors for
        the ``calexp``, ``ref``, ``forced_src`` and ``meas`` datasets
        respectively, keyed by the flag name after applying the optional
        ``flag_rename_rules``.
        """
        funcs = {}
        for func, val in translationDefinition['funcs'].items():
            funcs[func] = init_fromDict(val, name=func)

        renameRules = translationDefinition.get('flag_rename_rules')

        # Definition key -> dataset the listed flag columns are read from.
        flagDatasets = (
            ('calexpFlags', 'calexp'),
            ('refFlags', 'ref'),
            ('forcedFlags', 'forced_src'),
            ('flags', 'meas'),
        )
        for key, dataset in flagDatasets:
            if key in translationDefinition:
                for flag in translationDefinition[key]:
                    funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset=dataset)

        return cls(funcs, **kwargs)

579 

580 

def mag_aware_eval(df, expr, log):
    """Evaluate an expression on a DataFrame, knowing what the 'mag' function means

    Builds on `pandas.DataFrame.eval`, which parses and executes math on dataframes.

    Each ``mag(x)`` call is first rewritten as ``-2.5*log(x)/log(10)``.  If
    evaluating that fails, the error is logged and the expression is
    evaluated again with ``x`` replaced by ``x_instFlux``.  Whitespace
    inside the parentheses is accepted, matching `CustomFunctor.columns`.

    Parameters
    ----------
    df : pandas.DataFrame
        Dataframe on which to evaluate expression.

    expr : str
        Expression.

    log : logging.Logger
        Logger that receives the error raised by the first attempt.

    Returns
    -------
    val
        Result of `pandas.DataFrame.eval` on the rewritten expression.
    """
    # Same pattern as `CustomFunctor.columns`, so every column it loads is also translated.
    magCall = r'mag\(\s*(\w+)\s*\)'
    try:
        expr_new = re.sub(magCall, r'-2.5*log(\g<1>)/log(10)', expr)
        val = df.eval(expr_new)
    except Exception as e:  # Should check what actually gets raised
        log.error("Exception in mag_aware_eval: %s: %s", type(e).__name__, e)
        expr_new = re.sub(magCall, r'-2.5*log(\g<1>_instFlux)/log(10)', expr)
        val = df.eval(expr_new)
    return val

602 

603 

class CustomFunctor(Functor):
    """Arbitrary computation on a catalog

    Column names (and thus the columns to be loaded from catalog) are found
    by finding all identifiers in the expression and ignoring the "math-y"
    words listed in `_ignore_words`.

    Parameters
    ----------
    expr : str
        Expression to evaluate, to be parsed and executed by `mag_aware_eval`.
    **kwargs
        Forwarded to the `Functor` constructor.
    """
    _ignore_words = ('mag', 'sin', 'cos', 'exp', 'log', 'sqrt')

    def __init__(self, expr, **kwargs):
        self.expr = expr
        super().__init__(**kwargs)

    @property
    def name(self):
        return self.expr

    @property
    def columns(self):
        """Column names referenced by the expression (unordered, unique)

        An argument of ``mag(...)`` that lacks the ``_instFlux`` suffix is
        replaced by its ``_instFlux`` counterpart.
        """
        flux_cols = re.findall(r'mag\(\s*(\w+)\s*\)', self.expr)

        # Full identifiers: digits are allowed after the first character so
        # that names such as ``x_12_0_instFlux`` stay in one piece.  The
        # leading word boundary keeps the exponent of a literal like ``1e9``
        # from being mistaken for a name.
        cols = [c for c in re.findall(r'\b[a-zA-Z_][a-zA-Z0-9_]*', self.expr)
                if c not in self._ignore_words]
        not_a_col = set()
        for c in flux_cols:
            if not re.search('_instFlux$', c):
                cols.append(f'{c}_instFlux')
                # The bare shorthand is not itself a column.
                not_a_col.add(c)
            else:
                cols.append(c)

        return list({c for c in cols if c not in not_a_col})

    def _func(self, df):
        return mag_aware_eval(df, self.expr, self.log)

642 

643 

class Column(Functor):
    """Return a single named column of the table, unchanged

    Parameters
    ----------
    col : str
        Name of the column to return.
    **kwargs
        Forwarded to the `Functor` constructor.
    """

    def __init__(self, col, **kwargs):
        self.col = col
        super().__init__(**kwargs)

    @property
    def name(self):
        # The column name doubles as the functor name.
        return self.col

    @property
    def columns(self):
        # Only the one column is needed.
        needed = [self.col]
        return needed

    def _func(self, df):
        selected = df[self.col]
        return selected

662 

663 

class Index(Functor):
    """Return the dataframe index as a column, one value per row
    """

    # The calculation uses no column values; this one is only requested so
    # that there is something to load.
    columns = ['coord_ra']
    _defaultDataset = 'ref'
    _defaultNoDup = True

    def _func(self, df):
        rowIndex = df.index
        return pd.Series(rowIndex, index=rowIndex)

674 

675 

class IDColumn(Column):
    """`Column` variant whose result is the dataframe index

    The class-level default column name is ``'id'``, but the returned values
    are taken from the index rather than from that column.
    """
    col = 'id'
    # Not read anywhere in the visible code; presumably consulted by callers — TODO confirm.
    _allow_difference = False
    _defaultNoDup = True

    def _func(self, df):
        rowIndex = df.index
        return pd.Series(rowIndex, index=rowIndex)

683 

684 

class FootprintNPix(Column):
    """`Column` with ``'base_Footprint_nPix'`` as its class-level column name
    """
    col = 'base_Footprint_nPix'

687 

688 

class CoordColumn(Column):
    """Base class for coordinate column, in degrees

    When `_radians` is true (the default) the stored values are multiplied
    by ``180 / pi``; otherwise the column is returned as is.
    """
    _radians = True

    def __init__(self, col, **kwargs):
        super().__init__(col, **kwargs)

    def _func(self, df):
        values = df[self.col]
        if self._radians:
            # Build a new series; the original column must not be modified
            # in case that column is used by another functor.
            return values * 180 / np.pi
        return values

701 

702 

class RAColumn(CoordColumn):
    """Right Ascension, in degrees

    Reads the ``coord_ra`` column.
    """
    name = 'RA'
    _defaultNoDup = True

    def __init__(self, **kwargs):
        # The column name is fixed; only keyword options are configurable.
        super().__init__('coord_ra', **kwargs)

    def __call__(self, catalog, **kwargs):
        # Plain delegation to the parent implementation.
        result = super().__call__(catalog, **kwargs)
        return result

714 

715 

class DecColumn(CoordColumn):
    """Declination, in degrees

    Reads the ``coord_dec`` column.
    """
    name = 'Dec'
    _defaultNoDup = True

    def __init__(self, **kwargs):
        # The column name is fixed; only keyword options are configurable.
        super().__init__('coord_dec', **kwargs)

    def __call__(self, catalog, **kwargs):
        # Plain delegation to the parent implementation.
        result = super().__call__(catalog, **kwargs)
        return result

727 

728 

class HtmIndex20(Functor):
    """Compute the level 20 HtmIndex for the catalog.

    Parameters
    ----------
    ra : str
        Name of the column holding the first coordinate passed to
        `lsst.geom.SpherePoint`.
    decl : str
        Name of the column holding the second coordinate.
    **kwargs
        Forwarded to the `Functor` constructor.

    Notes
    -----
    This functor was implemented to satisfy requirements of old APDB interface
    which required ``pixelId`` column in DiaObject with HTM20 index. APDB
    interface had migrated to not need that information, but we keep this
    class in case it may be useful for something else.
    """
    name = "Htm20"
    htmLevel = 20
    _radians = True

    def __init__(self, ra, decl, **kwargs):
        self.pixelator = sphgeom.HtmPixelization(self.htmLevel)
        self.ra = ra
        self.decl = decl
        self._columns = [self.ra, self.decl]
        super().__init__(**kwargs)

    def _func(self, df):
        # The angular unit of the input columns is selected by `_radians`.
        angleUnit = geom.radians if self._radians else geom.degrees

        def computePixel(row):
            point = geom.SpherePoint(row[self.ra], row[self.decl], angleUnit)
            return self.pixelator.index(point.getVector())

        # One pixel index per row, stored as 64-bit integers.
        pixels = df.apply(computePixel, axis=1, result_type='reduce')
        return pixels.astype('int64')

764 

765 

def fluxName(col):
    """Return ``col`` with an ``_instFlux`` suffix, adding it only if absent
    """
    suffix = '_instFlux'
    return col if col.endswith(suffix) else col + suffix

770 

771 

def fluxErrName(col):
    """Return ``col`` with an ``_instFluxErr`` suffix, adding it only if absent
    """
    suffix = '_instFluxErr'
    return col if col.endswith(suffix) else col + suffix

776 

777 

class Mag(Functor):
    """Compute calibrated magnitude

    Takes a `calib` argument, which returns the flux at mag=0
    as `calib.getFluxMag0()`.  If not provided, then the default
    `fluxMag0` is 63095734448.0194, which is default for HSC.
    This default should be removed in DM-21955

    This calculation hides warnings about invalid values and dividing by zero.

    As for all functors, a `dataset` and `filt` kwarg should be provided upon
    initialization.  Unlike the default `Functor`, however, the default dataset
    for a `Mag` is `'meas'`, rather than `'ref'`.

    Parameters
    ----------
    col : `str`
        Name of flux column from which to compute magnitude.  Can be parseable
        by `lsst.pipe.tasks.functors.fluxName` function---that is, you can pass
        `'modelfit_CModel'` instead of `'modelfit_CModel_instFlux'`) and it will
        understand.
    calib : `lsst.afw.image.calib.Calib` (optional)
        Object that knows zero point.
    """
    _defaultDataset = 'meas'

    def __init__(self, col, calib=None, **kwargs):
        self.col = fluxName(col)
        self.calib = calib
        if calib is not None:
            self.fluxMag0 = calib.getFluxMag0()[0]
        else:
            # TO DO: DM-21955 Replace hard coded photometric calibration values
            self.fluxMag0 = 63095734448.0194

        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.col]

    def _func(self, df):
        # Use the stdlib module directly: ``np.warnings`` is not a public
        # NumPy name and is missing from recent NumPy releases.
        import warnings

        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', r'invalid value encountered')
            warnings.filterwarnings('ignore', r'divide by zero')
            return -2.5*np.log10(df[self.col] / self.fluxMag0)

    @property
    def name(self):
        return f'mag_{self.col}'

828 

829 

class MagErr(Mag):
    """Compute calibrated magnitude uncertainty

    Takes the same `calib` object as `lsst.pipe.tasks.functors.Mag`.  The
    zero-point uncertainty is taken from ``calib.getFluxMag0()[1]`` and is
    zero when no `calib` is given.

    This calculation hides warnings about invalid values and dividing by zero.

    Parameters
    ----------
    col : `str`
        Name of flux column
    calib : `lsst.afw.image.calib.Calib` (optional)
        Object that knows zero point.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        if self.calib is not None:
            self.fluxMag0Err = self.calib.getFluxMag0()[1]
        else:
            self.fluxMag0Err = 0.

    @property
    def columns(self):
        # The flux column and its companion error column.
        return [self.col, self.col + 'Err']

    def _func(self, df):
        # Use the stdlib module directly: ``np.warnings`` is not a public
        # NumPy name and is missing from recent NumPy releases.
        import warnings

        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', r'invalid value encountered')
            warnings.filterwarnings('ignore', r'divide by zero')
            fluxCol, fluxErrCol = self.columns
            # Relative errors of the flux and of the zero point, added in quadrature.
            x = df[fluxErrCol] / df[fluxCol]
            y = self.fluxMag0Err / self.fluxMag0
            magErr = (2.5 / np.log(10.)) * np.sqrt(x*x + y*y)
            return magErr

    @property
    def name(self):
        return super().name + '_err'

866 

867 

class NanoMaggie(Mag):
    """Return the flux column divided by `fluxMag0` and scaled by 1e9

    Construction and required columns are inherited from `Mag`.
    """

    def _func(self, df):
        fluxRatio = df[self.col] / self.fluxMag0
        return fluxRatio * 1e9

874 

875 

class MagDiff(Functor):
    """Functor to calculate magnitude difference

    The result is ``-2.5*log10(flux1/flux2)`` for the two flux columns.
    This calculation hides warnings about invalid values and dividing by zero.

    Parameters
    ----------
    col1, col2 : `str`
        Names of the two flux columns; each is passed through
        `lsst.pipe.tasks.functors.fluxName`, so the ``_instFlux`` suffix may
        be omitted.
    **kwargs
        Forwarded to the `Functor` constructor.
    """
    _defaultDataset = 'meas'

    def __init__(self, col1, col2, **kwargs):
        self.col1 = fluxName(col1)
        self.col2 = fluxName(col2)
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.col1, self.col2]

    def _func(self, df):
        # Use the stdlib module directly: ``np.warnings`` is not a public
        # NumPy name and is missing from recent NumPy releases.
        import warnings

        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', r'invalid value encountered')
            warnings.filterwarnings('ignore', r'divide by zero')
            return -2.5*np.log10(df[self.col1]/df[self.col2])

    @property
    def name(self):
        return f'(mag_{self.col1} - mag_{self.col2})'

    @property
    def shortname(self):
        return f'magDiff_{self.col1}_{self.col2}'

903 

904 

class Color(Functor):
    """Compute the color between two filters

    Computes color by initializing two different `Mag`
    functors based on the `col` and filters provided, and
    then returning the difference.

    This is enabled by the `_func` expecting a dataframe with a
    multilevel column index, with both `'band'` and `'column'`,
    instead of just `'column'`, which is the `Functor` default.
    This is controlled by the `_dfLevels` attribute.

    Also of note, the default dataset for `Color` is `'forced_src'`,
    whereas for `Mag` it is `'meas'`.

    Parameters
    ----------
    col : str
        Name of flux column from which to compute; same as would be passed to
        `lsst.pipe.tasks.functors.Mag`.

    filt2, filt1 : str
        Filters from which to compute magnitude difference.
        Color computed is `Mag(filt2) - Mag(filt1)`.

    Raises
    ------
    RuntimeError
        If ``filt2`` and ``filt1`` are equal.
    """
    _defaultDataset = 'forced_src'
    _dfLevels = ('band', 'column')
    _defaultNoDup = True

    def __init__(self, col, filt2, filt1, **kwargs):
        self.col = fluxName(col)
        if filt2 == filt1:
            raise RuntimeError("Cannot compute Color for %s: %s - %s " % (col, filt2, filt1))
        self.filt2 = filt2
        self.filt1 = filt1

        # One magnitude functor per band; extra keyword options are shared.
        self.mag2 = Mag(col, filt=filt2, **kwargs)
        self.mag1 = Mag(col, filt=filt1, **kwargs)

        super().__init__(**kwargs)

    @property
    def filt(self):
        # A color spans two bands, so it reports no single filter.
        return None

    @filt.setter
    def filt(self, filt):
        # Assignments are accepted and ignored; the bands are fixed at construction.
        pass

    def _func(self, df):
        # Selecting a band from the two-level column index leaves plain
        # columns, which is what the `Mag` calculation expects.
        bandMags = {
            band: mag._func(df[band])
            for band, mag in ((self.filt2, self.mag2), (self.filt1, self.mag1))
        }
        return bandMags[self.filt2] - bandMags[self.filt1]

    @property
    def columns(self):
        return [self.mag1.col, self.mag2.col]

    def multilevelColumns(self, parq, **kwargs):
        # Explicit (dataset, band, column) tuples, first band first.
        return [(self.dataset, band, self.col) for band in (self.filt1, self.filt2)]

    @property
    def name(self):
        return f'{self.filt2} - {self.filt1} ({self.col})'

    @property
    def shortname(self):
        # Hyphens are stripped from the band names in the key.
        band2 = self.filt2.replace('-', '')
        band1 = self.filt1.replace('-', '')
        return f"{self.col}_{band2}m{band1}"

973 

974 

class Labeller(Functor):
    """Base class for label-producing functors.

    Its main purpose is to override the ``dropna`` handling: `__call__`
    always forwards ``dropna=False`` to the parent implementation.
    """
    name = 'label'
    _null_label = 'null'
    _allow_difference = False
    _force_str = False

    def __call__(self, parq, dropna=False, **kwargs):
        # The ``dropna`` argument is accepted but ignored; ``False`` is
        # always passed on.
        return super().__call__(parq, dropna=False, **kwargs)

985 

986 

class StarGalaxyLabeller(Labeller):
    """Label rows as star, galaxy or null from the extendedness column.

    Rows with extendedness below 0.5 get category code 1 (``'star'``),
    other non-null rows get code 0 (``'galaxy'``), and null rows get
    code 2 (``self._null_label``).
    """
    _columns = ["base_ClassificationExtendedness_value"]
    _column = "base_ClassificationExtendedness_value"

    def _func(self, df):
        extendedness = df[self._columns][self._column]
        isMissing = extendedness.isnull()
        # 1 where extendedness < 0.5, else 0; missing values become 2.
        codes = (extendedness < 0.5).astype(int).mask(isMissing, 2)

        # TODO: DM-21954 Look into veracity of inline comment below
        # are these backwards?
        categories = ['galaxy', 'star', self._null_label]
        categorical = pd.Categorical.from_codes(codes, categories=categories)
        label = pd.Series(categorical, index=extendedness.index, name='label')
        return label.astype(str) if self._force_str else label

1005 

1006 

class NumStarLabeller(Labeller):
    """Bin the ``numStarFlags`` column into three labels.

    Values are cut into the intervals ``(-1, 0]``, ``(0, n-1]`` and
    ``(n-1, n]``, labelled ``'noStar'``, ``'maybe'`` and ``'star'``, where
    ``n`` is one less than the number of distinct values in the column.
    """
    _columns = ['numStarFlags']
    labels = {"star": 0, "maybe": 1, "notStar": 2}

    def _func(self, df):
        flags = df[self._columns][self._columns[0]]

        # Number of filters
        n = len(flags.unique()) - 1

        binEdges = [-1, 0, n-1, n]
        binNames = ['noStar', 'maybe', 'star']
        binned = pd.cut(flags, binEdges, labels=binNames)
        label = pd.Series(binned, index=flags.index, name='label')

        return label.astype(str) if self._force_str else label

1025 

1026 

class DeconvolvedMoments(Functor):
    """Trace of the source second moments minus the trace of the PSF moments.

    The HSM source moments are used where they are finite; the SDSS shape
    moments are used as a fallback elsewhere.
    """
    name = 'Deconvolved Moments'
    shortname = 'deconvolvedMoments'
    _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
                "ext_shapeHSM_HsmSourceMoments_yy",
                "base_SdssShape_xx", "base_SdssShape_yy",
                "ext_shapeHSM_HsmPsfMoments_xx",
                "ext_shapeHSM_HsmPsfMoments_yy")

    def _func(self, df):
        """Calculate deconvolved moments.

        Raises
        ------
        RuntimeError
            Raised if the HSM PSF moment columns are not in ``df``.
        """
        if "ext_shapeHSM_HsmSourceMoments_xx" in df.columns:  # _xx added by tdm
            hsm = df["ext_shapeHSM_HsmSourceMoments_xx"] + df["ext_shapeHSM_HsmSourceMoments_yy"]
        else:
            # Must be a Series (not a bare ndarray) so that ``.where`` below
            # exists and the result stays aligned with ``df``.
            hsm = pd.Series(np.nan, index=df.index)
        sdss = df["base_SdssShape_xx"] + df["base_SdssShape_yy"]
        if "ext_shapeHSM_HsmPsfMoments_xx" in df.columns:
            psf = df["ext_shapeHSM_HsmPsfMoments_xx"] + df["ext_shapeHSM_HsmPsfMoments_yy"]
        else:
            # LSST does not have shape.sdss.psf. Could instead add base_PsfShape to catalog using
            # exposure.getPsf().computeShape(s.getCentroid()).getIxx()
            raise RuntimeError('No psf shape parameter found in catalog')

        # Keep HSM where finite, otherwise fall back to the SDSS trace.
        return hsm.where(np.isfinite(hsm), sdss) - psf

1052 

1053 

class SdssTraceSize(Functor):
    """Functor to calculate SDSS trace radius size for sources.

    The size is ``sqrt(0.5*(xx + yy))`` of the SDSS shape moments.
    """
    name = "SDSS Trace Size"
    shortname = 'sdssTrace'
    _columns = ("base_SdssShape_xx", "base_SdssShape_yy")

    def _func(self, df):
        trace = df["base_SdssShape_xx"] + df["base_SdssShape_yy"]
        return np.sqrt(0.5*trace)

1063 

1064 

class PsfSdssTraceSizeDiff(Functor):
    """Functor to calculate SDSS trace radius size difference (%) between object and psf model.

    The difference is expressed relative to the mean of the two sizes.
    """
    name = "PSF - SDSS Trace Size"
    shortname = 'psf_sdssTrace'
    _columns = ("base_SdssShape_xx", "base_SdssShape_yy",
                "base_SdssShape_psf_xx", "base_SdssShape_psf_yy")

    def _func(self, df):
        srcTrace = df["base_SdssShape_xx"] + df["base_SdssShape_yy"]
        psfTrace = df["base_SdssShape_psf_xx"] + df["base_SdssShape_psf_yy"]
        srcSize = np.sqrt(0.5*srcTrace)
        psfSize = np.sqrt(0.5*psfTrace)
        meanSize = 0.5*(srcSize + psfSize)
        return 100*(srcSize - psfSize)/meanSize

1077 

1078 

class HsmTraceSize(Functor):
    """Functor to calculate HSM trace radius size for sources.

    The size is ``sqrt(0.5*(xx + yy))`` of the HSM source moments.
    """
    name = 'HSM Trace Size'
    shortname = 'hsmTrace'
    _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
                "ext_shapeHSM_HsmSourceMoments_yy")

    def _func(self, df):
        trace = (df["ext_shapeHSM_HsmSourceMoments_xx"]
                 + df["ext_shapeHSM_HsmSourceMoments_yy"])
        return np.sqrt(0.5*trace)

1090 

1091 

class PsfHsmTraceSizeDiff(Functor):
    """Functor to calculate HSM trace radius size difference (%) between object and psf model.

    The difference is expressed relative to the mean of the two sizes.
    """
    name = 'PSF - HSM Trace Size'
    shortname = 'psf_HsmTrace'
    _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
                "ext_shapeHSM_HsmSourceMoments_yy",
                "ext_shapeHSM_HsmPsfMoments_xx",
                "ext_shapeHSM_HsmPsfMoments_yy")

    def _func(self, df):
        srcTrace = (df["ext_shapeHSM_HsmSourceMoments_xx"]
                    + df["ext_shapeHSM_HsmSourceMoments_yy"])
        psfTrace = (df["ext_shapeHSM_HsmPsfMoments_xx"]
                    + df["ext_shapeHSM_HsmPsfMoments_yy"])
        srcSize = np.sqrt(0.5*srcTrace)
        psfSize = np.sqrt(0.5*psfTrace)
        meanSize = 0.5*(srcSize + psfSize)
        return 100*(srcSize - psfSize)/meanSize

1108 

1109 

class HsmFwhm(Functor):
    """FWHM derived from the HSM PSF moments.

    The trace radius ``sqrt(0.5*(xx + yy))`` is multiplied by the
    sigma-to-FWHM factor and by the hard-coded ``pixelScale``.
    """
    name = 'HSM Psf FWHM'
    _columns = ('ext_shapeHSM_HsmPsfMoments_xx', 'ext_shapeHSM_HsmPsfMoments_yy')
    # TODO: DM-21403 pixel scale should be computed from the CD matrix or transform matrix
    pixelScale = 0.168
    SIGMA2FWHM = 2*np.sqrt(2*np.log(2))

    def _func(self, df):
        trace = df['ext_shapeHSM_HsmPsfMoments_xx'] + df['ext_shapeHSM_HsmPsfMoments_yy']
        sigma = np.sqrt(0.5*trace)
        return self.pixelScale*self.SIGMA2FWHM*sigma

1120 

1121 

class E1(Functor):
    """Distortion ellipticity component ``e1 = (xx - yy) / (xx + yy)``.

    Parameters
    ----------
    colXX, colXY, colYY : `str`
        Names of the second-moment columns. ``colXY`` is requested from the
        table but does not enter the e1 formula.
    **kwargs
        Forwarded to the `Functor` constructor.
    """
    name = "Distortion Ellipticity (e1)"
    shortname = "Distortion"

    def __init__(self, colXX, colXY, colYY, **kwargs):
        self.colXX = colXX
        self.colXY = colXY
        self.colYY = colYY
        self._columns = [self.colXX, self.colXY, self.colYY]
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.colXX, self.colXY, self.colYY]

    def _func(self, df):
        # The numerator must be parenthesised: without it only ``yy`` was
        # divided by the trace, giving ``xx - yy/(xx + yy)``.
        return (df[self.colXX] - df[self.colYY]) / (df[self.colXX] + df[self.colYY])

1139 

1140 

class E2(Functor):
    """Ellipticity component ``e2 = 2*xy / (xx + yy)``.

    Parameters
    ----------
    colXX, colXY, colYY : `str`
        Names of the second-moment columns.
    **kwargs
        Forwarded to the `Functor` constructor.
    """
    name = "Ellipticity e2"

    def __init__(self, colXX, colXY, colYY, **kwargs):
        self.colXX, self.colXY, self.colYY = colXX, colXY, colYY
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.colXX, self.colXY, self.colYY]

    def _func(self, df):
        trace = df[self.colXX] + df[self.colYY]
        return 2*df[self.colXY] / trace

1156 

1157 

class RadiusFromQuadrupole(Functor):
    """Radius from second moments: ``(xx*yy - xy**2)**0.25``.

    Parameters
    ----------
    colXX, colXY, colYY : `str`
        Names of the second-moment columns.
    **kwargs
        Forwarded to the `Functor` constructor.
    """

    def __init__(self, colXX, colXY, colYY, **kwargs):
        self.colXX, self.colXY, self.colYY = colXX, colXY, colYY
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.colXX, self.colXY, self.colYY]

    def _func(self, df):
        # Fourth root of the determinant of the moments matrix.
        determinant = df[self.colXX]*df[self.colYY] - df[self.colXY]**2
        return determinant**0.25

1172 

1173 

class LocalWcs(Functor):
    """Computations using the stored localWcs.

    Parameters
    ----------
    colCD_1_1, colCD_1_2, colCD_2_1, colCD_2_2 : `str`
        Names of the columns holding the [1, 1], [1, 2], [2, 1] and [2, 2]
        elements of the local Wcs affine transform.
    **kwargs
        Forwarded to the `Functor` constructor.
    """
    name = "LocalWcsOperations"

    def __init__(self,
                 colCD_1_1,
                 colCD_1_2,
                 colCD_2_1,
                 colCD_2_2,
                 **kwargs):
        self.colCD_1_1 = colCD_1_1
        self.colCD_1_2 = colCD_1_2
        self.colCD_2_1 = colCD_2_1
        self.colCD_2_2 = colCD_2_2
        super().__init__(**kwargs)

    def computeDeltaRaDec(self, x, y, cd11, cd12, cd21, cd22):
        """Apply the local Wcs affine transform to pixel coordinates.

        Parameters
        ----------
        x : `pandas.Series`
            X pixel coordinate.
        y : `pandas.Series`
            Y pixel coordinate.
        cd11 : `pandas.Series`
            [1, 1] element of the local Wcs affine transform.
        cd12 : `pandas.Series`
            [1, 2] element of the local Wcs affine transform.
        cd21 : `pandas.Series`
            [2, 1] element of the local Wcs affine transform.
        cd22 : `pandas.Series`
            [2, 2] element of the local Wcs affine transform.

        Returns
        -------
        raDecTuple : tuple
            RA and dec conversion of x and y given the local Wcs. Returned
            units are in radians.

        """
        return (x * cd11 + y * cd12, x * cd21 + y * cd22)

    def computeSkySeperation(self, ra1, dec1, ra2, dec2):
        """Compute the angular separation between two sky coordinates.

        Uses the haversine formula.

        Parameters
        ----------
        ra1 : `pandas.Series`
            Ra of the first coordinate in radians.
        dec1 : `pandas.Series`
            Dec of the first coordinate in radians.
        ra2 : `pandas.Series`
            Ra of the second coordinate in radians.
        dec2 : `pandas.Series`
            Dec of the second coordinate in radians.

        Returns
        -------
        dist : `pandas.Series`
            Distance on the sphere in radians.
        """
        deltaDec = dec2 - dec1
        deltaRa = ra2 - ra1
        return 2 * np.arcsin(
            np.sqrt(
                np.sin(deltaDec / 2) ** 2
                + np.cos(dec2) * np.cos(dec1) * np.sin(deltaRa / 2) ** 2))

    def getSkySeperationFromPixel(self, x1, y1, x2, y2, cd11, cd12, cd21, cd22):
        """Compute the distance on the sphere from x2, y2 to x1, y1.

        Both pixel positions are converted with `computeDeltaRaDec` and the
        separation is then obtained from `computeSkySeperation`.

        Parameters
        ----------
        x1 : `pandas.Series`
            X pixel coordinate of the first position.
        y1 : `pandas.Series`
            Y pixel coordinate of the first position.
        x2 : `pandas.Series`
            X pixel coordinate of the second position.
        y2 : `pandas.Series`
            Y pixel coordinate of the second position.
        cd11 : `pandas.Series`
            [1, 1] element of the local Wcs affine transform.
        cd12 : `pandas.Series`
            [1, 2] element of the local Wcs affine transform.
        cd21 : `pandas.Series`
            [2, 1] element of the local Wcs affine transform.
        cd22 : `pandas.Series`
            [2, 2] element of the local Wcs affine transform.

        Returns
        -------
        Distance : `pandas.Series`
            Separation between the two positions, as returned by
            `computeSkySeperation` (radians).
        """
        ra1, dec1 = self.computeDeltaRaDec(x1, y1, cd11, cd12, cd21, cd22)
        ra2, dec2 = self.computeDeltaRaDec(x2, y2, cd11, cd12, cd21, cd22)
        # Great circle distance for small separations.
        return self.computeSkySeperation(ra1, dec1, ra2, dec2)

1279 

1280 

class ComputePixelScale(LocalWcs):
    """Compute the local pixel scale from the stored CDMatrix.
    """
    name = "PixelScale"

    @property
    def columns(self):
        return [self.colCD_1_1, self.colCD_1_2, self.colCD_2_1, self.colCD_2_2]

    def pixelScaleArcseconds(self, cd11, cd12, cd21, cd22):
        """Compute the local pixel scale in arcseconds.

        The scale is the square root of the absolute determinant of the
        transform, converted from radians to arcseconds.

        Parameters
        ----------
        cd11 : `pandas.Series`
            [1, 1] element of the local Wcs affine transform in radians.
        cd12 : `pandas.Series`
            [1, 2] element of the local Wcs affine transform in radians.
        cd21 : `pandas.Series`
            [2, 1] element of the local Wcs affine transform in radians.
        cd22 : `pandas.Series`
            [2, 2] element of the local Wcs affine transform in radians.

        Returns
        -------
        pixScale : `pandas.Series`
            Arcseconds per pixel at the location of the local Wcs.
        """
        determinant = cd11 * cd22 - cd12 * cd21
        scaleRadians = np.sqrt(np.fabs(determinant))
        return 3600 * np.degrees(scaleRadians)

    def _func(self, df):
        cd11 = df[self.colCD_1_1]
        cd12 = df[self.colCD_1_2]
        cd21 = df[self.colCD_2_1]
        cd22 = df[self.colCD_2_2]
        return self.pixelScaleArcseconds(cd11, cd12, cd21, cd22)

1321 

1322 

class ConvertPixelToArcseconds(ComputePixelScale):
    """Convert a value in units pixels to units arcseconds.

    The column is multiplied once by the local pixel scale.

    Parameters
    ----------
    col : `str`
        Name of the column to convert.
    colCD_1_1, colCD_1_2, colCD_2_1, colCD_2_2 : `str`
        Names of the columns holding the local Wcs affine transform
        elements.
    **kwargs
        Forwarded to the parent constructor.
    """

    def __init__(self,
                 col,
                 colCD_1_1,
                 colCD_1_2,
                 colCD_2_1,
                 colCD_2_2,
                 **kwargs):
        self.col = col
        super().__init__(colCD_1_1,
                         colCD_1_2,
                         colCD_2_1,
                         colCD_2_2,
                         **kwargs)

    @property
    def name(self):
        return f"{self.col}_asArcseconds"

    @property
    def columns(self):
        return [self.col,
                self.colCD_1_1,
                self.colCD_1_2,
                self.colCD_2_1,
                self.colCD_2_2]

    def _func(self, df):
        return df[self.col] * self.pixelScaleArcseconds(df[self.colCD_1_1],
                                                        df[self.colCD_1_2],
                                                        df[self.colCD_2_1],
                                                        df[self.colCD_2_2])

1358 

1359 

class ConvertPixelSqToArcsecondsSq(ComputePixelScale):
    """Convert a value in units pixels squared to units arcseconds squared.

    The column is multiplied twice by the local pixel scale.

    Parameters
    ----------
    col : `str`
        Name of the column to convert.
    colCD_1_1, colCD_1_2, colCD_2_1, colCD_2_2 : `str`
        Names of the columns holding the local Wcs affine transform
        elements.
    **kwargs
        Forwarded to the parent constructor.
    """

    def __init__(self,
                 col,
                 colCD_1_1,
                 colCD_1_2,
                 colCD_2_1,
                 colCD_2_2,
                 **kwargs):
        self.col = col
        super().__init__(colCD_1_1,
                         colCD_1_2,
                         colCD_2_1,
                         colCD_2_2,
                         **kwargs)

    @property
    def name(self):
        return f"{self.col}_asArcsecondsSq"

    @property
    def columns(self):
        return [self.col,
                self.colCD_1_1,
                self.colCD_1_2,
                self.colCD_2_1,
                self.colCD_2_2]

    def _func(self, df):
        pixScale = self.pixelScaleArcseconds(df[self.colCD_1_1],
                                             df[self.colCD_1_2],
                                             df[self.colCD_2_1],
                                             df[self.colCD_2_2])
        return df[self.col] * pixScale * pixScale

1396 

1397 

class ReferenceBand(Functor):
    """Return, per row, the band whose ``merge_measurement_*`` column holds
    the row maximum.
    """
    name = 'Reference Band'
    shortname = 'refBand'

    @property
    def columns(self):
        return ["merge_measurement_i",
                "merge_measurement_r",
                "merge_measurement_z",
                "merge_measurement_y",
                "merge_measurement_g",
                "merge_measurement_u"]

    def _func(self, df: pd.DataFrame) -> pd.Series:
        def bandOfRowMax(row):
            # get column name with the max value (True > False)
            return row.idxmax().replace('merge_measurement_', '')

        # Skip columns that are unavailable, because this functor requests the
        # superset of bands that could be included in the object table
        available = [name for name in self.columns if name in df.columns]
        bands = df[available].apply(bandOfRowMax, axis=1, result_type='reduce')
        # Makes a Series of dtype object if df is empty
        return bands.astype('object')

1423 

1424 

class Photometry(Functor):
    """Base class for converting instrumental counts to fluxes/magnitudes.

    Parameters
    ----------
    colFlux : `str`
        Name of the flux column; stored as ``self.col``.
    colFluxErr : `str`, optional
        Name of the associated flux error column.
    calib : optional
        Object providing ``getFluxMag0()`` returning ``(fluxMag0,
        fluxMag0Err)``. If `None`, the zero point is derived from the
        hard-coded ``COADD_ZP`` and its error is set to zero.
    **kwargs
        Forwarded to the `Functor` constructor.
    """
    # AB to NanoJansky (3631 Jansky)
    AB_FLUX_SCALE = (0 * u.ABmag).to_value(u.nJy)
    LOG_AB_FLUX_SCALE = 12.56
    FIVE_OVER_2LOG10 = 1.085736204758129569
    # TO DO: DM-21955 Replace hard coded photometric calibration values
    COADD_ZP = 27

    def __init__(self, colFlux, colFluxErr=None, calib=None, **kwargs):
        # Element-wise version of `hypot` for array inputs.
        self.vhypot = np.vectorize(self.hypot)
        self.col = colFlux
        self.colFluxErr = colFluxErr

        self.calib = calib
        if calib is not None:
            self.fluxMag0, self.fluxMag0Err = calib.getFluxMag0()
        else:
            self.fluxMag0 = 1./np.power(10, -0.4*self.COADD_ZP)
            self.fluxMag0Err = 0.

        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.col]

    @property
    def name(self):
        return f'mag_{self.col}'

    @classmethod
    def hypot(cls, a, b):
        """Return ``sqrt(a**2 + b**2)`` for scalars.

        The larger-magnitude argument is factored out before squaring.
        """
        if np.abs(a) < np.abs(b):
            a, b = b, a
        if a == 0.:
            return 0.
        q = b/a
        return np.abs(a) * np.sqrt(1. + q*q)

    def dn2flux(self, dn, fluxMag0):
        """Convert counts to flux: ``AB_FLUX_SCALE * dn / fluxMag0``."""
        return self.AB_FLUX_SCALE * dn / fluxMag0

    def dn2mag(self, dn, fluxMag0):
        """Convert counts to magnitude: ``-2.5*log10(dn/fluxMag0)``."""
        # ``np.warnings`` was only ever an accidental alias of the standard
        # library module and is not available in recent NumPy releases; use
        # the real module.
        import warnings

        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', r'invalid value encountered')
            warnings.filterwarnings('ignore', r'divide by zero')
            return -2.5 * np.log10(dn/fluxMag0)

    def dn2fluxErr(self, dn, dnErr, fluxMag0, fluxMag0Err):
        """Propagate count and zero-point errors to a flux error."""
        retVal = self.vhypot(dn * fluxMag0Err, dnErr * fluxMag0)
        retVal *= self.AB_FLUX_SCALE / fluxMag0 / fluxMag0
        return retVal

    def dn2MagErr(self, dn, dnErr, fluxMag0, fluxMag0Err):
        """Magnitude error: ``FIVE_OVER_2LOG10 * fluxErr / flux``."""
        retVal = self.dn2fluxErr(dn, dnErr, fluxMag0, fluxMag0Err) / self.dn2flux(dn, fluxMag0)
        return self.FIVE_OVER_2LOG10 * retVal

1481 

1482 

class NanoJansky(Photometry):
    """Return the flux column converted with `Photometry.dn2flux`."""

    def _func(self, df):
        counts = df[self.col]
        return self.dn2flux(counts, self.fluxMag0)

1486 

1487 

class NanoJanskyErr(Photometry):
    """Return the flux error computed with `Photometry.dn2fluxErr`."""

    @property
    def columns(self):
        return [self.col, self.colFluxErr]

    def _func(self, df):
        counts = df[self.col]
        countsErr = df[self.colFluxErr]
        fluxErr = self.dn2fluxErr(counts, countsErr, self.fluxMag0, self.fluxMag0Err)
        # Re-wrap as a Series aligned with the input frame.
        return pd.Series(fluxErr, index=df.index)

1496 

1497 

class Magnitude(Photometry):
    """Return the flux column converted with `Photometry.dn2mag`."""

    def _func(self, df):
        counts = df[self.col]
        return self.dn2mag(counts, self.fluxMag0)

1501 

1502 

class MagnitudeErr(Photometry):
    """Return the magnitude error computed with `Photometry.dn2MagErr`."""

    @property
    def columns(self):
        return [self.col, self.colFluxErr]

    def _func(self, df):
        counts = df[self.col]
        countsErr = df[self.colFluxErr]
        magErr = self.dn2MagErr(counts, countsErr, self.fluxMag0, self.fluxMag0Err)
        # Re-wrap as a Series aligned with the input frame.
        return pd.Series(magErr, index=df.index)

1511 

1512 

class LocalPhotometry(Functor):
    """Base class for calibrating the specified instrument flux column using
    the local photometric calibration.

    Parameters
    ----------
    instFluxCol : `str`
        Name of the instrument flux column.
    instFluxErrCol : `str`
        Name of the associated error column for ``instFluxCol``.
    photoCalibCol : `str`
        Name of local calibration column.
    photoCalibErrCol : `str`
        Error associated with ``photoCalibCol``

    See also
    --------
    LocalPhotometry
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    """
    logNJanskyToAB = (1 * u.nJy).to_value(u.ABmag)

    def __init__(self,
                 instFluxCol,
                 instFluxErrCol,
                 photoCalibCol,
                 photoCalibErrCol,
                 **kwargs):
        self.instFluxCol = instFluxCol
        self.instFluxErrCol = instFluxErrCol
        self.photoCalibCol = photoCalibCol
        self.photoCalibErrCol = photoCalibErrCol
        super().__init__(**kwargs)

    def instFluxToNanojansky(self, instFlux, localCalib):
        """Convert instrument flux to nanojanskys.

        Parameters
        ----------
        instFlux : `numpy.ndarray` or `pandas.Series`
            Array of instrument flux measurements
        localCalib : `numpy.ndarray` or `pandas.Series`
            Array of local photometric calibration estimates.

        Returns
        -------
        calibFlux : `numpy.ndarray` or `pandas.Series`
            Array of calibrated flux measurements.
        """
        return instFlux * localCalib

    def instFluxErrToNanojanskyErr(self, instFlux, instFluxErr, localCalib, localCalibErr):
        """Convert instrument flux error to nanojansky error.

        Parameters
        ----------
        instFlux : `numpy.ndarray` or `pandas.Series`
            Array of instrument flux measurements
        instFluxErr : `numpy.ndarray` or `pandas.Series`
            Errors on associated ``instFlux`` values
        localCalib : `numpy.ndarray` or `pandas.Series`
            Array of local photometric calibration estimates.
        localCalibErr : `numpy.ndarray` or `pandas.Series`
            Errors on associated ``localCalib`` values

        Returns
        -------
        calibFluxErr : `numpy.ndarray` or `pandas.Series`
            Errors on calibrated flux measurements.
        """
        return np.hypot(instFluxErr * localCalib, instFlux * localCalibErr)

    def instFluxToMagnitude(self, instFlux, localCalib):
        """Convert instrument flux to AB magnitudes.

        Parameters
        ----------
        instFlux : `numpy.ndarray` or `pandas.Series`
            Array of instrument flux measurements
        localCalib : `numpy.ndarray` or `pandas.Series`
            Array of local photometric calibration estimates.

        Returns
        -------
        calibMag : `numpy.ndarray` or `pandas.Series`
            Array of calibrated AB magnitudes.
        """
        return -2.5 * np.log10(self.instFluxToNanojansky(instFlux, localCalib)) + self.logNJanskyToAB

    def instFluxErrToMagnitudeErr(self, instFlux, instFluxErr, localCalib, localCalibErr):
        """Convert instrument flux error to AB magnitude error.

        Parameters
        ----------
        instFlux : `numpy.ndarray` or `pandas.Series`
            Array of instrument flux measurements
        instFluxErr : `numpy.ndarray` or `pandas.Series`
            Errors on associated ``instFlux`` values
        localCalib : `numpy.ndarray` or `pandas.Series`
            Array of local photometric calibration estimates.
        localCalibErr : `numpy.ndarray` or `pandas.Series`
            Errors on associated ``localCalib`` values

        Returns
        -------
        calibMagErr: `numpy.ndarray` or `pandas.Series`
            Error on calibrated AB magnitudes.
        """
        err = self.instFluxErrToNanojanskyErr(instFlux, instFluxErr, localCalib, localCalibErr)
        # magErr = (2.5 / ln 10) * fluxErr / flux, where the flux is the
        # calibrated flux ``instFlux * localCalib`` (not instFlux * instFluxErr).
        return 2.5 / np.log(10) * err / self.instFluxToNanojansky(instFlux, localCalib)

1626 

1627 

class LocalNanojansky(LocalPhotometry):
    """Compute calibrated fluxes using the local calibration value.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    """

    @property
    def columns(self):
        return [self.instFluxCol, self.photoCalibCol]

    @property
    def name(self):
        return f'flux_{self.instFluxCol}'

    def _func(self, df):
        instFlux = df[self.instFluxCol]
        localCalib = df[self.photoCalibCol]
        return self.instFluxToNanojansky(instFlux, localCalib)

1649 

1650 

class LocalNanojanskyErr(LocalPhotometry):
    """Compute calibrated flux errors using the local calibration value.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    """

    @property
    def columns(self):
        return [self.instFluxCol, self.instFluxErrCol,
                self.photoCalibCol, self.photoCalibErrCol]

    @property
    def name(self):
        return f'fluxErr_{self.instFluxCol}'

    def _func(self, df):
        instFlux = df[self.instFluxCol]
        instFluxErr = df[self.instFluxErrCol]
        localCalib = df[self.photoCalibCol]
        localCalibErr = df[self.photoCalibErrCol]
        return self.instFluxErrToNanojanskyErr(instFlux, instFluxErr,
                                               localCalib, localCalibErr)

1674 

1675 

class LocalMagnitude(LocalPhotometry):
    """Compute calibrated AB magnitudes using the local calibration value.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    """

    @property
    def columns(self):
        return [self.instFluxCol, self.photoCalibCol]

    @property
    def name(self):
        return f'mag_{self.instFluxCol}'

    def _func(self, df):
        instFlux = df[self.instFluxCol]
        localCalib = df[self.photoCalibCol]
        return self.instFluxToMagnitude(instFlux, localCalib)

1698 

1699 

class LocalMagnitudeErr(LocalPhotometry):
    """Compute calibrated AB magnitude errors using the local calibration value.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    """

    @property
    def columns(self):
        return [self.instFluxCol, self.instFluxErrCol,
                self.photoCalibCol, self.photoCalibErrCol]

    @property
    def name(self):
        return f'magErr_{self.instFluxCol}'

    def _func(self, df):
        instFlux = df[self.instFluxCol]
        instFluxErr = df[self.instFluxErrCol]
        localCalib = df[self.photoCalibCol]
        localCalibErr = df[self.photoCalibErrCol]
        return self.instFluxErrToMagnitudeErr(instFlux, instFluxErr,
                                              localCalib, localCalibErr)

1725 

1726 

class LocalDipoleMeanFlux(LocalPhotometry):
    """Compute absolute mean of dipole fluxes.

    The result is half the sum of the absolute calibrated positive-lobe and
    negative-lobe fluxes.

    Parameters
    ----------
    instFluxPosCol, instFluxNegCol : `str`
        Names of the positive and negative lobe instrument flux columns.
    instFluxPosErrCol, instFluxNegErrCol : `str`
        Names of the associated error columns.
    photoCalibCol : `str`
        Name of local calibration column.
    photoCalibErrCol : `str`
        Error associated with ``photoCalibCol``

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    LocalDipoleMeanFlux
    LocalDipoleMeanFluxErr
    LocalDipoleDiffFlux
    LocalDipoleDiffFluxErr
    """
    def __init__(self,
                 instFluxPosCol,
                 instFluxNegCol,
                 instFluxPosErrCol,
                 instFluxNegErrCol,
                 photoCalibCol,
                 photoCalibErrCol,
                 **kwargs):
        self.instFluxPosCol = instFluxPosCol
        self.instFluxNegCol = instFluxNegCol
        self.instFluxPosErrCol = instFluxPosErrCol
        self.instFluxNegErrCol = instFluxNegErrCol
        self.photoCalibCol = photoCalibCol
        self.photoCalibErrCol = photoCalibErrCol
        # The parent is initialised with the negative-lobe columns as its
        # single flux / flux-error pair.
        super().__init__(instFluxNegCol,
                         instFluxNegErrCol,
                         photoCalibCol,
                         photoCalibErrCol,
                         **kwargs)

    @property
    def columns(self):
        return [self.instFluxPosCol,
                self.instFluxNegCol,
                self.photoCalibCol]

    @property
    def name(self):
        return f'dipMeanFlux_{self.instFluxPosCol}_{self.instFluxNegCol}'

    def _func(self, df):
        localCalib = df[self.photoCalibCol]
        negFlux = self.instFluxToNanojansky(df[self.instFluxNegCol], localCalib)
        posFlux = self.instFluxToNanojansky(df[self.instFluxPosCol], localCalib)
        return 0.5*(np.fabs(negFlux) + np.fabs(posFlux))

1774 

1775 

class LocalDipoleMeanFluxErr(LocalDipoleMeanFlux):
    """Compute the error on the absolute mean of dipole fluxes.

    For ``mean = 0.5*(|neg| + |pos|)*calib`` the propagated error is
    ``0.5*sqrt(((|neg| + |pos|)*calibErr)**2
    + (negErr**2 + posErr**2)*calib**2)``.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    LocalDipoleMeanFlux
    LocalDipoleMeanFluxErr
    LocalDipoleDiffFlux
    LocalDipoleDiffFluxErr
    """

    @property
    def columns(self):
        return [self.instFluxPosCol,
                self.instFluxNegCol,
                self.instFluxPosErrCol,
                self.instFluxNegErrCol,
                self.photoCalibCol,
                self.photoCalibErrCol]

    @property
    def name(self):
        return f'dipMeanFluxErr_{self.instFluxPosCol}_{self.instFluxNegCol}'

    def _func(self, df):
        # The sum of absolute fluxes must be parenthesised so that the
        # calibration error multiplies both lobes, not just the positive one.
        return 0.5*np.sqrt(
            ((np.fabs(df[self.instFluxNegCol]) + np.fabs(df[self.instFluxPosCol]))
             * df[self.photoCalibErrCol])**2
            + (df[self.instFluxNegErrCol]**2 + df[self.instFluxPosErrCol]**2)
            * df[self.photoCalibCol]**2)

1810 

1811 

class LocalDipoleDiffFlux(LocalDipoleMeanFlux):
    """Compute the absolute difference of dipole fluxes.

    Value is (abs(pos) - abs(neg))

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    LocalDipoleMeanFlux
    LocalDipoleMeanFluxErr
    LocalDipoleDiffFlux
    LocalDipoleDiffFluxErr
    """

    @property
    def columns(self):
        return [self.instFluxPosCol,
                self.instFluxNegCol,
                self.photoCalibCol]

    @property
    def name(self):
        return f'dipDiffFlux_{self.instFluxPosCol}_{self.instFluxNegCol}'

    def _func(self, df):
        localCalib = df[self.photoCalibCol]
        posFlux = self.instFluxToNanojansky(df[self.instFluxPosCol], localCalib)
        negFlux = self.instFluxToNanojansky(df[self.instFluxNegCol], localCalib)
        return np.fabs(posFlux) - np.fabs(negFlux)

1842 

1843 

class LocalDipoleDiffFluxErr(LocalDipoleMeanFlux):
    """Compute the error on the absolute difference of dipole fluxes.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    LocalDipoleMeanFlux
    LocalDipoleMeanFluxErr
    LocalDipoleDiffFlux
    LocalDipoleDiffFluxErr
    """

    @property
    def columns(self):
        return [self.instFluxPosCol,
                self.instFluxNegCol,
                self.instFluxPosErrCol,
                self.instFluxNegErrCol,
                self.photoCalibCol,
                self.photoCalibErrCol]

    @property
    def name(self):
        return f'dipDiffFluxErr_{self.instFluxPosCol}_{self.instFluxNegCol}'

    def _func(self, df):
        # Term from the calibration uncertainty acting on the flux difference.
        absDiff = np.fabs(df[self.instFluxPosCol]) - np.fabs(df[self.instFluxNegCol])
        calibTerm = (absDiff * df[self.photoCalibErrCol])**2
        # Term from the two lobe flux uncertainties.
        lobeVariance = df[self.instFluxPosErrCol]**2 + df[self.instFluxNegErrCol]**2
        fluxTerm = lobeVariance * df[self.photoCalibCol]**2
        return np.sqrt(calibTerm + fluxTerm)

1878 

1879 

class Ratio(Functor):
    """Base class for returning the ratio of 2 columns.

    Can be used to compute a Signal to Noise ratio for any input flux.

    Parameters
    ----------
    numerator : `str`
        Name of the column to use as the numerator in the ratio.
    denominator : `str`
        Name of the column to use as the denominator in the ratio.
    """
    def __init__(self,
                 numerator,
                 denominator,
                 **kwargs):
        self.numerator = numerator
        self.denominator = denominator
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.numerator, self.denominator]

    @property
    def name(self):
        return f'ratio_{self.numerator}_{self.denominator}'

    def _func(self, df):
        # ``np.warnings`` was only ever an accidental alias of the standard
        # library module and is not available in recent NumPy releases; use
        # the real module.
        import warnings

        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', r'invalid value encountered')
            warnings.filterwarnings('ignore', r'divide by zero')
            return df[self.numerator] / df[self.denominator]

1913 

1914 

class Ebv(Functor):
    """Compute E(B-V) from dustmaps.sfd

    The ``coord_ra`` and ``coord_dec`` columns are interpreted as radians
    and passed to an `SFDQuery` instance created at construction time.
    """
    _defaultDataset = 'ref'
    name = "E(B-V)"
    shortname = "ebv"

    def __init__(self, **kwargs):
        self._columns = ['coord_ra', 'coord_dec']
        self.sfd = SFDQuery()
        super().__init__(**kwargs)

    def _func(self, df):
        ra = df['coord_ra']*u.rad
        dec = df['coord_dec']*u.rad
        reddening = self.sfd(SkyCoord(ra, dec))
        # Double precision unnecessary scientifically
        # but currently needed for ingest to qserv
        return pd.Series(reddening, index=df.index).astype('float64')