Coverage for python/lsst/pipe/tasks/functors.py: 36%

809 statements  

« prev     ^ index     » next       coverage.py v7.2.3, created at 2023-04-25 04:33 -0700

1# This file is part of pipe_tasks. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

22__all__ = ["Functor", "CompositeFunctor", "CustomFunctor", "Column", "Index", 

23 "IDColumn", "FootprintNPix", "CoordColumn", "RAColumn", "DecColumn", 

24 "HtmIndex20", "Mag", "MagErr", "NanoMaggie", "MagDiff", "Color", 

25 "Labeller", "StarGalaxyLabeller", "NumStarLabeller", "DeconvolvedMoments", 

26 "SdssTraceSize", "PsfSdssTraceSizeDiff", "HsmTraceSize", "PsfHsmTraceSizeDiff", 

27 "HsmFwhm", "E1", "E2", "RadiusFromQuadrupole", "LocalWcs", "ComputePixelScale", 

28 "ConvertPixelToArcseconds", "ConvertPixelSqToArcsecondsSq", "ReferenceBand", 

29 "Photometry", "NanoJansky", "NanoJanskyErr", "Magnitude", "MagnitudeErr", 

30 "LocalPhotometry", "LocalNanojansky", "LocalNanojanskyErr", 

31 "LocalMagnitude", "LocalMagnitudeErr", "LocalDipoleMeanFlux", 

32 "LocalDipoleMeanFluxErr", "LocalDipoleDiffFlux", "LocalDipoleDiffFluxErr", 

33 "Ratio", "Ebv"] 

34 

35import yaml 

36import re 

37from itertools import product 

38import logging 

39import os.path 

40 

41import pandas as pd 

42import numpy as np 

43import astropy.units as u 

44from astropy.coordinates import SkyCoord 

45 

46from lsst.utils import doImport 

47from lsst.utils.introspection import get_full_type_name 

48from lsst.daf.butler import DeferredDatasetHandle 

49from lsst.pipe.base import InMemoryDatasetHandle 

50import lsst.geom as geom 

51import lsst.sphgeom as sphgeom 

52 

53 

54def init_fromDict(initDict, basePath='lsst.pipe.tasks.functors', 

55 typeKey='functor', name=None): 

56 """Initialize an object defined in a dictionary 

57 

58 The object needs to be importable as 

59 f'{basePath}.{initDict[typeKey]}' 

60 The positional and keyword arguments (if any) are contained in 

61 "args" and "kwargs" entries in the dictionary, respectively. 

62 This is used in `functors.CompositeFunctor.from_yaml` to initialize 

63 a composite functor from a specification in a YAML file. 

64 

65 Parameters 

66 ---------- 

67 initDict : dictionary 

68 Dictionary describing object's initialization. Must contain 

69 an entry keyed by ``typeKey`` that is the name of the object, 

70 relative to ``basePath``. 

71 basePath : str 

72 Path relative to module in which ``initDict[typeKey]`` is defined. 

73 typeKey : str 

74 Key of ``initDict`` that is the name of the object 

75 (relative to `basePath`). 

76 """ 

77 initDict = initDict.copy() 

78 # TO DO: DM-21956 We should be able to define functors outside this module 

79 pythonType = doImport(f'{basePath}.{initDict.pop(typeKey)}') 

80 args = [] 

81 if 'args' in initDict: 

82 args = initDict.pop('args') 

83 if isinstance(args, str): 

84 args = [args] 

85 try: 

86 element = pythonType(*args, **initDict) 

87 except Exception as e: 

88 message = f'Error in constructing functor "{name}" of type {pythonType.__name__} with args: {args}' 

89 raise type(e)(message, e.args) 

90 return element 

91 

92 

93class Functor(object): 

94 """Define and execute a calculation on a DataFrame or Handle holding a DataFrame. 

95 

96 The `__call__` method accepts either a `DataFrame` object or a 

97 `DeferredDatasetHandle` or `InMemoryDatasetHandle`, and returns the 

98 result of the calculation as a single column. Each functor defines what 

99 columns are needed for the calculation, and only these columns are read 

100 from the dataset handle. 

101 

102 The action of `__call__` consists of two steps: first, loading the 

103 necessary columns from disk into memory as a `pandas.DataFrame` object; 

104 and second, performing the computation on this dataframe and returning the 

105 result. 

106 

107 

108 To define a new `Functor`, a subclass must define a `_func` method, 

109 that takes a `pandas.DataFrame` and returns result in a `pandas.Series`. 

110 In addition, it must define the following attributes 

111 

112 * `_columns`: The columns necessary to perform the calculation 

113 * `name`: A name appropriate for a figure axis label 

114 * `shortname`: A name appropriate for use as a dictionary key 

115 

116 On initialization, a `Functor` should declare what band (`filt` kwarg) 

117 and dataset (e.g. `'ref'`, `'meas'`, `'forced_src'`) it is intended to be 

118 applied to. This enables the `_get_data` method to extract the proper 

119 columns from the underlying data. If not specified, the dataset will fall back 

120 on the `_defaultDataset`attribute. If band is not specified and `dataset` 

121 is anything other than `'ref'`, then an error will be raised when trying to 

122 perform the calculation. 

123 

124 Originally, `Functor` was set up to expect 

125 datasets formatted like the `deepCoadd_obj` dataset; that is, a 

126 dataframe with a multi-level column index, with the levels of the 

127 column index being `band`, `dataset`, and `column`. 

128 It has since been generalized to apply to dataframes without mutli-level 

129 indices and multi-level indices with just `dataset` and `column` levels. 

130 In addition, the `_get_data` method that reads 

131 the columns from the underlying data will return a dataframe with column 

132 index levels defined by the `_dfLevels` attribute; by default, this is 

133 `column`. 

134 

135 The `_dfLevels` attributes should generally not need to 

136 be changed, unless `_func` needs columns from multiple filters or datasets 

137 to do the calculation. 

138 An example of this is the `lsst.pipe.tasks.functors.Color` functor, for 

139 which `_dfLevels = ('band', 'column')`, and `_func` expects the dataframe 

140 it gets to have those levels in the column index. 

141 

142 Parameters 

143 ---------- 

144 filt : str 

145 Filter upon which to do the calculation 

146 

147 dataset : str 

148 Dataset upon which to do the calculation 

149 (e.g., 'ref', 'meas', 'forced_src'). 

150 """ 

151 

152 _defaultDataset = 'ref' 

153 _dfLevels = ('column',) 

154 _defaultNoDup = False 

155 

156 def __init__(self, filt=None, dataset=None, noDup=None): 

157 self.filt = filt 

158 self.dataset = dataset if dataset is not None else self._defaultDataset 

159 self._noDup = noDup 

160 self.log = logging.getLogger(type(self).__name__) 

161 

162 @property 

163 def noDup(self): 

164 if self._noDup is not None: 

165 return self._noDup 

166 else: 

167 return self._defaultNoDup 

168 

169 @property 

170 def columns(self): 

171 """Columns required to perform calculation 

172 """ 

173 if not hasattr(self, '_columns'): 

174 raise NotImplementedError('Must define columns property or _columns attribute') 

175 return self._columns 

176 

177 def _get_data_columnLevels(self, data, columnIndex=None): 

178 """Gets the names of the column index levels 

179 

180 This should only be called in the context of a multilevel table. 

181 

182 Parameters 

183 ---------- 

184 data : various 

185 The data to be read, can be a `DeferredDatasetHandle` or 

186 `InMemoryDatasetHandle`. 

187 columnnIndex (optional): pandas `Index` object 

188 If not passed, then it is read from the `DeferredDatasetHandle` 

189 for `InMemoryDatasetHandle`. 

190 """ 

191 if columnIndex is None: 

192 columnIndex = data.get(component="columns") 

193 return columnIndex.names 

194 

195 def _get_data_columnLevelNames(self, data, columnIndex=None): 

196 """Gets the content of each of the column levels for a multilevel table. 

197 """ 

198 if columnIndex is None: 

199 columnIndex = data.get(component="columns") 

200 

201 columnLevels = columnIndex.names 

202 columnLevelNames = { 

203 level: list(np.unique(np.array([c for c in columnIndex])[:, i])) 

204 for i, level in enumerate(columnLevels) 

205 } 

206 return columnLevelNames 

207 

208 def _colsFromDict(self, colDict, columnIndex=None): 

209 """Converts dictionary column specficiation to a list of columns 

210 """ 

211 new_colDict = {} 

212 columnLevels = self._get_data_columnLevels(None, columnIndex=columnIndex) 

213 

214 for i, lev in enumerate(columnLevels): 

215 if lev in colDict: 

216 if isinstance(colDict[lev], str): 

217 new_colDict[lev] = [colDict[lev]] 

218 else: 

219 new_colDict[lev] = colDict[lev] 

220 else: 

221 new_colDict[lev] = columnIndex.levels[i] 

222 

223 levelCols = [new_colDict[lev] for lev in columnLevels] 

224 cols = list(product(*levelCols)) 

225 colsAvailable = [col for col in cols if col in columnIndex] 

226 return colsAvailable 

227 

228 def multilevelColumns(self, data, columnIndex=None, returnTuple=False): 

229 """Returns columns needed by functor from multilevel dataset 

230 

231 To access tables with multilevel column structure, the `DeferredDatasetHandle` 

232 or `InMemoryDatasetHandle` need to be passed either a list of tuples or a 

233 dictionary. 

234 

235 Parameters 

236 ---------- 

237 data : various 

238 The data as either `DeferredDatasetHandle`, or `InMemoryDatasetHandle`. 

239 columnIndex (optional): pandas `Index` object 

240 either passed or read in from `DeferredDatasetHandle`. 

241 `returnTuple` : `bool` 

242 If true, then return a list of tuples rather than the column dictionary 

243 specification. This is set to `True` by `CompositeFunctor` in order to be able to 

244 combine columns from the various component functors. 

245 

246 """ 

247 if not isinstance(data, (DeferredDatasetHandle, InMemoryDatasetHandle)): 

248 raise RuntimeError(f"Unexpected data type. Got {get_full_type_name(data)}.") 

249 

250 if columnIndex is None: 

251 columnIndex = data.get(component="columns") 

252 

253 # Confirm that the dataset has the column levels the functor is expecting it to have. 

254 columnLevels = self._get_data_columnLevels(data, columnIndex) 

255 

256 columnDict = {'column': self.columns, 

257 'dataset': self.dataset} 

258 if self.filt is None: 

259 columnLevelNames = self._get_data_columnLevelNames(data, columnIndex) 

260 if "band" in columnLevels: 

261 if self.dataset == "ref": 

262 columnDict["band"] = columnLevelNames["band"][0] 

263 else: 

264 raise ValueError(f"'filt' not set for functor {self.name}" 

265 f"(dataset {self.dataset}) " 

266 "and DataFrame " 

267 "contains multiple filters in column index. " 

268 "Set 'filt' or set 'dataset' to 'ref'.") 

269 else: 

270 columnDict['band'] = self.filt 

271 

272 if returnTuple: 

273 return self._colsFromDict(columnDict, columnIndex=columnIndex) 

274 else: 

275 return columnDict 

276 

277 def _func(self, df, dropna=True): 

278 raise NotImplementedError('Must define calculation on dataframe') 

279 

280 def _get_columnIndex(self, data): 

281 """Return columnIndex 

282 """ 

283 

284 if isinstance(data, (DeferredDatasetHandle, InMemoryDatasetHandle)): 

285 return data.get(component="columns") 

286 else: 

287 return None 

288 

289 def _get_data(self, data): 

290 """Retrieve dataframe necessary for calculation. 

291 

292 The data argument can be a `DataFrame`, a `DeferredDatasetHandle`, or an 

293 `InMemoryDatasetHandle`. 

294 

295 Returns dataframe upon which `self._func` can act. 

296 """ 

297 # We wrap a dataframe in a handle here to take advantage of the dataframe 

298 # delegate dataframe column wrangling abilities. 

299 if isinstance(data, pd.DataFrame): 

300 _data = InMemoryDatasetHandle(data, storageClass="DataFrame") 

301 elif isinstance(data, (DeferredDatasetHandle, InMemoryDatasetHandle)): 

302 _data = data 

303 else: 

304 raise RuntimeError(f"Unexpected type provided for data. Got {get_full_type_name(data)}.") 

305 

306 # First thing to do: check to see if the data source has a multilevel column index or not. 

307 columnIndex = self._get_columnIndex(_data) 

308 is_multiLevel = isinstance(columnIndex, pd.MultiIndex) 

309 

310 # Get proper columns specification for this functor 

311 if is_multiLevel: 

312 columns = self.multilevelColumns(_data, columnIndex=columnIndex) 

313 else: 

314 columns = self.columns 

315 

316 # Load in-memory dataframe with appropriate columns the gen3 way 

317 df = _data.get(parameters={"columns": columns}) 

318 

319 # Drop unnecessary column levels 

320 if is_multiLevel: 

321 df = self._setLevels(df) 

322 

323 return df 

324 

325 def _setLevels(self, df): 

326 levelsToDrop = [n for n in df.columns.names if n not in self._dfLevels] 

327 df.columns = df.columns.droplevel(levelsToDrop) 

328 return df 

329 

330 def _dropna(self, vals): 

331 return vals.dropna() 

332 

333 def __call__(self, data, dropna=False): 

334 df = self._get_data(data) 

335 try: 

336 vals = self._func(df) 

337 except Exception as e: 

338 self.log.error("Exception in %s call: %s: %s", self.name, type(e).__name__, e) 

339 vals = self.fail(df) 

340 if dropna: 

341 vals = self._dropna(vals) 

342 

343 return vals 

344 

345 def difference(self, data1, data2, **kwargs): 

346 """Computes difference between functor called on two different DataFrame/Handle objects 

347 """ 

348 return self(data1, **kwargs) - self(data2, **kwargs) 

349 

350 def fail(self, df): 

351 return pd.Series(np.full(len(df), np.nan), index=df.index) 

352 

353 @property 

354 def name(self): 

355 """Full name of functor (suitable for figure labels) 

356 """ 

357 return NotImplementedError 

358 

359 @property 

360 def shortname(self): 

361 """Short name of functor (suitable for column name/dict key) 

362 """ 

363 return self.name 

364 

365 

366class CompositeFunctor(Functor): 

367 """Perform multiple calculations at once on a catalog. 

368 

369 The role of a `CompositeFunctor` is to group together computations from 

370 multiple functors. Instead of returning `pandas.Series` a 

371 `CompositeFunctor` returns a `pandas.Dataframe`, with the column names 

372 being the keys of `funcDict`. 

373 

374 The `columns` attribute of a `CompositeFunctor` is the union of all columns 

375 in all the component functors. 

376 

377 A `CompositeFunctor` does not use a `_func` method itself; rather, 

378 when a `CompositeFunctor` is called, all its columns are loaded 

379 at once, and the resulting dataframe is passed to the `_func` method of each component 

380 functor. This has the advantage of only doing I/O (reading from parquet file) once, 

381 and works because each individual `_func` method of each component functor does not 

382 care if there are *extra* columns in the dataframe being passed; only that it must contain 

383 *at least* the `columns` it expects. 

384 

385 An important and useful class method is `from_yaml`, which takes as argument the path to a YAML 

386 file specifying a collection of functors. 

387 

388 Parameters 

389 ---------- 

390 funcs : `dict` or `list` 

391 Dictionary or list of functors. If a list, then it will be converted 

392 into a dictonary according to the `.shortname` attribute of each functor. 

393 

394 """ 

395 dataset = None 

396 

397 def __init__(self, funcs, **kwargs): 

398 

399 if type(funcs) == dict: 

400 self.funcDict = funcs 

401 else: 

402 self.funcDict = {f.shortname: f for f in funcs} 

403 

404 self._filt = None 

405 

406 super().__init__(**kwargs) 

407 

408 @property 

409 def filt(self): 

410 return self._filt 

411 

412 @filt.setter 

413 def filt(self, filt): 

414 if filt is not None: 

415 for _, f in self.funcDict.items(): 

416 f.filt = filt 

417 self._filt = filt 

418 

419 def update(self, new): 

420 if isinstance(new, dict): 

421 self.funcDict.update(new) 

422 elif isinstance(new, CompositeFunctor): 

423 self.funcDict.update(new.funcDict) 

424 else: 

425 raise TypeError('Can only update with dictionary or CompositeFunctor.') 

426 

427 # Make sure new functors have the same 'filt' set 

428 if self.filt is not None: 

429 self.filt = self.filt 

430 

431 @property 

432 def columns(self): 

433 return list(set([x for y in [f.columns for f in self.funcDict.values()] for x in y])) 

434 

435 def multilevelColumns(self, data, **kwargs): 

436 # Get the union of columns for all component functors. Note the need to have `returnTuple=True` here. 

437 return list( 

438 set( 

439 [ 

440 x 

441 for y in [ 

442 f.multilevelColumns(data, returnTuple=True, **kwargs) for f in self.funcDict.values() 

443 ] 

444 for x in y 

445 ] 

446 ) 

447 ) 

448 

449 def __call__(self, data, **kwargs): 

450 """Apply the functor to the data table 

451 

452 Parameters 

453 ---------- 

454 data : various 

455 The data represented as `lsst.daf.butler.DeferredDatasetHandle`, 

456 `lsst.pipe.base.InMemoryDatasetHandle`, 

457 or `pandas.DataFrame`. 

458 The table or a pointer to a table on disk from which columns can 

459 be accessed 

460 """ 

461 if isinstance(data, pd.DataFrame): 

462 _data = InMemoryDatasetHandle(data, storageClass="DataFrame") 

463 elif isinstance(data, (DeferredDatasetHandle, InMemoryDatasetHandle)): 

464 _data = data 

465 else: 

466 raise RuntimeError(f"Unexpected type provided for data. Got {get_full_type_name(data)}.") 

467 

468 columnIndex = self._get_columnIndex(_data) 

469 

470 if isinstance(columnIndex, pd.MultiIndex): 

471 columns = self.multilevelColumns(_data, columnIndex=columnIndex) 

472 df = _data.get(parameters={"columns": columns}) 

473 

474 valDict = {} 

475 for k, f in self.funcDict.items(): 

476 try: 

477 subdf = f._setLevels( 

478 df[f.multilevelColumns(_data, returnTuple=True, columnIndex=columnIndex)] 

479 ) 

480 valDict[k] = f._func(subdf) 

481 except Exception as e: 

482 self.log.error("Exception in %s call: %s: %s", self.name, type(e).__name__, e) 

483 try: 

484 valDict[k] = f.fail(subdf) 

485 except NameError: 

486 raise e 

487 

488 else: 

489 df = _data.get(parameters={"columns": self.columns}) 

490 

491 valDict = {k: f._func(df) for k, f in self.funcDict.items()} 

492 

493 # Check that output columns are actually columns 

494 for name, colVal in valDict.items(): 

495 if len(colVal.shape) != 1: 

496 raise RuntimeError("Transformed column '%s' is not the shape of a column. " 

497 "It is shaped %s and type %s." % (name, colVal.shape, type(colVal))) 

498 

499 try: 

500 valDf = pd.concat(valDict, axis=1) 

501 except TypeError: 

502 print([(k, type(v)) for k, v in valDict.items()]) 

503 raise 

504 

505 if kwargs.get('dropna', False): 

506 valDf = valDf.dropna(how='any') 

507 

508 return valDf 

509 

510 @classmethod 

511 def renameCol(cls, col, renameRules): 

512 if renameRules is None: 

513 return col 

514 for old, new in renameRules: 

515 if col.startswith(old): 

516 col = col.replace(old, new) 

517 return col 

518 

519 @classmethod 

520 def from_file(cls, filename, **kwargs): 

521 # Allow environment variables in the filename. 

522 filename = os.path.expandvars(filename) 

523 with open(filename) as f: 

524 translationDefinition = yaml.safe_load(f) 

525 

526 return cls.from_yaml(translationDefinition, **kwargs) 

527 

528 @classmethod 

529 def from_yaml(cls, translationDefinition, **kwargs): 

530 funcs = {} 

531 for func, val in translationDefinition['funcs'].items(): 

532 funcs[func] = init_fromDict(val, name=func) 

533 

534 if 'flag_rename_rules' in translationDefinition: 

535 renameRules = translationDefinition['flag_rename_rules'] 

536 else: 

537 renameRules = None 

538 

539 if 'calexpFlags' in translationDefinition: 

540 for flag in translationDefinition['calexpFlags']: 

541 funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='calexp') 

542 

543 if 'refFlags' in translationDefinition: 

544 for flag in translationDefinition['refFlags']: 

545 funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='ref') 

546 

547 if 'forcedFlags' in translationDefinition: 

548 for flag in translationDefinition['forcedFlags']: 

549 funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='forced_src') 

550 

551 if 'flags' in translationDefinition: 

552 for flag in translationDefinition['flags']: 

553 funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='meas') 

554 

555 return cls(funcs, **kwargs) 

556 

557 

558def mag_aware_eval(df, expr, log): 

559 """Evaluate an expression on a DataFrame, knowing what the 'mag' function means 

560 

561 Builds on `pandas.DataFrame.eval`, which parses and executes math on dataframes. 

562 

563 Parameters 

564 ---------- 

565 df : pandas.DataFrame 

566 Dataframe on which to evaluate expression. 

567 

568 expr : str 

569 Expression. 

570 """ 

571 try: 

572 expr_new = re.sub(r'mag\((\w+)\)', r'-2.5*log(\g<1>)/log(10)', expr) 

573 val = df.eval(expr_new) 

574 except Exception as e: # Should check what actually gets raised 

575 log.error("Exception in mag_aware_eval: %s: %s", type(e).__name__, e) 

576 expr_new = re.sub(r'mag\((\w+)\)', r'-2.5*log(\g<1>_instFlux)/log(10)', expr) 

577 val = df.eval(expr_new) 

578 return val 

579 

580 

581class CustomFunctor(Functor): 

582 """Arbitrary computation on a catalog 

583 

584 Column names (and thus the columns to be loaded from catalog) are found 

585 by finding all words and trying to ignore all "math-y" words. 

586 

587 Parameters 

588 ---------- 

589 expr : str 

590 Expression to evaluate, to be parsed and executed by `mag_aware_eval`. 

591 """ 

592 _ignore_words = ('mag', 'sin', 'cos', 'exp', 'log', 'sqrt') 

593 

594 def __init__(self, expr, **kwargs): 

595 self.expr = expr 

596 super().__init__(**kwargs) 

597 

598 @property 

599 def name(self): 

600 return self.expr 

601 

602 @property 

603 def columns(self): 

604 flux_cols = re.findall(r'mag\(\s*(\w+)\s*\)', self.expr) 

605 

606 cols = [c for c in re.findall(r'[a-zA-Z_]+', self.expr) if c not in self._ignore_words] 

607 not_a_col = [] 

608 for c in flux_cols: 

609 if not re.search('_instFlux$', c): 

610 cols.append(f'{c}_instFlux') 

611 not_a_col.append(c) 

612 else: 

613 cols.append(c) 

614 

615 return list(set([c for c in cols if c not in not_a_col])) 

616 

617 def _func(self, df): 

618 return mag_aware_eval(df, self.expr, self.log) 

619 

620 

621class Column(Functor): 

622 """Get column with specified name 

623 """ 

624 

625 def __init__(self, col, **kwargs): 

626 self.col = col 

627 super().__init__(**kwargs) 

628 

629 @property 

630 def name(self): 

631 return self.col 

632 

633 @property 

634 def columns(self): 

635 return [self.col] 

636 

637 def _func(self, df): 

638 return df[self.col] 

639 

640 

641class Index(Functor): 

642 """Return the value of the index for each object 

643 """ 

644 

645 columns = ['coord_ra'] # just a dummy; something has to be here 

646 _defaultDataset = 'ref' 

647 _defaultNoDup = True 

648 

649 def _func(self, df): 

650 return pd.Series(df.index, index=df.index) 

651 

652 

653class IDColumn(Column): 

654 col = 'id' 

655 _allow_difference = False 

656 _defaultNoDup = True 

657 

658 def _func(self, df): 

659 return pd.Series(df.index, index=df.index) 

660 

661 

662class FootprintNPix(Column): 

663 col = 'base_Footprint_nPix' 

664 

665 

666class CoordColumn(Column): 

667 """Base class for coordinate column, in degrees 

668 """ 

669 _radians = True 

670 

671 def __init__(self, col, **kwargs): 

672 super().__init__(col, **kwargs) 

673 

674 def _func(self, df): 

675 # Must not modify original column in case that column is used by another functor 

676 output = df[self.col] * 180 / np.pi if self._radians else df[self.col] 

677 return output 

678 

679 

680class RAColumn(CoordColumn): 

681 """Right Ascension, in degrees 

682 """ 

683 name = 'RA' 

684 _defaultNoDup = True 

685 

686 def __init__(self, **kwargs): 

687 super().__init__('coord_ra', **kwargs) 

688 

689 def __call__(self, catalog, **kwargs): 

690 return super().__call__(catalog, **kwargs) 

691 

692 

693class DecColumn(CoordColumn): 

694 """Declination, in degrees 

695 """ 

696 name = 'Dec' 

697 _defaultNoDup = True 

698 

699 def __init__(self, **kwargs): 

700 super().__init__('coord_dec', **kwargs) 

701 

702 def __call__(self, catalog, **kwargs): 

703 return super().__call__(catalog, **kwargs) 

704 

705 

706class HtmIndex20(Functor): 

707 """Compute the level 20 HtmIndex for the catalog. 

708 

709 Notes 

710 ----- 

711 This functor was implemented to satisfy requirements of old APDB interface 

712 which required ``pixelId`` column in DiaObject with HTM20 index. APDB 

713 interface had migrated to not need that information, but we keep this 

714 class in case it may be useful for something else. 

715 """ 

716 name = "Htm20" 

717 htmLevel = 20 

718 _radians = True 

719 

720 def __init__(self, ra, decl, **kwargs): 

721 self.pixelator = sphgeom.HtmPixelization(self.htmLevel) 

722 self.ra = ra 

723 self.decl = decl 

724 self._columns = [self.ra, self.decl] 

725 super().__init__(**kwargs) 

726 

727 def _func(self, df): 

728 

729 def computePixel(row): 

730 if self._radians: 

731 sphPoint = geom.SpherePoint(row[self.ra], 

732 row[self.decl], 

733 geom.radians) 

734 else: 

735 sphPoint = geom.SpherePoint(row[self.ra], 

736 row[self.decl], 

737 geom.degrees) 

738 return self.pixelator.index(sphPoint.getVector()) 

739 

740 return df.apply(computePixel, axis=1, result_type='reduce').astype('int64') 

741 

742 

743def fluxName(col): 

744 if not col.endswith('_instFlux'): 

745 col += '_instFlux' 

746 return col 

747 

748 

749def fluxErrName(col): 

750 if not col.endswith('_instFluxErr'): 

751 col += '_instFluxErr' 

752 return col 

753 

754 

755class Mag(Functor): 

756 """Compute calibrated magnitude 

757 

758 Takes a `calib` argument, which returns the flux at mag=0 

759 as `calib.getFluxMag0()`. If not provided, then the default 

760 `fluxMag0` is 63095734448.0194, which is default for HSC. 

761 This default should be removed in DM-21955 

762 

763 This calculation hides warnings about invalid values and dividing by zero. 

764 

765 As for all functors, a `dataset` and `filt` kwarg should be provided upon 

766 initialization. Unlike the default `Functor`, however, the default dataset 

767 for a `Mag` is `'meas'`, rather than `'ref'`. 

768 

769 Parameters 

770 ---------- 

771 col : `str` 

772 Name of flux column from which to compute magnitude. Can be parseable 

773 by `lsst.pipe.tasks.functors.fluxName` function---that is, you can pass 

774 `'modelfit_CModel'` instead of `'modelfit_CModel_instFlux'`) and it will 

775 understand. 

776 calib : `lsst.afw.image.calib.Calib` (optional) 

777 Object that knows zero point. 

778 """ 

779 _defaultDataset = 'meas' 

780 

781 def __init__(self, col, calib=None, **kwargs): 

782 self.col = fluxName(col) 

783 self.calib = calib 

784 if calib is not None: 

785 self.fluxMag0 = calib.getFluxMag0()[0] 

786 else: 

787 # TO DO: DM-21955 Replace hard coded photometic calibration values 

788 self.fluxMag0 = 63095734448.0194 

789 

790 super().__init__(**kwargs) 

791 

792 @property 

793 def columns(self): 

794 return [self.col] 

795 

796 def _func(self, df): 

797 with np.warnings.catch_warnings(): 

798 np.warnings.filterwarnings('ignore', r'invalid value encountered') 

799 np.warnings.filterwarnings('ignore', r'divide by zero') 

800 return -2.5*np.log10(df[self.col] / self.fluxMag0) 

801 

802 @property 

803 def name(self): 

804 return f'mag_{self.col}' 

805 

806 

807class MagErr(Mag): 

808 """Compute calibrated magnitude uncertainty 

809 

810 Takes the same `calib` object as `lsst.pipe.tasks.functors.Mag`. 

811 

812 Parameters 

813 col : `str` 

814 Name of flux column 

815 calib : `lsst.afw.image.calib.Calib` (optional) 

816 Object that knows zero point. 

817 """ 

818 

819 def __init__(self, *args, **kwargs): 

820 super().__init__(*args, **kwargs) 

821 if self.calib is not None: 

822 self.fluxMag0Err = self.calib.getFluxMag0()[1] 

823 else: 

824 self.fluxMag0Err = 0. 

825 

826 @property 

827 def columns(self): 

828 return [self.col, self.col + 'Err'] 

829 

830 def _func(self, df): 

831 with np.warnings.catch_warnings(): 

832 np.warnings.filterwarnings('ignore', r'invalid value encountered') 

833 np.warnings.filterwarnings('ignore', r'divide by zero') 

834 fluxCol, fluxErrCol = self.columns 

835 x = df[fluxErrCol] / df[fluxCol] 

836 y = self.fluxMag0Err / self.fluxMag0 

837 magErr = (2.5 / np.log(10.)) * np.sqrt(x*x + y*y) 

838 return magErr 

839 

840 @property 

841 def name(self): 

842 return super().name + '_err' 

843 

844 

845class NanoMaggie(Mag): 

846 """ 

847 """ 

848 

849 def _func(self, df): 

850 return (df[self.col] / self.fluxMag0) * 1e9 

851 

852 

853class MagDiff(Functor): 

854 _defaultDataset = 'meas' 

855 

856 """Functor to calculate magnitude difference""" 

857 

858 def __init__(self, col1, col2, **kwargs): 

859 self.col1 = fluxName(col1) 

860 self.col2 = fluxName(col2) 

861 super().__init__(**kwargs) 

862 

863 @property 

864 def columns(self): 

865 return [self.col1, self.col2] 

866 

867 def _func(self, df): 

868 with np.warnings.catch_warnings(): 

869 np.warnings.filterwarnings('ignore', r'invalid value encountered') 

870 np.warnings.filterwarnings('ignore', r'divide by zero') 

871 return -2.5*np.log10(df[self.col1]/df[self.col2]) 

872 

873 @property 

874 def name(self): 

875 return f'(mag_{self.col1} - mag_{self.col2})' 

876 

877 @property 

878 def shortname(self): 

879 return f'magDiff_{self.col1}_{self.col2}' 

880 

881 

882class Color(Functor): 

883 """Compute the color between two filters 

884 

885 Computes color by initializing two different `Mag` 

886 functors based on the `col` and filters provided, and 

887 then returning the difference. 

888 

889 This is enabled by the `_func` expecting a dataframe with a 

890 multilevel column index, with both `'band'` and `'column'`, 

891 instead of just `'column'`, which is the `Functor` default. 

892 This is controlled by the `_dfLevels` attribute. 

893 

894 Also of note, the default dataset for `Color` is `forced_src'`, 

895 whereas for `Mag` it is `'meas'`. 

896 

897 Parameters 

898 ---------- 

899 col : str 

900 Name of flux column from which to compute; same as would be passed to 

901 `lsst.pipe.tasks.functors.Mag`. 

902 

903 filt2, filt1 : str 

904 Filters from which to compute magnitude difference. 

905 Color computed is `Mag(filt2) - Mag(filt1)`. 

906 """ 

907 _defaultDataset = 'forced_src' 

908 _dfLevels = ('band', 'column') 

909 _defaultNoDup = True 

910 

911 def __init__(self, col, filt2, filt1, **kwargs): 

912 self.col = fluxName(col) 

913 if filt2 == filt1: 

914 raise RuntimeError("Cannot compute Color for %s: %s - %s " % (col, filt2, filt1)) 

915 self.filt2 = filt2 

916 self.filt1 = filt1 

917 

918 self.mag2 = Mag(col, filt=filt2, **kwargs) 

919 self.mag1 = Mag(col, filt=filt1, **kwargs) 

920 

921 super().__init__(**kwargs) 

922 

923 @property 

924 def filt(self): 

925 return None 

926 

927 @filt.setter 

928 def filt(self, filt): 

929 pass 

930 

931 def _func(self, df): 

932 mag2 = self.mag2._func(df[self.filt2]) 

933 mag1 = self.mag1._func(df[self.filt1]) 

934 return mag2 - mag1 

935 

936 @property 

937 def columns(self): 

938 return [self.mag1.col, self.mag2.col] 

939 

940 def multilevelColumns(self, parq, **kwargs): 

941 return [(self.dataset, self.filt1, self.col), (self.dataset, self.filt2, self.col)] 

942 

943 @property 

944 def name(self): 

945 return f'{self.filt2} - {self.filt1} ({self.col})' 

946 

947 @property 

948 def shortname(self): 

949 return f"{self.col}_{self.filt2.replace('-', '')}m{self.filt1.replace('-', '')}" 

950 

951 

952class Labeller(Functor): 

953 """Main function of this subclass is to override the dropna=True 

954 """ 

955 _null_label = 'null' 

956 _allow_difference = False 

957 name = 'label' 

958 _force_str = False 

959 

960 def __call__(self, parq, dropna=False, **kwargs): 

961 return super().__call__(parq, dropna=False, **kwargs) 

962 

963 

964class StarGalaxyLabeller(Labeller): 

965 _columns = ["base_ClassificationExtendedness_value"] 

966 _column = "base_ClassificationExtendedness_value" 

967 

968 def _func(self, df): 

969 x = df[self._columns][self._column] 

970 mask = x.isnull() 

971 test = (x < 0.5).astype(int) 

972 test = test.mask(mask, 2) 

973 

974 # TODO: DM-21954 Look into veracity of inline comment below 

975 # are these backwards? 

976 categories = ['galaxy', 'star', self._null_label] 

977 label = pd.Series(pd.Categorical.from_codes(test, categories=categories), 

978 index=x.index, name='label') 

979 if self._force_str: 

980 label = label.astype(str) 

981 return label 

982 

983 

984class NumStarLabeller(Labeller): 

985 _columns = ['numStarFlags'] 

986 labels = {"star": 0, "maybe": 1, "notStar": 2} 

987 

988 def _func(self, df): 

989 x = df[self._columns][self._columns[0]] 

990 

991 # Number of filters 

992 n = len(x.unique()) - 1 

993 

994 labels = ['noStar', 'maybe', 'star'] 

995 label = pd.Series(pd.cut(x, [-1, 0, n-1, n], labels=labels), 

996 index=x.index, name='label') 

997 

998 if self._force_str: 

999 label = label.astype(str) 

1000 

1001 return label 

1002 

1003 

1004class DeconvolvedMoments(Functor): 

1005 name = 'Deconvolved Moments' 

1006 shortname = 'deconvolvedMoments' 

1007 _columns = ("ext_shapeHSM_HsmSourceMoments_xx", 

1008 "ext_shapeHSM_HsmSourceMoments_yy", 

1009 "base_SdssShape_xx", "base_SdssShape_yy", 

1010 "ext_shapeHSM_HsmPsfMoments_xx", 

1011 "ext_shapeHSM_HsmPsfMoments_yy") 

1012 

1013 def _func(self, df): 

1014 """Calculate deconvolved moments""" 

1015 if "ext_shapeHSM_HsmSourceMoments_xx" in df.columns: # _xx added by tdm 

1016 hsm = df["ext_shapeHSM_HsmSourceMoments_xx"] + df["ext_shapeHSM_HsmSourceMoments_yy"] 

1017 else: 

1018 hsm = np.ones(len(df))*np.nan 

1019 sdss = df["base_SdssShape_xx"] + df["base_SdssShape_yy"] 

1020 if "ext_shapeHSM_HsmPsfMoments_xx" in df.columns: 

1021 psf = df["ext_shapeHSM_HsmPsfMoments_xx"] + df["ext_shapeHSM_HsmPsfMoments_yy"] 

1022 else: 

1023 # LSST does not have shape.sdss.psf. Could instead add base_PsfShape to catalog using 

1024 # exposure.getPsf().computeShape(s.getCentroid()).getIxx() 

1025 # raise TaskError("No psf shape parameter found in catalog") 

1026 raise RuntimeError('No psf shape parameter found in catalog') 

1027 

1028 return hsm.where(np.isfinite(hsm), sdss) - psf 

1029 

1030 

1031class SdssTraceSize(Functor): 

1032 """Functor to calculate SDSS trace radius size for sources""" 

1033 name = "SDSS Trace Size" 

1034 shortname = 'sdssTrace' 

1035 _columns = ("base_SdssShape_xx", "base_SdssShape_yy") 

1036 

1037 def _func(self, df): 

1038 srcSize = np.sqrt(0.5*(df["base_SdssShape_xx"] + df["base_SdssShape_yy"])) 

1039 return srcSize 

1040 

1041 

1042class PsfSdssTraceSizeDiff(Functor): 

1043 """Functor to calculate SDSS trace radius size difference (%) between object and psf model""" 

1044 name = "PSF - SDSS Trace Size" 

1045 shortname = 'psf_sdssTrace' 

1046 _columns = ("base_SdssShape_xx", "base_SdssShape_yy", 

1047 "base_SdssShape_psf_xx", "base_SdssShape_psf_yy") 

1048 

1049 def _func(self, df): 

1050 srcSize = np.sqrt(0.5*(df["base_SdssShape_xx"] + df["base_SdssShape_yy"])) 

1051 psfSize = np.sqrt(0.5*(df["base_SdssShape_psf_xx"] + df["base_SdssShape_psf_yy"])) 

1052 sizeDiff = 100*(srcSize - psfSize)/(0.5*(srcSize + psfSize)) 

1053 return sizeDiff 

1054 

1055 

1056class HsmTraceSize(Functor): 

1057 """Functor to calculate HSM trace radius size for sources""" 

1058 name = 'HSM Trace Size' 

1059 shortname = 'hsmTrace' 

1060 _columns = ("ext_shapeHSM_HsmSourceMoments_xx", 

1061 "ext_shapeHSM_HsmSourceMoments_yy") 

1062 

1063 def _func(self, df): 

1064 srcSize = np.sqrt(0.5*(df["ext_shapeHSM_HsmSourceMoments_xx"] 

1065 + df["ext_shapeHSM_HsmSourceMoments_yy"])) 

1066 return srcSize 

1067 

1068 

1069class PsfHsmTraceSizeDiff(Functor): 

1070 """Functor to calculate HSM trace radius size difference (%) between object and psf model""" 

1071 name = 'PSF - HSM Trace Size' 

1072 shortname = 'psf_HsmTrace' 

1073 _columns = ("ext_shapeHSM_HsmSourceMoments_xx", 

1074 "ext_shapeHSM_HsmSourceMoments_yy", 

1075 "ext_shapeHSM_HsmPsfMoments_xx", 

1076 "ext_shapeHSM_HsmPsfMoments_yy") 

1077 

1078 def _func(self, df): 

1079 srcSize = np.sqrt(0.5*(df["ext_shapeHSM_HsmSourceMoments_xx"] 

1080 + df["ext_shapeHSM_HsmSourceMoments_yy"])) 

1081 psfSize = np.sqrt(0.5*(df["ext_shapeHSM_HsmPsfMoments_xx"] 

1082 + df["ext_shapeHSM_HsmPsfMoments_yy"])) 

1083 sizeDiff = 100*(srcSize - psfSize)/(0.5*(srcSize + psfSize)) 

1084 return sizeDiff 

1085 

1086 

1087class HsmFwhm(Functor): 

1088 name = 'HSM Psf FWHM' 

1089 _columns = ('ext_shapeHSM_HsmPsfMoments_xx', 'ext_shapeHSM_HsmPsfMoments_yy') 

1090 # TODO: DM-21403 pixel scale should be computed from the CD matrix or transform matrix 

1091 pixelScale = 0.168 

1092 SIGMA2FWHM = 2*np.sqrt(2*np.log(2)) 

1093 

1094 def _func(self, df): 

1095 return self.pixelScale*self.SIGMA2FWHM*np.sqrt( 

1096 0.5*(df['ext_shapeHSM_HsmPsfMoments_xx'] + df['ext_shapeHSM_HsmPsfMoments_yy'])) 

1097 

1098 

1099class E1(Functor): 

1100 name = "Distortion Ellipticity (e1)" 

1101 shortname = "Distortion" 

1102 

1103 def __init__(self, colXX, colXY, colYY, **kwargs): 

1104 self.colXX = colXX 

1105 self.colXY = colXY 

1106 self.colYY = colYY 

1107 self._columns = [self.colXX, self.colXY, self.colYY] 

1108 super().__init__(**kwargs) 

1109 

1110 @property 

1111 def columns(self): 

1112 return [self.colXX, self.colXY, self.colYY] 

1113 

1114 def _func(self, df): 

1115 return df[self.colXX] - df[self.colYY] / (df[self.colXX] + df[self.colYY]) 

1116 

1117 

1118class E2(Functor): 

1119 name = "Ellipticity e2" 

1120 

1121 def __init__(self, colXX, colXY, colYY, **kwargs): 

1122 self.colXX = colXX 

1123 self.colXY = colXY 

1124 self.colYY = colYY 

1125 super().__init__(**kwargs) 

1126 

1127 @property 

1128 def columns(self): 

1129 return [self.colXX, self.colXY, self.colYY] 

1130 

1131 def _func(self, df): 

1132 return 2*df[self.colXY] / (df[self.colXX] + df[self.colYY]) 

1133 

1134 

1135class RadiusFromQuadrupole(Functor): 

1136 

1137 def __init__(self, colXX, colXY, colYY, **kwargs): 

1138 self.colXX = colXX 

1139 self.colXY = colXY 

1140 self.colYY = colYY 

1141 super().__init__(**kwargs) 

1142 

1143 @property 

1144 def columns(self): 

1145 return [self.colXX, self.colXY, self.colYY] 

1146 

1147 def _func(self, df): 

1148 return (df[self.colXX]*df[self.colYY] - df[self.colXY]**2)**0.25 

1149 

1150 

1151class LocalWcs(Functor): 

1152 """Computations using the stored localWcs. 

1153 """ 

1154 name = "LocalWcsOperations" 

1155 

1156 def __init__(self, 

1157 colCD_1_1, 

1158 colCD_1_2, 

1159 colCD_2_1, 

1160 colCD_2_2, 

1161 **kwargs): 

1162 self.colCD_1_1 = colCD_1_1 

1163 self.colCD_1_2 = colCD_1_2 

1164 self.colCD_2_1 = colCD_2_1 

1165 self.colCD_2_2 = colCD_2_2 

1166 super().__init__(**kwargs) 

1167 

1168 def computeDeltaRaDec(self, x, y, cd11, cd12, cd21, cd22): 

1169 """Compute the distance on the sphere from x2, y1 to x1, y1. 

1170 

1171 Parameters 

1172 ---------- 

1173 x : `pandas.Series` 

1174 X pixel coordinate. 

1175 y : `pandas.Series` 

1176 Y pixel coordinate. 

1177 cd11 : `pandas.Series` 

1178 [1, 1] element of the local Wcs affine transform. 

1179 cd11 : `pandas.Series` 

1180 [1, 1] element of the local Wcs affine transform. 

1181 cd12 : `pandas.Series` 

1182 [1, 2] element of the local Wcs affine transform. 

1183 cd21 : `pandas.Series` 

1184 [2, 1] element of the local Wcs affine transform. 

1185 cd22 : `pandas.Series` 

1186 [2, 2] element of the local Wcs affine transform. 

1187 

1188 Returns 

1189 ------- 

1190 raDecTuple : tuple 

1191 RA and dec conversion of x and y given the local Wcs. Returned 

1192 units are in radians. 

1193 

1194 """ 

1195 return (x * cd11 + y * cd12, x * cd21 + y * cd22) 

1196 

1197 def computeSkySeperation(self, ra1, dec1, ra2, dec2): 

1198 """Compute the local pixel scale conversion. 

1199 

1200 Parameters 

1201 ---------- 

1202 ra1 : `pandas.Series` 

1203 Ra of the first coordinate in radians. 

1204 dec1 : `pandas.Series` 

1205 Dec of the first coordinate in radians. 

1206 ra2 : `pandas.Series` 

1207 Ra of the second coordinate in radians. 

1208 dec2 : `pandas.Series` 

1209 Dec of the second coordinate in radians. 

1210 

1211 Returns 

1212 ------- 

1213 dist : `pandas.Series` 

1214 Distance on the sphere in radians. 

1215 """ 

1216 deltaDec = dec2 - dec1 

1217 deltaRa = ra2 - ra1 

1218 return 2 * np.arcsin( 

1219 np.sqrt( 

1220 np.sin(deltaDec / 2) ** 2 

1221 + np.cos(dec2) * np.cos(dec1) * np.sin(deltaRa / 2) ** 2)) 

1222 

1223 def getSkySeperationFromPixel(self, x1, y1, x2, y2, cd11, cd12, cd21, cd22): 

1224 """Compute the distance on the sphere from x2, y1 to x1, y1. 

1225 

1226 Parameters 

1227 ---------- 

1228 x1 : `pandas.Series` 

1229 X pixel coordinate. 

1230 y1 : `pandas.Series` 

1231 Y pixel coordinate. 

1232 x2 : `pandas.Series` 

1233 X pixel coordinate. 

1234 y2 : `pandas.Series` 

1235 Y pixel coordinate. 

1236 cd11 : `pandas.Series` 

1237 [1, 1] element of the local Wcs affine transform. 

1238 cd11 : `pandas.Series` 

1239 [1, 1] element of the local Wcs affine transform. 

1240 cd12 : `pandas.Series` 

1241 [1, 2] element of the local Wcs affine transform. 

1242 cd21 : `pandas.Series` 

1243 [2, 1] element of the local Wcs affine transform. 

1244 cd22 : `pandas.Series` 

1245 [2, 2] element of the local Wcs affine transform. 

1246 

1247 Returns 

1248 ------- 

1249 Distance : `pandas.Series` 

1250 Arcseconds per pixel at the location of the local WC 

1251 """ 

1252 ra1, dec1 = self.computeDeltaRaDec(x1, y1, cd11, cd12, cd21, cd22) 

1253 ra2, dec2 = self.computeDeltaRaDec(x2, y2, cd11, cd12, cd21, cd22) 

1254 # Great circle distance for small separations. 

1255 return self.computeSkySeperation(ra1, dec1, ra2, dec2) 

1256 

1257 

1258class ComputePixelScale(LocalWcs): 

1259 """Compute the local pixel scale from the stored CDMatrix. 

1260 """ 

1261 name = "PixelScale" 

1262 

1263 @property 

1264 def columns(self): 

1265 return [self.colCD_1_1, 

1266 self.colCD_1_2, 

1267 self.colCD_2_1, 

1268 self.colCD_2_2] 

1269 

1270 def pixelScaleArcseconds(self, cd11, cd12, cd21, cd22): 

1271 """Compute the local pixel to scale conversion in arcseconds. 

1272 

1273 Parameters 

1274 ---------- 

1275 cd11 : `pandas.Series` 

1276 [1, 1] element of the local Wcs affine transform in radians. 

1277 cd11 : `pandas.Series` 

1278 [1, 1] element of the local Wcs affine transform in radians. 

1279 cd12 : `pandas.Series` 

1280 [1, 2] element of the local Wcs affine transform in radians. 

1281 cd21 : `pandas.Series` 

1282 [2, 1] element of the local Wcs affine transform in radians. 

1283 cd22 : `pandas.Series` 

1284 [2, 2] element of the local Wcs affine transform in radians. 

1285 

1286 Returns 

1287 ------- 

1288 pixScale : `pandas.Series` 

1289 Arcseconds per pixel at the location of the local WC 

1290 """ 

1291 return 3600 * np.degrees(np.sqrt(np.fabs(cd11 * cd22 - cd12 * cd21))) 

1292 

1293 def _func(self, df): 

1294 return self.pixelScaleArcseconds(df[self.colCD_1_1], 

1295 df[self.colCD_1_2], 

1296 df[self.colCD_2_1], 

1297 df[self.colCD_2_2]) 

1298 

1299 

1300class ConvertPixelToArcseconds(ComputePixelScale): 

1301 """Convert a value in units pixels squared to units arcseconds squared. 

1302 """ 

1303 

1304 def __init__(self, 

1305 col, 

1306 colCD_1_1, 

1307 colCD_1_2, 

1308 colCD_2_1, 

1309 colCD_2_2, 

1310 **kwargs): 

1311 self.col = col 

1312 super().__init__(colCD_1_1, 

1313 colCD_1_2, 

1314 colCD_2_1, 

1315 colCD_2_2, 

1316 **kwargs) 

1317 

1318 @property 

1319 def name(self): 

1320 return f"{self.col}_asArcseconds" 

1321 

1322 @property 

1323 def columns(self): 

1324 return [self.col, 

1325 self.colCD_1_1, 

1326 self.colCD_1_2, 

1327 self.colCD_2_1, 

1328 self.colCD_2_2] 

1329 

1330 def _func(self, df): 

1331 return df[self.col] * self.pixelScaleArcseconds(df[self.colCD_1_1], 

1332 df[self.colCD_1_2], 

1333 df[self.colCD_2_1], 

1334 df[self.colCD_2_2]) 

1335 

1336 

1337class ConvertPixelSqToArcsecondsSq(ComputePixelScale): 

1338 """Convert a value in units pixels to units arcseconds. 

1339 """ 

1340 

1341 def __init__(self, 

1342 col, 

1343 colCD_1_1, 

1344 colCD_1_2, 

1345 colCD_2_1, 

1346 colCD_2_2, 

1347 **kwargs): 

1348 self.col = col 

1349 super().__init__(colCD_1_1, 

1350 colCD_1_2, 

1351 colCD_2_1, 

1352 colCD_2_2, 

1353 **kwargs) 

1354 

1355 @property 

1356 def name(self): 

1357 return f"{self.col}_asArcsecondsSq" 

1358 

1359 @property 

1360 def columns(self): 

1361 return [self.col, 

1362 self.colCD_1_1, 

1363 self.colCD_1_2, 

1364 self.colCD_2_1, 

1365 self.colCD_2_2] 

1366 

1367 def _func(self, df): 

1368 pixScale = self.pixelScaleArcseconds(df[self.colCD_1_1], 

1369 df[self.colCD_1_2], 

1370 df[self.colCD_2_1], 

1371 df[self.colCD_2_2]) 

1372 return df[self.col] * pixScale * pixScale 

1373 

1374 

1375class ReferenceBand(Functor): 

1376 name = 'Reference Band' 

1377 shortname = 'refBand' 

1378 

1379 @property 

1380 def columns(self): 

1381 return ["merge_measurement_i", 

1382 "merge_measurement_r", 

1383 "merge_measurement_z", 

1384 "merge_measurement_y", 

1385 "merge_measurement_g", 

1386 "merge_measurement_u"] 

1387 

1388 def _func(self, df: pd.DataFrame) -> pd.Series: 

1389 def getFilterAliasName(row): 

1390 # get column name with the max value (True > False) 

1391 colName = row.idxmax() 

1392 return colName.replace('merge_measurement_', '') 

1393 

1394 # Skip columns that are unavailable, because this functor requests the 

1395 # superset of bands that could be included in the object table 

1396 columns = [col for col in self.columns if col in df.columns] 

1397 # Makes a Series of dtype object if df is empty 

1398 return df[columns].apply(getFilterAliasName, axis=1, 

1399 result_type='reduce').astype('object') 

1400 

1401 

1402class Photometry(Functor): 

1403 # AB to NanoJansky (3631 Jansky) 

1404 AB_FLUX_SCALE = (0 * u.ABmag).to_value(u.nJy) 

1405 LOG_AB_FLUX_SCALE = 12.56 

1406 FIVE_OVER_2LOG10 = 1.085736204758129569 

1407 # TO DO: DM-21955 Replace hard coded photometic calibration values 

1408 COADD_ZP = 27 

1409 

1410 def __init__(self, colFlux, colFluxErr=None, calib=None, **kwargs): 

1411 self.vhypot = np.vectorize(self.hypot) 

1412 self.col = colFlux 

1413 self.colFluxErr = colFluxErr 

1414 

1415 self.calib = calib 

1416 if calib is not None: 

1417 self.fluxMag0, self.fluxMag0Err = calib.getFluxMag0() 

1418 else: 

1419 self.fluxMag0 = 1./np.power(10, -0.4*self.COADD_ZP) 

1420 self.fluxMag0Err = 0. 

1421 

1422 super().__init__(**kwargs) 

1423 

1424 @property 

1425 def columns(self): 

1426 return [self.col] 

1427 

1428 @property 

1429 def name(self): 

1430 return f'mag_{self.col}' 

1431 

1432 @classmethod 

1433 def hypot(cls, a, b): 

1434 if np.abs(a) < np.abs(b): 

1435 a, b = b, a 

1436 if a == 0.: 

1437 return 0. 

1438 q = b/a 

1439 return np.abs(a) * np.sqrt(1. + q*q) 

1440 

1441 def dn2flux(self, dn, fluxMag0): 

1442 return self.AB_FLUX_SCALE * dn / fluxMag0 

1443 

1444 def dn2mag(self, dn, fluxMag0): 

1445 with np.warnings.catch_warnings(): 

1446 np.warnings.filterwarnings('ignore', r'invalid value encountered') 

1447 np.warnings.filterwarnings('ignore', r'divide by zero') 

1448 return -2.5 * np.log10(dn/fluxMag0) 

1449 

1450 def dn2fluxErr(self, dn, dnErr, fluxMag0, fluxMag0Err): 

1451 retVal = self.vhypot(dn * fluxMag0Err, dnErr * fluxMag0) 

1452 retVal *= self.AB_FLUX_SCALE / fluxMag0 / fluxMag0 

1453 return retVal 

1454 

1455 def dn2MagErr(self, dn, dnErr, fluxMag0, fluxMag0Err): 

1456 retVal = self.dn2fluxErr(dn, dnErr, fluxMag0, fluxMag0Err) / self.dn2flux(dn, fluxMag0) 

1457 return self.FIVE_OVER_2LOG10 * retVal 

1458 

1459 

1460class NanoJansky(Photometry): 

1461 def _func(self, df): 

1462 return self.dn2flux(df[self.col], self.fluxMag0) 

1463 

1464 

1465class NanoJanskyErr(Photometry): 

1466 @property 

1467 def columns(self): 

1468 return [self.col, self.colFluxErr] 

1469 

1470 def _func(self, df): 

1471 retArr = self.dn2fluxErr(df[self.col], df[self.colFluxErr], self.fluxMag0, self.fluxMag0Err) 

1472 return pd.Series(retArr, index=df.index) 

1473 

1474 

1475class Magnitude(Photometry): 

1476 def _func(self, df): 

1477 return self.dn2mag(df[self.col], self.fluxMag0) 

1478 

1479 

1480class MagnitudeErr(Photometry): 

1481 @property 

1482 def columns(self): 

1483 return [self.col, self.colFluxErr] 

1484 

1485 def _func(self, df): 

1486 retArr = self.dn2MagErr(df[self.col], df[self.colFluxErr], self.fluxMag0, self.fluxMag0Err) 

1487 return pd.Series(retArr, index=df.index) 

1488 

1489 

1490class LocalPhotometry(Functor): 

1491 """Base class for calibrating the specified instrument flux column using 

1492 the local photometric calibration. 

1493 

1494 Parameters 

1495 ---------- 

1496 instFluxCol : `str` 

1497 Name of the instrument flux column. 

1498 instFluxErrCol : `str` 

1499 Name of the assocated error columns for ``instFluxCol``. 

1500 photoCalibCol : `str` 

1501 Name of local calibration column. 

1502 photoCalibErrCol : `str` 

1503 Error associated with ``photoCalibCol`` 

1504 

1505 See also 

1506 -------- 

1507 LocalPhotometry 

1508 LocalNanojansky 

1509 LocalNanojanskyErr 

1510 LocalMagnitude 

1511 LocalMagnitudeErr 

1512 """ 

1513 logNJanskyToAB = (1 * u.nJy).to_value(u.ABmag) 

1514 

1515 def __init__(self, 

1516 instFluxCol, 

1517 instFluxErrCol, 

1518 photoCalibCol, 

1519 photoCalibErrCol, 

1520 **kwargs): 

1521 self.instFluxCol = instFluxCol 

1522 self.instFluxErrCol = instFluxErrCol 

1523 self.photoCalibCol = photoCalibCol 

1524 self.photoCalibErrCol = photoCalibErrCol 

1525 super().__init__(**kwargs) 

1526 

1527 def instFluxToNanojansky(self, instFlux, localCalib): 

1528 """Convert instrument flux to nanojanskys. 

1529 

1530 Parameters 

1531 ---------- 

1532 instFlux : `numpy.ndarray` or `pandas.Series` 

1533 Array of instrument flux measurements 

1534 localCalib : `numpy.ndarray` or `pandas.Series` 

1535 Array of local photometric calibration estimates. 

1536 

1537 Returns 

1538 ------- 

1539 calibFlux : `numpy.ndarray` or `pandas.Series` 

1540 Array of calibrated flux measurements. 

1541 """ 

1542 return instFlux * localCalib 

1543 

1544 def instFluxErrToNanojanskyErr(self, instFlux, instFluxErr, localCalib, localCalibErr): 

1545 """Convert instrument flux to nanojanskys. 

1546 

1547 Parameters 

1548 ---------- 

1549 instFlux : `numpy.ndarray` or `pandas.Series` 

1550 Array of instrument flux measurements 

1551 instFluxErr : `numpy.ndarray` or `pandas.Series` 

1552 Errors on associated ``instFlux`` values 

1553 localCalib : `numpy.ndarray` or `pandas.Series` 

1554 Array of local photometric calibration estimates. 

1555 localCalibErr : `numpy.ndarray` or `pandas.Series` 

1556 Errors on associated ``localCalib`` values 

1557 

1558 Returns 

1559 ------- 

1560 calibFluxErr : `numpy.ndarray` or `pandas.Series` 

1561 Errors on calibrated flux measurements. 

1562 """ 

1563 return np.hypot(instFluxErr * localCalib, instFlux * localCalibErr) 

1564 

1565 def instFluxToMagnitude(self, instFlux, localCalib): 

1566 """Convert instrument flux to nanojanskys. 

1567 

1568 Parameters 

1569 ---------- 

1570 instFlux : `numpy.ndarray` or `pandas.Series` 

1571 Array of instrument flux measurements 

1572 localCalib : `numpy.ndarray` or `pandas.Series` 

1573 Array of local photometric calibration estimates. 

1574 

1575 Returns 

1576 ------- 

1577 calibMag : `numpy.ndarray` or `pandas.Series` 

1578 Array of calibrated AB magnitudes. 

1579 """ 

1580 return -2.5 * np.log10(self.instFluxToNanojansky(instFlux, localCalib)) + self.logNJanskyToAB 

1581 

1582 def instFluxErrToMagnitudeErr(self, instFlux, instFluxErr, localCalib, localCalibErr): 

1583 """Convert instrument flux err to nanojanskys. 

1584 

1585 Parameters 

1586 ---------- 

1587 instFlux : `numpy.ndarray` or `pandas.Series` 

1588 Array of instrument flux measurements 

1589 instFluxErr : `numpy.ndarray` or `pandas.Series` 

1590 Errors on associated ``instFlux`` values 

1591 localCalib : `numpy.ndarray` or `pandas.Series` 

1592 Array of local photometric calibration estimates. 

1593 localCalibErr : `numpy.ndarray` or `pandas.Series` 

1594 Errors on associated ``localCalib`` values 

1595 

1596 Returns 

1597 ------- 

1598 calibMagErr: `numpy.ndarray` or `pandas.Series` 

1599 Error on calibrated AB magnitudes. 

1600 """ 

1601 err = self.instFluxErrToNanojanskyErr(instFlux, instFluxErr, localCalib, localCalibErr) 

1602 return 2.5 / np.log(10) * err / self.instFluxToNanojansky(instFlux, instFluxErr) 

1603 

1604 

1605class LocalNanojansky(LocalPhotometry): 

1606 """Compute calibrated fluxes using the local calibration value. 

1607 

1608 See also 

1609 -------- 

1610 LocalNanojansky 

1611 LocalNanojanskyErr 

1612 LocalMagnitude 

1613 LocalMagnitudeErr 

1614 """ 

1615 

1616 @property 

1617 def columns(self): 

1618 return [self.instFluxCol, self.photoCalibCol] 

1619 

1620 @property 

1621 def name(self): 

1622 return f'flux_{self.instFluxCol}' 

1623 

1624 def _func(self, df): 

1625 return self.instFluxToNanojansky(df[self.instFluxCol], df[self.photoCalibCol]) 

1626 

1627 

1628class LocalNanojanskyErr(LocalPhotometry): 

1629 """Compute calibrated flux errors using the local calibration value. 

1630 

1631 See also 

1632 -------- 

1633 LocalNanojansky 

1634 LocalNanojanskyErr 

1635 LocalMagnitude 

1636 LocalMagnitudeErr 

1637 """ 

1638 

1639 @property 

1640 def columns(self): 

1641 return [self.instFluxCol, self.instFluxErrCol, 

1642 self.photoCalibCol, self.photoCalibErrCol] 

1643 

1644 @property 

1645 def name(self): 

1646 return f'fluxErr_{self.instFluxCol}' 

1647 

1648 def _func(self, df): 

1649 return self.instFluxErrToNanojanskyErr(df[self.instFluxCol], df[self.instFluxErrCol], 

1650 df[self.photoCalibCol], df[self.photoCalibErrCol]) 

1651 

1652 

1653class LocalMagnitude(LocalPhotometry): 

1654 """Compute calibrated AB magnitudes using the local calibration value. 

1655 

1656 See also 

1657 -------- 

1658 LocalNanojansky 

1659 LocalNanojanskyErr 

1660 LocalMagnitude 

1661 LocalMagnitudeErr 

1662 """ 

1663 

1664 @property 

1665 def columns(self): 

1666 return [self.instFluxCol, self.photoCalibCol] 

1667 

1668 @property 

1669 def name(self): 

1670 return f'mag_{self.instFluxCol}' 

1671 

1672 def _func(self, df): 

1673 return self.instFluxToMagnitude(df[self.instFluxCol], 

1674 df[self.photoCalibCol]) 

1675 

1676 

1677class LocalMagnitudeErr(LocalPhotometry): 

1678 """Compute calibrated AB magnitude errors using the local calibration value. 

1679 

1680 See also 

1681 -------- 

1682 LocalNanojansky 

1683 LocalNanojanskyErr 

1684 LocalMagnitude 

1685 LocalMagnitudeErr 

1686 """ 

1687 

1688 @property 

1689 def columns(self): 

1690 return [self.instFluxCol, self.instFluxErrCol, 

1691 self.photoCalibCol, self.photoCalibErrCol] 

1692 

1693 @property 

1694 def name(self): 

1695 return f'magErr_{self.instFluxCol}' 

1696 

1697 def _func(self, df): 

1698 return self.instFluxErrToMagnitudeErr(df[self.instFluxCol], 

1699 df[self.instFluxErrCol], 

1700 df[self.photoCalibCol], 

1701 df[self.photoCalibErrCol]) 

1702 

1703 

1704class LocalDipoleMeanFlux(LocalPhotometry): 

1705 """Compute absolute mean of dipole fluxes. 

1706 

1707 See also 

1708 -------- 

1709 LocalNanojansky 

1710 LocalNanojanskyErr 

1711 LocalMagnitude 

1712 LocalMagnitudeErr 

1713 LocalDipoleMeanFlux 

1714 LocalDipoleMeanFluxErr 

1715 LocalDipoleDiffFlux 

1716 LocalDipoleDiffFluxErr 

1717 """ 

1718 def __init__(self, 

1719 instFluxPosCol, 

1720 instFluxNegCol, 

1721 instFluxPosErrCol, 

1722 instFluxNegErrCol, 

1723 photoCalibCol, 

1724 photoCalibErrCol, 

1725 **kwargs): 

1726 self.instFluxNegCol = instFluxNegCol 

1727 self.instFluxPosCol = instFluxPosCol 

1728 self.instFluxNegErrCol = instFluxNegErrCol 

1729 self.instFluxPosErrCol = instFluxPosErrCol 

1730 self.photoCalibCol = photoCalibCol 

1731 self.photoCalibErrCol = photoCalibErrCol 

1732 super().__init__(instFluxNegCol, 

1733 instFluxNegErrCol, 

1734 photoCalibCol, 

1735 photoCalibErrCol, 

1736 **kwargs) 

1737 

1738 @property 

1739 def columns(self): 

1740 return [self.instFluxPosCol, 

1741 self.instFluxNegCol, 

1742 self.photoCalibCol] 

1743 

1744 @property 

1745 def name(self): 

1746 return f'dipMeanFlux_{self.instFluxPosCol}_{self.instFluxNegCol}' 

1747 

1748 def _func(self, df): 

1749 return 0.5*(np.fabs(self.instFluxToNanojansky(df[self.instFluxNegCol], df[self.photoCalibCol])) 

1750 + np.fabs(self.instFluxToNanojansky(df[self.instFluxPosCol], df[self.photoCalibCol]))) 

1751 

1752 

1753class LocalDipoleMeanFluxErr(LocalDipoleMeanFlux): 

1754 """Compute the error on the absolute mean of dipole fluxes. 

1755 

1756 See also 

1757 -------- 

1758 LocalNanojansky 

1759 LocalNanojanskyErr 

1760 LocalMagnitude 

1761 LocalMagnitudeErr 

1762 LocalDipoleMeanFlux 

1763 LocalDipoleMeanFluxErr 

1764 LocalDipoleDiffFlux 

1765 LocalDipoleDiffFluxErr 

1766 """ 

1767 

1768 @property 

1769 def columns(self): 

1770 return [self.instFluxPosCol, 

1771 self.instFluxNegCol, 

1772 self.instFluxPosErrCol, 

1773 self.instFluxNegErrCol, 

1774 self.photoCalibCol, 

1775 self.photoCalibErrCol] 

1776 

1777 @property 

1778 def name(self): 

1779 return f'dipMeanFluxErr_{self.instFluxPosCol}_{self.instFluxNegCol}' 

1780 

1781 def _func(self, df): 

1782 return 0.5*np.sqrt( 

1783 (np.fabs(df[self.instFluxNegCol]) + np.fabs(df[self.instFluxPosCol]) 

1784 * df[self.photoCalibErrCol])**2 

1785 + (df[self.instFluxNegErrCol]**2 + df[self.instFluxPosErrCol]**2) 

1786 * df[self.photoCalibCol]**2) 

1787 

1788 

1789class LocalDipoleDiffFlux(LocalDipoleMeanFlux): 

1790 """Compute the absolute difference of dipole fluxes. 

1791 

1792 Value is (abs(pos) - abs(neg)) 

1793 

1794 See also 

1795 -------- 

1796 LocalNanojansky 

1797 LocalNanojanskyErr 

1798 LocalMagnitude 

1799 LocalMagnitudeErr 

1800 LocalDipoleMeanFlux 

1801 LocalDipoleMeanFluxErr 

1802 LocalDipoleDiffFlux 

1803 LocalDipoleDiffFluxErr 

1804 """ 

1805 

1806 @property 

1807 def columns(self): 

1808 return [self.instFluxPosCol, 

1809 self.instFluxNegCol, 

1810 self.photoCalibCol] 

1811 

1812 @property 

1813 def name(self): 

1814 return f'dipDiffFlux_{self.instFluxPosCol}_{self.instFluxNegCol}' 

1815 

1816 def _func(self, df): 

1817 return (np.fabs(self.instFluxToNanojansky(df[self.instFluxPosCol], df[self.photoCalibCol])) 

1818 - np.fabs(self.instFluxToNanojansky(df[self.instFluxNegCol], df[self.photoCalibCol]))) 

1819 

1820 

1821class LocalDipoleDiffFluxErr(LocalDipoleMeanFlux): 

1822 """Compute the error on the absolute difference of dipole fluxes. 

1823 

1824 See also 

1825 -------- 

1826 LocalNanojansky 

1827 LocalNanojanskyErr 

1828 LocalMagnitude 

1829 LocalMagnitudeErr 

1830 LocalDipoleMeanFlux 

1831 LocalDipoleMeanFluxErr 

1832 LocalDipoleDiffFlux 

1833 LocalDipoleDiffFluxErr 

1834 """ 

1835 

1836 @property 

1837 def columns(self): 

1838 return [self.instFluxPosCol, 

1839 self.instFluxNegCol, 

1840 self.instFluxPosErrCol, 

1841 self.instFluxNegErrCol, 

1842 self.photoCalibCol, 

1843 self.photoCalibErrCol] 

1844 

1845 @property 

1846 def name(self): 

1847 return f'dipDiffFluxErr_{self.instFluxPosCol}_{self.instFluxNegCol}' 

1848 

1849 def _func(self, df): 

1850 return np.sqrt( 

1851 ((np.fabs(df[self.instFluxPosCol]) - np.fabs(df[self.instFluxNegCol])) 

1852 * df[self.photoCalibErrCol])**2 

1853 + (df[self.instFluxPosErrCol]**2 + df[self.instFluxNegErrCol]**2) 

1854 * df[self.photoCalibCol]**2) 

1855 

1856 

1857class Ratio(Functor): 

1858 """Base class for returning the ratio of 2 columns. 

1859 

1860 Can be used to compute a Signal to Noise ratio for any input flux. 

1861 

1862 Parameters 

1863 ---------- 

1864 numerator : `str` 

1865 Name of the column to use at the numerator in the ratio 

1866 denominator : `str` 

1867 Name of the column to use as the denominator in the ratio. 

1868 """ 

1869 def __init__(self, 

1870 numerator, 

1871 denominator, 

1872 **kwargs): 

1873 self.numerator = numerator 

1874 self.denominator = denominator 

1875 super().__init__(**kwargs) 

1876 

1877 @property 

1878 def columns(self): 

1879 return [self.numerator, self.denominator] 

1880 

1881 @property 

1882 def name(self): 

1883 return f'ratio_{self.numerator}_{self.denominator}' 

1884 

1885 def _func(self, df): 

1886 with np.warnings.catch_warnings(): 

1887 np.warnings.filterwarnings('ignore', r'invalid value encountered') 

1888 np.warnings.filterwarnings('ignore', r'divide by zero') 

1889 return df[self.numerator] / df[self.denominator] 

1890 

1891 

1892class Ebv(Functor): 

1893 """Compute E(B-V) from dustmaps.sfd 

1894 """ 

1895 _defaultDataset = 'ref' 

1896 name = "E(B-V)" 

1897 shortname = "ebv" 

1898 

1899 def __init__(self, **kwargs): 

1900 # import is only needed for Ebv 

1901 from dustmaps.sfd import SFDQuery 

1902 self._columns = ['coord_ra', 'coord_dec'] 

1903 self.sfd = SFDQuery() 

1904 super().__init__(**kwargs) 

1905 

1906 def _func(self, df): 

1907 coords = SkyCoord(df['coord_ra']*u.rad, df['coord_dec']*u.rad) 

1908 ebv = self.sfd(coords) 

1909 # Double precision unnecessary scientifically 

1910 # but currently needed for ingest to qserv 

1911 return pd.Series(ebv, index=df.index).astype('float64')