Coverage for python/lsst/pipe/tasks/functors.py: 36%

810 statements  

« prev     ^ index     » next       coverage.py v7.2.6, created at 2023-05-26 02:56 -0700

1# This file is part of pipe_tasks. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

# Public API of this module: the functor base classes plus every concrete
# functor implementation defined in this file.
__all__ = ["Functor", "CompositeFunctor", "CustomFunctor", "Column", "Index",
           "IDColumn", "FootprintNPix", "CoordColumn", "RAColumn", "DecColumn",
           "HtmIndex20", "Mag", "MagErr", "NanoMaggie", "MagDiff", "Color",
           "Labeller", "StarGalaxyLabeller", "NumStarLabeller", "DeconvolvedMoments",
           "SdssTraceSize", "PsfSdssTraceSizeDiff", "HsmTraceSize", "PsfHsmTraceSizeDiff",
           "HsmFwhm", "E1", "E2", "RadiusFromQuadrupole", "LocalWcs", "ComputePixelScale",
           "ConvertPixelToArcseconds", "ConvertPixelSqToArcsecondsSq", "ReferenceBand",
           "Photometry", "NanoJansky", "NanoJanskyErr", "Magnitude", "MagnitudeErr",
           "LocalPhotometry", "LocalNanojansky", "LocalNanojanskyErr",
           "LocalMagnitude", "LocalMagnitudeErr", "LocalDipoleMeanFlux",
           "LocalDipoleMeanFluxErr", "LocalDipoleDiffFlux", "LocalDipoleDiffFluxErr",
           "Ratio", "Ebv"]

34 

35import yaml 

36import re 

37from itertools import product 

38import logging 

39import os.path 

40 

41import pandas as pd 

42import numpy as np 

43import astropy.units as u 

44from astropy.coordinates import SkyCoord 

45 

46from lsst.utils import doImport 

47from lsst.utils.introspection import get_full_type_name 

48from lsst.daf.butler import DeferredDatasetHandle 

49from lsst.pipe.base import InMemoryDatasetHandle 

50import lsst.geom as geom 

51import lsst.sphgeom as sphgeom 

52 

53 

def init_fromDict(initDict, basePath='lsst.pipe.tasks.functors',
                  typeKey='functor', name=None):
    """Initialize an object defined in a dictionary.

    The object needs to be importable as f'{basePath}.{initDict[typeKey]}'.
    The positional and keyword arguments (if any) are contained in
    "args" and "kwargs" entries in the dictionary, respectively.
    This is used in `functors.CompositeFunctor.from_yaml` to initialize
    a composite functor from a specification in a YAML file.

    Parameters
    ----------
    initDict : dictionary
        Dictionary describing object's initialization. Must contain
        an entry keyed by ``typeKey`` that is the name of the object,
        relative to ``basePath``.
    basePath : str
        Path relative to module in which ``initDict[typeKey]`` is defined.
    typeKey : str
        Key of ``initDict`` that is the name of the object
        (relative to ``basePath``).
    name : str, optional
        Label used in the error message if construction fails.

    Returns
    -------
    element
        The constructed object.

    Raises
    ------
    Exception
        Re-raised as the same type as the construction failure, with a
        message identifying the functor; the original exception is chained.
    """
    # Copy so that popping entries below does not mutate the caller's dict.
    initDict = initDict.copy()
    # TO DO: DM-21956 We should be able to define functors outside this module
    pythonType = doImport(f'{basePath}.{initDict.pop(typeKey)}')
    args = []
    if 'args' in initDict:
        args = initDict.pop('args')
        # A bare string is treated as a single positional argument.
        if isinstance(args, str):
            args = [args]
    try:
        element = pythonType(*args, **initDict)
    except Exception as e:
        message = f'Error in constructing functor "{name}" of type {pythonType.__name__} with args: {args}'
        # Chain the original exception (`from e`) so the root-cause
        # traceback is preserved; previously it was dropped.
        raise type(e)(message, e.args) from e
    return element

91 

92 

class Functor(object):
    """Define and execute a calculation on a DataFrame or Handle holding a
    DataFrame.

    The `__call__` method accepts either a `DataFrame` object or a
    `DeferredDatasetHandle` or `InMemoryDatasetHandle`, and returns the
    result of the calculation as a single column. Each functor defines what
    columns are needed for the calculation, and only these columns are read
    from the dataset handle.

    The action of `__call__` consists of two steps: first, loading the
    necessary columns from disk into memory as a `pandas.DataFrame` object;
    and second, performing the computation on this dataframe and returning
    the result.

    To define a new `Functor`, a subclass must define a `_func` method,
    that takes a `pandas.DataFrame` and returns result in a `pandas.Series`.
    In addition, it must define the following attributes:

    * `_columns`: The columns necessary to perform the calculation
    * `name`: A name appropriate for a figure axis label
    * `shortname`: A name appropriate for use as a dictionary key

    On initialization, a `Functor` should declare what band (`filt` kwarg)
    and dataset (e.g. `'ref'`, `'meas'`, `'forced_src'`) it is intended to be
    applied to. This enables the `_get_data` method to extract the proper
    columns from the underlying data. If not specified, the dataset will fall
    back on the `_defaultDataset` attribute. If band is not specified and
    `dataset` is anything other than `'ref'`, then an error will be raised
    when trying to perform the calculation.

    Originally, `Functor` was set up to expect datasets formatted like the
    `deepCoadd_obj` dataset; that is, a dataframe with a multi-level column
    index, with the levels of the column index being `band`, `dataset`, and
    `column`. It has since been generalized to apply to dataframes without
    multi-level indices and multi-level indices with just `dataset` and
    `column` levels. In addition, the `_get_data` method that reads the
    columns from the underlying data will return a dataframe with column
    index levels defined by the `_dfLevels` attribute; by default, this is
    `column`.

    The `_dfLevels` attributes should generally not need to be changed,
    unless `_func` needs columns from multiple filters or datasets to do the
    calculation. An example of this is the `lsst.pipe.tasks.functors.Color`
    functor, for which `_dfLevels = ('band', 'column')`, and `_func` expects
    the dataframe it gets to have those levels in the column index.

    Parameters
    ----------
    filt : str
        Filter upon which to do the calculation.
    dataset : str
        Dataset upon which to do the calculation
        (e.g., 'ref', 'meas', 'forced_src').
    noDup : bool, optional
        If set, overrides the class-level `_defaultNoDup` (see `noDup`).
    """

    # Dataset used when none is passed to the constructor.
    _defaultDataset = 'ref'
    # Column-index levels kept by `_setLevels` on multilevel dataframes.
    _dfLevels = ('column',)
    # Default for the `noDup` property when not set per-instance.
    _defaultNoDup = False

    def __init__(self, filt=None, dataset=None, noDup=None):
        self.filt = filt
        self.dataset = dataset if dataset is not None else self._defaultDataset
        self._noDup = noDup
        self.log = logging.getLogger(type(self).__name__)

    @property
    def noDup(self):
        """Per-instance override of `_defaultNoDup`, if one was given."""
        if self._noDup is not None:
            return self._noDup
        else:
            return self._defaultNoDup

    @property
    def columns(self):
        """Columns required to perform calculation.

        Subclasses either set a `_columns` attribute or override this
        property.
        """
        if not hasattr(self, '_columns'):
            raise NotImplementedError('Must define columns property or _columns attribute')
        return self._columns

    def _get_data_columnLevels(self, data, columnIndex=None):
        """Gets the names of the column index levels.

        This should only be called in the context of a multilevel table.

        Parameters
        ----------
        data : various
            The data to be read, can be a `DeferredDatasetHandle` or
            `InMemoryDatasetHandle`.
        columnIndex : pandas `Index` object, optional
            If not passed, then it is read from the `DeferredDatasetHandle`
            or `InMemoryDatasetHandle`.
        """
        if columnIndex is None:
            columnIndex = data.get(component="columns")
        return columnIndex.names

    def _get_data_columnLevelNames(self, data, columnIndex=None):
        """Gets the content of each of the column levels for a multilevel
        table.

        Returns a dict mapping each level name to the sorted unique values
        present at that level of the column index.
        """
        if columnIndex is None:
            columnIndex = data.get(component="columns")

        columnLevels = columnIndex.names
        columnLevelNames = {
            level: list(np.unique(np.array([c for c in columnIndex])[:, i]))
            for i, level in enumerate(columnLevels)
        }
        return columnLevelNames

    def _colsFromDict(self, colDict, columnIndex=None):
        """Converts dictionary column specficiation to a list of columns.

        Levels missing from ``colDict`` default to *all* values at that
        level; the cartesian product is then filtered to the tuples that
        actually exist in ``columnIndex``.
        """
        new_colDict = {}
        columnLevels = self._get_data_columnLevels(None, columnIndex=columnIndex)

        for i, lev in enumerate(columnLevels):
            if lev in colDict:
                if isinstance(colDict[lev], str):
                    new_colDict[lev] = [colDict[lev]]
                else:
                    new_colDict[lev] = colDict[lev]
            else:
                new_colDict[lev] = columnIndex.levels[i]

        levelCols = [new_colDict[lev] for lev in columnLevels]
        cols = list(product(*levelCols))
        colsAvailable = [col for col in cols if col in columnIndex]
        return colsAvailable

    def multilevelColumns(self, data, columnIndex=None, returnTuple=False):
        """Returns columns needed by functor from multilevel dataset.

        To access tables with multilevel column structure, the
        `DeferredDatasetHandle` or `InMemoryDatasetHandle` need to be passed
        either a list of tuples or a dictionary.

        Parameters
        ----------
        data : various
            The data as either `DeferredDatasetHandle`, or
            `InMemoryDatasetHandle`.
        columnIndex : pandas `Index` object, optional
            Either passed or read in from `DeferredDatasetHandle`.
        returnTuple : `bool`
            If true, then return a list of tuples rather than the column
            dictionary specification. This is set to `True` by
            `CompositeFunctor` in order to be able to combine columns from
            the various component functors.
        """
        if not isinstance(data, (DeferredDatasetHandle, InMemoryDatasetHandle)):
            raise RuntimeError(f"Unexpected data type. Got {get_full_type_name(data)}.")

        if columnIndex is None:
            columnIndex = data.get(component="columns")

        # Confirm that the dataset has the column levels the functor is expecting it to have.
        columnLevels = self._get_data_columnLevels(data, columnIndex)

        columnDict = {'column': self.columns,
                      'dataset': self.dataset}
        if self.filt is None:
            columnLevelNames = self._get_data_columnLevelNames(data, columnIndex)
            if "band" in columnLevels:
                # 'ref' tables carry band-independent values, so any one
                # band serves; otherwise the caller must disambiguate.
                if self.dataset == "ref":
                    columnDict["band"] = columnLevelNames["band"][0]
                else:
                    raise ValueError(f"'filt' not set for functor {self.name}"
                                     f"(dataset {self.dataset}) "
                                     "and DataFrame "
                                     "contains multiple filters in column index. "
                                     "Set 'filt' or set 'dataset' to 'ref'.")
        else:
            columnDict['band'] = self.filt

        if returnTuple:
            return self._colsFromDict(columnDict, columnIndex=columnIndex)
        else:
            return columnDict

    def _func(self, df, dropna=True):
        # Subclasses must implement the actual calculation here.
        raise NotImplementedError('Must define calculation on dataframe')

    def _get_columnIndex(self, data):
        """Return columnIndex, or `None` for a plain DataFrame input."""

        if isinstance(data, (DeferredDatasetHandle, InMemoryDatasetHandle)):
            return data.get(component="columns")
        else:
            return None

    def _get_data(self, data):
        """Retrieve dataframe necessary for calculation.

        The data argument can be a `DataFrame`, a `DeferredDatasetHandle`,
        or an `InMemoryDatasetHandle`.

        Returns dataframe upon which `self._func` can act.
        """
        # We wrap a dataframe in a handle here to take advantage of the
        # dataframe delegate dataframe column wrangling abilities.
        if isinstance(data, pd.DataFrame):
            _data = InMemoryDatasetHandle(data, storageClass="DataFrame")
        elif isinstance(data, (DeferredDatasetHandle, InMemoryDatasetHandle)):
            _data = data
        else:
            raise RuntimeError(f"Unexpected type provided for data. Got {get_full_type_name(data)}.")

        # First thing to do: check to see if the data source has a multilevel column index or not.
        columnIndex = self._get_columnIndex(_data)
        is_multiLevel = isinstance(columnIndex, pd.MultiIndex)

        # Get proper columns specification for this functor.
        if is_multiLevel:
            columns = self.multilevelColumns(_data, columnIndex=columnIndex)
        else:
            columns = self.columns

        # Load in-memory dataframe with appropriate columns the gen3 way.
        df = _data.get(parameters={"columns": columns})

        # Drop unnecessary column levels.
        if is_multiLevel:
            df = self._setLevels(df)

        return df

    def _setLevels(self, df):
        # Drop every column-index level not listed in `_dfLevels`.
        levelsToDrop = [n for n in df.columns.names if n not in self._dfLevels]
        df.columns = df.columns.droplevel(levelsToDrop)
        return df

    def _dropna(self, vals):
        return vals.dropna()

    def __call__(self, data, dropna=False):
        """Load the required columns and run `_func` on them.

        On any exception from `_func`, logs the error and returns the
        `fail` value (all-NaN series) instead of raising.
        """
        df = self._get_data(data)
        try:
            vals = self._func(df)
        except Exception as e:
            self.log.error("Exception in %s call: %s: %s", self.name, type(e).__name__, e)
            vals = self.fail(df)
        if dropna:
            vals = self._dropna(vals)

        return vals

    def difference(self, data1, data2, **kwargs):
        """Computes difference between functor called on two different
        DataFrame/Handle objects.
        """
        return self(data1, **kwargs) - self(data2, **kwargs)

    def fail(self, df):
        # Fallback result when `_func` raises: NaN for every row.
        return pd.Series(np.full(len(df), np.nan), index=df.index)

    @property
    def name(self):
        """Full name of functor (suitable for figure labels).

        NOTE(review): this returns (rather than raises) the
        `NotImplementedError` class, so `self.name` on a subclass that
        forgets to override it silently yields the exception class object —
        presumably unintentional; confirm before changing, since `name` is
        interpolated into error messages above.
        """
        return NotImplementedError

    @property
    def shortname(self):
        """Short name of functor (suitable for column name/dict key)."""
        return self.name

364 

365 

class CompositeFunctor(Functor):
    """Perform multiple calculations at once on a catalog.

    The role of a `CompositeFunctor` is to group together computations from
    multiple functors. Instead of returning `pandas.Series` a
    `CompositeFunctor` returns a `pandas.Dataframe`, with the column names
    being the keys of `funcDict`.

    The `columns` attribute of a `CompositeFunctor` is the union of all
    columns in all the component functors.

    A `CompositeFunctor` does not use a `_func` method itself; rather, when a
    `CompositeFunctor` is called, all its columns are loaded at once, and the
    resulting dataframe is passed to the `_func` method of each component
    functor. This has the advantage of only doing I/O (reading from parquet
    file) once, and works because each individual `_func` method of each
    component functor does not care if there are *extra* columns in the
    dataframe being passed; only that it must contain *at least* the
    `columns` it expects.

    An important and useful class method is `from_yaml`, which takes as
    argument the path to a YAML file specifying a collection of functors.

    Parameters
    ----------
    funcs : `dict` or `list`
        Dictionary or list of functors. If a list, then it will be converted
        into a dictonary according to the `.shortname` attribute of each
        functor.
    """
    dataset = None
    name = "CompositeFunctor"

    def __init__(self, funcs, **kwargs):

        # isinstance (rather than an exact type comparison) so dict
        # subclasses such as OrderedDict are accepted too.
        if isinstance(funcs, dict):
            self.funcDict = funcs
        else:
            self.funcDict = {f.shortname: f for f in funcs}

        self._filt = None

        super().__init__(**kwargs)

    @property
    def filt(self):
        return self._filt

    @filt.setter
    def filt(self, filt):
        # Propagate a concrete filter to every component functor.
        if filt is not None:
            for _, f in self.funcDict.items():
                f.filt = filt
        self._filt = filt

    def update(self, new):
        """Add the functors from a dict or another `CompositeFunctor`.

        Parameters
        ----------
        new : `dict` or `CompositeFunctor`
            Source of additional functors.

        Raises
        ------
        TypeError
            If ``new`` is neither a dict nor a `CompositeFunctor`.
        """
        if isinstance(new, dict):
            self.funcDict.update(new)
        elif isinstance(new, CompositeFunctor):
            self.funcDict.update(new.funcDict)
        else:
            raise TypeError('Can only update with dictionary or CompositeFunctor.')

        # Make sure new functors have the same 'filt' set.
        if self.filt is not None:
            self.filt = self.filt

    @property
    def columns(self):
        """Union of the columns required by all component functors."""
        return list(set([x for y in [f.columns for f in self.funcDict.values()] for x in y]))

    def multilevelColumns(self, data, **kwargs):
        # Get the union of columns for all component functors.
        # Note the need to have `returnTuple=True` here.
        return list(
            set(
                [
                    x
                    for y in [
                        f.multilevelColumns(data, returnTuple=True, **kwargs) for f in self.funcDict.values()
                    ]
                    for x in y
                ]
            )
        )

    def __call__(self, data, **kwargs):
        """Apply the functor to the data table.

        Parameters
        ----------
        data : various
            The data represented as `lsst.daf.butler.DeferredDatasetHandle`,
            `lsst.pipe.base.InMemoryDatasetHandle`, or `pandas.DataFrame`.
            The table or a pointer to a table on disk from which columns can
            be accessed.

        Returns
        -------
        valDf : `pandas.DataFrame`
            One output column per component functor, keyed as in `funcDict`.
        """
        if isinstance(data, pd.DataFrame):
            _data = InMemoryDatasetHandle(data, storageClass="DataFrame")
        elif isinstance(data, (DeferredDatasetHandle, InMemoryDatasetHandle)):
            _data = data
        else:
            raise RuntimeError(f"Unexpected type provided for data. Got {get_full_type_name(data)}.")

        columnIndex = self._get_columnIndex(_data)

        if isinstance(columnIndex, pd.MultiIndex):
            # Load the union of all needed columns once, then hand each
            # functor its own sub-frame with the levels it expects.
            columns = self.multilevelColumns(_data, columnIndex=columnIndex)
            df = _data.get(parameters={"columns": columns})

            valDict = {}
            for k, f in self.funcDict.items():
                try:
                    subdf = f._setLevels(
                        df[f.multilevelColumns(_data, returnTuple=True, columnIndex=columnIndex)]
                    )
                    valDict[k] = f._func(subdf)
                except Exception as e:
                    self.log.exception(
                        "Exception in %s (funcs: %s) call: %s",
                        self.name,
                        str(list(self.funcDict.keys())),
                        type(e).__name__,
                    )
                    try:
                        valDict[k] = f.fail(subdf)
                    except NameError:
                        # subdf was never assigned (column selection itself
                        # failed), so there is nothing to fall back on.
                        raise e

        else:
            df = _data.get(parameters={"columns": self.columns})

            valDict = {k: f._func(df) for k, f in self.funcDict.items()}

        # Check that output columns are actually columns.
        for name, colVal in valDict.items():
            if len(colVal.shape) != 1:
                raise RuntimeError("Transformed column '%s' is not the shape of a column. "
                                   "It is shaped %s and type %s." % (name, colVal.shape, type(colVal)))

        try:
            valDf = pd.concat(valDict, axis=1)
        except TypeError:
            print([(k, type(v)) for k, v in valDict.items()])
            raise

        if kwargs.get('dropna', False):
            valDf = valDf.dropna(how='any')

        return valDf

    @classmethod
    def renameCol(cls, col, renameRules):
        """Rename ``col`` using the (old-prefix, new) pairs in
        ``renameRules``; a `None` rule set leaves it unchanged.
        """
        if renameRules is None:
            return col
        for old, new in renameRules:
            if col.startswith(old):
                col = col.replace(old, new)
        return col

    @classmethod
    def from_file(cls, filename, **kwargs):
        """Build a `CompositeFunctor` from a YAML specification on disk."""
        # Allow environment variables in the filename.
        filename = os.path.expandvars(filename)
        with open(filename) as f:
            translationDefinition = yaml.safe_load(f)

        return cls.from_yaml(translationDefinition, **kwargs)

    @classmethod
    def from_yaml(cls, translationDefinition, **kwargs):
        """Build a `CompositeFunctor` from a parsed YAML specification.

        The 'funcs' section is instantiated through `init_fromDict`; the
        optional flag sections each become plain `Column` functors bound to
        the appropriate dataset.
        """
        funcs = {}
        for func, val in translationDefinition['funcs'].items():
            funcs[func] = init_fromDict(val, name=func)

        if 'flag_rename_rules' in translationDefinition:
            renameRules = translationDefinition['flag_rename_rules']
        else:
            renameRules = None

        if 'calexpFlags' in translationDefinition:
            for flag in translationDefinition['calexpFlags']:
                funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='calexp')

        if 'refFlags' in translationDefinition:
            for flag in translationDefinition['refFlags']:
                funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='ref')

        if 'forcedFlags' in translationDefinition:
            for flag in translationDefinition['forcedFlags']:
                funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='forced_src')

        if 'flags' in translationDefinition:
            for flag in translationDefinition['flags']:
                funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='meas')

        return cls(funcs, **kwargs)

562 

563 

def mag_aware_eval(df, expr, log):
    """Evaluate an expression on a DataFrame, knowing what the 'mag'
    function means.

    Builds on `pandas.DataFrame.eval`, which parses and executes math on
    dataframes. Occurrences of ``mag(col)`` in the expression are rewritten
    to the equivalent ``-2.5*log10(col)`` form before evaluation. If that
    fails, a second attempt is made with '_instFlux' appended to the column
    name inside ``mag(...)``.

    Parameters
    ----------
    df : pandas.DataFrame
        Dataframe on which to evaluate expression.
    expr : str
        Expression.
    log : logging.Logger
        Logger used to report a failed first attempt.

    Returns
    -------
    val : pandas.Series
        Result of the evaluated expression.
    """
    try:
        # First attempt: take the column name inside mag(...) literally.
        translated = re.sub(r'mag\((\w+)\)', r'-2.5*log(\g<1>)/log(10)', expr)
        result = df.eval(translated)
    except Exception as e:  # Should check what actually gets raised
        log.error("Exception in mag_aware_eval: %s: %s", type(e).__name__, e)
        # Retry, assuming the column names need the '_instFlux' suffix.
        translated = re.sub(r'mag\((\w+)\)', r'-2.5*log(\g<1>_instFlux)/log(10)', expr)
        result = df.eval(translated)
    return result

585 

586 

class CustomFunctor(Functor):
    """Arbitrary computation on a catalog.

    Column names (and thus the columns to be loaded from catalog) are found
    by finding all words and trying to ignore all "math-y" words.

    Parameters
    ----------
    expr : str
        Expression to evaluate, to be parsed and executed by
        `mag_aware_eval`.
    """
    # Words that look like identifiers but are math functions, not columns.
    _ignore_words = ('mag', 'sin', 'cos', 'exp', 'log', 'sqrt')

    def __init__(self, expr, **kwargs):
        self.expr = expr
        super().__init__(**kwargs)

    @property
    def name(self):
        return self.expr

    @property
    def columns(self):
        # Arguments of mag(...) are fluxes and may need the suffix added.
        flux_cols = re.findall(r'mag\(\s*(\w+)\s*\)', self.expr)

        cols = [word for word in re.findall(r'[a-zA-Z_]+', self.expr)
                if word not in self._ignore_words]
        not_a_col = []
        for c in flux_cols:
            if re.search('_instFlux$', c):
                cols.append(c)
            else:
                # Record the bare name so it can be filtered out below,
                # keeping only the suffixed flux column.
                cols.append(f'{c}_instFlux')
                not_a_col.append(c)

        return list({c for c in cols if c not in not_a_col})

    def _func(self, df):
        return mag_aware_eval(df, self.expr, self.log)

625 

626 

class Column(Functor):
    """Return the column with the specified name, unmodified.

    Parameters
    ----------
    col : str
        Name of the column to extract from the dataframe.
    """

    def __init__(self, col, **kwargs):
        self.col = col
        super().__init__(**kwargs)

    @property
    def columns(self):
        # Only the one named column is needed.
        return [self.col]

    @property
    def name(self):
        return self.col

    def _func(self, df):
        return df[self.col]

645 

646 

class Index(Functor):
    """Return the value of the DataFrame index for each object."""

    # Dummy column: at least one column must be requested from the dataset,
    # even though only the index itself is used.
    columns = ['coord_ra']
    _defaultDataset = 'ref'
    _defaultNoDup = True

    def _func(self, df):
        # A Series whose values and index are both the dataframe index.
        return pd.Series(df.index, index=df.index)

657 

658 

class IDColumn(Column):
    """Return the source id, which is stored as the DataFrame index."""

    col = 'id'
    _allow_difference = False
    _defaultNoDup = True

    def _func(self, df):
        # The id lives in the index, not in a regular column.
        return pd.Series(df.index, index=df.index)

666 

667 

class FootprintNPix(Column):
    # Number of pixels in the source footprint.
    col = 'base_Footprint_nPix'

670 

671 

class CoordColumn(Column):
    """Base class for coordinate columns, returned in degrees.

    Parameters
    ----------
    col : str
        Name of the coordinate column.
    """
    # When True, the underlying column is stored in radians and is
    # converted to degrees on read.
    _radians = True

    # NOTE: the former `__init__` only forwarded its arguments to
    # `Column.__init__` and has been removed as a redundant override.

    def _func(self, df):
        # Must not modify original column in case that column is used by
        # another functor.
        output = df[self.col] * 180 / np.pi if self._radians else df[self.col]
        return output

684 

685 

class RAColumn(CoordColumn):
    """Right Ascension, in degrees."""
    name = 'RA'
    _defaultNoDup = True

    def __init__(self, **kwargs):
        # The RA column name is fixed; callers need not supply it.
        super().__init__('coord_ra', **kwargs)

    # NOTE: the former `__call__` only forwarded to `Functor.__call__`
    # and has been removed as a redundant override.

697 

698 

class DecColumn(CoordColumn):
    """Declination, in degrees."""
    name = 'Dec'
    _defaultNoDup = True

    def __init__(self, **kwargs):
        # The Dec column name is fixed; callers need not supply it.
        super().__init__('coord_dec', **kwargs)

    # NOTE: the former `__call__` only forwarded to `Functor.__call__`
    # and has been removed as a redundant override.

710 

711 

class HtmIndex20(Functor):
    """Compute the level 20 HtmIndex for the catalog.

    Notes
    -----
    This functor was implemented to satisfy requirements of old APDB
    interface which required ``pixelId`` column in DiaObject with HTM20
    index. APDB interface had migrated to not need that information, but we
    keep this class in case it may be useful for something else.

    Parameters
    ----------
    ra, decl : str
        Names of the right-ascension and declination columns.
    """
    name = "Htm20"
    htmLevel = 20
    # When True, coordinate columns are interpreted as radians.
    _radians = True

    def __init__(self, ra, decl, **kwargs):
        self.pixelator = sphgeom.HtmPixelization(self.htmLevel)
        self.ra = ra
        self.decl = decl
        self._columns = [self.ra, self.decl]
        super().__init__(**kwargs)

    def _func(self, df):
        # Pick the angular unit once, outside the per-row closure.
        units = geom.radians if self._radians else geom.degrees

        def rowToIndex(row):
            point = geom.SpherePoint(row[self.ra], row[self.decl], units)
            return self.pixelator.index(point.getVector())

        return df.apply(rowToIndex, axis=1, result_type='reduce').astype('int64')

747 

748 

def fluxName(col):
    """Return ``col`` with the '_instFlux' suffix appended if missing."""
    return col if col.endswith('_instFlux') else col + '_instFlux'

753 

754 

def fluxErrName(col):
    """Return ``col`` with the '_instFluxErr' suffix appended if missing."""
    return col if col.endswith('_instFluxErr') else col + '_instFluxErr'

759 

760 

class Mag(Functor):
    """Compute calibrated magnitude.

    Takes a `calib` argument, which returns the flux at mag=0
    as `calib.getFluxMag0()`. If not provided, then the default
    `fluxMag0` is 63095734448.0194, which is default for HSC.
    This default should be removed in DM-21955.

    This calculation hides warnings about invalid values and dividing by
    zero.

    As for all functors, a `dataset` and `filt` kwarg should be provided
    upon initialization. Unlike the default `Functor`, however, the default
    dataset for a `Mag` is `'meas'`, rather than `'ref'`.

    Parameters
    ----------
    col : `str`
        Name of flux column from which to compute magnitude. Can be
        parseable by `lsst.pipe.tasks.functors.fluxName` function---that is,
        you can pass `'modelfit_CModel'` instead of
        `'modelfit_CModel_instFlux'`) and it will understand.
    calib : `lsst.afw.image.calib.Calib` (optional)
        Object that knows zero point.
    """
    _defaultDataset = 'meas'

    def __init__(self, col, calib=None, **kwargs):
        self.col = fluxName(col)
        self.calib = calib
        if calib is not None:
            self.fluxMag0 = calib.getFluxMag0()[0]
        else:
            # TO DO: DM-21955 Replace hard coded photometic calibration values
            self.fluxMag0 = 63095734448.0194

        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.col]

    def _func(self, df):
        # np.errstate suppresses the same numpy floating-point warnings
        # ("invalid value encountered", "divide by zero") that the previous
        # np.warnings-based filters did; np.warnings was a deprecated alias
        # of the stdlib warnings module and was removed in NumPy 1.25.
        with np.errstate(invalid='ignore', divide='ignore'):
            return -2.5*np.log10(df[self.col] / self.fluxMag0)

    @property
    def name(self):
        return f'mag_{self.col}'

811 

812 

class MagErr(Mag):
    """Compute calibrated magnitude uncertainty.

    Takes the same `calib` object as `lsst.pipe.tasks.functors.Mag`.

    Parameters
    ----------
    col : `str`
        Name of flux column.
    calib : `lsst.afw.image.calib.Calib` (optional)
        Object that knows zero point.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        if self.calib is not None:
            self.fluxMag0Err = self.calib.getFluxMag0()[1]
        else:
            self.fluxMag0Err = 0.

    @property
    def columns(self):
        # The flux column and its corresponding error column.
        return [self.col, self.col + 'Err']

    def _func(self, df):
        # np.errstate replaces the removed np.warnings alias; it suppresses
        # the same "invalid value" / "divide by zero" numpy warnings.
        with np.errstate(invalid='ignore', divide='ignore'):
            fluxCol, fluxErrCol = self.columns
            x = df[fluxErrCol] / df[fluxCol]
            y = self.fluxMag0Err / self.fluxMag0
            # Standard error propagation for mag = -2.5 log10(flux/fluxMag0).
            magErr = (2.5 / np.log(10.)) * np.sqrt(x*x + y*y)
            return magErr

    @property
    def name(self):
        return super().name + '_err'

849 

850 

class NanoMaggie(Mag):
    """Flux in units of nanomaggies, scaled by the zero-point flux."""

    def _func(self, df):
        return 1e9 * (df[self.col] / self.fluxMag0)

857 

858 

class MagDiff(Functor):
    """Functor to calculate magnitude difference between two flux columns.

    Parameters
    ----------
    col1, col2 : `str`
        Names of the flux columns; parseable by `fluxName`, so the
        '_instFlux' suffix may be omitted. The result is
        ``mag(col1) - mag(col2)``.
    """
    # NOTE: this docstring previously appeared *after* the assignment below,
    # making it a no-op string statement rather than the class docstring.
    _defaultDataset = 'meas'

    def __init__(self, col1, col2, **kwargs):
        self.col1 = fluxName(col1)
        self.col2 = fluxName(col2)
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.col1, self.col2]

    def _func(self, df):
        # np.errstate replaces the removed np.warnings alias; it suppresses
        # the same "invalid value" / "divide by zero" numpy warnings.
        with np.errstate(invalid='ignore', divide='ignore'):
            return -2.5*np.log10(df[self.col1]/df[self.col2])

    @property
    def name(self):
        return f'(mag_{self.col1} - mag_{self.col2})'

    @property
    def shortname(self):
        return f'magDiff_{self.col1}_{self.col2}'

886 

887 

class Color(Functor):
    """Compute the color between two filters.

    Computes color by initializing two different `Mag` functors based on the
    `col` and filters provided, and then returning the difference.

    This is enabled by the `_func` expecting a dataframe with a multilevel
    column index, with both `'band'` and `'column'`, instead of just
    `'column'`, which is the `Functor` default. This is controlled by the
    `_dfLevels` attribute.

    Also of note, the default dataset for `Color` is `'forced_src'`, whereas
    for `Mag` it is `'meas'`.

    Parameters
    ----------
    col : str
        Name of flux column from which to compute; same as would be passed
        to `lsst.pipe.tasks.functors.Mag`.

    filt2, filt1 : str
        Filters from which to compute magnitude difference.
        Color computed is `Mag(filt2) - Mag(filt1)`.
    """
    _defaultDataset = 'forced_src'
    # `_func` needs both the band and column levels of the column index.
    _dfLevels = ('band', 'column')
    _defaultNoDup = True

    def __init__(self, col, filt2, filt1, **kwargs):
        self.col = fluxName(col)
        if filt2 == filt1:
            raise RuntimeError("Cannot compute Color for %s: %s - %s " % (col, filt2, filt1))
        self.filt2 = filt2
        self.filt1 = filt1

        self.mag2 = Mag(col, filt=filt2, **kwargs)
        self.mag1 = Mag(col, filt=filt1, **kwargs)

        super().__init__(**kwargs)

    @property
    def filt(self):
        # A Color spans two filters, so it reports no single filter.
        return None

    @filt.setter
    def filt(self, filt):
        # Deliberately ignore assignment: the `self.filt = filt` performed
        # by Functor.__init__ (and by CompositeFunctor propagation) must not
        # overwrite the two-filter configuration set above.
        pass

    def _func(self, df):
        # df has a 'band' column level; select each band's sub-frame and let
        # the per-band Mag functors do the magnitude computation.
        mag2 = self.mag2._func(df[self.filt2])
        mag1 = self.mag1._func(df[self.filt1])
        return mag2 - mag1

    @property
    def columns(self):
        return [self.mag1.col, self.mag2.col]

    def multilevelColumns(self, parq, **kwargs):
        # One (dataset, band, column) tuple per filter.
        return [(self.dataset, self.filt1, self.col), (self.dataset, self.filt2, self.col)]

    @property
    def name(self):
        return f'{self.filt2} - {self.filt1} ({self.col})'

    @property
    def shortname(self):
        return f"{self.col}_{self.filt2.replace('-', '')}m{self.filt1.replace('-', '')}"

956 

957 

class Labeller(Functor):
    """Main function of this subclass is to override the dropna=True
    """
    _null_label = 'null'
    _allow_difference = False
    name = 'label'
    _force_str = False

    def __call__(self, parq, dropna=False, **kwargs):
        # Labels are categorical and a null category exists, so rows must
        # never be dropped; force dropna=False whatever the caller passed.
        result = super().__call__(parq, dropna=False, **kwargs)
        return result

968 

969 

class StarGalaxyLabeller(Labeller):
    """Label each source 'star' or 'galaxy' from its extendedness value."""
    _columns = ["base_ClassificationExtendedness_value"]
    _column = "base_ClassificationExtendedness_value"

    def _func(self, df):
        extendedness = df[self._columns][self._column]
        # Code 0 -> 'galaxy', 1 -> 'star'; NaN extendedness -> code 2 (null).
        codes = (extendedness < 0.5).astype(int).mask(extendedness.isnull(), 2)

        # TODO: DM-21954 Look into veracity of inline comment below
        # are these backwards?
        categories = ['galaxy', 'star', self._null_label]
        categorical = pd.Categorical.from_codes(codes, categories=categories)
        label = pd.Series(categorical, index=extendedness.index, name='label')
        return label.astype(str) if self._force_str else label

988 

989 

class NumStarLabeller(Labeller):
    """Label sources by how many bands flagged them as a star."""
    _columns = ['numStarFlags']
    labels = {"star": 0, "maybe": 1, "notStar": 2}

    def _func(self, df):
        nStarFlags = df[self._columns][self._columns[0]]

        # Number of filters
        nBands = len(nStarFlags.unique()) - 1

        # 0 flags -> 'noStar', 1..nBands-1 -> 'maybe', all bands -> 'star'.
        binned = pd.cut(nStarFlags, [-1, 0, nBands - 1, nBands],
                        labels=['noStar', 'maybe', 'star'])
        label = pd.Series(binned, index=nStarFlags.index, name='label')

        return label.astype(str) if self._force_str else label

1008 

1009 

class DeconvolvedMoments(Functor):
    name = 'Deconvolved Moments'
    shortname = 'deconvolvedMoments'
    _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
                "ext_shapeHSM_HsmSourceMoments_yy",
                "base_SdssShape_xx", "base_SdssShape_yy",
                "ext_shapeHSM_HsmPsfMoments_xx",
                "ext_shapeHSM_HsmPsfMoments_yy")

    def _func(self, df):
        """Calculate deconvolved moments"""
        # Prefer the HSM source moments; fall back to all-NaN when absent.
        if "ext_shapeHSM_HsmSourceMoments_xx" in df.columns:  # _xx added by tdm
            hsm = df["ext_shapeHSM_HsmSourceMoments_xx"] + df["ext_shapeHSM_HsmSourceMoments_yy"]
        else:
            hsm = np.full(len(df), np.nan)
        sdss = df["base_SdssShape_xx"] + df["base_SdssShape_yy"]

        if "ext_shapeHSM_HsmPsfMoments_xx" not in df.columns:
            # LSST does not have shape.sdss.psf. Could instead add base_PsfShape to catalog using
            # exposure.getPsf().computeShape(s.getCentroid()).getIxx()
            # raise TaskError("No psf shape parameter found in catalog")
            raise RuntimeError('No psf shape parameter found in catalog')
        psf = df["ext_shapeHSM_HsmPsfMoments_xx"] + df["ext_shapeHSM_HsmPsfMoments_yy"]

        # Use the SDSS moments wherever the HSM ones are non-finite.
        return hsm.where(np.isfinite(hsm), sdss) - psf

1035 

1036 

class SdssTraceSize(Functor):
    """Functor to calculate SDSS trace radius size for sources"""
    name = "SDSS Trace Size"
    shortname = 'sdssTrace'
    _columns = ("base_SdssShape_xx", "base_SdssShape_yy")

    def _func(self, df):
        # Trace radius: sqrt of half the trace of the second-moment matrix.
        halfTrace = 0.5*(df["base_SdssShape_xx"] + df["base_SdssShape_yy"])
        return np.sqrt(halfTrace)

1046 

1047 

class PsfSdssTraceSizeDiff(Functor):
    """Functor to calculate SDSS trace radius size difference (%) between object and psf model"""
    name = "PSF - SDSS Trace Size"
    shortname = 'psf_sdssTrace'
    _columns = ("base_SdssShape_xx", "base_SdssShape_yy",
                "base_SdssShape_psf_xx", "base_SdssShape_psf_yy")

    def _func(self, df):
        sizeObj = np.sqrt(0.5*(df["base_SdssShape_xx"] + df["base_SdssShape_yy"]))
        sizePsf = np.sqrt(0.5*(df["base_SdssShape_psf_xx"] + df["base_SdssShape_psf_yy"]))
        # Percent difference, normalized by the mean of the two sizes.
        return 100*(sizeObj - sizePsf)/(0.5*(sizeObj + sizePsf))

1060 

1061 

class HsmTraceSize(Functor):
    """Functor to calculate HSM trace radius size for sources"""
    name = 'HSM Trace Size'
    shortname = 'hsmTrace'
    _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
                "ext_shapeHSM_HsmSourceMoments_yy")

    def _func(self, df):
        # Trace radius: sqrt of half the trace of the second-moment matrix.
        halfTrace = 0.5*(df["ext_shapeHSM_HsmSourceMoments_xx"]
                         + df["ext_shapeHSM_HsmSourceMoments_yy"])
        return np.sqrt(halfTrace)

1073 

1074 

class PsfHsmTraceSizeDiff(Functor):
    """Functor to calculate HSM trace radius size difference (%) between object and psf model"""
    name = 'PSF - HSM Trace Size'
    shortname = 'psf_HsmTrace'
    _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
                "ext_shapeHSM_HsmSourceMoments_yy",
                "ext_shapeHSM_HsmPsfMoments_xx",
                "ext_shapeHSM_HsmPsfMoments_yy")

    def _func(self, df):
        sizeObj = np.sqrt(0.5*(df["ext_shapeHSM_HsmSourceMoments_xx"]
                               + df["ext_shapeHSM_HsmSourceMoments_yy"]))
        sizePsf = np.sqrt(0.5*(df["ext_shapeHSM_HsmPsfMoments_xx"]
                               + df["ext_shapeHSM_HsmPsfMoments_yy"]))
        # Percent difference, normalized by the mean of the two sizes.
        return 100*(sizeObj - sizePsf)/(0.5*(sizeObj + sizePsf))

1091 

1092 

class HsmFwhm(Functor):
    name = 'HSM Psf FWHM'
    _columns = ('ext_shapeHSM_HsmPsfMoments_xx', 'ext_shapeHSM_HsmPsfMoments_yy')
    # TODO: DM-21403 pixel scale should be computed from the CD matrix or transform matrix
    pixelScale = 0.168
    # Gaussian sigma-to-FWHM conversion factor.
    SIGMA2FWHM = 2*np.sqrt(2*np.log(2))

    def _func(self, df):
        # Trace radius (sigma) in pixels, converted to FWHM in arcseconds.
        sigma = np.sqrt(0.5*(df['ext_shapeHSM_HsmPsfMoments_xx']
                             + df['ext_shapeHSM_HsmPsfMoments_yy']))
        return self.pixelScale*self.SIGMA2FWHM*sigma

1103 

1104 

class E1(Functor):
    """Distortion ellipticity e1 computed from second moments.

    Computes e1 = (Ixx - Iyy) / (Ixx + Iyy).

    Parameters
    ----------
    colXX, colXY, colYY : str
        Names of the xx, xy, and yy second-moment columns.
    """
    name = "Distortion Ellipticity (e1)"
    shortname = "Distortion"

    def __init__(self, colXX, colXY, colYY, **kwargs):
        self.colXX = colXX
        self.colXY = colXY
        self.colYY = colYY
        self._columns = [self.colXX, self.colXY, self.colYY]
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.colXX, self.colXY, self.colYY]

    def _func(self, df):
        # Parenthesize the numerator: the distortion is
        # (Ixx - Iyy)/(Ixx + Iyy), matching E2 = 2*Ixy/(Ixx + Iyy).
        # The previous expression, through operator precedence, divided
        # only Iyy by the trace and then subtracted it from Ixx.
        return (df[self.colXX] - df[self.colYY]) / (df[self.colXX] + df[self.colYY])

1122 

1123 

class E2(Functor):
    """Distortion ellipticity e2 = 2*Ixy / (Ixx + Iyy)."""
    name = "Ellipticity e2"

    def __init__(self, colXX, colXY, colYY, **kwargs):
        self.colXX = colXX
        self.colXY = colXY
        self.colYY = colYY
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.colXX, self.colXY, self.colYY]

    def _func(self, df):
        trace = df[self.colXX] + df[self.colYY]
        return 2*df[self.colXY] / trace

1139 

1140 

class RadiusFromQuadrupole(Functor):
    """Determinant radius from quadrupole moments: |Q|**(1/4)."""

    def __init__(self, colXX, colXY, colYY, **kwargs):
        self.colXX = colXX
        self.colXY = colXY
        self.colYY = colYY
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.colXX, self.colXY, self.colYY]

    def _func(self, df):
        # Fourth root of the determinant of the second-moment matrix.
        determinant = df[self.colXX]*df[self.colYY] - df[self.colXY]**2
        return determinant**0.25

1155 

1156 

class LocalWcs(Functor):
    """Computations using the stored localWcs.
    """
    name = "LocalWcsOperations"

    def __init__(self,
                 colCD_1_1,
                 colCD_1_2,
                 colCD_2_1,
                 colCD_2_2,
                 **kwargs):
        self.colCD_1_1 = colCD_1_1
        self.colCD_1_2 = colCD_1_2
        self.colCD_2_1 = colCD_2_1
        self.colCD_2_2 = colCD_2_2
        super().__init__(**kwargs)

    def computeDeltaRaDec(self, x, y, cd11, cd12, cd21, cd22):
        """Compute the RA/dec offsets corresponding to the pixel offset
        (x, y) by applying the local Wcs affine (CD-matrix) transform.

        Parameters
        ----------
        x : `pandas.Series`
            X pixel coordinate.
        y : `pandas.Series`
            Y pixel coordinate.
        cd11 : `pandas.Series`
            [1, 1] element of the local Wcs affine transform.
        cd12 : `pandas.Series`
            [1, 2] element of the local Wcs affine transform.
        cd21 : `pandas.Series`
            [2, 1] element of the local Wcs affine transform.
        cd22 : `pandas.Series`
            [2, 2] element of the local Wcs affine transform.

        Returns
        -------
        raDecTuple : tuple
            RA and dec conversion of x and y given the local Wcs. Returned
            units are in radians.

        """
        return (x * cd11 + y * cd12, x * cd21 + y * cd22)

    def computeSkySeparation(self, ra1, dec1, ra2, dec2):
        """Compute the great-circle separation between two sky positions.

        Parameters
        ----------
        ra1 : `pandas.Series`
            Ra of the first coordinate in radians.
        dec1 : `pandas.Series`
            Dec of the first coordinate in radians.
        ra2 : `pandas.Series`
            Ra of the second coordinate in radians.
        dec2 : `pandas.Series`
            Dec of the second coordinate in radians.

        Returns
        -------
        dist : `pandas.Series`
            Distance on the sphere in radians.
        """
        deltaDec = dec2 - dec1
        deltaRa = ra2 - ra1
        # Haversine formula: numerically stable for small separations.
        return 2 * np.arcsin(
            np.sqrt(
                np.sin(deltaDec / 2) ** 2
                + np.cos(dec2) * np.cos(dec1) * np.sin(deltaRa / 2) ** 2))

    def getSkySeparationFromPixel(self, x1, y1, x2, y2, cd11, cd12, cd21, cd22):
        """Compute the distance on the sphere from (x1, y1) to (x2, y2).

        Parameters
        ----------
        x1 : `pandas.Series`
            X pixel coordinate.
        y1 : `pandas.Series`
            Y pixel coordinate.
        x2 : `pandas.Series`
            X pixel coordinate.
        y2 : `pandas.Series`
            Y pixel coordinate.
        cd11 : `pandas.Series`
            [1, 1] element of the local Wcs affine transform.
        cd12 : `pandas.Series`
            [1, 2] element of the local Wcs affine transform.
        cd21 : `pandas.Series`
            [2, 1] element of the local Wcs affine transform.
        cd22 : `pandas.Series`
            [2, 2] element of the local Wcs affine transform.

        Returns
        -------
        Distance : `pandas.Series`
            Distance on the sphere in radians.
        """
        ra1, dec1 = self.computeDeltaRaDec(x1, y1, cd11, cd12, cd21, cd22)
        ra2, dec2 = self.computeDeltaRaDec(x2, y2, cd11, cd12, cd21, cd22)
        # Great circle distance for small separations.
        return self.computeSkySeparation(ra1, dec1, ra2, dec2)

1262 

1263 

class ComputePixelScale(LocalWcs):
    """Compute the local pixel scale from the stored CDMatrix.
    """
    name = "PixelScale"

    @property
    def columns(self):
        return [self.colCD_1_1,
                self.colCD_1_2,
                self.colCD_2_1,
                self.colCD_2_2]

    def pixelScaleArcseconds(self, cd11, cd12, cd21, cd22):
        """Compute the local pixel to scale conversion in arcseconds.

        Parameters
        ----------
        cd11 : `pandas.Series`
            [1, 1] element of the local Wcs affine transform in radians.
        cd12 : `pandas.Series`
            [1, 2] element of the local Wcs affine transform in radians.
        cd21 : `pandas.Series`
            [2, 1] element of the local Wcs affine transform in radians.
        cd22 : `pandas.Series`
            [2, 2] element of the local Wcs affine transform in radians.

        Returns
        -------
        pixScale : `pandas.Series`
            Arcseconds per pixel at the location of the local WC
        """
        # Pixel scale is sqrt(|det CD|), converted radians -> arcseconds.
        return 3600 * np.degrees(np.sqrt(np.fabs(cd11 * cd22 - cd12 * cd21)))

    def _func(self, df):
        return self.pixelScaleArcseconds(df[self.colCD_1_1],
                                         df[self.colCD_1_2],
                                         df[self.colCD_2_1],
                                         df[self.colCD_2_2])

1304 

1305 

class ConvertPixelToArcseconds(ComputePixelScale):
    """Convert a value in units of pixels to units of arcseconds.

    NOTE(review): the original docstring said "pixels squared to arcseconds
    squared", but `_func` multiplies by the pixel scale exactly once and
    `name` is `*_asArcseconds`; the docstring appears to have been swapped
    with the pixels-squared variant.
    """

    def __init__(self,
                 col,
                 colCD_1_1,
                 colCD_1_2,
                 colCD_2_1,
                 colCD_2_2,
                 **kwargs):
        self.col = col
        super().__init__(colCD_1_1,
                         colCD_1_2,
                         colCD_2_1,
                         colCD_2_2,
                         **kwargs)

    @property
    def name(self):
        return f"{self.col}_asArcseconds"

    @property
    def columns(self):
        return [self.col,
                self.colCD_1_1,
                self.colCD_1_2,
                self.colCD_2_1,
                self.colCD_2_2]

    def _func(self, df):
        # Single multiplication by the pixel scale: pixels -> arcseconds.
        return df[self.col] * self.pixelScaleArcseconds(df[self.colCD_1_1],
                                                        df[self.colCD_1_2],
                                                        df[self.colCD_2_1],
                                                        df[self.colCD_2_2])

1341 

1342 

class ConvertPixelSqToArcsecondsSq(ComputePixelScale):
    """Convert a value in units of pixels squared to units of arcseconds
    squared.

    NOTE(review): the original docstring said "pixels to arcseconds", but
    `_func` multiplies by the pixel scale twice and `name` is
    `*_asArcsecondsSq`; the docstring appears to have been swapped with the
    non-squared variant.
    """

    def __init__(self,
                 col,
                 colCD_1_1,
                 colCD_1_2,
                 colCD_2_1,
                 colCD_2_2,
                 **kwargs):
        self.col = col
        super().__init__(colCD_1_1,
                         colCD_1_2,
                         colCD_2_1,
                         colCD_2_2,
                         **kwargs)

    @property
    def name(self):
        return f"{self.col}_asArcsecondsSq"

    @property
    def columns(self):
        return [self.col,
                self.colCD_1_1,
                self.colCD_1_2,
                self.colCD_2_1,
                self.colCD_2_2]

    def _func(self, df):
        pixScale = self.pixelScaleArcseconds(df[self.colCD_1_1],
                                             df[self.colCD_1_2],
                                             df[self.colCD_2_1],
                                             df[self.colCD_2_2])
        # Two factors of the pixel scale: pixels^2 -> arcseconds^2.
        return df[self.col] * pixScale * pixScale

1379 

1380 

class ReferenceBand(Functor):
    """Report which band supplied the reference measurement for each row."""
    name = 'Reference Band'
    shortname = 'refBand'

    @property
    def columns(self):
        return ["merge_measurement_i",
                "merge_measurement_r",
                "merge_measurement_z",
                "merge_measurement_y",
                "merge_measurement_g",
                "merge_measurement_u"]

    def _func(self, df: pd.DataFrame) -> pd.Series:
        def bandOf(row):
            # Column with the max value (True > False) names the band.
            return row.idxmax().replace('merge_measurement_', '')

        # Only the bands actually present: this functor requests the
        # superset of bands that could be included in the object table.
        present = [band for band in self.columns if band in df.columns]
        # result_type='reduce' makes a Series of dtype object if df is empty.
        return df[present].apply(bandOf, axis=1,
                                 result_type='reduce').astype('object')

1406 

1407 

class Photometry(Functor):
    """Base class for photometric unit conversions of instrumental fluxes.

    Parameters
    ----------
    colFlux : str
        Name of the flux column to convert.
    colFluxErr : str, optional
        Name of the associated flux-error column.
    calib : optional
        Calibration object providing ``getFluxMag0()``; if `None`, a flux
        zero point is derived from the hard-coded ``COADD_ZP``.
    """
    # AB to NanoJansky (3631 Jansky)
    AB_FLUX_SCALE = (0 * u.ABmag).to_value(u.nJy)
    LOG_AB_FLUX_SCALE = 12.56
    # 2.5/ln(10): converts a relative flux error to a magnitude error.
    FIVE_OVER_2LOG10 = 1.085736204758129569
    # TO DO: DM-21955 Replace hard coded photometic calibration values
    COADD_ZP = 27

    def __init__(self, colFlux, colFluxErr=None, calib=None, **kwargs):
        self.vhypot = np.vectorize(self.hypot)
        self.col = colFlux
        self.colFluxErr = colFluxErr

        self.calib = calib
        if calib is not None:
            self.fluxMag0, self.fluxMag0Err = calib.getFluxMag0()
        else:
            self.fluxMag0 = 1./np.power(10, -0.4*self.COADD_ZP)
            self.fluxMag0Err = 0.

        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.col]

    @property
    def name(self):
        return f'mag_{self.col}'

    @classmethod
    def hypot(cls, a, b):
        """Numerically stable sqrt(a**2 + b**2) for scalar inputs."""
        if np.abs(a) < np.abs(b):
            a, b = b, a
        if a == 0.:
            return 0.
        q = b/a
        return np.abs(a) * np.sqrt(1. + q*q)

    def dn2flux(self, dn, fluxMag0):
        """Convert raw counts to flux in nanojanskys."""
        return self.AB_FLUX_SCALE * dn / fluxMag0

    def dn2mag(self, dn, fluxMag0):
        """Convert raw counts to AB magnitude.

        Non-positive counts quietly yield NaN/inf.
        """
        # np.errstate replaces the former np.warnings usage: np.warnings
        # was an accidental re-export removed in NumPy 1.25, and errstate
        # silences the divide-by-zero/invalid warnings at their source.
        with np.errstate(invalid='ignore', divide='ignore'):
            return -2.5 * np.log10(dn/fluxMag0)

    def dn2fluxErr(self, dn, dnErr, fluxMag0, fluxMag0Err):
        """Propagate count and zero-point errors to a flux error in nJy."""
        retVal = self.vhypot(dn * fluxMag0Err, dnErr * fluxMag0)
        retVal *= self.AB_FLUX_SCALE / fluxMag0 / fluxMag0
        return retVal

    def dn2MagErr(self, dn, dnErr, fluxMag0, fluxMag0Err):
        """Propagate count and zero-point errors to a magnitude error."""
        retVal = self.dn2fluxErr(dn, dnErr, fluxMag0, fluxMag0Err) / self.dn2flux(dn, fluxMag0)
        return self.FIVE_OVER_2LOG10 * retVal

1464 

1465 

class NanoJansky(Photometry):
    """Calibrated flux in nanojanskys from the configured flux column."""
    def _func(self, df):
        dn = df[self.col]
        return self.dn2flux(dn, self.fluxMag0)

1469 

1470 

class NanoJanskyErr(Photometry):
    """Calibrated flux error in nanojanskys."""
    @property
    def columns(self):
        return [self.col, self.colFluxErr]

    def _func(self, df):
        fluxErr = self.dn2fluxErr(df[self.col], df[self.colFluxErr],
                                  self.fluxMag0, self.fluxMag0Err)
        return pd.Series(fluxErr, index=df.index)

1479 

1480 

class Magnitude(Photometry):
    """AB magnitude from the configured flux column."""
    def _func(self, df):
        dn = df[self.col]
        return self.dn2mag(dn, self.fluxMag0)

1484 

1485 

class MagnitudeErr(Photometry):
    """AB magnitude error from the configured flux and error columns."""
    @property
    def columns(self):
        return [self.col, self.colFluxErr]

    def _func(self, df):
        magErr = self.dn2MagErr(df[self.col], df[self.colFluxErr],
                                self.fluxMag0, self.fluxMag0Err)
        return pd.Series(magErr, index=df.index)

1494 

1495 

class LocalPhotometry(Functor):
    """Base class for calibrating the specified instrument flux column using
    the local photometric calibration.

    Parameters
    ----------
    instFluxCol : `str`
        Name of the instrument flux column.
    instFluxErrCol : `str`
        Name of the associated error columns for ``instFluxCol``.
    photoCalibCol : `str`
        Name of local calibration column.
    photoCalibErrCol : `str`
        Error associated with ``photoCalibCol``

    See also
    --------
    LocalPhotometry
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    """
    # AB magnitude of a 1 nJy source; offset for nJy -> mag conversion.
    logNJanskyToAB = (1 * u.nJy).to_value(u.ABmag)

    def __init__(self,
                 instFluxCol,
                 instFluxErrCol,
                 photoCalibCol,
                 photoCalibErrCol,
                 **kwargs):
        self.instFluxCol = instFluxCol
        self.instFluxErrCol = instFluxErrCol
        self.photoCalibCol = photoCalibCol
        self.photoCalibErrCol = photoCalibErrCol
        super().__init__(**kwargs)

    def instFluxToNanojansky(self, instFlux, localCalib):
        """Convert instrument flux to nanojanskys.

        Parameters
        ----------
        instFlux : `numpy.ndarray` or `pandas.Series`
            Array of instrument flux measurements
        localCalib : `numpy.ndarray` or `pandas.Series`
            Array of local photometric calibration estimates.

        Returns
        -------
        calibFlux : `numpy.ndarray` or `pandas.Series`
            Array of calibrated flux measurements.
        """
        return instFlux * localCalib

    def instFluxErrToNanojanskyErr(self, instFlux, instFluxErr, localCalib, localCalibErr):
        """Convert instrument flux error to a nanojansky error.

        Parameters
        ----------
        instFlux : `numpy.ndarray` or `pandas.Series`
            Array of instrument flux measurements
        instFluxErr : `numpy.ndarray` or `pandas.Series`
            Errors on associated ``instFlux`` values
        localCalib : `numpy.ndarray` or `pandas.Series`
            Array of local photometric calibration estimates.
        localCalibErr : `numpy.ndarray` or `pandas.Series`
            Errors on associated ``localCalib`` values

        Returns
        -------
        calibFluxErr : `numpy.ndarray` or `pandas.Series`
            Errors on calibrated flux measurements.
        """
        return np.hypot(instFluxErr * localCalib, instFlux * localCalibErr)

    def instFluxToMagnitude(self, instFlux, localCalib):
        """Convert instrument flux to an AB magnitude.

        Parameters
        ----------
        instFlux : `numpy.ndarray` or `pandas.Series`
            Array of instrument flux measurements
        localCalib : `numpy.ndarray` or `pandas.Series`
            Array of local photometric calibration estimates.

        Returns
        -------
        calibMag : `numpy.ndarray` or `pandas.Series`
            Array of calibrated AB magnitudes.
        """
        return -2.5 * np.log10(self.instFluxToNanojansky(instFlux, localCalib)) + self.logNJanskyToAB

    def instFluxErrToMagnitudeErr(self, instFlux, instFluxErr, localCalib, localCalibErr):
        """Convert instrument flux error to an AB magnitude error.

        Parameters
        ----------
        instFlux : `numpy.ndarray` or `pandas.Series`
            Array of instrument flux measurements
        instFluxErr : `numpy.ndarray` or `pandas.Series`
            Errors on associated ``instFlux`` values
        localCalib : `numpy.ndarray` or `pandas.Series`
            Array of local photometric calibration estimates.
        localCalibErr : `numpy.ndarray` or `pandas.Series`
            Errors on associated ``localCalib`` values

        Returns
        -------
        calibMagErr: `numpy.ndarray` or `pandas.Series`
            Error on calibrated AB magnitudes.
        """
        err = self.instFluxErrToNanojanskyErr(instFlux, instFluxErr, localCalib, localCalibErr)
        # Bug fix: the denominator previously passed ``instFluxErr`` as the
        # calibration argument, which is dimensionally wrong; the calibrated
        # flux must be computed with ``localCalib``.
        return 2.5 / np.log(10) * err / self.instFluxToNanojansky(instFlux, localCalib)

1609 

1610 

class LocalNanojansky(LocalPhotometry):
    """Compute calibrated fluxes using the local calibration value.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    """

    @property
    def columns(self):
        return [self.instFluxCol, self.photoCalibCol]

    @property
    def name(self):
        return f'flux_{self.instFluxCol}'

    def _func(self, df):
        instFlux = df[self.instFluxCol]
        localCalib = df[self.photoCalibCol]
        return self.instFluxToNanojansky(instFlux, localCalib)

1632 

1633 

class LocalNanojanskyErr(LocalPhotometry):
    """Compute calibrated flux errors using the local calibration value.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    """

    @property
    def columns(self):
        return [self.instFluxCol, self.instFluxErrCol,
                self.photoCalibCol, self.photoCalibErrCol]

    @property
    def name(self):
        return f'fluxErr_{self.instFluxCol}'

    def _func(self, df):
        instFlux = df[self.instFluxCol]
        instFluxErr = df[self.instFluxErrCol]
        localCalib = df[self.photoCalibCol]
        localCalibErr = df[self.photoCalibErrCol]
        return self.instFluxErrToNanojanskyErr(instFlux, instFluxErr,
                                               localCalib, localCalibErr)

1657 

1658 

class LocalMagnitude(LocalPhotometry):
    """Compute calibrated AB magnitudes using the local calibration value.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    """

    @property
    def columns(self):
        return [self.instFluxCol, self.photoCalibCol]

    @property
    def name(self):
        return f'mag_{self.instFluxCol}'

    def _func(self, df):
        instFlux = df[self.instFluxCol]
        localCalib = df[self.photoCalibCol]
        return self.instFluxToMagnitude(instFlux, localCalib)

1681 

1682 

class LocalMagnitudeErr(LocalPhotometry):
    """Compute calibrated AB magnitude errors using the local calibration value.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    """

    @property
    def columns(self):
        return [self.instFluxCol, self.instFluxErrCol,
                self.photoCalibCol, self.photoCalibErrCol]

    @property
    def name(self):
        return f'magErr_{self.instFluxCol}'

    def _func(self, df):
        instFlux = df[self.instFluxCol]
        instFluxErr = df[self.instFluxErrCol]
        localCalib = df[self.photoCalibCol]
        localCalibErr = df[self.photoCalibErrCol]
        return self.instFluxErrToMagnitudeErr(instFlux, instFluxErr,
                                              localCalib, localCalibErr)

1708 

1709 

class LocalDipoleMeanFlux(LocalPhotometry):
    """Compute absolute mean of dipole fluxes.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    LocalDipoleMeanFlux
    LocalDipoleMeanFluxErr
    LocalDipoleDiffFlux
    LocalDipoleDiffFluxErr
    """
    def __init__(self,
                 instFluxPosCol,
                 instFluxNegCol,
                 instFluxPosErrCol,
                 instFluxNegErrCol,
                 photoCalibCol,
                 photoCalibErrCol,
                 **kwargs):
        self.instFluxNegCol = instFluxNegCol
        self.instFluxPosCol = instFluxPosCol
        self.instFluxNegErrCol = instFluxNegErrCol
        self.instFluxPosErrCol = instFluxPosErrCol
        self.photoCalibCol = photoCalibCol
        self.photoCalibErrCol = photoCalibErrCol
        # The base class only tracks a single flux column; give it the
        # negative lobe (attributes above keep both).
        super().__init__(instFluxNegCol,
                         instFluxNegErrCol,
                         photoCalibCol,
                         photoCalibErrCol,
                         **kwargs)

    @property
    def columns(self):
        return [self.instFluxPosCol,
                self.instFluxNegCol,
                self.photoCalibCol]

    @property
    def name(self):
        return f'dipMeanFlux_{self.instFluxPosCol}_{self.instFluxNegCol}'

    def _func(self, df):
        localCalib = df[self.photoCalibCol]
        negFlux = np.fabs(self.instFluxToNanojansky(df[self.instFluxNegCol], localCalib))
        posFlux = np.fabs(self.instFluxToNanojansky(df[self.instFluxPosCol], localCalib))
        # Mean of the absolute calibrated lobe fluxes.
        return 0.5*(negFlux + posFlux)

1757 

1758 

class LocalDipoleMeanFluxErr(LocalDipoleMeanFlux):
    """Compute the error on the absolute mean of dipole fluxes.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    LocalDipoleMeanFlux
    LocalDipoleMeanFluxErr
    LocalDipoleDiffFlux
    LocalDipoleDiffFluxErr
    """

    @property
    def columns(self):
        return [self.instFluxPosCol,
                self.instFluxNegCol,
                self.instFluxPosErrCol,
                self.instFluxNegErrCol,
                self.photoCalibCol,
                self.photoCalibErrCol]

    @property
    def name(self):
        return f'dipMeanFluxErr_{self.instFluxPosCol}_{self.instFluxNegCol}'

    def _func(self, df):
        # Error on 0.5*(|pos| + |neg|)*calib: the calibration error scales
        # the summed lobe fluxes, and the lobe errors scale the calibration.
        # Bug fix: the previous expression was missing parentheses around
        # (|neg| + |pos|), so the calibration error multiplied only the
        # positive lobe (compare the correctly grouped expression in
        # LocalDipoleDiffFluxErr).
        return 0.5*np.sqrt(
            ((np.fabs(df[self.instFluxNegCol]) + np.fabs(df[self.instFluxPosCol]))
             * df[self.photoCalibErrCol])**2
            + (df[self.instFluxNegErrCol]**2 + df[self.instFluxPosErrCol]**2)
            * df[self.photoCalibCol]**2)

1793 

1794 

class LocalDipoleDiffFlux(LocalDipoleMeanFlux):
    """Compute the absolute difference of dipole fluxes.

    Value is (abs(pos) - abs(neg))

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    LocalDipoleMeanFlux
    LocalDipoleMeanFluxErr
    LocalDipoleDiffFlux
    LocalDipoleDiffFluxErr
    """

    @property
    def columns(self):
        return [self.instFluxPosCol,
                self.instFluxNegCol,
                self.photoCalibCol]

    @property
    def name(self):
        return f'dipDiffFlux_{self.instFluxPosCol}_{self.instFluxNegCol}'

    def _func(self, df):
        localCalib = df[self.photoCalibCol]
        posFlux = np.fabs(self.instFluxToNanojansky(df[self.instFluxPosCol], localCalib))
        negFlux = np.fabs(self.instFluxToNanojansky(df[self.instFluxNegCol], localCalib))
        return posFlux - negFlux

1825 

1826 

class LocalDipoleDiffFluxErr(LocalDipoleMeanFlux):
    """Compute the error on the absolute difference of dipole fluxes.

    See also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalMagnitude
    LocalMagnitudeErr
    LocalDipoleMeanFlux
    LocalDipoleMeanFluxErr
    LocalDipoleDiffFlux
    LocalDipoleDiffFluxErr
    """

    @property
    def columns(self):
        return [self.instFluxPosCol,
                self.instFluxNegCol,
                self.instFluxPosErrCol,
                self.instFluxNegErrCol,
                self.photoCalibCol,
                self.photoCalibErrCol]

    @property
    def name(self):
        return f'dipDiffFluxErr_{self.instFluxPosCol}_{self.instFluxNegCol}'

    def _func(self, df):
        # Error on (|pos| - |neg|)*calib: the calibration error scales the
        # lobe-flux difference; the lobe errors scale the calibration.
        fluxDiff = np.fabs(df[self.instFluxPosCol]) - np.fabs(df[self.instFluxNegCol])
        errSq = (fluxDiff * df[self.photoCalibErrCol])**2
        errSq = errSq + (df[self.instFluxPosErrCol]**2 + df[self.instFluxNegErrCol]**2) \
            * df[self.photoCalibCol]**2
        return np.sqrt(errSq)

1861 

1862 

class Ratio(Functor):
    """Base class for returning the ratio of 2 columns.

    Can be used to compute a Signal to Noise ratio for any input flux.

    Parameters
    ----------
    numerator : `str`
        Name of the column to use at the numerator in the ratio
    denominator : `str`
        Name of the column to use as the denominator in the ratio.
    """
    def __init__(self,
                 numerator,
                 denominator,
                 **kwargs):
        self.numerator = numerator
        self.denominator = denominator
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.numerator, self.denominator]

    @property
    def name(self):
        return f'ratio_{self.numerator}_{self.denominator}'

    def _func(self, df):
        # np.errstate replaces the former np.warnings usage: np.warnings
        # was an accidental re-export removed in NumPy 1.25, and errstate
        # silences the divide-by-zero/invalid warnings raised by zero or
        # NaN denominators.
        with np.errstate(invalid='ignore', divide='ignore'):
            return df[self.numerator] / df[self.denominator]

1896 

1897 

class Ebv(Functor):
    """Compute E(B-V) from dustmaps.sfd
    """
    _defaultDataset = 'ref'
    name = "E(B-V)"
    shortname = "ebv"

    def __init__(self, **kwargs):
        # import is only needed for Ebv
        from dustmaps.sfd import SFDQuery
        self._columns = ['coord_ra', 'coord_dec']
        self.sfd = SFDQuery()
        super().__init__(**kwargs)

    def _func(self, df):
        ra = df['coord_ra'].values * u.rad
        dec = df['coord_dec'].values * u.rad
        ebv = self.sfd(SkyCoord(ra, dec))
        # Double precision unnecessary scientifically
        # but currently needed for ingest to qserv
        return pd.Series(ebv, index=df.index).astype('float64')