Coverage for python/lsst/pipe/tasks/functors.py: 34%

726 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-07-12 10:09 +0000

1# This file is part of pipe_tasks. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

# Public API of this module, in the order the names are defined.
__all__ = [
    "init_fromDict",
    "Functor",
    "CompositeFunctor",
    "mag_aware_eval",
    "CustomFunctor",
    "Column",
    "Index",
    "CoordColumn",
    "RAColumn",
    "DecColumn",
    "HtmIndex20",
    "fluxName",
    "fluxErrName",
    "Mag",
    "MagErr",
    "MagDiff",
    "Color",
    "DeconvolvedMoments",
    "SdssTraceSize",
    "PsfSdssTraceSizeDiff",
    "HsmTraceSize",
    "PsfHsmTraceSizeDiff",
    "HsmFwhm",
    "E1",
    "E2",
    "RadiusFromQuadrupole",
    "LocalWcs",
    "ComputePixelScale",
    "ConvertPixelToArcseconds",
    "ConvertPixelSqToArcsecondsSq",
    "ReferenceBand",
    "Photometry",
    "NanoJansky",
    "NanoJanskyErr",
    "LocalPhotometry",
    "LocalNanojansky",
    "LocalNanojanskyErr",
    "LocalDipoleMeanFlux",
    "LocalDipoleMeanFluxErr",
    "LocalDipoleDiffFlux",
    "LocalDipoleDiffFluxErr",
    "Ebv",
]

35 

36import yaml 

37import re 

38from itertools import product 

39import logging 

40import os.path 

41import warnings 

42 

43import pandas as pd 

44import numpy as np 

45import astropy.units as u 

46from astropy.coordinates import SkyCoord 

47 

48from lsst.utils import doImport 

49from lsst.utils.introspection import get_full_type_name 

50from lsst.daf.butler import DeferredDatasetHandle 

51from lsst.pipe.base import InMemoryDatasetHandle 

52import lsst.geom as geom 

53import lsst.sphgeom as sphgeom 

54 

55 

def init_fromDict(initDict, basePath='lsst.pipe.tasks.functors',
                  typeKey='functor', name=None):
    """Initialize an object defined in a dictionary.

    The object needs to be importable as f'{basePath}.{initDict[typeKey]}'.
    Positional arguments (if any) are taken from the "args" entry of the
    dictionary; every entry that remains after ``typeKey`` and "args" have
    been removed is passed to the constructor as a keyword argument.
    This is used in `~lsst.pipe.tasks.functors.CompositeFunctor.from_yaml` to
    initialize a composite functor from a specification in a YAML file.

    Parameters
    ----------
    initDict : dictionary
        Dictionary describing object's initialization.
        Must contain an entry keyed by ``typeKey`` that is the name of the
        object, relative to ``basePath``.
        The dictionary itself is not modified; a shallow copy is consumed.
    basePath : str
        Path relative to module in which ``initDict[typeKey]`` is defined.
    typeKey : str
        Key of ``initDict`` that is the name of the object (relative to
        ``basePath``).
    name : str, optional
        Label of the object being built.
        It is only used in the message of the exception raised when
        construction fails.

    Returns
    -------
    element
        The result of calling the imported type with the collected
        positional and keyword arguments.

    Raises
    ------
    Exception
        If the constructor raises, an exception of the same type is raised
        with a message naming the functor, its type and its positional
        arguments; the original exception is attached as ``__cause__``.
    """
    initDict = initDict.copy()
    # TO DO: DM-21956 We should be able to define functors outside this module
    pythonType = doImport(f'{basePath}.{initDict.pop(typeKey)}')
    args = initDict.pop('args', [])
    # A lone string is a single positional argument, not a sequence of
    # characters to be unpacked.
    if isinstance(args, str):
        args = [args]
    try:
        element = pythonType(*args, **initDict)
    except Exception as e:
        message = f'Error in constructing functor "{name}" of type {pythonType.__name__} with args: {args}'
        # Keep the original exception type so existing handlers still match,
        # and chain explicitly so the root-cause traceback is preserved.
        raise type(e)(message, e.args) from e
    return element

92 

93 

class Functor(object):
    """Define and execute a calculation on a DataFrame or Handle holding a
    DataFrame.

    The `__call__` method accepts either a `~pandas.DataFrame` object or a
    `~lsst.daf.butler.DeferredDatasetHandle` or
    `~lsst.pipe.base.InMemoryDatasetHandle`, and returns the
    result of the calculation as a single column.
    Each functor defines what columns are needed for the calculation, and only
    these columns are read from the dataset handle.

    The action of `__call__` consists of two steps: first, loading the
    necessary columns from disk into memory as a `~pandas.DataFrame` object;
    and second, performing the computation on this DataFrame and returning the
    result.

    To define a new `Functor`, a subclass must define a `_func` method,
    that takes a `~pandas.DataFrame` and returns result in a `~pandas.Series`.
    In addition, it must define the following attributes:

    * `_columns`: The columns necessary to perform the calculation
    * `name`: A name appropriate for a figure axis label
    * `shortname`: A name appropriate for use as a dictionary key

    On initialization, a `Functor` should declare what band (``filt`` kwarg)
    and dataset (e.g. ``'ref'``, ``'meas'``, ``'forced_src'``) it is intended
    to be applied to.
    This enables the `_get_data` method to extract the proper columns from the
    underlying data.
    If not specified, the dataset will fall back on the `_defaultDataset`
    attribute.
    If band is not specified and ``dataset`` is anything other than ``'ref'``,
    then an error will be raised when trying to perform the calculation.

    Originally, `Functor` was set up to expect datasets formatted like the
    ``deepCoadd_obj`` dataset; that is, a DataFrame with a multi-level column
    index, with the levels of the column index being ``band``, ``dataset``, and
    ``column``.
    It has since been generalized to apply to DataFrames without multi-level
    indices and multi-level indices with just ``dataset`` and ``column``
    levels.
    In addition, the `_get_data` method that reads the columns from the
    underlying data will return a DataFrame with column index levels defined by
    the `_dfLevels` attribute; by default, this is ``column``.

    The `_dfLevels` attributes should generally not need to be changed, unless
    `_func` needs columns from multiple filters or datasets to do the
    calculation.
    An example of this is the `~lsst.pipe.tasks.functors.Color` functor, for
    which `_dfLevels = ('band', 'column')`, and `_func` expects the DataFrame
    it gets to have those levels in the column index.

    Parameters
    ----------
    filt : str
        Band upon which to do the calculation.

    dataset : str
        Dataset upon which to do the calculation (e.g., 'ref', 'meas',
        'forced_src').

    noDup : bool, optional
        If not `None`, overrides the class-level ``_defaultNoDup`` value
        reported by the `noDup` property.
    """

    _defaultDataset = 'ref'
    _dfLevels = ('column',)
    _defaultNoDup = False

    def __init__(self, filt=None, dataset=None, noDup=None):
        self.filt = filt
        self.dataset = dataset if dataset is not None else self._defaultDataset
        self._noDup = noDup
        # One logger per concrete functor class.
        self.log = logging.getLogger(type(self).__name__)

    @property
    def noDup(self):
        """Do not explode by band if used on object table."""
        if self._noDup is not None:
            return self._noDup
        else:
            return self._defaultNoDup

    @property
    def columns(self):
        """Columns required to perform calculation."""
        if not hasattr(self, '_columns'):
            raise NotImplementedError('Must define columns property or _columns attribute')
        return self._columns

    def _get_data_columnLevels(self, data, columnIndex=None):
        """Gets the names of the column index levels.

        This should only be called in the context of a multilevel table.

        Parameters
        ----------
        data : various
            The data to be read, can be a
            `~lsst.daf.butler.DeferredDatasetHandle` or
            `~lsst.pipe.base.InMemoryDatasetHandle`.
            Only used when ``columnIndex`` is `None`.
        columnIndex (optional): pandas `~pandas.Index` object
            If not passed, then it is read from the
            `~lsst.daf.butler.DeferredDatasetHandle`
            or `~lsst.pipe.base.InMemoryDatasetHandle`.

        Returns
        -------
        names
            The ``names`` attribute of the column index.
        """
        if columnIndex is None:
            columnIndex = data.get(component="columns")
        return columnIndex.names

    def _get_data_columnLevelNames(self, data, columnIndex=None):
        """Gets the content of each of the column levels for a multilevel
        table.

        Parameters
        ----------
        data : various
            Handle from which the column index is read when ``columnIndex``
            is `None`.
        columnIndex (optional): pandas `~pandas.Index` object
            Column index to inspect.

        Returns
        -------
        columnLevelNames : `dict`
            Maps each level name to the list of unique values found at that
            level of the column index.
        """
        if columnIndex is None:
            columnIndex = data.get(component="columns")

        columnLevels = columnIndex.names
        # Materialize the index entries once instead of once per level.
        entries = np.array([c for c in columnIndex])
        columnLevelNames = {
            level: list(np.unique(entries[:, i]))
            for i, level in enumerate(columnLevels)
        }
        return columnLevelNames

    def _colsFromDict(self, colDict, columnIndex=None):
        """Converts dictionary column specification to a list of columns.

        Parameters
        ----------
        colDict : `dict`
            Maps a column-index level name to either a single string or a
            collection of values for that level.
            Levels that are absent from the dictionary are not restricted.
        columnIndex : pandas `~pandas.MultiIndex`
            Column index of the table.

        Returns
        -------
        colsAvailable : `list` of `tuple`
            Every combination of the requested level values that is actually
            present in ``columnIndex``.
        """
        new_colDict = {}
        columnLevels = self._get_data_columnLevels(None, columnIndex=columnIndex)

        for i, lev in enumerate(columnLevels):
            if lev in colDict:
                if isinstance(colDict[lev], str):
                    new_colDict[lev] = [colDict[lev]]
                else:
                    new_colDict[lev] = colDict[lev]
            else:
                # Unconstrained level: allow every value the index knows.
                new_colDict[lev] = columnIndex.levels[i]

        levelCols = [new_colDict[lev] for lev in columnLevels]
        cols = list(product(*levelCols))
        colsAvailable = [col for col in cols if col in columnIndex]
        return colsAvailable

    def multilevelColumns(self, data, columnIndex=None, returnTuple=False):
        """Returns columns needed by functor from multilevel dataset.

        To access tables with multilevel column structure, the
        `~lsst.daf.butler.DeferredDatasetHandle` or
        `~lsst.pipe.base.InMemoryDatasetHandle` needs to be passed
        either a list of tuples or a dictionary.

        Parameters
        ----------
        data : various
            The data as either `~lsst.daf.butler.DeferredDatasetHandle`, or
            `~lsst.pipe.base.InMemoryDatasetHandle`.
        columnIndex (optional): pandas `~pandas.Index` object
            Either passed or read in from
            `~lsst.daf.butler.DeferredDatasetHandle`.
        `returnTuple` : `bool`
            If true, then return a list of tuples rather than the column
            dictionary specification.
            This is set to `True` by `CompositeFunctor` in order to be able to
            combine columns from the various component functors.

        Returns
        -------
        columns : `dict` or `list` of `tuple`
            Column specification keyed by level name, or the list of matching
            column tuples when ``returnTuple`` is true.

        Raises
        ------
        RuntimeError
            If ``data`` is not one of the supported handle types.
        ValueError
            If ``filt`` is not set, the column index has a ``band`` level and
            the dataset is not ``'ref'``.
        """
        if not isinstance(data, (DeferredDatasetHandle, InMemoryDatasetHandle)):
            raise RuntimeError(f"Unexpected data type. Got {get_full_type_name(data)}.")

        if columnIndex is None:
            columnIndex = data.get(component="columns")

        # Confirm that the dataset has the column levels the functor is
        # expecting it to have.
        columnLevels = self._get_data_columnLevels(data, columnIndex)

        columnDict = {'column': self.columns,
                      'dataset': self.dataset}
        if self.filt is None:
            if "band" in columnLevels:
                if self.dataset == "ref":
                    # Only scan the index when a band actually has to be
                    # picked; the first band found is used.
                    columnLevelNames = self._get_data_columnLevelNames(data, columnIndex)
                    columnDict["band"] = columnLevelNames["band"][0]
                else:
                    raise ValueError(f"'filt' not set for functor {self.name} "
                                     f"(dataset {self.dataset}) "
                                     "and DataFrame "
                                     "contains multiple filters in column index. "
                                     "Set 'filt' or set 'dataset' to 'ref'.")
        else:
            columnDict['band'] = self.filt

        if returnTuple:
            return self._colsFromDict(columnDict, columnIndex=columnIndex)
        else:
            return columnDict

    def _func(self, df, dropna=True):
        """Perform the calculation on ``df``; subclasses must override."""
        raise NotImplementedError('Must define calculation on DataFrame')

    def _get_columnIndex(self, data):
        """Return columnIndex.

        Returns the ``columns`` component of a dataset handle, or `None` when
        ``data`` is not one of the supported handle types.
        """

        if isinstance(data, (DeferredDatasetHandle, InMemoryDatasetHandle)):
            return data.get(component="columns")
        else:
            return None

    def _get_data(self, data):
        """Retrieve DataFrame necessary for calculation.

        The data argument can be a `~pandas.DataFrame`, a
        `~lsst.daf.butler.DeferredDatasetHandle`, or
        an `~lsst.pipe.base.InMemoryDatasetHandle`.

        Returns a DataFrame upon which `self._func` can act.

        Raises
        ------
        RuntimeError
            If ``data`` is none of the supported types.
        """
        # We wrap a DataFrame in a handle here to take advantage of the
        # DataFrame delegate DataFrame column wrangling abilities.
        if isinstance(data, pd.DataFrame):
            _data = InMemoryDatasetHandle(data, storageClass="DataFrame")
        elif isinstance(data, (DeferredDatasetHandle, InMemoryDatasetHandle)):
            _data = data
        else:
            raise RuntimeError(f"Unexpected type provided for data. Got {get_full_type_name(data)}.")

        # First thing to do: check to see if the data source has a multilevel
        # column index or not.
        columnIndex = self._get_columnIndex(_data)
        is_multiLevel = isinstance(columnIndex, pd.MultiIndex)

        # Get proper columns specification for this functor.
        if is_multiLevel:
            columns = self.multilevelColumns(_data, columnIndex=columnIndex)
        else:
            columns = self.columns

        # Load in-memory DataFrame with appropriate columns the gen3 way.
        df = _data.get(parameters={"columns": columns})

        # Drop unnecessary column levels.
        if is_multiLevel:
            df = self._setLevels(df)

        return df

    def _setLevels(self, df):
        """Drop every column-index level of ``df`` not named in `_dfLevels`.

        The column index of ``df`` is replaced in place and ``df`` is
        returned.
        """
        levelsToDrop = [n for n in df.columns.names if n not in self._dfLevels]
        df.columns = df.columns.droplevel(levelsToDrop)
        return df

    def _dropna(self, vals):
        """Return ``vals`` with missing entries removed."""
        return vals.dropna()

    def __call__(self, data, dropna=False):
        """Load the required columns from ``data`` and run the calculation.

        Any exception raised by `_func` is logged and replaced by the result
        of `fail`, so that one bad functor does not abort the caller.

        Parameters
        ----------
        data : various
            A `~pandas.DataFrame` or a supported dataset handle.
        dropna : `bool`, optional
            If true, missing values are removed from the result.
        """
        df = self._get_data(data)
        try:
            vals = self._func(df)
        except Exception as e:
            self.log.error("Exception in %s call: %s: %s", self.name, type(e).__name__, e)
            vals = self.fail(df)
        if dropna:
            vals = self._dropna(vals)

        return vals

    def difference(self, data1, data2, **kwargs):
        """Computes difference between functor called on two different
        DataFrame/Handle objects.
        """
        return self(data1, **kwargs) - self(data2, **kwargs)

    def fail(self, df):
        """Return an all-NaN `~pandas.Series` sharing the index of ``df``."""
        return pd.Series(np.full(len(df), np.nan), index=df.index)

    @property
    def name(self):
        """Full name of functor (suitable for figure labels)."""
        # NOTE: this returns the exception class instead of raising it.
        # Subclasses that do not override ``name`` (e.g. ``Index``) therefore
        # get a placeholder value here, and the logging in ``__call__`` keeps
        # working for them; raising would change that behavior.
        return NotImplementedError

    @property
    def shortname(self):
        """Short name of functor (suitable for column name/dict key)."""
        return self.name

375 

376 

class CompositeFunctor(Functor):
    """Perform multiple calculations at once on a catalog.

    The role of a `CompositeFunctor` is to group together computations from
    multiple functors.
    Instead of returning `~pandas.Series` a `CompositeFunctor` returns a
    `~pandas.DataFrame`, with the column names being the keys of ``funcDict``.

    The `columns` attribute of a `CompositeFunctor` is the union of all columns
    in all the component functors.

    A `CompositeFunctor` does not use a `_func` method itself; rather, when a
    `CompositeFunctor` is called, all its columns are loaded at once, and the
    resulting DataFrame is passed to the `_func` method of each component
    functor.
    This has the advantage of only doing I/O (reading from parquet file) once,
    and works because each individual `_func` method of each component functor
    does not care if there are *extra* columns in the DataFrame being passed;
    only that it must contain *at least* the `columns` it expects.

    An important and useful class method is `from_yaml`, which takes as an
    argument a parsed YAML specification of a collection of functors;
    `from_file` reads such a specification from the path of a YAML file.

    Parameters
    ----------
    funcs : `dict` or `list`
        Dictionary or list of functors.
        If a list, then it will be converted into a dictionary according to
        the `.shortname` attribute of each functor.
    """
    dataset = None
    name = "CompositeFunctor"

    def __init__(self, funcs, **kwargs):

        # isinstance (rather than an exact type comparison) so that dict
        # subclasses such as OrderedDict are used as mappings instead of
        # being iterated as if they were a list of functors.
        if isinstance(funcs, dict):
            self.funcDict = funcs
        else:
            self.funcDict = {f.shortname: f for f in funcs}

        # Must exist before the base initializer assigns ``self.filt``,
        # which goes through the property setter below.
        self._filt = None

        super().__init__(**kwargs)

    @property
    def filt(self):
        """Band shared by the component functors, or `None` if unset."""
        return self._filt

    @filt.setter
    def filt(self, filt):
        # A non-None band is pushed down to every component functor.
        if filt is not None:
            for f in self.funcDict.values():
                f.filt = filt
        self._filt = filt

    def update(self, new):
        """Update the functor with new functors.

        Parameters
        ----------
        new : `dict` or `CompositeFunctor`
            Functors to merge into this one; existing keys are overwritten.

        Raises
        ------
        TypeError
            If ``new`` is neither a dictionary nor a `CompositeFunctor`.
        """
        if isinstance(new, dict):
            self.funcDict.update(new)
        elif isinstance(new, CompositeFunctor):
            self.funcDict.update(new.funcDict)
        else:
            raise TypeError('Can only update with dictionary or CompositeFunctor.')

        # Make sure new functors have the same 'filt' set.
        if self.filt is not None:
            self.filt = self.filt

    @property
    def columns(self):
        """Union of the columns required by all component functors."""
        return list({col for f in self.funcDict.values() for col in f.columns})

    def multilevelColumns(self, data, **kwargs):
        """Return the union of the multilevel column tuples needed by all
        component functors.
        """
        # Get the union of columns for all component functors.
        # Note the need to have `returnTuple=True` here.
        return list(
            {
                col
                for f in self.funcDict.values()
                for col in f.multilevelColumns(data, returnTuple=True, **kwargs)
            }
        )

    def __call__(self, data, **kwargs):
        """Apply the functor to the data table.

        Parameters
        ----------
        data : various
            The data represented as `~lsst.daf.butler.DeferredDatasetHandle`,
            `~lsst.pipe.base.InMemoryDatasetHandle`, or `~pandas.DataFrame`.
            The table or a pointer to a table on disk from which columns can
            be accessed.
        **kwargs
            Only ``dropna`` is read: if true, rows containing any missing
            value are removed from the result.

        Returns
        -------
        valDf : `~pandas.DataFrame`
            One column per component functor, named by its ``funcDict`` key.

        Raises
        ------
        RuntimeError
            If ``data`` has an unsupported type, or if a component functor
            returns something that is not one-dimensional.
        """
        if isinstance(data, pd.DataFrame):
            _data = InMemoryDatasetHandle(data, storageClass="DataFrame")
        elif isinstance(data, (DeferredDatasetHandle, InMemoryDatasetHandle)):
            _data = data
        else:
            raise RuntimeError(f"Unexpected type provided for data. Got {get_full_type_name(data)}.")

        columnIndex = self._get_columnIndex(_data)

        if isinstance(columnIndex, pd.MultiIndex):
            columns = self.multilevelColumns(_data, columnIndex=columnIndex)
            df = _data.get(parameters={"columns": columns})

            valDict = {}
            for k, f in self.funcDict.items():
                try:
                    subdf = f._setLevels(
                        df[f.multilevelColumns(_data, returnTuple=True, columnIndex=columnIndex)]
                    )
                    valDict[k] = f._func(subdf)
                except Exception as e:
                    self.log.exception(
                        "Exception in %s (funcs: %s) call: %s",
                        self.name,
                        str(list(self.funcDict.keys())),
                        type(e).__name__,
                    )
                    try:
                        valDict[k] = f.fail(subdf)
                    except NameError:
                        # ``subdf`` was never bound (the failure happened
                        # before any sub-frame could be selected), so there
                        # is no index to build a NaN column from.
                        raise e

        else:
            df = _data.get(parameters={"columns": self.columns})

            valDict = {k: f._func(df) for k, f in self.funcDict.items()}

            # Check that output columns are actually columns.
            for name, colVal in valDict.items():
                if len(colVal.shape) != 1:
                    raise RuntimeError("Transformed column '%s' is not the shape of a column. "
                                       "It is shaped %s and type %s." % (name, colVal.shape, type(colVal)))

        try:
            valDf = pd.concat(valDict, axis=1)
        except TypeError:
            # Report what each functor produced through the logger (rather
            # than stdout) before letting the error propagate.
            self.log.error(
                "Could not concatenate functor outputs: %s",
                [(k, type(v)) for k, v in valDict.items()],
            )
            raise

        if kwargs.get('dropna', False):
            valDf = valDf.dropna(how='any')

        return valDf

    @classmethod
    def renameCol(cls, col, renameRules):
        """Apply prefix-triggered rename rules to a column name.

        Parameters
        ----------
        col : `str`
            Column name to rename.
        renameRules : iterable of (old, new) pairs, or `None`
            Rules are applied in order; a rule fires when the current name
            starts with ``old``, and then replaces every occurrence of
            ``old`` in the name with ``new``.

        Returns
        -------
        col : `str`
            The renamed column, or the input unchanged when ``renameRules``
            is `None`.
        """
        if renameRules is None:
            return col
        for old, new in renameRules:
            if col.startswith(old):
                col = col.replace(old, new)
        return col

    @classmethod
    def from_file(cls, filename, **kwargs):
        """Build a composite functor from the YAML file at ``filename``.

        Environment variables in ``filename`` are expanded; remaining keyword
        arguments are forwarded to `from_yaml`.
        """
        # Allow environment variables in the filename.
        filename = os.path.expandvars(filename)
        with open(filename) as f:
            translationDefinition = yaml.safe_load(f)

        return cls.from_yaml(translationDefinition, **kwargs)

    @classmethod
    def from_yaml(cls, translationDefinition, **kwargs):
        """Build a composite functor from a parsed YAML specification.

        Parameters
        ----------
        translationDefinition : `dict`
            Must contain a ``funcs`` mapping of output name to a functor
            description understood by `init_fromDict`.
            May contain ``flag_rename_rules`` and the flag lists
            ``calexpFlags``, ``refFlags``, ``forcedFlags`` and ``flags``,
            each of which adds a `Column` functor per listed flag for the
            ``calexp``, ``ref``, ``forced_src`` and ``meas`` dataset
            respectively.
        **kwargs
            Forwarded to the class constructor.
        """
        funcs = {}
        for func, val in translationDefinition['funcs'].items():
            funcs[func] = init_fromDict(val, name=func)

        renameRules = translationDefinition.get('flag_rename_rules')

        # Section name in the specification -> dataset the flags are read
        # from.  The order matches the order in which sections are applied.
        flagSections = (
            ('calexpFlags', 'calexp'),
            ('refFlags', 'ref'),
            ('forcedFlags', 'forced_src'),
            ('flags', 'meas'),
        )
        for section, dataset in flagSections:
            if section in translationDefinition:
                for flag in translationDefinition[section]:
                    funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset=dataset)

        return cls(funcs, **kwargs)

575 

576 

def mag_aware_eval(df, expr, log):
    """Evaluate an expression on a DataFrame, knowing what the 'mag' function
    means.

    Builds on `pandas.DataFrame.eval`, which parses and executes math on
    DataFrames.
    Every ``mag(x)`` in the expression is rewritten to
    ``-2.5*log(x)/log(10)`` before evaluation.
    If that evaluation fails, the error is logged and the expression is
    evaluated again with ``x`` replaced by ``x_instFlux``.

    Parameters
    ----------
    df : ~pandas.DataFrame
        DataFrame on which to evaluate expression.

    expr : str
        Expression.

    log : logger
        Object with an ``error`` method taking a %-style format and
        arguments; used to report a failure of the first evaluation attempt.

    Returns
    -------
    val
        Result of `pandas.DataFrame.eval` on the rewritten expression.
    """
    # Whitespace inside the parentheses is tolerated so that this agrees
    # with the pattern CustomFunctor.columns uses to find flux columns.
    magCall = re.compile(r'mag\(\s*(\w+)\s*\)')
    try:
        expr_new = magCall.sub(r'-2.5*log(\g<1>)/log(10)', expr)
        val = df.eval(expr_new)
    except Exception as e:  # Should check what actually gets raised
        log.error("Exception in mag_aware_eval: %s: %s", type(e).__name__, e)
        expr_new = magCall.sub(r'-2.5*log(\g<1>_instFlux)/log(10)', expr)
        val = df.eval(expr_new)
    return val

600 

601 

class CustomFunctor(Functor):
    """Arbitrary computation on a catalog.

    Column names (and thus the columns to be loaded from catalog) are found by
    finding all words and trying to ignore all "math-y" words.

    Parameters
    ----------
    expr : str
        Expression to evaluate, to be parsed and executed by
        `~lsst.pipe.tasks.functors.mag_aware_eval`.
    """
    _ignore_words = ('mag', 'sin', 'cos', 'exp', 'log', 'sqrt')

    def __init__(self, expr, **kwargs):
        self.expr = expr
        super().__init__(**kwargs)

    @property
    def name(self):
        """The expression itself serves as the functor name."""
        return self.expr

    @property
    def columns(self):
        """Column names referenced by the expression.

        An argument of ``mag(...)`` that does not already end in
        ``_instFlux`` is reported as ``<arg>_instFlux`` and the bare argument
        is dropped from the result.
        """
        flux_cols = re.findall(r'mag\(\s*(\w+)\s*\)', self.expr)

        # Identifiers start with a letter or underscore and may contain
        # digits afterwards.  The leading word boundary keeps the exponent
        # letter of a numeric literal such as ``1e5`` from being picked up.
        cols = [c for c in re.findall(r'\b[a-zA-Z_]\w*', self.expr) if c not in self._ignore_words]
        not_a_col = []
        for c in flux_cols:
            if not re.search('_instFlux$', c):
                cols.append(f'{c}_instFlux')
                not_a_col.append(c)
            else:
                cols.append(c)

        return list({c for c in cols if c not in not_a_col})

    def _func(self, df):
        """Evaluate the expression on ``df``."""
        return mag_aware_eval(df, self.expr, self.log)

641 

642 

class Column(Functor):
    """Functor that hands back one named column of the table untouched.

    Parameters
    ----------
    col : `str`
        Name of the column to return.
    **kwargs
        Forwarded to the `Functor` initializer.
    """

    def __init__(self, col, **kwargs):
        self.col = col
        super().__init__(**kwargs)

    @property
    def name(self):
        """The column name doubles as the functor name."""
        return self.col

    @property
    def columns(self):
        """Single-element list holding the requested column name."""
        return [self.col]

    def _func(self, df):
        """Select the requested column from ``df``."""
        selected = df[self.col]
        return selected

660 

661 

class Index(Functor):
    """Functor whose result is the table index itself, one value per row."""

    # The calculation needs no column at all, but at least one has to be
    # requested when loading; 'coord_ra' is just a placeholder.
    columns = ['coord_ra']
    _defaultDataset = 'ref'
    _defaultNoDup = True

    def _func(self, df):
        """Return a Series that is both indexed by and filled with
        ``df.index``.
        """
        rowIndex = df.index
        return pd.Series(rowIndex, index=rowIndex)

671 

672 

class CoordColumn(Column):
    """Base class for a coordinate column reported in degrees.

    When the class attribute ``_radians`` is true the column is multiplied
    by ``180 / pi``; otherwise the column is returned as stored.
    """
    _radians = True

    def __init__(self, col, **kwargs):
        super().__init__(col, **kwargs)

    def _func(self, df):
        """Return the coordinate column, scaled when ``_radians`` is set."""
        if not self._radians:
            return df[self.col]
        # The multiplication builds a new Series, so the source column is
        # left untouched for any other functor that reads it.
        return df[self.col] * 180 / np.pi

685 

686 

class RAColumn(CoordColumn):
    """Right Ascension in degrees, read from the ``coord_ra`` column."""
    name = 'RA'
    _defaultNoDup = True

    def __init__(self, **kwargs):
        # The source column is fixed; only the Functor options are open.
        super().__init__('coord_ra', **kwargs)

    def __call__(self, catalog, **kwargs):
        """Evaluate on ``catalog`` by delegating to the parent class."""
        result = super().__call__(catalog, **kwargs)
        return result

697 

698 

class DecColumn(CoordColumn):
    """Declination in degrees, read from the ``coord_dec`` column."""
    name = 'Dec'
    _defaultNoDup = True

    def __init__(self, **kwargs):
        # The source column is fixed; only the Functor options are open.
        super().__init__('coord_dec', **kwargs)

    def __call__(self, catalog, **kwargs):
        """Evaluate on ``catalog`` by delegating to the parent class."""
        result = super().__call__(catalog, **kwargs)
        return result

709 

710 

class HtmIndex20(Functor):
    """Compute the level 20 HtmIndex for the catalog.

    Parameters
    ----------
    ra : `str`
        Name of the column holding the longitude-like coordinate.
    dec : `str`
        Name of the column holding the latitude-like coordinate.
    **kwargs
        Forwarded to the `Functor` initializer.

    Notes
    -----
    This functor was implemented to satisfy requirements of old APDB interface
    which required the ``pixelId`` column in DiaObject with HTM20 index.
    The APDB interface had migrated to not need that information, but we keep
    this class in case it may be useful for something else.
    """
    name = "Htm20"
    htmLevel = 20
    _radians = True

    def __init__(self, ra, dec, **kwargs):
        self.pixelator = sphgeom.HtmPixelization(self.htmLevel)
        self.ra = ra
        self.dec = dec
        self._columns = [self.ra, self.dec]
        super().__init__(**kwargs)

    def _func(self, df):
        """Return the pixel index of every row as an ``int64`` Series."""

        def computePixel(row):
            # Interpret the two coordinate values in the unit selected by
            # the ``_radians`` class attribute.
            angleUnit = geom.radians if self._radians else geom.degrees
            sphPoint = geom.SpherePoint(row[self.ra], row[self.dec], angleUnit)
            return self.pixelator.index(sphPoint.getVector())

        perRow = df.apply(computePixel, axis=1, result_type='reduce')
        return perRow.astype('int64')

746 

747 

def fluxName(col):
    """Return ``col`` with ``_instFlux`` appended unless it already ends
    with that suffix.
    """
    suffix = '_instFlux'
    return col if col.endswith(suffix) else col + suffix

753 

754 

def fluxErrName(col):
    """Return the name of the flux-error column that belongs to ``col``.

    Parameters
    ----------
    col : `str`
        Either a base measurement name, a flux column name ending in
        ``_instFlux``, or an error column name ending in ``_instFluxErr``.

    Returns
    -------
    col : `str`
        A name ending in ``_instFluxErr``.
        A name that already ends in ``_instFluxErr`` is returned unchanged,
        and a name ending in ``_instFlux`` only gets ``Err`` appended (the
        same convention `MagErr.columns` uses), so the suffix is never
        duplicated.
    """
    if col.endswith('_instFluxErr'):
        return col
    if col.endswith('_instFlux'):
        return col + 'Err'
    return col + '_instFluxErr'

760 

761 

class Mag(Functor):
    """Compute calibrated magnitude.

    The result is ``-2.5*log10(flux / fluxMag0)``, where ``fluxMag0`` is the
    flux at mag=0.
    The default ``fluxMag0`` is 63095734448.0194, which is default for HSC.
    TO DO: This default should be made configurable in DM-21955.

    This calculation hides warnings about invalid values and dividing by zero.

    As with all functors, a ``dataset`` and ``filt`` kwarg should be provided
    upon initialization.
    Unlike the default `Functor`, however, the default dataset for a `Mag` is
    ``'meas'``, rather than ``'ref'``.

    Parameters
    ----------
    col : `str`
        Name of flux column from which to compute magnitude.
        Can be parseable by the `~lsst.pipe.tasks.functors.fluxName` function;
        that is, you can pass ``'modelfit_CModel'`` instead of
        ``'modelfit_CModel_instFlux'``, and it will understand.
    calib : optional
        Deprecated. If given, a `FutureWarning` is issued and ``fluxMag0`` is
        taken from the first element of ``calib.getFluxMag0()``.
    """
    _defaultDataset = 'meas'

    def __init__(self, col, calib=None, **kwargs):
        self.col = fluxName(col)
        self.calib = calib
        if calib is None:
            # TO DO: DM-21955 Replace hard coded photometic calibration values.
            self.fluxMag0 = 63095734448.0194
        else:
            # TO DO: DM-39914 Remove deprecated calib argument in Mag functor.
            warnings.warn(
                "The 'calib' argument is deprecated, and will be removed after v27.",
                FutureWarning,
                stacklevel=2,
            )
            self.fluxMag0 = calib.getFluxMag0()[0]

        super().__init__(**kwargs)

    @property
    def columns(self):
        """Single-element list holding the flux column name."""
        return [self.col]

    def _func(self, df):
        """Convert the flux column of ``df`` to magnitudes."""
        with warnings.catch_warnings():
            # Non-positive fluxes are expected; silence the resulting noise.
            for pattern in (r'invalid value encountered', r'divide by zero'):
                warnings.filterwarnings('ignore', pattern)
            fluxRatio = df[self.col] / self.fluxMag0
            return -2.5*np.log10(fluxRatio)

    @property
    def name(self):
        """Flux column name prefixed with ``mag_``."""
        return f'mag_{self.col}'

816 

817 

class MagErr(Mag):
    """Compute calibrated magnitude uncertainty.

    The flux error column is the flux column name with ``Err`` appended.
    The relative flux error and the relative ``fluxMag0`` error are added in
    quadrature and scaled by ``2.5 / ln(10)``.

    Parameters
    ----------
    col : `str`
        Name of the flux column.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        if self.calib is None:
            # TO DO: DM-21955 Replace hard coded photometic calibration values.
            self.fluxMag0Err = 0.
        else:
            # TO DO: DM-39914 Remove deprecated calib argument in Mag functor.
            self.fluxMag0Err = self.calib.getFluxMag0()[1]
            warnings.warn(
                "The 'calib' argument is deprecated, and will be removed after v27.",
                FutureWarning,
                stacklevel=2,
            )

    @property
    def columns(self):
        """Flux column followed by its error column."""
        return [self.col, self.col + 'Err']

    def _func(self, df):
        """Return the magnitude uncertainty for every row of ``df``."""
        with warnings.catch_warnings():
            for pattern in (r'invalid value encountered', r'divide by zero'):
                warnings.filterwarnings('ignore', pattern)
            fluxCol, fluxErrCol = self.columns
            relFluxErr = df[fluxErrCol] / df[fluxCol]
            relZeroPointErr = self.fluxMag0Err / self.fluxMag0
            scale = 2.5 / np.log(10.)
            return scale * np.sqrt(relFluxErr*relFluxErr + relZeroPointErr*relZeroPointErr)

    @property
    def name(self):
        """Name of the corresponding `Mag` with ``_err`` appended."""
        return super().name + '_err'

858 

859 

class MagDiff(Functor):
    """Functor to calculate magnitude difference.

    The result is ``-2.5*log10(flux1 / flux2)`` for the two flux columns.

    Parameters
    ----------
    col1, col2 : `str`
        Names of the two flux columns; each is passed through
        `~lsst.pipe.tasks.functors.fluxName`.
    """
    _defaultDataset = 'meas'

    def __init__(self, col1, col2, **kwargs):
        self.col1 = fluxName(col1)
        self.col2 = fluxName(col2)
        super().__init__(**kwargs)

    @property
    def columns(self):
        """The two flux columns, first then second."""
        return [self.col1, self.col2]

    def _func(self, df):
        """Return the magnitude difference for every row of ``df``."""
        with warnings.catch_warnings():
            for pattern in (r'invalid value encountered', r'divide by zero'):
                warnings.filterwarnings('ignore', pattern)
            fluxRatio = df[self.col1]/df[self.col2]
            return -2.5*np.log10(fluxRatio)

    @property
    def name(self):
        """Label of the form ``(mag_<col1> - mag_<col2>)``."""
        return f'(mag_{self.col1} - mag_{self.col2})'

    @property
    def shortname(self):
        """Key of the form ``magDiff_<col1>_<col2>``."""
        return f'magDiff_{self.col1}_{self.col2}'

886 

887 

class Color(Functor):
    """Compute the color between two filters.

    Computes color by initializing two different `Mag` functors based on the
    ``col`` and filters provided, and then returning the difference.

    This is enabled by the `_func` method expecting a DataFrame with a
    multilevel column index, with both ``'band'`` and ``'column'``, instead of
    just ``'column'``, which is the `Functor` default.
    This is controlled by the `_dfLevels` attribute.

    Also of note, the default dataset for `Color` is ``'forced_src'``, whereas
    for `Mag` it is ``'meas'``.

    Parameters
    ----------
    col : str
        Name of the flux column from which to compute; same as would be passed
        to `~lsst.pipe.tasks.functors.Mag`.

    filt2, filt1 : str
        Filters from which to compute magnitude difference.
        Color computed is ``Mag(filt2) - Mag(filt1)``.
    """
    _defaultDataset = 'forced_src'
    _dfLevels = ('band', 'column')
    _defaultNoDup = True

    def __init__(self, col, filt2, filt1, **kwargs):
        self.col = fluxName(col)
        if filt2 == filt1:
            raise RuntimeError("Cannot compute Color for %s: %s - %s " % (col, filt2, filt1))
        self.filt2 = filt2
        self.filt1 = filt1

        # One magnitude functor per band; both receive the same options.
        self.mag2 = Mag(col, filt=filt2, **kwargs)
        self.mag1 = Mag(col, filt=filt1, **kwargs)

        super().__init__(**kwargs)

    @property
    def filt(self):
        """Always `None`: a color spans two bands, so it has no single one."""
        return None

    @filt.setter
    def filt(self, filt):
        # Assignments (including the one made by the base initializer) are
        # deliberately ignored.
        pass

    def _func(self, df):
        """Return ``mag(filt2) - mag(filt1)`` from the per-band sub-frames."""
        secondBand = self.mag2._func(df[self.filt2])
        firstBand = self.mag1._func(df[self.filt1])
        return secondBand - firstBand

    @property
    def columns(self):
        """Flux columns of the two `Mag` functors (``filt1`` first)."""
        return [self.mag1.col, self.mag2.col]

    def multilevelColumns(self, parq, **kwargs):
        """Return the two column tuples, ``filt1`` first, each laid out as
        (dataset, band, column).
        """
        return [(self.dataset, band, self.col) for band in (self.filt1, self.filt2)]

    @property
    def name(self):
        """Label of the form ``<filt2> - <filt1> (<col>)``."""
        return f'{self.filt2} - {self.filt1} ({self.col})'

    @property
    def shortname(self):
        """Key of the form ``<col>_<filt2>m<filt1>`` with hyphens removed
        from both filter names.
        """
        second = self.filt2.replace('-', '')
        first = self.filt1.replace('-', '')
        return f"{self.col}_{second}m{first}"

955 

956 

class DeconvolvedMoments(Functor):
    """This functor subtracts the trace of the PSF second moments from the
    trace of the second moments of the source.

    If the HsmShapeAlgorithm measurement is valid, then these will be used for
    the sources.
    Otherwise, the SdssShapeAlgorithm measurements will be used.
    """
    name = 'Deconvolved Moments'
    shortname = 'deconvolvedMoments'
    _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
                "ext_shapeHSM_HsmSourceMoments_yy",
                "base_SdssShape_xx", "base_SdssShape_yy",
                "ext_shapeHSM_HsmPsfMoments_xx",
                "ext_shapeHSM_HsmPsfMoments_yy")

    def _func(self, df):
        """Calculate deconvolved moments.

        Parameters
        ----------
        df : `~pandas.DataFrame`
            Table holding the SDSS shape columns, the HSM PSF moment columns
            and, optionally, the HSM source moment columns.

        Returns
        -------
        deconvolved : `~pandas.Series`
            Source trace (HSM where finite, SDSS otherwise) minus PSF trace.

        Raises
        ------
        RuntimeError
            Raised if the HSM PSF moment columns are not present in ``df``.
        """
        if "ext_shapeHSM_HsmSourceMoments_xx" in df.columns:  # _xx added by tdm
            hsm = df["ext_shapeHSM_HsmSourceMoments_xx"] + df["ext_shapeHSM_HsmSourceMoments_yy"]
        else:
            # Must be a Series aligned with df: a bare ndarray has no usable
            # ``where(cond, other)`` method, so the fallback below would fail.
            hsm = pd.Series(np.nan, index=df.index)
        sdss = df["base_SdssShape_xx"] + df["base_SdssShape_yy"]
        if "ext_shapeHSM_HsmPsfMoments_xx" in df.columns:
            psf = df["ext_shapeHSM_HsmPsfMoments_xx"] + df["ext_shapeHSM_HsmPsfMoments_yy"]
        else:
            # LSST does not have shape.sdss.psf.
            # We could instead add base_PsfShape to the catalog using
            # exposure.getPsf().computeShape(s.getCentroid()).getIxx().
            raise RuntimeError('No psf shape parameter found in catalog')

        # Keep the HSM trace where it is finite; fall back to SDSS elsewhere.
        return hsm.where(np.isfinite(hsm), sdss) - psf

989 

990 

class SdssTraceSize(Functor):
    """Functor returning the SDSS trace radius of each source.

    The trace radius is the square root of half the trace of the second
    moments tensor measured by the SdssShapeAlgorithm plugin.
    This has units of pixels.
    """
    name = "SDSS Trace Size"
    shortname = 'sdssTrace'
    _columns = ("base_SdssShape_xx", "base_SdssShape_yy")

    def _func(self, df):
        """Return ``sqrt(0.5*(xx + yy))`` from the SDSS shape columns."""
        trace = df["base_SdssShape_xx"] + df["base_SdssShape_yy"]
        return np.sqrt(0.5*trace)

1006 

1007 

class PsfSdssTraceSizeDiff(Functor):
    """Functor returning the percentage difference between the SDSS trace
    radius of the object and that of the PSF model.

    The difference is normalised by the mean of the two sizes.

    See Also
    --------
    SdssTraceSize
    """
    name = "PSF - SDSS Trace Size"
    shortname = 'psf_sdssTrace'
    _columns = ("base_SdssShape_xx", "base_SdssShape_yy",
                "base_SdssShape_psf_xx", "base_SdssShape_psf_yy")

    def _func(self, df):
        """Return ``100*(src - psf)/mean(src, psf)`` of the trace radii."""
        objectRadius = np.sqrt(0.5*(df["base_SdssShape_xx"] + df["base_SdssShape_yy"]))
        modelRadius = np.sqrt(0.5*(df["base_SdssShape_psf_xx"] + df["base_SdssShape_psf_yy"]))
        meanRadius = 0.5*(objectRadius + modelRadius)
        return 100*(objectRadius - modelRadius)/meanRadius

1026 

1027 

class HsmTraceSize(Functor):
    """Functor returning the HSM trace radius of each source.

    The trace radius is the square root of half the trace of the second
    moments tensor measured by the HsmShapeAlgorithm plugin.
    This has units of pixels.
    """
    name = 'HSM Trace Size'
    shortname = 'hsmTrace'
    _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
                "ext_shapeHSM_HsmSourceMoments_yy")

    def _func(self, df):
        """Return ``sqrt(0.5*(xx + yy))`` from the HSM source moments."""
        trace = df["ext_shapeHSM_HsmSourceMoments_xx"] + df["ext_shapeHSM_HsmSourceMoments_yy"]
        return np.sqrt(0.5*trace)

1045 

1046 

class PsfHsmTraceSizeDiff(Functor):
    """Functor returning the percentage difference between the HSM trace
    radius of the object and that of the PSF model.

    The difference is normalised by the mean of the two sizes.

    See Also
    --------
    HsmTraceSize
    """
    name = 'PSF - HSM Trace Size'
    shortname = 'psf_HsmTrace'
    _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
                "ext_shapeHSM_HsmSourceMoments_yy",
                "ext_shapeHSM_HsmPsfMoments_xx",
                "ext_shapeHSM_HsmPsfMoments_yy")

    def _func(self, df):
        """Return ``100*(src - psf)/mean(src, psf)`` of the trace radii."""
        objectTrace = df["ext_shapeHSM_HsmSourceMoments_xx"] + df["ext_shapeHSM_HsmSourceMoments_yy"]
        objectRadius = np.sqrt(0.5*objectTrace)
        modelTrace = df["ext_shapeHSM_HsmPsfMoments_xx"] + df["ext_shapeHSM_HsmPsfMoments_yy"]
        modelRadius = np.sqrt(0.5*modelTrace)
        meanRadius = 0.5*(objectRadius + modelRadius)
        return 100*(objectRadius - modelRadius)/meanRadius

1069 

1070 

class HsmFwhm(Functor):
    """Functor returning the PSF FWHM derived from the second moments
    measured by the HsmShapeAlgorithm plugin.

    The result is in arcseconds and relies on the hsc_rings_v1 skymap pixel
    scale of 0.168 arcseconds/pixel.

    Notes
    -----
    The sigma-to-FWHM conversion assumes a Gaussian PSF, which is not always
    the case.
    """
    name = 'HSM Psf FWHM'
    _columns = ('ext_shapeHSM_HsmPsfMoments_xx', 'ext_shapeHSM_HsmPsfMoments_yy')
    # TODO: DM-21403 pixel scale should be computed from the CD matrix or transform matrix
    pixelScale = 0.168
    # Gaussian sigma to FWHM factor, 2*sqrt(2*ln(2)).
    SIGMA2FWHM = 2*np.sqrt(2*np.log(2))

    def _func(self, df):
        """Return the trace radius of the PSF moments scaled to a FWHM in
        arcseconds.
        """
        psfTrace = df['ext_shapeHSM_HsmPsfMoments_xx'] + df['ext_shapeHSM_HsmPsfMoments_yy']
        pixelToFwhm = self.pixelScale*self.SIGMA2FWHM
        return pixelToFwhm*np.sqrt(0.5*psfTrace)

1091 

1092 

class E1(Functor):
    r"""Calculate :math:`e_1` ellipticity component for sources, defined as:

    .. math::
        e_1 &= (I_{xx}-I_{yy})/(I_{xx}+I_{yy})

    Parameters
    ----------
    colXX, colXY, colYY : `str`
        Names of the columns holding the xx, xy and yy second moments.
        ``colXY`` is requested but does not enter the :math:`e_1` formula.
    **kwargs
        Forwarded unchanged to the parent constructor.

    See Also
    --------
    E2
    """
    name = "Distortion Ellipticity (e1)"
    shortname = "Distortion"

    def __init__(self, colXX, colXY, colYY, **kwargs):
        self.colXX = colXX
        self.colXY = colXY
        self.colYY = colYY
        self._columns = [self.colXX, self.colXY, self.colYY]
        super().__init__(**kwargs)

    @property
    def columns(self):
        """The three moment columns, in xx, xy, yy order."""
        return [self.colXX, self.colXY, self.colYY]

    def _func(self, df):
        """Return ``(Ixx - Iyy)/(Ixx + Iyy)``."""
        # The numerator must be parenthesised; without it, division binds
        # tighter and the result is Ixx - Iyy/(Ixx + Iyy), which does not
        # match the definition in the class docstring.
        return (df[self.colXX] - df[self.colYY]) / (df[self.colXX] + df[self.colYY])

1119 

1120 

class E2(Functor):
    r"""Calculate :math:`e_2` ellipticity component for sources, defined as:

    .. math::
        e_2 &= 2I_{xy}/(I_{xx}+I_{yy})

    Parameters
    ----------
    colXX, colXY, colYY : `str`
        Names of the columns holding the xx, xy and yy second moments.
    **kwargs
        Forwarded unchanged to the parent constructor.

    See Also
    --------
    E1
    """
    name = "Ellipticity e2"

    def __init__(self, colXX, colXY, colYY, **kwargs):
        self.colXX, self.colXY, self.colYY = colXX, colXY, colYY
        super().__init__(**kwargs)

    @property
    def columns(self):
        """The three moment columns, in xx, xy, yy order."""
        return [self.colXX, self.colXY, self.colYY]

    def _func(self, df):
        """Return ``2*Ixy/(Ixx + Iyy)``."""
        trace = df[self.colXX] + df[self.colYY]
        return 2*df[self.colXY] / trace

1145 

1146 

class RadiusFromQuadrupole(Functor):
    """Calculate the radius from the quadrupole moments.

    The value returned is the fourth root of the determinant of the second
    moments tensor, which has units of pixels.

    Parameters
    ----------
    colXX, colXY, colYY : `str`
        Names of the columns holding the xx, xy and yy second moments.
    **kwargs
        Forwarded unchanged to the parent constructor.

    See Also
    --------
    SdssTraceSize
    HsmTraceSize
    """

    def __init__(self, colXX, colXY, colYY, **kwargs):
        self.colXX, self.colXY, self.colYY = colXX, colXY, colYY
        super().__init__(**kwargs)

    @property
    def columns(self):
        """The three moment columns, in xx, xy, yy order."""
        return [self.colXX, self.colXY, self.colYY]

    def _func(self, df):
        """Return ``(Ixx*Iyy - Ixy**2)**0.25``."""
        determinant = df[self.colXX]*df[self.colYY] - df[self.colXY]**2
        return determinant**0.25

1171 

1172 

class LocalWcs(Functor):
    """Computations using the stored localWcs.

    Parameters
    ----------
    colCD_1_1, colCD_1_2, colCD_2_1, colCD_2_2 : `str`
        Names of the columns holding the four elements of the local Wcs
        affine transform.
    **kwargs
        Forwarded unchanged to the parent constructor.
    """
    name = "LocalWcsOperations"

    def __init__(self,
                 colCD_1_1,
                 colCD_1_2,
                 colCD_2_1,
                 colCD_2_2,
                 **kwargs):
        self.colCD_1_1, self.colCD_1_2 = colCD_1_1, colCD_1_2
        self.colCD_2_1, self.colCD_2_2 = colCD_2_1, colCD_2_2
        super().__init__(**kwargs)

    def computeDeltaRaDec(self, x, y, cd11, cd12, cd21, cd22):
        """Apply the local Wcs affine transform to a pixel offset.

        Parameters
        ----------
        x : `~pandas.Series`
            X pixel coordinate.
        y : `~pandas.Series`
            Y pixel coordinate.
        cd11 : `~pandas.Series`
            [1, 1] element of the local Wcs affine transform.
        cd12 : `~pandas.Series`
            [1, 2] element of the local Wcs affine transform.
        cd21 : `~pandas.Series`
            [2, 1] element of the local Wcs affine transform.
        cd22 : `~pandas.Series`
            [2, 2] element of the local Wcs affine transform.

        Returns
        -------
        raDecTuple : tuple
            RA and dec conversion of x and y given the local Wcs.
            Returned units are in radians.
        """
        deltaRa = x * cd11 + y * cd12
        deltaDec = x * cd21 + y * cd22
        return (deltaRa, deltaDec)

    def computeSkySeparation(self, ra1, dec1, ra2, dec2):
        """Compute the angular separation between two sky positions.

        Parameters
        ----------
        ra1 : `~pandas.Series`
            Ra of the first coordinate in radians.
        dec1 : `~pandas.Series`
            Dec of the first coordinate in radians.
        ra2 : `~pandas.Series`
            Ra of the second coordinate in radians.
        dec2 : `~pandas.Series`
            Dec of the second coordinate in radians.

        Returns
        -------
        dist : `~pandas.Series`
            Distance on the sphere in radians.
        """
        # Haversine formula.
        sinHalfDec = np.sin((dec2 - dec1) / 2)
        sinHalfRa = np.sin((ra2 - ra1) / 2)
        haversine = sinHalfDec ** 2 + np.cos(dec2) * np.cos(dec1) * sinHalfRa ** 2
        return 2 * np.arcsin(np.sqrt(haversine))

    def getSkySeparationFromPixel(self, x1, y1, x2, y2, cd11, cd12, cd21, cd22):
        """Compute the distance on the sphere from x2, y2 to x1, y1.

        Both pixel positions are converted with `computeDeltaRaDec` and the
        results are passed to `computeSkySeparation`.

        Parameters
        ----------
        x1 : `~pandas.Series`
            X pixel coordinate of the first position.
        y1 : `~pandas.Series`
            Y pixel coordinate of the first position.
        x2 : `~pandas.Series`
            X pixel coordinate of the second position.
        y2 : `~pandas.Series`
            Y pixel coordinate of the second position.
        cd11 : `~pandas.Series`
            [1, 1] element of the local Wcs affine transform.
        cd12 : `~pandas.Series`
            [1, 2] element of the local Wcs affine transform.
        cd21 : `~pandas.Series`
            [2, 1] element of the local Wcs affine transform.
        cd22 : `~pandas.Series`
            [2, 2] element of the local Wcs affine transform.

        Returns
        -------
        Distance : `~pandas.Series`
            Value returned by `computeSkySeparation` for the two converted
            positions.
        """
        cdMatrix = (cd11, cd12, cd21, cd22)
        firstRa, firstDec = self.computeDeltaRaDec(x1, y1, *cdMatrix)
        secondRa, secondDec = self.computeDeltaRaDec(x2, y2, *cdMatrix)
        # Great circle distance for small separations.
        return self.computeSkySeparation(firstRa, firstDec, secondRa, secondDec)

1277 

1278 

class ComputePixelScale(LocalWcs):
    """Compute the local pixel scale from the stored CDMatrix."""
    name = "PixelScale"

    @property
    def columns(self):
        """The four CD matrix columns, in 1_1, 1_2, 2_1, 2_2 order."""
        return [self.colCD_1_1, self.colCD_1_2, self.colCD_2_1, self.colCD_2_2]

    def pixelScaleArcseconds(self, cd11, cd12, cd21, cd22):
        """Compute the local pixel to scale conversion in arcseconds.

        The scale is the square root of the absolute determinant of the
        matrix, converted from radians to arcseconds.

        Parameters
        ----------
        cd11 : `~pandas.Series`
            [1, 1] element of the local Wcs affine transform in radians.
        cd12 : `~pandas.Series`
            [1, 2] element of the local Wcs affine transform in radians.
        cd21 : `~pandas.Series`
            [2, 1] element of the local Wcs affine transform in radians.
        cd22 : `~pandas.Series`
            [2, 2] element of the local Wcs affine transform in radians.

        Returns
        -------
        pixScale : `~pandas.Series`
            Arcseconds per pixel at the location of the local WC.
        """
        determinant = cd11 * cd22 - cd12 * cd21
        scaleRadians = np.sqrt(np.fabs(determinant))
        return 3600 * np.degrees(scaleRadians)

    def _func(self, df):
        """Return the pixel scale computed from the CD columns of ``df``."""
        cd11 = df[self.colCD_1_1]
        cd12 = df[self.colCD_1_2]
        cd21 = df[self.colCD_2_1]
        cd22 = df[self.colCD_2_2]
        return self.pixelScaleArcseconds(cd11, cd12, cd21, cd22)

1319 

1320 

class ConvertPixelToArcseconds(ComputePixelScale):
    """Convert a value in units of pixels to units of arcseconds.

    Parameters
    ----------
    col : `str`
        Name of the column holding the value to convert.
    colCD_1_1, colCD_1_2, colCD_2_1, colCD_2_2 : `str`
        Names of the columns holding the local Wcs affine transform.
    **kwargs
        Forwarded unchanged to the parent constructor.
    """

    def __init__(self,
                 col,
                 colCD_1_1,
                 colCD_1_2,
                 colCD_2_1,
                 colCD_2_2,
                 **kwargs):
        self.col = col
        super().__init__(colCD_1_1, colCD_1_2, colCD_2_1, colCD_2_2, **kwargs)

    @property
    def name(self):
        """Label formed from the converted column name."""
        return f"{self.col}_asArcseconds"

    @property
    def columns(self):
        """The value column followed by the four CD matrix columns."""
        return [self.col, self.colCD_1_1, self.colCD_1_2, self.colCD_2_1, self.colCD_2_2]

    def _func(self, df):
        """Return the value column multiplied by the local pixel scale."""
        pixelValue = df[self.col]
        arcsecPerPixel = self.pixelScaleArcseconds(df[self.colCD_1_1],
                                                   df[self.colCD_1_2],
                                                   df[self.colCD_2_1],
                                                   df[self.colCD_2_2])
        return pixelValue * arcsecPerPixel

1355 

1356 

class ConvertPixelSqToArcsecondsSq(ComputePixelScale):
    """Convert a value in units of pixels squared to units of arcseconds
    squared.

    Parameters
    ----------
    col : `str`
        Name of the column holding the value to convert.
    colCD_1_1, colCD_1_2, colCD_2_1, colCD_2_2 : `str`
        Names of the columns holding the local Wcs affine transform.
    **kwargs
        Forwarded unchanged to the parent constructor.
    """

    def __init__(self,
                 col,
                 colCD_1_1,
                 colCD_1_2,
                 colCD_2_1,
                 colCD_2_2,
                 **kwargs):
        self.col = col
        super().__init__(colCD_1_1, colCD_1_2, colCD_2_1, colCD_2_2, **kwargs)

    @property
    def name(self):
        """Label formed from the converted column name."""
        return f"{self.col}_asArcsecondsSq"

    @property
    def columns(self):
        """The value column followed by the four CD matrix columns."""
        return [self.col, self.colCD_1_1, self.colCD_1_2, self.colCD_2_1, self.colCD_2_2]

    def _func(self, df):
        """Return the value column multiplied twice by the local pixel
        scale.
        """
        arcsecPerPixel = self.pixelScaleArcseconds(df[self.colCD_1_1],
                                                   df[self.colCD_1_2],
                                                   df[self.colCD_2_1],
                                                   df[self.colCD_2_2])
        scaledOnce = df[self.col] * arcsecPerPixel
        return scaledOnce * arcsecPerPixel

1394 

1395 

class ReferenceBand(Functor):
    """Return the band used to seed multiband forced photometry.

    This functor is to be used on Object tables.
    It collapses the boolean ``merge_measurement_{band}`` columns into a
    single string per row: the first band, in the order the columns are
    listed, whose ``merge_measurement_{band}`` value is the row maximum.

    Assumes the default priority order of i, r, z, y, g, u.
    """
    name = 'Reference Band'
    shortname = 'refBand'

    @property
    def columns(self):
        """The ``merge_measurement_*`` columns in priority order."""
        return [
            "merge_measurement_i",
            "merge_measurement_r",
            "merge_measurement_z",
            "merge_measurement_y",
            "merge_measurement_g",
            "merge_measurement_u",
        ]

    def _func(self, df: pd.DataFrame) -> pd.Series:
        """Return, for each row, the band name of its reference column."""
        # This functor requests the superset of bands that could appear in
        # the object table, so restrict to the ones actually present.
        present = [name for name in self.columns if name in df.columns]
        # idxmax picks the column with the max value (True > False); the
        # prefix is then stripped to leave only the band name.
        bandPerRow = df[present].apply(
            lambda row: row.idxmax().replace('merge_measurement_', ''),
            axis=1, result_type='reduce')
        # Makes a Series of dtype object if df is empty.
        return bandPerRow.astype('object')

1430 

1431 

class Photometry(Functor):
    """Base class for Object table calibrated fluxes and magnitudes.

    Parameters
    ----------
    colFlux : `str`
        Name of the instrumental flux column.
    colFluxErr : `str`, optional
        Name of the associated flux error column.
    calib : optional
        Object providing ``getFluxMag0()``, which must return the zero-point
        flux and its error. When `None`, a zero point derived from
        `COADD_ZP` is used with zero error.
    **kwargs
        Forwarded unchanged to the parent constructor.
    """
    # AB to NanoJansky (3631 Jansky).
    AB_FLUX_SCALE = (0 * u.ABmag).to_value(u.nJy)
    LOG_AB_FLUX_SCALE = 12.56
    FIVE_OVER_2LOG10 = 1.085736204758129569
    # TO DO: DM-21955 Replace hard coded photometic calibration values.
    COADD_ZP = 27

    def __init__(self, colFlux, colFluxErr=None, calib=None, **kwargs):
        # Element-wise version of the scalar `hypot` helper.
        self.vhypot = np.vectorize(self.hypot)
        self.col = colFlux
        self.colFluxErr = colFluxErr

        self.calib = calib
        if calib is None:
            self.fluxMag0 = 1./np.power(10, -0.4*self.COADD_ZP)
            self.fluxMag0Err = 0.
        else:
            self.fluxMag0, self.fluxMag0Err = calib.getFluxMag0()

        super().__init__(**kwargs)

    @property
    def columns(self):
        """Only the flux column is needed by default."""
        return [self.col]

    @property
    def name(self):
        """Label formed from the flux column name."""
        return f'mag_{self.col}'

    @classmethod
    def hypot(cls, a, b):
        """Compute sqrt(a^2 + b^2) without under/overflow."""
        # Factor out the argument of larger magnitude so that only a ratio
        # is squared.
        larger, smaller = (b, a) if np.abs(a) < np.abs(b) else (a, b)
        if larger == 0.:
            return 0.
        ratio = smaller/larger
        return np.abs(larger) * np.sqrt(1. + ratio*ratio)

    def dn2flux(self, dn, fluxMag0):
        """Convert instrumental flux to nanojanskys."""
        scaled = self.AB_FLUX_SCALE * dn
        return scaled / fluxMag0

    def dn2mag(self, dn, fluxMag0):
        """Convert instrumental flux to AB magnitude."""
        with warnings.catch_warnings():
            # Suppress the 'invalid value' and 'divide by zero' warnings
            # while the ratio and its logarithm are evaluated.
            for message in (r'invalid value encountered', r'divide by zero'):
                warnings.filterwarnings('ignore', message)
            return -2.5 * np.log10(dn/fluxMag0)

    def dn2fluxErr(self, dn, dnErr, fluxMag0, fluxMag0Err):
        """Convert instrumental flux error to nanojanskys."""
        combined = self.vhypot(dn * fluxMag0Err, dnErr * fluxMag0)
        # Scale in place so the array produced by vhypot is reused.
        combined *= self.AB_FLUX_SCALE / fluxMag0 / fluxMag0
        return combined

    def dn2MagErr(self, dn, dnErr, fluxMag0, fluxMag0Err):
        """Convert instrumental flux error to AB magnitude error."""
        fluxErr = self.dn2fluxErr(dn, dnErr, fluxMag0, fluxMag0Err)
        relativeErr = fluxErr / self.dn2flux(dn, fluxMag0)
        return self.FIVE_OVER_2LOG10 * relativeErr

1494 

1495 

class NanoJansky(Photometry):
    """Convert instrumental flux to nanojanskys."""

    def _func(self, df):
        """Return the flux column converted with `dn2flux`."""
        instFlux = df[self.col]
        return self.dn2flux(instFlux, self.fluxMag0)

1500 

1501 

class NanoJanskyErr(Photometry):
    """Convert instrumental flux error to nanojanskys."""

    @property
    def columns(self):
        """The flux column followed by its error column."""
        return [self.col, self.colFluxErr]

    def _func(self, df):
        """Return the flux error from `dn2fluxErr` as a Series sharing the
        index of ``df``.
        """
        instFlux, instFluxErr = df[self.col], df[self.colFluxErr]
        fluxErrArr = self.dn2fluxErr(instFlux, instFluxErr, self.fluxMag0, self.fluxMag0Err)
        return pd.Series(fluxErrArr, index=df.index)

1511 

1512 

class LocalPhotometry(Functor):
    """Base class for calibrating the specified instrument flux column using
    the local photometric calibration.

    Parameters
    ----------
    instFluxCol : `str`
        Name of the instrument flux column.
    instFluxErrCol : `str`
        Name of the associated error column for ``instFluxCol``.
    photoCalibCol : `str`
        Name of local calibration column.
    photoCalibErrCol : `str`
        Error associated with ``photoCalibCol``
    **kwargs
        Forwarded unchanged to the parent constructor.

    See Also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    """
    # AB magnitude of a 1 nJy source; offset added in `instFluxToMagnitude`.
    logNJanskyToAB = (1 * u.nJy).to_value(u.ABmag)

    def __init__(self,
                 instFluxCol,
                 instFluxErrCol,
                 photoCalibCol,
                 photoCalibErrCol,
                 **kwargs):
        self.instFluxCol = instFluxCol
        self.instFluxErrCol = instFluxErrCol
        self.photoCalibCol = photoCalibCol
        self.photoCalibErrCol = photoCalibErrCol
        super().__init__(**kwargs)

    def instFluxToNanojansky(self, instFlux, localCalib):
        """Convert instrument flux to nanojanskys.

        Parameters
        ----------
        instFlux : `~numpy.ndarray` or `~pandas.Series`
            Array of instrument flux measurements.
        localCalib : `~numpy.ndarray` or `~pandas.Series`
            Array of local photometric calibration estimates.

        Returns
        -------
        calibFlux : `~numpy.ndarray` or `~pandas.Series`
            Array of calibrated flux measurements.
        """
        return instFlux * localCalib

    def instFluxErrToNanojanskyErr(self, instFlux, instFluxErr, localCalib, localCalibErr):
        """Convert instrument flux error to nanojansky error.

        The flux-error and calibration-error contributions are combined in
        quadrature.

        Parameters
        ----------
        instFlux : `~numpy.ndarray` or `~pandas.Series`
            Array of instrument flux measurements.
        instFluxErr : `~numpy.ndarray` or `~pandas.Series`
            Errors on associated ``instFlux`` values.
        localCalib : `~numpy.ndarray` or `~pandas.Series`
            Array of local photometric calibration estimates.
        localCalibErr : `~numpy.ndarray` or `~pandas.Series`
            Errors on associated ``localCalib`` values.

        Returns
        -------
        calibFluxErr : `~numpy.ndarray` or `~pandas.Series`
            Errors on calibrated flux measurements.
        """
        return np.hypot(instFluxErr * localCalib, instFlux * localCalibErr)

    def instFluxToMagnitude(self, instFlux, localCalib):
        """Convert instrument flux to AB magnitude.

        Parameters
        ----------
        instFlux : `~numpy.ndarray` or `~pandas.Series`
            Array of instrument flux measurements.
        localCalib : `~numpy.ndarray` or `~pandas.Series`
            Array of local photometric calibration estimates.

        Returns
        -------
        calibMag : `~numpy.ndarray` or `~pandas.Series`
            Array of calibrated AB magnitudes.
        """
        return -2.5 * np.log10(self.instFluxToNanojansky(instFlux, localCalib)) + self.logNJanskyToAB

    def instFluxErrToMagnitudeErr(self, instFlux, instFluxErr, localCalib, localCalibErr):
        """Convert instrument flux error to AB magnitude error.

        Parameters
        ----------
        instFlux : `~numpy.ndarray` or `~pandas.Series`
            Array of instrument flux measurements.
        instFluxErr : `~numpy.ndarray` or `~pandas.Series`
            Errors on associated ``instFlux`` values.
        localCalib : `~numpy.ndarray` or `~pandas.Series`
            Array of local photometric calibration estimates.
        localCalibErr : `~numpy.ndarray` or `~pandas.Series`
            Errors on associated ``localCalib`` values.

        Returns
        -------
        calibMagErr: `~numpy.ndarray` or `~pandas.Series`
            Error on calibrated AB magnitudes.
        """
        err = self.instFluxErrToNanojanskyErr(instFlux, instFluxErr, localCalib, localCalibErr)
        # The magnitude error is (2.5/ln 10) * fluxErr/flux, so the
        # denominator must be the calibrated flux (instFlux * localCalib),
        # not instFlux * instFluxErr.
        return 2.5 / np.log(10) * err / self.instFluxToNanojansky(instFlux, localCalib)

1623 

1624 

class LocalNanojansky(LocalPhotometry):
    """Compute calibrated fluxes using the local calibration value.

    This returns units of nanojanskys.
    """

    @property
    def columns(self):
        """The instrument flux column and the local calibration column."""
        return [self.instFluxCol, self.photoCalibCol]

    @property
    def name(self):
        """Label formed from the instrument flux column name."""
        return f'flux_{self.instFluxCol}'

    def _func(self, df):
        """Return the instrument flux scaled by the local calibration."""
        instFlux = df[self.instFluxCol]
        localCalib = df[self.photoCalibCol]
        return self.instFluxToNanojansky(instFlux, localCalib)

1641 

1642 

class LocalNanojanskyErr(LocalPhotometry):
    """Compute calibrated flux errors using the local calibration value.

    This returns units of nanojanskys.
    """

    @property
    def columns(self):
        """Flux, flux error, calibration and calibration error columns."""
        return [self.instFluxCol, self.instFluxErrCol,
                self.photoCalibCol, self.photoCalibErrCol]

    @property
    def name(self):
        """Label formed from the instrument flux column name."""
        return f'fluxErr_{self.instFluxCol}'

    def _func(self, df):
        """Return the calibrated flux error for each row of ``df``."""
        instFlux = df[self.instFluxCol]
        instFluxErr = df[self.instFluxErrCol]
        localCalib = df[self.photoCalibCol]
        localCalibErr = df[self.photoCalibErrCol]
        return self.instFluxErrToNanojanskyErr(instFlux, instFluxErr, localCalib, localCalibErr)

1661 

1662 

class LocalDipoleMeanFlux(LocalPhotometry):
    """Compute absolute mean of dipole fluxes.

    Parameters
    ----------
    instFluxPosCol, instFluxNegCol : `str`
        Names of the positive and negative lobe instrument flux columns.
    instFluxPosErrCol, instFluxNegErrCol : `str`
        Names of the error columns associated with the two lobe fluxes.
    photoCalibCol : `str`
        Name of local calibration column.
    photoCalibErrCol : `str`
        Error associated with ``photoCalibCol``
    **kwargs
        Forwarded unchanged to the parent constructor.

    See Also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalDipoleMeanFluxErr
    LocalDipoleDiffFlux
    LocalDipoleDiffFluxErr
    """
    def __init__(self,
                 instFluxPosCol,
                 instFluxNegCol,
                 instFluxPosErrCol,
                 instFluxNegErrCol,
                 photoCalibCol,
                 photoCalibErrCol,
                 **kwargs):
        self.instFluxPosCol, self.instFluxPosErrCol = instFluxPosCol, instFluxPosErrCol
        self.instFluxNegCol, self.instFluxNegErrCol = instFluxNegCol, instFluxNegErrCol
        self.photoCalibCol, self.photoCalibErrCol = photoCalibCol, photoCalibErrCol
        # The parent receives the negative lobe columns as its flux and
        # flux error columns.
        super().__init__(instFluxNegCol,
                         instFluxNegErrCol,
                         photoCalibCol,
                         photoCalibErrCol,
                         **kwargs)

    @property
    def columns(self):
        """Both lobe flux columns and the local calibration column."""
        return [self.instFluxPosCol, self.instFluxNegCol, self.photoCalibCol]

    @property
    def name(self):
        """Label formed from the two lobe flux column names."""
        return f'dipMeanFlux_{self.instFluxPosCol}_{self.instFluxNegCol}'

    def _func(self, df):
        """Return the mean of the absolute calibrated lobe fluxes."""
        localCalib = df[self.photoCalibCol]
        negFlux = np.fabs(self.instFluxToNanojansky(df[self.instFluxNegCol], localCalib))
        posFlux = np.fabs(self.instFluxToNanojansky(df[self.instFluxPosCol], localCalib))
        return 0.5*(negFlux + posFlux)

1707 

1708 

class LocalDipoleMeanFluxErr(LocalDipoleMeanFlux):
    """Compute the error on the absolute mean of dipole fluxes.

    See Also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalDipoleMeanFlux
    LocalDipoleDiffFlux
    LocalDipoleDiffFluxErr
    """

    @property
    def columns(self):
        """Lobe fluxes, their errors, and the calibration with its error."""
        return [self.instFluxPosCol,
                self.instFluxNegCol,
                self.instFluxPosErrCol,
                self.instFluxNegErrCol,
                self.photoCalibCol,
                self.photoCalibErrCol]

    @property
    def name(self):
        """Label formed from the two lobe flux column names."""
        return f'dipMeanFluxErr_{self.instFluxPosCol}_{self.instFluxNegCol}'

    def _func(self, df):
        """Return the propagated error on ``0.5*(|neg| + |pos|)*calib``.

        The calibration-error term and the flux-error term are summed in
        quadrature.
        """
        # The calibration error scales the *sum* of the absolute fluxes, so
        # the sum must be parenthesised before the multiplication; this
        # mirrors the grouping used in LocalDipoleDiffFluxErr.
        return 0.5*np.sqrt(
            ((np.fabs(df[self.instFluxNegCol]) + np.fabs(df[self.instFluxPosCol]))
             * df[self.photoCalibErrCol])**2
            + (df[self.instFluxNegErrCol]**2 + df[self.instFluxPosErrCol]**2)
            * df[self.photoCalibCol]**2)

1740 

1741 

class LocalDipoleDiffFlux(LocalDipoleMeanFlux):
    """Compute the absolute difference of dipole fluxes.

    Calculated value is (abs(pos) - abs(neg)).

    See Also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalDipoleMeanFlux
    LocalDipoleMeanFluxErr
    LocalDipoleDiffFluxErr
    """

    @property
    def columns(self):
        """Both lobe flux columns and the local calibration column."""
        return [self.instFluxPosCol, self.instFluxNegCol, self.photoCalibCol]

    @property
    def name(self):
        """Label formed from the two lobe flux column names."""
        return f'dipDiffFlux_{self.instFluxPosCol}_{self.instFluxNegCol}'

    def _func(self, df):
        """Return the absolute calibrated positive flux minus the absolute
        calibrated negative flux.
        """
        localCalib = df[self.photoCalibCol]
        posFlux = np.fabs(self.instFluxToNanojansky(df[self.instFluxPosCol], localCalib))
        negFlux = np.fabs(self.instFluxToNanojansky(df[self.instFluxNegCol], localCalib))
        return posFlux - negFlux

1769 

1770 

class LocalDipoleDiffFluxErr(LocalDipoleMeanFlux):
    """Compute the error on the absolute difference of dipole fluxes.

    See Also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalDipoleMeanFlux
    LocalDipoleMeanFluxErr
    LocalDipoleDiffFlux
    """

    @property
    def columns(self):
        """Lobe fluxes, their errors, and the calibration with its error."""
        return [self.instFluxPosCol, self.instFluxNegCol,
                self.instFluxPosErrCol, self.instFluxNegErrCol,
                self.photoCalibCol, self.photoCalibErrCol]

    @property
    def name(self):
        """Label formed from the two lobe flux column names."""
        return f'dipDiffFluxErr_{self.instFluxPosCol}_{self.instFluxNegCol}'

    def _func(self, df):
        """Return the calibration-error and flux-error terms combined in
        quadrature.
        """
        fluxDiff = np.fabs(df[self.instFluxPosCol]) - np.fabs(df[self.instFluxNegCol])
        calibTerm = (fluxDiff * df[self.photoCalibErrCol])**2
        fluxVariance = df[self.instFluxPosErrCol]**2 + df[self.instFluxNegErrCol]**2
        fluxTerm = fluxVariance * df[self.photoCalibCol]**2
        return np.sqrt(calibTerm + fluxTerm)

1802 

1803 

class Ebv(Functor):
    """Compute E(B-V) from dustmaps.sfd.

    Parameters
    ----------
    **kwargs
        Forwarded unchanged to the parent constructor.
    """
    _defaultDataset = 'ref'
    name = "E(B-V)"
    shortname = "ebv"

    def __init__(self, **kwargs):
        # Import is only needed for Ebv.
        from dustmaps.sfd import SFDQuery
        self._columns = ['coord_ra', 'coord_dec']
        self.sfd = SFDQuery()
        super().__init__(**kwargs)

    def _func(self, df):
        """Query the SFD map at each row's coordinates.

        The ``coord_ra`` and ``coord_dec`` columns are passed to `SkyCoord`
        as radians, and the result is returned as a float64 Series sharing
        the index of ``df``.
        """
        raRad = df['coord_ra'].values * u.rad
        decRad = df['coord_dec'].values * u.rad
        reddening = self.sfd(SkyCoord(raRad, decRad))
        # Double precision unnecessary scientifically but currently needed for
        # ingest to qserv.
        return pd.Series(reddening, index=df.index).astype('float64')