Coverage for python/lsst/pipe/tasks/functors.py: 42%

735 statements  

« prev     ^ index     » next       coverage.py v7.4.1, created at 2024-02-13 12:19 +0000

1# This file is part of pipe_tasks. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

# Public API of this module: functor infrastructure plus the concrete
# functor classes and helper functions for flux-column naming.
__all__ = ["init_fromDict", "Functor", "CompositeFunctor", "mag_aware_eval",
           "CustomFunctor", "Column", "Index", "CoordColumn", "RAColumn",
           "DecColumn", "HtmIndex20", "fluxName", "fluxErrName", "Mag",
           "MagErr", "MagDiff", "Color", "DeconvolvedMoments", "SdssTraceSize",
           "PsfSdssTraceSizeDiff", "HsmTraceSize", "PsfHsmTraceSizeDiff",
           "HsmFwhm", "E1", "E2", "RadiusFromQuadrupole", "LocalWcs",
           "ComputePixelScale", "ConvertPixelToArcseconds",
           "ConvertPixelSqToArcsecondsSq", "ReferenceBand", "Photometry",
           "NanoJansky", "NanoJanskyErr", "LocalPhotometry", "LocalNanojansky",
           "LocalNanojanskyErr", "LocalDipoleMeanFlux",
           "LocalDipoleMeanFluxErr", "LocalDipoleDiffFlux",
           "LocalDipoleDiffFluxErr", "Ebv",
           ]

35 

36import yaml 

37import re 

38from itertools import product 

39import logging 

40import os.path 

41import warnings 

42 

43import pandas as pd 

44import numpy as np 

45import astropy.units as u 

46from astropy.coordinates import SkyCoord 

47 

48from lsst.utils import doImport 

49from lsst.utils.introspection import get_full_type_name 

50from lsst.daf.butler import DeferredDatasetHandle 

51from lsst.pipe.base import InMemoryDatasetHandle 

52import lsst.geom as geom 

53import lsst.sphgeom as sphgeom 

54 

55 

def init_fromDict(initDict, basePath='lsst.pipe.tasks.functors',
                  typeKey='functor', name=None):
    """Initialize an object defined in a dictionary.

    The object needs to be importable as f'{basePath}.{initDict[typeKey]}'.
    The positional and keyword arguments (if any) are contained in "args" and
    "kwargs" entries in the dictionary, respectively.
    This is used in `~lsst.pipe.tasks.functors.CompositeFunctor.from_yaml` to
    initialize a composite functor from a specification in a YAML file.

    Parameters
    ----------
    initDict : dictionary
        Dictionary describing object's initialization.
        Must contain an entry keyed by ``typeKey`` that is the name of the
        object, relative to ``basePath``.
    basePath : str
        Path relative to module in which ``initDict[typeKey]`` is defined.
    typeKey : str
        Key of ``initDict`` that is the name of the object (relative to
        ``basePath``).
    name : str, optional
        Name used only to label any construction-error message.

    Returns
    -------
    element
        The constructed object; its type is whatever class
        ``initDict[typeKey]`` resolves to.

    Raises
    ------
    Exception
        Re-raised with the same type as the underlying constructor failure,
        wrapped with a message identifying the functor being built.
    """
    # Work on a copy: everything left after popping the type (and 'args')
    # is forwarded as constructor keyword arguments.
    initDict = initDict.copy()
    # TO DO: DM-21956 We should be able to define functors outside this module
    pythonType = doImport(f'{basePath}.{initDict.pop(typeKey)}')
    # A bare string is treated as a single positional argument.
    args = initDict.pop('args', [])
    if isinstance(args, str):
        args = [args]
    try:
        element = pythonType(*args, **initDict)
    except Exception as e:
        message = f'Error in constructing functor "{name}" of type {pythonType.__name__} with args: {args}'
        # Chain the original exception so the root-cause traceback is
        # preserved (previously the chain was implicit-only).
        raise type(e)(message, e.args) from e
    return element

92 

93 

class Functor(object):
    """Define and execute a calculation on a DataFrame or Handle holding a
    DataFrame.

    The `__call__` method accepts either a `~pandas.DataFrame` object or a
    `~lsst.daf.butler.DeferredDatasetHandle` or
    `~lsst.pipe.base.InMemoryDatasetHandle`, and returns the
    result of the calculation as a single column.
    Each functor defines what columns are needed for the calculation, and only
    these columns are read from the dataset handle.

    The action of `__call__` consists of two steps: first, loading the
    necessary columns from disk into memory as a `~pandas.DataFrame` object;
    and second, performing the computation on this DataFrame and returning the
    result.

    To define a new `Functor`, a subclass must define a `_func` method,
    that takes a `~pandas.DataFrame` and returns result in a `~pandas.Series`.
    In addition, it must define the following attributes:

    * `_columns`: The columns necessary to perform the calculation
    * `name`: A name appropriate for a figure axis label
    * `shortname`: A name appropriate for use as a dictionary key

    On initialization, a `Functor` should declare what band (``filt`` kwarg)
    and dataset (e.g. ``'ref'``, ``'meas'``, ``'forced_src'``) it is intended
    to be applied to.
    This enables the `_get_data` method to extract the proper columns from the
    underlying data.
    If not specified, the dataset will fall back on the `_defaultDataset`
    attribute.
    If band is not specified and ``dataset`` is anything other than ``'ref'``,
    then an error will be raised when trying to perform the calculation.

    Originally, `Functor` was set up to expect datasets formatted like the
    ``deepCoadd_obj`` dataset; that is, a DataFrame with a multi-level column
    index, with the levels of the column index being ``band``, ``dataset``, and
    ``column``.
    It has since been generalized to apply to DataFrames without multi-level
    indices and multi-level indices with just ``dataset`` and ``column``
    levels.
    In addition, the `_get_data` method that reads the columns from the
    underlying data will return a DataFrame with column index levels defined by
    the `_dfLevels` attribute; by default, this is ``column``.

    The `_dfLevels` attributes should generally not need to be changed, unless
    `_func` needs columns from multiple filters or datasets to do the
    calculation.
    An example of this is the `~lsst.pipe.tasks.functors.Color` functor, for
    which `_dfLevels = ('band', 'column')`, and `_func` expects the DataFrame
    it gets to have those levels in the column index.

    Parameters
    ----------
    filt : str
        Band upon which to do the calculation.

    dataset : str
        Dataset upon which to do the calculation (e.g., 'ref', 'meas',
        'forced_src').
    """

    # Class-level defaults; subclasses override these to change behavior.
    _defaultDataset = 'ref'
    _dfLevels = ('column',)
    _defaultNoDup = False

    def __init__(self, filt=None, dataset=None, noDup=None):
        self.filt = filt
        self.dataset = dataset if dataset is not None else self._defaultDataset
        # Tri-state: None means "use the class default" (see `noDup`).
        self._noDup = noDup
        self.log = logging.getLogger(type(self).__name__)

    @property
    def noDup(self):
        """Do not explode by band if used on object table."""
        if self._noDup is not None:
            return self._noDup
        else:
            return self._defaultNoDup

    @property
    def columns(self):
        """Columns required to perform calculation."""
        if not hasattr(self, '_columns'):
            raise NotImplementedError('Must define columns property or _columns attribute')
        return self._columns

    def _get_data_columnLevels(self, data, columnIndex=None):
        """Gets the names of the column index levels.

        This should only be called in the context of a multilevel table.

        Parameters
        ----------
        data : various
            The data to be read, can be a
            `~lsst.daf.butler.DeferredDatasetHandle` or
            `~lsst.pipe.base.InMemoryDatasetHandle`.
        columnIndex (optional): pandas `~pandas.Index` object
            If not passed, then it is read from the
            `~lsst.daf.butler.DeferredDatasetHandle`
            or `~lsst.pipe.base.InMemoryDatasetHandle`.
        """
        if columnIndex is None:
            columnIndex = data.get(component="columns")
        return columnIndex.names

    def _get_data_columnLevelNames(self, data, columnIndex=None):
        """Gets the content of each of the column levels for a multilevel
        table.
        """
        if columnIndex is None:
            columnIndex = data.get(component="columns")

        columnLevels = columnIndex.names
        # Map each level name to the sorted unique values found in that
        # position of the column tuples.
        columnLevelNames = {
            level: list(np.unique(np.array([c for c in columnIndex])[:, i]))
            for i, level in enumerate(columnLevels)
        }
        return columnLevelNames

    def _colsFromDict(self, colDict, columnIndex=None):
        """Converts dictionary column specification to a list of columns."""
        new_colDict = {}
        columnLevels = self._get_data_columnLevels(None, columnIndex=columnIndex)

        for i, lev in enumerate(columnLevels):
            if lev in colDict:
                if isinstance(colDict[lev], str):
                    new_colDict[lev] = [colDict[lev]]
                else:
                    new_colDict[lev] = colDict[lev]
            else:
                # Level not constrained by the spec: take all of its values.
                new_colDict[lev] = columnIndex.levels[i]

        # Cartesian product of the per-level selections, restricted to the
        # tuples that actually exist in the column index.
        levelCols = [new_colDict[lev] for lev in columnLevels]
        cols = list(product(*levelCols))
        colsAvailable = [col for col in cols if col in columnIndex]
        return colsAvailable

    def multilevelColumns(self, data, columnIndex=None, returnTuple=False):
        """Returns columns needed by functor from multilevel dataset.

        To access tables with multilevel column structure, the
        `~lsst.daf.butler.DeferredDatasetHandle` or
        `~lsst.pipe.base.InMemoryDatasetHandle` needs to be passed
        either a list of tuples or a dictionary.

        Parameters
        ----------
        data : various
            The data as either `~lsst.daf.butler.DeferredDatasetHandle`, or
            `~lsst.pipe.base.InMemoryDatasetHandle`.
        columnIndex (optional): pandas `~pandas.Index` object
            Either passed or read in from
            `~lsst.daf.butler.DeferredDatasetHandle`.
        `returnTuple` : `bool`
            If true, then return a list of tuples rather than the column
            dictionary specification.
            This is set to `True` by `CompositeFunctor` in order to be able to
            combine columns from the various component functors.

        """
        if not isinstance(data, (DeferredDatasetHandle, InMemoryDatasetHandle)):
            raise RuntimeError(f"Unexpected data type. Got {get_full_type_name(data)}.")

        if columnIndex is None:
            columnIndex = data.get(component="columns")

        # Confirm that the dataset has the column levels the functor is
        # expecting it to have.
        columnLevels = self._get_data_columnLevels(data, columnIndex)

        columnDict = {'column': self.columns,
                      'dataset': self.dataset}
        if self.filt is None:
            columnLevelNames = self._get_data_columnLevelNames(data, columnIndex)
            if "band" in columnLevels:
                # Only a 'ref' dataset may omit the band; pick the first one
                # arbitrarily since ref values are band-independent.
                if self.dataset == "ref":
                    columnDict["band"] = columnLevelNames["band"][0]
                else:
                    raise ValueError(f"'filt' not set for functor {self.name}"
                                     f"(dataset {self.dataset}) "
                                     "and DataFrame "
                                     "contains multiple filters in column index. "
                                     "Set 'filt' or set 'dataset' to 'ref'.")
        else:
            columnDict['band'] = self.filt

        if returnTuple:
            return self._colsFromDict(columnDict, columnIndex=columnIndex)
        else:
            return columnDict

    def _func(self, df, dropna=True):
        # NOTE(review): the `dropna` parameter is unused here and not passed
        # by `__call__`; subclasses override without it — confirm before use.
        raise NotImplementedError('Must define calculation on DataFrame')

    def _get_columnIndex(self, data):
        """Return columnIndex."""

        # DataFrames passed directly have no handle component to query.
        if isinstance(data, (DeferredDatasetHandle, InMemoryDatasetHandle)):
            return data.get(component="columns")
        else:
            return None

    def _get_data(self, data):
        """Retrieve DataFrame necessary for calculation.

        The data argument can be a `~pandas.DataFrame`, a
        `~lsst.daf.butler.DeferredDatasetHandle`, or
        an `~lsst.pipe.base.InMemoryDatasetHandle`.

        Returns a DataFrame upon which `self._func` can act.
        """
        # We wrap a DataFrame in a handle here to take advantage of the
        # DataFrame delegate DataFrame column wrangling abilities.
        if isinstance(data, pd.DataFrame):
            _data = InMemoryDatasetHandle(data, storageClass="DataFrame")
        elif isinstance(data, (DeferredDatasetHandle, InMemoryDatasetHandle)):
            _data = data
        else:
            raise RuntimeError(f"Unexpected type provided for data. Got {get_full_type_name(data)}.")

        # First thing to do: check to see if the data source has a multilevel
        # column index or not.
        columnIndex = self._get_columnIndex(_data)
        is_multiLevel = isinstance(columnIndex, pd.MultiIndex)

        # Get proper columns specification for this functor.
        if is_multiLevel:
            columns = self.multilevelColumns(_data, columnIndex=columnIndex)
        else:
            columns = self.columns

        # Load in-memory DataFrame with appropriate columns the gen3 way.
        df = _data.get(parameters={"columns": columns})

        # Drop unnecessary column levels.
        if is_multiLevel:
            df = self._setLevels(df)

        return df

    def _setLevels(self, df):
        # Drop every column-index level not named in `_dfLevels`.
        levelsToDrop = [n for n in df.columns.names if n not in self._dfLevels]
        df.columns = df.columns.droplevel(levelsToDrop)
        return df

    def _dropna(self, vals):
        return vals.dropna()

    def __call__(self, data, dropna=False):
        df = self._get_data(data)
        try:
            vals = self._func(df)
        except Exception as e:
            # A failing calculation degrades to the `fail` fallback (NaNs)
            # rather than propagating, so one bad functor doesn't kill a run.
            self.log.error("Exception in %s call: %s: %s", self.name, type(e).__name__, e)
            vals = self.fail(df)
        if dropna:
            vals = self._dropna(vals)

        return vals

    def difference(self, data1, data2, **kwargs):
        """Computes difference between functor called on two different
        DataFrame/Handle objects.
        """
        return self(data1, **kwargs) - self(data2, **kwargs)

    def fail(self, df):
        """Return a NaN-filled Series aligned to ``df`` as a failure result."""
        return pd.Series(np.full(len(df), np.nan), index=df.index)

    @property
    def name(self):
        """Full name of functor (suitable for figure labels)."""
        # NOTE(review): this *returns* the NotImplementedError class instead
        # of raising it, so subclasses that forget to override get a class
        # object rather than an exception — confirm whether this is intended.
        return NotImplementedError

    @property
    def shortname(self):
        """Short name of functor (suitable for column name/dict key)."""
        return self.name

375 

376 

class CompositeFunctor(Functor):
    """Perform multiple calculations at once on a catalog.

    The role of a `CompositeFunctor` is to group together computations from
    multiple functors.
    Instead of returning `~pandas.Series` a `CompositeFunctor` returns a
    `~pandas.DataFrame`, with the column names being the keys of ``funcDict``.

    The `columns` attribute of a `CompositeFunctor` is the union of all columns
    in all the component functors.

    A `CompositeFunctor` does not use a `_func` method itself; rather, when a
    `CompositeFunctor` is called, all its columns are loaded at once, and the
    resulting DataFrame is passed to the `_func` method of each component
    functor.
    This has the advantage of only doing I/O (reading from parquet file) once,
    and works because each individual `_func` method of each component functor
    does not care if there are *extra* columns in the DataFrame being passed;
    only that it must contain *at least* the `columns` it expects.

    An important and useful class method is `from_yaml`, which takes as an
    argument the path to a YAML file specifying a collection of functors.

    Parameters
    ----------
    funcs : `dict` or `list`
        Dictionary or list of functors.
        If a list, then it will be converted into a dictonary according to the
        `.shortname` attribute of each functor.
    """
    dataset = None
    name = "CompositeFunctor"

    def __init__(self, funcs, **kwargs):

        # Use isinstance rather than an exact type comparison so that dict
        # subclasses (e.g. collections.OrderedDict) are accepted as mappings.
        if isinstance(funcs, dict):
            self.funcDict = funcs
        else:
            self.funcDict = {f.shortname: f for f in funcs}

        self._filt = None

        super().__init__(**kwargs)

    @property
    def filt(self):
        return self._filt

    @filt.setter
    def filt(self, filt):
        # Propagate the band down to every component functor.
        if filt is not None:
            for _, f in self.funcDict.items():
                f.filt = filt
        self._filt = filt

    def update(self, new):
        """Update the functor with new functors."""
        if isinstance(new, dict):
            self.funcDict.update(new)
        elif isinstance(new, CompositeFunctor):
            self.funcDict.update(new.funcDict)
        else:
            raise TypeError('Can only update with dictionary or CompositeFunctor.')

        # Make sure new functors have the same 'filt' set.
        # (Re-assigning triggers the property setter above, which pushes the
        # current band down to the freshly added component functors.)
        if self.filt is not None:
            self.filt = self.filt

    @property
    def columns(self):
        # Union of the (flat) columns required by every component functor.
        return list(set([x for y in [f.columns for f in self.funcDict.values()] for x in y]))

    def multilevelColumns(self, data, **kwargs):
        # Get the union of columns for all component functors.
        # Note the need to have `returnTuple=True` here.
        return list(
            set(
                [
                    x
                    for y in [
                        f.multilevelColumns(data, returnTuple=True, **kwargs) for f in self.funcDict.values()
                    ]
                    for x in y
                ]
            )
        )

    def __call__(self, data, **kwargs):
        """Apply the functor to the data table.

        Parameters
        ----------
        data : various
            The data represented as `~lsst.daf.butler.DeferredDatasetHandle`,
            `~lsst.pipe.base.InMemoryDatasetHandle`, or `~pandas.DataFrame`.
            The table or a pointer to a table on disk from which columns can
            be accessed.
        """
        if isinstance(data, pd.DataFrame):
            _data = InMemoryDatasetHandle(data, storageClass="DataFrame")
        elif isinstance(data, (DeferredDatasetHandle, InMemoryDatasetHandle)):
            _data = data
        else:
            raise RuntimeError(f"Unexpected type provided for data. Got {get_full_type_name(data)}.")

        columnIndex = self._get_columnIndex(_data)

        if isinstance(columnIndex, pd.MultiIndex):
            # Single I/O pass for the union of all needed columns; each
            # functor then operates on its own sub-selection.
            columns = self.multilevelColumns(_data, columnIndex=columnIndex)
            df = _data.get(parameters={"columns": columns})

            valDict = {}
            for k, f in self.funcDict.items():
                try:
                    subdf = f._setLevels(
                        df[f.multilevelColumns(_data, returnTuple=True, columnIndex=columnIndex)]
                    )
                    valDict[k] = f._func(subdf)
                except Exception as e:
                    self.log.exception(
                        "Exception in %s (funcs: %s) call: %s",
                        self.name,
                        str(list(self.funcDict.keys())),
                        type(e).__name__,
                    )
                    try:
                        valDict[k] = f.fail(subdf)
                    except NameError:
                        # `subdf` was never bound (column selection itself
                        # failed), so no fallback is possible; re-raise the
                        # original error.
                        raise e

        else:
            df = _data.get(parameters={"columns": self.columns})

            valDict = {k: f._func(df) for k, f in self.funcDict.items()}

        # Check that output columns are actually columns.
        for name, colVal in valDict.items():
            if len(colVal.shape) != 1:
                raise RuntimeError("Transformed column '%s' is not the shape of a column. "
                                   "It is shaped %s and type %s." % (name, colVal.shape, type(colVal)))

        try:
            valDf = pd.concat(valDict, axis=1)
        except TypeError:
            print([(k, type(v)) for k, v in valDict.items()])
            raise

        if kwargs.get('dropna', False):
            valDf = valDf.dropna(how='any')

        return valDf

    @classmethod
    def renameCol(cls, col, renameRules):
        """Rename a column according to prefix-replacement rules.

        Rules are applied in order; each ``(old, new)`` pair whose ``old``
        is a prefix of the (possibly already renamed) column substitutes
        ``old`` with ``new``.

        Parameters
        ----------
        col : `str`
            Column name to rename.
        renameRules : iterable of `tuple` or `None`
            Sequence of ``(old, new)`` prefix pairs; `None` leaves the name
            unchanged.
        """
        if renameRules is None:
            return col
        for old, new in renameRules:
            if col.startswith(old):
                col = col.replace(old, new)
        return col

    @classmethod
    def from_file(cls, filename, **kwargs):
        """Build a `CompositeFunctor` from a YAML file of functor definitions."""
        # Allow environment variables in the filename.
        filename = os.path.expandvars(filename)
        with open(filename) as f:
            translationDefinition = yaml.safe_load(f)

        return cls.from_yaml(translationDefinition, **kwargs)

    @classmethod
    def from_yaml(cls, translationDefinition, **kwargs):
        """Build a `CompositeFunctor` from a parsed YAML specification.

        Recognized keys: ``funcs`` (functor specs passed to `init_fromDict`),
        ``flag_rename_rules``, and the flag lists ``calexpFlags``,
        ``refFlags``, ``forcedFlags``, ``flags`` (each mapped to a plain
        `Column` against the corresponding dataset).
        """
        funcs = {}
        for func, val in translationDefinition['funcs'].items():
            funcs[func] = init_fromDict(val, name=func)

        if 'flag_rename_rules' in translationDefinition:
            renameRules = translationDefinition['flag_rename_rules']
        else:
            renameRules = None

        if 'calexpFlags' in translationDefinition:
            for flag in translationDefinition['calexpFlags']:
                funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='calexp')

        if 'refFlags' in translationDefinition:
            for flag in translationDefinition['refFlags']:
                funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='ref')

        if 'forcedFlags' in translationDefinition:
            for flag in translationDefinition['forcedFlags']:
                funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='forced_src')

        if 'flags' in translationDefinition:
            for flag in translationDefinition['flags']:
                funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='meas')

        return cls(funcs, **kwargs)

575 

576 

def mag_aware_eval(df, expr, log):
    """Evaluate an expression on a DataFrame, knowing what the 'mag' function
    means.

    Builds on `pandas.DataFrame.eval`, which parses and executes math on
    DataFrames.

    Parameters
    ----------
    df : ~pandas.DataFrame
        DataFrame on which to evaluate expression.

    expr : str
        Expression.
    """
    # mag(x) is rewritten into an explicit log10 flux-to-magnitude formula.
    mag_pattern = r'mag\((\w+)\)'
    try:
        translated = re.sub(mag_pattern, r'-2.5*log(\g<1>)/log(10)', expr)
        result = df.eval(translated)
    except Exception as e:  # Should check what actually gets raised
        log.error("Exception in mag_aware_eval: %s: %s", type(e).__name__, e)
        # Retry, assuming the name inside mag() was a bare flux-column stem.
        translated = re.sub(mag_pattern, r'-2.5*log(\g<1>_instFlux)/log(10)', expr)
        result = df.eval(translated)
    return result

600 

601 

class CustomFunctor(Functor):
    """Arbitrary computation on a catalog.

    Column names (and thus the columns to be loaded from catalog) are found by
    finding all words and trying to ignore all "math-y" words.

    Parameters
    ----------
    expr : str
        Expression to evaluate, to be parsed and executed by
        `~lsst.pipe.tasks.functors.mag_aware_eval`.
    """
    # Function names that look like identifiers but are not columns.
    _ignore_words = ('mag', 'sin', 'cos', 'exp', 'log', 'sqrt')

    def __init__(self, expr, **kwargs):
        self.expr = expr
        super().__init__(**kwargs)

    @property
    def name(self):
        return self.expr

    @property
    def columns(self):
        # Identifiers wrapped in mag(...) are fluxes and may need the
        # _instFlux suffix appended before they name a real column.
        flux_cols = re.findall(r'mag\(\s*(\w+)\s*\)', self.expr)

        candidates = [
            word for word in re.findall(r'[a-zA-Z_]+', self.expr)
            if word not in self._ignore_words
        ]
        bare_stems = []
        for flux in flux_cols:
            if re.search('_instFlux$', flux):
                candidates.append(flux)
            else:
                candidates.append(f'{flux}_instFlux')
                bare_stems.append(flux)

        # Drop the bare stems; only the suffixed names are real columns.
        return list(set([c for c in candidates if c not in bare_stems]))

    def _func(self, df):
        return mag_aware_eval(df, self.expr, self.log)

641 

642 

class Column(Functor):
    """Get column with a specified name."""

    def __init__(self, col, **kwargs):
        self.col = col
        super().__init__(**kwargs)

    @property
    def name(self):
        return self.col

    @property
    def columns(self):
        # Exactly one column is ever needed.
        return [self.col]

    def _func(self, df):
        # Pass the requested column through unchanged.
        return df[self.col]

660 

661 

class Index(Functor):
    """Return the value of the index for each object."""

    columns = ['coord_ra']  # Just a dummy; something has to be here.
    _defaultDataset = 'ref'
    _defaultNoDup = True

    def _func(self, df):
        # Echo the row index back as a Series aligned on itself.
        idx = df.index
        return pd.Series(idx, index=idx)

671 

672 

class CoordColumn(Column):
    """Base class for coordinate column, in degrees."""
    # Source columns are stored in radians unless a subclass says otherwise.
    _radians = True

    def __init__(self, col, **kwargs):
        super().__init__(col, **kwargs)

    def _func(self, df):
        # Must not modify original column in case that column is used by
        # another functor.
        values = df[self.col]
        if self._radians:
            return values * 180 / np.pi
        return values

685 

686 

class RAColumn(CoordColumn):
    """Right Ascension, in degrees."""
    name = 'RA'
    _defaultNoDup = True

    def __init__(self, **kwargs):
        super().__init__('coord_ra', **kwargs)

    def __call__(self, catalog, **kwargs):
        # NOTE(review): pure pass-through to CoordColumn.__call__; adds no
        # behavior — confirm whether it can be removed.
        return super().__call__(catalog, **kwargs)

697 

698 

class DecColumn(CoordColumn):
    """Declination, in degrees."""
    name = 'Dec'
    _defaultNoDup = True

    def __init__(self, **kwargs):
        super().__init__('coord_dec', **kwargs)

    def __call__(self, catalog, **kwargs):
        # NOTE(review): pure pass-through to CoordColumn.__call__; adds no
        # behavior — confirm whether it can be removed.
        return super().__call__(catalog, **kwargs)

709 

710 

class RAErrColumn(CoordColumn):
    """Uncertainty in Right Ascension, in degrees."""
    name = 'RAErr'
    _defaultNoDup = True

    def __init__(self, **kwargs):
        # Radians-to-degrees conversion is inherited from CoordColumn.
        super().__init__('coord_raErr', **kwargs)

718 

719 

class DecErrColumn(CoordColumn):
    """Uncertainty in declination, in degrees."""
    name = 'DecErr'
    _defaultNoDup = True

    def __init__(self, **kwargs):
        # Radians-to-degrees conversion is inherited from CoordColumn.
        super().__init__('coord_decErr', **kwargs)

727 

728 

class RADecCovColumn(Column):
    """Coordinate covariance column, in degrees."""
    _radians = True
    name = 'RADecCov'
    _defaultNoDup = True

    def __init__(self, **kwargs):
        super().__init__('coord_ra_dec_Cov', **kwargs)

    def _func(self, df):
        # Must not modify original column in case that column is used by
        # another functor.
        cov = df[self.col]
        if self._radians:
            # A covariance scales by the square of the rad->deg factor.
            return cov * (180 / np.pi) ** 2
        return cov

743 

744 

class HtmIndex20(Functor):
    """Compute the level 20 HtmIndex for the catalog.

    Notes
    -----
    This functor was implemented to satisfy requirements of old APDB interface
    which required the ``pixelId`` column in DiaObject with HTM20 index.
    The APDB interface had migrated to not need that information, but we keep
    this class in case it may be useful for something else.
    """
    name = "Htm20"
    htmLevel = 20
    _radians = True

    def __init__(self, ra, dec, **kwargs):
        self.pixelator = sphgeom.HtmPixelization(self.htmLevel)
        self.ra = ra
        self.dec = dec
        self._columns = [self.ra, self.dec]
        super().__init__(**kwargs)

    def _func(self, df):
        # Choose the angle unit once rather than per row.
        angleUnit = geom.radians if self._radians else geom.degrees

        def computePixel(row):
            sphPoint = geom.SpherePoint(row[self.ra], row[self.dec], angleUnit)
            return self.pixelator.index(sphPoint.getVector())

        return df.apply(computePixel, axis=1, result_type='reduce').astype('int64')

780 

781 

def fluxName(col):
    """Append _instFlux to the column name if it doesn't have it already."""
    suffix = '_instFlux'
    return col if col.endswith(suffix) else col + suffix

787 

788 

def fluxErrName(col):
    """Append _instFluxErr to the column name if it doesn't have it already."""
    suffix = '_instFluxErr'
    return col if col.endswith(suffix) else col + suffix

794 

795 

class Mag(Functor):
    """Compute calibrated magnitude.

    The flux at mag=0 (``fluxMag0``) defaults to 63095734448.0194, which is
    the default for HSC.
    TO DO: This default should be made configurable in DM-21955.

    This calculation hides warnings about invalid values and dividing by zero.

    As with all functors, a ``dataset`` and ``filt`` kwarg should be provided
    upon initialization.
    Unlike the default `Functor`, however, the default dataset for a `Mag` is
    ``'meas'``, rather than ``'ref'``.

    Parameters
    ----------
    col : `str`
        Name of flux column from which to compute magnitude.
        Can be parseable by the `~lsst.pipe.tasks.functors.fluxName` function;
        that is, you can pass ``'modelfit_CModel'`` instead of
        ``'modelfit_CModel_instFlux'``, and it will understand.
    """
    _defaultDataset = 'meas'

    def __init__(self, col, **kwargs):
        # Normalize to the *_instFlux column name.
        self.col = fluxName(col)
        # TO DO: DM-21955 Replace hard coded photometic calibration values.
        self.fluxMag0 = 63095734448.0194

        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.col]

    def _func(self, df):
        with warnings.catch_warnings():
            # NaN fluxes and zero fluxes are expected; silence the noise.
            for pattern in (r'invalid value encountered', r'divide by zero'):
                warnings.filterwarnings('ignore', pattern)
            return -2.5*np.log10(df[self.col] / self.fluxMag0)

    @property
    def name(self):
        return f'mag_{self.col}'

840 

841 

class MagErr(Mag):
    """Compute calibrated magnitude uncertainty.

    Parameters
    ----------
    col : `str`
        Name of the flux column.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # TO DO: DM-21955 Replace hard coded photometic calibration values.
        self.fluxMag0Err = 0.

    @property
    def columns(self):
        # The flux column plus its matching error column.
        return [self.col, self.col + 'Err']

    def _func(self, df):
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', r'invalid value encountered')
            warnings.filterwarnings('ignore', r'divide by zero')
            fluxCol, fluxErrCol = self.columns
            # Standard error propagation for -2.5*log10(flux/fluxMag0).
            fluxTerm = df[fluxErrCol] / df[fluxCol]
            zeropointTerm = self.fluxMag0Err / self.fluxMag0
            return (2.5 / np.log(10.)) * np.sqrt(fluxTerm*fluxTerm + zeropointTerm*zeropointTerm)

    @property
    def name(self):
        return super().name + '_err'

873 

874 

class MagDiff(Functor):
    """Functor to calculate magnitude difference."""
    _defaultDataset = 'meas'

    def __init__(self, col1, col2, **kwargs):
        # Normalize both names to *_instFlux columns.
        self.col1 = fluxName(col1)
        self.col2 = fluxName(col2)
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.col1, self.col2]

    def _func(self, df):
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', r'invalid value encountered')
            warnings.filterwarnings('ignore', r'divide by zero')
            # mag1 - mag2 expressed directly as a flux ratio.
            ratio = df[self.col1]/df[self.col2]
            return -2.5*np.log10(ratio)

    @property
    def name(self):
        return f'(mag_{self.col1} - mag_{self.col2})'

    @property
    def shortname(self):
        return f'magDiff_{self.col1}_{self.col2}'

901 

902 

class Color(Functor):
    """Compute the color between two filters.

    Computes color by initializing two different `Mag` functors based on the
    ``col`` and filters provided, and then returning the difference.

    This is enabled by the `_func` method expecting a DataFrame with a
    multilevel column index, with both ``'band'`` and ``'column'``, instead of
    just ``'column'``, which is the `Functor` default.
    This is controlled by the `_dfLevels` attribute.

    Also of note, the default dataset for `Color` is ``forced_src'``, whereas
    for `Mag` it is ``'meas'``.

    Parameters
    ----------
    col : str
        Name of the flux column from which to compute; same as would be passed
        to `~lsst.pipe.tasks.functors.Mag`.

    filt2, filt1 : str
        Filters from which to compute magnitude difference.
        Color computed is ``Mag(filt2) - Mag(filt1)``.
    """
    _defaultDataset = 'forced_src'
    _dfLevels = ('band', 'column')
    _defaultNoDup = True

    def __init__(self, col, filt2, filt1, **kwargs):
        self.col = fluxName(col)
        # A color of a band with itself is identically zero — reject it.
        if filt2 == filt1:
            raise RuntimeError("Cannot compute Color for %s: %s - %s " % (col, filt2, filt1))
        self.filt2 = filt2
        self.filt1 = filt1

        # Delegate the per-band magnitude computation to two Mag functors.
        self.mag2 = Mag(col, filt=filt2, **kwargs)
        self.mag1 = Mag(col, filt=filt1, **kwargs)

        super().__init__(**kwargs)

    @property
    def filt(self):
        # A color spans two bands, so it has no single filter.
        return None

    @filt.setter
    def filt(self, filt):
        # Deliberately ignore assignments; the two bands are fixed at init.
        pass

    def _func(self, df):
        magSecond = self.mag2._func(df[self.filt2])
        magFirst = self.mag1._func(df[self.filt1])
        return magSecond - magFirst

    @property
    def columns(self):
        return [self.mag1.col, self.mag2.col]

    def multilevelColumns(self, parq, **kwargs):
        # One (dataset, band, column) tuple per band.
        return [(self.dataset, self.filt1, self.col), (self.dataset, self.filt2, self.col)]

    @property
    def name(self):
        return f'{self.filt2} - {self.filt1} ({self.col})'

    @property
    def shortname(self):
        return f"{self.col}_{self.filt2.replace('-', '')}m{self.filt1.replace('-', '')}"

970 

971 

class DeconvolvedMoments(Functor):
    """This functor subtracts the trace of the PSF second moments from the
    trace of the second moments of the source.

    If the HsmShapeAlgorithm measurement is valid, then these will be used for
    the sources.
    Otherwise, the SdssShapeAlgorithm measurements will be used.
    """
    name = 'Deconvolved Moments'
    shortname = 'deconvolvedMoments'
    _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
                "ext_shapeHSM_HsmSourceMoments_yy",
                "base_SdssShape_xx", "base_SdssShape_yy",
                "ext_shapeHSM_HsmPsfMoments_xx",
                "ext_shapeHSM_HsmPsfMoments_yy")

    def _func(self, df):
        """Calculate deconvolved moments."""
        if "ext_shapeHSM_HsmSourceMoments_xx" in df.columns:
            hsm = df["ext_shapeHSM_HsmSourceMoments_xx"] + df["ext_shapeHSM_HsmSourceMoments_yy"]
        else:
            # Use an all-NaN Series rather than a bare ndarray: a plain
            # ndarray has no ``.where`` method, so the fallback expression
            # below would raise AttributeError when the HSM columns are
            # absent.
            hsm = pd.Series(np.full(len(df), np.nan), index=df.index)
        sdss = df["base_SdssShape_xx"] + df["base_SdssShape_yy"]
        if "ext_shapeHSM_HsmPsfMoments_xx" in df.columns:
            psf = df["ext_shapeHSM_HsmPsfMoments_xx"] + df["ext_shapeHSM_HsmPsfMoments_yy"]
        else:
            # LSST does not have shape.sdss.psf.
            # We could instead add base_PsfShape to the catalog using
            # exposure.getPsf().computeShape(s.getCentroid()).getIxx().
            raise RuntimeError('No psf shape parameter found in catalog')

        # Prefer the HSM source trace where it is finite, falling back to
        # the SDSS trace, then subtract the PSF trace.
        return hsm.where(np.isfinite(hsm), sdss) - psf

1004 

1005 

class SdssTraceSize(Functor):
    """Functor to calculate the SDSS trace radius size for sources.

    The SDSS trace radius size is a measure of size equal to the square root
    of half of the trace of the second moments tensor measured with the
    SdssShapeAlgorithm plugin.
    This has units of pixels.
    """
    name = "SDSS Trace Size"
    shortname = 'sdssTrace'
    _columns = ("base_SdssShape_xx", "base_SdssShape_yy")

    def _func(self, df):
        # sqrt of half the trace of the second-moments tensor.
        trace = df["base_SdssShape_xx"] + df["base_SdssShape_yy"]
        return np.sqrt(0.5*trace)

1021 

1022 

class PsfSdssTraceSizeDiff(Functor):
    """Functor to calculate the SDSS trace radius size difference (%) between
    the object and the PSF model.

    See Also
    --------
    SdssTraceSize
    """
    name = "PSF - SDSS Trace Size"
    shortname = 'psf_sdssTrace'
    _columns = ("base_SdssShape_xx", "base_SdssShape_yy",
                "base_SdssShape_psf_xx", "base_SdssShape_psf_yy")

    def _func(self, df):
        srcSize = np.sqrt(0.5*(df["base_SdssShape_xx"] + df["base_SdssShape_yy"]))
        psfSize = np.sqrt(0.5*(df["base_SdssShape_psf_xx"] + df["base_SdssShape_psf_yy"]))
        # Percent difference, normalized by the mean of the two sizes.
        meanSize = 0.5*(srcSize + psfSize)
        return 100*(srcSize - psfSize)/meanSize

1041 

1042 

class HsmTraceSize(Functor):
    """Functor to calculate the HSM trace radius size for sources.

    The HSM trace radius size is a measure of size equal to the square root
    of half of the trace of the second moments tensor measured with the
    HsmShapeAlgorithm plugin.
    This has units of pixels.
    """
    name = 'HSM Trace Size'
    shortname = 'hsmTrace'
    _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
                "ext_shapeHSM_HsmSourceMoments_yy")

    def _func(self, df):
        # sqrt of half the trace of the HSM second-moments tensor.
        trace = (df["ext_shapeHSM_HsmSourceMoments_xx"]
                 + df["ext_shapeHSM_HsmSourceMoments_yy"])
        return np.sqrt(0.5*trace)

1060 

1061 

class PsfHsmTraceSizeDiff(Functor):
    """Functor to calculate the HSM trace radius size difference (%) between
    the object and the PSF model.

    See Also
    --------
    HsmTraceSize
    """
    name = 'PSF - HSM Trace Size'
    shortname = 'psf_HsmTrace'
    _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
                "ext_shapeHSM_HsmSourceMoments_yy",
                "ext_shapeHSM_HsmPsfMoments_xx",
                "ext_shapeHSM_HsmPsfMoments_yy")

    def _func(self, df):
        srcSize = np.sqrt(0.5*(df["ext_shapeHSM_HsmSourceMoments_xx"]
                               + df["ext_shapeHSM_HsmSourceMoments_yy"]))
        psfSize = np.sqrt(0.5*(df["ext_shapeHSM_HsmPsfMoments_xx"]
                               + df["ext_shapeHSM_HsmPsfMoments_yy"]))
        # Percent difference, normalized by the mean of the two sizes.
        meanSize = 0.5*(srcSize + psfSize)
        return 100*(srcSize - psfSize)/meanSize

1084 

1085 

class HsmFwhm(Functor):
    """Functor to calculate the PSF FWHM with second moments measured from
    the HsmShapeAlgorithm plugin.

    This is in units of arcseconds, and assumes the hsc_rings_v1 skymap pixel
    scale of 0.168 arcseconds/pixel.

    Notes
    -----
    This conversion assumes the PSF is Gaussian, which is not always the case.
    """
    name = 'HSM Psf FWHM'
    _columns = ('ext_shapeHSM_HsmPsfMoments_xx', 'ext_shapeHSM_HsmPsfMoments_yy')
    # TODO: DM-21403 pixel scale should be computed from the CD matrix or transform matrix
    pixelScale = 0.168
    SIGMA2FWHM = 2*np.sqrt(2*np.log(2))

    def _func(self, df):
        # Trace radius (Gaussian sigma in pixels), then convert to FWHM in
        # arcseconds.
        sigma = np.sqrt(
            0.5*(df['ext_shapeHSM_HsmPsfMoments_xx'] + df['ext_shapeHSM_HsmPsfMoments_yy']))
        return self.pixelScale*self.SIGMA2FWHM*sigma

1106 

1107 

class E1(Functor):
    r"""Calculate :math:`e_1` ellipticity component for sources, defined as:

    .. math::
        e_1 &= (I_{xx}-I_{yy})/(I_{xx}+I_{yy})

    See Also
    --------
    E2
    """
    name = "Distortion Ellipticity (e1)"
    shortname = "Distortion"

    def __init__(self, colXX, colXY, colYY, **kwargs):
        self.colXX = colXX
        self.colXY = colXY
        self.colYY = colYY
        self._columns = [self.colXX, self.colXY, self.colYY]
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.colXX, self.colXY, self.colYY]

    def _func(self, df):
        # e1 = (Ixx - Iyy) / (Ixx + Iyy).  The previous implementation
        # omitted the parentheses around the numerator, so it computed
        # Ixx - Iyy/(Ixx + Iyy) instead of the documented definition.
        return (df[self.colXX] - df[self.colYY]) / (df[self.colXX] + df[self.colYY])

1134 

1135 

class E2(Functor):
    r"""Calculate :math:`e_2` ellipticity component for sources, defined as:

    .. math::
        e_2 &= 2I_{xy}/(I_{xx}+I_{yy})

    See Also
    --------
    E1
    """
    name = "Ellipticity e2"

    def __init__(self, colXX, colXY, colYY, **kwargs):
        self.colXX = colXX
        self.colXY = colXY
        self.colYY = colYY
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.colXX, self.colXY, self.colYY]

    def _func(self, df):
        # e2 = 2*Ixy / (Ixx + Iyy).
        trace = df[self.colXX] + df[self.colYY]
        return 2*df[self.colXY] / trace

1160 

1161 

class RadiusFromQuadrupole(Functor):
    """Calculate the radius from the quadrupole moments.

    This returns the fourth root of the determinant of the second moments
    tensor, which has units of pixels.

    See Also
    --------
    SdssTraceSize
    HsmTraceSize
    """

    def __init__(self, colXX, colXY, colYY, **kwargs):
        self.colXX = colXX
        self.colXY = colXY
        self.colYY = colYY
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.colXX, self.colXY, self.colYY]

    def _func(self, df):
        # Determinant of the 2x2 second-moments tensor; its fourth root is
        # the determinant radius in pixels.
        det = df[self.colXX]*df[self.colYY] - df[self.colXY]**2
        return det**0.25

1186 

1187 

class LocalWcs(Functor):
    """Computations using the stored localWcs."""
    name = "LocalWcsOperations"

    def __init__(self,
                 colCD_1_1,
                 colCD_1_2,
                 colCD_2_1,
                 colCD_2_2,
                 **kwargs):
        self.colCD_1_1 = colCD_1_1
        self.colCD_1_2 = colCD_1_2
        self.colCD_2_1 = colCD_2_1
        self.colCD_2_2 = colCD_2_2
        super().__init__(**kwargs)

    def computeDeltaRaDec(self, x, y, cd11, cd12, cd21, cd22):
        """Compute the (RA, Dec) offset corresponding to the pixel offset
        (x, y), using the local Wcs affine transform.

        Parameters
        ----------
        x : `~pandas.Series`
            X pixel coordinate.
        y : `~pandas.Series`
            Y pixel coordinate.
        cd11 : `~pandas.Series`
            [1, 1] element of the local Wcs affine transform.
        cd12 : `~pandas.Series`
            [1, 2] element of the local Wcs affine transform.
        cd21 : `~pandas.Series`
            [2, 1] element of the local Wcs affine transform.
        cd22 : `~pandas.Series`
            [2, 2] element of the local Wcs affine transform.

        Returns
        -------
        raDecTuple : tuple
            RA and dec conversion of x and y given the local Wcs.
            Returned units are in radians.

        """
        return (x * cd11 + y * cd12, x * cd21 + y * cd22)

    def computeSkySeparation(self, ra1, dec1, ra2, dec2):
        """Compute the great-circle separation between two sky coordinates
        using the haversine formula.

        Parameters
        ----------
        ra1 : `~pandas.Series`
            Ra of the first coordinate in radians.
        dec1 : `~pandas.Series`
            Dec of the first coordinate in radians.
        ra2 : `~pandas.Series`
            Ra of the second coordinate in radians.
        dec2 : `~pandas.Series`
            Dec of the second coordinate in radians.

        Returns
        -------
        dist : `~pandas.Series`
            Distance on the sphere in radians.
        """
        deltaDec = dec2 - dec1
        deltaRa = ra2 - ra1
        # Haversine formula: numerically stable for small separations.
        return 2 * np.arcsin(
            np.sqrt(
                np.sin(deltaDec / 2) ** 2
                + np.cos(dec2) * np.cos(dec1) * np.sin(deltaRa / 2) ** 2))

    def getSkySeparationFromPixel(self, x1, y1, x2, y2, cd11, cd12, cd21, cd22):
        """Compute the distance on the sphere from (x1, y1) to (x2, y2).

        Parameters
        ----------
        x1 : `~pandas.Series`
            X pixel coordinate.
        y1 : `~pandas.Series`
            Y pixel coordinate.
        x2 : `~pandas.Series`
            X pixel coordinate.
        y2 : `~pandas.Series`
            Y pixel coordinate.
        cd11 : `~pandas.Series`
            [1, 1] element of the local Wcs affine transform.
        cd12 : `~pandas.Series`
            [1, 2] element of the local Wcs affine transform.
        cd21 : `~pandas.Series`
            [2, 1] element of the local Wcs affine transform.
        cd22 : `~pandas.Series`
            [2, 2] element of the local Wcs affine transform.

        Returns
        -------
        Distance : `~pandas.Series`
            Distance on the sphere in radians.
        """
        ra1, dec1 = self.computeDeltaRaDec(x1, y1, cd11, cd12, cd21, cd22)
        ra2, dec2 = self.computeDeltaRaDec(x2, y2, cd11, cd12, cd21, cd22)
        # Great circle distance for small separations.
        return self.computeSkySeparation(ra1, dec1, ra2, dec2)

1292 

1293 

class ComputePixelScale(LocalWcs):
    """Compute the local pixel scale from the stored CDMatrix.
    """
    name = "PixelScale"

    @property
    def columns(self):
        return [self.colCD_1_1,
                self.colCD_1_2,
                self.colCD_2_1,
                self.colCD_2_2]

    def pixelScaleArcseconds(self, cd11, cd12, cd21, cd22):
        """Compute the local pixel to scale conversion in arcseconds.

        Parameters
        ----------
        cd11 : `~pandas.Series`
            [1, 1] element of the local Wcs affine transform in radians.
        cd12 : `~pandas.Series`
            [1, 2] element of the local Wcs affine transform in radians.
        cd21 : `~pandas.Series`
            [2, 1] element of the local Wcs affine transform in radians.
        cd22 : `~pandas.Series`
            [2, 2] element of the local Wcs affine transform in radians.

        Returns
        -------
        pixScale : `~pandas.Series`
            Arcseconds per pixel at the location of the local WC.
        """
        # |det(CD)| is the pixel area on the sky in rad^2; its square root
        # is the linear scale in rad/pixel, converted here to arcsec/pixel.
        det = cd11 * cd22 - cd12 * cd21
        return 3600 * np.degrees(np.sqrt(np.fabs(det)))

    def _func(self, df):
        return self.pixelScaleArcseconds(df[self.colCD_1_1],
                                         df[self.colCD_1_2],
                                         df[self.colCD_2_1],
                                         df[self.colCD_2_2])

1334 

1335 

class ConvertPixelToArcseconds(ComputePixelScale):
    """Convert a value in units of pixels to units of arcseconds."""

    def __init__(self,
                 col,
                 colCD_1_1,
                 colCD_1_2,
                 colCD_2_1,
                 colCD_2_2,
                 **kwargs):
        # Column holding the pixel-valued quantity to convert.
        self.col = col
        super().__init__(colCD_1_1,
                         colCD_1_2,
                         colCD_2_1,
                         colCD_2_2,
                         **kwargs)

    @property
    def name(self):
        return f"{self.col}_asArcseconds"

    @property
    def columns(self):
        return [self.col,
                self.colCD_1_1,
                self.colCD_1_2,
                self.colCD_2_1,
                self.colCD_2_2]

    def _func(self, df):
        # Scale the pixel quantity by the per-row local pixel scale.
        pixScale = self.pixelScaleArcseconds(df[self.colCD_1_1],
                                             df[self.colCD_1_2],
                                             df[self.colCD_2_1],
                                             df[self.colCD_2_2])
        return df[self.col] * pixScale

1370 

1371 

class ConvertPixelSqToArcsecondsSq(ComputePixelScale):
    """Convert a value in units of pixels squared to units of arcseconds
    squared.
    """

    def __init__(self,
                 col,
                 colCD_1_1,
                 colCD_1_2,
                 colCD_2_1,
                 colCD_2_2,
                 **kwargs):
        # Column holding the pixel^2-valued quantity to convert.
        self.col = col
        super().__init__(colCD_1_1,
                         colCD_1_2,
                         colCD_2_1,
                         colCD_2_2,
                         **kwargs)

    @property
    def name(self):
        return f"{self.col}_asArcsecondsSq"

    @property
    def columns(self):
        return [self.col,
                self.colCD_1_1,
                self.colCD_1_2,
                self.colCD_2_1,
                self.colCD_2_2]

    def _func(self, df):
        # A squared quantity picks up two factors of the pixel scale.
        pixScale = self.pixelScaleArcseconds(df[self.colCD_1_1],
                                             df[self.colCD_1_2],
                                             df[self.colCD_2_1],
                                             df[self.colCD_2_2])
        return df[self.col] * pixScale * pixScale

1409 

1410 

class ReferenceBand(Functor):
    """Return the band used to seed multiband forced photometry.

    This functor is to be used on Object tables.
    It converts the boolean merge_measurements_{band} columns into a single
    string representing the first band for which merge_measurements_{band}
    is True.

    Assumes the default priority order of i, r, z, y, g, u.
    """
    name = 'Reference Band'
    shortname = 'refBand'

    @property
    def columns(self):
        # Priority order: first True column wins.
        return ["merge_measurement_i",
                "merge_measurement_r",
                "merge_measurement_z",
                "merge_measurement_y",
                "merge_measurement_g",
                "merge_measurement_u"]

    def _func(self, df: pd.DataFrame) -> pd.Series:
        def bandOfRow(row):
            # idxmax returns the first column with the max value
            # (True > False), which implements the priority order.
            return row.idxmax().replace('merge_measurement_', '')

        # Skip columns that are unavailable, because this functor requests
        # the superset of bands that could be included in the object table.
        available = [col for col in self.columns if col in df.columns]
        # result_type='reduce' keeps the output a Series; astype('object')
        # pins the dtype even when df is empty.
        return df[available].apply(bandOfRow, axis=1,
                                   result_type='reduce').astype('object')

1445 

1446 

class Photometry(Functor):
    """Base class for Object table calibrated fluxes and magnitudes."""
    # AB to NanoJansky (3631 Jansky).
    AB_FLUX_SCALE = (0 * u.ABmag).to_value(u.nJy)
    LOG_AB_FLUX_SCALE = 12.56
    FIVE_OVER_2LOG10 = 1.085736204758129569
    # TO DO: DM-21955 Replace hard coded photometic calibration values.
    COADD_ZP = 27

    def __init__(self, colFlux, colFluxErr=None, **kwargs):
        # Elementwise overflow-safe hypot; used by dn2fluxErr.
        self.vhypot = np.vectorize(self.hypot)
        self.col = colFlux
        self.colFluxErr = colFluxErr

        # Instrumental flux corresponding to magnitude zero at the fixed
        # coadd zeropoint; its error is taken as exactly zero.
        self.fluxMag0 = 1./np.power(10, -0.4*self.COADD_ZP)
        self.fluxMag0Err = 0.

        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.col]

    @property
    def name(self):
        return f'mag_{self.col}'

    @classmethod
    def hypot(cls, a, b):
        """Compute sqrt(a^2 + b^2) without under/overflow."""
        # Arrange |a| >= |b| so the ratio below is at most 1 in magnitude.
        if np.abs(a) < np.abs(b):
            a, b = b, a
        if a == 0.:
            return 0.
        ratio = b/a
        return np.abs(a) * np.sqrt(1. + ratio*ratio)

    def dn2flux(self, dn, fluxMag0):
        """Convert instrumental flux to nanojanskys."""
        return self.AB_FLUX_SCALE * dn / fluxMag0

    def dn2mag(self, dn, fluxMag0):
        """Convert instrumental flux to AB magnitude."""
        # Non-positive fluxes yield NaN/-inf; silence the numpy warnings.
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', r'invalid value encountered')
            warnings.filterwarnings('ignore', r'divide by zero')
            return -2.5 * np.log10(dn/fluxMag0)

    def dn2fluxErr(self, dn, dnErr, fluxMag0, fluxMag0Err):
        """Convert instrumental flux error to nanojanskys."""
        # Quadrature sum of the calibration and measurement error terms.
        quadSum = self.vhypot(dn * fluxMag0Err, dnErr * fluxMag0)
        quadSum *= self.AB_FLUX_SCALE / fluxMag0 / fluxMag0
        return quadSum

    def dn2MagErr(self, dn, dnErr, fluxMag0, fluxMag0Err):
        """Convert instrumental flux error to AB magnitude error."""
        # sigma_mag = (2.5 / ln 10) * sigma_flux / flux.
        relErr = self.dn2fluxErr(dn, dnErr, fluxMag0, fluxMag0Err) / self.dn2flux(dn, fluxMag0)
        return self.FIVE_OVER_2LOG10 * relErr

1505 

1506 

class NanoJansky(Photometry):
    """Convert instrumental flux to nanojanskys."""
    def _func(self, df):
        # Calibrate against the fixed coadd zeropoint flux.
        instFlux = df[self.col]
        return self.dn2flux(instFlux, self.fluxMag0)

1511 

1512 

class NanoJanskyErr(Photometry):
    """Convert instrumental flux error to nanojanskys."""
    @property
    def columns(self):
        return [self.col, self.colFluxErr]

    def _func(self, df):
        # dn2fluxErr goes through np.vectorize, so wrap the resulting
        # ndarray back into a Series aligned with df.
        fluxErr = self.dn2fluxErr(df[self.col], df[self.colFluxErr],
                                  self.fluxMag0, self.fluxMag0Err)
        return pd.Series(fluxErr, index=df.index)

1522 

1523 

class LocalPhotometry(Functor):
    """Base class for calibrating the specified instrument flux column using
    the local photometric calibration.

    Parameters
    ----------
    instFluxCol : `str`
        Name of the instrument flux column.
    instFluxErrCol : `str`
        Name of the assocated error columns for ``instFluxCol``.
    photoCalibCol : `str`
        Name of local calibration column.
    photoCalibErrCol : `str`
        Error associated with ``photoCalibCol``

    See Also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    """
    # AB magnitude of 1 nJy, used to shift calibrated fluxes onto the AB
    # magnitude scale.
    logNJanskyToAB = (1 * u.nJy).to_value(u.ABmag)

    def __init__(self,
                 instFluxCol,
                 instFluxErrCol,
                 photoCalibCol,
                 photoCalibErrCol,
                 **kwargs):
        self.instFluxCol = instFluxCol
        self.instFluxErrCol = instFluxErrCol
        self.photoCalibCol = photoCalibCol
        self.photoCalibErrCol = photoCalibErrCol
        super().__init__(**kwargs)

    def instFluxToNanojansky(self, instFlux, localCalib):
        """Convert instrument flux to nanojanskys.

        Parameters
        ----------
        instFlux : `~numpy.ndarray` or `~pandas.Series`
            Array of instrument flux measurements.
        localCalib : `~numpy.ndarray` or `~pandas.Series`
            Array of local photometric calibration estimates.

        Returns
        -------
        calibFlux : `~numpy.ndarray` or `~pandas.Series`
            Array of calibrated flux measurements.
        """
        return instFlux * localCalib

    def instFluxErrToNanojanskyErr(self, instFlux, instFluxErr, localCalib, localCalibErr):
        """Convert instrument flux error to a nanojansky error.

        Parameters
        ----------
        instFlux : `~numpy.ndarray` or `~pandas.Series`
            Array of instrument flux measurements.
        instFluxErr : `~numpy.ndarray` or `~pandas.Series`
            Errors on associated ``instFlux`` values.
        localCalib : `~numpy.ndarray` or `~pandas.Series`
            Array of local photometric calibration estimates.
        localCalibErr : `~numpy.ndarray` or `~pandas.Series`
            Errors on associated ``localCalib`` values.

        Returns
        -------
        calibFluxErr : `~numpy.ndarray` or `~pandas.Series`
            Errors on calibrated flux measurements.
        """
        # Quadrature sum of the flux and calibration error contributions.
        return np.hypot(instFluxErr * localCalib, instFlux * localCalibErr)

    def instFluxToMagnitude(self, instFlux, localCalib):
        """Convert instrument flux to an AB magnitude.

        Parameters
        ----------
        instFlux : `~numpy.ndarray` or `~pandas.Series`
            Array of instrument flux measurements.
        localCalib : `~numpy.ndarray` or `~pandas.Series`
            Array of local photometric calibration estimates.

        Returns
        -------
        calibMag : `~numpy.ndarray` or `~pandas.Series`
            Array of calibrated AB magnitudes.
        """
        return -2.5 * np.log10(self.instFluxToNanojansky(instFlux, localCalib)) + self.logNJanskyToAB

    def instFluxErrToMagnitudeErr(self, instFlux, instFluxErr, localCalib, localCalibErr):
        """Convert instrument flux error to an AB magnitude error.

        Parameters
        ----------
        instFlux : `~numpy.ndarray` or `~pandas.Series`
            Array of instrument flux measurements.
        instFluxErr : `~numpy.ndarray` or `~pandas.Series`
            Errors on associated ``instFlux`` values.
        localCalib : `~numpy.ndarray` or `~pandas.Series`
            Array of local photometric calibration estimates.
        localCalibErr : `~numpy.ndarray` or `~pandas.Series`
            Errors on associated ``localCalib`` values.

        Returns
        -------
        calibMagErr: `~numpy.ndarray` or `~pandas.Series`
            Error on calibrated AB magnitudes.
        """
        err = self.instFluxErrToNanojanskyErr(instFlux, instFluxErr, localCalib, localCalibErr)
        # sigma_mag = (2.5 / ln 10) * sigma_flux / flux.  The denominator
        # previously passed ``instFluxErr`` instead of ``localCalib`` to
        # instFluxToNanojansky, dividing by the wrong quantity.
        return 2.5 / np.log(10) * err / self.instFluxToNanojansky(instFlux, localCalib)

1634 

1635 

class LocalNanojansky(LocalPhotometry):
    """Compute calibrated fluxes using the local calibration value.

    This returns units of nanojanskys.
    """

    @property
    def columns(self):
        return [self.instFluxCol, self.photoCalibCol]

    @property
    def name(self):
        return f'flux_{self.instFluxCol}'

    def _func(self, df):
        # Apply the per-row local calibration to the instrument flux.
        instFlux = df[self.instFluxCol]
        localCalib = df[self.photoCalibCol]
        return self.instFluxToNanojansky(instFlux, localCalib)

1652 

1653 

class LocalNanojanskyErr(LocalPhotometry):
    """Compute calibrated flux errors using the local calibration value.

    This returns units of nanojanskys.
    """

    @property
    def columns(self):
        return [self.instFluxCol, self.instFluxErrCol,
                self.photoCalibCol, self.photoCalibErrCol]

    @property
    def name(self):
        return f'fluxErr_{self.instFluxCol}'

    def _func(self, df):
        # Propagate both the flux and calibration errors.
        instFlux = df[self.instFluxCol]
        instFluxErr = df[self.instFluxErrCol]
        localCalib = df[self.photoCalibCol]
        localCalibErr = df[self.photoCalibErrCol]
        return self.instFluxErrToNanojanskyErr(instFlux, instFluxErr,
                                               localCalib, localCalibErr)

1672 

1673 

class LocalDipoleMeanFlux(LocalPhotometry):
    """Compute absolute mean of dipole fluxes.

    See Also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalDipoleMeanFluxErr
    LocalDipoleDiffFlux
    LocalDipoleDiffFluxErr
    """
    def __init__(self,
                 instFluxPosCol,
                 instFluxNegCol,
                 instFluxPosErrCol,
                 instFluxNegErrCol,
                 photoCalibCol,
                 photoCalibErrCol,
                 **kwargs):
        # Record both dipole lobes explicitly; the base class is seeded
        # with the negative-lobe columns since it takes a single flux
        # column.
        self.instFluxNegCol = instFluxNegCol
        self.instFluxPosCol = instFluxPosCol
        self.instFluxNegErrCol = instFluxNegErrCol
        self.instFluxPosErrCol = instFluxPosErrCol
        self.photoCalibCol = photoCalibCol
        self.photoCalibErrCol = photoCalibErrCol
        super().__init__(instFluxNegCol,
                         instFluxNegErrCol,
                         photoCalibCol,
                         photoCalibErrCol,
                         **kwargs)

    @property
    def columns(self):
        return [self.instFluxPosCol,
                self.instFluxNegCol,
                self.photoCalibCol]

    @property
    def name(self):
        return f'dipMeanFlux_{self.instFluxPosCol}_{self.instFluxNegCol}'

    def _func(self, df):
        # Mean of the absolute calibrated fluxes of the two lobes.
        localCalib = df[self.photoCalibCol]
        negFlux = np.fabs(self.instFluxToNanojansky(df[self.instFluxNegCol], localCalib))
        posFlux = np.fabs(self.instFluxToNanojansky(df[self.instFluxPosCol], localCalib))
        return 0.5*(negFlux + posFlux)

1718 

1719 

class LocalDipoleMeanFluxErr(LocalDipoleMeanFlux):
    """Compute the error on the absolute mean of dipole fluxes.

    See Also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalDipoleMeanFlux
    LocalDipoleDiffFlux
    LocalDipoleDiffFluxErr
    """

    @property
    def columns(self):
        return [self.instFluxPosCol,
                self.instFluxNegCol,
                self.instFluxPosErrCol,
                self.instFluxNegErrCol,
                self.photoCalibCol,
                self.photoCalibErrCol]

    @property
    def name(self):
        return f'dipMeanFluxErr_{self.instFluxPosCol}_{self.instFluxNegCol}'

    def _func(self, df):
        # Error propagation for 0.5*(|pos| + |neg|)*calib: the calibration
        # error multiplies the *sum* of the absolute lobe fluxes.  A
        # misplaced parenthesis previously applied the calibration error
        # to the positive lobe only (compare LocalDipoleDiffFluxErr, which
        # has the correct grouping).
        return 0.5*np.sqrt(
            ((np.fabs(df[self.instFluxNegCol]) + np.fabs(df[self.instFluxPosCol]))
             * df[self.photoCalibErrCol])**2
            + (df[self.instFluxNegErrCol]**2 + df[self.instFluxPosErrCol]**2)
            * df[self.photoCalibCol]**2)

1751 

1752 

class LocalDipoleDiffFlux(LocalDipoleMeanFlux):
    """Compute the absolute difference of dipole fluxes.

    Calculated value is (abs(pos) - abs(neg)).

    See Also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalDipoleMeanFlux
    LocalDipoleMeanFluxErr
    LocalDipoleDiffFluxErr
    """

    @property
    def columns(self):
        return [self.instFluxPosCol,
                self.instFluxNegCol,
                self.photoCalibCol]

    @property
    def name(self):
        return f'dipDiffFlux_{self.instFluxPosCol}_{self.instFluxNegCol}'

    def _func(self, df):
        # |pos| - |neg| of the calibrated lobe fluxes.
        localCalib = df[self.photoCalibCol]
        posFlux = np.fabs(self.instFluxToNanojansky(df[self.instFluxPosCol], localCalib))
        negFlux = np.fabs(self.instFluxToNanojansky(df[self.instFluxNegCol], localCalib))
        return posFlux - negFlux

1780 

1781 

class LocalDipoleDiffFluxErr(LocalDipoleMeanFlux):
    """Compute the error on the absolute difference of dipole fluxes.

    See Also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalDipoleMeanFlux
    LocalDipoleMeanFluxErr
    LocalDipoleDiffFlux
    """

    @property
    def columns(self):
        return [self.instFluxPosCol,
                self.instFluxNegCol,
                self.instFluxPosErrCol,
                self.instFluxNegErrCol,
                self.photoCalibCol,
                self.photoCalibErrCol]

    @property
    def name(self):
        return f'dipDiffFluxErr_{self.instFluxPosCol}_{self.instFluxNegCol}'

    def _func(self, df):
        # Error propagation for (|pos| - |neg|)*calib: a calibration-error
        # term on the flux difference plus the quadrature-summed lobe
        # errors scaled by the calibration.
        absDiff = np.fabs(df[self.instFluxPosCol]) - np.fabs(df[self.instFluxNegCol])
        calibTerm = (absDiff * df[self.photoCalibErrCol])**2
        fluxTerm = ((df[self.instFluxPosErrCol]**2 + df[self.instFluxNegErrCol]**2)
                    * df[self.photoCalibCol]**2)
        return np.sqrt(calibTerm + fluxTerm)

1813 

1814 

class Ebv(Functor):
    """Compute E(B-V) from dustmaps.sfd."""
    _defaultDataset = 'ref'
    name = "E(B-V)"
    shortname = "ebv"

    def __init__(self, **kwargs):
        # Import is only needed for Ebv, and dustmaps may not be installed
        # in environments that never use this functor.
        from dustmaps.sfd import SFDQuery
        self._columns = ['coord_ra', 'coord_dec']
        self.sfd = SFDQuery()
        super().__init__(**kwargs)

    def _func(self, df):
        # coord_ra/coord_dec are stored in radians.
        coords = SkyCoord(df['coord_ra'].values * u.rad, df['coord_dec'].values * u.rad)
        ebv = self.sfd(coords)
        # Double precision unnecessary scientifically but currently needed for
        # ingest to qserv.
        return pd.Series(ebv, index=df.index).astype('float64')