Coverage for python/lsst/pipe/tasks/functors.py: 42%

739 statements  

« prev     ^ index     » next       coverage.py v7.5.1, created at 2024-05-12 01:56 -0700

1# This file is part of pipe_tasks. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

22__all__ = ["init_fromDict", "Functor", "CompositeFunctor", "mag_aware_eval", 

23 "CustomFunctor", "Column", "Index", "CoordColumn", "RAColumn", 

24 "DecColumn", "HtmIndex20", "fluxName", "fluxErrName", "Mag", 

25 "MagErr", "MagDiff", "Color", "DeconvolvedMoments", "SdssTraceSize", 

26 "PsfSdssTraceSizeDiff", "HsmTraceSize", "PsfHsmTraceSizeDiff", 

27 "HsmFwhm", "E1", "E2", "RadiusFromQuadrupole", "LocalWcs", 

28 "ComputePixelScale", "ConvertPixelToArcseconds", 

29 "ConvertPixelSqToArcsecondsSq", "ReferenceBand", "Photometry", 

30 "NanoJansky", "NanoJanskyErr", "LocalPhotometry", "LocalNanojansky", 

31 "LocalNanojanskyErr", "LocalDipoleMeanFlux", 

32 "LocalDipoleMeanFluxErr", "LocalDipoleDiffFlux", 

33 "LocalDipoleDiffFluxErr", "Ebv", 

34 ] 

35 

36import logging 

37import os 

38import os.path 

39import re 

40import warnings 

41from contextlib import redirect_stdout 

42from itertools import product 

43 

44import astropy.units as u 

45import lsst.geom as geom 

46import lsst.sphgeom as sphgeom 

47import numpy as np 

48import pandas as pd 

49import yaml 

50from astropy.coordinates import SkyCoord 

51from lsst.daf.butler import DeferredDatasetHandle 

52from lsst.pipe.base import InMemoryDatasetHandle 

53from lsst.utils import doImport 

54from lsst.utils.introspection import get_full_type_name 

55 

56 

def init_fromDict(initDict, basePath='lsst.pipe.tasks.functors',
                  typeKey='functor', name=None):
    """Initialize an object defined in a dictionary.

    The object needs to be importable as f'{basePath}.{initDict[typeKey]}'.
    The positional and keyword arguments (if any) are contained in "args" and
    "kwargs" entries in the dictionary, respectively.
    This is used in `~lsst.pipe.tasks.functors.CompositeFunctor.from_yaml` to
    initialize a composite functor from a specification in a YAML file.

    Parameters
    ----------
    initDict : dictionary
        Dictionary describing object's initialization.
        Must contain an entry keyed by ``typeKey`` that is the name of the
        object, relative to ``basePath``.
    basePath : str
        Path relative to module in which ``initDict[typeKey]`` is defined.
    typeKey : str
        Key of ``initDict`` that is the name of the object (relative to
        ``basePath``).
    name : str, optional
        Name of the object being constructed; used only to produce a more
        informative error message on failure.

    Returns
    -------
    element
        The constructed object.
    """
    # Copy so that popping entries below does not mutate the caller's dict.
    initDict = initDict.copy()
    # TO DO: DM-21956 We should be able to define functors outside this module
    pythonType = doImport(f'{basePath}.{initDict.pop(typeKey)}')
    args = []
    if 'args' in initDict:
        args = initDict.pop('args')
        if isinstance(args, str):
            args = [args]
    try:
        element = pythonType(*args, **initDict)
    except Exception as e:
        message = f'Error in constructing functor "{name}" of type {pythonType.__name__} with args: {args}'
        # Chain with `from e` so the original traceback is preserved; the
        # bare re-raise previously discarded the underlying cause.
        raise type(e)(message, e.args) from e
    return element

93 

94 

class Functor(object):
    """Define and execute a calculation on a DataFrame or Handle holding a
    DataFrame.

    The `__call__` method accepts either a `~pandas.DataFrame` object or a
    `~lsst.daf.butler.DeferredDatasetHandle` or
    `~lsst.pipe.base.InMemoryDatasetHandle`, and returns the
    result of the calculation as a single column.
    Each functor defines what columns are needed for the calculation, and only
    these columns are read from the dataset handle.

    The action of `__call__` consists of two steps: first, loading the
    necessary columns from disk into memory as a `~pandas.DataFrame` object;
    and second, performing the computation on this DataFrame and returning the
    result.

    To define a new `Functor`, a subclass must define a `_func` method,
    that takes a `~pandas.DataFrame` and returns result in a `~pandas.Series`.
    In addition, it must define the following attributes:

    * `_columns`: The columns necessary to perform the calculation
    * `name`: A name appropriate for a figure axis label
    * `shortname`: A name appropriate for use as a dictionary key

    On initialization, a `Functor` should declare what band (``filt`` kwarg)
    and dataset (e.g. ``'ref'``, ``'meas'``, ``'forced_src'``) it is intended
    to be applied to.
    This enables the `_get_data` method to extract the proper columns from the
    underlying data.
    If not specified, the dataset will fall back on the `_defaultDataset`
    attribute.
    If band is not specified and ``dataset`` is anything other than ``'ref'``,
    then an error will be raised when trying to perform the calculation.

    Originally, `Functor` was set up to expect datasets formatted like the
    ``deepCoadd_obj`` dataset; that is, a DataFrame with a multi-level column
    index, with the levels of the column index being ``band``, ``dataset``, and
    ``column``.
    It has since been generalized to apply to DataFrames without multi-level
    indices and multi-level indices with just ``dataset`` and ``column``
    levels.
    In addition, the `_get_data` method that reads the columns from the
    underlying data will return a DataFrame with column index levels defined by
    the `_dfLevels` attribute; by default, this is ``column``.

    The `_dfLevels` attributes should generally not need to be changed, unless
    `_func` needs columns from multiple filters or datasets to do the
    calculation.
    An example of this is the `~lsst.pipe.tasks.functors.Color` functor, for
    which `_dfLevels = ('band', 'column')`, and `_func` expects the DataFrame
    it gets to have those levels in the column index.

    Parameters
    ----------
    filt : str
        Band upon which to do the calculation.

    dataset : str
        Dataset upon which to do the calculation (e.g., 'ref', 'meas',
        'forced_src').
    """

    # Dataset used when the caller does not pass one to __init__.
    _defaultDataset = 'ref'
    # Column-index levels kept in the DataFrame passed to `_func`.
    _dfLevels = ('column',)
    # Fallback value for the `noDup` property when `noDup` is not given.
    _defaultNoDup = False

    def __init__(self, filt=None, dataset=None, noDup=None):
        self.filt = filt
        self.dataset = dataset if dataset is not None else self._defaultDataset
        self._noDup = noDup
        # One logger per concrete functor class, named after the class.
        self.log = logging.getLogger(type(self).__name__)

    @property
    def noDup(self):
        """Do not explode by band if used on object table."""
        # Explicit constructor argument wins over the class default.
        if self._noDup is not None:
            return self._noDup
        else:
            return self._defaultNoDup

    @property
    def columns(self):
        """Columns required to perform calculation."""
        # Subclasses must either set `_columns` or override this property.
        if not hasattr(self, '_columns'):
            raise NotImplementedError('Must define columns property or _columns attribute')
        return self._columns

    def _get_data_columnLevels(self, data, columnIndex=None):
        """Gets the names of the column index levels.

        This should only be called in the context of a multilevel table.

        Parameters
        ----------
        data : various
            The data to be read, can be a
            `~lsst.daf.butler.DeferredDatasetHandle` or
            `~lsst.pipe.base.InMemoryDatasetHandle`.
        columnIndex (optional): pandas `~pandas.Index` object
            If not passed, then it is read from the
            `~lsst.daf.butler.DeferredDatasetHandle`
            for `~lsst.pipe.base.InMemoryDatasetHandle`.

        Returns
        -------
        names : sequence of str
            Level names of the column index (e.g. band/dataset/column).
        """
        if columnIndex is None:
            columnIndex = data.get(component="columns")
        return columnIndex.names

    def _get_data_columnLevelNames(self, data, columnIndex=None):
        """Gets the content of each of the column levels for a multilevel
        table.

        Returns a dict mapping each level name to the sorted unique values
        that appear at that level of the column index.
        """
        if columnIndex is None:
            columnIndex = data.get(component="columns")

        columnLevels = columnIndex.names
        # Each entry of the MultiIndex is a tuple; slicing the stacked array
        # column-wise collects the values present at each level.
        columnLevelNames = {
            level: list(np.unique(np.array([c for c in columnIndex])[:, i]))
            for i, level in enumerate(columnLevels)
        }
        return columnLevelNames

    def _colsFromDict(self, colDict, columnIndex=None):
        """Converts dictionary column specification to a list of columns."""
        new_colDict = {}
        columnLevels = self._get_data_columnLevels(None, columnIndex=columnIndex)

        for i, lev in enumerate(columnLevels):
            if lev in colDict:
                if isinstance(colDict[lev], str):
                    # Normalize scalar entries to one-element lists so the
                    # cartesian product below works uniformly.
                    new_colDict[lev] = [colDict[lev]]
                else:
                    new_colDict[lev] = colDict[lev]
            else:
                # Level not constrained by the caller: take all its values.
                new_colDict[lev] = columnIndex.levels[i]

        levelCols = [new_colDict[lev] for lev in columnLevels]
        # Cartesian product of level values gives candidate column tuples;
        # keep only those actually present in the table.
        cols = list(product(*levelCols))
        colsAvailable = [col for col in cols if col in columnIndex]
        return colsAvailable

    def multilevelColumns(self, data, columnIndex=None, returnTuple=False):
        """Returns columns needed by functor from multilevel dataset.

        To access tables with multilevel column structure, the
        `~lsst.daf.butler.DeferredDatasetHandle` or
        `~lsst.pipe.base.InMemoryDatasetHandle` needs to be passed
        either a list of tuples or a dictionary.

        Parameters
        ----------
        data : various
            The data as either `~lsst.daf.butler.DeferredDatasetHandle`, or
            `~lsst.pipe.base.InMemoryDatasetHandle`.
        columnIndex (optional): pandas `~pandas.Index` object
            Either passed or read in from
            `~lsst.daf.butler.DeferredDatasetHandle`.
        `returnTuple` : `bool`
            If true, then return a list of tuples rather than the column
            dictionary specification.
            This is set to `True` by `CompositeFunctor` in order to be able to
            combine columns from the various component functors.

        """
        if not isinstance(data, (DeferredDatasetHandle, InMemoryDatasetHandle)):
            raise RuntimeError(f"Unexpected data type. Got {get_full_type_name(data)}.")

        if columnIndex is None:
            columnIndex = data.get(component="columns")

        # Confirm that the dataset has the column levels the functor is
        # expecting it to have.
        columnLevels = self._get_data_columnLevels(data, columnIndex)

        columnDict = {'column': self.columns,
                      'dataset': self.dataset}
        if self.filt is None:
            columnLevelNames = self._get_data_columnLevelNames(data, columnIndex)
            if "band" in columnLevels:
                if self.dataset == "ref":
                    # 'ref' columns are band-independent; any band works, so
                    # pick the first available one.
                    columnDict["band"] = columnLevelNames["band"][0]
                else:
                    raise ValueError(f"'filt' not set for functor {self.name}"
                                     f"(dataset {self.dataset}) "
                                     "and DataFrame "
                                     "contains multiple filters in column index. "
                                     "Set 'filt' or set 'dataset' to 'ref'.")
        else:
            columnDict['band'] = self.filt

        if returnTuple:
            return self._colsFromDict(columnDict, columnIndex=columnIndex)
        else:
            return columnDict

    def _func(self, df, dropna=True):
        # NOTE(review): `dropna` is never passed by `__call__` and is unused
        # here — confirm before relying on it in subclasses.
        raise NotImplementedError('Must define calculation on DataFrame')

    def _get_columnIndex(self, data):
        """Return columnIndex."""

        # Only handles expose a "columns" component; plain DataFrames are
        # signalled by returning None.
        if isinstance(data, (DeferredDatasetHandle, InMemoryDatasetHandle)):
            return data.get(component="columns")
        else:
            return None

    def _get_data(self, data):
        """Retrieve DataFrame necessary for calculation.

        The data argument can be a `~pandas.DataFrame`, a
        `~lsst.daf.butler.DeferredDatasetHandle`, or
        an `~lsst.pipe.base.InMemoryDatasetHandle`.

        Returns a DataFrame upon which `self._func` can act.
        """
        # We wrap a DataFrame in a handle here to take advantage of the
        # DataFrame delegate DataFrame column wrangling abilities.
        if isinstance(data, pd.DataFrame):
            _data = InMemoryDatasetHandle(data, storageClass="DataFrame")
        elif isinstance(data, (DeferredDatasetHandle, InMemoryDatasetHandle)):
            _data = data
        else:
            raise RuntimeError(f"Unexpected type provided for data. Got {get_full_type_name(data)}.")

        # First thing to do: check to see if the data source has a multilevel
        # column index or not.
        columnIndex = self._get_columnIndex(_data)
        is_multiLevel = isinstance(columnIndex, pd.MultiIndex)

        # Get proper columns specification for this functor.
        if is_multiLevel:
            columns = self.multilevelColumns(_data, columnIndex=columnIndex)
        else:
            columns = self.columns

        # Load in-memory DataFrame with appropriate columns the gen3 way.
        df = _data.get(parameters={"columns": columns})

        # Drop unnecessary column levels.
        if is_multiLevel:
            df = self._setLevels(df)

        return df

    def _setLevels(self, df):
        # Keep only the levels listed in `_dfLevels`; drop the rest so
        # `_func` sees the shape it expects.
        levelsToDrop = [n for n in df.columns.names if n not in self._dfLevels]
        df.columns = df.columns.droplevel(levelsToDrop)
        return df

    def _dropna(self, vals):
        # Thin wrapper so subclasses can customize NaN handling.
        return vals.dropna()

    def __call__(self, data, dropna=False):
        """Apply the functor; on failure log the error and return the
        `fail` result (a NaN-filled Series) instead of raising.
        """
        df = self._get_data(data)
        try:
            vals = self._func(df)
        except Exception as e:
            self.log.error("Exception in %s call: %s: %s", self.name, type(e).__name__, e)
            vals = self.fail(df)
        if dropna:
            vals = self._dropna(vals)

        return vals

    def difference(self, data1, data2, **kwargs):
        """Computes difference between functor called on two different
        DataFrame/Handle objects.
        """
        return self(data1, **kwargs) - self(data2, **kwargs)

    def fail(self, df):
        """Return a NaN-filled Series matching ``df``'s index, used as the
        result when `_func` raises.
        """
        return pd.Series(np.full(len(df), np.nan), index=df.index)

    @property
    def name(self):
        """Full name of functor (suitable for figure labels)."""
        # NOTE(review): this *returns* the NotImplementedError class rather
        # than raising it, so f-strings using self.name render the class repr
        # instead of failing — confirm whether raising was intended before
        # changing, since __call__'s error logging formats self.name.
        return NotImplementedError

    @property
    def shortname(self):
        """Short name of functor (suitable for column name/dict key)."""
        return self.name

376 

377 

class CompositeFunctor(Functor):
    """Perform multiple calculations at once on a catalog.

    The role of a `CompositeFunctor` is to group together computations from
    multiple functors.
    Instead of returning `~pandas.Series` a `CompositeFunctor` returns a
    `~pandas.DataFrame`, with the column names being the keys of ``funcDict``.

    The `columns` attribute of a `CompositeFunctor` is the union of all columns
    in all the component functors.

    A `CompositeFunctor` does not use a `_func` method itself; rather, when a
    `CompositeFunctor` is called, all its columns are loaded at once, and the
    resulting DataFrame is passed to the `_func` method of each component
    functor.
    This has the advantage of only doing I/O (reading from parquet file) once,
    and works because each individual `_func` method of each component functor
    does not care if there are *extra* columns in the DataFrame being passed;
    only that it must contain *at least* the `columns` it expects.

    An important and useful class method is `from_yaml`, which takes as an
    argument the path to a YAML file specifying a collection of functors.

    Parameters
    ----------
    funcs : `dict` or `list`
        Dictionary or list of functors.
        If a list, then it will be converted into a dictionary according to the
        `.shortname` attribute of each functor.
    """
    dataset = None
    name = "CompositeFunctor"

    def __init__(self, funcs, **kwargs):

        # isinstance rather than a type equality check, so dict subclasses
        # (e.g. OrderedDict) are accepted too.
        if isinstance(funcs, dict):
            self.funcDict = funcs
        else:
            self.funcDict = {f.shortname: f for f in funcs}

        self._filt = None

        super().__init__(**kwargs)

    @property
    def filt(self):
        return self._filt

    @filt.setter
    def filt(self, filt):
        # Propagate the band to every component functor so they all read
        # columns from the same filter.
        if filt is not None:
            for f in self.funcDict.values():
                f.filt = filt
        self._filt = filt

    def update(self, new):
        """Update the functor with new functors.

        Parameters
        ----------
        new : `dict` or `CompositeFunctor`
            Functors to merge into this one, keyed by output column name.

        Raises
        ------
        TypeError
            If ``new`` is neither a dictionary nor a `CompositeFunctor`.
        """
        if isinstance(new, dict):
            self.funcDict.update(new)
        elif isinstance(new, CompositeFunctor):
            self.funcDict.update(new.funcDict)
        else:
            raise TypeError('Can only update with dictionary or CompositeFunctor.')

        # Make sure new functors have the same 'filt' set.
        if self.filt is not None:
            self.filt = self.filt

    @property
    def columns(self):
        """Union of the columns required by all component functors."""
        return list(set([x for y in [f.columns for f in self.funcDict.values()] for x in y]))

    def multilevelColumns(self, data, **kwargs):
        """Union of multilevel columns for all component functors."""
        # Note the need to have `returnTuple=True` here so the per-functor
        # results are hashable tuples that can be combined in a set.
        return list(
            set(
                [
                    x
                    for y in [
                        f.multilevelColumns(data, returnTuple=True, **kwargs) for f in self.funcDict.values()
                    ]
                    for x in y
                ]
            )
        )

    def __call__(self, data, **kwargs):
        """Apply the functor to the data table.

        Parameters
        ----------
        data : various
            The data represented as `~lsst.daf.butler.DeferredDatasetHandle`,
            `~lsst.pipe.base.InMemoryDatasetHandle`, or `~pandas.DataFrame`.
            The table or a pointer to a table on disk from which columns can
            be accessed.

        Returns
        -------
        valDf : `~pandas.DataFrame`
            One column per component functor, keyed by ``funcDict`` keys.
        """
        if isinstance(data, pd.DataFrame):
            _data = InMemoryDatasetHandle(data, storageClass="DataFrame")
        elif isinstance(data, (DeferredDatasetHandle, InMemoryDatasetHandle)):
            _data = data
        else:
            raise RuntimeError(f"Unexpected type provided for data. Got {get_full_type_name(data)}.")

        columnIndex = self._get_columnIndex(_data)

        if isinstance(columnIndex, pd.MultiIndex):
            # Load the union of all needed columns once, then hand each
            # functor its own sub-DataFrame.
            columns = self.multilevelColumns(_data, columnIndex=columnIndex)
            df = _data.get(parameters={"columns": columns})

            valDict = {}
            for k, f in self.funcDict.items():
                try:
                    subdf = f._setLevels(
                        df[f.multilevelColumns(_data, returnTuple=True, columnIndex=columnIndex)]
                    )
                    valDict[k] = f._func(subdf)
                except Exception as e:
                    self.log.exception(
                        "Exception in %s (funcs: %s) call: %s",
                        self.name,
                        str(list(self.funcDict.keys())),
                        type(e).__name__,
                    )
                    try:
                        valDict[k] = f.fail(subdf)
                    except NameError:
                        # `subdf` was never assigned (column selection itself
                        # failed), so there is nothing to build a fallback
                        # from; re-raise the original error.
                        raise e

        else:
            df = _data.get(parameters={"columns": self.columns})

            valDict = {k: f._func(df) for k, f in self.funcDict.items()}

            # Check that output columns are actually columns.
            for name, colVal in valDict.items():
                if len(colVal.shape) != 1:
                    raise RuntimeError("Transformed column '%s' is not the shape of a column. "
                                       "It is shaped %s and type %s." % (name, colVal.shape, type(colVal)))

        try:
            valDf = pd.concat(valDict, axis=1)
        except TypeError:
            # Surface which functor produced a non-concatenable value before
            # re-raising the original error.
            print([(k, type(v)) for k, v in valDict.items()])
            raise

        if kwargs.get('dropna', False):
            valDf = valDf.dropna(how='any')

        return valDf

    @classmethod
    def renameCol(cls, col, renameRules):
        """Rename ``col`` according to the first matching prefix rule in
        ``renameRules`` (a sequence of ``(old, new)`` pairs), or return it
        unchanged if ``renameRules`` is None.
        """
        if renameRules is None:
            return col
        for old, new in renameRules:
            if col.startswith(old):
                col = col.replace(old, new)
        return col

    @classmethod
    def from_file(cls, filename, **kwargs):
        """Construct a `CompositeFunctor` from a YAML file on disk."""
        # Allow environment variables in the filename.
        filename = os.path.expandvars(filename)
        with open(filename) as f:
            translationDefinition = yaml.safe_load(f)

        return cls.from_yaml(translationDefinition, **kwargs)

    @classmethod
    def from_yaml(cls, translationDefinition, **kwargs):
        """Construct a `CompositeFunctor` from a parsed YAML specification.

        The specification may contain a ``funcs`` mapping plus optional flag
        lists (``calexpFlags``, ``refFlags``, ``forcedFlags``, ``flags``) that
        are turned into plain `Column` functors on the corresponding dataset.
        """
        funcs = {}
        for func, val in translationDefinition['funcs'].items():
            funcs[func] = init_fromDict(val, name=func)

        if 'flag_rename_rules' in translationDefinition:
            renameRules = translationDefinition['flag_rename_rules']
        else:
            renameRules = None

        if 'calexpFlags' in translationDefinition:
            for flag in translationDefinition['calexpFlags']:
                funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='calexp')

        if 'refFlags' in translationDefinition:
            for flag in translationDefinition['refFlags']:
                funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='ref')

        if 'forcedFlags' in translationDefinition:
            for flag in translationDefinition['forcedFlags']:
                funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='forced_src')

        if 'flags' in translationDefinition:
            for flag in translationDefinition['flags']:
                funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='meas')

        return cls(funcs, **kwargs)

576 

577 

def mag_aware_eval(df, expr, log):
    """Evaluate an expression on a DataFrame, knowing what the 'mag' function
    means.

    Builds on `pandas.DataFrame.eval`, which parses and executes math on
    DataFrames.

    Parameters
    ----------
    df : ~pandas.DataFrame
        DataFrame on which to evaluate expression.

    expr : str
        Expression.
    """
    # Translate mag(x) into the equivalent log10-based magnitude expression
    # that DataFrame.eval understands.
    translated = re.sub(r'mag\((\w+)\)', r'-2.5*log(\g<1>)/log(10)', expr)
    try:
        result = df.eval(translated)
    except Exception as err:  # Should check what actually gets raised
        log.error("Exception in mag_aware_eval: %s: %s", type(err).__name__, err)
        # Retry assuming bare names inside mag() refer to *_instFlux columns.
        fallback = re.sub(r'mag\((\w+)\)', r'-2.5*log(\g<1>_instFlux)/log(10)', expr)
        result = df.eval(fallback)
    return result

601 

602 

class CustomFunctor(Functor):
    """Arbitrary computation on a catalog.

    Column names (and thus the columns to be loaded from catalog) are found by
    finding all words and trying to ignore all "math-y" words.

    Parameters
    ----------
    expr : str
        Expression to evaluate, to be parsed and executed by
        `~lsst.pipe.tasks.functors.mag_aware_eval`.
    """
    _ignore_words = ('mag', 'sin', 'cos', 'exp', 'log', 'sqrt')

    def __init__(self, expr, **kwargs):
        self.expr = expr
        super().__init__(**kwargs)

    @property
    def name(self):
        # The expression itself doubles as the display name.
        return self.expr

    @property
    def columns(self):
        # Names wrapped in mag(...) refer to fluxes, possibly lacking the
        # '_instFlux' suffix.
        flux_cols = re.findall(r'mag\(\s*(\w+)\s*\)', self.expr)

        # Any word-like token that is not a known math function is assumed to
        # be a column name.
        cols = [token for token in re.findall(r'[a-zA-Z_]+', self.expr)
                if token not in self._ignore_words]
        not_a_col = []
        for flux_col in flux_cols:
            if re.search('_instFlux$', flux_col):
                cols.append(flux_col)
            else:
                # The bare name is not itself a column; the real one carries
                # the '_instFlux' suffix.
                cols.append(f'{flux_col}_instFlux')
                not_a_col.append(flux_col)

        return list(set(c for c in cols if c not in not_a_col))

    def _func(self, df):
        return mag_aware_eval(df, self.expr, self.log)

642 

643 

class Column(Functor):
    """Get column with a specified name."""

    def __init__(self, col, **kwargs):
        self.col = col
        super().__init__(**kwargs)

    @property
    def name(self):
        # The column name serves as the functor name.
        return self.col

    @property
    def columns(self):
        # Exactly one column is needed: the one being fetched.
        return [self.col]

    def _func(self, df):
        # Pass the requested column straight through.
        return df[self.col]

661 

662 

class Index(Functor):
    """Return the value of the index for each object."""

    # Just a dummy; something has to be requested from the table.
    columns = ['coord_ra']
    _defaultDataset = 'ref'
    _defaultNoDup = True

    def _func(self, df):
        # The index is returned both as the values and as the index of the
        # resulting Series.
        idx = df.index
        return pd.Series(idx, index=idx)

672 

673 

class CoordColumn(Column):
    """Base class for coordinate column, in degrees."""
    # Input columns are stored in radians unless a subclass says otherwise.
    _radians = True

    def __init__(self, col, **kwargs):
        super().__init__(col, **kwargs)

    def _func(self, df):
        # Must not modify the original column in case that column is used by
        # another functor; the multiplication produces a new Series.
        values = df[self.col]
        if self._radians:
            return values * 180 / np.pi
        return values

686 

687 

class RAColumn(CoordColumn):
    """Right Ascension, in degrees."""
    name = 'RA'
    _defaultNoDup = True

    def __init__(self, **kwargs):
        # Always reads the standard 'coord_ra' column.
        super().__init__('coord_ra', **kwargs)

    def __call__(self, catalog, **kwargs):
        # Delegates directly to the base-class implementation.
        return super().__call__(catalog, **kwargs)

698 

699 

class DecColumn(CoordColumn):
    """Declination, in degrees."""
    name = 'Dec'
    _defaultNoDup = True

    def __init__(self, **kwargs):
        # Always reads the standard 'coord_dec' column.
        super().__init__('coord_dec', **kwargs)

    def __call__(self, catalog, **kwargs):
        # Delegates directly to the base-class implementation.
        return super().__call__(catalog, **kwargs)

710 

711 

class RAErrColumn(CoordColumn):
    """Uncertainty in Right Ascension, in degrees."""
    name = 'RAErr'
    _defaultNoDup = True

    def __init__(self, **kwargs):
        # Always reads the standard 'coord_raErr' column.
        super().__init__('coord_raErr', **kwargs)

719 

720 

class DecErrColumn(CoordColumn):
    """Uncertainty in declination, in degrees."""
    name = 'DecErr'
    _defaultNoDup = True

    def __init__(self, **kwargs):
        # Always reads the standard 'coord_decErr' column.
        super().__init__('coord_decErr', **kwargs)

728 

729 

class RADecCovColumn(Column):
    """Coordinate covariance column, in degrees."""
    _radians = True
    name = 'RADecCov'
    _defaultNoDup = True

    def __init__(self, **kwargs):
        # Always reads the standard RA/Dec covariance column.
        super().__init__('coord_ra_dec_Cov', **kwargs)

    def _func(self, df):
        # Covariance scales by the *square* of the rad->deg factor. Must not
        # modify the original column in case another functor uses it.
        raw = df[self.col]
        if self._radians:
            return raw * (180 / np.pi) ** 2
        return raw

744 

745 

class HtmIndex20(Functor):
    """Compute the level 20 HtmIndex for the catalog.

    Notes
    -----
    This functor was implemented to satisfy requirements of old APDB interface
    which required the ``pixelId`` column in DiaObject with HTM20 index.
    The APDB interface had migrated to not need that information, but we keep
    this class in case it may be useful for something else.
    """
    name = "Htm20"
    htmLevel = 20
    _radians = True

    def __init__(self, ra, dec, **kwargs):
        self.pixelator = sphgeom.HtmPixelization(self.htmLevel)
        self.ra = ra
        self.dec = dec
        self._columns = [self.ra, self.dec]
        super().__init__(**kwargs)

    def _func(self, df):
        # Pick the angle unit once, outside the per-row closure.
        angleUnit = geom.radians if self._radians else geom.degrees

        def rowToPixel(row):
            # Build a sphere point from the row's coordinates and look up its
            # HTM pixel index.
            point = geom.SpherePoint(row[self.ra], row[self.dec], angleUnit)
            return self.pixelator.index(point.getVector())

        return df.apply(rowToPixel, axis=1, result_type='reduce').astype('int64')

781 

782 

def fluxName(col):
    """Append _instFlux to the column name if it doesn't have it already."""
    return col if col.endswith('_instFlux') else col + '_instFlux'

788 

789 

def fluxErrName(col):
    """Append _instFluxErr to the column name if it doesn't have it already."""
    return col if col.endswith('_instFluxErr') else col + '_instFluxErr'

795 

796 

class Mag(Functor):
    """Compute calibrated magnitude.

    Returns the flux at mag=0.
    The default ``fluxMag0`` is 63095734448.0194, which is default for HSC.
    TO DO: This default should be made configurable in DM-21955.

    This calculation hides warnings about invalid values and dividing by zero.

    As with all functors, a ``dataset`` and ``filt`` kwarg should be provided
    upon initialization.
    Unlike the default `Functor`, however, the default dataset for a `Mag` is
    ``'meas'``, rather than ``'ref'``.

    Parameters
    ----------
    col : `str`
        Name of flux column from which to compute magnitude.
        Can be parseable by the `~lsst.pipe.tasks.functors.fluxName` function;
        that is, you can pass ``'modelfit_CModel'`` instead of
        ``'modelfit_CModel_instFlux'``, and it will understand.
    """
    _defaultDataset = 'meas'

    def __init__(self, col, **kwargs):
        # Normalize to the full '_instFlux' column name.
        self.col = fluxName(col)
        # TO DO: DM-21955 Replace hard coded photometic calibration values.
        self.fluxMag0 = 63095734448.0194

        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.col]

    def _func(self, df):
        with warnings.catch_warnings():
            # Zero or NaN fluxes are expected in catalogs; silence the numpy
            # warnings they would otherwise emit.
            warnings.filterwarnings('ignore', r'invalid value encountered')
            warnings.filterwarnings('ignore', r'divide by zero')
            return -2.5*np.log10(df[self.col] / self.fluxMag0)

    @property
    def name(self):
        return f'mag_{self.col}'

841 

842 

class MagErr(Mag):
    """Compute calibrated magnitude uncertainty.

    Parameters
    ----------
    col : `str`
        Name of the flux column.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # TO DO: DM-21955 Replace hard coded photometic calibration values.
        self.fluxMag0Err = 0.

    @property
    def columns(self):
        # The flux column plus its matching error column.
        return [self.col, self.col + 'Err']

    def _func(self, df):
        with warnings.catch_warnings():
            # Zero or NaN fluxes are expected; silence numpy's complaints.
            warnings.filterwarnings('ignore', r'invalid value encountered')
            warnings.filterwarnings('ignore', r'divide by zero')
            fluxCol, fluxErrCol = self.columns
            # Standard error propagation for mag = -2.5*log10(flux/fluxMag0):
            # add the relative flux error and zero-point error in quadrature.
            fluxTerm = df[fluxErrCol] / df[fluxCol]
            zeroPointTerm = self.fluxMag0Err / self.fluxMag0
            magErr = (2.5 / np.log(10.)) * np.sqrt(fluxTerm*fluxTerm + zeroPointTerm*zeroPointTerm)
            return magErr

    @property
    def name(self):
        return super().name + '_err'

874 

875 

class MagDiff(Functor):
    """Functor to calculate magnitude difference."""
    _defaultDataset = 'meas'

    def __init__(self, col1, col2, **kwargs):
        # Normalize both names to their full '_instFlux' forms.
        self.col1 = fluxName(col1)
        self.col2 = fluxName(col2)
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.col1, self.col2]

    def _func(self, df):
        with warnings.catch_warnings():
            # Zero or NaN fluxes are expected; silence numpy's complaints.
            warnings.filterwarnings('ignore', r'invalid value encountered')
            warnings.filterwarnings('ignore', r'divide by zero')
            # mag1 - mag2 equals -2.5*log10 of the flux ratio; the common
            # fluxMag0 zero point cancels out.
            return -2.5*np.log10(df[self.col1]/df[self.col2])

    @property
    def name(self):
        return f'(mag_{self.col1} - mag_{self.col2})'

    @property
    def shortname(self):
        return f'magDiff_{self.col1}_{self.col2}'

902 

903 

class Color(Functor):
    """Compute the color between two filters.

    Computes color by initializing two different `Mag` functors based on the
    ``col`` and filters provided, and then returning the difference.

    This is enabled by the `_func` method expecting a DataFrame with a
    multilevel column index, with both ``'band'`` and ``'column'``, instead of
    just ``'column'``, which is the `Functor` default.
    This is controlled by the `_dfLevels` attribute.

    Also of note, the default dataset for `Color` is ``forced_src'``, whereas
    for `Mag` it is ``'meas'``.

    Parameters
    ----------
    col : str
        Name of the flux column from which to compute; same as would be passed
        to `~lsst.pipe.tasks.functors.Mag`.

    filt2, filt1 : str
        Filters from which to compute magnitude difference.
        Color computed is ``Mag(filt2) - Mag(filt1)``.
    """
    _defaultDataset = 'forced_src'
    # _func needs both the band and column levels to select per-filter fluxes.
    _dfLevels = ('band', 'column')
    _defaultNoDup = True

    def __init__(self, col, filt2, filt1, **kwargs):
        self.col = fluxName(col)
        # Identical filters would always give a color of exactly zero.
        if filt2 == filt1:
            raise RuntimeError("Cannot compute Color for %s: %s - %s " % (col, filt2, filt1))
        self.filt2 = filt2
        self.filt1 = filt1

        # Delegate the actual magnitude computation to two Mag functors,
        # one per band.
        self.mag2 = Mag(col, filt=filt2, **kwargs)
        self.mag1 = Mag(col, filt=filt1, **kwargs)

        super().__init__(**kwargs)

    @property
    def filt(self):
        # A color spans two bands, so it has no single band of its own.
        return None

    @filt.setter
    def filt(self, filt):
        # Intentionally a no-op: the base __init__ assigns self.filt, and a
        # single band is meaningless for a two-band functor.
        pass

    def _func(self, df):
        # df is indexed by ('band', 'column'); selecting by band leaves a
        # plain column index that each Mag._func can consume.
        mag2 = self.mag2._func(df[self.filt2])
        mag1 = self.mag1._func(df[self.filt1])
        return mag2 - mag1

    @property
    def columns(self):
        return [self.mag1.col, self.mag2.col]

    def multilevelColumns(self, parq, **kwargs):
        # Same flux column requested in both bands of the same dataset.
        return [(self.dataset, self.filt1, self.col), (self.dataset, self.filt2, self.col)]

    @property
    def name(self):
        return f'{self.filt2} - {self.filt1} ({self.col})'

    @property
    def shortname(self):
        return f"{self.col}_{self.filt2.replace('-', '')}m{self.filt1.replace('-', '')}"

971 

972 

class DeconvolvedMoments(Functor):
    """Subtract the trace of the PSF second moments from the trace of the
    second moments of the source.

    If the HsmShapeAlgorithm measurement is valid, then these will be used
    for the sources.
    Otherwise, the SdssShapeAlgorithm measurements will be used.
    """
    name = 'Deconvolved Moments'
    shortname = 'deconvolvedMoments'
    _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
                "ext_shapeHSM_HsmSourceMoments_yy",
                "base_SdssShape_xx", "base_SdssShape_yy",
                "ext_shapeHSM_HsmPsfMoments_xx",
                "ext_shapeHSM_HsmPsfMoments_yy")

    def _func(self, df):
        """Calculate deconvolved moments."""
        # Source trace: prefer HSM moments; fall back to NaN placeholders
        # when the HSM columns are absent.
        if "ext_shapeHSM_HsmSourceMoments_xx" in df.columns:
            hsm = df["ext_shapeHSM_HsmSourceMoments_xx"] + df["ext_shapeHSM_HsmSourceMoments_yy"]
        else:
            hsm = np.ones(len(df))*np.nan
        sdss = df["base_SdssShape_xx"] + df["base_SdssShape_yy"]
        if "ext_shapeHSM_HsmPsfMoments_xx" not in df.columns:
            # LSST does not have shape.sdss.psf.
            # We could instead add base_PsfShape to the catalog using
            # exposure.getPsf().computeShape(s.getCentroid()).getIxx().
            raise RuntimeError('No psf shape parameter found in catalog')
        psf = df["ext_shapeHSM_HsmPsfMoments_xx"] + df["ext_shapeHSM_HsmPsfMoments_yy"]

        # Use the SDSS source trace wherever the HSM trace is non-finite.
        return hsm.where(np.isfinite(hsm), sdss) - psf

1005 

1006 

class SdssTraceSize(Functor):
    """Functor to calculate the SDSS trace radius size for sources.

    The SDSS trace radius size is a measure of size equal to the square root
    of half of the trace of the second moments tensor measured with the
    SdssShapeAlgorithm plugin.
    This has units of pixels.
    """
    name = "SDSS Trace Size"
    shortname = 'sdssTrace'
    _columns = ("base_SdssShape_xx", "base_SdssShape_yy")

    def _func(self, df):
        # sqrt((Ixx + Iyy)/2), in pixels.
        halfTrace = 0.5*(df["base_SdssShape_xx"] + df["base_SdssShape_yy"])
        return np.sqrt(halfTrace)

1022 

1023 

class PsfSdssTraceSizeDiff(Functor):
    """Functor to calculate the SDSS trace radius size difference (%) between
    the object and the PSF model.

    See Also
    --------
    SdssTraceSize
    """
    name = "PSF - SDSS Trace Size"
    shortname = 'psf_sdssTrace'
    _columns = ("base_SdssShape_xx", "base_SdssShape_yy",
                "base_SdssShape_psf_xx", "base_SdssShape_psf_yy")

    def _func(self, df):
        # Trace radii (pixels) for the source and the PSF model.
        sourceSize = np.sqrt(0.5*(df["base_SdssShape_xx"] + df["base_SdssShape_yy"]))
        modelSize = np.sqrt(0.5*(df["base_SdssShape_psf_xx"] + df["base_SdssShape_psf_yy"]))
        # Percent difference relative to the mean of the two sizes.
        return 100*(sourceSize - modelSize)/(0.5*(sourceSize + modelSize))

1042 

1043 

class HsmTraceSize(Functor):
    """Functor to calculate the HSM trace radius size for sources.

    The HSM trace radius size is a measure of size equal to the square root
    of half of the trace of the second moments tensor measured with the
    HsmShapeAlgorithm plugin.
    This has units of pixels.
    """
    name = 'HSM Trace Size'
    shortname = 'hsmTrace'
    _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
                "ext_shapeHSM_HsmSourceMoments_yy")

    def _func(self, df):
        # sqrt((Ixx + Iyy)/2), in pixels.
        halfTrace = 0.5*(df["ext_shapeHSM_HsmSourceMoments_xx"]
                         + df["ext_shapeHSM_HsmSourceMoments_yy"])
        return np.sqrt(halfTrace)

1061 

1062 

class PsfHsmTraceSizeDiff(Functor):
    """Functor to calculate the HSM trace radius size difference (%) between
    the object and the PSF model.

    See Also
    --------
    HsmTraceSize
    """
    name = 'PSF - HSM Trace Size'
    shortname = 'psf_HsmTrace'
    _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
                "ext_shapeHSM_HsmSourceMoments_yy",
                "ext_shapeHSM_HsmPsfMoments_xx",
                "ext_shapeHSM_HsmPsfMoments_yy")

    def _func(self, df):
        # Trace radii (pixels) for the source and the PSF model.
        sourceSize = np.sqrt(0.5*(df["ext_shapeHSM_HsmSourceMoments_xx"]
                                  + df["ext_shapeHSM_HsmSourceMoments_yy"]))
        modelSize = np.sqrt(0.5*(df["ext_shapeHSM_HsmPsfMoments_xx"]
                                 + df["ext_shapeHSM_HsmPsfMoments_yy"]))
        # Percent difference relative to the mean of the two sizes.
        return 100*(sourceSize - modelSize)/(0.5*(sourceSize + modelSize))

1085 

1086 

class HsmFwhm(Functor):
    """Functor to calculate the PSF FWHM with second moments measured from
    the HsmShapeAlgorithm plugin.

    This is in units of arcseconds, and assumes the hsc_rings_v1 skymap pixel
    scale of 0.168 arcseconds/pixel.

    Notes
    -----
    This conversion assumes the PSF is Gaussian, which is not always the
    case.
    """
    name = 'HSM Psf FWHM'
    _columns = ('ext_shapeHSM_HsmPsfMoments_xx', 'ext_shapeHSM_HsmPsfMoments_yy')
    # TODO: DM-21403 pixel scale should be computed from the CD matrix or transform matrix
    pixelScale = 0.168
    # FWHM of a Gaussian expressed in units of its sigma.
    SIGMA2FWHM = 2*np.sqrt(2*np.log(2))

    def _func(self, df):
        # Gaussian sigma from half of the PSF second-moment trace (pixels),
        # then converted to FWHM in arcseconds.
        halfTrace = 0.5*(df['ext_shapeHSM_HsmPsfMoments_xx']
                         + df['ext_shapeHSM_HsmPsfMoments_yy'])
        return self.pixelScale*self.SIGMA2FWHM*np.sqrt(halfTrace)

1107 

1108 

class E1(Functor):
    r"""Calculate :math:`e_1` ellipticity component for sources, defined as:

    .. math::
        e_1 &= (I_{xx}-I_{yy})/(I_{xx}+I_{yy})

    Parameters
    ----------
    colXX : str
        Name of the xx second-moment column.
    colXY : str
        Name of the xy second-moment column.
    colYY : str
        Name of the yy second-moment column.

    See Also
    --------
    E2
    """
    name = "Distortion Ellipticity (e1)"
    shortname = "Distortion"

    def __init__(self, colXX, colXY, colYY, **kwargs):
        self.colXX = colXX
        self.colXY = colXY
        self.colYY = colYY
        self._columns = [self.colXX, self.colXY, self.colYY]
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.colXX, self.colXY, self.colYY]

    def _func(self, df):
        # Parenthesize the numerator: without the parentheses operator
        # precedence computed Ixx - (Iyy/(Ixx+Iyy)), which contradicts the
        # documented definition e1 = (Ixx - Iyy)/(Ixx + Iyy).
        return (df[self.colXX] - df[self.colYY]) / (df[self.colXX] + df[self.colYY])

1135 

1136 

class E2(Functor):
    r"""Calculate :math:`e_2` ellipticity component for sources, defined as:

    .. math::
        e_2 &= 2I_{xy}/(I_{xx}+I_{yy})

    See Also
    --------
    E1
    """
    name = "Ellipticity e2"

    def __init__(self, colXX, colXY, colYY, **kwargs):
        self.colXX = colXX
        self.colXY = colXY
        self.colYY = colYY
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.colXX, self.colXY, self.colYY]

    def _func(self, df):
        # e2 = 2*Ixy / trace.
        trace = df[self.colXX] + df[self.colYY]
        return 2*df[self.colXY] / trace

1161 

1162 

class RadiusFromQuadrupole(Functor):
    """Calculate the radius from the quadrupole moments.

    This returns the fourth root of the determinant of the second moments
    tensor, which has units of pixels.

    See Also
    --------
    SdssTraceSize
    HsmTraceSize
    """

    def __init__(self, colXX, colXY, colYY, **kwargs):
        self.colXX = colXX
        self.colXY = colXY
        self.colYY = colYY
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.colXX, self.colXY, self.colYY]

    def _func(self, df):
        # det(Q) = Ixx*Iyy - Ixy^2; radius is its fourth root.
        determinant = df[self.colXX]*df[self.colYY] - df[self.colXY]**2
        return determinant**0.25

1187 

1188 

class LocalWcs(Functor):
    """Computations using the stored localWcs."""
    name = "LocalWcsOperations"

    def __init__(self,
                 colCD_1_1,
                 colCD_1_2,
                 colCD_2_1,
                 colCD_2_2,
                 **kwargs):
        self.colCD_1_1 = colCD_1_1
        self.colCD_1_2 = colCD_1_2
        self.colCD_2_1 = colCD_2_1
        self.colCD_2_2 = colCD_2_2
        super().__init__(**kwargs)

    def computeDeltaRaDec(self, x, y, cd11, cd12, cd21, cd22):
        """Apply the local Wcs affine transform to a pixel offset, giving
        the corresponding RA/Dec offset.

        Parameters
        ----------
        x : `~pandas.Series`
            X pixel coordinate.
        y : `~pandas.Series`
            Y pixel coordinate.
        cd11 : `~pandas.Series`
            [1, 1] element of the local Wcs affine transform.
        cd12 : `~pandas.Series`
            [1, 2] element of the local Wcs affine transform.
        cd21 : `~pandas.Series`
            [2, 1] element of the local Wcs affine transform.
        cd22 : `~pandas.Series`
            [2, 2] element of the local Wcs affine transform.

        Returns
        -------
        raDecTuple : tuple
            RA and dec conversion of x and y given the local Wcs.
            Returned units are in radians.
        """
        # Plain 2x2 matrix multiplication of (x, y) by the CD matrix.
        return (x * cd11 + y * cd12, x * cd21 + y * cd22)

    def computeSkySeparation(self, ra1, dec1, ra2, dec2):
        """Compute the great-circle separation between two sky positions.

        Parameters
        ----------
        ra1 : `~pandas.Series`
            Ra of the first coordinate in radians.
        dec1 : `~pandas.Series`
            Dec of the first coordinate in radians.
        ra2 : `~pandas.Series`
            Ra of the second coordinate in radians.
        dec2 : `~pandas.Series`
            Dec of the second coordinate in radians.

        Returns
        -------
        dist : `~pandas.Series`
            Distance on the sphere in radians.
        """
        # Haversine formula, numerically stable for small separations.
        halfDeltaDec = (dec2 - dec1) / 2
        halfDeltaRa = (ra2 - ra1) / 2
        haversine = (np.sin(halfDeltaDec) ** 2
                     + np.cos(dec2) * np.cos(dec1) * np.sin(halfDeltaRa) ** 2)
        return 2 * np.arcsin(np.sqrt(haversine))

    def getSkySeparationFromPixel(self, x1, y1, x2, y2, cd11, cd12, cd21, cd22):
        """Compute the on-sky distance between two pixel positions using the
        local Wcs.

        Parameters
        ----------
        x1 : `~pandas.Series`
            X pixel coordinate.
        y1 : `~pandas.Series`
            Y pixel coordinate.
        x2 : `~pandas.Series`
            X pixel coordinate.
        y2 : `~pandas.Series`
            Y pixel coordinate.
        cd11 : `~pandas.Series`
            [1, 1] element of the local Wcs affine transform.
        cd12 : `~pandas.Series`
            [1, 2] element of the local Wcs affine transform.
        cd21 : `~pandas.Series`
            [2, 1] element of the local Wcs affine transform.
        cd22 : `~pandas.Series`
            [2, 2] element of the local Wcs affine transform.

        Returns
        -------
        Distance : `~pandas.Series`
            Distance on the sphere in radians.
        """
        ra1, dec1 = self.computeDeltaRaDec(x1, y1, cd11, cd12, cd21, cd22)
        ra2, dec2 = self.computeDeltaRaDec(x2, y2, cd11, cd12, cd21, cd22)
        # Great circle distance for small separations.
        return self.computeSkySeparation(ra1, dec1, ra2, dec2)

1293 

1294 

class ComputePixelScale(LocalWcs):
    """Compute the local pixel scale from the stored CDMatrix.
    """
    name = "PixelScale"

    @property
    def columns(self):
        return [self.colCD_1_1,
                self.colCD_1_2,
                self.colCD_2_1,
                self.colCD_2_2]

    def pixelScaleArcseconds(self, cd11, cd12, cd21, cd22):
        """Compute the local pixel to scale conversion in arcseconds.

        Parameters
        ----------
        cd11 : `~pandas.Series`
            [1, 1] element of the local Wcs affine transform in radians.
        cd12 : `~pandas.Series`
            [1, 2] element of the local Wcs affine transform in radians.
        cd21 : `~pandas.Series`
            [2, 1] element of the local Wcs affine transform in radians.
        cd22 : `~pandas.Series`
            [2, 2] element of the local Wcs affine transform in radians.

        Returns
        -------
        pixScale : `~pandas.Series`
            Arcseconds per pixel at the location of the local WC.
        """
        # |det(CD)| is the pixel area; its square root is the linear scale,
        # converted from degrees to arcseconds.
        detCD = cd11 * cd22 - cd12 * cd21
        return 3600 * np.degrees(np.sqrt(np.fabs(detCD)))

    def _func(self, df):
        return self.pixelScaleArcseconds(df[self.colCD_1_1],
                                         df[self.colCD_1_2],
                                         df[self.colCD_2_1],
                                         df[self.colCD_2_2])

1335 

1336 

class ConvertPixelToArcseconds(ComputePixelScale):
    """Convert a value in units of pixels to units of arcseconds."""

    def __init__(self,
                 col,
                 colCD_1_1,
                 colCD_1_2,
                 colCD_2_1,
                 colCD_2_2,
                 **kwargs):
        # Column holding the pixel-valued quantity to convert.
        self.col = col
        super().__init__(colCD_1_1,
                         colCD_1_2,
                         colCD_2_1,
                         colCD_2_2,
                         **kwargs)

    @property
    def name(self):
        return f"{self.col}_asArcseconds"

    @property
    def columns(self):
        return [self.col,
                self.colCD_1_1,
                self.colCD_1_2,
                self.colCD_2_1,
                self.colCD_2_2]

    def _func(self, df):
        # Scale the pixel-valued column by the local arcsec/pixel scale.
        pixScale = self.pixelScaleArcseconds(df[self.colCD_1_1],
                                             df[self.colCD_1_2],
                                             df[self.colCD_2_1],
                                             df[self.colCD_2_2])
        return df[self.col] * pixScale

1371 

1372 

class ConvertPixelSqToArcsecondsSq(ComputePixelScale):
    """Convert a value in units of pixels squared to units of arcseconds
    squared.
    """

    def __init__(self,
                 col,
                 colCD_1_1,
                 colCD_1_2,
                 colCD_2_1,
                 colCD_2_2,
                 **kwargs):
        # Column holding the pixel^2-valued quantity to convert.
        self.col = col
        super().__init__(colCD_1_1,
                         colCD_1_2,
                         colCD_2_1,
                         colCD_2_2,
                         **kwargs)

    @property
    def name(self):
        return f"{self.col}_asArcsecondsSq"

    @property
    def columns(self):
        return [self.col,
                self.colCD_1_1,
                self.colCD_1_2,
                self.colCD_2_1,
                self.colCD_2_2]

    def _func(self, df):
        # An area converts with the square of the linear pixel scale.
        pixScale = self.pixelScaleArcseconds(df[self.colCD_1_1],
                                             df[self.colCD_1_2],
                                             df[self.colCD_2_1],
                                             df[self.colCD_2_2])
        return df[self.col] * pixScale * pixScale

1410 

1411 

class ReferenceBand(Functor):
    """Return the band used to seed multiband forced photometry.

    This functor is to be used on Object tables.
    It converts the boolean merge_measurements_{band} columns into a single
    string representing the first band for which merge_measurements_{band}
    is True.

    Assumes the default priority order of i, r, z, y, g, u.
    """
    name = 'Reference Band'
    shortname = 'refBand'

    @property
    def columns(self):
        # Priority order: the first True column wins.
        return ["merge_measurement_i",
                "merge_measurement_r",
                "merge_measurement_z",
                "merge_measurement_y",
                "merge_measurement_g",
                "merge_measurement_u"]

    def _func(self, df: pd.DataFrame) -> pd.Series:
        def getFilterAliasName(row):
            # idxmax returns the first column holding the max (True > False);
            # strip the prefix to leave just the band name.
            return row.idxmax().replace('merge_measurement_', '')

        # Skip columns that are unavailable, because this functor requests
        # the superset of bands that could be included in the object table.
        columns = [col for col in self.columns if col in df.columns]
        refBand = df[columns].apply(getFilterAliasName, axis=1,
                                    result_type='reduce')
        # astype('object') keeps the dtype stable even when df is empty.
        return refBand.astype('object')

1446 

1447 

class Photometry(Functor):
    """Base class for Object table calibrated fluxes and magnitudes."""
    # AB to NanoJansky (3631 Jansky).
    AB_FLUX_SCALE = (0 * u.ABmag).to_value(u.nJy)
    LOG_AB_FLUX_SCALE = 12.56
    # 2.5/ln(10), the factor relating relative flux error to magnitude error.
    FIVE_OVER_2LOG10 = 1.085736204758129569
    # TO DO: DM-21955 Replace hard coded photometic calibration values.
    COADD_ZP = 27

    def __init__(self, colFlux, colFluxErr=None, **kwargs):
        # Elementwise overflow-safe hypot, used by dn2fluxErr.
        self.vhypot = np.vectorize(self.hypot)
        self.col = colFlux
        self.colFluxErr = colFluxErr

        # Instrumental flux corresponding to magnitude COADD_ZP.
        self.fluxMag0 = 1./np.power(10, -0.4*self.COADD_ZP)
        self.fluxMag0Err = 0.

        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.col]

    @property
    def name(self):
        return f'mag_{self.col}'

    @classmethod
    def hypot(cls, a, b):
        """Compute sqrt(a^2 + b^2) without under/overflow."""
        # Make |a| the larger magnitude, then factor it out of the root.
        if np.abs(a) < np.abs(b):
            a, b = b, a
        if a == 0.:
            return 0.
        ratio = b/a
        return np.abs(a) * np.sqrt(1. + ratio*ratio)

    def dn2flux(self, dn, fluxMag0):
        """Convert instrumental flux to nanojanskys."""
        return self.AB_FLUX_SCALE * dn / fluxMag0

    def dn2mag(self, dn, fluxMag0):
        """Convert instrumental flux to AB magnitude."""
        # Non-positive fluxes produce NaN; silence the numpy warnings.
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', r'invalid value encountered')
            warnings.filterwarnings('ignore', r'divide by zero')
            return -2.5 * np.log10(dn/fluxMag0)

    def dn2fluxErr(self, dn, dnErr, fluxMag0, fluxMag0Err):
        """Convert instrumental flux error to nanojanskys."""
        # Quadrature sum of the calibration and measurement error terms.
        combined = self.vhypot(dn * fluxMag0Err, dnErr * fluxMag0)
        return combined * self.AB_FLUX_SCALE / fluxMag0 / fluxMag0

    def dn2MagErr(self, dn, dnErr, fluxMag0, fluxMag0Err):
        """Convert instrumental flux error to AB magnitude error."""
        relErr = self.dn2fluxErr(dn, dnErr, fluxMag0, fluxMag0Err) / self.dn2flux(dn, fluxMag0)
        return self.FIVE_OVER_2LOG10 * relErr

1506 

1507 

class NanoJansky(Photometry):
    """Convert instrumental flux to nanojanskys."""

    def _func(self, df):
        # Calibrate with the fixed coadd zeropoint flux.
        return self.dn2flux(df[self.col], self.fluxMag0)

1512 

1513 

class NanoJanskyErr(Photometry):
    """Convert instrumental flux error to nanojanskys."""

    @property
    def columns(self):
        return [self.col, self.colFluxErr]

    def _func(self, df):
        fluxErr = self.dn2fluxErr(df[self.col], df[self.colFluxErr],
                                  self.fluxMag0, self.fluxMag0Err)
        # vhypot returns an ndarray; re-wrap it with the original index.
        return pd.Series(fluxErr, index=df.index)

1523 

1524 

class LocalPhotometry(Functor):
    """Base class for calibrating the specified instrument flux column using
    the local photometric calibration.

    Parameters
    ----------
    instFluxCol : `str`
        Name of the instrument flux column.
    instFluxErrCol : `str`
        Name of the associated error columns for ``instFluxCol``.
    photoCalibCol : `str`
        Name of local calibration column.
    photoCalibErrCol : `str`
        Error associated with ``photoCalibCol``

    See Also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    """
    # AB magnitude of a 1 nJy source; added to convert log-flux to mag.
    logNJanskyToAB = (1 * u.nJy).to_value(u.ABmag)

    def __init__(self,
                 instFluxCol,
                 instFluxErrCol,
                 photoCalibCol,
                 photoCalibErrCol,
                 **kwargs):
        self.instFluxCol = instFluxCol
        self.instFluxErrCol = instFluxErrCol
        self.photoCalibCol = photoCalibCol
        self.photoCalibErrCol = photoCalibErrCol
        super().__init__(**kwargs)

    def instFluxToNanojansky(self, instFlux, localCalib):
        """Convert instrument flux to nanojanskys.

        Parameters
        ----------
        instFlux : `~numpy.ndarray` or `~pandas.Series`
            Array of instrument flux measurements.
        localCalib : `~numpy.ndarray` or `~pandas.Series`
            Array of local photometric calibration estimates.

        Returns
        -------
        calibFlux : `~numpy.ndarray` or `~pandas.Series`
            Array of calibrated flux measurements.
        """
        return instFlux * localCalib

    def instFluxErrToNanojanskyErr(self, instFlux, instFluxErr, localCalib, localCalibErr):
        """Convert instrument flux error to nanojansky error.

        Parameters
        ----------
        instFlux : `~numpy.ndarray` or `~pandas.Series`
            Array of instrument flux measurements.
        instFluxErr : `~numpy.ndarray` or `~pandas.Series`
            Errors on associated ``instFlux`` values.
        localCalib : `~numpy.ndarray` or `~pandas.Series`
            Array of local photometric calibration estimates.
        localCalibErr : `~numpy.ndarray` or `~pandas.Series`
            Errors on associated ``localCalib`` values.

        Returns
        -------
        calibFluxErr : `~numpy.ndarray` or `~pandas.Series`
            Errors on calibrated flux measurements.
        """
        # Quadrature sum of the two first-order error terms of
        # flux = instFlux * localCalib.
        return np.hypot(instFluxErr * localCalib, instFlux * localCalibErr)

    def instFluxToMagnitude(self, instFlux, localCalib):
        """Convert instrument flux to AB magnitude.

        Parameters
        ----------
        instFlux : `~numpy.ndarray` or `~pandas.Series`
            Array of instrument flux measurements.
        localCalib : `~numpy.ndarray` or `~pandas.Series`
            Array of local photometric calibration estimates.

        Returns
        -------
        calibMag : `~numpy.ndarray` or `~pandas.Series`
            Array of calibrated AB magnitudes.
        """
        return -2.5 * np.log10(self.instFluxToNanojansky(instFlux, localCalib)) + self.logNJanskyToAB

    def instFluxErrToMagnitudeErr(self, instFlux, instFluxErr, localCalib, localCalibErr):
        """Convert instrument flux error to AB magnitude error.

        Parameters
        ----------
        instFlux : `~numpy.ndarray` or `~pandas.Series`
            Array of instrument flux measurements.
        instFluxErr : `~numpy.ndarray` or `~pandas.Series`
            Errors on associated ``instFlux`` values.
        localCalib : `~numpy.ndarray` or `~pandas.Series`
            Array of local photometric calibration estimates.
        localCalibErr : `~numpy.ndarray` or `~pandas.Series`
            Errors on associated ``localCalib`` values.

        Returns
        -------
        calibMagErr: `~numpy.ndarray` or `~pandas.Series`
            Error on calibrated AB magnitudes.
        """
        err = self.instFluxErrToNanojanskyErr(instFlux, instFluxErr, localCalib, localCalibErr)
        # Magnitude error is (2.5/ln 10) * (flux error / flux), where the
        # flux must be the calibrated flux instFlux*localCalib.  Previously
        # ``instFluxErr`` was passed here in place of ``localCalib``, which
        # divided by instFlux*instFluxErr and produced dimensionally wrong
        # magnitude errors.
        return 2.5 / np.log(10) * err / self.instFluxToNanojansky(instFlux, localCalib)

1635 

1636 

class LocalNanojansky(LocalPhotometry):
    """Compute calibrated fluxes using the local calibration value.

    This returns units of nanojanskys.
    """

    @property
    def columns(self):
        return [self.instFluxCol, self.photoCalibCol]

    @property
    def name(self):
        return f'flux_{self.instFluxCol}'

    def _func(self, df):
        # Apply the per-row local calibration to the instrumental flux.
        return self.instFluxToNanojansky(df[self.instFluxCol],
                                         df[self.photoCalibCol])

1653 

1654 

class LocalNanojanskyErr(LocalPhotometry):
    """Compute calibrated flux errors using the local calibration value.

    This returns units of nanojanskys.
    """

    @property
    def columns(self):
        return [self.instFluxCol, self.instFluxErrCol,
                self.photoCalibCol, self.photoCalibErrCol]

    @property
    def name(self):
        return f'fluxErr_{self.instFluxCol}'

    def _func(self, df):
        # Propagate both the flux and calibration errors per row.
        return self.instFluxErrToNanojanskyErr(
            df[self.instFluxCol], df[self.instFluxErrCol],
            df[self.photoCalibCol], df[self.photoCalibErrCol])

1673 

1674 

class LocalDipoleMeanFlux(LocalPhotometry):
    """Compute absolute mean of dipole fluxes.

    See Also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalDipoleMeanFluxErr
    LocalDipoleDiffFlux
    LocalDipoleDiffFluxErr
    """
    def __init__(self,
                 instFluxPosCol,
                 instFluxNegCol,
                 instFluxPosErrCol,
                 instFluxNegErrCol,
                 photoCalibCol,
                 photoCalibErrCol,
                 **kwargs):
        self.instFluxNegCol = instFluxNegCol
        self.instFluxPosCol = instFluxPosCol
        self.instFluxNegErrCol = instFluxNegErrCol
        self.instFluxPosErrCol = instFluxPosErrCol
        self.photoCalibCol = photoCalibCol
        self.photoCalibErrCol = photoCalibErrCol
        # The base class only tracks a single flux column; hand it the
        # negative lobe (both lobes are kept as attributes above).
        super().__init__(instFluxNegCol,
                         instFluxNegErrCol,
                         photoCalibCol,
                         photoCalibErrCol,
                         **kwargs)

    @property
    def columns(self):
        return [self.instFluxPosCol,
                self.instFluxNegCol,
                self.photoCalibCol]

    @property
    def name(self):
        return f'dipMeanFlux_{self.instFluxPosCol}_{self.instFluxNegCol}'

    def _func(self, df):
        # Mean of the absolute calibrated fluxes of the two dipole lobes.
        calib = df[self.photoCalibCol]
        posFlux = np.fabs(self.instFluxToNanojansky(df[self.instFluxPosCol], calib))
        negFlux = np.fabs(self.instFluxToNanojansky(df[self.instFluxNegCol], calib))
        return 0.5*(negFlux + posFlux)

1719 

1720 

class LocalDipoleMeanFluxErr(LocalDipoleMeanFlux):
    """Compute the error on the absolute mean of dipole fluxes.

    See Also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalDipoleMeanFlux
    LocalDipoleDiffFlux
    LocalDipoleDiffFluxErr
    """

    @property
    def columns(self):
        return [self.instFluxPosCol,
                self.instFluxNegCol,
                self.instFluxPosErrCol,
                self.instFluxNegErrCol,
                self.photoCalibCol,
                self.photoCalibErrCol]

    @property
    def name(self):
        return f'dipMeanFluxErr_{self.instFluxPosCol}_{self.instFluxNegCol}'

    def _func(self, df):
        # Error propagation for mean = 0.5*(|neg| + |pos|)*calib:
        # the calibration-error term scales with (|neg| + |pos|), and each
        # instrumental error scales with the calibration.  The sum
        # (|neg| + |pos|) must be parenthesized before multiplying by the
        # calibration error; previously only |pos| was multiplied, which
        # mis-weighted the calibration term (compare the correctly
        # parenthesized LocalDipoleDiffFluxErr).
        return 0.5*np.sqrt(
            ((np.fabs(df[self.instFluxNegCol]) + np.fabs(df[self.instFluxPosCol]))
             * df[self.photoCalibErrCol])**2
            + (df[self.instFluxNegErrCol]**2 + df[self.instFluxPosErrCol]**2)
            * df[self.photoCalibCol]**2)

1752 

1753 

class LocalDipoleDiffFlux(LocalDipoleMeanFlux):
    """Compute the absolute difference of dipole fluxes.

    Calculated value is (abs(pos) - abs(neg)).

    See Also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalDipoleMeanFlux
    LocalDipoleMeanFluxErr
    LocalDipoleDiffFluxErr
    """

    @property
    def columns(self):
        return [self.instFluxPosCol,
                self.instFluxNegCol,
                self.photoCalibCol]

    @property
    def name(self):
        return f'dipDiffFlux_{self.instFluxPosCol}_{self.instFluxNegCol}'

    def _func(self, df):
        # Difference of the absolute calibrated fluxes of the two lobes.
        calib = df[self.photoCalibCol]
        posFlux = np.fabs(self.instFluxToNanojansky(df[self.instFluxPosCol], calib))
        negFlux = np.fabs(self.instFluxToNanojansky(df[self.instFluxNegCol], calib))
        return posFlux - negFlux

1781 

1782 

class LocalDipoleDiffFluxErr(LocalDipoleMeanFlux):
    """Compute the error on the absolute difference of dipole fluxes.

    See Also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalDipoleMeanFlux
    LocalDipoleMeanFluxErr
    LocalDipoleDiffFlux
    """

    @property
    def columns(self):
        return [self.instFluxPosCol,
                self.instFluxNegCol,
                self.instFluxPosErrCol,
                self.instFluxNegErrCol,
                self.photoCalibCol,
                self.photoCalibErrCol]

    @property
    def name(self):
        return f'dipDiffFluxErr_{self.instFluxPosCol}_{self.instFluxNegCol}'

    def _func(self, df):
        # Error propagation for diff = (|pos| - |neg|)*calib: a calibration
        # term scaled by the flux difference plus the instrumental error
        # terms scaled by the calibration, summed in quadrature.
        calibTerm = ((np.fabs(df[self.instFluxPosCol]) - np.fabs(df[self.instFluxNegCol]))
                     * df[self.photoCalibErrCol])**2
        fluxTerm = ((df[self.instFluxPosErrCol]**2 + df[self.instFluxNegErrCol]**2)
                    * df[self.photoCalibCol]**2)
        return np.sqrt(calibTerm + fluxTerm)

1814 

1815 

class Ebv(Functor):
    """Compute E(B-V) from dustmaps.sfd."""
    _defaultDataset = 'ref'
    name = "E(B-V)"
    shortname = "ebv"

    def __init__(self, **kwargs):
        # Import is only needed for Ebv.
        # Suppress unnecessary .dustmapsrc log message on import.
        with open(os.devnull, "w") as devnull:
            with redirect_stdout(devnull):
                from dustmaps.sfd import SFDQuery
        self._columns = ['coord_ra', 'coord_dec']
        # Query object for the SFD dust map; loads the map once per functor.
        self.sfd = SFDQuery()
        super().__init__(**kwargs)

    def _func(self, df):
        # coord_ra/coord_dec are stored in radians.
        coords = SkyCoord(df['coord_ra'].values * u.rad, df['coord_dec'].values * u.rad)
        ebv = self.sfd(coords)
        # Double precision unnecessary scientifically but currently needed for
        # ingest to qserv.
        return pd.Series(ebv, index=df.index).astype('float64')