Coverage for python/lsst/pipe/tasks/functors.py: 42%

745 statements  

« prev     ^ index     » next       coverage.py v7.3.1, created at 2023-09-27 12:31 +0000

1# This file is part of pipe_tasks. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

# Public API of this module, in definition order; consumed by
# ``from lsst.pipe.tasks.functors import *`` and by the docs build.
__all__ = ["init_fromDict", "Functor", "CompositeFunctor", "mag_aware_eval",
           "CustomFunctor", "Column", "Index", "CoordColumn", "RAColumn",
           "DecColumn", "HtmIndex20", "fluxName", "fluxErrName", "Mag",
           "MagErr", "MagDiff", "Color", "DeconvolvedMoments", "SdssTraceSize",
           "PsfSdssTraceSizeDiff", "HsmTraceSize", "PsfHsmTraceSizeDiff",
           "HsmFwhm", "E1", "E2", "RadiusFromQuadrupole", "LocalWcs",
           "ComputePixelScale", "ConvertPixelToArcseconds",
           "ConvertPixelSqToArcsecondsSq", "ReferenceBand", "Photometry",
           "NanoJansky", "NanoJanskyErr", "LocalPhotometry", "LocalNanojansky",
           "LocalNanojanskyErr", "LocalDipoleMeanFlux",
           "LocalDipoleMeanFluxErr", "LocalDipoleDiffFlux",
           "LocalDipoleDiffFluxErr", "Ebv",
           ]

35 

36import yaml 

37import re 

38from itertools import product 

39import logging 

40import os.path 

41import warnings 

42 

43import pandas as pd 

44import numpy as np 

45import astropy.units as u 

46from astropy.coordinates import SkyCoord 

47 

48from lsst.utils import doImport 

49from lsst.utils.introspection import get_full_type_name 

50from lsst.daf.butler import DeferredDatasetHandle 

51from lsst.pipe.base import InMemoryDatasetHandle 

52import lsst.geom as geom 

53import lsst.sphgeom as sphgeom 

54 

55 

def init_fromDict(initDict, basePath='lsst.pipe.tasks.functors',
                  typeKey='functor', name=None):
    """Initialize an object defined in a dictionary.

    The object needs to be importable as f'{basePath}.{initDict[typeKey]}'.
    The positional and keyword arguments (if any) are contained in "args" and
    "kwargs" entries in the dictionary, respectively.
    This is used in `~lsst.pipe.tasks.functors.CompositeFunctor.from_yaml` to
    initialize a composite functor from a specification in a YAML file.

    Parameters
    ----------
    initDict : dictionary
        Dictionary describing object's initialization.
        Must contain an entry keyed by ``typeKey`` that is the name of the
        object, relative to ``basePath``.
    basePath : str
        Path relative to module in which ``initDict[typeKey]`` is defined.
    typeKey : str
        Key of ``initDict`` that is the name of the object (relative to
        ``basePath``).
    name : str, optional
        Label for the object being constructed; used only to build a more
        informative error message if construction fails.
    """
    initDict = initDict.copy()
    # TO DO: DM-21956 We should be able to define functors outside this module
    pythonType = doImport(f'{basePath}.{initDict.pop(typeKey)}')
    args = []
    if 'args' in initDict:
        args = initDict.pop('args')
        if isinstance(args, str):
            # A lone string is shorthand for a single positional argument.
            args = [args]
    try:
        element = pythonType(*args, **initDict)
    except Exception as e:
        message = f'Error in constructing functor "{name}" of type {pythonType.__name__} with args: {args}'
        # Chain the original exception so the root cause and its traceback
        # are preserved; the unchained re-raise previously discarded them.
        raise type(e)(message, e.args) from e
    return element

92 

93 

class Functor(object):
    """Define and execute a calculation on a DataFrame or Handle holding a
    DataFrame.

    The `__call__` method accepts either a `~pandas.DataFrame` object or a
    `~lsst.daf.butler.DeferredDatasetHandle` or
    `~lsst.pipe.base.InMemoryDatasetHandle`, and returns the
    result of the calculation as a single column.
    Each functor defines what columns are needed for the calculation, and only
    these columns are read from the dataset handle.

    The action of `__call__` consists of two steps: first, loading the
    necessary columns from disk into memory as a `~pandas.DataFrame` object;
    and second, performing the computation on this DataFrame and returning the
    result.

    To define a new `Functor`, a subclass must define a `_func` method,
    that takes a `~pandas.DataFrame` and returns result in a `~pandas.Series`.
    In addition, it must define the following attributes:

    * `_columns`: The columns necessary to perform the calculation
    * `name`: A name appropriate for a figure axis label
    * `shortname`: A name appropriate for use as a dictionary key

    On initialization, a `Functor` should declare what band (``filt`` kwarg)
    and dataset (e.g. ``'ref'``, ``'meas'``, ``'forced_src'``) it is intended
    to be applied to.
    This enables the `_get_data` method to extract the proper columns from the
    underlying data.
    If not specified, the dataset will fall back on the `_defaultDataset`
    attribute.
    If band is not specified and ``dataset`` is anything other than ``'ref'``,
    then an error will be raised when trying to perform the calculation.

    Originally, `Functor` was set up to expect datasets formatted like the
    ``deepCoadd_obj`` dataset; that is, a DataFrame with a multi-level column
    index, with the levels of the column index being ``band``, ``dataset``, and
    ``column``.
    It has since been generalized to apply to DataFrames without multi-level
    indices and multi-level indices with just ``dataset`` and ``column``
    levels.
    In addition, the `_get_data` method that reads the columns from the
    underlying data will return a DataFrame with column index levels defined by
    the `_dfLevels` attribute; by default, this is ``column``.

    The `_dfLevels` attributes should generally not need to be changed, unless
    `_func` needs columns from multiple filters or datasets to do the
    calculation.
    An example of this is the `~lsst.pipe.tasks.functors.Color` functor, for
    which `_dfLevels = ('band', 'column')`, and `_func` expects the DataFrame
    it gets to have those levels in the column index.

    Parameters
    ----------
    filt : str
        Band upon which to do the calculation.

    dataset : str
        Dataset upon which to do the calculation (e.g., 'ref', 'meas',
        'forced_src').
    """

    # Class-level defaults; subclasses override these to change behavior.
    _defaultDataset = 'ref'
    _dfLevels = ('column',)
    _defaultNoDup = False

    def __init__(self, filt=None, dataset=None, noDup=None):
        self.filt = filt
        self.dataset = dataset if dataset is not None else self._defaultDataset
        # Stored privately; the public `noDup` property falls back to the
        # class default when this is None.
        self._noDup = noDup
        # Logger is named after the concrete subclass for easier filtering.
        self.log = logging.getLogger(type(self).__name__)

    @property
    def noDup(self):
        """Do not explode by band if used on object table."""
        if self._noDup is not None:
            return self._noDup
        else:
            return self._defaultNoDup

    @property
    def columns(self):
        """Columns required to perform calculation."""
        if not hasattr(self, '_columns'):
            raise NotImplementedError('Must define columns property or _columns attribute')
        return self._columns

    def _get_data_columnLevels(self, data, columnIndex=None):
        """Gets the names of the column index levels.

        This should only be called in the context of a multilevel table.

        Parameters
        ----------
        data : various
            The data to be read, can be a
            `~lsst.daf.butler.DeferredDatasetHandle` or
            `~lsst.pipe.base.InMemoryDatasetHandle`.
        columnIndex (optional): pandas `~pandas.Index` object
            If not passed, then it is read from the
            `~lsst.daf.butler.DeferredDatasetHandle`
            or `~lsst.pipe.base.InMemoryDatasetHandle`.
        """
        if columnIndex is None:
            columnIndex = data.get(component="columns")
        return columnIndex.names

    def _get_data_columnLevelNames(self, data, columnIndex=None):
        """Gets the content of each of the column levels for a multilevel
        table.
        """
        if columnIndex is None:
            columnIndex = data.get(component="columns")

        columnLevels = columnIndex.names
        # For each level, collect the sorted unique values appearing in the
        # column tuples at that position.
        columnLevelNames = {
            level: list(np.unique(np.array([c for c in columnIndex])[:, i]))
            for i, level in enumerate(columnLevels)
        }
        return columnLevelNames

    def _colsFromDict(self, colDict, columnIndex=None):
        """Converts dictionary column specification to a list of columns."""
        new_colDict = {}
        # `data` is unused when columnIndex is supplied, hence None here.
        columnLevels = self._get_data_columnLevels(None, columnIndex=columnIndex)

        for i, lev in enumerate(columnLevels):
            if lev in colDict:
                if isinstance(colDict[lev], str):
                    # Normalize a scalar entry to a one-element list.
                    new_colDict[lev] = [colDict[lev]]
                else:
                    new_colDict[lev] = colDict[lev]
            else:
                # Level not constrained: take every value it can have.
                new_colDict[lev] = columnIndex.levels[i]

        # Cartesian product over levels, filtered to tuples that actually
        # exist in the column index.
        levelCols = [new_colDict[lev] for lev in columnLevels]
        cols = list(product(*levelCols))
        colsAvailable = [col for col in cols if col in columnIndex]
        return colsAvailable

    def multilevelColumns(self, data, columnIndex=None, returnTuple=False):
        """Returns columns needed by functor from multilevel dataset.

        To access tables with multilevel column structure, the
        `~lsst.daf.butler.DeferredDatasetHandle` or
        `~lsst.pipe.base.InMemoryDatasetHandle` needs to be passed
        either a list of tuples or a dictionary.

        Parameters
        ----------
        data : various
            The data as either `~lsst.daf.butler.DeferredDatasetHandle`, or
            `~lsst.pipe.base.InMemoryDatasetHandle`.
        columnIndex (optional): pandas `~pandas.Index` object
            Either passed or read in from
            `~lsst.daf.butler.DeferredDatasetHandle`.
        `returnTuple` : `bool`
            If true, then return a list of tuples rather than the column
            dictionary specification.
            This is set to `True` by `CompositeFunctor` in order to be able to
            combine columns from the various component functors.

        """
        if not isinstance(data, (DeferredDatasetHandle, InMemoryDatasetHandle)):
            raise RuntimeError(f"Unexpected data type. Got {get_full_type_name(data)}.")

        if columnIndex is None:
            columnIndex = data.get(component="columns")

        # Confirm that the dataset has the column levels the functor is
        # expecting it to have.
        columnLevels = self._get_data_columnLevels(data, columnIndex)

        columnDict = {'column': self.columns,
                      'dataset': self.dataset}
        if self.filt is None:
            columnLevelNames = self._get_data_columnLevelNames(data, columnIndex)
            if "band" in columnLevels:
                if self.dataset == "ref":
                    # 'ref' values are band-independent; pick the first band
                    # arbitrarily.
                    columnDict["band"] = columnLevelNames["band"][0]
                else:
                    raise ValueError(f"'filt' not set for functor {self.name}"
                                     f"(dataset {self.dataset}) "
                                     "and DataFrame "
                                     "contains multiple filters in column index. "
                                     "Set 'filt' or set 'dataset' to 'ref'.")
        else:
            columnDict['band'] = self.filt

        if returnTuple:
            return self._colsFromDict(columnDict, columnIndex=columnIndex)
        else:
            return columnDict

    def _func(self, df, dropna=True):
        # Subclasses must override; this base implementation always raises.
        raise NotImplementedError('Must define calculation on DataFrame')

    def _get_columnIndex(self, data):
        """Return columnIndex."""

        # Only handles expose a "columns" component; plain DataFrames are
        # signalled by returning None.
        if isinstance(data, (DeferredDatasetHandle, InMemoryDatasetHandle)):
            return data.get(component="columns")
        else:
            return None

    def _get_data(self, data):
        """Retrieve DataFrame necessary for calculation.

        The data argument can be a `~pandas.DataFrame`, a
        `~lsst.daf.butler.DeferredDatasetHandle`, or
        an `~lsst.pipe.base.InMemoryDatasetHandle`.

        Returns a DataFrame upon which `self._func` can act.
        """
        # We wrap a DataFrame in a handle here to take advantage of the
        # DataFrame delegate DataFrame column wrangling abilities.
        if isinstance(data, pd.DataFrame):
            _data = InMemoryDatasetHandle(data, storageClass="DataFrame")
        elif isinstance(data, (DeferredDatasetHandle, InMemoryDatasetHandle)):
            _data = data
        else:
            raise RuntimeError(f"Unexpected type provided for data. Got {get_full_type_name(data)}.")

        # First thing to do: check to see if the data source has a multilevel
        # column index or not.
        columnIndex = self._get_columnIndex(_data)
        is_multiLevel = isinstance(columnIndex, pd.MultiIndex)

        # Get proper columns specification for this functor.
        if is_multiLevel:
            columns = self.multilevelColumns(_data, columnIndex=columnIndex)
        else:
            columns = self.columns

        # Load in-memory DataFrame with appropriate columns the gen3 way.
        df = _data.get(parameters={"columns": columns})

        # Drop unnecessary column levels.
        if is_multiLevel:
            df = self._setLevels(df)

        return df

    def _setLevels(self, df):
        # Keep only the column-index levels listed in `_dfLevels`.
        levelsToDrop = [n for n in df.columns.names if n not in self._dfLevels]
        df.columns = df.columns.droplevel(levelsToDrop)
        return df

    def _dropna(self, vals):
        # Hook so subclasses can customize NaN removal.
        return vals.dropna()

    def __call__(self, data, dropna=False):
        # Load the needed columns, run the calculation, and fall back to
        # `fail` (all-NaN) if the calculation raises.
        df = self._get_data(data)
        try:
            vals = self._func(df)
        except Exception as e:
            self.log.error("Exception in %s call: %s: %s", self.name, type(e).__name__, e)
            vals = self.fail(df)
        if dropna:
            vals = self._dropna(vals)

        return vals

    def difference(self, data1, data2, **kwargs):
        """Computes difference between functor called on two different
        DataFrame/Handle objects.
        """
        return self(data1, **kwargs) - self(data2, **kwargs)

    def fail(self, df):
        # All-NaN Series with the same index; used as the error fallback.
        return pd.Series(np.full(len(df), np.nan), index=df.index)

    @property
    def name(self):
        """Full name of functor (suitable for figure labels)."""
        # NOTE(review): this *returns* (not raises) NotImplementedError;
        # callers only interpolate `name` into labels/log messages.
        return NotImplementedError

    @property
    def shortname(self):
        """Short name of functor (suitable for column name/dict key)."""
        return self.name

375 

376 

class CompositeFunctor(Functor):
    """Perform multiple calculations at once on a catalog.

    The role of a `CompositeFunctor` is to group together computations from
    multiple functors.
    Instead of returning `~pandas.Series` a `CompositeFunctor` returns a
    `~pandas.DataFrame`, with the column names being the keys of ``funcDict``.

    The `columns` attribute of a `CompositeFunctor` is the union of all columns
    in all the component functors.

    A `CompositeFunctor` does not use a `_func` method itself; rather, when a
    `CompositeFunctor` is called, all its columns are loaded at once, and the
    resulting DataFrame is passed to the `_func` method of each component
    functor.
    This has the advantage of only doing I/O (reading from parquet file) once,
    and works because each individual `_func` method of each component functor
    does not care if there are *extra* columns in the DataFrame being passed;
    only that it must contain *at least* the `columns` it expects.

    An important and useful class method is `from_yaml`, which takes as an
    argument the path to a YAML file specifying a collection of functors.

    Parameters
    ----------
    funcs : `dict` or `list`
        Dictionary or list of functors.
        If a list, then it will be converted into a dictonary according to the
        `.shortname` attribute of each functor.
    """
    dataset = None
    name = "CompositeFunctor"

    def __init__(self, funcs, **kwargs):

        # isinstance (rather than a type equality check) also accepts dict
        # subclasses such as OrderedDict.
        if isinstance(funcs, dict):
            self.funcDict = funcs
        else:
            self.funcDict = {f.shortname: f for f in funcs}

        self._filt = None

        super().__init__(**kwargs)

    @property
    def filt(self):
        return self._filt

    @filt.setter
    def filt(self, filt):
        # Propagate the band to every component functor.
        if filt is not None:
            for _, f in self.funcDict.items():
                f.filt = filt
        self._filt = filt

    def update(self, new):
        """Update the functor with new functors."""
        if isinstance(new, dict):
            self.funcDict.update(new)
        elif isinstance(new, CompositeFunctor):
            self.funcDict.update(new.funcDict)
        else:
            raise TypeError('Can only update with dictionary or CompositeFunctor.')

        # Make sure new functors have the same 'filt' set.
        if self.filt is not None:
            self.filt = self.filt

    @property
    def columns(self):
        """Union of the columns required by all component functors."""
        return list(set([x for y in [f.columns for f in self.funcDict.values()] for x in y]))

    def multilevelColumns(self, data, **kwargs):
        """Union of multilevel columns for all component functors."""
        # Get the union of columns for all component functors.
        # Note the need to have `returnTuple=True` here.
        return list(
            set(
                [
                    x
                    for y in [
                        f.multilevelColumns(data, returnTuple=True, **kwargs) for f in self.funcDict.values()
                    ]
                    for x in y
                ]
            )
        )

    def __call__(self, data, **kwargs):
        """Apply the functor to the data table.

        Parameters
        ----------
        data : various
            The data represented as `~lsst.daf.butler.DeferredDatasetHandle`,
            `~lsst.pipe.base.InMemoryDatasetHandle`, or `~pandas.DataFrame`.
            The table or a pointer to a table on disk from which columns can
            be accessed.
        """
        if isinstance(data, pd.DataFrame):
            _data = InMemoryDatasetHandle(data, storageClass="DataFrame")
        elif isinstance(data, (DeferredDatasetHandle, InMemoryDatasetHandle)):
            _data = data
        else:
            raise RuntimeError(f"Unexpected type provided for data. Got {get_full_type_name(data)}.")

        columnIndex = self._get_columnIndex(_data)

        if isinstance(columnIndex, pd.MultiIndex):
            # One read for all functors, then slice per-functor sub-frames.
            columns = self.multilevelColumns(_data, columnIndex=columnIndex)
            df = _data.get(parameters={"columns": columns})

            valDict = {}
            for k, f in self.funcDict.items():
                try:
                    subdf = f._setLevels(
                        df[f.multilevelColumns(_data, returnTuple=True, columnIndex=columnIndex)]
                    )
                    valDict[k] = f._func(subdf)
                except Exception as e:
                    self.log.exception(
                        "Exception in %s (funcs: %s) call: %s",
                        self.name,
                        str(list(self.funcDict.keys())),
                        type(e).__name__,
                    )
                    try:
                        valDict[k] = f.fail(subdf)
                    except NameError:
                        # `subdf` was never bound: the failure happened while
                        # selecting columns, so there is nothing to fall back
                        # on.  Re-raise the original error.
                        raise e
        else:
            df = _data.get(parameters={"columns": self.columns})

            valDict = {k: f._func(df) for k, f in self.funcDict.items()}

        # Check that output columns are actually columns.
        for name, colVal in valDict.items():
            if len(colVal.shape) != 1:
                raise RuntimeError("Transformed column '%s' is not the shape of a column. "
                                   "It is shaped %s and type %s." % (name, colVal.shape, type(colVal)))

        try:
            valDf = pd.concat(valDict, axis=1)
        except TypeError:
            # Log (rather than print) the offending types before re-raising.
            self.log.error("Could not concatenate output columns: %s",
                           [(k, type(v)) for k, v in valDict.items()])
            raise

        if kwargs.get('dropna', False):
            valDf = valDf.dropna(how='any')

        return valDf

    @classmethod
    def renameCol(cls, col, renameRules):
        """Rename a column name according to ``(old, new)`` prefix rules."""
        if renameRules is None:
            return col
        for old, new in renameRules:
            if col.startswith(old):
                col = col.replace(old, new)
        return col

    @classmethod
    def from_file(cls, filename, **kwargs):
        """Construct a `CompositeFunctor` from a YAML file on disk."""
        # Allow environment variables in the filename.
        filename = os.path.expandvars(filename)
        with open(filename) as f:
            translationDefinition = yaml.safe_load(f)

        return cls.from_yaml(translationDefinition, **kwargs)

    @classmethod
    def from_yaml(cls, translationDefinition, **kwargs):
        """Construct a `CompositeFunctor` from a parsed YAML specification."""
        funcs = {}
        for func, val in translationDefinition['funcs'].items():
            funcs[func] = init_fromDict(val, name=func)

        if 'flag_rename_rules' in translationDefinition:
            renameRules = translationDefinition['flag_rename_rules']
        else:
            renameRules = None

        # Plain flag columns, keyed by the dataset they are read from.
        if 'calexpFlags' in translationDefinition:
            for flag in translationDefinition['calexpFlags']:
                funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='calexp')

        if 'refFlags' in translationDefinition:
            for flag in translationDefinition['refFlags']:
                funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='ref')

        if 'forcedFlags' in translationDefinition:
            for flag in translationDefinition['forcedFlags']:
                funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='forced_src')

        if 'flags' in translationDefinition:
            for flag in translationDefinition['flags']:
                funcs[cls.renameCol(flag, renameRules)] = Column(flag, dataset='meas')

        return cls(funcs, **kwargs)

575 

576 

def mag_aware_eval(df, expr, log):
    """Evaluate an expression on a DataFrame, knowing what the 'mag' function
    means.

    Builds on `pandas.DataFrame.eval`, which parses and executes math on
    DataFrames.

    Parameters
    ----------
    df : ~pandas.DataFrame
        DataFrame on which to evaluate expression.

    expr : str
        Expression.
    """
    # ``mag(x)`` is translated into the equivalent log10 expression that
    # DataFrame.eval understands: -2.5*log(x)/log(10).
    try:
        translated = re.sub(r'mag\((\w+)\)', r'-2.5*log(\g<1>)/log(10)', expr)
        result = df.eval(translated)
    except Exception as e:  # Should check what actually gets raised
        log.error("Exception in mag_aware_eval: %s: %s", type(e).__name__, e)
        # Retry assuming the mag() argument named a flux column without its
        # '_instFlux' suffix.
        translated = re.sub(r'mag\((\w+)\)', r'-2.5*log(\g<1>_instFlux)/log(10)', expr)
        result = df.eval(translated)
    return result

600 

601 

class CustomFunctor(Functor):
    """Arbitrary computation on a catalog.

    Column names (and thus the columns to be loaded from catalog) are found by
    finding all words and trying to ignore all "math-y" words.

    Parameters
    ----------
    expr : str
        Expression to evaluate, to be parsed and executed by
        `~lsst.pipe.tasks.functors.mag_aware_eval`.
    """
    # Words that look like identifiers but are math functions, not columns.
    _ignore_words = ('mag', 'sin', 'cos', 'exp', 'log', 'sqrt')

    def __init__(self, expr, **kwargs):
        self.expr = expr
        super().__init__(**kwargs)

    @property
    def name(self):
        return self.expr

    @property
    def columns(self):
        # Arguments of mag(...) refer to flux columns, possibly without
        # their '_instFlux' suffix.
        mag_args = re.findall(r'mag\(\s*(\w+)\s*\)', self.expr)

        # Every identifier-like word that is not a known math function is a
        # candidate column name.
        candidates = [w for w in re.findall(r'[a-zA-Z_]+', self.expr)
                      if w not in self._ignore_words]
        bare_flux_names = []
        for arg in mag_args:
            if arg.endswith('_instFlux'):
                candidates.append(arg)
            else:
                # The bare name is shorthand for the suffixed flux column;
                # record it so it can be excluded from the final list.
                candidates.append(f'{arg}_instFlux')
                bare_flux_names.append(arg)

        return list({c for c in candidates if c not in bare_flux_names})

    def _func(self, df):
        return mag_aware_eval(df, self.expr, self.log)

641 

642 

class Column(Functor):
    """Get column with a specified name."""

    def __init__(self, col, **kwargs):
        # The single catalog column this functor extracts.
        self.col = col
        super().__init__(**kwargs)

    @property
    def name(self):
        # The column name doubles as the functor's full name.
        return self.col

    @property
    def columns(self):
        # Only the named column needs to be read.
        return [self.col]

    def _func(self, df):
        # Straight pass-through of the loaded column.
        return df[self.col]

660 

661 

class Index(Functor):
    """Return the value of the index for each object."""

    # Just a dummy; something has to be here so at least one column is read
    # (the index is delivered alongside any column).
    columns = ['coord_ra']
    _defaultDataset = 'ref'
    _defaultNoDup = True

    def _func(self, df):
        # Materialize the index as a Series aligned with itself.
        return pd.Series(df.index, index=df.index)

671 

672 

class CoordColumn(Column):
    """Base class for coordinate column, in degrees."""
    # When True, the stored column is treated as radians and converted.
    _radians = True

    def __init__(self, col, **kwargs):
        super().__init__(col, **kwargs)

    def _func(self, df):
        # Must not modify the original column in case that column is used by
        # another functor; multiplication yields a new Series.
        if self._radians:
            return df[self.col] * 180 / np.pi
        return df[self.col]

685 

686 

class RAColumn(CoordColumn):
    """Right Ascension, in degrees."""
    name = 'RA'
    _defaultNoDup = True

    def __init__(self, **kwargs):
        # Fixed to the standard 'coord_ra' catalog column.
        super().__init__('coord_ra', **kwargs)

    def __call__(self, catalog, **kwargs):
        # Identical to CoordColumn.__call__; retained as an explicit
        # override.
        return super().__call__(catalog, **kwargs)

697 

698 

class DecColumn(CoordColumn):
    """Declination, in degrees."""
    name = 'Dec'
    _defaultNoDup = True

    def __init__(self, **kwargs):
        # Fixed to the standard 'coord_dec' catalog column.
        super().__init__('coord_dec', **kwargs)

    def __call__(self, catalog, **kwargs):
        # Identical to CoordColumn.__call__; retained as an explicit
        # override.
        return super().__call__(catalog, **kwargs)

709 

710 

class RAErrColumn(CoordColumn):
    """Uncertainty in Right Ascension, in degrees."""
    name = 'RAErr'
    _defaultNoDup = True

    def __init__(self, **kwargs):
        # CoordColumn (with `_radians` True) converts the stored value to
        # degrees.
        super().__init__('coord_raErr', **kwargs)

718 

719 

class DecErrColumn(CoordColumn):
    """Uncertainty in declination, in degrees."""
    name = 'DecErr'
    _defaultNoDup = True

    def __init__(self, **kwargs):
        # CoordColumn (with `_radians` True) converts the stored value to
        # degrees.
        super().__init__('coord_decErr', **kwargs)

727 

728 

class RADecCovColumn(Column):
    """Coordinate covariance column, in degrees."""
    _radians = True
    name = 'RADecCov'
    _defaultNoDup = True

    def __init__(self, **kwargs):
        # Fixed to the standard RA/Dec covariance catalog column.
        super().__init__('coord_ra_dec_Cov', **kwargs)

    def _func(self, df):
        # Covariance scales by the *square* of the rad->deg factor.  A new
        # Series is produced so the source column is never modified (it may
        # be used by another functor).
        if self._radians:
            return df[self.col] * (180 / np.pi) ** 2
        return df[self.col]

743 

744 

class HtmIndex20(Functor):
    """Compute the level 20 HtmIndex for the catalog.

    Notes
    -----
    This functor was implemented to satisfy requirements of old APDB interface
    which required the ``pixelId`` column in DiaObject with HTM20 index.
    The APDB interface had migrated to not need that information, but we keep
    this class in case it may be useful for something else.
    """
    name = "Htm20"
    htmLevel = 20
    _radians = True

    def __init__(self, ra, dec, **kwargs):
        self.pixelator = sphgeom.HtmPixelization(self.htmLevel)
        # Names of the RA/Dec columns to read.
        self.ra = ra
        self.dec = dec
        self._columns = [self.ra, self.dec]
        super().__init__(**kwargs)

    def _func(self, df):
        # Hoist the unit choice out of the per-row closure.
        angleUnit = geom.radians if self._radians else geom.degrees

        def computePixel(row):
            sphPoint = geom.SpherePoint(row[self.ra], row[self.dec], angleUnit)
            return self.pixelator.index(sphPoint.getVector())

        # result_type='reduce' keeps the output a Series even for an empty
        # frame; cast to int64 for a stable integer dtype.
        return df.apply(computePixel, axis=1, result_type='reduce').astype('int64')

780 

781 

def fluxName(col):
    """Append _instFlux to the column name if it doesn't have it already."""
    suffix = '_instFlux'
    return col if col.endswith(suffix) else col + suffix

787 

788 

def fluxErrName(col):
    """Append _instFluxErr to the column name if it doesn't have it already."""
    suffix = '_instFluxErr'
    return col if col.endswith(suffix) else col + suffix

794 

795 

class Mag(Functor):
    """Compute calibrated magnitude.

    Returns the flux at mag=0.
    The default ``fluxMag0`` is 63095734448.0194, which is default for HSC.
    TO DO: This default should be made configurable in DM-21955.

    This calculation hides warnings about invalid values and dividing by zero.

    As with all functors, a ``dataset`` and ``filt`` kwarg should be provided
    upon initialization.
    Unlike the default `Functor`, however, the default dataset for a `Mag` is
    ``'meas'``, rather than ``'ref'``.

    Parameters
    ----------
    col : `str`
        Name of flux column from which to compute magnitude.
        Can be parseable by the `~lsst.pipe.tasks.functors.fluxName` function;
        that is, you can pass ``'modelfit_CModel'`` instead of
        ``'modelfit_CModel_instFlux'``, and it will understand.
    """
    _defaultDataset = 'meas'

    def __init__(self, col, calib=None, **kwargs):
        # Normalize the column name to include the '_instFlux' suffix.
        self.col = fluxName(col)
        self.calib = calib
        if calib is None:
            # TO DO: DM-21955 Replace hard coded photometic calibration values.
            self.fluxMag0 = 63095734448.0194
        else:
            # TO DO: DM-39914 Remove deprecated calib argument in Mag functor.
            warnings.warn(
                "The 'calib' argument is deprecated, and will be removed after v26.",
                FutureWarning,
                stacklevel=2,
            )
            self.fluxMag0 = calib.getFluxMag0()[0]

        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.col]

    def _func(self, df):
        # Negative or zero fluxes trigger numpy warnings; suppress them and
        # let the result be NaN/inf.
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', r'invalid value encountered')
            warnings.filterwarnings('ignore', r'divide by zero')
            return -2.5 * np.log10(df[self.col] / self.fluxMag0)

    @property
    def name(self):
        return f'mag_{self.col}'

850 

851 

class MagErr(Mag):
    """Compute calibrated magnitude uncertainty.

    Parameters
    ----------
    col : `str`
        Name of the flux column.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        if self.calib is None:
            # TO DO: DM-21955 Replace hard coded photometic calibration values.
            self.fluxMag0Err = 0.
        else:
            # TO DO: DM-39914 Remove deprecated calib argument in Mag functor.
            self.fluxMag0Err = self.calib.getFluxMag0()[1]
            warnings.warn(
                "The 'calib' argument is deprecated, and will be removed after v26.",
                FutureWarning,
                stacklevel=2,
            )

    @property
    def columns(self):
        # Both the flux and its uncertainty are needed.
        return [self.col, self.col + 'Err']

    def _func(self, df):
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', r'invalid value encountered')
            warnings.filterwarnings('ignore', r'divide by zero')
            fluxCol, fluxErrCol = self.columns
            # Relative errors of the flux and of the zero point, added in
            # quadrature and scaled to magnitudes.
            relFluxErr = df[fluxErrCol] / df[fluxCol]
            relZptErr = self.fluxMag0Err / self.fluxMag0
            return (2.5 / np.log(10.)) * np.sqrt(relFluxErr*relFluxErr + relZptErr*relZptErr)

    @property
    def name(self):
        return super().name + '_err'

892 

893 

class MagDiff(Functor):
    """Functor to calculate magnitude difference."""
    _defaultDataset = 'meas'

    def __init__(self, col1, col2, **kwargs):
        # Normalize both column names to include the '_instFlux' suffix.
        self.col1 = fluxName(col1)
        self.col2 = fluxName(col2)
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.col1, self.col2]

    def _func(self, df):
        # mag1 - mag2 == -2.5*log10(flux1/flux2); suppress numpy warnings
        # from zero or negative fluxes.
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', r'invalid value encountered')
            warnings.filterwarnings('ignore', r'divide by zero')
            ratio = df[self.col1] / df[self.col2]
            return -2.5 * np.log10(ratio)

    @property
    def name(self):
        return f'(mag_{self.col1} - mag_{self.col2})'

    @property
    def shortname(self):
        return f'magDiff_{self.col1}_{self.col2}'

920 

921 

class Color(Functor):
    """Compute the color (magnitude difference) of an object between two
    filters.

    Two `Mag` functors are built from the same flux column, one per filter,
    and the color returned is ``Mag(filt2) - Mag(filt1)``.

    Unlike the `Functor` default, `_func` here expects a DataFrame with a
    multilevel column index carrying both ``'band'`` and ``'column'`` levels
    (controlled by the `_dfLevels` attribute).

    Also of note, the default dataset for `Color` is ``forced_src'``, whereas
    for `Mag` it is ``'meas'``.

    Parameters
    ----------
    col : str
        Name of the flux column from which to compute; same as would be passed
        to `~lsst.pipe.tasks.functors.Mag`.

    filt2, filt1 : str
        Filters from which to compute magnitude difference.
        Color computed is ``Mag(filt2) - Mag(filt1)``.
    """
    _defaultDataset = 'forced_src'
    _dfLevels = ('band', 'column')
    _defaultNoDup = True

    def __init__(self, col, filt2, filt1, **kwargs):
        self.col = fluxName(col)
        if filt2 == filt1:
            # A color requires two distinct bands.
            raise RuntimeError("Cannot compute Color for %s: %s - %s " % (col, filt2, filt1))
        self.filt2 = filt2
        self.filt1 = filt1
        # One magnitude functor per band; their difference is the color.
        self.mag2 = Mag(col, filt=filt2, **kwargs)
        self.mag1 = Mag(col, filt=filt1, **kwargs)
        super().__init__(**kwargs)

    @property
    def filt(self):
        # A color spans two bands, so no single filter applies.
        return None

    @filt.setter
    def filt(self, filt):
        # Swallow any filter assignment made by the base class.
        pass

    def _func(self, df):
        return self.mag2._func(df[self.filt2]) - self.mag1._func(df[self.filt1])

    @property
    def columns(self):
        return [self.mag1.col, self.mag2.col]

    def multilevelColumns(self, parq, **kwargs):
        # One (dataset, band, column) key per band, filt1 first.
        return [(self.dataset, band, self.col) for band in (self.filt1, self.filt2)]

    @property
    def name(self):
        return f'{self.filt2} - {self.filt1} ({self.col})'

    @property
    def shortname(self):
        return f"{self.col}_{self.filt2.replace('-', '')}m{self.filt1.replace('-', '')}"

989 

990 

class DeconvolvedMoments(Functor):
    """Subtract the trace of the PSF second moments from the trace of the
    second moments of the source.

    HsmShapeAlgorithm measurements are used for the sources where valid;
    otherwise the SdssShapeAlgorithm measurements are used.
    """
    name = 'Deconvolved Moments'
    shortname = 'deconvolvedMoments'
    _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
                "ext_shapeHSM_HsmSourceMoments_yy",
                "base_SdssShape_xx", "base_SdssShape_yy",
                "ext_shapeHSM_HsmPsfMoments_xx",
                "ext_shapeHSM_HsmPsfMoments_yy")

    def _func(self, df):
        """Calculate deconvolved moments."""
        if "ext_shapeHSM_HsmSourceMoments_xx" in df.columns:
            hsm = df["ext_shapeHSM_HsmSourceMoments_xx"] + df["ext_shapeHSM_HsmSourceMoments_yy"]
        else:
            # No HSM source moments available: all-NaN placeholder.
            hsm = np.full(len(df), np.nan)
        sdss = df["base_SdssShape_xx"] + df["base_SdssShape_yy"]
        if "ext_shapeHSM_HsmPsfMoments_xx" not in df.columns:
            # LSST does not have shape.sdss.psf.
            # We could instead add base_PsfShape to the catalog using
            # exposure.getPsf().computeShape(s.getCentroid()).getIxx().
            raise RuntimeError('No psf shape parameter found in catalog')
        psf = df["ext_shapeHSM_HsmPsfMoments_xx"] + df["ext_shapeHSM_HsmPsfMoments_yy"]
        # Prefer the HSM trace; fall back to SDSS where HSM is not finite.
        return hsm.where(np.isfinite(hsm), sdss) - psf

1023 

1024 

class SdssTraceSize(Functor):
    """Functor to calculate the SDSS trace radius size for sources.

    The SDSS trace radius is sqrt(0.5*(Ixx + Iyy)) computed from the
    SdssShapeAlgorithm second moments, in units of pixels.
    """
    name = "SDSS Trace Size"
    shortname = 'sdssTrace'
    _columns = ("base_SdssShape_xx", "base_SdssShape_yy")

    def _func(self, df):
        trace = df["base_SdssShape_xx"] + df["base_SdssShape_yy"]
        return np.sqrt(0.5*trace)

1040 

1041 

class PsfSdssTraceSizeDiff(Functor):
    """Functor to calculate the SDSS trace radius size difference (%) between
    the object and the PSF model.

    See Also
    --------
    SdssTraceSize
    """
    name = "PSF - SDSS Trace Size"
    shortname = 'psf_sdssTrace'
    _columns = ("base_SdssShape_xx", "base_SdssShape_yy",
                "base_SdssShape_psf_xx", "base_SdssShape_psf_yy")

    def _func(self, df):
        # Trace radii of the source and of the PSF model, in pixels.
        srcSize = np.sqrt(0.5*(df["base_SdssShape_xx"] + df["base_SdssShape_yy"]))
        psfSize = np.sqrt(0.5*(df["base_SdssShape_psf_xx"] + df["base_SdssShape_psf_yy"]))
        # Percent difference relative to the mean of the two sizes.
        return 100*(srcSize - psfSize)/(0.5*(srcSize + psfSize))

1060 

1061 

class HsmTraceSize(Functor):
    """Functor to calculate the HSM trace radius size for sources.

    The HSM trace radius is sqrt(0.5*(Ixx + Iyy)) computed from the
    HsmShapeAlgorithm second moments, in units of pixels.
    """
    name = 'HSM Trace Size'
    shortname = 'hsmTrace'
    _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
                "ext_shapeHSM_HsmSourceMoments_yy")

    def _func(self, df):
        trace = (df["ext_shapeHSM_HsmSourceMoments_xx"]
                 + df["ext_shapeHSM_HsmSourceMoments_yy"])
        return np.sqrt(0.5*trace)

1079 

1080 

class PsfHsmTraceSizeDiff(Functor):
    """Functor to calculate the HSM trace radius size difference (%) between
    the object and the PSF model.

    See Also
    --------
    HsmTraceSize
    """
    name = 'PSF - HSM Trace Size'
    shortname = 'psf_HsmTrace'
    _columns = ("ext_shapeHSM_HsmSourceMoments_xx",
                "ext_shapeHSM_HsmSourceMoments_yy",
                "ext_shapeHSM_HsmPsfMoments_xx",
                "ext_shapeHSM_HsmPsfMoments_yy")

    def _func(self, df):
        # Trace radii of the source and of the PSF model, in pixels.
        srcSize = np.sqrt(0.5*(df["ext_shapeHSM_HsmSourceMoments_xx"]
                               + df["ext_shapeHSM_HsmSourceMoments_yy"]))
        psfSize = np.sqrt(0.5*(df["ext_shapeHSM_HsmPsfMoments_xx"]
                               + df["ext_shapeHSM_HsmPsfMoments_yy"]))
        # Percent difference relative to the mean of the two sizes.
        return 100*(srcSize - psfSize)/(0.5*(srcSize + psfSize))

1103 

1104 

class HsmFwhm(Functor):
    """Functor to calculate the PSF FWHM with second moments measured from the
    HsmShapeAlgorithm plugin.

    This is in units of arcseconds, and assumes the hsc_rings_v1 skymap pixel
    scale of 0.168 arcseconds/pixel.

    Notes
    -----
    This conversion assumes the PSF is Gaussian, which is not always the case.
    """
    name = 'HSM Psf FWHM'
    _columns = ('ext_shapeHSM_HsmPsfMoments_xx', 'ext_shapeHSM_HsmPsfMoments_yy')
    # TODO: DM-21403 pixel scale should be computed from the CD matrix or transform matrix
    pixelScale = 0.168
    # Gaussian sigma-to-FWHM conversion factor.
    SIGMA2FWHM = 2*np.sqrt(2*np.log(2))

    def _func(self, df):
        # Trace radius of the PSF moments (pixels), scaled to an arcsecond
        # FWHM under the Gaussian assumption.
        traceRadius = np.sqrt(
            0.5*(df['ext_shapeHSM_HsmPsfMoments_xx'] + df['ext_shapeHSM_HsmPsfMoments_yy']))
        return self.pixelScale*self.SIGMA2FWHM*traceRadius

1125 

1126 

class E1(Functor):
    r"""Calculate :math:`e_1` ellipticity component for sources, defined as:

    .. math::
        e_1 &= (I_{xx}-I_{yy})/(I_{xx}+I_{yy})

    Parameters
    ----------
    colXX : str
        Name of the second-moment :math:`I_{xx}` column.
    colXY : str
        Name of the second-moment :math:`I_{xy}` column.
    colYY : str
        Name of the second-moment :math:`I_{yy}` column.

    See Also
    --------
    E2
    """
    name = "Distortion Ellipticity (e1)"
    shortname = "Distortion"

    def __init__(self, colXX, colXY, colYY, **kwargs):
        self.colXX = colXX
        self.colXY = colXY
        self.colYY = colYY
        self._columns = [self.colXX, self.colXY, self.colYY]
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.colXX, self.colXY, self.colYY]

    def _func(self, df):
        # Parenthesize the numerator so that e1 = (Ixx - Iyy)/(Ixx + Iyy),
        # matching the documented formula. Previously the division bound
        # only to Iyy, yielding Ixx - Iyy/(Ixx + Iyy).
        return (df[self.colXX] - df[self.colYY]) / (df[self.colXX] + df[self.colYY])

1153 

1154 

class E2(Functor):
    r"""Calculate :math:`e_2` ellipticity component for sources, defined as:

    .. math::
        e_2 &= 2I_{xy}/(I_{xx}+I_{yy})

    See Also
    --------
    E1
    """
    name = "Ellipticity e2"

    def __init__(self, colXX, colXY, colYY, **kwargs):
        # Names of the three second-moment columns.
        self.colXX = colXX
        self.colXY = colXY
        self.colYY = colYY
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.colXX, self.colXY, self.colYY]

    def _func(self, df):
        trace = df[self.colXX] + df[self.colYY]
        return 2*df[self.colXY] / trace

1179 

1180 

class RadiusFromQuadrupole(Functor):
    """Calculate the radius from the quadrupole moments.

    This returns the fourth root of the determinant of the second moments
    tensor, which has units of pixels.

    See Also
    --------
    SdssTraceSize
    HsmTraceSize
    """

    def __init__(self, colXX, colXY, colYY, **kwargs):
        # Names of the three second-moment columns.
        self.colXX = colXX
        self.colXY = colXY
        self.colYY = colYY
        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.colXX, self.colXY, self.colYY]

    def _func(self, df):
        # |Q|^(1/4) where Q is the 2x2 second-moments tensor.
        determinant = df[self.colXX]*df[self.colYY] - df[self.colXY]**2
        return determinant**0.25

1205 

1206 

class LocalWcs(Functor):
    """Computations using the stored localWcs.

    Parameters
    ----------
    colCD_1_1, colCD_1_2, colCD_2_1, colCD_2_2 : str
        Names of the columns holding the elements of the local CD matrix.
    """
    name = "LocalWcsOperations"

    def __init__(self,
                 colCD_1_1,
                 colCD_1_2,
                 colCD_2_1,
                 colCD_2_2,
                 **kwargs):
        self.colCD_1_1 = colCD_1_1
        self.colCD_1_2 = colCD_1_2
        self.colCD_2_1 = colCD_2_1
        self.colCD_2_2 = colCD_2_2
        super().__init__(**kwargs)

    def computeDeltaRaDec(self, x, y, cd11, cd12, cd21, cd22):
        """Apply the local CD matrix to a pixel offset, converting it to a
        sky offset.

        Parameters
        ----------
        x : `~pandas.Series`
            X pixel coordinate offset.
        y : `~pandas.Series`
            Y pixel coordinate offset.
        cd11 : `~pandas.Series`
            [1, 1] element of the local Wcs affine transform.
        cd12 : `~pandas.Series`
            [1, 2] element of the local Wcs affine transform.
        cd21 : `~pandas.Series`
            [2, 1] element of the local Wcs affine transform.
        cd22 : `~pandas.Series`
            [2, 2] element of the local Wcs affine transform.

        Returns
        -------
        raDecTuple : tuple
            RA and dec conversion of x and y given the local Wcs.
            Returned units are in radians.
        """
        return (x * cd11 + y * cd12, x * cd21 + y * cd22)

    def computeSkySeparation(self, ra1, dec1, ra2, dec2):
        """Compute the great-circle separation between two sky coordinates
        via the haversine formula.

        Parameters
        ----------
        ra1 : `~pandas.Series`
            Ra of the first coordinate in radians.
        dec1 : `~pandas.Series`
            Dec of the first coordinate in radians.
        ra2 : `~pandas.Series`
            Ra of the second coordinate in radians.
        dec2 : `~pandas.Series`
            Dec of the second coordinate in radians.

        Returns
        -------
        dist : `~pandas.Series`
            Distance on the sphere in radians.
        """
        deltaDec = dec2 - dec1
        deltaRa = ra2 - ra1
        return 2 * np.arcsin(
            np.sqrt(
                np.sin(deltaDec / 2) ** 2
                + np.cos(dec2) * np.cos(dec1) * np.sin(deltaRa / 2) ** 2))

    def getSkySeparationFromPixel(self, x1, y1, x2, y2, cd11, cd12, cd21, cd22):
        """Compute the distance on the sphere from pixel (x1, y1) to pixel
        (x2, y2), using the local CD matrix to map pixels to the sky.

        Parameters
        ----------
        x1 : `~pandas.Series`
            X pixel coordinate of the first point.
        y1 : `~pandas.Series`
            Y pixel coordinate of the first point.
        x2 : `~pandas.Series`
            X pixel coordinate of the second point.
        y2 : `~pandas.Series`
            Y pixel coordinate of the second point.
        cd11 : `~pandas.Series`
            [1, 1] element of the local Wcs affine transform.
        cd12 : `~pandas.Series`
            [1, 2] element of the local Wcs affine transform.
        cd21 : `~pandas.Series`
            [2, 1] element of the local Wcs affine transform.
        cd22 : `~pandas.Series`
            [2, 2] element of the local Wcs affine transform.

        Returns
        -------
        Distance : `~pandas.Series`
            Distance on the sphere in radians.
        """
        ra1, dec1 = self.computeDeltaRaDec(x1, y1, cd11, cd12, cd21, cd22)
        ra2, dec2 = self.computeDeltaRaDec(x2, y2, cd11, cd12, cd21, cd22)
        # Great circle distance for small separations.
        return self.computeSkySeparation(ra1, dec1, ra2, dec2)

1311 

1312 

class ComputePixelScale(LocalWcs):
    """Compute the local pixel scale from the stored CDMatrix.
    """
    name = "PixelScale"

    @property
    def columns(self):
        return [self.colCD_1_1,
                self.colCD_1_2,
                self.colCD_2_1,
                self.colCD_2_2]

    def pixelScaleArcseconds(self, cd11, cd12, cd21, cd22):
        """Compute the local pixel to scale conversion in arcseconds.

        Parameters
        ----------
        cd11 : `~pandas.Series`
            [1, 1] element of the local Wcs affine transform in radians.
        cd12 : `~pandas.Series`
            [1, 2] element of the local Wcs affine transform in radians.
        cd21 : `~pandas.Series`
            [2, 1] element of the local Wcs affine transform in radians.
        cd22 : `~pandas.Series`
            [2, 2] element of the local Wcs affine transform in radians.

        Returns
        -------
        pixScale : `~pandas.Series`
            Arcseconds per pixel at the location of the local WC.
        """
        # sqrt(|det CD|) is the linear scale in radians/pixel; convert
        # degrees -> arcseconds with the factor 3600.
        detCD = cd11 * cd22 - cd12 * cd21
        return 3600 * np.degrees(np.sqrt(np.fabs(detCD)))

    def _func(self, df):
        return self.pixelScaleArcseconds(df[self.colCD_1_1],
                                         df[self.colCD_1_2],
                                         df[self.colCD_2_1],
                                         df[self.colCD_2_2])

1353 

1354 

class ConvertPixelToArcseconds(ComputePixelScale):
    """Convert a value in units of pixels to units of arcseconds."""

    def __init__(self,
                 col,
                 colCD_1_1,
                 colCD_1_2,
                 colCD_2_1,
                 colCD_2_2,
                 **kwargs):
        # Column holding the pixel-valued quantity to convert.
        self.col = col
        super().__init__(colCD_1_1, colCD_1_2, colCD_2_1, colCD_2_2, **kwargs)

    @property
    def name(self):
        return f"{self.col}_asArcseconds"

    @property
    def columns(self):
        return [self.col,
                self.colCD_1_1,
                self.colCD_1_2,
                self.colCD_2_1,
                self.colCD_2_2]

    def _func(self, df):
        # Multiply by the local arcsec/pixel scale.
        pixScale = self.pixelScaleArcseconds(df[self.colCD_1_1],
                                             df[self.colCD_1_2],
                                             df[self.colCD_2_1],
                                             df[self.colCD_2_2])
        return df[self.col] * pixScale

1389 

1390 

class ConvertPixelSqToArcsecondsSq(ComputePixelScale):
    """Convert a value in units of pixels squared to units of arcseconds
    squared.
    """

    def __init__(self,
                 col,
                 colCD_1_1,
                 colCD_1_2,
                 colCD_2_1,
                 colCD_2_2,
                 **kwargs):
        # Column holding the pixel-squared-valued quantity to convert.
        self.col = col
        super().__init__(colCD_1_1, colCD_1_2, colCD_2_1, colCD_2_2, **kwargs)

    @property
    def name(self):
        return f"{self.col}_asArcsecondsSq"

    @property
    def columns(self):
        return [self.col,
                self.colCD_1_1,
                self.colCD_1_2,
                self.colCD_2_1,
                self.colCD_2_2]

    def _func(self, df):
        pixScale = self.pixelScaleArcseconds(df[self.colCD_1_1],
                                             df[self.colCD_1_2],
                                             df[self.colCD_2_1],
                                             df[self.colCD_2_2])
        # Apply the arcsec/pixel scale twice for a squared quantity.
        scaled = df[self.col] * pixScale
        return scaled * pixScale

1428 

1429 

class ReferenceBand(Functor):
    """Return the band used to seed multiband forced photometry.

    This functor is to be used on Object tables.
    It converts the boolean merge_measurements_{band} columns into a single
    string representing the first band for which merge_measurements_{band}
    is True.

    Assumes the default priority order of i, r, z, y, g, u.
    """
    name = 'Reference Band'
    shortname = 'refBand'

    @property
    def columns(self):
        # Priority order: the first True column wins.
        return ["merge_measurement_i",
                "merge_measurement_r",
                "merge_measurement_z",
                "merge_measurement_y",
                "merge_measurement_g",
                "merge_measurement_u"]

    def _func(self, df: pd.DataFrame) -> pd.Series:
        def bandOfRow(row):
            # idxmax picks the first column with the max value (True > False),
            # i.e. the highest-priority measured band.
            return row.idxmax().replace('merge_measurement_', '')

        # Skip columns that are unavailable, because this functor requests the
        # superset of bands that could be included in the object table.
        available = [col for col in self.columns if col in df.columns]
        bands = df[available].apply(bandOfRow, axis=1, result_type='reduce')
        # astype('object') guarantees object dtype even when df is empty.
        return bands.astype('object')

1464 

1465 

class Photometry(Functor):
    """Base class for Object table calibrated fluxes and magnitudes."""
    # AB to NanoJansky (3631 Jansky).
    AB_FLUX_SCALE = (0 * u.ABmag).to_value(u.nJy)
    LOG_AB_FLUX_SCALE = 12.56
    FIVE_OVER_2LOG10 = 1.085736204758129569
    # TO DO: DM-21955 Replace hard coded photometic calibration values.
    COADD_ZP = 27

    def __init__(self, colFlux, colFluxErr=None, calib=None, **kwargs):
        # Vectorized form of the overflow-safe hypot for array inputs.
        self.vhypot = np.vectorize(self.hypot)
        self.col = colFlux
        self.colFluxErr = colFluxErr

        self.calib = calib
        if calib is None:
            # Fall back to the hard-coded coadd zero point.
            self.fluxMag0 = 1./np.power(10, -0.4*self.COADD_ZP)
            self.fluxMag0Err = 0.
        else:
            self.fluxMag0, self.fluxMag0Err = calib.getFluxMag0()

        super().__init__(**kwargs)

    @property
    def columns(self):
        return [self.col]

    @property
    def name(self):
        return f'mag_{self.col}'

    @classmethod
    def hypot(cls, a, b):
        """Compute sqrt(a^2 + b^2) without under/overflow."""
        # Arrange |a| >= |b| so the ratio below is at most 1.
        if np.abs(a) < np.abs(b):
            a, b = b, a
        if a == 0.:
            # Both inputs are zero.
            return 0.
        ratio = b/a
        return np.abs(a) * np.sqrt(1. + ratio*ratio)

    def dn2flux(self, dn, fluxMag0):
        """Convert instrumental flux to nanojanskys."""
        return self.AB_FLUX_SCALE * dn / fluxMag0

    def dn2mag(self, dn, fluxMag0):
        """Convert instrumental flux to AB magnitude."""
        # Non-positive fluxes produce NaN/inf magnitudes; suppress the numpy
        # warnings they would otherwise emit.
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', r'invalid value encountered')
            warnings.filterwarnings('ignore', r'divide by zero')
            return -2.5 * np.log10(dn/fluxMag0)

    def dn2fluxErr(self, dn, dnErr, fluxMag0, fluxMag0Err):
        """Convert instrumental flux error to nanojanskys."""
        # Combine the flux and zero-point error terms in quadrature.
        err = self.vhypot(dn * fluxMag0Err, dnErr * fluxMag0)
        return err * (self.AB_FLUX_SCALE / fluxMag0 / fluxMag0)

    def dn2MagErr(self, dn, dnErr, fluxMag0, fluxMag0Err):
        """Convert instrumental flux error to AB magnitude error."""
        relErr = self.dn2fluxErr(dn, dnErr, fluxMag0, fluxMag0Err) / self.dn2flux(dn, fluxMag0)
        return self.FIVE_OVER_2LOG10 * relErr

1528 

1529 

class NanoJansky(Photometry):
    """Convert instrumental flux to nanojanskys."""

    def _func(self, df):
        # Calibrate the single flux column with the stored zero point.
        instFlux = df[self.col]
        return self.dn2flux(instFlux, self.fluxMag0)

1534 

1535 

class NanoJanskyErr(Photometry):
    """Convert instrumental flux error to nanojanskys."""

    @property
    def columns(self):
        return [self.col, self.colFluxErr]

    def _func(self, df):
        fluxErr = self.dn2fluxErr(df[self.col], df[self.colFluxErr],
                                  self.fluxMag0, self.fluxMag0Err)
        # Re-wrap as a Series so the original index is preserved.
        return pd.Series(fluxErr, index=df.index)

1545 

1546 

class LocalPhotometry(Functor):
    """Base class for calibrating the specified instrument flux column using
    the local photometric calibration.

    Parameters
    ----------
    instFluxCol : `str`
        Name of the instrument flux column.
    instFluxErrCol : `str`
        Name of the associated error columns for ``instFluxCol``.
    photoCalibCol : `str`
        Name of local calibration column.
    photoCalibErrCol : `str`
        Error associated with ``photoCalibCol``

    See Also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    """
    # AB magnitude of a 1 nJy source; offset for magnitude conversions.
    logNJanskyToAB = (1 * u.nJy).to_value(u.ABmag)

    def __init__(self,
                 instFluxCol,
                 instFluxErrCol,
                 photoCalibCol,
                 photoCalibErrCol,
                 **kwargs):
        self.instFluxCol = instFluxCol
        self.instFluxErrCol = instFluxErrCol
        self.photoCalibCol = photoCalibCol
        self.photoCalibErrCol = photoCalibErrCol
        super().__init__(**kwargs)

    def instFluxToNanojansky(self, instFlux, localCalib):
        """Convert instrument flux to nanojanskys.

        Parameters
        ----------
        instFlux : `~numpy.ndarray` or `~pandas.Series`
            Array of instrument flux measurements.
        localCalib : `~numpy.ndarray` or `~pandas.Series`
            Array of local photometric calibration estimates.

        Returns
        -------
        calibFlux : `~numpy.ndarray` or `~pandas.Series`
            Array of calibrated flux measurements.
        """
        return instFlux * localCalib

    def instFluxErrToNanojanskyErr(self, instFlux, instFluxErr, localCalib, localCalibErr):
        """Convert instrument flux error to nanojanskys.

        Parameters
        ----------
        instFlux : `~numpy.ndarray` or `~pandas.Series`
            Array of instrument flux measurements.
        instFluxErr : `~numpy.ndarray` or `~pandas.Series`
            Errors on associated ``instFlux`` values.
        localCalib : `~numpy.ndarray` or `~pandas.Series`
            Array of local photometric calibration estimates.
        localCalibErr : `~numpy.ndarray` or `~pandas.Series`
            Errors on associated ``localCalib`` values.

        Returns
        -------
        calibFluxErr : `~numpy.ndarray` or `~pandas.Series`
            Errors on calibrated flux measurements.
        """
        # Propagate the flux and calibration errors in quadrature.
        return np.hypot(instFluxErr * localCalib, instFlux * localCalibErr)

    def instFluxToMagnitude(self, instFlux, localCalib):
        """Convert instrument flux to AB magnitude.

        Parameters
        ----------
        instFlux : `~numpy.ndarray` or `~pandas.Series`
            Array of instrument flux measurements.
        localCalib : `~numpy.ndarray` or `~pandas.Series`
            Array of local photometric calibration estimates.

        Returns
        -------
        calibMag : `~numpy.ndarray` or `~pandas.Series`
            Array of calibrated AB magnitudes.
        """
        return -2.5 * np.log10(self.instFluxToNanojansky(instFlux, localCalib)) + self.logNJanskyToAB

    def instFluxErrToMagnitudeErr(self, instFlux, instFluxErr, localCalib, localCalibErr):
        """Convert instrument flux error to AB magnitude error.

        Parameters
        ----------
        instFlux : `~numpy.ndarray` or `~pandas.Series`
            Array of instrument flux measurements.
        instFluxErr : `~numpy.ndarray` or `~pandas.Series`
            Errors on associated ``instFlux`` values.
        localCalib : `~numpy.ndarray` or `~pandas.Series`
            Array of local photometric calibration estimates.
        localCalibErr : `~numpy.ndarray` or `~pandas.Series`
            Errors on associated ``localCalib`` values.

        Returns
        -------
        calibMagErr: `~numpy.ndarray` or `~pandas.Series`
            Error on calibrated AB magnitudes.
        """
        err = self.instFluxErrToNanojanskyErr(instFlux, instFluxErr, localCalib, localCalibErr)
        # The relative error divides by the calibrated flux
        # instFlux*localCalib; previously instFluxErr was incorrectly passed
        # in place of localCalib here.
        return 2.5 / np.log(10) * err / self.instFluxToNanojansky(instFlux, localCalib)

1657 

1658 

class LocalNanojansky(LocalPhotometry):
    """Compute calibrated fluxes using the local calibration value.

    This returns units of nanojanskys.
    """

    @property
    def columns(self):
        return [self.instFluxCol, self.photoCalibCol]

    @property
    def name(self):
        return f'flux_{self.instFluxCol}'

    def _func(self, df):
        instFlux = df[self.instFluxCol]
        localCalib = df[self.photoCalibCol]
        return self.instFluxToNanojansky(instFlux, localCalib)

1675 

1676 

class LocalNanojanskyErr(LocalPhotometry):
    """Compute calibrated flux errors using the local calibration value.

    This returns units of nanojanskys.
    """

    @property
    def columns(self):
        return [self.instFluxCol, self.instFluxErrCol,
                self.photoCalibCol, self.photoCalibErrCol]

    @property
    def name(self):
        return f'fluxErr_{self.instFluxCol}'

    def _func(self, df):
        instFlux = df[self.instFluxCol]
        instFluxErr = df[self.instFluxErrCol]
        localCalib = df[self.photoCalibCol]
        localCalibErr = df[self.photoCalibErrCol]
        return self.instFluxErrToNanojanskyErr(instFlux, instFluxErr,
                                               localCalib, localCalibErr)

1695 

1696 

class LocalDipoleMeanFlux(LocalPhotometry):
    """Compute absolute mean of dipole fluxes.

    See Also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalDipoleMeanFluxErr
    LocalDipoleDiffFlux
    LocalDipoleDiffFluxErr
    """
    def __init__(self,
                 instFluxPosCol,
                 instFluxNegCol,
                 instFluxPosErrCol,
                 instFluxNegErrCol,
                 photoCalibCol,
                 photoCalibErrCol,
                 **kwargs):
        # Record both dipole lobes; the base class is seeded with the
        # negative-lobe columns.
        self.instFluxNegCol = instFluxNegCol
        self.instFluxPosCol = instFluxPosCol
        self.instFluxNegErrCol = instFluxNegErrCol
        self.instFluxPosErrCol = instFluxPosErrCol
        self.photoCalibCol = photoCalibCol
        self.photoCalibErrCol = photoCalibErrCol
        super().__init__(instFluxNegCol,
                         instFluxNegErrCol,
                         photoCalibCol,
                         photoCalibErrCol,
                         **kwargs)

    @property
    def columns(self):
        return [self.instFluxPosCol,
                self.instFluxNegCol,
                self.photoCalibCol]

    @property
    def name(self):
        return f'dipMeanFlux_{self.instFluxPosCol}_{self.instFluxNegCol}'

    def _func(self, df):
        # Mean of the absolute calibrated fluxes of the two lobes.
        localCalib = df[self.photoCalibCol]
        negFlux = np.fabs(self.instFluxToNanojansky(df[self.instFluxNegCol], localCalib))
        posFlux = np.fabs(self.instFluxToNanojansky(df[self.instFluxPosCol], localCalib))
        return 0.5*(negFlux + posFlux)

1741 

1742 

class LocalDipoleMeanFluxErr(LocalDipoleMeanFlux):
    """Compute the error on the absolute mean of dipole fluxes.

    See Also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalDipoleMeanFlux
    LocalDipoleDiffFlux
    LocalDipoleDiffFluxErr
    """

    @property
    def columns(self):
        return [self.instFluxPosCol,
                self.instFluxNegCol,
                self.instFluxPosErrCol,
                self.instFluxNegErrCol,
                self.photoCalibCol,
                self.photoCalibErrCol]

    @property
    def name(self):
        return f'dipMeanFluxErr_{self.instFluxPosCol}_{self.instFluxNegCol}'

    def _func(self, df):
        # Error propagation for 0.5*(|pos| + |neg|)*calib: the calibration
        # error term must multiply the *sum* of the absolute lobe fluxes, so
        # the sum is parenthesized before scaling by photoCalibErr.
        # (Previously, operator precedence applied photoCalibErr to the
        # positive lobe only; compare LocalDipoleDiffFluxErr, which
        # parenthesizes correctly.)
        return 0.5*np.sqrt(
            ((np.fabs(df[self.instFluxNegCol]) + np.fabs(df[self.instFluxPosCol]))
             * df[self.photoCalibErrCol])**2
            + (df[self.instFluxNegErrCol]**2 + df[self.instFluxPosErrCol]**2)
            * df[self.photoCalibCol]**2)

1774 

1775 

class LocalDipoleDiffFlux(LocalDipoleMeanFlux):
    """Compute the absolute difference of dipole fluxes.

    Calculated value is (abs(pos) - abs(neg)).

    See Also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalDipoleMeanFlux
    LocalDipoleMeanFluxErr
    LocalDipoleDiffFluxErr
    """

    @property
    def columns(self):
        return [self.instFluxPosCol,
                self.instFluxNegCol,
                self.photoCalibCol]

    @property
    def name(self):
        return f'dipDiffFlux_{self.instFluxPosCol}_{self.instFluxNegCol}'

    def _func(self, df):
        # Difference of the absolute calibrated fluxes of the two lobes.
        localCalib = df[self.photoCalibCol]
        posFlux = np.fabs(self.instFluxToNanojansky(df[self.instFluxPosCol], localCalib))
        negFlux = np.fabs(self.instFluxToNanojansky(df[self.instFluxNegCol], localCalib))
        return posFlux - negFlux

1803 

1804 

class LocalDipoleDiffFluxErr(LocalDipoleMeanFlux):
    """Compute the error on the absolute difference of dipole fluxes.

    See Also
    --------
    LocalNanojansky
    LocalNanojanskyErr
    LocalDipoleMeanFlux
    LocalDipoleMeanFluxErr
    LocalDipoleDiffFlux
    """

    @property
    def columns(self):
        return [self.instFluxPosCol,
                self.instFluxNegCol,
                self.instFluxPosErrCol,
                self.instFluxNegErrCol,
                self.photoCalibCol,
                self.photoCalibErrCol]

    @property
    def name(self):
        return f'dipDiffFluxErr_{self.instFluxPosCol}_{self.instFluxNegCol}'

    def _func(self, df):
        # Error propagation for (|pos| - |neg|)*calib: a calibration-error
        # term and a flux-error term, combined in quadrature.
        absDiff = np.fabs(df[self.instFluxPosCol]) - np.fabs(df[self.instFluxNegCol])
        calibTerm = (absDiff * df[self.photoCalibErrCol])**2
        fluxTerm = (df[self.instFluxPosErrCol]**2 + df[self.instFluxNegErrCol]**2) \
            * df[self.photoCalibCol]**2
        return np.sqrt(calibTerm + fluxTerm)

1836 

1837 

class Ebv(Functor):
    """Compute E(B-V) from dustmaps.sfd."""
    _defaultDataset = 'ref'
    name = "E(B-V)"
    shortname = "ebv"

    def __init__(self, **kwargs):
        # Import is only needed for Ebv, so keep it local to the constructor.
        from dustmaps.sfd import SFDQuery
        self._columns = ['coord_ra', 'coord_dec']
        self.sfd = SFDQuery()
        super().__init__(**kwargs)

    def _func(self, df):
        positions = SkyCoord(df['coord_ra'].values * u.rad,
                             df['coord_dec'].values * u.rad)
        reddening = pd.Series(self.sfd(positions), index=df.index)
        # Double precision unnecessary scientifically but currently needed for
        # ingest to qserv.
        return reddening.astype('float64')