Coverage for python/lsst/pipe/tasks/diff_matched_tract_catalog.py: 25%

Shortcuts on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

284 statements  

1# This file is part of pipe_tasks. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

22__all__ = [ 

23 'DiffMatchedTractCatalogConfig', 'DiffMatchedTractCatalogTask', 'MatchedCatalogFluxesConfig', 

24] 

25 

26import lsst.afw.geom as afwGeom 

27from lsst.meas.astrom.matcher_probabilistic import ( 

28 ComparableCatalog, ConvertCatalogCoordinatesConfig, 

29) 

30from lsst.meas.astrom.match_probabilistic_task import radec_to_xy 

31import lsst.pex.config as pexConfig 

32import lsst.pipe.base as pipeBase 

33import lsst.pipe.base.connectionTypes as cT 

34from lsst.skymap import BaseSkyMap 

35 

36from abc import ABCMeta, abstractmethod 

37from astropy.stats import mad_std 

38from dataclasses import dataclass 

39from enum import Enum, auto 

40import numpy as np 

41import pandas as pd 

42from scipy.stats import iqr 

43from typing import Dict, Set 

44 

45 

# Default values for the dataset-type name templates substituted into the
# connection names: the reference catalog, the target catalog and the skymap.
DiffMatchedTractCatalogBaseTemplates = dict(
    name_input_cat_ref="truth_summary",
    name_input_cat_target="objectTable_tract",
    name_skymap=BaseSkyMap.SKYMAP_DATASET_TYPE_NAME,
)

51 

52 

class DiffMatchedTractCatalogConnections(
    pipeBase.PipelineTaskConnections,
    dimensions=("tract", "skymap"),
    defaultTemplates=DiffMatchedTractCatalogBaseTemplates,
):
    """Butler connections for diffing matched tract catalogs.

    Inputs are the reference and target catalogs, the two match tables
    relating them (plus the column index of the target match table) and the
    skymap; outputs are the matched catalog and the table of aggregated
    difference statistics.
    """

    # --- Inputs -----------------------------------------------------------
    cat_ref = cT.Input(
        name="{name_input_cat_ref}",
        doc="Reference object catalog to match from",
        storageClass="DataFrame",
        dimensions=("tract", "skymap"),
        deferLoad=True,
    )
    cat_target = cT.Input(
        name="{name_input_cat_target}",
        doc="Target object catalog to match",
        storageClass="DataFrame",
        dimensions=("tract", "skymap"),
        deferLoad=True,
    )
    skymap = cT.Input(
        name="{name_skymap}",
        doc="Input definition of geometry/bbox and projection/wcs for coadded exposures",
        storageClass="SkyMap",
        dimensions=("skymap",),
    )
    cat_match_ref = cT.Input(
        name="match_ref_{name_input_cat_ref}_{name_input_cat_target}",
        doc="Reference match catalog with indices of target matches",
        storageClass="DataFrame",
        dimensions=("tract", "skymap"),
        deferLoad=True,
    )
    cat_match_target = cT.Input(
        name="match_target_{name_input_cat_ref}_{name_input_cat_target}",
        doc="Target match catalog with indices of references matches",
        storageClass="DataFrame",
        dimensions=("tract", "skymap"),
        deferLoad=True,
    )
    # Only the column index of the target match table; it is loaded eagerly
    # (no deferLoad) so the available columns can be inspected before reading.
    columns_match_target = cT.Input(
        name="match_target_{name_input_cat_ref}_{name_input_cat_target}.columns",
        doc="Target match catalog columns",
        storageClass="DataFrameIndex",
        dimensions=("tract", "skymap"),
    )

    # --- Outputs ----------------------------------------------------------
    cat_matched = cT.Output(
        name="matched_{name_input_cat_ref}_{name_input_cat_target}",
        doc="Catalog with reference and target columns for matched sources only",
        storageClass="DataFrame",
        dimensions=("tract", "skymap"),
    )
    diff_matched = cT.Output(
        name="diff_matched_{name_input_cat_ref}_{name_input_cat_target}",
        doc="Table with aggregated counts, difference and chi statistics",
        storageClass="DataFrame",
        dimensions=("tract", "skymap"),
    )

110 

111 

class MatchedCatalogFluxesConfig(pexConfig.Config):
    """Flux column names for one band of a matched reference/target pair.

    A single reference flux column is compared against a list of target flux
    columns, each with a corresponding flux error column.
    """

    column_ref_flux = pexConfig.Field(
        doc='Reference catalog flux column name',
        dtype=str,
    )
    columns_target_flux = pexConfig.ListField(
        doc="List of target catalog flux column names",
        dtype=str,
        # Reject lists containing duplicate column names.
        listCheck=lambda names: len(names) == len(set(names)),
    )
    columns_target_flux_err = pexConfig.ListField(
        doc="List of target catalog flux error column names",
        dtype=str,
        # Reject lists containing duplicate column names.
        listCheck=lambda names: len(names) == len(set(names)),
    )

    @property
    def columns_in_ref(self) -> Set[str]:
        """The set of reference catalog columns this config refers to."""
        return {self.column_ref_flux}

    @property
    def columns_in_target(self) -> Set[str]:
        """The set of target catalog columns (fluxes and flux errors) this
        config refers to.
        """
        columns_flux = set(self.columns_target_flux)
        columns_flux_err = set(self.columns_target_flux_err)
        return columns_flux | columns_flux_err

135 

136 

class DiffMatchedTractCatalogConfig(
    pipeBase.PipelineTaskConfig,
    pipelineConnections=DiffMatchedTractCatalogConnections,
):
    """Configuration for `DiffMatchedTractCatalogTask`.

    Besides the individual fields, two properties list every column that
    must be read from the reference and target catalogs, respectively.
    """

    column_matched_prefix_ref = pexConfig.Field(
        dtype=str,
        default='refcat_',
        doc='The prefix for matched columns copied from the reference catalog',
    )
    column_ref_extended = pexConfig.Field(
        dtype=str,
        default='is_pointsource',
        doc='The boolean reference table column specifying if the target is extended',
    )
    column_ref_extended_inverted = pexConfig.Field(
        dtype=bool,
        default=True,
        doc='Whether column_ref_extended specifies if the object is compact, not extended',
    )
    column_target_extended = pexConfig.Field(
        dtype=str,
        default='refExtendedness',
        doc='The target table column estimating the extendedness of the object (0 <= x <= 1)',
    )

    @property
    def columns_in_ref(self) -> Set[str]:
        """The set of all reference catalog columns needed by the task:
        both coordinates, the extendedness flag, the columns to copy and the
        reference flux column of every band.
        """
        columns_all = [
            self.coord_format.column_ref_coord1,
            self.coord_format.column_ref_coord2,
            self.column_ref_extended,
        ]
        columns_all.extend(self.columns_ref_copy)
        for config_flux in self.columns_flux.values():
            columns_all.extend(config_flux.columns_in_ref)
        return set(columns_all)

    @property
    def columns_in_target(self) -> Set[str]:
        """The set of all target catalog columns needed by the task:
        both coordinates, the extendedness column, any converted reference
        coordinate columns, coordinate errors, selection flags, the columns
        to copy and the flux/flux error columns of every band.
        """
        columns_all = [
            self.coord_format.column_target_coord1,
            self.coord_format.column_target_coord2,
            self.column_target_extended,
        ]
        if self.coord_format.coords_ref_to_convert is not None:
            columns_all.extend(self.coord_format.coords_ref_to_convert.values())
        for column_list in (
            self.columns_target_coord_err,
            self.columns_target_select_false,
            self.columns_target_select_true,
            self.columns_target_copy,
        ):
            columns_all.extend(column_list)
        for config_flux in self.columns_flux.values():
            columns_all.extend(config_flux.columns_in_target)
        return set(columns_all)

    columns_flux = pexConfig.ConfigDictField(
        keytype=str,
        itemtype=MatchedCatalogFluxesConfig,
        doc="Configs for flux columns for each band",
    )
    columns_ref_copy = pexConfig.ListField(
        dtype=str,
        default=set(),
        doc='Reference table columns to copy into cat_matched',
    )
    columns_target_coord_err = pexConfig.ListField(
        dtype=str,
        # Exactly two distinct columns are required, one per coordinate.
        listCheck=lambda x: (len(x) == 2) and (x[0] != x[1]),
        doc='Target table coordinate columns with standard errors (sigma)',
    )
    columns_target_copy = pexConfig.ListField(
        dtype=str,
        default=('patch',),
        doc='Target table columns to copy into cat_matched',
    )
    columns_target_select_true = pexConfig.ListField(
        dtype=str,
        default=('detect_isPrimary',),
        doc='Target table columns to require to be True for selecting sources',
    )
    columns_target_select_false = pexConfig.ListField(
        dtype=str,
        default=('merge_peak_sky',),
        doc='Target table columns to require to be False for selecting sources',
    )
    coord_format = pexConfig.ConfigField(
        dtype=ConvertCatalogCoordinatesConfig,
        doc="Configuration for coordinate conversion",
    )
    extendedness_cut = pexConfig.Field(
        dtype=float,
        default=0.5,
        doc='Minimum extendedness for a measured source to be considered extended',
    )
    mag_num_bins = pexConfig.Field(
        doc='Number of magnitude bins',
        default=15,
        dtype=int,
    )
    mag_brightest_ref = pexConfig.Field(
        dtype=float,
        default=15,
        doc='Brightest magnitude cutoff for binning',
    )
    mag_ceiling_target = pexConfig.Field(
        dtype=float,
        default=None,
        optional=True,
        doc='Ceiling (maximum/faint) magnitude for target sources',
    )
    mag_faintest_ref = pexConfig.Field(
        dtype=float,
        default=30,
        doc='Faintest magnitude cutoff for binning',
    )
    mag_zeropoint_ref = pexConfig.Field(
        dtype=float,
        default=31.4,
        doc='Magnitude zeropoint for reference sources',
    )
    mag_zeropoint_target = pexConfig.Field(
        dtype=float,
        default=31.4,
        doc='Magnitude zeropoint for target sources',
    )

266 

267 

class Measurement(Enum):
    """The kinds of per-source measurement that statistics are computed on."""

    # A plain difference (target minus reference).
    DIFF = 1
    # A difference divided by the target's error.
    CHI = 2

271 

272 

class Statistic(metaclass=ABCMeta):
    """Abstract interface for a summary statistic over a set of values.

    Concrete subclasses implement `value`.
    """

    @abstractmethod
    def value(self, values):
        """Compute this statistic for the given values.

        Parameters
        ----------
        values : `Collection` [`float`]
            The values to summarize.

        Returns
        -------
        statistic : `float`
            The computed statistic.
        """

291 

292 

class Median(Statistic):
    """Statistic returning the median of the values."""

    def value(self, values):
        median = np.median(values)
        return median

297 

298 

class SigmaIQR(Statistic):
    """Statistic returning the inter-quartile range, re-scaled to be
    equivalent to a normal distribution's sigma.
    """

    def value(self, values):
        sigma = iqr(values, scale='normal')
        return sigma

303 

304 

class SigmaMAD(Statistic):
    """Statistic returning the median absolute deviation, re-scaled to a
    sigma equivalent (computed by `astropy.stats.mad_std`).
    """

    def value(self, values):
        sigma = mad_std(values)
        return sigma

309 

310 

@dataclass(frozen=True)
class Percentile(Statistic):
    """Statistic returning a fixed, arbitrary percentile of the values.

    Parameters
    ----------
    percentile : `float`
        The percentile to compute; must be valid (0 <= p <= 100).
    """

    # The percentile passed on to numpy.percentile.
    percentile: float

    def value(self, values):
        result = np.percentile(values, q=self.percentile)
        return result

324 

325 

def compute_stats(values_ref, values_target, errors_target, row, stats, suffixes, prefix, skip_diff=False):
    """Compute statistics on differences and store results in a row.

    Parameters
    ----------
    values_ref : `numpy.ndarray`, (N,)
        Reference values.
    values_target : `numpy.ndarray`, (N,)
        Measured values.
    errors_target : `numpy.ndarray`, (N,), optional
        Errors (standard deviations) on `values_target`. If `None`, no chi
        statistics are computed.
    row : `numpy.ndarray`, (1, C)
        A numpy array with pre-assigned column names.
    stats : `Dict` [`str`, `Statistic`]
        A dict of `Statistic` values to measure, keyed by their column suffix.
    suffixes : `Dict` [`Measurement`, `str`]
        A dict of column suffixes, keyed by the `Measurement` type they
        apply to. Measurement types that were not computed are skipped.
    prefix : `str`
        A prefix for all column names (e.g. band).
    skip_diff : `bool`
        Whether to skip computing statistics on differences. Note that
        differences will still be computed for chi statistics.

    Returns
    -------
    row_with_stats : `numpy.ndarray`, (1, C)
        The original `row` with statistic values assigned.

    Raises
    ------
    ValueError
        Raised if the lengths of the non-empty inputs do not all match.

    Notes
    -----
    Nothing is assigned if `values_ref` is empty, or for a measurement type
    with no finite values. Inputs may be any array-like accepted by
    `numpy.asarray`.
    """
    n_ref = len(values_ref)
    if n_ref == 0:
        return row

    n_target = len(values_target)
    n_target_err = len(errors_target) if errors_target is not None else n_ref
    if (n_target != n_ref) or (n_target_err != n_ref):
        raise ValueError(f'lengths of values_ref={n_ref}, values_target={n_target}'
                         f', errors_target={n_target_err} must match')

    do_chi = errors_target is not None
    diff = np.asarray(values_target) - np.asarray(values_ref)
    if do_chi:
        # Zero or non-finite errors yield inf/nan chi values; these are
        # discarded by the finite filter below, so the warnings are noise.
        with np.errstate(divide='ignore', invalid='ignore'):
            chi = diff/np.asarray(errors_target)
    else:
        chi = diff
    # Could make this configurable, but non-finite values/errors are not really usable
    valid = np.isfinite(chi)
    values_type = {} if skip_diff else {Measurement.DIFF: diff[valid]}
    if do_chi:
        values_type[Measurement.CHI] = chi[valid]

    for suffix_type, suffix in suffixes.items():
        values = values_type.get(suffix_type)
        if values is not None and len(values) > 0:
            for stat_name, stat in stats.items():
                row[f'{prefix}{suffix}{stat_name}'] = stat.value(values)
    return row

378 

379 

380def _get_columns(bands_columns: Dict, suffixes: Dict, suffixes_flux: Dict, suffixes_mag: Dict, 

381 stats: Dict, target: ComparableCatalog, column_dist: str): 

382 """Get column names for a table of difference statistics. 

383 

384 Parameters 

385 ---------- 

386 bands_columns : `Dict` [`str`,`MatchedCatalogFluxesConfig`] 

387 Dict keyed by band of flux column configuration. 

388 suffixes, suffixes_flux, suffixes_mag : `Dict` [`Measurement`, `str`] 

389 Dict of suffixes for each `Measurement` type, for general columns (e.g. 

390 coordinates), fluxes and magnitudes, respectively. 

391 stats : `Dict` [`Statistic`, `str`] 

392 Dict of suffixes for each `Statistic` type. 

393 target : `ComparableCatalog` 

394 A target catalog with coordinate column names. 

395 column_dist : `str` 

396 The name of the distance column. 

397 

398 Returns 

399 ------- 

400 columns : `Dict` [`str`, `type`] 

401 Dictionary of column types keyed by name. 

402 n_models : `int` 

403 The number of models measurements will be made for. 

404 

405 Notes 

406 ----- 

407 Presently, models must be identical for each band. 

408 """ 

409 # Initial columns 

410 columns = { 

411 "bin": int, 

412 "mag_min": float, 

413 "mag_max": float, 

414 } 

415 

416 # pre-assign all of the columns with appropriate types 

417 n_models = 0 

418 

419 bands = list(bands_columns.keys()) 

420 for idx, (band, config_flux) in enumerate(bands_columns.items()): 

421 columns_suffix = [('_flux', suffixes_flux), ('_mag', suffixes_mag), ] 

422 if idx > 0: 

423 columns_suffix.append((f'_color_{bands[idx - 1]}-{band}', suffixes)) 

424 else: 

425 n_models = len(config_flux.columns_target_flux) 

426 n_models_flux = len(config_flux.columns_target_flux) 

427 n_models_err = len(config_flux.columns_target_flux_err) 

428 

429 # TODO: Do equivalent validation earlier, in the config 

430 if (n_models_flux != n_models) or (n_models_err != n_models): 

431 raise RuntimeError(f'{config_flux} len(columns_target_flux)={n_models_flux} and' 

432 f' len(columns_target_flux_err)={n_models_err} must equal {n_models}') 

433 

434 for subtype in ('', '_resolved', '_unresolved'): 

435 # Totals would be redundant 

436 if subtype != '': 

437 for item in (f'n_{itype}{mtype}' for itype in ('ref', 'target') 

438 for mtype in ('', '_match_right', '_match_wrong')): 

439 columns[f'{band}{subtype}_{item}'] = int 

440 

441 for item in (target.column_coord1, target.column_coord2, column_dist): 

442 for suffix in suffixes.values(): 

443 for stat in stats.keys(): 

444 columns[f'{band}{subtype}_{item}{suffix}{stat}'] = float 

445 

446 for item in config_flux.columns_target_flux: 

447 for prefix_item, suffixes_col in columns_suffix: 

448 for suffix in suffixes_col.values(): 

449 for stat in stats.keys(): 

450 columns[f'{band}{subtype}{prefix_item}_{item}{suffix}{stat}'] = float 

451 

452 return columns, n_models 

453 

454 

class DiffMatchedTractCatalogTask(pipeBase.PipelineTask):
    """Load subsets of matched catalogs and output a merged catalog of matched sources.
    """
    ConfigClass = DiffMatchedTractCatalogConfig
    _DefaultName = "DiffMatchedTractCatalog"

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        """Load only the configured columns of each input, run the task and
        store its outputs.
        """
        inputs = butlerQC.get(inputRefs)
        skymap = inputs.pop("skymap")

        # 'match_candidate' is only requested if the target match table has it
        columns_match_target = ['match_row']
        if 'match_candidate' in inputs['columns_match_target']:
            columns_match_target.append('match_candidate')

        outputs = self.run(
            catalog_ref=inputs['cat_ref'].get(parameters={'columns': self.config.columns_in_ref}),
            catalog_target=inputs['cat_target'].get(parameters={'columns': self.config.columns_in_target}),
            catalog_match_ref=inputs['cat_match_ref'].get(
                parameters={'columns': ['match_candidate', 'match_row']},
            ),
            catalog_match_target=inputs['cat_match_target'].get(
                parameters={'columns': columns_match_target},
            ),
            wcs=skymap[butlerQC.quantum.dataId["tract"]].wcs,
        )
        butlerQC.put(outputs, outputRefs)

    def run(
        self,
        catalog_ref: pd.DataFrame,
        catalog_target: pd.DataFrame,
        catalog_match_ref: pd.DataFrame,
        catalog_match_target: pd.DataFrame,
        wcs: afwGeom.SkyWcs = None,
    ) -> pipeBase.Struct:
        """Load matched reference and target (measured) catalogs, measure summary statistics, and output
        a combined matched catalog with columns from both inputs.

        Parameters
        ----------
        catalog_ref : `pandas.DataFrame`
            A reference catalog to diff objects/sources from.
        catalog_target : `pandas.DataFrame`
            A target catalog to diff reference objects/sources to.
        catalog_match_ref : `pandas.DataFrame`
            A catalog with match indices of target sources and selection flags
            for each reference source.
        catalog_match_target : `pandas.DataFrame`
            A catalog with selection flags for each target source.
        wcs : `lsst.afw.geom.SkyWcs`
            A coordinate system to convert catalog positions to sky coordinates,
            if necessary.

        Returns
        -------
        retStruct : `lsst.pipe.base.Struct`
            A struct with attributes ``cat_matched`` (the catalog of matched
            sources, with reference columns prefixed by
            ``config.column_matched_prefix_ref``) and ``diff_matched`` (a
            table of counts and difference/chi statistics with one row per
            magnitude bin).
        """
        config = self.config

        select_ref = catalog_match_ref['match_candidate'].values
        # Add additional selection criteria for target sources beyond those for matching
        # (not recommended, but can be done anyway)
        # Copy the flags so the in-place updates below cannot write through
        # to the input match catalog.
        select_target = (catalog_match_target['match_candidate'].values.copy()
                         if 'match_candidate' in catalog_match_target.columns
                         else np.ones(len(catalog_match_target), dtype=bool))
        for column in config.columns_target_select_true:
            select_target &= catalog_target[column].values
        for column in config.columns_target_select_false:
            select_target &= ~catalog_target[column].values

        ref, target = config.coord_format.format_catalogs(
            catalog_ref=catalog_ref, catalog_target=catalog_target,
            select_ref=None, select_target=select_target, wcs=wcs, radec_to_xy_func=radec_to_xy,
            return_converted_columns=config.coord_format.coords_ref_to_convert is not None,
        )
        cat_ref = ref.catalog
        cat_target = target.catalog
        n_target = len(cat_target)

        # match_row holds, per reference row, the matched target row (negative if unmatched)
        match_row = catalog_match_ref['match_row'].values
        matched_ref = match_row >= 0
        matched_row = match_row[matched_ref]
        matched_target = np.zeros(n_target, dtype=bool)
        matched_target[matched_row] = True

        # Create a matched table, preserving the target catalog's named index (if it has one)
        cat_left = cat_target.iloc[matched_row]
        has_index_left = cat_left.index.name is not None
        cat_right = cat_ref[matched_ref].reset_index()
        cat_matched = pd.concat((cat_left.reset_index(drop=True), cat_right), axis=1)
        if has_index_left:
            cat_matched.index = cat_left.index
        # Prefix the reference columns with the configured prefix, assigning
        # a new column list rather than mutating the index's values in place.
        n_columns_left = len(cat_left.columns)
        cat_matched.columns = list(cat_matched.columns[:n_columns_left]) + [
            f'{config.column_matched_prefix_ref}{col}' for col in cat_right.columns
        ]

        # Add/compute distance columns
        coord1_target_err, coord2_target_err = config.columns_target_coord_err
        column_dist, column_dist_err = 'distance', 'distanceErr'
        # Unmatched target rows keep an infinite distance
        dist = np.full(n_target, np.inf)

        dist[matched_row] = np.hypot(
            target.coord1[matched_row] - ref.coord1[matched_ref],
            target.coord2[matched_row] - ref.coord2[matched_ref],
        )
        dist_err = np.full(n_target, np.inf)
        dist_err[matched_row] = np.hypot(cat_target.iloc[matched_row][coord1_target_err].values,
                                         cat_target.iloc[matched_row][coord2_target_err].values)
        cat_target[column_dist], cat_target[column_dist_err] = dist, dist_err

        # Slightly smelly hack for when a column (like distance) is already relative to truth
        column_dummy = 'dummy'
        cat_ref[column_dummy] = np.zeros_like(ref.coord1)

        # Add a boolean column for whether a match is classified correctly
        extended_ref = cat_ref[config.column_ref_extended]
        if config.column_ref_extended_inverted:
            extended_ref = 1 - extended_ref

        extended_target = cat_target[config.column_target_extended].values >= config.extendedness_cut

        # Define difference/chi columns and statistics thereof
        suffixes = {Measurement.DIFF: '', Measurement.CHI: '_chi'}
        # Skip diff for fluxes - covered by mags
        suffixes_flux = {Measurement.CHI: suffixes[Measurement.CHI]}
        # Skip chi for magnitudes, which have strange errors
        suffixes_mag = {Measurement.DIFF: suffixes[Measurement.DIFF]}
        stats = {
            '_median': Median(),
            '_sig_iqr': SigmaIQR(),
            '_sig_mad': SigmaMAD(),
        }
        for name, percentile in (('p05', 5.), ('p16', 16.), ('p84', 84.), ('p95', 95.)):
            stats[f'_{name}'] = Percentile(percentile=percentile)

        # Get dict of column names
        columns, n_models = _get_columns(
            bands_columns=config.columns_flux,
            suffixes=suffixes,
            suffixes_flux=suffixes_flux,
            suffixes_mag=suffixes_mag,
            stats=stats,
            target=target,
            column_dist=column_dist,
        )

        # Setup numpy table
        n_bins = config.mag_num_bins
        data = np.zeros((n_bins,), dtype=[(key, value) for key, value in columns.items()])
        data['bin'] = np.arange(n_bins)

        # Setup bins
        bins_mag = np.linspace(start=config.mag_brightest_ref, stop=config.mag_faintest_ref,
                               num=n_bins + 1)
        data['mag_min'] = bins_mag[:-1]
        data['mag_max'] = bins_mag[1:]
        bins_mag = tuple((bins_mag[idx], bins_mag[idx + 1]) for idx in range(n_bins))

        # Define temporary columns for intermediate storage
        column_mag_temp = 'mag_temp'
        column_color_temp = 'color_temp'
        column_color_err_temp = 'colorErr_temp'
        flux_err_frac_prev = [None]*n_models
        mag_prev = [None]*n_models

        # Maps an output column stem to the tuple
        # (reference column, target column, target error column, skip_diff)
        columns_target = {
            target.column_coord1: (
                ref.column_coord1, target.column_coord1, coord1_target_err, False,
            ),
            target.column_coord2: (
                ref.column_coord2, target.column_coord2, coord2_target_err, False,
            ),
            column_dist: (column_dummy, column_dist, column_dist_err, False),
        }

        band_prev = None
        for idx_band, (band, config_flux) in enumerate(config.columns_flux.items()):
            mag_ref = -2.5*np.log10(cat_ref[config_flux.column_ref_flux]) + config.mag_zeropoint_ref
            if idx_band > 0:
                # column_mag_temp still holds the previous band's magnitudes here
                cat_ref[column_color_temp] = cat_ref[column_mag_temp] - mag_ref
            cat_ref[column_mag_temp] = mag_ref

            flux_err_frac = [None]*n_models
            mag_model = [None]*n_models

            select_ref_bins = [select_ref & (mag_ref > mag_lo) & (mag_ref < mag_hi)
                               for idx_bin, (mag_lo, mag_hi) in enumerate(bins_mag)]

            # Iterate over multiple models, compute their mags and colours (if there's a previous band)
            for idx_model in range(n_models):
                column_target_flux = config_flux.columns_target_flux[idx_model]
                column_target_flux_err = config_flux.columns_target_flux_err[idx_model]

                flux_target = cat_target[column_target_flux]
                mag_target = -2.5*np.log10(flux_target) + config.mag_zeropoint_target
                if config.mag_ceiling_target is not None:
                    mag_target[mag_target > config.mag_ceiling_target] = config.mag_ceiling_target
                mag_model[idx_model] = mag_target

                # These are needed for computing magnitude/color "errors" (which are a sketchy concept)
                flux_err_frac[idx_model] = cat_target[column_target_flux_err]/flux_target
                # Keep these mags tabulated for convenience
                column_mag_temp_model = f'{column_mag_temp}{idx_model}'
                cat_target[column_mag_temp_model] = mag_target

                columns_target[f'flux_{column_target_flux}'] = (
                    config_flux.column_ref_flux,
                    column_target_flux,
                    column_target_flux_err,
                    True,
                )
                # Note: magnitude errors are generally problematic and not worth aggregating
                columns_target[f'mag_{column_target_flux}'] = (
                    column_mag_temp, column_mag_temp_model, None, False,
                )

                if idx_band > 0:
                    column_color_temp_model = f'{column_color_temp}{idx_model}'
                    column_color_err_temp_model = f'{column_color_err_temp}{idx_model}'

                    # e.g. if order is ugrizy, first color will be u - g
                    cat_target[column_color_temp_model] = mag_prev[idx_model] - mag_model[idx_model]

                    # Sum (in quadrature, and admittedly sketchy for faint fluxes) magnitude errors
                    cat_target[column_color_err_temp_model] = 2.5/np.log(10)*np.hypot(
                        flux_err_frac[idx_model], flux_err_frac_prev[idx_model])
                    columns_target[f'color_{band_prev}-{band}_{column_target_flux}'] = (
                        column_color_temp,
                        column_color_temp_model,
                        column_color_err_temp_model,
                        False,
                    )

                for idx_bin, (mag_lo, mag_hi) in enumerate(bins_mag):
                    row = data[idx_bin]
                    select_ref_bin = select_ref_bins[idx_bin]
                    select_target_bin = select_target & (mag_target > mag_lo) & (mag_target < mag_hi)

                    for subtype, is_extended in (('', None), ('_resolved', True), ('_unresolved', False)):
                        # Counts filtered by match selection and magnitude bin
                        select_ref_sub = select_ref_bin.copy()
                        select_target_sub = select_target_bin.copy()
                        if is_extended is not None:
                            is_extended_ref = (extended_ref == is_extended)
                            select_ref_sub &= is_extended_ref
                            select_target_sub &= (extended_target == is_extended)
                            n_ref_sub = np.count_nonzero(select_ref_sub)
                            n_target_sub = np.count_nonzero(select_target_sub)
                            row[f'{band}{subtype}_n_ref'] = n_ref_sub
                            row[f'{band}{subtype}_n_target'] = n_target_sub

                        # Filter matches by magnitude bin and true class
                        match_row_bin = match_row.copy()
                        match_row_bin[~select_ref_sub] = -1
                        match_good = match_row_bin >= 0

                        n_match = np.count_nonzero(match_good)

                        # Same for counts of matched target sources (for e.g. purity)

                        if n_match > 0:
                            rows_matched = match_row_bin[match_good]
                            subset_target = cat_target.iloc[rows_matched]
                            if is_extended is not None:
                                right_type = extended_target[rows_matched] == is_extended
                                n_total = len(right_type)
                                n_right = np.count_nonzero(right_type)
                                row[f'{band}{subtype}_n_ref_match_right'] = n_right
                                row[f'{band}{subtype}_n_ref_match_wrong'] = n_total - n_right

                            # compute stats for this bin, for all columns
                            for column, (column_ref, column_target, column_err_target, skip_diff) \
                                    in columns_target.items():
                                values_ref = cat_ref[column_ref][match_good].values
                                compute_stats(
                                    values_ref,
                                    subset_target[column_target].values,
                                    (subset_target[column_err_target].values if column_err_target is not None
                                     else None),
                                    row,
                                    stats,
                                    suffixes,
                                    prefix=f'{band}{subtype}_{column}',
                                    skip_diff=skip_diff,
                                )

                        # Count matched target sources with *measured* mags within bin
                        # Used for e.g. purity calculation
                        # Should be merged with above code if there's ever a need for
                        # measuring stats on this source selection
                        select_target_sub &= matched_target

                        if is_extended is not None and (np.count_nonzero(select_target_sub) > 0):
                            n_total = np.count_nonzero(select_target_sub)
                            right_type = np.zeros(n_target, dtype=bool)
                            right_type[match_row[matched_ref & is_extended_ref]] = True
                            right_type &= select_target_sub
                            n_right = np.count_nonzero(right_type)
                            row[f'{band}{subtype}_n_target_match_right'] = n_right
                            row[f'{band}{subtype}_n_target_match_wrong'] = n_total - n_right

                # delete the flux/color columns since they change with each band
                for prefix in ('flux_', 'mag_'):
                    del columns_target[f'{prefix}{column_target_flux}']
                if idx_band > 0:
                    del columns_target[f'color_{band_prev}-{band}_{column_target_flux}']

            # keep values needed for colors
            flux_err_frac_prev = flux_err_frac
            mag_prev = mag_model
            band_prev = band

        retStruct = pipeBase.Struct(cat_matched=cat_matched, diff_matched=pd.DataFrame(data))
        return retStruct