Coverage for python/lsst/meas/astrom/matcher_probabilistic.py: 25%

210 statements  

« prev     ^ index     » next       coverage.py v7.2.5, created at 2023-05-04 05:26 -0700

1# This file is part of meas_astrom. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

22__all__ = ['ConvertCatalogCoordinatesConfig', 'MatchProbabilisticConfig', 'MatcherProbabilistic'] 

23 

24import lsst.pex.config as pexConfig 

25 

26from dataclasses import dataclass 

27import logging 

28import numpy as np 

29import pandas as pd 

30from scipy.spatial import cKDTree 

31import time 

32from typing import Callable, Set 

33 

34logger_default = logging.getLogger(__name__) 

35 

36 

37def _mul_column(column: np.array, value: float): 

38 if value is not None and value != 1: 

39 column *= value 

40 return column 

41 

42 

43def _radec_to_xyz(ra, dec): 

44 """Convert input ra/dec coordinates to spherical unit vectors. 

45 

46 Parameters 

47 ---------- 

48 ra, dec: `numpy.ndarray` 

49 Arrays of right ascension/declination in degrees. 

50 

51 Returns 

52 ------- 

53 vectors : `numpy.ndarray`, (N, 3) 

54 Output unit vectors. 

55 """ 

56 if ra.size != dec.size: 

57 raise ValueError('ra and dec must be same size') 

58 ras = np.radians(ra) 

59 decs = np.radians(dec) 

60 vectors = np.empty((ras.size, 3)) 

61 

62 sin_dec = np.sin(np.pi / 2 - decs) 

63 vectors[:, 0] = sin_dec * np.cos(ras) 

64 vectors[:, 1] = sin_dec * np.sin(ras) 

65 vectors[:, 2] = np.cos(np.pi / 2 - decs) 

66 

67 return vectors 

68 

69 

70@dataclass 

71class CatalogExtras: 

72 """Store frequently-reference (meta)data relevant for matching a catalog. 

73 

74 Parameters 

75 ---------- 

76 catalog : `pandas.DataFrame` 

77 A pandas catalog to store extra information for. 

78 select : `numpy.array` 

79 A numpy boolean array of the same length as catalog to be used for 

80 target selection. 

81 """ 

82 n: int 

83 indices: np.array 

84 select: np.array 

85 

86 coordinate_factor: float = None 

87 

88 def __init__(self, catalog: pd.DataFrame, select: np.array = None, coordinate_factor: float = None): 

89 self.n = len(catalog) 

90 self.select = np.ones(self.n, dtype=bool) if select is None else select 

91 self.indices = np.flatnonzero(select) if select is not None else np.arange(self.n) 

92 self.coordinate_factor = coordinate_factor 

93 

94 

95@dataclass(frozen=True) 

96class ComparableCatalog: 

97 """A catalog with sources with coordinate columns in some standard format/units. 

98 

99 catalog : `pandas.DataFrame` 

100 A catalog with comparable coordinate columns. 

101 column_coord1 : `str` 

102 The first spatial coordinate column name. 

103 column_coord2 : `str` 

104 The second spatial coordinate column name. 

105 coord1 : `numpy.array` 

106 The first spatial coordinate values. 

107 coord2 : `numpy.array` 

108 The second spatial coordinate values. 

109 extras : `CatalogExtras` 

110 Extra cached (meta)data for the `catalog`. 

111 """ 

112 catalog: pd.DataFrame 

113 column_coord1: str 

114 column_coord2: str 

115 coord1: np.array 

116 coord2: np.array 

117 extras: CatalogExtras 

118 

119 

120class ConvertCatalogCoordinatesConfig(pexConfig.Config): 

121 """Configuration for the MatchProbabilistic matcher. 

122 """ 

123 column_ref_coord1 = pexConfig.Field( 

124 dtype=str, 

125 default='ra', 

126 doc='The reference table column for the first spatial coordinate (usually x or ra).', 

127 ) 

128 column_ref_coord2 = pexConfig.Field( 

129 dtype=str, 

130 default='dec', 

131 doc='The reference table column for the second spatial coordinate (usually y or dec).' 

132 'Units must match column_ref_coord1.', 

133 ) 

134 column_target_coord1 = pexConfig.Field( 

135 dtype=str, 

136 default='coord_ra', 

137 doc='The target table column for the first spatial coordinate (usually x or ra).' 

138 'Units must match column_ref_coord1.', 

139 ) 

140 column_target_coord2 = pexConfig.Field( 

141 dtype=str, 

142 default='coord_dec', 

143 doc='The target table column for the second spatial coordinate (usually y or dec).' 

144 'Units must match column_ref_coord2.', 

145 ) 

146 coords_spherical = pexConfig.Field( 

147 dtype=bool, 

148 default=True, 

149 doc='Whether column_*_coord[12] are spherical coordinates (ra/dec) or not (pixel x/y)', 

150 ) 

151 coords_ref_factor = pexConfig.Field( 

152 dtype=float, 

153 default=1.0, 

154 doc='Multiplicative factor for reference catalog coordinates.' 

155 'If coords_spherical is true, this must be the number of degrees per unit increment of ' 

156 'column_ref_coord[12]. Otherwise, it must convert the coordinate to the same units' 

157 ' as the target coordinates.', 

158 ) 

159 coords_target_factor = pexConfig.Field( 

160 dtype=float, 

161 default=1.0, 

162 doc='Multiplicative factor for target catalog coordinates.' 

163 'If coords_spherical is true, this must be the number of degrees per unit increment of ' 

164 'column_target_coord[12]. Otherwise, it must convert the coordinate to the same units' 

165 ' as the reference coordinates.', 

166 ) 

167 coords_ref_to_convert = pexConfig.DictField( 167 ↛ exitline 167 didn't jump to the function exit

168 default=None, 

169 optional=True, 

170 keytype=str, 

171 itemtype=str, 

172 dictCheck=lambda x: len(x) == 2, 

173 doc='Dict mapping sky coordinate columns to be converted to pixel columns', 

174 ) 

175 mag_zeropoint_ref = pexConfig.Field( 

176 dtype=float, 

177 default=31.4, 

178 doc='Magnitude zeropoint for reference catalog.', 

179 ) 

180 

181 def format_catalogs( 

182 self, 

183 catalog_ref: pd.DataFrame, 

184 catalog_target: pd.DataFrame, 

185 select_ref: np.array = None, 

186 select_target: np.array = None, 

187 radec_to_xy_func: Callable = None, 

188 return_converted_columns: bool = False, 

189 **kwargs, 

190 ): 

191 """Format matched catalogs that may require coordinate conversions. 

192 

193 Parameters 

194 ---------- 

195 catalog_ref : `pandas.DataFrame` 

196 A reference catalog for comparison to `catalog_target`. 

197 catalog_target : `pandas.DataFrame` 

198 A target catalog with measurements for comparison to `catalog_ref`. 

199 select_ref : `numpy.ndarray`, (Nref,) 

200 A boolean array of len `catalog_ref`, True for valid match candidates. 

201 select_target : `numpy.ndarray`, (Ntarget,) 

202 A boolean array of len `catalog_target`, True for valid match candidates. 

203 radec_to_xy_func : `typing.Callable` 

204 Function taking equal-length ra, dec arrays and returning an ndarray of 

205 - ``x``: current parameter (`float`). 

206 - ``extra_args``: additional arguments (`dict`). 

207 return_converted_columns : `bool` 

208 Whether to return converted columns in the `coord1` and `coord2` 

209 attributes, rather than keep the original values. 

210 kwargs 

211 

212 Returns 

213 ------- 

214 compcat_ref, compcat_target : `ComparableCatalog` 

215 Comparable catalogs corresponding to the input reference and target. 

216 

217 """ 

218 convert_ref = self.coords_ref_to_convert 

219 if convert_ref and not callable(radec_to_xy_func): 

220 raise TypeError('radec_to_xy_func must be callable if converting ref coords') 

221 

222 # Set up objects with frequently-used attributes like selection bool array 

223 extras_ref, extras_target = ( 

224 CatalogExtras(catalog, select=select, coordinate_factor=coord_factor) 

225 for catalog, select, coord_factor in zip( 

226 (catalog_ref, catalog_target), 

227 (select_ref, select_target), 

228 (self.coords_ref_factor, self.coords_target_factor), 

229 ) 

230 ) 

231 

232 compcats = [] 

233 

234 # Retrieve coordinates and multiply them by scaling factors 

235 for catalog, extras, (column1, column2), convert in ( 

236 (catalog_ref, extras_ref, (self.column_ref_coord1, self.column_ref_coord2), convert_ref), 

237 (catalog_target, extras_target, (self.column_target_coord1, self.column_target_coord2), False), 

238 ): 

239 coord1, coord2 = ( 

240 _mul_column(catalog[column], extras.coordinate_factor) 

241 for column in (column1, column2) 

242 ) 

243 if convert: 

244 xy_ref = radec_to_xy_func(coord1, coord2, self.coords_ref_factor, **kwargs) 

245 for idx_coord, column_out in enumerate(self.coords_ref_to_convert.values()): 

246 coord = np.array([xy[idx_coord] for xy in xy_ref]) 

247 catalog[column_out] = coord 

248 if convert_ref and return_converted_columns: 

249 column1, column2 = self.coords_ref_to_convert.values() 

250 coord1, coord2 = catalog[column1], catalog[column2] 

251 if isinstance(coord1, pd.Series): 

252 coord1 = coord1.values 

253 if isinstance(coord2, pd.Series): 

254 coord2 = coord2.values 

255 

256 compcats.append(ComparableCatalog( 

257 catalog=catalog, column_coord1=column1, column_coord2=column2, 

258 coord1=coord1, coord2=coord2, extras=extras, 

259 )) 

260 

261 return tuple(compcats) 

262 

263 

264class MatchProbabilisticConfig(pexConfig.Config): 

265 """Configuration for the MatchProbabilistic matcher. 

266 """ 

267 column_ref_order = pexConfig.Field( 

268 dtype=str, 

269 default=None, 

270 optional=True, 

271 doc="Name of column in reference catalog specifying order for matching." 

272 " Derived from columns_ref_flux if not set.", 

273 ) 

274 

275 @property 

276 def columns_in_ref(self) -> Set[str]: 

277 columns_all = [ 

278 self.coord_format.column_ref_coord1, 

279 self.coord_format.column_ref_coord2, 

280 ] 

281 for columns in ( 

282 self.columns_ref_flux, 

283 self.columns_ref_meas, 

284 self.columns_ref_select_false, 

285 self.columns_ref_select_true, 

286 self.columns_ref_copy, 

287 ): 

288 columns_all.extend(columns) 

289 

290 return set(columns_all) 

291 

292 @property 

293 def columns_in_target(self) -> Set[str]: 

294 columns_all = [ 

295 self.coord_format.column_target_coord1, 

296 self.coord_format.column_target_coord2, 

297 ] 

298 for columns in ( 

299 self.columns_target_meas, 

300 self.columns_target_err, 

301 self.columns_target_select_false, 

302 self.columns_target_select_true, 

303 self.columns_target_copy, 

304 ): 

305 columns_all.extend(columns) 

306 return set(columns_all) 

307 

308 columns_ref_copy = pexConfig.ListField( 308 ↛ exitline 308 didn't jump to the function exit

309 dtype=str, 

310 default=[], 

311 listCheck=lambda x: len(set(x)) == len(x), 

312 optional=True, 

313 doc='Reference table columns to copy unchanged into both match tables', 

314 ) 

315 columns_ref_flux = pexConfig.ListField( 315 ↛ exitline 315 didn't jump to the function exit

316 dtype=str, 

317 default=[], 

318 listCheck=lambda x: len(set(x)) == len(x), 

319 optional=True, 

320 doc="List of reference flux columns to nansum total magnitudes from if column_order is None", 

321 ) 

322 columns_ref_meas = pexConfig.ListField( 

323 dtype=str, 

324 doc='The reference table columns to compute match likelihoods from ' 

325 '(usually centroids and fluxes/magnitudes)', 

326 ) 

327 columns_ref_select_true = pexConfig.ListField( 

328 dtype=str, 

329 default=tuple(), 

330 doc='Reference table columns to require to be True for selecting sources', 

331 ) 

332 columns_ref_select_false = pexConfig.ListField( 

333 dtype=str, 

334 default=tuple(), 

335 doc='Reference table columns to require to be False for selecting sources', 

336 ) 

337 columns_target_copy = pexConfig.ListField( 337 ↛ exitline 337 didn't jump to the function exit

338 dtype=str, 

339 default=[], 

340 listCheck=lambda x: len(set(x)) == len(x), 

341 optional=True, 

342 doc='Target table columns to copy unchanged into both match tables', 

343 ) 

344 columns_target_meas = pexConfig.ListField( 

345 dtype=str, 

346 doc='Target table columns with measurements corresponding to columns_ref_meas', 

347 ) 

348 columns_target_err = pexConfig.ListField( 

349 dtype=str, 

350 doc='Target table columns with standard errors (sigma) corresponding to columns_ref_meas', 

351 ) 

352 columns_target_select_true = pexConfig.ListField( 

353 dtype=str, 

354 default=('detect_isPrimary',), 

355 doc='Target table columns to require to be True for selecting sources', 

356 ) 

357 columns_target_select_false = pexConfig.ListField( 

358 dtype=str, 

359 default=('merge_peak_sky',), 

360 doc='Target table columns to require to be False for selecting sources', 

361 ) 

362 coord_format = pexConfig.ConfigField( 

363 dtype=ConvertCatalogCoordinatesConfig, 

364 doc="Configuration for coordinate conversion", 

365 ) 

366 mag_brightest_ref = pexConfig.Field( 

367 dtype=float, 

368 default=-np.inf, 

369 doc='Bright magnitude cutoff for selecting reference sources to match.' 

370 ' Ignored if column_ref_order is None.' 

371 ) 

372 mag_faintest_ref = pexConfig.Field( 

373 dtype=float, 

374 default=np.Inf, 

375 doc='Faint magnitude cutoff for selecting reference sources to match.' 

376 ' Ignored if column_ref_order is None.' 

377 ) 

378 match_dist_max = pexConfig.Field( 

379 dtype=float, 

380 default=0.5, 

381 doc='Maximum match distance. Units must be arcseconds if coords_spherical, ' 

382 'or else match those of column_*_coord[12] multiplied by coords_*_factor.', 

383 ) 

384 match_n_max = pexConfig.Field( 

385 dtype=int, 

386 default=10, 

387 optional=True, 

388 doc='Maximum number of spatial matches to consider (in ascending distance order).', 

389 ) 

390 match_n_finite_min = pexConfig.Field( 

391 dtype=int, 

392 default=3, 

393 optional=True, 

394 doc='Minimum number of columns with a finite value to measure match likelihood', 

395 ) 

396 order_ascending = pexConfig.Field( 

397 dtype=bool, 

398 default=False, 

399 optional=True, 

400 doc='Whether to order reference match candidates in ascending order of column_ref_order ' 

401 '(should be False if the column is a flux and True if it is a magnitude.', 

402 ) 

403 

404 

405def default_value(dtype): 

406 if dtype == str: 

407 return '' 

408 elif dtype == np.signedinteger: 

409 return np.Inf 

410 elif dtype == np.unsignedinteger: 

411 return -np.Inf 

412 return None 

413 

414 

415class MatcherProbabilistic: 

416 """A probabilistic, greedy catalog matcher. 

417 

418 Parameters 

419 ---------- 

420 config: `MatchProbabilisticConfig` 

421 A configuration instance. 

422 """ 

423 config: MatchProbabilisticConfig 

424 

425 def __init__( 

426 self, 

427 config: MatchProbabilisticConfig, 

428 ): 

429 self.config = config 

430 

431 def match( 

432 self, 

433 catalog_ref: pd.DataFrame, 

434 catalog_target: pd.DataFrame, 

435 select_ref: np.array = None, 

436 select_target: np.array = None, 

437 logger: logging.Logger = None, 

438 logging_n_rows: int = None, 

439 **kwargs 

440 ): 

441 """Match catalogs. 

442 

443 Parameters 

444 ---------- 

445 catalog_ref : `pandas.DataFrame` 

446 A reference catalog to match in order of a given column (i.e. greedily). 

447 catalog_target : `pandas.DataFrame` 

448 A target catalog for matching sources from `catalog_ref`. Must contain measurements with errors. 

449 select_ref : `numpy.array` 

450 A boolean array of the same length as `catalog_ref` selecting the sources that can be matched. 

451 select_target : `numpy.array` 

452 A boolean array of the same length as `catalog_target` selecting the sources that can be matched. 

453 logger : `logging.Logger` 

454 A Logger for logging. 

455 logging_n_rows : `int` 

456 The number of sources to match before printing a log message. 

457 kwargs 

458 Additional keyword arguments to pass to `format_catalogs`. 

459 

460 Returns 

461 ------- 

462 catalog_out_ref : `pandas.DataFrame` 

463 A catalog of identical length to `catalog_ref`, containing match information for rows selected by 

464 `select_ref` (including the matching row index in `catalog_target`). 

465 catalog_out_target : `pandas.DataFrame` 

466 A catalog of identical length to `catalog_target`, containing the indices of matching rows in 

467 `catalog_ref`. 

468 exceptions : `dict` [`int`, `Exception`] 

469 A dictionary keyed by `catalog_target` row number of the first exception caught when matching. 

470 """ 

471 if logger is None: 

472 logger = logger_default 

473 

474 config = self.config 

475 

476 # Transform any coordinates, if required 

477 # Note: The returned objects contain the original catalogs, as well as 

478 # transformed coordinates, and the selection of sources for matching. 

479 # These might be identical to the arrays passed as kwargs, but that 

480 # depends on config settings. 

481 # For the rest of this function, the selection arrays will be used, 

482 # but the indices of the original, unfiltered catalog will also be 

483 # output, so some further indexing steps are needed. 

484 ref, target = config.coord_format.format_catalogs( 

485 catalog_ref=catalog_ref, catalog_target=catalog_target, 

486 select_ref=select_ref, select_target=select_target, 

487 **kwargs 

488 ) 

489 

490 # If no order is specified, take nansum of all flux columns for a 'total flux' 

491 # Note: it won't actually be a total flux if bands overlap significantly 

492 # (or it might define a filter with >100% efficiency 

493 # Also, this is done on the original dataframe as it's harder to accomplish 

494 # just with a recarray 

495 column_order = ( 

496 catalog_ref.loc[ref.extras.select, config.column_ref_order] 

497 if config.column_ref_order is not None else 

498 np.nansum(catalog_ref.loc[ref.extras.select, config.columns_ref_flux], axis=1) 

499 ) 

500 order = np.argsort(column_order if config.order_ascending else -column_order) 

501 

502 n_ref_select = len(ref.extras.indices) 

503 

504 match_dist_max = config.match_dist_max 

505 coords_spherical = config.coord_format.coords_spherical 

506 if coords_spherical: 

507 match_dist_max = np.radians(match_dist_max / 3600.) 

508 

509 # Convert ra/dec sky coordinates to spherical vectors for accurate distances 

510 func_convert = _radec_to_xyz if coords_spherical else np.vstack 

511 vec_ref, vec_target = ( 

512 func_convert(cat.coord1[cat.extras.select], cat.coord2[cat.extras.select]) 

513 for cat in (ref, target) 

514 ) 

515 

516 # Generate K-d tree to compute distances 

517 logger.info('Generating cKDTree with match_n_max=%d', config.match_n_max) 

518 tree_obj = cKDTree(vec_target) 

519 

520 scores, idxs_target_select = tree_obj.query( 

521 vec_ref, 

522 distance_upper_bound=match_dist_max, 

523 k=config.match_n_max, 

524 ) 

525 

526 n_target_select = len(target.extras.indices) 

527 n_matches = np.sum(idxs_target_select != n_target_select, axis=1) 

528 n_matched_max = np.sum(n_matches == config.match_n_max) 

529 if n_matched_max > 0: 

530 logger.warning( 

531 '%d/%d (%.2f%%) selected true objects have n_matches=n_match_max(%d)', 

532 n_matched_max, n_ref_select, 100.*n_matched_max/n_ref_select, config.match_n_max 

533 ) 

534 

535 # Pre-allocate outputs 

536 target_row_match = np.full(target.extras.n, np.nan, dtype=np.int64) 

537 ref_candidate_match = np.zeros(ref.extras.n, dtype=bool) 

538 ref_row_match = np.full(ref.extras.n, np.nan, dtype=np.int64) 

539 ref_match_count = np.zeros(ref.extras.n, dtype=np.int32) 

540 ref_match_meas_finite = np.zeros(ref.extras.n, dtype=np.int32) 

541 ref_chisq = np.full(ref.extras.n, np.nan, dtype=float) 

542 

543 # Need the original reference row indices for output 

544 idx_orig_ref, idx_orig_target = (np.argwhere(cat.extras.select) for cat in (ref, target)) 

545 

546 # Retrieve required columns, including any converted ones (default to original column name) 

547 columns_convert = config.coord_format.coords_ref_to_convert 

548 if columns_convert is None: 

549 columns_convert = {} 

550 data_ref = ref.catalog[ 

551 [columns_convert.get(column, column) for column in config.columns_ref_meas] 

552 ].iloc[ref.extras.indices[order]] 

553 data_target = target.catalog[config.columns_target_meas][target.extras.select] 

554 errors_target = target.catalog[config.columns_target_err][target.extras.select] 

555 

556 exceptions = {} 

557 # The kdTree uses len(inputs) as a sentinel value for no match 

558 matched_target = {n_target_select, } 

559 

560 t_begin = time.process_time() 

561 

562 logger.info('Matching n_indices=%d/%d', len(order), len(ref.catalog)) 

563 for index_n, index_row_select in enumerate(order): 

564 index_row = idx_orig_ref[index_row_select] 

565 ref_candidate_match[index_row] = True 

566 found = idxs_target_select[index_row_select, :] 

567 # Select match candidates from nearby sources not already matched 

568 # Note: set lookup is apparently fast enough that this is a few percent faster than: 

569 # found = [x for x in found[found != n_target_select] if x not in matched_target] 

570 # ... at least for ~1M sources 

571 found = [x for x in found if x not in matched_target] 

572 n_found = len(found) 

573 if n_found > 0: 

574 # This is an ndarray of n_found rows x len(data_ref/target) columns 

575 chi = ( 

576 (data_target.iloc[found].values - data_ref.iloc[index_n].values) 

577 / errors_target.iloc[found].values 

578 ) 

579 finite = np.isfinite(chi) 

580 n_finite = np.sum(finite, axis=1) 

581 # Require some number of finite chi_sq to match 

582 chisq_good = n_finite >= config.match_n_finite_min 

583 if np.any(chisq_good): 

584 try: 

585 chisq_sum = np.zeros(n_found, dtype=float) 

586 chisq_sum[chisq_good] = np.nansum(chi[chisq_good, :] ** 2, axis=1) 

587 idx_chisq_min = np.nanargmin(chisq_sum / n_finite) 

588 ref_match_meas_finite[index_row] = n_finite[idx_chisq_min] 

589 ref_match_count[index_row] = len(chisq_good) 

590 ref_chisq[index_row] = chisq_sum[idx_chisq_min] 

591 idx_match_select = found[idx_chisq_min] 

592 row_target = target.extras.indices[idx_match_select] 

593 ref_row_match[index_row] = row_target 

594 

595 target_row_match[row_target] = index_row 

596 matched_target.add(idx_match_select) 

597 except Exception as error: 

598 # Can't foresee any exceptions, but they shouldn't prevent 

599 # matching subsequent sources 

600 exceptions[index_row] = error 

601 

602 if logging_n_rows and ((index_n + 1) % logging_n_rows == 0): 

603 t_elapsed = time.process_time() - t_begin 

604 logger.info( 

605 'Processed %d/%d in %.2fs at sort value=%.3f', 

606 index_n + 1, n_ref_select, t_elapsed, column_order[order[index_n]], 

607 ) 

608 

609 data_ref = { 

610 'match_candidate': ref_candidate_match, 

611 'match_row': ref_row_match, 

612 'match_count': ref_match_count, 

613 'match_chisq': ref_chisq, 

614 'match_n_chisq_finite': ref_match_meas_finite, 

615 } 

616 data_target = { 

617 'match_candidate': target.extras.select if target.extras.select is not None else ( 

618 np.ones(target.extras.n, dtype=bool)), 

619 'match_row': target_row_match, 

620 } 

621 

622 for (columns, out_original, out_matched, in_original, in_matched, matches) in ( 

623 ( 

624 self.config.columns_ref_copy, 

625 data_ref, 

626 data_target, 

627 ref, 

628 target, 

629 target_row_match, 

630 ), 

631 ( 

632 self.config.columns_target_copy, 

633 data_target, 

634 data_ref, 

635 target, 

636 ref, 

637 ref_row_match, 

638 ), 

639 ): 

640 matched = matches >= 0 

641 idx_matched = matches[matched] 

642 

643 for column in columns: 

644 values = in_original.catalog[column] 

645 out_original[column] = values 

646 dtype = in_original.catalog[column].dtype 

647 

648 # Pandas object columns can have mixed types - check for that 

649 if dtype == object: 

650 types = list(set((type(x) for x in values))) 

651 if len(types) != 1: 

652 raise RuntimeError(f'Column {column} dtype={dtype} has multiple types={types}') 

653 dtype = types[0] 

654 

655 value_fill = default_value(dtype) 

656 

657 # Without this, the dtype would be '<U1' for an empty Unicode string 

658 if dtype == str: 

659 dtype = f'<U{max(len(x) for x in values)}' 

660 

661 column_match = np.full(in_matched.extras.n, value_fill, dtype=dtype) 

662 column_match[matched] = in_original.catalog[column][idx_matched] 

663 out_matched[f'match_{column}'] = column_match 

664 

665 catalog_out_ref = pd.DataFrame(data_ref) 

666 catalog_out_target = pd.DataFrame(data_target) 

667 

668 return catalog_out_ref, catalog_out_target, exceptions