Coverage for python/lsst/meas/astrom/matcher_probabilistic.py: 24%

228 statements  

« prev     ^ index     » next       coverage.py v7.5.0, created at 2024-05-03 10:36 +0000

1# This file is part of meas_astrom. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

22__all__ = ['ConvertCatalogCoordinatesConfig', 'MatchProbabilisticConfig', 'MatcherProbabilistic'] 

23 

24import lsst.pex.config as pexConfig 

25 

26from dataclasses import dataclass 

27import logging 

28import numpy as np 

29import pandas as pd 

30from scipy.spatial import cKDTree 

31import time 

32from typing import Callable, Set 

33 

34logger_default = logging.getLogger(__name__) 

35 

36 

37def _mul_column(column: np.array, value: float): 

38 if value is not None and value != 1: 

39 column *= value 

40 return column 

41 

42 

43def _radec_to_xyz(ra, dec): 

44 """Convert input ra/dec coordinates to spherical unit vectors. 

45 

46 Parameters 

47 ---------- 

48 ra, dec: `numpy.ndarray` 

49 Arrays of right ascension/declination in degrees. 

50 

51 Returns 

52 ------- 

53 vectors : `numpy.ndarray`, (N, 3) 

54 Output unit vectors. 

55 """ 

56 if ra.size != dec.size: 

57 raise ValueError('ra and dec must be same size') 

58 ras = np.radians(ra) 

59 decs = np.radians(dec) 

60 vectors = np.empty((ras.size, 3)) 

61 

62 sin_dec = np.sin(np.pi / 2 - decs) 

63 vectors[:, 0] = sin_dec * np.cos(ras) 

64 vectors[:, 1] = sin_dec * np.sin(ras) 

65 vectors[:, 2] = np.cos(np.pi / 2 - decs) 

66 

67 return vectors 

68 

69 

@dataclass
class CatalogExtras:
    """Store frequently-referenced (meta)data relevant for matching a catalog.

    Parameters
    ----------
    catalog : `pandas.DataFrame`
        A pandas catalog to store extra information for. Only its length
        is read.
    select : `numpy.array`
        A numpy boolean array of the same length as catalog to be used for
        target selection. If None, every row is selected.
    coordinate_factor : `float`
        A factor stored alongside the selection; kept as given.
    """

    # Number of rows in the catalog
    n: int
    # Positions of the selected rows
    indices: np.array
    # Boolean selection, one entry per row
    select: np.array

    coordinate_factor: float = None

    def __init__(self, catalog: pd.DataFrame, select: np.array = None, coordinate_factor: float = None):
        n_rows = len(catalog)
        self.n = n_rows
        if select is None:
            # No selection supplied: select everything
            self.select = np.ones(n_rows, dtype=bool)
            self.indices = np.arange(n_rows)
        else:
            self.select = select
            self.indices = np.flatnonzero(select)
        self.coordinate_factor = coordinate_factor

94 

95 

@dataclass(frozen=True)
class ComparableCatalog:
    """An immutable bundle of a catalog and its matching coordinates.

    Parameters
    ----------
    catalog : `pandas.DataFrame`
        A catalog with comparable coordinate columns.
    column_coord1 : `str`
        The name of the first spatial coordinate column.
    column_coord2 : `str`
        The name of the second spatial coordinate column.
    coord1 : `numpy.array`
        The values of the first spatial coordinate.
    coord2 : `numpy.array`
        The values of the second spatial coordinate.
    extras : `CatalogExtras`
        Extra cached (meta)data for the `catalog`.
    """

    catalog: pd.DataFrame
    column_coord1: str
    column_coord2: str
    coord1: np.array
    coord2: np.array
    extras: CatalogExtras

120 

121 

class ConvertCatalogCoordinatesConfig(pexConfig.Config):
    """Configuration for formatting and converting catalog coordinates."""

    column_ref_coord1 = pexConfig.Field(
        dtype=str,
        default='ra',
        doc='The reference table column for the first spatial coordinate (usually x or ra).',
    )
    column_ref_coord2 = pexConfig.Field(
        dtype=str,
        default='dec',
        doc='The reference table column for the second spatial coordinate (usually y or dec).'
            ' Units must match column_ref_coord1.',
    )
    column_target_coord1 = pexConfig.Field(
        dtype=str,
        default='coord_ra',
        doc='The target table column for the first spatial coordinate (usually x or ra).'
            ' Units must match column_ref_coord1.',
    )
    column_target_coord2 = pexConfig.Field(
        dtype=str,
        default='coord_dec',
        doc='The target table column for the second spatial coordinate (usually y or dec).'
            ' Units must match column_ref_coord2.',
    )
    coords_spherical = pexConfig.Field(
        dtype=bool,
        default=True,
        doc='Whether column_*_coord[12] are spherical coordinates (ra/dec) or not (pixel x/y)',
    )
    coords_ref_factor = pexConfig.Field(
        dtype=float,
        default=1.0,
        doc='Multiplicative factor for reference catalog coordinates.'
            ' If coords_spherical is true, this must be the number of degrees per unit increment of '
            'column_ref_coord[12]. Otherwise, it must convert the coordinate to the same units'
            ' as the target coordinates.',
    )
    coords_target_factor = pexConfig.Field(
        dtype=float,
        default=1.0,
        doc='Multiplicative factor for target catalog coordinates.'
            ' If coords_spherical is true, this must be the number of degrees per unit increment of '
            'column_target_coord[12]. Otherwise, it must convert the coordinate to the same units'
            ' as the reference coordinates.',
    )
    coords_ref_to_convert = pexConfig.DictField(
        default=None,
        optional=True,
        keytype=str,
        itemtype=str,
        # Exactly two entries are required: one per spatial coordinate
        dictCheck=lambda x: len(x) == 2,
        doc='Dict mapping sky coordinate columns to be converted to pixel columns',
    )
    mag_zeropoint_ref = pexConfig.Field(
        dtype=float,
        default=31.4,
        doc='Magnitude zeropoint for reference catalog.',
    )

    def format_catalogs(
        self,
        catalog_ref: pd.DataFrame,
        catalog_target: pd.DataFrame,
        select_ref: np.array = None,
        select_target: np.array = None,
        radec_to_xy_func: Callable = None,
        return_converted_columns: bool = False,
        **kwargs,
    ):
        """Format matched catalogs that may require coordinate conversions.

        Parameters
        ----------
        catalog_ref : `pandas.DataFrame`
            A reference catalog for comparison to `catalog_target`.
        catalog_target : `pandas.DataFrame`
            A target catalog with measurements for comparison to `catalog_ref`.
        select_ref : `numpy.ndarray`, (Nref,)
            A boolean array of len `catalog_ref`, True for valid match candidates.
        select_target : `numpy.ndarray`, (Ntarget,)
            A boolean array of len `catalog_target`, True for valid match candidates.
        radec_to_xy_func : `typing.Callable`
            Function called as ``radec_to_xy_func(coord1, coord2,
            coords_ref_factor, **kwargs)`` with the (already scaled) reference
            coordinate columns. It must return an iterable with one indexable
            pair per row; element 0 and element 1 of each pair are stored in
            the first and second output columns named by the values of
            ``coords_ref_to_convert``. Only required if
            ``coords_ref_to_convert`` is set.
        return_converted_columns : `bool`
            Whether to return converted columns in the `coord1` and `coord2`
            attributes, rather than keep the original values.
        kwargs
            Additional keyword arguments to pass to `radec_to_xy_func`.

        Returns
        -------
        compcat_ref, compcat_target : `ComparableCatalog`
            Comparable catalogs corresponding to the input reference and target.

        Raises
        ------
        TypeError
            Raised if ``coords_ref_to_convert`` is set but `radec_to_xy_func`
            is not callable.

        Notes
        -----
        When ``coords_ref_to_convert`` is set, the converted columns are
        written into `catalog_ref`.
        """
        convert_ref = self.coords_ref_to_convert
        if convert_ref and not callable(radec_to_xy_func):
            raise TypeError('radec_to_xy_func must be callable if converting ref coords')

        # Set up objects with frequently-used attributes like selection bool array
        extras_ref, extras_target = (
            CatalogExtras(catalog, select=select, coordinate_factor=coord_factor)
            for catalog, select, coord_factor in zip(
                (catalog_ref, catalog_target),
                (select_ref, select_target),
                (self.coords_ref_factor, self.coords_target_factor),
            )
        )

        compcats = []

        # Retrieve coordinates and multiply them by scaling factors
        for catalog, extras, (column1, column2), convert in (
            (catalog_ref, extras_ref, (self.column_ref_coord1, self.column_ref_coord2), convert_ref),
            (catalog_target, extras_target, (self.column_target_coord1, self.column_target_coord2), False),
        ):
            # NOTE(review): _mul_column multiplies in place, so with a factor
            # other than one this may also rescale the caller's catalog
            # column, depending on pandas copy semantics - confirm.
            coord1, coord2 = (
                _mul_column(catalog[column], extras.coordinate_factor)
                for column in (column1, column2)
            )
            if convert:
                xy_ref = radec_to_xy_func(coord1, coord2, self.coords_ref_factor, **kwargs)
                for idx_coord, column_out in enumerate(self.coords_ref_to_convert.values()):
                    coord = np.array([xy[idx_coord] for xy in xy_ref])
                    catalog[column_out] = coord
            # This applies to both catalogs: the converted column names are
            # looked up in the target catalog as well as the reference
            if convert_ref and return_converted_columns:
                column1, column2 = self.coords_ref_to_convert.values()
                coord1, coord2 = catalog[column1], catalog[column2]
            # Hand back plain arrays rather than pandas Series
            if isinstance(coord1, pd.Series):
                coord1 = coord1.values
            if isinstance(coord2, pd.Series):
                coord2 = coord2.values

            compcats.append(ComparableCatalog(
                catalog=catalog, column_coord1=column1, column_coord2=column2,
                coord1=coord1, coord2=coord2, extras=extras,
            ))

        return tuple(compcats)

263 

264 

class MatchProbabilisticConfig(pexConfig.Config):
    """Configuration for the MatchProbabilistic matcher."""

    column_ref_order = pexConfig.Field(
        dtype=str,
        default=None,
        optional=True,
        doc='Name of column in reference catalog specifying order for matching.'
            ' Derived from columns_ref_flux if not set.',
    )

    @property
    def columns_in_ref(self) -> Set[str]:
        """The set of all reference catalog columns named by this config."""
        columns_all = [
            self.coord_format.column_ref_coord1,
            self.coord_format.column_ref_coord2,
        ]
        for columns in (
            self.columns_ref_flux,
            self.columns_ref_meas,
            self.columns_ref_select_false,
            self.columns_ref_select_true,
            self.columns_ref_copy,
        ):
            columns_all.extend(columns)
        if self.column_ref_order:
            columns_all.append(self.column_ref_order)

        return set(columns_all)

    @property
    def columns_in_target(self) -> Set[str]:
        """The set of all target catalog columns named by this config."""
        columns_all = [
            self.coord_format.column_target_coord1,
            self.coord_format.column_target_coord2,
        ]
        for columns in (
            self.columns_target_meas,
            self.columns_target_err,
            self.columns_target_select_false,
            self.columns_target_select_true,
            self.columns_target_copy,
        ):
            columns_all.extend(columns)
        return set(columns_all)

    columns_ref_copy = pexConfig.ListField(
        dtype=str,
        default=[],
        # Reject duplicate column names
        listCheck=lambda x: len(set(x)) == len(x),
        optional=True,
        doc='Reference table columns to copy unchanged into both match tables',
    )
    columns_ref_flux = pexConfig.ListField(
        dtype=str,
        default=[],
        optional=True,
        doc="List of reference flux columns to nansum total magnitudes from if column_ref_order is None",
    )
    columns_ref_meas = pexConfig.ListField(
        dtype=str,
        doc='The reference table columns to compute match likelihoods from '
            '(usually centroids and fluxes/magnitudes)',
    )
    columns_ref_select_true = pexConfig.ListField(
        dtype=str,
        default=tuple(),
        doc='Reference table columns to require to be True for selecting sources',
    )
    columns_ref_select_false = pexConfig.ListField(
        dtype=str,
        default=tuple(),
        doc='Reference table columns to require to be False for selecting sources',
    )
    columns_target_copy = pexConfig.ListField(
        dtype=str,
        default=[],
        # Reject duplicate column names
        listCheck=lambda x: len(set(x)) == len(x),
        optional=True,
        doc='Target table columns to copy unchanged into both match tables',
    )
    columns_target_meas = pexConfig.ListField(
        dtype=str,
        doc='Target table columns with measurements corresponding to columns_ref_meas',
    )
    columns_target_err = pexConfig.ListField(
        dtype=str,
        doc='Target table columns with standard errors (sigma) corresponding to columns_ref_meas',
    )
    columns_target_select_true = pexConfig.ListField(
        dtype=str,
        default=('detect_isPrimary',),
        doc='Target table columns to require to be True for selecting sources',
    )
    columns_target_select_false = pexConfig.ListField(
        dtype=str,
        default=('merge_peak_sky',),
        doc='Target table columns to require to be False for selecting sources',
    )
    coord_format = pexConfig.ConfigField(
        dtype=ConvertCatalogCoordinatesConfig,
        doc="Configuration for coordinate conversion",
    )
    mag_brightest_ref = pexConfig.Field(
        dtype=float,
        default=-np.inf,
        doc='Bright magnitude cutoff for selecting reference sources to match.'
            ' Ignored if column_ref_order is None.'
    )
    mag_faintest_ref = pexConfig.Field(
        dtype=float,
        # np.inf, not the np.Inf alias, which was removed in NumPy 2.0
        default=np.inf,
        doc='Faint magnitude cutoff for selecting reference sources to match.'
            ' Ignored if column_ref_order is None.'
    )
    match_dist_max = pexConfig.Field(
        dtype=float,
        default=0.5,
        doc='Maximum match distance. Units must be arcseconds if coords_spherical, '
            'or else match those of column_*_coord[12] multiplied by coords_*_factor.',
    )
    match_n_max = pexConfig.Field(
        dtype=int,
        default=10,
        optional=True,
        doc='Maximum number of spatial matches to consider (in ascending distance order).',
    )
    match_n_finite_min = pexConfig.Field(
        dtype=int,
        default=3,
        optional=True,
        doc='Minimum number of columns with a finite value to measure match likelihood',
    )
    order_ascending = pexConfig.Field(
        dtype=bool,
        default=False,
        optional=True,
        doc='Whether to order reference match candidates in ascending order of column_ref_order '
            '(should be False if the column is a flux and True if it is a magnitude).',
    )

    def validate(self):
        """Check that the measurement and error column lists are consistent.

        Raises
        ------
        ValueError
            Raised if the target measurement or error column lists differ in
            length from the reference measurement column list, or if there
            are fewer reference measurement columns than
            ``match_n_finite_min``. All problems found are reported together,
            one per line.
        """
        super().validate()
        n_ref_meas = len(self.columns_ref_meas)
        n_target_meas = len(self.columns_target_meas)
        n_target_err = len(self.columns_target_err)
        match_n_finite_min = self.match_n_finite_min
        errors = []
        if n_target_meas != n_ref_meas:
            errors.append(f"{len(self.columns_target_meas)=} != {len(self.columns_ref_meas)=}")
        if n_target_err != n_ref_meas:
            errors.append(f"{len(self.columns_target_err)=} != {len(self.columns_ref_meas)=}")
        if not (n_ref_meas >= match_n_finite_min):
            errors.append(
                f"{len(self.columns_ref_meas)=} !>= {self.match_n_finite_min=}, no matches possible"
            )
        if errors:
            raise ValueError("\n".join(errors))

423 

424 

def default_value(dtype):
    """Return the fill value for a column of the given type.

    Parameters
    ----------
    dtype
        The type to look up. It is compared by equality against `str`,
        `numpy.signedinteger` and `numpy.unsignedinteger`.

    Returns
    -------
    value
        An empty string for `str`, positive infinity for
        `numpy.signedinteger`, negative infinity for
        `numpy.unsignedinteger`, and None for anything else.
    """
    if dtype == str:
        return ''
    elif dtype == np.signedinteger:
        # np.inf, not the np.Inf alias, which was removed in NumPy 2.0
        return np.inf
    elif dtype == np.unsignedinteger:
        return -np.inf
    return None

433 

434 

class MatcherProbabilistic:
    """A probabilistic, greedy catalog matcher.

    Parameters
    ----------
    config: `MatchProbabilisticConfig`
        A configuration instance.
    """

    config: MatchProbabilisticConfig

    def __init__(
        self,
        config: MatchProbabilisticConfig,
    ):
        self.config = config

    def match(
        self,
        catalog_ref: pd.DataFrame,
        catalog_target: pd.DataFrame,
        select_ref: np.array = None,
        select_target: np.array = None,
        logger: logging.Logger = None,
        logging_n_rows: int = None,
        **kwargs
    ):
        """Match catalogs.

        Parameters
        ----------
        catalog_ref : `pandas.DataFrame`
            A reference catalog to match in order of a given column (i.e. greedily).
        catalog_target : `pandas.DataFrame`
            A target catalog for matching sources from `catalog_ref`. Must contain measurements with errors.
        select_ref : `numpy.array`
            A boolean array of the same length as `catalog_ref` selecting the sources that can be matched.
        select_target : `numpy.array`
            A boolean array of the same length as `catalog_target` selecting the sources that can be matched.
        logger : `logging.Logger`
            A Logger for logging. The module default logger is used if None.
        logging_n_rows : `int`
            The number of sources to match before printing a log message.
        kwargs
            Additional keyword arguments to pass to `format_catalogs`.

        Returns
        -------
        catalog_out_ref : `pandas.DataFrame`
            A catalog of identical length to `catalog_ref`, containing match information for rows selected by
            `select_ref` (including the matching row index in `catalog_target`).
        catalog_out_target : `pandas.DataFrame`
            A catalog of identical length to `catalog_target`, containing the indices of matching rows in
            `catalog_ref`.
        exceptions : `dict` [`int`, `Exception`]
            A dictionary keyed by `catalog_ref` row number of the exception caught when matching that row.

        Notes
        -----
        Rows without a match have ``match_row`` set to the most negative
        64-bit integer, so ``match_row >= 0`` selects matched rows.
        """
        if logger is None:
            logger = logger_default

        config = self.config

        # Transform any coordinates, if required
        # Note: The returned objects contain the original catalogs, as well as
        # transformed coordinates, and the selection of sources for matching.
        # These might be identical to the arrays passed as kwargs, but that
        # depends on config settings.
        # For the rest of this function, the selection arrays will be used,
        # but the indices of the original, unfiltered catalog will also be
        # output, so some further indexing steps are needed.
        ref, target = config.coord_format.format_catalogs(
            catalog_ref=catalog_ref, catalog_target=catalog_target,
            select_ref=select_ref, select_target=select_target,
            **kwargs
        )

        # If no order is specified, take nansum of all flux columns for a 'total flux'
        # Note: it won't actually be a total flux if bands overlap significantly
        # (or it might define a filter with >100% efficiency)
        # Converted to a plain array so that sorting and the lookups below are
        # positional rather than dependent on the catalog's index labels
        column_order = np.asarray(
            catalog_ref.loc[ref.extras.select, config.column_ref_order]
            if config.column_ref_order is not None else
            np.nansum(catalog_ref.loc[ref.extras.select, config.columns_ref_flux], axis=1)
        )
        order = np.argsort(column_order if config.order_ascending else -column_order)

        n_ref_select = len(ref.extras.indices)

        match_dist_max = config.match_dist_max
        coords_spherical = config.coord_format.coords_spherical
        if coords_spherical:
            # arcseconds -> radians, to compare against unit vector separations
            match_dist_max = np.radians(match_dist_max / 3600.)

        # Convert ra/dec sky coordinates to spherical vectors for accurate distances;
        # otherwise stack the two coordinates into the (N, 2) layout the tree expects
        vec_ref, vec_target = (
            _radec_to_xyz(coord1, coord2) if coords_spherical else np.column_stack((coord1, coord2))
            for coord1, coord2 in (
                (cat.coord1[cat.extras.select], cat.coord2[cat.extras.select])
                for cat in (ref, target)
            )
        )

        # Generate K-d tree to compute distances
        logger.info('Generating cKDTree with match_n_max=%d', config.match_n_max)
        tree_obj = cKDTree(vec_target)

        _, idxs_target_select = tree_obj.query(
            vec_ref,
            distance_upper_bound=match_dist_max,
            k=config.match_n_max,
        )
        # The query squeezes the neighbour axis when k == 1; restore it
        if idxs_target_select.ndim == 1:
            idxs_target_select = idxs_target_select[:, np.newaxis]

        n_target_select = len(target.extras.indices)
        n_matches = np.sum(idxs_target_select != n_target_select, axis=1)
        n_matched_max = np.sum(n_matches == config.match_n_max)
        if n_matched_max > 0:
            logger.warning(
                '%d/%d (%.2f%%) selected true objects have n_matches=n_match_max(%d)',
                n_matched_max, n_ref_select, 100.*n_matched_max/n_ref_select, config.match_n_max
            )

        # Pre-allocate outputs
        # An explicit negative sentinel marks "no match": casting NaN to an
        # integer is undefined and does not give a negative value everywhere
        row_no_match = np.iinfo(np.int64).min
        target_row_match = np.full(target.extras.n, row_no_match, dtype=np.int64)
        ref_candidate_match = np.zeros(ref.extras.n, dtype=bool)
        ref_row_match = np.full(ref.extras.n, row_no_match, dtype=np.int64)
        ref_match_count = np.zeros(ref.extras.n, dtype=np.int32)
        ref_match_meas_finite = np.zeros(ref.extras.n, dtype=np.int32)
        ref_chisq = np.full(ref.extras.n, np.nan, dtype=float)

        # Need the original reference row indices for output
        idx_orig_ref = ref.extras.indices

        # Retrieve required columns, including any converted ones (default to original column name)
        columns_convert = config.coord_format.coords_ref_to_convert
        if columns_convert is None:
            columns_convert = {}
        # Plain arrays, extracted once, with reference rows already in matching order
        meas_ref = ref.catalog[
            [columns_convert.get(column, column) for column in config.columns_ref_meas]
        ].iloc[ref.extras.indices[order]].values
        meas_target = target.catalog[list(config.columns_target_meas)][target.extras.select].values
        errors_target = target.catalog[list(config.columns_target_err)][target.extras.select].values
        n_finite_min = config.match_n_finite_min

        exceptions = {}
        # The kdTree uses len(inputs) as a sentinel value for no match
        matched_target = {n_target_select, }

        t_begin = time.process_time()

        logger.info('Matching n_indices=%d/%d', len(order), len(ref.catalog))
        for index_n, index_row_select in enumerate(order):
            # A plain int, so it is a valid scalar index and a hashable dict key
            index_row = int(idx_orig_ref[index_row_select])
            ref_candidate_match[index_row] = True
            # Select match candidates from nearby sources not already matched
            # Note: set lookup is apparently fast enough that this is a few percent faster than:
            # found = [x for x in found[found != n_target_select] if x not in matched_target]
            # ... at least for ~1M sources
            found = [x for x in idxs_target_select[index_row_select, :] if x not in matched_target]
            n_found = len(found)
            if n_found > 0:
                # This is an ndarray of n_found rows x len(data_ref/target) columns
                chi = (meas_target[found] - meas_ref[index_n]) / errors_target[found]
                finite = np.isfinite(chi)
                n_finite = np.sum(finite, axis=1)
                # Require some number of finite chi_sq to match
                chisq_good = n_finite >= n_finite_min
                if np.any(chisq_good):
                    try:
                        # Only candidates passing the finite-count cut compete;
                        # the others must not win with an unset chisq of zero
                        idx_good = np.flatnonzero(chisq_good)
                        chisq_sum = np.nansum(chi[idx_good, :] ** 2, axis=1)
                        idx_best = np.nanargmin(chisq_sum / n_finite[idx_good])
                        idx_chisq_min = idx_good[idx_best]
                        ref_match_meas_finite[index_row] = n_finite[idx_chisq_min]
                        ref_match_count[index_row] = n_found
                        ref_chisq[index_row] = chisq_sum[idx_best]
                        idx_match_select = found[idx_chisq_min]
                        row_target = target.extras.indices[idx_match_select]
                        ref_row_match[index_row] = row_target

                        target_row_match[row_target] = index_row
                        matched_target.add(idx_match_select)
                    except Exception as error:
                        # Can't foresee any exceptions, but they shouldn't prevent
                        # matching subsequent sources
                        exceptions[index_row] = error

            if logging_n_rows and ((index_n + 1) % logging_n_rows == 0):
                t_elapsed = time.process_time() - t_begin
                logger.info(
                    'Processed %d/%d in %.2fs at sort value=%.3f',
                    index_n + 1, n_ref_select, t_elapsed, column_order[index_row_select],
                )

        data_ref = {
            'match_candidate': ref_candidate_match,
            'match_row': ref_row_match,
            'match_count': ref_match_count,
            'match_chisq': ref_chisq,
            'match_n_chisq_finite': ref_match_meas_finite,
        }
        data_target = {
            'match_candidate': target.extras.select if target.extras.select is not None else (
                np.ones(target.extras.n, dtype=bool)),
            'match_row': target_row_match,
        }

        # Copy the requested columns into their own catalog's output, and
        # the matched rows' values into the other catalog's output
        for (columns, out_original, out_matched, in_original, in_matched, matches, name_cat) in (
            (
                self.config.columns_ref_copy,
                data_ref,
                data_target,
                ref,
                target,
                target_row_match,
                'reference',
            ),
            (
                self.config.columns_target_copy,
                data_target,
                data_ref,
                target,
                ref,
                ref_row_match,
                'target',
            ),
        ):
            matched = matches >= 0
            idx_matched = matches[matched]
            logger.info('Matched %d/%d %s sources', np.sum(matched), len(matched), name_cat)

            for column in columns:
                values = in_original.catalog[column]
                out_original[column] = values
                dtype = values.dtype

                # Pandas object columns can have mixed types - check for that
                if dtype == object:
                    types = list(set((type(x) for x in values)))
                    if len(types) != 1:
                        raise RuntimeError(f'Column {column} dtype={dtype} has multiple types={types}')
                    dtype = types[0]

                value_fill = default_value(dtype)

                # Without this, the dtype would be '<U1' for an empty Unicode string
                if dtype == str:
                    dtype = f'<U{max(len(x) for x in values)}'

                column_match = np.full(in_matched.extras.n, value_fill, dtype=dtype)
                # The match indices are row positions, so index the underlying
                # array rather than looking them up as index labels
                column_match[matched] = values.values[idx_matched]
                out_matched[f'match_{column}'] = column_match

        catalog_out_ref = pd.DataFrame(data_ref)
        catalog_out_target = pd.DataFrame(data_target)

        return catalog_out_ref, catalog_out_target, exceptions