22__all__ = [
'MatchProbabilisticConfig',
'MatcherProbabilistic']
26from dataclasses
import dataclass
30from scipy.spatial
import cKDTree
34logger_default = logging.getLogger(__name__)
37def _mul_column(column: np.array, value: float):
38 if value
is not None and value != 1:
43def _radec_to_xyz(ra, dec):
44 """Convert input ra/dec coordinates to spherical unit vectors.
48 ra, dec: `numpy.ndarray`
49 Arrays of right ascension/declination in degrees.
53 vectors : `numpy.ndarray`, (N, 3)
56 if ra.size != dec.size:
57 raise ValueError(
'ra and dec must be same size')
59 decs = np.radians(dec)
60 vectors = np.empty((ras.size, 3))
62 sin_dec = np.sin(np.pi / 2 - decs)
63 vectors[:, 0] = sin_dec * np.cos(ras)
64 vectors[:, 1] = sin_dec * np.sin(ras)
65 vectors[:, 2] = np.cos(np.pi / 2 - decs)
71 """Configuration for the MatchProbabilistic matcher.
73 column_order = pexConfig.Field(
77 doc=
"Column to sort fit by. Derived from columns_flux if not set.",
79 column_ref_coord1 = pexConfig.Field(
82 doc=
'The reference table column for the first spatial coordinate (usually x or ra).',
84 column_ref_coord2 = pexConfig.Field(
87 doc=
'The reference table column for the second spatial coordinate (usually y or dec).'
88 'Units must match column_ref_coord1.',
90 column_target_coord1 = pexConfig.Field(
93 doc=
'The target table column for the first spatial coordinate (usually x or ra).'
94 'Units must match column_ref_coord1.',
96 column_target_coord2 = pexConfig.Field(
99 doc=
'The target table column for the second spatial coordinate (usually y or dec).'
100 'Units must match column_ref_coord2.',
110 columns_all.extend(columns)
112 return set(columns_all)
123 columns_all.extend(columns)
124 return set(columns_all)
126 columns_ref_flux = pexConfig.ListField(
129 listCheck=
lambda x: len(set(x)) == len(x),
131 doc=
"List of reference flux columns to nansum total magnitudes from if column_order is None",
133 columns_ref_meas = pexConfig.ListField(
135 doc=
'The reference table columns to compute match likelihoods from '
136 '(usually centroids and fluxes/magnitudes)',
138 columns_target_meas = pexConfig.ListField(
140 doc=
'Target table columns with measurements corresponding to columns_ref_meas',
142 columns_target_err = pexConfig.ListField(
144 doc=
'Target table columns with standard errors (sigma) corresponding to columns_ref_meas',
146 columns_target_select_true = pexConfig.ListField(
148 default=(
'detect_isPrimary',),
149 doc=
'Target table columns to require to be True for selecting match candidates',
151 columns_target_select_false = pexConfig.ListField(
153 default=(
'merge_peak_sky',),
154 doc=
'Target table columns to require to be False for selecting match candidates',
157 coords_spherical = pexConfig.Field(
160 doc=
'Whether column_*_coord[12] are spherical coordinates (ra/dec) or not (pixel x/y)',
162 coords_ref_factor = pexConfig.Field(
165 doc=
'Multiplicative factor for reference catalog coordinates.'
166 'If coords_spherical is true, this must be the number of degrees per unit increment of '
167 'column_ref_coord[12]. Otherwise, it must convert the coordinate to the same units'
168 ' as the target coordinates.',
170 coords_target_factor = pexConfig.Field(
173 doc=
'Multiplicative factor for target catalog coordinates.'
174 'If coords_spherical is true, this must be the number of degrees per unit increment of '
175 'column_target_coord[12]. Otherwise, it must convert the coordinate to the same units'
176 ' as the reference coordinates.',
178 coords_ref_to_convert = pexConfig.DictField(
182 dictCheck=
lambda x: len(x) == 2,
183 doc=
'Dict mapping sky coordinate columns to be converted to tract pixel columns',
185 mag_brightest_ref = pexConfig.Field(
188 doc=
'Bright magnitude cutoff for selecting reference sources to match.'
189 ' Ignored if column_order is None.'
191 mag_faintest_ref = pexConfig.Field(
194 doc=
'Faint magnitude cutoff for selecting reference sources to match.'
195 ' Ignored if column_order is None.'
197 mag_zeropoint_ref = pexConfig.Field(
200 doc=
'Magnitude zeropoint for computing reference source magnitudes for selection.'
201 ' Ignored if column_order is None.'
203 match_dist_max = pexConfig.Field(
206 doc=
'Maximum match distance. Units must be arcseconds if coords_spherical, '
207 'or else match those of column_*_coord[12] multiplied by coords_*_factor.',
209 match_n_max = pexConfig.Field(
213 doc=
'Maximum number of spatial matches to consider (in ascending distance order).',
215 match_n_finite_min = pexConfig.Field(
219 doc=
'Minimum number of columns with a finite value to measure match likelihood',
222 order_ascending = pexConfig.Field(
226 doc=
'Whether to order reference match candidates in ascending order of column_order '
227 '(should be False if the column is a flux and True if it is a magnitude.',
233 """Store frequently-referenced (meta)data relevant for matching a catalog.
237 catalog : `pandas.DataFrame`
238 A pandas catalog to store extra information for.
239 select : `numpy.array`
240 A numpy boolean array of the same length
as catalog to be used
for
247 coordinate_factor: float = None
249 def __init__(self, catalog: pd.DataFrame, select: np.array =
None, coordinate_factor: float =
None):
250 self.
nn = len(catalog)
252 self.
indicesindices = np.flatnonzero(select)
if select
is not None else None
257 """A probabilistic, greedy catalog matcher.
261 config: `MatchProbabilisticConfig`
262 A configuration instance.
264 config: MatchProbabilisticConfig
268 config: MatchProbabilisticConfig,
274 catalog_ref: pd.DataFrame,
275 catalog_target: pd.DataFrame,
276 select_ref: np.array =
None,
277 select_target: np.array =
None,
278 logger: logging.Logger =
None,
279 logging_n_rows: int =
None,
285 catalog_ref : `pandas.DataFrame`
286 A reference catalog to match in order of a given column (i.e. greedily).
287 catalog_target : `pandas.DataFrame`
288 A target catalog
for matching sources
from `catalog_ref`. Must contain measurements
with errors.
289 select_ref : `numpy.array`
290 A boolean array of the same length
as `catalog_ref` selecting the sources that can be matched.
291 select_target : `numpy.array`
292 A boolean array of the same length
as `catalog_target` selecting the sources that can be matched.
293 logger : `logging.Logger`
294 A Logger
for logging.
295 logging_n_rows : `int`
296 The number of sources to match before printing a log message.
300 catalog_out_ref : `pandas.DataFrame`
301 A catalog of identical length to `catalog_ref`, containing match information
for rows selected by
302 `select_ref` (including the matching row index
in `catalog_target`).
303 catalog_out_target : `pandas.DataFrame`
304 A catalog of identical length to `catalog_target`, containing the indices of matching rows
in
306 exceptions : `dict` [`int`, `Exception`]
307 A dictionary keyed by `catalog_target` row number of the first exception caught when matching.
310 logger = logger_default
312 config = self.
configconfig
314 extras_ref, extras_target = (
315 CatalogExtras(catalog, select=select, coordinate_factor=coord_factor)
316 for catalog, select, coord_factor
in zip(
317 (catalog_ref, catalog_target),
318 (select_ref, select_target),
319 (config.coords_ref_factor, config.coords_target_factor),
322 n_ref_match, n_target_match = (len(x)
for x
in (extras_ref.indices, extras_target.indices))
324 match_dist_max = config.match_dist_max
325 if config.coords_spherical:
326 match_dist_max = np.radians(match_dist_max/3600.)
329 (coord1_ref, coord2_ref), (coord1_target, coord2_target) = (
332 _mul_column(catalog.loc[extras.select, column].values, extras.coordinate_factor)
333 for column
in columns
335 for catalog, extras, columns
in (
336 (catalog_ref, extras_ref, (config.column_ref_coord1, config.column_ref_coord2)),
337 (catalog_target, extras_target, (config.column_target_coord1, config.column_target_coord2)),
342 if config.coords_spherical:
343 vec_ref = _radec_to_xyz(coord1_ref, coord2_ref)
344 vec_target = _radec_to_xyz(coord1_target, coord2_target)
346 vec_ref = np.vstack((coord1_ref, coord2_ref))
347 vec_target = np.vstack((coord1_target, coord2_target))
349 columns_ref_meas = config.columns_ref_meas
350 if config.coords_ref_to_convert:
351 columns_ref_meas = [config.coords_ref_to_convert.get(column, column)
352 for column
in config.columns_ref_meas]
355 logger.info(
'Generating cKDTree with match_n_max=%d', config.match_n_max)
356 tree_obj = cKDTree(vec_target)
358 scores, idxs_target = tree_obj.query(
360 distance_upper_bound=match_dist_max,
361 k=config.match_n_max,
363 n_matches = np.sum(idxs_target != n_target_match, axis=1)
364 n_matched_max = np.sum(n_matches == config.match_n_max)
365 if n_matched_max > 0:
367 '%d/%d (%.2f%%) true objects have n_matches=n_match_max(%d)',
368 n_matched_max, n_ref_match, 100.*n_matched_max/n_ref_match, config.match_n_max
372 target_row_match = np.full(extras_target.n, np.nan, dtype=np.int64)
373 ref_candidate_match = np.zeros(extras_ref.n, dtype=bool)
374 ref_row_match = np.full(extras_ref.n, np.nan, dtype=int)
375 ref_match_count = np.zeros(extras_ref.n, dtype=int)
376 ref_match_meas_finite = np.zeros(extras_ref.n, dtype=int)
377 ref_chisq = np.full(extras_ref.n, np.nan, dtype=float)
383 catalog_ref.loc[extras_ref.indices, config.column_order].values
384 if config.column_order
is not None else
385 np.nansum(catalog_ref.loc[extras_ref.indices, config.columns_ref_flux].values, axis=1)
387 order = np.argsort(column_order
if config.order_ascending
else -column_order)
389 indices = extras_ref.indices[order]
390 idxs_target = idxs_target[order]
391 n_indices = len(indices)
393 data_ref = catalog_ref.loc[indices, columns_ref_meas]
394 data_target = catalog_target.loc[extras_target.select, config.columns_target_meas]
395 errors_target = catalog_target.loc[extras_target.select, config.columns_target_err]
398 matched_target = set()
400 t_begin = time.process_time()
402 logger.info(
'Matching n_indices=%d/%d', n_indices, len(catalog_ref))
403 for index_n, index_row
in enumerate(indices):
404 ref_candidate_match[index_row] =
True
405 found = idxs_target[index_n, :]
407 found = [x
for x
in found[found != n_target_match]
if x
not in matched_target]
412 (data_target.iloc[found].values - data_ref.iloc[index_n].values)
413 / errors_target.iloc[found].values
415 finite = np.isfinite(chi)
416 n_finite = np.sum(finite, axis=1)
418 chisq_good = n_finite >= config.match_n_finite_min
419 if np.any(chisq_good):
421 chisq_sum = np.zeros(n_found, dtype=float)
422 chisq_sum[chisq_good] = np.nansum(chi[chisq_good, :] ** 2, axis=1)
423 idx_chisq_min = np.nanargmin(chisq_sum / n_finite)
424 idx_match = found[idx_chisq_min]
425 ref_match_meas_finite[index_row] = n_finite[idx_chisq_min]
426 ref_match_count[index_row] = len(chisq_good)
427 ref_chisq[index_row] = chisq_sum[idx_chisq_min]
428 row_target = extras_target.indices[idx_match]
429 ref_row_match[index_row] = row_target
430 target_row_match[row_target] = index_row
431 matched_target.add(idx_match)
432 except Exception
as error:
435 exceptions[index_row] = error
437 if logging_n_rows
and ((index_n + 1) % logging_n_rows == 0):
438 t_elapsed = time.process_time() - t_begin
440 'Processed %d/%d in %.2fs at sort value=%.3f',
441 index_n + 1, n_indices, t_elapsed, column_order[order[index_n]],
444 catalog_out_ref = pd.DataFrame({
445 'match_candidate': ref_candidate_match,
446 'match_row': ref_row_match,
447 'match_count': ref_match_count,
448 'match_chisq': ref_chisq,
449 'match_n_chisq_finite': ref_match_meas_finite,
452 catalog_out_target = pd.DataFrame({
453 'match_row': target_row_match,
456 return catalog_out_ref, catalog_out_target, exceptions
Set[str] columns_in_target(self)
columns_target_select_false
columns_target_select_true
Set[str] columns_in_ref(self)
def match(self, pd.DataFrame catalog_ref, pd.DataFrame catalog_target, np.array select_ref=None, np.array select_target=None, logging.Logger logger=None, int logging_n_rows=None)
def __init__(self, MatchProbabilisticConfig config)