Coverage for python/lsst/meas/astrom/matcher_probabilistic.py: 24%
228 statements
« prev ^ index » next coverage.py v7.5.0, created at 2024-05-04 02:55 -0700
« prev ^ index » next coverage.py v7.5.0, created at 2024-05-04 02:55 -0700
1# This file is part of meas_astrom.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <https://www.gnu.org/licenses/>.
22__all__ = ['ConvertCatalogCoordinatesConfig', 'MatchProbabilisticConfig', 'MatcherProbabilistic']
24import lsst.pex.config as pexConfig
26from dataclasses import dataclass
27import logging
28import numpy as np
29import pandas as pd
30from scipy.spatial import cKDTree
31import time
32from typing import Callable, Set
34logger_default = logging.getLogger(__name__)
def _mul_column(column: np.array, value: float):
    """Scale a column in place by a multiplicative factor.

    Parameters
    ----------
    column : `numpy.array`
        The values to scale. They are modified in place (``*=``) whenever
        a scaling is actually applied.
    value : `float`
        The factor to multiply by. Nothing is done if it is None or
        equal to one.

    Returns
    -------
    column : `numpy.array`
        The (possibly scaled) input column.
    """
    # Guard clause: nothing to do for a missing or unit factor
    if value is None or value == 1:
        return column
    column *= value
    return column
def _radec_to_xyz(ra, dec):
    """Turn ra/dec sky coordinates into Cartesian unit vectors.

    Parameters
    ----------
    ra, dec: `numpy.ndarray`
        Arrays of right ascension/declination in degrees.

    Returns
    -------
    vectors : `numpy.ndarray`, (N, 3)
        Output unit vectors.

    Raises
    ------
    ValueError
        Raised if ``ra`` and ``dec`` do not have the same size.
    """
    if ra.size != dec.size:
        raise ValueError('ra and dec must be same size')

    lon = np.radians(ra)
    # Colatitude, i.e. the angle measured from the +z axis
    colat = np.pi / 2 - np.radians(dec)
    sin_colat = np.sin(colat)

    xyz = np.empty((lon.size, 3))
    xyz[:, 0] = sin_colat * np.cos(lon)
    xyz[:, 1] = sin_colat * np.sin(lon)
    xyz[:, 2] = np.cos(colat)
    return xyz
@dataclass
class CatalogExtras:
    """Cache frequently-referenced (meta)data needed when matching a catalog.

    Parameters
    ----------
    catalog : `pandas.DataFrame`
        A pandas catalog to store extra information for. Only its length
        is read.
    select : `numpy.array`
        A numpy boolean array of the same length as catalog to be used for
        target selection. If None, every row is selected.
    coordinate_factor : `float`
        A value stored unchanged on the instance.
    """

    # Number of rows in the catalog
    n: int
    # Integer positions of the selected rows
    indices: np.array
    # Boolean selection array (all True if none was supplied)
    select: np.array

    coordinate_factor: float = None

    def __init__(self, catalog: pd.DataFrame, select: np.array = None, coordinate_factor: float = None):
        n_rows = len(catalog)
        self.n = n_rows
        if select is None:
            # No selection supplied: every row is a candidate
            self.select = np.ones(n_rows, dtype=bool)
            self.indices = np.arange(n_rows)
        else:
            self.select = select
            self.indices = np.flatnonzero(select)
        self.coordinate_factor = coordinate_factor
@dataclass(frozen=True)
class ComparableCatalog:
    """An immutable bundle of a catalog and its comparable coordinates.

    Parameters
    ----------
    catalog : `pandas.DataFrame`
        A catalog with comparable coordinate columns.
    column_coord1 : `str`
        The first spatial coordinate column name.
    column_coord2 : `str`
        The second spatial coordinate column name.
    coord1 : `numpy.array`
        The first spatial coordinate values.
    coord2 : `numpy.array`
        The second spatial coordinate values.
    extras : `CatalogExtras`
        Extra cached (meta)data for the `catalog`.
    """

    catalog: pd.DataFrame
    column_coord1: str
    column_coord2: str
    coord1: np.array
    coord2: np.array
    extras: CatalogExtras
class ConvertCatalogCoordinatesConfig(pexConfig.Config):
    """Configuration for converting and formatting catalog coordinates."""

    column_ref_coord1 = pexConfig.Field(
        dtype=str,
        default='ra',
        doc='The reference table column for the first spatial coordinate (usually x or ra).',
    )
    column_ref_coord2 = pexConfig.Field(
        dtype=str,
        default='dec',
        doc='The reference table column for the second spatial coordinate (usually y or dec).'
            ' Units must match column_ref_coord1.',
    )
    column_target_coord1 = pexConfig.Field(
        dtype=str,
        default='coord_ra',
        doc='The target table column for the first spatial coordinate (usually x or ra).'
            ' Units must match column_ref_coord1.',
    )
    column_target_coord2 = pexConfig.Field(
        dtype=str,
        default='coord_dec',
        doc='The target table column for the second spatial coordinate (usually y or dec).'
            ' Units must match column_ref_coord2.',
    )
    coords_spherical = pexConfig.Field(
        dtype=bool,
        default=True,
        doc='Whether column_*_coord[12] are spherical coordinates (ra/dec) or not (pixel x/y)',
    )
    coords_ref_factor = pexConfig.Field(
        dtype=float,
        default=1.0,
        doc='Multiplicative factor for reference catalog coordinates.'
            ' If coords_spherical is true, this must be the number of degrees per unit increment of '
            'column_ref_coord[12]. Otherwise, it must convert the coordinate to the same units'
            ' as the target coordinates.',
    )
    coords_target_factor = pexConfig.Field(
        dtype=float,
        default=1.0,
        doc='Multiplicative factor for target catalog coordinates.'
            ' If coords_spherical is true, this must be the number of degrees per unit increment of '
            'column_target_coord[12]. Otherwise, it must convert the coordinate to the same units'
            ' as the reference coordinates.',
    )
    coords_ref_to_convert = pexConfig.DictField(
        default=None,
        optional=True,
        keytype=str,
        itemtype=str,
        dictCheck=lambda x: len(x) == 2,
        doc='Dict mapping sky coordinate columns to be converted to pixel columns',
    )
    mag_zeropoint_ref = pexConfig.Field(
        dtype=float,
        default=31.4,
        doc='Magnitude zeropoint for reference catalog.',
    )

    def format_catalogs(
        self,
        catalog_ref: pd.DataFrame,
        catalog_target: pd.DataFrame,
        select_ref: np.array = None,
        select_target: np.array = None,
        radec_to_xy_func: Callable = None,
        return_converted_columns: bool = False,
        **kwargs,
    ):
        """Format matched catalogs that may require coordinate conversions.

        Parameters
        ----------
        catalog_ref : `pandas.DataFrame`
            A reference catalog for comparison to `catalog_target`.
        catalog_target : `pandas.DataFrame`
            A target catalog with measurements for comparison to `catalog_ref`.
        select_ref : `numpy.ndarray`, (Nref,)
            A boolean array of len `catalog_ref`, True for valid match candidates.
        select_target : `numpy.ndarray`, (Ntarget,)
            A boolean array of len `catalog_target`, True for valid match candidates.
        radec_to_xy_func : `typing.Callable`
            Function called as
            ``radec_to_xy_func(coord1, coord2, coords_ref_factor, **kwargs)``
            with the (scaled) reference coordinates. Its return value is
            iterated over, and elements 0 and 1 of each item are stored as
            the converted first and second coordinate, respectively.
            Required if ``coords_ref_to_convert`` is set.
        return_converted_columns : `bool`
            Whether to return converted columns in the `coord1` and `coord2`
            attributes, rather than keep the original values.
        kwargs
            Additional keyword arguments to pass to `radec_to_xy_func`.

        Returns
        -------
        compcat_ref, compcat_target : `ComparableCatalog`
            Comparable catalogs corresponding to the input reference and target.

        Raises
        ------
        TypeError
            Raised if ``coords_ref_to_convert`` is set but
            `radec_to_xy_func` is not callable.

        Notes
        -----
        Coordinate columns are scaled by `_mul_column` (an in-place multiply)
        and converted columns are written into `catalog_ref`, so the input
        catalogs may be modified.
        """
        convert_ref = self.coords_ref_to_convert
        if convert_ref and not callable(radec_to_xy_func):
            raise TypeError('radec_to_xy_func must be callable if converting ref coords')

        # Set up objects with frequently-used attributes like selection bool array
        extras_ref, extras_target = (
            CatalogExtras(catalog, select=select, coordinate_factor=coord_factor)
            for catalog, select, coord_factor in zip(
                (catalog_ref, catalog_target),
                (select_ref, select_target),
                (self.coords_ref_factor, self.coords_target_factor),
            )
        )

        compcats = []

        # Retrieve coordinates and multiply them by scaling factors
        for catalog, extras, (column1, column2), convert in (
            (catalog_ref, extras_ref, (self.column_ref_coord1, self.column_ref_coord2), convert_ref),
            (catalog_target, extras_target, (self.column_target_coord1, self.column_target_coord2), False),
        ):
            coord1, coord2 = (
                _mul_column(catalog[column], extras.coordinate_factor)
                for column in (column1, column2)
            )
            if convert:
                # Only the reference catalog is ever converted
                xy_ref = radec_to_xy_func(coord1, coord2, self.coords_ref_factor, **kwargs)
                for idx_coord, column_out in enumerate(self.coords_ref_to_convert.values()):
                    coord = np.array([xy[idx_coord] for xy in xy_ref])
                    catalog[column_out] = coord
            # NOTE: this condition tests convert_ref (not convert), so it also
            # applies to the target catalog, which must then already contain
            # columns with the converted column names.
            if convert_ref and return_converted_columns:
                column1, column2 = self.coords_ref_to_convert.values()
                coord1, coord2 = catalog[column1], catalog[column2]
            # Always hand back plain arrays rather than pandas Series
            if isinstance(coord1, pd.Series):
                coord1 = coord1.values
            if isinstance(coord2, pd.Series):
                coord2 = coord2.values

            compcats.append(ComparableCatalog(
                catalog=catalog, column_coord1=column1, column_coord2=column2,
                coord1=coord1, coord2=coord2, extras=extras,
            ))

        return tuple(compcats)
class MatchProbabilisticConfig(pexConfig.Config):
    """Configuration for the MatchProbabilistic matcher."""

    column_ref_order = pexConfig.Field(
        dtype=str,
        default=None,
        optional=True,
        doc='Name of column in reference catalog specifying order for matching.'
            ' Derived from columns_ref_flux if not set.',
    )

    @property
    def columns_in_ref(self) -> Set[str]:
        """The set of all reference catalog columns named by this config."""
        columns_all = [
            self.coord_format.column_ref_coord1,
            self.coord_format.column_ref_coord2,
        ]
        for columns in (
            self.columns_ref_flux,
            self.columns_ref_meas,
            self.columns_ref_select_false,
            self.columns_ref_select_true,
            self.columns_ref_copy,
        ):
            columns_all.extend(columns)
        if self.column_ref_order:
            columns_all.append(self.column_ref_order)

        return set(columns_all)

    @property
    def columns_in_target(self) -> Set[str]:
        """The set of all target catalog columns named by this config."""
        columns_all = [
            self.coord_format.column_target_coord1,
            self.coord_format.column_target_coord2,
        ]
        for columns in (
            self.columns_target_meas,
            self.columns_target_err,
            self.columns_target_select_false,
            self.columns_target_select_true,
            self.columns_target_copy,
        ):
            columns_all.extend(columns)
        return set(columns_all)

    columns_ref_copy = pexConfig.ListField(
        dtype=str,
        default=[],
        listCheck=lambda x: len(set(x)) == len(x),
        optional=True,
        doc='Reference table columns to copy unchanged into both match tables',
    )
    columns_ref_flux = pexConfig.ListField(
        dtype=str,
        default=[],
        optional=True,
        doc="List of reference flux columns to nansum total magnitudes from if column_ref_order is None",
    )
    columns_ref_meas = pexConfig.ListField(
        dtype=str,
        doc='The reference table columns to compute match likelihoods from '
            '(usually centroids and fluxes/magnitudes)',
    )
    columns_ref_select_true = pexConfig.ListField(
        dtype=str,
        default=tuple(),
        doc='Reference table columns to require to be True for selecting sources',
    )
    columns_ref_select_false = pexConfig.ListField(
        dtype=str,
        default=tuple(),
        doc='Reference table columns to require to be False for selecting sources',
    )
    columns_target_copy = pexConfig.ListField(
        dtype=str,
        default=[],
        listCheck=lambda x: len(set(x)) == len(x),
        optional=True,
        doc='Target table columns to copy unchanged into both match tables',
    )
    columns_target_meas = pexConfig.ListField(
        dtype=str,
        doc='Target table columns with measurements corresponding to columns_ref_meas',
    )
    columns_target_err = pexConfig.ListField(
        dtype=str,
        doc='Target table columns with standard errors (sigma) corresponding to columns_ref_meas',
    )
    columns_target_select_true = pexConfig.ListField(
        dtype=str,
        default=('detect_isPrimary',),
        doc='Target table columns to require to be True for selecting sources',
    )
    columns_target_select_false = pexConfig.ListField(
        dtype=str,
        default=('merge_peak_sky',),
        doc='Target table columns to require to be False for selecting sources',
    )
    coord_format = pexConfig.ConfigField(
        dtype=ConvertCatalogCoordinatesConfig,
        doc="Configuration for coordinate conversion",
    )
    mag_brightest_ref = pexConfig.Field(
        dtype=float,
        default=-np.inf,
        doc='Bright magnitude cutoff for selecting reference sources to match.'
            ' Ignored if column_ref_order is None.'
    )
    # np.inf rather than the np.Inf alias, which was removed in NumPy 2.0
    mag_faintest_ref = pexConfig.Field(
        dtype=float,
        default=np.inf,
        doc='Faint magnitude cutoff for selecting reference sources to match.'
            ' Ignored if column_ref_order is None.'
    )
    match_dist_max = pexConfig.Field(
        dtype=float,
        default=0.5,
        doc='Maximum match distance. Units must be arcseconds if coords_spherical, '
            'or else match those of column_*_coord[12] multiplied by coords_*_factor.',
    )
    match_n_max = pexConfig.Field(
        dtype=int,
        default=10,
        optional=True,
        doc='Maximum number of spatial matches to consider (in ascending distance order).',
    )
    match_n_finite_min = pexConfig.Field(
        dtype=int,
        default=3,
        optional=True,
        doc='Minimum number of columns with a finite value to measure match likelihood',
    )
    order_ascending = pexConfig.Field(
        dtype=bool,
        default=False,
        optional=True,
        doc='Whether to order reference match candidates in ascending order of column_ref_order '
            '(should be False if the column is a flux and True if it is a magnitude).',
    )

    def validate(self):
        """Check that the measurement/error column lists are consistent.

        Raises
        ------
        ValueError
            Raised if ``columns_target_meas`` or ``columns_target_err`` differ
            in length from ``columns_ref_meas``, or if there are fewer
            measurement columns than ``match_n_finite_min``. All failures
            are reported together, one per line.
        """
        super().validate()
        n_ref_meas = len(self.columns_ref_meas)
        n_target_meas = len(self.columns_target_meas)
        n_target_err = len(self.columns_target_err)
        match_n_finite_min = self.match_n_finite_min
        # Collect every problem so they can all be reported at once
        errors = []
        if n_target_meas != n_ref_meas:
            errors.append(f"{len(self.columns_target_meas)=} != {len(self.columns_ref_meas)=}")
        if n_target_err != n_ref_meas:
            errors.append(f"{len(self.columns_target_err)=} != {len(self.columns_ref_meas)=}")
        if not (n_ref_meas >= match_n_finite_min):
            errors.append(
                f"{len(self.columns_ref_meas)=} !>= {self.match_n_finite_min=}, no matches possible"
            )
        if errors:
            raise ValueError("\n".join(errors))
def default_value(dtype):
    """Return a fill value for unmatched entries of a column of a given type.

    Parameters
    ----------
    dtype
        The column type; compared by equality against `str`,
        `numpy.signedinteger` and `numpy.unsignedinteger`.

    Returns
    -------
    value
        An empty string for `str`, positive infinity for
        `numpy.signedinteger`, negative infinity for
        `numpy.unsignedinteger`, and None for anything else.
    """
    # np.inf is used instead of the np.Inf alias, which NumPy 2.0 removed
    if dtype == str:
        return ''
    if dtype == np.signedinteger:
        return np.inf
    if dtype == np.unsignedinteger:
        return -np.inf
    return None
class MatcherProbabilistic:
    """A probabilistic, greedy catalog matcher.

    Parameters
    ----------
    config: `MatchProbabilisticConfig`
        A configuration instance.
    """

    config: "MatchProbabilisticConfig"

    def __init__(
        self,
        config: "MatchProbabilisticConfig",
    ):
        self.config = config

    def match(
        self,
        catalog_ref: pd.DataFrame,
        catalog_target: pd.DataFrame,
        select_ref: np.array = None,
        select_target: np.array = None,
        logger: logging.Logger = None,
        logging_n_rows: int = None,
        **kwargs
    ):
        """Match catalogs.

        Parameters
        ----------
        catalog_ref : `pandas.DataFrame`
            A reference catalog to match in order of a given column (i.e. greedily).
        catalog_target : `pandas.DataFrame`
            A target catalog for matching sources from `catalog_ref`. Must contain measurements with errors.
        select_ref : `numpy.array`
            A boolean array of the same length as `catalog_ref` selecting the sources that can be matched.
        select_target : `numpy.array`
            A boolean array of the same length as `catalog_target` selecting the sources that can be matched.
        logger : `logging.Logger`
            A Logger for logging.
        logging_n_rows : `int`
            The number of sources to match before printing a log message.
        kwargs
            Additional keyword arguments to pass to `format_catalogs`.

        Returns
        -------
        catalog_out_ref : `pandas.DataFrame`
            A catalog of identical length to `catalog_ref`, containing match information for rows selected by
            `select_ref` (including the matching row index in `catalog_target`).
        catalog_out_target : `pandas.DataFrame`
            A catalog of identical length to `catalog_target`, containing the indices of matching rows in
            `catalog_ref`.
        exceptions : `dict` [`int`, `Exception`]
            A dictionary keyed by `catalog_ref` row number of the first exception caught when matching.

        Notes
        -----
        Unmatched rows have ``match_row`` set to the most negative 64-bit
        integer, so ``match_row >= 0`` selects matched rows.
        """
        if logger is None:
            logger = logger_default

        config = self.config

        # Transform any coordinates, if required
        # Note: The returned objects contain the original catalogs, as well as
        # transformed coordinates, and the selection of sources for matching.
        # These might be identical to the arrays passed as kwargs, but that
        # depends on config settings.
        # For the rest of this function, the selection arrays will be used,
        # but the indices of the original, unfiltered catalog will also be
        # output, so some further indexing steps are needed.
        ref, target = config.coord_format.format_catalogs(
            catalog_ref=catalog_ref, catalog_target=catalog_target,
            select_ref=select_ref, select_target=select_target,
            **kwargs
        )

        # If no order is specified, take nansum of all flux columns for a 'total flux'
        # Note: it won't actually be a total flux if bands overlap significantly
        # (or it might define a filter with >100% efficiency
        # Also, this is done on the original dataframe as it's harder to accomplish
        # just with a recarray
        # The result is coerced to a plain array so that it is sorted and
        # indexed by position below, never by pandas index label.
        column_order = np.asarray(
            catalog_ref.loc[ref.extras.select, config.column_ref_order]
            if config.column_ref_order is not None else
            np.nansum(catalog_ref.loc[ref.extras.select, config.columns_ref_flux], axis=1)
        )
        order = np.argsort(column_order if config.order_ascending else -column_order)

        n_ref_select = len(ref.extras.indices)

        match_dist_max = config.match_dist_max
        coords_spherical = config.coord_format.coords_spherical
        if coords_spherical:
            # The configured distance is in arcseconds for spherical coordinates
            match_dist_max = np.radians(match_dist_max / 3600.)

        # Convert ra/dec sky coordinates to spherical vectors for accurate distances;
        # otherwise stack the two coordinates into an (N, 2) array of points
        if coords_spherical:
            func_convert = _radec_to_xyz
        else:
            def func_convert(coord1, coord2):
                return np.column_stack((coord1, coord2))
        vec_ref, vec_target = (
            func_convert(cat.coord1[cat.extras.select], cat.coord2[cat.extras.select])
            for cat in (ref, target)
        )

        # Generate K-d tree to compute distances
        logger.info('Generating cKDTree with match_n_max=%d', config.match_n_max)
        tree_obj = cKDTree(vec_target)

        scores, idxs_target_select = tree_obj.query(
            vec_ref,
            distance_upper_bound=match_dist_max,
            k=config.match_n_max,
        )

        n_target_select = len(target.extras.indices)
        n_matches = np.sum(idxs_target_select != n_target_select, axis=1)
        n_matched_max = np.sum(n_matches == config.match_n_max)
        if n_matched_max > 0:
            logger.warning(
                '%d/%d (%.2f%%) selected true objects have n_matches=n_match_max(%d)',
                n_matched_max, n_ref_select, 100.*n_matched_max/n_ref_select, config.match_n_max
            )

        # Pre-allocate outputs
        # Integer arrays cannot hold NaN, so unmatched rows get an explicit
        # negative sentinel instead of relying on an undefined NaN->int cast
        row_unmatched = np.iinfo(np.int64).min
        target_row_match = np.full(target.extras.n, row_unmatched, dtype=np.int64)
        ref_candidate_match = np.zeros(ref.extras.n, dtype=bool)
        ref_row_match = np.full(ref.extras.n, row_unmatched, dtype=np.int64)
        ref_match_count = np.zeros(ref.extras.n, dtype=np.int32)
        ref_match_meas_finite = np.zeros(ref.extras.n, dtype=np.int32)
        ref_chisq = np.full(ref.extras.n, np.nan, dtype=float)

        # Need the original reference row indices for output (1-D, so that
        # each lookup yields a scalar row number)
        idx_orig_ref = np.flatnonzero(ref.extras.select)

        # Retrieve required columns, including any converted ones (default to original column name)
        columns_convert = config.coord_format.coords_ref_to_convert
        if columns_convert is None:
            columns_convert = {}
        data_ref = ref.catalog[
            [columns_convert.get(column, column) for column in config.columns_ref_meas]
        ].iloc[ref.extras.indices[order]]
        data_target = target.catalog[config.columns_target_meas][target.extras.select]
        errors_target = target.catalog[config.columns_target_err][target.extras.select]

        exceptions = {}
        # The kdTree uses len(inputs) as a sentinel value for no match
        matched_target = {n_target_select, }

        t_begin = time.process_time()

        logger.info('Matching n_indices=%d/%d', len(order), len(ref.catalog))
        for index_n, index_row_select in enumerate(order):
            index_row = idx_orig_ref[index_row_select]
            ref_candidate_match[index_row] = True
            found = idxs_target_select[index_row_select, :]
            # Select match candidates from nearby sources not already matched
            # Note: set lookup is apparently fast enough that this is a few percent faster than:
            # found = [x for x in found[found != n_target_select] if x not in matched_target]
            # ... at least for ~1M sources
            found = [x for x in found if x not in matched_target]
            n_found = len(found)
            if n_found > 0:
                # This is an ndarray of n_found rows x len(data_ref/target) columns
                chi = (
                    (data_target.iloc[found].values - data_ref.iloc[index_n].values)
                    / errors_target.iloc[found].values
                )
                finite = np.isfinite(chi)
                n_finite = np.sum(finite, axis=1)
                # Require some number of finite chi_sq to match
                chisq_good = n_finite >= config.match_n_finite_min
                if np.any(chisq_good):
                    try:
                        chisq_sum = np.zeros(n_found, dtype=float)
                        chisq_sum[chisq_good] = np.nansum(chi[chisq_good, :] ** 2, axis=1)
                        idx_chisq_min = np.nanargmin(chisq_sum / n_finite)
                        ref_match_meas_finite[index_row] = n_finite[idx_chisq_min]
                        # This counts every remaining candidate, including
                        # those that failed the finite-measurement cut
                        ref_match_count[index_row] = len(chisq_good)
                        ref_chisq[index_row] = chisq_sum[idx_chisq_min]
                        idx_match_select = found[idx_chisq_min]
                        row_target = target.extras.indices[idx_match_select]
                        ref_row_match[index_row] = row_target

                        target_row_match[row_target] = index_row
                        matched_target.add(idx_match_select)
                    except Exception as error:
                        # Can't foresee any exceptions, but they shouldn't prevent
                        # matching subsequent sources
                        exceptions[int(index_row)] = error

            if logging_n_rows and ((index_n + 1) % logging_n_rows == 0):
                t_elapsed = time.process_time() - t_begin
                logger.info(
                    'Processed %d/%d in %.2fs at sort value=%.3f',
                    index_n + 1, n_ref_select, t_elapsed, column_order[order[index_n]],
                )

        data_ref = {
            'match_candidate': ref_candidate_match,
            'match_row': ref_row_match,
            'match_count': ref_match_count,
            'match_chisq': ref_chisq,
            'match_n_chisq_finite': ref_match_meas_finite,
        }
        data_target = {
            'match_candidate': target.extras.select if target.extras.select is not None else (
                np.ones(target.extras.n, dtype=bool)),
            'match_row': target_row_match,
        }

        # Copy the requested columns into the table of their own catalog, and
        # the matched rows' values into the table of the other catalog
        for (columns, out_original, out_matched, in_original, in_matched, matches, name_cat) in (
            (
                self.config.columns_ref_copy,
                data_ref,
                data_target,
                ref,
                target,
                target_row_match,
                'reference',
            ),
            (
                self.config.columns_target_copy,
                data_target,
                data_ref,
                target,
                ref,
                ref_row_match,
                'target',
            ),
        ):
            matched = matches >= 0
            idx_matched = matches[matched]
            logger.info('Matched %d/%d %s sources', np.sum(matched), len(matched), name_cat)

            for column in columns:
                values = in_original.catalog[column]
                out_original[column] = values
                dtype = in_original.catalog[column].dtype

                # Pandas object columns can have mixed types - check for that
                if dtype == object:
                    types = list(set((type(x) for x in values)))
                    if len(types) != 1:
                        raise RuntimeError(f'Column {column} dtype={dtype} has multiple types={types}')
                    dtype = types[0]

                value_fill = default_value(dtype)

                # Without this, the dtype would be '<U1' for an empty Unicode string
                if dtype == str:
                    dtype = f'<U{max(len(x) for x in values)}'

                column_match = np.full(in_matched.extras.n, value_fill, dtype=dtype)
                # idx_matched holds row positions, so index the raw values
                # positionally rather than by pandas index label
                column_match[matched] = np.asarray(values)[idx_matched]
                out_matched[f'match_{column}'] = column_match

        catalog_out_ref = pd.DataFrame(data_ref)
        catalog_out_target = pd.DataFrame(data_target)

        return catalog_out_ref, catalog_out_target, exceptions