621 catalog_ref: pd.DataFrame,
622 catalog_target: pd.DataFrame,
623 catalog_match_ref: pd.DataFrame,
624 catalog_match_target: pd.DataFrame,
625 wcs: afwGeom.SkyWcs =
None,
626 ) -> pipeBase.Struct:
627 """Load matched reference and target (measured) catalogs, measure summary statistics, and output
628 a combined matched catalog with columns from both inputs.
632 catalog_ref : `pandas.DataFrame`
633 A reference catalog to diff objects/sources from.
634 catalog_target : `pandas.DataFrame`
635 A target catalog to diff reference objects/sources to.
636 catalog_match_ref : `pandas.DataFrame`
637 A catalog with match indices of target sources and selection flags
638 for each reference source.
639 catalog_match_target : `pandas.DataFrame`
640 A catalog with selection flags for each target source.
641 wcs : `lsst.afw.geom.SkyWcs`
642 A coordinate system to convert catalog positions to sky coordinates,
647 retStruct : `lsst.pipe.base.Struct`
648 A struct with cat_matched and diff_matched attributes, containing the
649 combined matched catalog and the table of per-magnitude-bin statistics.
652 config: DiffMatchedTractCatalogConfig = self.config
654 select_ref = catalog_match_ref[
'match_candidate'].values
657 select_target = (catalog_match_target[
'match_candidate'].values
658 if 'match_candidate' in catalog_match_target.columns
659 else np.ones(len(catalog_match_target), dtype=bool))
660 for column
in config.columns_target_select_true:
661 select_target &= catalog_target[column].values
662 for column
in config.columns_target_select_false:
663 select_target &= ~catalog_target[column].values
665 ref, target = config.coord_format.format_catalogs(
666 catalog_ref=catalog_ref, catalog_target=catalog_target,
667 select_ref=
None, select_target=select_target, wcs=wcs, radec_to_xy_func=radec_to_xy,
669 cat_ref = ref.catalog
670 cat_target = target.catalog
671 n_target = len(cat_target)
673 if config.include_unmatched:
674 for cat_add, cat_match
in ((cat_ref, catalog_match_ref), (cat_target, catalog_match_target)):
675 cat_add[
'match_candidate'] = cat_match[
'match_candidate'].values
677 match_row = catalog_match_ref[
'match_row'].values
678 matched_ref = match_row >= 0
679 matched_row = match_row[matched_ref]
680 matched_target = np.zeros(n_target, dtype=bool)
681 matched_target[matched_row] =
True
684 coord1_target_err, coord2_target_err = config.columns_target_coord_err
685 column_dist, column_dist_err =
'match_distance',
'match_distanceErr'
686 dist = np.full(n_target, np.nan)
688 target_match_c1, target_match_c2 = (coord[matched_row]
for coord
in (target.coord1, target.coord2))
689 target_ref_c1, target_ref_c2 = (coord[matched_ref]
for coord
in (ref.coord1, ref.coord2))
691 dist_err = np.full(n_target, np.nan)
692 dist[matched_row] = sphdist(
693 target_match_c1, target_match_c2, target_ref_c1, target_ref_c2
694 )
if config.coord_format.coords_spherical
else np.hypot(
695 target_match_c1 - target_ref_c1, target_match_c2 - target_ref_c2,
698 dist_err[matched_row] = sphdist(
699 target_match_c1, target_match_c2,
700 target_match_c1 + cat_target.iloc[matched_row][coord1_target_err].values,
701 target_match_c2 + cat_target.iloc[matched_row][coord2_target_err].values,
702 )
if config.coord_format.coords_spherical
else np.hypot(
703 cat_target.iloc[matched_row][coord1_target_err].values,
704 cat_target.iloc[matched_row][coord2_target_err].values
706 cat_target[column_dist], cat_target[column_dist_err] = dist, dist_err
709 cat_left = cat_target.iloc[matched_row]
710 has_index_left = cat_left.index.name
is not None
711 cat_right = cat_ref[matched_ref].reset_index()
712 cat_right.columns = [f
'{config.column_matched_prefix_ref}{col}' for col
in cat_right.columns]
713 cat_matched = pd.concat(objs=(cat_left.reset_index(drop=
not has_index_left), cat_right), axis=1)
715 if config.include_unmatched:
719 cat_right = cat_ref[~matched_ref & select_ref].reset_index(drop=
False)
720 cat_right.columns = (f
'{config.column_matched_prefix_ref}{col}' for col
in cat_right.columns)
721 match_row_target = catalog_match_target[
'match_row'].values
722 cat_left = cat_target[~(match_row_target >= 0) & select_target].reset_index(
723 drop=
not has_index_left)
728 for cat_i
in (cat_left, cat_right):
729 for colname
in cat_i.columns:
730 column = cat_i[colname]
731 dtype = str(column.dtype)
733 cat_i[colname] = column.astype(
"boolean")
734 elif dtype.startswith(
"int"):
735 cat_i[colname] = column.astype(f
"Int{dtype[3:]}")
736 elif dtype.startswith(
"uint"):
737 cat_i[colname] = column.astype(f
"UInt{dtype[3:]}")
738 cat_unmatched = pd.concat(objs=(cat_left, cat_right))
740 for columns_convert_base, prefix
in (
741 (config.columns_ref_mag_to_nJy, config.column_matched_prefix_ref),
742 (config.columns_target_mag_to_nJy,
""),
744 if columns_convert_base:
746 f
"{prefix}{k}": f
"{prefix}{v}" for k, v
in columns_convert_base.items()
747 }
if prefix
else columns_convert_base
748 for cat_convert
in (cat_matched, cat_unmatched):
749 cat_convert.rename(columns=columns_convert, inplace=
True)
750 for column_flux
in columns_convert.values():
751 cat_convert[column_flux] = u.ABmag.to(u.nJy, cat_convert[column_flux])
756 column_dummy =
'dummy'
757 cat_ref[column_dummy] = np.zeros_like(ref.coord1)
761 extended_ref = cat_ref[config.column_ref_extended] == (
not config.column_ref_extended_inverted)
763 extended_target = cat_target[config.column_target_extended].values >= config.extendedness_cut
766 suffixes = {MeasurementType.DIFF:
'diff', MeasurementType.CHI:
'chi'}
768 suffixes_flux = {MeasurementType.CHI: suffixes[MeasurementType.CHI]}
770 suffixes_mag = {MeasurementType.DIFF: suffixes[MeasurementType.DIFF]}
771 stats = {stat.name_short(): stat()
for stat
in (Median, SigmaIQR, SigmaMAD)}
773 for percentile
in self.config.percentiles:
774 stat =
Percentile(percentile=float(Decimal(percentile)))
775 stats[stat.name_short()] = stat
779 bands_columns=config.columns_flux,
781 suffixes_flux=suffixes_flux,
782 suffixes_mag=suffixes_mag,
785 column_dist=column_dist,
789 n_bins = config.mag_num_bins
790 data = np.zeros((n_bins,), dtype=[(key, value)
for key, value
in columns.items()])
791 data[
'bin'] = np.arange(n_bins)
794 bins_mag = np.linspace(start=config.mag_brightest_ref, stop=config.mag_faintest_ref,
796 data[
'mag_min'] = bins_mag[:-1]
797 data[
'mag_max'] = bins_mag[1:]
798 bins_mag = tuple((bins_mag[idx], bins_mag[idx + 1])
for idx
in range(n_bins))
801 column_mag_temp =
'mag_temp'
802 column_color_temp =
'color_temp'
803 column_color_err_temp =
'colorErr_temp'
804 flux_err_frac_prev = [
None]*n_models
805 mag_prev = [
None]*n_models
808 target.column_coord1: (
809 ref.column_coord1, target.column_coord1, coord1_target_err,
False,
811 target.column_coord2: (
812 ref.column_coord2, target.column_coord2, coord2_target_err,
False,
814 column_dist: (column_dummy, column_dist, column_dist_err,
False),
819 band_fluxes = [(band, config_flux)
for (band, config_flux)
in config.columns_flux.items()]
820 n_bands = len(band_fluxes)
822 band_fluxes.append(band_fluxes[0])
823 flux_err_frac_first =
None
828 for idx_band, (band, config_flux)
in enumerate(band_fluxes):
829 if idx_band == n_bands:
831 mag_ref = mag_ref_first
832 flux_err_frac = flux_err_frac_first
833 mag_model = mag_first
835 mag_ref = -2.5*np.log10(cat_ref[config_flux.column_ref_flux]) + config.mag_zeropoint_ref
836 flux_err_frac = [
None]*n_models
837 mag_model = [
None]*n_models
840 cat_ref[column_color_temp] = cat_ref[column_mag_temp] - mag_ref
842 cat_ref[column_mag_temp] = mag_ref
844 select_ref_bins = [select_ref & (mag_ref > mag_lo) & (mag_ref < mag_hi)
845 for idx_bin, (mag_lo, mag_hi)
in enumerate(bins_mag)]
848 for idx_model
in range(n_models):
849 column_target_flux = config_flux.columns_target_flux[idx_model]
850 column_target_flux_err = config_flux.columns_target_flux_err[idx_model]
852 flux_target = cat_target[column_target_flux]
853 mag_target = -2.5*np.log10(flux_target) + config.mag_zeropoint_target
854 if config.mag_ceiling_target
is not None:
855 mag_target[mag_target > config.mag_ceiling_target] = config.mag_ceiling_target
856 mag_model[idx_model] = mag_target
859 flux_err_frac[idx_model] = cat_target[column_target_flux_err]/flux_target
864 column_mag_temp_model = f
'{column_mag_temp}{idx_model}'
865 cat_target[column_mag_temp_model] = mag_target
867 columns_target[f
'flux_{column_target_flux}'] = (
868 config_flux.column_ref_flux,
870 column_target_flux_err,
874 columns_target[f
'mag_{column_target_flux}'] = (
875 column_mag_temp, column_mag_temp_model,
None,
False,
880 skip_color = (idx_band == n_bands)
and (n_bands <= 2)
882 column_color_temp_model = f
'{column_color_temp}{idx_model}'
883 column_color_err_temp_model = f
'{column_color_err_temp}{idx_model}'
886 cat_target[column_color_temp_model] = mag_prev[idx_model] - mag_model[idx_model]
889 cat_target[column_color_err_temp_model] = 2.5/np.log(10)*np.hypot(
890 flux_err_frac[idx_model], flux_err_frac_prev[idx_model])
891 columns_target[f
'color_{band_prev}_m_{band}_{column_target_flux}'] = (
893 column_color_temp_model,
894 column_color_err_temp_model,
898 for idx_bin, (mag_lo, mag_hi)
in enumerate(bins_mag):
902 select_ref_bin = select_ref_bins[idx_bin]
903 select_target_bin = select_target & (mag_target > mag_lo) & (mag_target < mag_hi)
905 for sourcetype
in SourceType:
906 sourcetype_info = sourcetype.value
907 is_extended = sourcetype_info.is_extended
909 select_ref_sub = select_ref_bin.copy()
910 select_target_sub = select_target_bin.copy()
911 if is_extended
is not None:
912 is_extended_ref = (extended_ref == is_extended)
913 select_ref_sub &= is_extended_ref
915 n_ref_sub = np.count_nonzero(select_ref_sub)
917 MatchType.ALL.value)] = n_ref_sub
918 select_target_sub &= (extended_target == is_extended)
919 n_target_sub = np.count_nonzero(select_target_sub)
921 MatchType.ALL.value)] = n_target_sub
924 match_row_bin = match_row.copy()
925 match_row_bin[~select_ref_sub] = -1
926 match_good = match_row_bin >= 0
928 n_match = np.count_nonzero(match_good)
933 rows_matched = match_row_bin[match_good]
934 subset_target = cat_target.iloc[rows_matched]
935 if (is_extended
is not None)
and (idx_model == 0):
936 right_type = extended_target[rows_matched] == is_extended
937 n_total = len(right_type)
938 n_right = np.count_nonzero(right_type)
940 MatchType.MATCH_RIGHT.value)] = n_right
943 sourcetype_info.label,
945 MatchType.MATCH_WRONG.value,
946 )] = n_total - n_right
949 for column, (column_ref, column_target, column_err_target, skip_diff) \
950 in columns_target.items():
951 values_ref = cat_ref[column_ref][match_good].values
953 subset_target[column_err_target].values
954 if column_err_target
is not None
959 subset_target[column_target].values,
964 prefix=f
'{band}_{sourcetype_info.label}_{column}',
972 select_target_sub &= matched_target
974 if is_extended
is not None and (np.count_nonzero(select_target_sub) > 0):
975 n_total = np.count_nonzero(select_target_sub)
976 right_type = np.zeros(n_target, dtype=bool)
977 right_type[match_row[matched_ref & is_extended_ref]] =
True
978 right_type &= select_target_sub
979 n_right = np.count_nonzero(right_type)
981 MatchType.MATCH_RIGHT.value)] = n_right
983 MatchType.MATCH_WRONG.value)] = n_total - n_right
986 for prefix
in (
'flux',
'mag'):
987 del columns_target[f
'{prefix}_{column_target_flux}']
989 del columns_target[f
'color_{band_prev}_m_{band}_{column_target_flux}']
992 flux_err_frac_prev = flux_err_frac
996 flux_err_frac_first = flux_err_frac
997 mag_first = mag_model
998 mag_ref_first = mag_ref
1000 if config.include_unmatched:
1001 cat_matched = pd.concat((cat_matched, cat_unmatched))
1003 retStruct = pipeBase.Struct(cat_matched=cat_matched, diff_matched=pd.DataFrame(data))