23 'DiffMatchedTractCatalogConfig',
'DiffMatchedTractCatalogTask',
'MatchedCatalogFluxesConfig',
28 ComparableCatalog, ConvertCatalogCoordinatesConfig,
33import lsst.pipe.base.connectionTypes
as cT
36from abc
import ABCMeta, abstractmethod
37from astropy.stats
import mad_std
38from dataclasses
import dataclass
39from enum
import Enum, auto
42from scipy.stats
import iqr
43from typing
import Dict, Set
46DiffMatchedTractCatalogBaseTemplates = {
47 "name_input_cat_ref":
"truth_summary",
48 "name_input_cat_target":
"objectTable_tract",
49 "name_skymap": BaseSkyMap.SKYMAP_DATASET_TYPE_NAME,
54 pipeBase.PipelineTaskConnections,
55 dimensions=(
"tract",
"skymap"),
56 defaultTemplates=DiffMatchedTractCatalogBaseTemplates,
59 doc=
"Reference object catalog to match from",
60 name=
"{name_input_cat_ref}",
61 storageClass=
"DataFrame",
62 dimensions=(
"tract",
"skymap"),
65 cat_target = cT.Input(
66 doc=
"Target object catalog to match",
67 name=
"{name_input_cat_target}",
68 storageClass=
"DataFrame",
69 dimensions=(
"tract",
"skymap"),
73 doc=
"Input definition of geometry/bbox and projection/wcs for coadded exposures",
75 storageClass=
"SkyMap",
76 dimensions=(
"skymap",),
78 cat_match_ref = cT.Input(
79 doc=
"Reference match catalog with indices of target matches",
80 name=
"match_ref_{name_input_cat_ref}_{name_input_cat_target}",
81 storageClass=
"DataFrame",
82 dimensions=(
"tract",
"skymap"),
85 cat_match_target = cT.Input(
86 doc=
"Target match catalog with indices of references matches",
87 name=
"match_target_{name_input_cat_ref}_{name_input_cat_target}",
88 storageClass=
"DataFrame",
89 dimensions=(
"tract",
"skymap"),
92 columns_match_target = cT.Input(
93 doc=
"Target match catalog columns",
94 name=
"match_target_{name_input_cat_ref}_{name_input_cat_target}.columns",
95 storageClass=
"DataFrameIndex",
96 dimensions=(
"tract",
"skymap"),
98 cat_matched = cT.Output(
99 doc=
"Catalog with reference and target columns for matched sources only",
100 name=
"matched_{name_input_cat_ref}_{name_input_cat_target}",
101 storageClass=
"DataFrame",
102 dimensions=(
"tract",
"skymap"),
104 diff_matched = cT.Output(
105 doc=
"Table with aggregated counts, difference and chi statistics",
106 name=
"diff_matched_{name_input_cat_ref}_{name_input_cat_target}",
107 storageClass=
"DataFrame",
108 dimensions=(
"tract",
"skymap"),
113 column_ref_flux = pexConfig.Field(
115 doc=
'Reference catalog flux column name',
117 columns_target_flux = pexConfig.ListField(
119 listCheck=
lambda x: len(set(x)) == len(x),
120 doc=
"List of target catalog flux column names",
122 columns_target_flux_err = pexConfig.ListField(
124 listCheck=
lambda x: len(set(x)) == len(x),
125 doc=
"List of target catalog flux error column names",
138 pipeBase.PipelineTaskConfig,
139 pipelineConnections=DiffMatchedTractCatalogConnections,
141 column_matched_prefix_ref = pexConfig.Field(
144 doc=
'The prefix for matched columns copied from the reference catalog',
146 column_ref_extended = pexConfig.Field(
148 default=
'is_pointsource',
149 doc=
'The boolean reference table column specifying if the target is extended',
151 column_ref_extended_inverted = pexConfig.Field(
154 doc=
'Whether column_ref_extended specifies if the object is compact, not extended',
156 column_target_extended = pexConfig.Field(
158 default=
'refExtendedness',
159 doc=
'The target table column estimating the extendedness of the object (0 <= x <= 1)',
166 for column_lists
in (
170 (x.columns_in_ref
for x
in self.
columns_fluxcolumns_flux.values()),
172 for column_list
in column_lists:
173 columns_all.extend(column_list)
175 return set(columns_all)
179 columns_all = [self.
coord_formatcoord_format.column_target_coord1, self.
coord_formatcoord_format.column_target_coord2,
181 if self.
coord_formatcoord_format.coords_ref_to_convert
is not None:
182 columns_all.extend(self.
coord_formatcoord_format.coords_ref_to_convert.values())
183 for column_lists
in (
190 (x.columns_in_target
for x
in self.
columns_fluxcolumns_flux.values()),
192 for column_list
in column_lists:
193 columns_all.extend(column_list)
194 return set(columns_all)
196 columns_flux = pexConfig.ConfigDictField(
198 itemtype=MatchedCatalogFluxesConfig,
199 doc=
"Configs for flux columns for each band",
201 columns_ref_copy = pexConfig.ListField(
204 doc=
'Reference table columns to copy to copy into cat_matched',
206 columns_target_coord_err = pexConfig.ListField(
208 listCheck=
lambda x: (len(x) == 2)
and (x[0] != x[1]),
209 doc=
'Target table coordinate columns with standard errors (sigma)',
211 columns_target_copy = pexConfig.ListField(
214 doc=
'Target table columns to copy to copy into cat_matched',
216 columns_target_select_true = pexConfig.ListField(
218 default=(
'detect_isPrimary',),
219 doc=
'Target table columns to require to be True for selecting sources',
221 columns_target_select_false = pexConfig.ListField(
223 default=(
'merge_peak_sky',),
224 doc=
'Target table columns to require to be False for selecting sources',
226 coord_format = pexConfig.ConfigField(
227 dtype=ConvertCatalogCoordinatesConfig,
228 doc=
"Configuration for coordinate conversion",
230 extendedness_cut = pexConfig.Field(
233 doc=
'Minimum extendedness for a measured source to be considered extended',
235 mag_num_bins = pexConfig.Field(
236 doc=
'Number of magnitude bins',
240 mag_brightest_ref = pexConfig.Field(
243 doc=
'Brightest magnitude cutoff for binning',
245 mag_ceiling_target = pexConfig.Field(
249 doc=
'Ceiling (maximum/faint) magnitude for target sources',
251 mag_faintest_ref = pexConfig.Field(
254 doc=
'Faintest magnitude cutoff for binning',
256 mag_zeropoint_ref = pexConfig.Field(
259 doc=
'Magnitude zeropoint for reference sources',
261 mag_zeropoint_target = pexConfig.Field(
264 doc=
'Magnitude zeropoint for target sources',
274 """A statistic that can be applied to a set of values.
278 """Return the value of the statistic given a set of values.
282 values : `Collection` [`float`]
283 A set of values to compute the statistic for.
288 The value of the statistic.
294 """The median of a set of values."""
296 return np.median(values)
300 """The re-scaled inter-quartile range (sigma equivalent)."""
302 return iqr(values, scale=
'normal')
306 """The re-scaled median absolute deviation (sigma equivalent)."""
308 return mad_std(values)
311@dataclass(frozen=True)
313 """An arbitrary percentile.
318 A valid percentile (0 <= p <= 100).
323 return np.percentile(values, self.percentile)
def compute_stats(values_ref, values_target, errors_target, row, stats, suffixes, prefix, skip_diff=False):
    """Compute statistics on differences and store results in a row.

    Parameters
    ----------
    values_ref : `numpy.ndarray`, (N,)
        Reference values; these are subtracted from ``values_target``.
    values_target : `numpy.ndarray`, (N,)
        Target values to compare against ``values_ref``.
    errors_target : `numpy.ndarray`, (N,), optional
        Errors (standard deviations) on `values_target`. If `None`, no chi
        statistics are computed.
    row : `numpy.ndarray`, (1, C)
        A numpy array with pre-assigned column names. Only item assignment
        by column name is used, and the object is modified in place.
    stats : `Dict` [`str`, `Statistic`]
        A dict of `Statistic` values to measure, keyed by their column suffix.
    suffixes : `Dict` [`Measurement`, `str`]
        A dict of column suffixes keyed by measurement type. Entries whose
        measurement type was not computed are ignored.
    prefix : `str`
        A prefix for all column names (e.g. band).
    skip_diff : `bool`
        Whether to skip computing statistics on differences. Note that
        differences will still be computed for chi statistics.

    Returns
    -------
    row_with_stats : `numpy.ndarray`, (1, C)
        The original `row` with statistic values assigned.

    Raises
    ------
    ValueError
        Raised if the lengths of ``values_ref``, ``values_target`` and
        (when given) ``errors_target`` are not all equal.
    """
    n_ref = len(values_ref)
    n_target = len(values_target)
    # Without errors there is nothing further to validate against n_ref.
    n_target_err = len(errors_target) if errors_target is not None else n_ref
    if (n_target != n_ref) or (n_target_err != n_ref):
        raise ValueError(f'lengths of values_ref={n_ref}, values_target={n_target}'
                         f', error_target={n_target_err} must match')

    do_chi = errors_target is not None
    diff = values_target - values_ref
    # When there are no errors, chi only serves as the finiteness mask below.
    chi = diff/errors_target if do_chi else diff
    # Non-finite differences or errors cannot contribute to any statistic.
    valid = np.isfinite(chi)
    values_type = {} if skip_diff else {Measurement.DIFF: diff[valid]}
    # Only report chi statistics when real errors were supplied; otherwise
    # raw differences would be stored under the chi column names.
    if do_chi:
        values_type[Measurement.CHI] = chi[valid]

    for suffix_type, suffix in suffixes.items():
        values = values_type.get(suffix_type)
        # Leave the pre-assigned column values untouched if nothing is valid.
        if values is not None and len(values) > 0:
            for stat_name, stat in stats.items():
                row[f'{prefix}{suffix}{stat_name}'] = stat.value(values)
    return row
def _get_columns(bands_columns: Dict, suffixes: Dict, suffixes_flux: Dict, suffixes_mag: Dict,
                 stats: Dict, target: ComparableCatalog, column_dist: str):
    """Get column names for a table of difference statistics.

    Parameters
    ----------
    bands_columns : `Dict` [`str`,`MatchedCatalogFluxesConfig`]
        Dict keyed by band of flux column configuration. Each value must
        provide ``columns_target_flux`` and ``columns_target_flux_err``.
    suffixes, suffixes_flux, suffixes_mag : `Dict` [`Measurement`, `str`]
        Dict of suffixes for each `Measurement` type, for general columns (e.g.
        coordinates), fluxes and magnitudes, respectively. Only the values
        (the suffix strings) are used here.
    stats : `Dict` [`str`, `Statistic`]
        Dict of statistics keyed by their column suffix. Only the keys are
        used here.
    target : `ComparableCatalog`
        A target catalog with coordinate column names.
    column_dist : `str`
        The name of the distance column.

    Returns
    -------
    columns : `Dict` [`str`, `type`]
        Dictionary of column types keyed by name.
    n_models : `int`
        The number of models measurements will be made for; zero if
        ``bands_columns`` is empty.

    Raises
    ------
    RuntimeError
        Raised if any band's flux or flux error column list differs in
        length from the first band's flux column list.

    Notes
    -----
    Presently, models must be identical for each band.
    """
    # Bookkeeping columns; the caller in this file assigns 'bin', 'mag_min'
    # and 'mag_max' on the array built from this dict.
    columns = {
        'bin': int,
        'mag_min': float,
        'mag_max': float,
    }
    n_models = 0

    bands = list(bands_columns.keys())
    for idx, (band, config_flux) in enumerate(bands_columns.items()):
        columns_suffix = [('_flux', suffixes_flux), ('_mag', suffixes_mag), ]
        if idx > 0:
            # A colour needs a preceding band; the first band has none, and
            # bands[idx - 1] would otherwise wrap around to the last band.
            columns_suffix.append((f'_color_{bands[idx - 1]}-{band}', suffixes))
        else:
            # The first band fixes the number of models for all later bands.
            n_models = len(config_flux.columns_target_flux)
        n_models_flux = len(config_flux.columns_target_flux)
        n_models_err = len(config_flux.columns_target_flux_err)

        if (n_models_flux != n_models) or (n_models_err != n_models):
            raise RuntimeError(f'{config_flux} len(columns_target_flux)={n_models_flux} and'
                               f' len(columns_target_flux_err)={n_models_err} must equal {n_models}')

        for subtype in ('', '_resolved', '_unresolved'):
            # Integer counts of reference/target objects and match outcomes.
            for item in (f'n_{itype}{mtype}' for itype in ('ref', 'target')
                         for mtype in ('', '_match_right', '_match_wrong')):
                columns[f'{band}{subtype}_{item}'] = int

            # Statistics on the two coordinates and the match distance.
            for item in (target.column_coord1, target.column_coord2, column_dist):
                for suffix in suffixes.values():
                    for stat in stats.keys():
                        columns[f'{band}{subtype}_{item}{suffix}{stat}'] = float

            # Statistics on flux, magnitude and (after the first band) colour
            # for each model's flux column.
            for item in config_flux.columns_target_flux:
                for prefix_item, suffixes_col in columns_suffix:
                    for suffix in suffixes_col.values():
                        for stat in stats.keys():
                            columns[f'{band}{subtype}{prefix_item}_{item}{suffix}{stat}'] = float

    return columns, n_models
456 """Load subsets of matched catalogs and output a merged catalog of matched sources.
458 ConfigClass = DiffMatchedTractCatalogConfig
459 _DefaultName = "DiffMatchedTractCatalog"
462 inputs = butlerQC.get(inputRefs)
463 skymap = inputs.pop(
"skymap")
465 columns_match_target = [
'match_row']
466 if 'match_candidate' in inputs[
'columns_match_target']:
467 columns_match_target.append(
'match_candidate')
469 outputs = self.
runrun(
470 catalog_ref=inputs[
'cat_ref'].get(parameters={
'columns': self.config.columns_in_ref}),
471 catalog_target=inputs[
'cat_target'].get(parameters={
'columns': self.config.columns_in_target}),
472 catalog_match_ref=inputs[
'cat_match_ref'].get(
473 parameters={
'columns': [
'match_candidate',
'match_row']},
475 catalog_match_target=inputs[
'cat_match_target'].get(
476 parameters={
'columns': columns_match_target},
478 wcs=skymap[butlerQC.quantum.dataId[
"tract"]].wcs,
480 butlerQC.put(outputs, outputRefs)
484 catalog_ref: pd.DataFrame,
485 catalog_target: pd.DataFrame,
486 catalog_match_ref: pd.DataFrame,
487 catalog_match_target: pd.DataFrame,
488 wcs: afwGeom.SkyWcs =
None,
489 ) -> pipeBase.Struct:
490 """Load matched reference and target (measured) catalogs, measure summary statistics, and output
491 a combined matched catalog with columns
from both inputs.
495 catalog_ref : `pandas.DataFrame`
496 A reference catalog to diff objects/sources
from.
497 catalog_target : `pandas.DataFrame`
498 A target catalog to diff reference objects/sources to.
499 catalog_match_ref : `pandas.DataFrame`
500 A catalog
with match indices of target sources
and selection flags
501 for each reference source.
502 catalog_match_target : `pandas.DataFrame`
503 A catalog
with selection flags
for each target source.
504 wcs : `lsst.afw.image.SkyWcs`
505 A coordinate system to convert catalog positions to sky coordinates,
510 retStruct : `lsst.pipe.base.Struct`
511 A struct
with output_ref
and output_target attribute containing the
512 output matched catalogs.
516 select_ref = catalog_match_ref['match_candidate'].values
519 select_target = (catalog_match_target[
'match_candidate'].values
520 if 'match_candidate' in catalog_match_target.columns
521 else np.ones(len(catalog_match_target), dtype=bool))
522 for column
in config.columns_target_select_true:
523 select_target &= catalog_target[column].values
524 for column
in config.columns_target_select_false:
525 select_target &= ~catalog_target[column].values
527 ref, target = config.coord_format.format_catalogs(
528 catalog_ref=catalog_ref, catalog_target=catalog_target,
529 select_ref=
None, select_target=select_target, wcs=wcs, radec_to_xy_func=radec_to_xy,
530 return_converted_columns=config.coord_format.coords_ref_to_convert
is not None,
532 cat_ref = ref.catalog
533 cat_target = target.catalog
534 n_target = len(cat_target)
536 match_row = catalog_match_ref[
'match_row'].values
537 matched_ref = match_row >= 0
538 matched_row = match_row[matched_ref]
539 matched_target = np.zeros(n_target, dtype=bool)
540 matched_target[matched_row] =
True
543 cat_left = cat_target.iloc[matched_row]
544 has_index_left = cat_left.index.name
is not None
545 cat_right = cat_ref[matched_ref].reset_index()
546 cat_matched = pd.concat((cat_left.reset_index(drop=
True), cat_right), 1)
548 cat_matched.index = cat_left.index
549 cat_matched.columns.values[len(cat_target.columns):] = [f
'refcat_{col}' for col
in cat_right.columns]
552 coord1_target_err, coord2_target_err = config.columns_target_coord_err
553 column_dist, column_dist_err =
'distance',
'distanceErr'
554 dist = np.full(n_target, np.Inf)
556 dist[matched_row] = np.hypot(
557 target.coord1[matched_row] - ref.coord1[matched_ref],
558 target.coord2[matched_row] - ref.coord2[matched_ref],
560 dist_err = np.full(n_target, np.Inf)
561 dist_err[matched_row] = np.hypot(cat_target.iloc[matched_row][coord1_target_err].values,
562 cat_target.iloc[matched_row][coord2_target_err].values)
563 cat_target[column_dist], cat_target[column_dist_err] = dist, dist_err
566 column_dummy =
'dummy'
567 cat_ref[column_dummy] = np.zeros_like(ref.coord1)
570 extended_ref = cat_ref[config.column_ref_extended]
571 if config.column_ref_extended_inverted:
572 extended_ref = 1 - extended_ref
574 extended_target = cat_target[config.column_target_extended].values >= config.extendedness_cut
577 suffixes = {Measurement.DIFF:
'', Measurement.CHI:
'_chi'}
579 suffixes_flux = {Measurement.CHI: suffixes[Measurement.CHI]}
581 suffixes_mag = {Measurement.DIFF: suffixes[Measurement.DIFF]}
587 for name, percentile
in ((
'p05', 5.), (
'p16', 16.), (
'p84', 84.), (
'p95', 95.)):
588 stats[f
'_{name}'] =
Percentile(percentile=percentile)
591 columns, n_models = _get_columns(
592 bands_columns=config.columns_flux,
594 suffixes_flux=suffixes_flux,
595 suffixes_mag=suffixes_mag,
598 column_dist=column_dist,
602 n_bins = config.mag_num_bins
603 data = np.zeros((n_bins,), dtype=[(key, value)
for key, value
in columns.items()])
604 data[
'bin'] = np.arange(n_bins)
607 bins_mag = np.linspace(start=config.mag_brightest_ref, stop=config.mag_faintest_ref,
609 data[
'mag_min'] = bins_mag[:-1]
610 data[
'mag_max'] = bins_mag[1:]
611 bins_mag = tuple((bins_mag[idx], bins_mag[idx + 1])
for idx
in range(n_bins))
614 column_mag_temp =
'mag_temp'
615 column_color_temp =
'color_temp'
616 column_color_err_temp =
'colorErr_temp'
617 flux_err_frac_prev = [
None]*n_models
618 mag_prev = [
None]*n_models
621 target.column_coord1: (
622 ref.column_coord1, target.column_coord1, coord1_target_err,
False,
624 target.column_coord2: (
625 ref.column_coord2, target.column_coord2, coord2_target_err,
False,
627 column_dist: (column_dummy, column_dist, column_dist_err,
False),
631 for idx_band, (band, config_flux)
in enumerate(config.columns_flux.items()):
632 mag_ref = -2.5*np.log10(cat_ref[config_flux.column_ref_flux]) + config.mag_zeropoint_ref
634 cat_ref[column_color_temp] = cat_ref[column_mag_temp] - mag_ref
635 cat_ref[column_mag_temp] = mag_ref
637 flux_err_frac = [
None]*n_models
638 mag_model = [
None]*n_models
640 select_ref_bins = [select_ref & (mag_ref > mag_lo) & (mag_ref < mag_hi)
641 for idx_bin, (mag_lo, mag_hi)
in enumerate(bins_mag)]
644 for idx_model
in range(n_models):
645 column_target_flux = config_flux.columns_target_flux[idx_model]
646 column_target_flux_err = config_flux.columns_target_flux_err[idx_model]
648 flux_target = cat_target[column_target_flux]
649 mag_target = -2.5*np.log10(flux_target) + config.mag_zeropoint_target
650 if config.mag_ceiling_target
is not None:
651 mag_target[mag_target > config.mag_ceiling_target] = config.mag_ceiling_target
652 mag_model[idx_model] = mag_target
655 flux_err_frac[idx_model] = cat_target[column_target_flux_err]/flux_target
657 column_mag_temp_model = f
'{column_mag_temp}{idx_model}'
658 cat_target[column_mag_temp_model] = mag_target
660 columns_target[f
'flux_{column_target_flux}'] = (
661 config_flux.column_ref_flux,
663 column_target_flux_err,
667 columns_target[f
'mag_{column_target_flux}'] = (
668 column_mag_temp, column_mag_temp_model,
None,
False,
672 column_color_temp_model = f
'{column_color_temp}{idx_model}'
673 column_color_err_temp_model = f
'{column_color_err_temp}{idx_model}'
676 cat_target[column_color_temp_model] = mag_prev[idx_model] - mag_model[idx_model]
679 cat_target[column_color_err_temp_model] = 2.5/np.log(10)*np.hypot(
680 flux_err_frac[idx_model], flux_err_frac_prev[idx_model])
681 columns_target[f
'color_{band_prev}-{band}_{column_target_flux}'] = (
683 column_color_temp_model,
684 column_color_err_temp_model,
688 for idx_bin, (mag_lo, mag_hi)
in enumerate(bins_mag):
690 select_ref_bin = select_ref_bins[idx_bin]
691 select_target_bin = select_target & (mag_target > mag_lo) & (mag_target < mag_hi)
693 for subtype, is_extended
in ((
'',
None), (
'_resolved',
True), (
'_unresolved',
False)):
695 select_ref_sub = select_ref_bin.copy()
696 select_target_sub = select_target_bin.copy()
697 if is_extended
is not None:
698 is_extended_ref = (extended_ref == is_extended)
699 select_ref_sub &= is_extended_ref
700 select_target_sub &= (extended_target == is_extended)
701 n_ref_sub = np.count_nonzero(select_ref_sub)
702 n_target_sub = np.count_nonzero(select_target_sub)
703 row[f
'{band}{subtype}_n_ref'] = n_ref_sub
704 row[f
'{band}{subtype}_n_target'] = n_target_sub
707 match_row_bin = match_row.copy()
708 match_row_bin[~select_ref_sub] = -1
709 match_good = match_row_bin >= 0
711 n_match = np.count_nonzero(match_good)
716 rows_matched = match_row_bin[match_good]
717 subset_target = cat_target.iloc[rows_matched]
718 if is_extended
is not None:
719 right_type = extended_target[rows_matched] == is_extended
720 n_total = len(right_type)
721 n_right = np.count_nonzero(right_type)
722 row[f
'{band}{subtype}_n_ref_match_right'] = n_right
723 row[f
'{band}{subtype}_n_ref_match_wrong'] = n_total - n_right
726 for column, (column_ref, column_target, column_err_target, skip_diff) \
727 in columns_target.items():
728 values_ref = cat_ref[column_ref][match_good].values
731 subset_target[column_target].values,
732 (subset_target[column_err_target].values
if column_err_target
is not None
737 prefix=f
'{band}{subtype}_{column}',
745 select_target_sub &= matched_target
747 if is_extended
is not None and (np.count_nonzero(select_target_sub) > 0):
748 n_total = np.count_nonzero(select_target_sub)
749 right_type = np.zeros(n_target, dtype=bool)
750 right_type[match_row[matched_ref & is_extended_ref]] =
True
751 right_type &= select_target_sub
752 n_right = np.count_nonzero(right_type)
753 row[f
'{band}{subtype}_n_target_match_right'] = n_right
754 row[f
'{band}{subtype}_n_target_match_wrong'] = n_total - n_right
757 for prefix
in (
'flux_',
'mag_'):
758 del columns_target[f
'{prefix}{column_target_flux}']
760 del columns_target[f
'color_{band_prev}-{band}_{column_target_flux}']
763 flux_err_frac_prev = flux_err_frac
767 retStruct = pipeBase.Struct(cat_matched=cat_matched, diff_matched=pd.DataFrame(data))
columns_target_select_true
Set[str] columns_in_ref(self)
columns_target_select_false
Set[str] columns_in_target(self)
pipeBase.Struct run(self, pd.DataFrame catalog_ref, pd.DataFrame catalog_target, pd.DataFrame catalog_match_ref, pd.DataFrame catalog_match_target, afwGeom.SkyWcs wcs=None)
def runQuantum(self, butlerQC, inputRefs, outputRefs)
Set[str] columns_in_ref(self)
Set[str] columns_in_target(self)
def compute_stats(values_ref, values_target, errors_target, row, stats, suffixes, prefix, skip_diff=False)