Coverage for python/lsst/pipe/tasks/diff_matched_tract_catalog.py: 25%
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of pipe_tasks.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <https://www.gnu.org/licenses/>.
# Public names exported by ``from <module> import *``.
__all__ = [
    'DiffMatchedTractCatalogConfig',
    'DiffMatchedTractCatalogTask',
    'MatchedCatalogFluxesConfig',
]
26import lsst.afw.geom as afwGeom
27from lsst.meas.astrom.matcher_probabilistic import (
28 ComparableCatalog, ConvertCatalogCoordinatesConfig,
29)
30from lsst.meas.astrom.match_probabilistic_task import radec_to_xy
31import lsst.pex.config as pexConfig
32import lsst.pipe.base as pipeBase
33import lsst.pipe.base.connectionTypes as cT
34from lsst.skymap import BaseSkyMap
36from abc import ABCMeta, abstractmethod
37from astropy.stats import mad_std
38from dataclasses import dataclass
39from enum import Enum, auto
40import numpy as np
41import pandas as pd
42from scipy.stats import iqr
43from typing import Dict, Set
# Default values for the dataset-name template fields (``{name_...}``).
DiffMatchedTractCatalogBaseTemplates = dict(
    name_input_cat_ref="truth_summary",
    name_input_cat_target="objectTable_tract",
    name_skymap=BaseSkyMap.SKYMAP_DATASET_TYPE_NAME,
)
class DiffMatchedTractCatalogConnections(
    pipeBase.PipelineTaskConnections,
    dimensions=("tract", "skymap"),
    defaultTemplates=DiffMatchedTractCatalogBaseTemplates,
):
    """Input and output dataset connections for diffing matched tract catalogs.

    Dataset names are built from the ``name_input_cat_ref``,
    ``name_input_cat_target`` and ``name_skymap`` templates.
    """

    # ---- Inputs: the two catalogs being compared ----
    cat_ref = cT.Input(
        name="{name_input_cat_ref}",
        doc="Reference object catalog to match from",
        storageClass="DataFrame",
        dimensions=("tract", "skymap"),
        deferLoad=True,
    )
    cat_target = cT.Input(
        name="{name_input_cat_target}",
        doc="Target object catalog to match",
        storageClass="DataFrame",
        dimensions=("tract", "skymap"),
        deferLoad=True,
    )

    # ---- Input: sky geometry ----
    skymap = cT.Input(
        name="{name_skymap}",
        doc="Input definition of geometry/bbox and projection/wcs for coadded exposures",
        storageClass="SkyMap",
        dimensions=("skymap",),
    )

    # ---- Inputs: match catalogs (and the column index of the target one) ----
    cat_match_ref = cT.Input(
        name="match_ref_{name_input_cat_ref}_{name_input_cat_target}",
        doc="Reference match catalog with indices of target matches",
        storageClass="DataFrame",
        dimensions=("tract", "skymap"),
        deferLoad=True,
    )
    cat_match_target = cT.Input(
        name="match_target_{name_input_cat_ref}_{name_input_cat_target}",
        doc="Target match catalog with indices of references matches",
        storageClass="DataFrame",
        dimensions=("tract", "skymap"),
        deferLoad=True,
    )
    columns_match_target = cT.Input(
        name="match_target_{name_input_cat_ref}_{name_input_cat_target}.columns",
        doc="Target match catalog columns",
        storageClass="DataFrameIndex",
        dimensions=("tract", "skymap"),
    )

    # ---- Outputs ----
    cat_matched = cT.Output(
        name="matched_{name_input_cat_ref}_{name_input_cat_target}",
        doc="Catalog with reference and target columns for matched sources only",
        storageClass="DataFrame",
        dimensions=("tract", "skymap"),
    )
    diff_matched = cT.Output(
        name="diff_matched_{name_input_cat_ref}_{name_input_cat_target}",
        doc="Table with aggregated counts, difference and chi statistics",
        storageClass="DataFrame",
        dimensions=("tract", "skymap"),
    )
class MatchedCatalogFluxesConfig(pexConfig.Config):
    """Flux column names to compare between a reference and a target catalog.

    One reference flux column is compared against a list of target flux
    columns, each of which has a corresponding flux error column.
    """

    column_ref_flux = pexConfig.Field(
        doc='Reference catalog flux column name',
        dtype=str,
    )
    columns_target_flux = pexConfig.ListField(
        doc="List of target catalog flux column names",
        dtype=str,
        # Column names must be unique
        listCheck=lambda names: len(names) == len(set(names)),
    )
    columns_target_flux_err = pexConfig.ListField(
        doc="List of target catalog flux error column names",
        dtype=str,
        # Column names must be unique
        listCheck=lambda names: len(names) == len(set(names)),
    )

    @property
    def columns_in_ref(self) -> Set[str]:
        """The set of reference catalog column names used by this config."""
        return {self.column_ref_flux}

    @property
    def columns_in_target(self) -> Set[str]:
        """The set of target catalog column names (fluxes and flux errors)
        used by this config.
        """
        return {*self.columns_target_flux, *self.columns_target_flux_err}
class DiffMatchedTractCatalogConfig(
    pipeBase.PipelineTaskConfig,
    pipelineConnections=DiffMatchedTractCatalogConnections,
):
    """Configuration for diffing matched reference and target tract catalogs.

    Besides the individual fields, the `columns_in_ref` and
    `columns_in_target` properties collect every column name that this
    configuration refers to in the respective catalog.
    """

    column_matched_prefix_ref = pexConfig.Field(
        doc='The prefix for matched columns copied from the reference catalog',
        dtype=str,
        default='refcat_',
    )
    column_ref_extended = pexConfig.Field(
        doc='The boolean reference table column specifying if the target is extended',
        dtype=str,
        default='is_pointsource',
    )
    column_ref_extended_inverted = pexConfig.Field(
        doc='Whether column_ref_extended specifies if the object is compact, not extended',
        dtype=bool,
        default=True,
    )
    column_target_extended = pexConfig.Field(
        doc='The target table column estimating the extendedness of the object (0 <= x <= 1)',
        dtype=str,
        default='refExtendedness',
    )
    columns_flux = pexConfig.ConfigDictField(
        doc="Configs for flux columns for each band",
        keytype=str,
        itemtype=MatchedCatalogFluxesConfig,
    )
    columns_ref_copy = pexConfig.ListField(
        doc='Reference table columns to copy to copy into cat_matched',
        dtype=str,
        default=set(),
    )
    columns_target_coord_err = pexConfig.ListField(
        doc='Target table coordinate columns with standard errors (sigma)',
        dtype=str,
        # Exactly two, distinct, column names are required
        listCheck=lambda names: (len(names) == 2) and (names[0] != names[1]),
    )
    columns_target_copy = pexConfig.ListField(
        doc='Target table columns to copy to copy into cat_matched',
        dtype=str,
        default=('patch',),
    )
    columns_target_select_true = pexConfig.ListField(
        doc='Target table columns to require to be True for selecting sources',
        dtype=str,
        default=('detect_isPrimary',),
    )
    columns_target_select_false = pexConfig.ListField(
        doc='Target table columns to require to be False for selecting sources',
        dtype=str,
        default=('merge_peak_sky',),
    )
    coord_format = pexConfig.ConfigField(
        doc="Configuration for coordinate conversion",
        dtype=ConvertCatalogCoordinatesConfig,
    )
    extendedness_cut = pexConfig.Field(
        doc='Minimum extendedness for a measured source to be considered extended',
        dtype=float,
        default=0.5,
    )
    mag_num_bins = pexConfig.Field(
        doc='Number of magnitude bins',
        dtype=int,
        default=15,
    )
    mag_brightest_ref = pexConfig.Field(
        doc='Brightest magnitude cutoff for binning',
        dtype=float,
        default=15,
    )
    mag_ceiling_target = pexConfig.Field(
        doc='Ceiling (maximum/faint) magnitude for target sources',
        dtype=float,
        default=None,
        optional=True,
    )
    mag_faintest_ref = pexConfig.Field(
        doc='Faintest magnitude cutoff for binning',
        dtype=float,
        default=30,
    )
    mag_zeropoint_ref = pexConfig.Field(
        doc='Magnitude zeropoint for reference sources',
        dtype=float,
        default=31.4,
    )
    mag_zeropoint_target = pexConfig.Field(
        doc='Magnitude zeropoint for target sources',
        dtype=float,
        default=31.4,
    )

    @property
    def columns_in_ref(self) -> Set[str]:
        """The set of all reference catalog column names referred to by this
        config: coordinates, the extendedness column, copied columns and the
        reference flux column of every band.
        """
        names = {
            self.coord_format.column_ref_coord1,
            self.coord_format.column_ref_coord2,
            self.column_ref_extended,
        }
        names.update(self.columns_ref_copy)
        for config_flux in self.columns_flux.values():
            names.update(config_flux.columns_in_ref)
        return names

    @property
    def columns_in_target(self) -> Set[str]:
        """The set of all target catalog column names referred to by this
        config: coordinates (and their errors), the extendedness column,
        selection flags, copied columns, any converted coordinate columns
        and the flux/flux error columns of every band.
        """
        names = {
            self.coord_format.column_target_coord1,
            self.coord_format.column_target_coord2,
            self.column_target_extended,
        }
        coords_to_convert = self.coord_format.coords_ref_to_convert
        if coords_to_convert is not None:
            names.update(coords_to_convert.values())
        for column_list in (
            self.columns_target_coord_err,
            self.columns_target_select_false,
            self.columns_target_select_true,
            self.columns_target_copy,
        ):
            names.update(column_list)
        for config_flux in self.columns_flux.values():
            names.update(config_flux.columns_in_target)
        return names
class Measurement(Enum):
    """The kinds of measurement that statistics can be computed for."""

    # A difference between two values
    DIFF = auto()
    # A chi value
    CHI = auto()
class Statistic(metaclass=ABCMeta):
    """Abstract interface for a statistic computed from a set of values.

    Subclasses must implement `value`.
    """

    @abstractmethod
    def value(self, values):
        """Compute this statistic for the given values.

        Parameters
        ----------
        values : `Collection` [`float`]
            The values to compute the statistic for.

        Returns
        -------
        statistic : `float`
            The value of the statistic.
        """
class Median(Statistic):
    """A statistic returning the median of the values."""

    def value(self, values):
        """Return ``numpy.median`` of ``values``."""
        median = np.median(values)
        return median
class SigmaIQR(Statistic):
    """A statistic returning the inter-quartile range, re-scaled to be
    equivalent to a standard deviation (sigma).
    """

    def value(self, values):
        """Return ``scipy.stats.iqr`` of ``values`` with the 'normal' scale."""
        sigma = iqr(values, scale='normal')
        return sigma
class SigmaMAD(Statistic):
    """A statistic returning the median absolute deviation, re-scaled to be
    equivalent to a standard deviation (sigma).
    """

    def value(self, values):
        """Return ``astropy.stats.mad_std`` of ``values``."""
        sigma = mad_std(values)
        return sigma
@dataclass(frozen=True)
class Percentile(Statistic):
    """A statistic returning an arbitrary percentile of the values.

    Parameters
    ----------
    percentile : `float`
        A valid percentile (0 <= p <= 100).
    """

    # The percentile to evaluate, passed straight to numpy.percentile
    percentile: float

    def value(self, values):
        """Return ``numpy.percentile`` of ``values`` at `percentile`."""
        result = np.percentile(values, self.percentile)
        return result
def compute_stats(values_ref, values_target, errors_target, row, stats, suffixes, prefix, skip_diff=False):
    """Compute statistics on differences and store results in a row.

    Parameters
    ----------
    values_ref : `numpy.ndarray`, (N,)
        Reference values.
    values_target : `numpy.ndarray`, (N,)
        Measured values.
    errors_target : `numpy.ndarray`, (N,)
        Errors (standard deviations) on `values_target`. If `None`, no chi
        statistics are computed.
    row : `numpy.ndarray`, (1, C)
        A numpy array with pre-assigned column names.
    stats : `Dict` [`str`, `Statistic`]
        A dict of `Statistic` values to measure, keyed by their column suffix.
    suffixes : `Dict` [`Measurement`, `str`]
        A dict of column suffixes, keyed by the `Measurement` type that each
        suffix applies to.
    prefix : `str`
        A prefix for all column names (e.g. band).
    skip_diff : `bool`
        Whether to skip computing statistics on differences. Note that
        differences will still be computed for chi statistics.

    Returns
    -------
    row_with_stats : `numpy.ndarray`, (1, C)
        The original `row` with statistic values assigned.

    Raises
    ------
    ValueError
        Raised if `values_ref` is not empty and the lengths of
        `values_target` or `errors_target` (if not `None`) differ from it.

    Notes
    -----
    Nothing is computed (and `row` is returned unchanged) if `values_ref` is
    empty. Entries with a non-finite chi (or difference, if there are no
    errors) are excluded from both the difference and chi statistics.
    """
    n_ref = len(values_ref)
    if n_ref == 0:
        return row

    n_target = len(values_target)
    n_target_err = len(errors_target) if errors_target is not None else n_ref
    if (n_target != n_ref) or (n_target_err != n_ref):
        raise ValueError(f'lengths of values_ref={n_ref}, values_target={n_target}'
                         f', error_target={n_target_err} must match')

    do_chi = errors_target is not None
    diff = values_target - values_ref
    if do_chi:
        # Zero or non-finite errors yield inf/nan chi values; those are
        # filtered out immediately below, so the warnings are only noise
        with np.errstate(divide='ignore', invalid='ignore'):
            chi = diff/errors_target
    else:
        chi = diff
    # Could make this configurable, but non-finite values/errors are not really usable
    valid = np.isfinite(chi)
    values_type = {} if skip_diff else {Measurement.DIFF: diff[valid]}
    if do_chi:
        values_type[Measurement.CHI] = chi[valid]

    for suffix_type, suffix in suffixes.items():
        values = values_type.get(suffix_type)
        # Leave the row's existing entries untouched if there is nothing to measure
        if values is not None and len(values) > 0:
            for stat_name, stat in stats.items():
                row[f'{prefix}{suffix}{stat_name}'] = stat.value(values)
    return row
def _get_columns(bands_columns: Dict, suffixes: Dict, suffixes_flux: Dict, suffixes_mag: Dict,
                 stats: Dict, target: 'ComparableCatalog', column_dist: str):
    """Build the column names (and types) of a table of difference statistics.

    Parameters
    ----------
    bands_columns : `Dict` [`str`,`MatchedCatalogFluxesConfig`]
        Dict keyed by band of flux column configuration.
    suffixes, suffixes_flux, suffixes_mag : `Dict` [`Measurement`, `str`]
        Dict of suffixes for each `Measurement` type, for general columns (e.g.
        coordinates), fluxes and magnitudes, respectively.
    stats : `Dict` [`str`, `Statistic`]
        Dict of statistics keyed by their column suffix. Only the keys
        (suffixes) are used here.
    target : `ComparableCatalog`
        A target catalog with coordinate column names.
    column_dist : `str`
        The name of the distance column.

    Returns
    -------
    columns : `Dict` [`str`, `type`]
        Dictionary of column types keyed by name.
    n_models : `int`
        The number of models measurements will be made for.

    Raises
    ------
    RuntimeError
        Raised if the number of target flux or flux error columns for any
        band differs from the number of target flux columns of the first band.

    Notes
    -----
    Presently, models must be identical for each band.
    """
    # Columns identifying each magnitude bin come first
    columns = {"bin": int, "mag_min": float, "mag_max": float}
    n_models = 0

    # Count columns: n_ref, n_ref_match_right, ..., n_target_match_wrong
    names_count = tuple(
        f'n_{catalog}{match}'
        for catalog in ('ref', 'target')
        for match in ('', '_match_right', '_match_wrong')
    )

    band_names = list(bands_columns.keys())
    for idx_band, (band, config_flux) in enumerate(bands_columns.items()):
        groups_flux = [('_flux', suffixes_flux), ('_mag', suffixes_mag)]
        if idx_band == 0:
            # The first band defines the number of models
            n_models = len(config_flux.columns_target_flux)
        else:
            # Colors are only defined relative to the previous band
            groups_flux.append((f'_color_{band_names[idx_band - 1]}-{band}', suffixes))
        n_models_flux = len(config_flux.columns_target_flux)
        n_models_err = len(config_flux.columns_target_flux_err)

        # TODO: Do equivalent validation earlier, in the config
        if not (n_models_flux == n_models == n_models_err):
            raise RuntimeError(f'{config_flux} len(columns_target_flux)={n_models_flux} and'
                               f' len(columns_target_flux_err)={n_models_err} must equal {n_models}')

        items_general = (target.column_coord1, target.column_coord2, column_dist)
        for subtype in ('', '_resolved', '_unresolved'):
            # Totals would be redundant
            if subtype:
                columns.update({f'{band}{subtype}_{name}': int for name in names_count})

            # Coordinate and distance statistics
            columns.update({
                f'{band}{subtype}_{item}{suffix}{stat}': float
                for item in items_general
                for suffix in suffixes.values()
                for stat in stats
            })

            # Flux, magnitude and (after the first band) color statistics per model
            columns.update({
                f'{band}{subtype}{prefix_item}_{item}{suffix}{stat}': float
                for item in config_flux.columns_target_flux
                for prefix_item, suffixes_col in groups_flux
                for suffix in suffixes_col.values()
                for stat in stats
            })

    return columns, n_models
455class DiffMatchedTractCatalogTask(pipeBase.PipelineTask):
456 """Load subsets of matched catalogs and output a merged catalog of matched sources.
457 """
458 ConfigClass = DiffMatchedTractCatalogConfig
459 _DefaultName = "DiffMatchedTractCatalog"
461 def runQuantum(self, butlerQC, inputRefs, outputRefs):
462 inputs = butlerQC.get(inputRefs)
463 skymap = inputs.pop("skymap")
465 columns_match_target = ['match_row']
466 if 'match_candidate' in inputs['columns_match_target']:
467 columns_match_target.append('match_candidate')
469 outputs = self.run(
470 catalog_ref=inputs['cat_ref'].get(parameters={'columns': self.config.columns_in_ref}),
471 catalog_target=inputs['cat_target'].get(parameters={'columns': self.config.columns_in_target}),
472 catalog_match_ref=inputs['cat_match_ref'].get(
473 parameters={'columns': ['match_candidate', 'match_row']},
474 ),
475 catalog_match_target=inputs['cat_match_target'].get(
476 parameters={'columns': columns_match_target},
477 ),
478 wcs=skymap[butlerQC.quantum.dataId["tract"]].wcs,
479 )
480 butlerQC.put(outputs, outputRefs)
482 def run(
483 self,
484 catalog_ref: pd.DataFrame,
485 catalog_target: pd.DataFrame,
486 catalog_match_ref: pd.DataFrame,
487 catalog_match_target: pd.DataFrame,
488 wcs: afwGeom.SkyWcs = None,
489 ) -> pipeBase.Struct:
490 """Load matched reference and target (measured) catalogs, measure summary statistics, and output
491 a combined matched catalog with columns from both inputs.
493 Parameters
494 ----------
495 catalog_ref : `pandas.DataFrame`
496 A reference catalog to diff objects/sources from.
497 catalog_target : `pandas.DataFrame`
498 A target catalog to diff reference objects/sources to.
499 catalog_match_ref : `pandas.DataFrame`
500 A catalog with match indices of target sources and selection flags
501 for each reference source.
502 catalog_match_target : `pandas.DataFrame`
503 A catalog with selection flags for each target source.
504 wcs : `lsst.afw.image.SkyWcs`
505 A coordinate system to convert catalog positions to sky coordinates,
506 if necessary.
508 Returns
509 -------
510 retStruct : `lsst.pipe.base.Struct`
511 A struct with output_ref and output_target attribute containing the
512 output matched catalogs.
513 """
514 config = self.config
516 select_ref = catalog_match_ref['match_candidate'].values
517 # Add additional selection criteria for target sources beyond those for matching
518 # (not recommended, but can be done anyway)
519 select_target = (catalog_match_target['match_candidate'].values
520 if 'match_candidate' in catalog_match_target.columns
521 else np.ones(len(catalog_match_target), dtype=bool))
522 for column in config.columns_target_select_true:
523 select_target &= catalog_target[column].values
524 for column in config.columns_target_select_false:
525 select_target &= ~catalog_target[column].values
527 ref, target = config.coord_format.format_catalogs(
528 catalog_ref=catalog_ref, catalog_target=catalog_target,
529 select_ref=None, select_target=select_target, wcs=wcs, radec_to_xy_func=radec_to_xy,
530 return_converted_columns=config.coord_format.coords_ref_to_convert is not None,
531 )
532 cat_ref = ref.catalog
533 cat_target = target.catalog
534 n_target = len(cat_target)
536 match_row = catalog_match_ref['match_row'].values
537 matched_ref = match_row >= 0
538 matched_row = match_row[matched_ref]
539 matched_target = np.zeros(n_target, dtype=bool)
540 matched_target[matched_row] = True
542 # Create a matched table, preserving the target catalog's named index (if it has one)
543 cat_left = cat_target.iloc[matched_row]
544 has_index_left = cat_left.index.name is not None
545 cat_right = cat_ref[matched_ref].reset_index()
546 cat_matched = pd.concat((cat_left.reset_index(drop=True), cat_right), axis=1)
547 if has_index_left:
548 cat_matched.index = cat_left.index
549 cat_matched.columns.values[len(cat_target.columns):] = [f'refcat_{col}' for col in cat_right.columns]
551 # Add/compute distance columns
552 coord1_target_err, coord2_target_err = config.columns_target_coord_err
553 column_dist, column_dist_err = 'distance', 'distanceErr'
554 dist = np.full(n_target, np.Inf)
556 dist[matched_row] = np.hypot(
557 target.coord1[matched_row] - ref.coord1[matched_ref],
558 target.coord2[matched_row] - ref.coord2[matched_ref],
559 )
560 dist_err = np.full(n_target, np.Inf)
561 dist_err[matched_row] = np.hypot(cat_target.iloc[matched_row][coord1_target_err].values,
562 cat_target.iloc[matched_row][coord2_target_err].values)
563 cat_target[column_dist], cat_target[column_dist_err] = dist, dist_err
565 # Slightly smelly hack for when a column (like distance) is already relative to truth
566 column_dummy = 'dummy'
567 cat_ref[column_dummy] = np.zeros_like(ref.coord1)
569 # Add a boolean column for whether a match is classified correctly
570 extended_ref = cat_ref[config.column_ref_extended]
571 if config.column_ref_extended_inverted:
572 extended_ref = 1 - extended_ref
574 extended_target = cat_target[config.column_target_extended].values >= config.extendedness_cut
576 # Define difference/chi columns and statistics thereof
577 suffixes = {Measurement.DIFF: '', Measurement.CHI: '_chi'}
578 # Skip diff for fluxes - covered by mags
579 suffixes_flux = {Measurement.CHI: suffixes[Measurement.CHI]}
580 # Skip chi for magnitudes, which have strange errors
581 suffixes_mag = {Measurement.DIFF: suffixes[Measurement.DIFF]}
582 stats = {
583 '_median': Median(),
584 '_sig_iqr': SigmaIQR(),
585 '_sig_mad': SigmaMAD(),
586 }
587 for name, percentile in (('p05', 5.), ('p16', 16.), ('p84', 84.), ('p95', 95.)):
588 stats[f'_{name}'] = Percentile(percentile=percentile)
590 # Get dict of column names
591 columns, n_models = _get_columns(
592 bands_columns=config.columns_flux,
593 suffixes=suffixes,
594 suffixes_flux=suffixes_flux,
595 suffixes_mag=suffixes_mag,
596 stats=stats,
597 target=target,
598 column_dist=column_dist,
599 )
601 # Setup numpy table
602 n_bins = config.mag_num_bins
603 data = np.zeros((n_bins,), dtype=[(key, value) for key, value in columns.items()])
604 data['bin'] = np.arange(n_bins)
606 # Setup bins
607 bins_mag = np.linspace(start=config.mag_brightest_ref, stop=config.mag_faintest_ref,
608 num=n_bins + 1)
609 data['mag_min'] = bins_mag[:-1]
610 data['mag_max'] = bins_mag[1:]
611 bins_mag = tuple((bins_mag[idx], bins_mag[idx + 1]) for idx in range(n_bins))
613 # Define temporary columns for intermediate storage
614 column_mag_temp = 'mag_temp'
615 column_color_temp = 'color_temp'
616 column_color_err_temp = 'colorErr_temp'
617 flux_err_frac_prev = [None]*n_models
618 mag_prev = [None]*n_models
620 columns_target = {
621 target.column_coord1: (
622 ref.column_coord1, target.column_coord1, coord1_target_err, False,
623 ),
624 target.column_coord2: (
625 ref.column_coord2, target.column_coord2, coord2_target_err, False,
626 ),
627 column_dist: (column_dummy, column_dist, column_dist_err, False),
628 }
630 band_prev = None
631 for idx_band, (band, config_flux) in enumerate(config.columns_flux.items()):
632 mag_ref = -2.5*np.log10(cat_ref[config_flux.column_ref_flux]) + config.mag_zeropoint_ref
633 if idx_band > 0:
634 cat_ref[column_color_temp] = cat_ref[column_mag_temp] - mag_ref
635 cat_ref[column_mag_temp] = mag_ref
637 flux_err_frac = [None]*n_models
638 mag_model = [None]*n_models
640 select_ref_bins = [select_ref & (mag_ref > mag_lo) & (mag_ref < mag_hi)
641 for idx_bin, (mag_lo, mag_hi) in enumerate(bins_mag)]
643 # Iterate over multiple models, compute their mags and colours (if there's a previous band)
644 for idx_model in range(n_models):
645 column_target_flux = config_flux.columns_target_flux[idx_model]
646 column_target_flux_err = config_flux.columns_target_flux_err[idx_model]
648 flux_target = cat_target[column_target_flux]
649 mag_target = -2.5*np.log10(flux_target) + config.mag_zeropoint_target
650 if config.mag_ceiling_target is not None:
651 mag_target[mag_target > config.mag_ceiling_target] = config.mag_ceiling_target
652 mag_model[idx_model] = mag_target
654 # These are needed for computing magnitude/color "errors" (which are a sketchy concept)
655 flux_err_frac[idx_model] = cat_target[column_target_flux_err]/flux_target
656 # Keep these mags tabulated for convenience
657 column_mag_temp_model = f'{column_mag_temp}{idx_model}'
658 cat_target[column_mag_temp_model] = mag_target
660 columns_target[f'flux_{column_target_flux}'] = (
661 config_flux.column_ref_flux,
662 column_target_flux,
663 column_target_flux_err,
664 True,
665 )
666 # Note: magnitude errors are generally problematic and not worth aggregating
667 columns_target[f'mag_{column_target_flux}'] = (
668 column_mag_temp, column_mag_temp_model, None, False,
669 )
671 if idx_band > 0:
672 column_color_temp_model = f'{column_color_temp}{idx_model}'
673 column_color_err_temp_model = f'{column_color_err_temp}{idx_model}'
675 # e.g. if order is ugrizy, first color will be u - g
676 cat_target[column_color_temp_model] = mag_prev[idx_model] - mag_model[idx_model]
678 # Sum (in quadrature, and admittedly sketchy for faint fluxes) magnitude errors
679 cat_target[column_color_err_temp_model] = 2.5/np.log(10)*np.hypot(
680 flux_err_frac[idx_model], flux_err_frac_prev[idx_model])
681 columns_target[f'color_{band_prev}-{band}_{column_target_flux}'] = (
682 column_color_temp,
683 column_color_temp_model,
684 column_color_err_temp_model,
685 False,
686 )
688 for idx_bin, (mag_lo, mag_hi) in enumerate(bins_mag):
689 row = data[idx_bin]
690 select_ref_bin = select_ref_bins[idx_bin]
691 select_target_bin = select_target & (mag_target > mag_lo) & (mag_target < mag_hi)
693 for subtype, is_extended in (('', None), ('_resolved', True), ('_unresolved', False)):
694 # Counts filtered by match selection and magnitude bin
695 select_ref_sub = select_ref_bin.copy()
696 select_target_sub = select_target_bin.copy()
697 if is_extended is not None:
698 is_extended_ref = (extended_ref == is_extended)
699 select_ref_sub &= is_extended_ref
700 select_target_sub &= (extended_target == is_extended)
701 n_ref_sub = np.count_nonzero(select_ref_sub)
702 n_target_sub = np.count_nonzero(select_target_sub)
703 row[f'{band}{subtype}_n_ref'] = n_ref_sub
704 row[f'{band}{subtype}_n_target'] = n_target_sub
706 # Filter matches by magnitude bin and true class
707 match_row_bin = match_row.copy()
708 match_row_bin[~select_ref_sub] = -1
709 match_good = match_row_bin >= 0
711 n_match = np.count_nonzero(match_good)
713 # Same for counts of matched target sources (for e.g. purity)
715 if n_match > 0:
716 rows_matched = match_row_bin[match_good]
717 subset_target = cat_target.iloc[rows_matched]
718 if is_extended is not None:
719 right_type = extended_target[rows_matched] == is_extended
720 n_total = len(right_type)
721 n_right = np.count_nonzero(right_type)
722 row[f'{band}{subtype}_n_ref_match_right'] = n_right
723 row[f'{band}{subtype}_n_ref_match_wrong'] = n_total - n_right
725 # compute stats for this bin, for all columns
726 for column, (column_ref, column_target, column_err_target, skip_diff) \
727 in columns_target.items():
728 values_ref = cat_ref[column_ref][match_good].values
729 compute_stats(
730 values_ref,
731 subset_target[column_target].values,
732 (subset_target[column_err_target].values if column_err_target is not None
733 else None),
734 row,
735 stats,
736 suffixes,
737 prefix=f'{band}{subtype}_{column}',
738 skip_diff=skip_diff,
739 )
741 # Count matched target sources with *measured* mags within bin
742 # Used for e.g. purity calculation
743 # Should be merged with above code if there's ever a need for
744 # measuring stats on this source selection
745 select_target_sub &= matched_target
747 if is_extended is not None and (np.count_nonzero(select_target_sub) > 0):
748 n_total = np.count_nonzero(select_target_sub)
749 right_type = np.zeros(n_target, dtype=bool)
750 right_type[match_row[matched_ref & is_extended_ref]] = True
751 right_type &= select_target_sub
752 n_right = np.count_nonzero(right_type)
753 row[f'{band}{subtype}_n_target_match_right'] = n_right
754 row[f'{band}{subtype}_n_target_match_wrong'] = n_total - n_right
756 # delete the flux/color columns since they change with each band
757 for prefix in ('flux_', 'mag_'):
758 del columns_target[f'{prefix}{column_target_flux}']
759 if idx_band > 0:
760 del columns_target[f'color_{band_prev}-{band}_{column_target_flux}']
762 # keep values needed for colors
763 flux_err_frac_prev = flux_err_frac
764 mag_prev = mag_model
765 band_prev = band
767 retStruct = pipeBase.Struct(cat_matched=cat_matched, diff_matched=pd.DataFrame(data))
768 return retStruct