lsst.pipe.tasks gcc30fec89c+dc01c004f9
diff_matched_tract_catalog.py
Go to the documentation of this file.
1# This file is part of pipe_tasks.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <https://www.gnu.org/licenses/>.
21
# Names exported by ``from <module> import *``.
__all__ = [
    'DiffMatchedTractCatalogConfig', 'DiffMatchedTractCatalogTask', 'MatchedCatalogFluxesConfig',
]
25
26import lsst.afw.geom as afwGeom
28 ComparableCatalog, ConvertCatalogCoordinatesConfig,
29)
31import lsst.pex.config as pexConfig
32import lsst.pipe.base as pipeBase
33import lsst.pipe.base.connectionTypes as cT
34from lsst.skymap import BaseSkyMap
35
36from abc import ABCMeta, abstractmethod
37from astropy.stats import mad_std
38from dataclasses import dataclass
39from enum import Enum, auto
40import numpy as np
41import pandas as pd
42from scipy.stats import iqr
43from typing import Dict, Set
44
45
# Default values for the dataset-name templates used by the connections
# (``{name_input_cat_ref}``, ``{name_input_cat_target}``, ``{name_skymap}``).
DiffMatchedTractCatalogBaseTemplates = {
    "name_input_cat_ref": "truth_summary",
    "name_input_cat_target": "objectTable_tract",
    "name_skymap": BaseSkyMap.SKYMAP_DATASET_TYPE_NAME,
}
51
52
class DiffMatchedTractCatalogConnections(
    pipeBase.PipelineTaskConnections,
    dimensions=("tract", "skymap"),
    defaultTemplates=DiffMatchedTractCatalogBaseTemplates,
):
    """Input and output dataset connections for diffing matched catalogs.

    All catalogs are per-tract DataFrames; the large inputs are loaded
    lazily (``deferLoad=True``) so that only the required columns are read.
    """
    cat_ref = cT.Input(
        doc="Reference object catalog to match from",
        name="{name_input_cat_ref}",
        storageClass="DataFrame",
        dimensions=("tract", "skymap"),
        deferLoad=True,
    )
    cat_target = cT.Input(
        doc="Target object catalog to match",
        name="{name_input_cat_target}",
        storageClass="DataFrame",
        dimensions=("tract", "skymap"),
        deferLoad=True,
    )
    skymap = cT.Input(
        doc="Input definition of geometry/bbox and projection/wcs for coadded exposures",
        name="{name_skymap}",
        storageClass="SkyMap",
        dimensions=("skymap",),
    )
    cat_match_ref = cT.Input(
        doc="Reference match catalog with indices of target matches",
        name="match_ref_{name_input_cat_ref}_{name_input_cat_target}",
        storageClass="DataFrame",
        dimensions=("tract", "skymap"),
        deferLoad=True,
    )
    cat_match_target = cT.Input(
        doc="Target match catalog with indices of references matches",
        name="match_target_{name_input_cat_ref}_{name_input_cat_target}",
        storageClass="DataFrame",
        dimensions=("tract", "skymap"),
        deferLoad=True,
    )
    # Only the column index is loaded here; it is used to check whether the
    # optional 'match_candidate' column exists before requesting it.
    columns_match_target = cT.Input(
        doc="Target match catalog columns",
        name="match_target_{name_input_cat_ref}_{name_input_cat_target}.columns",
        storageClass="DataFrameIndex",
        dimensions=("tract", "skymap"),
    )
    cat_matched = cT.Output(
        doc="Catalog with reference and target columns for matched sources only",
        name="matched_{name_input_cat_ref}_{name_input_cat_target}",
        storageClass="DataFrame",
        dimensions=("tract", "skymap"),
    )
    diff_matched = cT.Output(
        doc="Table with aggregated counts, difference and chi statistics",
        name="diff_matched_{name_input_cat_ref}_{name_input_cat_target}",
        storageClass="DataFrame",
        dimensions=("tract", "skymap"),
    )
110
111
class MatchedCatalogFluxesConfig(pexConfig.Config):
    """Flux column names for one band of a matched reference/target pair.

    ``columns_target_flux`` and ``columns_target_flux_err`` are parallel
    lists: entry ``i`` of each describes the same model.
    """
    column_ref_flux = pexConfig.Field(
        dtype=str,
        doc='Reference catalog flux column name',
    )
    columns_target_flux = pexConfig.ListField(
        dtype=str,
        # Reject duplicated column names.
        listCheck=lambda x: len(set(x)) == len(x),
        doc="List of target catalog flux column names",
    )
    columns_target_flux_err = pexConfig.ListField(
        dtype=str,
        # Reject duplicated column names.
        listCheck=lambda x: len(set(x)) == len(x),
        doc="List of target catalog flux error column names",
    )

    @property
    def columns_in_ref(self) -> Set[str]:
        """The set of reference catalog columns this config reads."""
        return {self.column_ref_flux}

    @property
    def columns_in_target(self) -> Set[str]:
        """The set of target catalog columns this config reads."""
        return set(self.columns_target_flux).union(set(self.columns_target_flux_err))
135
136
class DiffMatchedTractCatalogConfig(
    pipeBase.PipelineTaskConfig,
    pipelineConnections=DiffMatchedTractCatalogConnections,
):
    """Configuration for DiffMatchedTractCatalogTask.

    Besides the configurable fields, two properties list every column that
    must be loaded from the reference and target catalogs respectively.
    """
    column_matched_prefix_ref = pexConfig.Field(
        dtype=str,
        default='refcat_',
        doc='The prefix for matched columns copied from the reference catalog',
    )
    column_ref_extended = pexConfig.Field(
        dtype=str,
        default='is_pointsource',
        doc='The boolean reference table column specifying if the target is extended',
    )
    column_ref_extended_inverted = pexConfig.Field(
        dtype=bool,
        default=True,
        doc='Whether column_ref_extended specifies if the object is compact, not extended',
    )
    column_target_extended = pexConfig.Field(
        dtype=str,
        default='refExtendedness',
        doc='The target table column estimating the extendedness of the object (0 <= x <= 1)',
    )

    @property
    def columns_in_ref(self) -> Set[str]:
        """The set of all reference catalog columns needed by the task."""
        columns_all = {
            self.coord_format.column_ref_coord1,
            self.coord_format.column_ref_coord2,
            self.column_ref_extended,
        }
        columns_all.update(self.columns_ref_copy)
        for config_flux in self.columns_flux.values():
            columns_all.update(config_flux.columns_in_ref)
        return columns_all

    @property
    def columns_in_target(self) -> Set[str]:
        """The set of all target catalog columns needed by the task."""
        columns_all = {
            self.coord_format.column_target_coord1,
            self.coord_format.column_target_coord2,
            self.column_target_extended,
        }
        # The values of coords_ref_to_convert are also requested from the
        # target catalog when a conversion mapping is configured.
        if self.coord_format.coords_ref_to_convert is not None:
            columns_all.update(self.coord_format.coords_ref_to_convert.values())
        for column_list in (
            self.columns_target_coord_err,
            self.columns_target_select_false,
            self.columns_target_select_true,
            self.columns_target_copy,
        ):
            columns_all.update(column_list)
        for config_flux in self.columns_flux.values():
            columns_all.update(config_flux.columns_in_target)
        return columns_all

    columns_flux = pexConfig.ConfigDictField(
        keytype=str,
        itemtype=MatchedCatalogFluxesConfig,
        doc="Configs for flux columns for each band",
    )
    columns_ref_copy = pexConfig.ListField(
        dtype=str,
        # An empty tuple, consistent with the tuple defaults of sibling fields.
        default=(),
        doc='Reference table columns to copy into cat_matched',
    )
    columns_target_coord_err = pexConfig.ListField(
        dtype=str,
        # Exactly two distinct columns are required: one per coordinate.
        listCheck=lambda x: (len(x) == 2) and (x[0] != x[1]),
        doc='Target table coordinate columns with standard errors (sigma)',
    )
    columns_target_copy = pexConfig.ListField(
        dtype=str,
        default=('patch',),
        doc='Target table columns to copy into cat_matched',
    )
    columns_target_select_true = pexConfig.ListField(
        dtype=str,
        default=('detect_isPrimary',),
        doc='Target table columns to require to be True for selecting sources',
    )
    columns_target_select_false = pexConfig.ListField(
        dtype=str,
        default=('merge_peak_sky',),
        doc='Target table columns to require to be False for selecting sources',
    )
    coord_format = pexConfig.ConfigField(
        dtype=ConvertCatalogCoordinatesConfig,
        doc="Configuration for coordinate conversion",
    )
    extendedness_cut = pexConfig.Field(
        dtype=float,
        default=0.5,
        doc='Minimum extendedness for a measured source to be considered extended',
    )
    mag_num_bins = pexConfig.Field(
        doc='Number of magnitude bins',
        default=15,
        dtype=int,
    )
    mag_brightest_ref = pexConfig.Field(
        dtype=float,
        default=15,
        doc='Brightest magnitude cutoff for binning',
    )
    mag_ceiling_target = pexConfig.Field(
        dtype=float,
        default=None,
        optional=True,
        doc='Ceiling (maximum/faint) magnitude for target sources',
    )
    mag_faintest_ref = pexConfig.Field(
        dtype=float,
        default=30,
        doc='Faintest magnitude cutoff for binning',
    )
    mag_zeropoint_ref = pexConfig.Field(
        dtype=float,
        default=31.4,
        doc='Magnitude zeropoint for reference sources',
    )
    mag_zeropoint_target = pexConfig.Field(
        dtype=float,
        default=31.4,
        doc='Magnitude zeropoint for target sources',
    )
266
267
class Measurement(Enum):
    """The kinds of per-object measurement that statistics are computed on."""
    # Difference: target value minus reference value.
    DIFF = auto()
    # Chi: the difference divided by the target error.
    CHI = auto()
271
272
class Statistic(metaclass=ABCMeta):
    """A statistic that can be applied to a set of values.

    Concrete subclasses must implement `value`.
    """
    @abstractmethod
    def value(self, values):
        """Return the value of the statistic given a set of values.

        Parameters
        ----------
        values : `Collection` [`float`]
            A set of values to compute the statistic for.

        Returns
        -------
        statistic : `float`
            The value of the statistic.
        """
291
292
class Median(Statistic):
    """The median of a set of values."""
    def value(self, values):
        """Return ``numpy.median(values)``."""
        return np.median(values)
297
298
class SigmaIQR(Statistic):
    """The re-scaled inter-quartile range (sigma equivalent)."""
    def value(self, values):
        """Return the inter-quartile range of ``values`` with ``scale='normal'``."""
        return iqr(values, scale='normal')
303
304
class SigmaMAD(Statistic):
    """The re-scaled median absolute deviation (sigma equivalent)."""
    def value(self, values):
        """Return ``astropy.stats.mad_std(values)``."""
        return mad_std(values)
309
310
@dataclass(frozen=True)
class Percentile(Statistic):
    """An arbitrary percentile.

    Parameters
    ----------
    percentile : `float`
        A valid percentile (0 <= p <= 100).
    """
    percentile: float

    def value(self, values):
        """Return ``numpy.percentile(values, self.percentile)``."""
        return np.percentile(values, self.percentile)
324
325
def compute_stats(values_ref, values_target, errors_target, row, stats, suffixes, prefix, skip_diff=False):
    """Compute statistics on differences and store results in a row.

    Parameters
    ----------
    values_ref : `numpy.ndarray`, (N,)
        Reference values.
    values_target : `numpy.ndarray`, (N,)
        Measured values.
    errors_target : `numpy.ndarray`, (N,)
        Errors (standard deviations) on `values_target`. If `None`, no chi
        statistics are computed.
    row : `numpy.ndarray`, (1, C)
        A numpy array with pre-assigned column names.
    stats : `Dict` [`str`, `Statistic`]
        A dict of `Statistic` values to measure, keyed by their column suffix.
    suffixes : `Dict` [`Measurement`, `str`]
        A dict of column suffixes, keyed by the `Measurement` type they
        apply to (`Measurement` members are the only valid keys).
    prefix : `str`
        A prefix for all column names (e.g. band).
    skip_diff : `bool`
        Whether to skip computing statistics on differences. Note that
        differences will still be computed for chi statistics.

    Returns
    -------
    row_with_stats : `numpy.ndarray`, (1, C)
        The original `row` with statistic values assigned.

    Raises
    ------
    ValueError
        Raised if `values_ref` is non-empty and the lengths of the inputs do
        not all match.
    """
    n_ref = len(values_ref)
    if n_ref == 0:
        # Nothing to measure; the row is returned untouched.
        return row

    n_target = len(values_target)
    n_target_err = len(errors_target) if errors_target is not None else n_ref
    if (n_target != n_ref) or (n_target_err != n_ref):
        raise ValueError(f'lengths of values_ref={n_ref}, values_target={n_target}'
                         f', error_target={n_target_err} must match')

    do_chi = errors_target is not None
    diff = values_target - values_ref
    # Zero or NaN errors yield non-finite chi values, which are filtered out
    # just below, so the associated floating point warnings are only noise.
    with np.errstate(divide='ignore', invalid='ignore'):
        chi = diff/errors_target if do_chi else diff
    # Could make this configurable, but non-finite values/errors are not really usable
    valid = np.isfinite(chi)
    values_type = {} if skip_diff else {Measurement.DIFF: diff[valid]}
    if do_chi:
        values_type[Measurement.CHI] = chi[valid]

    for suffix_type, suffix in suffixes.items():
        values = values_type.get(suffix_type)
        # Leave the pre-assigned entries alone if there is nothing valid to measure
        if values is not None and len(values) > 0:
            for stat_name, stat in stats.items():
                row[f'{prefix}{suffix}{stat_name}'] = stat.value(values)
    return row
378
379
380def _get_columns(bands_columns: Dict, suffixes: Dict, suffixes_flux: Dict, suffixes_mag: Dict,
381 stats: Dict, target: ComparableCatalog, column_dist: str):
382 """Get column names for a table of difference statistics.
383
384 Parameters
385 ----------
386 bands_columns : `Dict` [`str`,`MatchedCatalogFluxesConfig`]
387 Dict keyed by band of flux column configuration.
388 suffixes, suffixes_flux, suffixes_mag : `Dict` [`Measurement`, `str`]
389 Dict of suffixes for each `Measurement` type, for general columns (e.g.
390 coordinates), fluxes and magnitudes, respectively.
391 stats : `Dict` [`Statistic`, `str`]
392 Dict of suffixes for each `Statistic` type.
393 target : `ComparableCatalog`
394 A target catalog with coordinate column names.
395 column_dist : `str`
396 The name of the distance column.
397
398 Returns
399 -------
400 columns : `Dict` [`str`, `type`]
401 Dictionary of column types keyed by name.
402 n_models : `int`
403 The number of models measurements will be made for.
404
405 Notes
406 -----
407 Presently, models must be identical for each band.
408 """
409 # Initial columns
410 columns = {
411 "bin": int,
412 "mag_min": float,
413 "mag_max": float,
414 }
415
416 # pre-assign all of the columns with appropriate types
417 n_models = 0
418
419 bands = list(bands_columns.keys())
420 for idx, (band, config_flux) in enumerate(bands_columns.items()):
421 columns_suffix = [('_flux', suffixes_flux), ('_mag', suffixes_mag), ]
422 if idx > 0:
423 columns_suffix.append((f'_color_{bands[idx - 1]}-{band}', suffixes))
424 else:
425 n_models = len(config_flux.columns_target_flux)
426 n_models_flux = len(config_flux.columns_target_flux)
427 n_models_err = len(config_flux.columns_target_flux_err)
428
429 # TODO: Do equivalent validation earlier, in the config
430 if (n_models_flux != n_models) or (n_models_err != n_models):
431 raise RuntimeError(f'{config_flux} len(columns_target_flux)={n_models_flux} and'
432 f' len(columns_target_flux_err)={n_models_err} must equal {n_models}')
433
434 for subtype in ('', '_resolved', '_unresolved'):
435 # Totals would be redundant
436 if subtype != '':
437 for item in (f'n_{itype}{mtype}' for itype in ('ref', 'target')
438 for mtype in ('', '_match_right', '_match_wrong')):
439 columns[f'{band}{subtype}_{item}'] = int
440
441 for item in (target.column_coord1, target.column_coord2, column_dist):
442 for suffix in suffixes.values():
443 for stat in stats.keys():
444 columns[f'{band}{subtype}_{item}{suffix}{stat}'] = float
445
446 for item in config_flux.columns_target_flux:
447 for prefix_item, suffixes_col in columns_suffix:
448 for suffix in suffixes_col.values():
449 for stat in stats.keys():
450 columns[f'{band}{subtype}{prefix_item}_{item}{suffix}{stat}'] = float
451
452 return columns, n_models
453
454
class DiffMatchedTractCatalogTask(pipeBase.PipelineTask):
    """Load subsets of matched catalogs and output a merged catalog of matched sources.
    """
    ConfigClass = DiffMatchedTractCatalogConfig
    _DefaultName = "DiffMatchedTractCatalog"

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        """Load only the required columns of each input, run, and store the outputs."""
        inputs = butlerQC.get(inputRefs)
        skymap = inputs.pop("skymap")

        # 'match_candidate' is only requested if the target match catalog has it
        columns_match_target = ['match_row']
        if 'match_candidate' in inputs['columns_match_target']:
            columns_match_target.append('match_candidate')

        outputs = self.run(
            catalog_ref=inputs['cat_ref'].get(parameters={'columns': self.config.columns_in_ref}),
            catalog_target=inputs['cat_target'].get(parameters={'columns': self.config.columns_in_target}),
            catalog_match_ref=inputs['cat_match_ref'].get(
                parameters={'columns': ['match_candidate', 'match_row']},
            ),
            catalog_match_target=inputs['cat_match_target'].get(
                parameters={'columns': columns_match_target},
            ),
            wcs=skymap[butlerQC.quantum.dataId["tract"]].wcs,
        )
        butlerQC.put(outputs, outputRefs)

    def run(
        self,
        catalog_ref: pd.DataFrame,
        catalog_target: pd.DataFrame,
        catalog_match_ref: pd.DataFrame,
        catalog_match_target: pd.DataFrame,
        wcs: afwGeom.SkyWcs = None,
    ) -> pipeBase.Struct:
        """Load matched reference and target (measured) catalogs, measure summary statistics, and output
        a combined matched catalog with columns from both inputs.

        Parameters
        ----------
        catalog_ref : `pandas.DataFrame`
            A reference catalog to diff objects/sources from.
        catalog_target : `pandas.DataFrame`
            A target catalog to diff reference objects/sources to.
        catalog_match_ref : `pandas.DataFrame`
            A catalog with match indices of target sources and selection flags
            for each reference source.
        catalog_match_target : `pandas.DataFrame`
            A catalog with selection flags for each target source.
        wcs : `lsst.afw.geom.SkyWcs`
            A coordinate system to convert catalog positions to sky coordinates,
            if necessary.

        Returns
        -------
        retStruct : `lsst.pipe.base.Struct`
            A struct with a ``cat_matched`` attribute containing the merged
            catalog of matched sources, and a ``diff_matched`` attribute
            containing the table of per-magnitude-bin statistics.
        """
        config = self.config

        select_ref = catalog_match_ref['match_candidate'].values
        # Add additional selection criteria for target sources beyond those for matching
        # (not recommended, but can be done anyway)
        # The flags are copied because they are modified in place below, which
        # must not alter the caller's catalog.
        select_target = (catalog_match_target['match_candidate'].values.copy()
                         if 'match_candidate' in catalog_match_target.columns
                         else np.ones(len(catalog_match_target), dtype=bool))
        for column in config.columns_target_select_true:
            select_target &= catalog_target[column].values
        for column in config.columns_target_select_false:
            select_target &= ~catalog_target[column].values

        ref, target = config.coord_format.format_catalogs(
            catalog_ref=catalog_ref, catalog_target=catalog_target,
            select_ref=None, select_target=select_target, wcs=wcs, radec_to_xy_func=radec_to_xy,
            return_converted_columns=config.coord_format.coords_ref_to_convert is not None,
        )
        cat_ref = ref.catalog
        cat_target = target.catalog
        n_target = len(cat_target)

        # match_row holds the target row index for each reference row; negative means unmatched
        match_row = catalog_match_ref['match_row'].values
        matched_ref = match_row >= 0
        matched_row = match_row[matched_ref]
        matched_target = np.zeros(n_target, dtype=bool)
        matched_target[matched_row] = True

        # Create a matched table, preserving the target catalog's named index (if it has one)
        cat_left = cat_target.iloc[matched_row]
        has_index_left = cat_left.index.name is not None
        cat_right = cat_ref[matched_ref].reset_index()
        # Prefix the reference columns so they cannot clash with target columns
        cat_right.columns = [f'{config.column_matched_prefix_ref}{col}' for col in cat_right.columns]
        cat_matched = pd.concat(objs=(cat_left.reset_index(drop=True), cat_right), axis=1)
        if has_index_left:
            cat_matched.index = cat_left.index

        # Add/compute distance columns
        coord1_target_err, coord2_target_err = config.columns_target_coord_err
        column_dist, column_dist_err = 'distance', 'distanceErr'
        dist = np.full(n_target, np.inf)

        dist[matched_row] = np.hypot(
            target.coord1[matched_row] - ref.coord1[matched_ref],
            target.coord2[matched_row] - ref.coord2[matched_ref],
        )
        dist_err = np.full(n_target, np.inf)
        dist_err[matched_row] = np.hypot(cat_target.iloc[matched_row][coord1_target_err].values,
                                         cat_target.iloc[matched_row][coord2_target_err].values)
        cat_target[column_dist], cat_target[column_dist_err] = dist, dist_err

        # Slightly smelly hack for when a column (like distance) is already relative to truth
        column_dummy = 'dummy'
        cat_ref[column_dummy] = np.zeros_like(ref.coord1)

        # Add a boolean column for whether a match is classified correctly
        extended_ref = cat_ref[config.column_ref_extended]
        if config.column_ref_extended_inverted:
            extended_ref = 1 - extended_ref

        extended_target = cat_target[config.column_target_extended].values >= config.extendedness_cut

        # Define difference/chi columns and statistics thereof
        suffixes = {Measurement.DIFF: '', Measurement.CHI: '_chi'}
        # Skip diff for fluxes - covered by mags
        suffixes_flux = {Measurement.CHI: suffixes[Measurement.CHI]}
        # Skip chi for magnitudes, which have strange errors
        suffixes_mag = {Measurement.DIFF: suffixes[Measurement.DIFF]}
        stats = {
            '_median': Median(),
            '_sig_iqr': SigmaIQR(),
            '_sig_mad': SigmaMAD(),
        }
        for name, percentile in (('p05', 5.), ('p16', 16.), ('p84', 84.), ('p95', 95.)):
            stats[f'_{name}'] = Percentile(percentile=percentile)

        # Get dict of column names
        columns, n_models = _get_columns(
            bands_columns=config.columns_flux,
            suffixes=suffixes,
            suffixes_flux=suffixes_flux,
            suffixes_mag=suffixes_mag,
            stats=stats,
            target=target,
            column_dist=column_dist,
        )

        # Setup numpy table
        n_bins = config.mag_num_bins
        data = np.zeros((n_bins,), dtype=[(key, value) for key, value in columns.items()])
        data['bin'] = np.arange(n_bins)

        # Setup bins
        bins_mag = np.linspace(start=config.mag_brightest_ref, stop=config.mag_faintest_ref,
                               num=n_bins + 1)
        data['mag_min'] = bins_mag[:-1]
        data['mag_max'] = bins_mag[1:]
        bins_mag = tuple((bins_mag[idx], bins_mag[idx + 1]) for idx in range(n_bins))

        # Define temporary columns for intermediate storage
        column_mag_temp = 'mag_temp'
        column_color_temp = 'color_temp'
        column_color_err_temp = 'colorErr_temp'
        flux_err_frac_prev = [None]*n_models
        mag_prev = [None]*n_models

        # Maps a statistic column name to
        # (reference column, target column, target error column, skip_diff)
        columns_target = {
            target.column_coord1: (
                ref.column_coord1, target.column_coord1, coord1_target_err, False,
            ),
            target.column_coord2: (
                ref.column_coord2, target.column_coord2, coord2_target_err, False,
            ),
            column_dist: (column_dummy, column_dist, column_dist_err, False),
        }

        band_prev = None
        for idx_band, (band, config_flux) in enumerate(config.columns_flux.items()):
            mag_ref = -2.5*np.log10(cat_ref[config_flux.column_ref_flux]) + config.mag_zeropoint_ref
            if idx_band > 0:
                # column_mag_temp still holds the previous band's magnitudes here
                cat_ref[column_color_temp] = cat_ref[column_mag_temp] - mag_ref
            cat_ref[column_mag_temp] = mag_ref

            flux_err_frac = [None]*n_models
            mag_model = [None]*n_models

            select_ref_bins = [select_ref & (mag_ref > mag_lo) & (mag_ref < mag_hi)
                               for mag_lo, mag_hi in bins_mag]

            # Iterate over multiple models, compute their mags and colours (if there's a previous band)
            for idx_model in range(n_models):
                column_target_flux = config_flux.columns_target_flux[idx_model]
                column_target_flux_err = config_flux.columns_target_flux_err[idx_model]

                flux_target = cat_target[column_target_flux]
                mag_target = -2.5*np.log10(flux_target) + config.mag_zeropoint_target
                if config.mag_ceiling_target is not None:
                    mag_target[mag_target > config.mag_ceiling_target] = config.mag_ceiling_target
                mag_model[idx_model] = mag_target

                # These are needed for computing magnitude/color "errors" (which are a sketchy concept)
                flux_err_frac[idx_model] = cat_target[column_target_flux_err]/flux_target
                # Keep these mags tabulated for convenience
                column_mag_temp_model = f'{column_mag_temp}{idx_model}'
                cat_target[column_mag_temp_model] = mag_target

                columns_target[f'flux_{column_target_flux}'] = (
                    config_flux.column_ref_flux,
                    column_target_flux,
                    column_target_flux_err,
                    True,
                )
                # Note: magnitude errors are generally problematic and not worth aggregating
                columns_target[f'mag_{column_target_flux}'] = (
                    column_mag_temp, column_mag_temp_model, None, False,
                )

                if idx_band > 0:
                    column_color_temp_model = f'{column_color_temp}{idx_model}'
                    column_color_err_temp_model = f'{column_color_err_temp}{idx_model}'

                    # e.g. if order is ugrizy, first color will be u - g
                    cat_target[column_color_temp_model] = mag_prev[idx_model] - mag_model[idx_model]

                    # Sum (in quadrature, and admittedly sketchy for faint fluxes) magnitude errors
                    cat_target[column_color_err_temp_model] = 2.5/np.log(10)*np.hypot(
                        flux_err_frac[idx_model], flux_err_frac_prev[idx_model])
                    columns_target[f'color_{band_prev}-{band}_{column_target_flux}'] = (
                        column_color_temp,
                        column_color_temp_model,
                        column_color_err_temp_model,
                        False,
                    )

                for idx_bin, (mag_lo, mag_hi) in enumerate(bins_mag):
                    # Assigning into this row writes into `data`
                    row = data[idx_bin]
                    select_ref_bin = select_ref_bins[idx_bin]
                    select_target_bin = select_target & (mag_target > mag_lo) & (mag_target < mag_hi)

                    for subtype, is_extended in (('', None), ('_resolved', True), ('_unresolved', False)):
                        # Counts filtered by match selection and magnitude bin
                        select_ref_sub = select_ref_bin.copy()
                        select_target_sub = select_target_bin.copy()
                        if is_extended is not None:
                            is_extended_ref = (extended_ref == is_extended)
                            select_ref_sub &= is_extended_ref
                            select_target_sub &= (extended_target == is_extended)
                            # Count columns only exist for the (un)resolved subtypes
                            n_ref_sub = np.count_nonzero(select_ref_sub)
                            n_target_sub = np.count_nonzero(select_target_sub)
                            row[f'{band}{subtype}_n_ref'] = n_ref_sub
                            row[f'{band}{subtype}_n_target'] = n_target_sub

                        # Filter matches by magnitude bin and true class
                        match_row_bin = match_row.copy()
                        match_row_bin[~select_ref_sub] = -1
                        match_good = match_row_bin >= 0

                        n_match = np.count_nonzero(match_good)

                        # Same for counts of matched target sources (for e.g. purity)

                        if n_match > 0:
                            rows_matched = match_row_bin[match_good]
                            subset_target = cat_target.iloc[rows_matched]
                            if is_extended is not None:
                                right_type = extended_target[rows_matched] == is_extended
                                n_total = len(right_type)
                                n_right = np.count_nonzero(right_type)
                                row[f'{band}{subtype}_n_ref_match_right'] = n_right
                                row[f'{band}{subtype}_n_ref_match_wrong'] = n_total - n_right

                            # compute stats for this bin, for all columns
                            for column, (column_ref, column_target, column_err_target, skip_diff) \
                                    in columns_target.items():
                                values_ref = cat_ref[column_ref][match_good].values
                                compute_stats(
                                    values_ref,
                                    subset_target[column_target].values,
                                    (subset_target[column_err_target].values if column_err_target is not None
                                     else None),
                                    row,
                                    stats,
                                    suffixes,
                                    prefix=f'{band}{subtype}_{column}',
                                    skip_diff=skip_diff,
                                )

                        # Count matched target sources with *measured* mags within bin
                        # Used for e.g. purity calculation
                        # Should be merged with above code if there's ever a need for
                        # measuring stats on this source selection
                        select_target_sub &= matched_target

                        if is_extended is not None and (np.count_nonzero(select_target_sub) > 0):
                            n_total = np.count_nonzero(select_target_sub)
                            right_type = np.zeros(n_target, dtype=bool)
                            right_type[match_row[matched_ref & is_extended_ref]] = True
                            right_type &= select_target_sub
                            n_right = np.count_nonzero(right_type)
                            row[f'{band}{subtype}_n_target_match_right'] = n_right
                            row[f'{band}{subtype}_n_target_match_wrong'] = n_total - n_right

                # delete the flux/color columns since they change with each band
                for prefix in ('flux_', 'mag_'):
                    del columns_target[f'{prefix}{column_target_flux}']
                if idx_band > 0:
                    del columns_target[f'color_{band_prev}-{band}_{column_target_flux}']

            # keep values needed for colors
            flux_err_frac_prev = flux_err_frac
            mag_prev = mag_model
            band_prev = band

        retStruct = pipeBase.Struct(cat_matched=cat_matched, diff_matched=pd.DataFrame(data))
        return retStruct
pipeBase.Struct run(self, pd.DataFrame catalog_ref, pd.DataFrame catalog_target, pd.DataFrame catalog_match_ref, pd.DataFrame catalog_match_target, afwGeom.SkyWcs wcs=None)
def compute_stats(values_ref, values_target, errors_target, row, stats, suffixes, prefix, skip_diff=False)