Coverage for python / lsst / analysis / tools / actions / keyedData / calcBinnedCompleteness.py: 37%
122 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-05-06 09:07 +0000
« prev ^ index » next coverage.py v7.13.5, created at 2026-05-06 09:07 +0000
1# This file is part of analysis_tools.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <https://www.gnu.org/licenses/>.
21from __future__ import annotations
23__all__ = ("CalcBinnedCompletenessAction",)
25import copy
26from typing import Any
28import numpy as np
30from lsst.pex.config import Field
31from lsst.pex.config.configurableActions import ConfigurableActionField
33from ...interfaces import KeyedData, KeyedDataAction, KeyedDataSchema, Scalar, Vector
34from ...math import divide
35from ..vector.selectors import RangeSelector
class CalcBinnedCompletenessAction(KeyedDataAction):
    """Calculate completeness and purity in a single magnitude bin.

    Completeness is the fraction of matched objects with reference magnitudes
    within the bin limits, while purity is the fraction of matched objects
    with measured magnitudes within the bin limits.

    Both statistics are also computed separately for objects that are
    considered "good" and "bad" matches, given a boolean field key.
    """

    key_match_distance = Field[str](
        default="match_distance",
        doc="Key for column with distance between matched objects",
    )
    key_matched_class = Field[str](
        default="matched_class",
        doc="Key for boolean vector (True if matched objects have the same class as their ref match)",
    )
    key_mask_ref = Field[str](
        default=None,
        doc="Key for mask to apply for reference objects in completeness",
        optional=True,
    )
    key_mask_target = Field[str](
        default=None,
        doc="Key for mask to apply for target objects in purity",
        optional=True,
    )
    name_prefix = Field[str](default="", doc="Field name to append statistic names to")
    name_suffix = Field[str](default="", doc="Field name to append to statistic names")
    selector_range_ref = ConfigurableActionField[RangeSelector](doc="Range selector for reference objects")
    selector_range_target = ConfigurableActionField[RangeSelector](doc="Range selector for measured objects")

    def getInputSchema(self, **kwargs) -> KeyedDataSchema:
        """Yield the ``(key, type)`` pairs that this action reads.

        The optional mask keys are only included when they are set, and the
        schemas of both range selectors are appended.
        """
        yield self.key_match_distance, Vector
        yield self.key_matched_class, Vector
        if self.key_mask_ref:
            yield self.key_mask_ref, Vector
        if self.key_mask_target:
            yield self.key_mask_target, Vector
        yield from self.selector_range_ref.getInputSchema()
        yield from self.selector_range_target.getInputSchema()

    def getOutputSchema(self) -> KeyedDataSchema:
        """Return the ``(key, type)`` pairs for the unformatted output keys."""
        return (
            (self.name_mask_ref, Vector),
            (self.name_mask_target, Vector),
            (self.name_count, Scalar),
            (self.name_count_ref, Scalar),
            (self.name_count_target, Scalar),
            (self.name_completeness, Scalar),
            (self.name_completeness_bad_match, Scalar),
            (self.name_completeness_good_match, Scalar),
            (self.name_purity, Scalar),
            (self.name_purity_bad_match, Scalar),
            (self.name_purity_good_match, Scalar),
            (self.name_range_maximum, Scalar),
            (self.name_range_minimum, Scalar),
        )

    def getFormattedOutputKeys(self, band: str | None = None, **kwargs: Any) -> dict[str, str]:
        """Return the mapping from unformatted output schema keys to formatted.

        Parameters
        ----------
        band
            The name of the band, if any.
        kwargs
            Additional keyword arguments that are unused.

        Returns
        -------
        result : dict[`str`, `str`]
            A dict with formatted key values for unformatted keys.
        """
        # Only pass "band" to str.format when it was actually supplied.
        kwargs_format = {} if band is None else {"band": band}
        # Same names, in the same order, as getOutputSchema.
        names = (
            self.name_mask_ref,
            self.name_mask_target,
            self.name_count,
            self.name_count_ref,
            self.name_count_target,
            self.name_completeness,
            self.name_completeness_bad_match,
            self.name_completeness_good_match,
            self.name_purity,
            self.name_purity_bad_match,
            self.name_purity_good_match,
            self.name_range_maximum,
            self.name_range_minimum,
        )
        return {name: name.format(**kwargs_format) for name in names}

    @property
    def name_count(self) -> str:
        """Unformatted output key for the count of objects of either type."""
        return f"{self.name_prefix}count{self.name_suffix}"

    @property
    def name_count_ref(self) -> str:
        """Unformatted output key for the reference object count."""
        return f"{self.name_prefix}count_ref{self.name_suffix}"

    @property
    def name_count_target(self) -> str:
        """Unformatted output key for the target object count."""
        return f"{self.name_prefix}count_target{self.name_suffix}"

    @property
    def name_mask_ref(self) -> str:
        """Unformatted output key for the reference object mask."""
        return f"{self.name_prefix}mask_ref{self.name_suffix}"

    @property
    def name_mask_target(self) -> str:
        """Unformatted output key for the target object mask."""
        # Must differ from name_mask_ref; otherwise the two masks collide on
        # a single output key and the reference mask is overwritten.
        return f"{self.name_prefix}mask_target{self.name_suffix}"

    @property
    def name_completeness(self) -> str:
        """Unformatted output key for the completeness."""
        return f"{self.name_prefix}completeness{self.name_suffix}"

    @property
    def name_completeness_bad_match(self) -> str:
        """Unformatted output key for the completeness of bad matches."""
        return f"{self.name_prefix}completeness_bad_match{self.name_suffix}"

    @property
    def name_completeness_good_match(self) -> str:
        """Unformatted output key for the completeness of good matches."""
        return f"{self.name_prefix}completeness_good_match{self.name_suffix}"

    @property
    def name_purity(self) -> str:
        """Unformatted output key for the purity."""
        return f"{self.name_prefix}purity{self.name_suffix}"

    @property
    def name_purity_bad_match(self) -> str:
        """Unformatted output key for the purity of bad matches."""
        return f"{self.name_prefix}purity_bad_match{self.name_suffix}"

    @property
    def name_purity_good_match(self) -> str:
        """Unformatted output key for the purity of good matches."""
        return f"{self.name_prefix}purity_good_match{self.name_suffix}"

    @property
    def name_range_maximum(self) -> str:
        """Unformatted output key for the maximum of the bin range."""
        return f"{self.name_prefix}range_maximum{self.name_suffix}"

    @property
    def name_range_minimum(self) -> str:
        """Unformatted output key for the minimum of the bin range."""
        return f"{self.name_prefix}range_minimum{self.name_suffix}"

    def name_mag_completeness(self, name_threshold: str):
        """Return the unformatted key for a magnitude-at-completeness value.

        Parameters
        ----------
        name_threshold
            Label for the completeness threshold. If non-empty, it is
            inserted into the key preceded by an underscore.
        """
        name_threshold = f"_{name_threshold}" if name_threshold else name_threshold
        return f"{self.name_prefix}mag_completeness{name_threshold}{self.name_suffix}"

    def __call__(self, data: KeyedData, band: str | None = None, mask=None, **kwargs: Any) -> KeyedData:
        """Compute completeness and purity metrics.

        Parameters
        ----------
        data
            Input data to read from.
        band
            The name of the band, if any.
        mask
            An additional mask to select on before computing statistics.
        kwargs
            Additional keyword arguments that are unused.

        Returns
        -------
        data
            Dictionary with formatted keys (see `getFormattedOutputKeys`)
            for each of the following names:

            ``name_mask_ref``
                The reference range selection, combined with the
                ``key_mask_ref`` column if set. It is stored before
                ``mask`` is applied.
            ``name_mask_target``
                The target range selection, combined with the
                ``key_mask_target`` column if set. It is stored before
                ``mask`` is applied.
            ``name_count``
                The number of objects of either type (reference or target)
                within the bin (and mask).
            ``name_count_ref``
                The number of reference objects within the bin (and mask).
            ``name_count_target``
                The number of target (measured) objects within the bin
                (and mask).
            ``name_completeness``
                The completeness within the bin.
            ``name_completeness_bad_match``
                The completeness of objects considered bad matches.
            ``name_completeness_good_match``
                The completeness of objects considered good matches.
            ``name_purity``
                The purity within the bin.
            ``name_purity_bad_match``
                The purity of objects considered bad matches.
            ``name_purity_good_match``
                The purity of objects considered good matches.
            ``name_range_maximum``
                The maximum of the reference range selector.
            ``name_range_minimum``
                The minimum of the reference range selector.
        """
        mask_ref = self.selector_range_ref(data)
        mask_target = self.selector_range_target(data)
        # The truth value of a multi-element array is ambiguous (NumPy
        # raises), so presence of the mask is tested against None.
        mask = copy.copy(mask) if mask is not None else None
        for mask_sub, key_new in ((mask_ref, self.key_mask_ref), (mask_target, self.key_mask_target)):
            if key_new:
                # In-place so that mask_ref/mask_target themselves are updated.
                mask_sub &= data[key_new]

        # The output masks are recorded before the extra mask is applied.
        results = {
            self.name_mask_ref: mask_ref,
            self.name_mask_target: mask_target,
        }

        matched = data[self.key_match_distance] >= 0
        matched_good = data[self.key_matched_class]
        if mask is not None:
            matched = matched[mask]
            matched_good = matched_good[mask]
            mask_ref = mask_ref[mask]
            mask_target = mask_target[mask]

        # Counts are taken after the extra mask so that numerators and
        # denominators of the fractions below cover the same selection.
        n_ref = np.sum(mask_ref)
        n_target = np.sum(mask_target)
        n_any = np.sum(mask_ref | mask_target)

        matched_ref = matched & mask_ref
        matched_target = matched & mask_target
        n_matched_ref = np.sum(matched_ref)
        n_matched_target = np.sum(matched_target)
        n_matched_same_ref = np.sum(matched_good & matched_ref)
        n_matched_same_target = np.sum(matched_good & matched_target)

        results[self.name_count] = n_any
        results[self.name_count_ref] = n_ref
        results[self.name_count_target] = n_target
        results[self.name_completeness] = divide(n_matched_ref, n_ref)
        results[self.name_completeness_bad_match] = divide(n_matched_ref - n_matched_same_ref, n_ref)
        results[self.name_completeness_good_match] = divide(n_matched_same_ref, n_ref)
        results[self.name_purity] = divide(n_matched_target, n_target)
        results[self.name_purity_bad_match] = divide(n_matched_target - n_matched_same_target, n_target)
        results[self.name_purity_good_match] = divide(n_matched_same_target, n_target)

        # The range is read from the reference selector only.
        results[self.name_range_maximum] = self.selector_range_ref.maximum
        results[self.name_range_minimum] = self.selector_range_ref.minimum

        # Re-key the results from unformatted to formatted names.
        keys_formatted = self.getFormattedOutputKeys(band=band)
        return {key_new: results[key_old] for key_old, key_new in keys_formatted.items()}

    def validate(self):
        """Check that both range selectors cover the same range.

        Raises
        ------
        ValueError
            Raised if the minimum or maximum of the reference and target
            range selectors differ.
        """
        # NOTE(review): this override does not call super().validate();
        # confirm whether skipping the base-class validation is intentional.
        if (self.selector_range_ref.minimum != self.selector_range_target.minimum) or (
            self.selector_range_ref.maximum != self.selector_range_target.maximum
        ):
            raise ValueError(
                f"{self.selector_range_ref.minimum=} != {self.selector_range_target.minimum=} or"
                f" {self.selector_range_ref.maximum=} != {self.selector_range_target.maximum=};"
                f" selectors must have identical ranges."
            )