Coverage for python / lsst / analysis / tools / actions / keyedData / calcBinnedCompleteness.py: 37%
122 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-22 09:09 +0000
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-22 09:09 +0000
1# This file is part of analysis_tools.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <https://www.gnu.org/licenses/>.
21from __future__ import annotations
23__all__ = ("CalcBinnedCompletenessAction",)
25import copy
26from typing import Any
28import numpy as np
29from lsst.pex.config import Field
30from lsst.pex.config.configurableActions import ConfigurableActionField
32from ...interfaces import KeyedData, KeyedDataAction, KeyedDataSchema, Scalar, Vector
33from ...math import divide
34from ..vector.selectors import RangeSelector
class CalcBinnedCompletenessAction(KeyedDataAction):
    """Calculate completeness and purity in a single magnitude bin.

    Completeness is the fraction of matched objects with reference magnitudes
    within the bin limits, while purity is the fraction of matched objects
    with measured magnitudes within the bin limits.

    Both statistics are also computed separately for objects that are
    considered "good" and "bad" matches, given a boolean field key.
    """

    key_match_distance = Field[str](
        default="match_distance",
        doc="Key for column with distance between matched objects",
    )
    key_matched_class = Field[str](
        default="matched_class",
        doc="Key for boolean vector (True if matched objects have the same class as their ref match)",
    )
    key_mask_ref = Field[str](
        default=None,
        doc="Key for mask to apply for reference objects in completeness",
        optional=True,
    )
    key_mask_target = Field[str](
        default=None,
        doc="Key for mask to apply for target objects in purity",
        optional=True,
    )
    name_prefix = Field[str](default="", doc="Field name to append statistic names to")
    name_suffix = Field[str](default="", doc="Field name to append to statistic names")
    selector_range_ref = ConfigurableActionField[RangeSelector](doc="Range selector for reference objects")
    selector_range_target = ConfigurableActionField[RangeSelector](doc="Range selector for measured objects")

    def getInputSchema(self, **kwargs) -> KeyedDataSchema:
        """Yield the (key, type) pairs this action reads from the input data.

        The optional mask keys are only included when they are configured;
        the schemas of both range selectors are appended at the end.
        """
        yield self.key_match_distance, Vector
        yield self.key_matched_class, Vector
        if self.key_mask_ref:
            yield self.key_mask_ref, Vector
        if self.key_mask_target:
            yield self.key_mask_target, Vector
        yield from self.selector_range_ref.getInputSchema()
        yield from self.selector_range_target.getInputSchema()

    def getOutputSchema(self) -> KeyedDataSchema:
        """Return the (unformatted key, type) pairs produced by ``__call__``."""
        return (
            (self.name_mask_ref, Vector),
            (self.name_mask_target, Vector),
            (self.name_count, Scalar),
            (self.name_count_ref, Scalar),
            (self.name_count_target, Scalar),
            (self.name_completeness, Scalar),
            (self.name_completeness_bad_match, Scalar),
            (self.name_completeness_good_match, Scalar),
            (self.name_purity, Scalar),
            (self.name_purity_bad_match, Scalar),
            (self.name_purity_good_match, Scalar),
            (self.name_range_maximum, Scalar),
            (self.name_range_minimum, Scalar),
        )

    def getFormattedOutputKeys(self, band: str | None = None, **kwargs: Any) -> dict[str, str]:
        """Return the mapping from unformatted output schema keys to formatted.

        Parameters
        ----------
        band
            The name of the band, if any.
        kwargs
            Additional keyword arguments that are unused.

        Returns
        -------
        result : dict[`str`, `str`]
            A dict with formatted key values for unformatted keys.
        """
        kwargs_format = {}
        if band is not None:
            kwargs_format["band"] = band

        # Derive the keys from the output schema so that the two can never
        # drift apart; the iteration order matches getOutputSchema.
        return {name: name.format(**kwargs_format) for name, _ in self.getOutputSchema()}

    @property
    def name_count(self):
        """Unformatted output key for the count of objects of either type."""
        return f"{self.name_prefix}count{self.name_suffix}"

    @property
    def name_count_ref(self):
        """Unformatted output key for the count of reference objects."""
        return f"{self.name_prefix}count_ref{self.name_suffix}"

    @property
    def name_count_target(self):
        """Unformatted output key for the count of target objects."""
        return f"{self.name_prefix}count_target{self.name_suffix}"

    @property
    def name_mask_ref(self):
        """Unformatted output key for the reference object mask."""
        return f"{self.name_prefix}mask_ref{self.name_suffix}"

    @property
    def name_mask_target(self):
        """Unformatted output key for the target object mask."""
        # This previously returned the "mask_ref" name, which made the target
        # mask overwrite the reference mask in the results.
        return f"{self.name_prefix}mask_target{self.name_suffix}"

    @property
    def name_completeness(self):
        """Unformatted output key for the completeness."""
        return f"{self.name_prefix}completeness{self.name_suffix}"

    @property
    def name_completeness_bad_match(self):
        """Unformatted output key for the completeness of bad matches."""
        return f"{self.name_prefix}completeness_bad_match{self.name_suffix}"

    @property
    def name_completeness_good_match(self):
        """Unformatted output key for the completeness of good matches."""
        return f"{self.name_prefix}completeness_good_match{self.name_suffix}"

    @property
    def name_purity(self):
        """Unformatted output key for the purity."""
        return f"{self.name_prefix}purity{self.name_suffix}"

    @property
    def name_purity_bad_match(self):
        """Unformatted output key for the purity of bad matches."""
        return f"{self.name_prefix}purity_bad_match{self.name_suffix}"

    @property
    def name_purity_good_match(self):
        """Unformatted output key for the purity of good matches."""
        return f"{self.name_prefix}purity_good_match{self.name_suffix}"

    @property
    def name_range_maximum(self):
        """Unformatted output key for the maximum of the bin range."""
        return f"{self.name_prefix}range_maximum{self.name_suffix}"

    @property
    def name_range_minimum(self):
        """Unformatted output key for the minimum of the bin range."""
        return f"{self.name_prefix}range_minimum{self.name_suffix}"

    def name_mag_completeness(self, name_threshold: str):
        """Return the unformatted key for a magnitude-at-completeness value.

        Parameters
        ----------
        name_threshold
            Label for the threshold; if non-empty it is inserted into the
            key with a leading underscore.
        """
        name_threshold = f"_{name_threshold}" if name_threshold else name_threshold
        return f"{self.name_prefix}mag_completeness{name_threshold}{self.name_suffix}"

    def __call__(self, data: KeyedData, band: str | None = None, mask=None, **kwargs: Any) -> KeyedData:
        """Compute completeness and purity metrics.

        Parameters
        ----------
        data
            Input data to read from.
        band
            The name of the band, if any.
        mask
            An additional mask to select on before computing statistics.
        kwargs
            Additional keyword arguments that are unused.

        Returns
        -------
        data
            Dictionary with formatted keys:

            ``"name_mask_ref"``
                The mask selecting reference objects within the bin.
            ``"name_mask_target"``
                The mask selecting target (measured) objects within the bin.
            ``"name_count"``
                The number of objects of either type (reference or target)
                within the bin (and mask).
            ``"name_count_ref"``
                The number of reference objects within the bin (and mask).
            ``"name_count_target"``
                The number of target (measured) objects within the bin
                (and mask).
            ``"name_completeness"``
                The completeness within the bin.
            ``"name_completeness_bad_match"``
                The completeness of objects considered bad matches.
            ``"name_completeness_good_match"``
                The completeness of objects considered good matches.
            ``"name_purity"``
                The purity within the bin.
            ``"name_purity_bad_match"``
                The purity of objects considered bad matches.
            ``"name_purity_good_match"``
                The purity of objects considered good matches.
            ``"name_range_maximum"``
                The maximum magnitude of the bin selector.
            ``"name_range_minimum"``
                The minimum magnitude of the bin selector.
        """
        results = {}
        mask_ref = self.selector_range_ref(data)
        mask_target = self.selector_range_target(data)
        # Test against None explicitly: the truth value of a multi-element
        # array is ambiguous (numpy raises), and an all-False mask is still a
        # mask that must be applied.
        has_mask = mask is not None
        mask = copy.copy(mask) if has_mask else None

        # Combine with the optional per-catalog masks without mutating the
        # arrays returned by the selectors in place.
        if self.key_mask_ref:
            mask_ref = mask_ref & data[self.key_mask_ref]
        if self.key_mask_target:
            mask_target = mask_target & data[self.key_mask_target]

        results[self.name_mask_ref] = mask_ref
        results[self.name_mask_target] = mask_target

        # NOTE(review): the counts below are taken before the additional
        # ``mask`` argument is applied, whereas the matched counts are taken
        # after it - confirm that this asymmetry is intended.
        n_ref = np.sum(mask_ref)
        n_target = np.sum(mask_target)
        mask_any = mask_ref | mask_target

        # A non-negative match distance marks a row as matched.
        matched = data[self.key_match_distance] >= 0
        matched_good = data[self.key_matched_class]
        if has_mask:
            matched = matched[mask]
            matched_good = matched_good[mask]
            mask_ref = mask_ref[mask]
            mask_target = mask_target[mask]

        matched_ref = matched & mask_ref
        matched_target = matched & mask_target
        n_matched_ref = np.sum(matched_ref)
        n_matched_target = np.sum(matched_target)

        n_matched_same_ref = np.sum(matched_good & matched_ref)
        n_matched_same_target = np.sum(matched_good & matched_target)

        results[self.name_count] = np.sum(mask_any)
        results[self.name_count_ref] = n_ref
        results[self.name_count_target] = n_target
        results[self.name_completeness] = divide(n_matched_ref, n_ref)
        results[self.name_completeness_bad_match] = divide(n_matched_ref - n_matched_same_ref, n_ref)
        results[self.name_completeness_good_match] = divide(n_matched_same_ref, n_ref)
        results[self.name_purity] = divide(n_matched_target, n_target)
        results[self.name_purity_bad_match] = divide(n_matched_target - n_matched_same_target, n_target)
        results[self.name_purity_good_match] = divide(n_matched_same_target, n_target)

        # validate() requires both selectors to share the same range, so the
        # reference selector's limits describe the bin.
        results[self.name_range_maximum] = self.selector_range_ref.maximum
        results[self.name_range_minimum] = self.selector_range_ref.minimum

        keys_formatted = self.getFormattedOutputKeys(band=band)
        results = {key_new: results[key_old] for key_old, key_new in keys_formatted.items()}

        return results

    def validate(self):
        """Check that both range selectors cover an identical range.

        Raises
        ------
        ValueError
            Raised if the minimum or maximum of the reference and target
            range selectors differ.
        """
        # Run the base-class field validation before the inter-field check.
        super().validate()
        if (self.selector_range_ref.minimum != self.selector_range_target.minimum) or (
            self.selector_range_ref.maximum != self.selector_range_target.maximum
        ):
            raise ValueError(
                f"{self.selector_range_ref.minimum=} != {self.selector_range_target.minimum=} or"
                f" {self.selector_range_ref.maximum=} != {self.selector_range_target.maximum=};"
                f" selectors must have identical ranges."
            )