Coverage for python / lsst / analysis / tools / actions / keyedData / calcBinnedCompleteness.py: 37%

122 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-04-15 00:23 +0000

1# This file is part of analysis_tools. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21from __future__ import annotations 

22 

23__all__ = ("CalcBinnedCompletenessAction",) 

24 

25import copy 

26from typing import Any 

27 

28import numpy as np 

29from lsst.pex.config import Field 

30from lsst.pex.config.configurableActions import ConfigurableActionField 

31 

32from ...interfaces import KeyedData, KeyedDataAction, KeyedDataSchema, Scalar, Vector 

33from ...math import divide 

34from ..vector.selectors import RangeSelector 

35 

36 

class CalcBinnedCompletenessAction(KeyedDataAction):
    """Calculate completeness and purity in a single magnitude bin.

    Completeness is the number of matched objects whose reference selector
    places them in the bin, divided by the number of all objects the
    reference selector places in the bin. Purity is the same ratio using the
    target (measured) selector instead.

    Both statistics are also computed separately for objects that are
    considered "good" and "bad" matches, given a boolean field key
    (``key_matched_class``).

    Notes
    -----
    An object is treated as matched when its ``key_match_distance`` value is
    greater than or equal to zero; any other value (including NaN, which
    compares False) is treated as unmatched.
    """

    key_match_distance = Field[str](
        default="match_distance",
        doc="Key for column with distance between matched objects",
    )
    key_matched_class = Field[str](
        default="matched_class",
        doc="Key for boolean vector (True if matched objects have the same class as their ref match)",
    )
    key_mask_ref = Field[str](
        default=None,
        doc="Key for mask to apply for reference objects in completeness",
        optional=True,
    )
    key_mask_target = Field[str](
        default=None,
        doc="Key for mask to apply for target objects in purity",
        optional=True,
    )
    name_prefix = Field[str](default="", doc="Field name to append statistic names to")
    name_suffix = Field[str](default="", doc="Field name to append to statistic names")
    selector_range_ref = ConfigurableActionField[RangeSelector](doc="Range selector for reference objects")
    selector_range_target = ConfigurableActionField[RangeSelector](doc="Range selector for measured objects")

    def getInputSchema(self, **kwargs) -> KeyedDataSchema:
        """Yield the (key, type) pairs this action reads from its input.

        The optional mask keys are only included when they are configured,
        followed by whatever the two range selectors require.
        """
        yield self.key_match_distance, Vector
        yield self.key_matched_class, Vector
        if self.key_mask_ref:
            yield self.key_mask_ref, Vector
        if self.key_mask_target:
            yield self.key_mask_target, Vector
        yield from self.selector_range_ref.getInputSchema()
        yield from self.selector_range_target.getInputSchema()

    def getOutputSchema(self) -> KeyedDataSchema:
        """Return the (unformatted key, type) pairs this action produces."""
        return (
            (self.name_mask_ref, Vector),
            (self.name_mask_target, Vector),
            (self.name_count, Scalar),
            (self.name_count_ref, Scalar),
            (self.name_count_target, Scalar),
            (self.name_completeness, Scalar),
            (self.name_completeness_bad_match, Scalar),
            (self.name_completeness_good_match, Scalar),
            (self.name_purity, Scalar),
            (self.name_purity_bad_match, Scalar),
            (self.name_purity_good_match, Scalar),
            (self.name_range_maximum, Scalar),
            (self.name_range_minimum, Scalar),
        )

    def getFormattedOutputKeys(self, band: str | None = None, **kwargs: Any) -> dict[str, str]:
        """Return the mapping from unformatted output schema keys to formatted.

        Parameters
        ----------
        band
            The name of the band, if any.
        kwargs
            Additional keyword arguments that are unused.

        Returns
        -------
        result : dict[`str`, `str`]
            A dict with formatted key values for unformatted keys.
        """
        kwargs_format = {} if band is None else {"band": band}
        # Derive the keys from the output schema so the two cannot drift
        # apart; the schema order is preserved.
        return {key: key.format(**kwargs_format) for key, _ in self.getOutputSchema()}

    @property
    def name_count(self) -> str:
        """Unformatted output key for the count of objects in either bin."""
        return f"{self.name_prefix}count{self.name_suffix}"

    @property
    def name_count_ref(self) -> str:
        """Unformatted output key for the count of reference objects."""
        return f"{self.name_prefix}count_ref{self.name_suffix}"

    @property
    def name_count_target(self) -> str:
        """Unformatted output key for the count of target objects."""
        return f"{self.name_prefix}count_target{self.name_suffix}"

    @property
    def name_mask_ref(self) -> str:
        """Unformatted output key for the reference selection mask."""
        return f"{self.name_prefix}mask_ref{self.name_suffix}"

    @property
    def name_mask_target(self) -> str:
        """Unformatted output key for the target selection mask."""
        # Must differ from name_mask_ref, otherwise the target mask
        # overwrites the reference mask in the results.
        return f"{self.name_prefix}mask_target{self.name_suffix}"

    @property
    def name_completeness(self) -> str:
        """Unformatted output key for the completeness."""
        return f"{self.name_prefix}completeness{self.name_suffix}"

    @property
    def name_completeness_bad_match(self) -> str:
        """Unformatted output key for the bad-match completeness."""
        return f"{self.name_prefix}completeness_bad_match{self.name_suffix}"

    @property
    def name_completeness_good_match(self) -> str:
        """Unformatted output key for the good-match completeness."""
        return f"{self.name_prefix}completeness_good_match{self.name_suffix}"

    @property
    def name_purity(self) -> str:
        """Unformatted output key for the purity."""
        return f"{self.name_prefix}purity{self.name_suffix}"

    @property
    def name_purity_bad_match(self) -> str:
        """Unformatted output key for the bad-match purity."""
        return f"{self.name_prefix}purity_bad_match{self.name_suffix}"

    @property
    def name_purity_good_match(self) -> str:
        """Unformatted output key for the good-match purity."""
        return f"{self.name_prefix}purity_good_match{self.name_suffix}"

    @property
    def name_range_maximum(self) -> str:
        """Unformatted output key for the bin maximum."""
        return f"{self.name_prefix}range_maximum{self.name_suffix}"

    @property
    def name_range_minimum(self) -> str:
        """Unformatted output key for the bin minimum."""
        return f"{self.name_prefix}range_minimum{self.name_suffix}"

    def name_mag_completeness(self, name_threshold: str) -> str:
        """Return the key for a magnitude-at-completeness statistic.

        Parameters
        ----------
        name_threshold
            Label for the threshold; when non-empty it is inserted into the
            key with a leading underscore.
        """
        name_threshold = f"_{name_threshold}" if name_threshold else name_threshold
        return f"{self.name_prefix}mag_completeness{name_threshold}{self.name_suffix}"

    def __call__(self, data: KeyedData, band: str | None = None, mask=None, **kwargs: Any) -> KeyedData:
        """Compute completeness and purity metrics.

        Parameters
        ----------
        data
            Input data to read from.
        band
            The name of the band, if any.
        mask
            An additional mask to select on before computing statistics.
        kwargs
            Additional keyword arguments that are unused.

        Returns
        -------
        data
            Dictionary with formatted keys:

            ``"name_mask_ref"``
                The reference selection (bin and optional reference mask),
                before the additional ``mask`` is applied.
            ``"name_mask_target"``
                The target selection (bin and optional target mask),
                before the additional ``mask`` is applied.
            ``"name_count"``
                The number of objects of either type (reference or target)
                within the bin (and mask).
            ``"name_count_ref"``
                The number of reference objects within the bin (and mask).
            ``"name_count_target"``
                The number of target (measured) objects within the bin
                (and mask).
            ``"name_completeness"``
                The completeness within the bin.
            ``"name_completeness_bad_match"``
                The completeness of objects considered bad matches.
            ``"name_completeness_good_match"``
                The completeness of objects considered good matches.
            ``"name_purity"``
                The purity within the bin.
            ``"name_purity_bad_match"``
                The purity of objects considered bad matches.
            ``"name_purity_good_match"``
                The purity of objects considered good matches.
            ``"name_range_maximum"``
                The maximum magnitude of the bin selector.
            ``"name_range_minimum"``
                The minimum magnitude of the bin selector.
        """
        mask_ref = self.selector_range_ref(data)
        mask_target = self.selector_range_target(data)
        # Combine the bin selections with the optional per-catalog masks
        # in place, as the original implementation did.
        for mask_sub, key_new in ((mask_ref, self.key_mask_ref), (mask_target, self.key_mask_target)):
            if key_new:
                mask_sub &= data[key_new]

        # The stored masks are the full-length selections so that they can
        # still index the input columns; `mask` is applied to copies below.
        results = {
            self.name_mask_ref: mask_ref,
            self.name_mask_target: mask_target,
        }

        matched = data[self.key_match_distance] >= 0
        matched_good = data[self.key_matched_class]

        # Test against None explicitly: the truth value of a multi-element
        # array is ambiguous and raises ValueError.
        if mask is not None:
            mask = copy.copy(mask)
            matched = matched[mask]
            matched_good = matched_good[mask]
            mask_ref = mask_ref[mask]
            mask_target = mask_target[mask]

        # Counts are taken after `mask` is applied so that numerators and
        # denominators cover the same set of objects.
        n_ref = np.sum(mask_ref)
        n_target = np.sum(mask_target)

        matched_ref = matched & mask_ref
        matched_target = matched & mask_target
        n_matched_ref = np.sum(matched_ref)
        n_matched_target = np.sum(matched_target)

        n_matched_same_ref = np.sum(matched_good & matched_ref)
        n_matched_same_target = np.sum(matched_good & matched_target)

        results[self.name_count] = np.sum(mask_ref | mask_target)
        results[self.name_count_ref] = n_ref
        results[self.name_count_target] = n_target
        # NOTE(review): `divide` is the project helper from ...math; it is
        # presumably safe for a zero denominator - confirm.
        results[self.name_completeness] = divide(n_matched_ref, n_ref)
        results[self.name_completeness_bad_match] = divide(n_matched_ref - n_matched_same_ref, n_ref)
        results[self.name_completeness_good_match] = divide(n_matched_same_ref, n_ref)
        results[self.name_purity] = divide(n_matched_target, n_target)
        results[self.name_purity_bad_match] = divide(n_matched_target - n_matched_same_target, n_target)
        results[self.name_purity_good_match] = divide(n_matched_same_target, n_target)

        # validate() requires both selectors to share the same range, so
        # the reference selector's limits describe the bin.
        results[self.name_range_maximum] = self.selector_range_ref.maximum
        results[self.name_range_minimum] = self.selector_range_ref.minimum

        keys_formatted = self.getFormattedOutputKeys(band=band)
        return {key_new: results[key_old] for key_old, key_new in keys_formatted.items()}

    def validate(self):
        """Validate the configuration.

        Raises
        ------
        ValueError
            Raised if the reference and target selectors do not have
            identical minimum and maximum values.
        """
        # Run the base-class field validation before the cross-field check.
        super().validate()
        if (self.selector_range_ref.minimum != self.selector_range_target.minimum) or (
            self.selector_range_ref.maximum != self.selector_range_target.maximum
        ):
            raise ValueError(
                f"{self.selector_range_ref.minimum=} != {self.selector_range_target.minimum=} or"
                f" {self.selector_range_ref.maximum=} != {self.selector_range_target.maximum=};"
                f" selectors must have identical ranges."
            )