Coverage for python / lsst / analysis / tools / actions / keyedData / calcBinnedCompleteness.py: 37%

122 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-04-22 09:32 +0000

1# This file is part of analysis_tools. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21from __future__ import annotations 

22 

23__all__ = ("CalcBinnedCompletenessAction",) 

24 

25import copy 

26from typing import Any 

27 

28import numpy as np 

29 

30from lsst.pex.config import Field 

31from lsst.pex.config.configurableActions import ConfigurableActionField 

32 

33from ...interfaces import KeyedData, KeyedDataAction, KeyedDataSchema, Scalar, Vector 

34from ...math import divide 

35from ..vector.selectors import RangeSelector 

36 

37 

class CalcBinnedCompletenessAction(KeyedDataAction):
    """Calculate completeness and purity in a single magnitude bin.

    Completeness is the fraction of matched objects with reference magnitudes
    within the bin limits, while purity is the fraction of matched objects
    with measured magnitudes within the bin limits.

    Both statistics are also computed separately for objects that are
    considered "good" and "bad" matches, given a boolean field key.

    Notes
    -----
    All output keys are built as ``{name_prefix}<statistic>{name_suffix}``
    and are passed through `str.format` (with ``band`` if provided) before
    being returned by `__call__`.
    """

    key_match_distance = Field[str](
        default="match_distance",
        doc="Key for column with distance between matched objects",
    )
    key_matched_class = Field[str](
        default="matched_class",
        doc="Key for boolean vector (True if matched objects have the same class as their ref match)",
    )
    key_mask_ref = Field[str](
        default=None,
        doc="Key for mask to apply for reference objects in completeness",
        optional=True,
    )
    key_mask_target = Field[str](
        default=None,
        doc="Key for mask to apply for target objects in purity",
        optional=True,
    )
    name_prefix = Field[str](default="", doc="Field name to append statistic names to")
    name_suffix = Field[str](default="", doc="Field name to append to statistic names")
    selector_range_ref = ConfigurableActionField[RangeSelector](doc="Range selector for reference objects")
    selector_range_target = ConfigurableActionField[RangeSelector](doc="Range selector for measured objects")

    def getInputSchema(self, **kwargs) -> KeyedDataSchema:
        """Yield the (key, type) pairs this action reads from the input data.

        The optional mask keys are only requested when they are configured;
        the schemas of both range selectors are appended at the end.
        """
        yield self.key_match_distance, Vector
        yield self.key_matched_class, Vector
        if self.key_mask_ref:
            yield self.key_mask_ref, Vector
        if self.key_mask_target:
            yield self.key_mask_target, Vector
        yield from self.selector_range_ref.getInputSchema()
        yield from self.selector_range_target.getInputSchema()

    def getOutputSchema(self) -> KeyedDataSchema:
        """Return the unformatted (key, type) pairs produced by `__call__`."""
        return (
            (self.name_mask_ref, Vector),
            (self.name_mask_target, Vector),
            (self.name_count, Scalar),
            (self.name_count_ref, Scalar),
            (self.name_count_target, Scalar),
            (self.name_completeness, Scalar),
            (self.name_completeness_bad_match, Scalar),
            (self.name_completeness_good_match, Scalar),
            (self.name_purity, Scalar),
            (self.name_purity_bad_match, Scalar),
            (self.name_purity_good_match, Scalar),
            (self.name_range_maximum, Scalar),
            (self.name_range_minimum, Scalar),
        )

    def getFormattedOutputKeys(self, band: str | None = None, **kwargs: Any) -> dict[str, str]:
        """Return the mapping from unformatted output schema keys to formatted.

        Parameters
        ----------
        band
            The name of the band, if any.
        kwargs
            Additional keyword arguments that are unused.

        Returns
        -------
        result : dict[`str`, `str`]
            A dict with formatted key values for unformatted keys.
        """
        kwargs_format = {}
        if band is not None:
            kwargs_format["band"] = band

        # Derive the keys from the output schema so the two can never drift
        # apart; the iteration order matches the schema order.
        return {key: key.format(**kwargs_format) for key, _ in self.getOutputSchema()}

    @property
    def name_count(self):
        """Output key for the number of objects in either bin selection."""
        return f"{self.name_prefix}count{self.name_suffix}"

    @property
    def name_count_ref(self):
        """Output key for the number of reference objects in the bin."""
        return f"{self.name_prefix}count_ref{self.name_suffix}"

    @property
    def name_count_target(self):
        """Output key for the number of target objects in the bin."""
        return f"{self.name_prefix}count_target{self.name_suffix}"

    @property
    def name_mask_ref(self):
        """Output key for the reference-object bin mask."""
        return f"{self.name_prefix}mask_ref{self.name_suffix}"

    @property
    def name_mask_target(self):
        """Output key for the target-object bin mask."""
        # Must differ from name_mask_ref, otherwise the target mask silently
        # overwrites the reference mask in the results.
        return f"{self.name_prefix}mask_target{self.name_suffix}"

    @property
    def name_completeness(self):
        """Output key for the completeness."""
        return f"{self.name_prefix}completeness{self.name_suffix}"

    @property
    def name_completeness_bad_match(self):
        """Output key for the completeness of bad matches."""
        return f"{self.name_prefix}completeness_bad_match{self.name_suffix}"

    @property
    def name_completeness_good_match(self):
        """Output key for the completeness of good matches."""
        return f"{self.name_prefix}completeness_good_match{self.name_suffix}"

    @property
    def name_purity(self):
        """Output key for the purity."""
        return f"{self.name_prefix}purity{self.name_suffix}"

    @property
    def name_purity_bad_match(self):
        """Output key for the purity of bad matches."""
        return f"{self.name_prefix}purity_bad_match{self.name_suffix}"

    @property
    def name_purity_good_match(self):
        """Output key for the purity of good matches."""
        return f"{self.name_prefix}purity_good_match{self.name_suffix}"

    @property
    def name_range_maximum(self):
        """Output key for the upper limit of the bin."""
        return f"{self.name_prefix}range_maximum{self.name_suffix}"

    @property
    def name_range_minimum(self):
        """Output key for the lower limit of the bin."""
        return f"{self.name_prefix}range_minimum{self.name_suffix}"

    def name_mag_completeness(self, name_threshold: str):
        """Return the key for a completeness-magnitude statistic.

        Parameters
        ----------
        name_threshold
            Label for the threshold; if non-empty it is inserted into the
            key with a leading underscore.

        Notes
        -----
        This key is not produced by this class; presumably it is provided
        for callers that derive such a statistic - verify against usage.
        """
        name_threshold = f"_{name_threshold}" if name_threshold else name_threshold
        return f"{self.name_prefix}mag_completeness{name_threshold}{self.name_suffix}"

    def __call__(self, data: KeyedData, band: str | None = None, mask=None, **kwargs: Any) -> KeyedData:
        """Compute completeness and purity metrics.

        Parameters
        ----------
        data
            Input data to read from.
        band
            The name of the band, if any.
        mask
            An additional mask to select on before computing statistics.
            It is used to index the per-row vectors, so it is assumed to be
            a boolean (or index) array compatible with them.
        kwargs
            Additional keyword arguments that are unused.

        Returns
        -------
        data
            Dictionary with formatted keys:

            ``"name_mask_ref"``
                The selection of reference objects within the bin.
            ``"name_mask_target"``
                The selection of target objects within the bin.
            ``"name_count"``
                The number of objects of either type (reference or target)
                within the bin (and mask).
            ``"name_count_ref"``
                The number of reference objects within the bin (and mask).
            ``"name_count_target"``
                The number of target (measured) objects within the bin
                (and mask).
            ``"name_completeness"``
                The completeness within the bin.
            ``"name_completeness_bad_match"``
                The completeness of objects considered bad matches.
            ``"name_completeness_good_match"``
                The completeness of objects considered good matches.
            ``"name_purity"``
                The purity within the bin.
            ``"name_purity_bad_match"``
                The purity of objects considered bad matches.
            ``"name_purity_good_match"``
                The purity of objects considered good matches.
            ``"name_range_maximum"``
                The maximum magnitude of the bin selector.
            ``"name_range_minimum"``
                The minimum magnitude of the bin selector.
        """
        results = {}
        mask_ref = self.selector_range_ref(data)
        mask_target = self.selector_range_target(data)
        # Test against None explicitly: the truth value of a multi-element
        # array is ambiguous and raises ValueError.
        has_mask = mask is not None
        mask = copy.copy(mask) if has_mask else None
        # Restrict each bin selection by its optional configured mask column
        # (in place, on the arrays returned by the selectors).
        for mask_sub, key_new in ((mask_ref, self.key_mask_ref), (mask_target, self.key_mask_target)):
            if key_new:
                mask_sub &= data[key_new]

        results[self.name_mask_ref] = mask_ref
        results[self.name_mask_target] = mask_target

        # NOTE(review): the totals below are computed before the additional
        # ``mask`` is applied, whereas the matched counts are computed after
        # it - confirm that this asymmetry is intended.
        n_ref = np.sum(mask_ref)
        n_target = np.sum(mask_target)
        mask_any = mask_ref | mask_target
        # Rows with a non-negative match distance are treated as matched.
        matched = data[self.key_match_distance] >= 0
        matched_good = data[self.key_matched_class]
        if has_mask:
            matched = matched[mask]
            mask_ref = mask_ref[mask]
            mask_target = mask_target[mask]
            matched_good = matched_good[mask]

        matched_ref = matched & mask_ref
        matched_target = matched & mask_target
        n_matched_ref = np.sum(matched_ref)
        n_matched_target = np.sum(matched_target)

        n_matched_same_ref = np.sum(matched_good & matched_ref)
        n_matched_same_target = np.sum(matched_good & matched_target)

        results[self.name_count] = np.sum(mask_any)
        results[self.name_count_ref] = n_ref
        results[self.name_count_target] = n_target
        results[self.name_completeness] = divide(n_matched_ref, n_ref)
        results[self.name_completeness_bad_match] = divide(n_matched_ref - n_matched_same_ref, n_ref)
        results[self.name_completeness_good_match] = divide(n_matched_same_ref, n_ref)
        results[self.name_purity] = divide(n_matched_target, n_target)
        results[self.name_purity_bad_match] = divide(n_matched_target - n_matched_same_target, n_target)
        results[self.name_purity_good_match] = divide(n_matched_same_target, n_target)

        # The bin limits are reported from the reference selector; validate()
        # requires the target selector to have the same limits.
        results[self.name_range_maximum] = self.selector_range_ref.maximum
        results[self.name_range_minimum] = self.selector_range_ref.minimum

        keys_formatted = self.getFormattedOutputKeys(band=band)
        results = {key_new: results[key_old] for key_old, key_new in keys_formatted.items()}

        return results

    def validate(self):
        """Validate the configuration.

        Raises
        ------
        ValueError
            Raised if the reference and target range selectors do not have
            identical minimum and maximum values.
        """
        # Run the standard field validation before the class-specific check.
        super().validate()
        if (self.selector_range_ref.minimum != self.selector_range_target.minimum) or (
            self.selector_range_ref.maximum != self.selector_range_target.maximum
        ):
            raise ValueError(
                f"{self.selector_range_ref.minimum=} != {self.selector_range_target.minimum=} or"
                f" {self.selector_range_ref.maximum=} != {self.selector_range_target.maximum=};"
                " selectors must have identical ranges."
            )