Coverage for python / lsst / summit / extras / assessQFM.py: 11%

102 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-05-04 17:51 +0000

1# This file is part of summit_extras. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

22import argparse 

23import time 

24from multiprocessing import Pool 

25 

26import numpy as np 

27import pandas as pd 

28 

29import lsst.daf.butler as dafButler 

30import lsst.summit.utils.butlerUtils as butlerUtils 

31from lsst.pipe.tasks.quickFrameMeasurement import QuickFrameMeasurementTask, QuickFrameMeasurementTaskConfig 

32 

33 

class AssessQFM:
    """Test a new version of quickFrameMeasurementTask against the baseline
    results.

    Runs QuickFrameMeasurementTask over a set of exposures whose results have
    previously been vetted by eye, then prints a category-by-category
    comparison of the new fits against the vetted baseline.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        Butler repository with the relevant exposures.
    dataProduct : `str`, optional
        Data product on which to run quickFrameMeasurement.
    dataset : `str`, optional
        File holding a table of vetted quickFrameMeasurement results.
    successCut : `float`, optional
        Distance in pixels between the baseline and new measurement centroids
        for already successful fits in order to consider the new fit equally
        successful.
    nearSuccessCut : `float`, optional
        Distance in pixels between the baseline and new measurement centroids
        for fits that were close to correct in order to consider the new fit
        approximately as successful.
    donutCut : `float`, optional
        Distance in pixels between the baseline and new measurement centroids
        for fits of donut images to consider the new fit approximately as
        successful.
    logLevel : `int`, optional
        Level of QuickFrameMeasurementTask log messages. Setting to 50 means
        that only CRITICAL messages will be printed.
    """

    def __init__(
        self,
        butler: dafButler.Butler,
        dataProduct: str = "quickLookExp",
        dataset: str = "data/qfm_baseline_assessment.parq",
        successCut: float = 2,
        nearSuccessCut: float = 10,
        donutCut: float = 10,
        logLevel: int = 50,
    ):
        self.butler = butler

        # Use the stock task configuration so the comparison reflects code
        # changes in the task rather than configuration changes.
        qfmTaskConfig = QuickFrameMeasurementTaskConfig()
        self.qfmTask = QuickFrameMeasurementTask(config=qfmTaskConfig)
        self.qfmTask.log.setLevel(logLevel)

        # Baseline, by-eye vetted results that new runs are compared against.
        self.testData = pd.read_parquet(dataset)
        self.dataProduct = dataProduct
        # One butler dataId per vetted exposure, in table order.
        self.dataIds = [
            {"day_obs": row["day_obs"], "seq_num": row["sequence_number"], "detector": row["detector"]}
            for i, row in self.testData.iterrows()
        ]

        # Per-category centroid-distance thresholds (pixels) used in
        # compareToBaseline; keys match the vetting tags below.
        self.cuts = {"G": successCut, "QG": nearSuccessCut, "DG": donutCut}

        # Human-readable meanings of the by-eye vetting tags stored in the
        # baseline table's "finalTag" column.
        self.resultKey = {
            "G": "Success",  # Centroid is centered on the brightest star
            "QG": "Near success",  # Centroid is near the center of the brightest star
            "BI": "Bad image",  # A tracking issue, for example. Don't expect good fit
            "WF": "Wrong star",  # Centroid is not on the brightest star
            "OF": "Other failure",  # Other source of failure
            "FG": "Good failure",  # Calibration image, so failure is expected
            "FP": "False positive",  # No stars, so fit should have failed
            "DG": "Success (Donut)",  # Donut image, centroid is somewhere on donut
            "DF": "Failure (Donut)",  # Donut image, fit failed
            "SG": "Success (Giant donut)",  # Giant donut, centroid is somewhere on donut
            "SF": "Failure (Giant donut)",  # Giant donut, fit failed
            "U": "Ambiguous",  # Centroid is on a star, but unclear whether it is the brightest
        }

    def run(self, nSamples: int | None = None, nProcesses: int = 1, outputFile: str | None = None) -> None:
        """Run quickFrameMeasurement on a sample dataset and compare the
        new results with the baseline, by-eye vetted results.

        The comparison summary is printed to stdout via `compareToBaseline`.

        Parameters
        ----------
        nSamples : `int`, optional
            Number of exposures to check. If greater than the number of
            exposures in the vetted dataset, all will be checked. If
            `None` (default), all exposures are checked.
        nProcesses : `int`, optional
            Number of worker processes to use. If greater than one,
            multiprocessing is used.
        outputFile : `str`, optional
            If provided, write the new QuickFrameMeasurement results to
            this Parquet file.
        """

        if nSamples is not None:
            if nSamples > len(self.dataIds):
                nSamples = len(self.dataIds)
            # Draw a random subset (without replacement) of the vetted rows.
            samples = np.random.choice(range(len(self.dataIds)), size=nSamples, replace=False)
            testSubset = self.testData.iloc[samples]
        else:
            testSubset = self.testData

        if nProcesses > 1:
            # Split the table into one chunk per worker and measure the
            # chunks in parallel, then recombine in order.
            with Pool(processes=nProcesses) as p:
                df_split = np.array_split(testSubset, nProcesses)
                pool_process = p.map(self._runQFM, df_split)
                qfmResults = pd.concat(pool_process)
        else:
            qfmResults = self._runQFM(testSubset)

        if outputFile:
            qfmResults.to_parquet(outputFile)

        self.compareToBaseline(qfmResults)

    def _runQFM(self, testset: pd.DataFrame) -> pd.DataFrame:
        """Run quickFrameMeasurement on a subset of the dataset.

        Parameters
        ----------
        testset : `pandas.DataFrame`
            Table of vetted exposures.

        Returns
        -------
        qfmResults : `pandas.DataFrame`
            Table of results from new quickFrameMeasurement run.
        """

        # Same columns as the baseline table so the two can be compared
        # row-for-row; same index so subsets stay aligned.
        qfmResults = pd.DataFrame(index=testset.index, columns=self.testData.columns)
        for i, row in testset.iterrows():
            dataId = {
                "day_obs": row["day_obs"],
                "seq_num": row["sequence_number"],
                "detector": row["detector"],
            }

            exp = self.butler.get(self.dataProduct, dataId=dataId)

            # Time just the measurement itself, not the butler I/O.
            t1 = time.time()
            result = self.qfmTask.run(exp)
            t2 = time.time()
            qfmResults.at[i, "runtime"] = t2 - t1

            if result.success:
                pixCoord = result.brightestObjCentroid
                qfmResults.at[i, "centroid_x"] = pixCoord[0]
                qfmResults.at[i, "centroid_y"] = pixCoord[1]
                # "P"/"F" record pass/fail of the new run; the vetted tags
                # in the baseline table are richer (see self.resultKey).
                qfmResults.at[i, "finalTag"] = "P"

            else:
                qfmResults.at[i, "finalTag"] = "F"
        return qfmResults

    def compareToBaseline(self, comparisonData: pd.DataFrame) -> None:
        """Compare a table of quickFrameMeasurement results with the
        baseline vetted data, and print output of the comparison.

        Parameters
        ----------
        comparisonData : `pandas.DataFrame`
            Table to compare with baseline results.
        """
        # Align the baseline rows to whatever subset was actually run.
        baselineData = self.testData.loc[comparisonData.index]

        # First the cases that succeeded in the baseline results:
        for key in ["G", "QG", "WF", "DG", "SG", "FP", "U"]:
            key_inds = baselineData["finalTag"] == key
            if key_inds.sum() == 0:
                continue
            origResults = baselineData[key_inds]
            newResults = comparisonData[key_inds]

            stillSucceeds = (newResults["finalTag"] == "P").sum()
            print(f"Results for '{self.resultKey[key]}' cases:")
            print(f" {stillSucceeds} out of {len(origResults)} still succeed")

            # Euclidean distance between baseline and new centroids, per row.
            centroid_distances = (
                (origResults["centroid_x"] - newResults["centroid_x"]) ** 2
                + (origResults["centroid_y"] - newResults["centroid_y"]) ** 2
            ) ** 0.5

            # Only these categories have a distance threshold in self.cuts.
            if key in ["G", "QG", "DG"]:
                inCut = centroid_distances < self.cuts[key]
                print(
                    f" {inCut.sum()} out of {len(origResults)} centroids are within {self.cuts[key]} "
                    "pixels of the baseline centroid fit."
                )
            # For ambiguous/wrong-star/near-success cases, list each exposure
            # so a human can re-inspect the individual fits.
            if key in ["U", "WF", "QG"]:
                print(" Individual exposures:")
                print(f" {'day_obs':<10}{'sequence_number':<17}{'old centroid':<17}{'new centroid':<17}")
                for i, res in origResults.iterrows():
                    newRes = newResults.loc[i]
                    old_centroid = f"({res['centroid_x']:.1f}, {res['centroid_y']:.1f})"
                    new_centroid = f"({newRes['centroid_x']:.1f}, {newRes['centroid_y']:.1f})"
                    print(
                        f" {res['day_obs']:<10}{res['sequence_number']:<17}{old_centroid:<17}"
                        f"{new_centroid:<17}"
                    )

        # Next the cases that failed in the past:
        for key in ["FG", "DF", "SF", "OF"]:
            key_inds = baselineData["finalTag"] == key
            if key_inds.sum() == 0:
                continue
            origResults = baselineData[key_inds]
            newResults = comparisonData[key_inds]

            stillFails = (newResults["finalTag"] == "F").sum()
            print(f"Results for '{self.resultKey[key]}' cases:")
            print(f" {stillFails} out of {len(origResults)} still fail")

        print("Runtime comparison:")
        print(
            f" Baseline: {np.mean(baselineData['runtime']):.2f}+/-"
            f"{np.std(baselineData['runtime']):.2f} seconds"
        )
        print(
            f" Current: {np.mean(comparisonData['runtime']):.2f}+/-"
            f"{np.std(comparisonData['runtime']):.2f} seconds"
        )

248 

249 

if __name__ == "__main__":
    # Command-line entry point: run the assessment over the vetted baseline
    # dataset and print a comparison report for the current task version.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--embargo",
        dest="embargo",
        action=argparse.BooleanOptionalAction,
        default=True,
        help="Whether to use embargo butler",
    )
    parser.add_argument(
        # Fixed help text: the value is handed to multiprocessing.Pool as a
        # number of worker *processes*, not threads.
        "--nPool",
        dest="nPool",
        default=1,
        type=int,
        help="Number of worker processes to use in multiprocessing",
    )
    parser.add_argument(
        "--nSamples",
        dest="nSamples",
        default=None,
        type=int,
        help="Number of sample exposures to use in assessment (default is all)",
    )
    parser.add_argument(
        "-o",
        "--output-file",
        dest="outputFile",
        default="newQFMresults.parq",
        help="Name of output file for new quickFrameMeasurement results",
    )
    args = parser.parse_args()

    butler = butlerUtils.makeDefaultLatissButler(embargo=args.embargo)
    assess = AssessQFM(butler)
    nSamples = args.nSamples

    # Time the full assessment, including butler reads inside the run.
    t0 = time.time()
    assess.run(nSamples=nSamples, nProcesses=args.nPool, outputFile=args.outputFile)
    t1 = time.time()
    if nSamples is None:
        # All exposures were used; report the full dataset size.
        nSamples = assess.testData.shape[0]
    print(f"Total time for {nSamples} samples and {args.nPool} cores: {(t1 - t0):.2f} seconds")