Coverage for python / lsst / summit / extras / assessQFM.py: 0%

103 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-04-28 09:30 +0000

1# This file is part of summit_extras. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

22import argparse 

23import time 

24from multiprocessing import Pool 

25 

26import numpy as np 

27import pandas as pd 

28 

29import lsst.daf.butler as dafButler 

30import lsst.summit.utils.butlerUtils as butlerUtils 

31from lsst.pipe.tasks.quickFrameMeasurement import QuickFrameMeasurementTask, QuickFrameMeasurementTaskConfig 

32 

33 

34class AssessQFM: 

35 """Test a new version of quickFrameMeasurementTask against the baseline 

36 results. 

37 

38 Parameters 

39 ---------- 

40 butler : `lsst.daf.butler.Butler` 

41 Butler repository with the relevant exposures. 

42 dataProduct : `str`, optional 

43 Data product on which to run quickFrameMeasurement. 

44 dataset : `str`, optional 

45 File holding a table of vetted quickFrameMeasurement results. 

46 successCut : `float`, optional 

47 Distance in pixels between the baseline and new measurement centroids 

48 for already successful fits in order to consider the new fit equally 

49 successful. 

50 nearSuccessCut : `float`, optional 

51 Distance in pixels between the baseline and new measurement centroids 

52 for fits that were close to correct in order to consider the new fit 

53 approximately as successful. 

54 donutCut : `float`, optional 

55 Distance in pixels between the baseline and new measurement centroids 

56 for fits of donut images to consider the new fit approximately as 

57 successful. 

58 logLevel : `int`, optional 

59 Level of QuickFrameMeasurementTask log messages. Setting to 50 means 

60 that only CRITICAL messages will be printed. 

61 """ 

62 

63 def __init__( 

64 self, 

65 butler: dafButler.Butler, 

66 dataProduct: str = "quickLookExp", 

67 dataset: str = "data/qfm_baseline_assessment.parq", 

68 successCut: int = 2, 

69 nearSuccessCut: int = 10, 

70 donutCut: int = 10, 

71 logLevel: int = 50, 

72 ): 

73 self.butler = butler 

74 

75 qfmTaskConfig = QuickFrameMeasurementTaskConfig() 

76 self.qfmTask = QuickFrameMeasurementTask(config=qfmTaskConfig) 

77 self.qfmTask.log.setLevel(logLevel) 

78 

79 self.testData = pd.read_parquet(dataset) 

80 self.dataProduct = dataProduct 

81 self.dataIds = [ 

82 {"day_obs": row["day_obs"], "seq_num": row["sequence_number"], "detector": row["detector"]} 

83 for i, row in self.testData.iterrows() 

84 ] 

85 

86 self.cuts = {"G": successCut, "QG": nearSuccessCut, "DG": donutCut} 

87 

88 self.resultKey = { 

89 "G": "Success", # Centroid is centered on the brightest star 

90 "QG": "Near success", # Centroid is near the center of the brightest star 

91 "BI": "Bad image", # A tracking issue, for example. Don't expect good fit 

92 "WF": "Wrong star", # Centroid is not on the brightest star 

93 "OF": "Other failure", # Other source of failure 

94 "FG": "Good failure", # Calibration image, so failure is expected 

95 "FP": "False positive", # No stars, so fit should have failed 

96 "DG": "Success (Donut)", # Donut image, centroid is somewhere on donut 

97 "DF": "Failure (Donut)", # Donut image, fit failed 

98 "SG": "Success (Giant donut)", # Giant donut, centroid is somewhere on donut 

99 "SF": "Failure (Giant donut)", # Giant donut, fit failed 

100 "U": "Ambiguous", # Centroid is on a star, but unclear whether it is the brightest 

101 } 

102 

103 def run(self, nSamples: int | None = None, nProcesses: int = 1, outputFile: str | None = None) -> None: 

104 """Run quickFrameMeasurement on a sample dataset, save the new results, 

105 and compare them with the baseline, vetted by-eye results. 

106 

107 Parameters 

108 ---------- 

109 nSamples : `int` 

110 Number of exposures to check. If nSamples is greater than the 

111 number of exposures in the vetted dataset, will check all. 

112 nProcesses : `int` 

113 Number of threads to use. If greater than one, multithreading will 

114 be used. 

115 outputFile : `str` 

116 Name of the output file. 

117 """ 

118 

119 if nSamples is not None: 

120 if nSamples > len(self.dataIds): 

121 nSamples = len(self.dataIds) 

122 samples = np.random.choice(range(len(self.dataIds)), size=nSamples, replace=False) 

123 testSubset = self.testData.iloc[samples] 

124 else: 

125 testSubset = self.testData 

126 

127 if nProcesses > 1: 

128 with Pool(processes=nProcesses) as p: 

129 df_split = np.array_split(testSubset, nProcesses) 

130 pool_process = p.map(self._runQFM, df_split) 

131 qfmResults = pd.concat(pool_process) 

132 else: 

133 qfmResults = self._runQFM(testSubset) 

134 

135 if outputFile: 

136 qfmResults.to_parquet(outputFile) 

137 

138 self.compareToBaseline(qfmResults) 

139 

140 def _runQFM(self, testset: pd.DataFrame) -> pd.DataFrame: 

141 """Run quickFrameMeasurement on a subset of the dataset. 

142 

143 Parameters 

144 ---------- 

145 testset : `pandas.DataFrame` 

146 Table of vetted exposures. 

147 

148 Returns 

149 ------- 

150 qfmResults : `pandas.DataFrame` 

151 Table of results from new quickFrameMeasurement run. 

152 """ 

153 

154 qfmResults = pd.DataFrame(index=testset.index, columns=self.testData.columns) 

155 for i, row in testset.iterrows(): 

156 dataId = { 

157 "day_obs": row["day_obs"], 

158 "seq_num": row["sequence_number"], 

159 "detector": row["detector"], 

160 } 

161 

162 exp = self.butler.get(self.dataProduct, dataId=dataId) 

163 qfmRes = qfmResults.loc[i] 

164 

165 t1 = time.time() 

166 result = self.qfmTask.run(exp) 

167 t2 = time.time() 

168 qfmRes["runtime"] = t2 - t1 

169 

170 if result.success: 

171 pixCoord = result.brightestObjCentroid 

172 qfmRes["centroid_x"] = pixCoord[0] 

173 qfmRes["centroid_y"] = pixCoord[1] 

174 qfmRes["finalTag"] = "P" 

175 

176 else: 

177 qfmRes["finalTag"] = "F" 

178 return qfmResults 

179 

180 def compareToBaseline(self, comparisonData: pd.DataFrame) -> None: 

181 """Compare a table of quickFrameMeasurement results with the 

182 baseline vetted data, and print output of the comparison. 

183 

184 Parameters 

185 ---------- 

186 comparisonData : `pandas.DataFrame` 

187 Table to compare with baseline results. 

188 """ 

189 baselineData = self.testData.loc[comparisonData.index] 

190 

191 # First the cases that succeeded in the baseline results: 

192 for key in ["G", "QG", "WF", "DG", "SG", "FP", "U"]: 

193 key_inds = baselineData["finalTag"] == key 

194 if key_inds.sum() == 0: 

195 continue 

196 origResults = baselineData[key_inds] 

197 newResults = comparisonData[key_inds] 

198 

199 stillSucceeds = (newResults["finalTag"] == "P").sum() 

200 print(f"Results for '{self.resultKey[key]}' cases:") 

201 print(f" {stillSucceeds} out of {len(origResults)} still succeed") 

202 

203 centroid_distances = ( 

204 (origResults["centroid_x"] - newResults["centroid_x"]) ** 2 

205 + (origResults["centroid_y"] - newResults["centroid_y"]) ** 2 

206 ) ** 0.5 

207 

208 if key in ["G", "QG", "DG"]: 

209 inCut = centroid_distances < self.cuts[key] 

210 print( 

211 f" {inCut.sum()} out of {len(origResults)} centroids are within {self.cuts[key]} " 

212 "pixels of the baseline centroid fit." 

213 ) 

214 if key in ["U", "WF", "QG"]: 

215 print(" Individual exposures:") 

216 print(f" {'day_obs':<10}{'sequence_number':<17}{'old centroid':<17}{'new centroid':<17}") 

217 for i, res in origResults.iterrows(): 

218 newRes = newResults.loc[i] 

219 old_centroid = f"({res['centroid_x']:.1f}, {res['centroid_y']:.1f})" 

220 new_centroid = f"({newRes['centroid_x']:.1f}, {newRes['centroid_y']:.1f})" 

221 print( 

222 f" {res['day_obs']:<10}{res['sequence_number']:<17}{old_centroid:<17}" 

223 f"{new_centroid:<17}" 

224 ) 

225 

226 # Next the cases that failed in the past: 

227 for key in ["FG", "DF", "SF", "OF"]: 

228 key_inds = baselineData["finalTag"] == key 

229 if key_inds.sum() == 0: 

230 continue 

231 origResults = baselineData[key_inds] 

232 newResults = comparisonData[key_inds] 

233 

234 stillFails = (newResults["finalTag"] == "F").sum() 

235 print(f"Results for '{self.resultKey[key]}' cases:") 

236 print(f" {stillFails} out of {len(origResults)} still fail") 

237 

238 print("Runtime comparison:") 

239 print( 

240 f" Baseline: {np.mean(baselineData['runtime']):.2f}+/-" 

241 f"{np.std(baselineData['runtime']):.2f} seconds" 

242 ) 

243 print( 

244 f" Current: {np.mean(comparisonData['runtime']):.2f}+/-" 

245 f"{np.std(comparisonData['runtime']):.2f} seconds" 

246 ) 

247 

248 

if __name__ == "__main__":
    # Command-line entry point: run the assessment over the vetted dataset
    # and report timing for the whole run.
    cliParser = argparse.ArgumentParser()
    cliParser.add_argument(
        "--embargo",
        dest="embargo",
        action=argparse.BooleanOptionalAction,
        default=True,
        help="Whether to use embargo butler",
    )
    cliParser.add_argument(
        "--nPool", dest="nPool", default=1, type=int, help="Number of threads to use in multiprocessing"
    )
    cliParser.add_argument(
        "--nSamples",
        dest="nSamples",
        default=None,
        type=int,
        help="Number of sample exposures to use in assessment (default is all)",
    )
    cliParser.add_argument(
        "-o",
        "--output-file",
        dest="outputFile",
        default="newQFMresults.parq",
        help="Name of output file for new quickFrameMeasurement results",
    )
    parsedArgs = cliParser.parse_args()

    latissButler = butlerUtils.makeDefaultLatissButler(embargo=parsedArgs.embargo)
    assessor = AssessQFM(latissButler)
    sampleCount = parsedArgs.nSamples

    startTime = time.time()
    assessor.run(nSamples=sampleCount, nProcesses=parsedArgs.nPool, outputFile=parsedArgs.outputFile)
    endTime = time.time()
    if sampleCount is None:
        # run() defaults to the full vetted dataset; report its actual size.
        sampleCount = assessor.testData.shape[0]
    print(
        f"Total time for {sampleCount} samples and {parsedArgs.nPool} cores: "
        f"{(endTime - startTime):.2f} seconds"
    )