Coverage for python/lsst/ap/association/transformDiaSourceCatalog.py: 23%

131 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2024-03-14 10:14 +0000

1# This file is part of ap_association 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

# Public API of this module.
__all__ = ("TransformDiaSourceCatalogConnections",
           "TransformDiaSourceCatalogConfig",
           "TransformDiaSourceCatalogTask",
           "UnpackApdbFlags")

26 

27import numpy as np 

28import os 

29import yaml 

30 

31from lsst.daf.base import DateTime 

32import lsst.pex.config as pexConfig 

33import lsst.pipe.base as pipeBase 

34import lsst.pipe.base.connectionTypes as connTypes 

35from lsst.pipe.tasks.postprocess import TransformCatalogBaseTask, TransformCatalogBaseConfig 

36from lsst.pipe.tasks.parquetTable import ParquetTable 

37from lsst.pipe.tasks.functors import Column 

38from lsst.utils.timer import timeMethod 

39 

40 

class TransformDiaSourceCatalogConnections(pipeBase.PipelineTaskConnections,
                                           dimensions=("instrument", "visit", "detector"),
                                           defaultTemplates={"coaddName": "deep", "fakesType": ""}):
    """Butler connections for TransformDiaSourceCatalogTask.
    """
    diaSourceSchema = connTypes.InitInput(
        doc="Schema for DIASource catalog output by ImageDifference.",
        storageClass="SourceCatalog",
        name="{fakesType}{coaddName}Diff_diaSrc_schema",
    )
    diaSourceCat = connTypes.Input(
        doc="Catalog of DiaSources produced during image differencing.",
        name="{fakesType}{coaddName}Diff_diaSrc",
        storageClass="SourceCatalog",
        dimensions=("instrument", "visit", "detector"),
    )
    diffIm = connTypes.Input(
        doc="Difference image on which the DiaSources were detected.",
        name="{fakesType}{coaddName}Diff_differenceExp",
        storageClass="ExposureF",
        dimensions=("instrument", "visit", "detector"),
    )
    diaSourceTable = connTypes.Output(
        # Previously a placeholder ("."); describe the output for users
        # browsing the pipeline connections.
        doc="Catalog of DiaSources with standardized columns, stored as a "
            "DataFrame.",
        name="{fakesType}{coaddName}Diff_diaSrcTable",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector"),
    )

69 

70 

class TransformDiaSourceCatalogConfig(TransformCatalogBaseConfig,
                                      pipelineConnections=TransformDiaSourceCatalogConnections):
    """Config for TransformDiaSourceCatalogTask.
    """
    flagMap = pexConfig.Field(
        dtype=str,
        doc="Yaml file specifying SciencePipelines flag fields to bit packs.",
        default=os.path.join("${AP_ASSOCIATION_DIR}",
                             "data",
                             "association-flag-map.yaml"),
    )
    flagRenameMap = pexConfig.Field(
        dtype=str,
        # Fixed duplicated word ("specifying specifying") in the doc string.
        doc="Yaml file specifying rules to rename flag names",
        default=os.path.join("${AP_ASSOCIATION_DIR}",
                             "data",
                             "flag-rename-rules.yaml"),
    )
    doRemoveSkySources = pexConfig.Field(
        dtype=bool,
        default=False,
        doc="Input DiaSource catalog contains SkySources that should be "
            "removed before storing the output DiaSource catalog."
    )
    doPackFlags = pexConfig.Field(
        dtype=bool,
        default=True,
        doc="Do pack the flags into one integer column named 'flags'."
            "If False, instead produce one boolean column per flag."
    )

    def setDefaults(self):
        super().setDefaults()
        # Default functor file that defines the DiaSource column transforms.
        self.functorFile = os.path.join("${AP_ASSOCIATION_DIR}",
                                        "data",
                                        "DiaSource.yaml")

107 

108 

class TransformDiaSourceCatalogTask(TransformCatalogBaseTask):
    """Apply Science DataModel-ification on the DiaSource afw table.

    This task calibrates and renames columns in the DiaSource catalog
    to ready the catalog for insertion into the Apdb.

    This is a Gen3 Butler only task. It will not run in Gen2.
    """

    ConfigClass = TransformDiaSourceCatalogConfig
    _DefaultName = "transformDiaSourceCatalog"
    RunnerClass = pipeBase.ButlerInitializedTaskRunner
    # Needed to create a valid TransformCatalogBaseTask, but unused
    inputDataset = "deepDiff_diaSrc"
    outputDataset = "deepDiff_diaSrcTable"

    def __init__(self, initInputs, **kwargs):
        super().__init__(**kwargs)
        self.funcs = self.getFunctors()
        self.inputSchema = initInputs['diaSourceSchema'].schema
        self._create_bit_pack_mappings()

        if not self.config.doPackFlags:
            # Get the flag rename rules; only needed when producing one
            # boolean column per flag instead of a packed integer column.
            with open(os.path.expandvars(self.config.flagRenameMap)) as yaml_stream:
                self.rename_rules = list(yaml.safe_load_all(yaml_stream))

    def _create_bit_pack_mappings(self):
        """Setup all flag bit packings.

        Reads the yaml file pointed to by ``config.flagMap`` and stores the
        bit packing definitions for the DiaSource table in
        ``self.bit_pack_columns``.

        Raises
        ------
        KeyError
            Raised if a flag requested in the flag map is not present in the
            input DiaSource schema.
        """
        self.bit_pack_columns = []
        flag_map_file = os.path.expandvars(self.config.flagMap)
        with open(flag_map_file) as yaml_stream:
            table_list = list(yaml.safe_load_all(yaml_stream))
            for table in table_list:
                if table['tableName'] == 'DiaSource':
                    self.bit_pack_columns = table['columns']
                    break

        # Test that all flags requested are present in the input schemas.
        # Output schemas are flexible, however if names are not specified in
        # the Apdb schema, flag columns will not be persisted.
        for outputFlag in self.bit_pack_columns:
            bitList = outputFlag['bitList']
            for bit in bitList:
                try:
                    self.inputSchema.find(bit['name'])
                except KeyError:
                    raise KeyError(
                        "Requested column %s not found in input DiaSource "
                        "schema. Please check that the requested input "
                        "column exists." % bit['name'])

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)
        # Pack visit+detector into a unique integer id for this ccd-visit;
        # the max-bits return value is unused but required to request a
        # deterministic packing.
        expId, expBits = butlerQC.quantum.dataId.pack("visit_detector",
                                                      returnMaxBits=True)
        inputs["ccdVisitId"] = expId
        inputs["band"] = butlerQC.quantum.dataId["band"]

        outputs = self.run(**inputs)

        butlerQC.put(outputs, outputRefs)

    @timeMethod
    def run(self,
            diaSourceCat,
            diffIm,
            band,
            ccdVisitId,
            funcs=None):
        """Convert input catalog to ParquetTable/Pandas and run functors.

        Additionally, add new columns for stripping information from the
        exposure and into the DiaSource catalog.

        Parameters
        ----------
        diaSourceCat : `lsst.afw.table.SourceCatalog`
            Catalog of sources measured on the difference image.
        diffIm : `lsst.afw.image.Exposure`
            Difference image the sources were detected on; used here to
            retrieve the visit date for the ``midPointTai`` column.
        band : `str`
            Band the sources were observed in; stored as ``filterName``.
        ccdVisitId : `int`
            Unique id of the ccd-visit the sources were observed in.
        funcs : unused
            Accepted for signature compatibility with the base class; this
            task always applies ``self.funcs``.

        Returns
        -------
        results : `lsst.pipe.base.Struct`
            Results struct with components.

            - ``diaSourceTable`` : Catalog of DiaSources with calibrated values
              and renamed columns.
              (`lsst.pipe.tasks.ParquetTable` or `pandas.DataFrame`)
        """
        self.log.info(
            "Transforming/standardizing the DiaSource table ccdVisitId: %i",
            ccdVisitId)

        diaSourceDf = diaSourceCat.asAstropy().to_pandas()
        if self.config.doRemoveSkySources:
            diaSourceDf = diaSourceDf[~diaSourceDf["sky_source"]]
        diaSourceDf["bboxSize"] = self.computeBBoxSizes(diaSourceCat)
        diaSourceDf["ccdVisitId"] = ccdVisitId
        diaSourceDf["filterName"] = band
        diaSourceDf["midPointTai"] = diffIm.getInfo().getVisitInfo().getDate().get(system=DateTime.MJD)
        # Association ids are filled in later; initialize to the unassociated
        # sentinel value 0.
        diaSourceDf["diaObjectId"] = 0
        diaSourceDf["ssObjectId"] = 0
        if self.config.doPackFlags:
            # either bitpack the flags
            self.bitPackFlags(diaSourceDf)
        else:
            # or add the individual flag functors
            self.addUnpackedFlagFunctors()
            # and remove the packed flag functor
            if 'flags' in self.funcs.funcDict:
                del self.funcs.funcDict['flags']

        df = self.transform(band,
                            ParquetTable(dataFrame=diaSourceDf),
                            self.funcs,
                            dataId=None).df

        return pipeBase.Struct(
            diaSourceTable=df,
        )

    def addUnpackedFlagFunctors(self):
        """Add Column functor for each of the flags

        to the internal functor dictionary
        """
        for flag in self.bit_pack_columns[0]['bitList']:
            flagName = flag['name']
            targetName = self.funcs.renameCol(flagName, self.rename_rules[0]['flag_rename_rules'])
            self.funcs.update({targetName: Column(flagName)})

    def computeBBoxSizes(self, inputCatalog):
        """Compute the size of a square bbox that fully contains the detection
        footprint.

        Parameters
        ----------
        inputCatalog : `lsst.afw.table.SourceCatalog`
            Catalog containing detected footprints.

        Returns
        -------
        outputBBoxSizes : `list` of `float`
            Array of bbox sizes.
        """
        outputBBoxSizes = []
        for record in inputCatalog:
            if self.config.doRemoveSkySources:
                # Skip sky sources so the list lines up with the filtered
                # DataFrame built in run().
                if record["sky_source"]:
                    continue
            footprintBBox = record.getFootprint().getBBox()
            # Compute twice the size of the largest dimension of the footprint
            # bounding box. This is the largest footprint we should need to
            # cover the complete DiaSource assuming the centroid is within
            # the bounding box.
            maxSize = 2 * np.max([footprintBBox.getWidth(),
                                  footprintBBox.getHeight()])
            recX = record.getCentroid().x
            recY = record.getCentroid().y
            bboxSize = int(
                np.ceil(2 * np.max(np.fabs([footprintBBox.maxX - recX,
                                            footprintBBox.minX - recX,
                                            footprintBBox.maxY - recY,
                                            footprintBBox.minY - recY]))))
            if bboxSize > maxSize:
                bboxSize = maxSize
            outputBBoxSizes.append(bboxSize)

        return outputBBoxSizes

    def bitPackFlags(self, df):
        """Pack requested flag columns in inputRecord into single columns in
        outputRecord.

        Parameters
        ----------
        df : `pandas.DataFrame`
            DataFrame to read bits from and pack them into.
        """
        for outputFlag in self.bit_pack_columns:
            bitList = outputFlag['bitList']
            value = np.zeros(len(df), dtype=np.uint64)
            for bit in bitList:
                # Hard type the bit arrays.
                value += (df[bit['name']]*2**bit['bit']).to_numpy().astype(np.uint64)
            df[outputFlag['columnName']] = value

294 

295 

class UnpackApdbFlags:
    """Class for unpacking bits from integer flag fields stored in the Apdb.

    Attributes
    ----------
    flag_map_file : `str`
        Absolute or relative path to a yaml file specifying mappings of flags
        to integer bits.
    table_name : `str`
        Name of the Apdb table the integer bit data are coming from.
    """

    def __init__(self, flag_map_file, table_name):
        self.bit_pack_columns = []
        flag_map_file = os.path.expandvars(flag_map_file)
        with open(flag_map_file) as yaml_stream:
            table_list = list(yaml.safe_load_all(yaml_stream))
            for table in table_list:
                if table['tableName'] == table_name:
                    self.bit_pack_columns = table['columns']
                    break

        # Mapping from packed column name to the list of (flag name, bool)
        # dtype entries used to build the unpacked output array.
        self.output_flag_columns = {}

        for column in self.bit_pack_columns:
            names = []
            for bit in column["bitList"]:
                names.append((bit["name"], bool))
            self.output_flag_columns[column["columnName"]] = names

    def unpack(self, input_flag_values, flag_name):
        """Determine individual boolean flags from an input array of unsigned
        ints.

        Parameters
        ----------
        input_flag_values : array-like of type uint
            Array of integer flags to unpack.
        flag_name : `str`
            Apdb column name of integer flags to unpack. Names of packed int
            flags are given by the flag_map_file.

        Returns
        -------
        output_flags : `numpy.ndarray`
            Numpy named tuple of booleans.
        """
        bit_names_types = self.output_flag_columns[flag_name]
        output_flags = np.zeros(len(input_flag_values), dtype=bit_names_types)

        # Use the bit position declared in the flag map for each flag rather
        # than the enumeration index of the dtype entry: the two only agree
        # when the mapped bits happen to be contiguous and start at zero, so
        # relying on the index silently unpacks the wrong bits otherwise.
        bit_positions = {}
        for column in self.bit_pack_columns:
            if column["columnName"] == flag_name:
                bit_positions = {bit["name"]: bit["bit"]
                                 for bit in column["bitList"]}
                break

        for bit_name, dtypes in bit_names_types:
            masked_bits = np.bitwise_and(input_flag_values,
                                         2**bit_positions[bit_name])
            # Any nonzero masked value maps to True in the boolean field.
            output_flags[bit_name] = masked_bits

        return output_flags