Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

# This file is part of ap_association
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

21 

22__all__ = ("TransformDiaSourceCatalogConnections", 

23 "TransformDiaSourceCatalogConfig", 

24 "TransformDiaSourceCatalogTask", 

25 "UnpackApdbFlags") 

26 

27import numpy as np 

28import os 

29import yaml 

30 

31from lsst.daf.base import DateTime 

32import lsst.pex.config as pexConfig 

33import lsst.pipe.base as pipeBase 

34import lsst.pipe.base.connectionTypes as connTypes 

35from lsst.pipe.tasks.postprocess import TransformCatalogBaseTask, TransformCatalogBaseConfig 

36from lsst.pipe.tasks.parquetTable import ParquetTable 

37from lsst.pipe.tasks.functors import Column 

38 

39 

class TransformDiaSourceCatalogConnections(pipeBase.PipelineTaskConnections,
                                           dimensions=("instrument", "visit", "detector"),
                                           defaultTemplates={"coaddName": "deep", "fakesType": ""}):
    """Butler connections for TransformDiaSourceCatalogTask.
    """
    diaSourceSchema = connTypes.InitInput(
        doc="Schema for DIASource catalog output by ImageDifference.",
        storageClass="SourceCatalog",
        name="{fakesType}{coaddName}Diff_diaSrc_schema",
    )
    diaSourceCat = connTypes.Input(
        doc="Catalog of DiaSources produced during image differencing.",
        name="{fakesType}{coaddName}Diff_diaSrc",
        storageClass="SourceCatalog",
        dimensions=("instrument", "visit", "detector"),
    )
    diffIm = connTypes.Input(
        doc="Difference image on which the DiaSources were detected.",
        name="{fakesType}{coaddName}Diff_differenceExp",
        storageClass="ExposureF",
        dimensions=("instrument", "visit", "detector"),
    )
    diaSourceTable = connTypes.Output(
        # Was a placeholder doc string ("."); now describes the dataset.
        doc="Transformed and standardized DiaSource catalog stored as a "
            "DataFrame.",
        name="{fakesType}{coaddName}Diff_diaSrcTable",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector"),
    )

68 

69 

class TransformDiaSourceCatalogConfig(TransformCatalogBaseConfig,
                                      pipelineConnections=TransformDiaSourceCatalogConnections):
    """Config for TransformDiaSourceCatalogTask.
    """
    flagMap = pexConfig.Field(
        dtype=str,
        doc="Yaml file specifying SciencePipelines flag fields to bit packs.",
        default=os.path.join("${AP_ASSOCIATION_DIR}",
                             "data",
                             "association-flag-map.yaml"),
    )
    flagRenameMap = pexConfig.Field(
        dtype=str,
        # Fixed duplicated word ("specifying specifying") in the doc string.
        doc="Yaml file specifying rules to rename flag names",
        default=os.path.join("${AP_ASSOCIATION_DIR}",
                             "data",
                             "flag-rename-rules.yaml"),
    )
    doRemoveSkySources = pexConfig.Field(
        dtype=bool,
        default=False,
        doc="Input DiaSource catalog contains SkySources that should be "
            "removed before storing the output DiaSource catalog."
    )
    doPackFlags = pexConfig.Field(
        dtype=bool,
        default=True,
        doc="Do pack the flags into one integer column named 'flags'."
            "If False, instead produce one boolean column per flag."
    )

    def setDefaults(self):
        super().setDefaults()
        # Default functor file shipped with this package.
        self.functorFile = os.path.join("${AP_ASSOCIATION_DIR}",
                                        "data",
                                        "DiaSource.yaml")

106 

107 

class TransformDiaSourceCatalogTask(TransformCatalogBaseTask):
    """Apply Science DataModel-ification on the DiaSource afw table.

    This task calibrates and renames columns in the DiaSource catalog
    to ready the catalog for insertion into the Apdb.

    This is a Gen3 Butler only task. It will not run in Gen2.
    """

    ConfigClass = TransformDiaSourceCatalogConfig
    _DefaultName = "transformDiaSourceCatalog"
    RunnerClass = pipeBase.ButlerInitializedTaskRunner
    # Needed to create a valid TransformCatalogBaseTask, but unused
    inputDataset = "deepDiff_diaSrc"
    outputDataset = "deepDiff_diaSrcTable"

    def __init__(self, initInputs, **kwargs):
        """Initialize the task and validate the flag map against the input
        DiaSource schema.

        Parameters
        ----------
        initInputs : `dict`
            Must contain a ``diaSourceSchema`` entry exposing a ``schema``
            attribute (the schema of the DiaSource catalog connection).
        **kwargs
            Forwarded to ``TransformCatalogBaseTask``.
        """
        super().__init__(**kwargs)
        self.funcs = self.getFunctors()
        self.inputSchema = initInputs['diaSourceSchema'].schema
        self._create_bit_pack_mappings()

        if not self.config.doPackFlags:
            # get the flag rename rules; only needed when flags are output
            # as individual boolean columns.
            with open(os.path.expandvars(self.config.flagRenameMap)) as yaml_stream:
                self.rename_rules = list(yaml.safe_load_all(yaml_stream))

    def _create_bit_pack_mappings(self):
        """Setup all flag bit packings.

        Loads ``config.flagMap`` and keeps the column specifications for the
        ``DiaSource`` table in ``self.bit_pack_columns`` (empty list if no
        matching table is found).

        Raises
        ------
        KeyError
            Raised if a flag named in the map is missing from the input
            DiaSource schema.
        """
        self.bit_pack_columns = []
        flag_map_file = os.path.expandvars(self.config.flagMap)
        with open(flag_map_file) as yaml_stream:
            table_list = list(yaml.safe_load_all(yaml_stream))
            for table in table_list:
                if table['tableName'] == 'DiaSource':
                    self.bit_pack_columns = table['columns']
                    break

        # Test that all flags requested are present in the input schemas.
        # Output schemas are flexible, however if names are not specified in
        # the Apdb schema, flag columns will not be persisted.
        for outputFlag in self.bit_pack_columns:
            bitList = outputFlag['bitList']
            for bit in bitList:
                try:
                    self.inputSchema.find(bit['name'])
                except KeyError:
                    raise KeyError(
                        "Requested column %s not found in input DiaSource "
                        "schema. Please check that the requested input "
                        "column exists." % bit['name'])

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        """Fetch inputs, derive ``ccdVisitId`` and ``band`` from the data id,
        run the task, and write the outputs.
        """
        inputs = butlerQC.get(inputRefs)
        # Pack visit+detector into a single integer id for the output table.
        expId, expBits = butlerQC.quantum.dataId.pack("visit_detector",
                                                      returnMaxBits=True)
        inputs["ccdVisitId"] = expId
        inputs["band"] = butlerQC.quantum.dataId["band"]

        outputs = self.run(**inputs)

        butlerQC.put(outputs, outputRefs)

    @pipeBase.timeMethod
    def run(self,
            diaSourceCat,
            diffIm,
            band,
            ccdVisitId,
            funcs=None):
        """Convert input catalog to ParquetTable/Pandas and run functors.

        Additionally, add new columns for stripping information from the
        exposure and into the DiaSource catalog.

        Parameters
        ----------
        diaSourceCat : `lsst.afw.table.SourceCatalog`
            Catalog of DiaSources from image differencing.
        diffIm : exposure-like
            Difference image; only its visit date is read here, to fill
            the ``midPointTai`` column.
        band : `str`
            Band name, stored in the ``filterName`` column.
        ccdVisitId : `int`
            Packed visit/detector id, stored in the ``ccdVisitId`` column.
        funcs : optional
            NOTE(review): currently ignored — ``self.funcs`` is always
            passed to ``transform``; confirm whether this parameter should
            be honored.

        Returns
        -------
        results : `lsst.pipe.base.Struct`
            Results struct with components.

            - ``diaSourceTable`` : Catalog of DiaSources with calibrated values
              and renamed columns.
              (`lsst.pipe.tasks.ParquetTable` or `pandas.DataFrame`)
        """
        self.log.info(
            "Transforming/standardizing the DiaSource table ccdVisitId: %i",
            ccdVisitId)

        diaSourceDf = diaSourceCat.asAstropy().to_pandas()
        if self.config.doRemoveSkySources:
            # Drop sky sources; computeBBoxSizes skips the same rows so the
            # bboxSize column below stays aligned (list assignment is
            # positional).
            diaSourceDf = diaSourceDf[~diaSourceDf["sky_source"]]
        diaSourceDf["bboxSize"] = self.computeBBoxSizes(diaSourceCat)
        diaSourceDf["ccdVisitId"] = ccdVisitId
        diaSourceDf["filterName"] = band
        diaSourceDf["midPointTai"] = diffIm.getInfo().getVisitInfo().getDate().get(system=DateTime.MJD)
        # Ids initialized to 0 — presumably filled downstream during
        # association; TODO confirm against the caller.
        diaSourceDf["diaObjectId"] = 0
        diaSourceDf["ssObjectId"] = 0
        diaSourceDf["pixelId"] = 0
        if self.config.doPackFlags:
            # either bitpack the flags
            self.bitPackFlags(diaSourceDf)
        else:
            # or add the individual flag functors
            self.addUnpackedFlagFunctors()
            # and remove the packed flag functor
            if 'flags' in self.funcs.funcDict:
                del self.funcs.funcDict['flags']

        df = self.transform(band,
                            ParquetTable(dataFrame=diaSourceDf),
                            self.funcs,
                            dataId=None).df

        return pipeBase.Struct(
            diaSourceTable=df,
        )

    def addUnpackedFlagFunctors(self):
        """Add Column functor for each of the flags

        to the internal functor dictionary, renaming each flag according to
        the loaded rename rules.
        """
        for flag in self.bit_pack_columns[0]['bitList']:
            flagName = flag['name']
            targetName = self.funcs.renameCol(flagName, self.rename_rules[0]['flag_rename_rules'])
            self.funcs.update({targetName: Column(flagName)})

    def computeBBoxSizes(self, inputCatalog):
        """Compute the size of a square bbox that fully contains the detection
        footprint.

        Parameters
        ----------
        inputCatalog : `lsst.afw.table.SourceCatalog`
            Catalog containing detected footprints.

        Returns
        -------
        outputBBoxSizes : `list` of `float`
            Array of bbox sizes.
        """
        outputBBoxSizes = []
        for record in inputCatalog:
            if self.config.doRemoveSkySources:
                # Keep in lockstep with the sky-source filtering in run().
                if record["sky_source"]:
                    continue
            footprintBBox = record.getFootprint().getBBox()
            # Compute twice the size of the largest dimension of the footprint
            # bounding box. This is the largest footprint we should need to cover
            # the complete DiaSource assuming the centroid is within the bounding
            # box.
            maxSize = 2 * np.max([footprintBBox.getWidth(),
                                  footprintBBox.getHeight()])
            recX = record.getCentroid().x
            recY = record.getCentroid().y
            # Twice the max distance from centroid to any bbox edge, clamped
            # to maxSize.
            bboxSize = int(
                np.ceil(2 * np.max(np.fabs([footprintBBox.maxX - recX,
                                            footprintBBox.minX - recX,
                                            footprintBBox.maxY - recY,
                                            footprintBBox.minY - recY]))))
            if bboxSize > maxSize:
                bboxSize = maxSize
            outputBBoxSizes.append(bboxSize)

        return outputBBoxSizes

    def bitPackFlags(self, df):
        """Pack requested flag columns in inputRecord into single columns in
        outputRecord.

        Parameters
        ----------
        df : `pandas.DataFrame`
            DataFrame to read bits from and pack them into. Modified in
            place: one packed uint64 column is added per entry of
            ``self.bit_pack_columns``.
        """
        for outputFlag in self.bit_pack_columns:
            bitList = outputFlag['bitList']
            value = np.zeros(len(df), dtype=np.uint64)
            for bit in bitList:
                # Hard type the bit arrays.
                value += (df[bit['name']]*2**bit['bit']).to_numpy().astype(np.uint64)
            df[outputFlag['columnName']] = value

294 

295 

class UnpackApdbFlags:
    """Class for unpacking bits from integer flag fields stored in the Apdb.

    Parameters
    ----------
    flag_map_file : `str`
        Absolute or relative path to a yaml file specifying mappings of flags
        to integer bits.
    table_name : `str`
        Name of the Apdb table the integer bit data are coming from.
    """

    def __init__(self, flag_map_file, table_name):
        self.bit_pack_columns = []
        flag_map_file = os.path.expandvars(flag_map_file)
        with open(flag_map_file) as yaml_stream:
            table_list = list(yaml.safe_load_all(yaml_stream))
            for table in table_list:
                if table['tableName'] == table_name:
                    self.bit_pack_columns = table['columns']
                    break

        # {columnName: [(flagName, bool), ...]} — dtype spec for the output
        # structured array of unpack().
        self.output_flag_columns = {}
        # {columnName: {flagName: bit}} — the *declared* bit position of each
        # flag. Using the declared bit (rather than list position) keeps
        # unpacking consistent with bitPackFlags, which packs with
        # 2**bit['bit'], even if the yaml's bit numbers are non-contiguous.
        self.bit_positions = {}

        for column in self.bit_pack_columns:
            names = []
            bits = {}
            for bit in column["bitList"]:
                names.append((bit["name"], bool))
                bits[bit["name"]] = bit["bit"]
            self.output_flag_columns[column["columnName"]] = names
            self.bit_positions[column["columnName"]] = bits

    def unpack(self, input_flag_values, flag_name):
        """Determine individual boolean flags from an input array of unsigned
        ints.

        Parameters
        ----------
        input_flag_values : array-like of type uint
            Array of integer flags to unpack.
        flag_name : `str`
            Apdb column name of integer flags to unpack. Names of packed int
            flags are given by the flag_map_file.

        Returns
        -------
        output_flags : `numpy.ndarray`
            Numpy named tuple of booleans.
        """
        bit_names_types = self.output_flag_columns[flag_name]
        output_flags = np.zeros(len(input_flag_values), dtype=bit_names_types)
        bit_positions = self.bit_positions[flag_name]

        for bit_name, dtype in bit_names_types:
            # Bug fix: previously the enumeration index was used as the bit
            # position; now the bit declared in the flag map is used, matching
            # how the flags were packed.
            masked_bits = np.bitwise_and(input_flag_values,
                                         2**bit_positions[bit_name])
            output_flags[bit_name] = masked_bits

        return output_flags