Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of ap_association 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

# Public API of this module: the PipelineTask (with its connections and
# config) that flattens DiaSource afw tables to DataFrames, plus the
# standalone helper for unpacking Apdb flag integers.
__all__ = ("TransformDiaSourceCatalogConnections",
           "TransformDiaSourceCatalogConfig",
           "TransformDiaSourceCatalogTask",
           "UnpackApdbFlags")

26 

27import numpy as np 

28import os 

29import yaml 

30 

31from lsst.daf.base import DateTime 

32import lsst.pex.config as pexConfig 

33import lsst.pipe.base as pipeBase 

34import lsst.pipe.base.connectionTypes as connTypes 

35from lsst.pipe.tasks.postprocess import TransformCatalogBaseTask, TransformCatalogBaseConfig 

36from lsst.pipe.tasks.parquetTable import ParquetTable 

37from lsst.pipe.tasks.functors import Column 

38 

39 

class TransformDiaSourceCatalogConnections(pipeBase.PipelineTaskConnections,
                                           dimensions=("instrument", "visit", "detector"),
                                           defaultTemplates={"coaddName": "deep", "fakesType": ""}):
    """Butler connections for TransformDiaSourceCatalogTask.
    """
    diaSourceSchema = connTypes.InitInput(
        doc="Schema for DIASource catalog output by ImageDifference.",
        storageClass="SourceCatalog",
        name="{fakesType}{coaddName}Diff_diaSrc_schema",
    )
    diaSourceCat = connTypes.Input(
        doc="Catalog of DiaSources produced during image differencing.",
        name="{fakesType}{coaddName}Diff_diaSrc",
        storageClass="SourceCatalog",
        dimensions=("instrument", "visit", "detector"),
    )
    diffIm = connTypes.Input(
        doc="Difference image on which the DiaSources were detected.",
        name="{fakesType}{coaddName}Diff_differenceExp",
        storageClass="ExposureF",
        dimensions=("instrument", "visit", "detector"),
    )
    diaSourceTable = connTypes.Output(
        # Previously a placeholder ("."); describe the actual output.
        doc="Catalog of DiaSources, transformed and standardized into a "
            "DataFrame with calibrated values and renamed columns.",
        name="{fakesType}{coaddName}Diff_diaSrcTable",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector"),
    )

68 

69 

class TransformDiaSourceCatalogConfig(TransformCatalogBaseConfig,
                                      pipelineConnections=TransformDiaSourceCatalogConnections):
    """Config for TransformDiaSourceCatalogTask.
    """
    flagMap = pexConfig.Field(
        dtype=str,
        doc="Yaml file specifying SciencePipelines flag fields to bit packs.",
        default=os.path.join("${AP_ASSOCIATION_DIR}",
                             "data",
                             "association-flag-map.yaml"),
    )
    flagRenameMap = pexConfig.Field(
        dtype=str,
        # Fixed duplicated word ("specifying specifying") in the original doc.
        doc="Yaml file specifying rules to rename flag names",
        default=os.path.join("${AP_ASSOCIATION_DIR}",
                             "data",
                             "flag-rename-rules.yaml"),
    )
    doRemoveSkySources = pexConfig.Field(
        dtype=bool,
        default=False,
        doc="Input DiaSource catalog contains SkySources that should be "
            "removed before storing the output DiaSource catalog."
    )
    doPackFlags = pexConfig.Field(
        dtype=bool,
        default=True,
        doc="Do pack the flags into one integer column named 'flags'."
            "If False, instead produce one boolean column per flag."
    )

    def setDefaults(self):
        # Point the base-class functor definitions at this package's
        # DiaSource column-transformation spec.
        super().setDefaults()
        self.functorFile = os.path.join("${AP_ASSOCIATION_DIR}",
                                        "data",
                                        "DiaSource.yaml")

106 

107 

class TransformDiaSourceCatalogTask(TransformCatalogBaseTask):
    """Apply Science DataModel-ification on the DiaSource afw table.

    This task calibrates and renames columns in the DiaSource catalog
    to ready the catalog for insertion into the Apdb.

    This is a Gen3 Butler only task. It will not run in Gen2.
    """

    ConfigClass = TransformDiaSourceCatalogConfig
    _DefaultName = "transformDiaSourceCatalog"
    RunnerClass = pipeBase.ButlerInitializedTaskRunner
    # Needed to create a valid TransformCatalogBaseTask, but unused
    inputDataset = "deepDiff_diaSrc"
    outputDataset = "deepDiff_diaSrcTable"

    def __init__(self, initInputs, **kwargs):
        """
        Parameters
        ----------
        initInputs : `dict`
            Butler init inputs; must contain ``diaSourceSchema``, whose
            ``schema`` attribute is the schema of the input DiaSource catalog.
        **kwargs
            Passed through to `TransformCatalogBaseTask`.
        """
        super().__init__(**kwargs)
        self.funcs = self.getFunctors()
        self.inputSchema = initInputs['diaSourceSchema'].schema
        self._create_bit_pack_mappings()

        if not self.config.doPackFlags:
            # Unpacked-flag mode: load the rules used to rename the
            # individual boolean flag columns.
            with open(os.path.expandvars(self.config.flagRenameMap)) as yaml_stream:
                self.rename_rules = list(yaml.safe_load_all(yaml_stream))

    def _create_bit_pack_mappings(self):
        """Setup all flag bit packings.

        Reads the flag-map yaml named by ``config.flagMap`` and stores the
        ``columns`` entry for the ``DiaSource`` table in
        ``self.bit_pack_columns`` (empty list if no such table is found).

        Raises
        ------
        KeyError
            If any flag named in the map is absent from the input schema.
        """
        self.bit_pack_columns = []
        flag_map_file = os.path.expandvars(self.config.flagMap)
        with open(flag_map_file) as yaml_stream:
            table_list = list(yaml.safe_load_all(yaml_stream))
            for table in table_list:
                if table['tableName'] == 'DiaSource':
                    self.bit_pack_columns = table['columns']
                    break

        # Test that all flags requested are present in the input schemas.
        # Output schemas are flexible, however if names are not specified in
        # the Apdb schema, flag columns will not be persisted.
        for outputFlag in self.bit_pack_columns:
            bitList = outputFlag['bitList']
            for bit in bitList:
                try:
                    self.inputSchema.find(bit['name'])
                except KeyError:
                    raise KeyError(
                        "Requested column %s not found in input DiaSource "
                        "schema. Please check that the requested input "
                        "column exists." % bit['name'])

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        """Gen3 Butler entry point.

        Fetches the inputs, augments them with the packed visit+detector id
        (``ccdVisitId``) and the band from the quantum data id, runs the
        task, and persists the outputs.
        """
        inputs = butlerQC.get(inputRefs)
        expId, expBits = butlerQC.quantum.dataId.pack("visit_detector",
                                                      returnMaxBits=True)
        inputs["ccdVisitId"] = expId
        inputs["band"] = butlerQC.quantum.dataId["band"]

        outputs = self.run(**inputs)

        butlerQC.put(outputs, outputRefs)

    @pipeBase.timeMethod
    def run(self,
            diaSourceCat,
            diffIm,
            band,
            ccdVisitId,
            funcs=None):
        """Convert input catalog to ParquetTable/Pandas and run functors.

        Additionally, add new columns for stripping information from the
        exposure and into the DiaSource catalog.

        Parameters
        ----------
        diaSourceCat : `lsst.afw.table.SourceCatalog`
            Catalog of DiaSources measured on the difference image.
        diffIm : `lsst.afw.image.Exposure`
            Difference image the sources were detected on; used only for its
            visit date (``midPointTai``).
        band : `str`
            Band name of the observation, stored as ``filterName``.
        ccdVisitId : `int`
            Unique id of the ccd-visit the sources were detected on.
        funcs : optional
            Unused; functors are taken from ``self.funcs``.

        Returns
        -------
        results : `lsst.pipe.base.Struct`
            Results struct with components.

            - ``diaSourceTable`` : Catalog of DiaSources with calibrated values
              and renamed columns.
              (`lsst.pipe.tasks.ParquetTable` or `pandas.DataFrame`)
        """
        self.log.info(
            "Transforming/standardizing the DiaSource table ccdVisitId: %i",
            ccdVisitId)

        diaSourceDf = diaSourceCat.asAstropy().to_pandas()
        if self.config.doRemoveSkySources:
            # Drop sky sources before any derived columns are attached;
            # computeBBoxSizes applies the same filter so lengths match.
            diaSourceDf = diaSourceDf[~diaSourceDf["sky_source"]]
        diaSourceDf["bboxSize"] = self.computeBBoxSizes(diaSourceCat)
        diaSourceDf["ccdVisitId"] = ccdVisitId
        diaSourceDf["filterName"] = band
        diaSourceDf["midPointTai"] = diffIm.getInfo().getVisitInfo().getDate().get(system=DateTime.MJD)
        # Placeholder associations; filled in downstream by the Apdb loader.
        diaSourceDf["diaObjectId"] = 0
        diaSourceDf["ssObjectId"] = 0
        if self.config.doPackFlags:
            # either bitpack the flags
            self.bitPackFlags(diaSourceDf)
        else:
            # or add the individual flag functors
            self.addUnpackedFlagFunctors()
            # and remove the packed flag functor
            if 'flags' in self.funcs.funcDict:
                del self.funcs.funcDict['flags']

        df = self.transform(band,
                            ParquetTable(dataFrame=diaSourceDf),
                            self.funcs,
                            dataId=None).df

        return pipeBase.Struct(
            diaSourceTable=df,
        )

    def addUnpackedFlagFunctors(self):
        """Add a `Column` functor for each flag to the internal functor
        dictionary, renamed per the loaded ``rename_rules``.
        """
        for flag in self.bit_pack_columns[0]['bitList']:
            flagName = flag['name']
            targetName = self.funcs.renameCol(flagName, self.rename_rules[0]['flag_rename_rules'])
            self.funcs.update({targetName: Column(flagName)})

    def computeBBoxSizes(self, inputCatalog):
        """Compute the size of a square bbox that fully contains the detection
        footprint.

        Parameters
        ----------
        inputCatalog : `lsst.afw.table.SourceCatalog`
            Catalog containing detected footprints.

        Returns
        -------
        outputBBoxSizes : `list` of `float`
            Array of bbox sizes.
        """
        outputBBoxSizes = []
        for record in inputCatalog:
            if self.config.doRemoveSkySources:
                # Mirror the sky-source filtering done in ``run`` so this
                # list aligns row-for-row with the filtered DataFrame.
                if record["sky_source"]:
                    continue
            footprintBBox = record.getFootprint().getBBox()
            # Compute twice the size of the largest dimension of the footprint
            # bounding box. This is the largest footprint we should need to cover
            # the complete DiaSource assuming the centroid is within the bounding
            # box.
            maxSize = 2 * np.max([footprintBBox.getWidth(),
                                  footprintBBox.getHeight()])
            recX = record.getCentroid().x
            recY = record.getCentroid().y
            # Side of the smallest centroid-centered square covering the
            # footprint bbox, clipped to maxSize above.
            bboxSize = int(
                np.ceil(2 * np.max(np.fabs([footprintBBox.maxX - recX,
                                            footprintBBox.minX - recX,
                                            footprintBBox.maxY - recY,
                                            footprintBBox.minY - recY]))))
            if bboxSize > maxSize:
                bboxSize = maxSize
            outputBBoxSizes.append(bboxSize)

        return outputBBoxSizes

    def bitPackFlags(self, df):
        """Pack requested flag columns in inputRecord into single columns in
        outputRecord.

        Parameters
        ----------
        df : `pandas.DataFrame`
            DataFrame to read bits from and pack them into.
        """
        for outputFlag in self.bit_pack_columns:
            bitList = outputFlag['bitList']
            value = np.zeros(len(df), dtype=np.uint64)
            for bit in bitList:
                # Hard type the bit arrays.
                value += (df[bit['name']]*2**bit['bit']).to_numpy().astype(np.uint64)
            df[outputFlag['columnName']] = value

293 

294 

class UnpackApdbFlags:
    """Class for unpacking bits from integer flag fields stored in the Apdb.

    Attributes
    ----------
    flag_map_file : `str`
        Absolute or relative path to a yaml file specifying mappings of flags
        to integer bits.
    table_name : `str`
        Name of the Apdb table the integer bit data are coming from.
    """

    def __init__(self, flag_map_file, table_name):
        self.bit_pack_columns = []
        flag_map_file = os.path.expandvars(flag_map_file)
        with open(flag_map_file) as yaml_stream:
            table_list = list(yaml.safe_load_all(yaml_stream))
            for table in table_list:
                if table['tableName'] == table_name:
                    self.bit_pack_columns = table['columns']
                    break

        self.output_flag_columns = {}
        # Parallel map of column name -> actual bit position of each flag.
        # BUG FIX: the previous implementation discarded bit['bit'] here and
        # ``unpack`` then assumed flags occupy consecutive bits starting at
        # zero, which unpacks the wrong bits whenever the flag map skips a
        # bit or lists them out of order.
        self._flag_bits = {}

        for column in self.bit_pack_columns:
            names = []
            bits = []
            for bit in column["bitList"]:
                names.append((bit["name"], bool))
                bits.append(bit["bit"])
            self.output_flag_columns[column["columnName"]] = names
            self._flag_bits[column["columnName"]] = bits

    def unpack(self, input_flag_values, flag_name):
        """Determine individual boolean flags from an input array of unsigned
        ints.

        Parameters
        ----------
        input_flag_values : array-like of type uint
            Array of integer flags to unpack.
        flag_name : `str`
            Apdb column name of integer flags to unpack. Names of packed int
            flags are given by the flag_map_file.

        Returns
        -------
        output_flags : `numpy.ndarray`
            Numpy structured array of booleans, one field per flag.
        """
        bit_names_types = self.output_flag_columns[flag_name]
        output_flags = np.zeros(len(input_flag_values), dtype=bit_names_types)

        # Mask with the bit position recorded in the flag map, not the list
        # index: the map may skip bits or list them out of order.
        for (bit_name, dtype), bit_value in zip(bit_names_types,
                                                self._flag_bits[flag_name]):
            masked_bits = np.bitwise_and(input_flag_values,
                                         np.uint64(1) << np.uint64(bit_value))
            output_flags[bit_name] = masked_bits != 0

        return output_flags