
# This file is part of ap_association
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.

__all__ = ("TransformDiaSourceCatalogConnections",
           "TransformDiaSourceCatalogConfig",
           "TransformDiaSourceCatalogTask",
           "UnpackApdbFlags")

import os

import numpy as np
import yaml

from lsst.daf.base import DateTime
import lsst.pex.config as pexConfig
import lsst.pipe.base as pipeBase
import lsst.pipe.base.connectionTypes as connTypes
from lsst.pipe.tasks.postprocess import TransformCatalogBaseTask
from lsst.pipe.tasks.parquetTable import ParquetTable


class TransformDiaSourceCatalogConnections(pipeBase.PipelineTaskConnections,
                                           dimensions=("instrument", "visit", "detector"),
                                           defaultTemplates={"coaddName": "deep", "fakesType": ""}):
    """Butler connections for TransformDiaSourceCatalogTask.
    """
    diaSourceSchema = connTypes.InitInput(
        doc="Schema for DIASource catalog output by ImageDifference.",
        storageClass="SourceCatalog",
        name="{fakesType}{coaddName}Diff_diaSrc_schema",
    )
    diaSourceCat = connTypes.Input(
        doc="Catalog of DiaSources produced during image differencing.",
        name="{fakesType}{coaddName}Diff_diaSrc",
        storageClass="SourceCatalog",
        dimensions=("instrument", "visit", "detector"),
    )
    diffIm = connTypes.Input(
        doc="Difference image on which the DiaSources were detected.",
        name="{fakesType}{coaddName}Diff_differenceExp",
        storageClass="ExposureF",
        dimensions=("instrument", "visit", "detector"),
    )
    diaSourceTable = connTypes.Output(
        doc="Catalog of DiaSources with calibrated values and renamed "
            "columns, stored as a DataFrame.",
        name="{fakesType}{coaddName}Diff_diaSrcTable",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector"),
    )
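
    # With the default templates (coaddName="deep", fakesType=""), the
    # connection names above resolve to "deepDiff_diaSrc_schema",
    # "deepDiff_diaSrc", "deepDiff_differenceExp", and
    # "deepDiff_diaSrcTable"; a pipeline configured with, e.g.,
    # fakesType="fakes_" would instead read and write
    # "fakes_deepDiff_diaSrc", etc.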


class TransformDiaSourceCatalogConfig(pipeBase.PipelineTaskConfig,
                                      pipelineConnections=TransformDiaSourceCatalogConnections):
    """Config for TransformDiaSourceCatalogTask.
    """
    flagMap = pexConfig.Field(
        dtype=str,
        doc="Yaml file specifying how Science Pipelines flag fields are "
            "packed into bit-packed integer columns.",
        default=os.path.join("${AP_ASSOCIATION_DIR}",
                             "data",
                             "association-flag-map.yaml"),
    )
    functorFile = pexConfig.Field(
        dtype=str,
        doc='Path to YAML file specifying Science DataModel functors to use '
            'when copying columns and computing calibrated values.',
        default=os.path.join("${AP_ASSOCIATION_DIR}",
                             "data",
                             "DiaSource.yaml")
    )
    doRemoveSkySources = pexConfig.Field(
        dtype=bool,
        default=False,
        doc="Input DiaSource catalog contains SkySources that should be "
            "removed before storing the output DiaSource catalog."
    )
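
    # A minimal sketch of the functor-file layout read by
    # lsst.pipe.tasks.postprocess for functorFile above, assuming its usual
    # funcs/flags keys; the column and functor names here are illustrative
    # placeholders, not the actual contents of DiaSource.yaml:
    #
    #     funcs:
    #         ra:
    #             functor: RAColumn
    #         decl:
    #             functor: DecColumn
    #     flags:
    #         - base_PixelFlags_flag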


class TransformDiaSourceCatalogTask(TransformCatalogBaseTask):
    """Apply Science DataModel-ification to the DiaSource afw table.

    This task calibrates and renames columns in the DiaSource catalog
    to ready the catalog for insertion into the Apdb.

    This is a Gen3 Butler-only task; it will not run in Gen2.
    """

    ConfigClass = TransformDiaSourceCatalogConfig
    _DefaultName = "transformDiaSourceCatalog"
    RunnerClass = pipeBase.ButlerInitializedTaskRunner
    # Needed to create a valid TransformCatalogBaseTask, but unused.
    inputDataset = "deepDiff_diaSrc"
    outputDataset = "deepDiff_diaSrcTable"

    def __init__(self, initInputs, **kwargs):
        super().__init__(**kwargs)
        self.funcs = self.getFunctors()
        self.inputSchema = initInputs['diaSourceSchema'].schema
        self._create_bit_pack_mappings()

    def _create_bit_pack_mappings(self):
        """Set up all flag bit packings.
        """
        self.bit_pack_columns = []
        flag_map_file = os.path.expandvars(self.config.flagMap)
        with open(flag_map_file) as yaml_stream:
            table_list = list(yaml.safe_load_all(yaml_stream))
            for table in table_list:
                if table['tableName'] == 'DiaSource':
                    self.bit_pack_columns = table['columns']
                    break

        # Test that all flags requested are present in the input schemas.
        # Output schemas are flexible; however, if names are not specified in
        # the Apdb schema, flag columns will not be persisted.
        for outputFlag in self.bit_pack_columns:
            bitList = outputFlag['bitList']
            for bit in bitList:
                try:
                    self.inputSchema.find(bit['name'])
                except KeyError:
                    raise KeyError(
                        "Requested column %s not found in input DiaSource "
                        "schema. Please check that the requested input "
                        "column exists." % bit['name'])
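
    # The flag-map file read above is a multi-document YAML stream; each
    # document provides a tableName and a list of packed columns. A sketch of
    # the expected layout (the column and flag names here are hypothetical
    # placeholders, not the real file contents):
    #
    #     ---
    #     tableName: DiaSource
    #     columns:
    #       - columnName: flags
    #         bitList:
    #           - name: base_PixelFlags_flag
    #             bit: 0
    #           - name: base_PixelFlags_flag_offimage
    #             bit: 1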

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)
        expId, expBits = butlerQC.quantum.dataId.pack("visit_detector",
                                                      returnMaxBits=True)
        inputs["ccdVisitId"] = expId
        inputs["band"] = butlerQC.quantum.dataId["band"]

        outputs = self.run(**inputs)

        butlerQC.put(outputs, outputRefs)

    @pipeBase.timeMethod
    def run(self,
            diaSourceCat,
            diffIm,
            band,
            ccdVisitId,
            funcs=None):
        """Convert the input catalog to a ParquetTable/Pandas DataFrame and
        run the functors.

        Additionally, add new columns carrying information stripped from the
        exposure into the DiaSource catalog.

        Parameters
        ----------
        diaSourceCat : `lsst.afw.table.SourceCatalog`
            Catalog of DiaSources produced during image differencing.
        diffIm : `lsst.afw.image.Exposure`
            Difference image on which the DiaSources were detected.
        band : `str`
            Band the difference image was observed in.
        ccdVisitId : `int`
            Unique id of the detector/visit the catalog was observed on.
        funcs : `lsst.pipe.tasks.functors.CompositeFunctor`, optional
            Functors to apply to the catalog's columns; if `None`, the
            functors loaded from ``config.functorFile`` at construction
            are used.

        Returns
        -------
        results : `lsst.pipe.base.Struct`
            Results struct with components.

            - ``diaSourceTable`` : Catalog of DiaSources with calibrated
              values and renamed columns.
              (`lsst.pipe.tasks.ParquetTable` or `pandas.DataFrame`)
        """
        self.log.info(
            "Transforming/standardizing the DiaSource table ccdVisitId: %i",
            ccdVisitId)

        diaSourceDf = diaSourceCat.asAstropy().to_pandas()
        if self.config.doRemoveSkySources:
            diaSourceDf = diaSourceDf[~diaSourceDf["sky_source"]]
        diaSourceDf["bboxSize"] = self.computeBBoxSizes(diaSourceCat)
        diaSourceDf["ccdVisitId"] = ccdVisitId
        diaSourceDf["filterName"] = band
        diaSourceDf["midPointTai"] = diffIm.getInfo().getVisitInfo().getDate().get(system=DateTime.MJD)
        diaSourceDf["diaObjectId"] = 0
        diaSourceDf["pixelId"] = 0
        self.bitPackFlags(diaSourceDf)

        # Fall back to the functors loaded at construction when none are
        # passed in explicitly.
        if funcs is None:
            funcs = self.funcs
        df = self.transform(band,
                            ParquetTable(dataFrame=diaSourceDf),
                            funcs,
                            dataId=None).df
        # The Ra/DecColumn functors preserve the original coord_ra/coord_dec
        # columns. Since we don't need these, and keeping them causes a DB
        # insert crash, we drop them from the DataFrame before returning the
        # output catalog.
        return pipeBase.Struct(
            diaSourceTable=df.drop(columns=["coord_ra", "coord_dec"]),
        )

    def computeBBoxSizes(self, inputCatalog):
        """Compute the size of a square bbox that fully contains the
        detection footprint.

        Parameters
        ----------
        inputCatalog : `lsst.afw.table.SourceCatalog`
            Catalog containing detected footprints.

        Returns
        -------
        outputBBoxSizes : `list` of `int`
            Array of bbox sizes.
        """
        outputBBoxSizes = []
        for record in inputCatalog:
            if self.config.doRemoveSkySources:
                if record["sky_source"]:
                    continue
            footprintBBox = record.getFootprint().getBBox()
            # Compute twice the size of the largest dimension of the
            # footprint bounding box. This is the largest footprint we
            # should need to cover the complete DiaSource assuming the
            # centroid is within the bounding box.
            maxSize = 2 * np.max([footprintBBox.getWidth(),
                                  footprintBBox.getHeight()])
            recX = record.getCentroid().x
            recY = record.getCentroid().y
            bboxSize = int(
                np.ceil(2 * np.max(np.fabs([footprintBBox.maxX - recX,
                                            footprintBBox.minX - recX,
                                            footprintBBox.maxY - recY,
                                            footprintBBox.minY - recY]))))
            if bboxSize > maxSize:
                bboxSize = maxSize
            outputBBoxSizes.append(bboxSize)

        return outputBBoxSizes
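
    # Worked example with hypothetical numbers: for a footprint bbox spanning
    # x = [10, 19], y = [5, 34] (width 10, height 30) with centroid (12, 20),
    # maxSize = 2 * 30 = 60; the largest centroid-to-edge distance is
    # |5 - 20| = 15, so bboxSize = ceil(2 * 15) = 30, which is kept since
    # 30 <= 60.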

    def bitPackFlags(self, df):
        """Pack the requested flag columns of the DataFrame into single
        integer columns, one per packed output column.

        Parameters
        ----------
        df : `pandas.DataFrame`
            DataFrame to read bits from and pack them into.
        """
        for outputFlag in self.bit_pack_columns:
            bitList = outputFlag['bitList']
            value = np.zeros(len(df), dtype=np.uint64)
            for bit in bitList:
                # Explicitly cast the flag array to uint64 so the shifted
                # values accumulate without overflow.
                value += (df[bit['name']]*2**bit['bit']).to_numpy().astype(np.uint64)
            df[outputFlag['columnName']] = value
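
    # Worked example of the packing above: if the flag map assigns
    # (hypothetically) base_PixelFlags_flag to bit 0 and
    # base_PixelFlags_flag_offimage to bit 1, a row with both flags set
    # packs to 1*2**0 + 1*2**1 = 3 in the output integer column.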


class UnpackApdbFlags:
    """Class for unpacking bits from integer flag fields stored in the Apdb.

    Parameters
    ----------
    flag_map_file : `str`
        Absolute or relative path to a yaml file specifying mappings of
        flags to integer bits.
    table_name : `str`
        Name of the Apdb table the integer bit data are coming from.
    """

    def __init__(self, flag_map_file, table_name):
        self.bit_pack_columns = []
        with open(flag_map_file) as yaml_stream:
            table_list = list(yaml.safe_load_all(yaml_stream))
            for table in table_list:
                if table['tableName'] == table_name:
                    self.bit_pack_columns = table['columns']
                    break

        self.output_flag_columns = {}

        for column in self.bit_pack_columns:
            names = []
            for bit in column["bitList"]:
                names.append((bit["name"], bool))
            self.output_flag_columns[column["columnName"]] = names

    def unpack(self, input_flag_values, flag_name):
        """Determine individual boolean flags from an input array of unsigned
        ints.

        Parameters
        ----------
        input_flag_values : array-like of type `uint`
            Array of integer flags to unpack.
        flag_name : `str`
            Apdb column name of integer flags to unpack. Names of packed int
            flags are given by the flag_map_file.

        Returns
        -------
        output_flags : `numpy.ndarray`
            Structured array of booleans, one field per flag name.
        """
        bit_names_types = self.output_flag_columns[flag_name]
        output_flags = np.zeros(len(input_flag_values), dtype=bit_names_types)

        # This assumes the bits in the flag map are contiguous and listed in
        # order, so a flag's index in the list equals its bit position.
        for bit_idx, (bit_name, _) in enumerate(bit_names_types):
            masked_bits = np.bitwise_and(input_flag_values, 2**bit_idx)
            output_flags[bit_name] = masked_bits

        return output_flags
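
# A minimal usage sketch for UnpackApdbFlags. The packed column name "flags"
# and the flag name below are illustrative assumptions, not necessarily the
# contents of the real association-flag-map.yaml:
#
#     flag_map = os.path.join(os.environ["AP_ASSOCIATION_DIR"],
#                             "data", "association-flag-map.yaml")
#     unpacker = UnpackApdbFlags(flag_map, "DiaSource")
#     packed = np.array([0, 1, 3], dtype=np.uint64)
#     unpacked = unpacker.unpack(packed, "flags")
#     unpacked["base_PixelFlags_flag"]  # -> array([False, True, True])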