# This file is part of ap_association
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.


__all__ = ("TransformDiaSourceCatalogConnections",
           "TransformDiaSourceCatalogConfig",
           "TransformDiaSourceCatalogTask",
           "UnpackApdbFlags")

import numpy as np
import os
import yaml

from lsst.daf.base import DateTime
import lsst.pex.config as pexConfig
import lsst.pipe.base as pipeBase
import lsst.pipe.base.connectionTypes as connTypes
from lsst.pipe.tasks.postprocess import TransformCatalogBaseTask
from lsst.pipe.tasks.parquetTable import ParquetTable


class TransformDiaSourceCatalogConnections(pipeBase.PipelineTaskConnections,
                                           dimensions=("instrument", "visit", "detector"),
                                           defaultTemplates={"coaddName": "deep", "fakesType": ""}):
    """Butler connections for TransformDiaSourceCatalogTask.
    """
    diaSourceSchema = connTypes.InitInput(
        doc="Schema for DIASource catalog output by ImageDifference.",
        storageClass="SourceCatalog",
        name="{fakesType}{coaddName}Diff_diaSrc_schema",
    )
    diaSourceCat = connTypes.Input(
        doc="Catalog of DiaSources produced during image differencing.",
        name="{fakesType}{coaddName}Diff_diaSrc",
        storageClass="SourceCatalog",
        dimensions=("instrument", "visit", "detector"),
    )
    diffIm = connTypes.Input(
        doc="Difference image on which the DiaSources were detected.",
        name="{fakesType}{coaddName}Diff_differenceExp",
        storageClass="ExposureF",
        dimensions=("instrument", "visit", "detector"),
    )
    diaSourceTable = connTypes.Output(
        doc="Catalog of DiaSources with calibrated values and renamed "
            "columns, stored as a DataFrame.",
        name="{fakesType}{coaddName}Diff_diaSrcTable",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector"),
    )



class TransformDiaSourceCatalogConfig(pipeBase.PipelineTaskConfig,
                                      pipelineConnections=TransformDiaSourceCatalogConnections):
    """Config for TransformDiaSourceCatalogTask.
    """
    flagMap = pexConfig.Field(
        dtype=str,
        doc="YAML file specifying how to pack Science Pipelines flag fields "
            "into bits.",
        default=os.path.join("${AP_ASSOCIATION_DIR}",
                             "data",
                             "association-flag-map.yaml"),
    )
    functorFile = pexConfig.Field(
        dtype=str,
        doc='Path to YAML file specifying Science DataModel functors to use '
            'when copying columns and computing calibrated values.',
        default=os.path.join("${AP_ASSOCIATION_DIR}",
                             "data",
                             "DiaSource.yaml")
    )
    doRemoveSkySources = pexConfig.Field(
        dtype=bool,
        default=False,
        doc="Input DiaSource catalog contains SkySources that should be "
            "removed before storing the output DiaSource catalog."
    )
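
    # A minimal config sketch (values and path are illustrative, not
    # defaults): in a pipeline config override one might set, e.g.,
    #
    #     config.doRemoveSkySources = True
    #     config.flagMap = "/path/to/custom-flag-map.yaml"  # hypothetical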



class TransformDiaSourceCatalogTask(TransformCatalogBaseTask):
    """Apply Science DataModel-ification on the DiaSource afw table.

    This task calibrates and renames columns in the DiaSource catalog
    to ready the catalog for insertion into the Apdb.

    This is a Gen3 Butler only task. It will not run in Gen2.
    """


    ConfigClass = TransformDiaSourceCatalogConfig
    _DefaultName = "transformDiaSourceCatalog"
    RunnerClass = pipeBase.ButlerInitializedTaskRunner

    def __init__(self, initInputs, **kwargs):
        super().__init__(**kwargs)
        self.funcs = self.getFunctors()
        self.inputSchema = initInputs['diaSourceSchema'].schema
        self._create_bit_pack_mappings()


    def _create_bit_pack_mappings(self):
        """Set up all flag bit packings.
        """
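        # The flag map YAML is expected to contain documents shaped like the
        # sketch below. This is inferred from the keys read in this method
        # and in bitPackFlags, not from the shipped file's actual contents:
        #
        #     tableName: DiaSource
        #     columns:
        #       - columnName: flags
        #         bitList:
        #           - name: base_PixelFlags_flag         # illustrative
        #             bit: 0
        #           - name: base_PixelFlags_flag_offimage
        #             bit: 1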

        self.bit_pack_columns = []
        flag_map_file = os.path.expandvars(self.config.flagMap)
        with open(flag_map_file) as yaml_stream:
            table_list = list(yaml.safe_load_all(yaml_stream))
            for table in table_list:
                if table['tableName'] == 'DiaSource':
                    self.bit_pack_columns = table['columns']
                    break


        # Test that all flags requested are present in the input schemas.
        # Output schemas are flexible; however, if names are not specified in
        # the Apdb schema, flag columns will not be persisted.
        for outputFlag in self.bit_pack_columns:
            bitList = outputFlag['bitList']
            for bit in bitList:
                try:
                    self.inputSchema.find(bit['name'])
                except KeyError:
                    raise KeyError(
                        "Requested column %s not found in input DiaSource "
                        "schema. Please check that the requested input "
                        "column exists." % bit['name'])


    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)
        expId, expBits = butlerQC.quantum.dataId.pack("visit_detector",
                                                      returnMaxBits=True)
        inputs["ccdVisitId"] = expId
        inputs["band"] = butlerQC.quantum.dataId["band"]

        outputs = self.run(**inputs)

        butlerQC.put(outputs, outputRefs)


    @pipeBase.timeMethod
    def run(self,
            diaSourceCat,
            diffIm,
            band,
            ccdVisitId,
            funcs=None):
        """Convert input catalog to ParquetTable/Pandas and run functors.

        Additionally, add new columns for stripping information from the
        exposure and into the DiaSource catalog.

        Parameters
        ----------
        diaSourceCat : `lsst.afw.table.SourceCatalog`
            Catalog of DiaSources produced during image differencing.
        diffIm : `lsst.afw.image.Exposure`
            Difference image on which the DiaSources were detected.
        band : `str`
            Band in which the DiaSources were detected.
        ccdVisitId : `int`
            Packed id of the visit and detector the DiaSources were
            detected in.
        funcs : optional
            Unused; the functors loaded from ``functorFile`` at construction
            are applied.

        Returns
        -------
        results : `lsst.pipe.base.Struct`
            Results struct with components.

            - ``diaSourceTable`` : Catalog of DiaSources with calibrated
              values and renamed columns.
              (`lsst.pipe.tasks.ParquetTable` or `pandas.DataFrame`)
        """
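        # A minimal usage sketch (this task is normally driven by the
        # pipeline framework through runQuantum; names here are illustrative):
        #
        #     task = TransformDiaSourceCatalogTask(
        #         initInputs={"diaSourceSchema": diaSrcSchemaCat})
        #     result = task.run(diaSourceCat, diffIm, band="g",
        #                       ccdVisitId=123456)
        #     df = result.diaSourceTable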

        self.log.info(
            "Transforming/standardizing the DiaSource table ccdVisitId: %i",
            ccdVisitId)

        diaSourceDf = diaSourceCat.asAstropy().to_pandas()
        if self.config.doRemoveSkySources:
            diaSourceDf = diaSourceDf[~diaSourceDf["sky_source"]]
        diaSourceDf["bboxSize"] = self.computeBBoxSizes(diaSourceCat)
        diaSourceDf["ccdVisitId"] = ccdVisitId
        diaSourceDf["filterName"] = band
        diaSourceDf["midPointTai"] = diffIm.getInfo().getVisitInfo().getDate().get(system=DateTime.MJD)
        diaSourceDf["diaObjectId"] = 0
        diaSourceDf["pixelId"] = 0
        self.bitPackFlags(diaSourceDf)


        df = self.transform(band,
                            ParquetTable(dataFrame=diaSourceDf),
                            self.funcs,
                            dataId=None).df
        # The Ra/DecColumn functors preserve the original coord_ra/coord_dec
        # columns. Since we don't need these, and keeping them causes a DB
        # insert crash, we drop them from the DataFrame before returning the
        # output catalog.
        return pipeBase.Struct(
            diaSourceTable=df.drop(columns=["coord_ra", "coord_dec"]),
        )


    def computeBBoxSizes(self, inputCatalog):
        """Compute the size of a square bbox that fully contains the detection
        footprint.

        Parameters
        ----------
        inputCatalog : `lsst.afw.table.SourceCatalog`
            Catalog containing detected footprints.

        Returns
        -------
        outputBBoxSizes : `list` of `int`
            List of bbox sizes, one per retained record.
        """

        outputBBoxSizes = []
        for record in inputCatalog:
            if self.config.doRemoveSkySources:
                if record["sky_source"]:
                    continue
            footprintBBox = record.getFootprint().getBBox()
            # Compute twice the size of the largest dimension of the footprint
            # bounding box. This is the largest footprint we should need to
            # cover the complete DiaSource assuming the centroid is within the
            # bounding box.
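            # Worked example (illustrative numbers): for an integer bbox with
            # minX=0, maxX=10, minY=0, maxY=4 (width 11, height 5) and a
            # centroid at (2.0, 2.0), maxSize = 2 * 11 = 22; the largest
            # centroid-to-edge distance is 8, so bboxSize = ceil(2 * 8) = 16,
            # which is kept since 16 <= 22.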

            maxSize = 2 * np.max([footprintBBox.getWidth(),
                                  footprintBBox.getHeight()])
            recX = record.getCentroid().x
            recY = record.getCentroid().y
            bboxSize = int(
                np.ceil(2 * np.max(np.fabs([footprintBBox.maxX - recX,
                                            footprintBBox.minX - recX,
                                            footprintBBox.maxY - recY,
                                            footprintBBox.minY - recY]))))
            if bboxSize > maxSize:
                bboxSize = maxSize
            outputBBoxSizes.append(bboxSize)

        return outputBBoxSizes


    def bitPackFlags(self, df):
        """Pack the requested boolean flag columns into single integer
        columns, in place.

        Parameters
        ----------
        df : `pandas.DataFrame`
            DataFrame to read bits from and pack them into.
        """
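        # Packing arithmetic sketch: each boolean column contributes 2**bit
        # when True, so a row with flags set at bits 0 and 2 packs to
        # 1 + 4 = 5 in the output integer column.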

        for outputFlag in self.bit_pack_columns:
            bitList = outputFlag['bitList']
            value = np.zeros(len(df), dtype=np.uint64)
            for bit in bitList:
                # Hard type the bit arrays.
                value += (df[bit['name']]*2**bit['bit']).to_numpy().astype(np.uint64)
            df[outputFlag['columnName']] = value



class UnpackApdbFlags:
    """Class for unpacking bits from integer flag fields stored in the Apdb.

    Parameters
    ----------
    flag_map_file : `str`
        Absolute or relative path to a yaml file specifying mappings of flags
        to integer bits.
    table_name : `str`
        Name of the Apdb table the integer bit data are coming from.
    """
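
    # A minimal usage sketch (file name and flag/column names are
    # illustrative):
    #
    #     unpacker = UnpackApdbFlags("association-flag-map.yaml", "DiaSource")
    #     flag_array = unpacker.unpack(diaSources["flags"], "flags")
    #     bad = flag_array["base_PixelFlags_flag"]  # hypothetical flag name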


    def __init__(self, flag_map_file, table_name):
        self.bit_pack_columns = []
        with open(flag_map_file) as yaml_stream:
            table_list = list(yaml.safe_load_all(yaml_stream))
            for table in table_list:
                if table['tableName'] == table_name:
                    self.bit_pack_columns = table['columns']
                    break

        self.output_flag_columns = {}

        for column in self.bit_pack_columns:
            names = []
            for bit in column["bitList"]:
                names.append((bit["name"], bool))
            self.output_flag_columns[column["columnName"]] = names


    def unpack(self, input_flag_values, flag_name):
        """Determine individual boolean flags from an input array of unsigned
        ints.

        Parameters
        ----------
        input_flag_values : array-like of `int`
            Array of packed integer flags to unpack.
        flag_name : `str`
            Apdb column name of integer flags to unpack. Names of packed int
            flags are given by the flag_map_file.

        Returns
        -------
        output_flags : `numpy.ndarray`
            Structured array with one boolean field per unpacked flag.
        """
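        # Unpacking sketch: a packed value of 5 (binary 101) with three
        # configured bits yields True, False, True for bits 0, 1, 2.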

        bit_names_types = self.output_flag_columns[flag_name]
        output_flags = np.zeros(len(input_flag_values), dtype=bit_names_types)

        for bit_idx, (bit_name, dtypes) in enumerate(bit_names_types):
            # Select each bit by its position in the configured bitList;
            # nonzero masked values cast to True in the boolean output field.
            masked_bits = np.bitwise_and(input_flag_values, 2**bit_idx)
            output_flags[bit_name] = masked_bits

        return output_flags