
# This file is part of ap_association
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.


__all__ = ("TransformDiaSourceCatalogConnections",
           "TransformDiaSourceCatalogConfig",
           "TransformDiaSourceCatalogTask",
           "UnpackApdbFlags")

import numpy as np
import os
import yaml

from lsst.daf.base import DateTime
import lsst.pex.config as pexConfig
import lsst.pipe.base as pipeBase
import lsst.pipe.base.connectionTypes as connTypes
from lsst.pipe.tasks.postprocess import TransformCatalogBaseTask
from lsst.pipe.tasks.parquetTable import ParquetTable
from lsst.utils import getPackageDir


class TransformDiaSourceCatalogConnections(pipeBase.PipelineTaskConnections,
                                           dimensions=("instrument", "visit", "detector"),
                                           defaultTemplates={"coaddName": "deep", "fakesType": ""}):
    """Butler connections for TransformDiaSourceCatalogTask.
    """
    diaSourceSchema = connTypes.InitInput(
        doc="Schema for DIASource catalog output by ImageDifference.",
        storageClass="SourceCatalog",
        name="{fakesType}{coaddName}Diff_diaSrc_schema",
    )
    diaSourceCat = connTypes.Input(
        doc="Catalog of DiaSources produced during image differencing.",
        name="{fakesType}{coaddName}Diff_diaSrc",
        storageClass="SourceCatalog",
        dimensions=("instrument", "visit", "detector"),
    )
    diffIm = connTypes.Input(
        doc="Difference image on which the DiaSources were detected.",
        name="{fakesType}{coaddName}Diff_differenceExp",
        storageClass="ExposureF",
        dimensions=("instrument", "visit", "detector"),
    )

    diaSourceTable = connTypes.Output(
        doc="Catalog of DiaSources with calibrated values and renamed "
            "columns, ready for insertion into the Apdb.",
        name="{fakesType}{coaddName}Diff_diaSrcTable",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector"),
    )



class TransformDiaSourceCatalogConfig(pipeBase.PipelineTaskConfig,
                                      pipelineConnections=TransformDiaSourceCatalogConnections):
    """Config for TransformDiaSourceCatalogTask.
    """

    flagMap = pexConfig.Field(
        dtype=str,
        doc="YAML file specifying the mapping from Science Pipelines flag "
            "fields to bit-packed integer columns.",
        default=os.path.join(getPackageDir("ap_association"),
                             "data",
                             "association-flag-map.yaml"),
    )

    functorFile = pexConfig.Field(
        dtype=str,
        doc='Path to YAML file specifying Science DataModel functors to use '
            'when copying columns and computing calibrated values.',
        default=os.path.join(getPackageDir("ap_association"),
                             "data",
                             "DiaSource.yaml")
    )
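
    # For orientation, the functor file follows the layout read by
    # TransformCatalogBaseTask.getFunctors(). A sketch only; the functor
    # names below are illustrative assumptions, not the actual contents of
    # DiaSource.yaml:
    #
    #   funcs:
    #       ra:
    #           functor: RAColumn
    #       decl:
    #           functor: DecColumn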



class TransformDiaSourceCatalogTask(TransformCatalogBaseTask):
    """Transform the DiaSource afw table into the Science Data Model format.

    This task calibrates and renames columns in the DiaSource catalog
    to ready the catalog for insertion into the Apdb.

    This is a Gen3 Butler only task. It will not run in Gen2.
    """

    ConfigClass = TransformDiaSourceCatalogConfig
    _DefaultName = "transformDiaSourceCatalog"
    RunnerClass = pipeBase.ButlerInitializedTaskRunner

    def __init__(self, initInputs, **kwargs):
        super().__init__(**kwargs)
        self.funcs = self.getFunctors()
        self.inputSchema = initInputs['diaSourceSchema'].schema
        self._create_bit_pack_mappings()

    def _create_bit_pack_mappings(self):
        """Set up all flag bit packings.
        """

        self.bit_pack_columns = []
        with open(self.config.flagMap) as yaml_stream:
            table_list = list(yaml.safe_load_all(yaml_stream))
            for table in table_list:
                if table['tableName'] == 'DiaSource':
                    self.bit_pack_columns = table['columns']
                    break

        # Test that all flags requested are present in the input schemas.
        # Output schemas are flexible, however if names are not specified in
        # the Apdb schema, flag columns will not be persisted.
        for outputFlag in self.bit_pack_columns:
            bitList = outputFlag['bitList']
            for bit in bitList:
                try:
                    self.inputSchema.find(bit['name'])
                except KeyError:
                    raise KeyError(
                        "Requested column %s not found in input DiaSource "
                        "schema. Please check that the requested input "
                        "column exists." % bit['name'])


    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)
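        # Pack the visit and detector IDs into a single integer that
        # uniquely identifies this detector+visit; expBits reports how many
        # bits the packed ID uses.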

        expId, expBits = butlerQC.quantum.dataId.pack("visit_detector",
                                                      returnMaxBits=True)
        inputs["ccdVisitId"] = expId
        inputs["band"] = butlerQC.quantum.dataId["band"]

        outputs = self.run(**inputs)

        butlerQC.put(outputs, outputRefs)


    def run(self,
            diaSourceCat,
            diffIm,
            band,
            ccdVisitId,
            funcs=None):
        """Convert the input catalog to a ParquetTable/Pandas DataFrame and
        run the functors on it.

        Additionally, add new columns to the DiaSource catalog by copying
        information out of the exposure.

        Parameters
        ----------
        diaSourceCat : `lsst.afw.table.SourceCatalog`
            Catalog of DiaSources produced during image differencing.
        diffIm : `lsst.afw.image.Exposure`
            Difference image on which the DiaSources were detected.
        band : `str`
            Band in which the DiaSources were observed.
        ccdVisitId : `int`
            Packed visit+detector ID of the exposure.
        funcs : `lsst.pipe.tasks.functors.CompositeFunctor`, optional
            Unused; the functors loaded into ``self.funcs`` are applied
            instead.

        Returns
        -------
        results : `lsst.pipe.base.Struct`
            Results struct with components.

            - ``diaSourceTable`` : Catalog of DiaSources with calibrated
              values and renamed columns.
              (`lsst.pipe.tasks.ParquetTable` or `pandas.DataFrame`)
        """

        self.log.info(
            "Transforming/standardizing the DiaSource table ccdVisitId: %i",
            ccdVisitId)

        diaSourceDf = diaSourceCat.asAstropy().to_pandas()
        diaSourceDf["bboxSize"] = self.computeBBoxSizes(diaSourceCat)
        diaSourceDf["ccdVisitId"] = ccdVisitId
        diaSourceDf["filterName"] = band
        diaSourceDf["midPointTai"] = diffIm.getInfo().getVisitInfo().getDate().get(system=DateTime.MJD)
        diaSourceDf["diaObjectId"] = 0
        diaSourceDf["pixelId"] = 0
        self.bitPackFlags(diaSourceDf)

        df = self.transform(band,
                            ParquetTable(dataFrame=diaSourceDf),
                            self.funcs,
                            dataId=None).df
        # The Ra/DecColumn functors preserve the original coord_ra/coord_dec
        # columns. Since we don't need these, and keeping them causes a DB
        # insert crash, we drop them from the DataFrame before returning the
        # output catalog.
        return pipeBase.Struct(
            diaSourceTable=df.drop(columns=["coord_ra", "coord_dec"]),
        )


    def computeBBoxSizes(self, inputCatalog):
        """Compute the size of a square bbox that fully contains the
        detection footprint.

        Parameters
        ----------
        inputCatalog : `lsst.afw.table.SourceCatalog`
            Catalog containing detected footprints.

        Returns
        -------
        outputBBoxSizes : `numpy.ndarray`, (N,)
            Array of bbox sizes.
        """

        outputBBoxSizes = np.empty(len(inputCatalog), dtype=int)
        for idx, record in enumerate(inputCatalog):
            footprintBBox = record.getFootprint().getBBox()
            # Compute twice the size of the largest dimension of the
            # footprint bounding box. This is the largest footprint we
            # should need to cover the complete DiaSource assuming the
            # centroid is within the bounding box.
            maxSize = 2 * np.max([footprintBBox.getWidth(),
                                  footprintBBox.getHeight()])
            recX = record.getCentroid().x
            recY = record.getCentroid().y
            bboxSize = int(
                np.ceil(2 * np.max(np.fabs([footprintBBox.maxX - recX,
                                            footprintBBox.minX - recX,
                                            footprintBBox.maxY - recY,
                                            footprintBBox.minY - recY]))))
            if bboxSize > maxSize:
                bboxSize = maxSize
            outputBBoxSizes[idx] = bboxSize

        return outputBBoxSizes


    def bitPackFlags(self, df):
        """Pack each requested set of flag columns into a single integer
        column of ``df``.

        Parameters
        ----------
        df : `pandas.DataFrame`
            DataFrame to read bits from and pack them into.
        """

        for outputFlag in self.bit_pack_columns:
            bitList = outputFlag['bitList']
            value = np.zeros(len(df), dtype=np.uint64)
            for bit in bitList:
                # Hard type the bit arrays.
                value += (df[bit['name']]*2**bit['bit']).to_numpy().astype(np.uint64)
            df[outputFlag['columnName']] = value



class UnpackApdbFlags:
    """Class for unpacking bits from integer flag fields stored in the Apdb.

    Attributes
    ----------
    flag_map_file : `str`
        Absolute or relative path to a yaml file specifying mappings of
        flags to integer bits.
    table_name : `str`
        Name of the Apdb table the integer bit data are coming from.
    """


    def __init__(self, flag_map_file, table_name):
        self.bit_pack_columns = []
        with open(flag_map_file) as yaml_stream:
            table_list = list(yaml.safe_load_all(yaml_stream))
            for table in table_list:
                if table['tableName'] == table_name:
                    self.bit_pack_columns = table['columns']
                    break

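        # Map each packed column name to (flag name, output dtype, bit
        # position) triples; unpack() uses these to build its structured
        # output array and to pick which bit to test.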

        self.output_flag_columns = {}

        for column in self.bit_pack_columns:
            names = []
            for bit in column["bitList"]:
                names.append((bit["name"], bool, bit["bit"]))
            self.output_flag_columns[column["columnName"]] = names


    def unpack(self, input_flag_values, flag_name):
        """Determine individual boolean flags from an input array of
        unsigned ints.

        Parameters
        ----------
        input_flag_values : array-like of type `uint`
            Array of packed integer flags to unpack.
        flag_name : `str`
            Apdb column name of the integer flags to unpack. The names of
            the packed int flags are given by ``flag_map_file``.

        Returns
        -------
        output_flags : `numpy.ndarray`
            Structured array of booleans, one field per flag in the bit
            packing.
        """

        bit_names_types = self.output_flag_columns[flag_name]
        output_flags = np.zeros(len(input_flag_values),
                                dtype=[(name, dtype)
                                       for name, dtype, _ in bit_names_types])

        for bit_name, _, bit_position in bit_names_types:
            # Test the bit assigned in the flag map; any non-zero result is
            # cast to True in the boolean output field.
            masked_bits = np.bitwise_and(input_flag_values, 2**bit_position)
            output_flags[bit_name] = masked_bits

        return output_flags
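

# A minimal usage sketch (the table and flag names here are illustrative
# assumptions; the real names live in the flag map YAML):
#
#   unpacker = UnpackApdbFlags("association-flag-map.yaml", "DiaSource")
#   packed = np.array([0, 1, 3], dtype=np.uint64)
#   flags = unpacker.unpack(packed, "flags")
#   flags["base_PixelFlags_flag"]  # e.g. array([False,  True,  True])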