# This file is part of ap_association
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

__all__ = ("TransformDiaSourceCatalogConnections",
           "TransformDiaSourceCatalogConfig",
           "TransformDiaSourceCatalogTask",
           "UnpackApdbFlags")

import os

import numpy as np
import yaml

from lsst.daf.base import DateTime
import lsst.pex.config as pexConfig
import lsst.pipe.base as pipeBase
import lsst.pipe.base.connectionTypes as connTypes
from lsst.pipe.tasks.postprocess import TransformCatalogBaseTask
from lsst.pipe.tasks.parquetTable import ParquetTable


class TransformDiaSourceCatalogConnections(pipeBase.PipelineTaskConnections,
                                           dimensions=("instrument", "visit", "detector"),
                                           defaultTemplates={"coaddName": "deep",
                                                             "fakesType": ""}):
    """Butler connections for TransformDiaSourceCatalogTask.
    """
    diaSourceSchema = connTypes.InitInput(
        doc="Schema for DIASource catalog output by ImageDifference.",
        storageClass="SourceCatalog",
        name="{fakesType}{coaddName}Diff_diaSrc_schema",
    )
    diaSourceCat = connTypes.Input(
        doc="Catalog of DiaSources produced during image differencing.",
        name="{fakesType}{coaddName}Diff_diaSrc",
        storageClass="SourceCatalog",
        dimensions=("instrument", "visit", "detector"),
    )
    diffIm = connTypes.Input(
        doc="Difference image on which the DiaSources were detected.",
        name="{fakesType}{coaddName}Diff_differenceExp",
        storageClass="ExposureF",
        dimensions=("instrument", "visit", "detector"),
    )
    diaSourceTable = connTypes.Output(
        doc="Catalog of DiaSources with calibrated values and renamed "
            "columns, stored as a DataFrame.",
        name="{fakesType}{coaddName}Diff_diaSrcTable",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector"),
    )


class TransformDiaSourceCatalogConfig(pipeBase.PipelineTaskConfig,
                                      pipelineConnections=TransformDiaSourceCatalogConnections):
    """Config for TransformDiaSourceCatalogTask.
    """
    flagMap = pexConfig.Field(
        dtype=str,
        doc="YAML file specifying how Science Pipelines flag fields are "
            "packed into bits.",
        default=os.path.join("${AP_ASSOCIATION_DIR}",
                             "data",
                             "association-flag-map.yaml"),
    )
    functorFile = pexConfig.Field(
        dtype=str,
        doc="Path to YAML file specifying Science DataModel functors to use "
            "when copying columns and computing calibrated values.",
        default=os.path.join("${AP_ASSOCIATION_DIR}",
                             "data",
                             "DiaSource.yaml"),
    )


class TransformDiaSourceCatalogTask(TransformCatalogBaseTask):
    """Apply Science DataModel-ification on the DiaSource afw table.

    This task calibrates and renames columns in the DiaSource catalog
    to ready the catalog for insertion into the Apdb.

    This is a Gen3 Butler only task. It will not run in Gen2.
    """

    ConfigClass = TransformDiaSourceCatalogConfig
    _DefaultName = "transformDiaSourceCatalog"
    RunnerClass = pipeBase.ButlerInitializedTaskRunner

    def __init__(self, initInputs, **kwargs):
        super().__init__(**kwargs)
        self.funcs = self.getFunctors()
        self.inputSchema = initInputs['diaSourceSchema'].schema
        self._create_bit_pack_mappings()

    def _create_bit_pack_mappings(self):
        """Set up all flag bit packings.
        """
        self.bit_pack_columns = []
        flag_map_file = os.path.expandvars(self.config.flagMap)
        with open(flag_map_file) as yaml_stream:
            table_list = list(yaml.safe_load_all(yaml_stream))
            for table in table_list:
                if table['tableName'] == 'DiaSource':
                    self.bit_pack_columns = table['columns']
                    break

        # Test that all flags requested are present in the input schema.
        # Output schemas are flexible; however, if names are not specified
        # in the Apdb schema, flag columns will not be persisted.
        for outputFlag in self.bit_pack_columns:
            bitList = outputFlag['bitList']
            for bit in bitList:
                try:
                    self.inputSchema.find(bit['name'])
                except KeyError:
                    raise KeyError(
                        "Requested column %s not found in input DiaSource "
                        "schema. Please check that the requested input "
                        "column exists." % bit['name'])
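    # For reference, the flag-map YAML read above is a stream of documents,
    # each carrying a ``tableName`` plus ``columns`` entries holding a
    # ``columnName`` and a ``bitList``. A minimal illustrative sketch (the
    # flag names below are made up; the real map lives in
    # data/association-flag-map.yaml):
    #
    #   tableName: DiaSource
    #   columns:
    #     - columnName: flags
    #       bitList:
    #         - name: base_PixelFlags_flag
    #           bit: 0
    #         - name: base_PixelFlags_flag_offimage
    #           bit: 1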

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)
        expId, expBits = butlerQC.quantum.dataId.pack("visit_detector",
                                                      returnMaxBits=True)
        inputs["ccdVisitId"] = expId
        inputs["band"] = butlerQC.quantum.dataId["band"]

        outputs = self.run(**inputs)

        butlerQC.put(outputs, outputRefs)

    @pipeBase.timeMethod
    def run(self,
            diaSourceCat,
            diffIm,
            band,
            ccdVisitId,
            funcs=None):
        """Convert the input catalog to a ParquetTable/Pandas DataFrame and
        run the functors.

        Additionally, add new columns that carry information from the
        exposure into the DiaSource catalog.

        Parameters
        ----------
        diaSourceCat : `lsst.afw.table.SourceCatalog`
            Catalog of DiaSources produced during image differencing.
        diffIm : `lsst.afw.image.Exposure`
            Difference image on which the DiaSources were detected.
        band : `str`
            Band in which the DiaSources were detected.
        ccdVisitId : `int`
            Packed id of the visit/detector pair, identifying the exposure.
        funcs : functors, optional
            Unused here; the functors loaded from ``functorFile``
            (``self.funcs``) are applied instead.

        Returns
        -------
        results : `lsst.pipe.base.Struct`
            Results struct with components.

            - ``diaSourceTable`` : Catalog of DiaSources with calibrated
              values and renamed columns.
              (`lsst.pipe.tasks.ParquetTable` or `pandas.DataFrame`)
        """
        self.log.info(
            "Transforming/standardizing the DiaSource table ccdVisitId: %i",
            ccdVisitId)

        diaSourceDf = diaSourceCat.asAstropy().to_pandas()
        diaSourceDf["bboxSize"] = self.computeBBoxSizes(diaSourceCat)
        diaSourceDf["ccdVisitId"] = ccdVisitId
        diaSourceDf["filterName"] = band
        diaSourceDf["midPointTai"] = diffIm.getInfo().getVisitInfo().getDate().get(system=DateTime.MJD)
        diaSourceDf["diaObjectId"] = 0
        diaSourceDf["pixelId"] = 0
        self.bitPackFlags(diaSourceDf)

        df = self.transform(band,
                            ParquetTable(dataFrame=diaSourceDf),
                            self.funcs,
                            dataId=None).df
        # The Ra/DecColumn functors preserve the original coord_ra/coord_dec
        # columns. Since we don't need these, and keeping them causes a DB
        # insert crash, we drop them from the DataFrame before returning the
        # output catalog.
        return pipeBase.Struct(
            diaSourceTable=df.drop(columns=["coord_ra", "coord_dec"]),
        )
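    # A hedged sketch of invoking run directly, outside of runQuantum. The
    # variable names are illustrative and assume objects already loaded from
    # a butler; ``schemaCat`` is anything exposing a ``.schema`` attribute
    # for the DiaSource schema:
    #
    #   task = TransformDiaSourceCatalogTask(
    #       initInputs={"diaSourceSchema": schemaCat})
    #   result = task.run(diaSourceCat, diffIm, band="g", ccdVisitId=expId)
    #   diaSourceTable = result.diaSourceTable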

    def computeBBoxSizes(self, inputCatalog):
        """Compute the size of a square bbox that fully contains the
        detection footprint.

        Parameters
        ----------
        inputCatalog : `lsst.afw.table.SourceCatalog`
            Catalog containing detected footprints.

        Returns
        -------
        outputBBoxSizes : `numpy.ndarray`, (N,)
            Array of bbox sizes.
        """
        outputBBoxSizes = np.empty(len(inputCatalog), dtype=int)
        for idx, record in enumerate(inputCatalog):
            footprintBBox = record.getFootprint().getBBox()
            # Compute twice the size of the largest dimension of the
            # footprint bounding box. This is the largest footprint we
            # should need to cover the complete DiaSource, assuming the
            # centroid is within the bounding box.
            maxSize = 2 * np.max([footprintBBox.getWidth(),
                                  footprintBBox.getHeight()])
            recX = record.getCentroid().x
            recY = record.getCentroid().y
            bboxSize = int(
                np.ceil(2 * np.max(np.fabs([footprintBBox.maxX - recX,
                                            footprintBBox.minX - recX,
                                            footprintBBox.maxY - recY,
                                            footprintBBox.minY - recY]))))
            if bboxSize > maxSize:
                bboxSize = maxSize
            outputBBoxSizes[idx] = bboxSize

        return outputBBoxSizes
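    # A worked example of the sizing above (values are made up): for a
    # footprint bounding box with minX = 0, maxX = 10, minY = 0, maxY = 6
    # and a centroid at (2, 3), the largest centroid-to-edge offset is
    # |maxX - recX| = 8, so bboxSize = ceil(2 * 8) = 16. That is below the
    # cap maxSize = 2 * max(width, height), so 16 is kept.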

    def bitPackFlags(self, df):
        """Pack requested flag columns in inputRecord into single columns in
        outputRecord.

        Parameters
        ----------
        df : `pandas.DataFrame`
            DataFrame to read bits from and pack them into.
        """
        for outputFlag in self.bit_pack_columns:
            bitList = outputFlag['bitList']
            value = np.zeros(len(df), dtype=np.uint64)
            for bit in bitList:
                # Hard type the bit arrays.
                value += (df[bit['name']]*2**bit['bit']).to_numpy().astype(np.uint64)
            df[outputFlag['columnName']] = value
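    # A minimal sketch of the packing arithmetic in bitPackFlags, using
    # made-up flag names: two boolean columns mapped to bits 0 and 1 pack
    # into a single unsigned integer column.
    #
    #   >>> import numpy as np
    #   >>> import pandas as pd
    #   >>> df = pd.DataFrame({"flag_a": [True, False], "flag_b": [True, True]})
    #   >>> (df["flag_a"] * 2**0 + df["flag_b"] * 2**1).to_numpy().astype(np.uint64)
    #   array([3, 2], dtype=uint64)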

class UnpackApdbFlags:
    """Class for unpacking bits from integer flag fields stored in the Apdb.

    Parameters
    ----------
    flag_map_file : `str`
        Absolute or relative path to a yaml file specifying mappings of
        flags to integer bits.
    table_name : `str`
        Name of the Apdb table the integer bit data are coming from.
    """

    def __init__(self, flag_map_file, table_name):
        self.bit_pack_columns = []
        with open(flag_map_file) as yaml_stream:
            table_list = list(yaml.safe_load_all(yaml_stream))
            for table in table_list:
                if table['tableName'] == table_name:
                    self.bit_pack_columns = table['columns']
                    break

        self.output_flag_columns = {}

        for column in self.bit_pack_columns:
            names = []
            for bit in column["bitList"]:
                names.append((bit["name"], bool))
            self.output_flag_columns[column["columnName"]] = names

    def unpack(self, input_flag_values, flag_name):
        """Determine individual boolean flags from an input array of
        unsigned ints.

        Parameters
        ----------
        input_flag_values : array-like of type uint
            Array of integer flags to unpack.
        flag_name : `str`
            Apdb column name of the integer flags to unpack. Names of packed
            int flags are given by the flag_map_file.

        Returns
        -------
        output_flags : `numpy.ndarray`
            Structured array of booleans, with one field per unpacked flag.
        """
        bit_names_types = self.output_flag_columns[flag_name]
        output_flags = np.zeros(len(input_flag_values), dtype=bit_names_types)

        for bit_idx, (bit_name, dtypes) in enumerate(bit_names_types):
            # This assumes the flag map assigns consecutive bits starting at
            # zero, so a flag's position in the list equals its bit number.
            masked_bits = np.bitwise_and(input_flag_values, 2**bit_idx)
            output_flags[bit_name] = masked_bits

        return output_flags

301 return output_flags