Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

# This file is part of ap_association
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

# Public API of this module: the pipeline task triple (connections, config,
# task) plus the standalone Apdb flag-unpacking helper.
__all__ = ("TransformDiaSourceCatalogConnections",
           "TransformDiaSourceCatalogConfig",
           "TransformDiaSourceCatalogTask",
           "UnpackApdbFlags")

26 

27import numpy as np 

28import os 

29import yaml 

30 

31from lsst.daf.base import DateTime 

32import lsst.pex.config as pexConfig 

33import lsst.pipe.base as pipeBase 

34import lsst.pipe.base.connectionTypes as connTypes 

35from lsst.pipe.tasks.postprocess import TransformCatalogBaseTask 

36from lsst.pipe.tasks.parquetTable import ParquetTable 

37 

38 

class TransformDiaSourceCatalogConnections(pipeBase.PipelineTaskConnections,
                                           dimensions=("instrument", "visit", "detector"),
                                           defaultTemplates={"coaddName": "deep", "fakesType": ""}):
    """Butler connections for TransformDiaSourceCatalogTask.
    """
    diaSourceSchema = connTypes.InitInput(
        doc="Schema for DIASource catalog output by ImageDifference.",
        storageClass="SourceCatalog",
        name="{fakesType}{coaddName}Diff_diaSrc_schema",
    )
    diaSourceCat = connTypes.Input(
        doc="Catalog of DiaSources produced during image differencing.",
        name="{fakesType}{coaddName}Diff_diaSrc",
        storageClass="SourceCatalog",
        dimensions=("instrument", "visit", "detector"),
    )
    diffIm = connTypes.Input(
        doc="Difference image on which the DiaSources were detected.",
        name="{fakesType}{coaddName}Diff_differenceExp",
        storageClass="ExposureF",
        dimensions=("instrument", "visit", "detector"),
    )
    diaSourceTable = connTypes.Output(
        # Fixed: was the placeholder string "." — describe the dataset
        # this task actually writes (see TransformDiaSourceCatalogTask.run).
        doc="Catalog of DiaSources with calibrated values and renamed "
            "columns, transformed into a DataFrame.",
        name="{fakesType}{coaddName}Diff_diaSrcTable",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector"),
    )

67 

68 

class TransformDiaSourceCatalogConfig(pipeBase.PipelineTaskConfig,
                                      pipelineConnections=TransformDiaSourceCatalogConnections):
    """Configuration for TransformDiaSourceCatalogTask.

    Paths may contain environment variables (e.g. ``${AP_ASSOCIATION_DIR}``);
    the task expands them before opening the files.
    """
    flagMap = pexConfig.Field(
        dtype=str,
        doc="Yaml file specifying SciencePipelines flag fields to bit packs.",
        default=os.path.join("${AP_ASSOCIATION_DIR}",
                             "data",
                             "association-flag-map.yaml"),
    )
    functorFile = pexConfig.Field(
        dtype=str,
        doc='Path to YAML file specifying Science DataModel functors to use '
            'when copying columns and computing calibrated values.',
        default=os.path.join("${AP_ASSOCIATION_DIR}",
                             "data",
                             "DiaSource.yaml")
    )
    doRemoveSkySources = pexConfig.Field(
        dtype=bool,
        default=False,
        doc="Input DiaSource catalog contains SkySources that should be "
            "removed before storing the output DiaSource catalog."
    )

94 

95 

class TransformDiaSourceCatalogTask(TransformCatalogBaseTask):
    """Apply Science DataModel-ification on the DiaSource afw table.

    This task calibrates and renames columns in the DiaSource catalog
    to ready the catalog for insertion into the Apdb.

    This is a Gen3 Butler only task. It will not run in Gen2.
    """

    ConfigClass = TransformDiaSourceCatalogConfig
    _DefaultName = "transformDiaSourceCatalog"
    RunnerClass = pipeBase.ButlerInitializedTaskRunner
    # Needed to create a valid TransformCatalogBaseTask, but unused
    inputDataset = "deepDiff_diaSrc"
    outputDataset = "deepDiff_diaSrcTable"

    def __init__(self, initInputs, **kwargs):
        """Set up functors and pre-compute the flag bit packings.

        Parameters
        ----------
        initInputs : `dict`
            Must contain a ``diaSourceSchema`` entry exposing a ``schema``
            attribute for the input DiaSource catalog; used to validate
            that every flag named in the flag map exists.
        **kwargs
            Forwarded to `TransformCatalogBaseTask`.
        """
        super().__init__(**kwargs)
        self.funcs = self.getFunctors()
        self.inputSchema = initInputs['diaSourceSchema'].schema
        self._create_bit_pack_mappings()

    def _create_bit_pack_mappings(self):
        """Setup all flag bit packings.

        Reads ``config.flagMap`` (environment variables expanded), keeps the
        column definitions for the ``DiaSource`` table, and validates that
        every requested flag column exists in the input schema.

        Raises
        ------
        KeyError
            If a flag named in the flag map is missing from the input
            DiaSource schema.
        """
        self.bit_pack_columns = []
        flag_map_file = os.path.expandvars(self.config.flagMap)
        with open(flag_map_file) as yaml_stream:
            table_list = list(yaml.safe_load_all(yaml_stream))
            # Only the DiaSource table's bit packings apply to this task.
            for table in table_list:
                if table['tableName'] == 'DiaSource':
                    self.bit_pack_columns = table['columns']
                    break

        # Test that all flags requested are present in the input schemas.
        # Output schemas are flexible, however if names are not specified in
        # the Apdb schema, flag columns will not be persisted.
        for outputFlag in self.bit_pack_columns:
            bitList = outputFlag['bitList']
            for bit in bitList:
                try:
                    self.inputSchema.find(bit['name'])
                except KeyError:
                    raise KeyError(
                        "Requested column %s not found in input DiaSource "
                        "schema. Please check that the requested input "
                        "column exists." % bit['name'])

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        """Gen3 Butler entry point: fetch inputs, add ids, run, store output.
        """
        inputs = butlerQC.get(inputRefs)
        # Pack visit+detector into a single integer id for this catalog.
        # NOTE(review): expBits (number of significant bits) is unused here.
        expId, expBits = butlerQC.quantum.dataId.pack("visit_detector",
                                                      returnMaxBits=True)
        inputs["ccdVisitId"] = expId
        inputs["band"] = butlerQC.quantum.dataId["band"]

        outputs = self.run(**inputs)

        butlerQC.put(outputs, outputRefs)

    @pipeBase.timeMethod
    def run(self,
            diaSourceCat,
            diffIm,
            band,
            ccdVisitId,
            funcs=None):
        """Convert input catalog to ParquetTable/Pandas and run functors.

        Additionally, add new columns for stripping information from the
        exposure and into the DiaSource catalog.

        Parameters
        ----------
        diaSourceCat : `lsst.afw.table.SourceCatalog`
            Catalog of DiaSources produced during image differencing.
        diffIm : exposure-like
            Difference image on which the DiaSources were detected; only its
            VisitInfo observation date (as MJD) is read here.
        band : `str`
            Band of the observation; stored in the ``filterName`` column and
            passed to ``transform``.
        ccdVisitId : `int`
            Packed visit/detector id the catalog belongs to.
        funcs : optional
            NOTE(review): accepted but unused — ``self.funcs`` is always
            passed to ``transform``; confirm whether this parameter was
            meant to override it.

        Returns
        -------
        results : `lsst.pipe.base.Struct`
            Results struct with components.

            - ``diaSourceTable`` : Catalog of DiaSources with calibrated values
              and renamed columns.
              (`lsst.pipe.tasks.ParquetTable` or `pandas.DataFrame`)
        """
        self.log.info(
            "Transforming/standardizing the DiaSource table ccdVisitId: %i",
            ccdVisitId)

        diaSourceDf = diaSourceCat.asAstropy().to_pandas()
        if self.config.doRemoveSkySources:
            # Drop rows flagged as sky sources; computeBBoxSizes applies the
            # same filter so its result stays aligned with the DataFrame.
            diaSourceDf = diaSourceDf[~diaSourceDf["sky_source"]]
        diaSourceDf["bboxSize"] = self.computeBBoxSizes(diaSourceCat)
        diaSourceDf["ccdVisitId"] = ccdVisitId
        diaSourceDf["filterName"] = band
        diaSourceDf["midPointTai"] = diffIm.getInfo().getVisitInfo().getDate().get(system=DateTime.MJD)
        # NOTE(review): set to 0 here; presumably assigned downstream by
        # source association — confirm against the consumer of this table.
        diaSourceDf["diaObjectId"] = 0
        diaSourceDf["pixelId"] = 0
        self.bitPackFlags(diaSourceDf)

        df = self.transform(band,
                            ParquetTable(dataFrame=diaSourceDf),
                            self.funcs,
                            dataId=None).df

        return pipeBase.Struct(
            diaSourceTable=df,
        )

    def computeBBoxSizes(self, inputCatalog):
        """Compute the size of a square bbox that fully contains the detection
        footprint.

        Parameters
        ----------
        inputCatalog : `lsst.afw.table.SourceCatalog`
            Catalog containing detected footprints.

        Returns
        -------
        outputBBoxSizes : `list` of `float`
            Array of bbox sizes.
        """
        outputBBoxSizes = []
        for record in inputCatalog:
            # Skip sky sources so the output stays parallel to the filtered
            # DataFrame built in run().
            if self.config.doRemoveSkySources:
                if record["sky_source"]:
                    continue
            footprintBBox = record.getFootprint().getBBox()
            # Compute twice the size of the largest dimension of the footprint
            # bounding box. This is the largest footprint we should need to cover
            # the complete DiaSource assuming the centroid is within the bounding
            # box.
            maxSize = 2 * np.max([footprintBBox.getWidth(),
                                  footprintBBox.getHeight()])
            recX = record.getCentroid().x
            recY = record.getCentroid().y
            # Smallest (square, centroid-centered) half-extent covering the
            # bbox corners, doubled and rounded up; capped at maxSize.
            bboxSize = int(
                np.ceil(2 * np.max(np.fabs([footprintBBox.maxX - recX,
                                            footprintBBox.minX - recX,
                                            footprintBBox.maxY - recY,
                                            footprintBBox.minY - recY]))))
            if bboxSize > maxSize:
                bboxSize = maxSize
            outputBBoxSizes.append(bboxSize)

        return outputBBoxSizes

    def bitPackFlags(self, df):
        """Pack requested flag columns in inputRecord into single columns in
        outputRecord.

        Parameters
        ----------
        df : `pandas.DataFrame`
            DataFrame to read bits from and pack them into.
        """
        for outputFlag in self.bit_pack_columns:
            bitList = outputFlag['bitList']
            value = np.zeros(len(df), dtype=np.uint64)
            for bit in bitList:
                # Hard type the bit arrays.
                # NOTE(review): assumes bit positions fit in uint64 (< 64) —
                # the flag map is expected to guarantee this.
                value += (df[bit['name']]*2**bit['bit']).to_numpy().astype(np.uint64)
            df[outputFlag['columnName']] = value

258 

259 

class UnpackApdbFlags:
    """Class for unpacking bits from integer flag fields stored in the Apdb.

    Parameters
    ----------
    flag_map_file : `str`
        Absolute or relative path to a yaml file specifying mappings of flags
        to integer bits. Environment variables in the path are expanded.
    table_name : `str`
        Name of the Apdb table the integer bit data are coming from.
    """

    def __init__(self, flag_map_file, table_name):
        self.bit_pack_columns = []
        flag_map_file = os.path.expandvars(flag_map_file)
        with open(flag_map_file) as yaml_stream:
            table_list = list(yaml.safe_load_all(yaml_stream))
            for table in table_list:
                if table['tableName'] == table_name:
                    self.bit_pack_columns = table['columns']
                    break

        # Mapping: packed column name -> list of (flagName, bool) pairs,
        # usable directly as a numpy structured dtype in unpack().
        self.output_flag_columns = {}
        # Mapping: packed column name -> bit position of each flag, parallel
        # to the entries in output_flag_columns. Previously these positions
        # were discarded and unpack() assumed contiguous bits starting at 0.
        self._flag_bits = {}

        for column in self.bit_pack_columns:
            names = []
            bits = []
            for bit in column["bitList"]:
                names.append((bit["name"], bool))
                bits.append(bit["bit"])
            self.output_flag_columns[column["columnName"]] = names
            self._flag_bits[column["columnName"]] = bits

    def unpack(self, input_flag_values, flag_name):
        """Determine individual boolean flags from an input array of unsigned
        ints.

        Parameters
        ----------
        input_flag_values : array-like of type uint
            Array of integer flags to unpack.
        flag_name : `str`
            Apdb column name of integer flags to unpack. Names of packed int
            flags are given by the flag_map_file.

        Returns
        -------
        output_flags : `numpy.ndarray`
            Structured array of booleans, one field per configured flag.
        """
        bit_names_types = self.output_flag_columns[flag_name]
        output_flags = np.zeros(len(input_flag_values), dtype=bit_names_types)

        # Bug fix: mask with the bit position declared in the flag map
        # (bit['bit']) instead of the enumeration index. The two only
        # coincide when the bitList is ordered and contiguous from bit 0;
        # gaps or reordering in the yaml would previously unpack wrongly.
        bit_positions = self._flag_bits[flag_name]
        for (bit_name, _), bit_position in zip(bit_names_types, bit_positions):
            masked_bits = np.bitwise_and(input_flag_values, 2**bit_position)
            # Nonzero masked values become True in the boolean field.
            output_flags[bit_name] = masked_bits

        return output_flags