
# This file is part of ap_association
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

__all__ = ("TransformDiaSourceCatalogConnections",
           "TransformDiaSourceCatalogConfig",
           "TransformDiaSourceCatalogTask")

import numpy as np
import os
import yaml

from lsst.daf.base import DateTime
import lsst.pex.config as pexConfig
import lsst.pipe.base as pipeBase
import lsst.pipe.base.connectionTypes as connTypes
from lsst.pipe.tasks.postprocess import TransformCatalogBaseTask
from lsst.pipe.tasks.parquetTable import ParquetTable
from lsst.utils import getPackageDir


class TransformDiaSourceCatalogConnections(pipeBase.PipelineTaskConnections,
                                           dimensions=("instrument", "visit", "detector"),
                                           defaultTemplates={"coaddName": "deep", "fakesType": ""}):
    """Butler connections for TransformDiaSourceCatalogTask.
    """
    diaSourceSchema = connTypes.InitInput(
        doc="Schema for DIASource catalog output by ImageDifference.",
        storageClass="SourceCatalog",
        name="{fakesType}{coaddName}Diff_diaSrc_schema",
    )
    diaSourceCat = connTypes.Input(
        doc="Catalog of DiaSources produced during image differencing.",
        name="{fakesType}{coaddName}Diff_diaSrc",
        storageClass="SourceCatalog",
        dimensions=("instrument", "visit", "detector"),
    )
    diffIm = connTypes.Input(
        doc="Difference image on which the DiaSources were detected.",
        name="{fakesType}{coaddName}Diff_differenceExp",
        storageClass="ExposureF",
        dimensions=("instrument", "visit", "detector"),
    )

    diaSourceTable = connTypes.Output(
        doc="Catalog of DiaSources with calibrated values and renamed "
            "columns, ready for insertion into the Apdb.",
        name="{fakesType}{coaddName}Diff_diaSrcTable",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector"),
    )
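
# Note: with the default templates ({"coaddName": "deep", "fakesType": ""}),
# the dataset names above expand to e.g. "deepDiff_diaSrc",
# "deepDiff_differenceExp", and "deepDiff_diaSrcTable".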



class TransformDiaSourceCatalogConfig(pipeBase.PipelineTaskConfig,
                                      pipelineConnections=TransformDiaSourceCatalogConnections):
    """Config for TransformDiaSourceCatalogTask.
    """
    flagMap = pexConfig.Field(
        dtype=str,
        doc="YAML file specifying how Science Pipelines flag fields are "
            "bit-packed into integer columns.",
        default=os.path.join(getPackageDir("ap_association"),
                             "data",
                             "association-flag-map.yaml"),
    )
    functorFile = pexConfig.Field(
        dtype=str,
        doc="Path to YAML file specifying Science DataModel functors to use "
            "when copying columns and computing calibrated values.",
        default=os.path.join(getPackageDir("ap_association"),
                             "data",
                             "DiaSource.yaml"),
    )
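
# The flag-map file named by ``flagMap`` is read with ``yaml.safe_load_all``
# below, so each YAML document describes one table. A sketch of the expected
# shape, with illustrative flag names only (the real mapping lives in
# data/association-flag-map.yaml):
#
#     ---
#     tableName: DiaSource
#     columns:
#     - columnName: flags
#       bitList:
#       - name: base_PixelFlags_flag
#         bit: 0
#       - name: base_PixelFlags_flag_offimage
#         bit: 1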



class TransformDiaSourceCatalogTask(TransformCatalogBaseTask):
    """Apply Science DataModel-ification on the DiaSource afw table.

    This task calibrates and renames columns in the DiaSource catalog
    to ready the catalog for insertion into the Apdb.

    This is a Gen3 Butler only task. It will not run in Gen2.
    """

    ConfigClass = TransformDiaSourceCatalogConfig
    _DefaultName = "transformDiaSourceCatalog"
    RunnerClass = pipeBase.ButlerInitializedTaskRunner

    def __init__(self, initInputs, **kwargs):
        super().__init__(**kwargs)
        self.funcs = self.getFunctors()
        self.inputSchema = initInputs['diaSourceSchema'].schema
        self._create_bit_pack_mappings()

    def _create_bit_pack_mappings(self):
        """Set up all flag bit packings.
        """
        self.bit_pack_columns = []
        with open(self.config.flagMap) as yaml_stream:
            table_list = list(yaml.safe_load_all(yaml_stream))
            for table in table_list:
                if table['tableName'] == 'DiaSource':
                    self.bit_pack_columns = table['columns']
                    break

        # Test that all flags requested are present in the input schema.
        # Output schemas are flexible; however, if names are not specified
        # in the Apdb schema, flag columns will not be persisted.
        for outputFlag in self.bit_pack_columns:
            bitList = outputFlag['bitList']
            for bit in bitList:
                try:
                    self.inputSchema.find(bit['name'])
                except KeyError:
                    raise KeyError(
                        "Requested column %s not found in input DiaSource "
                        "schema. Please check that the requested input "
                        "column exists." % bit['name'])

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)
        # Pack the (visit, detector) dataId into a single unique integer
        # to serve as the ccdVisitId of the output sources.
        expId, expBits = butlerQC.quantum.dataId.pack("visit_detector",
                                                      returnMaxBits=True)
        inputs["ccdVisitId"] = expId
        inputs["band"] = butlerQC.quantum.dataId["band"]

        outputs = self.run(**inputs)

        butlerQC.put(outputs, outputRefs)


    def run(self,
            diaSourceCat,
            diffIm,
            band,
            ccdVisitId,
            funcs=None):
        """Convert the input catalog to a ParquetTable/Pandas DataFrame and
        run the functors.

        Additionally, add new columns that strip information from the
        exposure into the DiaSource catalog.

        Parameters
        ----------
        diaSourceCat : `lsst.afw.table.SourceCatalog`
            Catalog of DiaSources produced during image differencing.
        diffIm : `lsst.afw.image.Exposure`
            Difference image on which the DiaSources were detected.
        band : `str`
            Name of the band the difference image was observed in.
        ccdVisitId : `int`
            Unique id of the visit/detector the DiaSources were detected on.
        funcs : `lsst.pipe.tasks.functors.Functor`, optional
            Functors to apply to the catalog's columns. If `None`, the
            functors loaded from ``config.functorFile`` are used.

        Returns
        -------
        results : `lsst.pipe.base.Struct`
            Results struct with components.

            - ``diaSourceTable`` : Catalog of DiaSources with calibrated
              values and renamed columns.
              (`lsst.pipe.tasks.ParquetTable` or `pandas.DataFrame`)
        """
        if funcs is None:
            funcs = self.funcs
        self.log.info(
            "Transforming/standardizing the DiaSource table ccdVisitId: %i",
            ccdVisitId)

        diaSourceDf = diaSourceCat.asAstropy().to_pandas()
        diaSourceDf["bboxSize"] = self.computeBBoxSizes(diaSourceCat)
        diaSourceDf["ccdVisitId"] = ccdVisitId
        diaSourceDf["filterName"] = band
        diaSourceDf["midPointTai"] = diffIm.getInfo().getVisitInfo().getDate().get(system=DateTime.MJD)
        diaSourceDf["diaObjectId"] = 0
        diaSourceDf["pixelId"] = 0
        self.bitPackFlags(diaSourceDf)

        df = self.transform(band,
                            ParquetTable(dataFrame=diaSourceDf),
                            funcs,
                            dataId=None).df
        return pipeBase.Struct(
            diaSourceTable=df
        )
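
    # A minimal usage sketch (not from the LSST codebase), assuming `task` is
    # a constructed TransformDiaSourceCatalogTask and `catalog`/`exposure`
    # were retrieved, e.g., from a Butler:
    #
    #     >>> result = task.run(diaSourceCat=catalog, diffIm=exposure,
    #     ...                   band="g", ccdVisitId=1234)
    #     >>> df = result.diaSourceTable  # pandas.DataFrame ready for the Apdb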


    def computeBBoxSizes(self, inputCatalog):
        """Compute the size of a square bbox that fully contains the detection
        footprint.

        Parameters
        ----------
        inputCatalog : `lsst.afw.table.SourceCatalog`
            Catalog containing detected footprints.

        Returns
        -------
        outputBBoxSizes : `numpy.ndarray`, (N,)
            Array of bbox sizes.
        """
        outputBBoxSizes = np.empty(len(inputCatalog), dtype=int)
        for idx, record in enumerate(inputCatalog):
            footprintBBox = record.getFootprint().getBBox()
            # Compute twice the size of the largest dimension of the
            # footprint bounding box. This is the largest footprint we
            # should need to cover the complete DiaSource, assuming the
            # centroid is within the bounding box.
            maxSize = 2 * np.max([footprintBBox.getWidth(),
                                  footprintBBox.getHeight()])
            recX = record.getCentroid().x
            recY = record.getCentroid().y
            bboxSize = int(
                np.ceil(2 * np.max(np.fabs([footprintBBox.maxX - recX,
                                            footprintBBox.minX - recX,
                                            footprintBBox.maxY - recY,
                                            footprintBBox.minY - recY]))))
            if bboxSize > maxSize:
                bboxSize = maxSize
            outputBBoxSizes[idx] = bboxSize

        return outputBBoxSizes
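
    # Worked example with hypothetical numbers: for a footprint bbox spanning
    # x in [10, 20] and y in [12, 30] (width 11, height 19) and a centroid at
    # (12.0, 14.0):
    #     maxSize  = 2 * max(11, 19) = 38
    #     bboxSize = ceil(2 * max(|20-12|, |10-12|, |30-14|, |12-14|)) = 32
    # 32 <= 38, so the computed size of 32 is kept.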


    def bitPackFlags(self, df):
        """Pack the requested flag columns into single integer columns,
        following the bit positions given in ``config.flagMap``.

        Parameters
        ----------
        df : `pandas.DataFrame`
            DataFrame to read bits from and pack them into.
        """
        for outputFlag in self.bit_pack_columns:
            bitList = outputFlag['bitList']
            value = np.zeros(len(df), dtype=np.uint64)
            for bit in bitList:
                # Hard type the bit arrays as uint64 so the shifted bits
                # accumulate without overflow or float conversion.
                value += (df[bit['name']]*2**bit['bit']).to_numpy().astype(np.uint64)
            df[outputFlag['columnName']] = value
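
    # A minimal sketch of the packing arithmetic above, with two hypothetical
    # flag columns assigned to bits 0 and 1:
    #
    #     >>> import numpy as np
    #     >>> import pandas as pd
    #     >>> df = pd.DataFrame({"flag_a": [True, False], "flag_b": [True, True]})
    #     >>> value = np.zeros(len(df), dtype=np.uint64)
    #     >>> value += (df["flag_a"]*2**0).to_numpy().astype(np.uint64)
    #     >>> value += (df["flag_b"]*2**1).to_numpy().astype(np.uint64)
    #     >>> value
    #     array([3, 2], dtype=uint64)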



class UnpackApdbFlags:
    """Class for unpacking bits from integer flag fields stored in the Apdb.

    Parameters
    ----------
    flag_map_file : `str`
        Absolute or relative path to a yaml file specifying mappings of flags
        to integer bits.
    table_name : `str`
        Name of the Apdb table the integer bit data are coming from.
    """

    def __init__(self, flag_map_file, table_name):
        self.bit_pack_columns = []
        with open(flag_map_file) as yaml_stream:
            table_list = list(yaml.safe_load_all(yaml_stream))
            for table in table_list:
                if table['tableName'] == table_name:
                    self.bit_pack_columns = table['columns']
                    break

        self.output_flag_columns = {}

        for column in self.bit_pack_columns:
            names = []
            for bit in column["bitList"]:
                names.append((bit["name"], bool))
            self.output_flag_columns[column["columnName"]] = names

    def unpack(self, input_flag_values, flag_name):
        """Determine individual boolean flags from an input array of unsigned
        ints.

        Parameters
        ----------
        input_flag_values : array-like of type uint
            Array of integer flags to unpack.
        flag_name : `str`
            Apdb column name of integer flags to unpack. Names of packed int
            flags are given by the flag_map_file.

        Returns
        -------
        output_flags : `numpy.ndarray`
            Structured array of booleans with one field per flag name.
        """
        bit_names_types = self.output_flag_columns[flag_name]
        output_flags = np.zeros(len(input_flag_values), dtype=bit_names_types)

        # Bits are assumed to be packed consecutively from 0 in the order
        # they appear in the flag map; nonzero masked values become True.
        for bit_idx, (bit_name, _) in enumerate(bit_names_types):
            masked_bits = np.bitwise_and(input_flag_values, 2**bit_idx)
            output_flags[bit_name] = masked_bits

        return output_flags
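
    # A minimal round-trip sketch, using the illustrative flag-map entries
    # shown above (the real names live in data/association-flag-map.yaml):
    #
    #     >>> unpacker = UnpackApdbFlags("association-flag-map.yaml", "DiaSource")
    #     >>> flags = unpacker.unpack(np.array([3], dtype=np.uint64), "flags")
    #     >>> bool(flags["base_PixelFlags_flag"][0])
    #     True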