Coverage for python/lsst/ap/association/transformDiaSourceCatalog.py: 20%

167 statements  

« prev     ^ index     » next       coverage.py v7.2.5, created at 2023-05-13 02:59 -0700

1# This file is part of ap_association 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

# Names exported when this module is imported with ``from ... import *``.
__all__ = ("TransformDiaSourceCatalogConnections",
           "TransformDiaSourceCatalogConfig",
           "TransformDiaSourceCatalogTask",
           "UnpackApdbFlags")

26 

27import numpy as np 

28import os 

29import yaml 

30import pandas as pd 

31 

32from lsst.daf.base import DateTime 

33import lsst.pex.config as pexConfig 

34import lsst.pipe.base as pipeBase 

35import lsst.pipe.base.connectionTypes as connTypes 

36from lsst.meas.base import DetectorVisitIdGeneratorConfig 

37from lsst.pipe.tasks.postprocess import TransformCatalogBaseTask, TransformCatalogBaseConfig 

38from lsst.pipe.tasks.functors import Column 

39from lsst.utils.timer import timeMethod 

40 

41 

class TransformDiaSourceCatalogConnections(pipeBase.PipelineTaskConnections,
                                           dimensions=("instrument", "visit", "detector"),
                                           defaultTemplates={"coaddName": "deep", "fakesType": ""}):
    """Butler connections for TransformDiaSourceCatalogTask.

    The ``spuriousness`` input is optional and is removed from the
    connections when ``config.doIncludeSpuriousness`` is not set.
    """
    diaSourceSchema = connTypes.InitInput(
        doc="Schema for DIASource catalog output by ImageDifference.",
        storageClass="SourceCatalog",
        name="{fakesType}{coaddName}Diff_diaSrc_schema",
    )
    diaSourceCat = connTypes.Input(
        doc="Catalog of DiaSources produced during image differencing.",
        name="{fakesType}{coaddName}Diff_diaSrc",
        storageClass="SourceCatalog",
        dimensions=("instrument", "visit", "detector"),
    )
    diffIm = connTypes.Input(
        doc="Difference image on which the DiaSources were detected.",
        name="{fakesType}{coaddName}Diff_differenceExp",
        storageClass="ExposureF",
        dimensions=("instrument", "visit", "detector"),
    )
    spuriousness = connTypes.Input(
        doc="Spuriousness (e.g. real/bogus) classification of diaSourceCat sources (optional).",
        name="{fakesType}{coaddName}RealBogusSources",
        storageClass="Catalog",
        dimensions=("instrument", "visit", "detector"),
    )
    diaSourceTable = connTypes.Output(
        doc="Catalog of DiaSources with standardized column names and "
            "calibrated values, ready for insertion into the Apdb.",
        name="{fakesType}{coaddName}Diff_diaSrcTable",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector"),
    )

    def __init__(self, *, config=None):
        super().__init__(config=config)
        # Spuriousness is an optional input; drop the connection entirely
        # when the configuration does not request it.
        if not self.config.doIncludeSpuriousness:
            self.inputs.remove("spuriousness")

79 

80 

class TransformDiaSourceCatalogConfig(TransformCatalogBaseConfig,
                                      pipelineConnections=TransformDiaSourceCatalogConnections):
    """Configuration for TransformDiaSourceCatalogTask."""
    flagMap = pexConfig.Field(
        dtype=str,
        doc="Yaml file specifying SciencePipelines flag fields to bit packs.",
        default=os.path.join("${AP_ASSOCIATION_DIR}",
                             "data",
                             "association-flag-map.yaml"),
    )
    flagRenameMap = pexConfig.Field(
        dtype=str,
        doc="Yaml file specifying rules to rename flag names",
        default=os.path.join("${AP_ASSOCIATION_DIR}",
                             "data",
                             "flag-rename-rules.yaml"),
    )
    doRemoveSkySources = pexConfig.Field(
        dtype=bool,
        default=False,
        doc="Input DiaSource catalog contains SkySources that should be "
            "removed before storing the output DiaSource catalog."
    )
    doPackFlags = pexConfig.Field(
        dtype=bool,
        default=True,
        doc="Do pack the flags into one integer column named 'flags'. "
            "If False, instead produce one boolean column per flag."
    )
    doIncludeSpuriousness = pexConfig.Field(
        dtype=bool,
        default=False,
        doc="Include the spuriousness (e.g. real/bogus) classifications in the output."
    )

    idGenerator = DetectorVisitIdGeneratorConfig.make_field()

    def setDefaults(self):
        super().setDefaults()
        # Default functor file shipped with ap_association; defines the
        # column transformations applied in run().
        self.functorFile = os.path.join("${AP_ASSOCIATION_DIR}",
                                        "data",
                                        "DiaSource.yaml")

122 

123 

class TransformDiaSourceCatalogTask(TransformCatalogBaseTask):
    """Transform a DiaSource catalog by calibrating and renaming columns to
    produce a table ready to insert into the Apdb.

    Parameters
    ----------
    initInputs : `dict`
        Must contain ``diaSourceSchema`` as the schema for the input catalog.
    """
    ConfigClass = TransformDiaSourceCatalogConfig
    _DefaultName = "transformDiaSourceCatalog"
    # Needed to create a valid TransformCatalogBaseTask, but unused.
    inputDataset = "deepDiff_diaSrc"
    outputDataset = "deepDiff_diaSrcTable"

    def __init__(self, initInputs, **kwargs):
        super().__init__(**kwargs)
        self.funcs = self.getFunctors()
        self.inputSchema = initInputs['diaSourceSchema'].schema
        self._create_bit_pack_mappings()

        if not self.config.doPackFlags:
            # Load the rules used to rename individual unpacked flag columns.
            with open(os.path.expandvars(self.config.flagRenameMap)) as yaml_stream:
                self.rename_rules = list(yaml.safe_load_all(yaml_stream))

    def _create_bit_pack_mappings(self):
        """Setup all flag bit packings.

        Raises
        ------
        KeyError
            Raised if a flag named in the flag map is missing from the input
            DiaSource schema.
        """
        self.bit_pack_columns = []
        flag_map_file = os.path.expandvars(self.config.flagMap)
        with open(flag_map_file) as yaml_stream:
            table_list = list(yaml.safe_load_all(yaml_stream))
            for table in table_list:
                if table['tableName'] == 'DiaSource':
                    self.bit_pack_columns = table['columns']
                    break

        # Test that all flags requested are present in the input schemas.
        # Output schemas are flexible, however if names are not specified in
        # the Apdb schema, flag columns will not be persisted.
        for outputFlag in self.bit_pack_columns:
            for bit in outputFlag['bitList']:
                try:
                    self.inputSchema.find(bit['name'])
                except KeyError as e:
                    # Chain the original lookup failure for easier debugging.
                    raise KeyError(
                        "Requested column %s not found in input DiaSource "
                        "schema. Please check that the requested input "
                        "column exists." % bit['name']) from e

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)
        # Derive the catalog id and band from the quantum data id so run()
        # itself stays butler-free.
        idGenerator = self.config.idGenerator.apply(butlerQC.quantum.dataId)
        inputs["ccdVisitId"] = idGenerator.catalog_id
        inputs["band"] = butlerQC.quantum.dataId["band"]

        outputs = self.run(**inputs)

        butlerQC.put(outputs, outputRefs)

    @timeMethod
    def run(self,
            diaSourceCat,
            diffIm,
            band,
            ccdVisitId,
            spuriousness=None):
        """Convert input catalog to ParquetTable/Pandas and run functors.

        Additionally, add new columns for stripping information from the
        exposure and into the DiaSource catalog.

        Parameters
        ----------
        diaSourceCat : `lsst.afw.table.SourceCatalog`
            Catalog of sources measured on the difference image.
        diffIm : `lsst.afw.image.Exposure`
            Result of subtracting template and science images.
        band : `str`
            Filter band of the science image.
        ccdVisitId : `int`
            Identifier for this detector+visit.
        spuriousness : `lsst.afw.table.SourceCatalog`, optional
            Spuriousness (e.g. real/bogus) scores, row-matched to
            ``diaSourceCat``. Required when
            ``config.doIncludeSpuriousness`` is set.

        Returns
        -------
        results : `lsst.pipe.base.Struct`
            Results struct with components.

            - ``diaSourceTable`` : Catalog of DiaSources with calibrated values
              and renamed columns.
              (`lsst.pipe.tasks.ParquetTable` or `pandas.DataFrame`)

        Raises
        ------
        TypeError
            Raised if ``config.doIncludeSpuriousness`` is set but no
            ``spuriousness`` catalog was supplied.
        """
        self.log.info(
            "Transforming/standardizing the DiaSource table ccdVisitId: %i",
            ccdVisitId)

        diaSourceDf = diaSourceCat.asAstropy().to_pandas()
        if self.config.doRemoveSkySources:
            # Filter both the DataFrame and the catalog so the per-record
            # computations below stay row-matched.
            diaSourceDf = diaSourceDf[~diaSourceDf["sky_source"]]
            diaSourceCat = diaSourceCat[~diaSourceCat["sky_source"]]

        diaSourceDf["snr"] = getSignificance(diaSourceCat)
        diaSourceDf["bboxSize"] = self.computeBBoxSizes(diaSourceCat)
        diaSourceDf["ccdVisitId"] = ccdVisitId
        diaSourceDf["filterName"] = band
        diaSourceDf["midPointTai"] = diffIm.getInfo().getVisitInfo().getDate().get(system=DateTime.MJD)
        # Placeholder ids; association happens downstream.
        diaSourceDf["diaObjectId"] = 0
        diaSourceDf["ssObjectId"] = 0

        if self.config.doIncludeSpuriousness:
            if spuriousness is None:
                # Fail with a clear message rather than an opaque
                # AttributeError on ``None.asAstropy`` below.
                raise TypeError("config.doIncludeSpuriousness is set but no "
                                "spuriousness catalog was supplied.")
            spuriousnessDf = spuriousness.asAstropy().to_pandas()
            # This uses the pandas index to match scores with diaSources
            # but it will silently fill with NaNs if they don't match.
            diaSourceDf = pd.merge(diaSourceDf, spuriousnessDf,
                                   how="left", on="id", validate="1:1")
            diaSourceDf = diaSourceDf.rename(columns={"score": "spuriousness"})
            if diaSourceDf["spuriousness"].isna().all():
                self.log.warning("Spuriousness identifiers did not match diaSourceIds")
        else:
            diaSourceDf["spuriousness"] = np.float32(np.nan)

        if self.config.doPackFlags:
            # either bitpack the flags
            self.bitPackFlags(diaSourceDf)
        else:
            # or add the individual flag functors
            self.addUnpackedFlagFunctors()
            # and remove the packed flag functor
            if 'flags' in self.funcs.funcDict:
                del self.funcs.funcDict['flags']

        df = self.transform(band,
                            diaSourceDf,
                            self.funcs,
                            dataId=None).df

        return pipeBase.Struct(
            diaSourceTable=df,
        )

    def addUnpackedFlagFunctors(self):
        """Add Column functor for each of the flags to the internal functor
        dictionary.
        """
        for flag in self.bit_pack_columns[0]['bitList']:
            flagName = flag['name']
            targetName = self.funcs.renameCol(flagName, self.rename_rules[0]['flag_rename_rules'])
            self.funcs.update({targetName: Column(flagName)})

    def computeBBoxSizes(self, inputCatalog):
        """Compute the size of a square bbox that fully contains the detection
        footprint.

        Parameters
        ----------
        inputCatalog : `lsst.afw.table.SourceCatalog`
            Catalog containing detected footprints.

        Returns
        -------
        outputBBoxSizes : `np.ndarray`, (N,)
            Array of bbox sizes.
        """
        # Schema validation requires that this field is int.
        outputBBoxSizes = np.empty(len(inputCatalog), dtype=int)
        for i, record in enumerate(inputCatalog):
            footprintBBox = record.getFootprint().getBBox()
            # Compute twice the size of the largest dimension of the footprint
            # bounding box. This is the largest footprint we should need to
            # cover the complete DiaSource assuming the centroid is within the
            # bounding box.
            maxSize = 2 * max(footprintBBox.getWidth(),
                              footprintBBox.getHeight())
            recX = record.getCentroid().x
            recY = record.getCentroid().y
            bboxSize = int(
                np.ceil(2 * np.max(np.fabs([footprintBBox.maxX - recX,
                                            footprintBBox.minX - recX,
                                            footprintBBox.maxY - recY,
                                            footprintBBox.minY - recY]))))
            # Clamp to the footprint-derived maximum.
            outputBBoxSizes[i] = min(bboxSize, maxSize)

        return outputBBoxSizes

    def bitPackFlags(self, df):
        """Pack requested flag columns in inputRecord into single columns in
        outputRecord.

        Parameters
        ----------
        df : `pandas.DataFrame`
            DataFrame to read bits from and pack them into.
        """
        for outputFlag in self.bit_pack_columns:
            value = np.zeros(len(df), dtype=np.uint64)
            for bit in outputFlag['bitList']:
                # Hard type the bit arrays; boolean flag columns scaled by
                # their bit weight are summed into a single uint64 per row.
                value += (df[bit['name']]*2**bit['bit']).to_numpy().astype(np.uint64)
            df[outputFlag['columnName']] = value

331 

332 

class UnpackApdbFlags:
    """Class for unpacking bits from integer flag fields stored in the Apdb.

    Attributes
    ----------
    flag_map_file : `str`
        Absolute or relative path to a yaml file specifying mappings of flags
        to integer bits.
    table_name : `str`
        Name of the Apdb table the integer bit data are coming from.
    """

    def __init__(self, flag_map_file, table_name):
        self.bit_pack_columns = []
        flag_map_file = os.path.expandvars(flag_map_file)
        with open(flag_map_file) as yaml_stream:
            table_list = list(yaml.safe_load_all(yaml_stream))
            for table in table_list:
                if table['tableName'] == table_name:
                    self.bit_pack_columns = table['columns']
                    break

        # Map packed column name -> list of (flag name, bool) dtype pairs,
        # in flag-map order, used as the structured dtype of unpack()'s
        # output.
        self.output_flag_columns = {}

        for column in self.bit_pack_columns:
            names = []
            for bit in column["bitList"]:
                names.append((bit["name"], bool))
            self.output_flag_columns[column["columnName"]] = names

    def unpack(self, input_flag_values, flag_name):
        """Determine individual boolean flags from an input array of unsigned
        ints.

        Parameters
        ----------
        input_flag_values : array-like of type uint
            Array of integer flags to unpack.
        flag_name : `str`
            Apdb column name of integer flags to unpack. Names of packed int
            flags are given by the flag_map_file.

        Returns
        -------
        output_flags : `numpy.ndarray`
            Numpy named tuple of booleans.
        """
        bit_names_types = self.output_flag_columns[flag_name]
        output_flags = np.zeros(len(input_flag_values), dtype=bit_names_types)

        # Look up the true bit position of each flag from the flag map
        # rather than assuming a flag's list position equals its bit
        # number; the two differ whenever the map's bits are sparse or
        # out of order.
        bit_positions = {}
        for column in self.bit_pack_columns:
            if column['columnName'] == flag_name:
                bit_positions = {bit['name']: bit['bit']
                                 for bit in column['bitList']}
                break

        for bit_name, _ in bit_names_types:
            masked_bits = np.bitwise_and(input_flag_values,
                                         2**bit_positions[bit_name])
            output_flags[bit_name] = masked_bits

        return output_flags

    def flagExists(self, flagName, columnName='flags'):
        """Check if named flag is in the bitpacked flag set.

        Parameters:
        ----------
        flagName : `str`
            Flag name to search for.
        columnName : `str`, optional
            Name of bitpacked flag column to search in.

        Returns
        -------
        flagExists : `bool`
            `True` if `flagName` is present in `columnName`.

        Raises
        ------
        ValueError
            Raised if `columnName` is not defined.
        """
        if columnName not in self.output_flag_columns:
            raise ValueError(f'column {columnName} not in flag map: {self.output_flag_columns}')

        return flagName in [c[0] for c in self.output_flag_columns[columnName]]

    def makeFlagBitMask(self, flagNames, columnName='flags'):
        """Return a bitmask corresponding to the supplied flag names.

        Parameters:
        ----------
        flagNames : `list` [`str`]
            Flag names to include in the bitmask.
        columnName : `str`, optional
            Name of bitpacked flag column.

        Returns
        -------
        bitmask : `np.uint64`
            Bitmask corresponding to the supplied flag names given the loaded
            configuration.

        Raises
        ------
        ValueError
            Raised if a flag in `flagName` is not included in `columnName`.
        """
        bitmask = np.uint64(0)

        for flag in flagNames:
            if not self.flagExists(flag, columnName=columnName):
                raise ValueError(f"flag '{flag}' not included in '{columnName}' flag column")

        for outputFlag in self.bit_pack_columns:
            if outputFlag['columnName'] == columnName:
                for bit in outputFlag['bitList']:
                    if bit['name'] in flagNames:
                        bitmask += np.uint64(2**bit['bit'])

        return bitmask

448 

449 

def getSignificance(catalog):
    """Return the significance value of the first peak in each source
    footprint, or NaN for sources whose peaks carry no significance field
    or that have no peaks at all.

    Parameters
    ----------
    catalog : `lsst.afw.table.SourceCatalog`
        Catalog to process.

    Returns
    -------
    significance : `np.ndarray`, (N,)
        Significance of the first peak in each source footprint.
    """
    result = np.full(len(catalog), np.nan)
    for i, record in enumerate(catalog):
        peaks = record.getFootprint().peaks
        # Guard against an empty peak list: leave NaN instead of raising
        # IndexError on peaks[0].
        if len(peaks) > 0 and "significance" in peaks.schema:
            result[i] = peaks[0]["significance"]
    return result