Coverage for python/lsst/pipe/tasks/postprocess.py: 32%

695 statements  

coverage.py v6.4.4, created at 2022-09-12 01:27 -0700

1# This file is part of pipe_tasks 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

22import functools 

23import pandas as pd 

24import logging 

25import numpy as np 

26import numbers 

27import os 

28 

29import lsst.geom 

30import lsst.pex.config as pexConfig 

31import lsst.pipe.base as pipeBase 

32import lsst.daf.base as dafBase 

33from lsst.obs.base import ExposureIdInfo 

34from lsst.pipe.base import connectionTypes 

35import lsst.afw.table as afwTable 

36from lsst.meas.base import SingleFrameMeasurementTask 

37from lsst.daf.butler import DeferredDatasetHandle, DataCoordinate 

38from lsst.skymap import BaseSkyMap 

39 

40from .parquetTable import ParquetTable 

41from .functors import CompositeFunctor, Column 

42 

43log = logging.getLogger(__name__) 

44 

45 

46def flattenFilters(df, noDupCols=['coord_ra', 'coord_dec'], camelCase=False, inputBands=None): 

47 """Flattens a dataframe with multilevel column index. 

48 """ 

49 newDf = pd.DataFrame() 

50 # band is the level 0 index 

51 dfBands = df.columns.unique(level=0).values 

52 for band in dfBands: 

53 subdf = df[band] 

54 columnFormat = '{0}{1}' if camelCase else '{0}_{1}' 

55 newColumns = {c: columnFormat.format(band, c) 

56 for c in subdf.columns if c not in noDupCols} 

57 cols = list(newColumns.keys()) 

58 newDf = pd.concat([newDf, subdf[cols].rename(columns=newColumns)], axis=1) 

59 

60 # Band must be present in the input and output or else column is all NaN: 

61 presentBands = dfBands if inputBands is None else list(set(inputBands).intersection(dfBands)) 

62 # Get the unexploded columns from any present band's partition 

63 noDupDf = df[presentBands[0]][noDupCols] 

64 newDf = pd.concat([noDupDf, newDf], axis=1) 

65 return newDf 

66 
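# Illustrative sketch (not part of the pipeline): how flattenFilters reshapes a
# band-keyed MultiIndex DataFrame. The toy column names below are invented;
# real Object tables use the science data model column names.
def _flattenFiltersExample():
    """Build a tiny two-band DataFrame and flatten it.

    Returns a DataFrame with columns like ``g_psfFlux``/``r_psfFlux`` (or
    ``gPsfFlux``/``rPsfFlux`` with ``camelCase=True``), plus a single copy of
    ``coord_ra``/``coord_dec`` taken from one of the present bands.
    """
    columns = pd.MultiIndex.from_product([['g', 'r'],
                                          ['coord_ra', 'coord_dec', 'psfFlux']],
                                         names=('band', 'column'))
    df = pd.DataFrame(np.arange(12, dtype=float).reshape(2, 6), columns=columns)
    return flattenFilters(df, camelCase=False, inputBands=['g', 'r'])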

67 

68class WriteObjectTableConnections(pipeBase.PipelineTaskConnections, 

69 defaultTemplates={"coaddName": "deep"}, 

70 dimensions=("tract", "patch", "skymap")): 

71 inputCatalogMeas = connectionTypes.Input( 

72 doc="Catalog of source measurements on the deepCoadd.", 

73 dimensions=("tract", "patch", "band", "skymap"), 

74 storageClass="SourceCatalog", 

75 name="{coaddName}Coadd_meas", 

76 multiple=True 

77 ) 

78 inputCatalogForcedSrc = connectionTypes.Input( 

79 doc="Catalog of forced measurements (shape and position parameters held fixed) on the deepCoadd.", 

80 dimensions=("tract", "patch", "band", "skymap"), 

81 storageClass="SourceCatalog", 

82 name="{coaddName}Coadd_forced_src", 

83 multiple=True 

84 ) 

85 inputCatalogRef = connectionTypes.Input( 

86 doc="Catalog marking the primary detection (which band provides a good shape and position) " 

87 "for each detection in deepCoadd_mergeDet.", 

88 dimensions=("tract", "patch", "skymap"), 

89 storageClass="SourceCatalog", 

90 name="{coaddName}Coadd_ref" 

91 ) 

92 outputCatalog = connectionTypes.Output( 

93 doc="A vertical concatenation of the deepCoadd_{ref|meas|forced_src} catalogs, " 

94 "stored as a DataFrame with a multi-level column index per-patch.", 

95 dimensions=("tract", "patch", "skymap"), 

96 storageClass="DataFrame", 

97 name="{coaddName}Coadd_obj" 

98 ) 

99 

100 

101class WriteObjectTableConfig(pipeBase.PipelineTaskConfig, 

102 pipelineConnections=WriteObjectTableConnections): 

103 engine = pexConfig.Field( 

104 dtype=str, 

105 default="pyarrow", 

106 doc="Parquet engine for writing (pyarrow or fastparquet)" 

107 ) 

108 coaddName = pexConfig.Field( 

109 dtype=str, 

110 default="deep", 

111 doc="Name of coadd" 

112 ) 

113 

114 

115class WriteObjectTableTask(pipeBase.PipelineTask): 

116 """Write filter-merged source tables to parquet 

117 """ 

118 _DefaultName = "writeObjectTable" 

119 ConfigClass = WriteObjectTableConfig 

120 

121 # Names of table datasets to be merged 

122 inputDatasets = ('forced_src', 'meas', 'ref') 

123 

124 # Tag of output dataset written by `MergeSourcesTask.write` 

125 outputDataset = 'obj' 

126 

127 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

128 inputs = butlerQC.get(inputRefs) 

129 

130 measDict = {ref.dataId['band']: {'meas': cat} for ref, cat in 

131 zip(inputRefs.inputCatalogMeas, inputs['inputCatalogMeas'])} 

132 forcedSourceDict = {ref.dataId['band']: {'forced_src': cat} for ref, cat in 

133 zip(inputRefs.inputCatalogForcedSrc, inputs['inputCatalogForcedSrc'])} 

134 

135 catalogs = {} 

136 for band in measDict.keys(): 

137 catalogs[band] = {'meas': measDict[band]['meas'], 

138 'forced_src': forcedSourceDict[band]['forced_src'], 

139 'ref': inputs['inputCatalogRef']} 

140 dataId = butlerQC.quantum.dataId 

141 df = self.run(catalogs=catalogs, tract=dataId['tract'], patch=dataId['patch']) 

142 outputs = pipeBase.Struct(outputCatalog=df) 

143 butlerQC.put(outputs, outputRefs) 

144 

145 def run(self, catalogs, tract, patch): 

146 """Merge multiple catalogs. 

147 

148 Parameters 

149 ---------- 

150 catalogs : `dict` 

151 Mapping from filter names to dict of catalogs. 

152 tract : int 

153 tractId to use for the tractId column. 

154 patch : str 

155 patchId to use for the patchId column. 

156 

157 Returns 

158 ------- 

159 catalog : `pandas.DataFrame` 

160 Merged dataframe. 

161 """ 

162 

163 dfs = [] 

164 for filt, tableDict in catalogs.items(): 

165 for dataset, table in tableDict.items(): 

166 # Convert afwTable to pandas DataFrame 

167 df = table.asAstropy().to_pandas().set_index('id', drop=True) 

168 

169 # Sort columns by name, to ensure matching schema among patches 

170 df = df.reindex(sorted(df.columns), axis=1) 

171 df['tractId'] = tract 

172 df['patchId'] = patch 

173 

174 # Make columns a 3-level MultiIndex 

175 df.columns = pd.MultiIndex.from_tuples([(dataset, filt, c) for c in df.columns], 

176 names=('dataset', 'band', 'column')) 

177 dfs.append(df) 

178 

179 catalog = functools.reduce(lambda d1, d2: d1.join(d2), dfs) 

180 return catalog 

181 
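# Illustrative sketch (not part of the pipeline): the 3-level column index that
# WriteObjectTableTask.run builds. The per-catalog frames share the same object
# id index, so a plain join yields one wide table per patch. The column names
# below are invented for the example.
def _objMultiIndexExample():
    """Join two toy single-dataset frames into a (dataset, band, column) table."""
    index = pd.Index([1, 2, 3], name='id')
    meas = pd.DataFrame({'flux': [1.0, 2.0, 3.0]}, index=index)
    ref = pd.DataFrame({'detect_isPrimary': [True, False, True]}, index=index)
    meas.columns = pd.MultiIndex.from_tuples([('meas', 'g', c) for c in meas.columns],
                                             names=('dataset', 'band', 'column'))
    ref.columns = pd.MultiIndex.from_tuples([('ref', 'g', c) for c in ref.columns],
                                            names=('dataset', 'band', 'column'))
    # Same reduction pattern as run(): successive index-aligned joins.
    return functools.reduce(lambda d1, d2: d1.join(d2), [meas, ref])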

182 

183class WriteSourceTableConnections(pipeBase.PipelineTaskConnections, 

184 defaultTemplates={"catalogType": ""}, 

185 dimensions=("instrument", "visit", "detector")): 

186 

187 catalog = connectionTypes.Input( 

188 doc="Input full-depth catalog of sources produced by CalibrateTask", 

189 name="{catalogType}src", 

190 storageClass="SourceCatalog", 

191 dimensions=("instrument", "visit", "detector") 

192 ) 

193 outputCatalog = connectionTypes.Output( 

194 doc="Catalog of sources, `src` in Parquet format. The 'id' column is " 

195 "replaced with an index; all other columns are unchanged.", 

196 name="{catalogType}source", 

197 storageClass="DataFrame", 

198 dimensions=("instrument", "visit", "detector") 

199 ) 

200 

201 

202class WriteSourceTableConfig(pipeBase.PipelineTaskConfig, 

203 pipelineConnections=WriteSourceTableConnections): 

204 pass 

205 

206 

207class WriteSourceTableTask(pipeBase.PipelineTask): 

208 """Write source table to parquet. 

209 """ 

210 _DefaultName = "writeSourceTable" 

211 ConfigClass = WriteSourceTableConfig 

212 

213 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

214 inputs = butlerQC.get(inputRefs) 

215 inputs['ccdVisitId'] = butlerQC.quantum.dataId.pack("visit_detector") 

216 result = self.run(**inputs).table 

217 outputs = pipeBase.Struct(outputCatalog=result.toDataFrame()) 

218 butlerQC.put(outputs, outputRefs) 

219 

220 def run(self, catalog, ccdVisitId=None, **kwargs): 

221 """Convert `src` catalog to parquet 

222 

223 Parameters 

224 ---------- 

225 catalog : `afwTable.SourceCatalog` 

226 Catalog to be converted. 

227 ccdVisitId : `int` 

228 ccdVisitId to be added as a column. 

229 

230 Returns 

231 ------- 

232 result : `lsst.pipe.base.Struct` 

233 ``table`` 

234 `ParquetTable` version of the input catalog 

235 """ 

236 self.log.info("Generating parquet table from src catalog ccdVisitId=%s", ccdVisitId) 

237 df = catalog.asAstropy().to_pandas().set_index('id', drop=True) 

238 df['ccdVisitId'] = ccdVisitId 

239 return pipeBase.Struct(table=ParquetTable(dataFrame=df)) 

240 

241 

242class WriteRecalibratedSourceTableConnections(WriteSourceTableConnections, 

243 defaultTemplates={"catalogType": "", 

244 "skyWcsName": "jointcal", 

245 "photoCalibName": "fgcm"}, 

246 dimensions=("instrument", "visit", "detector", "skymap")): 

247 skyMap = connectionTypes.Input( 

248 doc="skyMap needed to choose which tract-level calibrations to use when multiple available", 

249 name=BaseSkyMap.SKYMAP_DATASET_TYPE_NAME, 

250 storageClass="SkyMap", 

251 dimensions=("skymap",), 

252 ) 

253 exposure = connectionTypes.Input( 

254 doc="Input exposure to perform photometry on.", 

255 name="calexp", 

256 storageClass="ExposureF", 

257 dimensions=["instrument", "visit", "detector"], 

258 ) 

259 externalSkyWcsTractCatalog = connectionTypes.Input( 

260 doc=("Per-tract, per-visit wcs calibrations. These catalogs use the detector " 

261 "id for the catalog id, sorted on id for fast lookup."), 

262 name="{skyWcsName}SkyWcsCatalog", 

263 storageClass="ExposureCatalog", 

264 dimensions=["instrument", "visit", "tract"], 

265 multiple=True 

266 ) 

267 externalSkyWcsGlobalCatalog = connectionTypes.Input( 

268 doc=("Per-visit wcs calibrations computed globally (with no tract information). " 

269 "These catalogs use the detector id for the catalog id, sorted on id for " 

270 "fast lookup."), 

271 name="{skyWcsName}SkyWcsCatalog", 

272 storageClass="ExposureCatalog", 

273 dimensions=["instrument", "visit"], 

274 ) 

275 externalPhotoCalibTractCatalog = connectionTypes.Input( 

276 doc=("Per-tract, per-visit photometric calibrations. These catalogs use the " 

277 "detector id for the catalog id, sorted on id for fast lookup."), 

278 name="{photoCalibName}PhotoCalibCatalog", 

279 storageClass="ExposureCatalog", 

280 dimensions=["instrument", "visit", "tract"], 

281 multiple=True 

282 ) 

283 externalPhotoCalibGlobalCatalog = connectionTypes.Input( 

284 doc=("Per-visit photometric calibrations computed globally (with no tract " 

285 "information). These catalogs use the detector id for the catalog id, " 

286 "sorted on id for fast lookup."), 

287 name="{photoCalibName}PhotoCalibCatalog", 

288 storageClass="ExposureCatalog", 

289 dimensions=["instrument", "visit"], 

290 ) 

291 

292 def __init__(self, *, config=None): 

293 super().__init__(config=config) 

294 # Same connection boilerplate as all other applications of 

295 # Global/Tract calibrations 

296 if config.doApplyExternalSkyWcs and config.doReevaluateSkyWcs: 

297 if config.useGlobalExternalSkyWcs: 

298 self.inputs.remove("externalSkyWcsTractCatalog") 

299 else: 

300 self.inputs.remove("externalSkyWcsGlobalCatalog") 

301 else: 

302 self.inputs.remove("externalSkyWcsTractCatalog") 

303 self.inputs.remove("externalSkyWcsGlobalCatalog") 

304 if config.doApplyExternalPhotoCalib and config.doReevaluatePhotoCalib: 

305 if config.useGlobalExternalPhotoCalib: 

306 self.inputs.remove("externalPhotoCalibTractCatalog") 

307 else: 

308 self.inputs.remove("externalPhotoCalibGlobalCatalog") 

309 else: 

310 self.inputs.remove("externalPhotoCalibTractCatalog") 

311 self.inputs.remove("externalPhotoCalibGlobalCatalog") 

312 

313 

314class WriteRecalibratedSourceTableConfig(WriteSourceTableConfig, 

315 pipelineConnections=WriteRecalibratedSourceTableConnections): 

316 

317 doReevaluatePhotoCalib = pexConfig.Field( 

318 dtype=bool, 

319 default=True, 

320 doc=("Add or replace local photoCalib columns") 

321 ) 

322 doReevaluateSkyWcs = pexConfig.Field( 

323 dtype=bool, 

324 default=True, 

325 doc=("Add or replace local WCS columns and update the coord columns, coord_ra and coord_dec") 

326 ) 

327 doApplyExternalPhotoCalib = pexConfig.Field( 

328 dtype=bool, 

329 default=True, 

330 doc=("If and only if doReevaluatePhotoCalib, apply the photometric calibrations from an external " 

331 "algorithm such as FGCM or jointcal, else use the photoCalib already attached to the exposure."), 

332 ) 

333 doApplyExternalSkyWcs = pexConfig.Field( 

334 dtype=bool, 

335 default=True, 

336 doc=("If and only if doReevaluateSkyWcs, apply the WCS from an external algorithm such as jointcal, " 

337 "else use the wcs already attached to the exposure."), 

338 ) 

339 useGlobalExternalPhotoCalib = pexConfig.Field( 

340 dtype=bool, 

341 default=True, 

342 doc=("When using doApplyExternalPhotoCalib, use 'global' calibrations " 

343 "that are not run per-tract. When False, use per-tract photometric " 

344 "calibration files.") 

345 ) 

346 useGlobalExternalSkyWcs = pexConfig.Field( 

347 dtype=bool, 

348 default=False, 

349 doc=("When using doApplyExternalSkyWcs, use 'global' calibrations " 

350 "that are not run per-tract. When False, use per-tract wcs " 

351 "files.") 

352 ) 

353 

354 def validate(self): 

355 super().validate() 

356 if self.doApplyExternalSkyWcs and not self.doReevaluateSkyWcs: 

357 log.warning("doApplyExternalSkyWcs=True but doReevaluateSkyWcs=False. " 

358 "External SkyWcs will not be read or evaluated.") 

359 if self.doApplyExternalPhotoCalib and not self.doReevaluatePhotoCalib: 

360 log.warning("doApplyExternalPhotoCalib=True but doReevaluatePhotoCalib=False. " 

361 "External PhotoCalib will not be read or evaluated.") 

362 

363 

364class WriteRecalibratedSourceTableTask(WriteSourceTableTask): 

365 """Write source table to parquet 

366 """ 

367 _DefaultName = "writeRecalibratedSourceTable" 

368 ConfigClass = WriteRecalibratedSourceTableConfig 

369 

370 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

371 inputs = butlerQC.get(inputRefs) 

372 inputs['ccdVisitId'] = butlerQC.quantum.dataId.pack("visit_detector") 

373 inputs['exposureIdInfo'] = ExposureIdInfo.fromDataId(butlerQC.quantum.dataId, "visit_detector") 

374 

375 if self.config.doReevaluatePhotoCalib or self.config.doReevaluateSkyWcs: 

376 if self.config.doApplyExternalPhotoCalib or self.config.doApplyExternalSkyWcs: 

377 inputs['exposure'] = self.attachCalibs(inputRefs, **inputs) 

378 

379 inputs['catalog'] = self.addCalibColumns(**inputs) 

380 

381 result = self.run(**inputs).table 

382 outputs = pipeBase.Struct(outputCatalog=result.toDataFrame()) 

383 butlerQC.put(outputs, outputRefs) 

384 

385 def attachCalibs(self, inputRefs, skyMap, exposure, externalSkyWcsGlobalCatalog=None, 

386 externalSkyWcsTractCatalog=None, externalPhotoCalibGlobalCatalog=None, 

387 externalPhotoCalibTractCatalog=None, **kwargs): 

388 """Apply external calibrations to exposure per configuration 

389 

390 When multiple tract-level calibrations overlap, select the one whose 

391 center is closest to the detector center. 

392 

393 Parameters 

394 ---------- 

395 inputRefs : `lsst.pipe.base.InputQuantizedConnection` 

396 Input references, used for the dataIds of the tract-level calibs. 

397 skyMap : `lsst.skymap.SkyMap` 

398 exposure : `lsst.afw.image.exposure.Exposure` 

399 Input exposure to adjust calibrations. 

400 externalSkyWcsGlobalCatalog : `lsst.afw.table.ExposureCatalog`, optional 

401 Exposure catalog with external skyWcs to be applied per config 

402 externalSkyWcsTractCatalog : `lsst.afw.table.ExposureCatalog`, optional 

403 Exposure catalog with external skyWcs to be applied per config 

404 externalPhotoCalibGlobalCatalog : `lsst.afw.table.ExposureCatalog`, optional 

405 Exposure catalog with external photoCalib to be applied per config 

406 externalPhotoCalibTractCatalog : `lsst.afw.table.ExposureCatalog`, optional 

407 Exposure catalog with external photoCalib to be applied per config 

408 

409 Returns 

410 ------- 

411 exposure : `lsst.afw.image.exposure.Exposure` 

412 Exposure with adjusted calibrations. 

413 """ 

414 if not self.config.doApplyExternalSkyWcs: 

415 # Do not modify the exposure's SkyWcs 

416 externalSkyWcsCatalog = None 

417 elif self.config.useGlobalExternalSkyWcs: 

418 # Use the global external SkyWcs 

419 externalSkyWcsCatalog = externalSkyWcsGlobalCatalog 

420 self.log.info('Applying global SkyWcs') 

421 else: 

422 # use tract-level external SkyWcs from the closest overlapping tract 

423 inputRef = getattr(inputRefs, 'externalSkyWcsTractCatalog') 

424 tracts = [ref.dataId['tract'] for ref in inputRef] 

425 if len(tracts) == 1: 

426 ind = 0 

427 self.log.info('Applying tract-level SkyWcs from tract %s', tracts[ind]) 

428 else: 

429 ind = self.getClosestTract(tracts, skyMap, 

430 exposure.getBBox(), exposure.getWcs()) 

431 self.log.info('Multiple overlapping externalSkyWcsTractCatalogs found (%s). ' 

432 'Applying closest to detector center: tract=%s', str(tracts), tracts[ind]) 

433 

434 externalSkyWcsCatalog = externalSkyWcsTractCatalog[ind] 

435 

436 if not self.config.doApplyExternalPhotoCalib: 

437 # Do not modify the exposure's PhotoCalib 

438 externalPhotoCalibCatalog = None 

439 elif self.config.useGlobalExternalPhotoCalib: 

440 # Use the global external PhotoCalib 

441 externalPhotoCalibCatalog = externalPhotoCalibGlobalCatalog 

442 self.log.info('Applying global PhotoCalib') 

443 else: 

444 # use tract-level external PhotoCalib from the closest overlapping tract 

445 inputRef = getattr(inputRefs, 'externalPhotoCalibTractCatalog') 

446 tracts = [ref.dataId['tract'] for ref in inputRef] 

447 if len(tracts) == 1: 

448 ind = 0 

449 self.log.info('Applying tract-level PhotoCalib from tract %s', tracts[ind]) 

450 else: 

451 ind = self.getClosestTract(tracts, skyMap, 

452 exposure.getBBox(), exposure.getWcs()) 

453 self.log.info('Multiple overlapping externalPhotoCalibTractCatalogs found (%s). ' 

454 'Applying closest to detector center: tract=%s', str(tracts), tracts[ind]) 

455 

456 externalPhotoCalibCatalog = externalPhotoCalibTractCatalog[ind] 

457 

458 return self.prepareCalibratedExposure(exposure, externalSkyWcsCatalog, externalPhotoCalibCatalog) 

459 

460 def getClosestTract(self, tracts, skyMap, bbox, wcs): 

461 """Find the index of the tract closest to the detector from a list of tractIds. 

462 

463 Parameters 

464 ---------- 

465 tracts : `list` [`int`] 

466 Iterable of integer tractIds 

467 skyMap : `lsst.skymap.SkyMap` 

468 skyMap to lookup tract geometry and wcs 

469 bbox : `lsst.geom.Box2I` 

470 Detector bbox, the center of which will be compared to tract centers 

471 wcs : `lsst.afw.geom.SkyWcs` 

472 Detector Wcs object to map the detector center to SkyCoord 

473 

474 Returns 

475 ------- 

476 index : `int` 

477 """ 

478 if len(tracts) == 1: 

479 return 0 

480 

481 center = wcs.pixelToSky(bbox.getCenter()) 

482 sep = [] 

483 for tractId in tracts: 

484 tract = skyMap[tractId] 

485 tractCenter = tract.getWcs().pixelToSky(tract.getBBox().getCenter()) 

486 sep.append(center.separation(tractCenter)) 

487 

488 return np.argmin(sep) 

489 

490 def prepareCalibratedExposure(self, exposure, externalSkyWcsCatalog=None, externalPhotoCalibCatalog=None): 

491 """Prepare a calibrated exposure and apply external calibrations 

492 if so configured. 

493 

494 Parameters 

495 ---------- 

496 exposure : `lsst.afw.image.exposure.Exposure` 

497 Input exposure to adjust calibrations. 

498 externalSkyWcsCatalog : `lsst.afw.table.ExposureCatalog`, optional 

499 Exposure catalog with external skyWcs to be applied 

500 if config.doApplyExternalSkyWcs=True. Catalog uses the detector id 

501 for the catalog id, sorted on id for fast lookup. 

502 externalPhotoCalibCatalog : `lsst.afw.table.ExposureCatalog`, optional 

503 Exposure catalog with external photoCalib to be applied 

504 if config.doApplyExternalPhotoCalib=True. Catalog uses the detector 

505 id for the catalog id, sorted on id for fast lookup. 

506 

507 Returns 

508 ------- 

509 exposure : `lsst.afw.image.exposure.Exposure` 

510 Exposure with adjusted calibrations. 

511 """ 

512 detectorId = exposure.getInfo().getDetector().getId() 

513 

514 if externalPhotoCalibCatalog is not None: 

515 row = externalPhotoCalibCatalog.find(detectorId) 

516 if row is None: 

517 self.log.warning("Detector id %s not found in externalPhotoCalibCatalog; " 

518 "Using original photoCalib.", detectorId) 

519 else: 

520 photoCalib = row.getPhotoCalib() 

521 if photoCalib is None: 

522 self.log.warning("Detector id %s has None for photoCalib in externalPhotoCalibCatalog; " 

523 "Using original photoCalib.", detectorId) 

524 else: 

525 exposure.setPhotoCalib(photoCalib) 

526 

527 if externalSkyWcsCatalog is not None: 

528 row = externalSkyWcsCatalog.find(detectorId) 

529 if row is None: 

530 self.log.warning("Detector id %s not found in externalSkyWcsCatalog; " 

531 "Using original skyWcs.", detectorId) 

532 else: 

533 skyWcs = row.getWcs() 

534 if skyWcs is None: 

535 self.log.warning("Detector id %s has None for skyWcs in externalSkyWcsCatalog; " 

536 "Using original skyWcs.", detectorId) 

537 else: 

538 exposure.setWcs(skyWcs) 

539 

540 return exposure 

541 

542 def addCalibColumns(self, catalog, exposure, exposureIdInfo, **kwargs): 

543 """Add or replace columns with calibs evaluated at each centroid. 

544 

545 Add or replace ``base_LocalWcs`` and ``base_LocalPhotoCalib`` columns in 

546 a source catalog, by rerunning the plugins. 

547 

548 Parameters 

549 ---------- 

550 catalog : `lsst.afw.table.SourceCatalog` 

551 catalog to which calib columns will be added 

552 exposure : `lsst.afw.image.exposure.Exposure` 

553 Exposure with attached PhotoCalibs and SkyWcs attributes to be 

554 reevaluated at local centroids. Pixels are not required. 

555 exposureIdInfo : `lsst.obs.base.ExposureIdInfo` 

556 Exposure ID information; its ``expId`` is passed to the measurement run. 

557 Returns 

558 ------- 

559 newCat : `lsst.afw.table.SourceCatalog` 

560 Source Catalog with requested local calib columns 

561 """ 

562 measureConfig = SingleFrameMeasurementTask.ConfigClass() 

563 measureConfig.doReplaceWithNoise = False 

564 

565 # Clear all slots, because we aren't running the relevant plugins. 

566 for slot in measureConfig.slots: 

567 setattr(measureConfig.slots, slot, None) 

568 

569 measureConfig.plugins.names = [] 

570 if self.config.doReevaluateSkyWcs: 

571 measureConfig.plugins.names.add('base_LocalWcs') 

572 self.log.info("Re-evaluating base_LocalWcs plugin") 

573 if self.config.doReevaluatePhotoCalib: 

574 measureConfig.plugins.names.add('base_LocalPhotoCalib') 

575 self.log.info("Re-evaluating base_LocalPhotoCalib plugin") 

576 pluginsNotToCopy = tuple(measureConfig.plugins.names) 

577 

578 # Create a new schema and catalog 

579 # Copy all columns from original except for the ones to reevaluate 

580 aliasMap = catalog.schema.getAliasMap() 

581 mapper = afwTable.SchemaMapper(catalog.schema) 

582 for item in catalog.schema: 

583 if not item.field.getName().startswith(pluginsNotToCopy): 

584 mapper.addMapping(item.key) 

585 

586 schema = mapper.getOutputSchema() 

587 measurement = SingleFrameMeasurementTask(config=measureConfig, schema=schema) 

588 schema.setAliasMap(aliasMap) 

589 newCat = afwTable.SourceCatalog(schema) 

590 newCat.extend(catalog, mapper=mapper) 

591 

592 # Fluxes in sourceCatalogs are in counts, so there are no fluxes to 

593 # update here. LocalPhotoCalibs are applied during transform tasks. 

594 # Update coord_ra/coord_dec, which are expected to be positions on the 

595 # sky and are used as such in sdm tables without transform 

596 if self.config.doReevaluateSkyWcs: 

597 afwTable.updateSourceCoords(exposure.wcs, newCat) 

598 

599 measurement.run(measCat=newCat, exposure=exposure, exposureId=exposureIdInfo.expId) 

600 

601 return newCat 

602 
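# Illustrative sketch (not part of the pipeline): the detector-id-keyed
# ExposureCatalog lookup that prepareCalibratedExposure relies on. Requires the
# LSST afw stack; the detector ids and calibration value are invented.
def _externalCalibCatalogExample():
    """Build a tiny ExposureCatalog keyed by detector id and look one up."""
    import lsst.afw.image as afwImage

    schema = afwTable.ExposureTable.makeMinimalSchema()
    catalog = afwTable.ExposureCatalog(schema)
    for detectorId in (31, 4, 17):
        record = catalog.addNew()
        record.setId(detectorId)
        record.setPhotoCalib(afwImage.PhotoCalib(1.5e-2))
    # find() assumes the catalog is sorted on id, as the connection docs state.
    catalog.sort()
    row = catalog.find(17)
    return None if row is None else row.getPhotoCalib()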

603 

604class PostprocessAnalysis(object): 

605 """Calculate columns from ParquetTable. 

606 

607 This object manages and organizes an arbitrary set of computations 

608 on a catalog. The catalog is defined by a 

609 `lsst.pipe.tasks.parquetTable.ParquetTable` object (or list thereof), such 

610 as a `deepCoadd_obj` dataset, and the computations are defined by a 

611 collection of `lsst.pipe.tasks.functor.Functor` objects (or, equivalently, 

612 a `CompositeFunctor`). 

613 

614 After the object is initialized, accessing the `.df` attribute (which 

615 holds the `pandas.DataFrame` containing the results of the calculations) 

616 triggers computation of said dataframe. 

617 

618 One of the conveniences of using this object is the ability to define a 

619 desired common filter for all functors. This enables the same functor 

620 collection to be passed to several different `PostprocessAnalysis` objects 

621 without having to change the original functor collection, since the `filt` 

622 keyword argument of this object triggers an overwrite of the `filt` 

623 property for all functors in the collection. 

624 

625 This object also allows a list of refFlags to be passed, and defines a set 

626 of default refFlags that are always included even if not requested. 

627 

628 If a list of `ParquetTable` objects is passed, rather than a single one, 

629 then the calculations will be mapped over all the input catalogs. In 

630 principle, it should be straightforward to parallelize this activity, but 

631 initial tests have failed (see TODO in code comments). 

632 

633 Parameters 

634 ---------- 

635 parq : `lsst.pipe.tasks.ParquetTable` (or list of such) 

636 Source catalog(s) for computation. 

637 

638 functors : `list`, `dict`, or `lsst.pipe.tasks.functors.CompositeFunctor` 

639 Computations to do (functors that act on `parq`). 

640 If a dict, the output 

641 DataFrame will have columns keyed accordingly. 

642 If a list, the column keys will come from the 

643 `.shortname` attribute of each functor. 

644 

645 filt : `str`, optional 

646 Filter in which to calculate. If provided, 

647 this will overwrite any existing `.filt` attribute 

648 of the provided functors. 

649 

650 flags : `list`, optional 

651 List of flags (per-band) to include in output table. 

652 Taken from the `meas` dataset if applied to a multilevel Object Table. 

653 

654 refFlags : `list`, optional 

655 List of refFlags (only reference band) to include in output table. 

656 

657 forcedFlags : `list`, optional 

658 List of flags (per-band) to include in output table. 

659 Taken from the ``forced_src`` dataset if applied to a 

660 multilevel Object Table. Intended for flags from measurement plugins 

661 only run during multi-band forced-photometry. 

662 """ 

663 _defaultRefFlags = [] 

664 _defaultFuncs = () 

665 

666 def __init__(self, parq, functors, filt=None, flags=None, refFlags=None, forcedFlags=None): 

667 self.parq = parq 

668 self.functors = functors 

669 

670 self.filt = filt 

671 self.flags = list(flags) if flags is not None else [] 

672 self.forcedFlags = list(forcedFlags) if forcedFlags is not None else [] 

673 self.refFlags = list(self._defaultRefFlags) 

674 if refFlags is not None: 

675 self.refFlags += list(refFlags) 

676 

677 self._df = None 

678 

679 @property 

680 def defaultFuncs(self): 

681 funcs = dict(self._defaultFuncs) 

682 return funcs 

683 

684 @property 

685 def func(self): 

686 additionalFuncs = self.defaultFuncs 

687 additionalFuncs.update({flag: Column(flag, dataset='forced_src') for flag in self.forcedFlags}) 

688 additionalFuncs.update({flag: Column(flag, dataset='ref') for flag in self.refFlags}) 

689 additionalFuncs.update({flag: Column(flag, dataset='meas') for flag in self.flags}) 

690 

691 if isinstance(self.functors, CompositeFunctor): 

692 func = self.functors 

693 else: 

694 func = CompositeFunctor(self.functors) 

695 

696 func.funcDict.update(additionalFuncs) 

697 func.filt = self.filt 

698 

699 return func 

700 

701 @property 

702 def noDupCols(self): 

703 return [name for name, func in self.func.funcDict.items() if func.noDup or func.dataset == 'ref'] 

704 

705 @property 

706 def df(self): 

707 if self._df is None: 

708 self.compute() 

709 return self._df 

710 

711 def compute(self, dropna=False, pool=None): 

712 # map over multiple parquet tables 

713 if type(self.parq) in (list, tuple): 

714 if pool is None: 

715 dflist = [self.func(parq, dropna=dropna) for parq in self.parq] 

716 else: 

717 # TODO: Figure out why this doesn't work (pyarrow pickling 

718 # issues?) 

719 dflist = pool.map(functools.partial(self.func, dropna=dropna), self.parq) 

720 self._df = pd.concat(dflist) 

721 else: 

722 self._df = self.func(self.parq, dropna=dropna) 

723 

724 return self._df 

725 
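# Illustrative sketch (not part of the pipeline): typical PostprocessAnalysis
# usage as described in the class docstring. ``parq`` is assumed to be a
# ParquetTable (or DeferredDatasetHandle) for a deepCoadd_obj-like dataset; the
# functor and flag names are examples only.
def _postprocessAnalysisExample(parq):
    """Compute a small set of columns from a multilevel Object table."""
    functors = {
        'ra': Column('coord_ra', dataset='ref'),
        'dec': Column('coord_dec', dataset='ref'),
    }
    analysis = PostprocessAnalysis(parq, functors, filt='i',
                                   flags=['base_PixelFlags_flag'],
                                   refFlags=['detect_isPrimary'])
    # Accessing .df triggers the computation via the CompositeFunctor.
    return analysis.df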

726 

727class TransformCatalogBaseConnections(pipeBase.PipelineTaskConnections, 

728 dimensions=()): 

729 """Expected Connections for subclasses of TransformCatalogBaseTask. 

730 

731 Must be subclassed. 

732 """ 

733 inputCatalog = connectionTypes.Input( 

734 name="", 

735 storageClass="DataFrame", 

736 ) 

737 outputCatalog = connectionTypes.Output( 

738 name="", 

739 storageClass="DataFrame", 

740 ) 

741 

742 

743class TransformCatalogBaseConfig(pipeBase.PipelineTaskConfig, 

744 pipelineConnections=TransformCatalogBaseConnections): 

745 functorFile = pexConfig.Field( 

746 dtype=str, 

747 doc="Path to YAML file specifying Science Data Model functors to use " 

748 "when copying columns and computing calibrated values.", 

749 default=None, 

750 optional=True 

751 ) 

752 primaryKey = pexConfig.Field( 

753 dtype=str, 

754 doc="Name of column to be set as the DataFrame index. If None, the index " 

755 "will be named `id`", 

756 default=None, 

757 optional=True 

758 ) 

759 columnsFromDataId = pexConfig.ListField( 

760 dtype=str, 

761 default=None, 

762 optional=True, 

763 doc="Columns to extract from the dataId", 

764 ) 

765 

766 

767class TransformCatalogBaseTask(pipeBase.PipelineTask): 

768 """Base class for transforming/standardizing a catalog. 

769 

770 Applies functors that convert units and apply calibrations. 

771 The purpose of this task is to perform a set of computations on 

772 an input `ParquetTable` dataset (such as `deepCoadd_obj`) and write the 

773 results to a new dataset (which needs to be declared in an `outputDataset` 

774 attribute). 

775 

776 The calculations to be performed are defined in a YAML file that specifies 

777 a set of functors to be computed, provided as 

778 a `--functorFile` config parameter. An example of such a YAML file 

779 is the following: 

780 

781 funcs: 

782 psfMag: 

783 functor: Mag 

784 args: 

785 - base_PsfFlux 

786 filt: HSC-G 

787 dataset: meas 

788 cmodel_magDiff: 

789 functor: MagDiff 

790 args: 

791 - modelfit_CModel 

792 - base_PsfFlux 

793 filt: HSC-G 

794 gauss_magDiff: 

795 functor: MagDiff 

796 args: 

797 - base_GaussianFlux 

798 - base_PsfFlux 

799 filt: HSC-G 

800 count: 

801 functor: Column 

802 args: 

803 - base_InputCount_value 

804 filt: HSC-G 

805 deconvolved_moments: 

806 functor: DeconvolvedMoments 

807 filt: HSC-G 

808 dataset: forced_src 

809 refFlags: 

810 - calib_psfUsed 

811 - merge_measurement_i 

812 - merge_measurement_r 

813 - merge_measurement_z 

814 - merge_measurement_y 

815 - merge_measurement_g 

816 - base_PixelFlags_flag_inexact_psfCenter 

817 - detect_isPrimary 

818 

819 The names for each entry under "func" will become the names of columns in 

820 the output dataset. All the functors referenced are defined in 

821 `lsst.pipe.tasks.functors`. Positional arguments to be passed to each 

822 functor are in the `args` list, and any additional entries for each column 

823 other than "functor" or "args" (e.g., `'filt'`, `'dataset'`) are treated as 

824 keyword arguments to be passed to the functor initialization. 

825 

826 The "flags" entry is the default shortcut for `Column` functors. 

827 All columns listed under "flags" will be copied to the output table 

828 untransformed. They can be of any datatype. 

829 In the special case of transforming a multi-level object table with 

830 band and dataset indices (deepCoadd_obj), these will be taken from the 

831 `meas` dataset and exploded out per band. 

832 

833 There are two special shortcuts that only apply when transforming 

834 multi-level Object (deepCoadd_obj) tables: 

835 - The "refFlags" entry is a shortcut for `Column` functors 

836 taken from the `'ref'` dataset if transforming an ObjectTable. 

837 - The "forcedFlags" entry is a shortcut for `Column` functors 

838 taken from the ``forced_src`` dataset if transforming an ObjectTable. 

839 These are expanded out per band. 

840 

841 

842 This task uses the `lsst.pipe.tasks.postprocess.PostprocessAnalysis` object 

843 to organize and execute the calculations. 

844 """ 

845 @property 

846 def _DefaultName(self): 

847 raise NotImplementedError('Subclass must define "_DefaultName" attribute') 

848 

849 @property 

850 def outputDataset(self): 

851 raise NotImplementedError('Subclass must define "outputDataset" attribute') 

852 

853 @property 

854 def inputDataset(self): 

855 raise NotImplementedError('Subclass must define "inputDataset" attribute') 

856 

857 @property 

858 def ConfigClass(self): 

859 raise NotImplementedError('Subclass must define "ConfigClass" attribute') 

860 

861 def __init__(self, *args, **kwargs): 

862 super().__init__(*args, **kwargs) 

863 if self.config.functorFile: 

864 self.log.info('Loading transform functor definitions from %s', 

865 self.config.functorFile) 

866 self.funcs = CompositeFunctor.from_file(self.config.functorFile) 

867 self.funcs.update(dict(PostprocessAnalysis._defaultFuncs)) 

868 else: 

869 self.funcs = None 

870 

871 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

872 inputs = butlerQC.get(inputRefs) 

873 if self.funcs is None: 

874 raise ValueError("config.functorFile is None. " 

875 "Must be a valid path to yaml in order to run Task as a PipelineTask.") 

876 result = self.run(parq=inputs['inputCatalog'], funcs=self.funcs, 

877 dataId=outputRefs.outputCatalog.dataId.full) 

878 outputs = pipeBase.Struct(outputCatalog=result) 

879 butlerQC.put(outputs, outputRefs) 

880 

881 def run(self, parq, funcs=None, dataId=None, band=None): 

882 """Do postprocessing calculations 

883 

884 Takes a `ParquetTable` object and dataId, 

885 returns a dataframe with results of postprocessing calculations. 

886 

887 Parameters 

888 ---------- 

889 parq : `lsst.pipe.tasks.parquetTable.ParquetTable` 

890 ParquetTable from which calculations are done. 

891 funcs : `lsst.pipe.tasks.functors.Functors` 

892 Functors to apply to the table's columns 

893 dataId : dict, optional 

894 Used to add a `patchId` column to the output dataframe. 

895 band : `str`, optional 

896 Filter band that is being processed. 

897 

898 Returns 

899 ------- 

900 df : `pandas.DataFrame` 

901 """ 

902 self.log.info("Transforming/standardizing the source table dataId: %s", dataId) 

903 

904 df = self.transform(band, parq, funcs, dataId).df 

905 self.log.info("Made a table of %d columns and %d rows", len(df.columns), len(df)) 

906 return df 

907 

908 def getFunctors(self): 

909 return self.funcs 

910 

911 def getAnalysis(self, parq, funcs=None, band=None): 

912 if funcs is None: 

913 funcs = self.funcs 

914 analysis = PostprocessAnalysis(parq, funcs, filt=band) 

915 return analysis 

916 

917 def transform(self, band, parq, funcs, dataId): 

918 analysis = self.getAnalysis(parq, funcs=funcs, band=band) 

919 df = analysis.df 

920 if dataId and self.config.columnsFromDataId: 

921 for key in self.config.columnsFromDataId: 

922 if key in dataId: 

923 df[str(key)] = dataId[key] 

924 else: 

925 raise ValueError(f"'{key}' in config.columnsFromDataId not found in dataId: {dataId}") 

926 

927 if self.config.primaryKey: 

928 if df.index.name != self.config.primaryKey and self.config.primaryKey in df: 

929 df.reset_index(inplace=True, drop=True) 

930 df.set_index(self.config.primaryKey, inplace=True) 

931 

932 return pipeBase.Struct( 

933 df=df, 

934 analysis=analysis 

935 ) 

936 
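# Illustrative sketch (not part of the pipeline): loading a functor definition
# file of the form shown in the TransformCatalogBaseTask docstring. The YAML
# content and the temporary file are examples only.
def _functorFileExample():
    """Write a minimal functor YAML file and load it with CompositeFunctor."""
    import tempfile

    yamlText = """\
funcs:
    psfMag:
        functor: Mag
        args:
            - base_PsfFlux
        dataset: meas
refFlags:
    - detect_isPrimary
"""
    with tempfile.NamedTemporaryFile("w", suffix=".yaml", delete=False) as fh:
        fh.write(yamlText)
        path = fh.name
    # Same loader that TransformCatalogBaseTask.__init__ uses for
    # config.functorFile.
    return CompositeFunctor.from_file(path)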

937 

938class TransformObjectCatalogConnections(pipeBase.PipelineTaskConnections, 

939 defaultTemplates={"coaddName": "deep"}, 

940 dimensions=("tract", "patch", "skymap")): 

941 inputCatalog = connectionTypes.Input( 

942 doc="The vertical concatenation of the deepCoadd_{ref|meas|forced_src} catalogs, " 

943 "stored as a DataFrame with a multi-level column index per-patch.", 

944 dimensions=("tract", "patch", "skymap"), 

945 storageClass="DataFrame", 

946 name="{coaddName}Coadd_obj", 

947 deferLoad=True, 

948 ) 

949 outputCatalog = connectionTypes.Output( 

950 doc="Per-Patch Object Table of columns transformed from the deepCoadd_obj table per the standard " 

951 "data model.", 

952 dimensions=("tract", "patch", "skymap"), 

953 storageClass="DataFrame", 

954 name="objectTable" 

955 ) 

956 

957 

958class TransformObjectCatalogConfig(TransformCatalogBaseConfig, 

959 pipelineConnections=TransformObjectCatalogConnections): 

960 coaddName = pexConfig.Field( 

961 dtype=str, 

962 default="deep", 

963 doc="Name of coadd" 

964 ) 

965 # TODO: remove in DM-27177 

966 filterMap = pexConfig.DictField( 

967 keytype=str, 

968 itemtype=str, 

969 default={}, 

970 doc=("Dictionary mapping full filter name to short one for column name munging. " 

971 "These filters determine the output columns no matter what filters the " 

972 "input data actually contain."), 

973 deprecated=("Coadds are now identified by the band, so this transform is unused. " 

974 "Will be removed after v22.") 

975 ) 

976 outputBands = pexConfig.ListField( 

977 dtype=str, 

978 default=None, 

979 optional=True, 

980 doc=("These bands and only these bands will appear in the output," 

981 " NaN-filled if the input does not include them." 

982 " If None, then use all bands found in the input.") 

983 ) 

984 camelCase = pexConfig.Field( 

985 dtype=bool, 

986 default=False, 

987 doc=("Write per-band column names with camelCase, else underscore. " 

988 "For example: gPsFlux instead of g_PsFlux.") 

989 ) 

990 multilevelOutput = pexConfig.Field( 

991 dtype=bool, 

992 default=False, 

993 doc=("Whether results dataframe should have a multilevel column index (True) or be flat " 

994 "and name-munged (False).") 

995 ) 

996 goodFlags = pexConfig.ListField( 

997 dtype=str, 

998 default=[], 

999 doc=("List of 'good' flags that should be set False when populating empty tables. " 

1000 "All other flags are considered to be 'bad' flags and will be set to True.") 

1001 ) 

1002 floatFillValue = pexConfig.Field( 

1003 dtype=float, 

1004 default=np.nan, 

1005 doc="Fill value for float fields when populating empty tables." 

1006 ) 

1007 integerFillValue = pexConfig.Field( 

1008 dtype=int, 

1009 default=-1, 

1010 doc="Fill value for integer fields when populating empty tables." 

1011 ) 

1012 

1013 def setDefaults(self): 

1014 super().setDefaults() 

1015 self.functorFile = os.path.join('$PIPE_TASKS_DIR', 'schemas', 'Object.yaml') 

1016 self.primaryKey = 'objectId' 

1017 self.columnsFromDataId = ['tract', 'patch'] 

1018 self.goodFlags = ['calib_astrometry_used', 

1019 'calib_photometry_reserved', 

1020 'calib_photometry_used', 

1021 'calib_psf_candidate', 

1022 'calib_psf_reserved', 

1023 'calib_psf_used'] 

1024 

1025 

1026class TransformObjectCatalogTask(TransformCatalogBaseTask): 

1027 """Produce a flattened Object Table to match the format specified in 

1028 sdm_schemas. 

1029 

1030 Do the same set of postprocessing calculations on all bands. 

1031 

1032 This is identical to `TransformCatalogBaseTask`, except that it does 

1033 the specified functor calculations for all filters present in the 

1034 input `deepCoadd_obj` table. Any specific `"filt"` keywords specified 

1035 by the YAML file will be superseded. 

1036 """ 

1037 _DefaultName = "transformObjectCatalog" 

1038 ConfigClass = TransformObjectCatalogConfig 

1039 

1040 def run(self, parq, funcs=None, dataId=None, band=None): 

1041 # NOTE: band kwarg is ignored here. 

1042 dfDict = {} 

1043 analysisDict = {} 

1044 templateDf = pd.DataFrame() 

1045 

1046 if isinstance(parq, DeferredDatasetHandle): 

1047 columns = parq.get(component='columns') 

1048 inputBands = columns.unique(level=1).values 

1049 else: 

1050 inputBands = parq.columnLevelNames['band'] 

1051 

1052 outputBands = self.config.outputBands if self.config.outputBands else inputBands 

1053 

1054 # Perform transform for data of filters that exist in parq. 

1055 for inputBand in inputBands: 

1056 if inputBand not in outputBands: 

1057 self.log.info("Ignoring %s band data in the input", inputBand) 

1058 continue 

1059 self.log.info("Transforming the catalog of band %s", inputBand) 

1060 result = self.transform(inputBand, parq, funcs, dataId) 

1061 dfDict[inputBand] = result.df 

1062 analysisDict[inputBand] = result.analysis 

1063 if templateDf.empty: 

1064 templateDf = result.df 

1065 

1066 # Put filler values in columns of other wanted bands 

1067 for filt in outputBands: 

1068 if filt not in dfDict: 

1069 self.log.info("Adding empty columns for band %s", filt) 

1070 dfTemp = templateDf.copy() 

1071 for col in dfTemp.columns: 

1072 testValue = dfTemp[col].values[0] 

1073 if isinstance(testValue, (np.bool_, pd.BooleanDtype)): 

1074 # Boolean flag type, check if it is a "good" flag 

1075 if col in self.config.goodFlags: 

1076 fillValue = False 

1077 else: 

1078 fillValue = True 

1079 elif isinstance(testValue, numbers.Integral): 

1080 # Checking numbers.Integral catches all flavors 

1081 # of python, numpy, pandas, etc. integers. 

1082 # We must ensure this is not an unsigned integer. 

1083 if isinstance(testValue, np.unsignedinteger): 

1084 raise ValueError("Parquet tables may not have unsigned integer columns.") 

1085 else: 

1086 fillValue = self.config.integerFillValue 

1087 else: 

1088 fillValue = self.config.floatFillValue 

1089 dfTemp[col].values[:] = fillValue 

1090 dfDict[filt] = dfTemp 

1091 

1092 # This makes a multilevel column index, with band as first level 

1093 df = pd.concat(dfDict, axis=1, names=['band', 'column']) 

1094 

1095 if not self.config.multilevelOutput: 

1096 noDupCols = list(set.union(*[set(v.noDupCols) for v in analysisDict.values()])) 

1097 if self.config.primaryKey in noDupCols: 

1098 noDupCols.remove(self.config.primaryKey) 

1099 if dataId and self.config.columnsFromDataId: 

1100 noDupCols += self.config.columnsFromDataId 

1101 df = flattenFilters(df, noDupCols=noDupCols, camelCase=self.config.camelCase, 

1102 inputBands=inputBands) 

1103 

1104 self.log.info("Made a table of %d columns and %d rows", len(df.columns), len(df)) 

1105 

1106 return df 

1107 
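# Illustrative sketch (not part of the pipeline): the per-column fill-value
# dispatch that TransformObjectCatalogTask.run applies when padding bands that
# are missing from the input. The sample flag name and defaults are invented.
def _fillValueExample(testValue, col, goodFlags=('calib_psf_used',),
                      integerFillValue=-1, floatFillValue=np.nan):
    """Return the fill value run() would use for a column of this type."""
    if isinstance(testValue, (np.bool_, bool)):
        # Boolean flags: "good" flags fill False, everything else fills True.
        # (run() itself tests np.bool_/pd.BooleanDtype; plain bool is included
        # here for the toy case.)
        return col not in goodFlags
    elif isinstance(testValue, numbers.Integral):
        # numbers.Integral catches python/numpy/pandas integers alike, but
        # unsigned integers are not allowed in the Parquet tables.
        if isinstance(testValue, np.unsignedinteger):
            raise ValueError("Parquet tables may not have unsigned integer columns.")
        return integerFillValue
    else:
        return floatFillValue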

1108 

1109class ConsolidateObjectTableConnections(pipeBase.PipelineTaskConnections, 

1110 dimensions=("tract", "skymap")): 

1111 inputCatalogs = connectionTypes.Input( 

1112 doc="Per-Patch objectTables conforming to the standard data model.", 

1113 name="objectTable", 

1114 storageClass="DataFrame", 

1115 dimensions=("tract", "patch", "skymap"), 

1116 multiple=True, 

1117 ) 

1118 outputCatalog = connectionTypes.Output( 

1119 doc="Per-tract horizontal concatenation of the input objectTables", 

1120 name="objectTable_tract", 

1121 storageClass="DataFrame", 

1122 dimensions=("tract", "skymap"), 

1123 ) 

1124 

1125 

1126class ConsolidateObjectTableConfig(pipeBase.PipelineTaskConfig, 

1127 pipelineConnections=ConsolidateObjectTableConnections): 

1128 coaddName = pexConfig.Field( 

1129 dtype=str, 

1130 default="deep", 

1131 doc="Name of coadd" 

1132 ) 

1133 

1134 

1135class ConsolidateObjectTableTask(pipeBase.PipelineTask): 

1136 """Write patch-merged source tables to a tract-level parquet file. 

1137 

1138 Concatenates `objectTable` list into a per-tract `objectTable_tract`. 

1139 """ 

1140 _DefaultName = "consolidateObjectTable" 

1141 ConfigClass = ConsolidateObjectTableConfig 

1142 

1143 inputDataset = 'objectTable' 

1144 outputDataset = 'objectTable_tract' 

1145 

1146 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

1147 inputs = butlerQC.get(inputRefs) 

1148 self.log.info("Concatenating %s per-patch Object Tables", 

1149 len(inputs['inputCatalogs'])) 

1150 df = pd.concat(inputs['inputCatalogs']) 

1151 butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs) 

1152 

1153 

1154class TransformSourceTableConnections(pipeBase.PipelineTaskConnections, 

1155 defaultTemplates={"catalogType": ""}, 

1156 dimensions=("instrument", "visit", "detector")): 

1157 

1158 inputCatalog = connectionTypes.Input( 

1159 doc="Wide input catalog of sources produced by WriteSourceTableTask", 

1160 name="{catalogType}source", 

1161 storageClass="DataFrame", 

1162 dimensions=("instrument", "visit", "detector"), 

1163 deferLoad=True 

1164 ) 

1165 outputCatalog = connectionTypes.Output( 

1166 doc="Narrower, per-detector Source Table transformed and converted per a " 

1167 "specified set of functors", 

1168 name="{catalogType}sourceTable", 

1169 storageClass="DataFrame", 

1170 dimensions=("instrument", "visit", "detector") 

1171 ) 

1172 

1173 

1174class TransformSourceTableConfig(TransformCatalogBaseConfig, 

1175 pipelineConnections=TransformSourceTableConnections): 

1176 

1177 def setDefaults(self): 

1178 super().setDefaults() 

1179 self.functorFile = os.path.join('$PIPE_TASKS_DIR', 'schemas', 'Source.yaml') 

1180 self.primaryKey = 'sourceId' 

1181 self.columnsFromDataId = ['visit', 'detector', 'band', 'physical_filter'] 

1182 

1183 

1184class TransformSourceTableTask(TransformCatalogBaseTask): 

1185 """Transform/standardize a source catalog 

1186 """ 

1187 _DefaultName = "transformSourceTable" 

1188 ConfigClass = TransformSourceTableConfig 

1189 

1190 

1191class ConsolidateVisitSummaryConnections(pipeBase.PipelineTaskConnections, 

1192 dimensions=("instrument", "visit",), 

1193 defaultTemplates={"calexpType": ""}): 

1194 calexp = connectionTypes.Input( 

1195 doc="Processed exposures used for metadata", 

1196 name="{calexpType}calexp", 

1197 storageClass="ExposureF", 

1198 dimensions=("instrument", "visit", "detector"), 

1199 deferLoad=True, 

1200 multiple=True, 

1201 ) 

1202 visitSummary = connectionTypes.Output( 

1203 doc=("Per-visit consolidated exposure metadata. These catalogs use " 

1204 "detector id for the id and are sorted for fast lookups of a " 

1205 "detector."), 

1206 name="{calexpType}visitSummary", 

1207 storageClass="ExposureCatalog", 

1208 dimensions=("instrument", "visit"), 

1209 ) 

1210 

1211 

1212class ConsolidateVisitSummaryConfig(pipeBase.PipelineTaskConfig, 

1213 pipelineConnections=ConsolidateVisitSummaryConnections): 

1214 """Config for ConsolidateVisitSummaryTask""" 

1215 pass 

1216 

1217 

1218class ConsolidateVisitSummaryTask(pipeBase.PipelineTask): 

1219 """Task to consolidate per-detector visit metadata. 

1220 

1221 This task aggregates the following metadata from all the detectors in a 

1222 single visit into an exposure catalog: 

1223 - The visitInfo. 

1224 - The wcs. 

1225 - The photoCalib. 

1226 - The physical_filter and band (if available). 

1227 - The psf size, shape, and effective area at the center of the detector. 

1228 - The corners of the bounding box in right ascension/declination. 

1229 

1230 Other quantities such as Detector, Psf, ApCorrMap, and TransmissionCurve 

1231 are not persisted here because of storage concerns, and because of their 

1232 limited utility as summary statistics. 

1233 

1234 Tests for this task are performed in ci_hsc_gen3. 

1235 """ 

1236 _DefaultName = "consolidateVisitSummary" 

1237 ConfigClass = ConsolidateVisitSummaryConfig 

1238 

1239 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

1240 dataRefs = butlerQC.get(inputRefs.calexp) 

1241 visit = dataRefs[0].dataId.byName()['visit'] 

1242 

1243 self.log.debug("Concatenating metadata from %d per-detector calexps (visit %d)", 

1244 len(dataRefs), visit) 

1245 

1246 expCatalog = self._combineExposureMetadata(visit, dataRefs) 

1247 

1248 butlerQC.put(expCatalog, outputRefs.visitSummary) 

1249 

1250 def _combineExposureMetadata(self, visit, dataRefs): 

1251 """Make a combined exposure catalog from a list of dataRefs. 

1252 These dataRefs must point to exposures with wcs, summaryStats, 

1253 and other visit metadata. 

1254 

1255 Parameters 

1256 ---------- 

1257 visit : `int` 

1258 Visit identification number. 

1259 dataRefs : `list` of `lsst.daf.butler.DeferredDatasetHandle` 

1260 List of dataRefs in visit. 

1261 

1262 Returns 

1263 ------- 

1264 visitSummary : `lsst.afw.table.ExposureCatalog` 

1265 Exposure catalog with per-detector summary information. 

1266 """ 

1267 schema = self._makeVisitSummarySchema() 

1268 cat = afwTable.ExposureCatalog(schema) 

1269 cat.resize(len(dataRefs)) 

1270 

1271 cat['visit'] = visit 

1272 

1273 for i, dataRef in enumerate(dataRefs): 

1274 visitInfo = dataRef.get(component='visitInfo') 

1275 filterLabel = dataRef.get(component='filter') 

1276 summaryStats = dataRef.get(component='summaryStats') 

1277 detector = dataRef.get(component='detector') 

1278 wcs = dataRef.get(component='wcs') 

1279 photoCalib = dataRef.get(component='photoCalib') 

1281 bbox = dataRef.get(component='bbox') 

1282 validPolygon = dataRef.get(component='validPolygon') 

1283 

1284 rec = cat[i] 

1285 rec.setBBox(bbox) 

1286 rec.setVisitInfo(visitInfo) 

1287 rec.setWcs(wcs) 

1288 rec.setPhotoCalib(photoCalib) 

1289 rec.setValidPolygon(validPolygon) 

1290 

1291 rec['physical_filter'] = filterLabel.physicalLabel if filterLabel.hasPhysicalLabel() else "" 

1292 rec['band'] = filterLabel.bandLabel if filterLabel.hasBandLabel() else "" 

1293 rec.setId(detector.getId()) 

1294 rec['psfSigma'] = summaryStats.psfSigma 

1295 rec['psfIxx'] = summaryStats.psfIxx 

1296 rec['psfIyy'] = summaryStats.psfIyy 

1297 rec['psfIxy'] = summaryStats.psfIxy 

1298 rec['psfArea'] = summaryStats.psfArea 

1299 rec['raCorners'][:] = summaryStats.raCorners 

1300 rec['decCorners'][:] = summaryStats.decCorners 

1301 rec['ra'] = summaryStats.ra 

1302 rec['decl'] = summaryStats.decl 

1303 rec['zenithDistance'] = summaryStats.zenithDistance 

1304 rec['zeroPoint'] = summaryStats.zeroPoint 

1305 rec['skyBg'] = summaryStats.skyBg 

1306 rec['skyNoise'] = summaryStats.skyNoise 

1307 rec['meanVar'] = summaryStats.meanVar 

1308 rec['astromOffsetMean'] = summaryStats.astromOffsetMean 

1309 rec['astromOffsetStd'] = summaryStats.astromOffsetStd 

1310 rec['nPsfStar'] = summaryStats.nPsfStar 

1311 rec['psfStarDeltaE1Median'] = summaryStats.psfStarDeltaE1Median 

1312 rec['psfStarDeltaE2Median'] = summaryStats.psfStarDeltaE2Median 

1313 rec['psfStarDeltaE1Scatter'] = summaryStats.psfStarDeltaE1Scatter 

1314 rec['psfStarDeltaE2Scatter'] = summaryStats.psfStarDeltaE2Scatter 

1315 rec['psfStarDeltaSizeMedian'] = summaryStats.psfStarDeltaSizeMedian 

1316 rec['psfStarDeltaSizeScatter'] = summaryStats.psfStarDeltaSizeScatter 

1317 rec['psfStarScaledDeltaSizeScatter'] = summaryStats.psfStarScaledDeltaSizeScatter 

1318 

1319 metadata = dafBase.PropertyList() 

1320 metadata.add("COMMENT", "Catalog id is detector id, sorted.") 

1321 # We are looping over existing datarefs, so the following is true 

1322 metadata.add("COMMENT", "Only detectors with data have entries.") 

1323 cat.setMetadata(metadata) 

1324 

1325 cat.sort() 

1326 return cat 

1327 

1328 def _makeVisitSummarySchema(self): 

1329 """Make the schema for the visitSummary catalog.""" 

1330 schema = afwTable.ExposureTable.makeMinimalSchema() 

1331 schema.addField('visit', type='L', doc='Visit number') 

1332 schema.addField('physical_filter', type='String', size=32, doc='Physical filter') 

1333 schema.addField('band', type='String', size=32, doc='Name of band') 

1334 schema.addField('psfSigma', type='F', 

1335 doc='PSF model second-moments determinant radius (center of chip) (pixel)') 

1336 schema.addField('psfArea', type='F', 

1337 doc='PSF model effective area (center of chip) (pixel**2)') 

1338 schema.addField('psfIxx', type='F', 

1339 doc='PSF model Ixx (center of chip) (pixel**2)') 

1340 schema.addField('psfIyy', type='F', 

1341 doc='PSF model Iyy (center of chip) (pixel**2)') 

1342 schema.addField('psfIxy', type='F', 

1343 doc='PSF model Ixy (center of chip) (pixel**2)') 

1344 schema.addField('raCorners', type='ArrayD', size=4, 

1345 doc='Right Ascension of bounding box corners (degrees)') 

1346 schema.addField('decCorners', type='ArrayD', size=4, 

1347 doc='Declination of bounding box corners (degrees)') 

1348 schema.addField('ra', type='D', 

1349 doc='Right Ascension of bounding box center (degrees)') 

1350 schema.addField('decl', type='D', 

1351 doc='Declination of bounding box center (degrees)') 

1352 schema.addField('zenithDistance', type='F', 

1353 doc='Zenith distance of bounding box center (degrees)') 

1354 schema.addField('zeroPoint', type='F', 

1355 doc='Mean zeropoint in detector (mag)') 

1356 schema.addField('skyBg', type='F', 

1357 doc='Average sky background (ADU)') 

1358 schema.addField('skyNoise', type='F', 

1359 doc='Average sky noise (ADU)') 

1360 schema.addField('meanVar', type='F', 

1361 doc='Mean variance of the weight plane (ADU**2)') 

1362 schema.addField('astromOffsetMean', type='F', 

1363 doc='Mean offset of astrometric calibration matches (arcsec)') 

1364 schema.addField('astromOffsetStd', type='F', 

1365 doc='Standard deviation of offsets of astrometric calibration matches (arcsec)') 

1366 schema.addField('nPsfStar', type='I', doc='Number of stars used for PSF model') 

1367 schema.addField('psfStarDeltaE1Median', type='F', 

1368 doc='Median E1 residual (starE1 - psfE1) for psf stars') 

1369 schema.addField('psfStarDeltaE2Median', type='F', 

1370 doc='Median E2 residual (starE2 - psfE2) for psf stars') 

1371 schema.addField('psfStarDeltaE1Scatter', type='F', 

1372 doc='Scatter (via MAD) of E1 residual (starE1 - psfE1) for psf stars') 

1373 schema.addField('psfStarDeltaE2Scatter', type='F', 

1374 doc='Scatter (via MAD) of E2 residual (starE2 - psfE2) for psf stars') 

1375 schema.addField('psfStarDeltaSizeMedian', type='F', 

1376 doc='Median size residual (starSize - psfSize) for psf stars (pixel)') 

1377 schema.addField('psfStarDeltaSizeScatter', type='F', 

1378 doc='Scatter (via MAD) of size residual (starSize - psfSize) for psf stars (pixel)') 

1379 schema.addField('psfStarScaledDeltaSizeScatter', type='F', 

1380 doc='Scatter (via MAD) of size residual scaled by median size squared') 

1381 

1382 return schema 

1383 

1384 

1385class ConsolidateSourceTableConnections(pipeBase.PipelineTaskConnections, 

1386 defaultTemplates={"catalogType": ""}, 

1387 dimensions=("instrument", "visit")): 

1388 inputCatalogs = connectionTypes.Input( 

1389 doc="Input per-detector Source Tables", 

1390 name="{catalogType}sourceTable", 

1391 storageClass="DataFrame", 

1392 dimensions=("instrument", "visit", "detector"), 

1393 multiple=True 

1394 ) 

1395 outputCatalog = connectionTypes.Output( 

1396 doc="Per-visit concatenation of Source Table", 

1397 name="{catalogType}sourceTable_visit", 

1398 storageClass="DataFrame", 

1399 dimensions=("instrument", "visit") 

1400 ) 

1401 

1402 

1403class ConsolidateSourceTableConfig(pipeBase.PipelineTaskConfig, 

1404 pipelineConnections=ConsolidateSourceTableConnections): 

1405 pass 

1406 

1407 

1408class ConsolidateSourceTableTask(pipeBase.PipelineTask): 

1409 """Concatenate `sourceTable` list into a per-visit `sourceTable_visit` 

1410 """ 

1411 _DefaultName = 'consolidateSourceTable' 

1412 ConfigClass = ConsolidateSourceTableConfig 

1413 

1414 inputDataset = 'sourceTable' 

1415 outputDataset = 'sourceTable_visit' 

1416 

1417 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

1418 from .makeWarp import reorderRefs 

1419 

1420 detectorOrder = [ref.dataId['detector'] for ref in inputRefs.inputCatalogs] 

1421 detectorOrder.sort() 

1422 inputRefs = reorderRefs(inputRefs, detectorOrder, dataIdKey='detector') 

1423 inputs = butlerQC.get(inputRefs) 

1424 self.log.info("Concatenating %s per-detector Source Tables", 

1425 len(inputs['inputCatalogs'])) 

1426 df = pd.concat(inputs['inputCatalogs']) 

1427 butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs) 
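The reorder-and-concatenate step above can be illustrated standalone with plain pandas; the per-detector DataFrames below are synthetic stand-ins for the butler-loaded source tables:

# Standalone sketch of the concatenation step, using synthetic per-detector
# DataFrames in place of butler-loaded sourceTable datasets.
import pandas as pd

perDetector = {
    2: pd.DataFrame({'sourceId': [20, 21], 'psfFlux': [1.2, 3.4]}),
    0: pd.DataFrame({'sourceId': [1, 2], 'psfFlux': [5.6, 7.8]}),
}

# Mirror the task: sort by detector id so the output ordering is deterministic.
ordered = [perDetector[det] for det in sorted(perDetector)]
sourceTable_visit = pd.concat(ordered)
print(len(sourceTable_visit))  # 4 rows, detector 0 first, then detector 2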

1428 

1429 

1430class MakeCcdVisitTableConnections(pipeBase.PipelineTaskConnections, 

1431 dimensions=("instrument",), 

1432 defaultTemplates={"calexpType": ""}): 

1433 visitSummaryRefs = connectionTypes.Input( 

1434 doc="Data references for per-visit consolidated exposure metadata from ConsolidateVisitSummaryTask", 

1435 name="{calexpType}visitSummary", 

1436 storageClass="ExposureCatalog", 

1437 dimensions=("instrument", "visit"), 

1438 multiple=True, 

1439 deferLoad=True, 

1440 ) 

1441 outputCatalog = connectionTypes.Output( 

1442 doc="CCD and Visit metadata table", 

1443 name="ccdVisitTable", 

1444 storageClass="DataFrame", 

1445 dimensions=("instrument",) 

1446 ) 

1447 

1448 

1449class MakeCcdVisitTableConfig(pipeBase.PipelineTaskConfig, 

1450 pipelineConnections=MakeCcdVisitTableConnections): 

1451 pass 

1452 

1453 

1454class MakeCcdVisitTableTask(pipeBase.PipelineTask): 

1455 """Produce a `ccdVisitTable` from the `visitSummary` exposure catalogs. 

1456 """ 

1457 _DefaultName = 'makeCcdVisitTable' 

1458 ConfigClass = MakeCcdVisitTableConfig 

1459 

1460 def run(self, visitSummaryRefs): 

1461 """Make a table of ccd information from the `visitSummary` catalogs. 

1462 

1463 Parameters 

1464 ---------- 

1465 visitSummaryRefs : `list` of `lsst.daf.butler.DeferredDatasetHandle` 

1466 List of DeferredDatasetHandles pointing to exposure catalogs with 

1467 per-detector summary information. 

1468 

1469 Returns 

1470 ------- 

1471 result : `lsst.pipe.base.Struct` 

1472 Results struct with attribute: 

1473 

1474 ``outputCatalog`` 

1475 Catalog of ccd and visit information. 

1476 """ 

1477 ccdEntries = [] 

1478 for visitSummaryRef in visitSummaryRefs: 

1479 visitSummary = visitSummaryRef.get() 

1480 visitInfo = visitSummary[0].getVisitInfo() 

1481 

1482 ccdEntry = {} 

1483 summaryTable = visitSummary.asAstropy() 

1484 selectColumns = ['id', 'visit', 'physical_filter', 'band', 'ra', 'decl', 'zenithDistance', 

1485 'zeroPoint', 'psfSigma', 'skyBg', 'skyNoise', 

1486 'astromOffsetMean', 'astromOffsetStd', 'nPsfStar', 

1487 'psfStarDeltaE1Median', 'psfStarDeltaE2Median', 

1488 'psfStarDeltaE1Scatter', 'psfStarDeltaE2Scatter', 

1489 'psfStarDeltaSizeMedian', 'psfStarDeltaSizeScatter', 

1490 'psfStarScaledDeltaSizeScatter'] 

1491 ccdEntry = summaryTable[selectColumns].to_pandas().set_index('id') 

1492 # 'visit' is the human readable visit number. 

1493 # 'visitId' is the key into the visit table. They are the same. 

1494 # Technically you should join to get the visit from the visit 

1495 # table. 

1496 ccdEntry = ccdEntry.rename(columns={"visit": "visitId"}) 

1497 dataIds = [DataCoordinate.standardize(visitSummaryRef.dataId, detector=id) for id in 

1498 summaryTable['id']] 

1499 packer = visitSummaryRef.dataId.universe.makePacker('visit_detector', visitSummaryRef.dataId) 

1500 ccdVisitIds = [packer.pack(dataId) for dataId in dataIds] 

1501 ccdEntry['ccdVisitId'] = ccdVisitIds 

1502 ccdEntry['detector'] = summaryTable['id'] 

1503 pixToArcseconds = np.array([vR.getWcs().getPixelScale().asArcseconds() for vR in visitSummary]) 

1504 ccdEntry["seeing"] = visitSummary['psfSigma'] * np.sqrt(8 * np.log(2)) * pixToArcseconds 

1505 

1506 ccdEntry["skyRotation"] = visitInfo.getBoresightRotAngle().asDegrees() 

1507 ccdEntry["expMidpt"] = visitInfo.getDate().toPython() 

1508 ccdEntry["expMidptMJD"] = visitInfo.getDate().get(dafBase.DateTime.MJD) 

1509 expTime = visitInfo.getExposureTime() 

1510 ccdEntry['expTime'] = expTime 

1511 ccdEntry["obsStart"] = ccdEntry["expMidpt"] - 0.5 * pd.Timedelta(seconds=expTime) 

1512 expTime_days = expTime / (60*60*24) 

1513 ccdEntry["obsStartMJD"] = ccdEntry["expMidptMJD"] - 0.5 * expTime_days 

1514 ccdEntry['darkTime'] = visitInfo.getDarkTime() 

1515 ccdEntry['xSize'] = summaryTable['bbox_max_x'] - summaryTable['bbox_min_x'] 

1516 ccdEntry['ySize'] = summaryTable['bbox_max_y'] - summaryTable['bbox_min_y'] 

1517 ccdEntry['llcra'] = summaryTable['raCorners'][:, 0] 

1518 ccdEntry['llcdec'] = summaryTable['decCorners'][:, 0] 

1519 ccdEntry['ulcra'] = summaryTable['raCorners'][:, 1] 

1520 ccdEntry['ulcdec'] = summaryTable['decCorners'][:, 1] 

1521 ccdEntry['urcra'] = summaryTable['raCorners'][:, 2] 

1522 ccdEntry['urcdec'] = summaryTable['decCorners'][:, 2] 

1523 ccdEntry['lrcra'] = summaryTable['raCorners'][:, 3] 

1524 ccdEntry['lrcdec'] = summaryTable['decCorners'][:, 3] 

1525 # TODO: DM-30618, Add raftName, nExposures, ccdTemp, binX, binY, 

1526 # and flags, and decide if WCS, and llcx, llcy, ulcx, ulcy, etc. 

1527 # values are actually wanted. 

1528 ccdEntries.append(ccdEntry) 

1529 

1530 outputCatalog = pd.concat(ccdEntries) 

1531 outputCatalog.set_index('ccdVisitId', inplace=True, verify_integrity=True) 

1532 return pipeBase.Struct(outputCatalog=outputCatalog) 
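Two of the derived quantities above lend themselves to a small worked sketch: the seeing column converts the PSF Gaussian sigma to a FWHM in arcseconds, and the observation start is half an exposure time before the midpoint. All numbers below are made up:

# Worked sketch of two ccdVisit columns, with made-up inputs.
import numpy as np
import pandas as pd

psfSigma = 1.7      # PSF Gaussian sigma in pixels (hypothetical)
pixelScale = 0.2    # arcsec per pixel (hypothetical)
expTime = 30.0      # exposure time in seconds (hypothetical)

# FWHM of a Gaussian is sigma * sqrt(8 ln 2); scale by the pixel size.
seeing = psfSigma * np.sqrt(8 * np.log(2)) * pixelScale  # ~0.80 arcsec

# Observation start is half the exposure time before the midpoint,
# both as a timestamp and in days for the MJD column.
expMidpt = pd.Timestamp("2024-01-01T00:00:15")    # arbitrary midpoint
obsStart = expMidpt - 0.5 * pd.Timedelta(seconds=expTime)
expMidptMJD = 60310.000174                        # arbitrary MJD midpoint
obsStartMJD = expMidptMJD - 0.5 * expTime / (60 * 60 * 24)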

1533 

1534 

1535class MakeVisitTableConnections(pipeBase.PipelineTaskConnections, 

1536 dimensions=("instrument",), 

1537 defaultTemplates={"calexpType": ""}): 

1538 visitSummaries = connectionTypes.Input( 

1539 doc="Per-visit consolidated exposure metadata from ConsolidateVisitSummaryTask", 

1540 name="{calexpType}visitSummary", 

1541 storageClass="ExposureCatalog", 

1542 dimensions=("instrument", "visit",), 

1543 multiple=True, 

1544 deferLoad=True, 

1545 ) 

1546 outputCatalog = connectionTypes.Output( 

1547 doc="Visit metadata table", 

1548 name="visitTable", 

1549 storageClass="DataFrame", 

1550 dimensions=("instrument",) 

1551 ) 

1552 

1553 

1554class MakeVisitTableConfig(pipeBase.PipelineTaskConfig, 

1555 pipelineConnections=MakeVisitTableConnections): 

1556 pass 

1557 

1558 

1559class MakeVisitTableTask(pipeBase.PipelineTask): 

1560 """Produce a `visitTable` from the `visitSummary` exposure catalogs. 

1561 """ 

1562 _DefaultName = 'makeVisitTable' 

1563 ConfigClass = MakeVisitTableConfig 

1564 

1565 def run(self, visitSummaries): 

1566 """Make a table of visit information from the `visitSummary` catalogs. 

1567 

1568 Parameters 

1569 ---------- 

1570 visitSummaries : `list` of `lsst.daf.butler.DeferredDatasetHandle` 

1571 List of DeferredDatasetHandles pointing to exposure catalogs with per-detector summary information. 

1572 Returns 

1573 ------- 

1574 result : `lsst.pipe.base.Struct` 

1575 Results struct with attribute: 

1576 

1577 ``outputCatalog`` 

1578 Catalog of visit information. 

1579 """ 

1580 visitEntries = [] 

1581 for visitSummary in visitSummaries: 

1582 visitSummary = visitSummary.get() 

1583 visitRow = visitSummary[0] 

1584 visitInfo = visitRow.getVisitInfo() 

1585 

1586 visitEntry = {} 

1587 visitEntry["visitId"] = visitRow['visit'] 

1588 visitEntry["visit"] = visitRow['visit'] 

1589 visitEntry["physical_filter"] = visitRow['physical_filter'] 

1590 visitEntry["band"] = visitRow['band'] 

1591 raDec = visitInfo.getBoresightRaDec() 

1592 visitEntry["ra"] = raDec.getRa().asDegrees() 

1593 visitEntry["decl"] = raDec.getDec().asDegrees() 

1594 visitEntry["skyRotation"] = visitInfo.getBoresightRotAngle().asDegrees() 

1595 azAlt = visitInfo.getBoresightAzAlt() 

1596 visitEntry["azimuth"] = azAlt.getLongitude().asDegrees() 

1597 visitEntry["altitude"] = azAlt.getLatitude().asDegrees() 

1598 visitEntry["zenithDistance"] = 90 - azAlt.getLatitude().asDegrees() 

1599 visitEntry["airmass"] = visitInfo.getBoresightAirmass() 

1600 expTime = visitInfo.getExposureTime() 

1601 visitEntry["expTime"] = expTime 

1602 visitEntry["expMidpt"] = visitInfo.getDate().toPython() 

1603 visitEntry["expMidptMJD"] = visitInfo.getDate().get(dafBase.DateTime.MJD) 

1604 visitEntry["obsStart"] = visitEntry["expMidpt"] - 0.5 * pd.Timedelta(seconds=expTime) 

1605 expTime_days = expTime / (60*60*24) 

1606 visitEntry["obsStartMJD"] = visitEntry["expMidptMJD"] - 0.5 * expTime_days 

1607 visitEntries.append(visitEntry) 

1608 

1609 # TODO: DM-30623, Add programId, exposureType, cameraTemp, 

1610 # mirror1Temp, mirror2Temp, mirror3Temp, domeTemp, externalTemp, 

1611 # dimmSeeing, pwvGPS, pwvMW, flags, nExposures. 

1612 

1613 outputCatalog = pd.DataFrame(data=visitEntries) 

1614 outputCatalog.set_index('visitId', inplace=True, verify_integrity=True) 

1615 return pipeBase.Struct(outputCatalog=outputCatalog) 
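The assembly at the end of `run` can be exercised on its own: one dict per visit is collected into a DataFrame and indexed on visitId, with verify_integrity guarding against duplicate visits. A minimal sketch with synthetic entries:

# Sketch of the final assembly step: one dict per visit, indexed on visitId.
# The dict contents here are synthetic.
import pandas as pd

visitEntries = [
    {'visitId': 100, 'visit': 100, 'band': 'r', 'zenithDistance': 30.0},
    {'visitId': 101, 'visit': 101, 'band': 'i', 'zenithDistance': 42.5},
]
visitTable = pd.DataFrame(data=visitEntries)
# verify_integrity=True raises if the same visit appears twice.
visitTable.set_index('visitId', inplace=True, verify_integrity=True)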

1616 

1617 

1618class WriteForcedSourceTableConnections(pipeBase.PipelineTaskConnections, 

1619 dimensions=("instrument", "visit", "detector", "skymap", "tract")): 

1620 

1621 inputCatalog = connectionTypes.Input( 

1622 doc="Primary per-detector, single-epoch forced-photometry catalog. " 

1623 "By default, it is the output of ForcedPhotCcdTask on calexps", 

1624 name="forced_src", 

1625 storageClass="SourceCatalog", 

1626 dimensions=("instrument", "visit", "detector", "skymap", "tract") 

1627 ) 

1628 inputCatalogDiff = connectionTypes.Input( 

1629 doc="Secondary multi-epoch, per-detector, forced photometry catalog. " 

1630 "By default, it is the output of ForcedPhotCcdTask run on image differences.", 

1631 name="forced_diff", 

1632 storageClass="SourceCatalog", 

1633 dimensions=("instrument", "visit", "detector", "skymap", "tract") 

1634 ) 

1635 outputCatalog = connectionTypes.Output( 

1636 doc="Input catalogs horizontally joined on `objectId` in Parquet format", 

1637 name="mergedForcedSource", 

1638 storageClass="DataFrame", 

1639 dimensions=("instrument", "visit", "detector", "skymap", "tract") 

1640 ) 

1641 

1642 

1643class WriteForcedSourceTableConfig(pipeBase.PipelineTaskConfig, 

1644 pipelineConnections=WriteForcedSourceTableConnections): 

1645 key = lsst.pex.config.Field( 

1646 doc="Column on which to join the two input tables, and the primary key of the output", 

1647 dtype=str, 

1648 default="objectId", 

1649 ) 

1650 

1651 

1652class WriteForcedSourceTableTask(pipeBase.PipelineTask): 

1653 """Merge and convert per-detector forced source catalogs to parquet. 

1654 

1655 Because the predecessor ForcedPhotCcdTask operates per-detector and 

1656 per-tract (i.e., it has tract in its dimensions), detectors 

1657 on a tract boundary may have multiple forced source catalogs. 

1658 

1659 The successor task TransformForcedSourceTable runs per-patch and 

1660 temporally aggregates the overlapping mergedForcedSource catalogs from 

1661 all available epochs. 

1662 """ 

1663 _DefaultName = "writeForcedSourceTable" 

1664 ConfigClass = WriteForcedSourceTableConfig 

1665 

1666 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

1667 inputs = butlerQC.get(inputRefs) 

1668 # Add ccdVisitId to allow joining with CcdVisitTable 

1669 inputs['ccdVisitId'] = butlerQC.quantum.dataId.pack("visit_detector") 

1670 inputs['band'] = butlerQC.quantum.dataId.full['band'] 

1671 outputs = self.run(**inputs) 

1672 butlerQC.put(outputs, outputRefs) 

1673 

1674 def run(self, inputCatalog, inputCatalogDiff, ccdVisitId=None, band=None): 

1675 dfs = [] 

1676 for table, dataset in zip((inputCatalog, inputCatalogDiff), ('calexp', 'diff')): 

1677 df = table.asAstropy().to_pandas().set_index(self.config.key, drop=False) 

1678 df = df.reindex(sorted(df.columns), axis=1) 

1679 df['ccdVisitId'] = ccdVisitId if ccdVisitId else pd.NA 

1680 df['band'] = band if band else pd.NA 

1681 df.columns = pd.MultiIndex.from_tuples([(dataset, c) for c in df.columns], 

1682 names=('dataset', 'column')) 

1683 

1684 dfs.append(df) 

1685 

1686 outputCatalog = functools.reduce(lambda d1, d2: d1.join(d2), dfs) 

1687 return pipeBase.Struct(outputCatalog=outputCatalog) 
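The column MultiIndexing and join above can be reproduced with toy DataFrames standing in for the 'calexp' and 'diff' catalogs; everything below is illustrative only:

# Sketch of the column MultiIndexing and join performed above, with toy
# catalogs already converted to DataFrames and indexed on objectId.
import functools
import pandas as pd

calexp = pd.DataFrame({'objectId': [1, 2], 'flux': [10.0, 20.0]}).set_index('objectId', drop=False)
diff = pd.DataFrame({'objectId': [1, 2], 'flux': [0.5, -0.3]}).set_index('objectId', drop=False)

dfs = []
for df, dataset in ((calexp, 'calexp'), (diff, 'diff')):
    df = df.reindex(sorted(df.columns), axis=1)
    # Prefix every column with its dataset of origin.
    df.columns = pd.MultiIndex.from_tuples([(dataset, c) for c in df.columns],
                                           names=('dataset', 'column'))
    dfs.append(df)

merged = functools.reduce(lambda d1, d2: d1.join(d2), dfs)
# Columns are now ('calexp', 'flux'), ('calexp', 'objectId'), ('diff', 'flux'), ...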

1688 

1689 

1690class TransformForcedSourceTableConnections(pipeBase.PipelineTaskConnections, 

1691 dimensions=("instrument", "skymap", "patch", "tract")): 

1692 

1693 inputCatalogs = connectionTypes.Input( 

1694 doc="Parquet table of merged ForcedSources produced by WriteForcedSourceTableTask", 

1695 name="mergedForcedSource", 

1696 storageClass="DataFrame", 

1697 dimensions=("instrument", "visit", "detector", "skymap", "tract"), 

1698 multiple=True, 

1699 deferLoad=True 

1700 ) 

1701 referenceCatalog = connectionTypes.Input( 

1702 doc="Reference catalog which was used to seed the forced photometry. Columns " 

1703 "objectId, detect_isPrimary, detect_isTractInner, detect_isPatchInner " 

1704 "are expected.", 

1705 name="objectTable", 

1706 storageClass="DataFrame", 

1707 dimensions=("tract", "patch", "skymap"), 

1708 deferLoad=True 

1709 ) 

1710 outputCatalog = connectionTypes.Output( 

1711 doc="Narrower, temporally-aggregated, per-patch ForcedSource Table transformed and converted per a " 

1712 "specified set of functors", 

1713 name="forcedSourceTable", 

1714 storageClass="DataFrame", 

1715 dimensions=("tract", "patch", "skymap") 

1716 ) 

1717 

1718 

1719class TransformForcedSourceTableConfig(TransformCatalogBaseConfig, 

1720 pipelineConnections=TransformForcedSourceTableConnections): 

1721 referenceColumns = pexConfig.ListField( 

1722 dtype=str, 

1723 default=["detect_isPrimary", "detect_isTractInner", "detect_isPatchInner"], 

1724 optional=True, 

1725 doc="Columns to pull from reference catalog", 

1726 ) 

1727 keyRef = lsst.pex.config.Field( 

1728 doc="Column on which to join the two input tables, and the primary key of the output", 

1729 dtype=str, 

1730 default="objectId", 

1731 ) 

1732 key = lsst.pex.config.Field( 

1733 doc="Rename the output DataFrame index to this name", 

1734 dtype=str, 

1735 default="forcedSourceId", 

1736 ) 

1737 

1738 def setDefaults(self): 

1739 super().setDefaults() 

1740 self.functorFile = os.path.join('$PIPE_TASKS_DIR', 'schemas', 'ForcedSource.yaml') 

1741 self.columnsFromDataId = ['tract', 'patch'] 

1742 

1743 

1744class TransformForcedSourceTableTask(TransformCatalogBaseTask): 

1745 """Transform/standardize a ForcedSource catalog 

1746 

1747 Transforms each wide, per-detector forcedSource parquet table per the 

1748 specification file (per-camera defaults found in ForcedSource.yaml). 

1749 All epochs that overlap the patch are aggregated into one narrow, 

1750 per-patch parquet file. 

1751 

1752 No de-duplication of rows is performed. Duplicate-resolution flags are 

1753 pulled in from the referenceCatalog: `detect_isPrimary`, 

1754 `detect_isTractInner`, `detect_isPatchInner`, so that the user may 

1755 de-duplicate for analysis or compare duplicates for QA. 

1756 

1757 The resulting table includes multiple bands. Epochs (MJDs) and other useful 

1758 per-visit quantities can be retrieved by joining with the CcdVisitTable on 

1759 ccdVisitId. 

1760 """ 

1761 _DefaultName = "transformForcedSourceTable" 

1762 ConfigClass = TransformForcedSourceTableConfig 

1763 

1764 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

1765 inputs = butlerQC.get(inputRefs) 

1766 if self.funcs is None: 

1767 raise ValueError("config.functorFile is None. " 

1768 "Must be a valid path to yaml in order to run Task as a PipelineTask.") 

1769 outputs = self.run(inputs['inputCatalogs'], inputs['referenceCatalog'], funcs=self.funcs, 

1770 dataId=outputRefs.outputCatalog.dataId.full) 

1771 

1772 butlerQC.put(outputs, outputRefs) 

1773 

1774 def run(self, inputCatalogs, referenceCatalog, funcs=None, dataId=None, band=None): 

1775 dfs = [] 

1776 ref = referenceCatalog.get(parameters={"columns": self.config.referenceColumns}) 

1777 self.log.info("Aggregating %s input catalogs", len(inputCatalogs)) 

1778 for handle in inputCatalogs: 

1779 result = self.transform(None, handle, funcs, dataId) 

1780 # Filter for only rows that were detected on (overlap) the patch 

1781 dfs.append(result.df.join(ref, how='inner')) 

1782 

1783 outputCatalog = pd.concat(dfs) 

1784 

1785 # Now that we are done joining on config.keyRef, 

1786 # change the index to config.key in the following steps: 

1787 outputCatalog.index.rename(self.config.keyRef, inplace=True) 

1788 # Add config.keyRef to the column list 

1789 outputCatalog.reset_index(inplace=True) 

1790 # Set the forcedSourceId to the index. This is specified in the 

1791 # ForcedSource.yaml 

1792 outputCatalog.set_index("forcedSourceId", inplace=True, verify_integrity=True) 

1793 # Rename it to the config.key 

1794 outputCatalog.index.rename(self.config.key, inplace=True) 

1795 

1796 self.log.info("Made a table of %d columns and %d rows", 

1797 len(outputCatalog.columns), len(outputCatalog)) 

1798 return pipeBase.Struct(outputCatalog=outputCatalog) 
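The reference join and the index bookkeeping above can be sketched with toy data (objectId plays the role of config.keyRef, forcedSourceId that of config.key); everything below is hypothetical:

# Sketch of the reference join and re-indexing, with toy data.
import pandas as pd

# Transformed forced sources, indexed on objectId; object 1 has two epochs.
forced = pd.DataFrame({'forcedSourceId': [11, 12, 13],
                       'psfFlux': [1.0, 2.0, 3.0]},
                      index=pd.Index([1, 1, 2], name='objectId'))
# Duplicate-resolution flags from the objectTable, also indexed on objectId.
ref = pd.DataFrame({'detect_isPrimary': [True, False]},
                   index=pd.Index([1, 2], name='objectId'))

joined = forced.join(ref, how='inner')      # keep only objects in the patch
joined.index.rename('objectId', inplace=True)
joined.reset_index(inplace=True)            # objectId becomes a column
joined.set_index('forcedSourceId', inplace=True, verify_integrity=True)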

1799 

1800 

1801class ConsolidateTractConnections(pipeBase.PipelineTaskConnections, 

1802 defaultTemplates={"catalogType": ""}, 

1803 dimensions=("instrument", "tract")): 

1804 inputCatalogs = connectionTypes.Input( 

1805 doc="Input per-patch DataFrame Tables to be concatenated", 

1806 name="{catalogType}ForcedSourceTable", 

1807 storageClass="DataFrame", 

1808 dimensions=("tract", "patch", "skymap"), 

1809 multiple=True, 

1810 ) 

1811 

1812 outputCatalog = connectionTypes.Output( 

1813 doc="Output per-tract concatenation of DataFrame Tables", 

1814 name="{catalogType}ForcedSourceTable_tract", 

1815 storageClass="DataFrame", 

1816 dimensions=("tract", "skymap"), 

1817 ) 

1818 

1819 

1820class ConsolidateTractConfig(pipeBase.PipelineTaskConfig, 

1821 pipelineConnections=ConsolidateTractConnections): 

1822 pass 

1823 

1824 

1825class ConsolidateTractTask(pipeBase.PipelineTask): 

1826 """Concatenate any per-patch, dataframe list into a single 

1827 per-tract DataFrame. 

1828 """ 

1829 _DefaultName = 'ConsolidateTract' 

1830 ConfigClass = ConsolidateTractConfig 

1831 

1832 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

1833 inputs = butlerQC.get(inputRefs) 

1834 # No check that at least one inputCatalog exists: if none did, the 

1835 # quantum graph would be empty and this quantum would never run. 

1836 self.log.info("Concatenating %s per-patch %s Tables", 

1837 len(inputs['inputCatalogs']), 

1838 inputRefs.inputCatalogs[0].datasetType.name) 

1839 df = pd.concat(inputs['inputCatalogs']) 

1840 butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs)