Coverage for python/lsst/pipe/tasks/postprocess.py: 26%

651 statements  

coverage.py v7.2.3, created at 2023-04-22 03:00 -0700

1# This file is part of pipe_tasks. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

22__all__ = ["WriteObjectTableConfig", "WriteObjectTableTask", 

23 "WriteSourceTableConfig", "WriteSourceTableTask", 

24 "WriteRecalibratedSourceTableConfig", "WriteRecalibratedSourceTableTask", 

25 "PostprocessAnalysis", 

26 "TransformCatalogBaseConfig", "TransformCatalogBaseTask", 

27 "TransformObjectCatalogConfig", "TransformObjectCatalogTask", 

28 "ConsolidateObjectTableConfig", "ConsolidateObjectTableTask", 

29 "TransformSourceTableConfig", "TransformSourceTableTask", 

30 "ConsolidateVisitSummaryConfig", "ConsolidateVisitSummaryTask", 

31 "ConsolidateSourceTableConfig", "ConsolidateSourceTableTask", 

32 "MakeCcdVisitTableConfig", "MakeCcdVisitTableTask", 

33 "MakeVisitTableConfig", "MakeVisitTableTask", 

34 "WriteForcedSourceTableConfig", "WriteForcedSourceTableTask", 

35 "TransformForcedSourceTableConfig", "TransformForcedSourceTableTask", 

36 "ConsolidateTractConfig", "ConsolidateTractTask"] 

37 

38import functools 

39import pandas as pd 

40import logging 

41import numpy as np 

42import numbers 

43import os 

44 

45import lsst.geom 

46import lsst.pex.config as pexConfig 

47import lsst.pipe.base as pipeBase 

48import lsst.daf.base as dafBase 

49from lsst.obs.base import ExposureIdInfo 

50from lsst.pipe.base import connectionTypes 

51import lsst.afw.table as afwTable 

52from lsst.afw.image import ExposureSummaryStats 

53from lsst.meas.base import SingleFrameMeasurementTask 

54from lsst.daf.butler import DataCoordinate 

55from lsst.skymap import BaseSkyMap 

56 

57from .functors import CompositeFunctor, Column 

58 

59log = logging.getLogger(__name__) 

60 

61 

62def flattenFilters(df, noDupCols=['coord_ra', 'coord_dec'], camelCase=False, inputBands=None): 

63 """Flattens a dataframe with multilevel column index. 

64 """ 

65 newDf = pd.DataFrame() 

66 # band is the level 0 index 

67 dfBands = df.columns.unique(level=0).values 

68 for band in dfBands: 

69 subdf = df[band] 

70 columnFormat = '{0}{1}' if camelCase else '{0}_{1}' 

71 newColumns = {c: columnFormat.format(band, c) 

72 for c in subdf.columns if c not in noDupCols} 

73 cols = list(newColumns.keys()) 

74 newDf = pd.concat([newDf, subdf[cols].rename(columns=newColumns)], axis=1) 

75 

76 # Band must be present in the input and output or else column is all NaN: 

77 presentBands = dfBands if inputBands is None else list(set(inputBands).intersection(dfBands)) 

78 # Get the unexploded columns from any present band's partition 

79 noDupDf = df[presentBands[0]][noDupCols] 

80 newDf = pd.concat([noDupDf, newDf], axis=1) 

81 return newDf 
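# Illustrative sketch (not part of the pipeline): given a two-level column
# index of (band, column), flattenFilters produces band-prefixed flat columns
# while keeping a single copy of the noDupCols. Column names here are made up.
#
#     bands = ['g', 'r']
#     cols = pd.MultiIndex.from_product([bands, ['coord_ra', 'coord_dec', 'psfFlux']])
#     df = pd.DataFrame(np.ones((3, len(cols))), columns=cols)
#     flat = flattenFilters(df)
#     # flat.columns -> ['coord_ra', 'coord_dec', 'g_psfFlux', 'r_psfFlux']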

82 

83 

84class WriteObjectTableConnections(pipeBase.PipelineTaskConnections, 

85 defaultTemplates={"coaddName": "deep"}, 

86 dimensions=("tract", "patch", "skymap")): 

87 inputCatalogMeas = connectionTypes.Input( 

88 doc="Catalog of source measurements on the deepCoadd.", 

89 dimensions=("tract", "patch", "band", "skymap"), 

90 storageClass="SourceCatalog", 

91 name="{coaddName}Coadd_meas", 

92 multiple=True 

93 ) 

94 inputCatalogForcedSrc = connectionTypes.Input( 

95 doc="Catalog of forced measurements (shape and position parameters held fixed) on the deepCoadd.", 

96 dimensions=("tract", "patch", "band", "skymap"), 

97 storageClass="SourceCatalog", 

98 name="{coaddName}Coadd_forced_src", 

99 multiple=True 

100 ) 

101 inputCatalogRef = connectionTypes.Input( 

102 doc="Catalog marking the primary detection (which band provides a good shape and position) "

103 "for each detection in deepCoadd_mergeDet.", 

104 dimensions=("tract", "patch", "skymap"), 

105 storageClass="SourceCatalog", 

106 name="{coaddName}Coadd_ref" 

107 ) 

108 outputCatalog = connectionTypes.Output( 

109 doc="A vertical concatenation of the deepCoadd_{ref|meas|forced_src} catalogs, " 

110 "stored as a DataFrame with a multi-level column index per-patch.", 

111 dimensions=("tract", "patch", "skymap"), 

112 storageClass="DataFrame", 

113 name="{coaddName}Coadd_obj" 

114 ) 

115 

116 

117class WriteObjectTableConfig(pipeBase.PipelineTaskConfig, 

118 pipelineConnections=WriteObjectTableConnections): 

119 engine = pexConfig.Field( 

120 dtype=str, 

121 default="pyarrow", 

122 doc="Parquet engine for writing (pyarrow or fastparquet)", 

123 deprecated="This config is no longer used, and will be removed after v26." 

124 ) 

125 coaddName = pexConfig.Field( 

126 dtype=str, 

127 default="deep", 

128 doc="Name of coadd" 

129 ) 

130 

131 

132class WriteObjectTableTask(pipeBase.PipelineTask): 

133 """Write filter-merged source tables as a DataFrame in parquet format. 

134 """ 

135 _DefaultName = "writeObjectTable" 

136 ConfigClass = WriteObjectTableConfig 

137 

138 # Names of table datasets to be merged 

139 inputDatasets = ('forced_src', 'meas', 'ref') 

140 

141 # Tag of output dataset written by `MergeSourcesTask.write` 

142 outputDataset = 'obj' 

143 

144 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

145 inputs = butlerQC.get(inputRefs) 

146 

147 measDict = {ref.dataId['band']: {'meas': cat} for ref, cat in 

148 zip(inputRefs.inputCatalogMeas, inputs['inputCatalogMeas'])} 

149 forcedSourceDict = {ref.dataId['band']: {'forced_src': cat} for ref, cat in 

150 zip(inputRefs.inputCatalogForcedSrc, inputs['inputCatalogForcedSrc'])} 

151 

152 catalogs = {} 

153 for band in measDict.keys(): 

154 catalogs[band] = {'meas': measDict[band]['meas'], 

155 'forced_src': forcedSourceDict[band]['forced_src'], 

156 'ref': inputs['inputCatalogRef']} 

157 dataId = butlerQC.quantum.dataId 

158 df = self.run(catalogs=catalogs, tract=dataId['tract'], patch=dataId['patch']) 

159 outputs = pipeBase.Struct(outputCatalog=df) 

160 butlerQC.put(outputs, outputRefs) 

161 

162 def run(self, catalogs, tract, patch): 

163 """Merge multiple catalogs. 

164 

165 Parameters 

166 ---------- 

167 catalogs : `dict` 

168 Mapping from filter names to dict of catalogs. 

169 tract : `int`

170 tractId to use for the tractId column. 

171 patch : `str`

172 patchId to use for the patchId column. 

173 

174 Returns 

175 ------- 

176 catalog : `pandas.DataFrame` 

177 Merged dataframe. 

178 """ 

179 

180 dfs = [] 

181 for filt, tableDict in catalogs.items(): 

182 for dataset, table in tableDict.items(): 

183 # Convert afwTable to pandas DataFrame 

184 df = table.asAstropy().to_pandas().set_index('id', drop=True) 

185 

186 # Sort columns by name, to ensure matching schema among patches 

187 df = df.reindex(sorted(df.columns), axis=1) 

188 df['tractId'] = tract 

189 df['patchId'] = patch 

190 

191 # Make columns a 3-level MultiIndex 

192 df.columns = pd.MultiIndex.from_tuples([(dataset, filt, c) for c in df.columns], 

193 names=('dataset', 'band', 'column')) 

194 dfs.append(df) 

195 

196 catalog = functools.reduce(lambda d1, d2: d1.join(d2), dfs) 

197 return catalog 
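# The merged catalog above carries a three-level column MultiIndex of
# (dataset, band, column). A hypothetical slice (column names illustrative,
# not a schema guarantee):
#
#     catalog['meas']['g']                          # all meas columns for band g
#     catalog[('ref', 'i', 'detect_isPrimary')]     # one column from the ref dataset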

198 

199 

200class WriteSourceTableConnections(pipeBase.PipelineTaskConnections, 

201 defaultTemplates={"catalogType": ""}, 

202 dimensions=("instrument", "visit", "detector")): 

203 

204 catalog = connectionTypes.Input( 

205 doc="Input full-depth catalog of sources produced by CalibrateTask", 

206 name="{catalogType}src", 

207 storageClass="SourceCatalog", 

208 dimensions=("instrument", "visit", "detector") 

209 ) 

210 outputCatalog = connectionTypes.Output( 

211 doc="Catalog of sources, `src` in DataFrame/Parquet format. The 'id' column is " 

212 "replaced with an index; all other columns are unchanged.", 

213 name="{catalogType}source", 

214 storageClass="DataFrame", 

215 dimensions=("instrument", "visit", "detector") 

216 ) 

217 

218 

219class WriteSourceTableConfig(pipeBase.PipelineTaskConfig, 

220 pipelineConnections=WriteSourceTableConnections): 

221 pass 

222 

223 

224class WriteSourceTableTask(pipeBase.PipelineTask): 

225 """Write source table to DataFrame Parquet format. 

226 """ 

227 _DefaultName = "writeSourceTable" 

228 ConfigClass = WriteSourceTableConfig 

229 

230 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

231 inputs = butlerQC.get(inputRefs) 

232 inputs['ccdVisitId'] = butlerQC.quantum.dataId.pack("visit_detector") 

233 result = self.run(**inputs) 

234 outputs = pipeBase.Struct(outputCatalog=result.table) 

235 butlerQC.put(outputs, outputRefs) 

236 

237 def run(self, catalog, ccdVisitId=None, **kwargs): 

238 """Convert `src` catalog to DataFrame 

239 

240 Parameters 

241 ---------- 

242 catalog : `lsst.afw.table.SourceCatalog`

243 Catalog to be converted.

244 ccdVisitId : `int`, optional

245 ccdVisitId to be added as a column.

246 

247 Returns 

248 ------- 

249 result : `lsst.pipe.base.Struct` 

250 ``table`` 

251 `DataFrame` version of the input catalog 

252 """ 

253 self.log.info("Generating DataFrame from src catalog ccdVisitId=%s", ccdVisitId) 

254 df = catalog.asAstropy().to_pandas().set_index('id', drop=True) 

255 df['ccdVisitId'] = ccdVisitId 

256 return pipeBase.Struct(table=df) 
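# Minimal sketch of calling run() directly, outside runQuantum (the srcCat
# variable and the ccdVisitId value are assumed/illustrative):
#
#     task = WriteSourceTableTask()
#     struct = task.run(srcCat, ccdVisitId=123456789)
#     df = struct.table   # pandas DataFrame indexed by source id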

257 

258 

259class WriteRecalibratedSourceTableConnections(WriteSourceTableConnections, 

260 defaultTemplates={"catalogType": "", 

261 "skyWcsName": "gbdesAstrometricFit", 

262 "photoCalibName": "fgcm"}, 

263 dimensions=("instrument", "visit", "detector", "skymap")): 

264 skyMap = connectionTypes.Input( 

265 doc="skyMap needed to choose which tract-level calibrations to use when multiple available", 

266 name=BaseSkyMap.SKYMAP_DATASET_TYPE_NAME, 

267 storageClass="SkyMap", 

268 dimensions=("skymap",), 

269 ) 

270 exposure = connectionTypes.Input( 

271 doc="Input exposure to perform photometry on.", 

272 name="calexp", 

273 storageClass="ExposureF", 

274 dimensions=["instrument", "visit", "detector"], 

275 ) 

276 externalSkyWcsTractCatalog = connectionTypes.Input( 

277 doc=("Per-tract, per-visit wcs calibrations. These catalogs use the detector " 

278 "id for the catalog id, sorted on id for fast lookup."), 

279 name="{skyWcsName}SkyWcsCatalog", 

280 storageClass="ExposureCatalog", 

281 dimensions=["instrument", "visit", "tract"], 

282 multiple=True 

283 ) 

284 externalSkyWcsGlobalCatalog = connectionTypes.Input( 

285 doc=("Per-visit wcs calibrations computed globally (with no tract information). " 

286 "These catalogs use the detector id for the catalog id, sorted on id for " 

287 "fast lookup."), 

288 name="finalVisitSummary", 

289 storageClass="ExposureCatalog", 

290 dimensions=["instrument", "visit"], 

291 ) 

292 externalPhotoCalibTractCatalog = connectionTypes.Input( 

293 doc=("Per-tract, per-visit photometric calibrations. These catalogs use the " 

294 "detector id for the catalog id, sorted on id for fast lookup."), 

295 name="{photoCalibName}PhotoCalibCatalog", 

296 storageClass="ExposureCatalog", 

297 dimensions=["instrument", "visit", "tract"], 

298 multiple=True 

299 ) 

300 externalPhotoCalibGlobalCatalog = connectionTypes.Input( 

301 doc=("Per-visit photometric calibrations computed globally (with no tract " 

302 "information). These catalogs use the detector id for the catalog id, " 

303 "sorted on id for fast lookup."), 

304 name="finalVisitSummary", 

305 storageClass="ExposureCatalog", 

306 dimensions=["instrument", "visit"], 

307 ) 

308 

309 def __init__(self, *, config=None): 

310 super().__init__(config=config) 

311 # Same connection boilerplate as all other applications of 

312 # Global/Tract calibrations 

313 if config.doApplyExternalSkyWcs and config.doReevaluateSkyWcs: 

314 if config.useGlobalExternalSkyWcs: 

315 self.inputs.remove("externalSkyWcsTractCatalog") 

316 else: 

317 self.inputs.remove("externalSkyWcsGlobalCatalog") 

318 else: 

319 self.inputs.remove("externalSkyWcsTractCatalog") 

320 self.inputs.remove("externalSkyWcsGlobalCatalog") 

321 if config.doApplyExternalPhotoCalib and config.doReevaluatePhotoCalib: 

322 if config.useGlobalExternalPhotoCalib: 

323 self.inputs.remove("externalPhotoCalibTractCatalog") 

324 else: 

325 self.inputs.remove("externalPhotoCalibGlobalCatalog") 

326 else: 

327 self.inputs.remove("externalPhotoCalibTractCatalog") 

328 self.inputs.remove("externalPhotoCalibGlobalCatalog") 

329 

330 

331class WriteRecalibratedSourceTableConfig(WriteSourceTableConfig, 

332 pipelineConnections=WriteRecalibratedSourceTableConnections): 

333 

334 doReevaluatePhotoCalib = pexConfig.Field( 

335 dtype=bool, 

336 default=True, 

337 doc=("Add or replace local photoCalib columns") 

338 ) 

339 doReevaluateSkyWcs = pexConfig.Field( 

340 dtype=bool, 

341 default=True, 

342 doc=("Add or replace local WCS columns and update the coord columns, coord_ra and coord_dec") 

343 ) 

344 doApplyExternalPhotoCalib = pexConfig.Field( 

345 dtype=bool, 

346 default=True, 

347 doc=("If and only if doReevaluatePhotoCalib, apply the photometric calibrations from an external ", 

348 "algorithm such as FGCM or jointcal, else use the photoCalib already attached to the exposure."), 

349 ) 

350 doApplyExternalSkyWcs = pexConfig.Field( 

351 dtype=bool, 

352 default=True, 

353 doc=("if and only if doReevaluateSkyWcs, apply the WCS from an external algorithm such as jointcal, ", 

354 "else use the wcs already attached to the exposure."), 

355 ) 

356 useGlobalExternalPhotoCalib = pexConfig.Field( 

357 dtype=bool, 

358 default=True, 

359 doc=("When using doApplyExternalPhotoCalib, use 'global' calibrations " 

360 "that are not run per-tract. When False, use per-tract photometric " 

361 "calibration files.") 

362 ) 

363 useGlobalExternalSkyWcs = pexConfig.Field( 

364 dtype=bool, 

365 default=True, 

366 doc=("When using doApplyExternalSkyWcs, use 'global' calibrations " 

367 "that are not run per-tract. When False, use per-tract wcs " 

368 "files.") 

369 ) 

370 

371 def validate(self): 

372 super().validate() 

373 if self.doApplyExternalSkyWcs and not self.doReevaluateSkyWcs: 

374 log.warning("doApplyExternalSkyWcs=True but doReevaluateSkyWcs=False. "

375 "External SkyWcs will not be read or evaluated.") 

376 if self.doApplyExternalPhotoCalib and not self.doReevaluatePhotoCalib: 

377 log.warning("doApplyExternalPhotoCalib=True but doReevaluatePhotoCalib=False. "

378 "External PhotoCalib will not be read or evaluated.") 

379 

380 

381class WriteRecalibratedSourceTableTask(WriteSourceTableTask): 

382 """Write source table to DataFrame Parquet format. 

383 """ 

384 _DefaultName = "writeRecalibratedSourceTable" 

385 ConfigClass = WriteRecalibratedSourceTableConfig 

386 

387 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

388 inputs = butlerQC.get(inputRefs) 

389 inputs['ccdVisitId'] = butlerQC.quantum.dataId.pack("visit_detector") 

390 inputs['exposureIdInfo'] = ExposureIdInfo.fromDataId(butlerQC.quantum.dataId, "visit_detector") 

391 

392 if self.config.doReevaluatePhotoCalib or self.config.doReevaluateSkyWcs: 

393 if self.config.doApplyExternalPhotoCalib or self.config.doApplyExternalSkyWcs: 

394 inputs['exposure'] = self.attachCalibs(inputRefs, **inputs) 

395 

396 inputs['catalog'] = self.addCalibColumns(**inputs) 

397 

398 result = self.run(**inputs) 

399 outputs = pipeBase.Struct(outputCatalog=result.table) 

400 butlerQC.put(outputs, outputRefs) 

401 

402 def attachCalibs(self, inputRefs, skyMap, exposure, externalSkyWcsGlobalCatalog=None, 

403 externalSkyWcsTractCatalog=None, externalPhotoCalibGlobalCatalog=None, 

404 externalPhotoCalibTractCatalog=None, **kwargs): 

405 """Apply external calibrations to exposure per configuration 

406 

407 When multiple tract-level calibrations overlap, select the one whose tract

408 center is closest to the detector center.

409 

410 Parameters 

411 ---------- 

412 inputRefs : `lsst.pipe.base.InputQuantizedConnection`

413 Input references, used for the dataIds of the tract-level calibs.

414 skyMap : `lsst.skymap.SkyMap` 

415 exposure : `lsst.afw.image.exposure.Exposure` 

416 Input exposure to adjust calibrations. 

417 externalSkyWcsGlobalCatalog : `lsst.afw.table.ExposureCatalog`, optional 

418 Exposure catalog with external skyWcs to be applied per config 

419 externalSkyWcsTractCatalog : `lsst.afw.table.ExposureCatalog`, optional 

420 Exposure catalog with external skyWcs to be applied per config 

421 externalPhotoCalibGlobalCatalog : `lsst.afw.table.ExposureCatalog`, optional 

422 Exposure catalog with external photoCalib to be applied per config 

423 externalPhotoCalibTractCatalog : `lsst.afw.table.ExposureCatalog`, optional 

424 Exposure catalog with external photoCalib to be applied per config

425 

426 Returns 

427 ------- 

428 exposure : `lsst.afw.image.exposure.Exposure` 

429 Exposure with adjusted calibrations. 

430 """ 

431 if not self.config.doApplyExternalSkyWcs: 

432 # Do not modify the exposure's SkyWcs 

433 externalSkyWcsCatalog = None 

434 elif self.config.useGlobalExternalSkyWcs: 

435 # Use the global external SkyWcs 

436 externalSkyWcsCatalog = externalSkyWcsGlobalCatalog 

437 self.log.info('Applying global SkyWcs') 

438 else: 

439 # use tract-level external SkyWcs from the closest overlapping tract 

440 inputRef = getattr(inputRefs, 'externalSkyWcsTractCatalog') 

441 tracts = [ref.dataId['tract'] for ref in inputRef] 

442 if len(tracts) == 1: 

443 ind = 0 

444 self.log.info('Applying tract-level SkyWcs from tract %s', tracts[ind]) 

445 else: 

446 if exposure.getWcs() is None: # TODO: could this look-up use the externalPhotoCalib? 

447 raise ValueError("Trying to locate nearest tract, but exposure.wcs is None.") 

448 ind = self.getClosestTract(tracts, skyMap, 

449 exposure.getBBox(), exposure.getWcs()) 

450 self.log.info('Multiple overlapping externalSkyWcsTractCatalogs found (%s). ' 

451 'Applying closest to detector center: tract=%s', str(tracts), tracts[ind]) 

452 

453 externalSkyWcsCatalog = externalSkyWcsTractCatalog[ind] 

454 

455 if not self.config.doApplyExternalPhotoCalib: 

456 # Do not modify the exposure's PhotoCalib 

457 externalPhotoCalibCatalog = None 

458 elif self.config.useGlobalExternalPhotoCalib: 

459 # Use the global external PhotoCalib 

460 externalPhotoCalibCatalog = externalPhotoCalibGlobalCatalog 

461 self.log.info('Applying global PhotoCalib') 

462 else: 

463 # use tract-level external PhotoCalib from the closest overlapping tract 

464 inputRef = getattr(inputRefs, 'externalPhotoCalibTractCatalog') 

465 tracts = [ref.dataId['tract'] for ref in inputRef] 

466 if len(tracts) == 1: 

467 ind = 0 

468 self.log.info('Applying tract-level PhotoCalib from tract %s', tracts[ind]) 

469 else: 

470 ind = self.getClosestTract(tracts, skyMap, 

471 exposure.getBBox(), exposure.getWcs()) 

472 self.log.info('Multiple overlapping externalPhotoCalibTractCatalogs found (%s). ' 

473 'Applying closest to detector center: tract=%s', str(tracts), tracts[ind]) 

474 

475 externalPhotoCalibCatalog = externalPhotoCalibTractCatalog[ind] 

476 

477 return self.prepareCalibratedExposure(exposure, externalSkyWcsCatalog, externalPhotoCalibCatalog) 

478 

479 def getClosestTract(self, tracts, skyMap, bbox, wcs): 

480 """Find the index of the tract closest to detector from list of tractIds 

481 

482 Parameters 

483 ---------- 

484 tracts : `list` [`int`]

485 Iterable of integer tractIds 

486 skyMap : `lsst.skymap.SkyMap` 

487 skyMap to lookup tract geometry and wcs 

488 bbox : `lsst.geom.Box2I` 

489 Detector bbox, the center of which will be compared to tract centers

490 wcs : `lsst.afw.geom.SkyWcs` 

491 Detector Wcs object to map the detector center to SkyCoord 

492 

493 Returns 

494 ------- 

495 index : `int` 

496 """ 

497 if len(tracts) == 1: 

498 return 0 

499 

500 center = wcs.pixelToSky(bbox.getCenter()) 

501 sep = [] 

502 for tractId in tracts: 

503 tract = skyMap[tractId] 

504 tractCenter = tract.getWcs().pixelToSky(tract.getBBox().getCenter()) 

505 sep.append(center.separation(tractCenter)) 

506 

507 return np.argmin(sep) 

508 

509 def prepareCalibratedExposure(self, exposure, externalSkyWcsCatalog=None, externalPhotoCalibCatalog=None): 

510 """Prepare a calibrated exposure and apply external calibrations 

511 if so configured. 

512 

513 Parameters 

514 ---------- 

515 exposure : `lsst.afw.image.exposure.Exposure` 

516 Input exposure to adjust calibrations. 

517 externalSkyWcsCatalog : `lsst.afw.table.ExposureCatalog`, optional 

518 Exposure catalog with external skyWcs to be applied 

519 if config.doApplyExternalSkyWcs=True. Catalog uses the detector id 

520 for the catalog id, sorted on id for fast lookup. 

521 externalPhotoCalibCatalog : `lsst.afw.table.ExposureCatalog`, optional 

522 Exposure catalog with external photoCalib to be applied 

523 if config.doApplyExternalPhotoCalib=True. Catalog uses the detector 

524 id for the catalog id, sorted on id for fast lookup. 

525 

526 Returns 

527 ------- 

528 exposure : `lsst.afw.image.exposure.Exposure` 

529 Exposure with adjusted calibrations. 

530 """ 

531 detectorId = exposure.getInfo().getDetector().getId() 

532 

533 if externalPhotoCalibCatalog is not None: 

534 row = externalPhotoCalibCatalog.find(detectorId) 

535 if row is None: 

536 self.log.warning("Detector id %s not found in externalPhotoCalibCatalog; " 

537 "Using original photoCalib.", detectorId) 

538 else: 

539 photoCalib = row.getPhotoCalib() 

540 if photoCalib is None: 

541 self.log.warning("Detector id %s has None for photoCalib in externalPhotoCalibCatalog; " 

542 "Using original photoCalib.", detectorId) 

543 else: 

544 exposure.setPhotoCalib(photoCalib) 

545 

546 if externalSkyWcsCatalog is not None: 

547 row = externalSkyWcsCatalog.find(detectorId) 

548 if row is None: 

549 self.log.warning("Detector id %s not found in externalSkyWcsCatalog; " 

550 "Using original skyWcs.", detectorId) 

551 else: 

552 skyWcs = row.getWcs() 

553 if skyWcs is None: 

554 self.log.warning("Detector id %s has None for skyWcs in externalSkyWcsCatalog; " 

555 "Using original skyWcs.", detectorId) 

556 else: 

557 exposure.setWcs(skyWcs) 

558 

559 return exposure 

560 

561 def addCalibColumns(self, catalog, exposure, exposureIdInfo, **kwargs): 

562 """Add replace columns with calibs evaluated at each centroid 

563 

564 Add or replace ``base_LocalWcs`` and ``base_LocalPhotoCalib`` columns in

565 a source catalog, by rerunning the plugins.

566 

567 Parameters 

568 ---------- 

569 catalog : `lsst.afw.table.SourceCatalog` 

570 catalog to which calib columns will be added 

571 exposure : `lsst.afw.image.exposure.Exposure` 

572 Exposure with attached PhotoCalibs and SkyWcs attributes to be 

573 reevaluated at local centroids. Pixels are not required. 

574 exposureIdInfo : `lsst.obs.base.ExposureIdInfo` 

575 Exposure ID information; its ``expId`` is passed to the measurement run.

576 Returns 

577 ------- 

578 newCat : `lsst.afw.table.SourceCatalog`

579 Source catalog with the requested local calib columns.

580 """ 

581 measureConfig = SingleFrameMeasurementTask.ConfigClass() 

582 measureConfig.doReplaceWithNoise = False 

583 

584 # Clear all slots, because we aren't running the relevant plugins. 

585 for slot in measureConfig.slots: 

586 setattr(measureConfig.slots, slot, None) 

587 

588 measureConfig.plugins.names = [] 

589 if self.config.doReevaluateSkyWcs: 

590 measureConfig.plugins.names.add('base_LocalWcs') 

591 self.log.info("Re-evaluating base_LocalWcs plugin") 

592 if self.config.doReevaluatePhotoCalib: 

593 measureConfig.plugins.names.add('base_LocalPhotoCalib') 

594 self.log.info("Re-evaluating base_LocalPhotoCalib plugin") 

595 pluginsNotToCopy = tuple(measureConfig.plugins.names) 

596 

597 # Create a new schema and catalog 

598 # Copy all columns from original except for the ones to reevaluate 

599 aliasMap = catalog.schema.getAliasMap() 

600 mapper = afwTable.SchemaMapper(catalog.schema) 

601 for item in catalog.schema: 

602 if not item.field.getName().startswith(pluginsNotToCopy): 

603 mapper.addMapping(item.key) 

604 

605 schema = mapper.getOutputSchema() 

606 measurement = SingleFrameMeasurementTask(config=measureConfig, schema=schema) 

607 schema.setAliasMap(aliasMap) 

608 newCat = afwTable.SourceCatalog(schema) 

609 newCat.extend(catalog, mapper=mapper) 

610 

611 # Fluxes in sourceCatalogs are in counts, so there are no fluxes to 

612 # update here. LocalPhotoCalibs are applied during transform tasks. 

613 # Update coord_ra/coord_dec, which are expected to be positions on the 

614 # sky and are used as such in sdm tables without transform 

615 if self.config.doReevaluateSkyWcs and exposure.wcs is not None: 

616 afwTable.updateSourceCoords(exposure.wcs, newCat) 

617 

618 measurement.run(measCat=newCat, exposure=exposure, exposureId=exposureIdInfo.expId) 

619 

620 return newCat 

621 

622 

623class PostprocessAnalysis(object): 

624 """Calculate columns from DataFrames or handles storing DataFrames. 

625 

626 This object manages and organizes an arbitrary set of computations 

627 on a catalog. The catalog is defined by a 

628 `DeferredDatasetHandle` or `InMemoryDatasetHandle` object 

629 (or list thereof), such as a ``deepCoadd_obj`` dataset, and the 

630 computations are defined by a collection of `lsst.pipe.tasks.functors.Functor`

631 objects (or, equivalently, a ``CompositeFunctor``). 

632 

633 After the object is initialized, accessing the ``.df`` attribute (which 

634 holds the `pandas.DataFrame` containing the results of the calculations) 

635 triggers computation of said dataframe. 

636 

637 One of the conveniences of using this object is the ability to define a 

638 desired common filter for all functors. This enables the same functor 

639 collection to be passed to several different `PostprocessAnalysis` objects 

640 without having to change the original functor collection, since the ``filt`` 

641 keyword argument of this object triggers an overwrite of the ``filt`` 

642 property for all functors in the collection. 

643 

644 This object also allows a list of refFlags to be passed, and defines a set 

645 of default refFlags that are always included even if not requested. 

646 

647 If a list of DataFrames or Handles is passed, rather than a single one, 

648 then the calculations will be mapped over all the input catalogs. In 

649 principle, it should be straightforward to parallelize this activity, but 

650 initial tests have failed (see TODO in code comments). 

651 

652 Parameters 

653 ---------- 

654 handles : `lsst.daf.butler.DeferredDatasetHandle` or 

655 `lsst.pipe.base.InMemoryDatasetHandle` or 

656 list of these. 

657 Source catalog(s) for computation. 

658 functors : `list`, `dict`, or `~lsst.pipe.tasks.functors.CompositeFunctor` 

659 Computations to do (functors that act on ``handles``). 

660 If a dict, the output 

661 DataFrame will have columns keyed accordingly. 

662 If a list, the column keys will come from the 

663 ``.shortname`` attribute of each functor. 

664 

665 filt : `str`, optional 

666 Filter in which to calculate. If provided, 

667 this will overwrite any existing ``.filt`` attribute 

668 of the provided functors. 

669 

670 flags : `list`, optional 

671 List of flags (per-band) to include in output table. 

672 Taken from the ``meas`` dataset if applied to a multilevel Object Table. 

673 

674 refFlags : `list`, optional 

675 List of refFlags (only reference band) to include in output table. 

676 

677 forcedFlags : `list`, optional 

678 List of flags (per-band) to include in output table. 

679 Taken from the ``forced_src`` dataset if applied to a 

680 multilevel Object Table. Intended for flags from measurement plugins 

681 only run during multi-band forced-photometry. 

682 """ 

683 _defaultRefFlags = [] 

684 _defaultFuncs = () 

685 

686 def __init__(self, handles, functors, filt=None, flags=None, refFlags=None, forcedFlags=None): 

687 self.handles = handles 

688 self.functors = functors 

689 

690 self.filt = filt 

691 self.flags = list(flags) if flags is not None else [] 

692 self.forcedFlags = list(forcedFlags) if forcedFlags is not None else [] 

693 self.refFlags = list(self._defaultRefFlags) 

694 if refFlags is not None: 

695 self.refFlags += list(refFlags) 

696 

697 self._df = None 

698 

699 @property 

700 def defaultFuncs(self): 

701 funcs = dict(self._defaultFuncs) 

702 return funcs 

703 

704 @property 

705 def func(self): 

706 additionalFuncs = self.defaultFuncs 

707 additionalFuncs.update({flag: Column(flag, dataset='forced_src') for flag in self.forcedFlags}) 

708 additionalFuncs.update({flag: Column(flag, dataset='ref') for flag in self.refFlags}) 

709 additionalFuncs.update({flag: Column(flag, dataset='meas') for flag in self.flags}) 

710 

711 if isinstance(self.functors, CompositeFunctor): 

712 func = self.functors 

713 else: 

714 func = CompositeFunctor(self.functors) 

715 

716 func.funcDict.update(additionalFuncs) 

717 func.filt = self.filt 

718 

719 return func 

720 

721 @property 

722 def noDupCols(self): 

723 return [name for name, func in self.func.funcDict.items() if func.noDup or func.dataset == 'ref'] 

724 

725 @property 

726 def df(self): 

727 if self._df is None: 

728 self.compute() 

729 return self._df 

730 

731 def compute(self, dropna=False, pool=None): 

732 # map over multiple handles 

733 if type(self.handles) in (list, tuple): 

734 if pool is None: 

735 dflist = [self.func(handle, dropna=dropna) for handle in self.handles] 

736 else: 

737 # TODO: Figure out why this doesn't work (pyarrow pickling 

738 # issues?) 

739 dflist = pool.map(functools.partial(self.func, dropna=dropna), self.handles) 

740 self._df = pd.concat(dflist) 

741 else: 

742 self._df = self.func(self.handles, dropna=dropna) 

743 

744 return self._df 
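# Minimal sketch of direct use (functor names are hypothetical; the Transform*
# tasks below normally build this object from a YAML functor file, and
# `handle` stands for a deepCoadd_obj dataset handle):
#
#     from lsst.pipe.tasks.functors import Column, Mag
#     funcs = {'ra': Column('coord_ra', dataset='ref'),
#              'psfMag': Mag('base_PsfFlux', dataset='meas')}
#     analysis = PostprocessAnalysis(handle, funcs, filt='g',
#                                    refFlags=['detect_isPrimary'])
#     df = analysis.df   # first access triggers the computation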

745 

746 

747class TransformCatalogBaseConnections(pipeBase.PipelineTaskConnections, 

748 dimensions=()): 

749 """Expected Connections for subclasses of TransformCatalogBaseTask. 

750 

751 Must be subclassed. 

752 """ 

753 inputCatalog = connectionTypes.Input( 

754 name="", 

755 storageClass="DataFrame", 

756 ) 

757 outputCatalog = connectionTypes.Output( 

758 name="", 

759 storageClass="DataFrame", 

760 ) 

761 

762 

763class TransformCatalogBaseConfig(pipeBase.PipelineTaskConfig, 

764 pipelineConnections=TransformCatalogBaseConnections): 

765 functorFile = pexConfig.Field( 

766 dtype=str, 

767 doc="Path to YAML file specifying Science Data Model functors to use " 

768 "when copying columns and computing calibrated values.", 

769 default=None, 

770 optional=True 

771 ) 

772 primaryKey = pexConfig.Field( 

773 dtype=str, 

774 doc="Name of column to be set as the DataFrame index. If None, the index" 

775 "will be named `id`", 

776 default=None, 

777 optional=True 

778 ) 

779 columnsFromDataId = pexConfig.ListField( 

780 dtype=str, 

781 default=None, 

782 optional=True, 

783 doc="Columns to extract from the dataId", 

784 ) 

785 

786 

787class TransformCatalogBaseTask(pipeBase.PipelineTask): 

788 """Base class for transforming/standardizing a catalog 

789 

790 by applying functors that convert units and apply calibrations. 

791 The purpose of this task is to perform a set of computations on 

792 an input ``DeferredDatasetHandle`` or ``InMemoryDatasetHandle`` that holds 

793 a ``DataFrame`` dataset (such as ``deepCoadd_obj``), and write the 

794 results to a new dataset (which needs to be declared in an ``outputDataset`` 

795 attribute). 

796 

797 The calculations to be performed are defined in a YAML file that specifies 

798 a set of functors to be computed, provided as 

799 the ``functorFile`` config parameter. An example of such a YAML file

800 is the following: 

801 

802 funcs: 

803 psfMag: 

804 functor: Mag 

805 args: 

806 - base_PsfFlux 

807 filt: HSC-G 

808 dataset: meas 

809 cmodel_magDiff: 

810 functor: MagDiff 

811 args: 

812 - modelfit_CModel 

813 - base_PsfFlux 

814 filt: HSC-G 

815 gauss_magDiff: 

816 functor: MagDiff 

817 args: 

818 - base_GaussianFlux 

819 - base_PsfFlux 

820 filt: HSC-G 

821 count: 

822 functor: Column 

823 args: 

824 - base_InputCount_value 

825 filt: HSC-G 

826 deconvolved_moments: 

827 functor: DeconvolvedMoments 

828 filt: HSC-G 

829 dataset: forced_src 

830 refFlags: 

831 - calib_psfUsed 

832 - merge_measurement_i 

833 - merge_measurement_r 

834 - merge_measurement_z 

835 - merge_measurement_y 

836 - merge_measurement_g 

837 - base_PixelFlags_flag_inexact_psfCenter 

838 - detect_isPrimary 

839 

840 The names for each entry under "funcs" will become the names of columns in

841 the output dataset. All the functors referenced are defined in 

842 `lsst.pipe.tasks.functors`. Positional arguments to be passed to each 

843 functor are in the `args` list, and any additional entries for each column 

844 other than "functor" or "args" (e.g., ``'filt'``, ``'dataset'``) are treated as 

845 keyword arguments to be passed to the functor initialization. 

846 

847 The "flags" entry is the default shortcut for `Column` functors. 

848 All columns listed under "flags" will be copied to the output table 

849 untransformed. They can be of any datatype. 

850 In the special case of transforming a multi-level object table with

851 band and dataset indices (deepCoadd_obj), these will be taken from the

852 `meas` dataset and exploded out per band. 

853 

854 There are two special shortcuts that only apply when transforming 

855 multi-level Object (deepCoadd_obj) tables: 

856 - The "refFlags" entry is a shortcut for `Column` functors

857 taken from the ``ref`` dataset if transforming an ObjectTable.

858 - The "forcedFlags" entry is a shortcut for `Column` functors

859 taken from the ``forced_src`` dataset if transforming an ObjectTable.

860 These are expanded out per band. 

861 

862 

863 This task uses the `lsst.pipe.tasks.postprocess.PostprocessAnalysis` object 

864 to organize and execute the calculations.

865 """ 

866 @property 

867 def _DefaultName(self): 

868 raise NotImplementedError('Subclass must define "_DefaultName" attribute') 

869 

870 @property 

871 def outputDataset(self): 

872 raise NotImplementedError('Subclass must define "outputDataset" attribute') 

873 

874 @property 

875 def inputDataset(self): 

876 raise NotImplementedError('Subclass must define "inputDataset" attribute') 

877 

878 @property 

879 def ConfigClass(self): 

880 raise NotImplementedError('Subclass must define "ConfigClass" attribute') 

881 

882 def __init__(self, *args, **kwargs): 

883 super().__init__(*args, **kwargs) 

884 if self.config.functorFile: 

885 self.log.info('Loading transform functor definitions from %s',

886 self.config.functorFile) 

887 self.funcs = CompositeFunctor.from_file(self.config.functorFile) 

888 self.funcs.update(dict(PostprocessAnalysis._defaultFuncs)) 

889 else: 

890 self.funcs = None 

891 

892 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

893 inputs = butlerQC.get(inputRefs) 

894 if self.funcs is None: 

895 raise ValueError("config.functorFile is None. " 

896 "Must be a valid path to yaml in order to run Task as a PipelineTask.") 

897 result = self.run(handle=inputs['inputCatalog'], funcs=self.funcs, 

898 dataId=outputRefs.outputCatalog.dataId.full) 

899 outputs = pipeBase.Struct(outputCatalog=result) 

900 butlerQC.put(outputs, outputRefs) 

901 

902 def run(self, handle, funcs=None, dataId=None, band=None): 

903 """Do postprocessing calculations 

904 

905 Takes a ``DeferredDatasetHandle`` or ``InMemoryDatasetHandle`` or 

906 ``DataFrame`` object and dataId, 

907 returns a dataframe with results of postprocessing calculations. 

908 

909 Parameters 

910 ---------- 

911 handle : `lsst.daf.butler.DeferredDatasetHandle` or

912 `lsst.pipe.base.InMemoryDatasetHandle` or 

913 `pandas.DataFrame`, or list of these. 

914 DataFrames from which calculations are done. 

915 funcs : `~lsst.pipe.tasks.functors.CompositeFunctor`

916 Functors to apply to the table's columns 

917 dataId : dict, optional 

918 Used to add a `patchId` column to the output dataframe. 

919 band : `str`, optional 

920 Filter band that is being processed. 

921 

922 Returns 

923 -------

924 df : `pandas.DataFrame` 

925 """ 

926 self.log.info("Transforming/standardizing the source table dataId: %s", dataId) 

927 

928 df = self.transform(band, handle, funcs, dataId).df 

929 self.log.info("Made a table of %d columns and %d rows", len(df.columns), len(df)) 

930 return df 

931 

932 def getFunctors(self): 

933 return self.funcs 

934 

935 def getAnalysis(self, handles, funcs=None, band=None): 

936 if funcs is None: 

937 funcs = self.funcs 

938 analysis = PostprocessAnalysis(handles, funcs, filt=band) 

939 return analysis 

940 

941 def transform(self, band, handles, funcs, dataId): 

942 analysis = self.getAnalysis(handles, funcs=funcs, band=band) 

943 df = analysis.df 

944 if dataId and self.config.columnsFromDataId: 

945 for key in self.config.columnsFromDataId: 

946 if key in dataId: 

947 df[str(key)] = dataId[key] 

948 else: 

949 raise ValueError(f"'{key}' in config.columnsFromDataId not found in dataId: {dataId}") 

950 

951 if self.config.primaryKey: 

952 if df.index.name != self.config.primaryKey and self.config.primaryKey in df: 

953 df.reset_index(inplace=True, drop=True) 

954 df.set_index(self.config.primaryKey, inplace=True) 

955 

956 return pipeBase.Struct( 

957 df=df, 

958 analysis=analysis 

959 ) 

960 

961 

962class TransformObjectCatalogConnections(pipeBase.PipelineTaskConnections, 

963 defaultTemplates={"coaddName": "deep"}, 

964 dimensions=("tract", "patch", "skymap")): 

965 inputCatalog = connectionTypes.Input( 

966 doc="The vertical concatenation of the deepCoadd_{ref|meas|forced_src} catalogs, " 

967 "stored as a DataFrame with a multi-level column index per-patch.", 

968 dimensions=("tract", "patch", "skymap"), 

969 storageClass="DataFrame", 

970 name="{coaddName}Coadd_obj", 

971 deferLoad=True, 

972 ) 

973 outputCatalog = connectionTypes.Output( 

974 doc="Per-Patch Object Table of columns transformed from the deepCoadd_obj table per the standard " 

975 "data model.", 

976 dimensions=("tract", "patch", "skymap"), 

977 storageClass="DataFrame", 

978 name="objectTable" 

979 ) 

980 

981 

982class TransformObjectCatalogConfig(TransformCatalogBaseConfig, 

983 pipelineConnections=TransformObjectCatalogConnections): 

984 coaddName = pexConfig.Field( 

985 dtype=str, 

986 default="deep", 

987 doc="Name of coadd" 

988 ) 

989 # TODO: remove in DM-27177 

990 filterMap = pexConfig.DictField( 

991 keytype=str, 

992 itemtype=str, 

993 default={}, 

994 doc=("Dictionary mapping full filter name to short one for column name munging." 

995 "These filters determine the output columns no matter what filters the " 

996 "input data actually contain."), 

997 deprecated=("Coadds are now identified by the band, so this transform is unused. "

998 "Will be removed after v22.") 

999 ) 

1000 outputBands = pexConfig.ListField( 

1001 dtype=str, 

1002 default=None, 

1003 optional=True, 

1004 doc=("These bands and only these bands will appear in the output," 

1005 " NaN-filled if the input does not include them." 

1006 " If None, then use all bands found in the input.") 

1007 ) 

1008 camelCase = pexConfig.Field( 

1009 dtype=bool, 

1010 default=False, 

1011 doc=("Write per-band columns names with camelCase, else underscore " 

1012 "For example: gPsFlux instead of g_PsFlux.") 

1013 ) 

1014 multilevelOutput = pexConfig.Field( 

1015 dtype=bool, 

1016 default=False, 

1017 doc=("Whether results dataframe should have a multilevel column index (True) or be flat " 

1018 "and name-munged (False).") 

1019 ) 

1020 goodFlags = pexConfig.ListField( 

1021 dtype=str, 

1022 default=[], 

1023 doc=("List of 'good' flags that should be set False when populating empty tables. " 

1024 "All other flags are considered to be 'bad' flags and will be set to True.") 

1025 ) 

1026 floatFillValue = pexConfig.Field( 

1027 dtype=float, 

1028 default=np.nan, 

1029 doc="Fill value for float fields when populating empty tables." 

1030 ) 

1031 integerFillValue = pexConfig.Field( 

1032 dtype=int, 

1033 default=-1, 

1034 doc="Fill value for integer fields when populating empty tables." 

1035 ) 

1036 

1037 def setDefaults(self): 

1038 super().setDefaults() 

1039 self.functorFile = os.path.join('$PIPE_TASKS_DIR', 'schemas', 'Object.yaml') 

1040 self.primaryKey = 'objectId' 

1041 self.columnsFromDataId = ['tract', 'patch'] 

1042 self.goodFlags = ['calib_astrometry_used', 

1043 'calib_photometry_reserved', 

1044 'calib_photometry_used', 

1045 'calib_psf_candidate', 

1046 'calib_psf_reserved', 

1047 'calib_psf_used'] 
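# Example config overrides (values illustrative), e.g. from a pipeline YAML
# or a -c option for transformObjectCatalog:
#
#     config.outputBands = ['g', 'r', 'i']   # NaN-fill bands missing from the input
#     config.camelCase = True                # gPsFlux-style column names
#     config.multilevelOutput = False        # flat, name-munged columns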

1048 

1049 

1050class TransformObjectCatalogTask(TransformCatalogBaseTask): 

1051 """Produce a flattened Object Table to match the format specified in 

1052 sdm_schemas. 

1053 

1054 Do the same set of postprocessing calculations on all bands. 

1055 

1056 This is identical to `TransformCatalogBaseTask`, except that it does

1057 the specified functor calculations for all filters present in the 

1058 input `deepCoadd_obj` table. Any specific ``"filt"`` keywords specified 

1059 by the YAML file will be superseded.

1060 """ 

1061 _DefaultName = "transformObjectCatalog" 

1062 ConfigClass = TransformObjectCatalogConfig 

1063 

1064 def run(self, handle, funcs=None, dataId=None, band=None): 

1065 # NOTE: band kwarg is ignored here. 

1066 dfDict = {} 

1067 analysisDict = {} 

1068 templateDf = pd.DataFrame() 

1069 

1070 columns = handle.get(component='columns') 

1071 inputBands = columns.unique(level=1).values 

1072 

1073 outputBands = self.config.outputBands if self.config.outputBands else inputBands 

1074 

1075 # Perform transform for data of filters that exist in the handle dataframe. 

1076 for inputBand in inputBands: 

1077 if inputBand not in outputBands: 

1078 self.log.info("Ignoring %s band data in the input", inputBand) 

1079 continue 

1080 self.log.info("Transforming the catalog of band %s", inputBand) 

1081 result = self.transform(inputBand, handle, funcs, dataId) 

1082 dfDict[inputBand] = result.df 

1083 analysisDict[inputBand] = result.analysis 

1084 if templateDf.empty: 

1085 templateDf = result.df 

1086 

1087 # Put filler values in columns of other wanted bands 

1088 for filt in outputBands: 

1089 if filt not in dfDict: 

1090 self.log.info("Adding empty columns for band %s", filt) 

1091 dfTemp = templateDf.copy() 

1092 for col in dfTemp.columns: 

1093 testValue = dfTemp[col].values[0] 

1094 if isinstance(testValue, (np.bool_, pd.BooleanDtype)): 

1095 # Boolean flag type, check if it is a "good" flag 

1096 if col in self.config.goodFlags: 

1097 fillValue = False 

1098 else: 

1099 fillValue = True 

1100 elif isinstance(testValue, numbers.Integral): 

1101 # Checking numbers.Integral catches all flavors 

1102 # of python, numpy, pandas, etc. integers. 

1103 # We must ensure this is not an unsigned integer. 

1104 if isinstance(testValue, np.unsignedinteger): 

1105 raise ValueError("Parquet tables may not have unsigned integer columns.") 

1106 else: 

1107 fillValue = self.config.integerFillValue 

1108 else: 

1109 fillValue = self.config.floatFillValue 

1110 dfTemp[col].values[:] = fillValue 

1111 dfDict[filt] = dfTemp 

1112 

1113 # This makes a multilevel column index, with band as first level 

1114 df = pd.concat(dfDict, axis=1, names=['band', 'column']) 

1115 

1116 if not self.config.multilevelOutput: 

1117 noDupCols = list(set.union(*[set(v.noDupCols) for v in analysisDict.values()])) 

1118 if self.config.primaryKey in noDupCols: 

1119 noDupCols.remove(self.config.primaryKey) 

1120 if dataId and self.config.columnsFromDataId: 

1121 noDupCols += self.config.columnsFromDataId 

1122 df = flattenFilters(df, noDupCols=noDupCols, camelCase=self.config.camelCase, 

1123 inputBands=inputBands) 

1124 

1125 self.log.info("Made a table of %d columns and %d rows", len(df.columns), len(df)) 

1126 

1127 return df 

1128 

1129 

1130class ConsolidateObjectTableConnections(pipeBase.PipelineTaskConnections, 

1131 dimensions=("tract", "skymap")): 

1132 inputCatalogs = connectionTypes.Input( 

1133 doc="Per-Patch objectTables conforming to the standard data model.", 

1134 name="objectTable", 

1135 storageClass="DataFrame", 

1136 dimensions=("tract", "patch", "skymap"), 

1137 multiple=True, 

1138 ) 

1139 outputCatalog = connectionTypes.Output( 

1140 doc="Pre-tract horizontal concatenation of the input objectTables", 

1141 name="objectTable_tract", 

1142 storageClass="DataFrame", 

1143 dimensions=("tract", "skymap"), 

1144 ) 

1145 

1146 

1147class ConsolidateObjectTableConfig(pipeBase.PipelineTaskConfig, 

1148 pipelineConnections=ConsolidateObjectTableConnections): 

1149 coaddName = pexConfig.Field( 

1150 dtype=str, 

1151 default="deep", 

1152 doc="Name of coadd" 

1153 ) 

1154 

1155 

1156class ConsolidateObjectTableTask(pipeBase.PipelineTask): 

1157 """Write patch-merged source tables to a tract-level DataFrame Parquet file. 

1158 

1159 Concatenates `objectTable` list into a per-tract `objectTable_tract`.

1160 """ 

1161 _DefaultName = "consolidateObjectTable" 

1162 ConfigClass = ConsolidateObjectTableConfig 

1163 

1164 inputDataset = 'objectTable' 

1165 outputDataset = 'objectTable_tract' 

1166 

1167 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

1168 inputs = butlerQC.get(inputRefs) 

1169 self.log.info("Concatenating %s per-patch Object Tables", 

1170 len(inputs['inputCatalogs'])) 

1171 df = pd.concat(inputs['inputCatalogs']) 

1172 butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs) 

1173 

1174 

1175class TransformSourceTableConnections(pipeBase.PipelineTaskConnections, 

1176 defaultTemplates={"catalogType": ""}, 

1177 dimensions=("instrument", "visit", "detector")): 

1178 

1179 inputCatalog = connectionTypes.Input( 

1180 doc="Wide input catalog of sources produced by WriteSourceTableTask", 

1181 name="{catalogType}source", 

1182 storageClass="DataFrame", 

1183 dimensions=("instrument", "visit", "detector"), 

1184 deferLoad=True 

1185 ) 

1186 outputCatalog = connectionTypes.Output( 

1187 doc="Narrower, per-detector Source Table transformed and converted per a " 

1188 "specified set of functors", 

1189 name="{catalogType}sourceTable", 

1190 storageClass="DataFrame", 

1191 dimensions=("instrument", "visit", "detector") 

1192 ) 

1193 

1194 

1195class TransformSourceTableConfig(TransformCatalogBaseConfig, 

1196 pipelineConnections=TransformSourceTableConnections): 

1197 

1198 def setDefaults(self): 

1199 super().setDefaults() 

1200 self.functorFile = os.path.join('$PIPE_TASKS_DIR', 'schemas', 'Source.yaml') 

1201 self.primaryKey = 'sourceId' 

1202 self.columnsFromDataId = ['visit', 'detector', 'band', 'physical_filter'] 

1203 

1204 

1205class TransformSourceTableTask(TransformCatalogBaseTask): 

1206 """Transform/standardize a source catalog 

1207 """ 

1208 _DefaultName = "transformSourceTable" 

1209 ConfigClass = TransformSourceTableConfig 

1210 

1211 

1212class ConsolidateVisitSummaryConnections(pipeBase.PipelineTaskConnections, 

1213 dimensions=("instrument", "visit",), 

1214 defaultTemplates={"calexpType": ""}): 

1215 calexp = connectionTypes.Input( 

1216 doc="Processed exposures used for metadata", 

1217 name="calexp", 

1218 storageClass="ExposureF", 

1219 dimensions=("instrument", "visit", "detector"), 

1220 deferLoad=True, 

1221 multiple=True, 

1222 ) 

1223 visitSummary = connectionTypes.Output( 

1224 doc=("Per-visit consolidated exposure metadata. These catalogs use " 

1225 "detector id for the id and are sorted for fast lookups of a " 

1226 "detector."), 

1227 name="visitSummary", 

1228 storageClass="ExposureCatalog", 

1229 dimensions=("instrument", "visit"), 

1230 ) 

1231 visitSummarySchema = connectionTypes.InitOutput( 

1232 doc="Schema of the visitSummary catalog", 

1233 name="visitSummary_schema", 

1234 storageClass="ExposureCatalog", 

1235 ) 

1236 

1237 

1238class ConsolidateVisitSummaryConfig(pipeBase.PipelineTaskConfig, 

1239 pipelineConnections=ConsolidateVisitSummaryConnections): 

1240 """Config for ConsolidateVisitSummaryTask""" 

1241 pass 

1242 

1243 

1244class ConsolidateVisitSummaryTask(pipeBase.PipelineTask): 

1245 """Task to consolidate per-detector visit metadata. 

1246 

1247 This task aggregates the following metadata from all the detectors in a 

1248 single visit into an exposure catalog: 

1249 - The visitInfo. 

1250 - The wcs. 

1251 - The photoCalib. 

1252 - The physical_filter and band (if available). 

1253 - The psf size, shape, and effective area at the center of the detector. 

1254 - The corners of the bounding box in right ascension/declination. 

1255 

1256 Other quantities such as Detector, Psf, ApCorrMap, and TransmissionCurve 

1257 are not persisted here because of storage concerns, and because of their 

1258 limited utility as summary statistics. 

1259 

1260 Tests for this task are performed in ci_hsc_gen3. 

1261 """ 

1262 _DefaultName = "consolidateVisitSummary" 

1263 ConfigClass = ConsolidateVisitSummaryConfig 

1264 

1265 def __init__(self, **kwargs): 

1266 super().__init__(**kwargs) 

1267 self.schema = afwTable.ExposureTable.makeMinimalSchema() 

1268 self.schema.addField('visit', type='L', doc='Visit number') 

1269 self.schema.addField('physical_filter', type='String', size=32, doc='Physical filter') 

1270 self.schema.addField('band', type='String', size=32, doc='Name of band') 

1271 ExposureSummaryStats.update_schema(self.schema) 

1272 self.visitSummarySchema = afwTable.ExposureCatalog(self.schema) 

1273 

1274 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

1275 dataRefs = butlerQC.get(inputRefs.calexp) 

1276 visit = dataRefs[0].dataId.byName()['visit'] 

1277 

1278 self.log.debug("Concatenating metadata from %d per-detector calexps (visit %d)", 

1279 len(dataRefs), visit) 

1280 

1281 expCatalog = self._combineExposureMetadata(visit, dataRefs) 

1282 

1283 butlerQC.put(expCatalog, outputRefs.visitSummary) 

1284 

1285 def _combineExposureMetadata(self, visit, dataRefs): 

1286 """Make a combined exposure catalog from a list of dataRefs. 

1287 These dataRefs must point to exposures with wcs, summaryStats, 

1288 and other visit metadata. 

1289 

1290 Parameters 

1291 ---------- 

1292 visit : `int` 

1293 Visit identification number. 

1294 dataRefs : `list` of `lsst.daf.butler.DeferredDatasetHandle` 

1295 List of dataRefs in visit. 

1296 

1297 Returns 

1298 ------- 

1299 visitSummary : `lsst.afw.table.ExposureCatalog` 

1300 Exposure catalog with per-detector summary information. 

1301 """ 

1302 cat = afwTable.ExposureCatalog(self.schema) 

1303 cat.resize(len(dataRefs)) 

1304 

1305 cat['visit'] = visit 

1306 

1307 for i, dataRef in enumerate(dataRefs): 

1308 visitInfo = dataRef.get(component='visitInfo') 

1309 filterLabel = dataRef.get(component='filter') 

1310 summaryStats = dataRef.get(component='summaryStats') 

1311 detector = dataRef.get(component='detector') 

1312 wcs = dataRef.get(component='wcs') 

1313 photoCalib = dataRef.get(component='photoCalib') 

1314 detector = dataRef.get(component='detector') 

1315 bbox = dataRef.get(component='bbox') 

1316 validPolygon = dataRef.get(component='validPolygon') 

1317 

1318 rec = cat[i] 

1319 rec.setBBox(bbox) 

1320 rec.setVisitInfo(visitInfo) 

1321 rec.setWcs(wcs) 

1322 rec.setPhotoCalib(photoCalib) 

1323 rec.setValidPolygon(validPolygon) 

1324 

1325 rec['physical_filter'] = filterLabel.physicalLabel if filterLabel.hasPhysicalLabel() else "" 

1326 rec['band'] = filterLabel.bandLabel if filterLabel.hasBandLabel() else "" 

1327 rec.setId(detector.getId()) 

1328 summaryStats.update_record(rec) 

1329 

1330 metadata = dafBase.PropertyList() 

1331 metadata.add("COMMENT", "Catalog id is detector id, sorted.") 

1332 # We are looping over existing datarefs, so the following is true 

1333 metadata.add("COMMENT", "Only detectors with data have entries.") 

1334 cat.setMetadata(metadata) 

1335 

1336 cat.sort() 

1337 return cat 

1338 

1339 

1340class ConsolidateSourceTableConnections(pipeBase.PipelineTaskConnections, 

1341 defaultTemplates={"catalogType": ""}, 

1342 dimensions=("instrument", "visit")): 

1343 inputCatalogs = connectionTypes.Input( 

1344 doc="Input per-detector Source Tables", 

1345 name="{catalogType}sourceTable", 

1346 storageClass="DataFrame", 

1347 dimensions=("instrument", "visit", "detector"), 

1348 multiple=True 

1349 ) 

1350 outputCatalog = connectionTypes.Output( 

1351 doc="Per-visit concatenation of Source Table", 

1352 name="{catalogType}sourceTable_visit", 

1353 storageClass="DataFrame", 

1354 dimensions=("instrument", "visit") 

1355 ) 

1356 

1357 

1358class ConsolidateSourceTableConfig(pipeBase.PipelineTaskConfig, 

1359 pipelineConnections=ConsolidateSourceTableConnections): 

1360 pass 

1361 

1362 

1363class ConsolidateSourceTableTask(pipeBase.PipelineTask): 

1364 """Concatenate `sourceTable` list into a per-visit `sourceTable_visit` 

1365 """ 

1366 _DefaultName = 'consolidateSourceTable' 

1367 ConfigClass = ConsolidateSourceTableConfig 

1368 

1369 inputDataset = 'sourceTable' 

1370 outputDataset = 'sourceTable_visit' 

1371 

1372 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

1373 from .makeWarp import reorderRefs 

1374 

1375 detectorOrder = [ref.dataId['detector'] for ref in inputRefs.inputCatalogs] 

1376 detectorOrder.sort() 

1377 inputRefs = reorderRefs(inputRefs, detectorOrder, dataIdKey='detector') 

1378 inputs = butlerQC.get(inputRefs) 

1379 self.log.info("Concatenating %s per-detector Source Tables", 

1380 len(inputs['inputCatalogs'])) 

1381 df = pd.concat(inputs['inputCatalogs']) 

1382 butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs) 

1383 

1384 

1385class MakeCcdVisitTableConnections(pipeBase.PipelineTaskConnections, 

1386 dimensions=("instrument",), 

1387 defaultTemplates={"calexpType": ""}): 

1388 visitSummaryRefs = connectionTypes.Input( 

1389 doc="Data references for per-visit consolidated exposure metadata", 

1390 name="finalVisitSummary", 

1391 storageClass="ExposureCatalog", 

1392 dimensions=("instrument", "visit"), 

1393 multiple=True, 

1394 deferLoad=True, 

1395 ) 

1396 outputCatalog = connectionTypes.Output( 

1397 doc="CCD and Visit metadata table", 

1398 name="ccdVisitTable", 

1399 storageClass="DataFrame", 

1400 dimensions=("instrument",) 

1401 ) 

1402 

1403 

1404class MakeCcdVisitTableConfig(pipeBase.PipelineTaskConfig, 

1405 pipelineConnections=MakeCcdVisitTableConnections): 

1406 pass 

1407 

1408 

1409class MakeCcdVisitTableTask(pipeBase.PipelineTask): 

1410 """Produce a `ccdVisitTable` from the visit summary exposure catalogs. 

1411 """ 

1412 _DefaultName = 'makeCcdVisitTable' 

1413 ConfigClass = MakeCcdVisitTableConfig 

1414 

1415 def run(self, visitSummaryRefs): 

1416 """Make a table of ccd information from the visit summary catalogs. 

1417 

1418 Parameters 

1419 ---------- 

1420 visitSummaryRefs : `list` of `lsst.daf.butler.DeferredDatasetHandle` 

1421 List of DeferredDatasetHandles pointing to exposure catalogs with 

1422 per-detector summary information. 

1423 

1424 Returns 

1425 ------- 

1426 result : `lsst.pipe.base.Struct`

1427 Results struct with attribute: 

1428 

1429 ``outputCatalog`` 

1430 Catalog of ccd and visit information. 

1431 """ 

1432 ccdEntries = [] 

1433 for visitSummaryRef in visitSummaryRefs: 

1434 visitSummary = visitSummaryRef.get() 

1435 visitInfo = visitSummary[0].getVisitInfo() 

1436 

1437 ccdEntry = {} 

1438 summaryTable = visitSummary.asAstropy() 

1439 selectColumns = ['id', 'visit', 'physical_filter', 'band', 'ra', 'decl', 'zenithDistance', 

1440 'zeroPoint', 'psfSigma', 'skyBg', 'skyNoise', 

1441 'astromOffsetMean', 'astromOffsetStd', 'nPsfStar', 

1442 'psfStarDeltaE1Median', 'psfStarDeltaE2Median', 

1443 'psfStarDeltaE1Scatter', 'psfStarDeltaE2Scatter', 

1444 'psfStarDeltaSizeMedian', 'psfStarDeltaSizeScatter', 

1445 'psfStarScaledDeltaSizeScatter', 

1446 'psfTraceRadiusDelta', 'maxDistToNearestPsf'] 

1447 ccdEntry = summaryTable[selectColumns].to_pandas().set_index('id') 

1448 # 'visit' is the human readable visit number. 

1449 # 'visitId' is the key to the visit table. They are the same. 

1450 # Technically you should join to get the visit from the visit 

1451 # table. 

1452 ccdEntry = ccdEntry.rename(columns={"visit": "visitId"}) 

1453 dataIds = [DataCoordinate.standardize(visitSummaryRef.dataId, detector=id) for id in 

1454 summaryTable['id']] 
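# Note (added for clarity): the 'visit_detector' packer folds the
# (visit, detector) pair into a single integer, so ccdVisitId can serve
# as a compact primary key for the output table.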

1455 packer = visitSummaryRef.dataId.universe.makePacker('visit_detector', visitSummaryRef.dataId) 

1456 ccdVisitIds = [packer.pack(dataId) for dataId in dataIds] 

1457 ccdEntry['ccdVisitId'] = ccdVisitIds 

1458 ccdEntry['detector'] = summaryTable['id'] 
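# Note (added for clarity): sqrt(8 * ln 2) ~= 2.355 converts a Gaussian
# sigma to a FWHM, so 'seeing' below is the PSF FWHM in arcseconds
# (NaN for any detector without a WCS, and hence without a pixel scale).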

1459 pixToArcseconds = np.array([vR.getWcs().getPixelScale().asArcseconds() if vR.getWcs() 

1460 else np.nan for vR in visitSummary]) 

1461 ccdEntry["seeing"] = visitSummary['psfSigma'] * np.sqrt(8 * np.log(2)) * pixToArcseconds 

1462 

1463 ccdEntry["skyRotation"] = visitInfo.getBoresightRotAngle().asDegrees() 

1464 ccdEntry["expMidpt"] = visitInfo.getDate().toPython() 

1465 ccdEntry["expMidptMJD"] = visitInfo.getDate().get(dafBase.DateTime.MJD) 

1466 expTime = visitInfo.getExposureTime() 

1467 ccdEntry['expTime'] = expTime 
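# Note (added for clarity): the summary stores the exposure midpoint, so
# the start time is half an exposure earlier; e.g. a 30 s exposure puts
# obsStart 15 s (about 1.74e-4 days) before expMidpt.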

1468 ccdEntry["obsStart"] = ccdEntry["expMidpt"] - 0.5 * pd.Timedelta(seconds=expTime) 

1469 expTime_days = expTime / (60*60*24) 

1470 ccdEntry["obsStartMJD"] = ccdEntry["expMidptMJD"] - 0.5 * expTime_days 

1471 ccdEntry['darkTime'] = visitInfo.getDarkTime() 

1472 ccdEntry['xSize'] = summaryTable['bbox_max_x'] - summaryTable['bbox_min_x'] 

1473 ccdEntry['ySize'] = summaryTable['bbox_max_y'] - summaryTable['bbox_min_y'] 
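# Note (added for clarity): the corner columns expand the raCorners /
# decCorners arrays, which this code assumes are ordered lower-left (llc),
# upper-left (ulc), upper-right (urc), lower-right (lrc).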

1474 ccdEntry['llcra'] = summaryTable['raCorners'][:, 0] 

1475 ccdEntry['llcdec'] = summaryTable['decCorners'][:, 0] 

1476 ccdEntry['ulcra'] = summaryTable['raCorners'][:, 1] 

1477 ccdEntry['ulcdec'] = summaryTable['decCorners'][:, 1] 

1478 ccdEntry['urcra'] = summaryTable['raCorners'][:, 2] 

1479 ccdEntry['urcdec'] = summaryTable['decCorners'][:, 2] 

1480 ccdEntry['lrcra'] = summaryTable['raCorners'][:, 3] 

1481 ccdEntry['lrcdec'] = summaryTable['decCorners'][:, 3] 

1482 # TODO: DM-30618, Add raftName, nExposures, ccdTemp, binX, binY, 

1483 # and flags, and decide if WCS, and llcx, llcy, ulcx, ulcy, etc. 

1484 # values are actually wanted. 

1485 ccdEntries.append(ccdEntry) 

1486 

1487 outputCatalog = pd.concat(ccdEntries) 

1488 outputCatalog.set_index('ccdVisitId', inplace=True, verify_integrity=True) 

1489 return pipeBase.Struct(outputCatalog=outputCatalog) 

1490 

1491 

1492class MakeVisitTableConnections(pipeBase.PipelineTaskConnections, 

1493 dimensions=("instrument",), 

1494 defaultTemplates={"calexpType": ""}): 

1495 visitSummaries = connectionTypes.Input( 

1496 doc="Per-visit consolidated exposure metadata", 

1497 name="finalVisitSummary", 

1498 storageClass="ExposureCatalog", 

1499 dimensions=("instrument", "visit",), 

1500 multiple=True, 

1501 deferLoad=True, 

1502 ) 

1503 outputCatalog = connectionTypes.Output( 

1504 doc="Visit metadata table", 

1505 name="visitTable", 

1506 storageClass="DataFrame", 

1507 dimensions=("instrument",) 

1508 ) 

1509 

1510 

1511class MakeVisitTableConfig(pipeBase.PipelineTaskConfig, 

1512 pipelineConnections=MakeVisitTableConnections): 

1513 pass 

1514 

1515 

1516class MakeVisitTableTask(pipeBase.PipelineTask): 

1517 """Produce a `visitTable` from the visit summary exposure catalogs. 

1518 """ 

1519 _DefaultName = 'makeVisitTable' 

1520 ConfigClass = MakeVisitTableConfig 

1521 

1522 def run(self, visitSummaries): 

1523 """Make a table of visit information from the visit summary catalogs. 

1524 

1525 Parameters 

1526 ---------- 

1527 visitSummaries : `list` of `lsst.daf.butler.DeferredDatasetHandle` 

1528 List of exposure catalogs with per-detector summary information. 

1529 Returns 

1530 ------- 

1531 result : `lsst.pipe.base.Struct` 

1532 Results struct with attribute: 

1533 

1534 ``outputCatalog`` 

1535 Catalog of visit information. 

1536 """ 

1537 visitEntries = [] 

1538 for visitSummary in visitSummaries: 

1539 visitSummary = visitSummary.get() 

1540 visitRow = visitSummary[0] 

1541 visitInfo = visitRow.getVisitInfo() 

1542 

1543 visitEntry = {} 

1544 visitEntry["visitId"] = visitRow['visit'] 

1545 visitEntry["visit"] = visitRow['visit'] 

1546 visitEntry["physical_filter"] = visitRow['physical_filter'] 

1547 visitEntry["band"] = visitRow['band'] 

1548 raDec = visitInfo.getBoresightRaDec() 

1549 visitEntry["ra"] = raDec.getRa().asDegrees() 

1550 visitEntry["decl"] = raDec.getDec().asDegrees() 

1551 visitEntry["skyRotation"] = visitInfo.getBoresightRotAngle().asDegrees() 

1552 azAlt = visitInfo.getBoresightAzAlt() 

1553 visitEntry["azimuth"] = azAlt.getLongitude().asDegrees() 

1554 visitEntry["altitude"] = azAlt.getLatitude().asDegrees() 

1555 visitEntry["zenithDistance"] = 90 - azAlt.getLatitude().asDegrees() 

1556 visitEntry["airmass"] = visitInfo.getBoresightAirmass() 

1557 expTime = visitInfo.getExposureTime() 

1558 visitEntry["expTime"] = expTime 

1559 visitEntry["expMidpt"] = visitInfo.getDate().toPython() 

1560 visitEntry["expMidptMJD"] = visitInfo.getDate().get(dafBase.DateTime.MJD) 

1561 visitEntry["obsStart"] = visitEntry["expMidpt"] - 0.5 * pd.Timedelta(seconds=expTime) 

1562 expTime_days = expTime / (60*60*24) 

1563 visitEntry["obsStartMJD"] = visitEntry["expMidptMJD"] - 0.5 * expTime_days 

1564 visitEntries.append(visitEntry) 

1565 

1566 # TODO: DM-30623, Add programId, exposureType, cameraTemp, 

1567 # mirror1Temp, mirror2Temp, mirror3Temp, domeTemp, externalTemp, 

1568 # dimmSeeing, pwvGPS, pwvMW, flags, nExposures. 

1569 

1570 outputCatalog = pd.DataFrame(data=visitEntries) 

1571 outputCatalog.set_index('visitId', inplace=True, verify_integrity=True) 

1572 return pipeBase.Struct(outputCatalog=outputCatalog) 

1573 

1574 
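# --- Illustrative sketch (added for clarity; not part of postprocess.py) ---
# MakeVisitTableTask builds one dict per visit and relies on
# DataFrame.set_index(..., verify_integrity=True) to fail loudly if the same
# visit appears twice. The visit ids and values below are invented.
import pandas as pd

visit_entries = [
    {"visitId": 903334, "band": "r", "airmass": 1.12},
    {"visitId": 903336, "band": "i", "airmass": 1.05},
]
visit_table = pd.DataFrame(data=visit_entries)
# A duplicated visitId would raise ValueError here instead of silently
# producing an ambiguous index.
visit_table.set_index("visitId", inplace=True, verify_integrity=True)
print(visit_table.loc[903334, "band"])  # 'r'
# --- end sketch ---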

1575class WriteForcedSourceTableConnections(pipeBase.PipelineTaskConnections, 

1576 dimensions=("instrument", "visit", "detector", "skymap", "tract")): 

1577 

1578 inputCatalog = connectionTypes.Input( 

1579 doc="Primary per-detector, single-epoch forced-photometry catalog. " 

1580 "By default, it is the output of ForcedPhotCcdTask on calexps", 

1581 name="forced_src", 

1582 storageClass="SourceCatalog", 

1583 dimensions=("instrument", "visit", "detector", "skymap", "tract") 

1584 ) 

1585 inputCatalogDiff = connectionTypes.Input( 

1586 doc="Secondary multi-epoch, per-detector, forced photometry catalog. " 

1587 "By default, it is the output of ForcedPhotCcdTask run on image differences.", 

1588 name="forced_diff", 

1589 storageClass="SourceCatalog", 

1590 dimensions=("instrument", "visit", "detector", "skymap", "tract") 

1591 ) 

1592 outputCatalog = connectionTypes.Output( 

1593 doc="InputCatalogs horizonatally joined on `objectId` in DataFrame parquet format", 

1594 name="mergedForcedSource", 

1595 storageClass="DataFrame", 

1596 dimensions=("instrument", "visit", "detector", "skymap", "tract") 

1597 ) 

1598 

1599 

1600class WriteForcedSourceTableConfig(pipeBase.PipelineTaskConfig, 

1601 pipelineConnections=WriteForcedSourceTableConnections): 

1602 key = lsst.pex.config.Field( 

1603 doc="Column on which to join the two input tables on and make the primary key of the output", 

1604 dtype=str, 

1605 default="objectId", 

1606 ) 

1607 

1608 

1609class WriteForcedSourceTableTask(pipeBase.PipelineTask): 

1610 """Merge and convert per-detector forced source catalogs to DataFrame Parquet format. 

1611 

1612 Because the predecessor ForcedPhotCcdTask operates per-detector, 

1613 per-tract (i.e., it has tract in its dimensions), detectors 

1614 on the tract boundary may have multiple forced source catalogs. 

1615 

1616 The successor task TransformForcedSourceTable runs per-patch 

1617 and temporally aggregates the overlapping mergedForcedSource catalogs from 

1618 all available epochs. 

1619 """ 

1620 _DefaultName = "writeForcedSourceTable" 

1621 ConfigClass = WriteForcedSourceTableConfig 

1622 

1623 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

1624 inputs = butlerQC.get(inputRefs) 

1625 # Add ccdVisitId to allow joining with CcdVisitTable 

1626 inputs['ccdVisitId'] = butlerQC.quantum.dataId.pack("visit_detector") 

1627 inputs['band'] = butlerQC.quantum.dataId.full['band'] 

1628 outputs = self.run(**inputs) 

1629 butlerQC.put(outputs, outputRefs) 

1630 

1631 def run(self, inputCatalog, inputCatalogDiff, ccdVisitId=None, band=None): 

1632 dfs = [] 

1633 for table, dataset in zip((inputCatalog, inputCatalogDiff), ('calexp', 'diff')): 

1634 df = table.asAstropy().to_pandas().set_index(self.config.key, drop=False) 

1635 df = df.reindex(sorted(df.columns), axis=1) 

1636 df['ccdVisitId'] = ccdVisitId if ccdVisitId else pd.NA 

1637 df['band'] = band if band else pd.NA 

1638 df.columns = pd.MultiIndex.from_tuples([(dataset, c) for c in df.columns], 

1639 names=('dataset', 'column')) 

1640 

1641 dfs.append(df) 

1642 

1643 outputCatalog = functools.reduce(lambda d1, d2: d1.join(d2), dfs) 

1644 return pipeBase.Struct(outputCatalog=outputCatalog) 

1645 

1646 
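# --- Illustrative sketch (added for clarity; not part of postprocess.py) ---
# WriteForcedSourceTableTask.run labels the columns of each input catalog with
# a ('dataset', 'column') MultiIndex and then joins the frames on objectId.
# A minimal pandas equivalent, with invented object ids and flux columns:
import functools
import pandas as pd

calexp = pd.DataFrame({"objectId": [1, 2], "flux": [10.0, 20.0]}).set_index("objectId", drop=False)
diff = pd.DataFrame({"objectId": [1, 2], "flux": [0.1, -0.2]}).set_index("objectId", drop=False)

frames = []
for df, dataset in ((calexp, "calexp"), (diff, "diff")):
    df = df.copy()
    # Prefix every column with its dataset of origin so the two 'flux'
    # columns stay distinguishable after the join.
    df.columns = pd.MultiIndex.from_tuples(
        [(dataset, c) for c in df.columns], names=("dataset", "column"))
    frames.append(df)

merged = functools.reduce(lambda d1, d2: d1.join(d2), frames)
print(merged[("diff", "flux")].tolist())  # [0.1, -0.2]
# --- end sketch ---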

1647class TransformForcedSourceTableConnections(pipeBase.PipelineTaskConnections, 

1648 dimensions=("instrument", "skymap", "patch", "tract")): 

1649 

1650 inputCatalogs = connectionTypes.Input( 

1651 doc="DataFrames of merged ForcedSources produced by WriteForcedSourceTableTask", 

1652 name="mergedForcedSource", 

1653 storageClass="DataFrame", 

1654 dimensions=("instrument", "visit", "detector", "skymap", "tract"), 

1655 multiple=True, 

1656 deferLoad=True 

1657 ) 

1658 referenceCatalog = connectionTypes.Input( 

1659 doc="Reference catalog which was used to seed the forcedPhot. Columns " 

1660 "objectId, detect_isPrimary, detect_isTractInner, detect_isPatchInner " 

1661 "are expected.", 

1662 name="objectTable", 

1663 storageClass="DataFrame", 

1664 dimensions=("tract", "patch", "skymap"), 

1665 deferLoad=True 

1666 ) 

1667 outputCatalog = connectionTypes.Output( 

1668 doc="Narrower, temporally-aggregated, per-patch ForcedSource Table transformed and converted per a " 

1669 "specified set of functors", 

1670 name="forcedSourceTable", 

1671 storageClass="DataFrame", 

1672 dimensions=("tract", "patch", "skymap") 

1673 ) 

1674 

1675 

1676class TransformForcedSourceTableConfig(TransformCatalogBaseConfig, 

1677 pipelineConnections=TransformForcedSourceTableConnections): 

1678 referenceColumns = pexConfig.ListField( 

1679 dtype=str, 

1680 default=["detect_isPrimary", "detect_isTractInner", "detect_isPatchInner"], 

1681 optional=True, 

1682 doc="Columns to pull from reference catalog", 

1683 ) 

1684 keyRef = lsst.pex.config.Field( 

1685 doc="Column on which to join the two input tables on and make the primary key of the output", 

1686 dtype=str, 

1687 default="objectId", 

1688 ) 

1689 key = lsst.pex.config.Field( 

1690 doc="Rename the output DataFrame index to this name", 

1691 dtype=str, 

1692 default="forcedSourceId", 

1693 ) 

1694 

1695 def setDefaults(self): 

1696 super().setDefaults() 

1697 self.functorFile = os.path.join('$PIPE_TASKS_DIR', 'schemas', 'ForcedSource.yaml') 

1698 self.columnsFromDataId = ['tract', 'patch'] 

1699 

1700 

1701class TransformForcedSourceTableTask(TransformCatalogBaseTask): 

1702 """Transform/standardize a ForcedSource catalog 

1703 

1704 Transforms each wide, per-detector forcedSource DataFrame per the 

1705 specification file (per-camera defaults found in ForcedSource.yaml). 

1706 All epochs that overlap the patch are aggregated into one narrow, 

1707 per-patch DataFrame. 

1708 

1709 No de-duplication of rows is performed. Duplicate-resolution flags are 

1710 pulled in from the referenceCatalog: `detect_isPrimary`, 

1711 `detect_isTractInner`, `detect_isPatchInner`, so that the user may de-duplicate 

1712 for analysis or compare duplicates for QA. 

1713 

1714 The resulting table includes multiple bands. Epochs (MJDs) and other useful 

1715 per-visit quantities can be retrieved by joining with the CcdVisitTable on 

1716 ccdVisitId. 

1717 """ 

1718 _DefaultName = "transformForcedSourceTable" 

1719 ConfigClass = TransformForcedSourceTableConfig 

1720 

1721 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

1722 inputs = butlerQC.get(inputRefs) 

1723 if self.funcs is None: 

1724 raise ValueError("config.functorFile is None. " 

1725 "Must be a valid path to yaml in order to run Task as a PipelineTask.") 

1726 outputs = self.run(inputs['inputCatalogs'], inputs['referenceCatalog'], funcs=self.funcs, 

1727 dataId=outputRefs.outputCatalog.dataId.full) 

1728 

1729 butlerQC.put(outputs, outputRefs) 

1730 

1731 def run(self, inputCatalogs, referenceCatalog, funcs=None, dataId=None, band=None): 

1732 dfs = [] 

1733 ref = referenceCatalog.get(parameters={"columns": self.config.referenceColumns}) 

1734 self.log.info("Aggregating %s input catalogs", len(inputCatalogs)) 

1735 for handle in inputCatalogs: 

1736 result = self.transform(None, handle, funcs, dataId) 

1737 # Filter for only rows that were detected on (overlap) the patch 

1738 dfs.append(result.df.join(ref, how='inner')) 

1739 

1740 outputCatalog = pd.concat(dfs) 

1741 

1742 # Now that we are done joining on config.keyRef, 

1743 # change the index over to config.key in a few steps: 

1744 outputCatalog.index.rename(self.config.keyRef, inplace=True) 

1745 # Add config.keyRef to the column list 

1746 outputCatalog.reset_index(inplace=True) 

1747 # Set the forcedSourceId to the index. This is specified in the 

1748 # ForcedSource.yaml 

1749 outputCatalog.set_index("forcedSourceId", inplace=True, verify_integrity=True) 

1750 # Rename it to the config.key 

1751 outputCatalog.index.rename(self.config.key, inplace=True) 

1752 

1753 self.log.info("Made a table of %d columns and %d rows", 

1754 len(outputCatalog.columns), len(outputCatalog)) 

1755 return pipeBase.Struct(outputCatalog=outputCatalog) 

1756 
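# --- Illustrative sketch (added for clarity; not part of postprocess.py) ---
# The index juggling at the end of TransformForcedSourceTableTask.run keeps
# objectId (config.keyRef) as a regular column while promoting forcedSourceId
# to the index, which is then renamed to config.key (the same name under the
# default configuration). The ids and flux values below are invented.
import pandas as pd

cat = pd.DataFrame({"forcedSourceId": [100, 101], "psfFlux": [1.0, 2.0]},
                   index=pd.Index([1, 1]))
cat.index.rename("objectId", inplace=True)   # name the join key
cat.reset_index(inplace=True)                # objectId becomes a column
cat.set_index("forcedSourceId", inplace=True, verify_integrity=True)
cat.index.rename("forcedSourceId", inplace=True)  # config.key; a no-op here
print(list(cat.columns))  # ['objectId', 'psfFlux']
# --- end sketch ---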

1757 

1758class ConsolidateTractConnections(pipeBase.PipelineTaskConnections, 

1759 defaultTemplates={"catalogType": ""}, 

1760 dimensions=("instrument", "tract")): 

1761 inputCatalogs = connectionTypes.Input( 

1762 doc="Input per-patch DataFrame Tables to be concatenated", 

1763 name="{catalogType}ForcedSourceTable", 

1764 storageClass="DataFrame", 

1765 dimensions=("tract", "patch", "skymap"), 

1766 multiple=True, 

1767 ) 

1768 

1769 outputCatalog = connectionTypes.Output( 

1770 doc="Output per-tract concatenation of DataFrame Tables", 

1771 name="{catalogType}ForcedSourceTable_tract", 

1772 storageClass="DataFrame", 

1773 dimensions=("tract", "skymap"), 

1774 ) 

1775 

1776 

1777class ConsolidateTractConfig(pipeBase.PipelineTaskConfig, 

1778 pipelineConnections=ConsolidateTractConnections): 

1779 pass 

1780 

1781 

1782class ConsolidateTractTask(pipeBase.PipelineTask): 

1783 """Concatenate any per-patch, dataframe list into a single 

1784 per-tract DataFrame. 

1785 """ 

1786 _DefaultName = 'ConsolidateTract' 

1787 ConfigClass = ConsolidateTractConfig 

1788 

1789 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

1790 inputs = butlerQC.get(inputRefs) 

1791 # Not checking that at least one inputCatalog exists because that would 

1792 # imply an empty quantum graph (QG). 

1793 self.log.info("Concatenating %s per-patch %s Tables", 

1794 len(inputs['inputCatalogs']), 

1795 inputRefs.inputCatalogs[0].datasetType.name) 

1796 df = pd.concat(inputs['inputCatalogs']) 

1797 butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs)