Coverage for python/lsst/pipe/tasks/postprocess.py: 26%

654 statements  

coverage.py v6.5.0, created at 2023-02-11 03:40 -0800

1# This file is part of pipe_tasks. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

22__all__ = ["WriteObjectTableConfig", "WriteObjectTableTask", 

23 "WriteSourceTableConfig", "WriteSourceTableTask", 

24 "WriteRecalibratedSourceTableConfig", "WriteRecalibratedSourceTableTask", 

25 "PostprocessAnalysis", 

26 "TransformCatalogBaseConfig", "TransformCatalogBaseTask", 

27 "TransformObjectCatalogConfig", "TransformObjectCatalogTask", 

28 "ConsolidateObjectTableConfig", "ConsolidateObjectTableTask", 

29 "TransformSourceTableConfig", "TransformSourceTableTask", 

30 "ConsolidateVisitSummaryConfig", "ConsolidateVisitSummaryTask", 

31 "ConsolidateSourceTableConfig", "ConsolidateSourceTableTask", 

32 "MakeCcdVisitTableConfig", "MakeCcdVisitTableTask", 

33 "MakeVisitTableConfig", "MakeVisitTableTask", 

34 "WriteForcedSourceTableConfig", "WriteForcedSourceTableTask", 

35 "TransformForcedSourceTableConfig", "TransformForcedSourceTableTask", 

36 "ConsolidateTractConfig", "ConsolidateTractTask"] 

37 

38import functools 

39import pandas as pd 

40import logging 

41import numpy as np 

42import numbers 

43import os 

44 

45import lsst.geom 

46import lsst.pex.config as pexConfig 

47import lsst.pipe.base as pipeBase 

48import lsst.daf.base as dafBase 

49from lsst.obs.base import ExposureIdInfo 

50from lsst.pipe.base import connectionTypes 

51import lsst.afw.table as afwTable 

52from lsst.afw.image import ExposureSummaryStats 

53from lsst.meas.base import SingleFrameMeasurementTask 

54from lsst.daf.butler import DeferredDatasetHandle, DataCoordinate 

55from lsst.skymap import BaseSkyMap 

56 

57from .parquetTable import ParquetTable 

58from .functors import CompositeFunctor, Column 

59 

60log = logging.getLogger(__name__) 

61 

62 

63def flattenFilters(df, noDupCols=['coord_ra', 'coord_dec'], camelCase=False, inputBands=None): 

64 """Flattens a dataframe with multilevel column index. 

65 """ 

66 newDf = pd.DataFrame() 

67 # band is the level 0 index 

68 dfBands = df.columns.unique(level=0).values 

69 for band in dfBands: 

70 subdf = df[band] 

71 columnFormat = '{0}{1}' if camelCase else '{0}_{1}' 

72 newColumns = {c: columnFormat.format(band, c) 

73 for c in subdf.columns if c not in noDupCols} 

74 cols = list(newColumns.keys()) 

75 newDf = pd.concat([newDf, subdf[cols].rename(columns=newColumns)], axis=1) 

76 

77 # Band must be present in the input and output or else the column is all NaN:

78 presentBands = dfBands if inputBands is None else list(set(inputBands).intersection(dfBands)) 

79 # Get the unexploded columns from any present band's partition 

80 noDupDf = df[presentBands[0]][noDupCols] 

81 newDf = pd.concat([noDupDf, newDf], axis=1) 

82 return newDf 
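# Illustrative sketch (not part of the pipeline; column names and values are assumed):
# flattenFilters turns a two-level ('band', 'column') frame into band-prefixed flat
# columns, keeping the noDupCols only once, taken from the first present band.
#
#   cols = pd.MultiIndex.from_product([['g', 'r'], ['coord_ra', 'coord_dec', 'psfFlux']],
#                                     names=('band', 'column'))
#   df = pd.DataFrame(np.ones((2, 6)), columns=cols)
#   list(flattenFilters(df).columns)
#   # -> ['coord_ra', 'coord_dec', 'g_psfFlux', 'r_psfFlux']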

83 

84 

85class WriteObjectTableConnections(pipeBase.PipelineTaskConnections, 

86 defaultTemplates={"coaddName": "deep"}, 

87 dimensions=("tract", "patch", "skymap")): 

88 inputCatalogMeas = connectionTypes.Input( 

89 doc="Catalog of source measurements on the deepCoadd.", 

90 dimensions=("tract", "patch", "band", "skymap"), 

91 storageClass="SourceCatalog", 

92 name="{coaddName}Coadd_meas", 

93 multiple=True 

94 ) 

95 inputCatalogForcedSrc = connectionTypes.Input( 

96 doc="Catalog of forced measurements (shape and position parameters held fixed) on the deepCoadd.", 

97 dimensions=("tract", "patch", "band", "skymap"), 

98 storageClass="SourceCatalog", 

99 name="{coaddName}Coadd_forced_src", 

100 multiple=True 

101 ) 

102 inputCatalogRef = connectionTypes.Input( 

103 doc="Catalog marking the primary detection (which band provides a good shape and position)" 

104 "for each detection in deepCoadd_mergeDet.", 

105 dimensions=("tract", "patch", "skymap"), 

106 storageClass="SourceCatalog", 

107 name="{coaddName}Coadd_ref" 

108 ) 

109 outputCatalog = connectionTypes.Output( 

110 doc="A vertical concatenation of the deepCoadd_{ref|meas|forced_src} catalogs, " 

111 "stored as a DataFrame with a multi-level column index per-patch.", 

112 dimensions=("tract", "patch", "skymap"), 

113 storageClass="DataFrame", 

114 name="{coaddName}Coadd_obj" 

115 ) 

116 

117 

118class WriteObjectTableConfig(pipeBase.PipelineTaskConfig, 

119 pipelineConnections=WriteObjectTableConnections): 

120 engine = pexConfig.Field( 

121 dtype=str, 

122 default="pyarrow", 

123 doc="Parquet engine for writing (pyarrow or fastparquet)" 

124 ) 

125 coaddName = pexConfig.Field( 

126 dtype=str, 

127 default="deep", 

128 doc="Name of coadd" 

129 ) 

130 

131 

132class WriteObjectTableTask(pipeBase.PipelineTask): 

133 """Write filter-merged source tables to parquet 

134 """ 

135 _DefaultName = "writeObjectTable" 

136 ConfigClass = WriteObjectTableConfig 

137 

138 # Names of table datasets to be merged 

139 inputDatasets = ('forced_src', 'meas', 'ref') 

140 

141 # Tag of output dataset written by `MergeSourcesTask.write` 

142 outputDataset = 'obj' 

143 

144 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

145 inputs = butlerQC.get(inputRefs) 

146 

147 measDict = {ref.dataId['band']: {'meas': cat} for ref, cat in 

148 zip(inputRefs.inputCatalogMeas, inputs['inputCatalogMeas'])} 

149 forcedSourceDict = {ref.dataId['band']: {'forced_src': cat} for ref, cat in 

150 zip(inputRefs.inputCatalogForcedSrc, inputs['inputCatalogForcedSrc'])} 

151 

152 catalogs = {} 

153 for band in measDict.keys(): 

154 catalogs[band] = {'meas': measDict[band]['meas'], 

155 'forced_src': forcedSourceDict[band]['forced_src'], 

156 'ref': inputs['inputCatalogRef']} 

157 dataId = butlerQC.quantum.dataId 

158 df = self.run(catalogs=catalogs, tract=dataId['tract'], patch=dataId['patch']) 

159 outputs = pipeBase.Struct(outputCatalog=df) 

160 butlerQC.put(outputs, outputRefs) 

161 

162 def run(self, catalogs, tract, patch): 

163 """Merge multiple catalogs. 

164 

165 Parameters 

166 ---------- 

167 catalogs : `dict` 

168 Mapping from filter names to dict of catalogs. 

169 tract : `int`

170 tractId to use for the tractId column. 

171 patch : `str`

172 patchId to use for the patchId column. 

173 

174 Returns 

175 ------- 

176 catalog : `pandas.DataFrame` 

177 Merged dataframe. 

178 """ 

179 

180 dfs = [] 

181 for filt, tableDict in catalogs.items(): 

182 for dataset, table in tableDict.items(): 

183 # Convert afwTable to pandas DataFrame 

184 df = table.asAstropy().to_pandas().set_index('id', drop=True) 

185 

186 # Sort columns by name, to ensure matching schema among patches 

187 df = df.reindex(sorted(df.columns), axis=1) 

188 df['tractId'] = tract 

189 df['patchId'] = patch 

190 

191 # Make columns a 3-level MultiIndex 

192 df.columns = pd.MultiIndex.from_tuples([(dataset, filt, c) for c in df.columns], 

193 names=('dataset', 'band', 'column')) 

194 dfs.append(df) 

195 

196 catalog = functools.reduce(lambda d1, d2: d1.join(d2), dfs) 

197 return catalog 
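# Note on the result (shapes assumed for illustration): each per-band, per-dataset frame
# is joined on the object id index, so the merged catalog carries a three-level column
# index such as ('meas', 'g', 'base_PsfFlux_instFlux'), with 'tractId' and 'patchId'
# repeated under every (dataset, band) pair.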

198 

199 

200class WriteSourceTableConnections(pipeBase.PipelineTaskConnections, 

201 defaultTemplates={"catalogType": ""}, 

202 dimensions=("instrument", "visit", "detector")): 

203 

204 catalog = connectionTypes.Input( 

205 doc="Input full-depth catalog of sources produced by CalibrateTask", 

206 name="{catalogType}src", 

207 storageClass="SourceCatalog", 

208 dimensions=("instrument", "visit", "detector") 

209 ) 

210 outputCatalog = connectionTypes.Output( 

211 doc="Catalog of sources, `src` in Parquet format. The 'id' column is " 

212 "replaced with an index; all other columns are unchanged.", 

213 name="{catalogType}source", 

214 storageClass="DataFrame", 

215 dimensions=("instrument", "visit", "detector") 

216 ) 

217 

218 

219class WriteSourceTableConfig(pipeBase.PipelineTaskConfig, 

220 pipelineConnections=WriteSourceTableConnections): 

221 pass 

222 

223 

224class WriteSourceTableTask(pipeBase.PipelineTask): 

225 """Write source table to parquet. 

226 """ 

227 _DefaultName = "writeSourceTable" 

228 ConfigClass = WriteSourceTableConfig 

229 

230 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

231 inputs = butlerQC.get(inputRefs) 

232 inputs['ccdVisitId'] = butlerQC.quantum.dataId.pack("visit_detector") 

233 result = self.run(**inputs).table 

234 outputs = pipeBase.Struct(outputCatalog=result.toDataFrame()) 

235 butlerQC.put(outputs, outputRefs) 

236 

237 def run(self, catalog, ccdVisitId=None, **kwargs): 

238 """Convert `src` catalog to parquet 

239 

240 Parameters 

241 ---------- 

242 catalog : `afwTable.SourceCatalog`

243 Catalog to be converted.

244 ccdVisitId : `int`

245 ccdVisitId to be added as a column.

246 

247 Returns 

248 ------- 

249 result : `lsst.pipe.base.Struct` 

250 ``table`` 

251 `ParquetTable` version of the input catalog 

252 """ 

253 self.log.info("Generating parquet table from src catalog ccdVisitId=%s", ccdVisitId) 

254 df = catalog.asAstropy().to_pandas().set_index('id', drop=True) 

255 df['ccdVisitId'] = ccdVisitId 

256 return pipeBase.Struct(table=ParquetTable(dataFrame=df)) 
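# Packing note (illustrative): the ccdVisitId passed in by runQuantum comes from
# dataId.pack("visit_detector"), which encodes the (visit, detector) pair as a single
# integer, so the same value can later be matched against the ccdVisitTable.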

257 

258 

259class WriteRecalibratedSourceTableConnections(WriteSourceTableConnections, 

260 defaultTemplates={"catalogType": "", 

261 "skyWcsName": "jointcal", 

262 "photoCalibName": "fgcm"}, 

263 dimensions=("instrument", "visit", "detector", "skymap")): 

264 skyMap = connectionTypes.Input( 

265 doc="skyMap needed to choose which tract-level calibrations to use when multiple available", 

266 name=BaseSkyMap.SKYMAP_DATASET_TYPE_NAME, 

267 storageClass="SkyMap", 

268 dimensions=("skymap",), 

269 ) 

270 exposure = connectionTypes.Input( 

271 doc="Input exposure to perform photometry on.", 

272 name="calexp", 

273 storageClass="ExposureF", 

274 dimensions=["instrument", "visit", "detector"], 

275 ) 

276 externalSkyWcsTractCatalog = connectionTypes.Input( 

277 doc=("Per-tract, per-visit wcs calibrations. These catalogs use the detector " 

278 "id for the catalog id, sorted on id for fast lookup."), 

279 name="{skyWcsName}SkyWcsCatalog", 

280 storageClass="ExposureCatalog", 

281 dimensions=["instrument", "visit", "tract"], 

282 multiple=True 

283 ) 

284 externalSkyWcsGlobalCatalog = connectionTypes.Input( 

285 doc=("Per-visit wcs calibrations computed globally (with no tract information). " 

286 "These catalogs use the detector id for the catalog id, sorted on id for " 

287 "fast lookup."), 

288 name="finalVisitSummary", 

289 storageClass="ExposureCatalog", 

290 dimensions=["instrument", "visit"], 

291 ) 

292 externalPhotoCalibTractCatalog = connectionTypes.Input( 

293 doc=("Per-tract, per-visit photometric calibrations. These catalogs use the " 

294 "detector id for the catalog id, sorted on id for fast lookup."), 

295 name="{photoCalibName}PhotoCalibCatalog", 

296 storageClass="ExposureCatalog", 

297 dimensions=["instrument", "visit", "tract"], 

298 multiple=True 

299 ) 

300 externalPhotoCalibGlobalCatalog = connectionTypes.Input( 

301 doc=("Per-visit photometric calibrations computed globally (with no tract " 

302 "information). These catalogs use the detector id for the catalog id, " 

303 "sorted on id for fast lookup."), 

304 name="finalVisitSummary", 

305 storageClass="ExposureCatalog", 

306 dimensions=["instrument", "visit"], 

307 ) 

308 

309 def __init__(self, *, config=None): 

310 super().__init__(config=config) 

311 # Same connection boilerplate as all other applications of 

312 # Global/Tract calibrations 

313 if config.doApplyExternalSkyWcs and config.doReevaluateSkyWcs: 

314 if config.useGlobalExternalSkyWcs: 

315 self.inputs.remove("externalSkyWcsTractCatalog") 

316 else: 

317 self.inputs.remove("externalSkyWcsGlobalCatalog") 

318 else: 

319 self.inputs.remove("externalSkyWcsTractCatalog") 

320 self.inputs.remove("externalSkyWcsGlobalCatalog") 

321 if config.doApplyExternalPhotoCalib and config.doReevaluatePhotoCalib: 

322 if config.useGlobalExternalPhotoCalib: 

323 self.inputs.remove("externalPhotoCalibTractCatalog") 

324 else: 

325 self.inputs.remove("externalPhotoCalibGlobalCatalog") 

326 else: 

327 self.inputs.remove("externalPhotoCalibTractCatalog") 

328 self.inputs.remove("externalPhotoCalibGlobalCatalog") 

329 

330 

331class WriteRecalibratedSourceTableConfig(WriteSourceTableConfig, 

332 pipelineConnections=WriteRecalibratedSourceTableConnections): 

333 

334 doReevaluatePhotoCalib = pexConfig.Field( 

335 dtype=bool, 

336 default=True, 

337 doc=("Add or replace local photoCalib columns") 

338 ) 

339 doReevaluateSkyWcs = pexConfig.Field( 

340 dtype=bool, 

341 default=True, 

342 doc=("Add or replace local WCS columns and update the coord columns, coord_ra and coord_dec") 

343 ) 

344 doApplyExternalPhotoCalib = pexConfig.Field( 

345 dtype=bool, 

346 default=True, 

347 doc=("If and only if doReevaluatePhotoCalib, apply the photometric calibrations from an external ", 

348 "algorithm such as FGCM or jointcal, else use the photoCalib already attached to the exposure."), 

349 ) 

350 doApplyExternalSkyWcs = pexConfig.Field( 

351 dtype=bool, 

352 default=True, 

353 doc=("if and only if doReevaluateSkyWcs, apply the WCS from an external algorithm such as jointcal, ", 

354 "else use the wcs already attached to the exposure."), 

355 ) 

356 useGlobalExternalPhotoCalib = pexConfig.Field( 

357 dtype=bool, 

358 default=True, 

359 doc=("When using doApplyExternalPhotoCalib, use 'global' calibrations " 

360 "that are not run per-tract. When False, use per-tract photometric " 

361 "calibration files.") 

362 ) 

363 useGlobalExternalSkyWcs = pexConfig.Field( 

364 dtype=bool, 

365 default=True, 

366 doc=("When using doApplyExternalSkyWcs, use 'global' calibrations " 

367 "that are not run per-tract. When False, use per-tract wcs " 

368 "files.") 

369 ) 

370 

371 def validate(self): 

372 super().validate() 

373 if self.doApplyExternalSkyWcs and not self.doReevaluateSkyWcs: 

374 log.warning("doApplyExternalSkyWcs=True but doReevaluateSkyWcs=False" 

375 "External SkyWcs will not be read or evaluated.") 

376 if self.doApplyExternalPhotoCalib and not self.doReevaluatePhotoCalib: 

377 log.warning("doApplyExternalPhotoCalib=True but doReevaluatePhotoCalib=False." 

378 "External PhotoCalib will not be read or evaluated.") 

379 

380 

381class WriteRecalibratedSourceTableTask(WriteSourceTableTask): 

382 """Write source table to parquet 

383 """ 

384 _DefaultName = "writeRecalibratedSourceTable" 

385 ConfigClass = WriteRecalibratedSourceTableConfig 

386 

387 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

388 inputs = butlerQC.get(inputRefs) 

389 inputs['ccdVisitId'] = butlerQC.quantum.dataId.pack("visit_detector") 

390 inputs['exposureIdInfo'] = ExposureIdInfo.fromDataId(butlerQC.quantum.dataId, "visit_detector") 

391 

392 if self.config.doReevaluatePhotoCalib or self.config.doReevaluateSkyWcs: 

393 if self.config.doApplyExternalPhotoCalib or self.config.doApplyExternalSkyWcs: 

394 inputs['exposure'] = self.attachCalibs(inputRefs, **inputs) 

395 

396 inputs['catalog'] = self.addCalibColumns(**inputs) 

397 

398 result = self.run(**inputs).table 

399 outputs = pipeBase.Struct(outputCatalog=result.toDataFrame()) 

400 butlerQC.put(outputs, outputRefs) 

401 

402 def attachCalibs(self, inputRefs, skyMap, exposure, externalSkyWcsGlobalCatalog=None, 

403 externalSkyWcsTractCatalog=None, externalPhotoCalibGlobalCatalog=None, 

404 externalPhotoCalibTractCatalog=None, **kwargs): 

405 """Apply external calibrations to exposure per configuration 

406 

407 When multiple tract-level calibrations overlap, select the one with the 

408 center closest to detector. 

409 

410 Parameters 

411 ---------- 

412 inputRefs : `lsst.pipe.base.InputQuantizedConnection`

413 Input connections, used to look up the dataIds of the tract-level calibs.

414 skyMap : `lsst.skymap.SkyMap` 

415 exposure : `lsst.afw.image.exposure.Exposure` 

416 Input exposure to adjust calibrations. 

417 externalSkyWcsGlobalCatalog : `lsst.afw.table.ExposureCatalog`, optional 

418 Exposure catalog with external skyWcs to be applied per config 

419 externalSkyWcsTractCatalog : `lsst.afw.table.ExposureCatalog`, optional 

420 Exposure catalog with external skyWcs to be applied per config 

421 externalPhotoCalibGlobalCatalog : `lsst.afw.table.ExposureCatalog`, optional 

422 Exposure catalog with external photoCalib to be applied per config 

423 externalPhotoCalibTractCatalog : `lsst.afw.table.ExposureCatalog`, optional 

424 Exposure catalog with external photoCalib to be applied per config

425 

426 Returns 

427 ------- 

428 exposure : `lsst.afw.image.exposure.Exposure` 

429 Exposure with adjusted calibrations. 

430 """ 

431 if not self.config.doApplyExternalSkyWcs: 

432 # Do not modify the exposure's SkyWcs 

433 externalSkyWcsCatalog = None 

434 elif self.config.useGlobalExternalSkyWcs: 

435 # Use the global external SkyWcs 

436 externalSkyWcsCatalog = externalSkyWcsGlobalCatalog 

437 self.log.info('Applying global SkyWcs') 

438 else: 

439 # use tract-level external SkyWcs from the closest overlapping tract 

440 inputRef = getattr(inputRefs, 'externalSkyWcsTractCatalog') 

441 tracts = [ref.dataId['tract'] for ref in inputRef] 

442 if len(tracts) == 1: 

443 ind = 0 

444 self.log.info('Applying tract-level SkyWcs from tract %s', tracts[ind]) 

445 else: 

446 if exposure.getWcs() is None: # TODO: could this look-up use the externalPhotoCalib? 

447 raise ValueError("Trying to locate nearest tract, but exposure.wcs is None.") 

448 ind = self.getClosestTract(tracts, skyMap, 

449 exposure.getBBox(), exposure.getWcs()) 

450 self.log.info('Multiple overlapping externalSkyWcsTractCatalogs found (%s). ' 

451 'Applying closest to detector center: tract=%s', str(tracts), tracts[ind]) 

452 

453 externalSkyWcsCatalog = externalSkyWcsTractCatalog[ind] 

454 

455 if not self.config.doApplyExternalPhotoCalib: 

456 # Do not modify the exposure's PhotoCalib 

457 externalPhotoCalibCatalog = None 

458 elif self.config.useGlobalExternalPhotoCalib: 

459 # Use the global external PhotoCalib 

460 externalPhotoCalibCatalog = externalPhotoCalibGlobalCatalog 

461 self.log.info('Applying global PhotoCalib') 

462 else: 

463 # use tract-level external PhotoCalib from the closest overlapping tract 

464 inputRef = getattr(inputRefs, 'externalPhotoCalibTractCatalog') 

465 tracts = [ref.dataId['tract'] for ref in inputRef] 

466 if len(tracts) == 1: 

467 ind = 0 

468 self.log.info('Applying tract-level PhotoCalib from tract %s', tracts[ind]) 

469 else: 

470 ind = self.getClosestTract(tracts, skyMap, 

471 exposure.getBBox(), exposure.getWcs()) 

472 self.log.info('Multiple overlapping externalPhotoCalibTractCatalogs found (%s). ' 

473 'Applying closest to detector center: tract=%s', str(tracts), tracts[ind]) 

474 

475 externalPhotoCalibCatalog = externalPhotoCalibTractCatalog[ind] 

476 

477 return self.prepareCalibratedExposure(exposure, externalSkyWcsCatalog, externalPhotoCalibCatalog) 

478 

479 def getClosestTract(self, tracts, skyMap, bbox, wcs): 

480 """Find the index of the tract closest to detector from list of tractIds 

481 

482 Parameters 

483 ---------- 

484 tracts : `list` [`int`]

485 Iterable of integer tractIds 

486 skyMap : `lsst.skymap.SkyMap` 

487 skyMap to look up tract geometry and wcs.

488 bbox : `lsst.geom.Box2I` 

489 Detector bbox, the center of which will be compared to tract centers.

490 wcs : `lsst.afw.geom.SkyWcs` 

491 Detector Wcs object to map the detector center to SkyCoord 

492 

493 Returns 

494 ------- 

495 index : `int` 

496 """ 

497 if len(tracts) == 1: 

498 return 0 

499 

500 center = wcs.pixelToSky(bbox.getCenter()) 

501 sep = [] 

502 for tractId in tracts: 

503 tract = skyMap[tractId] 

504 tractCenter = tract.getWcs().pixelToSky(tract.getBBox().getCenter()) 

505 sep.append(center.separation(tractCenter)) 

506 

507 return np.argmin(sep) 

508 

509 def prepareCalibratedExposure(self, exposure, externalSkyWcsCatalog=None, externalPhotoCalibCatalog=None): 

510 """Prepare a calibrated exposure and apply external calibrations 

511 if so configured. 

512 

513 Parameters 

514 ---------- 

515 exposure : `lsst.afw.image.exposure.Exposure` 

516 Input exposure to adjust calibrations. 

517 externalSkyWcsCatalog : `lsst.afw.table.ExposureCatalog`, optional 

518 Exposure catalog with external skyWcs to be applied 

519 if config.doApplyExternalSkyWcs=True. Catalog uses the detector id 

520 for the catalog id, sorted on id for fast lookup. 

521 externalPhotoCalibCatalog : `lsst.afw.table.ExposureCatalog`, optional 

522 Exposure catalog with external photoCalib to be applied 

523 if config.doApplyExternalPhotoCalib=True. Catalog uses the detector 

524 id for the catalog id, sorted on id for fast lookup. 

525 

526 Returns 

527 ------- 

528 exposure : `lsst.afw.image.exposure.Exposure` 

529 Exposure with adjusted calibrations. 

530 """ 

531 detectorId = exposure.getInfo().getDetector().getId() 

532 

533 if externalPhotoCalibCatalog is not None: 

534 row = externalPhotoCalibCatalog.find(detectorId) 

535 if row is None: 

536 self.log.warning("Detector id %s not found in externalPhotoCalibCatalog; " 

537 "Using original photoCalib.", detectorId) 

538 else: 

539 photoCalib = row.getPhotoCalib() 

540 if photoCalib is None: 

541 self.log.warning("Detector id %s has None for photoCalib in externalPhotoCalibCatalog; " 

542 "Using original photoCalib.", detectorId) 

543 else: 

544 exposure.setPhotoCalib(photoCalib) 

545 

546 if externalSkyWcsCatalog is not None: 

547 row = externalSkyWcsCatalog.find(detectorId) 

548 if row is None: 

549 self.log.warning("Detector id %s not found in externalSkyWcsCatalog; " 

550 "Using original skyWcs.", detectorId) 

551 else: 

552 skyWcs = row.getWcs() 

553 if skyWcs is None: 

554 self.log.warning("Detector id %s has None for skyWcs in externalSkyWcsCatalog; " 

555 "Using original skyWcs.", detectorId) 

556 else: 

557 exposure.setWcs(skyWcs) 

558 

559 return exposure 

560 

561 def addCalibColumns(self, catalog, exposure, exposureIdInfo, **kwargs): 

562 """Add replace columns with calibs evaluated at each centroid 

563 

564 Add or replace ``base_LocalWcs`` and ``base_LocalPhotoCalib`` columns in

565 a source catalog, by rerunning the plugins.

566 

567 Parameters 

568 ---------- 

569 catalog : `lsst.afw.table.SourceCatalog` 

570 catalog to which calib columns will be added 

571 exposure : `lsst.afw.image.exposure.Exposure` 

572 Exposure with attached PhotoCalibs and SkyWcs attributes to be 

573 reevaluated at local centroids. Pixels are not required. 

574 exposureIdInfo : `lsst.obs.base.ExposureIdInfo` 

575 

576 Returns 

577 ------- 

578 newCat : `lsst.afw.table.SourceCatalog`

579 Source Catalog with requested local calib columns 

580 """ 

581 measureConfig = SingleFrameMeasurementTask.ConfigClass() 

582 measureConfig.doReplaceWithNoise = False 

583 

584 # Clear all slots, because we aren't running the relevant plugins. 

585 for slot in measureConfig.slots: 

586 setattr(measureConfig.slots, slot, None) 

587 

588 measureConfig.plugins.names = [] 

589 if self.config.doReevaluateSkyWcs: 

590 measureConfig.plugins.names.add('base_LocalWcs') 

591 self.log.info("Re-evaluating base_LocalWcs plugin") 

592 if self.config.doReevaluatePhotoCalib: 

593 measureConfig.plugins.names.add('base_LocalPhotoCalib') 

594 self.log.info("Re-evaluating base_LocalPhotoCalib plugin") 

595 pluginsNotToCopy = tuple(measureConfig.plugins.names) 

596 

597 # Create a new schema and catalog 

598 # Copy all columns from original except for the ones to reevaluate 

599 aliasMap = catalog.schema.getAliasMap() 

600 mapper = afwTable.SchemaMapper(catalog.schema) 

601 for item in catalog.schema: 

602 if not item.field.getName().startswith(pluginsNotToCopy): 

603 mapper.addMapping(item.key) 

604 

605 schema = mapper.getOutputSchema() 

606 measurement = SingleFrameMeasurementTask(config=measureConfig, schema=schema) 

607 schema.setAliasMap(aliasMap) 

608 newCat = afwTable.SourceCatalog(schema) 

609 newCat.extend(catalog, mapper=mapper) 

610 

611 # Fluxes in sourceCatalogs are in counts, so there are no fluxes to 

612 # update here. LocalPhotoCalibs are applied during transform tasks. 

613 # Update coord_ra/coord_dec, which are expected to be positions on the 

614 # sky and are used as such in sdm tables without transform 

615 if self.config.doReevaluateSkyWcs and exposure.wcs is not None: 

616 afwTable.updateSourceCoords(exposure.wcs, newCat) 

617 

618 measurement.run(measCat=newCat, exposure=exposure, exposureId=exposureIdInfo.expId) 

619 

620 return newCat 

621 

622 

623class PostprocessAnalysis(object): 

624 """Calculate columns from ParquetTable. 

625 

626 This object manages and organizes an arbitrary set of computations 

627 on a catalog. The catalog is defined by a 

628 `~lsst.pipe.tasks.parquetTable.ParquetTable` object (or list thereof), such 

629 as a ``deepCoadd_obj`` dataset, and the computations are defined by a 

630 collection of `lsst.pipe.tasks.functors.Functor` objects (or, equivalently,

631 a ``CompositeFunctor``). 

632 

633 After the object is initialized, accessing the ``.df`` attribute (which 

634 holds the `pandas.DataFrame` containing the results of the calculations) 

635 triggers computation of said dataframe. 

636 

637 One of the conveniences of using this object is the ability to define a 

638 desired common filter for all functors. This enables the same functor 

639 collection to be passed to several different `PostprocessAnalysis` objects 

640 without having to change the original functor collection, since the ``filt`` 

641 keyword argument of this object triggers an overwrite of the ``filt`` 

642 property for all functors in the collection. 

643 

644 This object also allows a list of refFlags to be passed, and defines a set 

645 of default refFlags that are always included even if not requested. 

646 

647 If a list of `~lsst.pipe.tasks.parquetTable.ParquetTable` objects is passed, rather than a single one,

648 then the calculations will be mapped over all the input catalogs. In 

649 principle, it should be straightforward to parallelize this activity, but 

650 initial tests have failed (see TODO in code comments). 

651 

652 Parameters 

653 ---------- 

654 parq : `~lsst.pipe.tasks.parquetTable.ParquetTable` (or list of such)

655 Source catalog(s) for computation. 

656 

657 functors : `list`, `dict`, or `~lsst.pipe.tasks.functors.CompositeFunctor` 

658 Computations to do (functors that act on ``parq``). 

659 If a dict, the output 

660 DataFrame will have columns keyed accordingly. 

661 If a list, the column keys will come from the 

662 ``.shortname`` attribute of each functor. 

663 

664 filt : `str`, optional 

665 Filter in which to calculate. If provided, 

666 this will overwrite any existing ``.filt`` attribute 

667 of the provided functors. 

668 

669 flags : `list`, optional 

670 List of flags (per-band) to include in output table. 

671 Taken from the ``meas`` dataset if applied to a multilevel Object Table. 

672 

673 refFlags : `list`, optional 

674 List of refFlags (only reference band) to include in output table. 

675 

676 forcedFlags : `list`, optional 

677 List of flags (per-band) to include in output table. 

678 Taken from the ``forced_src`` dataset if applied to a 

679 multilevel Object Table. Intended for flags from measurement plugins 

680 only run during multi-band forced-photometry. 

681 """ 

682 _defaultRefFlags = [] 

683 _defaultFuncs = () 

684 

685 def __init__(self, parq, functors, filt=None, flags=None, refFlags=None, forcedFlags=None): 

686 self.parq = parq 

687 self.functors = functors 

688 

689 self.filt = filt 

690 self.flags = list(flags) if flags is not None else [] 

691 self.forcedFlags = list(forcedFlags) if forcedFlags is not None else [] 

692 self.refFlags = list(self._defaultRefFlags) 

693 if refFlags is not None: 

694 self.refFlags += list(refFlags) 

695 

696 self._df = None 

697 

698 @property 

699 def defaultFuncs(self): 

700 funcs = dict(self._defaultFuncs) 

701 return funcs 

702 

703 @property 

704 def func(self): 

705 additionalFuncs = self.defaultFuncs 

706 additionalFuncs.update({flag: Column(flag, dataset='forced_src') for flag in self.forcedFlags}) 

707 additionalFuncs.update({flag: Column(flag, dataset='ref') for flag in self.refFlags}) 

708 additionalFuncs.update({flag: Column(flag, dataset='meas') for flag in self.flags}) 

709 

710 if isinstance(self.functors, CompositeFunctor): 

711 func = self.functors 

712 else: 

713 func = CompositeFunctor(self.functors) 

714 

715 func.funcDict.update(additionalFuncs) 

716 func.filt = self.filt 

717 

718 return func 

719 

720 @property 

721 def noDupCols(self): 

722 return [name for name, func in self.func.funcDict.items() if func.noDup or func.dataset == 'ref'] 

723 

724 @property 

725 def df(self): 

726 if self._df is None: 

727 self.compute() 

728 return self._df 

729 

730 def compute(self, dropna=False, pool=None): 

731 # map over multiple parquet tables 

732 if type(self.parq) in (list, tuple): 

733 if pool is None: 

734 dflist = [self.func(parq, dropna=dropna) for parq in self.parq] 

735 else: 

736 # TODO: Figure out why this doesn't work (pyarrow pickling 

737 # issues?) 

738 dflist = pool.map(functools.partial(self.func, dropna=dropna), self.parq) 

739 self._df = pd.concat(dflist) 

740 else: 

741 self._df = self.func(self.parq, dropna=dropna) 

742 

743 return self._df 
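# Hedged usage sketch (the functor file name and flag list are illustrative, not defaults):
#
#   funcs = CompositeFunctor.from_file('myFunctors.yaml')
#   analysis = PostprocessAnalysis(parq, funcs, filt='g', refFlags=['detect_isPrimary'])
#   df = analysis.df   # lazily evaluated on first access via compute()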

744 

745 

746class TransformCatalogBaseConnections(pipeBase.PipelineTaskConnections, 

747 dimensions=()): 

748 """Expected Connections for subclasses of TransformCatalogBaseTask. 

749 

750 Must be subclassed. 

751 """ 

752 inputCatalog = connectionTypes.Input( 

753 name="", 

754 storageClass="DataFrame", 

755 ) 

756 outputCatalog = connectionTypes.Output( 

757 name="", 

758 storageClass="DataFrame", 

759 ) 

760 

761 

762class TransformCatalogBaseConfig(pipeBase.PipelineTaskConfig, 

763 pipelineConnections=TransformCatalogBaseConnections): 

764 functorFile = pexConfig.Field( 

765 dtype=str, 

766 doc="Path to YAML file specifying Science Data Model functors to use " 

767 "when copying columns and computing calibrated values.", 

768 default=None, 

769 optional=True 

770 ) 

771 primaryKey = pexConfig.Field( 

772 dtype=str, 

773 doc="Name of column to be set as the DataFrame index. If None, the index" 

774 "will be named `id`", 

775 default=None, 

776 optional=True 

777 ) 

778 columnsFromDataId = pexConfig.ListField( 

779 dtype=str, 

780 default=None, 

781 optional=True, 

782 doc="Columns to extract from the dataId", 

783 ) 

784 

785 

786class TransformCatalogBaseTask(pipeBase.PipelineTask): 

787 """Base class for transforming/standardizing a catalog 

788 

789 by applying functors that convert units and apply calibrations. 

790 The purpose of this task is to perform a set of computations on 

791 an input `ParquetTable` dataset (such as ``deepCoadd_obj``) and write the 

792 results to a new dataset (which needs to be declared in an ``outputDataset`` 

793 attribute). 

794 

795 The calculations to be performed are defined in a YAML file that specifies 

796 a set of functors to be computed, provided as 

797 a ``--functorFile`` config parameter. An example of such a YAML file 

798 is the following: 

799 

800 funcs: 

801 psfMag: 

802 functor: Mag 

803 args: 

804 - base_PsfFlux 

805 filt: HSC-G 

806 dataset: meas 

807 cmodel_magDiff: 

808 functor: MagDiff 

809 args: 

810 - modelfit_CModel 

811 - base_PsfFlux 

812 filt: HSC-G 

813 gauss_magDiff: 

814 functor: MagDiff 

815 args: 

816 - base_GaussianFlux 

817 - base_PsfFlux 

818 filt: HSC-G 

819 count: 

820 functor: Column 

821 args: 

822 - base_InputCount_value 

823 filt: HSC-G 

824 deconvolved_moments: 

825 functor: DeconvolvedMoments 

826 filt: HSC-G 

827 dataset: forced_src 

828 refFlags: 

829 - calib_psfUsed 

830 - merge_measurement_i 

831 - merge_measurement_r 

832 - merge_measurement_z 

833 - merge_measurement_y 

834 - merge_measurement_g 

835 - base_PixelFlags_flag_inexact_psfCenter 

836 - detect_isPrimary 

837 

838 The names for each entry under "func" will become the names of columns in 

839 the output dataset. All the functors referenced are defined in 

840 `lsst.pipe.tasks.functors`. Positional arguments to be passed to each 

841 functor are in the `args` list, and any additional entries for each column 

842 other than "functor" or "args" (e.g., ``'filt'``, ``'dataset'``) are treated as 

843 keyword arguments to be passed to the functor initialization. 

844 

845 The "flags" entry is the default shortcut for `Column` functors. 

846 All columns listed under "flags" will be copied to the output table 

847 untransformed. They can be of any datatype. 

848 In the special case of transforming a multi-level object table with

849 band and dataset indices (deepCoadd_obj), these will be taken from the

850 `meas` dataset and exploded out per band. 

851 

852 There are two special shortcuts that only apply when transforming 

853 multi-level Object (deepCoadd_obj) tables: 

854 - The "refFlags" entry is shortcut for `Column` functor 

855 taken from the `'ref'` dataset if transforming an ObjectTable. 

856 - The "forcedFlags" entry is shortcut for `Column` functors. 

857 taken from the ``forced_src`` dataset if transforming an ObjectTable. 

858 These are expanded out per band. 

859 

860 

861 This task uses the `lsst.pipe.tasks.postprocess.PostprocessAnalysis` object 

862 to organize and execute the calculations.

863 """ 

864 @property 

865 def _DefaultName(self): 

866 raise NotImplementedError('Subclass must define "_DefaultName" attribute') 

867 

868 @property 

869 def outputDataset(self): 

870 raise NotImplementedError('Subclass must define "outputDataset" attribute') 

871 

872 @property 

873 def inputDataset(self): 

874 raise NotImplementedError('Subclass must define "inputDataset" attribute') 

875 

876 @property 

877 def ConfigClass(self): 

878 raise NotImplementedError('Subclass must define "ConfigClass" attribute') 

879 

880 def __init__(self, *args, **kwargs): 

881 super().__init__(*args, **kwargs) 

882 if self.config.functorFile: 

883 self.log.info('Loading transform functor definitions from %s',

884 self.config.functorFile) 

885 self.funcs = CompositeFunctor.from_file(self.config.functorFile) 

886 self.funcs.update(dict(PostprocessAnalysis._defaultFuncs)) 

887 else: 

888 self.funcs = None 

889 

890 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

891 inputs = butlerQC.get(inputRefs) 

892 if self.funcs is None: 

893 raise ValueError("config.functorFile is None. " 

894 "Must be a valid path to yaml in order to run Task as a PipelineTask.") 

895 result = self.run(parq=inputs['inputCatalog'], funcs=self.funcs, 

896 dataId=outputRefs.outputCatalog.dataId.full) 

897 outputs = pipeBase.Struct(outputCatalog=result) 

898 butlerQC.put(outputs, outputRefs) 

899 

900 def run(self, parq, funcs=None, dataId=None, band=None): 

901 """Do postprocessing calculations 

902 

903 Takes a `ParquetTable` object and dataId, 

904 returns a dataframe with results of postprocessing calculations. 

905 

906 Parameters 

907 ---------- 

908 parq : `lsst.pipe.tasks.parquetTable.ParquetTable` 

909 ParquetTable from which calculations are done. 

910 funcs : `lsst.pipe.tasks.functors.Functors` 

911 Functors to apply to the table's columns 

912 dataId : dict, optional 

913 Used to add a `patchId` column to the output dataframe. 

914 band : `str`, optional 

915 Filter band that is being processed. 

916 

917 Returns 

918 -------

919 df : `pandas.DataFrame` 

920 """ 

921 self.log.info("Transforming/standardizing the source table dataId: %s", dataId) 

922 

923 df = self.transform(band, parq, funcs, dataId).df 

924 self.log.info("Made a table of %d columns and %d rows", len(df.columns), len(df)) 

925 return df 

926 

927 def getFunctors(self): 

928 return self.funcs 

929 

930 def getAnalysis(self, parq, funcs=None, band=None): 

931 if funcs is None: 

932 funcs = self.funcs 

933 analysis = PostprocessAnalysis(parq, funcs, filt=band) 

934 return analysis 

935 

936 def transform(self, band, parq, funcs, dataId): 

937 analysis = self.getAnalysis(parq, funcs=funcs, band=band) 

938 df = analysis.df 

939 if dataId and self.config.columnsFromDataId: 

940 for key in self.config.columnsFromDataId: 

941 if key in dataId: 

942 df[str(key)] = dataId[key] 

943 else: 

944 raise ValueError(f"'{key}' in config.columnsFromDataId not found in dataId: {dataId}") 

945 

946 if self.config.primaryKey: 

947 if df.index.name != self.config.primaryKey and self.config.primaryKey in df: 

948 df.reset_index(inplace=True, drop=True) 

949 df.set_index(self.config.primaryKey, inplace=True) 

950 

951 return pipeBase.Struct( 

952 df=df, 

953 analysis=analysis 

954 ) 
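# Flow note: run() delegates to transform(), which builds a PostprocessAnalysis from the
# functors, evaluates it into a DataFrame, appends any config.columnsFromDataId values,
# and re-indexes on config.primaryKey when one is configured.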

955 

956 

957class TransformObjectCatalogConnections(pipeBase.PipelineTaskConnections, 

958 defaultTemplates={"coaddName": "deep"}, 

959 dimensions=("tract", "patch", "skymap")): 

960 inputCatalog = connectionTypes.Input( 

961 doc="The vertical concatenation of the deepCoadd_{ref|meas|forced_src} catalogs, " 

962 "stored as a DataFrame with a multi-level column index per-patch.", 

963 dimensions=("tract", "patch", "skymap"), 

964 storageClass="DataFrame", 

965 name="{coaddName}Coadd_obj", 

966 deferLoad=True, 

967 ) 

968 outputCatalog = connectionTypes.Output( 

969 doc="Per-Patch Object Table of columns transformed from the deepCoadd_obj table per the standard " 

970 "data model.", 

971 dimensions=("tract", "patch", "skymap"), 

972 storageClass="DataFrame", 

973 name="objectTable" 

974 ) 

975 

976 

977class TransformObjectCatalogConfig(TransformCatalogBaseConfig, 

978 pipelineConnections=TransformObjectCatalogConnections): 

979 coaddName = pexConfig.Field( 

980 dtype=str, 

981 default="deep", 

982 doc="Name of coadd" 

983 ) 

984 # TODO: remove in DM-27177 

985 filterMap = pexConfig.DictField( 

986 keytype=str, 

987 itemtype=str, 

988 default={}, 

989 doc=("Dictionary mapping full filter name to short one for column name munging." 

990 "These filters determine the output columns no matter what filters the " 

991 "input data actually contain."), 

992 deprecated=("Coadds are now identified by the band, so this transform is unused." 

993 "Will be removed after v22.") 

994 ) 

995 outputBands = pexConfig.ListField( 

996 dtype=str, 

997 default=None, 

998 optional=True, 

999 doc=("These bands and only these bands will appear in the output," 

1000 " NaN-filled if the input does not include them." 

1001 " If None, then use all bands found in the input.") 

1002 ) 

1003 camelCase = pexConfig.Field( 

1004 dtype=bool, 

1005 default=False, 

1006 doc=("Write per-band columns names with camelCase, else underscore " 

1007 "For example: gPsFlux instead of g_PsFlux.") 

1008 ) 

1009 multilevelOutput = pexConfig.Field( 

1010 dtype=bool, 

1011 default=False, 

1012 doc=("Whether results dataframe should have a multilevel column index (True) or be flat " 

1013 "and name-munged (False).") 

1014 ) 

1015 goodFlags = pexConfig.ListField( 

1016 dtype=str, 

1017 default=[], 

1018 doc=("List of 'good' flags that should be set False when populating empty tables. " 

1019 "All other flags are considered to be 'bad' flags and will be set to True.") 

1020 ) 

1021 floatFillValue = pexConfig.Field( 

1022 dtype=float, 

1023 default=np.nan, 

1024 doc="Fill value for float fields when populating empty tables." 

1025 ) 

1026 integerFillValue = pexConfig.Field( 

1027 dtype=int, 

1028 default=-1, 

1029 doc="Fill value for integer fields when populating empty tables." 

1030 ) 

1031 

1032 def setDefaults(self): 

1033 super().setDefaults() 

1034 self.functorFile = os.path.join('$PIPE_TASKS_DIR', 'schemas', 'Object.yaml') 

1035 self.primaryKey = 'objectId' 

1036 self.columnsFromDataId = ['tract', 'patch'] 

1037 self.goodFlags = ['calib_astrometry_used', 

1038 'calib_photometry_reserved', 

1039 'calib_photometry_used', 

1040 'calib_psf_candidate', 

1041 'calib_psf_reserved', 

1042 'calib_psf_used'] 

1043 

1044 

1045class TransformObjectCatalogTask(TransformCatalogBaseTask): 

1046 """Produce a flattened Object Table to match the format specified in 

1047 sdm_schemas. 

1048 

1049 Do the same set of postprocessing calculations on all bands. 

1050 

1051 This is identical to `TransformCatalogBaseTask`, except that it does

1052 the specified functor calculations for all filters present in the 

1053 input `deepCoadd_obj` table. Any specific ``"filt"`` keywords specified 

1054 by the YAML file will be superseded.

1055 """ 

1056 _DefaultName = "transformObjectCatalog" 

1057 ConfigClass = TransformObjectCatalogConfig 

1058 

1059 def run(self, parq, funcs=None, dataId=None, band=None): 

1060 # NOTE: band kwarg is ignored here. 

1061 dfDict = {} 

1062 analysisDict = {} 

1063 templateDf = pd.DataFrame() 

1064 

1065 if isinstance(parq, DeferredDatasetHandle): 

1066 columns = parq.get(component='columns') 

1067 inputBands = columns.unique(level=1).values 

1068 else: 

1069 inputBands = parq.columnLevelNames['band'] 

1070 

1071 outputBands = self.config.outputBands if self.config.outputBands else inputBands 

1072 

1073 # Perform transform for data of filters that exist in parq. 

1074 for inputBand in inputBands: 

1075 if inputBand not in outputBands: 

1076 self.log.info("Ignoring %s band data in the input", inputBand) 

1077 continue 

1078 self.log.info("Transforming the catalog of band %s", inputBand) 

1079 result = self.transform(inputBand, parq, funcs, dataId) 

1080 dfDict[inputBand] = result.df 

1081 analysisDict[inputBand] = result.analysis 

1082 if templateDf.empty: 

1083 templateDf = result.df 

1084 

1085 # Put filler values in columns of other wanted bands 

1086 for filt in outputBands: 

1087 if filt not in dfDict: 

1088 self.log.info("Adding empty columns for band %s", filt) 

1089 dfTemp = templateDf.copy() 

1090 for col in dfTemp.columns: 

1091 testValue = dfTemp[col].values[0] 

1092 if isinstance(testValue, (np.bool_, pd.BooleanDtype)): 

1093 # Boolean flag type, check if it is a "good" flag 

1094 if col in self.config.goodFlags: 

1095 fillValue = False 

1096 else: 

1097 fillValue = True 

1098 elif isinstance(testValue, numbers.Integral): 

1099 # Checking numbers.Integral catches all flavors 

1100 # of python, numpy, pandas, etc. integers. 

1101 # We must ensure this is not an unsigned integer. 

1102 if isinstance(testValue, np.unsignedinteger): 

1103 raise ValueError("Parquet tables may not have unsigned integer columns.") 

1104 else: 

1105 fillValue = self.config.integerFillValue 

1106 else: 

1107 fillValue = self.config.floatFillValue 

1108 dfTemp[col].values[:] = fillValue 

1109 dfDict[filt] = dfTemp 

1110 

1111 # This makes a multilevel column index, with band as first level 

1112 df = pd.concat(dfDict, axis=1, names=['band', 'column']) 

1113 

1114 if not self.config.multilevelOutput: 

1115 noDupCols = list(set.union(*[set(v.noDupCols) for v in analysisDict.values()])) 

1116 if self.config.primaryKey in noDupCols: 

1117 noDupCols.remove(self.config.primaryKey) 

1118 if dataId and self.config.columnsFromDataId: 

1119 noDupCols += self.config.columnsFromDataId 

1120 df = flattenFilters(df, noDupCols=noDupCols, camelCase=self.config.camelCase, 

1121 inputBands=inputBands) 

1122 

1123 self.log.info("Made a table of %d columns and %d rows", len(df.columns), len(df)) 

1124 

1125 return df 
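# Fill-value sketch (band names assumed): with config.outputBands = ['g', 'r', 'i'] and an
# input deepCoadd_obj containing only g and r, the i columns are synthesized from the first
# transformed band: floats become config.floatFillValue (NaN), integers become
# config.integerFillValue (-1), and boolean flags become True unless listed in
# config.goodFlags, in which case they are set False.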

1126 

1127 

1128class ConsolidateObjectTableConnections(pipeBase.PipelineTaskConnections, 

1129 dimensions=("tract", "skymap")): 

1130 inputCatalogs = connectionTypes.Input( 

1131 doc="Per-Patch objectTables conforming to the standard data model.", 

1132 name="objectTable", 

1133 storageClass="DataFrame", 

1134 dimensions=("tract", "patch", "skymap"), 

1135 multiple=True, 

1136 ) 

1137 outputCatalog = connectionTypes.Output( 

1138 doc="Pre-tract horizontal concatenation of the input objectTables", 

1139 name="objectTable_tract", 

1140 storageClass="DataFrame", 

1141 dimensions=("tract", "skymap"), 

1142 ) 

1143 

1144 

1145class ConsolidateObjectTableConfig(pipeBase.PipelineTaskConfig, 

1146 pipelineConnections=ConsolidateObjectTableConnections): 

1147 coaddName = pexConfig.Field( 

1148 dtype=str, 

1149 default="deep", 

1150 doc="Name of coadd" 

1151 ) 

1152 

1153 

1154class ConsolidateObjectTableTask(pipeBase.PipelineTask): 

1155 """Write patch-merged source tables to a tract-level parquet file. 

1156 

1157 Concatenates `objectTable` list into a per-tract `objectTable_tract`.

1158 """ 

1159 _DefaultName = "consolidateObjectTable" 

1160 ConfigClass = ConsolidateObjectTableConfig 

1161 

1162 inputDataset = 'objectTable' 

1163 outputDataset = 'objectTable_tract' 

1164 

1165 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

1166 inputs = butlerQC.get(inputRefs) 

1167 self.log.info("Concatenating %s per-patch Object Tables", 

1168 len(inputs['inputCatalogs'])) 

1169 df = pd.concat(inputs['inputCatalogs']) 

1170 butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs) 

1171 

1172 

1173class TransformSourceTableConnections(pipeBase.PipelineTaskConnections, 

1174 defaultTemplates={"catalogType": ""}, 

1175 dimensions=("instrument", "visit", "detector")): 

1176 

1177 inputCatalog = connectionTypes.Input( 

1178 doc="Wide input catalog of sources produced by WriteSourceTableTask", 

1179 name="{catalogType}source", 

1180 storageClass="DataFrame", 

1181 dimensions=("instrument", "visit", "detector"), 

1182 deferLoad=True 

1183 ) 

1184 outputCatalog = connectionTypes.Output( 

1185 doc="Narrower, per-detector Source Table transformed and converted per a " 

1186 "specified set of functors", 

1187 name="{catalogType}sourceTable", 

1188 storageClass="DataFrame", 

1189 dimensions=("instrument", "visit", "detector") 

1190 ) 

1191 

1192 

1193class TransformSourceTableConfig(TransformCatalogBaseConfig, 

1194 pipelineConnections=TransformSourceTableConnections): 

1195 

1196 def setDefaults(self): 

1197 super().setDefaults() 

1198 self.functorFile = os.path.join('$PIPE_TASKS_DIR', 'schemas', 'Source.yaml') 

1199 self.primaryKey = 'sourceId' 

1200 self.columnsFromDataId = ['visit', 'detector', 'band', 'physical_filter'] 

1201 

1202 

1203class TransformSourceTableTask(TransformCatalogBaseTask): 

1204 """Transform/standardize a source catalog 

1205 """ 

1206 _DefaultName = "transformSourceTable" 

1207 ConfigClass = TransformSourceTableConfig 

1208 

1209 

1210class ConsolidateVisitSummaryConnections(pipeBase.PipelineTaskConnections, 

1211 dimensions=("instrument", "visit",), 

1212 defaultTemplates={"calexpType": ""}): 

1213 calexp = connectionTypes.Input( 

1214 doc="Processed exposures used for metadata", 

1215 name="calexp", 

1216 storageClass="ExposureF", 

1217 dimensions=("instrument", "visit", "detector"), 

1218 deferLoad=True, 

1219 multiple=True, 

1220 ) 

1221 visitSummary = connectionTypes.Output( 

1222 doc=("Per-visit consolidated exposure metadata. These catalogs use " 

1223 "detector id for the id and are sorted for fast lookups of a " 

1224 "detector."), 

1225 name="visitSummary", 

1226 storageClass="ExposureCatalog", 

1227 dimensions=("instrument", "visit"), 

1228 ) 

1229 visitSummarySchema = connectionTypes.InitOutput( 

1230 doc="Schema of the visitSummary catalog", 

1231 name="visitSummary_schema", 

1232 storageClass="ExposureCatalog", 

1233 ) 

1234 

1235 

1236class ConsolidateVisitSummaryConfig(pipeBase.PipelineTaskConfig, 

1237 pipelineConnections=ConsolidateVisitSummaryConnections): 

1238 """Config for ConsolidateVisitSummaryTask""" 

1239 pass 

1240 

1241 

1242class ConsolidateVisitSummaryTask(pipeBase.PipelineTask): 

1243 """Task to consolidate per-detector visit metadata. 

1244 

1245 This task aggregates the following metadata from all the detectors in a 

1246 single visit into an exposure catalog: 

1247 - The visitInfo. 

1248 - The wcs. 

1249 - The photoCalib. 

1250 - The physical_filter and band (if available). 

1251 - The psf size, shape, and effective area at the center of the detector. 

1252 - The corners of the bounding box in right ascension/declination. 

1253 

1254 Other quantities such as Detector, Psf, ApCorrMap, and TransmissionCurve 

1255 are not persisted here because of storage concerns, and because of their 

1256 limited utility as summary statistics. 

1257 

1258 Tests for this task are performed in ci_hsc_gen3. 

1259 """ 

1260 _DefaultName = "consolidateVisitSummary" 

1261 ConfigClass = ConsolidateVisitSummaryConfig 

1262 

1263 def __init__(self, **kwargs): 

1264 super().__init__(**kwargs) 

1265 self.schema = afwTable.ExposureTable.makeMinimalSchema() 

1266 self.schema.addField('visit', type='L', doc='Visit number') 

1267 self.schema.addField('physical_filter', type='String', size=32, doc='Physical filter') 

1268 self.schema.addField('band', type='String', size=32, doc='Name of band') 

1269 ExposureSummaryStats.update_schema(self.schema) 

1270 self.visitSummarySchema = afwTable.ExposureCatalog(self.schema) 

1271 

1272 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

1273 dataRefs = butlerQC.get(inputRefs.calexp) 

1274 visit = dataRefs[0].dataId.byName()['visit'] 

1275 

1276 self.log.debug("Concatenating metadata from %d per-detector calexps (visit %d)", 

1277 len(dataRefs), visit) 

1278 

1279 expCatalog = self._combineExposureMetadata(visit, dataRefs) 

1280 

1281 butlerQC.put(expCatalog, outputRefs.visitSummary) 

1282 

1283 def _combineExposureMetadata(self, visit, dataRefs): 

1284 """Make a combined exposure catalog from a list of dataRefs. 

1285 These dataRefs must point to exposures with wcs, summaryStats, 

1286 and other visit metadata. 

1287 

1288 Parameters 

1289 ---------- 

1290 visit : `int` 

1291 Visit identification number. 

1292 dataRefs : `list` of `lsst.daf.butler.DeferredDatasetHandle` 

1293 List of dataRefs in visit. 

1294 

1295 Returns 

1296 ------- 

1297 visitSummary : `lsst.afw.table.ExposureCatalog` 

1298 Exposure catalog with per-detector summary information. 

1299 """ 

1300 cat = afwTable.ExposureCatalog(self.schema) 

1301 cat.resize(len(dataRefs)) 

1302 

1303 cat['visit'] = visit 

1304 

1305 for i, dataRef in enumerate(dataRefs): 

1306 visitInfo = dataRef.get(component='visitInfo') 

1307 filterLabel = dataRef.get(component='filter') 

1308 summaryStats = dataRef.get(component='summaryStats') 

1309 detector = dataRef.get(component='detector') 

1310 wcs = dataRef.get(component='wcs') 

1311 photoCalib = dataRef.get(component='photoCalib') 

1312 detector = dataRef.get(component='detector') 

1313 bbox = dataRef.get(component='bbox') 

1314 validPolygon = dataRef.get(component='validPolygon') 

1315 

1316 rec = cat[i] 

1317 rec.setBBox(bbox) 

1318 rec.setVisitInfo(visitInfo) 

1319 rec.setWcs(wcs) 

1320 rec.setPhotoCalib(photoCalib) 

1321 rec.setValidPolygon(validPolygon) 

1322 

1323 rec['physical_filter'] = filterLabel.physicalLabel if filterLabel.hasPhysicalLabel() else "" 

1324 rec['band'] = filterLabel.bandLabel if filterLabel.hasBandLabel() else "" 

1325 rec.setId(detector.getId()) 

1326 summaryStats.update_record(rec) 

1327 

1328 metadata = dafBase.PropertyList() 

1329 metadata.add("COMMENT", "Catalog id is detector id, sorted.") 

1330 # We are looping over existing datarefs, so the following is true 

1331 metadata.add("COMMENT", "Only detectors with data have entries.") 

1332 cat.setMetadata(metadata) 

1333 

1334 cat.sort() 

1335 return cat 

1336 

1337 

1338class ConsolidateSourceTableConnections(pipeBase.PipelineTaskConnections, 

1339 defaultTemplates={"catalogType": ""}, 

1340 dimensions=("instrument", "visit")): 

1341 inputCatalogs = connectionTypes.Input( 

1342 doc="Input per-detector Source Tables", 

1343 name="{catalogType}sourceTable", 

1344 storageClass="DataFrame", 

1345 dimensions=("instrument", "visit", "detector"), 

1346 multiple=True 

1347 ) 

1348 outputCatalog = connectionTypes.Output( 

1349 doc="Per-visit concatenation of Source Table", 

1350 name="{catalogType}sourceTable_visit", 

1351 storageClass="DataFrame", 

1352 dimensions=("instrument", "visit") 

1353 ) 

1354 

1355 

1356class ConsolidateSourceTableConfig(pipeBase.PipelineTaskConfig, 

1357 pipelineConnections=ConsolidateSourceTableConnections): 

1358 pass 

1359 

1360 

1361class ConsolidateSourceTableTask(pipeBase.PipelineTask): 

1362 """Concatenate `sourceTable` list into a per-visit `sourceTable_visit` 

1363 """ 

1364 _DefaultName = 'consolidateSourceTable' 

1365 ConfigClass = ConsolidateSourceTableConfig 

1366 

1367 inputDataset = 'sourceTable' 

1368 outputDataset = 'sourceTable_visit' 

1369 

1370 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

1371 from .makeWarp import reorderRefs 

1372 

1373 detectorOrder = [ref.dataId['detector'] for ref in inputRefs.inputCatalogs] 

1374 detectorOrder.sort() 

1375 inputRefs = reorderRefs(inputRefs, detectorOrder, dataIdKey='detector') 

1376 inputs = butlerQC.get(inputRefs) 

1377 self.log.info("Concatenating %s per-detector Source Tables", 

1378 len(inputs['inputCatalogs'])) 

1379 df = pd.concat(inputs['inputCatalogs']) 

1380 butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs) 

1381 

1382 

1383class MakeCcdVisitTableConnections(pipeBase.PipelineTaskConnections, 

1384 dimensions=("instrument",), 

1385 defaultTemplates={"calexpType": ""}): 

1386 visitSummaryRefs = connectionTypes.Input( 

1387 doc="Data references for per-visit consolidated exposure metadata", 

1388 name="finalVisitSummary", 

1389 storageClass="ExposureCatalog", 

1390 dimensions=("instrument", "visit"), 

1391 multiple=True, 

1392 deferLoad=True, 

1393 ) 

1394 outputCatalog = connectionTypes.Output( 

1395 doc="CCD and Visit metadata table", 

1396 name="ccdVisitTable", 

1397 storageClass="DataFrame", 

1398 dimensions=("instrument",) 

1399 ) 

1400 

1401 

1402class MakeCcdVisitTableConfig(pipeBase.PipelineTaskConfig, 

1403 pipelineConnections=MakeCcdVisitTableConnections): 

1404 pass 

1405 

1406 

1407class MakeCcdVisitTableTask(pipeBase.PipelineTask): 

1408 """Produce a `ccdVisitTable` from the visit summary exposure catalogs. 

1409 """ 

1410 _DefaultName = 'makeCcdVisitTable' 

1411 ConfigClass = MakeCcdVisitTableConfig 

1412 

1413 def run(self, visitSummaryRefs): 

1414 """Make a table of ccd information from the visit summary catalogs. 

1415 

1416 Parameters 

1417 ---------- 

1418 visitSummaryRefs : `list` of `lsst.daf.butler.DeferredDatasetHandle` 

1419 List of DeferredDatasetHandles pointing to exposure catalogs with 

1420 per-detector summary information. 

1421 

1422 Returns 

1423 ------- 

1424 result : `lsst.pipe.base.Struct`

1425 Results struct with attribute: 

1426 

1427 ``outputCatalog`` 

1428 Catalog of ccd and visit information. 

1429 """ 

1430 ccdEntries = [] 

1431 for visitSummaryRef in visitSummaryRefs: 

1432 visitSummary = visitSummaryRef.get() 

1433 visitInfo = visitSummary[0].getVisitInfo() 

1434 

1435 ccdEntry = {} 

1436 summaryTable = visitSummary.asAstropy() 

1437 selectColumns = ['id', 'visit', 'physical_filter', 'band', 'ra', 'decl', 'zenithDistance', 

1438 'zeroPoint', 'psfSigma', 'skyBg', 'skyNoise', 

1439 'astromOffsetMean', 'astromOffsetStd', 'nPsfStar', 

1440 'psfStarDeltaE1Median', 'psfStarDeltaE2Median', 

1441 'psfStarDeltaE1Scatter', 'psfStarDeltaE2Scatter', 

1442 'psfStarDeltaSizeMedian', 'psfStarDeltaSizeScatter', 

1443 'psfStarScaledDeltaSizeScatter', 

1444 'psfTraceRadiusDelta', 'maxDistToNearestPsf'] 

1445 ccdEntry = summaryTable[selectColumns].to_pandas().set_index('id') 

1446 # 'visit' is the human-readable visit number. 

1447 # 'visitId' is the key into the visit table; the two are the same. 

1448 # Strictly speaking, the visit number should be obtained by joining 

1449 # with the visit table. 

1450 ccdEntry = ccdEntry.rename(columns={"visit": "visitId"}) 

1451 dataIds = [DataCoordinate.standardize(visitSummaryRef.dataId, detector=id) for id in 

1452 summaryTable['id']] 
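# Pack each (visit, detector) data ID into a single integer; this packed
# value becomes the ccdVisitId primary key used for joins with other tables.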

1453 packer = visitSummaryRef.dataId.universe.makePacker('visit_detector', visitSummaryRef.dataId) 

1454 ccdVisitIds = [packer.pack(dataId) for dataId in dataIds] 

1455 ccdEntry['ccdVisitId'] = ccdVisitIds 

1456 ccdEntry['detector'] = summaryTable['id'] 

1457 pixToArcseconds = np.array([vR.getWcs().getPixelScale().asArcseconds() if vR.getWcs() 

1458 else np.nan for vR in visitSummary]) 
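# Convert the PSF Gaussian sigma (pixels) to a FWHM-based seeing estimate in
# arcseconds: FWHM = 2*sqrt(2*ln 2)*sigma ~= 2.355*sigma, scaled by the
# per-detector pixel scale.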

1459 ccdEntry["seeing"] = visitSummary['psfSigma'] * np.sqrt(8 * np.log(2)) * pixToArcseconds 

1460 

1461 ccdEntry["skyRotation"] = visitInfo.getBoresightRotAngle().asDegrees() 

1462 ccdEntry["expMidpt"] = visitInfo.getDate().toPython() 

1463 ccdEntry["expMidptMJD"] = visitInfo.getDate().get(dafBase.DateTime.MJD) 

1464 expTime = visitInfo.getExposureTime() 

1465 ccdEntry['expTime'] = expTime 

1466 ccdEntry["obsStart"] = ccdEntry["expMidpt"] - 0.5 * pd.Timedelta(seconds=expTime) 

1467 expTime_days = expTime / (60*60*24) 

1468 ccdEntry["obsStartMJD"] = ccdEntry["expMidptMJD"] - 0.5 * expTime_days 

1469 ccdEntry['darkTime'] = visitInfo.getDarkTime() 

1470 ccdEntry['xSize'] = summaryTable['bbox_max_x'] - summaryTable['bbox_min_x'] 

1471 ccdEntry['ySize'] = summaryTable['bbox_max_y'] - summaryTable['bbox_min_y'] 
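# Corner columns: llc/ulc/urc/lrc are the lower-left, upper-left, upper-right
# and lower-right corners, taken in that order from raCorners/decCorners.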

1472 ccdEntry['llcra'] = summaryTable['raCorners'][:, 0] 

1473 ccdEntry['llcdec'] = summaryTable['decCorners'][:, 0] 

1474 ccdEntry['ulcra'] = summaryTable['raCorners'][:, 1] 

1475 ccdEntry['ulcdec'] = summaryTable['decCorners'][:, 1] 

1476 ccdEntry['urcra'] = summaryTable['raCorners'][:, 2] 

1477 ccdEntry['urcdec'] = summaryTable['decCorners'][:, 2] 

1478 ccdEntry['lrcra'] = summaryTable['raCorners'][:, 3] 

1479 ccdEntry['lrcdec'] = summaryTable['decCorners'][:, 3] 

1480 # TODO: DM-30618, Add raftName, nExposures, ccdTemp, binX, binY, 

1481 # and flags, and decide if WCS, and llcx, llcy, ulcx, ulcy, etc. 

1482 # values are actually wanted. 

1483 ccdEntries.append(ccdEntry) 

1484 

1485 outputCatalog = pd.concat(ccdEntries) 

1486 outputCatalog.set_index('ccdVisitId', inplace=True, verify_integrity=True) 

1487 return pipeBase.Struct(outputCatalog=outputCatalog) 

1488 

1489 
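# Illustrative sketch only: how an observation start time follows from the
# exposure midpoint and exposure time, in both datetime and MJD form, as in
# the obsStart/obsStartMJD columns computed in MakeCcdVisitTableTask.run
# above. The values below are hypothetical.
def _exampleObsStartFromMidpoint():
    import pandas as pd

    expMidpt = pd.Timestamp("2023-01-01T00:01:00")  # hypothetical midpoint
    expMidptMJD = 59945.000694  # MJD of the same (hypothetical) midpoint
    expTime = 30.0  # exposure time in seconds
    obsStart = expMidpt - 0.5 * pd.Timedelta(seconds=expTime)
    obsStartMJD = expMidptMJD - 0.5 * expTime / 86400.0  # 86400 s per day
    return obsStart, obsStartMJD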

1490class MakeVisitTableConnections(pipeBase.PipelineTaskConnections, 

1491 dimensions=("instrument",), 

1492 defaultTemplates={"calexpType": ""}): 

1493 visitSummaries = connectionTypes.Input( 

1494 doc="Per-visit consolidated exposure metadata", 

1495 name="finalVisitSummary", 

1496 storageClass="ExposureCatalog", 

1497 dimensions=("instrument", "visit",), 

1498 multiple=True, 

1499 deferLoad=True, 

1500 ) 

1501 outputCatalog = connectionTypes.Output( 

1502 doc="Visit metadata table", 

1503 name="visitTable", 

1504 storageClass="DataFrame", 

1505 dimensions=("instrument",) 

1506 ) 

1507 

1508 

1509class MakeVisitTableConfig(pipeBase.PipelineTaskConfig, 

1510 pipelineConnections=MakeVisitTableConnections): 

1511 pass 

1512 

1513 

1514class MakeVisitTableTask(pipeBase.PipelineTask): 

1515 """Produce a `visitTable` from the visit summary exposure catalogs. 

1516 """ 

1517 _DefaultName = 'makeVisitTable' 

1518 ConfigClass = MakeVisitTableConfig 

1519 

1520 def run(self, visitSummaries): 

1521 """Make a table of visit information from the visit summary catalogs. 

1522 

1523 Parameters 

1524 ---------- 

1525 visitSummaries : `list` of `lsst.afw.table.ExposureCatalog` 

1526 List of exposure catalogs with per-detector summary information. 

1527 Returns 

1528 ------- 

1529 result : `lsst.pipe.base.Struct` 

1530 Results struct with attribute: 

1531 

1532 ``outputCatalog`` 

1533 Catalog of visit information. 

1534 """ 

1535 visitEntries = [] 

1536 for visitSummary in visitSummaries: 

1537 visitSummary = visitSummary.get() 

1538 visitRow = visitSummary[0] 

1539 visitInfo = visitRow.getVisitInfo() 

1540 

1541 visitEntry = {} 

1542 visitEntry["visitId"] = visitRow['visit'] 

1543 visitEntry["visit"] = visitRow['visit'] 

1544 visitEntry["physical_filter"] = visitRow['physical_filter'] 

1545 visitEntry["band"] = visitRow['band'] 

1546 raDec = visitInfo.getBoresightRaDec() 

1547 visitEntry["ra"] = raDec.getRa().asDegrees() 

1548 visitEntry["decl"] = raDec.getDec().asDegrees() 

1549 visitEntry["skyRotation"] = visitInfo.getBoresightRotAngle().asDegrees() 

1550 azAlt = visitInfo.getBoresightAzAlt() 

1551 visitEntry["azimuth"] = azAlt.getLongitude().asDegrees() 

1552 visitEntry["altitude"] = azAlt.getLatitude().asDegrees() 
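# Zenith distance is the complement of the boresight altitude (90 deg - alt).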

1553 visitEntry["zenithDistance"] = 90 - azAlt.getLatitude().asDegrees() 

1554 visitEntry["airmass"] = visitInfo.getBoresightAirmass() 

1555 expTime = visitInfo.getExposureTime() 

1556 visitEntry["expTime"] = expTime 

1557 visitEntry["expMidpt"] = visitInfo.getDate().toPython() 

1558 visitEntry["expMidptMJD"] = visitInfo.getDate().get(dafBase.DateTime.MJD) 

1559 visitEntry["obsStart"] = visitEntry["expMidpt"] - 0.5 * pd.Timedelta(seconds=expTime) 

1560 expTime_days = expTime / (60*60*24) 

1561 visitEntry["obsStartMJD"] = visitEntry["expMidptMJD"] - 0.5 * expTime_days 

1562 visitEntries.append(visitEntry) 

1563 

1564 # TODO: DM-30623, Add programId, exposureType, cameraTemp, 

1565 # mirror1Temp, mirror2Temp, mirror3Temp, domeTemp, externalTemp, 

1566 # dimmSeeing, pwvGPS, pwvMW, flags, nExposures. 

1567 

1568 outputCatalog = pd.DataFrame(data=visitEntries) 

1569 outputCatalog.set_index('visitId', inplace=True, verify_integrity=True) 

1570 return pipeBase.Struct(outputCatalog=outputCatalog) 

1571 

1572 
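# Illustrative sketch only: the visit table produced above is a DataFrame
# built from a list of per-visit dicts and indexed by visitId;
# verify_integrity=True raises if the same visit appears twice. The rows
# below are hypothetical.
def _exampleVisitTableIndex():
    import pandas as pd

    visitEntries = [
        {"visitId": 100, "band": "r", "expTime": 30.0},
        {"visitId": 101, "band": "i", "expTime": 30.0},
    ]
    return pd.DataFrame(data=visitEntries).set_index("visitId",
                                                     verify_integrity=True)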

1573class WriteForcedSourceTableConnections(pipeBase.PipelineTaskConnections, 

1574 dimensions=("instrument", "visit", "detector", "skymap", "tract")): 

1575 

1576 inputCatalog = connectionTypes.Input( 

1577 doc="Primary per-detector, single-epoch forced-photometry catalog. " 

1578 "By default, it is the output of ForcedPhotCcdTask on calexps", 

1579 name="forced_src", 

1580 storageClass="SourceCatalog", 

1581 dimensions=("instrument", "visit", "detector", "skymap", "tract") 

1582 ) 

1583 inputCatalogDiff = connectionTypes.Input( 

1584 doc="Secondary multi-epoch, per-detector, forced photometry catalog. " 

1585 "By default, it is the output of ForcedPhotCcdTask run on image differences.", 

1586 name="forced_diff", 

1587 storageClass="SourceCatalog", 

1588 dimensions=("instrument", "visit", "detector", "skymap", "tract") 

1589 ) 

1590 outputCatalog = connectionTypes.Output( 

1591 doc="Input catalogs horizontally joined on `objectId` in Parquet format", 

1592 name="mergedForcedSource", 

1593 storageClass="DataFrame", 

1594 dimensions=("instrument", "visit", "detector", "skymap", "tract") 

1595 ) 

1596 

1597 

1598class WriteForcedSourceTableConfig(pipeBase.PipelineTaskConfig, 

1599 pipelineConnections=WriteForcedSourceTableConnections): 

1600 key = lsst.pex.config.Field( 

1601 doc="Column on which to join the two input tables; it becomes the primary key of the output", 

1602 dtype=str, 

1603 default="objectId", 

1604 ) 

1605 

1606 

1607class WriteForcedSourceTableTask(pipeBase.PipelineTask): 

1608 """Merge and convert per-detector forced source catalogs to parquet. 

1609 

1610 Because the predecessor ForcedPhotCcdTask operates per-detector and 

1611 per-tract (i.e., it has tract in its dimensions), detectors on the 

1612 tract boundary may have multiple forced source catalogs. 

1613 

1614 The successor task TransformForcedSourceTable runs per-patch and 

1615 temporally aggregates the overlapping mergedForcedSource catalogs from 

1616 all available epochs. 

1617 """ 

1618 _DefaultName = "writeForcedSourceTable" 

1619 ConfigClass = WriteForcedSourceTableConfig 

1620 

1621 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

1622 inputs = butlerQC.get(inputRefs) 

1623 # Add ccdVisitId to allow joining with CcdVisitTable 

1624 inputs['ccdVisitId'] = butlerQC.quantum.dataId.pack("visit_detector") 

1625 inputs['band'] = butlerQC.quantum.dataId.full['band'] 

1626 outputs = self.run(**inputs) 

1627 butlerQC.put(outputs, outputRefs) 

1628 

1629 def run(self, inputCatalog, inputCatalogDiff, ccdVisitId=None, band=None): 

1630 dfs = [] 

1631 for table, dataset in zip((inputCatalog, inputCatalogDiff), ('calexp', 'diff')): 

1632 df = table.asAstropy().to_pandas().set_index(self.config.key, drop=False) 

1633 df = df.reindex(sorted(df.columns), axis=1) 

1634 df['ccdVisitId'] = ccdVisitId if ccdVisitId else pd.NA 

1635 df['band'] = band if band else pd.NA 

1636 df.columns = pd.MultiIndex.from_tuples([(dataset, c) for c in df.columns], 

1637 names=('dataset', 'column')) 

1638 

1639 dfs.append(df) 

1640 

1641 outputCatalog = functools.reduce(lambda d1, d2: d1.join(d2), dfs) 

1642 return pipeBase.Struct(outputCatalog=outputCatalog) 

1643 

1644 
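# Illustrative sketch only: WriteForcedSourceTableTask.run above labels the
# columns of each input with a ('dataset', 'column') MultiIndex and then joins
# the frames on their shared objectId index. The frames below are hypothetical
# stand-ins for the calexp and difference-image forced catalogs.
def _exampleMergeForcedSourceTables():
    import pandas as pd

    calexp = pd.DataFrame({"objectId": [1, 2],
                           "psfFlux": [10.0, 20.0]}).set_index("objectId", drop=False)
    diff = pd.DataFrame({"objectId": [1, 2],
                         "psfDiffFlux": [0.1, -0.2]}).set_index("objectId", drop=False)
    dfs = []
    for df, dataset in ((calexp, "calexp"), (diff, "diff")):
        df = df.copy()
        df.columns = pd.MultiIndex.from_tuples(
            [(dataset, c) for c in df.columns], names=("dataset", "column"))
        dfs.append(df)
    # Horizontal join on the shared objectId index.
    return dfs[0].join(dfs[1])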

1645class TransformForcedSourceTableConnections(pipeBase.PipelineTaskConnections, 

1646 dimensions=("instrument", "skymap", "patch", "tract")): 

1647 

1648 inputCatalogs = connectionTypes.Input( 

1649 doc="Parquet table of merged ForcedSources produced by WriteForcedSourceTableTask", 

1650 name="mergedForcedSource", 

1651 storageClass="DataFrame", 

1652 dimensions=("instrument", "visit", "detector", "skymap", "tract"), 

1653 multiple=True, 

1654 deferLoad=True 

1655 ) 

1656 referenceCatalog = connectionTypes.Input( 

1657 doc="Reference catalog which was used to seed the forcedPhot. Columns " 

1658 "objectId, detect_isPrimary, detect_isTractInner, detect_isPatchInner " 

1659 "are expected.", 

1660 name="objectTable", 

1661 storageClass="DataFrame", 

1662 dimensions=("tract", "patch", "skymap"), 

1663 deferLoad=True 

1664 ) 

1665 outputCatalog = connectionTypes.Output( 

1666 doc="Narrower, temporally-aggregated, per-patch ForcedSource Table transformed and converted per a " 

1667 "specified set of functors", 

1668 name="forcedSourceTable", 

1669 storageClass="DataFrame", 

1670 dimensions=("tract", "patch", "skymap") 

1671 ) 

1672 

1673 

1674class TransformForcedSourceTableConfig(TransformCatalogBaseConfig, 

1675 pipelineConnections=TransformForcedSourceTableConnections): 

1676 referenceColumns = pexConfig.ListField( 

1677 dtype=str, 

1678 default=["detect_isPrimary", "detect_isTractInner", "detect_isPatchInner"], 

1679 optional=True, 

1680 doc="Columns to pull from reference catalog", 

1681 ) 

1682 keyRef = lsst.pex.config.Field( 

1683 doc="Column on which to join the two input tables; it becomes the primary key of the output", 

1684 dtype=str, 

1685 default="objectId", 

1686 ) 

1687 key = lsst.pex.config.Field( 

1688 doc="Rename the output DataFrame index to this name", 

1689 dtype=str, 

1690 default="forcedSourceId", 

1691 ) 

1692 

1693 def setDefaults(self): 

1694 super().setDefaults() 

1695 self.functorFile = os.path.join('$PIPE_TASKS_DIR', 'schemas', 'ForcedSource.yaml') 

1696 self.columnsFromDataId = ['tract', 'patch'] 

1697 

1698 

1699class TransformForcedSourceTableTask(TransformCatalogBaseTask): 

1700 """Transform/standardize a ForcedSource catalog 

1701 

1702 Transforms each wide, per-detector forcedSource parquet table per the 

1703 specification file (per-camera defaults found in ForcedSource.yaml). 

1704 All epochs that overlap the patch are aggregated into one per-patch 

1705 narrow-parquet file. 

1706 

1707 No de-duplication of rows is performed. Duplicate-resolution flags are 

1708 pulled in from the referenceCatalog: `detect_isPrimary`, 

1709 `detect_isTractInner`, `detect_isPatchInner`, so that the user may 

1710 de-duplicate for analysis or compare duplicates for QA. 

1711 

1712 The resulting table includes multiple bands. Epochs (MJDs) and other 

1713 useful per-visit quantities can be retrieved by joining with the 

1714 CcdVisitTable on ccdVisitId. 

1715 """ 

1716 _DefaultName = "transformForcedSourceTable" 

1717 ConfigClass = TransformForcedSourceTableConfig 

1718 

1719 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

1720 inputs = butlerQC.get(inputRefs) 

1721 if self.funcs is None: 

1722 raise ValueError("config.functorFile is None. " 

1723 "Must be a valid path to yaml in order to run Task as a PipelineTask.") 

1724 outputs = self.run(inputs['inputCatalogs'], inputs['referenceCatalog'], funcs=self.funcs, 

1725 dataId=outputRefs.outputCatalog.dataId.full) 

1726 

1727 butlerQC.put(outputs, outputRefs) 

1728 

1729 def run(self, inputCatalogs, referenceCatalog, funcs=None, dataId=None, band=None): 

1730 dfs = [] 

1731 ref = referenceCatalog.get(parameters={"columns": self.config.referenceColumns}) 

1732 self.log.info("Aggregating %s input catalogs", len(inputCatalogs)) 

1733 for handle in inputCatalogs: 

1734 result = self.transform(None, handle, funcs, dataId) 

1735 # Filter for only rows that were detected on (overlap) the patch 

1736 dfs.append(result.df.join(ref, how='inner')) 

1737 

1738 outputCatalog = pd.concat(dfs) 

1739 

1740 # The join above was made on config.keyRef, so give the index that 

1741 # name explicitly, 

1742 outputCatalog.index.rename(self.config.keyRef, inplace=True) 

1743 # move config.keyRef into the column list, 

1744 outputCatalog.reset_index(inplace=True) 

1745 # make forcedSourceId the index, as specified in the functor file 

1746 # ForcedSource.yaml, 

1747 outputCatalog.set_index("forcedSourceId", inplace=True, verify_integrity=True) 

1748 # and finally rename the index to config.key. 

1749 outputCatalog.index.rename(self.config.key, inplace=True) 

1750 

1751 self.log.info("Made a table of %d columns and %d rows", 

1752 len(outputCatalog.columns), len(outputCatalog)) 

1753 return pipeBase.Struct(outputCatalog=outputCatalog) 

1754 

1755 
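# Illustrative sketch only: as noted in the docstring above, per-visit
# quantities such as expMidptMJD can be attached to the forced sources by
# joining on ccdVisitId. The two frames below are hypothetical stand-ins for
# the forcedSourceTable and ccdVisitTable datasets.
def _exampleJoinForcedSourcesWithCcdVisits():
    import pandas as pd

    forcedSources = pd.DataFrame({
        "forcedSourceId": [1, 2, 3],
        "ccdVisitId": [100, 100, 101],
        "psfDiffFlux": [0.5, -0.3, 1.1],
    }).set_index("forcedSourceId")
    ccdVisits = pd.DataFrame({
        "ccdVisitId": [100, 101],
        "expMidptMJD": [59945.1, 59947.3],
    }).set_index("ccdVisitId")
    return forcedSources.join(ccdVisits, on="ccdVisitId")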

1756class ConsolidateTractConnections(pipeBase.PipelineTaskConnections, 

1757 defaultTemplates={"catalogType": ""}, 

1758 dimensions=("instrument", "tract")): 

1759 inputCatalogs = connectionTypes.Input( 

1760 doc="Input per-patch DataFrame Tables to be concatenated", 

1761 name="{catalogType}ForcedSourceTable", 

1762 storageClass="DataFrame", 

1763 dimensions=("tract", "patch", "skymap"), 

1764 multiple=True, 

1765 ) 

1766 

1767 outputCatalog = connectionTypes.Output( 

1768 doc="Output per-tract concatenation of DataFrame Tables", 

1769 name="{catalogType}ForcedSourceTable_tract", 

1770 storageClass="DataFrame", 

1771 dimensions=("tract", "skymap"), 

1772 ) 

1773 

1774 

1775class ConsolidateTractConfig(pipeBase.PipelineTaskConfig, 

1776 pipelineConnections=ConsolidateTractConnections): 

1777 pass 

1778 

1779 

1780class ConsolidateTractTask(pipeBase.PipelineTask): 

1781 """Concatenate a list of per-patch DataFrames into a single 

1782 per-tract DataFrame. 

1783 """ 

1784 _DefaultName = 'ConsolidateTract' 

1785 ConfigClass = ConsolidateTractConfig 

1786 

1787 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

1788 inputs = butlerQC.get(inputRefs) 

1789 # Not checking that at least one inputCatalog exists, because an empty 

1790 # input list would mean an empty quantum graph (QG). 

1791 self.log.info("Concatenating %s per-patch %s Tables", 

1792 len(inputs['inputCatalogs']), 

1793 inputRefs.inputCatalogs[0].datasetType.name) 

1794 df = pd.concat(inputs['inputCatalogs']) 

1795 butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs)