Coverage for python/lsst/pipe/tasks/postprocess.py: 28%

651 statements  


1# This file is part of pipe_tasks. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

22__all__ = ["WriteObjectTableConfig", "WriteObjectTableTask", 

23 "WriteSourceTableConfig", "WriteSourceTableTask", 

24 "WriteRecalibratedSourceTableConfig", "WriteRecalibratedSourceTableTask", 

25 "PostprocessAnalysis", 

26 "TransformCatalogBaseConfig", "TransformCatalogBaseTask", 

27 "TransformObjectCatalogConfig", "TransformObjectCatalogTask", 

28 "ConsolidateObjectTableConfig", "ConsolidateObjectTableTask", 

29 "TransformSourceTableConfig", "TransformSourceTableTask", 

30 "ConsolidateVisitSummaryConfig", "ConsolidateVisitSummaryTask", 

31 "ConsolidateSourceTableConfig", "ConsolidateSourceTableTask", 

32 "MakeCcdVisitTableConfig", "MakeCcdVisitTableTask", 

33 "MakeVisitTableConfig", "MakeVisitTableTask", 

34 "WriteForcedSourceTableConfig", "WriteForcedSourceTableTask", 

35 "TransformForcedSourceTableConfig", "TransformForcedSourceTableTask", 

36 "ConsolidateTractConfig", "ConsolidateTractTask"] 

37 

38import functools 

39import pandas as pd 

40import logging 

41import numpy as np 

42import numbers 

43import os 

44 

45import lsst.geom 

46import lsst.pex.config as pexConfig 

47import lsst.pipe.base as pipeBase 

48import lsst.daf.base as dafBase 

49from lsst.pipe.base import connectionTypes 

50import lsst.afw.table as afwTable 

51from lsst.afw.image import ExposureSummaryStats 

52from lsst.meas.base import SingleFrameMeasurementTask, DetectorVisitIdGeneratorConfig 

53from lsst.skymap import BaseSkyMap 

54 

55from .functors import CompositeFunctor, Column 

56 

57log = logging.getLogger(__name__) 

58 

59 

60def flattenFilters(df, noDupCols=['coord_ra', 'coord_dec'], camelCase=False, inputBands=None): 

61 """Flattens a dataframe with multilevel column index. 

62 """ 

63 newDf = pd.DataFrame() 

64 # band is the level 0 index 

65 dfBands = df.columns.unique(level=0).values 

66 for band in dfBands: 

67 subdf = df[band] 

68 columnFormat = '{0}{1}' if camelCase else '{0}_{1}' 

69 newColumns = {c: columnFormat.format(band, c) 

70 for c in subdf.columns if c not in noDupCols} 

71 cols = list(newColumns.keys()) 

72 newDf = pd.concat([newDf, subdf[cols].rename(columns=newColumns)], axis=1) 

73 

74 # Band must be present in the input and output or else column is all NaN: 

75 presentBands = dfBands if inputBands is None else list(set(inputBands).intersection(dfBands)) 

76 # Get the unexploded columns from any present band's partition 

77 noDupDf = df[presentBands[0]][noDupCols] 

78 newDf = pd.concat([noDupDf, newDf], axis=1) 

79 return newDf 
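
# --- Editor-added sketch (not part of the original module) ---
# Minimal illustration of flattenFilters on a toy two-band MultiIndex frame; the
# column names and values are hypothetical, and the pandas import above is reused.
def _exampleFlattenFilters():
    toy = pd.DataFrame({('g', 'coord_ra'): [1.0], ('g', 'coord_dec'): [2.0], ('g', 'psfFlux'): [10.0],
                        ('r', 'coord_ra'): [1.0], ('r', 'coord_dec'): [2.0], ('r', 'psfFlux'): [12.0]})
    flat = flattenFilters(toy)
    # flat.columns -> ['coord_ra', 'coord_dec', 'g_psfFlux', 'r_psfFlux']
    return flat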

80 

81 

82class WriteObjectTableConnections(pipeBase.PipelineTaskConnections, 

83 defaultTemplates={"coaddName": "deep"}, 

84 dimensions=("tract", "patch", "skymap")): 

85 inputCatalogMeas = connectionTypes.Input( 

86 doc="Catalog of source measurements on the deepCoadd.", 

87 dimensions=("tract", "patch", "band", "skymap"), 

88 storageClass="SourceCatalog", 

89 name="{coaddName}Coadd_meas", 

90 multiple=True 

91 ) 

92 inputCatalogForcedSrc = connectionTypes.Input( 

93 doc="Catalog of forced measurements (shape and position parameters held fixed) on the deepCoadd.", 

94 dimensions=("tract", "patch", "band", "skymap"), 

95 storageClass="SourceCatalog", 

96 name="{coaddName}Coadd_forced_src", 

97 multiple=True 

98 ) 

99 inputCatalogRef = connectionTypes.Input( 

100 doc="Catalog marking the primary detection (which band provides a good shape and position)" 

101 "for each detection in deepCoadd_mergeDet.", 

102 dimensions=("tract", "patch", "skymap"), 

103 storageClass="SourceCatalog", 

104 name="{coaddName}Coadd_ref" 

105 ) 

106 outputCatalog = connectionTypes.Output( 

107 doc="A vertical concatenation of the deepCoadd_{ref|meas|forced_src} catalogs, " 

108 "stored as a DataFrame with a multi-level column index per-patch.", 

109 dimensions=("tract", "patch", "skymap"), 

110 storageClass="DataFrame", 

111 name="{coaddName}Coadd_obj" 

112 ) 

113 

114 

115class WriteObjectTableConfig(pipeBase.PipelineTaskConfig, 

116 pipelineConnections=WriteObjectTableConnections): 

117 engine = pexConfig.Field( 

118 dtype=str, 

119 default="pyarrow", 

120 doc="Parquet engine for writing (pyarrow or fastparquet)", 

121 deprecated="This config is no longer used, and will be removed after v26." 

122 ) 

123 coaddName = pexConfig.Field( 

124 dtype=str, 

125 default="deep", 

126 doc="Name of coadd" 

127 ) 

128 

129 

130class WriteObjectTableTask(pipeBase.PipelineTask): 

131 """Write filter-merged source tables as a DataFrame in parquet format. 

132 """ 

133 _DefaultName = "writeObjectTable" 

134 ConfigClass = WriteObjectTableConfig 

135 

136 # Names of table datasets to be merged 

137 inputDatasets = ('forced_src', 'meas', 'ref') 

138 

139 # Tag of output dataset written by `MergeSourcesTask.write` 

140 outputDataset = 'obj' 

141 

142 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

143 inputs = butlerQC.get(inputRefs) 

144 

145 measDict = {ref.dataId['band']: {'meas': cat} for ref, cat in 

146 zip(inputRefs.inputCatalogMeas, inputs['inputCatalogMeas'])} 

147 forcedSourceDict = {ref.dataId['band']: {'forced_src': cat} for ref, cat in 

148 zip(inputRefs.inputCatalogForcedSrc, inputs['inputCatalogForcedSrc'])} 

149 

150 catalogs = {} 

151 for band in measDict.keys(): 

152 catalogs[band] = {'meas': measDict[band]['meas'], 

153 'forced_src': forcedSourceDict[band]['forced_src'], 

154 'ref': inputs['inputCatalogRef']} 

155 dataId = butlerQC.quantum.dataId 

156 df = self.run(catalogs=catalogs, tract=dataId['tract'], patch=dataId['patch']) 

157 outputs = pipeBase.Struct(outputCatalog=df) 

158 butlerQC.put(outputs, outputRefs) 

159 

160 def run(self, catalogs, tract, patch): 

161 """Merge multiple catalogs. 

162 

163 Parameters 

164 ---------- 

165 catalogs : `dict` 

166 Mapping from filter names to dict of catalogs. 

167 tract : int 

168 tractId to use for the tractId column. 

169 patch : str 

170 patchId to use for the patchId column. 

171 

172 Returns 

173 ------- 

174 catalog : `pandas.DataFrame` 

175 Merged dataframe. 

176 """ 

177 dfs = [] 

178 for filt, tableDict in catalogs.items(): 

179 for dataset, table in tableDict.items(): 

180 # Convert afwTable to pandas DataFrame 

181 df = table.asAstropy().to_pandas().set_index('id', drop=True) 

182 

183 # Sort columns by name, to ensure matching schema among patches 

184 df = df.reindex(sorted(df.columns), axis=1) 

185 df = df.assign(tractId=tract, patchId=patch) 

186 

187 # Make columns a 3-level MultiIndex 

188 df.columns = pd.MultiIndex.from_tuples([(dataset, filt, c) for c in df.columns], 

189 names=('dataset', 'band', 'column')) 

190 dfs.append(df) 

191 

192 # We do this dance and not `pd.concat(dfs)` because the pandas 

193 # concatenation uses infinite memory. 

194 catalog = functools.reduce(lambda d1, d2: d1.join(d2), dfs) 

195 return catalog 
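
# --- Editor-added sketch (not part of the original module) ---
# Toy illustration of the column-wise merge above: per-dataset frames carrying a
# three-level (dataset, band, column) index are joined on their shared source-id
# index with functools.reduce, rather than concatenated in one pd.concat call.
# Column names and ids below are hypothetical.
def _exampleReduceJoin():
    d1 = pd.DataFrame({('meas', 'g', 'psfFlux'): [1.0, 2.0]}, index=[10, 11])
    d2 = pd.DataFrame({('ref', 'g', 'detect_isPrimary'): [True, False]}, index=[10, 11])
    merged = functools.reduce(lambda a, b: a.join(b), [d1, d2])
    # merged has one row per source id and a 3-level column MultiIndex
    return merged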

196 

197 

198class WriteSourceTableConnections(pipeBase.PipelineTaskConnections, 

199 defaultTemplates={"catalogType": ""}, 

200 dimensions=("instrument", "visit", "detector")): 

201 

202 catalog = connectionTypes.Input( 

203 doc="Input full-depth catalog of sources produced by CalibrateTask", 

204 name="{catalogType}src", 

205 storageClass="SourceCatalog", 

206 dimensions=("instrument", "visit", "detector") 

207 ) 

208 outputCatalog = connectionTypes.Output( 

209 doc="Catalog of sources, `src` in DataFrame/Parquet format. The 'id' column is " 

210 "replaced with an index; all other columns are unchanged.", 

211 name="{catalogType}source", 

212 storageClass="DataFrame", 

213 dimensions=("instrument", "visit", "detector") 

214 ) 

215 

216 

217class WriteSourceTableConfig(pipeBase.PipelineTaskConfig, 

218 pipelineConnections=WriteSourceTableConnections): 

219 idGenerator = DetectorVisitIdGeneratorConfig.make_field() 

220 

221 

222class WriteSourceTableTask(pipeBase.PipelineTask): 

223 """Write source table to DataFrame Parquet format. 

224 """ 

225 _DefaultName = "writeSourceTable" 

226 ConfigClass = WriteSourceTableConfig 

227 

228 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

229 inputs = butlerQC.get(inputRefs) 

230 inputs['ccdVisitId'] = self.config.idGenerator.apply(butlerQC.quantum.dataId).catalog_id 

231 result = self.run(**inputs) 

232 outputs = pipeBase.Struct(outputCatalog=result.table) 

233 butlerQC.put(outputs, outputRefs) 

234 

235 def run(self, catalog, ccdVisitId=None, **kwargs): 

236 """Convert `src` catalog to DataFrame 

237 

238 Parameters 

239 ---------- 

240 catalog: `afwTable.SourceCatalog` 

241 catalog to be converted 

242 ccdVisitId: `int` 

243 ccdVisitId to be added as a column 

244 **kwargs 

245 Additional keyword arguments are ignored as a convenience for 

246 subclasses that pass the same arguments to several different 

247 methods. 

248 

249 Returns 

250 ------- 

251 result : `~lsst.pipe.base.Struct` 

252 ``table`` 

253 `DataFrame` version of the input catalog 

254 """ 

255 self.log.info("Generating DataFrame from src catalog ccdVisitId=%s", ccdVisitId) 

256 df = catalog.asAstropy().to_pandas().set_index('id', drop=True) 

257 df['ccdVisitId'] = ccdVisitId 

258 

259 return pipeBase.Struct(table=df) 

260 

261 

262class WriteRecalibratedSourceTableConnections(WriteSourceTableConnections, 

263 defaultTemplates={"catalogType": "", 

264 "skyWcsName": "gbdesAstrometricFit", 

265 "photoCalibName": "fgcm"}, 

266 dimensions=("instrument", "visit", "detector", "skymap")): 

267 skyMap = connectionTypes.Input( 

268 doc="skyMap needed to choose which tract-level calibrations to use when multiple available", 

269 name=BaseSkyMap.SKYMAP_DATASET_TYPE_NAME, 

270 storageClass="SkyMap", 

271 dimensions=("skymap",), 

272 ) 

273 exposure = connectionTypes.Input( 

274 doc="Input exposure to perform photometry on.", 

275 name="calexp", 

276 storageClass="ExposureF", 

277 dimensions=["instrument", "visit", "detector"], 

278 ) 

279 externalSkyWcsTractCatalog = connectionTypes.Input( 

280 doc=("Per-tract, per-visit wcs calibrations. These catalogs use the detector " 

281 "id for the catalog id, sorted on id for fast lookup."), 

282 name="{skyWcsName}SkyWcsCatalog", 

283 storageClass="ExposureCatalog", 

284 dimensions=["instrument", "visit", "tract"], 

285 multiple=True 

286 ) 

287 externalSkyWcsGlobalCatalog = connectionTypes.Input( 

288 doc=("Per-visit wcs calibrations computed globally (with no tract information). " 

289 "These catalogs use the detector id for the catalog id, sorted on id for " 

290 "fast lookup."), 

291 name="finalVisitSummary", 

292 storageClass="ExposureCatalog", 

293 dimensions=["instrument", "visit"], 

294 ) 

295 externalPhotoCalibTractCatalog = connectionTypes.Input( 

296 doc=("Per-tract, per-visit photometric calibrations. These catalogs use the " 

297 "detector id for the catalog id, sorted on id for fast lookup."), 

298 name="{photoCalibName}PhotoCalibCatalog", 

299 storageClass="ExposureCatalog", 

300 dimensions=["instrument", "visit", "tract"], 

301 multiple=True 

302 ) 

303 externalPhotoCalibGlobalCatalog = connectionTypes.Input( 

304 doc=("Per-visit photometric calibrations computed globally (with no tract " 

305 "information). These catalogs use the detector id for the catalog id, " 

306 "sorted on id for fast lookup."), 

307 name="finalVisitSummary", 

308 storageClass="ExposureCatalog", 

309 dimensions=["instrument", "visit"], 

310 ) 

311 

312 def __init__(self, *, config=None): 

313 super().__init__(config=config) 

314 # Same connection boilerplate as all other applications of 

315 # Global/Tract calibrations 

316 if config.doApplyExternalSkyWcs and config.doReevaluateSkyWcs: 

317 if config.useGlobalExternalSkyWcs: 

318 self.inputs.remove("externalSkyWcsTractCatalog") 

319 else: 

320 self.inputs.remove("externalSkyWcsGlobalCatalog") 

321 else: 

322 self.inputs.remove("externalSkyWcsTractCatalog") 

323 self.inputs.remove("externalSkyWcsGlobalCatalog") 

324 if config.doApplyExternalPhotoCalib and config.doReevaluatePhotoCalib: 

325 if config.useGlobalExternalPhotoCalib: 

326 self.inputs.remove("externalPhotoCalibTractCatalog") 

327 else: 

328 self.inputs.remove("externalPhotoCalibGlobalCatalog") 

329 else: 

330 self.inputs.remove("externalPhotoCalibTractCatalog") 

331 self.inputs.remove("externalPhotoCalibGlobalCatalog") 
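
# Editor's note on the pruning above: when both the "reevaluate" and "apply external"
# flags are set for a calibration product, exactly one of the global/tract connections
# is kept (chosen by the corresponding useGlobalExternal* flag); in every other case
# both external connections for that product are removed from the quantum's inputs.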

332 

333 

334class WriteRecalibratedSourceTableConfig(WriteSourceTableConfig, 

335 pipelineConnections=WriteRecalibratedSourceTableConnections): 

336 

337 doReevaluatePhotoCalib = pexConfig.Field( 

338 dtype=bool, 

339 default=True, 

340 doc=("Add or replace local photoCalib columns") 

341 ) 

342 doReevaluateSkyWcs = pexConfig.Field( 

343 dtype=bool, 

344 default=True, 

345 doc=("Add or replace local WCS columns and update the coord columns, coord_ra and coord_dec") 

346 ) 

347 doApplyExternalPhotoCalib = pexConfig.Field( 

348 dtype=bool, 

349 default=True, 

350 doc=("If and only if doReevaluatePhotoCalib, apply the photometric calibrations from an external ", 

351 "algorithm such as FGCM or jointcal, else use the photoCalib already attached to the exposure."), 

352 ) 

353 doApplyExternalSkyWcs = pexConfig.Field( 

354 dtype=bool, 

355 default=True, 

356 doc=("if and only if doReevaluateSkyWcs, apply the WCS from an external algorithm such as jointcal, ", 

357 "else use the wcs already attached to the exposure."), 

358 ) 

359 useGlobalExternalPhotoCalib = pexConfig.Field( 

360 dtype=bool, 

361 default=True, 

362 doc=("When using doApplyExternalPhotoCalib, use 'global' calibrations " 

363 "that are not run per-tract. When False, use per-tract photometric " 

364 "calibration files.") 

365 ) 

366 useGlobalExternalSkyWcs = pexConfig.Field( 

367 dtype=bool, 

368 default=True, 

369 doc=("When using doApplyExternalSkyWcs, use 'global' calibrations " 

370 "that are not run per-tract. When False, use per-tract wcs " 

371 "files.") 

372 ) 

373 idGenerator = DetectorVisitIdGeneratorConfig.make_field() 

374 

375 def validate(self): 

376 super().validate() 

377 if self.doApplyExternalSkyWcs and not self.doReevaluateSkyWcs: 

378 log.warning("doApplyExternalSkyWcs=True but doReevaluateSkyWcs=False" 

379 "External SkyWcs will not be read or evaluated.") 

380 if self.doApplyExternalPhotoCalib and not self.doReevaluatePhotoCalib: 

381 log.warning("doApplyExternalPhotoCalib=True but doReevaluatePhotoCalib=False." 

382 "External PhotoCalib will not be read or evaluated.") 

383 

384 

385class WriteRecalibratedSourceTableTask(WriteSourceTableTask): 

386 """Write source table to DataFrame Parquet format. 

387 """ 

388 _DefaultName = "writeRecalibratedSourceTable" 

389 ConfigClass = WriteRecalibratedSourceTableConfig 

390 

391 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

392 inputs = butlerQC.get(inputRefs) 

393 

394 idGenerator = self.config.idGenerator.apply(butlerQC.quantum.dataId) 

395 inputs['idGenerator'] = idGenerator 

396 inputs['ccdVisitId'] = idGenerator.catalog_id 

397 

398 if self.config.doReevaluatePhotoCalib or self.config.doReevaluateSkyWcs: 

399 if self.config.doApplyExternalPhotoCalib or self.config.doApplyExternalSkyWcs: 

400 inputs['exposure'] = self.attachCalibs(inputRefs, **inputs) 

401 

402 inputs['catalog'] = self.addCalibColumns(**inputs) 

403 

404 result = self.run(**inputs) 

405 outputs = pipeBase.Struct(outputCatalog=result.table) 

406 butlerQC.put(outputs, outputRefs) 

407 

408 def attachCalibs(self, inputRefs, skyMap, exposure, externalSkyWcsGlobalCatalog=None, 

409 externalSkyWcsTractCatalog=None, externalPhotoCalibGlobalCatalog=None, 

410 externalPhotoCalibTractCatalog=None, **kwargs): 

411 """Apply external calibrations to exposure per configuration 

412 

413 When multiple tract-level calibrations overlap, select the one with the 

414 center closest to detector. 

415 

416 Parameters 

417 ---------- 

418 inputRefs : `~lsst.pipe.base.InputQuantizedConnection` 

419 Input references, used for the dataIds of the tract-level calibs. 

420 skyMap : `~lsst.skymap.BaseSkyMap` 

421 skyMap to lookup tract geometry and WCS. 

422 exposure : `lsst.afw.image.exposure.Exposure` 

423 Input exposure to adjust calibrations. 

424 externalSkyWcsGlobalCatalog : `~lsst.afw.table.ExposureCatalog`, optional 

425 Exposure catalog with external skyWcs to be applied per config 

426 externalSkyWcsTractCatalog : `~lsst.afw.table.ExposureCatalog`, optional 

427 Exposure catalog with external skyWcs to be applied per config 

428 externalPhotoCalibGlobalCatalog : `~lsst.afw.table.ExposureCatalog`, optional 

429 Exposure catalog with external photoCalib to be applied per config 

430 externalPhotoCalibTractCatalog : `~lsst.afw.table.ExposureCatalog`, optional 

431 Exposure catalog with external photoCalib to be applied per config 

432 **kwargs 

433 Additional keyword arguments are ignored to facilitate passing the 

434 same arguments to several methods. 

435 

436 Returns 

437 ------- 

438 exposure : `lsst.afw.image.exposure.Exposure` 

439 Exposure with adjusted calibrations. 

440 """ 

441 if not self.config.doApplyExternalSkyWcs: 

442 # Do not modify the exposure's SkyWcs 

443 externalSkyWcsCatalog = None 

444 elif self.config.useGlobalExternalSkyWcs: 

445 # Use the global external SkyWcs 

446 externalSkyWcsCatalog = externalSkyWcsGlobalCatalog 

447 self.log.info('Applying global SkyWcs') 

448 else: 

449 # use tract-level external SkyWcs from the closest overlapping tract 

450 inputRef = getattr(inputRefs, 'externalSkyWcsTractCatalog') 

451 tracts = [ref.dataId['tract'] for ref in inputRef] 

452 if len(tracts) == 1: 

453 ind = 0 

454 self.log.info('Applying tract-level SkyWcs from tract %s', tracts[ind]) 

455 else: 

456 if exposure.getWcs() is None: # TODO: could this look-up use the externalPhotoCalib? 

457 raise ValueError("Trying to locate nearest tract, but exposure.wcs is None.") 

458 ind = self.getClosestTract(tracts, skyMap, 

459 exposure.getBBox(), exposure.getWcs()) 

460 self.log.info('Multiple overlapping externalSkyWcsTractCatalogs found (%s). ' 

461 'Applying closest to detector center: tract=%s', str(tracts), tracts[ind]) 

462 

463 externalSkyWcsCatalog = externalSkyWcsTractCatalog[ind] 

464 

465 if not self.config.doApplyExternalPhotoCalib: 

466 # Do not modify the exposure's PhotoCalib 

467 externalPhotoCalibCatalog = None 

468 elif self.config.useGlobalExternalPhotoCalib: 

469 # Use the global external PhotoCalib 

470 externalPhotoCalibCatalog = externalPhotoCalibGlobalCatalog 

471 self.log.info('Applying global PhotoCalib') 

472 else: 

473 # use tract-level external PhotoCalib from the closest overlapping tract 

474 inputRef = getattr(inputRefs, 'externalPhotoCalibTractCatalog') 

475 tracts = [ref.dataId['tract'] for ref in inputRef] 

476 if len(tracts) == 1: 

477 ind = 0 

478 self.log.info('Applying tract-level PhotoCalib from tract %s', tracts[ind]) 

479 else: 

480 ind = self.getClosestTract(tracts, skyMap, 

481 exposure.getBBox(), exposure.getWcs()) 

482 self.log.info('Multiple overlapping externalPhotoCalibTractCatalogs found (%s). ' 

483 'Applying closest to detector center: tract=%s', str(tracts), tracts[ind]) 

484 

485 externalPhotoCalibCatalog = externalPhotoCalibTractCatalog[ind] 

486 

487 return self.prepareCalibratedExposure(exposure, externalSkyWcsCatalog, externalPhotoCalibCatalog) 

488 

489 def getClosestTract(self, tracts, skyMap, bbox, wcs): 

490 """Find the index of the tract closest to detector from list of tractIds 

491 

492 Parameters 

493 ---------- 

494 tracts: `list` [`int`] 

495 Iterable of integer tractIds 

496 skyMap : `~lsst.skymap.BaseSkyMap` 

497 skyMap to lookup tract geometry and wcs 

498 bbox : `~lsst.geom.Box2I` 

499 Detector bbox, the center of which will be compared to the tract centers. 

500 wcs : `~lsst.afw.geom.SkyWcs` 

501 Detector Wcs object to map the detector center to SkyCoord 

502 

503 Returns 

504 ------- 

505 index : `int` 

506 """ 

507 if len(tracts) == 1: 

508 return 0 

509 

510 center = wcs.pixelToSky(bbox.getCenter()) 

511 sep = [] 

512 for tractId in tracts: 

513 tract = skyMap[tractId] 

514 tractCenter = tract.getWcs().pixelToSky(tract.getBBox().getCenter()) 

515 sep.append(center.separation(tractCenter)) 

516 

517 return np.argmin(sep) 
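
# --- Editor-added sketch (not part of the original module) ---
# Purely illustrative analogue of getClosestTract: the same argmin-over-separations
# pattern on flat, small-angle toy coordinates (degrees); values are hypothetical.
def _exampleClosestCenter():
    detector_center = np.array([150.0, 2.0])
    tract_centers = np.array([[149.0, 2.5], [150.1, 2.1], [152.0, 1.0]])
    separations = np.linalg.norm(tract_centers - detector_center, axis=1)
    return int(np.argmin(separations))  # -> 1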

518 

519 def prepareCalibratedExposure(self, exposure, externalSkyWcsCatalog=None, externalPhotoCalibCatalog=None): 

520 """Prepare a calibrated exposure and apply external calibrations 

521 if so configured. 

522 

523 Parameters 

524 ---------- 

525 exposure : `lsst.afw.image.exposure.Exposure` 

526 Input exposure to adjust calibrations. 

527 externalSkyWcsCatalog : `lsst.afw.table.ExposureCatalog`, optional 

528 Exposure catalog with external skyWcs to be applied 

529 if config.doApplyExternalSkyWcs=True. Catalog uses the detector id 

530 for the catalog id, sorted on id for fast lookup. 

531 externalPhotoCalibCatalog : `lsst.afw.table.ExposureCatalog`, optional 

532 Exposure catalog with external photoCalib to be applied 

533 if config.doApplyExternalPhotoCalib=True. Catalog uses the detector 

534 id for the catalog id, sorted on id for fast lookup. 

535 

536 Returns 

537 ------- 

538 exposure : `lsst.afw.image.exposure.Exposure` 

539 Exposure with adjusted calibrations. 

540 """ 

541 detectorId = exposure.getInfo().getDetector().getId() 

542 

543 if externalPhotoCalibCatalog is not None: 

544 row = externalPhotoCalibCatalog.find(detectorId) 

545 if row is None: 

546 self.log.warning("Detector id %s not found in externalPhotoCalibCatalog; " 

547 "Using original photoCalib.", detectorId) 

548 else: 

549 photoCalib = row.getPhotoCalib() 

550 if photoCalib is None: 

551 self.log.warning("Detector id %s has None for photoCalib in externalPhotoCalibCatalog; " 

552 "Using original photoCalib.", detectorId) 

553 else: 

554 exposure.setPhotoCalib(photoCalib) 

555 

556 if externalSkyWcsCatalog is not None: 

557 row = externalSkyWcsCatalog.find(detectorId) 

558 if row is None: 

559 self.log.warning("Detector id %s not found in externalSkyWcsCatalog; " 

560 "Using original skyWcs.", detectorId) 

561 else: 

562 skyWcs = row.getWcs() 

563 if skyWcs is None: 

564 self.log.warning("Detector id %s has None for skyWcs in externalSkyWcsCatalog; " 

565 "Using original skyWcs.", detectorId) 

566 else: 

567 exposure.setWcs(skyWcs) 

568 

569 return exposure 
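
# Editor's note: prepareCalibratedExposure only replaces a calibration it can actually
# find; if the detector row is missing from the external catalog, or the row holds
# None, the exposure keeps its original photoCalib/skyWcs and a warning is logged.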

570 

571 def addCalibColumns(self, catalog, exposure, idGenerator, **kwargs): 

572 """Add replace columns with calibs evaluated at each centroid 

573 

574 Add or replace 'base_LocalWcs' `base_LocalPhotoCalib' columns in a 

575 a source catalog, by rerunning the plugins. 

576 

577 Parameters 

578 ---------- 

579 catalog : `lsst.afw.table.SourceCatalog` 

580 catalog to which calib columns will be added 

581 exposure : `lsst.afw.image.exposure.Exposure` 

582 Exposure with attached PhotoCalibs and SkyWcs attributes to be 

583 reevaluated at local centroids. Pixels are not required. 

584 idGenerator : `lsst.meas.base.IdGenerator` 

585 Object that generates Source IDs and random seeds. 

586 **kwargs 

587 Additional keyword arguments are ignored to facilitate passing the 

588 same arguments to several methods. 

589 

590 Returns 

591 ------- 

592 newCat: `lsst.afw.table.SourceCatalog` 

593 Source Catalog with requested local calib columns 

594 """ 

595 measureConfig = SingleFrameMeasurementTask.ConfigClass() 

596 measureConfig.doReplaceWithNoise = False 

597 

598 # Clear all slots, because we aren't running the relevant plugins. 

599 for slot in measureConfig.slots: 

600 setattr(measureConfig.slots, slot, None) 

601 

602 measureConfig.plugins.names = [] 

603 if self.config.doReevaluateSkyWcs: 

604 measureConfig.plugins.names.add('base_LocalWcs') 

605 self.log.info("Re-evaluating base_LocalWcs plugin") 

606 if self.config.doReevaluatePhotoCalib: 

607 measureConfig.plugins.names.add('base_LocalPhotoCalib') 

608 self.log.info("Re-evaluating base_LocalPhotoCalib plugin") 

609 pluginsNotToCopy = tuple(measureConfig.plugins.names) 

610 

611 # Create a new schema and catalog 

612 # Copy all columns from original except for the ones to reevaluate 

613 aliasMap = catalog.schema.getAliasMap() 

614 mapper = afwTable.SchemaMapper(catalog.schema) 

615 for item in catalog.schema: 

616 if not item.field.getName().startswith(pluginsNotToCopy): 

617 mapper.addMapping(item.key) 

618 

619 schema = mapper.getOutputSchema() 

620 measurement = SingleFrameMeasurementTask(config=measureConfig, schema=schema) 

621 schema.setAliasMap(aliasMap) 

622 newCat = afwTable.SourceCatalog(schema) 

623 newCat.extend(catalog, mapper=mapper) 

624 

625 # Fluxes in sourceCatalogs are in counts, so there are no fluxes to 

626 # update here. LocalPhotoCalibs are applied during transform tasks. 

627 # Update coord_ra/coord_dec, which are expected to be positions on the 

628 # sky and are used as such in sdm tables without transform 

629 if self.config.doReevaluateSkyWcs and exposure.wcs is not None: 

630 afwTable.updateSourceCoords(exposure.wcs, newCat) 

631 

632 measurement.run(measCat=newCat, exposure=exposure, exposureId=idGenerator.catalog_id) 

633 

634 return newCat 

635 

636 

637class PostprocessAnalysis(object): 

638 """Calculate columns from DataFrames or handles storing DataFrames. 

639 

640 This object manages and organizes an arbitrary set of computations 

641 on a catalog. The catalog is defined by a 

642 `DeferredDatasetHandle` or `InMemoryDatasetHandle` object 

643 (or list thereof), such as a ``deepCoadd_obj`` dataset, and the 

644 computations are defined by a collection of 

645 `~lsst.pipe.tasks.functors.Functor` objects (or, equivalently, a 

646 ``CompositeFunctor``). 

647 

648 After the object is initialized, accessing the ``.df`` attribute (which 

649 holds the `pandas.DataFrame` containing the results of the calculations) 

650 triggers computation of said dataframe. 

651 

652 One of the conveniences of using this object is the ability to define a 

653 desired common filter for all functors. This enables the same functor 

654 collection to be passed to several different `PostprocessAnalysis` objects 

655 without having to change the original functor collection, since the ``filt`` 

656 keyword argument of this object triggers an overwrite of the ``filt`` 

657 property for all functors in the collection. 

658 

659 This object also allows a list of refFlags to be passed, and defines a set 

660 of default refFlags that are always included even if not requested. 

661 

662 If a list of DataFrames or Handles is passed, rather than a single one, 

663 then the calculations will be mapped over all the input catalogs. In 

664 principle, it should be straightforward to parallelize this activity, but 

665 initial tests have failed (see TODO in code comments). 

666 

667 Parameters 

668 ---------- 

669 handles : `~lsst.daf.butler.DeferredDatasetHandle` or 

670 `~lsst.pipe.base.InMemoryDatasetHandle` or 

671 list of these. 

672 Source catalog(s) for computation. 

673 functors : `list`, `dict`, or `~lsst.pipe.tasks.functors.CompositeFunctor` 

674 Computations to do (functors that act on ``handles``). 

675 If a dict, the output 

676 DataFrame will have columns keyed accordingly. 

677 If a list, the column keys will come from the 

678 ``.shortname`` attribute of each functor. 

679 

680 filt : `str`, optional 

681 Filter in which to calculate. If provided, 

682 this will overwrite any existing ``.filt`` attribute 

683 of the provided functors. 

684 

685 flags : `list`, optional 

686 List of flags (per-band) to include in output table. 

687 Taken from the ``meas`` dataset if applied to a multilevel Object Table. 

688 

689 refFlags : `list`, optional 

690 List of refFlags (only reference band) to include in output table. 

691 

692 forcedFlags : `list`, optional 

693 List of flags (per-band) to include in output table. 

694 Taken from the ``forced_src`` dataset if applied to a 

695 multilevel Object Table. Intended for flags from measurement plugins 

696 only run during multi-band forced-photometry. 

697 """ 

698 _defaultRefFlags = [] 

699 _defaultFuncs = () 

700 

701 def __init__(self, handles, functors, filt=None, flags=None, refFlags=None, forcedFlags=None): 

702 self.handles = handles 

703 self.functors = functors 

704 

705 self.filt = filt 

706 self.flags = list(flags) if flags is not None else [] 

707 self.forcedFlags = list(forcedFlags) if forcedFlags is not None else [] 

708 self.refFlags = list(self._defaultRefFlags) 

709 if refFlags is not None: 

710 self.refFlags += list(refFlags) 

711 

712 self._df = None 

713 

714 @property 

715 def defaultFuncs(self): 

716 funcs = dict(self._defaultFuncs) 

717 return funcs 

718 

719 @property 

720 def func(self): 

721 additionalFuncs = self.defaultFuncs 

722 additionalFuncs.update({flag: Column(flag, dataset='forced_src') for flag in self.forcedFlags}) 

723 additionalFuncs.update({flag: Column(flag, dataset='ref') for flag in self.refFlags}) 

724 additionalFuncs.update({flag: Column(flag, dataset='meas') for flag in self.flags}) 

725 

726 if isinstance(self.functors, CompositeFunctor): 

727 func = self.functors 

728 else: 

729 func = CompositeFunctor(self.functors) 

730 

731 func.funcDict.update(additionalFuncs) 

732 func.filt = self.filt 

733 

734 return func 

735 

736 @property 

737 def noDupCols(self): 

738 return [name for name, func in self.func.funcDict.items() if func.noDup or func.dataset == 'ref'] 

739 

740 @property 

741 def df(self): 

742 if self._df is None: 

743 self.compute() 

744 return self._df 

745 

746 def compute(self, dropna=False, pool=None): 

747 # map over multiple handles 

748 if type(self.handles) in (list, tuple): 

749 if pool is None: 

750 dflist = [self.func(handle, dropna=dropna) for handle in self.handles] 

751 else: 

752 # TODO: Figure out why this doesn't work (pyarrow pickling 

753 # issues?) 

754 dflist = pool.map(functools.partial(self.func, dropna=dropna), self.handles) 

755 self._df = pd.concat(dflist) 

756 else: 

757 self._df = self.func(self.handles, dropna=dropna) 

758 

759 return self._df 
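
# --- Editor-added usage sketch (not part of the original module) ---
# A hedged example of driving PostprocessAnalysis by hand; the butler call, dataId
# values, and column names are illustrative only.
# handle = butler.getDeferred("deepCoadd_obj", tract=0, patch=42, skymap="hsc_rings_v1")
# analysis = PostprocessAnalysis(handle,
#                                functors={"ra": Column("coord_ra", dataset="ref")},
#                                flags=["base_PixelFlags_flag"],  # hypothetical flag name
#                                filt="i")
# df = analysis.df  # first access triggers compute()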

760 

761 

762class TransformCatalogBaseConnections(pipeBase.PipelineTaskConnections, 

763 dimensions=()): 

764 """Expected Connections for subclasses of TransformCatalogBaseTask. 

765 

766 Must be subclassed. 

767 """ 

768 inputCatalog = connectionTypes.Input( 

769 name="", 

770 storageClass="DataFrame", 

771 ) 

772 outputCatalog = connectionTypes.Output( 

773 name="", 

774 storageClass="DataFrame", 

775 ) 

776 

777 

778class TransformCatalogBaseConfig(pipeBase.PipelineTaskConfig, 

779 pipelineConnections=TransformCatalogBaseConnections): 

780 functorFile = pexConfig.Field( 

781 dtype=str, 

782 doc="Path to YAML file specifying Science Data Model functors to use " 

783 "when copying columns and computing calibrated values.", 

784 default=None, 

785 optional=True 

786 ) 

787 primaryKey = pexConfig.Field( 

788 dtype=str, 

789 doc="Name of column to be set as the DataFrame index. If None, the index" 

790 "will be named `id`", 

791 default=None, 

792 optional=True 

793 ) 

794 columnsFromDataId = pexConfig.ListField( 

795 dtype=str, 

796 default=None, 

797 optional=True, 

798 doc="Columns to extract from the dataId", 

799 ) 

800 

801 

802class TransformCatalogBaseTask(pipeBase.PipelineTask): 

803 """Base class for transforming/standardizing a catalog by applying functors 

804 that convert units and apply calibrations. 

805 

806 The purpose of this task is to perform a set of computations on an input 

807 ``DeferredDatasetHandle`` or ``InMemoryDatasetHandle`` that holds a 

808 ``DataFrame`` dataset (such as ``deepCoadd_obj``), and write the results to 

809 a new dataset (which needs to be declared in an ``outputDataset`` 

810 attribute). 

811 

812 The calculations to be performed are defined in a YAML file that specifies 

813 a set of functors to be computed, provided as a ``--functorFile`` config 

814 parameter. An example of such a YAML file is the following: 

815 

816 funcs: 

817 sourceId: 

818 functor: Index 

819 x: 

820 functor: Column 

821 args: slot_Centroid_x 

822 y: 

823 functor: Column 

824 args: slot_Centroid_y 

825 psfFlux: 

826 functor: LocalNanojansky 

827 args: 

828 - slot_PsfFlux_instFlux 

829 - slot_PsfFlux_instFluxErr 

830 - base_LocalPhotoCalib 

831 - base_LocalPhotoCalibErr 

832 psfFluxErr: 

833 functor: LocalNanojanskyErr 

834 args: 

835 - slot_PsfFlux_instFlux 

836 - slot_PsfFlux_instFluxErr 

837 - base_LocalPhotoCalib 

838 - base_LocalPhotoCalibErr 

839 flags: 

840 - detect_isPrimary 

841 

842 The names for each entry under "func" will become the names of columns in 

843 the output dataset. All the functors referenced are defined in 

844 `~lsst.pipe.tasks.functors`. Positional arguments to be passed to each 

845 functor are in the `args` list, and any additional entries for each column 

846 other than "functor" or "args" (e.g., ``'filt'``, ``'dataset'``) are 

847 treated as keyword arguments to be passed to the functor initialization. 

848 

849 The "flags" entry is the default shortcut for `Column` functors. 

850 All columns listed under "flags" will be copied to the output table 

851 untransformed. They can be of any datatype. 

852 In the special case of transforming a multi-level object table with 

853 band and dataset indices (deepCoadd_obj), these will be taken from the 

854 `meas` dataset and exploded out per band. 

855 

856 There are two special shortcuts that only apply when transforming 

857 multi-level Object (deepCoadd_obj) tables: 

858 - The "refFlags" entry is shortcut for `Column` functor 

859 taken from the `'ref'` dataset if transforming an ObjectTable. 

860 - The "forcedFlags" entry is shortcut for `Column` functors. 

861 taken from the ``forced_src`` dataset if transforming an ObjectTable. 

862 These are expanded out per band. 

863 

864 

865 This task uses the `lsst.pipe.tasks.postprocess.PostprocessAnalysis` object 

866 to organize and execute the calculations. 

867 """ 

868 @property 

869 def _DefaultName(self): 

870 raise NotImplementedError('Subclass must define "_DefaultName" attribute') 

871 

872 @property 

873 def outputDataset(self): 

874 raise NotImplementedError('Subclass must define "outputDataset" attribute') 

875 

876 @property 

877 def inputDataset(self): 

878 raise NotImplementedError('Subclass must define "inputDataset" attribute') 

879 

880 @property 

881 def ConfigClass(self): 

882 raise NotImplementedError('Subclass must define "ConfigClass" attribute') 

883 

884 def __init__(self, *args, **kwargs): 

885 super().__init__(*args, **kwargs) 

886 if self.config.functorFile: 

887 self.log.info('Loading transform functor definitions from %s', 

888 self.config.functorFile) 

889 self.funcs = CompositeFunctor.from_file(self.config.functorFile) 

890 self.funcs.update(dict(PostprocessAnalysis._defaultFuncs)) 

891 else: 

892 self.funcs = None 

893 

894 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

895 inputs = butlerQC.get(inputRefs) 

896 if self.funcs is None: 

897 raise ValueError("config.functorFile is None. " 

898 "Must be a valid path to yaml in order to run Task as a PipelineTask.") 

899 result = self.run(handle=inputs['inputCatalog'], funcs=self.funcs, 

900 dataId=outputRefs.outputCatalog.dataId.full) 

901 outputs = pipeBase.Struct(outputCatalog=result) 

902 butlerQC.put(outputs, outputRefs) 

903 

904 def run(self, handle, funcs=None, dataId=None, band=None): 

905 """Do postprocessing calculations 

906 

907 Takes a ``DeferredDatasetHandle`` or ``InMemoryDatasetHandle`` or 

908 ``DataFrame`` object and dataId, 

909 returns a dataframe with results of postprocessing calculations. 

910 

911 Parameters 

912 ---------- 

913 handle : `~lsst.daf.butler.DeferredDatasetHandle` or 

914 `~lsst.pipe.base.InMemoryDatasetHandle` or 

915 `~pandas.DataFrame`, or list of these. 

916 DataFrames from which calculations are done. 

917 funcs : `~lsst.pipe.tasks.functors.Functor` 

918 Functors to apply to the table's columns 

919 dataId : dict, optional 

920 Used to add the columns listed in ``columnsFromDataId`` (e.g. a patchId column) to the output dataframe. 

921 band : `str`, optional 

922 Filter band that is being processed. 

923 

924 Returns 

925 ------- 

926 df : `pandas.DataFrame` 

927 """ 

928 self.log.info("Transforming/standardizing the source table dataId: %s", dataId) 

929 

930 df = self.transform(band, handle, funcs, dataId).df 

931 self.log.info("Made a table of %d columns and %d rows", len(df.columns), len(df)) 

932 return df 

933 

934 def getFunctors(self): 

935 return self.funcs 

936 

937 def getAnalysis(self, handles, funcs=None, band=None): 

938 if funcs is None: 

939 funcs = self.funcs 

940 analysis = PostprocessAnalysis(handles, funcs, filt=band) 

941 return analysis 

942 

943 def transform(self, band, handles, funcs, dataId): 

944 analysis = self.getAnalysis(handles, funcs=funcs, band=band) 

945 df = analysis.df 

946 if dataId and self.config.columnsFromDataId: 

947 for key in self.config.columnsFromDataId: 

948 if key in dataId: 

949 df[str(key)] = dataId[key] 

950 else: 

951 raise ValueError(f"'{key}' in config.columnsFromDataId not found in dataId: {dataId}") 

952 

953 if self.config.primaryKey: 

954 if df.index.name != self.config.primaryKey and self.config.primaryKey in df: 

955 df.reset_index(inplace=True, drop=True) 

956 df.set_index(self.config.primaryKey, inplace=True) 

957 

958 return pipeBase.Struct( 

959 df=df, 

960 analysis=analysis 

961 ) 

962 

963 

964class TransformObjectCatalogConnections(pipeBase.PipelineTaskConnections, 

965 defaultTemplates={"coaddName": "deep"}, 

966 dimensions=("tract", "patch", "skymap")): 

967 inputCatalog = connectionTypes.Input( 

968 doc="The vertical concatenation of the deepCoadd_{ref|meas|forced_src} catalogs, " 

969 "stored as a DataFrame with a multi-level column index per-patch.", 

970 dimensions=("tract", "patch", "skymap"), 

971 storageClass="DataFrame", 

972 name="{coaddName}Coadd_obj", 

973 deferLoad=True, 

974 ) 

975 outputCatalog = connectionTypes.Output( 

976 doc="Per-Patch Object Table of columns transformed from the deepCoadd_obj table per the standard " 

977 "data model.", 

978 dimensions=("tract", "patch", "skymap"), 

979 storageClass="DataFrame", 

980 name="objectTable" 

981 ) 

982 

983 

984class TransformObjectCatalogConfig(TransformCatalogBaseConfig, 

985 pipelineConnections=TransformObjectCatalogConnections): 

986 coaddName = pexConfig.Field( 

987 dtype=str, 

988 default="deep", 

989 doc="Name of coadd" 

990 ) 

991 # TODO: remove in DM-27177 

992 filterMap = pexConfig.DictField( 

993 keytype=str, 

994 itemtype=str, 

995 default={}, 

996 doc=("Dictionary mapping full filter name to short one for column name munging." 

997 "These filters determine the output columns no matter what filters the " 

998 "input data actually contain."), 

999 deprecated=("Coadds are now identified by the band, so this transform is unused." 

1000 "Will be removed after v22.") 

1001 ) 

1002 outputBands = pexConfig.ListField( 

1003 dtype=str, 

1004 default=None, 

1005 optional=True, 

1006 doc=("These bands and only these bands will appear in the output," 

1007 " NaN-filled if the input does not include them." 

1008 " If None, then use all bands found in the input.") 

1009 ) 

1010 camelCase = pexConfig.Field( 

1011 dtype=bool, 

1012 default=False, 

1013 doc=("Write per-band columns names with camelCase, else underscore " 

1014 "For example: gPsFlux instead of g_PsFlux.") 

1015 ) 

1016 multilevelOutput = pexConfig.Field( 

1017 dtype=bool, 

1018 default=False, 

1019 doc=("Whether results dataframe should have a multilevel column index (True) or be flat " 

1020 "and name-munged (False).") 

1021 ) 

1022 goodFlags = pexConfig.ListField( 

1023 dtype=str, 

1024 default=[], 

1025 doc=("List of 'good' flags that should be set False when populating empty tables. " 

1026 "All other flags are considered to be 'bad' flags and will be set to True.") 

1027 ) 

1028 floatFillValue = pexConfig.Field( 

1029 dtype=float, 

1030 default=np.nan, 

1031 doc="Fill value for float fields when populating empty tables." 

1032 ) 

1033 integerFillValue = pexConfig.Field( 

1034 dtype=int, 

1035 default=-1, 

1036 doc="Fill value for integer fields when populating empty tables." 

1037 ) 

1038 

1039 def setDefaults(self): 

1040 super().setDefaults() 

1041 self.functorFile = os.path.join('$PIPE_TASKS_DIR', 'schemas', 'Object.yaml') 

1042 self.primaryKey = 'objectId' 

1043 self.columnsFromDataId = ['tract', 'patch'] 

1044 self.goodFlags = ['calib_astrometry_used', 

1045 'calib_photometry_reserved', 

1046 'calib_photometry_used', 

1047 'calib_psf_candidate', 

1048 'calib_psf_reserved', 

1049 'calib_psf_used'] 
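
# --- Editor-added configuration sketch (not part of the original module) ---
# The defaults above can be overridden in the usual pex_config way; the values here
# are illustrative only.
# config.outputBands = ["g", "r", "i", "z", "y"]
# config.camelCase = True
# config.multilevelOutput = False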

1050 

1051 

1052class TransformObjectCatalogTask(TransformCatalogBaseTask): 

1053 """Produce a flattened Object Table to match the format specified in 

1054 sdm_schemas. 

1055 

1056 Do the same set of postprocessing calculations on all bands. 

1057 

1058 This is identical to `TransformCatalogBaseTask`, except that it does 

1059 the specified functor calculations for all filters present in the 

1060 input `deepCoadd_obj` table. Any specific ``"filt"`` keywords specified 

1061 by the YAML file will be superseded. 

1062 """ 

1063 _DefaultName = "transformObjectCatalog" 

1064 ConfigClass = TransformObjectCatalogConfig 

1065 

1066 def run(self, handle, funcs=None, dataId=None, band=None): 

1067 # NOTE: band kwarg is ignored here. 

1068 dfDict = {} 

1069 analysisDict = {} 

1070 templateDf = pd.DataFrame() 

1071 

1072 columns = handle.get(component='columns') 

1073 inputBands = columns.unique(level=1).values 

1074 

1075 outputBands = self.config.outputBands if self.config.outputBands else inputBands 

1076 

1077 # Perform transform for data of filters that exist in the handle dataframe. 

1078 for inputBand in inputBands: 

1079 if inputBand not in outputBands: 

1080 self.log.info("Ignoring %s band data in the input", inputBand) 

1081 continue 

1082 self.log.info("Transforming the catalog of band %s", inputBand) 

1083 result = self.transform(inputBand, handle, funcs, dataId) 

1084 dfDict[inputBand] = result.df 

1085 analysisDict[inputBand] = result.analysis 

1086 if templateDf.empty: 

1087 templateDf = result.df 

1088 

1089 # Put filler values in columns of other wanted bands 

1090 for filt in outputBands: 

1091 if filt not in dfDict: 

1092 self.log.info("Adding empty columns for band %s", filt) 

1093 dfTemp = templateDf.copy() 

1094 for col in dfTemp.columns: 

1095 testValue = dfTemp[col].values[0] 

1096 if isinstance(testValue, (np.bool_, pd.BooleanDtype)): 

1097 # Boolean flag type, check if it is a "good" flag 

1098 if col in self.config.goodFlags: 

1099 fillValue = False 

1100 else: 

1101 fillValue = True 

1102 elif isinstance(testValue, numbers.Integral): 

1103 # Checking numbers.Integral catches all flavors 

1104 # of python, numpy, pandas, etc. integers. 

1105 # We must ensure this is not an unsigned integer. 

1106 if isinstance(testValue, np.unsignedinteger): 

1107 raise ValueError("Parquet tables may not have unsigned integer columns.") 

1108 else: 

1109 fillValue = self.config.integerFillValue 

1110 else: 

1111 fillValue = self.config.floatFillValue 

1112 dfTemp[col].values[:] = fillValue 

1113 dfDict[filt] = dfTemp 

1114 

1115 # This makes a multilevel column index, with band as first level 

1116 df = pd.concat(dfDict, axis=1, names=['band', 'column']) 

1117 

1118 if not self.config.multilevelOutput: 

1119 noDupCols = list(set.union(*[set(v.noDupCols) for v in analysisDict.values()])) 

1120 if self.config.primaryKey in noDupCols: 

1121 noDupCols.remove(self.config.primaryKey) 

1122 if dataId and self.config.columnsFromDataId: 

1123 noDupCols += self.config.columnsFromDataId 

1124 df = flattenFilters(df, noDupCols=noDupCols, camelCase=self.config.camelCase, 

1125 inputBands=inputBands) 

1126 

1127 self.log.info("Made a table of %d columns and %d rows", len(df.columns), len(df)) 

1128 

1129 return df 
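
# --- Editor-added sketch (not part of the original module) ---
# Toy illustration of the concat step above: a dict of per-band frames becomes a
# band-keyed column MultiIndex, which flattenFilters can then flatten into
# band-prefixed names. Values are hypothetical.
def _examplePerBandConcat():
    per_band = {"g": pd.DataFrame({"psfFlux": [1.0]}), "r": pd.DataFrame({"psfFlux": [2.0]})}
    stacked = pd.concat(per_band, axis=1, names=["band", "column"])
    # stacked.columns -> MultiIndex([('g', 'psfFlux'), ('r', 'psfFlux')], names=['band', 'column'])
    return stacked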

1130 

1131 

1132class ConsolidateObjectTableConnections(pipeBase.PipelineTaskConnections, 

1133 dimensions=("tract", "skymap")): 

1134 inputCatalogs = connectionTypes.Input( 

1135 doc="Per-Patch objectTables conforming to the standard data model.", 

1136 name="objectTable", 

1137 storageClass="DataFrame", 

1138 dimensions=("tract", "patch", "skymap"), 

1139 multiple=True, 

1140 ) 

1141 outputCatalog = connectionTypes.Output( 

1142 doc="Pre-tract horizontal concatenation of the input objectTables", 

1143 name="objectTable_tract", 

1144 storageClass="DataFrame", 

1145 dimensions=("tract", "skymap"), 

1146 ) 

1147 

1148 

1149class ConsolidateObjectTableConfig(pipeBase.PipelineTaskConfig, 

1150 pipelineConnections=ConsolidateObjectTableConnections): 

1151 coaddName = pexConfig.Field( 

1152 dtype=str, 

1153 default="deep", 

1154 doc="Name of coadd" 

1155 ) 

1156 

1157 

1158class ConsolidateObjectTableTask(pipeBase.PipelineTask): 

1159 """Write patch-merged source tables to a tract-level DataFrame Parquet file. 

1160 

1161 Concatenates `objectTable` list into a per-tract `objectTable_tract`. 

1162 """ 

1163 _DefaultName = "consolidateObjectTable" 

1164 ConfigClass = ConsolidateObjectTableConfig 

1165 

1166 inputDataset = 'objectTable' 

1167 outputDataset = 'objectTable_tract' 

1168 

1169 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

1170 inputs = butlerQC.get(inputRefs) 

1171 self.log.info("Concatenating %s per-patch Object Tables", 

1172 len(inputs['inputCatalogs'])) 

1173 df = pd.concat(inputs['inputCatalogs']) 

1174 butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs) 

1175 

1176 

1177class TransformSourceTableConnections(pipeBase.PipelineTaskConnections, 

1178 defaultTemplates={"catalogType": ""}, 

1179 dimensions=("instrument", "visit", "detector")): 

1180 

1181 inputCatalog = connectionTypes.Input( 

1182 doc="Wide input catalog of sources produced by WriteSourceTableTask", 

1183 name="{catalogType}source", 

1184 storageClass="DataFrame", 

1185 dimensions=("instrument", "visit", "detector"), 

1186 deferLoad=True 

1187 ) 

1188 outputCatalog = connectionTypes.Output( 

1189 doc="Narrower, per-detector Source Table transformed and converted per a " 

1190 "specified set of functors", 

1191 name="{catalogType}sourceTable", 

1192 storageClass="DataFrame", 

1193 dimensions=("instrument", "visit", "detector") 

1194 ) 

1195 

1196 

1197class TransformSourceTableConfig(TransformCatalogBaseConfig, 

1198 pipelineConnections=TransformSourceTableConnections): 

1199 

1200 def setDefaults(self): 

1201 super().setDefaults() 

1202 self.functorFile = os.path.join('$PIPE_TASKS_DIR', 'schemas', 'Source.yaml') 

1203 self.primaryKey = 'sourceId' 

1204 self.columnsFromDataId = ['visit', 'detector', 'band', 'physical_filter'] 

1205 

1206 

1207class TransformSourceTableTask(TransformCatalogBaseTask): 

1208 """Transform/standardize a source catalog 

1209 """ 

1210 _DefaultName = "transformSourceTable" 

1211 ConfigClass = TransformSourceTableConfig 

1212 

1213 

1214class ConsolidateVisitSummaryConnections(pipeBase.PipelineTaskConnections, 

1215 dimensions=("instrument", "visit",), 

1216 defaultTemplates={"calexpType": ""}): 

1217 calexp = connectionTypes.Input( 

1218 doc="Processed exposures used for metadata", 

1219 name="calexp", 

1220 storageClass="ExposureF", 

1221 dimensions=("instrument", "visit", "detector"), 

1222 deferLoad=True, 

1223 multiple=True, 

1224 ) 

1225 visitSummary = connectionTypes.Output( 

1226 doc=("Per-visit consolidated exposure metadata. These catalogs use " 

1227 "detector id for the id and are sorted for fast lookups of a " 

1228 "detector."), 

1229 name="visitSummary", 

1230 storageClass="ExposureCatalog", 

1231 dimensions=("instrument", "visit"), 

1232 ) 

1233 visitSummarySchema = connectionTypes.InitOutput( 

1234 doc="Schema of the visitSummary catalog", 

1235 name="visitSummary_schema", 

1236 storageClass="ExposureCatalog", 

1237 ) 

1238 

1239 

1240class ConsolidateVisitSummaryConfig(pipeBase.PipelineTaskConfig, 

1241 pipelineConnections=ConsolidateVisitSummaryConnections): 

1242 """Config for ConsolidateVisitSummaryTask""" 

1243 pass 

1244 

1245 

1246class ConsolidateVisitSummaryTask(pipeBase.PipelineTask): 

1247 """Task to consolidate per-detector visit metadata. 

1248 

1249 This task aggregates the following metadata from all the detectors in a 

1250 single visit into an exposure catalog: 

1251 - The visitInfo. 

1252 - The wcs. 

1253 - The photoCalib. 

1254 - The physical_filter and band (if available). 

1255 - The psf size, shape, and effective area at the center of the detector. 

1256 - The corners of the bounding box in right ascension/declination. 

1257 

1258 Other quantities such as Detector, Psf, ApCorrMap, and TransmissionCurve 

1259 are not persisted here because of storage concerns, and because of their 

1260 limited utility as summary statistics. 

1261 

1262 Tests for this task are performed in ci_hsc_gen3. 

1263 """ 

1264 _DefaultName = "consolidateVisitSummary" 

1265 ConfigClass = ConsolidateVisitSummaryConfig 

1266 

1267 def __init__(self, **kwargs): 

1268 super().__init__(**kwargs) 

1269 self.schema = afwTable.ExposureTable.makeMinimalSchema() 

1270 self.schema.addField('visit', type='L', doc='Visit number') 

1271 self.schema.addField('physical_filter', type='String', size=32, doc='Physical filter') 

1272 self.schema.addField('band', type='String', size=32, doc='Name of band') 

1273 ExposureSummaryStats.update_schema(self.schema) 

1274 self.visitSummarySchema = afwTable.ExposureCatalog(self.schema) 

1275 

1276 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

1277 dataRefs = butlerQC.get(inputRefs.calexp) 

1278 visit = dataRefs[0].dataId.byName()['visit'] 

1279 

1280 self.log.debug("Concatenating metadata from %d per-detector calexps (visit %d)", 

1281 len(dataRefs), visit) 

1282 

1283 expCatalog = self._combineExposureMetadata(visit, dataRefs) 

1284 

1285 butlerQC.put(expCatalog, outputRefs.visitSummary) 

1286 

1287 def _combineExposureMetadata(self, visit, dataRefs): 

1288 """Make a combined exposure catalog from a list of dataRefs. 

1289 These dataRefs must point to exposures with wcs, summaryStats, 

1290 and other visit metadata. 

1291 

1292 Parameters 

1293 ---------- 

1294 visit : `int` 

1295 Visit identification number. 

1296 dataRefs : `list` of `lsst.daf.butler.DeferredDatasetHandle` 

1297 List of dataRefs in visit. 

1298 

1299 Returns 

1300 ------- 

1301 visitSummary : `lsst.afw.table.ExposureCatalog` 

1302 Exposure catalog with per-detector summary information. 

1303 """ 

1304 cat = afwTable.ExposureCatalog(self.schema) 

1305 cat.resize(len(dataRefs)) 

1306 

1307 cat['visit'] = visit 

1308 

1309 for i, dataRef in enumerate(dataRefs): 

1310 visitInfo = dataRef.get(component='visitInfo') 

1311 filterLabel = dataRef.get(component='filter') 

1312 summaryStats = dataRef.get(component='summaryStats') 

1313 detector = dataRef.get(component='detector') 

1314 wcs = dataRef.get(component='wcs') 

1315 photoCalib = dataRef.get(component='photoCalib') 

1316 detector = dataRef.get(component='detector') 

1317 bbox = dataRef.get(component='bbox') 

1318 validPolygon = dataRef.get(component='validPolygon') 

1319 

1320 rec = cat[i] 

1321 rec.setBBox(bbox) 

1322 rec.setVisitInfo(visitInfo) 

1323 rec.setWcs(wcs) 

1324 rec.setPhotoCalib(photoCalib) 

1325 rec.setValidPolygon(validPolygon) 

1326 

1327 rec['physical_filter'] = filterLabel.physicalLabel if filterLabel.hasPhysicalLabel() else "" 

1328 rec['band'] = filterLabel.bandLabel if filterLabel.hasBandLabel() else "" 

1329 rec.setId(detector.getId()) 

1330 summaryStats.update_record(rec) 

1331 

1332 metadata = dafBase.PropertyList() 

1333 metadata.add("COMMENT", "Catalog id is detector id, sorted.") 

1334 # We are looping over existing datarefs, so the following is true 

1335 metadata.add("COMMENT", "Only detectors with data have entries.") 

1336 cat.setMetadata(metadata) 

1337 

1338 cat.sort() 

1339 return cat 

1340 

1341 

1342class ConsolidateSourceTableConnections(pipeBase.PipelineTaskConnections, 

1343 defaultTemplates={"catalogType": ""}, 

1344 dimensions=("instrument", "visit")): 

1345 inputCatalogs = connectionTypes.Input( 

1346 doc="Input per-detector Source Tables", 

1347 name="{catalogType}sourceTable", 

1348 storageClass="DataFrame", 

1349 dimensions=("instrument", "visit", "detector"), 

1350 multiple=True 

1351 ) 

1352 outputCatalog = connectionTypes.Output( 

1353 doc="Per-visit concatenation of Source Table", 

1354 name="{catalogType}sourceTable_visit", 

1355 storageClass="DataFrame", 

1356 dimensions=("instrument", "visit") 

1357 ) 

1358 

1359 

1360class ConsolidateSourceTableConfig(pipeBase.PipelineTaskConfig, 

1361 pipelineConnections=ConsolidateSourceTableConnections): 

1362 pass 

1363 

1364 

1365class ConsolidateSourceTableTask(pipeBase.PipelineTask): 

1366 """Concatenate `sourceTable` list into a per-visit `sourceTable_visit` 

1367 """ 

1368 _DefaultName = 'consolidateSourceTable' 

1369 ConfigClass = ConsolidateSourceTableConfig 

1370 

1371 inputDataset = 'sourceTable' 

1372 outputDataset = 'sourceTable_visit' 

1373 

1374 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

1375 from .makeWarp import reorderRefs 

1376 

1377 detectorOrder = [ref.dataId['detector'] for ref in inputRefs.inputCatalogs] 

1378 detectorOrder.sort() 

1379 inputRefs = reorderRefs(inputRefs, detectorOrder, dataIdKey='detector') 
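# Editor's note: sorting and reordering the refs by detector id gives the concatenated
# per-visit table a deterministic, detector-sorted row order.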

1380 inputs = butlerQC.get(inputRefs) 

1381 self.log.info("Concatenating %s per-detector Source Tables", 

1382 len(inputs['inputCatalogs'])) 

1383 df = pd.concat(inputs['inputCatalogs']) 

1384 butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs) 

1385 

1386 

1387class MakeCcdVisitTableConnections(pipeBase.PipelineTaskConnections, 

1388 dimensions=("instrument",), 

1389 defaultTemplates={"calexpType": ""}): 

1390 visitSummaryRefs = connectionTypes.Input( 

1391 doc="Data references for per-visit consolidated exposure metadata", 

1392 name="finalVisitSummary", 

1393 storageClass="ExposureCatalog", 

1394 dimensions=("instrument", "visit"), 

1395 multiple=True, 

1396 deferLoad=True, 

1397 ) 

1398 outputCatalog = connectionTypes.Output( 

1399 doc="CCD and Visit metadata table", 

1400 name="ccdVisitTable", 

1401 storageClass="DataFrame", 

1402 dimensions=("instrument",) 

1403 ) 

1404 

1405 

1406class MakeCcdVisitTableConfig(pipeBase.PipelineTaskConfig, 

1407 pipelineConnections=MakeCcdVisitTableConnections): 

1408 idGenerator = DetectorVisitIdGeneratorConfig.make_field() 

1409 

1410 

1411class MakeCcdVisitTableTask(pipeBase.PipelineTask): 

1412 """Produce a `ccdVisitTable` from the visit summary exposure catalogs. 

1413 """ 

1414 _DefaultName = 'makeCcdVisitTable' 

1415 ConfigClass = MakeCcdVisitTableConfig 

1416 

1417 def run(self, visitSummaryRefs): 

1418 """Make a table of ccd information from the visit summary catalogs. 

1419 

1420 Parameters 

1421 ---------- 

1422 visitSummaryRefs : `list` of `lsst.daf.butler.DeferredDatasetHandle` 

1423 List of DeferredDatasetHandles pointing to exposure catalogs with 

1424 per-detector summary information. 

1425 

1426 Returns 

1427 ------- 

1428 result : `~lsst.pipe.base.Struct` 

1429 Results struct with attribute: 

1430 

1431 ``outputCatalog`` 

1432 Catalog of ccd and visit information. 

1433 """ 

1434 ccdEntries = [] 

1435 for visitSummaryRef in visitSummaryRefs: 

1436 visitSummary = visitSummaryRef.get() 

1437 visitInfo = visitSummary[0].getVisitInfo() 

1438 

1439 ccdEntry = {} 

1440 summaryTable = visitSummary.asAstropy() 

1441 selectColumns = ['id', 'visit', 'physical_filter', 'band', 'ra', 'dec', 'zenithDistance', 

1442 'zeroPoint', 'psfSigma', 'skyBg', 'skyNoise', 

1443 'astromOffsetMean', 'astromOffsetStd', 'nPsfStar', 

1444 'psfStarDeltaE1Median', 'psfStarDeltaE2Median', 

1445 'psfStarDeltaE1Scatter', 'psfStarDeltaE2Scatter', 

1446 'psfStarDeltaSizeMedian', 'psfStarDeltaSizeScatter', 

1447 'psfStarScaledDeltaSizeScatter', 

1448 'psfTraceRadiusDelta', 'maxDistToNearestPsf'] 

1449 ccdEntry = summaryTable[selectColumns].to_pandas().set_index('id') 

1450 # 'visit' is the human-readable visit number. 

1451 # 'visitId' is the key to the visit table. They are the same. 

1452 # Technically you should join to get the visit from the visit 

1453 # table. 

1454 ccdEntry = ccdEntry.rename(columns={"visit": "visitId"}) 

1455 

1456 # RFC-924: Temporarily keep a duplicate "decl" entry for backwards 

1457 # compatibility. To be removed after September 2023. 

1458 ccdEntry["decl"] = ccdEntry.loc[:, "dec"] 

1459 

1460 ccdEntry['ccdVisitId'] = [ 

1461 self.config.idGenerator.apply( 

1462 visitSummaryRef.dataId, 

1463 detector=detector_id, 

1464 is_exposure=False, 

1465 ).catalog_id # The "catalog ID" here is the ccdVisit ID 

1466 # because it's usually the ID for a whole catalog 

1467 # with a {visit, detector}, and that's the main 

1468 # use case for IdGenerator. This usage for a 

1469 # summary table is rare. 

1470 for detector_id in summaryTable['id'] 

1471 ] 

1472 ccdEntry['detector'] = summaryTable['id'] 

1473 pixToArcseconds = np.array([vR.getWcs().getPixelScale().asArcseconds() if vR.getWcs() 

1474 else np.nan for vR in visitSummary]) 

1475 ccdEntry["seeing"] = visitSummary['psfSigma'] * np.sqrt(8 * np.log(2)) * pixToArcseconds 

1476 

1477 ccdEntry["skyRotation"] = visitInfo.getBoresightRotAngle().asDegrees() 

1478 ccdEntry["expMidpt"] = visitInfo.getDate().toPython() 

1479 ccdEntry["expMidptMJD"] = visitInfo.getDate().get(dafBase.DateTime.MJD) 

1480 expTime = visitInfo.getExposureTime() 

1481 ccdEntry['expTime'] = expTime 

1482 ccdEntry["obsStart"] = ccdEntry["expMidpt"] - 0.5 * pd.Timedelta(seconds=expTime) 

1483 expTime_days = expTime / (60*60*24) 

1484 ccdEntry["obsStartMJD"] = ccdEntry["expMidptMJD"] - 0.5 * expTime_days 

1485 ccdEntry['darkTime'] = visitInfo.getDarkTime() 

1486 ccdEntry['xSize'] = summaryTable['bbox_max_x'] - summaryTable['bbox_min_x'] 

1487 ccdEntry['ySize'] = summaryTable['bbox_max_y'] - summaryTable['bbox_min_y'] 

1488 ccdEntry['llcra'] = summaryTable['raCorners'][:, 0] 

1489 ccdEntry['llcdec'] = summaryTable['decCorners'][:, 0] 

1490 ccdEntry['ulcra'] = summaryTable['raCorners'][:, 1] 

1491 ccdEntry['ulcdec'] = summaryTable['decCorners'][:, 1] 

1492 ccdEntry['urcra'] = summaryTable['raCorners'][:, 2] 

1493 ccdEntry['urcdec'] = summaryTable['decCorners'][:, 2] 

1494 ccdEntry['lrcra'] = summaryTable['raCorners'][:, 3] 

1495 ccdEntry['lrcdec'] = summaryTable['decCorners'][:, 3] 

1496 # TODO: DM-30618, Add raftName, nExposures, ccdTemp, binX, binY, 

1497 # and flags, and decide if WCS, and llcx, llcy, ulcx, ulcy, etc. 

1498 # values are actually wanted. 

1499 ccdEntries.append(ccdEntry) 

1500 

1501 outputCatalog = pd.concat(ccdEntries) 

1502 outputCatalog.set_index('ccdVisitId', inplace=True, verify_integrity=True) 

1503 return pipeBase.Struct(outputCatalog=outputCatalog) 

1504 
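Two of the derived columns above are simple conversions: seeing is the PSF model's Gaussian sigma turned into a FWHM in arcseconds, and obsStart shifts the exposure midpoint back by half the exposure time. A self-contained sketch with invented numbers:

import numpy as np
import pandas as pd

psfSigma = 1.7             # PSF Gaussian sigma in pixels (example value)
pixToArcseconds = 0.2      # pixel scale in arcsec/pixel (example value)
expTime = 30.0             # exposure time in seconds (example value)
expMidpt = pd.Timestamp("2023-07-13T11:43:00")
expMidptMJD = 60138.48819  # the same midpoint as an MJD (example value)

# The FWHM of a Gaussian is sigma * sqrt(8 ln 2); the pixel scale then
# converts it to arcseconds, matching the "seeing" column above.
seeing = psfSigma * np.sqrt(8 * np.log(2)) * pixToArcseconds

# Shift the midpoint back by half the exposure time, once as a timestamp
# and once in days for the MJD version of the column.
obsStart = expMidpt - 0.5 * pd.Timedelta(seconds=expTime)
obsStartMJD = expMidptMJD - 0.5 * expTime / (60 * 60 * 24)

print(f"seeing={seeing:.3f} arcsec  obsStart={obsStart}  obsStartMJD={obsStartMJD:.6f}")
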

1505 

1506class MakeVisitTableConnections(pipeBase.PipelineTaskConnections, 

1507 dimensions=("instrument",), 

1508 defaultTemplates={"calexpType": ""}): 

1509 visitSummaries = connectionTypes.Input( 

1510 doc="Per-visit consolidated exposure metadata", 

1511 name="finalVisitSummary", 

1512 storageClass="ExposureCatalog", 

1513 dimensions=("instrument", "visit",), 

1514 multiple=True, 

1515 deferLoad=True, 

1516 ) 

1517 outputCatalog = connectionTypes.Output( 

1518 doc="Visit metadata table", 

1519 name="visitTable", 

1520 storageClass="DataFrame", 

1521 dimensions=("instrument",) 

1522 ) 

1523 

1524 

1525class MakeVisitTableConfig(pipeBase.PipelineTaskConfig, 

1526 pipelineConnections=MakeVisitTableConnections): 

1527 pass 

1528 

1529 

1530class MakeVisitTableTask(pipeBase.PipelineTask): 

1531 """Produce a `visitTable` from the visit summary exposure catalogs. 

1532 """ 

1533 _DefaultName = 'makeVisitTable' 

1534 ConfigClass = MakeVisitTableConfig 

1535 

1536 def run(self, visitSummaries): 

1537 """Make a table of visit information from the visit summary catalogs. 

1538 

1539 Parameters 

1540 ---------- 

1541 visitSummaries : `list` of `lsst.daf.butler.DeferredDatasetHandle` 

1542 List of DeferredDatasetHandles pointing to exposure catalogs with per-detector summary information. 

1543 Returns 

1544 ------- 

1545 result : `~lsst.pipe.base.Struct` 

1546 Results struct with attribute: 

1547 

1548 ``outputCatalog`` 

1549 Catalog of visit information. 

1550 """ 

1551 visitEntries = [] 

1552 for visitSummary in visitSummaries: 

1553 visitSummary = visitSummary.get() 

1554 visitRow = visitSummary[0] 

1555 visitInfo = visitRow.getVisitInfo() 

1556 

1557 visitEntry = {} 

1558 visitEntry["visitId"] = visitRow['visit'] 

1559 visitEntry["visit"] = visitRow['visit'] 

1560 visitEntry["physical_filter"] = visitRow['physical_filter'] 

1561 visitEntry["band"] = visitRow['band'] 

1562 raDec = visitInfo.getBoresightRaDec() 

1563 visitEntry["ra"] = raDec.getRa().asDegrees() 

1564 visitEntry["dec"] = raDec.getDec().asDegrees() 

1565 

1566 # RFC-924: Temporarily keep a duplicate "decl" entry for backwards 

1567 # compatibility. To be removed after September 2023. 

1568 visitEntry["decl"] = visitEntry["dec"] 

1569 

1570 visitEntry["skyRotation"] = visitInfo.getBoresightRotAngle().asDegrees() 

1571 azAlt = visitInfo.getBoresightAzAlt() 

1572 visitEntry["azimuth"] = azAlt.getLongitude().asDegrees() 

1573 visitEntry["altitude"] = azAlt.getLatitude().asDegrees() 

1574 visitEntry["zenithDistance"] = 90 - azAlt.getLatitude().asDegrees() 

1575 visitEntry["airmass"] = visitInfo.getBoresightAirmass() 

1576 expTime = visitInfo.getExposureTime() 

1577 visitEntry["expTime"] = expTime 

1578 visitEntry["expMidpt"] = visitInfo.getDate().toPython() 

1579 visitEntry["expMidptMJD"] = visitInfo.getDate().get(dafBase.DateTime.MJD) 

1580 visitEntry["obsStart"] = visitEntry["expMidpt"] - 0.5 * pd.Timedelta(seconds=expTime) 

1581 expTime_days = expTime / (60*60*24) 

1582 visitEntry["obsStartMJD"] = visitEntry["expMidptMJD"] - 0.5 * expTime_days 

1583 visitEntries.append(visitEntry) 

1584 

1585 # TODO: DM-30623, Add programId, exposureType, cameraTemp, 

1586 # mirror1Temp, mirror2Temp, mirror3Temp, domeTemp, externalTemp, 

1587 # dimmSeeing, pwvGPS, pwvMW, flags, nExposures. 

1588 

1589 outputCatalog = pd.DataFrame(data=visitEntries) 

1590 outputCatalog.set_index('visitId', inplace=True, verify_integrity=True) 

1591 return pipeBase.Struct(outputCatalog=outputCatalog) 

1592 
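The visit table is assembled the same way: one plain dict per visit, then a single DataFrame built at the end with visitId as a unique index. A minimal sketch with invented values:

import pandas as pd

# Hypothetical per-visit entries, mimicking the dicts built above.
visitEntries = [
    {"visitId": 1001, "band": "r", "altitude": 65.0, "airmass": 1.10},
    {"visitId": 1002, "band": "i", "altitude": 80.0, "airmass": 1.02},
]
for entry in visitEntries:
    # Zenith distance is the complement of the boresight altitude.
    entry["zenithDistance"] = 90 - entry["altitude"]

visitTable = pd.DataFrame(data=visitEntries)
# verify_integrity raises if two entries ever share a visitId.
visitTable.set_index("visitId", inplace=True, verify_integrity=True)
print(visitTable)
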

1593 

1594class WriteForcedSourceTableConnections(pipeBase.PipelineTaskConnections, 

1595 dimensions=("instrument", "visit", "detector", "skymap", "tract")): 

1596 

1597 inputCatalog = connectionTypes.Input( 

1598 doc="Primary per-detector, single-epoch forced-photometry catalog. " 

1599 "By default, it is the output of ForcedPhotCcdTask on calexps", 

1600 name="forced_src", 

1601 storageClass="SourceCatalog", 

1602 dimensions=("instrument", "visit", "detector", "skymap", "tract") 

1603 ) 

1604 inputCatalogDiff = connectionTypes.Input( 

1605 doc="Secondary multi-epoch, per-detector, forced photometry catalog. " 

1606 "By default, it is the output of ForcedPhotCcdTask run on image differences.", 

1607 name="forced_diff", 

1608 storageClass="SourceCatalog", 

1609 dimensions=("instrument", "visit", "detector", "skymap", "tract") 

1610 ) 

1611 outputCatalog = connectionTypes.Output( 

1612 doc="InputCatalogs horizonatally joined on `objectId` in DataFrame parquet format", 

1613 name="mergedForcedSource", 

1614 storageClass="DataFrame", 

1615 dimensions=("instrument", "visit", "detector", "skymap", "tract") 

1616 ) 

1617 

1618 

1619class WriteForcedSourceTableConfig(pipeBase.PipelineTaskConfig, 

1620 pipelineConnections=WriteForcedSourceTableConnections): 

1621 key = lsst.pex.config.Field( 

1622 doc="Column on which to join the two input tables on and make the primary key of the output", 

1623 dtype=str, 

1624 default="objectId", 

1625 ) 

1626 idGenerator = DetectorVisitIdGeneratorConfig.make_field() 

1627 

1628 

1629class WriteForcedSourceTableTask(pipeBase.PipelineTask): 

1630 """Merge and convert per-detector forced source catalogs to DataFrame Parquet format. 

1631 

1632 Because the predecessor ForcedPhotCcdTask operates per-detector, 

1633 per-tract (i.e., it has tract in its dimensions), detectors 

1634 on the tract boundary may have multiple forced source catalogs. 

1635 

1636 The successor task TransformForcedSourceTable runs per-patch 

1637 and temporally aggregates overlapping mergedForcedSource catalogs from all 

1638 available epochs. 

1639 """ 

1640 _DefaultName = "writeForcedSourceTable" 

1641 ConfigClass = WriteForcedSourceTableConfig 

1642 

1643 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

1644 inputs = butlerQC.get(inputRefs) 

1645 # Add ccdVisitId to allow joining with CcdVisitTable 

1646 idGenerator = self.config.idGenerator.apply(butlerQC.quantum.dataId) 

1647 inputs['ccdVisitId'] = idGenerator.catalog_id 

1648 inputs['band'] = butlerQC.quantum.dataId.full['band'] 

1649 outputs = self.run(**inputs) 

1650 butlerQC.put(outputs, outputRefs) 

1651 

1652 def run(self, inputCatalog, inputCatalogDiff, ccdVisitId=None, band=None): 

1653 dfs = [] 

1654 for table, dataset in zip((inputCatalog, inputCatalogDiff), ('calexp', 'diff')): 

1655 df = table.asAstropy().to_pandas().set_index(self.config.key, drop=False) 

1656 df = df.reindex(sorted(df.columns), axis=1) 

1657 df['ccdVisitId'] = ccdVisitId if ccdVisitId else pd.NA 

1658 df['band'] = band if band else pd.NA 

1659 df.columns = pd.MultiIndex.from_tuples([(dataset, c) for c in df.columns], 

1660 names=('dataset', 'column')) 

1661 

1662 dfs.append(df) 

1663 

1664 outputCatalog = functools.reduce(lambda d1, d2: d1.join(d2), dfs) 

1665 return pipeBase.Struct(outputCatalog=outputCatalog) 

1666 
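The merge above relies on a two-level column index so that measurements of the same object from the calexp and the image difference can sit side by side in one row. A small standalone sketch of that join (column names and ids are illustrative):

import functools
import pandas as pd

def tagged(df, dataset):
    # Label every column with the dataset it came from, as the task does.
    df.columns = pd.MultiIndex.from_tuples(
        [(dataset, c) for c in df.columns], names=("dataset", "column"))
    return df

calexp = pd.DataFrame({"psfFlux": [1.0, 2.0]}, index=[10, 11])
diff = pd.DataFrame({"psfFlux": [0.1, -0.2]}, index=[10, 11])
calexp.index.name = diff.index.name = "objectId"

dfs = [tagged(calexp, "calexp"), tagged(diff, "diff")]
# Join on the shared objectId index; the result has columns
# ("calexp", "psfFlux") and ("diff", "psfFlux").
merged = functools.reduce(lambda d1, d2: d1.join(d2), dfs)
print(merged)
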

1667 

1668class TransformForcedSourceTableConnections(pipeBase.PipelineTaskConnections, 

1669 dimensions=("instrument", "skymap", "patch", "tract")): 

1670 

1671 inputCatalogs = connectionTypes.Input( 

1672 doc="DataFrames of merged ForcedSources produced by WriteForcedSourceTableTask", 

1673 name="mergedForcedSource", 

1674 storageClass="DataFrame", 

1675 dimensions=("instrument", "visit", "detector", "skymap", "tract"), 

1676 multiple=True, 

1677 deferLoad=True 

1678 ) 

1679 referenceCatalog = connectionTypes.Input( 

1680 doc="Reference catalog which was used to seed the forcedPhot. Columns " 

1681 "objectId, detect_isPrimary, detect_isTractInner, detect_isPatchInner " 

1682 "are expected.", 

1683 name="objectTable", 

1684 storageClass="DataFrame", 

1685 dimensions=("tract", "patch", "skymap"), 

1686 deferLoad=True 

1687 ) 

1688 outputCatalog = connectionTypes.Output( 

1689 doc="Narrower, temporally-aggregated, per-patch ForcedSource Table transformed and converted per a " 

1690 "specified set of functors", 

1691 name="forcedSourceTable", 

1692 storageClass="DataFrame", 

1693 dimensions=("tract", "patch", "skymap") 

1694 ) 

1695 

1696 

1697class TransformForcedSourceTableConfig(TransformCatalogBaseConfig, 

1698 pipelineConnections=TransformForcedSourceTableConnections): 

1699 referenceColumns = pexConfig.ListField( 

1700 dtype=str, 

1701 default=["detect_isPrimary", "detect_isTractInner", "detect_isPatchInner"], 

1702 optional=True, 

1703 doc="Columns to pull from reference catalog", 

1704 ) 

1705 keyRef = lsst.pex.config.Field( 

1706 doc="Column on which to join the two input tables on and make the primary key of the output", 

1707 dtype=str, 

1708 default="objectId", 

1709 ) 

1710 key = lsst.pex.config.Field( 

1711 doc="Rename the output DataFrame index to this name", 

1712 dtype=str, 

1713 default="forcedSourceId", 

1714 ) 

1715 

1716 def setDefaults(self): 

1717 super().setDefaults() 

1718 self.functorFile = os.path.join('$PIPE_TASKS_DIR', 'schemas', 'ForcedSource.yaml') 

1719 self.columnsFromDataId = ['tract', 'patch'] 

1720 

1721 

1722class TransformForcedSourceTableTask(TransformCatalogBaseTask): 

1723 """Transform/standardize a ForcedSource catalog 

1724 

1725 Transforms each wide, per-detector forcedSource DataFrame per the 

1726 specification file (per-camera defaults found in ForcedSource.yaml). 

1727 All epochs that overlap the patch are aggregated into one narrow, 

1728 per-patch DataFrame. 

1729 

1730 No de-duplication of rows is performed. Duplicate-resolution flags are 

1731 pulled in from the referenceCatalog: `detect_isPrimary`, 

1732 `detect_isTractInner`, `detect_isPatchInner`, so that the user may de-duplicate 

1733 for analysis or compare duplicates for QA. 

1734 

1735 The resulting table includes multiple bands. Epochs (MJDs) and other useful 

1736 per-visit information can be retrieved by joining with the CcdVisitTable on 

1737 ccdVisitId. 

1738 """ 

1739 _DefaultName = "transformForcedSourceTable" 

1740 ConfigClass = TransformForcedSourceTableConfig 

1741 

1742 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

1743 inputs = butlerQC.get(inputRefs) 

1744 if self.funcs is None: 

1745 raise ValueError("config.functorFile is None. " 

1746 "Must be a valid path to yaml in order to run Task as a PipelineTask.") 

1747 outputs = self.run(inputs['inputCatalogs'], inputs['referenceCatalog'], funcs=self.funcs, 

1748 dataId=outputRefs.outputCatalog.dataId.full) 

1749 

1750 butlerQC.put(outputs, outputRefs) 

1751 

1752 def run(self, inputCatalogs, referenceCatalog, funcs=None, dataId=None, band=None): 

1753 dfs = [] 

1754 ref = referenceCatalog.get(parameters={"columns": self.config.referenceColumns}) 

1755 self.log.info("Aggregating %s input catalogs", len(inputCatalogs)) 

1756 for handle in inputCatalogs: 

1757 result = self.transform(None, handle, funcs, dataId) 

1758 # Filter for only rows that were detected on (overlap) the patch 

1759 dfs.append(result.df.join(ref, how='inner')) 

1760 

1761 outputCatalog = pd.concat(dfs) 

1762 

1763 # Now that we are done joining on config.keyRef, change the index 

1764 # to config.key in the following steps: 

1765 outputCatalog.index.rename(self.config.keyRef, inplace=True) 

1766 # Add config.keyRef to the column list 

1767 outputCatalog.reset_index(inplace=True) 

1768 # Set the forcedSourceId to the index. This is specified in the 

1769 # ForcedSource.yaml 

1770 outputCatalog.set_index("forcedSourceId", inplace=True, verify_integrity=True) 

1771 # Rename it to the config.key 

1772 outputCatalog.index.rename(self.config.key, inplace=True) 

1773 

1774 self.log.info("Made a table of %d columns and %d rows", 

1775 len(outputCatalog.columns), len(outputCatalog)) 

1776 return pipeBase.Struct(outputCatalog=outputCatalog) 

1777 
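The index bookkeeping at the end of run above can be seen in isolation in the sketch below: an inner join against the reference catalog keeps only objects detected on the patch, then objectId is moved into a column and forcedSourceId becomes the unique index (all ids and columns are illustrative):

import pandas as pd

# Transformed forced-source rows, indexed by objectId as the functors produce them.
forced = pd.DataFrame(
    {"forcedSourceId": [501, 502, 503], "psfFlux": [1.0, 2.0, 3.0]},
    index=pd.Index([10, 11, 99], name="objectId"))

# Reference-catalog flags for the objects that exist on this patch.
ref = pd.DataFrame({"detect_isPrimary": [True, False]},
                   index=pd.Index([10, 11], name="objectId"))

# Inner join drops objectId 99, which has no reference row on the patch.
joined = forced.join(ref, how="inner")

# Move objectId from the index into a column, then index on forcedSourceId.
joined.index.rename("objectId", inplace=True)
joined.reset_index(inplace=True)
joined.set_index("forcedSourceId", inplace=True, verify_integrity=True)
print(joined)
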

1778 

1779class ConsolidateTractConnections(pipeBase.PipelineTaskConnections, 

1780 defaultTemplates={"catalogType": ""}, 

1781 dimensions=("instrument", "tract")): 

1782 inputCatalogs = connectionTypes.Input( 

1783 doc="Input per-patch DataFrame Tables to be concatenated", 

1784 name="{catalogType}ForcedSourceTable", 

1785 storageClass="DataFrame", 

1786 dimensions=("tract", "patch", "skymap"), 

1787 multiple=True, 

1788 ) 

1789 

1790 outputCatalog = connectionTypes.Output( 

1791 doc="Output per-tract concatenation of DataFrame Tables", 

1792 name="{catalogType}ForcedSourceTable_tract", 

1793 storageClass="DataFrame", 

1794 dimensions=("tract", "skymap"), 

1795 ) 

1796 

1797 

1798class ConsolidateTractConfig(pipeBase.PipelineTaskConfig, 

1799 pipelineConnections=ConsolidateTractConnections): 

1800 pass 

1801 

1802 

1803class ConsolidateTractTask(pipeBase.PipelineTask): 

1804 """Concatenate any per-patch, dataframe list into a single 

1805 per-tract DataFrame. 

1806 """ 

1807 _DefaultName = 'ConsolidateTract' 

1808 ConfigClass = ConsolidateTractConfig 

1809 

1810 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

1811 inputs = butlerQC.get(inputRefs) 

1812 # Not checking that at least one inputCatalog exists because that would be an 

1813 # empty QG. 

1814 self.log.info("Concatenating %s per-patch %s Tables", 

1815 len(inputs['inputCatalogs']), 

1816 inputRefs.inputCatalogs[0].datasetType.name) 

1817 df = pd.concat(inputs['inputCatalogs']) 

1818 butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs)