Coverage for python/lsst/pipe/tasks/postprocess.py: 27%

649 statements  

coverage.py v7.2.6, created at 2023-05-26 02:56 -0700

1# This file is part of pipe_tasks. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

22__all__ = ["WriteObjectTableConfig", "WriteObjectTableTask", 

23 "WriteSourceTableConfig", "WriteSourceTableTask", 

24 "WriteRecalibratedSourceTableConfig", "WriteRecalibratedSourceTableTask", 

25 "PostprocessAnalysis", 

26 "TransformCatalogBaseConfig", "TransformCatalogBaseTask", 

27 "TransformObjectCatalogConfig", "TransformObjectCatalogTask", 

28 "ConsolidateObjectTableConfig", "ConsolidateObjectTableTask", 

29 "TransformSourceTableConfig", "TransformSourceTableTask", 

30 "ConsolidateVisitSummaryConfig", "ConsolidateVisitSummaryTask", 

31 "ConsolidateSourceTableConfig", "ConsolidateSourceTableTask", 

32 "MakeCcdVisitTableConfig", "MakeCcdVisitTableTask", 

33 "MakeVisitTableConfig", "MakeVisitTableTask", 

34 "WriteForcedSourceTableConfig", "WriteForcedSourceTableTask", 

35 "TransformForcedSourceTableConfig", "TransformForcedSourceTableTask", 

36 "ConsolidateTractConfig", "ConsolidateTractTask"] 

37 

38import functools 

39import pandas as pd 

40import logging 

41import numpy as np 

42import numbers 

43import os 

44 

45import lsst.geom 

46import lsst.pex.config as pexConfig 

47import lsst.pipe.base as pipeBase 

48import lsst.daf.base as dafBase 

49from lsst.pipe.base import connectionTypes 

50import lsst.afw.table as afwTable 

51from lsst.afw.image import ExposureSummaryStats 

52from lsst.meas.base import SingleFrameMeasurementTask, DetectorVisitIdGeneratorConfig 

53from lsst.skymap import BaseSkyMap 

54 

55from .functors import CompositeFunctor, Column 

56 

57log = logging.getLogger(__name__) 

58 

59 

60def flattenFilters(df, noDupCols=['coord_ra', 'coord_dec'], camelCase=False, inputBands=None): 

61 """Flattens a dataframe with multilevel column index. 

62 """ 

63 newDf = pd.DataFrame() 

64 # band is the level 0 index 

65 dfBands = df.columns.unique(level=0).values 

66 for band in dfBands: 

67 subdf = df[band] 

68 columnFormat = '{0}{1}' if camelCase else '{0}_{1}' 

69 newColumns = {c: columnFormat.format(band, c) 

70 for c in subdf.columns if c not in noDupCols} 

71 cols = list(newColumns.keys()) 

72 newDf = pd.concat([newDf, subdf[cols].rename(columns=newColumns)], axis=1) 

73 

74 # Band must be present in the input and output or else the column is all NaN: 

75 presentBands = dfBands if inputBands is None else list(set(inputBands).intersection(dfBands)) 

76 # Get the unexploded columns from any present band's partition 

77 noDupDf = df[presentBands[0]][noDupCols] 

78 newDf = pd.concat([noDupDf, newDf], axis=1) 

79 return newDf 

80 
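# --- Illustrative sketch (not part of postprocess.py) -----------------------
# What flattenFilters does: collapse a (band, column) MultiIndex into flat,
# band-prefixed column names, keeping a single copy of the shared coordinate
# columns. Column names and values below are hypothetical; the sketch assumes
# the flattenFilters function above is in scope.
import pandas as pd

multi = pd.DataFrame({
    ("g", "coord_ra"): [10.0], ("g", "coord_dec"): [-5.0], ("g", "psfFlux"): [1.2],
    ("r", "coord_ra"): [10.0], ("r", "coord_dec"): [-5.0], ("r", "psfFlux"): [3.4],
})
flat = flattenFilters(multi)
# flat.columns -> ['coord_ra', 'coord_dec', 'g_psfFlux', 'r_psfFlux']
# -----------------------------------------------------------------------------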

81 

82class WriteObjectTableConnections(pipeBase.PipelineTaskConnections, 

83 defaultTemplates={"coaddName": "deep"}, 

84 dimensions=("tract", "patch", "skymap")): 

85 inputCatalogMeas = connectionTypes.Input( 

86 doc="Catalog of source measurements on the deepCoadd.", 

87 dimensions=("tract", "patch", "band", "skymap"), 

88 storageClass="SourceCatalog", 

89 name="{coaddName}Coadd_meas", 

90 multiple=True 

91 ) 

92 inputCatalogForcedSrc = connectionTypes.Input( 

93 doc="Catalog of forced measurements (shape and position parameters held fixed) on the deepCoadd.", 

94 dimensions=("tract", "patch", "band", "skymap"), 

95 storageClass="SourceCatalog", 

96 name="{coaddName}Coadd_forced_src", 

97 multiple=True 

98 ) 

99 inputCatalogRef = connectionTypes.Input( 

100 doc="Catalog marking the primary detection (which band provides a good shape and position)" 

101 "for each detection in deepCoadd_mergeDet.", 

102 dimensions=("tract", "patch", "skymap"), 

103 storageClass="SourceCatalog", 

104 name="{coaddName}Coadd_ref" 

105 ) 

106 outputCatalog = connectionTypes.Output( 

107 doc="A vertical concatenation of the deepCoadd_{ref|meas|forced_src} catalogs, " 

108 "stored as a DataFrame with a multi-level column index per-patch.", 

109 dimensions=("tract", "patch", "skymap"), 

110 storageClass="DataFrame", 

111 name="{coaddName}Coadd_obj" 

112 ) 

113 

114 

115class WriteObjectTableConfig(pipeBase.PipelineTaskConfig, 

116 pipelineConnections=WriteObjectTableConnections): 

117 engine = pexConfig.Field( 

118 dtype=str, 

119 default="pyarrow", 

120 doc="Parquet engine for writing (pyarrow or fastparquet)", 

121 deprecated="This config is no longer used, and will be removed after v26." 

122 ) 

123 coaddName = pexConfig.Field( 

124 dtype=str, 

125 default="deep", 

126 doc="Name of coadd" 

127 ) 

128 

129 

130class WriteObjectTableTask(pipeBase.PipelineTask): 

131 """Write filter-merged source tables as a DataFrame in parquet format. 

132 """ 

133 _DefaultName = "writeObjectTable" 

134 ConfigClass = WriteObjectTableConfig 

135 

136 # Names of table datasets to be merged 

137 inputDatasets = ('forced_src', 'meas', 'ref') 

138 

139 # Tag of output dataset written by `MergeSourcesTask.write` 

140 outputDataset = 'obj' 

141 

142 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

143 inputs = butlerQC.get(inputRefs) 

144 

145 measDict = {ref.dataId['band']: {'meas': cat} for ref, cat in 

146 zip(inputRefs.inputCatalogMeas, inputs['inputCatalogMeas'])} 

147 forcedSourceDict = {ref.dataId['band']: {'forced_src': cat} for ref, cat in 

148 zip(inputRefs.inputCatalogForcedSrc, inputs['inputCatalogForcedSrc'])} 

149 

150 catalogs = {} 

151 for band in measDict.keys(): 

152 catalogs[band] = {'meas': measDict[band]['meas'], 

153 'forced_src': forcedSourceDict[band]['forced_src'], 

154 'ref': inputs['inputCatalogRef']} 

155 dataId = butlerQC.quantum.dataId 

156 df = self.run(catalogs=catalogs, tract=dataId['tract'], patch=dataId['patch']) 

157 outputs = pipeBase.Struct(outputCatalog=df) 

158 butlerQC.put(outputs, outputRefs) 

159 

160 def run(self, catalogs, tract, patch): 

161 """Merge multiple catalogs. 

162 

163 Parameters 

164 ---------- 

165 catalogs : `dict` 

166 Mapping from filter names to dict of catalogs. 

167 tract : int 

168 tractId to use for the tractId column. 

169 patch : str 

170 patchId to use for the patchId column. 

171 

172 Returns 

173 ------- 

174 catalog : `pandas.DataFrame` 

175 Merged dataframe. 

176 """ 

177 dfs = [] 

178 for filt, tableDict in catalogs.items(): 

179 for dataset, table in tableDict.items(): 

180 # Convert afwTable to pandas DataFrame 

181 df = table.asAstropy().to_pandas().set_index('id', drop=True) 

182 

183 # Sort columns by name, to ensure matching schema among patches 

184 df = df.reindex(sorted(df.columns), axis=1) 

185 df = df.assign(tractId=tract, patchId=patch) 

186 

187 # Make columns a 3-level MultiIndex 

188 df.columns = pd.MultiIndex.from_tuples([(dataset, filt, c) for c in df.columns], 

189 names=('dataset', 'band', 'column')) 

190 dfs.append(df) 

191 

192 # We do this dance and not `pd.concat(dfs)` because the pandas 

193 # concatenation uses an excessive amount of memory here. 

194 catalog = functools.reduce(lambda d1, d2: d1.join(d2), dfs) 

195 return catalog 

196 

197 
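# --- Illustrative sketch (not part of postprocess.py) -----------------------
# The column-wise merge used in WriteObjectTableTask.run: each per-(dataset,
# band) frame gets a 3-level column MultiIndex and the frames are then joined
# on their shared 'id' index with functools.reduce, rather than pd.concat.
# Data and names below are hypothetical.
import functools
import pandas as pd

frames = []
for dataset, band, flux in [("meas", "g", 1.0), ("forced_src", "r", 2.0)]:
    df = pd.DataFrame({"flux": [flux]}, index=pd.Index([42], name="id"))
    df.columns = pd.MultiIndex.from_tuples(
        [(dataset, band, c) for c in df.columns], names=("dataset", "band", "column"))
    frames.append(df)
merged = functools.reduce(lambda d1, d2: d1.join(d2), frames)
# merged: one row (id=42), columns ('meas', 'g', 'flux') and ('forced_src', 'r', 'flux')
# -----------------------------------------------------------------------------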

198class WriteSourceTableConnections(pipeBase.PipelineTaskConnections, 

199 defaultTemplates={"catalogType": ""}, 

200 dimensions=("instrument", "visit", "detector")): 

201 

202 catalog = connectionTypes.Input( 

203 doc="Input full-depth catalog of sources produced by CalibrateTask", 

204 name="{catalogType}src", 

205 storageClass="SourceCatalog", 

206 dimensions=("instrument", "visit", "detector") 

207 ) 

208 outputCatalog = connectionTypes.Output( 

209 doc="Catalog of sources, `src` in DataFrame/Parquet format. The 'id' column is " 

210 "replaced with an index; all other columns are unchanged.", 

211 name="{catalogType}source", 

212 storageClass="DataFrame", 

213 dimensions=("instrument", "visit", "detector") 

214 ) 

215 

216 

217class WriteSourceTableConfig(pipeBase.PipelineTaskConfig, 

218 pipelineConnections=WriteSourceTableConnections): 

219 idGenerator = DetectorVisitIdGeneratorConfig.make_field() 

220 

221 

222class WriteSourceTableTask(pipeBase.PipelineTask): 

223 """Write source table to DataFrame Parquet format. 

224 """ 

225 _DefaultName = "writeSourceTable" 

226 ConfigClass = WriteSourceTableConfig 

227 

228 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

229 inputs = butlerQC.get(inputRefs) 

230 inputs['ccdVisitId'] = self.config.idGenerator.apply(butlerQC.quantum.dataId).catalog_id 

231 result = self.run(**inputs) 

232 outputs = pipeBase.Struct(outputCatalog=result.table) 

233 butlerQC.put(outputs, outputRefs) 

234 

235 def run(self, catalog, ccdVisitId=None, **kwargs): 

236 """Convert `src` catalog to DataFrame 

237 

238 Parameters 

239 ---------- 

240 catalog : `afwTable.SourceCatalog` 

241 Catalog to be converted. 

242 ccdVisitId : `int` 

243 ccdVisitId to be added as a column 

244 **kwargs 

245 Additional keyword arguments are ignored as a convenience for 

246 subclasses that pass the same arguments to several different 

247 methods. 

248 

249 Returns 

250 ------- 

251 result : `lsst.pipe.base.Struct` 

252 ``table`` 

253 `DataFrame` version of the input catalog 

254 """ 

255 self.log.info("Generating DataFrame from src catalog ccdVisitId=%s", ccdVisitId) 

256 df = catalog.asAstropy().to_pandas().set_index('id', drop=True) 

257 df['ccdVisitId'] = ccdVisitId 

258 return pipeBase.Struct(table=df) 

259 

260 

261class WriteRecalibratedSourceTableConnections(WriteSourceTableConnections, 

262 defaultTemplates={"catalogType": "", 

263 "skyWcsName": "gbdesAstrometricFit", 

264 "photoCalibName": "fgcm"}, 

265 dimensions=("instrument", "visit", "detector", "skymap")): 

266 skyMap = connectionTypes.Input( 

267 doc="skyMap needed to choose which tract-level calibrations to use when multiple available", 

268 name=BaseSkyMap.SKYMAP_DATASET_TYPE_NAME, 

269 storageClass="SkyMap", 

270 dimensions=("skymap",), 

271 ) 

272 exposure = connectionTypes.Input( 

273 doc="Input exposure to perform photometry on.", 

274 name="calexp", 

275 storageClass="ExposureF", 

276 dimensions=["instrument", "visit", "detector"], 

277 ) 

278 externalSkyWcsTractCatalog = connectionTypes.Input( 

279 doc=("Per-tract, per-visit wcs calibrations. These catalogs use the detector " 

280 "id for the catalog id, sorted on id for fast lookup."), 

281 name="{skyWcsName}SkyWcsCatalog", 

282 storageClass="ExposureCatalog", 

283 dimensions=["instrument", "visit", "tract"], 

284 multiple=True 

285 ) 

286 externalSkyWcsGlobalCatalog = connectionTypes.Input( 

287 doc=("Per-visit wcs calibrations computed globally (with no tract information). " 

288 "These catalogs use the detector id for the catalog id, sorted on id for " 

289 "fast lookup."), 

290 name="finalVisitSummary", 

291 storageClass="ExposureCatalog", 

292 dimensions=["instrument", "visit"], 

293 ) 

294 externalPhotoCalibTractCatalog = connectionTypes.Input( 

295 doc=("Per-tract, per-visit photometric calibrations. These catalogs use the " 

296 "detector id for the catalog id, sorted on id for fast lookup."), 

297 name="{photoCalibName}PhotoCalibCatalog", 

298 storageClass="ExposureCatalog", 

299 dimensions=["instrument", "visit", "tract"], 

300 multiple=True 

301 ) 

302 externalPhotoCalibGlobalCatalog = connectionTypes.Input( 

303 doc=("Per-visit photometric calibrations computed globally (with no tract " 

304 "information). These catalogs use the detector id for the catalog id, " 

305 "sorted on id for fast lookup."), 

306 name="finalVisitSummary", 

307 storageClass="ExposureCatalog", 

308 dimensions=["instrument", "visit"], 

309 ) 

310 

311 def __init__(self, *, config=None): 

312 super().__init__(config=config) 

313 # Same connection boilerplate as all other applications of 

314 # Global/Tract calibrations 

315 if config.doApplyExternalSkyWcs and config.doReevaluateSkyWcs: 

316 if config.useGlobalExternalSkyWcs: 

317 self.inputs.remove("externalSkyWcsTractCatalog") 

318 else: 

319 self.inputs.remove("externalSkyWcsGlobalCatalog") 

320 else: 

321 self.inputs.remove("externalSkyWcsTractCatalog") 

322 self.inputs.remove("externalSkyWcsGlobalCatalog") 

323 if config.doApplyExternalPhotoCalib and config.doReevaluatePhotoCalib: 

324 if config.useGlobalExternalPhotoCalib: 

325 self.inputs.remove("externalPhotoCalibTractCatalog") 

326 else: 

327 self.inputs.remove("externalPhotoCalibGlobalCatalog") 

328 else: 

329 self.inputs.remove("externalPhotoCalibTractCatalog") 

330 self.inputs.remove("externalPhotoCalibGlobalCatalog") 

331 

332 
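# --- Illustrative sketch (not part of postprocess.py) -----------------------
# The connection pruning in __init__ above, restated as a pure function: given
# the config flags of WriteRecalibratedSourceTableConfig, return which external
# calibration inputs survive. The function name is hypothetical.
def externalCalibInputsKept(doApplyExternalSkyWcs, doReevaluateSkyWcs, useGlobalExternalSkyWcs,
                            doApplyExternalPhotoCalib, doReevaluatePhotoCalib,
                            useGlobalExternalPhotoCalib):
    """Return the set of external-calibration connection names that are read."""
    kept = set()
    if doApplyExternalSkyWcs and doReevaluateSkyWcs:
        kept.add("externalSkyWcsGlobalCatalog" if useGlobalExternalSkyWcs
                 else "externalSkyWcsTractCatalog")
    if doApplyExternalPhotoCalib and doReevaluatePhotoCalib:
        kept.add("externalPhotoCalibGlobalCatalog" if useGlobalExternalPhotoCalib
                 else "externalPhotoCalibTractCatalog")
    return kept

# With every flag at its default of True, only the two "Global" catalogs are read.
# -----------------------------------------------------------------------------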

333class WriteRecalibratedSourceTableConfig(WriteSourceTableConfig, 

334 pipelineConnections=WriteRecalibratedSourceTableConnections): 

335 

336 doReevaluatePhotoCalib = pexConfig.Field( 

337 dtype=bool, 

338 default=True, 

339 doc=("Add or replace local photoCalib columns") 

340 ) 

341 doReevaluateSkyWcs = pexConfig.Field( 

342 dtype=bool, 

343 default=True, 

344 doc=("Add or replace local WCS columns and update the coord columns, coord_ra and coord_dec") 

345 ) 

346 doApplyExternalPhotoCalib = pexConfig.Field( 

347 dtype=bool, 

348 default=True, 

349 doc=("If and only if doReevaluatePhotoCalib, apply the photometric calibrations from an external ", 

350 "algorithm such as FGCM or jointcal, else use the photoCalib already attached to the exposure."), 

351 ) 

352 doApplyExternalSkyWcs = pexConfig.Field( 

353 dtype=bool, 

354 default=True, 

355 doc=("if and only if doReevaluateSkyWcs, apply the WCS from an external algorithm such as jointcal, ", 

356 "else use the wcs already attached to the exposure."), 

357 ) 

358 useGlobalExternalPhotoCalib = pexConfig.Field( 

359 dtype=bool, 

360 default=True, 

361 doc=("When using doApplyExternalPhotoCalib, use 'global' calibrations " 

362 "that are not run per-tract. When False, use per-tract photometric " 

363 "calibration files.") 

364 ) 

365 useGlobalExternalSkyWcs = pexConfig.Field( 

366 dtype=bool, 

367 default=True, 

368 doc=("When using doApplyExternalSkyWcs, use 'global' calibrations " 

369 "that are not run per-tract. When False, use per-tract wcs " 

370 "files.") 

371 ) 

372 idGenerator = DetectorVisitIdGeneratorConfig.make_field() 

373 

374 def validate(self): 

375 super().validate() 

376 if self.doApplyExternalSkyWcs and not self.doReevaluateSkyWcs: 

377 log.warning("doApplyExternalSkyWcs=True but doReevaluateSkyWcs=False. " 

378 "External SkyWcs will not be read or evaluated.") 

379 if self.doApplyExternalPhotoCalib and not self.doReevaluatePhotoCalib: 

380 log.warning("doApplyExternalPhotoCalib=True but doReevaluatePhotoCalib=False. " 

381 "External PhotoCalib will not be read or evaluated.") 

382 

383 

384class WriteRecalibratedSourceTableTask(WriteSourceTableTask): 

385 """Write source table to DataFrame Parquet format. 

386 """ 

387 _DefaultName = "writeRecalibratedSourceTable" 

388 ConfigClass = WriteRecalibratedSourceTableConfig 

389 

390 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

391 inputs = butlerQC.get(inputRefs) 

392 

393 idGenerator = self.config.idGenerator.apply(butlerQC.quantum.dataId) 

394 inputs['idGenerator'] = idGenerator 

395 inputs['ccdVisitId'] = idGenerator.catalog_id 

396 

397 if self.config.doReevaluatePhotoCalib or self.config.doReevaluateSkyWcs: 

398 if self.config.doApplyExternalPhotoCalib or self.config.doApplyExternalSkyWcs: 

399 inputs['exposure'] = self.attachCalibs(inputRefs, **inputs) 

400 

401 inputs['catalog'] = self.addCalibColumns(**inputs) 

402 

403 result = self.run(**inputs) 

404 outputs = pipeBase.Struct(outputCatalog=result.table) 

405 butlerQC.put(outputs, outputRefs) 

406 

407 def attachCalibs(self, inputRefs, skyMap, exposure, externalSkyWcsGlobalCatalog=None, 

408 externalSkyWcsTractCatalog=None, externalPhotoCalibGlobalCatalog=None, 

409 externalPhotoCalibTractCatalog=None, **kwargs): 

410 """Apply external calibrations to exposure per configuration 

411 

412 When multiple tract-level calibrations overlap, select the one whose 

413 center is closest to the detector center. 

414 

415 Parameters 

416 ---------- 

417 inputRefs : `lsst.pipe.base.InputQuantizedConnection` 

418 Input references, used to look up the dataIds of the tract-level calibs. 

419 skyMap : `lsst.skymap.SkyMap` 

420 exposure : `lsst.afw.image.exposure.Exposure` 

421 Input exposure to adjust calibrations. 

422 externalSkyWcsGlobalCatalog : `lsst.afw.table.ExposureCatalog`, optional 

423 Exposure catalog with external skyWcs to be applied per config 

424 externalSkyWcsTractCatalog : `lsst.afw.table.ExposureCatalog`, optional 

425 Exposure catalog with external skyWcs to be applied per config 

426 externalPhotoCalibGlobalCatalog : `lsst.afw.table.ExposureCatalog`, optional 

427 Exposure catalog with external photoCalib to be applied per config 

428 externalPhotoCalibTractCatalog : `lsst.afw.table.ExposureCatalog`, optional 

429 Exposure catalog with external photoCalib to be applied per config 

430 **kwargs 

431 Additional keyword arguments are ignored to facilitate passing the 

432 same arguments to several methods. 

433 

434 Returns 

435 ------- 

436 exposure : `lsst.afw.image.exposure.Exposure` 

437 Exposure with adjusted calibrations. 

438 """ 

439 if not self.config.doApplyExternalSkyWcs: 

440 # Do not modify the exposure's SkyWcs 

441 externalSkyWcsCatalog = None 

442 elif self.config.useGlobalExternalSkyWcs: 

443 # Use the global external SkyWcs 

444 externalSkyWcsCatalog = externalSkyWcsGlobalCatalog 

445 self.log.info('Applying global SkyWcs') 

446 else: 

447 # use tract-level external SkyWcs from the closest overlapping tract 

448 inputRef = getattr(inputRefs, 'externalSkyWcsTractCatalog') 

449 tracts = [ref.dataId['tract'] for ref in inputRef] 

450 if len(tracts) == 1: 

451 ind = 0 

452 self.log.info('Applying tract-level SkyWcs from tract %s', tracts[ind]) 

453 else: 

454 if exposure.getWcs() is None: # TODO: could this look-up use the externalPhotoCalib? 

455 raise ValueError("Trying to locate nearest tract, but exposure.wcs is None.") 

456 ind = self.getClosestTract(tracts, skyMap, 

457 exposure.getBBox(), exposure.getWcs()) 

458 self.log.info('Multiple overlapping externalSkyWcsTractCatalogs found (%s). ' 

459 'Applying closest to detector center: tract=%s', str(tracts), tracts[ind]) 

460 

461 externalSkyWcsCatalog = externalSkyWcsTractCatalog[ind] 

462 

463 if not self.config.doApplyExternalPhotoCalib: 

464 # Do not modify the exposure's PhotoCalib 

465 externalPhotoCalibCatalog = None 

466 elif self.config.useGlobalExternalPhotoCalib: 

467 # Use the global external PhotoCalib 

468 externalPhotoCalibCatalog = externalPhotoCalibGlobalCatalog 

469 self.log.info('Applying global PhotoCalib') 

470 else: 

471 # use tract-level external PhotoCalib from the closest overlapping tract 

472 inputRef = getattr(inputRefs, 'externalPhotoCalibTractCatalog') 

473 tracts = [ref.dataId['tract'] for ref in inputRef] 

474 if len(tracts) == 1: 

475 ind = 0 

476 self.log.info('Applying tract-level PhotoCalib from tract %s', tracts[ind]) 

477 else: 

478 ind = self.getClosestTract(tracts, skyMap, 

479 exposure.getBBox(), exposure.getWcs()) 

480 self.log.info('Multiple overlapping externalPhotoCalibTractCatalogs found (%s). ' 

481 'Applying closest to detector center: tract=%s', str(tracts), tracts[ind]) 

482 

483 externalPhotoCalibCatalog = externalPhotoCalibTractCatalog[ind] 

484 

485 return self.prepareCalibratedExposure(exposure, externalSkyWcsCatalog, externalPhotoCalibCatalog) 

486 

487 def getClosestTract(self, tracts, skyMap, bbox, wcs): 

488 """Find the index of the tract closest to detector from list of tractIds 

489 

490 Parameters 

491 ---------- 

492 tracts : `list` [`int`] 

493 Iterable of integer tractIds. 

494 skyMap : `lsst.skymap.SkyMap` 

495 skyMap to look up tract geometry and wcs. 

496 bbox : `lsst.geom.Box2I` 

497 Detector bbox, the center of which will be compared to tract centers. 

498 wcs : `lsst.afw.geom.SkyWcs` 

499 Detector Wcs object to map the detector center to SkyCoord 

500 

501 Returns 

502 ------- 

503 index : `int` 

504 """ 

505 if len(tracts) == 1: 

506 return 0 

507 

508 center = wcs.pixelToSky(bbox.getCenter()) 

509 sep = [] 

510 for tractId in tracts: 

511 tract = skyMap[tractId] 

512 tractCenter = tract.getWcs().pixelToSky(tract.getBBox().getCenter()) 

513 sep.append(center.separation(tractCenter)) 

514 

515 return np.argmin(sep) 

516 
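# --- Illustrative sketch (not part of postprocess.py) -----------------------
# The selection rule in getClosestTract above, stripped of the skyMap/WCS
# plumbing: pick the candidate whose center has the smallest angular separation
# from the detector center. Tract ids and coordinates are hypothetical; the
# sketch assumes an LSST stack environment for lsst.geom.
import numpy as np
import lsst.geom

detectorCenter = lsst.geom.SpherePoint(150.0, 2.0, lsst.geom.degrees)
tractCenters = {9813: lsst.geom.SpherePoint(150.1, 2.2, lsst.geom.degrees),
                9814: lsst.geom.SpherePoint(151.5, 1.0, lsst.geom.degrees)}
tracts = list(tractCenters)
sep = [detectorCenter.separation(tractCenters[t]).asDegrees() for t in tracts]
closestTract = tracts[np.argmin(sep)]   # -> 9813
# -----------------------------------------------------------------------------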

517 def prepareCalibratedExposure(self, exposure, externalSkyWcsCatalog=None, externalPhotoCalibCatalog=None): 

518 """Prepare a calibrated exposure and apply external calibrations 

519 if so configured. 

520 

521 Parameters 

522 ---------- 

523 exposure : `lsst.afw.image.exposure.Exposure` 

524 Input exposure to adjust calibrations. 

525 externalSkyWcsCatalog : `lsst.afw.table.ExposureCatalog`, optional 

526 Exposure catalog with external skyWcs to be applied 

527 if config.doApplyExternalSkyWcs=True. Catalog uses the detector id 

528 for the catalog id, sorted on id for fast lookup. 

529 externalPhotoCalibCatalog : `lsst.afw.table.ExposureCatalog`, optional 

530 Exposure catalog with external photoCalib to be applied 

531 if config.doApplyExternalPhotoCalib=True. Catalog uses the detector 

532 id for the catalog id, sorted on id for fast lookup. 

533 

534 Returns 

535 ------- 

536 exposure : `lsst.afw.image.exposure.Exposure` 

537 Exposure with adjusted calibrations. 

538 """ 

539 detectorId = exposure.getInfo().getDetector().getId() 

540 

541 if externalPhotoCalibCatalog is not None: 

542 row = externalPhotoCalibCatalog.find(detectorId) 

543 if row is None: 

544 self.log.warning("Detector id %s not found in externalPhotoCalibCatalog; " 

545 "Using original photoCalib.", detectorId) 

546 else: 

547 photoCalib = row.getPhotoCalib() 

548 if photoCalib is None: 

549 self.log.warning("Detector id %s has None for photoCalib in externalPhotoCalibCatalog; " 

550 "Using original photoCalib.", detectorId) 

551 else: 

552 exposure.setPhotoCalib(photoCalib) 

553 

554 if externalSkyWcsCatalog is not None: 

555 row = externalSkyWcsCatalog.find(detectorId) 

556 if row is None: 

557 self.log.warning("Detector id %s not found in externalSkyWcsCatalog; " 

558 "Using original skyWcs.", detectorId) 

559 else: 

560 skyWcs = row.getWcs() 

561 if skyWcs is None: 

562 self.log.warning("Detector id %s has None for skyWcs in externalSkyWcsCatalog; " 

563 "Using original skyWcs.", detectorId) 

564 else: 

565 exposure.setWcs(skyWcs) 

566 

567 return exposure 

568 

569 def addCalibColumns(self, catalog, exposure, idGenerator, **kwargs): 

570 """Add replace columns with calibs evaluated at each centroid 

571 

572 Add or replace 'base_LocalWcs' `base_LocalPhotoCalib' columns in a 

573 a source catalog, by rerunning the plugins. 

574 

575 Parameters 

576 ---------- 

577 catalog : `lsst.afw.table.SourceCatalog` 

578 catalog to which calib columns will be added 

579 exposure : `lsst.afw.image.exposure.Exposure` 

580 Exposure with attached PhotoCalibs and SkyWcs attributes to be 

581 reevaluated at local centroids. Pixels are not required. 

582 idGenerator : `lsst.meas.base.IdGenerator` 

583 Object that generates Source IDs and random seeds. 

584 **kwargs 

585 Additional keyword arguments are ignored to facilitate passing the 

586 same arguments to several methods. 

587 

588 Returns 

589 ------- 

590 newCat: `lsst.afw.table.SourceCatalog` 

591 Source Catalog with requested local calib columns 

592 """ 

593 measureConfig = SingleFrameMeasurementTask.ConfigClass() 

594 measureConfig.doReplaceWithNoise = False 

595 

596 # Clear all slots, because we aren't running the relevant plugins. 

597 for slot in measureConfig.slots: 

598 setattr(measureConfig.slots, slot, None) 

599 

600 measureConfig.plugins.names = [] 

601 if self.config.doReevaluateSkyWcs: 

602 measureConfig.plugins.names.add('base_LocalWcs') 

603 self.log.info("Re-evaluating base_LocalWcs plugin") 

604 if self.config.doReevaluatePhotoCalib: 

605 measureConfig.plugins.names.add('base_LocalPhotoCalib') 

606 self.log.info("Re-evaluating base_LocalPhotoCalib plugin") 

607 pluginsNotToCopy = tuple(measureConfig.plugins.names) 

608 

609 # Create a new schema and catalog 

610 # Copy all columns from original except for the ones to reevaluate 

611 aliasMap = catalog.schema.getAliasMap() 

612 mapper = afwTable.SchemaMapper(catalog.schema) 

613 for item in catalog.schema: 

614 if not item.field.getName().startswith(pluginsNotToCopy): 

615 mapper.addMapping(item.key) 

616 

617 schema = mapper.getOutputSchema() 

618 measurement = SingleFrameMeasurementTask(config=measureConfig, schema=schema) 

619 schema.setAliasMap(aliasMap) 

620 newCat = afwTable.SourceCatalog(schema) 

621 newCat.extend(catalog, mapper=mapper) 

622 

623 # Fluxes in sourceCatalogs are in counts, so there are no fluxes to 

624 # update here. LocalPhotoCalibs are applied during transform tasks. 

625 # Update coord_ra/coord_dec, which are expected to be positions on the 

626 # sky and are used as such in sdm tables without transform 

627 if self.config.doReevaluateSkyWcs and exposure.wcs is not None: 

628 afwTable.updateSourceCoords(exposure.wcs, newCat) 

629 

630 measurement.run(measCat=newCat, exposure=exposure, exposureId=idGenerator.catalog_id) 

631 

632 return newCat 

633 

634 

635class PostprocessAnalysis(object): 

636 """Calculate columns from DataFrames or handles storing DataFrames. 

637 

638 This object manages and organizes an arbitrary set of computations 

639 on a catalog. The catalog is defined by a 

640 `DeferredDatasetHandle` or `InMemoryDatasetHandle` object 

641 (or list thereof), such as a ``deepCoadd_obj`` dataset, and the 

642 computations are defined by a collection of `lsst.pipe.tasks.functors.Functor` 

643 objects (or, equivalently, a ``CompositeFunctor``); a schematic usage sketch follows this class. 

644 

645 After the object is initialized, accessing the ``.df`` attribute (which 

646 holds the `pandas.DataFrame` containing the results of the calculations) 

647 triggers computation of said dataframe. 

648 

649 One of the conveniences of using this object is the ability to define a 

650 desired common filter for all functors. This enables the same functor 

651 collection to be passed to several different `PostprocessAnalysis` objects 

652 without having to change the original functor collection, since the ``filt`` 

653 keyword argument of this object triggers an overwrite of the ``filt`` 

654 property for all functors in the collection. 

655 

656 This object also allows a list of refFlags to be passed, and defines a set 

657 of default refFlags that are always included even if not requested. 

658 

659 If a list of DataFrames or Handles is passed, rather than a single one, 

660 then the calculations will be mapped over all the input catalogs. In 

661 principle, it should be straightforward to parallelize this activity, but 

662 initial tests have failed (see TODO in code comments). 

663 

664 Parameters 

665 ---------- 

666 handles : `lsst.daf.butler.DeferredDatasetHandle` or 

667 `lsst.pipe.base.InMemoryDatasetHandle` or 

668 list of these. 

669 Source catalog(s) for computation. 

670 functors : `list`, `dict`, or `~lsst.pipe.tasks.functors.CompositeFunctor` 

671 Computations to do (functors that act on ``handles``). 

672 If a dict, the output 

673 DataFrame will have columns keyed accordingly. 

674 If a list, the column keys will come from the 

675 ``.shortname`` attribute of each functor. 

676 

677 filt : `str`, optional 

678 Filter in which to calculate. If provided, 

679 this will overwrite any existing ``.filt`` attribute 

680 of the provided functors. 

681 

682 flags : `list`, optional 

683 List of flags (per-band) to include in output table. 

684 Taken from the ``meas`` dataset if applied to a multilevel Object Table. 

685 

686 refFlags : `list`, optional 

687 List of refFlags (only reference band) to include in output table. 

688 

689 forcedFlags : `list`, optional 

690 List of flags (per-band) to include in output table. 

691 Taken from the ``forced_src`` dataset if applied to a 

692 multilevel Object Table. Intended for flags from measurement plugins 

693 only run during multi-band forced-photometry. 

694 """ 

695 _defaultRefFlags = [] 

696 _defaultFuncs = () 

697 

698 def __init__(self, handles, functors, filt=None, flags=None, refFlags=None, forcedFlags=None): 

699 self.handles = handles 

700 self.functors = functors 

701 

702 self.filt = filt 

703 self.flags = list(flags) if flags is not None else [] 

704 self.forcedFlags = list(forcedFlags) if forcedFlags is not None else [] 

705 self.refFlags = list(self._defaultRefFlags) 

706 if refFlags is not None: 

707 self.refFlags += list(refFlags) 

708 

709 self._df = None 

710 

711 @property 

712 def defaultFuncs(self): 

713 funcs = dict(self._defaultFuncs) 

714 return funcs 

715 

716 @property 

717 def func(self): 

718 additionalFuncs = self.defaultFuncs 

719 additionalFuncs.update({flag: Column(flag, dataset='forced_src') for flag in self.forcedFlags}) 

720 additionalFuncs.update({flag: Column(flag, dataset='ref') for flag in self.refFlags}) 

721 additionalFuncs.update({flag: Column(flag, dataset='meas') for flag in self.flags}) 

722 

723 if isinstance(self.functors, CompositeFunctor): 

724 func = self.functors 

725 else: 

726 func = CompositeFunctor(self.functors) 

727 

728 func.funcDict.update(additionalFuncs) 

729 func.filt = self.filt 

730 

731 return func 

732 

733 @property 

734 def noDupCols(self): 

735 return [name for name, func in self.func.funcDict.items() if func.noDup or func.dataset == 'ref'] 

736 

737 @property 

738 def df(self): 

739 if self._df is None: 

740 self.compute() 

741 return self._df 

742 

743 def compute(self, dropna=False, pool=None): 

744 # map over multiple handles 

745 if type(self.handles) in (list, tuple): 

746 if pool is None: 

747 dflist = [self.func(handle, dropna=dropna) for handle in self.handles] 

748 else: 

749 # TODO: Figure out why this doesn't work (pyarrow pickling 

750 # issues?) 

751 dflist = pool.map(functools.partial(self.func, dropna=dropna), self.handles) 

752 self._df = pd.concat(dflist) 

753 else: 

754 self._df = self.func(self.handles, dropna=dropna) 

755 

756 return self._df 

757 

758 
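# --- Illustrative sketch (not part of postprocess.py) -----------------------
# PostprocessAnalysis maps a named collection of functors over a catalog and
# exposes the result through its ``.df`` property. A real run needs butler or
# in-memory dataset handles plus the functor classes from
# lsst.pipe.tasks.functors; the plain-pandas stand-in below only mimics the
# "dict of named column computations -> output DataFrame" pattern. Column
# names and the toy magnitude conversion are hypothetical.
import numpy as np
import pandas as pd

catalog = pd.DataFrame({"base_PsfFlux_instFlux": [100.0, 250.0],
                        "coord_ra": [150.0, 150.1]})
computations = {
    "psfMag": lambda df: -2.5 * np.log10(df["base_PsfFlux_instFlux"]),
    "ra": lambda df: df["coord_ra"],
}
result = pd.DataFrame({name: func(catalog) for name, func in computations.items()})
# result has columns 'psfMag' and 'ra', one row per input source.
# -----------------------------------------------------------------------------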

759class TransformCatalogBaseConnections(pipeBase.PipelineTaskConnections, 

760 dimensions=()): 

761 """Expected Connections for subclasses of TransformCatalogBaseTask. 

762 

763 Must be subclassed. 

764 """ 

765 inputCatalog = connectionTypes.Input( 

766 name="", 

767 storageClass="DataFrame", 

768 ) 

769 outputCatalog = connectionTypes.Output( 

770 name="", 

771 storageClass="DataFrame", 

772 ) 

773 

774 

775class TransformCatalogBaseConfig(pipeBase.PipelineTaskConfig, 

776 pipelineConnections=TransformCatalogBaseConnections): 

777 functorFile = pexConfig.Field( 

778 dtype=str, 

779 doc="Path to YAML file specifying Science Data Model functors to use " 

780 "when copying columns and computing calibrated values.", 

781 default=None, 

782 optional=True 

783 ) 

784 primaryKey = pexConfig.Field( 

785 dtype=str, 

786 doc="Name of column to be set as the DataFrame index. If None, the index" 

787 "will be named `id`", 

788 default=None, 

789 optional=True 

790 ) 

791 columnsFromDataId = pexConfig.ListField( 

792 dtype=str, 

793 default=None, 

794 optional=True, 

795 doc="Columns to extract from the dataId", 

796 ) 

797 

798 

799class TransformCatalogBaseTask(pipeBase.PipelineTask): 

800 """Base class for transforming/standardizing a catalog 

801 

802 by applying functors that convert units and apply calibrations. 

803 The purpose of this task is to perform a set of computations on 

804 an input ``DeferredDatasetHandle`` or ``InMemoryDatasetHandle`` that holds 

805 a ``DataFrame`` dataset (such as ``deepCoadd_obj``), and write the 

806 results to a new dataset (which needs to be declared in an ``outputDataset`` 

807 attribute). 

808 

809 The calculations to be performed are defined in a YAML file that specifies 

810 a set of functors to be computed, provided as 

811 the ``functorFile`` config parameter. An example of such a YAML file 

812 is the following: 

813 

814 funcs: 

815 psfMag: 

816 functor: Mag 

817 args: 

818 - base_PsfFlux 

819 filt: HSC-G 

820 dataset: meas 

821 cmodel_magDiff: 

822 functor: MagDiff 

823 args: 

824 - modelfit_CModel 

825 - base_PsfFlux 

826 filt: HSC-G 

827 gauss_magDiff: 

828 functor: MagDiff 

829 args: 

830 - base_GaussianFlux 

831 - base_PsfFlux 

832 filt: HSC-G 

833 count: 

834 functor: Column 

835 args: 

836 - base_InputCount_value 

837 filt: HSC-G 

838 deconvolved_moments: 

839 functor: DeconvolvedMoments 

840 filt: HSC-G 

841 dataset: forced_src 

842 refFlags: 

843 - calib_psfUsed 

844 - merge_measurement_i 

845 - merge_measurement_r 

846 - merge_measurement_z 

847 - merge_measurement_y 

848 - merge_measurement_g 

849 - base_PixelFlags_flag_inexact_psfCenter 

850 - detect_isPrimary 

851 

852 The names for each entry under "funcs" will become the names of columns in 

853 the output dataset. All the functors referenced are defined in 

854 `lsst.pipe.tasks.functors`. Positional arguments to be passed to each 

855 functor are in the `args` list, and any additional entries for each column 

856 other than "functor" or "args" (e.g., ``'filt'``, ``'dataset'``) are treated as 

857 keyword arguments to be passed to the functor initialization. 

858 

859 The "flags" entry is the default shortcut for `Column` functors. 

860 All columns listed under "flags" will be copied to the output table 

861 untransformed. They can be of any datatype. 

862 In the special case of transforming a multi-level object table with 

863 band and dataset indices (deepCoadd_obj), these will be taken from the 

864 `meas` dataset and exploded out per band. 

865 

866 There are two special shortcuts that only apply when transforming 

867 multi-level Object (deepCoadd_obj) tables: 

868 - The "refFlags" entry is shortcut for `Column` functor 

869 taken from the `'ref'` dataset if transforming an ObjectTable. 

870 - The "forcedFlags" entry is shortcut for `Column` functors. 

871 taken from the ``forced_src`` dataset if transforming an ObjectTable. 

872 These are expanded out per band. 

873 

874 

875 This task uses the `lsst.pipe.tasks.postprocess.PostprocessAnalysis` object 

876 to organize and execute the calculations (a sketch of loading a functor file follows this class). 

877 """ 

878 @property 

879 def _DefaultName(self): 

880 raise NotImplementedError('Subclass must define "_DefaultName" attribute') 

881 

882 @property 

883 def outputDataset(self): 

884 raise NotImplementedError('Subclass must define "outputDataset" attribute') 

885 

886 @property 

887 def inputDataset(self): 

888 raise NotImplementedError('Subclass must define "inputDataset" attribute') 

889 

890 @property 

891 def ConfigClass(self): 

892 raise NotImplementedError('Subclass must define "ConfigClass" attribute') 

893 

894 def __init__(self, *args, **kwargs): 

895 super().__init__(*args, **kwargs) 

896 if self.config.functorFile: 

897 self.log.info('Loading transform functor definitions from %s', 

898 self.config.functorFile) 

899 self.funcs = CompositeFunctor.from_file(self.config.functorFile) 

900 self.funcs.update(dict(PostprocessAnalysis._defaultFuncs)) 

901 else: 

902 self.funcs = None 

903 

904 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

905 inputs = butlerQC.get(inputRefs) 

906 if self.funcs is None: 

907 raise ValueError("config.functorFile is None. " 

908 "Must be a valid path to yaml in order to run Task as a PipelineTask.") 

909 result = self.run(handle=inputs['inputCatalog'], funcs=self.funcs, 

910 dataId=outputRefs.outputCatalog.dataId.full) 

911 outputs = pipeBase.Struct(outputCatalog=result) 

912 butlerQC.put(outputs, outputRefs) 

913 

914 def run(self, handle, funcs=None, dataId=None, band=None): 

915 """Do postprocessing calculations 

916 

917 Takes a ``DeferredDatasetHandle`` or ``InMemoryDatasetHandle`` or 

918 ``DataFrame`` object and dataId, 

919 returns a dataframe with results of postprocessing calculations. 

920 

921 Parameters 

922 ---------- 

923 handle : `lsst.daf.butler.DeferredDatasetHandle` or 

924 `lsst.pipe.base.InMemoryDatasetHandle` or 

925 `pandas.DataFrame`, or list of these. 

926 DataFrames from which calculations are done. 

927 funcs : `lsst.pipe.tasks.functors.Functors` 

928 Functors to apply to the table's columns 

929 dataId : dict, optional 

930 Used to add a `patchId` column to the output dataframe. 

931 band : `str`, optional 

932 Filter band that is being processed. 

933 

934 Returns 

935 ------- 

936 df : `pandas.DataFrame` 

937 """ 

938 self.log.info("Transforming/standardizing the source table dataId: %s", dataId) 

939 

940 df = self.transform(band, handle, funcs, dataId).df 

941 self.log.info("Made a table of %d columns and %d rows", len(df.columns), len(df)) 

942 return df 

943 

944 def getFunctors(self): 

945 return self.funcs 

946 

947 def getAnalysis(self, handles, funcs=None, band=None): 

948 if funcs is None: 

949 funcs = self.funcs 

950 analysis = PostprocessAnalysis(handles, funcs, filt=band) 

951 return analysis 

952 

953 def transform(self, band, handles, funcs, dataId): 

954 analysis = self.getAnalysis(handles, funcs=funcs, band=band) 

955 df = analysis.df 

956 if dataId and self.config.columnsFromDataId: 

957 for key in self.config.columnsFromDataId: 

958 if key in dataId: 

959 df[str(key)] = dataId[key] 

960 else: 

961 raise ValueError(f"'{key}' in config.columnsFromDataId not found in dataId: {dataId}") 

962 

963 if self.config.primaryKey: 

964 if df.index.name != self.config.primaryKey and self.config.primaryKey in df: 

965 df.reset_index(inplace=True, drop=True) 

966 df.set_index(self.config.primaryKey, inplace=True) 

967 

968 return pipeBase.Struct( 

969 df=df, 

970 analysis=analysis 

971 ) 

972 

973 
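# --- Illustrative sketch (not part of postprocess.py) -----------------------
# Writing a minimal functor file in the YAML layout shown in the docstring
# above, then loading it with CompositeFunctor.from_file, the same call made in
# TransformCatalogBaseTask.__init__. Functor and column names are taken from
# the docstring example; the sketch assumes an LSST stack environment.
import os
import tempfile
from lsst.pipe.tasks.functors import CompositeFunctor

functorYaml = """\
funcs:
    psfMag:
        functor: Mag
        args:
            - base_PsfFlux
        dataset: meas
refFlags:
    - detect_isPrimary
"""
with tempfile.NamedTemporaryFile("w", suffix=".yaml", delete=False) as f:
    f.write(functorYaml)
funcs = CompositeFunctor.from_file(f.name)   # funcDict should include 'psfMag'
os.unlink(f.name)
# -----------------------------------------------------------------------------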

974class TransformObjectCatalogConnections(pipeBase.PipelineTaskConnections, 

975 defaultTemplates={"coaddName": "deep"}, 

976 dimensions=("tract", "patch", "skymap")): 

977 inputCatalog = connectionTypes.Input( 

978 doc="The vertical concatenation of the deepCoadd_{ref|meas|forced_src} catalogs, " 

979 "stored as a DataFrame with a multi-level column index per-patch.", 

980 dimensions=("tract", "patch", "skymap"), 

981 storageClass="DataFrame", 

982 name="{coaddName}Coadd_obj", 

983 deferLoad=True, 

984 ) 

985 outputCatalog = connectionTypes.Output( 

986 doc="Per-Patch Object Table of columns transformed from the deepCoadd_obj table per the standard " 

987 "data model.", 

988 dimensions=("tract", "patch", "skymap"), 

989 storageClass="DataFrame", 

990 name="objectTable" 

991 ) 

992 

993 

994class TransformObjectCatalogConfig(TransformCatalogBaseConfig, 

995 pipelineConnections=TransformObjectCatalogConnections): 

996 coaddName = pexConfig.Field( 

997 dtype=str, 

998 default="deep", 

999 doc="Name of coadd" 

1000 ) 

1001 # TODO: remove in DM-27177 

1002 filterMap = pexConfig.DictField( 

1003 keytype=str, 

1004 itemtype=str, 

1005 default={}, 

1006 doc=("Dictionary mapping full filter name to short one for column name munging." 

1007 "These filters determine the output columns no matter what filters the " 

1008 "input data actually contain."), 

1009 deprecated=("Coadds are now identified by the band, so this transform is unused." 

1010 "Will be removed after v22.") 

1011 ) 

1012 outputBands = pexConfig.ListField( 

1013 dtype=str, 

1014 default=None, 

1015 optional=True, 

1016 doc=("These bands and only these bands will appear in the output," 

1017 " NaN-filled if the input does not include them." 

1018 " If None, then use all bands found in the input.") 

1019 ) 

1020 camelCase = pexConfig.Field( 

1021 dtype=bool, 

1022 default=False, 

1023 doc=("Write per-band columns names with camelCase, else underscore " 

1024 "For example: gPsFlux instead of g_PsFlux.") 

1025 ) 

1026 multilevelOutput = pexConfig.Field( 

1027 dtype=bool, 

1028 default=False, 

1029 doc=("Whether results dataframe should have a multilevel column index (True) or be flat " 

1030 "and name-munged (False).") 

1031 ) 

1032 goodFlags = pexConfig.ListField( 

1033 dtype=str, 

1034 default=[], 

1035 doc=("List of 'good' flags that should be set False when populating empty tables. " 

1036 "All other flags are considered to be 'bad' flags and will be set to True.") 

1037 ) 

1038 floatFillValue = pexConfig.Field( 

1039 dtype=float, 

1040 default=np.nan, 

1041 doc="Fill value for float fields when populating empty tables." 

1042 ) 

1043 integerFillValue = pexConfig.Field( 

1044 dtype=int, 

1045 default=-1, 

1046 doc="Fill value for integer fields when populating empty tables." 

1047 ) 

1048 

1049 def setDefaults(self): 

1050 super().setDefaults() 

1051 self.functorFile = os.path.join('$PIPE_TASKS_DIR', 'schemas', 'Object.yaml') 

1052 self.primaryKey = 'objectId' 

1053 self.columnsFromDataId = ['tract', 'patch'] 

1054 self.goodFlags = ['calib_astrometry_used', 

1055 'calib_photometry_reserved', 

1056 'calib_photometry_used', 

1057 'calib_psf_candidate', 

1058 'calib_psf_reserved', 

1059 'calib_psf_used'] 

1060 

1061 

1062class TransformObjectCatalogTask(TransformCatalogBaseTask): 

1063 """Produce a flattened Object Table to match the format specified in 

1064 sdm_schemas. 

1065 

1066 Do the same set of postprocessing calculations on all bands. 

1067 

1068 This is identical to `TransformCatalogBaseTask`, except that it does 

1069 the specified functor calculations for all filters present in the 

1070 input `deepCoadd_obj` table. Any specific ``"filt"`` keywords specified 

1071 by the YAML file will be superseded. 

1072 """ 

1073 _DefaultName = "transformObjectCatalog" 

1074 ConfigClass = TransformObjectCatalogConfig 

1075 

1076 def run(self, handle, funcs=None, dataId=None, band=None): 

1077 # NOTE: band kwarg is ignored here. 

1078 dfDict = {} 

1079 analysisDict = {} 

1080 templateDf = pd.DataFrame() 

1081 

1082 columns = handle.get(component='columns') 

1083 inputBands = columns.unique(level=1).values 

1084 

1085 outputBands = self.config.outputBands if self.config.outputBands else inputBands 

1086 

1087 # Perform the transform for each filter present in the handle's dataframe. 

1088 for inputBand in inputBands: 

1089 if inputBand not in outputBands: 

1090 self.log.info("Ignoring %s band data in the input", inputBand) 

1091 continue 

1092 self.log.info("Transforming the catalog of band %s", inputBand) 

1093 result = self.transform(inputBand, handle, funcs, dataId) 

1094 dfDict[inputBand] = result.df 

1095 analysisDict[inputBand] = result.analysis 

1096 if templateDf.empty: 

1097 templateDf = result.df 

1098 

1099 # Put filler values in columns of other wanted bands 

1100 for filt in outputBands: 

1101 if filt not in dfDict: 

1102 self.log.info("Adding empty columns for band %s", filt) 

1103 dfTemp = templateDf.copy() 

1104 for col in dfTemp.columns: 

1105 testValue = dfTemp[col].values[0] 

1106 if isinstance(testValue, (np.bool_, pd.BooleanDtype)): 

1107 # Boolean flag type, check if it is a "good" flag 

1108 if col in self.config.goodFlags: 

1109 fillValue = False 

1110 else: 

1111 fillValue = True 

1112 elif isinstance(testValue, numbers.Integral): 

1113 # Checking numbers.Integral catches all flavors 

1114 # of python, numpy, pandas, etc. integers. 

1115 # We must ensure this is not an unsigned integer. 

1116 if isinstance(testValue, np.unsignedinteger): 

1117 raise ValueError("Parquet tables may not have unsigned integer columns.") 

1118 else: 

1119 fillValue = self.config.integerFillValue 

1120 else: 

1121 fillValue = self.config.floatFillValue 

1122 dfTemp[col].values[:] = fillValue 

1123 dfDict[filt] = dfTemp 

1124 

1125 # This makes a multilevel column index, with band as first level 

1126 df = pd.concat(dfDict, axis=1, names=['band', 'column']) 

1127 

1128 if not self.config.multilevelOutput: 

1129 noDupCols = list(set.union(*[set(v.noDupCols) for v in analysisDict.values()])) 

1130 if self.config.primaryKey in noDupCols: 

1131 noDupCols.remove(self.config.primaryKey) 

1132 if dataId and self.config.columnsFromDataId: 

1133 noDupCols += self.config.columnsFromDataId 

1134 df = flattenFilters(df, noDupCols=noDupCols, camelCase=self.config.camelCase, 

1135 inputBands=inputBands) 

1136 

1137 self.log.info("Made a table of %d columns and %d rows", len(df.columns), len(df)) 

1138 

1139 return df 

1140 

1141 
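# --- Illustrative sketch (not part of postprocess.py) -----------------------
# The per-column fill rule TransformObjectCatalogTask.run applies when a
# requested output band is missing from the input: boolean flags default to
# True ("bad") unless listed in config.goodFlags, integers get
# config.integerFillValue, and everything else gets config.floatFillValue.
# The helper name and example columns are hypothetical.
import numbers
import numpy as np

def fillValueFor(columnName, testValue, goodFlags=(), integerFill=-1, floatFill=np.nan):
    """Return the fill value used for a column whose values look like ``testValue``."""
    if isinstance(testValue, (bool, np.bool_)):
        # "Good" flags are filled with False; every other flag defaults to True.
        return columnName not in goodFlags
    if isinstance(testValue, numbers.Integral):
        if isinstance(testValue, np.unsignedinteger):
            raise ValueError("Parquet tables may not have unsigned integer columns.")
        return integerFill
    return floatFill

fillValueFor("calib_psf_used", np.bool_(True), goodFlags=("calib_psf_used",))  # -> False
fillValueFor("deblend_nChild", np.int32(0))                                    # -> -1
fillValueFor("psfFlux", 1.5)                                                   # -> nan
# -----------------------------------------------------------------------------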

1142class ConsolidateObjectTableConnections(pipeBase.PipelineTaskConnections, 

1143 dimensions=("tract", "skymap")): 

1144 inputCatalogs = connectionTypes.Input( 

1145 doc="Per-Patch objectTables conforming to the standard data model.", 

1146 name="objectTable", 

1147 storageClass="DataFrame", 

1148 dimensions=("tract", "patch", "skymap"), 

1149 multiple=True, 

1150 ) 

1151 outputCatalog = connectionTypes.Output( 

1152 doc="Pre-tract horizontal concatenation of the input objectTables", 

1153 name="objectTable_tract", 

1154 storageClass="DataFrame", 

1155 dimensions=("tract", "skymap"), 

1156 ) 

1157 

1158 

1159class ConsolidateObjectTableConfig(pipeBase.PipelineTaskConfig, 

1160 pipelineConnections=ConsolidateObjectTableConnections): 

1161 coaddName = pexConfig.Field( 

1162 dtype=str, 

1163 default="deep", 

1164 doc="Name of coadd" 

1165 ) 

1166 

1167 

1168class ConsolidateObjectTableTask(pipeBase.PipelineTask): 

1169 """Write patch-merged source tables to a tract-level DataFrame Parquet file. 

1170 

1171 Concatenates `objectTable` list into a per-tract `objectTable_tract`. 

1172 """ 

1173 _DefaultName = "consolidateObjectTable" 

1174 ConfigClass = ConsolidateObjectTableConfig 

1175 

1176 inputDataset = 'objectTable' 

1177 outputDataset = 'objectTable_tract' 

1178 

1179 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

1180 inputs = butlerQC.get(inputRefs) 

1181 self.log.info("Concatenating %s per-patch Object Tables", 

1182 len(inputs['inputCatalogs'])) 

1183 df = pd.concat(inputs['inputCatalogs']) 

1184 butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs) 

1185 

1186 

1187class TransformSourceTableConnections(pipeBase.PipelineTaskConnections, 

1188 defaultTemplates={"catalogType": ""}, 

1189 dimensions=("instrument", "visit", "detector")): 

1190 

1191 inputCatalog = connectionTypes.Input( 

1192 doc="Wide input catalog of sources produced by WriteSourceTableTask", 

1193 name="{catalogType}source", 

1194 storageClass="DataFrame", 

1195 dimensions=("instrument", "visit", "detector"), 

1196 deferLoad=True 

1197 ) 

1198 outputCatalog = connectionTypes.Output( 

1199 doc="Narrower, per-detector Source Table transformed and converted per a " 

1200 "specified set of functors", 

1201 name="{catalogType}sourceTable", 

1202 storageClass="DataFrame", 

1203 dimensions=("instrument", "visit", "detector") 

1204 ) 

1205 

1206 

1207class TransformSourceTableConfig(TransformCatalogBaseConfig, 

1208 pipelineConnections=TransformSourceTableConnections): 

1209 

1210 def setDefaults(self): 

1211 super().setDefaults() 

1212 self.functorFile = os.path.join('$PIPE_TASKS_DIR', 'schemas', 'Source.yaml') 

1213 self.primaryKey = 'sourceId' 

1214 self.columnsFromDataId = ['visit', 'detector', 'band', 'physical_filter'] 

1215 

1216 

1217class TransformSourceTableTask(TransformCatalogBaseTask): 

1218 """Transform/standardize a source catalog 

1219 """ 

1220 _DefaultName = "transformSourceTable" 

1221 ConfigClass = TransformSourceTableConfig 

1222 

1223 

1224class ConsolidateVisitSummaryConnections(pipeBase.PipelineTaskConnections, 

1225 dimensions=("instrument", "visit",), 

1226 defaultTemplates={"calexpType": ""}): 

1227 calexp = connectionTypes.Input( 

1228 doc="Processed exposures used for metadata", 

1229 name="calexp", 

1230 storageClass="ExposureF", 

1231 dimensions=("instrument", "visit", "detector"), 

1232 deferLoad=True, 

1233 multiple=True, 

1234 ) 

1235 visitSummary = connectionTypes.Output( 

1236 doc=("Per-visit consolidated exposure metadata. These catalogs use " 

1237 "detector id for the id and are sorted for fast lookups of a " 

1238 "detector."), 

1239 name="visitSummary", 

1240 storageClass="ExposureCatalog", 

1241 dimensions=("instrument", "visit"), 

1242 ) 

1243 visitSummarySchema = connectionTypes.InitOutput( 

1244 doc="Schema of the visitSummary catalog", 

1245 name="visitSummary_schema", 

1246 storageClass="ExposureCatalog", 

1247 ) 

1248 

1249 

1250class ConsolidateVisitSummaryConfig(pipeBase.PipelineTaskConfig, 

1251 pipelineConnections=ConsolidateVisitSummaryConnections): 

1252 """Config for ConsolidateVisitSummaryTask""" 

1253 pass 

1254 

1255 

1256class ConsolidateVisitSummaryTask(pipeBase.PipelineTask): 

1257 """Task to consolidate per-detector visit metadata. 

1258 

1259 This task aggregates the following metadata from all the detectors in a 

1260 single visit into an exposure catalog: 

1261 - The visitInfo. 

1262 - The wcs. 

1263 - The photoCalib. 

1264 - The physical_filter and band (if available). 

1265 - The psf size, shape, and effective area at the center of the detector. 

1266 - The corners of the bounding box in right ascension/declination. 

1267 

1268 Other quantities such as Detector, Psf, ApCorrMap, and TransmissionCurve 

1269 are not persisted here because of storage concerns, and because of their 

1270 limited utility as summary statistics. 

1271 

1272 Tests for this task are performed in ci_hsc_gen3. 

1273 """ 

1274 _DefaultName = "consolidateVisitSummary" 

1275 ConfigClass = ConsolidateVisitSummaryConfig 

1276 

1277 def __init__(self, **kwargs): 

1278 super().__init__(**kwargs) 

1279 self.schema = afwTable.ExposureTable.makeMinimalSchema() 

1280 self.schema.addField('visit', type='L', doc='Visit number') 

1281 self.schema.addField('physical_filter', type='String', size=32, doc='Physical filter') 

1282 self.schema.addField('band', type='String', size=32, doc='Name of band') 

1283 ExposureSummaryStats.update_schema(self.schema) 

1284 self.visitSummarySchema = afwTable.ExposureCatalog(self.schema) 

1285 

1286 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

1287 dataRefs = butlerQC.get(inputRefs.calexp) 

1288 visit = dataRefs[0].dataId.byName()['visit'] 

1289 

1290 self.log.debug("Concatenating metadata from %d per-detector calexps (visit %d)", 

1291 len(dataRefs), visit) 

1292 

1293 expCatalog = self._combineExposureMetadata(visit, dataRefs) 

1294 

1295 butlerQC.put(expCatalog, outputRefs.visitSummary) 

1296 

1297 def _combineExposureMetadata(self, visit, dataRefs): 

1298 """Make a combined exposure catalog from a list of dataRefs. 

1299 These dataRefs must point to exposures with wcs, summaryStats, 

1300 and other visit metadata. 

1301 

1302 Parameters 

1303 ---------- 

1304 visit : `int` 

1305 Visit identification number. 

1306 dataRefs : `list` of `lsst.daf.butler.DeferredDatasetHandle` 

1307 List of dataRefs in visit. 

1308 

1309 Returns 

1310 ------- 

1311 visitSummary : `lsst.afw.table.ExposureCatalog` 

1312 Exposure catalog with per-detector summary information. 

1313 """ 

1314 cat = afwTable.ExposureCatalog(self.schema) 

1315 cat.resize(len(dataRefs)) 

1316 

1317 cat['visit'] = visit 

1318 

1319 for i, dataRef in enumerate(dataRefs): 

1320 visitInfo = dataRef.get(component='visitInfo') 

1321 filterLabel = dataRef.get(component='filter') 

1322 summaryStats = dataRef.get(component='summaryStats') 

1323 detector = dataRef.get(component='detector') 

1324 wcs = dataRef.get(component='wcs') 

1325 photoCalib = dataRef.get(component='photoCalib') 

1326 detector = dataRef.get(component='detector') 

1327 bbox = dataRef.get(component='bbox') 

1328 validPolygon = dataRef.get(component='validPolygon') 

1329 

1330 rec = cat[i] 

1331 rec.setBBox(bbox) 

1332 rec.setVisitInfo(visitInfo) 

1333 rec.setWcs(wcs) 

1334 rec.setPhotoCalib(photoCalib) 

1335 rec.setValidPolygon(validPolygon) 

1336 

1337 rec['physical_filter'] = filterLabel.physicalLabel if filterLabel.hasPhysicalLabel() else "" 

1338 rec['band'] = filterLabel.bandLabel if filterLabel.hasBandLabel() else "" 

1339 rec.setId(detector.getId()) 

1340 summaryStats.update_record(rec) 

1341 

1342 metadata = dafBase.PropertyList() 

1343 metadata.add("COMMENT", "Catalog id is detector id, sorted.") 

1344 # We are looping over existing datarefs, so the following is true 

1345 metadata.add("COMMENT", "Only detectors with data have entries.") 

1346 cat.setMetadata(metadata) 

1347 

1348 cat.sort() 

1349 return cat 

1350 

1351 

1352class ConsolidateSourceTableConnections(pipeBase.PipelineTaskConnections, 

1353 defaultTemplates={"catalogType": ""}, 

1354 dimensions=("instrument", "visit")): 

1355 inputCatalogs = connectionTypes.Input( 

1356 doc="Input per-detector Source Tables", 

1357 name="{catalogType}sourceTable", 

1358 storageClass="DataFrame", 

1359 dimensions=("instrument", "visit", "detector"), 

1360 multiple=True 

1361 ) 

1362 outputCatalog = connectionTypes.Output( 

1363 doc="Per-visit concatenation of Source Table", 

1364 name="{catalogType}sourceTable_visit", 

1365 storageClass="DataFrame", 

1366 dimensions=("instrument", "visit") 

1367 ) 

1368 

1369 

1370class ConsolidateSourceTableConfig(pipeBase.PipelineTaskConfig, 

1371 pipelineConnections=ConsolidateSourceTableConnections): 

1372 pass 

1373 

1374 

1375class ConsolidateSourceTableTask(pipeBase.PipelineTask): 

1376 """Concatenate `sourceTable` list into a per-visit `sourceTable_visit` 

1377 """ 

1378 _DefaultName = 'consolidateSourceTable' 

1379 ConfigClass = ConsolidateSourceTableConfig 

1380 

1381 inputDataset = 'sourceTable' 

1382 outputDataset = 'sourceTable_visit' 

1383 

1384 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

1385 from .makeWarp import reorderRefs 

1386 

1387 detectorOrder = [ref.dataId['detector'] for ref in inputRefs.inputCatalogs] 

1388 detectorOrder.sort() 

1389 inputRefs = reorderRefs(inputRefs, detectorOrder, dataIdKey='detector') 

1390 inputs = butlerQC.get(inputRefs) 

1391 self.log.info("Concatenating %s per-detector Source Tables", 

1392 len(inputs['inputCatalogs'])) 

1393 df = pd.concat(inputs['inputCatalogs']) 

1394 butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs) 

1395 

1396 
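# --- Illustrative sketch (not part of postprocess.py) -----------------------
# The ordering step above: per-detector source tables are concatenated in
# sorted detector order so the per-visit table is deterministic. A plain-pandas
# restatement with hypothetical data:
import pandas as pd

perDetector = {12: pd.DataFrame({"psfFlux": [3.0]}),
               5: pd.DataFrame({"psfFlux": [1.0, 2.0]})}
visitTable = pd.concat([perDetector[det] for det in sorted(perDetector)])
# rows from detector 5 come first, followed by detector 12
# -----------------------------------------------------------------------------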

1397class MakeCcdVisitTableConnections(pipeBase.PipelineTaskConnections, 

1398 dimensions=("instrument",), 

1399 defaultTemplates={"calexpType": ""}): 

1400 visitSummaryRefs = connectionTypes.Input( 

1401 doc="Data references for per-visit consolidated exposure metadata", 

1402 name="finalVisitSummary", 

1403 storageClass="ExposureCatalog", 

1404 dimensions=("instrument", "visit"), 

1405 multiple=True, 

1406 deferLoad=True, 

1407 ) 

1408 outputCatalog = connectionTypes.Output( 

1409 doc="CCD and Visit metadata table", 

1410 name="ccdVisitTable", 

1411 storageClass="DataFrame", 

1412 dimensions=("instrument",) 

1413 ) 

1414 

1415 

1416class MakeCcdVisitTableConfig(pipeBase.PipelineTaskConfig, 

1417 pipelineConnections=MakeCcdVisitTableConnections): 

1418 idGenerator = DetectorVisitIdGeneratorConfig.make_field() 

1419 

1420 

1421class MakeCcdVisitTableTask(pipeBase.PipelineTask): 

1422 """Produce a `ccdVisitTable` from the visit summary exposure catalogs. 

1423 """ 

1424 _DefaultName = 'makeCcdVisitTable' 

1425 ConfigClass = MakeCcdVisitTableConfig 

1426 

1427 def run(self, visitSummaryRefs): 

1428 """Make a table of ccd information from the visit summary catalogs. 

1429 

1430 Parameters 

1431 ---------- 

1432 visitSummaryRefs : `list` of `lsst.daf.butler.DeferredDatasetHandle` 

1433 List of DeferredDatasetHandles pointing to exposure catalogs with 

1434 per-detector summary information. 

1435 

1436 Returns 

1437 ------- 

1438 result : `lsst.pipe.base.Struct` 

1439 Results struct with attribute: 

1440 

1441 ``outputCatalog`` 

1442 Catalog of ccd and visit information. 

1443 """ 

1444 ccdEntries = [] 

1445 for visitSummaryRef in visitSummaryRefs: 

1446 visitSummary = visitSummaryRef.get() 

1447 visitInfo = visitSummary[0].getVisitInfo() 

1448 

1449 ccdEntry = {} 

1450 summaryTable = visitSummary.asAstropy() 

1451 selectColumns = ['id', 'visit', 'physical_filter', 'band', 'ra', 'decl', 'zenithDistance', 

1452 'zeroPoint', 'psfSigma', 'skyBg', 'skyNoise', 

1453 'astromOffsetMean', 'astromOffsetStd', 'nPsfStar', 

1454 'psfStarDeltaE1Median', 'psfStarDeltaE2Median', 

1455 'psfStarDeltaE1Scatter', 'psfStarDeltaE2Scatter', 

1456 'psfStarDeltaSizeMedian', 'psfStarDeltaSizeScatter', 

1457 'psfStarScaledDeltaSizeScatter', 

1458 'psfTraceRadiusDelta', 'maxDistToNearestPsf'] 

1459 ccdEntry = summaryTable[selectColumns].to_pandas().set_index('id') 

1460 # 'visit' is the human readable visit number. 

1461 # 'visitId' is the key to the visit table. They are the same. 

1462 # Technically you should join to get the visit from the visit 

1463 # table. 

1464 ccdEntry = ccdEntry.rename(columns={"visit": "visitId"}) 

1465 ccdEntry['ccdVisitId'] = [ 

1466 self.config.idGenerator.apply( 

1467 visitSummaryRef.dataId, 

1468 detector=detector_id, 

1469 is_exposure=False, 

1470 ).catalog_id # The "catalog ID" here is the ccdVisit ID 

1471 # because it's usually the ID for a whole catalog 

1472 # with a {visit, detector}, and that's the main 

1473 # use case for IdGenerator. This usage for a 

1474 # summary table is rare. 

1475 for detector_id in summaryTable['id'] 

1476 ] 

1477 ccdEntry['detector'] = summaryTable['id'] 

1478 pixToArcseconds = np.array([vR.getWcs().getPixelScale().asArcseconds() if vR.getWcs() 

1479 else np.nan for vR in visitSummary]) 

1480 ccdEntry["seeing"] = visitSummary['psfSigma'] * np.sqrt(8 * np.log(2)) * pixToArcseconds 
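# sqrt(8*ln(2)) ~= 2.355 converts the Gaussian PSF sigma (in pixels) to a FWHM,
# and the per-detector pixel scale converts that FWHM to arcseconds.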

1481 

1482 ccdEntry["skyRotation"] = visitInfo.getBoresightRotAngle().asDegrees() 

1483 ccdEntry["expMidpt"] = visitInfo.getDate().toPython() 

1484 ccdEntry["expMidptMJD"] = visitInfo.getDate().get(dafBase.DateTime.MJD) 

1485 expTime = visitInfo.getExposureTime() 

1486 ccdEntry['expTime'] = expTime 

1487 ccdEntry["obsStart"] = ccdEntry["expMidpt"] - 0.5 * pd.Timedelta(seconds=expTime) 

1488 expTime_days = expTime / (60*60*24) 

1489 ccdEntry["obsStartMJD"] = ccdEntry["expMidptMJD"] - 0.5 * expTime_days 
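# obsStart/obsStartMJD back out the exposure start time from the midpoint;
# expTime is in seconds, hence the conversion to days for the MJD version.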

1490 ccdEntry['darkTime'] = visitInfo.getDarkTime() 

1491 ccdEntry['xSize'] = summaryTable['bbox_max_x'] - summaryTable['bbox_min_x'] 

1492 ccdEntry['ySize'] = summaryTable['bbox_max_y'] - summaryTable['bbox_min_y'] 

1493 ccdEntry['llcra'] = summaryTable['raCorners'][:, 0] 

1494 ccdEntry['llcdec'] = summaryTable['decCorners'][:, 0] 

1495 ccdEntry['ulcra'] = summaryTable['raCorners'][:, 1] 

1496 ccdEntry['ulcdec'] = summaryTable['decCorners'][:, 1] 

1497 ccdEntry['urcra'] = summaryTable['raCorners'][:, 2] 

1498 ccdEntry['urcdec'] = summaryTable['decCorners'][:, 2] 

1499 ccdEntry['lrcra'] = summaryTable['raCorners'][:, 3] 

1500 ccdEntry['lrcdec'] = summaryTable['decCorners'][:, 3] 
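# The llc/ulc/urc/lrc columns above unpack raCorners/decCorners assuming the
# corner order lower-left, upper-left, upper-right, lower-right (indices 0-3).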

1501 # TODO: DM-30618, Add raftName, nExposures, ccdTemp, binX, binY, 

1502 # and flags, and decide if WCS, and llcx, llcy, ulcx, ulcy, etc. 

1503 # values are actually wanted. 

1504 ccdEntries.append(ccdEntry) 

1505 

1506 outputCatalog = pd.concat(ccdEntries) 

1507 outputCatalog.set_index('ccdVisitId', inplace=True, verify_integrity=True) 
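# verify_integrity=True raises if any ccdVisitId appears more than once.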

1508 return pipeBase.Struct(outputCatalog=outputCatalog) 

1509 

1510 

1511class MakeVisitTableConnections(pipeBase.PipelineTaskConnections, 

1512 dimensions=("instrument",), 

1513 defaultTemplates={"calexpType": ""}): 

1514 visitSummaries = connectionTypes.Input( 

1515 doc="Per-visit consolidated exposure metadata", 

1516 name="finalVisitSummary", 

1517 storageClass="ExposureCatalog", 

1518 dimensions=("instrument", "visit",), 

1519 multiple=True, 

1520 deferLoad=True, 

1521 ) 

1522 outputCatalog = connectionTypes.Output( 

1523 doc="Visit metadata table", 

1524 name="visitTable", 

1525 storageClass="DataFrame", 

1526 dimensions=("instrument",) 

1527 ) 

1528 

1529 

1530class MakeVisitTableConfig(pipeBase.PipelineTaskConfig, 

1531 pipelineConnections=MakeVisitTableConnections): 

1532 pass 

1533 

1534 

1535class MakeVisitTableTask(pipeBase.PipelineTask): 

1536 """Produce a `visitTable` from the visit summary exposure catalogs. 

1537 """ 

1538 _DefaultName = 'makeVisitTable' 

1539 ConfigClass = MakeVisitTableConfig 

1540 

1541 def run(self, visitSummaries): 

1542 """Make a table of visit information from the visit summary catalogs. 

1543 

1544 Parameters 

1545 ---------- 

1546 visitSummaries : `list` of `lsst.afw.table.ExposureCatalog` 

1547 List of exposure catalogs with per-detector summary information. 

1548 Returns 

1549 ------- 

1550 result : `lsst.pipe.base.Struct` 

1551 Results struct with attribute: 

1552 

1553 ``outputCatalog`` 

1554 Catalog of visit information. 

1555 """ 

1556 visitEntries = [] 

1557 for visitSummary in visitSummaries: 

1558 visitSummary = visitSummary.get() 

1559 visitRow = visitSummary[0] 

1560 visitInfo = visitRow.getVisitInfo() 

1561 

1562 visitEntry = {} 

1563 visitEntry["visitId"] = visitRow['visit'] 

1564 visitEntry["visit"] = visitRow['visit'] 

1565 visitEntry["physical_filter"] = visitRow['physical_filter'] 

1566 visitEntry["band"] = visitRow['band'] 

1567 raDec = visitInfo.getBoresightRaDec() 

1568 visitEntry["ra"] = raDec.getRa().asDegrees() 

1569 visitEntry["decl"] = raDec.getDec().asDegrees() 

1570 visitEntry["skyRotation"] = visitInfo.getBoresightRotAngle().asDegrees() 

1571 azAlt = visitInfo.getBoresightAzAlt() 

1572 visitEntry["azimuth"] = azAlt.getLongitude().asDegrees() 

1573 visitEntry["altitude"] = azAlt.getLatitude().asDegrees() 

1574 visitEntry["zenithDistance"] = 90 - azAlt.getLatitude().asDegrees() 
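# Zenith distance is the complement of the boresight altitude (90 degrees minus altitude).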

1575 visitEntry["airmass"] = visitInfo.getBoresightAirmass() 

1576 expTime = visitInfo.getExposureTime() 

1577 visitEntry["expTime"] = expTime 

1578 visitEntry["expMidpt"] = visitInfo.getDate().toPython() 

1579 visitEntry["expMidptMJD"] = visitInfo.getDate().get(dafBase.DateTime.MJD) 

1580 visitEntry["obsStart"] = visitEntry["expMidpt"] - 0.5 * pd.Timedelta(seconds=expTime) 

1581 expTime_days = expTime / (60*60*24) 

1582 visitEntry["obsStartMJD"] = visitEntry["expMidptMJD"] - 0.5 * expTime_days 

1583 visitEntries.append(visitEntry) 

1584 

1585 # TODO: DM-30623, Add programId, exposureType, cameraTemp, 

1586 # mirror1Temp, mirror2Temp, mirror3Temp, domeTemp, externalTemp, 

1587 # dimmSeeing, pwvGPS, pwvMW, flags, nExposures. 

1588 

1589 outputCatalog = pd.DataFrame(data=visitEntries) 

1590 outputCatalog.set_index('visitId', inplace=True, verify_integrity=True) 

1591 return pipeBase.Struct(outputCatalog=outputCatalog) 

1592 

1593 

1594class WriteForcedSourceTableConnections(pipeBase.PipelineTaskConnections, 

1595 dimensions=("instrument", "visit", "detector", "skymap", "tract")): 

1596 

1597 inputCatalog = connectionTypes.Input( 

1598 doc="Primary per-detector, single-epoch forced-photometry catalog. " 

1599 "By default, it is the output of ForcedPhotCcdTask on calexps", 

1600 name="forced_src", 

1601 storageClass="SourceCatalog", 

1602 dimensions=("instrument", "visit", "detector", "skymap", "tract") 

1603 ) 

1604 inputCatalogDiff = connectionTypes.Input( 

1605 doc="Secondary multi-epoch, per-detector, forced photometry catalog. " 

1606 "By default, it is the output of ForcedPhotCcdTask run on image differences.", 

1607 name="forced_diff", 

1608 storageClass="SourceCatalog", 

1609 dimensions=("instrument", "visit", "detector", "skymap", "tract") 

1610 ) 

1611 outputCatalog = connectionTypes.Output( 

1612 doc="InputCatalogs horizonatally joined on `objectId` in DataFrame parquet format", 

1613 name="mergedForcedSource", 

1614 storageClass="DataFrame", 

1615 dimensions=("instrument", "visit", "detector", "skymap", "tract") 

1616 ) 

1617 

1618 

1619class WriteForcedSourceTableConfig(pipeBase.PipelineTaskConfig, 

1620 pipelineConnections=WriteForcedSourceTableConnections): 

1621 key = lsst.pex.config.Field( 

1622 doc="Column on which to join the two input tables on and make the primary key of the output", 

1623 dtype=str, 

1624 default="objectId", 

1625 ) 

1626 idGenerator = DetectorVisitIdGeneratorConfig.make_field() 

1627 

1628 

1629class WriteForcedSourceTableTask(pipeBase.PipelineTask): 

1630 """Merge and convert per-detector forced source catalogs to DataFrame Parquet format. 

1631 

1632 Because the predecessor ForcedPhotCcdTask operates per-detector and 

1633 per-tract (i.e., it has tract in its dimensions), detectors 

1634 on the tract boundary may have multiple forced source catalogs. 

1635 

1636 The successor task TransformForcedSourceTable runs per-patch 

1637 and temporally aggregates the overlapping mergedForcedSource catalogs 

1638 from all available epochs. 

1639 """ 

1640 _DefaultName = "writeForcedSourceTable" 

1641 ConfigClass = WriteForcedSourceTableConfig 

1642 

1643 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

1644 inputs = butlerQC.get(inputRefs) 

1645 # Add ccdVisitId to allow joining with CcdVisitTable 

1646 idGenerator = self.config.idGenerator.apply(butlerQC.quantum.dataId) 

1647 inputs['ccdVisitId'] = idGenerator.catalog_id 

1648 inputs['band'] = butlerQC.quantum.dataId.full['band'] 

1649 outputs = self.run(**inputs) 

1650 butlerQC.put(outputs, outputRefs) 

1651 

1652 def run(self, inputCatalog, inputCatalogDiff, ccdVisitId=None, band=None): 

1653 dfs = [] 

1654 for table, dataset in zip((inputCatalog, inputCatalogDiff), ('calexp', 'diff')): 

1655 df = table.asAstropy().to_pandas().set_index(self.config.key, drop=False) 

1656 df = df.reindex(sorted(df.columns), axis=1) 

1657 df['ccdVisitId'] = ccdVisitId if ccdVisitId else pd.NA 

1658 df['band'] = band if band else pd.NA 

1659 df.columns = pd.MultiIndex.from_tuples([(dataset, c) for c in df.columns], 

1660 names=('dataset', 'column')) 

1661 

1662 dfs.append(df) 

1663 

1664 outputCatalog = functools.reduce(lambda d1, d2: d1.join(d2), dfs) 

1665 return pipeBase.Struct(outputCatalog=outputCatalog) 

1666 

1667 
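# Example (a minimal sketch, not part of the pipeline): the mergedForcedSource
# DataFrame produced above uses a two-level column index named
# ('dataset', 'column') with dataset values 'calexp' and 'diff', so the direct
# and difference-image measurements for an object can be selected separately.
# The dataId and the 'psfFlux' column name below are hypothetical:
#
#     merged = butler.get("mergedForcedSource", dataId={...})
#     calexpFlux = merged["calexp"]["psfFlux"]   # forced_src measurements
#     diffFlux = merged["diff"]["psfFlux"]       # forced_diff measurements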

1668class TransformForcedSourceTableConnections(pipeBase.PipelineTaskConnections, 

1669 dimensions=("instrument", "skymap", "patch", "tract")): 

1670 

1671 inputCatalogs = connectionTypes.Input( 

1672 doc="DataFrames of merged ForcedSources produced by WriteForcedSourceTableTask", 

1673 name="mergedForcedSource", 

1674 storageClass="DataFrame", 

1675 dimensions=("instrument", "visit", "detector", "skymap", "tract"), 

1676 multiple=True, 

1677 deferLoad=True 

1678 ) 

1679 referenceCatalog = connectionTypes.Input( 

1680 doc="Reference catalog which was used to seed the forcedPhot. Columns " 

1681 "objectId, detect_isPrimary, detect_isTractInner, detect_isPatchInner " 

1682 "are expected.", 

1683 name="objectTable", 

1684 storageClass="DataFrame", 

1685 dimensions=("tract", "patch", "skymap"), 

1686 deferLoad=True 

1687 ) 

1688 outputCatalog = connectionTypes.Output( 

1689 doc="Narrower, temporally-aggregated, per-patch ForcedSource Table transformed and converted per a " 

1690 "specified set of functors", 

1691 name="forcedSourceTable", 

1692 storageClass="DataFrame", 

1693 dimensions=("tract", "patch", "skymap") 

1694 ) 

1695 

1696 

1697class TransformForcedSourceTableConfig(TransformCatalogBaseConfig, 

1698 pipelineConnections=TransformForcedSourceTableConnections): 

1699 referenceColumns = pexConfig.ListField( 

1700 dtype=str, 

1701 default=["detect_isPrimary", "detect_isTractInner", "detect_isPatchInner"], 

1702 optional=True, 

1703 doc="Columns to pull from reference catalog", 

1704 ) 

1705 keyRef = lsst.pex.config.Field( 

1706 doc="Column on which to join the two input tables on and make the primary key of the output", 

1707 dtype=str, 

1708 default="objectId", 

1709 ) 

1710 key = lsst.pex.config.Field( 

1711 doc="Rename the output DataFrame index to this name", 

1712 dtype=str, 

1713 default="forcedSourceId", 

1714 ) 

1715 

1716 def setDefaults(self): 

1717 super().setDefaults() 

1718 self.functorFile = os.path.join('$PIPE_TASKS_DIR', 'schemas', 'ForcedSource.yaml') 

1719 self.columnsFromDataId = ['tract', 'patch'] 

1720 

1721 

1722class TransformForcedSourceTableTask(TransformCatalogBaseTask): 

1723 """Transform/standardize a ForcedSource catalog 

1724 

1725 Transforms each wide, per-detector forcedSource DataFrame per the 

1726 specification file (per-camera defaults found in ForcedSource.yaml). 

1727 All epochs that overlap the patch are aggregated into one narrow, 

1728 per-patch DataFrame. 

1729 

1730 No de-duplication of rows is performed. Duplicate-resolution flags are 

1731 pulled in from the referenceCatalog: `detect_isPrimary`, 

1732 `detect_isTractInner`, `detect_isPatchInner`, so that the user may 

1733 de-duplicate for analysis or compare duplicates for QA. 

1734 

1735 The resulting table includes multiple bands. Epochs (MJDs) and other useful 

1736 per-visit information can be retrieved by joining with the CcdVisitTable on 

1737 ccdVisitId. 

1738 """ 
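# Example of the join mentioned above (a minimal sketch; butler access and the
# dataIds are illustrative, dataset names are those defined in this module):
#
#     forced = butler.get("forcedSourceTable", dataId={...})
#     ccdVisits = butler.get("ccdVisitTable", dataId={...})
#     # ccdVisitTable is indexed by ccdVisitId (see MakeCcdVisitTableTask).
#     forced = forced.merge(ccdVisits[["expMidptMJD"]],
#                           left_on="ccdVisitId", right_index=True, how="left")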

1739 _DefaultName = "transformForcedSourceTable" 

1740 ConfigClass = TransformForcedSourceTableConfig 

1741 

1742 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

1743 inputs = butlerQC.get(inputRefs) 

1744 if self.funcs is None: 

1745 raise ValueError("config.functorFile is None. " 

1746 "Must be a valid path to yaml in order to run Task as a PipelineTask.") 

1747 outputs = self.run(inputs['inputCatalogs'], inputs['referenceCatalog'], funcs=self.funcs, 

1748 dataId=outputRefs.outputCatalog.dataId.full) 

1749 

1750 butlerQC.put(outputs, outputRefs) 

1751 

1752 def run(self, inputCatalogs, referenceCatalog, funcs=None, dataId=None, band=None): 

1753 dfs = [] 

1754 ref = referenceCatalog.get(parameters={"columns": self.config.referenceColumns}) 

1755 self.log.info("Aggregating %s input catalogs", len(inputCatalogs)) 

1756 for handle in inputCatalogs: 

1757 result = self.transform(None, handle, funcs, dataId) 

1758 # Filter for only rows that were detected on (overlap) the patch 

1759 dfs.append(result.df.join(ref, how='inner')) 

1760 

1761 outputCatalog = pd.concat(dfs) 

1762 

1763 # Now that we are done joining on config.keyRef, name the index 

1764 # config.keyRef so that reset_index restores it as a column. 

1765 outputCatalog.index.rename(self.config.keyRef, inplace=True) 

1766 # Add config.keyRef to the column list 

1767 outputCatalog.reset_index(inplace=True) 

1768 # Set the forcedSourceId to the index. This is specified in the 

1769 # ForcedSource.yaml 

1770 outputCatalog.set_index("forcedSourceId", inplace=True, verify_integrity=True) 

1771 # Rename the new index to config.key. 

1772 outputCatalog.index.rename(self.config.key, inplace=True) 

1773 

1774 self.log.info("Made a table of %d columns and %d rows", 

1775 len(outputCatalog.columns), len(outputCatalog)) 

1776 return pipeBase.Struct(outputCatalog=outputCatalog) 

1777 

1778 

1779class ConsolidateTractConnections(pipeBase.PipelineTaskConnections, 

1780 defaultTemplates={"catalogType": ""}, 

1781 dimensions=("instrument", "tract")): 

1782 inputCatalogs = connectionTypes.Input( 

1783 doc="Input per-patch DataFrame Tables to be concatenated", 

1784 name="{catalogType}ForcedSourceTable", 

1785 storageClass="DataFrame", 

1786 dimensions=("tract", "patch", "skymap"), 

1787 multiple=True, 

1788 ) 

1789 

1790 outputCatalog = connectionTypes.Output( 

1791 doc="Output per-tract concatenation of DataFrame Tables", 

1792 name="{catalogType}ForcedSourceTable_tract", 

1793 storageClass="DataFrame", 

1794 dimensions=("tract", "skymap"), 

1795 ) 

1796 

1797 

1798class ConsolidateTractConfig(pipeBase.PipelineTaskConfig, 

1799 pipelineConnections=ConsolidateTractConnections): 

1800 pass 

1801 

1802 

1803class ConsolidateTractTask(pipeBase.PipelineTask): 

1804 """Concatenate any per-patch, dataframe list into a single 

1805 per-tract DataFrame. 

1806 """ 

1807 _DefaultName = 'ConsolidateTract' 

1808 ConfigClass = ConsolidateTractConfig 

1809 

1810 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

1811 inputs = butlerQC.get(inputRefs) 

1812 # Not checking that at least one inputCatalog exists, because that would 

1813 # imply an empty quantum graph. 

1814 self.log.info("Concatenating %s per-patch %s Tables", 

1815 len(inputs['inputCatalogs']), 

1816 inputRefs.inputCatalogs[0].datasetType.name) 

1817 df = pd.concat(inputs['inputCatalogs']) 

1818 butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs)