Coverage for python/lsst/pipe/tasks/postprocess.py: 26%

651 statements  

coverage.py v7.2.7, created at 2023-06-11 02:44 -0700

1# This file is part of pipe_tasks. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

22__all__ = ["WriteObjectTableConfig", "WriteObjectTableTask", 

23 "WriteSourceTableConfig", "WriteSourceTableTask", 

24 "WriteRecalibratedSourceTableConfig", "WriteRecalibratedSourceTableTask", 

25 "PostprocessAnalysis", 

26 "TransformCatalogBaseConfig", "TransformCatalogBaseTask", 

27 "TransformObjectCatalogConfig", "TransformObjectCatalogTask", 

28 "ConsolidateObjectTableConfig", "ConsolidateObjectTableTask", 

29 "TransformSourceTableConfig", "TransformSourceTableTask", 

30 "ConsolidateVisitSummaryConfig", "ConsolidateVisitSummaryTask", 

31 "ConsolidateSourceTableConfig", "ConsolidateSourceTableTask", 

32 "MakeCcdVisitTableConfig", "MakeCcdVisitTableTask", 

33 "MakeVisitTableConfig", "MakeVisitTableTask", 

34 "WriteForcedSourceTableConfig", "WriteForcedSourceTableTask", 

35 "TransformForcedSourceTableConfig", "TransformForcedSourceTableTask", 

36 "ConsolidateTractConfig", "ConsolidateTractTask"] 

37 

38import functools 

39import pandas as pd 

40import logging 

41import numpy as np 

42import numbers 

43import os 

44 

45import lsst.geom 

46import lsst.pex.config as pexConfig 

47import lsst.pipe.base as pipeBase 

48import lsst.daf.base as dafBase 

49from lsst.pipe.base import connectionTypes 

50import lsst.afw.table as afwTable 

51from lsst.afw.image import ExposureSummaryStats 

52from lsst.meas.base import SingleFrameMeasurementTask, DetectorVisitIdGeneratorConfig 

53from lsst.skymap import BaseSkyMap 

54 

55from .functors import CompositeFunctor, Column 

56 

57log = logging.getLogger(__name__) 

58 

59 

60def flattenFilters(df, noDupCols=['coord_ra', 'coord_dec'], camelCase=False, inputBands=None): 

61 """Flattens a dataframe with multilevel column index. 

62 """ 

63 newDf = pd.DataFrame() 

64 # band is the level 0 index 

65 dfBands = df.columns.unique(level=0).values 

66 for band in dfBands: 

67 subdf = df[band] 

68 columnFormat = '{0}{1}' if camelCase else '{0}_{1}' 

69 newColumns = {c: columnFormat.format(band, c) 

70 for c in subdf.columns if c not in noDupCols} 

71 cols = list(newColumns.keys()) 

72 newDf = pd.concat([newDf, subdf[cols].rename(columns=newColumns)], axis=1) 

73 

74 # Band must be present in the input and output, or else the column is all NaN:

75 presentBands = dfBands if inputBands is None else list(set(inputBands).intersection(dfBands)) 

76 # Get the unexploded columns from any present band's partition 

77 noDupDf = df[presentBands[0]][noDupCols] 

78 newDf = pd.concat([noDupDf, newDf], axis=1) 

79 return newDf 

80 

81 
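# Illustrative sketch (not part of the pipeline; the band and column names are
# hypothetical): flattenFilters turns a (band, column) MultiIndex into flat,
# band-prefixed column names, keeping the shared noDupCols only once.
def _flattenFiltersExample():
    columns = pd.MultiIndex.from_product(
        [["g", "r"], ["coord_ra", "coord_dec", "psfFlux"]],
        names=("band", "column"))
    df = pd.DataFrame(np.zeros((2, 6)), columns=columns)
    flat = flattenFilters(df)
    # -> ['coord_ra', 'coord_dec', 'g_psfFlux', 'r_psfFlux']
    return list(flat.columns)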

82class WriteObjectTableConnections(pipeBase.PipelineTaskConnections, 

83 defaultTemplates={"coaddName": "deep"}, 

84 dimensions=("tract", "patch", "skymap")): 

85 inputCatalogMeas = connectionTypes.Input( 

86 doc="Catalog of source measurements on the deepCoadd.", 

87 dimensions=("tract", "patch", "band", "skymap"), 

88 storageClass="SourceCatalog", 

89 name="{coaddName}Coadd_meas", 

90 multiple=True 

91 ) 

92 inputCatalogForcedSrc = connectionTypes.Input( 

93 doc="Catalog of forced measurements (shape and position parameters held fixed) on the deepCoadd.", 

94 dimensions=("tract", "patch", "band", "skymap"), 

95 storageClass="SourceCatalog", 

96 name="{coaddName}Coadd_forced_src", 

97 multiple=True 

98 ) 

99 inputCatalogRef = connectionTypes.Input( 

100 doc="Catalog marking the primary detection (which band provides a good shape and position)" 

101 "for each detection in deepCoadd_mergeDet.", 

102 dimensions=("tract", "patch", "skymap"), 

103 storageClass="SourceCatalog", 

104 name="{coaddName}Coadd_ref" 

105 ) 

106 outputCatalog = connectionTypes.Output( 

107 doc="A vertical concatenation of the deepCoadd_{ref|meas|forced_src} catalogs, " 

108 "stored as a DataFrame with a multi-level column index per-patch.", 

109 dimensions=("tract", "patch", "skymap"), 

110 storageClass="DataFrame", 

111 name="{coaddName}Coadd_obj" 

112 ) 

113 

114 

115class WriteObjectTableConfig(pipeBase.PipelineTaskConfig, 

116 pipelineConnections=WriteObjectTableConnections): 

117 engine = pexConfig.Field( 

118 dtype=str, 

119 default="pyarrow", 

120 doc="Parquet engine for writing (pyarrow or fastparquet)", 

121 deprecated="This config is no longer used, and will be removed after v26." 

122 ) 

123 coaddName = pexConfig.Field( 

124 dtype=str, 

125 default="deep", 

126 doc="Name of coadd" 

127 ) 

128 

129 

130class WriteObjectTableTask(pipeBase.PipelineTask): 

131 """Write filter-merged source tables as a DataFrame in parquet format. 

132 """ 

133 _DefaultName = "writeObjectTable" 

134 ConfigClass = WriteObjectTableConfig 

135 

136 # Names of table datasets to be merged 

137 inputDatasets = ('forced_src', 'meas', 'ref') 

138 

139 # Tag of output dataset written by `MergeSourcesTask.write` 

140 outputDataset = 'obj' 

141 

142 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

143 inputs = butlerQC.get(inputRefs) 

144 

145 measDict = {ref.dataId['band']: {'meas': cat} for ref, cat in 

146 zip(inputRefs.inputCatalogMeas, inputs['inputCatalogMeas'])} 

147 forcedSourceDict = {ref.dataId['band']: {'forced_src': cat} for ref, cat in 

148 zip(inputRefs.inputCatalogForcedSrc, inputs['inputCatalogForcedSrc'])} 

149 

150 catalogs = {} 

151 for band in measDict.keys(): 

152 catalogs[band] = {'meas': measDict[band]['meas'], 

153 'forced_src': forcedSourceDict[band]['forced_src'], 

154 'ref': inputs['inputCatalogRef']} 

155 dataId = butlerQC.quantum.dataId 

156 df = self.run(catalogs=catalogs, tract=dataId['tract'], patch=dataId['patch']) 

157 outputs = pipeBase.Struct(outputCatalog=df) 

158 butlerQC.put(outputs, outputRefs) 

159 

160 def run(self, catalogs, tract, patch): 

161 """Merge multiple catalogs. 

162 

163 Parameters 

164 ---------- 

165 catalogs : `dict` 

166 Mapping from filter names to dict of catalogs. 

167 tract : `int`

168 tractId to use for the tractId column.

169 patch : `str`

170 patchId to use for the patchId column.

171 

172 Returns 

173 ------- 

174 catalog : `pandas.DataFrame` 

175 Merged dataframe. 

176 """ 

177 dfs = [] 

178 for filt, tableDict in catalogs.items(): 

179 for dataset, table in tableDict.items(): 

180 # Convert afwTable to pandas DataFrame 

181 df = table.asAstropy().to_pandas().set_index('id', drop=True) 

182 

183 # Sort columns by name, to ensure matching schema among patches 

184 df = df.reindex(sorted(df.columns), axis=1) 

185 df = df.assign(tractId=tract, patchId=patch) 

186 

187 # Make columns a 3-level MultiIndex 

188 df.columns = pd.MultiIndex.from_tuples([(dataset, filt, c) for c in df.columns], 

189 names=('dataset', 'band', 'column')) 

190 dfs.append(df) 

191 

192 # We do this dance and not `pd.concat(dfs)` because the pandas 

193 # concatenation uses infinite memory. 

194 catalog = functools.reduce(lambda d1, d2: d1.join(d2), dfs) 

195 return catalog 

196 

197 
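# Illustrative sketch (hypothetical ids and values): the column-wise merge used
# in WriteObjectTableTask.run. Per-(dataset, band) DataFrames sharing the same
# 'id' index get a 3-level column MultiIndex and are joined on the index one at
# a time, rather than being passed to a single pd.concat call.
def _objectTableJoinExample():
    idx = pd.Index([10, 11, 12], name='id')
    dfs = []
    for dataset in ('meas', 'forced_src', 'ref'):
        df = pd.DataFrame({'example_flux': [1.0, 2.0, 3.0]}, index=idx)
        df.columns = pd.MultiIndex.from_tuples(
            [(dataset, 'g', c) for c in df.columns],
            names=('dataset', 'band', 'column'))
        dfs.append(df)
    return functools.reduce(lambda d1, d2: d1.join(d2), dfs)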

198class WriteSourceTableConnections(pipeBase.PipelineTaskConnections, 

199 defaultTemplates={"catalogType": ""}, 

200 dimensions=("instrument", "visit", "detector")): 

201 

202 catalog = connectionTypes.Input( 

203 doc="Input full-depth catalog of sources produced by CalibrateTask", 

204 name="{catalogType}src", 

205 storageClass="SourceCatalog", 

206 dimensions=("instrument", "visit", "detector") 

207 ) 

208 outputCatalog = connectionTypes.Output( 

209 doc="Catalog of sources, `src` in DataFrame/Parquet format. The 'id' column is " 

210 "replaced with an index; all other columns are unchanged.", 

211 name="{catalogType}source", 

212 storageClass="DataFrame", 

213 dimensions=("instrument", "visit", "detector") 

214 ) 

215 

216 

217class WriteSourceTableConfig(pipeBase.PipelineTaskConfig, 

218 pipelineConnections=WriteSourceTableConnections): 

219 idGenerator = DetectorVisitIdGeneratorConfig.make_field() 

220 

221 

222class WriteSourceTableTask(pipeBase.PipelineTask): 

223 """Write source table to DataFrame Parquet format. 

224 """ 

225 _DefaultName = "writeSourceTable" 

226 ConfigClass = WriteSourceTableConfig 

227 

228 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

229 inputs = butlerQC.get(inputRefs) 

230 inputs['ccdVisitId'] = self.config.idGenerator.apply(butlerQC.quantum.dataId).catalog_id 

231 result = self.run(**inputs) 

232 outputs = pipeBase.Struct(outputCatalog=result.table) 

233 butlerQC.put(outputs, outputRefs) 

234 

235 def run(self, catalog, ccdVisitId=None, **kwargs): 

236 """Convert `src` catalog to DataFrame 

237 

238 Parameters 

239 ---------- 

240 catalog : `lsst.afw.table.SourceCatalog`

241 Catalog to be converted.

242 ccdVisitId : `int`, optional

243 ccdVisitId to be added as a column.

244 **kwargs 

245 Additional keyword arguments are ignored as a convenience for 

246 subclasses that pass the same arguments to several different 

247 methods. 

248 

249 Returns 

250 ------- 

251 result : `lsst.pipe.base.Struct` 

252 ``table`` 

253 `DataFrame` version of the input catalog 

254 """ 

255 self.log.info("Generating DataFrame from src catalog ccdVisitId=%s", ccdVisitId) 

256 df = catalog.asAstropy().to_pandas().set_index('id', drop=True) 

257 df['ccdVisitId'] = ccdVisitId 

258 

259 return pipeBase.Struct(table=df) 

260 

261 
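# Illustrative sketch (the ccdVisitId value is hypothetical): the conversion can
# also be driven directly, outside of runQuantum, given an in-memory catalog.
def _writeSourceTableExample(srcCatalog):
    task = WriteSourceTableTask()
    return task.run(srcCatalog, ccdVisitId=123456789).table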

262class WriteRecalibratedSourceTableConnections(WriteSourceTableConnections, 

263 defaultTemplates={"catalogType": "", 

264 "skyWcsName": "gbdesAstrometricFit", 

265 "photoCalibName": "fgcm"}, 

266 dimensions=("instrument", "visit", "detector", "skymap")): 

267 skyMap = connectionTypes.Input( 

268 doc="skyMap needed to choose which tract-level calibrations to use when multiple available", 

269 name=BaseSkyMap.SKYMAP_DATASET_TYPE_NAME, 

270 storageClass="SkyMap", 

271 dimensions=("skymap",), 

272 ) 

273 exposure = connectionTypes.Input( 

274 doc="Input exposure to perform photometry on.", 

275 name="calexp", 

276 storageClass="ExposureF", 

277 dimensions=["instrument", "visit", "detector"], 

278 ) 

279 externalSkyWcsTractCatalog = connectionTypes.Input( 

280 doc=("Per-tract, per-visit wcs calibrations. These catalogs use the detector " 

281 "id for the catalog id, sorted on id for fast lookup."), 

282 name="{skyWcsName}SkyWcsCatalog", 

283 storageClass="ExposureCatalog", 

284 dimensions=["instrument", "visit", "tract"], 

285 multiple=True 

286 ) 

287 externalSkyWcsGlobalCatalog = connectionTypes.Input( 

288 doc=("Per-visit wcs calibrations computed globally (with no tract information). " 

289 "These catalogs use the detector id for the catalog id, sorted on id for " 

290 "fast lookup."), 

291 name="finalVisitSummary", 

292 storageClass="ExposureCatalog", 

293 dimensions=["instrument", "visit"], 

294 ) 

295 externalPhotoCalibTractCatalog = connectionTypes.Input( 

296 doc=("Per-tract, per-visit photometric calibrations. These catalogs use the " 

297 "detector id for the catalog id, sorted on id for fast lookup."), 

298 name="{photoCalibName}PhotoCalibCatalog", 

299 storageClass="ExposureCatalog", 

300 dimensions=["instrument", "visit", "tract"], 

301 multiple=True 

302 ) 

303 externalPhotoCalibGlobalCatalog = connectionTypes.Input( 

304 doc=("Per-visit photometric calibrations computed globally (with no tract " 

305 "information). These catalogs use the detector id for the catalog id, " 

306 "sorted on id for fast lookup."), 

307 name="finalVisitSummary", 

308 storageClass="ExposureCatalog", 

309 dimensions=["instrument", "visit"], 

310 ) 

311 

312 def __init__(self, *, config=None): 

313 super().__init__(config=config) 

314 # Same connection boilerplate as all other applications of 

315 # Global/Tract calibrations 

316 if config.doApplyExternalSkyWcs and config.doReevaluateSkyWcs: 

317 if config.useGlobalExternalSkyWcs: 

318 self.inputs.remove("externalSkyWcsTractCatalog") 

319 else: 

320 self.inputs.remove("externalSkyWcsGlobalCatalog") 

321 else: 

322 self.inputs.remove("externalSkyWcsTractCatalog") 

323 self.inputs.remove("externalSkyWcsGlobalCatalog") 

324 if config.doApplyExternalPhotoCalib and config.doReevaluatePhotoCalib: 

325 if config.useGlobalExternalPhotoCalib: 

326 self.inputs.remove("externalPhotoCalibTractCatalog") 

327 else: 

328 self.inputs.remove("externalPhotoCalibGlobalCatalog") 

329 else: 

330 self.inputs.remove("externalPhotoCalibTractCatalog") 

331 self.inputs.remove("externalPhotoCalibGlobalCatalog") 

332 

333 

334class WriteRecalibratedSourceTableConfig(WriteSourceTableConfig, 

335 pipelineConnections=WriteRecalibratedSourceTableConnections): 

336 

337 doReevaluatePhotoCalib = pexConfig.Field( 

338 dtype=bool, 

339 default=True, 

340 doc=("Add or replace local photoCalib columns") 

341 ) 

342 doReevaluateSkyWcs = pexConfig.Field( 

343 dtype=bool, 

344 default=True, 

345 doc=("Add or replace local WCS columns and update the coord columns, coord_ra and coord_dec") 

346 ) 

347 doApplyExternalPhotoCalib = pexConfig.Field( 

348 dtype=bool, 

349 default=True, 

350 doc=("If and only if doReevaluatePhotoCalib, apply the photometric calibrations from an external ", 

351 "algorithm such as FGCM or jointcal, else use the photoCalib already attached to the exposure."), 

352 ) 

353 doApplyExternalSkyWcs = pexConfig.Field( 

354 dtype=bool, 

355 default=True, 

356 doc=("if and only if doReevaluateSkyWcs, apply the WCS from an external algorithm such as jointcal, ", 

357 "else use the wcs already attached to the exposure."), 

358 ) 

359 useGlobalExternalPhotoCalib = pexConfig.Field( 

360 dtype=bool, 

361 default=True, 

362 doc=("When using doApplyExternalPhotoCalib, use 'global' calibrations " 

363 "that are not run per-tract. When False, use per-tract photometric " 

364 "calibration files.") 

365 ) 

366 useGlobalExternalSkyWcs = pexConfig.Field( 

367 dtype=bool, 

368 default=True, 

369 doc=("When using doApplyExternalSkyWcs, use 'global' calibrations " 

370 "that are not run per-tract. When False, use per-tract wcs " 

371 "files.") 

372 ) 

373 idGenerator = DetectorVisitIdGeneratorConfig.make_field() 

374 

375 def validate(self): 

376 super().validate() 

377 if self.doApplyExternalSkyWcs and not self.doReevaluateSkyWcs: 

378 log.warning("doApplyExternalSkyWcs=True but doReevaluateSkyWcs=False" 

379 "External SkyWcs will not be read or evaluated.") 

380 if self.doApplyExternalPhotoCalib and not self.doReevaluatePhotoCalib: 

381 log.warning("doApplyExternalPhotoCalib=True but doReevaluatePhotoCalib=False." 

382 "External PhotoCalib will not be read or evaluated.") 

383 

384 

385class WriteRecalibratedSourceTableTask(WriteSourceTableTask): 

386 """Write source table to DataFrame Parquet format. 

387 """ 

388 _DefaultName = "writeRecalibratedSourceTable" 

389 ConfigClass = WriteRecalibratedSourceTableConfig 

390 

391 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

392 inputs = butlerQC.get(inputRefs) 

393 

394 idGenerator = self.config.idGenerator.apply(butlerQC.quantum.dataId) 

395 inputs['idGenerator'] = idGenerator 

396 inputs['ccdVisitId'] = idGenerator.catalog_id 

397 

398 if self.config.doReevaluatePhotoCalib or self.config.doReevaluateSkyWcs: 

399 if self.config.doApplyExternalPhotoCalib or self.config.doApplyExternalSkyWcs: 

400 inputs['exposure'] = self.attachCalibs(inputRefs, **inputs) 

401 

402 inputs['catalog'] = self.addCalibColumns(**inputs) 

403 

404 result = self.run(**inputs) 

405 outputs = pipeBase.Struct(outputCatalog=result.table) 

406 butlerQC.put(outputs, outputRefs) 

407 

408 def attachCalibs(self, inputRefs, skyMap, exposure, externalSkyWcsGlobalCatalog=None, 

409 externalSkyWcsTractCatalog=None, externalPhotoCalibGlobalCatalog=None, 

410 externalPhotoCalibTractCatalog=None, **kwargs): 

411 """Apply external calibrations to exposure per configuration 

412 

413 When multiple tract-level calibrations overlap, select the one with the 

414 center closest to the detector.

415 

416 Parameters 

417 ---------- 

418 inputRefs : `lsst.pipe.base.InputQuantizedConnection`

419 Input connections, used here for the dataIds of tract-level calibs.

420 skyMap : `lsst.skymap.SkyMap` 

421 exposure : `lsst.afw.image.exposure.Exposure` 

422 Input exposure to adjust calibrations. 

423 externalSkyWcsGlobalCatalog : `lsst.afw.table.ExposureCatalog`, optional 

424 Exposure catalog with external skyWcs to be applied per config 

425 externalSkyWcsTractCatalog : `lsst.afw.table.ExposureCatalog`, optional 

426 Exposure catalog with external skyWcs to be applied per config 

427 externalPhotoCalibGlobalCatalog : `lsst.afw.table.ExposureCatalog`, optional 

428 Exposure catalog with external photoCalib to be applied per config 

429 externalPhotoCalibTractCatalog : `lsst.afw.table.ExposureCatalog`, optional 

430 Exposure catalog with external photoCalib to be applied per config 

431 **kwargs 

432 Additional keyword arguments are ignored to facilitate passing the 

433 same arguments to several methods. 

434 

435 Returns 

436 ------- 

437 exposure : `lsst.afw.image.exposure.Exposure` 

438 Exposure with adjusted calibrations. 

439 """ 

440 if not self.config.doApplyExternalSkyWcs: 

441 # Do not modify the exposure's SkyWcs 

442 externalSkyWcsCatalog = None 

443 elif self.config.useGlobalExternalSkyWcs: 

444 # Use the global external SkyWcs 

445 externalSkyWcsCatalog = externalSkyWcsGlobalCatalog 

446 self.log.info('Applying global SkyWcs') 

447 else: 

448 # use tract-level external SkyWcs from the closest overlapping tract 

449 inputRef = getattr(inputRefs, 'externalSkyWcsTractCatalog') 

450 tracts = [ref.dataId['tract'] for ref in inputRef] 

451 if len(tracts) == 1: 

452 ind = 0 

453 self.log.info('Applying tract-level SkyWcs from tract %s', tracts[ind]) 

454 else: 

455 if exposure.getWcs() is None: # TODO: could this look-up use the externalPhotoCalib? 

456 raise ValueError("Trying to locate nearest tract, but exposure.wcs is None.") 

457 ind = self.getClosestTract(tracts, skyMap, 

458 exposure.getBBox(), exposure.getWcs()) 

459 self.log.info('Multiple overlapping externalSkyWcsTractCatalogs found (%s). ' 

460 'Applying closest to detector center: tract=%s', str(tracts), tracts[ind]) 

461 

462 externalSkyWcsCatalog = externalSkyWcsTractCatalog[ind] 

463 

464 if not self.config.doApplyExternalPhotoCalib: 

465 # Do not modify the exposure's PhotoCalib 

466 externalPhotoCalibCatalog = None 

467 elif self.config.useGlobalExternalPhotoCalib: 

468 # Use the global external PhotoCalib 

469 externalPhotoCalibCatalog = externalPhotoCalibGlobalCatalog 

470 self.log.info('Applying global PhotoCalib') 

471 else: 

472 # use tract-level external PhotoCalib from the closest overlapping tract 

473 inputRef = getattr(inputRefs, 'externalPhotoCalibTractCatalog') 

474 tracts = [ref.dataId['tract'] for ref in inputRef] 

475 if len(tracts) == 1: 

476 ind = 0 

477 self.log.info('Applying tract-level PhotoCalib from tract %s', tracts[ind]) 

478 else: 

479 ind = self.getClosestTract(tracts, skyMap, 

480 exposure.getBBox(), exposure.getWcs()) 

481 self.log.info('Multiple overlapping externalPhotoCalibTractCatalogs found (%s). ' 

482 'Applying closest to detector center: tract=%s', str(tracts), tracts[ind]) 

483 

484 externalPhotoCalibCatalog = externalPhotoCalibTractCatalog[ind] 

485 

486 return self.prepareCalibratedExposure(exposure, externalSkyWcsCatalog, externalPhotoCalibCatalog) 

487 

488 def getClosestTract(self, tracts, skyMap, bbox, wcs): 

489 """Find the index of the tract closest to detector from list of tractIds 

490 

491 Parameters 

492 ---------- 

493 tracts : `list` [`int`]

494 Iterable of integer tractIds 

495 skyMap : `lsst.skymap.SkyMap` 

496 skyMap to lookup tract geometry and wcs 

497 bbox : `lsst.geom.Box2I` 

498 Detector bbox, the center of which will be compared to tract centers.

499 wcs : `lsst.afw.geom.SkyWcs` 

500 Detector Wcs object to map the detector center to SkyCoord 

501 

502 Returns 

503 ------- 

504 index : `int` 

505 """ 

506 if len(tracts) == 1: 

507 return 0 

508 

509 center = wcs.pixelToSky(bbox.getCenter()) 

510 sep = [] 

511 for tractId in tracts: 

512 tract = skyMap[tractId] 

513 tractCenter = tract.getWcs().pixelToSky(tract.getBBox().getCenter()) 

514 sep.append(center.separation(tractCenter)) 

515 

516 return np.argmin(sep) 

517 

518 def prepareCalibratedExposure(self, exposure, externalSkyWcsCatalog=None, externalPhotoCalibCatalog=None): 

519 """Prepare a calibrated exposure and apply external calibrations 

520 if so configured. 

521 

522 Parameters 

523 ---------- 

524 exposure : `lsst.afw.image.exposure.Exposure` 

525 Input exposure to adjust calibrations. 

526 externalSkyWcsCatalog : `lsst.afw.table.ExposureCatalog`, optional 

527 Exposure catalog with external skyWcs to be applied 

528 if config.doApplyExternalSkyWcs=True. Catalog uses the detector id 

529 for the catalog id, sorted on id for fast lookup. 

530 externalPhotoCalibCatalog : `lsst.afw.table.ExposureCatalog`, optional 

531 Exposure catalog with external photoCalib to be applied 

532 if config.doApplyExternalPhotoCalib=True. Catalog uses the detector 

533 id for the catalog id, sorted on id for fast lookup. 

534 

535 Returns 

536 ------- 

537 exposure : `lsst.afw.image.exposure.Exposure` 

538 Exposure with adjusted calibrations. 

539 """ 

540 detectorId = exposure.getInfo().getDetector().getId() 

541 

542 if externalPhotoCalibCatalog is not None: 

543 row = externalPhotoCalibCatalog.find(detectorId) 

544 if row is None: 

545 self.log.warning("Detector id %s not found in externalPhotoCalibCatalog; " 

546 "Using original photoCalib.", detectorId) 

547 else: 

548 photoCalib = row.getPhotoCalib() 

549 if photoCalib is None: 

550 self.log.warning("Detector id %s has None for photoCalib in externalPhotoCalibCatalog; " 

551 "Using original photoCalib.", detectorId) 

552 else: 

553 exposure.setPhotoCalib(photoCalib) 

554 

555 if externalSkyWcsCatalog is not None: 

556 row = externalSkyWcsCatalog.find(detectorId) 

557 if row is None: 

558 self.log.warning("Detector id %s not found in externalSkyWcsCatalog; " 

559 "Using original skyWcs.", detectorId) 

560 else: 

561 skyWcs = row.getWcs() 

562 if skyWcs is None: 

563 self.log.warning("Detector id %s has None for skyWcs in externalSkyWcsCatalog; " 

564 "Using original skyWcs.", detectorId) 

565 else: 

566 exposure.setWcs(skyWcs) 

567 

568 return exposure 

569 

570 def addCalibColumns(self, catalog, exposure, idGenerator, **kwargs): 

571 """Add replace columns with calibs evaluated at each centroid 

572 

573 Add or replace 'base_LocalWcs' `base_LocalPhotoCalib' columns in a 

574 a source catalog, by rerunning the plugins. 

575 

576 Parameters 

577 ---------- 

578 catalog : `lsst.afw.table.SourceCatalog` 

579 catalog to which calib columns will be added 

580 exposure : `lsst.afw.image.exposure.Exposure` 

581 Exposure with attached PhotoCalibs and SkyWcs attributes to be 

582 reevaluated at local centroids. Pixels are not required. 

583 idGenerator : `lsst.meas.base.IdGenerator` 

584 Object that generates Source IDs and random seeds. 

585 **kwargs 

586 Additional keyword arguments are ignored to facilitate passing the 

587 same arguments to several methods. 

588 

589 Returns 

590 ------- 

591 newCat : `lsst.afw.table.SourceCatalog`

592 Source Catalog with requested local calib columns 

593 """ 

594 measureConfig = SingleFrameMeasurementTask.ConfigClass() 

595 measureConfig.doReplaceWithNoise = False 

596 

597 # Clear all slots, because we aren't running the relevant plugins. 

598 for slot in measureConfig.slots: 

599 setattr(measureConfig.slots, slot, None) 

600 

601 measureConfig.plugins.names = [] 

602 if self.config.doReevaluateSkyWcs: 

603 measureConfig.plugins.names.add('base_LocalWcs') 

604 self.log.info("Re-evaluating base_LocalWcs plugin") 

605 if self.config.doReevaluatePhotoCalib: 

606 measureConfig.plugins.names.add('base_LocalPhotoCalib') 

607 self.log.info("Re-evaluating base_LocalPhotoCalib plugin") 

608 pluginsNotToCopy = tuple(measureConfig.plugins.names) 

609 

610 # Create a new schema and catalog 

611 # Copy all columns from original except for the ones to reevaluate 

612 aliasMap = catalog.schema.getAliasMap() 

613 mapper = afwTable.SchemaMapper(catalog.schema) 

614 for item in catalog.schema: 

615 if not item.field.getName().startswith(pluginsNotToCopy): 

616 mapper.addMapping(item.key) 

617 

618 schema = mapper.getOutputSchema() 

619 measurement = SingleFrameMeasurementTask(config=measureConfig, schema=schema) 

620 schema.setAliasMap(aliasMap) 

621 newCat = afwTable.SourceCatalog(schema) 

622 newCat.extend(catalog, mapper=mapper) 

623 

624 # Fluxes in sourceCatalogs are in counts, so there are no fluxes to 

625 # update here. LocalPhotoCalibs are applied during transform tasks. 

626 # Update coord_ra/coord_dec, which are expected to be positions on the 

627 # sky and are used as such in sdm tables without transform 

628 if self.config.doReevaluateSkyWcs and exposure.wcs is not None: 

629 afwTable.updateSourceCoords(exposure.wcs, newCat) 

630 

631 measurement.run(measCat=newCat, exposure=exposure, exposureId=idGenerator.catalog_id) 

632 

633 return newCat 

634 

635 

636class PostprocessAnalysis(object): 

637 """Calculate columns from DataFrames or handles storing DataFrames. 

638 

639 This object manages and organizes an arbitrary set of computations 

640 on a catalog. The catalog is defined by a 

641 `DeferredDatasetHandle` or `InMemoryDatasetHandle` object 

642 (or list thereof), such as a ``deepCoadd_obj`` dataset, and the 

643 computations are defined by a collection of `lsst.pipe.tasks.functors.Functor`

644 objects (or, equivalently, a ``CompositeFunctor``). 

645 

646 After the object is initialized, accessing the ``.df`` attribute (which 

647 holds the `pandas.DataFrame` containing the results of the calculations) 

648 triggers computation of said dataframe. 

649 

650 One of the conveniences of using this object is the ability to define a 

651 desired common filter for all functors. This enables the same functor 

652 collection to be passed to several different `PostprocessAnalysis` objects 

653 without having to change the original functor collection, since the ``filt`` 

654 keyword argument of this object triggers an overwrite of the ``filt`` 

655 property for all functors in the collection. 

656 

657 This object also allows a list of refFlags to be passed, and defines a set 

658 of default refFlags that are always included even if not requested. 

659 

660 If a list of DataFrames or Handles is passed, rather than a single one, 

661 then the calculations will be mapped over all the input catalogs. In 

662 principle, it should be straightforward to parallelize this activity, but 

663 initial tests have failed (see TODO in code comments). 

664 

665 Parameters 

666 ---------- 

667 handles : `lsst.daf.butler.DeferredDatasetHandle` or 

668 `lsst.pipe.base.InMemoryDatasetHandle` or 

669 list of these. 

670 Source catalog(s) for computation. 

671 functors : `list`, `dict`, or `~lsst.pipe.tasks.functors.CompositeFunctor` 

672 Computations to do (functors that act on ``handles``). 

673 If a dict, the output 

674 DataFrame will have columns keyed accordingly. 

675 If a list, the column keys will come from the 

676 ``.shortname`` attribute of each functor. 

677 

678 filt : `str`, optional 

679 Filter in which to calculate. If provided, 

680 this will overwrite any existing ``.filt`` attribute 

681 of the provided functors. 

682 

683 flags : `list`, optional 

684 List of flags (per-band) to include in output table. 

685 Taken from the ``meas`` dataset if applied to a multilevel Object Table. 

686 

687 refFlags : `list`, optional 

688 List of refFlags (only reference band) to include in output table. 

689 

690 forcedFlags : `list`, optional 

691 List of flags (per-band) to include in output table. 

692 Taken from the ``forced_src`` dataset if applied to a 

693 multilevel Object Table. Intended for flags from measurement plugins 

694 only run during multi-band forced-photometry. 

695 """ 

696 _defaultRefFlags = [] 

697 _defaultFuncs = () 

698 

699 def __init__(self, handles, functors, filt=None, flags=None, refFlags=None, forcedFlags=None): 

700 self.handles = handles 

701 self.functors = functors 

702 

703 self.filt = filt 

704 self.flags = list(flags) if flags is not None else [] 

705 self.forcedFlags = list(forcedFlags) if forcedFlags is not None else [] 

706 self.refFlags = list(self._defaultRefFlags) 

707 if refFlags is not None: 

708 self.refFlags += list(refFlags) 

709 

710 self._df = None 

711 

712 @property 

713 def defaultFuncs(self): 

714 funcs = dict(self._defaultFuncs) 

715 return funcs 

716 

717 @property 

718 def func(self): 

719 additionalFuncs = self.defaultFuncs 

720 additionalFuncs.update({flag: Column(flag, dataset='forced_src') for flag in self.forcedFlags}) 

721 additionalFuncs.update({flag: Column(flag, dataset='ref') for flag in self.refFlags}) 

722 additionalFuncs.update({flag: Column(flag, dataset='meas') for flag in self.flags}) 

723 

724 if isinstance(self.functors, CompositeFunctor): 

725 func = self.functors 

726 else: 

727 func = CompositeFunctor(self.functors) 

728 

729 func.funcDict.update(additionalFuncs) 

730 func.filt = self.filt 

731 

732 return func 

733 

734 @property 

735 def noDupCols(self): 

736 return [name for name, func in self.func.funcDict.items() if func.noDup or func.dataset == 'ref'] 

737 

738 @property 

739 def df(self): 

740 if self._df is None: 

741 self.compute() 

742 return self._df 

743 

744 def compute(self, dropna=False, pool=None): 

745 # map over multiple handles 

746 if type(self.handles) in (list, tuple): 

747 if pool is None: 

748 dflist = [self.func(handle, dropna=dropna) for handle in self.handles] 

749 else: 

750 # TODO: Figure out why this doesn't work (pyarrow pickling 

751 # issues?) 

752 dflist = pool.map(functools.partial(self.func, dropna=dropna), self.handles) 

753 self._df = pd.concat(dflist) 

754 else: 

755 self._df = self.func(self.handles, dropna=dropna) 

756 

757 return self._df 

758 

759 
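# Illustrative sketch (hypothetical functor and column names): computing a few
# columns from a single handle with PostprocessAnalysis. Accessing ``.df``
# triggers the computation, and ``filt`` selects the band for all functors.
def _postprocessAnalysisExample(handle):
    funcs = {'ra': Column('coord_ra', dataset='ref'),
             'dec': Column('coord_dec', dataset='ref')}
    analysis = PostprocessAnalysis(handle, funcs, filt='g',
                                   refFlags=['detect_isPrimary'])
    return analysis.df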

760class TransformCatalogBaseConnections(pipeBase.PipelineTaskConnections, 

761 dimensions=()): 

762 """Expected Connections for subclasses of TransformCatalogBaseTask. 

763 

764 Must be subclassed. 

765 """ 

766 inputCatalog = connectionTypes.Input( 

767 name="", 

768 storageClass="DataFrame", 

769 ) 

770 outputCatalog = connectionTypes.Output( 

771 name="", 

772 storageClass="DataFrame", 

773 ) 

774 

775 

776class TransformCatalogBaseConfig(pipeBase.PipelineTaskConfig, 

777 pipelineConnections=TransformCatalogBaseConnections): 

778 functorFile = pexConfig.Field( 

779 dtype=str, 

780 doc="Path to YAML file specifying Science Data Model functors to use " 

781 "when copying columns and computing calibrated values.", 

782 default=None, 

783 optional=True 

784 ) 

785 primaryKey = pexConfig.Field( 

786 dtype=str, 

787 doc="Name of column to be set as the DataFrame index. If None, the index" 

788 "will be named `id`", 

789 default=None, 

790 optional=True 

791 ) 

792 columnsFromDataId = pexConfig.ListField( 

793 dtype=str, 

794 default=None, 

795 optional=True, 

796 doc="Columns to extract from the dataId", 

797 ) 

798 

799 

800class TransformCatalogBaseTask(pipeBase.PipelineTask): 

801 """Base class for transforming/standardizing a catalog 

802 

803 by applying functors that convert units and apply calibrations. 

804 The purpose of this task is to perform a set of computations on 

805 an input ``DeferredDatasetHandle`` or ``InMemoryDatasetHandle`` that holds 

806 a ``DataFrame`` dataset (such as ``deepCoadd_obj``), and write the 

807 results to a new dataset (which needs to be declared in an ``outputDataset`` 

808 attribute). 

809 

810 The calculations to be performed are defined in a YAML file that specifies 

811 a set of functors to be computed, provided as 

812 the ``functorFile`` config parameter. An example of such a YAML file

813 is the following: 

814 

815 funcs: 

816 psfMag: 

817 functor: Mag 

818 args: 

819 - base_PsfFlux 

820 filt: HSC-G 

821 dataset: meas 

822 cmodel_magDiff: 

823 functor: MagDiff 

824 args: 

825 - modelfit_CModel 

826 - base_PsfFlux 

827 filt: HSC-G 

828 gauss_magDiff: 

829 functor: MagDiff 

830 args: 

831 - base_GaussianFlux 

832 - base_PsfFlux 

833 filt: HSC-G 

834 count: 

835 functor: Column 

836 args: 

837 - base_InputCount_value 

838 filt: HSC-G 

839 deconvolved_moments: 

840 functor: DeconvolvedMoments 

841 filt: HSC-G 

842 dataset: forced_src 

843 refFlags: 

844 - calib_psfUsed 

845 - merge_measurement_i 

846 - merge_measurement_r 

847 - merge_measurement_z 

848 - merge_measurement_y 

849 - merge_measurement_g 

850 - base_PixelFlags_flag_inexact_psfCenter 

851 - detect_isPrimary 

852 

853 The names for each entry under "funcs" will become the names of columns in

854 the output dataset. All the functors referenced are defined in 

855 `lsst.pipe.tasks.functors`. Positional arguments to be passed to each 

856 functor are in the `args` list, and any additional entries for each column 

857 other than "functor" or "args" (e.g., ``'filt'``, ``'dataset'``) are treated as 

858 keyword arguments to be passed to the functor initialization. 

859 

860 The "flags" entry is the default shortcut for `Column` functors. 

861 All columns listed under "flags" will be copied to the output table 

862 untransformed. They can be of any datatype. 

863 In the special case of transforming a multi-level object table with

864 band and dataset indices (deepCoadd_obj), these will be taken from the

865 `meas` dataset and exploded out per band. 

866 

867 There are two special shortcuts that only apply when transforming 

868 multi-level Object (deepCoadd_obj) tables: 

869 - The "refFlags" entry is shortcut for `Column` functor 

870 taken from the `'ref'` dataset if transforming an ObjectTable. 

871 - The "forcedFlags" entry is shortcut for `Column` functors. 

872 taken from the ``forced_src`` dataset if transforming an ObjectTable. 

873 These are expanded out per band. 

874 

875 

876 This task uses the `lsst.pipe.tasks.postprocess.PostprocessAnalysis` object 

877 to organize and execute the calculations. A minimal subclassing sketch appears after this class definition.

878 """ 

879 @property 

880 def _DefaultName(self): 

881 raise NotImplementedError('Subclass must define "_DefaultName" attribute') 

882 

883 @property 

884 def outputDataset(self): 

885 raise NotImplementedError('Subclass must define "outputDataset" attribute') 

886 

887 @property 

888 def inputDataset(self): 

889 raise NotImplementedError('Subclass must define "inputDataset" attribute') 

890 

891 @property 

892 def ConfigClass(self): 

893 raise NotImplementedError('Subclass must define "ConfigClass" attribute') 

894 

895 def __init__(self, *args, **kwargs): 

896 super().__init__(*args, **kwargs) 

897 if self.config.functorFile: 

898 self.log.info('Loading transform functor definitions from %s',

899 self.config.functorFile) 

900 self.funcs = CompositeFunctor.from_file(self.config.functorFile) 

901 self.funcs.update(dict(PostprocessAnalysis._defaultFuncs)) 

902 else: 

903 self.funcs = None 

904 

905 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

906 inputs = butlerQC.get(inputRefs) 

907 if self.funcs is None: 

908 raise ValueError("config.functorFile is None. " 

909 "Must be a valid path to yaml in order to run Task as a PipelineTask.") 

910 result = self.run(handle=inputs['inputCatalog'], funcs=self.funcs, 

911 dataId=outputRefs.outputCatalog.dataId.full) 

912 outputs = pipeBase.Struct(outputCatalog=result) 

913 butlerQC.put(outputs, outputRefs) 

914 

915 def run(self, handle, funcs=None, dataId=None, band=None): 

916 """Do postprocessing calculations 

917 

918 Takes a ``DeferredDatasetHandle`` or ``InMemoryDatasetHandle`` or 

919 ``DataFrame`` object and dataId, 

920 returns a dataframe with results of postprocessing calculations. 

921 

922 Parameters 

923 ---------- 

924 handle : `lsst.daf.butler.DeferredDatasetHandle` or

925 `lsst.pipe.base.InMemoryDatasetHandle` or 

926 `pandas.DataFrame`, or list of these. 

927 DataFrames from which calculations are done. 

928 funcs : `lsst.pipe.tasks.functors.Functors` 

929 Functors to apply to the table's columns 

930 dataId : dict, optional 

931 Used to add columns from the dataId (as configured by ``columnsFromDataId``) to the output dataframe.

932 band : `str`, optional 

933 Filter band that is being processed. 

934 

935 Returns 

936 -------

937 df : `pandas.DataFrame` 

938 """ 

939 self.log.info("Transforming/standardizing the source table dataId: %s", dataId) 

940 

941 df = self.transform(band, handle, funcs, dataId).df 

942 self.log.info("Made a table of %d columns and %d rows", len(df.columns), len(df)) 

943 return df 

944 

945 def getFunctors(self): 

946 return self.funcs 

947 

948 def getAnalysis(self, handles, funcs=None, band=None): 

949 if funcs is None: 

950 funcs = self.funcs 

951 analysis = PostprocessAnalysis(handles, funcs, filt=band) 

952 return analysis 

953 

954 def transform(self, band, handles, funcs, dataId): 

955 analysis = self.getAnalysis(handles, funcs=funcs, band=band) 

956 df = analysis.df 

957 if dataId and self.config.columnsFromDataId: 

958 for key in self.config.columnsFromDataId: 

959 if key in dataId: 

960 df[str(key)] = dataId[key] 

961 else: 

962 raise ValueError(f"'{key}' in config.columnsFromDataId not found in dataId: {dataId}") 

963 

964 if self.config.primaryKey: 

965 if df.index.name != self.config.primaryKey and self.config.primaryKey in df: 

966 df.reset_index(inplace=True, drop=True) 

967 df.set_index(self.config.primaryKey, inplace=True) 

968 

969 return pipeBase.Struct( 

970 df=df, 

971 analysis=analysis 

972 ) 

973 

974 
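# Minimal subclassing sketch for TransformCatalogBaseTask (the names here are
# hypothetical; the concrete subclasses below, e.g. TransformObjectCatalogTask
# and TransformSourceTableTask, are the real examples). A subclass provides
# connections patterned on TransformCatalogBaseConnections, a config with a
# default functorFile, and the _DefaultName/ConfigClass attributes:
#
#     class MyTransformConfig(TransformCatalogBaseConfig,
#                             pipelineConnections=MyTransformConnections):
#         def setDefaults(self):
#             super().setDefaults()
#             self.functorFile = os.path.join('$PIPE_TASKS_DIR', 'schemas', 'MyTable.yaml')
#
#     class MyTransformTask(TransformCatalogBaseTask):
#         _DefaultName = "myTransform"
#         ConfigClass = MyTransformConfig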

975class TransformObjectCatalogConnections(pipeBase.PipelineTaskConnections, 

976 defaultTemplates={"coaddName": "deep"}, 

977 dimensions=("tract", "patch", "skymap")): 

978 inputCatalog = connectionTypes.Input( 

979 doc="The vertical concatenation of the deepCoadd_{ref|meas|forced_src} catalogs, " 

980 "stored as a DataFrame with a multi-level column index per-patch.", 

981 dimensions=("tract", "patch", "skymap"), 

982 storageClass="DataFrame", 

983 name="{coaddName}Coadd_obj", 

984 deferLoad=True, 

985 ) 

986 outputCatalog = connectionTypes.Output( 

987 doc="Per-Patch Object Table of columns transformed from the deepCoadd_obj table per the standard " 

988 "data model.", 

989 dimensions=("tract", "patch", "skymap"), 

990 storageClass="DataFrame", 

991 name="objectTable" 

992 ) 

993 

994 

995class TransformObjectCatalogConfig(TransformCatalogBaseConfig, 

996 pipelineConnections=TransformObjectCatalogConnections): 

997 coaddName = pexConfig.Field( 

998 dtype=str, 

999 default="deep", 

1000 doc="Name of coadd" 

1001 ) 

1002 # TODO: remove in DM-27177 

1003 filterMap = pexConfig.DictField( 

1004 keytype=str, 

1005 itemtype=str, 

1006 default={}, 

1007 doc=("Dictionary mapping full filter name to short one for column name munging." 

1008 "These filters determine the output columns no matter what filters the " 

1009 "input data actually contain."), 

1010 deprecated=("Coadds are now identified by the band, so this transform is unused." 

1011 "Will be removed after v22.") 

1012 ) 

1013 outputBands = pexConfig.ListField( 

1014 dtype=str, 

1015 default=None, 

1016 optional=True, 

1017 doc=("These bands and only these bands will appear in the output," 

1018 " NaN-filled if the input does not include them." 

1019 " If None, then use all bands found in the input.") 

1020 ) 

1021 camelCase = pexConfig.Field( 

1022 dtype=bool, 

1023 default=False, 

1024 doc=("Write per-band columns names with camelCase, else underscore " 

1025 "For example: gPsFlux instead of g_PsFlux.") 

1026 ) 

1027 multilevelOutput = pexConfig.Field( 

1028 dtype=bool, 

1029 default=False, 

1030 doc=("Whether results dataframe should have a multilevel column index (True) or be flat " 

1031 "and name-munged (False).") 

1032 ) 

1033 goodFlags = pexConfig.ListField( 

1034 dtype=str, 

1035 default=[], 

1036 doc=("List of 'good' flags that should be set False when populating empty tables. " 

1037 "All other flags are considered to be 'bad' flags and will be set to True.") 

1038 ) 

1039 floatFillValue = pexConfig.Field( 

1040 dtype=float, 

1041 default=np.nan, 

1042 doc="Fill value for float fields when populating empty tables." 

1043 ) 

1044 integerFillValue = pexConfig.Field( 

1045 dtype=int, 

1046 default=-1, 

1047 doc="Fill value for integer fields when populating empty tables." 

1048 ) 

1049 

1050 def setDefaults(self): 

1051 super().setDefaults() 

1052 self.functorFile = os.path.join('$PIPE_TASKS_DIR', 'schemas', 'Object.yaml') 

1053 self.primaryKey = 'objectId' 

1054 self.columnsFromDataId = ['tract', 'patch'] 

1055 self.goodFlags = ['calib_astrometry_used', 

1056 'calib_photometry_reserved', 

1057 'calib_photometry_used', 

1058 'calib_psf_candidate', 

1059 'calib_psf_reserved', 

1060 'calib_psf_used'] 

1061 

1062 

1063class TransformObjectCatalogTask(TransformCatalogBaseTask): 

1064 """Produce a flattened Object Table to match the format specified in 

1065 sdm_schemas. 

1066 

1067 Do the same set of postprocessing calculations on all bands. 

1068 

1069 This is identical to `TransformCatalogBaseTask`, except that it does

1070 the specified functor calculations for all filters present in the 

1071 input `deepCoadd_obj` table. Any specific ``filt`` keywords specified

1072 by the YAML file will be superseded.

1073 """ 

1074 _DefaultName = "transformObjectCatalog" 

1075 ConfigClass = TransformObjectCatalogConfig 

1076 

1077 def run(self, handle, funcs=None, dataId=None, band=None): 

1078 # NOTE: band kwarg is ignored here. 

1079 dfDict = {} 

1080 analysisDict = {} 

1081 templateDf = pd.DataFrame() 

1082 

1083 columns = handle.get(component='columns') 

1084 inputBands = columns.unique(level=1).values 

1085 

1086 outputBands = self.config.outputBands if self.config.outputBands else inputBands 

1087 

1088 # Perform transform for data of filters that exist in the handle dataframe. 

1089 for inputBand in inputBands: 

1090 if inputBand not in outputBands: 

1091 self.log.info("Ignoring %s band data in the input", inputBand) 

1092 continue 

1093 self.log.info("Transforming the catalog of band %s", inputBand) 

1094 result = self.transform(inputBand, handle, funcs, dataId) 

1095 dfDict[inputBand] = result.df 

1096 analysisDict[inputBand] = result.analysis 

1097 if templateDf.empty: 

1098 templateDf = result.df 

1099 

1100 # Put filler values in columns of other wanted bands 

1101 for filt in outputBands: 

1102 if filt not in dfDict: 

1103 self.log.info("Adding empty columns for band %s", filt) 

1104 dfTemp = templateDf.copy() 

1105 for col in dfTemp.columns: 

1106 testValue = dfTemp[col].values[0] 

1107 if isinstance(testValue, (np.bool_, pd.BooleanDtype)): 

1108 # Boolean flag type, check if it is a "good" flag 

1109 if col in self.config.goodFlags: 

1110 fillValue = False 

1111 else: 

1112 fillValue = True 

1113 elif isinstance(testValue, numbers.Integral): 

1114 # Checking numbers.Integral catches all flavors 

1115 # of python, numpy, pandas, etc. integers. 

1116 # We must ensure this is not an unsigned integer. 

1117 if isinstance(testValue, np.unsignedinteger): 

1118 raise ValueError("Parquet tables may not have unsigned integer columns.") 

1119 else: 

1120 fillValue = self.config.integerFillValue 

1121 else: 

1122 fillValue = self.config.floatFillValue 

1123 dfTemp[col].values[:] = fillValue 

1124 dfDict[filt] = dfTemp 

1125 

1126 # This makes a multilevel column index, with band as first level 

1127 df = pd.concat(dfDict, axis=1, names=['band', 'column']) 

1128 

1129 if not self.config.multilevelOutput: 

1130 noDupCols = list(set.union(*[set(v.noDupCols) for v in analysisDict.values()])) 

1131 if self.config.primaryKey in noDupCols: 

1132 noDupCols.remove(self.config.primaryKey) 

1133 if dataId and self.config.columnsFromDataId: 

1134 noDupCols += self.config.columnsFromDataId 

1135 df = flattenFilters(df, noDupCols=noDupCols, camelCase=self.config.camelCase, 

1136 inputBands=inputBands) 

1137 

1138 self.log.info("Made a table of %d columns and %d rows", len(df.columns), len(df)) 

1139 

1140 return df 

1141 

1142 
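# Illustrative config sketch (the band list is hypothetical): restrict the
# flattened object table to a fixed set of bands and use camelCase per-band
# column names; bands missing from the input are padded with the fill values.
def _transformObjectCatalogConfigExample():
    config = TransformObjectCatalogConfig()
    config.outputBands = ['g', 'r', 'i']
    config.camelCase = True  # e.g. 'gPsFlux' rather than 'g_PsFlux'
    return config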

1143class ConsolidateObjectTableConnections(pipeBase.PipelineTaskConnections, 

1144 dimensions=("tract", "skymap")): 

1145 inputCatalogs = connectionTypes.Input( 

1146 doc="Per-Patch objectTables conforming to the standard data model.", 

1147 name="objectTable", 

1148 storageClass="DataFrame", 

1149 dimensions=("tract", "patch", "skymap"), 

1150 multiple=True, 

1151 ) 

1152 outputCatalog = connectionTypes.Output( 

1153 doc="Pre-tract horizontal concatenation of the input objectTables", 

1154 name="objectTable_tract", 

1155 storageClass="DataFrame", 

1156 dimensions=("tract", "skymap"), 

1157 ) 

1158 

1159 

1160class ConsolidateObjectTableConfig(pipeBase.PipelineTaskConfig, 

1161 pipelineConnections=ConsolidateObjectTableConnections): 

1162 coaddName = pexConfig.Field( 

1163 dtype=str, 

1164 default="deep", 

1165 doc="Name of coadd" 

1166 ) 

1167 

1168 

1169class ConsolidateObjectTableTask(pipeBase.PipelineTask): 

1170 """Write patch-merged source tables to a tract-level DataFrame Parquet file. 

1171 

1172 Concatenates the per-patch `objectTable` list into a per-tract `objectTable_tract`.

1173 """ 

1174 _DefaultName = "consolidateObjectTable" 

1175 ConfigClass = ConsolidateObjectTableConfig 

1176 

1177 inputDataset = 'objectTable' 

1178 outputDataset = 'objectTable_tract' 

1179 

1180 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

1181 inputs = butlerQC.get(inputRefs) 

1182 self.log.info("Concatenating %s per-patch Object Tables", 

1183 len(inputs['inputCatalogs'])) 

1184 df = pd.concat(inputs['inputCatalogs']) 

1185 butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs) 

1186 

1187 

1188class TransformSourceTableConnections(pipeBase.PipelineTaskConnections, 

1189 defaultTemplates={"catalogType": ""}, 

1190 dimensions=("instrument", "visit", "detector")): 

1191 

1192 inputCatalog = connectionTypes.Input( 

1193 doc="Wide input catalog of sources produced by WriteSourceTableTask", 

1194 name="{catalogType}source", 

1195 storageClass="DataFrame", 

1196 dimensions=("instrument", "visit", "detector"), 

1197 deferLoad=True 

1198 ) 

1199 outputCatalog = connectionTypes.Output( 

1200 doc="Narrower, per-detector Source Table transformed and converted per a " 

1201 "specified set of functors", 

1202 name="{catalogType}sourceTable", 

1203 storageClass="DataFrame", 

1204 dimensions=("instrument", "visit", "detector") 

1205 ) 

1206 

1207 

1208class TransformSourceTableConfig(TransformCatalogBaseConfig, 

1209 pipelineConnections=TransformSourceTableConnections): 

1210 

1211 def setDefaults(self): 

1212 super().setDefaults() 

1213 self.functorFile = os.path.join('$PIPE_TASKS_DIR', 'schemas', 'Source.yaml') 

1214 self.primaryKey = 'sourceId' 

1215 self.columnsFromDataId = ['visit', 'detector', 'band', 'physical_filter'] 

1216 

1217 

1218class TransformSourceTableTask(TransformCatalogBaseTask): 

1219 """Transform/standardize a source catalog 

1220 """ 

1221 _DefaultName = "transformSourceTable" 

1222 ConfigClass = TransformSourceTableConfig 

1223 

1224 

1225class ConsolidateVisitSummaryConnections(pipeBase.PipelineTaskConnections, 

1226 dimensions=("instrument", "visit",), 

1227 defaultTemplates={"calexpType": ""}): 

1228 calexp = connectionTypes.Input( 

1229 doc="Processed exposures used for metadata", 

1230 name="calexp", 

1231 storageClass="ExposureF", 

1232 dimensions=("instrument", "visit", "detector"), 

1233 deferLoad=True, 

1234 multiple=True, 

1235 ) 

1236 visitSummary = connectionTypes.Output( 

1237 doc=("Per-visit consolidated exposure metadata. These catalogs use " 

1238 "detector id for the id and are sorted for fast lookups of a " 

1239 "detector."), 

1240 name="visitSummary", 

1241 storageClass="ExposureCatalog", 

1242 dimensions=("instrument", "visit"), 

1243 ) 

1244 visitSummarySchema = connectionTypes.InitOutput( 

1245 doc="Schema of the visitSummary catalog", 

1246 name="visitSummary_schema", 

1247 storageClass="ExposureCatalog", 

1248 ) 

1249 

1250 

1251class ConsolidateVisitSummaryConfig(pipeBase.PipelineTaskConfig, 

1252 pipelineConnections=ConsolidateVisitSummaryConnections): 

1253 """Config for ConsolidateVisitSummaryTask""" 

1254 pass 

1255 

1256 

1257class ConsolidateVisitSummaryTask(pipeBase.PipelineTask): 

1258 """Task to consolidate per-detector visit metadata. 

1259 

1260 This task aggregates the following metadata from all the detectors in a 

1261 single visit into an exposure catalog: 

1262 - The visitInfo. 

1263 - The wcs. 

1264 - The photoCalib. 

1265 - The physical_filter and band (if available). 

1266 - The psf size, shape, and effective area at the center of the detector. 

1267 - The corners of the bounding box in right ascension/declination. 

1268 

1269 Other quantities such as Detector, Psf, ApCorrMap, and TransmissionCurve 

1270 are not persisted here because of storage concerns, and because of their 

1271 limited utility as summary statistics. 

1272 

1273 Tests for this task are performed in ci_hsc_gen3. 

1274 """ 

1275 _DefaultName = "consolidateVisitSummary" 

1276 ConfigClass = ConsolidateVisitSummaryConfig 

1277 

1278 def __init__(self, **kwargs): 

1279 super().__init__(**kwargs) 

1280 self.schema = afwTable.ExposureTable.makeMinimalSchema() 

1281 self.schema.addField('visit', type='L', doc='Visit number') 

1282 self.schema.addField('physical_filter', type='String', size=32, doc='Physical filter') 

1283 self.schema.addField('band', type='String', size=32, doc='Name of band') 

1284 ExposureSummaryStats.update_schema(self.schema) 

1285 self.visitSummarySchema = afwTable.ExposureCatalog(self.schema) 

1286 

1287 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

1288 dataRefs = butlerQC.get(inputRefs.calexp) 

1289 visit = dataRefs[0].dataId.byName()['visit'] 

1290 

1291 self.log.debug("Concatenating metadata from %d per-detector calexps (visit %d)", 

1292 len(dataRefs), visit) 

1293 

1294 expCatalog = self._combineExposureMetadata(visit, dataRefs) 

1295 

1296 butlerQC.put(expCatalog, outputRefs.visitSummary) 

1297 

1298 def _combineExposureMetadata(self, visit, dataRefs): 

1299 """Make a combined exposure catalog from a list of dataRefs. 

1300 These dataRefs must point to exposures with wcs, summaryStats, 

1301 and other visit metadata. 

1302 

1303 Parameters 

1304 ---------- 

1305 visit : `int` 

1306 Visit identification number. 

1307 dataRefs : `list` of `lsst.daf.butler.DeferredDatasetHandle` 

1308 List of dataRefs in visit. 

1309 

1310 Returns 

1311 ------- 

1312 visitSummary : `lsst.afw.table.ExposureCatalog` 

1313 Exposure catalog with per-detector summary information. 

1314 """ 

1315 cat = afwTable.ExposureCatalog(self.schema) 

1316 cat.resize(len(dataRefs)) 

1317 

1318 cat['visit'] = visit 

1319 

1320 for i, dataRef in enumerate(dataRefs): 

1321 visitInfo = dataRef.get(component='visitInfo') 

1322 filterLabel = dataRef.get(component='filter') 

1323 summaryStats = dataRef.get(component='summaryStats') 

1324 detector = dataRef.get(component='detector') 

1325 wcs = dataRef.get(component='wcs') 

1326 photoCalib = dataRef.get(component='photoCalib') 

1327 detector = dataRef.get(component='detector') 

1328 bbox = dataRef.get(component='bbox') 

1329 validPolygon = dataRef.get(component='validPolygon') 

1330 

1331 rec = cat[i] 

1332 rec.setBBox(bbox) 

1333 rec.setVisitInfo(visitInfo) 

1334 rec.setWcs(wcs) 

1335 rec.setPhotoCalib(photoCalib) 

1336 rec.setValidPolygon(validPolygon) 

1337 

1338 rec['physical_filter'] = filterLabel.physicalLabel if filterLabel.hasPhysicalLabel() else "" 

1339 rec['band'] = filterLabel.bandLabel if filterLabel.hasBandLabel() else "" 

1340 rec.setId(detector.getId()) 

1341 summaryStats.update_record(rec) 

1342 

1343 metadata = dafBase.PropertyList() 

1344 metadata.add("COMMENT", "Catalog id is detector id, sorted.") 

1345 # We are looping over existing datarefs, so the following is true 

1346 metadata.add("COMMENT", "Only detectors with data have entries.") 

1347 cat.setMetadata(metadata) 

1348 

1349 cat.sort() 

1350 return cat 

1351 

1352 
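# Illustrative sketch (the detector id is hypothetical): the visitSummary
# written by this task is an ExposureCatalog keyed and sorted by detector id,
# so a per-detector row can be fetched with find().
def _visitSummaryLookupExample(visitSummary):
    row = visitSummary.find(42)  # returns None if the detector has no entry
    return row['band'], row.getWcs(), row.getPhotoCalib()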

1353class ConsolidateSourceTableConnections(pipeBase.PipelineTaskConnections, 

1354 defaultTemplates={"catalogType": ""}, 

1355 dimensions=("instrument", "visit")): 

1356 inputCatalogs = connectionTypes.Input( 

1357 doc="Input per-detector Source Tables", 

1358 name="{catalogType}sourceTable", 

1359 storageClass="DataFrame", 

1360 dimensions=("instrument", "visit", "detector"), 

1361 multiple=True 

1362 ) 

1363 outputCatalog = connectionTypes.Output( 

1364 doc="Per-visit concatenation of Source Table", 

1365 name="{catalogType}sourceTable_visit", 

1366 storageClass="DataFrame", 

1367 dimensions=("instrument", "visit") 

1368 ) 

1369 

1370 

1371class ConsolidateSourceTableConfig(pipeBase.PipelineTaskConfig, 

1372 pipelineConnections=ConsolidateSourceTableConnections): 

1373 pass 

1374 

1375 

1376class ConsolidateSourceTableTask(pipeBase.PipelineTask): 

1377 """Concatenate `sourceTable` list into a per-visit `sourceTable_visit` 

1378 """ 

1379 _DefaultName = 'consolidateSourceTable' 

1380 ConfigClass = ConsolidateSourceTableConfig 

1381 

1382 inputDataset = 'sourceTable' 

1383 outputDataset = 'sourceTable_visit' 

1384 

1385 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

1386 from .makeWarp import reorderRefs 

1387 

1388 detectorOrder = [ref.dataId['detector'] for ref in inputRefs.inputCatalogs] 

1389 detectorOrder.sort() 

1390 inputRefs = reorderRefs(inputRefs, detectorOrder, dataIdKey='detector') 

1391 inputs = butlerQC.get(inputRefs) 

1392 self.log.info("Concatenating %s per-detector Source Tables", 

1393 len(inputs['inputCatalogs'])) 

1394 df = pd.concat(inputs['inputCatalogs']) 

1395 butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs) 

1396 

1397 

1398class MakeCcdVisitTableConnections(pipeBase.PipelineTaskConnections, 

1399 dimensions=("instrument",), 

1400 defaultTemplates={"calexpType": ""}): 

1401 visitSummaryRefs = connectionTypes.Input( 

1402 doc="Data references for per-visit consolidated exposure metadata", 

1403 name="finalVisitSummary", 

1404 storageClass="ExposureCatalog", 

1405 dimensions=("instrument", "visit"), 

1406 multiple=True, 

1407 deferLoad=True, 

1408 ) 

1409 outputCatalog = connectionTypes.Output( 

1410 doc="CCD and Visit metadata table", 

1411 name="ccdVisitTable", 

1412 storageClass="DataFrame", 

1413 dimensions=("instrument",) 

1414 ) 

1415 

1416 

1417class MakeCcdVisitTableConfig(pipeBase.PipelineTaskConfig, 

1418 pipelineConnections=MakeCcdVisitTableConnections): 

1419 idGenerator = DetectorVisitIdGeneratorConfig.make_field() 

1420 

1421 

1422class MakeCcdVisitTableTask(pipeBase.PipelineTask): 

1423 """Produce a `ccdVisitTable` from the visit summary exposure catalogs. 

1424 """ 

1425 _DefaultName = 'makeCcdVisitTable' 

1426 ConfigClass = MakeCcdVisitTableConfig 

1427 

1428 def run(self, visitSummaryRefs): 

1429 """Make a table of ccd information from the visit summary catalogs. 

1430 

1431 Parameters 

1432 ---------- 

1433 visitSummaryRefs : `list` of `lsst.daf.butler.DeferredDatasetHandle` 

1434 List of DeferredDatasetHandles pointing to exposure catalogs with 

1435 per-detector summary information. 

1436 

1437 Returns 

1438 ------- 

1439 result : `lsst.pipe.base.Struct` 

1440 Results struct with attribute: 

1441 

1442 ``outputCatalog`` 

1443 Catalog of ccd and visit information. 

1444 """ 

1445 ccdEntries = [] 

1446 for visitSummaryRef in visitSummaryRefs: 

1447 visitSummary = visitSummaryRef.get() 
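            # The visit-level info is common to all detector rows, so take it
            # from the first record.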

1448 visitInfo = visitSummary[0].getVisitInfo() 

1449 

1450 ccdEntry = {} 

1451 summaryTable = visitSummary.asAstropy() 

1452 selectColumns = ['id', 'visit', 'physical_filter', 'band', 'ra', 'dec', 'zenithDistance', 

1453 'zeroPoint', 'psfSigma', 'skyBg', 'skyNoise', 

1454 'astromOffsetMean', 'astromOffsetStd', 'nPsfStar', 

1455 'psfStarDeltaE1Median', 'psfStarDeltaE2Median', 

1456 'psfStarDeltaE1Scatter', 'psfStarDeltaE2Scatter', 

1457 'psfStarDeltaSizeMedian', 'psfStarDeltaSizeScatter', 

1458 'psfStarScaledDeltaSizeScatter', 

1459 'psfTraceRadiusDelta', 'maxDistToNearestPsf'] 

1460 ccdEntry = summaryTable[selectColumns].to_pandas().set_index('id') 

1461 # 'visit' is the human-readable visit number. 

1462 # 'visitId' is the key into the visit table; the two are the same. 

1463 # Strictly, one should join with the visit table to look up 'visit' 

1464 # rather than relying on this. 

1465 ccdEntry = ccdEntry.rename(columns={"visit": "visitId"}) 

1466 

1467 # RFC-924: Temporarily keep a duplicate "decl" entry for backwards 

1468 # compatibility. To be removed after September 2023. 

1469 ccdEntry["decl"] = ccdEntry.loc[:, "dec"] 

1470 

1471 ccdEntry['ccdVisitId'] = [ 

1472 self.config.idGenerator.apply( 

1473 visitSummaryRef.dataId, 

1474 detector=detector_id, 

1475 is_exposure=False, 

1476 ).catalog_id # The "catalog ID" here is the ccdVisit ID 

1477 # because it's usually the ID for a whole catalog 

1478 # with a {visit, detector}, and that's the main 

1479 # use case for IdGenerator. This usage for a 

1480 # summary table is rare. 

1481 for detector_id in summaryTable['id'] 

1482 ] 

1483 ccdEntry['detector'] = summaryTable['id'] 

1484 pixToArcseconds = np.array([vR.getWcs().getPixelScale().asArcseconds() if vR.getWcs() 

1485 else np.nan for vR in visitSummary]) 
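            # Convert the PSF model width from a Gaussian sigma in pixels to a
            # FWHM in arcseconds: FWHM = sigma * sqrt(8 ln 2) ~= 2.355 * sigma.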

1486 ccdEntry["seeing"] = visitSummary['psfSigma'] * np.sqrt(8 * np.log(2)) * pixToArcseconds 

1487 

1488 ccdEntry["skyRotation"] = visitInfo.getBoresightRotAngle().asDegrees() 

1489 ccdEntry["expMidpt"] = visitInfo.getDate().toPython() 

1490 ccdEntry["expMidptMJD"] = visitInfo.getDate().get(dafBase.DateTime.MJD) 

1491 expTime = visitInfo.getExposureTime() 

1492 ccdEntry['expTime'] = expTime 
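            # The summary date is the exposure midpoint; step back half the
            # exposure time to get the observation start (datetime and MJD).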

1493 ccdEntry["obsStart"] = ccdEntry["expMidpt"] - 0.5 * pd.Timedelta(seconds=expTime) 

1494 expTime_days = expTime / (60*60*24) 

1495 ccdEntry["obsStartMJD"] = ccdEntry["expMidptMJD"] - 0.5 * expTime_days 

1496 ccdEntry['darkTime'] = visitInfo.getDarkTime() 

1497 ccdEntry['xSize'] = summaryTable['bbox_max_x'] - summaryTable['bbox_min_x'] 

1498 ccdEntry['ySize'] = summaryTable['bbox_max_y'] - summaryTable['bbox_min_y'] 
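            # Detector corner coordinates: llc/ulc/urc/lrc are the lower-left,
            # upper-left, upper-right, and lower-right corners, read from the
            # raCorners/decCorners arrays in that order.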

1499 ccdEntry['llcra'] = summaryTable['raCorners'][:, 0] 

1500 ccdEntry['llcdec'] = summaryTable['decCorners'][:, 0] 

1501 ccdEntry['ulcra'] = summaryTable['raCorners'][:, 1] 

1502 ccdEntry['ulcdec'] = summaryTable['decCorners'][:, 1] 

1503 ccdEntry['urcra'] = summaryTable['raCorners'][:, 2] 

1504 ccdEntry['urcdec'] = summaryTable['decCorners'][:, 2] 

1505 ccdEntry['lrcra'] = summaryTable['raCorners'][:, 3] 

1506 ccdEntry['lrcdec'] = summaryTable['decCorners'][:, 3] 

1507 # TODO: DM-30618, Add raftName, nExposures, ccdTemp, binX, binY, 

1508 # and flags, and decide if WCS, and llcx, llcy, ulcx, ulcy, etc. 

1509 # values are actually wanted. 

1510 ccdEntries.append(ccdEntry) 

1511 

1512 outputCatalog = pd.concat(ccdEntries) 

1513 outputCatalog.set_index('ccdVisitId', inplace=True, verify_integrity=True) 

1514 return pipeBase.Struct(outputCatalog=outputCatalog) 

1515 

1516 

1517class MakeVisitTableConnections(pipeBase.PipelineTaskConnections, 

1518 dimensions=("instrument",), 

1519 defaultTemplates={"calexpType": ""}): 

1520 visitSummaries = connectionTypes.Input( 

1521 doc="Per-visit consolidated exposure metadata", 

1522 name="finalVisitSummary", 

1523 storageClass="ExposureCatalog", 

1524 dimensions=("instrument", "visit",), 

1525 multiple=True, 

1526 deferLoad=True, 

1527 ) 

1528 outputCatalog = connectionTypes.Output( 

1529 doc="Visit metadata table", 

1530 name="visitTable", 

1531 storageClass="DataFrame", 

1532 dimensions=("instrument",) 

1533 ) 

1534 

1535 

1536class MakeVisitTableConfig(pipeBase.PipelineTaskConfig, 

1537 pipelineConnections=MakeVisitTableConnections): 

1538 pass 

1539 

1540 

1541class MakeVisitTableTask(pipeBase.PipelineTask): 

1542 """Produce a `visitTable` from the visit summary exposure catalogs. 

1543 """ 

1544 _DefaultName = 'makeVisitTable' 

1545 ConfigClass = MakeVisitTableConfig 

1546 

1547 def run(self, visitSummaries): 

1548 """Make a table of visit information from the visit summary catalogs. 

1549 

1550 Parameters 

1551 ---------- 

1552 visitSummaries : `list` of `lsst.daf.butler.DeferredDatasetHandle` 

1553 List of DeferredDatasetHandles pointing to exposure catalogs with per-detector summary information. 

1554 Returns 

1555 ------- 

1556 result : `lsst.pipe.base.Struct` 

1557 Results struct with attribute: 

1558 

1559 ``outputCatalog`` 

1560 Catalog of visit information. 

1561 """ 

1562 visitEntries = [] 

1563 for visitSummary in visitSummaries: 

1564 visitSummary = visitSummary.get() 
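            # Per-visit quantities are the same for every detector row, so read
            # them from the first record in the catalog.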

1565 visitRow = visitSummary[0] 

1566 visitInfo = visitRow.getVisitInfo() 

1567 

1568 visitEntry = {} 

1569 visitEntry["visitId"] = visitRow['visit'] 

1570 visitEntry["visit"] = visitRow['visit'] 

1571 visitEntry["physical_filter"] = visitRow['physical_filter'] 

1572 visitEntry["band"] = visitRow['band'] 

1573 raDec = visitInfo.getBoresightRaDec() 

1574 visitEntry["ra"] = raDec.getRa().asDegrees() 

1575 visitEntry["dec"] = raDec.getDec().asDegrees() 

1576 

1577 # RFC-924: Temporarily keep a duplicate "decl" entry for backwards 

1578 # compatibility. To be removed after September 2023. 

1579 visitEntry["decl"] = visitEntry["dec"] 

1580 

1581 visitEntry["skyRotation"] = visitInfo.getBoresightRotAngle().asDegrees() 

1582 azAlt = visitInfo.getBoresightAzAlt() 

1583 visitEntry["azimuth"] = azAlt.getLongitude().asDegrees() 

1584 visitEntry["altitude"] = azAlt.getLatitude().asDegrees() 

1585 visitEntry["zenithDistance"] = 90 - azAlt.getLatitude().asDegrees() 

1586 visitEntry["airmass"] = visitInfo.getBoresightAirmass() 

1587 expTime = visitInfo.getExposureTime() 

1588 visitEntry["expTime"] = expTime 

1589 visitEntry["expMidpt"] = visitInfo.getDate().toPython() 

1590 visitEntry["expMidptMJD"] = visitInfo.getDate().get(dafBase.DateTime.MJD) 

1591 visitEntry["obsStart"] = visitEntry["expMidpt"] - 0.5 * pd.Timedelta(seconds=expTime) 

1592 expTime_days = expTime / (60*60*24) 

1593 visitEntry["obsStartMJD"] = visitEntry["expMidptMJD"] - 0.5 * expTime_days 

1594 visitEntries.append(visitEntry) 

1595 

1596 # TODO: DM-30623, Add programId, exposureType, cameraTemp, 

1597 # mirror1Temp, mirror2Temp, mirror3Temp, domeTemp, externalTemp, 

1598 # dimmSeeing, pwvGPS, pwvMW, flags, nExposures. 

1599 

1600 outputCatalog = pd.DataFrame(data=visitEntries) 

1601 outputCatalog.set_index('visitId', inplace=True, verify_integrity=True) 

1602 return pipeBase.Struct(outputCatalog=outputCatalog) 

1603 

1604 

1605class WriteForcedSourceTableConnections(pipeBase.PipelineTaskConnections, 

1606 dimensions=("instrument", "visit", "detector", "skymap", "tract")): 

1607 

1608 inputCatalog = connectionTypes.Input( 

1609 doc="Primary per-detector, single-epoch forced-photometry catalog. " 

1610 "By default, it is the output of ForcedPhotCcdTask on calexps", 

1611 name="forced_src", 

1612 storageClass="SourceCatalog", 

1613 dimensions=("instrument", "visit", "detector", "skymap", "tract") 

1614 ) 

1615 inputCatalogDiff = connectionTypes.Input( 

1616 doc="Secondary multi-epoch, per-detector, forced photometry catalog. " 

1617 "By default, it is the output of ForcedPhotCcdTask run on image differences.", 

1618 name="forced_diff", 

1619 storageClass="SourceCatalog", 

1620 dimensions=("instrument", "visit", "detector", "skymap", "tract") 

1621 ) 

1622 outputCatalog = connectionTypes.Output( 

1623 doc="InputCatalogs horizonatally joined on `objectId` in DataFrame parquet format", 

1624 name="mergedForcedSource", 

1625 storageClass="DataFrame", 

1626 dimensions=("instrument", "visit", "detector", "skymap", "tract") 

1627 ) 

1628 

1629 

1630class WriteForcedSourceTableConfig(pipeBase.PipelineTaskConfig, 

1631 pipelineConnections=WriteForcedSourceTableConnections): 

1632 key = lsst.pex.config.Field( 

1633 doc="Column on which to join the two input tables on and make the primary key of the output", 

1634 dtype=str, 

1635 default="objectId", 

1636 ) 

1637 idGenerator = DetectorVisitIdGeneratorConfig.make_field() 

1638 

1639 

1640class WriteForcedSourceTableTask(pipeBase.PipelineTask): 

1641 """Merge and convert per-detector forced source catalogs to DataFrame Parquet format. 

1642 

1643 Because the predecessor ForcedPhotCcdTask operates per-detector and 

1644 per-tract (i.e., it has tract in its dimensions), detectors that lie 

1645 on a tract boundary may have multiple forced source catalogs. 

1646 

1647 The successor task TransformForcedSourceTable runs per-patch and 

1648 temporally aggregates the overlapping mergedForcedSource catalogs from 

1649 all available epochs. 

1650 """ 

1651 _DefaultName = "writeForcedSourceTable" 

1652 ConfigClass = WriteForcedSourceTableConfig 

1653 

1654 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

1655 inputs = butlerQC.get(inputRefs) 

1656 # Add ccdVisitId to allow joining with CcdVisitTable 

1657 idGenerator = self.config.idGenerator.apply(butlerQC.quantum.dataId) 

1658 inputs['ccdVisitId'] = idGenerator.catalog_id 

1659 inputs['band'] = butlerQC.quantum.dataId.full['band'] 

1660 outputs = self.run(**inputs) 

1661 butlerQC.put(outputs, outputRefs) 

1662 

1663 def run(self, inputCatalog, inputCatalogDiff, ccdVisitId=None, band=None): 

1664 dfs = [] 

1665 for table, dataset, in zip((inputCatalog, inputCatalogDiff), ('calexp', 'diff')): 

1666 df = table.asAstropy().to_pandas().set_index(self.config.key, drop=False) 

1667 df = df.reindex(sorted(df.columns), axis=1) 

1668 df['ccdVisitId'] = ccdVisitId if ccdVisitId else pd.NA 

1669 df['band'] = band if band else pd.NA 
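            # Give each catalog's columns a top 'dataset' level ('calexp' or
            # 'diff'), e.g. a hypothetical psfFlux column becomes
            # ('calexp', 'psfFlux') or ('diff', 'psfFlux'), so the two tables
            # can sit side by side after the join.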

1670 df.columns = pd.MultiIndex.from_tuples([(dataset, c) for c in df.columns], 

1671 names=('dataset', 'column')) 

1672 

1673 dfs.append(df) 

1674 
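        # Horizontally join the 'calexp' and 'diff' DataFrames on their shared
        # index (config.key, by default objectId).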

1675 outputCatalog = functools.reduce(lambda d1, d2: d1.join(d2), dfs) 

1676 return pipeBase.Struct(outputCatalog=outputCatalog) 

1677 

1678 

1679class TransformForcedSourceTableConnections(pipeBase.PipelineTaskConnections, 

1680 dimensions=("instrument", "skymap", "patch", "tract")): 

1681 

1682 inputCatalogs = connectionTypes.Input( 

1683 doc="DataFrames of merged ForcedSources produced by WriteForcedSourceTableTask", 

1684 name="mergedForcedSource", 

1685 storageClass="DataFrame", 

1686 dimensions=("instrument", "visit", "detector", "skymap", "tract"), 

1687 multiple=True, 

1688 deferLoad=True 

1689 ) 

1690 referenceCatalog = connectionTypes.Input( 

1691 doc="Reference catalog which was used to seed the forcedPhot. Columns " 

1692 "objectId, detect_isPrimary, detect_isTractInner, detect_isPatchInner " 

1693 "are expected.", 

1694 name="objectTable", 

1695 storageClass="DataFrame", 

1696 dimensions=("tract", "patch", "skymap"), 

1697 deferLoad=True 

1698 ) 

1699 outputCatalog = connectionTypes.Output( 

1700 doc="Narrower, temporally-aggregated, per-patch ForcedSource Table transformed and converted per a " 

1701 "specified set of functors", 

1702 name="forcedSourceTable", 

1703 storageClass="DataFrame", 

1704 dimensions=("tract", "patch", "skymap") 

1705 ) 

1706 

1707 

1708class TransformForcedSourceTableConfig(TransformCatalogBaseConfig, 

1709 pipelineConnections=TransformForcedSourceTableConnections): 

1710 referenceColumns = pexConfig.ListField( 

1711 dtype=str, 

1712 default=["detect_isPrimary", "detect_isTractInner", "detect_isPatchInner"], 

1713 optional=True, 

1714 doc="Columns to pull from reference catalog", 

1715 ) 

1716 keyRef = lsst.pex.config.Field( 

1717 doc="Column on which to join the two input tables on and make the primary key of the output", 

1718 dtype=str, 

1719 default="objectId", 

1720 ) 

1721 key = lsst.pex.config.Field( 

1722 doc="Rename the output DataFrame index to this name", 

1723 dtype=str, 

1724 default="forcedSourceId", 

1725 ) 

1726 

1727 def setDefaults(self): 

1728 super().setDefaults() 

1729 self.functorFile = os.path.join('$PIPE_TASKS_DIR', 'schemas', 'ForcedSource.yaml') 

1730 self.columnsFromDataId = ['tract', 'patch'] 

1731 

1732 

1733class TransformForcedSourceTableTask(TransformCatalogBaseTask): 

1734 """Transform/standardize a ForcedSource catalog 

1735 

1736 Transforms each wide, per-detector forcedSource DataFrame per the 

1737 specification file (per-camera defaults found in ForcedSource.yaml). 

1738 All epochs that overlap the patch are aggregated into one narrow, 

1739 per-patch DataFrame. 

1740 

1741 No de-duplication of rows is performed. Duplicate-resolution flags are 

1742 pulled in from the referenceCatalog (`detect_isPrimary`, 

1743 `detect_isTractInner`, `detect_isPatchInner`) so that the user may 

1744 de-duplicate for analysis or compare duplicates for QA. 

1745 

1746 The resulting table includes multiple bands. Epochs (MJDs) and other 

1747 useful per-visit quantities can be retrieved by joining with the 

1748 CcdVisitTable on ccdVisitId. 

1749 """ 

1750 _DefaultName = "transformForcedSourceTable" 

1751 ConfigClass = TransformForcedSourceTableConfig 

1752 

1753 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

1754 inputs = butlerQC.get(inputRefs) 

1755 if self.funcs is None: 

1756 raise ValueError("config.functorFile is None. " 

1757 "Must be a valid path to yaml in order to run Task as a PipelineTask.") 

1758 outputs = self.run(inputs['inputCatalogs'], inputs['referenceCatalog'], funcs=self.funcs, 

1759 dataId=outputRefs.outputCatalog.dataId.full) 

1760 

1761 butlerQC.put(outputs, outputRefs) 

1762 

1763 def run(self, inputCatalogs, referenceCatalog, funcs=None, dataId=None, band=None): 

1764 dfs = [] 

1765 ref = referenceCatalog.get(parameters={"columns": self.config.referenceColumns}) 

1766 self.log.info("Aggregating %s input catalogs", len(inputCatalogs)) 

1767 for handle in inputCatalogs: 

1768 result = self.transform(None, handle, funcs, dataId) 

1769 # Filter for only rows that were detected on (overlap) the patch 

1770 dfs.append(result.df.join(ref, how='inner')) 

1771 

1772 outputCatalog = pd.concat(dfs) 

1773 

1774 # Now that the join on config.keyRef is done, name the index after it 

1775 # so it is preserved as a column when the index is reset below. 

1776 outputCatalog.index.rename(self.config.keyRef, inplace=True) 

1777 # Add config.keyRef to the column list 

1778 outputCatalog.reset_index(inplace=True) 

1779 # Set forcedSourceId as the new index; this column is defined in 

1780 # ForcedSource.yaml. 

1781 outputCatalog.set_index("forcedSourceId", inplace=True, verify_integrity=True) 

1782 # Rename it to the config.key 

1783 outputCatalog.index.rename(self.config.key, inplace=True) 

1784 

1785 self.log.info("Made a table of %d columns and %d rows", 

1786 len(outputCatalog.columns), len(outputCatalog)) 

1787 return pipeBase.Struct(outputCatalog=outputCatalog) 

1788 

1789 

1790class ConsolidateTractConnections(pipeBase.PipelineTaskConnections, 

1791 defaultTemplates={"catalogType": ""}, 

1792 dimensions=("instrument", "tract")): 

1793 inputCatalogs = connectionTypes.Input( 

1794 doc="Input per-patch DataFrame Tables to be concatenated", 

1795 name="{catalogType}ForcedSourceTable", 

1796 storageClass="DataFrame", 

1797 dimensions=("tract", "patch", "skymap"), 

1798 multiple=True, 

1799 ) 

1800 

1801 outputCatalog = connectionTypes.Output( 

1802 doc="Output per-tract concatenation of DataFrame Tables", 

1803 name="{catalogType}ForcedSourceTable_tract", 

1804 storageClass="DataFrame", 

1805 dimensions=("tract", "skymap"), 

1806 ) 

1807 

1808 

1809class ConsolidateTractConfig(pipeBase.PipelineTaskConfig, 

1810 pipelineConnections=ConsolidateTractConnections): 

1811 pass 

1812 

1813 

1814class ConsolidateTractTask(pipeBase.PipelineTask): 

1815 """Concatenate any per-patch, dataframe list into a single 

1816 per-tract DataFrame. 

1817 """ 

1818 _DefaultName = 'ConsolidateTract' 

1819 ConfigClass = ConsolidateTractConfig 

1820 

1821 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

1822 inputs = butlerQC.get(inputRefs) 

1823 # Not checking at least one inputCatalog exists because that'd be an 

1824 # empty QG. 

1825 self.log.info("Concatenating %s per-patch %s Tables", 

1826 len(inputs['inputCatalogs']), 

1827 inputRefs.inputCatalogs[0].datasetType.name) 

1828 df = pd.concat(inputs['inputCatalogs']) 

1829 butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs)