Coverage for python/lsst/pipe/tasks/postprocess.py: 32%

693 statements  

coverage.py v6.4.4, created at 2022-08-18 12:37 -0700

1# This file is part of pipe_tasks 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

22import functools 

23import pandas as pd 

24import logging 

25import numpy as np 

26import numbers 

27import os 

28 

29import lsst.geom 

30import lsst.pex.config as pexConfig 

31import lsst.pipe.base as pipeBase 

32import lsst.daf.base as dafBase 

33from lsst.obs.base import ExposureIdInfo 

34from lsst.pipe.base import connectionTypes 

35import lsst.afw.table as afwTable 

36from lsst.meas.base import SingleFrameMeasurementTask 

37from lsst.daf.butler import DeferredDatasetHandle, DataCoordinate 

38from lsst.skymap import BaseSkyMap 

39 

40from .parquetTable import ParquetTable 

41from .functors import CompositeFunctor, Column 

42 

43log = logging.getLogger(__name__) 

44 

45 

46def flattenFilters(df, noDupCols=['coord_ra', 'coord_dec'], camelCase=False, inputBands=None): 

47 """Flattens a dataframe with multilevel column index. 

48 """ 

49 newDf = pd.DataFrame() 

50 # band is the level 0 index 

51 dfBands = df.columns.unique(level=0).values 

52 for band in dfBands: 

53 subdf = df[band] 

54 columnFormat = '{0}{1}' if camelCase else '{0}_{1}' 

55 newColumns = {c: columnFormat.format(band, c) 

56 for c in subdf.columns if c not in noDupCols} 

57 cols = list(newColumns.keys()) 

58 newDf = pd.concat([newDf, subdf[cols].rename(columns=newColumns)], axis=1) 

59 

60 # Band must be present in the input and output or else column is all NaN: 

61 presentBands = dfBands if inputBands is None else list(set(inputBands).intersection(dfBands)) 

62 # Get the unexploded columns from any present band's partition 

63 noDupDf = df[presentBands[0]][noDupCols] 

64 newDf = pd.concat([noDupDf, newDf], axis=1) 

65 return newDf 
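
A minimal pandas sketch of what the function above produces; the band and column names are hypothetical.

# Illustrative sketch only; not part of postprocess.py.
import numpy as np
import pandas as pd

cols = pd.MultiIndex.from_product([['g', 'r'], ['coord_ra', 'coord_dec', 'PsFlux']],
                                  names=('band', 'column'))
multiDf = pd.DataFrame(np.ones((3, len(cols))), columns=cols)
flatDf = flattenFilters(multiDf)
# flatDf columns: coord_ra, coord_dec, g_PsFlux, r_PsFlux
# (camelCase=True would instead give gPsFlux, rPsFlux)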

66 

67 

68class WriteObjectTableConnections(pipeBase.PipelineTaskConnections, 

69 defaultTemplates={"coaddName": "deep"}, 

70 dimensions=("tract", "patch", "skymap")): 

71 inputCatalogMeas = connectionTypes.Input( 

72 doc="Catalog of source measurements on the deepCoadd.", 

73 dimensions=("tract", "patch", "band", "skymap"), 

74 storageClass="SourceCatalog", 

75 name="{coaddName}Coadd_meas", 

76 multiple=True 

77 ) 

78 inputCatalogForcedSrc = connectionTypes.Input( 

79 doc="Catalog of forced measurements (shape and position parameters held fixed) on the deepCoadd.", 

80 dimensions=("tract", "patch", "band", "skymap"), 

81 storageClass="SourceCatalog", 

82 name="{coaddName}Coadd_forced_src", 

83 multiple=True 

84 ) 

85 inputCatalogRef = connectionTypes.Input( 

86 doc="Catalog marking the primary detection (which band provides a good shape and position)" 

87 "for each detection in deepCoadd_mergeDet.", 

88 dimensions=("tract", "patch", "skymap"), 

89 storageClass="SourceCatalog", 

90 name="{coaddName}Coadd_ref" 

91 ) 

92 outputCatalog = connectionTypes.Output( 

93 doc="A vertical concatenation of the deepCoadd_{ref|meas|forced_src} catalogs, " 

94 "stored as a DataFrame with a multi-level column index per-patch.", 

95 dimensions=("tract", "patch", "skymap"), 

96 storageClass="DataFrame", 

97 name="{coaddName}Coadd_obj" 

98 ) 

99 

100 

101class WriteObjectTableConfig(pipeBase.PipelineTaskConfig, 

102 pipelineConnections=WriteObjectTableConnections): 

103 engine = pexConfig.Field( 

104 dtype=str, 

105 default="pyarrow", 

106 doc="Parquet engine for writing (pyarrow or fastparquet)" 

107 ) 

108 coaddName = pexConfig.Field( 

109 dtype=str, 

110 default="deep", 

111 doc="Name of coadd" 

112 ) 

113 

114 

115class WriteObjectTableTask(pipeBase.PipelineTask): 

116 """Write filter-merged source tables to parquet 

117 """ 

118 _DefaultName = "writeObjectTable" 

119 ConfigClass = WriteObjectTableConfig 

120 

121 # Names of table datasets to be merged 

122 inputDatasets = ('forced_src', 'meas', 'ref') 

123 

124 # Tag of output dataset written by `MergeSourcesTask.write` 

125 outputDataset = 'obj' 

126 

127 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

128 inputs = butlerQC.get(inputRefs) 

129 

130 measDict = {ref.dataId['band']: {'meas': cat} for ref, cat in 

131 zip(inputRefs.inputCatalogMeas, inputs['inputCatalogMeas'])} 

132 forcedSourceDict = {ref.dataId['band']: {'forced_src': cat} for ref, cat in 

133 zip(inputRefs.inputCatalogForcedSrc, inputs['inputCatalogForcedSrc'])} 

134 

135 catalogs = {} 

136 for band in measDict.keys(): 

137 catalogs[band] = {'meas': measDict[band]['meas'], 

138 'forced_src': forcedSourceDict[band]['forced_src'], 

139 'ref': inputs['inputCatalogRef']} 

140 dataId = butlerQC.quantum.dataId 

141 df = self.run(catalogs=catalogs, tract=dataId['tract'], patch=dataId['patch']) 

142 outputs = pipeBase.Struct(outputCatalog=df) 

143 butlerQC.put(outputs, outputRefs) 

144 

145 def run(self, catalogs, tract, patch): 

146 """Merge multiple catalogs. 

147 

148 Parameters 

149 ---------- 

150 catalogs : `dict` 

151 Mapping from filter names to dict of catalogs. 

152 tract : `int` 

153 TractId to use for the tractId column. 

154 patch : `str` 

155 PatchId to use for the patchId column. 

156 

157 Returns 

158 ------- 

159 catalog : `pandas.DataFrame` 

160 Merged dataframe. 

161 """ 

162 

163 dfs = [] 

164 for filt, tableDict in catalogs.items(): 

165 for dataset, table in tableDict.items(): 

166 # Convert afwTable to pandas DataFrame 

167 df = table.asAstropy().to_pandas().set_index('id', drop=True) 

168 

169 # Sort columns by name, to ensure matching schema among patches 

170 df = df.reindex(sorted(df.columns), axis=1) 

171 df['tractId'] = tract 

172 df['patchId'] = patch 

173 

174 # Make columns a 3-level MultiIndex 

175 df.columns = pd.MultiIndex.from_tuples([(dataset, filt, c) for c in df.columns], 

176 names=('dataset', 'band', 'column')) 

177 dfs.append(df) 

178 

179 catalog = functools.reduce(lambda d1, d2: d1.join(d2), dfs) 

180 return catalog 
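
A pandas-only sketch (hypothetical ids, fluxes, and flags) of the three-level column index that run() builds for each per-(dataset, band) table before joining them on the object id.

# Illustrative sketch only; not part of postprocess.py.
import functools
import pandas as pd

objIds = pd.Index([1001, 1002], name='id')
meas_g = pd.DataFrame({'base_PsfFlux_instFlux': [1.0, 2.0]}, index=objIds)
meas_g.columns = pd.MultiIndex.from_tuples([('meas', 'g', c) for c in meas_g.columns],
                                           names=('dataset', 'band', 'column'))
ref_g = pd.DataFrame({'detect_isPrimary': [True, False]}, index=objIds)
ref_g.columns = pd.MultiIndex.from_tuples([('ref', 'g', c) for c in ref_g.columns],
                                          names=('dataset', 'band', 'column'))
merged = functools.reduce(lambda d1, d2: d1.join(d2), [meas_g, ref_g])
# merged.columns: [('meas', 'g', 'base_PsfFlux_instFlux'), ('ref', 'g', 'detect_isPrimary')]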

181 

182 

183class WriteSourceTableConnections(pipeBase.PipelineTaskConnections, 

184 defaultTemplates={"catalogType": ""}, 

185 dimensions=("instrument", "visit", "detector")): 

186 

187 catalog = connectionTypes.Input( 

188 doc="Input full-depth catalog of sources produced by CalibrateTask", 

189 name="{catalogType}src", 

190 storageClass="SourceCatalog", 

191 dimensions=("instrument", "visit", "detector") 

192 ) 

193 outputCatalog = connectionTypes.Output( 

194 doc="Catalog of sources, `src` in Parquet format. The 'id' column is " 

195 "replaced with an index; all other columns are unchanged.", 

196 name="{catalogType}source", 

197 storageClass="DataFrame", 

198 dimensions=("instrument", "visit", "detector") 

199 ) 

200 

201 

202class WriteSourceTableConfig(pipeBase.PipelineTaskConfig, 

203 pipelineConnections=WriteSourceTableConnections): 

204 pass 

205 

206 

207class WriteSourceTableTask(pipeBase.PipelineTask): 

208 """Write source table to parquet. 

209 """ 

210 _DefaultName = "writeSourceTable" 

211 ConfigClass = WriteSourceTableConfig 

212 

213 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

214 inputs = butlerQC.get(inputRefs) 

215 inputs['ccdVisitId'] = butlerQC.quantum.dataId.pack("visit_detector") 

216 result = self.run(**inputs).table 

217 outputs = pipeBase.Struct(outputCatalog=result.toDataFrame()) 

218 butlerQC.put(outputs, outputRefs) 

219 

220 def run(self, catalog, ccdVisitId=None, **kwargs): 

221 """Convert `src` catalog to parquet 

222 

223 Parameters 

224 ---------- 

225 catalog : `afwTable.SourceCatalog` 

226 catalog to be converted 

227 ccdVisitId : `int` 

228 ccdVisitId to be added as a column 

229 

230 Returns 

231 ------- 

232 result : `lsst.pipe.base.Struct` 

233 ``table`` 

234 `ParquetTable` version of the input catalog 

235 """ 

236 self.log.info("Generating parquet table from src catalog ccdVisitId=%s", ccdVisitId) 

237 df = catalog.asAstropy().to_pandas().set_index('id', drop=True) 

238 df['ccdVisitId'] = ccdVisitId 

239 return pipeBase.Struct(table=ParquetTable(dataFrame=df)) 
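
A sketch of the output table shape, with a plain DataFrame standing in for the converted afw SourceCatalog; the ids, column name, and ccdVisitId value are hypothetical.

# Illustrative sketch only; not part of postprocess.py.
import pandas as pd

src = pd.DataFrame({'id': [1, 2], 'base_PsfFlux_instFlux': [10.0, 20.0]})
src = src.set_index('id', drop=True)
src['ccdVisitId'] = 123456789  # hypothetical packed (visit, detector) integer
# Every row of the per-detector source table carries the same ccdVisitId, which
# can later be matched against the ccdVisitTable built further down this module.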

240 

241 

242class WriteRecalibratedSourceTableConnections(WriteSourceTableConnections, 

243 defaultTemplates={"catalogType": "", 

244 "skyWcsName": "jointcal", 

245 "photoCalibName": "fgcm"}, 

246 dimensions=("instrument", "visit", "detector", "skymap")): 

247 skyMap = connectionTypes.Input( 

248 doc="skyMap needed to choose which tract-level calibrations to use when multiple available", 

249 name=BaseSkyMap.SKYMAP_DATASET_TYPE_NAME, 

250 storageClass="SkyMap", 

251 dimensions=("skymap",), 

252 ) 

253 exposure = connectionTypes.Input( 

254 doc="Input exposure to perform photometry on.", 

255 name="calexp", 

256 storageClass="ExposureF", 

257 dimensions=["instrument", "visit", "detector"], 

258 ) 

259 externalSkyWcsTractCatalog = connectionTypes.Input( 

260 doc=("Per-tract, per-visit wcs calibrations. These catalogs use the detector " 

261 "id for the catalog id, sorted on id for fast lookup."), 

262 name="{skyWcsName}SkyWcsCatalog", 

263 storageClass="ExposureCatalog", 

264 dimensions=["instrument", "visit", "tract"], 

265 multiple=True 

266 ) 

267 externalSkyWcsGlobalCatalog = connectionTypes.Input( 

268 doc=("Per-visit wcs calibrations computed globally (with no tract information). " 

269 "These catalogs use the detector id for the catalog id, sorted on id for " 

270 "fast lookup."), 

271 name="{skyWcsName}SkyWcsCatalog", 

272 storageClass="ExposureCatalog", 

273 dimensions=["instrument", "visit"], 

274 ) 

275 externalPhotoCalibTractCatalog = connectionTypes.Input( 

276 doc=("Per-tract, per-visit photometric calibrations. These catalogs use the " 

277 "detector id for the catalog id, sorted on id for fast lookup."), 

278 name="{photoCalibName}PhotoCalibCatalog", 

279 storageClass="ExposureCatalog", 

280 dimensions=["instrument", "visit", "tract"], 

281 multiple=True 

282 ) 

283 externalPhotoCalibGlobalCatalog = connectionTypes.Input( 

284 doc=("Per-visit photometric calibrations computed globally (with no tract " 

285 "information). These catalogs use the detector id for the catalog id, " 

286 "sorted on id for fast lookup."), 

287 name="{photoCalibName}PhotoCalibCatalog", 

288 storageClass="ExposureCatalog", 

289 dimensions=["instrument", "visit"], 

290 ) 

291 

292 def __init__(self, *, config=None): 

293 super().__init__(config=config) 

294 # Same connection boilerplate as all other applications of 

295 # Global/Tract calibrations 

296 if config.doApplyExternalSkyWcs and config.doReevaluateSkyWcs: 

297 if config.useGlobalExternalSkyWcs: 

298 self.inputs.remove("externalSkyWcsTractCatalog") 

299 else: 

300 self.inputs.remove("externalSkyWcsGlobalCatalog") 

301 else: 

302 self.inputs.remove("externalSkyWcsTractCatalog") 

303 self.inputs.remove("externalSkyWcsGlobalCatalog") 

304 if config.doApplyExternalPhotoCalib and config.doReevaluatePhotoCalib: 

305 if config.useGlobalExternalPhotoCalib: 

306 self.inputs.remove("externalPhotoCalibTractCatalog") 

307 else: 

308 self.inputs.remove("externalPhotoCalibGlobalCatalog") 

309 else: 

310 self.inputs.remove("externalPhotoCalibTractCatalog") 

311 self.inputs.remove("externalPhotoCalibGlobalCatalog") 

312 

313 

314class WriteRecalibratedSourceTableConfig(WriteSourceTableConfig, 

315 pipelineConnections=WriteRecalibratedSourceTableConnections): 

316 

317 doReevaluatePhotoCalib = pexConfig.Field( 

318 dtype=bool, 

319 default=True, 

320 doc=("Add or replace local photoCalib columns") 

321 ) 

322 doReevaluateSkyWcs = pexConfig.Field( 

323 dtype=bool, 

324 default=True, 

325 doc=("Add or replace local WCS columns and update the coord columns, coord_ra and coord_dec") 

326 ) 

327 doApplyExternalPhotoCalib = pexConfig.Field( 

328 dtype=bool, 

329 default=True, 

330 doc=("If and only if doReevaluatePhotoCalib, apply the photometric calibrations from an external ", 

331 "algorithm such as FGCM or jointcal, else use the photoCalib already attached to the exposure."), 

332 ) 

333 doApplyExternalSkyWcs = pexConfig.Field( 

334 dtype=bool, 

335 default=True, 

336 doc=("if and only if doReevaluateSkyWcs, apply the WCS from an external algorithm such as jointcal, ", 

337 "else use the wcs already attached to the exposure."), 

338 ) 

339 useGlobalExternalPhotoCalib = pexConfig.Field( 

340 dtype=bool, 

341 default=True, 

342 doc=("When using doApplyExternalPhotoCalib, use 'global' calibrations " 

343 "that are not run per-tract. When False, use per-tract photometric " 

344 "calibration files.") 

345 ) 

346 useGlobalExternalSkyWcs = pexConfig.Field( 

347 dtype=bool, 

348 default=False, 

349 doc=("When using doApplyExternalSkyWcs, use 'global' calibrations " 

350 "that are not run per-tract. When False, use per-tract wcs " 

351 "files.") 

352 ) 

353 

354 def validate(self): 

355 super().validate() 

356 if self.doApplyExternalSkyWcs and not self.doReevaluateSkyWcs: 

357 log.warning("doApplyExternalSkyWcs=True but doReevaluateSkyWcs=False" 

358 "External SkyWcs will not be read or evaluated.") 

359 if self.doApplyExternalPhotoCalib and not self.doReevaluatePhotoCalib: 

360 log.warning("doApplyExternalPhotoCalib=True but doReevaluatePhotoCalib=False." 

361 "External PhotoCalib will not be read or evaluated.") 

362 

363 

364class WriteRecalibratedSourceTableTask(WriteSourceTableTask): 

365 """Write source table to parquet 

366 """ 

367 _DefaultName = "writeRecalibratedSourceTable" 

368 ConfigClass = WriteRecalibratedSourceTableConfig 

369 

370 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

371 inputs = butlerQC.get(inputRefs) 

372 inputs['ccdVisitId'] = butlerQC.quantum.dataId.pack("visit_detector") 

373 inputs['exposureIdInfo'] = ExposureIdInfo.fromDataId(butlerQC.quantum.dataId, "visit_detector") 

374 

375 if self.config.doReevaluatePhotoCalib or self.config.doReevaluateSkyWcs: 

376 if self.config.doApplyExternalPhotoCalib or self.config.doApplyExternalSkyWcs: 

377 inputs['exposure'] = self.attachCalibs(inputRefs, **inputs) 

378 

379 inputs['catalog'] = self.addCalibColumns(**inputs) 

380 

381 result = self.run(**inputs).table 

382 outputs = pipeBase.Struct(outputCatalog=result.toDataFrame()) 

383 butlerQC.put(outputs, outputRefs) 

384 

385 def attachCalibs(self, inputRefs, skyMap, exposure, externalSkyWcsGlobalCatalog=None, 

386 externalSkyWcsTractCatalog=None, externalPhotoCalibGlobalCatalog=None, 

387 externalPhotoCalibTractCatalog=None, **kwargs): 

388 """Apply external calibrations to exposure per configuration 

389 

390 When multiple tract-level calibrations overlap, select the one whose tract 

391 center is closest to the detector center. 

392 

393 Parameters 

394 ---------- 

395 inputRefs : `lsst.pipe.base.InputQuantizedConnection` 

396 Input connections, used here for the dataIds of the tract-level calibs. 

397 skyMap : `lsst.skymap.SkyMap` 

398 exposure : `lsst.afw.image.exposure.Exposure` 

399 Input exposure to adjust calibrations. 

400 externalSkyWcsGlobalCatalog : `lsst.afw.table.ExposureCatalog`, optional 

401 Exposure catalog with external skyWcs to be applied per config 

402 externalSkyWcsTractCatalog : `lsst.afw.table.ExposureCatalog`, optional 

403 Exposure catalog with external skyWcs to be applied per config 

404 externalPhotoCalibGlobalCatalog : `lsst.afw.table.ExposureCatalog`, optional 

405 Exposure catalog with external photoCalib to be applied per config 

406 externalPhotoCalibTractCatalog : `lsst.afw.table.ExposureCatalog`, optional 

407 Exposure catalog with external photoCalib to be applied per config 

408 

409 Returns 

410 ------- 

411 exposure : `lsst.afw.image.exposure.Exposure` 

412 Exposure with adjusted calibrations. 

413 """ 

414 if not self.config.doApplyExternalSkyWcs: 

415 # Do not modify the exposure's SkyWcs 

416 externalSkyWcsCatalog = None 

417 elif self.config.useGlobalExternalSkyWcs: 

418 # Use the global external SkyWcs 

419 externalSkyWcsCatalog = externalSkyWcsGlobalCatalog 

420 self.log.info('Applying global SkyWcs') 

421 else: 

422 # use tract-level external SkyWcs from the closest overlapping tract 

423 inputRef = getattr(inputRefs, 'externalSkyWcsTractCatalog') 

424 tracts = [ref.dataId['tract'] for ref in inputRef] 

425 if len(tracts) == 1: 

426 ind = 0 

427 self.log.info('Applying tract-level SkyWcs from tract %s', tracts[ind]) 

428 else: 

429 ind = self.getClosestTract(tracts, skyMap, 

430 exposure.getBBox(), exposure.getWcs()) 

431 self.log.info('Multiple overlapping externalSkyWcsTractCatalogs found (%s). ' 

432 'Applying closest to detector center: tract=%s', str(tracts), tracts[ind]) 

433 

434 externalSkyWcsCatalog = externalSkyWcsTractCatalog[ind] 

435 

436 if not self.config.doApplyExternalPhotoCalib: 

437 # Do not modify the exposure's PhotoCalib 

438 externalPhotoCalibCatalog = None 

439 elif self.config.useGlobalExternalPhotoCalib: 

440 # Use the global external PhotoCalib 

441 externalPhotoCalibCatalog = externalPhotoCalibGlobalCatalog 

442 self.log.info('Applying global PhotoCalib') 

443 else: 

444 # use tract-level external PhotoCalib from the closest overlapping tract 

445 inputRef = getattr(inputRefs, 'externalPhotoCalibTractCatalog') 

446 tracts = [ref.dataId['tract'] for ref in inputRef] 

447 if len(tracts) == 1: 

448 ind = 0 

449 self.log.info('Applying tract-level PhotoCalib from tract %s', tracts[ind]) 

450 else: 

451 ind = self.getClosestTract(tracts, skyMap, 

452 exposure.getBBox(), exposure.getWcs()) 

453 self.log.info('Multiple overlapping externalPhotoCalibTractCatalogs found (%s). ' 

454 'Applying closest to detector center: tract=%s', str(tracts), tracts[ind]) 

455 

456 externalPhotoCalibCatalog = externalPhotoCalibTractCatalog[ind] 

457 

458 return self.prepareCalibratedExposure(exposure, externalSkyWcsCatalog, externalPhotoCalibCatalog) 

459 

460 def getClosestTract(self, tracts, skyMap, bbox, wcs): 

461 """Find the index of the tract closest to detector from list of tractIds 

462 

463 Parameters 

464 ---------- 

465 tracts : `list` [`int`] 

466 Iterable of integer tractIds 

467 skyMap : `lsst.skymap.SkyMap` 

468 skyMap to lookup tract geometry and wcs 

469 bbox : `lsst.geom.Box2I` 

470 Detector bbox, the center of which will be compared to tract centers 

471 wcs : `lsst.afw.geom.SkyWcs` 

472 Detector WCS used to map the detector center to sky coordinates 

473 

474 Returns 

475 ------- 

476 index : `int` 

477 """ 

478 if len(tracts) == 1: 

479 return 0 

480 

481 center = wcs.pixelToSky(bbox.getCenter()) 

482 sep = [] 

483 for tractId in tracts: 

484 tract = skyMap[tractId] 

485 tractCenter = tract.getWcs().pixelToSky(tract.getBBox().getCenter()) 

486 sep.append(center.separation(tractCenter)) 

487 

488 return np.argmin(sep) 
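
A numpy-only illustration of the same closest-center selection, using unit-vector dot products in place of afw Angle separations; the coordinates are hypothetical.

# Illustrative sketch only; not part of postprocess.py.
import numpy as np

def closestCenterIndex(raDeg, decDeg, centersDeg):
    """Return the index of the (ra, dec) pair in centersDeg closest on the sphere."""
    def unitVector(ra, dec):
        ra, dec = np.radians(ra), np.radians(dec)
        return np.array([np.cos(dec)*np.cos(ra), np.cos(dec)*np.sin(ra), np.sin(dec)])
    target = unitVector(raDeg, decDeg)
    seps = [np.arccos(np.clip(np.dot(target, unitVector(r, d)), -1.0, 1.0))
            for r, d in centersDeg]
    return int(np.argmin(seps))

closestCenterIndex(150.0, 2.0, [(149.5, 1.8), (151.2, 2.5)])  # -> 0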

489 

490 def prepareCalibratedExposure(self, exposure, externalSkyWcsCatalog=None, externalPhotoCalibCatalog=None): 

491 """Prepare a calibrated exposure and apply external calibrations 

492 if so configured. 

493 

494 Parameters 

495 ---------- 

496 exposure : `lsst.afw.image.exposure.Exposure` 

497 Input exposure to adjust calibrations. 

498 externalSkyWcsCatalog : `lsst.afw.table.ExposureCatalog`, optional 

499 Exposure catalog with external skyWcs to be applied 

500 if config.doApplyExternalSkyWcs=True. Catalog uses the detector id 

501 for the catalog id, sorted on id for fast lookup. 

502 externalPhotoCalibCatalog : `lsst.afw.table.ExposureCatalog`, optional 

503 Exposure catalog with external photoCalib to be applied 

504 if config.doApplyExternalPhotoCalib=True. Catalog uses the detector 

505 id for the catalog id, sorted on id for fast lookup. 

506 

507 Returns 

508 ------- 

509 exposure : `lsst.afw.image.exposure.Exposure` 

510 Exposure with adjusted calibrations. 

511 """ 

512 detectorId = exposure.getInfo().getDetector().getId() 

513 

514 if externalPhotoCalibCatalog is not None: 

515 row = externalPhotoCalibCatalog.find(detectorId) 

516 if row is None: 

517 self.log.warning("Detector id %s not found in externalPhotoCalibCatalog; " 

518 "Using original photoCalib.", detectorId) 

519 else: 

520 photoCalib = row.getPhotoCalib() 

521 if photoCalib is None: 

522 self.log.warning("Detector id %s has None for photoCalib in externalPhotoCalibCatalog; " 

523 "Using original photoCalib.", detectorId) 

524 else: 

525 exposure.setPhotoCalib(photoCalib) 

526 

527 if externalSkyWcsCatalog is not None: 

528 row = externalSkyWcsCatalog.find(detectorId) 

529 if row is None: 

530 self.log.warning("Detector id %s not found in externalSkyWcsCatalog; " 

531 "Using original skyWcs.", detectorId) 

532 else: 

533 skyWcs = row.getWcs() 

534 if skyWcs is None: 

535 self.log.warning("Detector id %s has None for skyWcs in externalSkyWcsCatalog; " 

536 "Using original skyWcs.", detectorId) 

537 else: 

538 exposure.setWcs(skyWcs) 

539 

540 return exposure 

541 

542 def addCalibColumns(self, catalog, exposure, exposureIdInfo, **kwargs): 

543 """Add replace columns with calibs evaluated at each centroid 

544 

545 Add or replace ``base_LocalWcs`` and ``base_LocalPhotoCalib`` columns in a 

546 source catalog, by rerunning the plugins. 

547 

548 Parameters 

549 ---------- 

550 catalog : `lsst.afw.table.SourceCatalog` 

551 catalog to which calib columns will be added 

552 exposure : `lsst.afw.image.exposure.Exposure` 

553 Exposure with attached PhotoCalibs and SkyWcs attributes to be 

554 reevaluated at local centroids. Pixels are not required. 

555 exposureIdInfo : `lsst.obs.base.ExposureIdInfo` 

556 

557 Returns 

558 ------- 

559 newCat : `lsst.afw.table.SourceCatalog` 

560 Source Catalog with requested local calib columns 

561 """ 

562 measureConfig = SingleFrameMeasurementTask.ConfigClass() 

563 measureConfig.doReplaceWithNoise = False 

564 

565 measureConfig.plugins.names = [] 

566 if self.config.doReevaluateSkyWcs: 

567 measureConfig.plugins.names.add('base_LocalWcs') 

568 self.log.info("Re-evaluating base_LocalWcs plugin") 

569 if self.config.doReevaluatePhotoCalib: 

570 measureConfig.plugins.names.add('base_LocalPhotoCalib') 

571 self.log.info("Re-evaluating base_LocalPhotoCalib plugin") 

572 pluginsNotToCopy = tuple(measureConfig.plugins.names) 

573 

574 # Create a new schema and catalog 

575 # Copy all columns from original except for the ones to reevaluate 

576 aliasMap = catalog.schema.getAliasMap() 

577 mapper = afwTable.SchemaMapper(catalog.schema) 

578 for item in catalog.schema: 

579 if not item.field.getName().startswith(pluginsNotToCopy): 

580 mapper.addMapping(item.key) 

581 

582 schema = mapper.getOutputSchema() 

583 measurement = SingleFrameMeasurementTask(config=measureConfig, schema=schema) 

584 schema.setAliasMap(aliasMap) 

585 newCat = afwTable.SourceCatalog(schema) 

586 newCat.extend(catalog, mapper=mapper) 

587 

588 # Fluxes in sourceCatalogs are in counts, so there are no fluxes to 

589 # update here. LocalPhotoCalibs are applied during transform tasks. 

590 # Update coord_ra/coord_dec, which are expected to be positions on the 

591 # sky and are used as such in sdm tables without transform 

592 if self.config.doReevaluateSkyWcs: 

593 afwTable.updateSourceCoords(exposure.wcs, newCat) 

594 

595 measurement.run(measCat=newCat, exposure=exposure, exposureId=exposureIdInfo.expId) 

596 

597 return newCat 

598 

599 

600class PostprocessAnalysis(object): 

601 """Calculate columns from ParquetTable. 

602 

603 This object manages and organizes an arbitrary set of computations 

604 on a catalog. The catalog is defined by a 

605 `lsst.pipe.tasks.parquetTable.ParquetTable` object (or list thereof), such 

606 as a `deepCoadd_obj` dataset, and the computations are defined by a 

607 collection of `lsst.pipe.tasks.functor.Functor` objects (or, equivalently, 

608 a `CompositeFunctor`). 

609 

610 After the object is initialized, accessing the `.df` attribute (which 

611 holds the `pandas.DataFrame` containing the results of the calculations) 

612 triggers computation of said dataframe. 

613 

614 One of the conveniences of using this object is the ability to define a 

615 desired common filter for all functors. This enables the same functor 

616 collection to be passed to several different `PostprocessAnalysis` objects 

617 without having to change the original functor collection, since the `filt` 

618 keyword argument of this object triggers an overwrite of the `filt` 

619 property for all functors in the collection. 

620 

621 This object also allows a list of refFlags to be passed, and defines a set 

622 of default refFlags that are always included even if not requested. 

623 

624 If a list of `ParquetTable` objects is passed, rather than a single one, 

625 then the calculations will be mapped over all the input catalogs. In 

626 principle, it should be straightforward to parallelize this activity, but 

627 initial tests have failed (see TODO in code comments). 

628 

629 Parameters 

630 ---------- 

631 parq : `lsst.pipe.tasks.parquetTable.ParquetTable` (or list of such) 

632 Source catalog(s) for computation. 

633 

634 functors : `list`, `dict`, or `lsst.pipe.tasks.functors.CompositeFunctor` 

635 Computations to do (functors that act on `parq`). 

636 If a dict, the output 

637 DataFrame will have columns keyed accordingly. 

638 If a list, the column keys will come from the 

639 `.shortname` attribute of each functor. 

640 

641 filt : `str`, optional 

642 Filter in which to calculate. If provided, 

643 this will overwrite any existing `.filt` attribute 

644 of the provided functors. 

645 

646 flags : `list`, optional 

647 List of flags (per-band) to include in output table. 

648 Taken from the `meas` dataset if applied to a multilevel Object Table. 

649 

650 refFlags : `list`, optional 

651 List of refFlags (only reference band) to include in output table. 

652 

653 forcedFlags : `list`, optional 

654 List of flags (per-band) to include in output table. 

655 Taken from the ``forced_src`` dataset if applied to a 

656 multilevel Object Table. Intended for flags from measurement plugins 

657 only run during multi-band forced-photometry. 

658 """ 

659 _defaultRefFlags = [] 

660 _defaultFuncs = () 

661 

662 def __init__(self, parq, functors, filt=None, flags=None, refFlags=None, forcedFlags=None): 

663 self.parq = parq 

664 self.functors = functors 

665 

666 self.filt = filt 

667 self.flags = list(flags) if flags is not None else [] 

668 self.forcedFlags = list(forcedFlags) if forcedFlags is not None else [] 

669 self.refFlags = list(self._defaultRefFlags) 

670 if refFlags is not None: 

671 self.refFlags += list(refFlags) 

672 

673 self._df = None 

674 

675 @property 

676 def defaultFuncs(self): 

677 funcs = dict(self._defaultFuncs) 

678 return funcs 

679 

680 @property 

681 def func(self): 

682 additionalFuncs = self.defaultFuncs 

683 additionalFuncs.update({flag: Column(flag, dataset='forced_src') for flag in self.forcedFlags}) 

684 additionalFuncs.update({flag: Column(flag, dataset='ref') for flag in self.refFlags}) 

685 additionalFuncs.update({flag: Column(flag, dataset='meas') for flag in self.flags}) 

686 

687 if isinstance(self.functors, CompositeFunctor): 

688 func = self.functors 

689 else: 

690 func = CompositeFunctor(self.functors) 

691 

692 func.funcDict.update(additionalFuncs) 

693 func.filt = self.filt 

694 

695 return func 

696 

697 @property 

698 def noDupCols(self): 

699 return [name for name, func in self.func.funcDict.items() if func.noDup or func.dataset == 'ref'] 

700 

701 @property 

702 def df(self): 

703 if self._df is None: 

704 self.compute() 

705 return self._df 

706 

707 def compute(self, dropna=False, pool=None): 

708 # map over multiple parquet tables 

709 if type(self.parq) in (list, tuple): 

710 if pool is None: 

711 dflist = [self.func(parq, dropna=dropna) for parq in self.parq] 

712 else: 

713 # TODO: Figure out why this doesn't work (pyarrow pickling 

714 # issues?) 

715 dflist = pool.map(functools.partial(self.func, dropna=dropna), self.parq) 

716 self._df = pd.concat(dflist) 

717 else: 

718 self._df = self.func(self.parq, dropna=dropna) 

719 

720 return self._df 
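
A usage sketch, assuming `parq` is a deepCoadd_obj ParquetTable already loaded (e.g. from a butler); the flux and flag column names are hypothetical.

# Illustrative sketch only; not part of postprocess.py.
funcs = {'psfFlux': Column('base_PsfFlux_instFlux', dataset='meas')}
analysis = PostprocessAnalysis(parq, funcs, filt='i',
                               flags=['base_PixelFlags_flag_saturated'],
                               refFlags=['detect_isPrimary'])
objectDf = analysis.df  # lazily computed pandas.DataFrame with the requested columns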

721 

722 

723class TransformCatalogBaseConnections(pipeBase.PipelineTaskConnections, 

724 dimensions=()): 

725 """Expected Connections for subclasses of TransformCatalogBaseTask. 

726 

727 Must be subclassed. 

728 """ 

729 inputCatalog = connectionTypes.Input( 

730 name="", 

731 storageClass="DataFrame", 

732 ) 

733 outputCatalog = connectionTypes.Output( 

734 name="", 

735 storageClass="DataFrame", 

736 ) 

737 

738 

739class TransformCatalogBaseConfig(pipeBase.PipelineTaskConfig, 

740 pipelineConnections=TransformCatalogBaseConnections): 

741 functorFile = pexConfig.Field( 

742 dtype=str, 

743 doc="Path to YAML file specifying Science Data Model functors to use " 

744 "when copying columns and computing calibrated values.", 

745 default=None, 

746 optional=True 

747 ) 

748 primaryKey = pexConfig.Field( 

749 dtype=str, 

750 doc="Name of column to be set as the DataFrame index. If None, the index" 

751 "will be named `id`", 

752 default=None, 

753 optional=True 

754 ) 

755 columnsFromDataId = pexConfig.ListField( 

756 dtype=str, 

757 default=None, 

758 optional=True, 

759 doc="Columns to extract from the dataId", 

760 ) 

761 

762 

763class TransformCatalogBaseTask(pipeBase.PipelineTask): 

764 """Base class for transforming/standardizing a catalog 

765 

766 by applying functors that convert units and apply calibrations. 

767 The purpose of this task is to perform a set of computations on 

768 an input `ParquetTable` dataset (such as `deepCoadd_obj`) and write the 

769 results to a new dataset (which needs to be declared in an `outputDataset` 

770 attribute). 

771 

772 The calculations to be performed are defined in a YAML file that specifies 

773 a set of functors to be computed, provided as 

774 a `--functorFile` config parameter. An example of such a YAML file 

775 is the following: 

776 

777 funcs: 

778 psfMag: 

779 functor: Mag 

780 args: 

781 - base_PsfFlux 

782 filt: HSC-G 

783 dataset: meas 

784 cmodel_magDiff: 

785 functor: MagDiff 

786 args: 

787 - modelfit_CModel 

788 - base_PsfFlux 

789 filt: HSC-G 

790 gauss_magDiff: 

791 functor: MagDiff 

792 args: 

793 - base_GaussianFlux 

794 - base_PsfFlux 

795 filt: HSC-G 

796 count: 

797 functor: Column 

798 args: 

799 - base_InputCount_value 

800 filt: HSC-G 

801 deconvolved_moments: 

802 functor: DeconvolvedMoments 

803 filt: HSC-G 

804 dataset: forced_src 

805 refFlags: 

806 - calib_psfUsed 

807 - merge_measurement_i 

808 - merge_measurement_r 

809 - merge_measurement_z 

810 - merge_measurement_y 

811 - merge_measurement_g 

812 - base_PixelFlags_flag_inexact_psfCenter 

813 - detect_isPrimary 

814 

815 The names for each entry under "funcs" will become the names of columns in 

816 the output dataset. All the functors referenced are defined in 

817 `lsst.pipe.tasks.functors`. Positional arguments to be passed to each 

818 functor are in the `args` list, and any additional entries for each column 

819 other than "functor" or "args" (e.g., `'filt'`, `'dataset'`) are treated as 

820 keyword arguments to be passed to the functor initialization. 

821 

822 The "flags" entry is the default shortcut for `Column` functors. 

823 All columns listed under "flags" will be copied to the output table 

824 untransformed. They can be of any datatype. 

825 In the special case of transforming a multi-level object table with 

826 band and dataset indices (deepCoadd_obj), these will be taken from the 

827 `meas` dataset and exploded out per band. 

828 

829 There are two special shortcuts that only apply when transforming 

830 multi-level Object (deepCoadd_obj) tables: 

831 - The "refFlags" entry is shortcut for `Column` functor 

832 taken from the `'ref'` dataset if transforming an ObjectTable. 

833 - The "forcedFlags" entry is shortcut for `Column` functors. 

834 taken from the ``forced_src`` dataset if transforming an ObjectTable. 

835 These are expanded out per band. 

836 

837 

838 This task uses the `lsst.pipe.tasks.postprocess.PostprocessAnalysis` object 

839 to organize and execute the calculations. 

840 """ 

841 @property 

842 def _DefaultName(self): 

843 raise NotImplementedError('Subclass must define "_DefaultName" attribute') 

844 

845 @property 

846 def outputDataset(self): 

847 raise NotImplementedError('Subclass must define "outputDataset" attribute') 

848 

849 @property 

850 def inputDataset(self): 

851 raise NotImplementedError('Subclass must define "inputDataset" attribute') 

852 

853 @property 

854 def ConfigClass(self): 

855 raise NotImplementedError('Subclass must define "ConfigClass" attribute') 

856 

857 def __init__(self, *args, **kwargs): 

858 super().__init__(*args, **kwargs) 

859 if self.config.functorFile: 

860 self.log.info('Loading transform functor definitions from %s', 

861 self.config.functorFile) 

862 self.funcs = CompositeFunctor.from_file(self.config.functorFile) 

863 self.funcs.update(dict(PostprocessAnalysis._defaultFuncs)) 

864 else: 

865 self.funcs = None 

866 

867 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

868 inputs = butlerQC.get(inputRefs) 

869 if self.funcs is None: 

870 raise ValueError("config.functorFile is None. " 

871 "Must be a valid path to yaml in order to run Task as a PipelineTask.") 

872 result = self.run(parq=inputs['inputCatalog'], funcs=self.funcs, 

873 dataId=outputRefs.outputCatalog.dataId.full) 

874 outputs = pipeBase.Struct(outputCatalog=result) 

875 butlerQC.put(outputs, outputRefs) 

876 

877 def run(self, parq, funcs=None, dataId=None, band=None): 

878 """Do postprocessing calculations 

879 

880 Takes a `ParquetTable` object and dataId, 

881 returns a dataframe with results of postprocessing calculations. 

882 

883 Parameters 

884 ---------- 

885 parq : `lsst.pipe.tasks.parquetTable.ParquetTable` 

886 ParquetTable from which calculations are done. 

887 funcs : `lsst.pipe.tasks.functors.Functors` 

888 Functors to apply to the table's columns 

889 dataId : dict, optional 

890 Used to add a `patchId` column to the output dataframe. 

891 band : `str`, optional 

892 Filter band that is being processed. 

893 

894 Returns 

895 ------- 

896 df : `pandas.DataFrame` 

897 """ 

898 self.log.info("Transforming/standardizing the source table dataId: %s", dataId) 

899 

900 df = self.transform(band, parq, funcs, dataId).df 

901 self.log.info("Made a table of %d columns and %d rows", len(df.columns), len(df)) 

902 return df 

903 

904 def getFunctors(self): 

905 return self.funcs 

906 

907 def getAnalysis(self, parq, funcs=None, band=None): 

908 if funcs is None: 

909 funcs = self.funcs 

910 analysis = PostprocessAnalysis(parq, funcs, filt=band) 

911 return analysis 

912 

913 def transform(self, band, parq, funcs, dataId): 

914 analysis = self.getAnalysis(parq, funcs=funcs, band=band) 

915 df = analysis.df 

916 if dataId and self.config.columnsFromDataId: 

917 for key in self.config.columnsFromDataId: 

918 if key in dataId: 

919 df[str(key)] = dataId[key] 

920 else: 

921 raise ValueError(f"'{key}' in config.columnsFromDataId not found in dataId: {dataId}") 

922 

923 if self.config.primaryKey: 

924 if df.index.name != self.config.primaryKey and self.config.primaryKey in df: 

925 df.reset_index(inplace=True, drop=True) 

926 df.set_index(self.config.primaryKey, inplace=True) 

927 

928 return pipeBase.Struct( 

929 df=df, 

930 analysis=analysis 

931 ) 
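
A sketch of loading a functor file like the one shown in the class docstring; the YAML below is a cut-down, hypothetical example written to a temporary path.

# Illustrative sketch only; not part of postprocess.py.
import os
import tempfile

yamlText = (
    "funcs:\n"
    "  psfMag:\n"
    "    functor: Mag\n"
    "    args:\n"
    "      - base_PsfFlux\n"
    "    dataset: meas\n"
    "refFlags:\n"
    "  - detect_isPrimary\n"
)
with tempfile.NamedTemporaryFile('w', suffix='.yaml', delete=False) as f:
    f.write(yamlText)
funcs = CompositeFunctor.from_file(f.name)
# funcs.funcDict should now contain a 'psfMag' Mag functor and a
# 'detect_isPrimary' Column functor drawn from the 'ref' dataset.
os.remove(f.name)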

932 

933 

934class TransformObjectCatalogConnections(pipeBase.PipelineTaskConnections, 

935 defaultTemplates={"coaddName": "deep"}, 

936 dimensions=("tract", "patch", "skymap")): 

937 inputCatalog = connectionTypes.Input( 

938 doc="The vertical concatenation of the deepCoadd_{ref|meas|forced_src} catalogs, " 

939 "stored as a DataFrame with a multi-level column index per-patch.", 

940 dimensions=("tract", "patch", "skymap"), 

941 storageClass="DataFrame", 

942 name="{coaddName}Coadd_obj", 

943 deferLoad=True, 

944 ) 

945 outputCatalog = connectionTypes.Output( 

946 doc="Per-Patch Object Table of columns transformed from the deepCoadd_obj table per the standard " 

947 "data model.", 

948 dimensions=("tract", "patch", "skymap"), 

949 storageClass="DataFrame", 

950 name="objectTable" 

951 ) 

952 

953 

954class TransformObjectCatalogConfig(TransformCatalogBaseConfig, 

955 pipelineConnections=TransformObjectCatalogConnections): 

956 coaddName = pexConfig.Field( 

957 dtype=str, 

958 default="deep", 

959 doc="Name of coadd" 

960 ) 

961 # TODO: remove in DM-27177 

962 filterMap = pexConfig.DictField( 

963 keytype=str, 

964 itemtype=str, 

965 default={}, 

966 doc=("Dictionary mapping full filter name to short one for column name munging." 

967 "These filters determine the output columns no matter what filters the " 

968 "input data actually contain."), 

969 deprecated=("Coadds are now identified by the band, so this transform is unused." 

970 "Will be removed after v22.") 

971 ) 

972 outputBands = pexConfig.ListField( 

973 dtype=str, 

974 default=None, 

975 optional=True, 

976 doc=("These bands and only these bands will appear in the output," 

977 " NaN-filled if the input does not include them." 

978 " If None, then use all bands found in the input.") 

979 ) 

980 camelCase = pexConfig.Field( 

981 dtype=bool, 

982 default=False, 

983 doc=("Write per-band columns names with camelCase, else underscore " 

984 "For example: gPsFlux instead of g_PsFlux.") 

985 ) 

986 multilevelOutput = pexConfig.Field( 

987 dtype=bool, 

988 default=False, 

989 doc=("Whether results dataframe should have a multilevel column index (True) or be flat " 

990 "and name-munged (False).") 

991 ) 

992 goodFlags = pexConfig.ListField( 

993 dtype=str, 

994 default=[], 

995 doc=("List of 'good' flags that should be set False when populating empty tables. " 

996 "All other flags are considered to be 'bad' flags and will be set to True.") 

997 ) 

998 floatFillValue = pexConfig.Field( 

999 dtype=float, 

1000 default=np.nan, 

1001 doc="Fill value for float fields when populating empty tables." 

1002 ) 

1003 integerFillValue = pexConfig.Field( 

1004 dtype=int, 

1005 default=-1, 

1006 doc="Fill value for integer fields when populating empty tables." 

1007 ) 

1008 

1009 def setDefaults(self): 

1010 super().setDefaults() 

1011 self.functorFile = os.path.join('$PIPE_TASKS_DIR', 'schemas', 'Object.yaml') 

1012 self.primaryKey = 'objectId' 

1013 self.columnsFromDataId = ['tract', 'patch'] 

1014 self.goodFlags = ['calib_astrometry_used', 

1015 'calib_photometry_reserved', 

1016 'calib_photometry_used', 

1017 'calib_psf_candidate', 

1018 'calib_psf_reserved', 

1019 'calib_psf_used'] 

1020 

1021 

1022class TransformObjectCatalogTask(TransformCatalogBaseTask): 

1023 """Produce a flattened Object Table to match the format specified in 

1024 sdm_schemas. 

1025 

1026 Do the same set of postprocessing calculations on all bands. 

1027 

1028 This is identical to `TransformCatalogBaseTask`, except that it does 

1029 the specified functor calculations for all filters present in the 

1030 input `deepCoadd_obj` table. Any specific `"filt"` keywords specified 

1031 by the YAML file will be superseded. 

1032 """ 

1033 _DefaultName = "transformObjectCatalog" 

1034 ConfigClass = TransformObjectCatalogConfig 

1035 

1036 def run(self, parq, funcs=None, dataId=None, band=None): 

1037 # NOTE: band kwarg is ignored here. 

1038 dfDict = {} 

1039 analysisDict = {} 

1040 templateDf = pd.DataFrame() 

1041 

1042 if isinstance(parq, DeferredDatasetHandle): 

1043 columns = parq.get(component='columns') 

1044 inputBands = columns.unique(level=1).values 

1045 else: 

1046 inputBands = parq.columnLevelNames['band'] 

1047 

1048 outputBands = self.config.outputBands if self.config.outputBands else inputBands 

1049 

1050 # Perform transform for data of filters that exist in parq. 

1051 for inputBand in inputBands: 

1052 if inputBand not in outputBands: 

1053 self.log.info("Ignoring %s band data in the input", inputBand) 

1054 continue 

1055 self.log.info("Transforming the catalog of band %s", inputBand) 

1056 result = self.transform(inputBand, parq, funcs, dataId) 

1057 dfDict[inputBand] = result.df 

1058 analysisDict[inputBand] = result.analysis 

1059 if templateDf.empty: 

1060 templateDf = result.df 

1061 

1062 # Put filler values in columns of other wanted bands 

1063 for filt in outputBands: 

1064 if filt not in dfDict: 

1065 self.log.info("Adding empty columns for band %s", filt) 

1066 dfTemp = templateDf.copy() 

1067 for col in dfTemp.columns: 

1068 testValue = dfTemp[col].values[0] 

1069 if isinstance(testValue, (np.bool_, pd.BooleanDtype)): 

1070 # Boolean flag type, check if it is a "good" flag 

1071 if col in self.config.goodFlags: 

1072 fillValue = False 

1073 else: 

1074 fillValue = True 

1075 elif isinstance(testValue, numbers.Integral): 

1076 # Checking numbers.Integral catches all flavors 

1077 # of python, numpy, pandas, etc. integers. 

1078 # We must ensure this is not an unsigned integer. 

1079 if isinstance(testValue, np.unsignedinteger): 

1080 raise ValueError("Parquet tables may not have unsigned integer columns.") 

1081 else: 

1082 fillValue = self.config.integerFillValue 

1083 else: 

1084 fillValue = self.config.floatFillValue 

1085 dfTemp[col].values[:] = fillValue 

1086 dfDict[filt] = dfTemp 

1087 

1088 # This makes a multilevel column index, with band as first level 

1089 df = pd.concat(dfDict, axis=1, names=['band', 'column']) 

1090 

1091 if not self.config.multilevelOutput: 

1092 noDupCols = list(set.union(*[set(v.noDupCols) for v in analysisDict.values()])) 

1093 if self.config.primaryKey in noDupCols: 

1094 noDupCols.remove(self.config.primaryKey) 

1095 if dataId and self.config.columnsFromDataId: 

1096 noDupCols += self.config.columnsFromDataId 

1097 df = flattenFilters(df, noDupCols=noDupCols, camelCase=self.config.camelCase, 

1098 inputBands=inputBands) 

1099 

1100 self.log.info("Made a table of %d columns and %d rows", len(df.columns), len(df)) 

1101 

1102 return df 
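
A pandas-only sketch of the fill policy applied above when a requested output band is missing from the input; the columns are hypothetical and the default goodFlags, integerFillValue, and floatFillValue are shown inline.

# Illustrative sketch only; not part of postprocess.py.
import numbers
import numpy as np
import pandas as pd

template = pd.DataFrame({'PsFlux': [1.2], 'deblend_nChild': [3], 'calib_psf_used': [True]})
goodFlags = ['calib_psf_used']
filled = template.copy()
for col in filled.columns:
    value = filled[col].values[0]
    if isinstance(value, np.bool_):
        filled[col] = col not in goodFlags   # "good" flags -> False, all other flags -> True
    elif isinstance(value, numbers.Integral):
        filled[col] = -1                     # integerFillValue default
    else:
        filled[col] = np.nan                 # floatFillValue default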

1103 

1104 

1105class ConsolidateObjectTableConnections(pipeBase.PipelineTaskConnections, 

1106 dimensions=("tract", "skymap")): 

1107 inputCatalogs = connectionTypes.Input( 

1108 doc="Per-Patch objectTables conforming to the standard data model.", 

1109 name="objectTable", 

1110 storageClass="DataFrame", 

1111 dimensions=("tract", "patch", "skymap"), 

1112 multiple=True, 

1113 ) 

1114 outputCatalog = connectionTypes.Output( 

1115 doc="Pre-tract horizontal concatenation of the input objectTables", 

1116 name="objectTable_tract", 

1117 storageClass="DataFrame", 

1118 dimensions=("tract", "skymap"), 

1119 ) 

1120 

1121 

1122class ConsolidateObjectTableConfig(pipeBase.PipelineTaskConfig, 

1123 pipelineConnections=ConsolidateObjectTableConnections): 

1124 coaddName = pexConfig.Field( 

1125 dtype=str, 

1126 default="deep", 

1127 doc="Name of coadd" 

1128 ) 

1129 

1130 

1131class ConsolidateObjectTableTask(pipeBase.PipelineTask): 

1132 """Write patch-merged source tables to a tract-level parquet file. 

1133 

1134 Concatenates the `objectTable` list into a per-tract `objectTable_tract`. 

1135 """ 

1136 _DefaultName = "consolidateObjectTable" 

1137 ConfigClass = ConsolidateObjectTableConfig 

1138 

1139 inputDataset = 'objectTable' 

1140 outputDataset = 'objectTable_tract' 

1141 

1142 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

1143 inputs = butlerQC.get(inputRefs) 

1144 self.log.info("Concatenating %s per-patch Object Tables", 

1145 len(inputs['inputCatalogs'])) 

1146 df = pd.concat(inputs['inputCatalogs']) 

1147 butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs) 

1148 

1149 

1150class TransformSourceTableConnections(pipeBase.PipelineTaskConnections, 

1151 defaultTemplates={"catalogType": ""}, 

1152 dimensions=("instrument", "visit", "detector")): 

1153 

1154 inputCatalog = connectionTypes.Input( 

1155 doc="Wide input catalog of sources produced by WriteSourceTableTask", 

1156 name="{catalogType}source", 

1157 storageClass="DataFrame", 

1158 dimensions=("instrument", "visit", "detector"), 

1159 deferLoad=True 

1160 ) 

1161 outputCatalog = connectionTypes.Output( 

1162 doc="Narrower, per-detector Source Table transformed and converted per a " 

1163 "specified set of functors", 

1164 name="{catalogType}sourceTable", 

1165 storageClass="DataFrame", 

1166 dimensions=("instrument", "visit", "detector") 

1167 ) 

1168 

1169 

1170class TransformSourceTableConfig(TransformCatalogBaseConfig, 

1171 pipelineConnections=TransformSourceTableConnections): 

1172 

1173 def setDefaults(self): 

1174 super().setDefaults() 

1175 self.functorFile = os.path.join('$PIPE_TASKS_DIR', 'schemas', 'Source.yaml') 

1176 self.primaryKey = 'sourceId' 

1177 self.columnsFromDataId = ['visit', 'detector', 'band', 'physical_filter'] 

1178 

1179 

1180class TransformSourceTableTask(TransformCatalogBaseTask): 

1181 """Transform/standardize a source catalog 

1182 """ 

1183 _DefaultName = "transformSourceTable" 

1184 ConfigClass = TransformSourceTableConfig 

1185 

1186 

1187class ConsolidateVisitSummaryConnections(pipeBase.PipelineTaskConnections, 

1188 dimensions=("instrument", "visit",), 

1189 defaultTemplates={"calexpType": ""}): 

1190 calexp = connectionTypes.Input( 

1191 doc="Processed exposures used for metadata", 

1192 name="{calexpType}calexp", 

1193 storageClass="ExposureF", 

1194 dimensions=("instrument", "visit", "detector"), 

1195 deferLoad=True, 

1196 multiple=True, 

1197 ) 

1198 visitSummary = connectionTypes.Output( 

1199 doc=("Per-visit consolidated exposure metadata. These catalogs use " 

1200 "detector id for the id and are sorted for fast lookups of a " 

1201 "detector."), 

1202 name="{calexpType}visitSummary", 

1203 storageClass="ExposureCatalog", 

1204 dimensions=("instrument", "visit"), 

1205 ) 

1206 

1207 

1208class ConsolidateVisitSummaryConfig(pipeBase.PipelineTaskConfig, 

1209 pipelineConnections=ConsolidateVisitSummaryConnections): 

1210 """Config for ConsolidateVisitSummaryTask""" 

1211 pass 

1212 

1213 

1214class ConsolidateVisitSummaryTask(pipeBase.PipelineTask): 

1215 """Task to consolidate per-detector visit metadata. 

1216 

1217 This task aggregates the following metadata from all the detectors in a 

1218 single visit into an exposure catalog: 

1219 - The visitInfo. 

1220 - The wcs. 

1221 - The photoCalib. 

1222 - The physical_filter and band (if available). 

1223 - The psf size, shape, and effective area at the center of the detector. 

1224 - The corners of the bounding box in right ascension/declination. 

1225 

1226 Other quantities such as Detector, Psf, ApCorrMap, and TransmissionCurve 

1227 are not persisted here because of storage concerns, and because of their 

1228 limited utility as summary statistics. 

1229 

1230 Tests for this task are performed in ci_hsc_gen3. 

1231 """ 

1232 _DefaultName = "consolidateVisitSummary" 

1233 ConfigClass = ConsolidateVisitSummaryConfig 

1234 

1235 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

1236 dataRefs = butlerQC.get(inputRefs.calexp) 

1237 visit = dataRefs[0].dataId.byName()['visit'] 

1238 

1239 self.log.debug("Concatenating metadata from %d per-detector calexps (visit %d)", 

1240 len(dataRefs), visit) 

1241 

1242 expCatalog = self._combineExposureMetadata(visit, dataRefs) 

1243 

1244 butlerQC.put(expCatalog, outputRefs.visitSummary) 

1245 

1246 def _combineExposureMetadata(self, visit, dataRefs): 

1247 """Make a combined exposure catalog from a list of dataRefs. 

1248 These dataRefs must point to exposures with wcs, summaryStats, 

1249 and other visit metadata. 

1250 

1251 Parameters 

1252 ---------- 

1253 visit : `int` 

1254 Visit identification number. 

1255 dataRefs : `list` of `lsst.daf.butler.DeferredDatasetHandle` 

1256 List of dataRefs in visit. 

1257 

1258 Returns 

1259 ------- 

1260 visitSummary : `lsst.afw.table.ExposureCatalog` 

1261 Exposure catalog with per-detector summary information. 

1262 """ 

1263 schema = self._makeVisitSummarySchema() 

1264 cat = afwTable.ExposureCatalog(schema) 

1265 cat.resize(len(dataRefs)) 

1266 

1267 cat['visit'] = visit 

1268 

1269 for i, dataRef in enumerate(dataRefs): 

1270 visitInfo = dataRef.get(component='visitInfo') 

1271 filterLabel = dataRef.get(component='filter') 

1272 summaryStats = dataRef.get(component='summaryStats') 

1273 detector = dataRef.get(component='detector') 

1274 wcs = dataRef.get(component='wcs') 

1275 photoCalib = dataRef.get(component='photoCalib') 

1276 detector = dataRef.get(component='detector') 

1277 bbox = dataRef.get(component='bbox') 

1278 validPolygon = dataRef.get(component='validPolygon') 

1279 

1280 rec = cat[i] 

1281 rec.setBBox(bbox) 

1282 rec.setVisitInfo(visitInfo) 

1283 rec.setWcs(wcs) 

1284 rec.setPhotoCalib(photoCalib) 

1285 rec.setValidPolygon(validPolygon) 

1286 

1287 rec['physical_filter'] = filterLabel.physicalLabel if filterLabel.hasPhysicalLabel() else "" 

1288 rec['band'] = filterLabel.bandLabel if filterLabel.hasBandLabel() else "" 

1289 rec.setId(detector.getId()) 

1290 rec['psfSigma'] = summaryStats.psfSigma 

1291 rec['psfIxx'] = summaryStats.psfIxx 

1292 rec['psfIyy'] = summaryStats.psfIyy 

1293 rec['psfIxy'] = summaryStats.psfIxy 

1294 rec['psfArea'] = summaryStats.psfArea 

1295 rec['raCorners'][:] = summaryStats.raCorners 

1296 rec['decCorners'][:] = summaryStats.decCorners 

1297 rec['ra'] = summaryStats.ra 

1298 rec['decl'] = summaryStats.decl 

1299 rec['zenithDistance'] = summaryStats.zenithDistance 

1300 rec['zeroPoint'] = summaryStats.zeroPoint 

1301 rec['skyBg'] = summaryStats.skyBg 

1302 rec['skyNoise'] = summaryStats.skyNoise 

1303 rec['meanVar'] = summaryStats.meanVar 

1304 rec['astromOffsetMean'] = summaryStats.astromOffsetMean 

1305 rec['astromOffsetStd'] = summaryStats.astromOffsetStd 

1306 rec['nPsfStar'] = summaryStats.nPsfStar 

1307 rec['psfStarDeltaE1Median'] = summaryStats.psfStarDeltaE1Median 

1308 rec['psfStarDeltaE2Median'] = summaryStats.psfStarDeltaE2Median 

1309 rec['psfStarDeltaE1Scatter'] = summaryStats.psfStarDeltaE1Scatter 

1310 rec['psfStarDeltaE2Scatter'] = summaryStats.psfStarDeltaE2Scatter 

1311 rec['psfStarDeltaSizeMedian'] = summaryStats.psfStarDeltaSizeMedian 

1312 rec['psfStarDeltaSizeScatter'] = summaryStats.psfStarDeltaSizeScatter 

1313 rec['psfStarScaledDeltaSizeScatter'] = summaryStats.psfStarScaledDeltaSizeScatter 

1314 

1315 metadata = dafBase.PropertyList() 

1316 metadata.add("COMMENT", "Catalog id is detector id, sorted.") 

1317 # We are looping over existing datarefs, so the following is true 

1318 metadata.add("COMMENT", "Only detectors with data have entries.") 

1319 cat.setMetadata(metadata) 

1320 

1321 cat.sort() 

1322 return cat 

1323 

1324 def _makeVisitSummarySchema(self): 

1325 """Make the schema for the visitSummary catalog.""" 

1326 schema = afwTable.ExposureTable.makeMinimalSchema() 

1327 schema.addField('visit', type='L', doc='Visit number') 

1328 schema.addField('physical_filter', type='String', size=32, doc='Physical filter') 

1329 schema.addField('band', type='String', size=32, doc='Name of band') 

1330 schema.addField('psfSigma', type='F', 

1331 doc='PSF model second-moments determinant radius (center of chip) (pixel)') 

1332 schema.addField('psfArea', type='F', 

1333 doc='PSF model effective area (center of chip) (pixel**2)') 

1334 schema.addField('psfIxx', type='F', 

1335 doc='PSF model Ixx (center of chip) (pixel**2)') 

1336 schema.addField('psfIyy', type='F', 

1337 doc='PSF model Iyy (center of chip) (pixel**2)') 

1338 schema.addField('psfIxy', type='F', 

1339 doc='PSF model Ixy (center of chip) (pixel**2)') 

1340 schema.addField('raCorners', type='ArrayD', size=4, 

1341 doc='Right Ascension of bounding box corners (degrees)') 

1342 schema.addField('decCorners', type='ArrayD', size=4, 

1343 doc='Declination of bounding box corners (degrees)') 

1344 schema.addField('ra', type='D', 

1345 doc='Right Ascension of bounding box center (degrees)') 

1346 schema.addField('decl', type='D', 

1347 doc='Declination of bounding box center (degrees)') 

1348 schema.addField('zenithDistance', type='F', 

1349 doc='Zenith distance of bounding box center (degrees)') 

1350 schema.addField('zeroPoint', type='F', 

1351 doc='Mean zeropoint in detector (mag)') 

1352 schema.addField('skyBg', type='F', 

1353 doc='Average sky background (ADU)') 

1354 schema.addField('skyNoise', type='F', 

1355 doc='Average sky noise (ADU)') 

1356 schema.addField('meanVar', type='F', 

1357 doc='Mean variance of the weight plane (ADU**2)') 

1358 schema.addField('astromOffsetMean', type='F', 

1359 doc='Mean offset of astrometric calibration matches (arcsec)') 

1360 schema.addField('astromOffsetStd', type='F', 

1361 doc='Standard deviation of offsets of astrometric calibration matches (arcsec)') 

1362 schema.addField('nPsfStar', type='I', doc='Number of stars used for PSF model') 

1363 schema.addField('psfStarDeltaE1Median', type='F', 

1364 doc='Median E1 residual (starE1 - psfE1) for psf stars') 

1365 schema.addField('psfStarDeltaE2Median', type='F', 

1366 doc='Median E2 residual (starE2 - psfE2) for psf stars') 

1367 schema.addField('psfStarDeltaE1Scatter', type='F', 

1368 doc='Scatter (via MAD) of E1 residual (starE1 - psfE1) for psf stars') 

1369 schema.addField('psfStarDeltaE2Scatter', type='F', 

1370 doc='Scatter (via MAD) of E2 residual (starE2 - psfE2) for psf stars') 

1371 schema.addField('psfStarDeltaSizeMedian', type='F', 

1372 doc='Median size residual (starSize - psfSize) for psf stars (pixel)') 

1373 schema.addField('psfStarDeltaSizeScatter', type='F', 

1374 doc='Scatter (via MAD) of size residual (starSize - psfSize) for psf stars (pixel)') 

1375 schema.addField('psfStarScaledDeltaSizeScatter', type='F', 

1376 doc='Scatter (via MAD) of size residual scaled by median size squared') 

1377 

1378 return schema 
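
A small sketch of consuming the resulting visitSummary catalog once it is in hand (e.g. retrieved from a butler); the field names follow the schema defined above.

# Illustrative sketch only; not part of postprocess.py.
def psfSigmaByDetector(visitSummary):
    """Map detector id to psfSigma for one visitSummary ExposureCatalog."""
    table = visitSummary.asAstropy()
    return {int(detId): float(sigma)
            for detId, sigma in zip(table['id'], table['psfSigma'])}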

1379 

1380 

1381class ConsolidateSourceTableConnections(pipeBase.PipelineTaskConnections, 

1382 defaultTemplates={"catalogType": ""}, 

1383 dimensions=("instrument", "visit")): 

1384 inputCatalogs = connectionTypes.Input( 

1385 doc="Input per-detector Source Tables", 

1386 name="{catalogType}sourceTable", 

1387 storageClass="DataFrame", 

1388 dimensions=("instrument", "visit", "detector"), 

1389 multiple=True 

1390 ) 

1391 outputCatalog = connectionTypes.Output( 

1392 doc="Per-visit concatenation of Source Table", 

1393 name="{catalogType}sourceTable_visit", 

1394 storageClass="DataFrame", 

1395 dimensions=("instrument", "visit") 

1396 ) 

1397 

1398 

1399class ConsolidateSourceTableConfig(pipeBase.PipelineTaskConfig, 

1400 pipelineConnections=ConsolidateSourceTableConnections): 

1401 pass 

1402 

1403 

1404class ConsolidateSourceTableTask(pipeBase.PipelineTask): 

1405 """Concatenate `sourceTable` list into a per-visit `sourceTable_visit` 

1406 """ 

1407 _DefaultName = 'consolidateSourceTable' 

1408 ConfigClass = ConsolidateSourceTableConfig 

1409 

1410 inputDataset = 'sourceTable' 

1411 outputDataset = 'sourceTable_visit' 

1412 

1413 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

1414 from .makeWarp import reorderRefs 

1415 

1416 detectorOrder = [ref.dataId['detector'] for ref in inputRefs.inputCatalogs] 

1417 detectorOrder.sort() 

1418 inputRefs = reorderRefs(inputRefs, detectorOrder, dataIdKey='detector') 

1419 inputs = butlerQC.get(inputRefs) 

1420 self.log.info("Concatenating %s per-detector Source Tables", 

1421 len(inputs['inputCatalogs'])) 

1422 df = pd.concat(inputs['inputCatalogs']) 

1423 butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs) 

1424 

1425 
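# ---------------------------------------------------------------------------
# Illustrative sketch, not part of postprocess.py: the per-visit concatenation
# performed by ConsolidateSourceTableTask.runQuantum is a detector-ordered
# pandas concat.  The frames and detector ids below are invented.
import pandas as pd

_perDetector = {
    4: pd.DataFrame({"psfFlux": [5.6]}, index=pd.Index([20], name="sourceId")),
    31: pd.DataFrame({"psfFlux": [1.2, 3.4]}, index=pd.Index([10, 11], name="sourceId")),
}
# Reordering the inputs by detector id before concatenating mirrors the
# reorderRefs call in runQuantum and makes the output row order deterministic.
_sourceTableVisit = pd.concat([_perDetector[det] for det in sorted(_perDetector)])
# ---------------------------------------------------------------------------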

1426class MakeCcdVisitTableConnections(pipeBase.PipelineTaskConnections, 

1427 dimensions=("instrument",), 

1428 defaultTemplates={"calexpType": ""}): 

1429 visitSummaryRefs = connectionTypes.Input( 

1430 doc="Data references for per-visit consolidated exposure metadata from ConsolidateVisitSummaryTask", 

1431 name="{calexpType}visitSummary", 

1432 storageClass="ExposureCatalog", 

1433 dimensions=("instrument", "visit"), 

1434 multiple=True, 

1435 deferLoad=True, 

1436 ) 

1437 outputCatalog = connectionTypes.Output( 

1438 doc="CCD and Visit metadata table", 

1439 name="ccdVisitTable", 

1440 storageClass="DataFrame", 

1441 dimensions=("instrument",) 

1442 ) 

1443 

1444 

1445class MakeCcdVisitTableConfig(pipeBase.PipelineTaskConfig, 

1446 pipelineConnections=MakeCcdVisitTableConnections): 

1447 pass 

1448 

1449 

1450class MakeCcdVisitTableTask(pipeBase.PipelineTask): 

1451 """Produce a `ccdVisitTable` from the `visitSummary` exposure catalogs. 

1452 """ 

1453 _DefaultName = 'makeCcdVisitTable' 

1454 ConfigClass = MakeCcdVisitTableConfig 

1455 

1456 def run(self, visitSummaryRefs): 

1457 """Make a table of ccd information from the `visitSummary` catalogs. 

1458 

1459 Parameters 

1460 ---------- 

1461 visitSummaryRefs : `list` of `lsst.daf.butler.DeferredDatasetHandle` 

1462 List of DeferredDatasetHandles pointing to exposure catalogs with 

1463 per-detector summary information. 

1464 

1465 Returns 

1466 ------- 

1467 result : `lsst.pipe.base.Struct` 

1468 Results struct with attribute: 

1469 

1470 ``outputCatalog`` 

1471 Catalog of ccd and visit information. 

1472 """ 

1473 ccdEntries = [] 

1474 for visitSummaryRef in visitSummaryRefs: 

1475 visitSummary = visitSummaryRef.get() 

1476 visitInfo = visitSummary[0].getVisitInfo() 

1477 

1478 ccdEntry = {} 

1479 summaryTable = visitSummary.asAstropy() 

1480 selectColumns = ['id', 'visit', 'physical_filter', 'band', 'ra', 'decl', 'zenithDistance', 

1481 'zeroPoint', 'psfSigma', 'skyBg', 'skyNoise', 

1482 'astromOffsetMean', 'astromOffsetStd', 'nPsfStar', 

1483 'psfStarDeltaE1Median', 'psfStarDeltaE2Median', 

1484 'psfStarDeltaE1Scatter', 'psfStarDeltaE2Scatter', 

1485 'psfStarDeltaSizeMedian', 'psfStarDeltaSizeScatter', 

1486 'psfStarScaledDeltaSizeScatter'] 

1487 ccdEntry = summaryTable[selectColumns].to_pandas().set_index('id') 

1488 # 'visit' is the human-readable visit number. 

1489 # 'visitId' is the key to the visit table. They are the same. 

1490 # Technically you should join to get the visit from the visit 

1491 # table. 

1492 ccdEntry = ccdEntry.rename(columns={"visit": "visitId"}) 

1493 dataIds = [DataCoordinate.standardize(visitSummaryRef.dataId, detector=id) for id in 

1494 summaryTable['id']] 

1495 packer = visitSummaryRef.dataId.universe.makePacker('visit_detector', visitSummaryRef.dataId) 

1496 ccdVisitIds = [packer.pack(dataId) for dataId in dataIds] 

1497 ccdEntry['ccdVisitId'] = ccdVisitIds 

1498 ccdEntry['detector'] = summaryTable['id'] 

1499 pixToArcseconds = np.array([vR.getWcs().getPixelScale().asArcseconds() for vR in visitSummary]) 

1500 ccdEntry["seeing"] = visitSummary['psfSigma'] * np.sqrt(8 * np.log(2)) * pixToArcseconds 

1501 

1502 ccdEntry["skyRotation"] = visitInfo.getBoresightRotAngle().asDegrees() 

1503 ccdEntry["expMidpt"] = visitInfo.getDate().toPython() 

1504 ccdEntry["expMidptMJD"] = visitInfo.getDate().get(dafBase.DateTime.MJD) 

1505 expTime = visitInfo.getExposureTime() 

1506 ccdEntry['expTime'] = expTime 

1507 ccdEntry["obsStart"] = ccdEntry["expMidpt"] - 0.5 * pd.Timedelta(seconds=expTime) 

1508 expTime_days = expTime / (60*60*24) 

1509 ccdEntry["obsStartMJD"] = ccdEntry["expMidptMJD"] - 0.5 * expTime_days 

1510 ccdEntry['darkTime'] = visitInfo.getDarkTime() 

1511 ccdEntry['xSize'] = summaryTable['bbox_max_x'] - summaryTable['bbox_min_x'] 

1512 ccdEntry['ySize'] = summaryTable['bbox_max_y'] - summaryTable['bbox_min_y'] 

1513 ccdEntry['llcra'] = summaryTable['raCorners'][:, 0] 

1514 ccdEntry['llcdec'] = summaryTable['decCorners'][:, 0] 

1515 ccdEntry['ulcra'] = summaryTable['raCorners'][:, 1] 

1516 ccdEntry['ulcdec'] = summaryTable['decCorners'][:, 1] 

1517 ccdEntry['urcra'] = summaryTable['raCorners'][:, 2] 

1518 ccdEntry['urcdec'] = summaryTable['decCorners'][:, 2] 

1519 ccdEntry['lrcra'] = summaryTable['raCorners'][:, 3] 

1520 ccdEntry['lrcdec'] = summaryTable['decCorners'][:, 3] 

1521 # TODO: DM-30618, Add raftName, nExposures, ccdTemp, binX, binY, 

1522 # and flags, and decide if WCS, and llcx, llcy, ulcx, ulcy, etc. 

1523 # values are actually wanted. 

1524 ccdEntries.append(ccdEntry) 

1525 

1526 outputCatalog = pd.concat(ccdEntries) 

1527 outputCatalog.set_index('ccdVisitId', inplace=True, verify_integrity=True) 

1528 return pipeBase.Struct(outputCatalog=outputCatalog) 

1529 

1530 
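# ---------------------------------------------------------------------------
# Illustrative sketch, not part of postprocess.py: two of the derived columns
# built in MakeCcdVisitTableTask.run.  The input numbers are invented; the
# conversions match the expressions used above.
import numpy as np
import pandas as pd

psfSigma = 1.7      # PSF Gaussian sigma in pixels (hypothetical)
pixelScale = 0.2    # pixel scale in arcsec/pixel (hypothetical)
# FWHM of a Gaussian is sigma * sqrt(8 ln 2); multiply by the pixel scale to
# express the seeing in arcseconds.
seeing = psfSigma * np.sqrt(8 * np.log(2)) * pixelScale

expMidpt = pd.Timestamp("2022-08-18T06:00:00")  # exposure midpoint (hypothetical)
expTime = 30.0                                  # exposure time in seconds
# The observation start is half an exposure length before the midpoint; the
# MJD version applies the same shift expressed in days.
obsStart = expMidpt - 0.5 * pd.Timedelta(seconds=expTime)
obsStartShiftDays = 0.5 * expTime / (60 * 60 * 24)
# ---------------------------------------------------------------------------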

1531class MakeVisitTableConnections(pipeBase.PipelineTaskConnections, 

1532 dimensions=("instrument",), 

1533 defaultTemplates={"calexpType": ""}): 

1534 visitSummaries = connectionTypes.Input( 

1535 doc="Per-visit consolidated exposure metadata from ConsolidateVisitSummaryTask", 

1536 name="{calexpType}visitSummary", 

1537 storageClass="ExposureCatalog", 

1538 dimensions=("instrument", "visit",), 

1539 multiple=True, 

1540 deferLoad=True, 

1541 ) 

1542 outputCatalog = connectionTypes.Output( 

1543 doc="Visit metadata table", 

1544 name="visitTable", 

1545 storageClass="DataFrame", 

1546 dimensions=("instrument",) 

1547 ) 

1548 

1549 

1550class MakeVisitTableConfig(pipeBase.PipelineTaskConfig, 

1551 pipelineConnections=MakeVisitTableConnections): 

1552 pass 

1553 

1554 

1555class MakeVisitTableTask(pipeBase.PipelineTask): 

1556 """Produce a `visitTable` from the `visitSummary` exposure catalogs. 

1557 """ 

1558 _DefaultName = 'makeVisitTable' 

1559 ConfigClass = MakeVisitTableConfig 

1560 

1561 def run(self, visitSummaries): 

1562 """Make a table of visit information from the `visitSummary` catalogs. 

1563 

1564 Parameters 

1565 ---------- 

1566 visitSummaries : `list` of `lsst.afw.table.ExposureCatalog` 

1567 List of exposure catalogs with per-detector summary information. 

1568 Returns 

1569 ------- 

1570 result : `lsst.pipe.base.Struct` 

1571 Results struct with attribute: 

1572 

1573 ``outputCatalog`` 

1574 Catalog of visit information. 

1575 """ 

1576 visitEntries = [] 

1577 for visitSummary in visitSummaries: 

1578 visitSummary = visitSummary.get() 

1579 visitRow = visitSummary[0] 

1580 visitInfo = visitRow.getVisitInfo() 

1581 

1582 visitEntry = {} 

1583 visitEntry["visitId"] = visitRow['visit'] 

1584 visitEntry["visit"] = visitRow['visit'] 

1585 visitEntry["physical_filter"] = visitRow['physical_filter'] 

1586 visitEntry["band"] = visitRow['band'] 

1587 raDec = visitInfo.getBoresightRaDec() 

1588 visitEntry["ra"] = raDec.getRa().asDegrees() 

1589 visitEntry["decl"] = raDec.getDec().asDegrees() 

1590 visitEntry["skyRotation"] = visitInfo.getBoresightRotAngle().asDegrees() 

1591 azAlt = visitInfo.getBoresightAzAlt() 

1592 visitEntry["azimuth"] = azAlt.getLongitude().asDegrees() 

1593 visitEntry["altitude"] = azAlt.getLatitude().asDegrees() 

1594 visitEntry["zenithDistance"] = 90 - azAlt.getLatitude().asDegrees() 

1595 visitEntry["airmass"] = visitInfo.getBoresightAirmass() 

1596 expTime = visitInfo.getExposureTime() 

1597 visitEntry["expTime"] = expTime 

1598 visitEntry["expMidpt"] = visitInfo.getDate().toPython() 

1599 visitEntry["expMidptMJD"] = visitInfo.getDate().get(dafBase.DateTime.MJD) 

1600 visitEntry["obsStart"] = visitEntry["expMidpt"] - 0.5 * pd.Timedelta(seconds=expTime) 

1601 expTime_days = expTime / (60*60*24) 

1602 visitEntry["obsStartMJD"] = visitEntry["expMidptMJD"] - 0.5 * expTime_days 

1603 visitEntries.append(visitEntry) 

1604 

1605 # TODO: DM-30623, Add programId, exposureType, cameraTemp, 

1606 # mirror1Temp, mirror2Temp, mirror3Temp, domeTemp, externalTemp, 

1607 # dimmSeeing, pwvGPS, pwvMW, flags, nExposures. 

1608 

1609 outputCatalog = pd.DataFrame(data=visitEntries) 

1610 outputCatalog.set_index('visitId', inplace=True, verify_integrity=True) 

1611 return pipeBase.Struct(outputCatalog=outputCatalog) 

1612 

1613 
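# ---------------------------------------------------------------------------
# Illustrative sketch, not part of postprocess.py: MakeVisitTableTask.run
# collects one dict per visit and indexes the resulting table by visitId;
# verify_integrity=True makes a duplicated visit id raise instead of being
# silently accepted.  The entries below are invented.
import pandas as pd

_visitEntries = [
    {"visitId": 1234, "band": "r", "ra": 150.1, "decl": 2.2, "airmass": 1.1},
    {"visitId": 1235, "band": "i", "ra": 150.2, "decl": 2.3, "airmass": 1.2},
]
_visitTable = pd.DataFrame(data=_visitEntries)
_visitTable.set_index("visitId", inplace=True, verify_integrity=True)
# ---------------------------------------------------------------------------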

1614class WriteForcedSourceTableConnections(pipeBase.PipelineTaskConnections, 

1615 dimensions=("instrument", "visit", "detector", "skymap", "tract")): 

1616 

1617 inputCatalog = connectionTypes.Input( 

1618 doc="Primary per-detector, single-epoch forced-photometry catalog. " 

1619 "By default, it is the output of ForcedPhotCcdTask on calexps", 

1620 name="forced_src", 

1621 storageClass="SourceCatalog", 

1622 dimensions=("instrument", "visit", "detector", "skymap", "tract") 

1623 ) 

1624 inputCatalogDiff = connectionTypes.Input( 

1625 doc="Secondary multi-epoch, per-detector, forced photometry catalog. " 

1626 "By default, it is the output of ForcedPhotCcdTask run on image differences.", 

1627 name="forced_diff", 

1628 storageClass="SourceCatalog", 

1629 dimensions=("instrument", "visit", "detector", "skymap", "tract") 

1630 ) 

1631 outputCatalog = connectionTypes.Output( 

1632 doc="InputCatalogs horizontally joined on `objectId` in Parquet format", 

1633 name="mergedForcedSource", 

1634 storageClass="DataFrame", 

1635 dimensions=("instrument", "visit", "detector", "skymap", "tract") 

1636 ) 

1637 

1638 

1639class WriteForcedSourceTableConfig(pipeBase.PipelineTaskConfig, 

1640 pipelineConnections=WriteForcedSourceTableConnections): 

1641 key = pexConfig.Field( 

1642 doc="Column on which to join the two input tables and use as the primary key of the output", 

1643 dtype=str, 

1644 default="objectId", 

1645 ) 

1646 

1647 

1648class WriteForcedSourceTableTask(pipeBase.PipelineTask): 

1649 """Merge and convert per-detector forced source catalogs to parquet. 

1650 

1651 Because the predecessor ForcedPhotCcdTask operates per-detector, 

1652 per-tract, (i.e., it has tract in its dimensions), detectors 

1653 on the tract boundary may have multiple forced source catalogs. 

1654 

1655 The successor task TransformForcedSourceTable runs per-patch 

1656 and temporally aggregates overlapping mergedForcedSource catalogs from 

1657 all available epochs. 

1658 """ 

1659 _DefaultName = "writeForcedSourceTable" 

1660 ConfigClass = WriteForcedSourceTableConfig 

1661 

1662 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

1663 inputs = butlerQC.get(inputRefs) 

1664 # Add ccdVisitId to allow joining with CcdVisitTable 

1665 inputs['ccdVisitId'] = butlerQC.quantum.dataId.pack("visit_detector") 

1666 inputs['band'] = butlerQC.quantum.dataId.full['band'] 

1667 outputs = self.run(**inputs) 

1668 butlerQC.put(outputs, outputRefs) 

1669 

1670 def run(self, inputCatalog, inputCatalogDiff, ccdVisitId=None, band=None): 

1671 dfs = [] 

1672 for table, dataset in zip((inputCatalog, inputCatalogDiff), ('calexp', 'diff')): 

1673 df = table.asAstropy().to_pandas().set_index(self.config.key, drop=False) 

1674 df = df.reindex(sorted(df.columns), axis=1) 

1675 df['ccdVisitId'] = ccdVisitId if ccdVisitId else pd.NA 

1676 df['band'] = band if band else pd.NA 

1677 df.columns = pd.MultiIndex.from_tuples([(dataset, c) for c in df.columns], 

1678 names=('dataset', 'column')) 

1679 

1680 dfs.append(df) 

1681 

1682 outputCatalog = functools.reduce(lambda d1, d2: d1.join(d2), dfs) 

1683 return pipeBase.Struct(outputCatalog=outputCatalog) 

1684 

1685 
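# ---------------------------------------------------------------------------
# Illustrative sketch, not part of postprocess.py: the run method above tags
# each input catalog's columns with a ('dataset', 'column') MultiIndex and
# then joins the two frames on their shared objectId index.  The catalogs
# below are invented pandas stand-ins for the afw SourceCatalogs.
import functools
import pandas as pd

_calexpDf = pd.DataFrame({"objectId": [1, 2], "flux": [10.0, 20.0]})
_diffDf = pd.DataFrame({"objectId": [1, 2], "flux": [0.1, -0.2]})
_dfs = []
for _df, _dataset in ((_calexpDf, "calexp"), (_diffDf, "diff")):
    _df = _df.set_index("objectId", drop=False)
    _df.columns = pd.MultiIndex.from_tuples(
        [(_dataset, c) for c in _df.columns], names=("dataset", "column"))
    _dfs.append(_df)
# Horizontal join on objectId; column names stay unique because of the
# dataset level of the MultiIndex.
_mergedForcedSource = functools.reduce(lambda d1, d2: d1.join(d2), _dfs)
# ---------------------------------------------------------------------------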

1686class TransformForcedSourceTableConnections(pipeBase.PipelineTaskConnections, 

1687 dimensions=("instrument", "skymap", "patch", "tract")): 

1688 

1689 inputCatalogs = connectionTypes.Input( 

1690 doc="Parquet table of merged ForcedSources produced by WriteForcedSourceTableTask", 

1691 name="mergedForcedSource", 

1692 storageClass="DataFrame", 

1693 dimensions=("instrument", "visit", "detector", "skymap", "tract"), 

1694 multiple=True, 

1695 deferLoad=True 

1696 ) 

1697 referenceCatalog = connectionTypes.Input( 

1698 doc="Reference catalog which was used to seed the forcedPhot. Columns " 

1699 "objectId, detect_isPrimary, detect_isTractInner, detect_isPatchInner " 

1700 "are expected.", 

1701 name="objectTable", 

1702 storageClass="DataFrame", 

1703 dimensions=("tract", "patch", "skymap"), 

1704 deferLoad=True 

1705 ) 

1706 outputCatalog = connectionTypes.Output( 

1707 doc="Narrower, temporally-aggregated, per-patch ForcedSource Table transformed and converted per a " 

1708 "specified set of functors", 

1709 name="forcedSourceTable", 

1710 storageClass="DataFrame", 

1711 dimensions=("tract", "patch", "skymap") 

1712 ) 

1713 

1714 

1715class TransformForcedSourceTableConfig(TransformCatalogBaseConfig, 

1716 pipelineConnections=TransformForcedSourceTableConnections): 

1717 referenceColumns = pexConfig.ListField( 

1718 dtype=str, 

1719 default=["detect_isPrimary", "detect_isTractInner", "detect_isPatchInner"], 

1720 optional=True, 

1721 doc="Columns to pull from reference catalog", 

1722 ) 

1723 keyRef = pexConfig.Field( 

1724 doc="Column on which to join the two input tables and use as the primary key of the output", 

1725 dtype=str, 

1726 default="objectId", 

1727 ) 

1728 key = pexConfig.Field( 

1729 doc="Rename the output DataFrame index to this name", 

1730 dtype=str, 

1731 default="forcedSourceId", 

1732 ) 

1733 

1734 def setDefaults(self): 

1735 super().setDefaults() 

1736 self.functorFile = os.path.join('$PIPE_TASKS_DIR', 'schemas', 'ForcedSource.yaml') 

1737 self.columnsFromDataId = ['tract', 'patch'] 

1738 

1739 

1740class TransformForcedSourceTableTask(TransformCatalogBaseTask): 

1741 """Transform/standardize a ForcedSource catalog 

1742 

1743 Transforms each wide, per-detector forcedSource parquet table per the 

1744 specification file (per-camera defaults found in ForcedSource.yaml). 

1745 All epochs that overlap the patch are aggregated into one per-patch 

1746 narrow-parquet file. 

1747 

1748 No de-duplication of rows is performed. Duplicate-resolution flags are 

1749 pulled in from the referenceCatalog: `detect_isPrimary`, 

1750 `detect_isTractInner`, `detect_isPatchInner`, so that the user may de-duplicate 

1751 for analysis or compare duplicates for QA. 

1752 

1753 The resulting table includes multiple bands. Epochs (MJDs) and other useful 

1754 per-visit information can be retrieved by joining with the CcdVisitTable on 

1755 ccdVisitId. 

1756 """ 

1757 _DefaultName = "transformForcedSourceTable" 

1758 ConfigClass = TransformForcedSourceTableConfig 

1759 

1760 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

1761 inputs = butlerQC.get(inputRefs) 

1762 if self.funcs is None: 

1763 raise ValueError("config.functorFile is None. " 

1764 "Must be a valid path to yaml in order to run Task as a PipelineTask.") 

1765 outputs = self.run(inputs['inputCatalogs'], inputs['referenceCatalog'], funcs=self.funcs, 

1766 dataId=outputRefs.outputCatalog.dataId.full) 

1767 

1768 butlerQC.put(outputs, outputRefs) 

1769 

1770 def run(self, inputCatalogs, referenceCatalog, funcs=None, dataId=None, band=None): 

1771 dfs = [] 

1772 ref = referenceCatalog.get(parameters={"columns": self.config.referenceColumns}) 

1773 self.log.info("Aggregating %s input catalogs", len(inputCatalogs)) 

1774 for handle in inputCatalogs: 

1775 result = self.transform(None, handle, funcs, dataId) 

1776 # Filter for only rows that were detected on (overlap) the patch 

1777 dfs.append(result.df.join(ref, how='inner')) 

1778 

1779 outputCatalog = pd.concat(dfs) 

1780 

1781 # Now that we are done joining on config.keyRef, 

1782 # change the index to config.key in the following steps: 

1783 outputCatalog.index.rename(self.config.keyRef, inplace=True) 

1784 # Add config.keyRef to the column list 

1785 outputCatalog.reset_index(inplace=True) 

1786 # Set the forcedSourceId to the index. This is specified in the 

1787 # ForcedSource.yaml 

1788 outputCatalog.set_index("forcedSourceId", inplace=True, verify_integrity=True) 

1789 # Rename it to the config.key 

1790 outputCatalog.index.rename(self.config.key, inplace=True) 

1791 

1792 self.log.info("Made a table of %d columns and %d rows", 

1793 len(outputCatalog.columns), len(outputCatalog)) 

1794 return pipeBase.Struct(outputCatalog=outputCatalog) 

1795 

1796 
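# ---------------------------------------------------------------------------
# Illustrative sketch, not part of postprocess.py: the patch-level filtering
# in TransformForcedSourceTableTask.run is an inner join of each transformed
# forced-source frame against the patch's reference objects, so only rows
# whose objectId appears in the reference catalog survive.  The frames and
# ids below are invented.
import pandas as pd

_forced = pd.DataFrame(
    {"forcedSourceId": [100, 101, 102], "psfFlux": [1.0, 2.0, 3.0]},
    index=pd.Index([1, 2, 3], name="objectId"),
)
_reference = pd.DataFrame(
    {"detect_isPrimary": [True, False]},
    index=pd.Index([1, 2], name="objectId"),  # objectId 3 is not in this patch
)
_filtered = _forced.join(_reference, how="inner")  # drops objectId 3
# Move objectId back into the columns and promote forcedSourceId to the
# index, mirroring the reset_index/set_index sequence in run above.
_filtered = _filtered.reset_index().set_index("forcedSourceId", verify_integrity=True)
# ---------------------------------------------------------------------------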

1797class ConsolidateTractConnections(pipeBase.PipelineTaskConnections, 

1798 defaultTemplates={"catalogType": ""}, 

1799 dimensions=("instrument", "tract")): 

1800 inputCatalogs = connectionTypes.Input( 

1801 doc="Input per-patch DataFrame Tables to be concatenated", 

1802 name="{catalogType}ForcedSourceTable", 

1803 storageClass="DataFrame", 

1804 dimensions=("tract", "patch", "skymap"), 

1805 multiple=True, 

1806 ) 

1807 

1808 outputCatalog = connectionTypes.Output( 

1809 doc="Output per-tract concatenation of DataFrame Tables", 

1810 name="{catalogType}ForcedSourceTable_tract", 

1811 storageClass="DataFrame", 

1812 dimensions=("tract", "skymap"), 

1813 ) 

1814 

1815 

1816class ConsolidateTractConfig(pipeBase.PipelineTaskConfig, 

1817 pipelineConnections=ConsolidateTractConnections): 

1818 pass 

1819 

1820 

1821class ConsolidateTractTask(pipeBase.PipelineTask): 

1822 """Concatenate any per-patch, dataframe list into a single 

1823 per-tract DataFrame. 

1824 """ 

1825 _DefaultName = 'ConsolidateTract' 

1826 ConfigClass = ConsolidateTractConfig 

1827 

1828 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

1829 inputs = butlerQC.get(inputRefs) 

1830 # Not checking that at least one inputCatalog exists because that 

1831 # would be an empty quantum graph (QG). 

1832 self.log.info("Concatenating %s per-patch %s Tables", 

1833 len(inputs['inputCatalogs']), 

1834 inputRefs.inputCatalogs[0].datasetType.name) 

1835 df = pd.concat(inputs['inputCatalogs']) 

1836 butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs)
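# ---------------------------------------------------------------------------
# Illustrative sketch, not part of postprocess.py: the per-tract output of
# ConsolidateTractTask is a plain row-wise concatenation of the per-patch
# tables; unlike the visit and ccdVisit tables above, the index is not
# re-verified here.  The frames below are invented.
import pandas as pd

_patchA = pd.DataFrame({"psfFlux": [1.0]}, index=pd.Index([100], name="forcedSourceId"))
_patchB = pd.DataFrame({"psfFlux": [2.0]}, index=pd.Index([101], name="forcedSourceId"))
_tractTable = pd.concat([_patchA, _patchB])
# ---------------------------------------------------------------------------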