Coverage for python/lsst/pipe/tasks/postprocess.py: 32%

696 statements  

coverage.py v6.4.4, created at 2022-09-30 10:45 +0000

1# This file is part of pipe_tasks. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

22__all__ = ["WriteObjectTableConfig", "WriteObjectTableTask", 

23 "WriteSourceTableConfig", "WriteSourceTableTask", 

24 "WriteRecalibratedSourceTableConfig", "WriteRecalibratedSourceTableTask", 

25 "PostprocessAnalysis", 

26 "TransformCatalogBaseConfig", "TransformCatalogBaseTask", 

27 "TransformObjectCatalogConfig", "TransformObjectCatalogTask", 

28 "ConsolidateObjectTableConfig", "ConsolidateObjectTableTask", 

29 "TransformSourceTableConfig", "TransformSourceTableTask", 

30 "ConsolidateVisitSummaryConfig", "ConsolidateVisitSummaryTask", 

31 "ConsolidateSourceTableConfig", "ConsolidateSourceTableTask", 

32 "MakeCcdVisitTableConfig", "MakeCcdVisitTableTask", 

33 "MakeVisitTableConfig", "MakeVisitTableTask", 

34 "WriteForcedSourceTableConfig", "WriteForcedSourceTableTask", 

35 "TransformForcedSourceTableConfig", "TransformForcedSourceTableTask", 

36 "ConsolidateTractConfig", "ConsolidateTractTask"] 

37 

38import functools 

39import pandas as pd 

40import logging 

41import numpy as np 

42import numbers 

43import os 

44 

45import lsst.geom 

46import lsst.pex.config as pexConfig 

47import lsst.pipe.base as pipeBase 

48import lsst.daf.base as dafBase 

49from lsst.obs.base import ExposureIdInfo 

50from lsst.pipe.base import connectionTypes 

51import lsst.afw.table as afwTable 

52from lsst.meas.base import SingleFrameMeasurementTask 

53from lsst.daf.butler import DeferredDatasetHandle, DataCoordinate 

54from lsst.skymap import BaseSkyMap 

55 

56from .parquetTable import ParquetTable 

57from .functors import CompositeFunctor, Column 

58 

59log = logging.getLogger(__name__) 

60 

61 

62def flattenFilters(df, noDupCols=['coord_ra', 'coord_dec'], camelCase=False, inputBands=None): 

63 """Flattens a dataframe with multilevel column index. 

64 """ 

65 newDf = pd.DataFrame() 

66 # band is the level 0 index 

67 dfBands = df.columns.unique(level=0).values 

68 for band in dfBands: 

69 subdf = df[band] 

70 columnFormat = '{0}{1}' if camelCase else '{0}_{1}' 

71 newColumns = {c: columnFormat.format(band, c) 

72 for c in subdf.columns if c not in noDupCols} 

73 cols = list(newColumns.keys()) 

74 newDf = pd.concat([newDf, subdf[cols].rename(columns=newColumns)], axis=1) 

75 

76 # Band must be present in the input and output or else column is all NaN: 

77 presentBands = dfBands if inputBands is None else list(set(inputBands).intersection(dfBands)) 

78 # Get the unexploded columns from any present band's partition 

79 noDupDf = df[presentBands[0]][noDupCols] 

80 newDf = pd.concat([noDupDf, newDf], axis=1) 

81 return newDf 

82 
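As a quick illustration of what flattenFilters does, here is a self-contained sketch (the column names are illustrative and not part of this module) that builds a tiny two-band DataFrame with a (band, column) MultiIndex and flattens it:

import pandas as pd

df = pd.DataFrame({('g', 'coord_ra'): [10.0], ('g', 'psfFlux'): [1.2],
                   ('r', 'coord_ra'): [10.0], ('r', 'psfFlux'): [3.4]})
df.columns = pd.MultiIndex.from_tuples(df.columns, names=('band', 'column'))
flat = flattenFilters(df, noDupCols=['coord_ra'])
# flat now has columns: coord_ra, g_psfFlux, r_psfFlux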

83 

84class WriteObjectTableConnections(pipeBase.PipelineTaskConnections, 

85 defaultTemplates={"coaddName": "deep"}, 

86 dimensions=("tract", "patch", "skymap")): 

87 inputCatalogMeas = connectionTypes.Input( 

88 doc="Catalog of source measurements on the deepCoadd.", 

89 dimensions=("tract", "patch", "band", "skymap"), 

90 storageClass="SourceCatalog", 

91 name="{coaddName}Coadd_meas", 

92 multiple=True 

93 ) 

94 inputCatalogForcedSrc = connectionTypes.Input( 

95 doc="Catalog of forced measurements (shape and position parameters held fixed) on the deepCoadd.", 

96 dimensions=("tract", "patch", "band", "skymap"), 

97 storageClass="SourceCatalog", 

98 name="{coaddName}Coadd_forced_src", 

99 multiple=True 

100 ) 

101 inputCatalogRef = connectionTypes.Input( 

102 doc="Catalog marking the primary detection (which band provides a good shape and position)" 

103 "for each detection in deepCoadd_mergeDet.", 

104 dimensions=("tract", "patch", "skymap"), 

105 storageClass="SourceCatalog", 

106 name="{coaddName}Coadd_ref" 

107 ) 

108 outputCatalog = connectionTypes.Output( 

109 doc="A vertical concatenation of the deepCoadd_{ref|meas|forced_src} catalogs, " 

110 "stored as a DataFrame with a multi-level column index per-patch.", 

111 dimensions=("tract", "patch", "skymap"), 

112 storageClass="DataFrame", 

113 name="{coaddName}Coadd_obj" 

114 ) 

115 

116 

117class WriteObjectTableConfig(pipeBase.PipelineTaskConfig, 

118 pipelineConnections=WriteObjectTableConnections): 

119 engine = pexConfig.Field( 

120 dtype=str, 

121 default="pyarrow", 

122 doc="Parquet engine for writing (pyarrow or fastparquet)" 

123 ) 

124 coaddName = pexConfig.Field( 

125 dtype=str, 

126 default="deep", 

127 doc="Name of coadd" 

128 ) 

129 

130 

131class WriteObjectTableTask(pipeBase.PipelineTask): 

132 """Write filter-merged source tables to parquet 

133 """ 

134 _DefaultName = "writeObjectTable" 

135 ConfigClass = WriteObjectTableConfig 

136 

137 # Names of table datasets to be merged 

138 inputDatasets = ('forced_src', 'meas', 'ref') 

139 

140 # Tag of output dataset written by `MergeSourcesTask.write` 

141 outputDataset = 'obj' 

142 

143 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

144 inputs = butlerQC.get(inputRefs) 

145 

146 measDict = {ref.dataId['band']: {'meas': cat} for ref, cat in 

147 zip(inputRefs.inputCatalogMeas, inputs['inputCatalogMeas'])} 

148 forcedSourceDict = {ref.dataId['band']: {'forced_src': cat} for ref, cat in 

149 zip(inputRefs.inputCatalogForcedSrc, inputs['inputCatalogForcedSrc'])} 

150 

151 catalogs = {} 

152 for band in measDict.keys(): 

153 catalogs[band] = {'meas': measDict[band]['meas'], 

154 'forced_src': forcedSourceDict[band]['forced_src'], 

155 'ref': inputs['inputCatalogRef']} 

156 dataId = butlerQC.quantum.dataId 

157 df = self.run(catalogs=catalogs, tract=dataId['tract'], patch=dataId['patch']) 

158 outputs = pipeBase.Struct(outputCatalog=df) 

159 butlerQC.put(outputs, outputRefs) 

160 

161 def run(self, catalogs, tract, patch): 

162 """Merge multiple catalogs. 

163 

164 Parameters 

165 ---------- 

166 catalogs : `dict` 

167 Mapping from filter names to dict of catalogs. 

168 tract : int 

169 tractId to use for the tractId column. 

170 patch : str 

171 patchId to use for the patchId column. 

172 

173 Returns 

174 ------- 

175 catalog : `pandas.DataFrame` 

176 Merged dataframe. 

177 """ 

178 

179 dfs = [] 

180 for filt, tableDict in catalogs.items(): 

181 for dataset, table in tableDict.items(): 

182 # Convert afwTable to pandas DataFrame 

183 df = table.asAstropy().to_pandas().set_index('id', drop=True) 

184 

185 # Sort columns by name, to ensure matching schema among patches 

186 df = df.reindex(sorted(df.columns), axis=1) 

187 df['tractId'] = tract 

188 df['patchId'] = patch 

189 

190 # Make columns a 3-level MultiIndex 

191 df.columns = pd.MultiIndex.from_tuples([(dataset, filt, c) for c in df.columns], 

192 names=('dataset', 'band', 'column')) 

193 dfs.append(df) 

194 

195 catalog = functools.reduce(lambda d1, d2: d1.join(d2), dfs) 

196 return catalog 

197 
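The merged deepCoadd_obj DataFrame produced above carries a three-level (dataset, band, column) column index. A minimal, self-contained sketch of selecting from that layout (illustrative column names; a real table has hundreds of columns per band):

import pandas as pd

cols = pd.MultiIndex.from_tuples(
    [('meas', 'g', 'base_PsfFlux_instFlux'), ('ref', 'g', 'detect_isPrimary')],
    names=('dataset', 'band', 'column'))
obj = pd.DataFrame([[1.0, True]], columns=cols)
gMeas = obj['meas']['g']                         # per-band measurement partition
primary = obj[('ref', 'g', 'detect_isPrimary')]  # single column via the full tuple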

198 

199class WriteSourceTableConnections(pipeBase.PipelineTaskConnections, 

200 defaultTemplates={"catalogType": ""}, 

201 dimensions=("instrument", "visit", "detector")): 

202 

203 catalog = connectionTypes.Input( 

204 doc="Input full-depth catalog of sources produced by CalibrateTask", 

205 name="{catalogType}src", 

206 storageClass="SourceCatalog", 

207 dimensions=("instrument", "visit", "detector") 

208 ) 

209 outputCatalog = connectionTypes.Output( 

210 doc="Catalog of sources, `src` in Parquet format. The 'id' column is " 

211 "replaced with an index; all other columns are unchanged.", 

212 name="{catalogType}source", 

213 storageClass="DataFrame", 

214 dimensions=("instrument", "visit", "detector") 

215 ) 

216 

217 

218class WriteSourceTableConfig(pipeBase.PipelineTaskConfig, 

219 pipelineConnections=WriteSourceTableConnections): 

220 pass 

221 

222 

223class WriteSourceTableTask(pipeBase.PipelineTask): 

224 """Write source table to parquet. 

225 """ 

226 _DefaultName = "writeSourceTable" 

227 ConfigClass = WriteSourceTableConfig 

228 

229 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

230 inputs = butlerQC.get(inputRefs) 

231 inputs['ccdVisitId'] = butlerQC.quantum.dataId.pack("visit_detector") 

232 result = self.run(**inputs).table 

233 outputs = pipeBase.Struct(outputCatalog=result.toDataFrame()) 

234 butlerQC.put(outputs, outputRefs) 

235 

236 def run(self, catalog, ccdVisitId=None, **kwargs): 

237 """Convert `src` catalog to parquet 

238 

239 Parameters 

240 ---------- 

241 catalog: `afwTable.SourceCatalog` 

242 catalog to be converted 

243 ccdVisitId: `int` 

244 ccdVisitId to be added as a column 

245 

246 Returns 

247 ------- 

248 result : `lsst.pipe.base.Struct` 

249 ``table`` 

250 `ParquetTable` version of the input catalog 

251 """ 

252 self.log.info("Generating parquet table from src catalog ccdVisitId=%s", ccdVisitId) 

253 df = catalog.asAstropy().to_pandas().set_index('id', drop=True) 

254 df['ccdVisitId'] = ccdVisitId 

255 return pipeBase.Struct(table=ParquetTable(dataFrame=df)) 

256 
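A hedged sketch of driving run outside the pipeline framework (requires the LSST stack; srcCat is an assumed in-memory lsst.afw.table.SourceCatalog and the ccdVisitId value is illustrative):

task = WriteSourceTableTask()
struct = task.run(catalog=srcCat, ccdVisitId=20220930123456)
df = struct.table.toDataFrame()   # pandas view of the parquet-ready table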

257 

258class WriteRecalibratedSourceTableConnections(WriteSourceTableConnections, 

259 defaultTemplates={"catalogType": "", 

260 "skyWcsName": "jointcal", 

261 "photoCalibName": "fgcm"}, 

262 dimensions=("instrument", "visit", "detector", "skymap")): 

263 skyMap = connectionTypes.Input( 

264 doc="skyMap needed to choose which tract-level calibrations to use when multiple available", 

265 name=BaseSkyMap.SKYMAP_DATASET_TYPE_NAME, 

266 storageClass="SkyMap", 

267 dimensions=("skymap",), 

268 ) 

269 exposure = connectionTypes.Input( 

270 doc="Input exposure to perform photometry on.", 

271 name="calexp", 

272 storageClass="ExposureF", 

273 dimensions=["instrument", "visit", "detector"], 

274 ) 

275 externalSkyWcsTractCatalog = connectionTypes.Input( 

276 doc=("Per-tract, per-visit wcs calibrations. These catalogs use the detector " 

277 "id for the catalog id, sorted on id for fast lookup."), 

278 name="{skyWcsName}SkyWcsCatalog", 

279 storageClass="ExposureCatalog", 

280 dimensions=["instrument", "visit", "tract"], 

281 multiple=True 

282 ) 

283 externalSkyWcsGlobalCatalog = connectionTypes.Input( 

284 doc=("Per-visit wcs calibrations computed globally (with no tract information). " 

285 "These catalogs use the detector id for the catalog id, sorted on id for " 

286 "fast lookup."), 

287 name="{skyWcsName}SkyWcsCatalog", 

288 storageClass="ExposureCatalog", 

289 dimensions=["instrument", "visit"], 

290 ) 

291 externalPhotoCalibTractCatalog = connectionTypes.Input( 

292 doc=("Per-tract, per-visit photometric calibrations. These catalogs use the " 

293 "detector id for the catalog id, sorted on id for fast lookup."), 

294 name="{photoCalibName}PhotoCalibCatalog", 

295 storageClass="ExposureCatalog", 

296 dimensions=["instrument", "visit", "tract"], 

297 multiple=True 

298 ) 

299 externalPhotoCalibGlobalCatalog = connectionTypes.Input( 

300 doc=("Per-visit photometric calibrations computed globally (with no tract " 

301 "information). These catalogs use the detector id for the catalog id, " 

302 "sorted on id for fast lookup."), 

303 name="{photoCalibName}PhotoCalibCatalog", 

304 storageClass="ExposureCatalog", 

305 dimensions=["instrument", "visit"], 

306 ) 

307 

308 def __init__(self, *, config=None): 

309 super().__init__(config=config) 

310 # Same connection boilerplate as all other applications of 

311 # Global/Tract calibrations 

312 if config.doApplyExternalSkyWcs and config.doReevaluateSkyWcs: 

313 if config.useGlobalExternalSkyWcs: 

314 self.inputs.remove("externalSkyWcsTractCatalog") 

315 else: 

316 self.inputs.remove("externalSkyWcsGlobalCatalog") 

317 else: 

318 self.inputs.remove("externalSkyWcsTractCatalog") 

319 self.inputs.remove("externalSkyWcsGlobalCatalog") 

320 if config.doApplyExternalPhotoCalib and config.doReevaluatePhotoCalib: 

321 if config.useGlobalExternalPhotoCalib: 

322 self.inputs.remove("externalPhotoCalibTractCatalog") 

323 else: 

324 self.inputs.remove("externalPhotoCalibGlobalCatalog") 

325 else: 

326 self.inputs.remove("externalPhotoCalibTractCatalog") 

327 self.inputs.remove("externalPhotoCalibGlobalCatalog") 

328 

329 

330class WriteRecalibratedSourceTableConfig(WriteSourceTableConfig, 

331 pipelineConnections=WriteRecalibratedSourceTableConnections): 

332 

333 doReevaluatePhotoCalib = pexConfig.Field( 

334 dtype=bool, 

335 default=True, 

336 doc=("Add or replace local photoCalib columns") 

337 ) 

338 doReevaluateSkyWcs = pexConfig.Field( 

339 dtype=bool, 

340 default=True, 

341 doc=("Add or replace local WCS columns and update the coord columns, coord_ra and coord_dec") 

342 ) 

343 doApplyExternalPhotoCalib = pexConfig.Field( 

344 dtype=bool, 

345 default=True, 

346 doc=("If and only if doReevaluatePhotoCalib, apply the photometric calibrations from an external ", 

347 "algorithm such as FGCM or jointcal, else use the photoCalib already attached to the exposure."), 

348 ) 

349 doApplyExternalSkyWcs = pexConfig.Field( 

350 dtype=bool, 

351 default=True, 

352 doc=("if and only if doReevaluateSkyWcs, apply the WCS from an external algorithm such as jointcal, ", 

353 "else use the wcs already attached to the exposure."), 

354 ) 

355 useGlobalExternalPhotoCalib = pexConfig.Field( 

356 dtype=bool, 

357 default=True, 

358 doc=("When using doApplyExternalPhotoCalib, use 'global' calibrations " 

359 "that are not run per-tract. When False, use per-tract photometric " 

360 "calibration files.") 

361 ) 

362 useGlobalExternalSkyWcs = pexConfig.Field( 

363 dtype=bool, 

364 default=False, 

365 doc=("When using doApplyExternalSkyWcs, use 'global' calibrations " 

366 "that are not run per-tract. When False, use per-tract wcs " 

367 "files.") 

368 ) 

369 

370 def validate(self): 

371 super().validate() 

372 if self.doApplyExternalSkyWcs and not self.doReevaluateSkyWcs: 

373 log.warning("doApplyExternalSkyWcs=True but doReevaluateSkyWcs=False" 

374 "External SkyWcs will not be read or evaluated.") 

375 if self.doApplyExternalPhotoCalib and not self.doReevaluatePhotoCalib: 

376 log.warning("doApplyExternalPhotoCalib=True but doReevaluatePhotoCalib=False." 

377 "External PhotoCalib will not be read or evaluated.") 

378 
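For reference, a hedged configuration sketch (values are illustrative): recalibrate with the global external photometric calibration but per-tract external WCS; the connections' __init__ above then prunes the unused catalog inputs.

config = WriteRecalibratedSourceTableConfig()
config.doReevaluatePhotoCalib = True
config.doApplyExternalPhotoCalib = True
config.useGlobalExternalPhotoCalib = True   # e.g. the global FGCM calibration
config.doReevaluateSkyWcs = True
config.doApplyExternalSkyWcs = True
config.useGlobalExternalSkyWcs = False      # per-tract WCS, e.g. jointcal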

379 

380class WriteRecalibratedSourceTableTask(WriteSourceTableTask): 

381 """Write source table to parquet 

382 """ 

383 _DefaultName = "writeRecalibratedSourceTable" 

384 ConfigClass = WriteRecalibratedSourceTableConfig 

385 

386 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

387 inputs = butlerQC.get(inputRefs) 

388 inputs['ccdVisitId'] = butlerQC.quantum.dataId.pack("visit_detector") 

389 inputs['exposureIdInfo'] = ExposureIdInfo.fromDataId(butlerQC.quantum.dataId, "visit_detector") 

390 

391 if self.config.doReevaluatePhotoCalib or self.config.doReevaluateSkyWcs: 

392 if self.config.doApplyExternalPhotoCalib or self.config.doApplyExternalSkyWcs: 

393 inputs['exposure'] = self.attachCalibs(inputRefs, **inputs) 

394 

395 inputs['catalog'] = self.addCalibColumns(**inputs) 

396 

397 result = self.run(**inputs).table 

398 outputs = pipeBase.Struct(outputCatalog=result.toDataFrame()) 

399 butlerQC.put(outputs, outputRefs) 

400 

401 def attachCalibs(self, inputRefs, skyMap, exposure, externalSkyWcsGlobalCatalog=None, 

402 externalSkyWcsTractCatalog=None, externalPhotoCalibGlobalCatalog=None, 

403 externalPhotoCalibTractCatalog=None, **kwargs): 

404 """Apply external calibrations to exposure per configuration 

405 

406 When multiple tract-level calibrations overlap, select the one with the 

407 center closest to detector. 

408 

409 Parameters 

410 ---------- 

411 inputRefs : `lsst.pipe.base.InputQuantizedConnection` 

412 Input references, used here for the dataIds of the tract-level calibs. 

413 skyMap : `lsst.skymap.SkyMap` 

414 exposure : `lsst.afw.image.exposure.Exposure` 

415 Input exposure to adjust calibrations. 

416 externalSkyWcsGlobalCatalog : `lsst.afw.table.ExposureCatalog`, optional 

417 Exposure catalog with external skyWcs to be applied per config 

418 externalSkyWcsTractCatalog : `lsst.afw.table.ExposureCatalog`, optional 

419 Exposure catalog with external skyWcs to be applied per config 

420 externalPhotoCalibGlobalCatalog : `lsst.afw.table.ExposureCatalog`, optional 

421 Exposure catalog with external photoCalib to be applied per config 

422 externalPhotoCalibTractCatalog : `lsst.afw.table.ExposureCatalog`, optional 

423 Exposure catalog with external photoCalib to be applied per config 

424 

425 Returns 

426 ------- 

427 exposure : `lsst.afw.image.exposure.Exposure` 

428 Exposure with adjusted calibrations. 

429 """ 

430 if not self.config.doApplyExternalSkyWcs: 

431 # Do not modify the exposure's SkyWcs 

432 externalSkyWcsCatalog = None 

433 elif self.config.useGlobalExternalSkyWcs: 

434 # Use the global external SkyWcs 

435 externalSkyWcsCatalog = externalSkyWcsGlobalCatalog 

436 self.log.info('Applying global SkyWcs') 

437 else: 

438 # use tract-level external SkyWcs from the closest overlapping tract 

439 inputRef = getattr(inputRefs, 'externalSkyWcsTractCatalog') 

440 tracts = [ref.dataId['tract'] for ref in inputRef] 

441 if len(tracts) == 1: 

442 ind = 0 

443 self.log.info('Applying tract-level SkyWcs from tract %s', tracts[ind]) 

444 else: 

445 ind = self.getClosestTract(tracts, skyMap, 

446 exposure.getBBox(), exposure.getWcs()) 

447 self.log.info('Multiple overlapping externalSkyWcsTractCatalogs found (%s). ' 

448 'Applying closest to detector center: tract=%s', str(tracts), tracts[ind]) 

449 

450 externalSkyWcsCatalog = externalSkyWcsTractCatalog[ind] 

451 

452 if not self.config.doApplyExternalPhotoCalib: 

453 # Do not modify the exposure's PhotoCalib 

454 externalPhotoCalibCatalog = None 

455 elif self.config.useGlobalExternalPhotoCalib: 

456 # Use the global external PhotoCalib 

457 externalPhotoCalibCatalog = externalPhotoCalibGlobalCatalog 

458 self.log.info('Applying global PhotoCalib') 

459 else: 

460 # use tract-level external PhotoCalib from the closest overlapping tract 

461 inputRef = getattr(inputRefs, 'externalPhotoCalibTractCatalog') 

462 tracts = [ref.dataId['tract'] for ref in inputRef] 

463 if len(tracts) == 1: 

464 ind = 0 

465 self.log.info('Applying tract-level PhotoCalib from tract %s', tracts[ind]) 

466 else: 

467 ind = self.getClosestTract(tracts, skyMap, 

468 exposure.getBBox(), exposure.getWcs()) 

469 self.log.info('Multiple overlapping externalPhotoCalibTractCatalogs found (%s). ' 

470 'Applying closest to detector center: tract=%s', str(tracts), tracts[ind]) 

471 

472 externalPhotoCalibCatalog = externalPhotoCalibTractCatalog[ind] 

473 

474 return self.prepareCalibratedExposure(exposure, externalSkyWcsCatalog, externalPhotoCalibCatalog) 

475 

476 def getClosestTract(self, tracts, skyMap, bbox, wcs): 

477 """Find the index of the tract closest to detector from list of tractIds 

478 

479 Parameters 

480 ---------- 

481 tracts: `list` [`int`] 

482 Iterable of integer tractIds 

483 skyMap : `lsst.skymap.SkyMap` 

484 skyMap to lookup tract geometry and wcs 

485 bbox : `lsst.geom.Box2I` 

486 Detector bbox, the center of which will be compared to tract centers 

487 wcs : `lsst.afw.geom.SkyWcs` 

488 Detector Wcs object to map the detector center to SkyCoord 

489 

490 Returns 

491 ------- 

492 index : `int` 

493 """ 

494 if len(tracts) == 1: 

495 return 0 

496 

497 center = wcs.pixelToSky(bbox.getCenter()) 

498 sep = [] 

499 for tractId in tracts: 

500 tract = skyMap[tractId] 

501 tractCenter = tract.getWcs().pixelToSky(tract.getBBox().getCenter()) 

502 sep.append(center.separation(tractCenter)) 

503 

504 return np.argmin(sep) 

505 
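A minimal sketch of the separation comparison getClosestTract performs, using lsst.geom directly with made-up coordinates (the real centers come from the detector WCS and the skyMap tract WCSs):

import numpy as np
import lsst.geom

center = lsst.geom.SpherePoint(150.0, 2.0, lsst.geom.degrees)
tractCenters = [lsst.geom.SpherePoint(150.5, 2.0, lsst.geom.degrees),
                lsst.geom.SpherePoint(150.1, 2.1, lsst.geom.degrees)]
sep = [center.separation(c) for c in tractCenters]
closest = int(np.argmin(sep))   # 1: the second tract center is nearer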

506 def prepareCalibratedExposure(self, exposure, externalSkyWcsCatalog=None, externalPhotoCalibCatalog=None): 

507 """Prepare a calibrated exposure and apply external calibrations 

508 if so configured. 

509 

510 Parameters 

511 ---------- 

512 exposure : `lsst.afw.image.exposure.Exposure` 

513 Input exposure to adjust calibrations. 

514 externalSkyWcsCatalog : `lsst.afw.table.ExposureCatalog`, optional 

515 Exposure catalog with external skyWcs to be applied 

516 if config.doApplyExternalSkyWcs=True. Catalog uses the detector id 

517 for the catalog id, sorted on id for fast lookup. 

518 externalPhotoCalibCatalog : `lsst.afw.table.ExposureCatalog`, optional 

519 Exposure catalog with external photoCalib to be applied 

520 if config.doApplyExternalPhotoCalib=True. Catalog uses the detector 

521 id for the catalog id, sorted on id for fast lookup. 

522 

523 Returns 

524 ------- 

525 exposure : `lsst.afw.image.exposure.Exposure` 

526 Exposure with adjusted calibrations. 

527 """ 

528 detectorId = exposure.getInfo().getDetector().getId() 

529 

530 if externalPhotoCalibCatalog is not None: 

531 row = externalPhotoCalibCatalog.find(detectorId) 

532 if row is None: 

533 self.log.warning("Detector id %s not found in externalPhotoCalibCatalog; " 

534 "Using original photoCalib.", detectorId) 

535 else: 

536 photoCalib = row.getPhotoCalib() 

537 if photoCalib is None: 

538 self.log.warning("Detector id %s has None for photoCalib in externalPhotoCalibCatalog; " 

539 "Using original photoCalib.", detectorId) 

540 else: 

541 exposure.setPhotoCalib(photoCalib) 

542 

543 if externalSkyWcsCatalog is not None: 

544 row = externalSkyWcsCatalog.find(detectorId) 

545 if row is None: 

546 self.log.warning("Detector id %s not found in externalSkyWcsCatalog; " 

547 "Using original skyWcs.", detectorId) 

548 else: 

549 skyWcs = row.getWcs() 

550 if skyWcs is None: 

551 self.log.warning("Detector id %s has None for skyWcs in externalSkyWcsCatalog; " 

552 "Using original skyWcs.", detectorId) 

553 else: 

554 exposure.setWcs(skyWcs) 

555 

556 return exposure 

557 

558 def addCalibColumns(self, catalog, exposure, exposureIdInfo, **kwargs): 

559 """Add replace columns with calibs evaluated at each centroid 

560 

561 Add or replace 'base_LocalWcs' and 'base_LocalPhotoCalib' columns in 

562 a source catalog, by rerunning the plugins. 

563 

564 Parameters 

565 ---------- 

566 catalog : `lsst.afw.table.SourceCatalog` 

567 catalog to which calib columns will be added 

568 exposure : `lsst.afw.image.exposure.Exposure` 

569 Exposure with attached PhotoCalibs and SkyWcs attributes to be 

570 reevaluated at local centroids. Pixels are not required. 

571 exposureIdInfo : `lsst.obs.base.ExposureIdInfo` 

572 Exposure ID information; its ``expId`` is passed to the measurement plugins. 

573 Returns 

574 ------- 

575 newCat: `lsst.afw.table.SourceCatalog` 

576 Source Catalog with requested local calib columns 

577 """ 

578 measureConfig = SingleFrameMeasurementTask.ConfigClass() 

579 measureConfig.doReplaceWithNoise = False 

580 

581 # Clear all slots, because we aren't running the relevant plugins. 

582 for slot in measureConfig.slots: 

583 setattr(measureConfig.slots, slot, None) 

584 

585 measureConfig.plugins.names = [] 

586 if self.config.doReevaluateSkyWcs: 

587 measureConfig.plugins.names.add('base_LocalWcs') 

588 self.log.info("Re-evaluating base_LocalWcs plugin") 

589 if self.config.doReevaluatePhotoCalib: 

590 measureConfig.plugins.names.add('base_LocalPhotoCalib') 

591 self.log.info("Re-evaluating base_LocalPhotoCalib plugin") 

592 pluginsNotToCopy = tuple(measureConfig.plugins.names) 

593 

594 # Create a new schema and catalog 

595 # Copy all columns from original except for the ones to reevaluate 

596 aliasMap = catalog.schema.getAliasMap() 

597 mapper = afwTable.SchemaMapper(catalog.schema) 

598 for item in catalog.schema: 

599 if not item.field.getName().startswith(pluginsNotToCopy): 

600 mapper.addMapping(item.key) 

601 

602 schema = mapper.getOutputSchema() 

603 measurement = SingleFrameMeasurementTask(config=measureConfig, schema=schema) 

604 schema.setAliasMap(aliasMap) 

605 newCat = afwTable.SourceCatalog(schema) 

606 newCat.extend(catalog, mapper=mapper) 

607 

608 # Fluxes in sourceCatalogs are in counts, so there are no fluxes to 

609 # update here. LocalPhotoCalibs are applied during transform tasks. 

610 # Update coord_ra/coord_dec, which are expected to be positions on the 

611 # sky and are used as such in sdm tables without transform 

612 if self.config.doReevaluateSkyWcs: 

613 afwTable.updateSourceCoords(exposure.wcs, newCat) 

614 

615 measurement.run(measCat=newCat, exposure=exposure, exposureId=exposureIdInfo.expId) 

616 

617 return newCat 

618 

619 

620class PostprocessAnalysis(object): 

621 """Calculate columns from ParquetTable. 

622 

623 This object manages and organizes an arbitrary set of computations 

624 on a catalog. The catalog is defined by a 

625 `~lsst.pipe.tasks.parquetTable.ParquetTable` object (or list thereof), such 

626 as a ``deepCoadd_obj`` dataset, and the computations are defined by a 

627 collection of `lsst.pipe.tasks.functors.Functor` objects (or, equivalently, 

628 a ``CompositeFunctor``). 

629 

630 After the object is initialized, accessing the ``.df`` attribute (which 

631 holds the `pandas.DataFrame` containing the results of the calculations) 

632 triggers computation of said dataframe. 

633 

634 One of the conveniences of using this object is the ability to define a 

635 desired common filter for all functors. This enables the same functor 

636 collection to be passed to several different `PostprocessAnalysis` objects 

637 without having to change the original functor collection, since the ``filt`` 

638 keyword argument of this object triggers an overwrite of the ``filt`` 

639 property for all functors in the collection. 

640 

641 This object also allows a list of refFlags to be passed, and defines a set 

642 of default refFlags that are always included even if not requested. 

643 

644 If a list of `~lsst.pipe.tasks.parquetTable.ParquetTable` objects is passed, rather than a single one, 

645 then the calculations will be mapped over all the input catalogs. In 

646 principle, it should be straightforward to parallelize this activity, but 

647 initial tests have failed (see TODO in code comments). 

648 

649 Parameters 

650 ---------- 

651 parq : `~lsst.pipe.tasks.parquetTable.ParquetTable` (or list of such) 

652 Source catalog(s) for computation. 

653 

654 functors : `list`, `dict`, or `~lsst.pipe.tasks.functors.CompositeFunctor` 

655 Computations to do (functors that act on ``parq``). 

656 If a dict, the output 

657 DataFrame will have columns keyed accordingly. 

658 If a list, the column keys will come from the 

659 ``.shortname`` attribute of each functor. 

660 

661 filt : `str`, optional 

662 Filter in which to calculate. If provided, 

663 this will overwrite any existing ``.filt`` attribute 

664 of the provided functors. 

665 

666 flags : `list`, optional 

667 List of flags (per-band) to include in output table. 

668 Taken from the ``meas`` dataset if applied to a multilevel Object Table. 

669 

670 refFlags : `list`, optional 

671 List of refFlags (only reference band) to include in output table. 

672 

673 forcedFlags : `list`, optional 

674 List of flags (per-band) to include in output table. 

675 Taken from the ``forced_src`` dataset if applied to a 

676 multilevel Object Table. Intended for flags from measurement plugins 

677 only run during multi-band forced-photometry. 

678 """ 

679 _defaultRefFlags = [] 

680 _defaultFuncs = () 

681 

682 def __init__(self, parq, functors, filt=None, flags=None, refFlags=None, forcedFlags=None): 

683 self.parq = parq 

684 self.functors = functors 

685 

686 self.filt = filt 

687 self.flags = list(flags) if flags is not None else [] 

688 self.forcedFlags = list(forcedFlags) if forcedFlags is not None else [] 

689 self.refFlags = list(self._defaultRefFlags) 

690 if refFlags is not None: 

691 self.refFlags += list(refFlags) 

692 

693 self._df = None 

694 

695 @property 

696 def defaultFuncs(self): 

697 funcs = dict(self._defaultFuncs) 

698 return funcs 

699 

700 @property 

701 def func(self): 

702 additionalFuncs = self.defaultFuncs 

703 additionalFuncs.update({flag: Column(flag, dataset='forced_src') for flag in self.forcedFlags}) 

704 additionalFuncs.update({flag: Column(flag, dataset='ref') for flag in self.refFlags}) 

705 additionalFuncs.update({flag: Column(flag, dataset='meas') for flag in self.flags}) 

706 

707 if isinstance(self.functors, CompositeFunctor): 

708 func = self.functors 

709 else: 

710 func = CompositeFunctor(self.functors) 

711 

712 func.funcDict.update(additionalFuncs) 

713 func.filt = self.filt 

714 

715 return func 

716 

717 @property 

718 def noDupCols(self): 

719 return [name for name, func in self.func.funcDict.items() if func.noDup or func.dataset == 'ref'] 

720 

721 @property 

722 def df(self): 

723 if self._df is None: 

724 self.compute() 

725 return self._df 

726 

727 def compute(self, dropna=False, pool=None): 

728 # map over multiple parquet tables 

729 if type(self.parq) in (list, tuple): 

730 if pool is None: 

731 dflist = [self.func(parq, dropna=dropna) for parq in self.parq] 

732 else: 

733 # TODO: Figure out why this doesn't work (pyarrow pickling 

734 # issues?) 

735 dflist = pool.map(functools.partial(self.func, dropna=dropna), self.parq) 

736 self._df = pd.concat(dflist) 

737 else: 

738 self._df = self.func(self.parq, dropna=dropna) 

739 

740 return self._df 

741 
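A hedged usage sketch for PostprocessAnalysis (assumes a ParquetTable parq holding a deepCoadd_obj dataset is already in hand; the column and flag names are illustrative):

funcs = {'psfFlux': Column('base_PsfFlux_instFlux', dataset='meas')}
analysis = PostprocessAnalysis(parq, funcs, filt='g',
                               refFlags=['detect_isPrimary'])
df = analysis.df   # accessing .df triggers the computation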

742 

743class TransformCatalogBaseConnections(pipeBase.PipelineTaskConnections, 

744 dimensions=()): 

745 """Expected Connections for subclasses of TransformCatalogBaseTask. 

746 

747 Must be subclassed. 

748 """ 

749 inputCatalog = connectionTypes.Input( 

750 name="", 

751 storageClass="DataFrame", 

752 ) 

753 outputCatalog = connectionTypes.Output( 

754 name="", 

755 storageClass="DataFrame", 

756 ) 

757 

758 

759class TransformCatalogBaseConfig(pipeBase.PipelineTaskConfig, 

760 pipelineConnections=TransformCatalogBaseConnections): 

761 functorFile = pexConfig.Field( 

762 dtype=str, 

763 doc="Path to YAML file specifying Science Data Model functors to use " 

764 "when copying columns and computing calibrated values.", 

765 default=None, 

766 optional=True 

767 ) 

768 primaryKey = pexConfig.Field( 

769 dtype=str, 

770 doc="Name of column to be set as the DataFrame index. If None, the index" 

771 "will be named `id`", 

772 default=None, 

773 optional=True 

774 ) 

775 columnsFromDataId = pexConfig.ListField( 

776 dtype=str, 

777 default=None, 

778 optional=True, 

779 doc="Columns to extract from the dataId", 

780 ) 

781 

782 

783class TransformCatalogBaseTask(pipeBase.PipelineTask): 

784 """Base class for transforming/standardizing a catalog 

785 

786 Applies functors that convert units and apply calibrations. 

787 The purpose of this task is to perform a set of computations on 

788 an input `ParquetTable` dataset (such as ``deepCoadd_obj``) and write the 

789 results to a new dataset (which needs to be declared in an ``outputDataset`` 

790 attribute). 

791 

792 The calculations to be performed are defined in a YAML file that specifies 

793 a set of functors to be computed, provided as 

794 the ``functorFile`` config parameter. An example of such a YAML file 

795 is the following: 

796 

797 funcs: 

798 psfMag: 

799 functor: Mag 

800 args: 

801 - base_PsfFlux 

802 filt: HSC-G 

803 dataset: meas 

804 cmodel_magDiff: 

805 functor: MagDiff 

806 args: 

807 - modelfit_CModel 

808 - base_PsfFlux 

809 filt: HSC-G 

810 gauss_magDiff: 

811 functor: MagDiff 

812 args: 

813 - base_GaussianFlux 

814 - base_PsfFlux 

815 filt: HSC-G 

816 count: 

817 functor: Column 

818 args: 

819 - base_InputCount_value 

820 filt: HSC-G 

821 deconvolved_moments: 

822 functor: DeconvolvedMoments 

823 filt: HSC-G 

824 dataset: forced_src 

825 refFlags: 

826 - calib_psfUsed 

827 - merge_measurement_i 

828 - merge_measurement_r 

829 - merge_measurement_z 

830 - merge_measurement_y 

831 - merge_measurement_g 

832 - base_PixelFlags_flag_inexact_psfCenter 

833 - detect_isPrimary 

834 

835 The names for each entry under "funcs" will become the names of columns in 

836 the output dataset. All the functors referenced are defined in 

837 `lsst.pipe.tasks.functors`. Positional arguments to be passed to each 

838 functor are in the `args` list, and any additional entries for each column 

839 other than "functor" or "args" (e.g., ``'filt'``, ``'dataset'``) are treated as 

840 keyword arguments to be passed to the functor initialization. 

841 

842 The "flags" entry is the default shortcut for `Column` functors. 

843 All columns listed under "flags" will be copied to the output table 

844 untransformed. They can be of any datatype. 

845 In the special case of transforming a multi-level object table with 

846 band and dataset indices (deepCoadd_obj), these will be taken from the 

847 `meas` dataset and exploded out per band. 

848 

849 There are two special shortcuts that only apply when transforming 

850 multi-level Object (deepCoadd_obj) tables: 

851 - The "refFlags" entry is shortcut for `Column` functor 

852 taken from the `'ref'` dataset if transforming an ObjectTable. 

853 - The "forcedFlags" entry is shortcut for `Column` functors. 

854 taken from the ``forced_src`` dataset if transforming an ObjectTable. 

855 These are expanded out per band. 

856 

857 

858 This task uses the `lsst.pipe.tasks.postprocess.PostprocessAnalysis` object 

859 to organize and execute the calculations. 

860 """ 

861 @property 

862 def _DefaultName(self): 

863 raise NotImplementedError('Subclass must define "_DefaultName" attribute') 

864 

865 @property 

866 def outputDataset(self): 

867 raise NotImplementedError('Subclass must define "outputDataset" attribute') 

868 

869 @property 

870 def inputDataset(self): 

871 raise NotImplementedError('Subclass must define "inputDataset" attribute') 

872 

873 @property 

874 def ConfigClass(self): 

875 raise NotImplementedError('Subclass must define "ConfigClass" attribute') 

876 

877 def __init__(self, *args, **kwargs): 

878 super().__init__(*args, **kwargs) 

879 if self.config.functorFile: 

880 self.log.info('Loading transform functor definitions from %s', 

881 self.config.functorFile) 

882 self.funcs = CompositeFunctor.from_file(self.config.functorFile) 

883 self.funcs.update(dict(PostprocessAnalysis._defaultFuncs)) 

884 else: 

885 self.funcs = None 

886 

887 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

888 inputs = butlerQC.get(inputRefs) 

889 if self.funcs is None: 

890 raise ValueError("config.functorFile is None. " 

891 "Must be a valid path to yaml in order to run Task as a PipelineTask.") 

892 result = self.run(parq=inputs['inputCatalog'], funcs=self.funcs, 

893 dataId=outputRefs.outputCatalog.dataId.full) 

894 outputs = pipeBase.Struct(outputCatalog=result) 

895 butlerQC.put(outputs, outputRefs) 

896 

897 def run(self, parq, funcs=None, dataId=None, band=None): 

898 """Do postprocessing calculations 

899 

900 Takes a `ParquetTable` object and dataId, 

901 returns a dataframe with results of postprocessing calculations. 

902 

903 Parameters 

904 ---------- 

905 parq : `lsst.pipe.tasks.parquetTable.ParquetTable` 

906 ParquetTable from which calculations are done. 

907 funcs : `lsst.pipe.tasks.functors.Functors` 

908 Functors to apply to the table's columns 

909 dataId : dict, optional 

910 Used to add a `patchId` column to the output dataframe. 

911 band : `str`, optional 

912 Filter band that is being processed. 

913 

914 Returns 

915 ------- 

916 df : `pandas.DataFrame` 

917 """ 

918 self.log.info("Transforming/standardizing the source table dataId: %s", dataId) 

919 

920 df = self.transform(band, parq, funcs, dataId).df 

921 self.log.info("Made a table of %d columns and %d rows", len(df.columns), len(df)) 

922 return df 

923 

924 def getFunctors(self): 

925 return self.funcs 

926 

927 def getAnalysis(self, parq, funcs=None, band=None): 

928 if funcs is None: 

929 funcs = self.funcs 

930 analysis = PostprocessAnalysis(parq, funcs, filt=band) 

931 return analysis 

932 

933 def transform(self, band, parq, funcs, dataId): 

934 analysis = self.getAnalysis(parq, funcs=funcs, band=band) 

935 df = analysis.df 

936 if dataId and self.config.columnsFromDataId: 

937 for key in self.config.columnsFromDataId: 

938 if key in dataId: 

939 df[str(key)] = dataId[key] 

940 else: 

941 raise ValueError(f"'{key}' in config.columnsFromDataId not found in dataId: {dataId}") 

942 

943 if self.config.primaryKey: 

944 if df.index.name != self.config.primaryKey and self.config.primaryKey in df: 

945 df.reset_index(inplace=True, drop=True) 

946 df.set_index(self.config.primaryKey, inplace=True) 

947 

948 return pipeBase.Struct( 

949 df=df, 

950 analysis=analysis 

951 ) 

952 
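A hedged sketch of exercising a functor file outside the pipeline (assumes a ParquetTable parq and a YAML file shaped like the example in the class docstring above; the file name is illustrative):

funcs = CompositeFunctor.from_file('myFunctors.yaml')
funcs.update(dict(PostprocessAnalysis._defaultFuncs))
df = funcs(parq)   # evaluate every named functor into one DataFrame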

953 

954class TransformObjectCatalogConnections(pipeBase.PipelineTaskConnections, 

955 defaultTemplates={"coaddName": "deep"}, 

956 dimensions=("tract", "patch", "skymap")): 

957 inputCatalog = connectionTypes.Input( 

958 doc="The vertical concatenation of the deepCoadd_{ref|meas|forced_src} catalogs, " 

959 "stored as a DataFrame with a multi-level column index per-patch.", 

960 dimensions=("tract", "patch", "skymap"), 

961 storageClass="DataFrame", 

962 name="{coaddName}Coadd_obj", 

963 deferLoad=True, 

964 ) 

965 outputCatalog = connectionTypes.Output( 

966 doc="Per-Patch Object Table of columns transformed from the deepCoadd_obj table per the standard " 

967 "data model.", 

968 dimensions=("tract", "patch", "skymap"), 

969 storageClass="DataFrame", 

970 name="objectTable" 

971 ) 

972 

973 

974class TransformObjectCatalogConfig(TransformCatalogBaseConfig, 

975 pipelineConnections=TransformObjectCatalogConnections): 

976 coaddName = pexConfig.Field( 

977 dtype=str, 

978 default="deep", 

979 doc="Name of coadd" 

980 ) 

981 # TODO: remove in DM-27177 

982 filterMap = pexConfig.DictField( 

983 keytype=str, 

984 itemtype=str, 

985 default={}, 

986 doc=("Dictionary mapping full filter name to short one for column name munging." 

987 "These filters determine the output columns no matter what filters the " 

988 "input data actually contain."), 

989 deprecated=("Coadds are now identified by the band, so this transform is unused." 

990 "Will be removed after v22.") 

991 ) 

992 outputBands = pexConfig.ListField( 

993 dtype=str, 

994 default=None, 

995 optional=True, 

996 doc=("These bands and only these bands will appear in the output," 

997 " NaN-filled if the input does not include them." 

998 " If None, then use all bands found in the input.") 

999 ) 

1000 camelCase = pexConfig.Field( 

1001 dtype=bool, 

1002 default=False, 

1003 doc=("Write per-band columns names with camelCase, else underscore " 

1004 "For example: gPsFlux instead of g_PsFlux.") 

1005 ) 

1006 multilevelOutput = pexConfig.Field( 

1007 dtype=bool, 

1008 default=False, 

1009 doc=("Whether results dataframe should have a multilevel column index (True) or be flat " 

1010 "and name-munged (False).") 

1011 ) 

1012 goodFlags = pexConfig.ListField( 

1013 dtype=str, 

1014 default=[], 

1015 doc=("List of 'good' flags that should be set False when populating empty tables. " 

1016 "All other flags are considered to be 'bad' flags and will be set to True.") 

1017 ) 

1018 floatFillValue = pexConfig.Field( 

1019 dtype=float, 

1020 default=np.nan, 

1021 doc="Fill value for float fields when populating empty tables." 

1022 ) 

1023 integerFillValue = pexConfig.Field( 

1024 dtype=int, 

1025 default=-1, 

1026 doc="Fill value for integer fields when populating empty tables." 

1027 ) 

1028 

1029 def setDefaults(self): 

1030 super().setDefaults() 

1031 self.functorFile = os.path.join('$PIPE_TASKS_DIR', 'schemas', 'Object.yaml') 

1032 self.primaryKey = 'objectId' 

1033 self.columnsFromDataId = ['tract', 'patch'] 

1034 self.goodFlags = ['calib_astrometry_used', 

1035 'calib_photometry_reserved', 

1036 'calib_photometry_used', 

1037 'calib_psf_candidate', 

1038 'calib_psf_reserved', 

1039 'calib_psf_used'] 

1040 

1041 

1042class TransformObjectCatalogTask(TransformCatalogBaseTask): 

1043 """Produce a flattened Object Table to match the format specified in 

1044 sdm_schemas. 

1045 

1046 Do the same set of postprocessing calculations on all bands. 

1047 

1048 This is identical to `TransformCatalogBaseTask`, except that it does 

1049 the specified functor calculations for all filters present in the 

1050 input `deepCoadd_obj` table. Any specific ``"filt"`` keywords specified 

1051 by the YAML file will be superseded. 

1052 """ 

1053 _DefaultName = "transformObjectCatalog" 

1054 ConfigClass = TransformObjectCatalogConfig 

1055 

1056 def run(self, parq, funcs=None, dataId=None, band=None): 

1057 # NOTE: band kwarg is ignored here. 

1058 dfDict = {} 

1059 analysisDict = {} 

1060 templateDf = pd.DataFrame() 

1061 

1062 if isinstance(parq, DeferredDatasetHandle): 

1063 columns = parq.get(component='columns') 

1064 inputBands = columns.unique(level=1).values 

1065 else: 

1066 inputBands = parq.columnLevelNames['band'] 

1067 

1068 outputBands = self.config.outputBands if self.config.outputBands else inputBands 

1069 

1070 # Perform transform for data of filters that exist in parq. 

1071 for inputBand in inputBands: 

1072 if inputBand not in outputBands: 

1073 self.log.info("Ignoring %s band data in the input", inputBand) 

1074 continue 

1075 self.log.info("Transforming the catalog of band %s", inputBand) 

1076 result = self.transform(inputBand, parq, funcs, dataId) 

1077 dfDict[inputBand] = result.df 

1078 analysisDict[inputBand] = result.analysis 

1079 if templateDf.empty: 

1080 templateDf = result.df 

1081 

1082 # Put filler values in columns of other wanted bands 

1083 for filt in outputBands: 

1084 if filt not in dfDict: 

1085 self.log.info("Adding empty columns for band %s", filt) 

1086 dfTemp = templateDf.copy() 

1087 for col in dfTemp.columns: 

1088 testValue = dfTemp[col].values[0] 

1089 if isinstance(testValue, (np.bool_, pd.BooleanDtype)): 

1090 # Boolean flag type, check if it is a "good" flag 

1091 if col in self.config.goodFlags: 

1092 fillValue = False 

1093 else: 

1094 fillValue = True 

1095 elif isinstance(testValue, numbers.Integral): 

1096 # Checking numbers.Integral catches all flavors 

1097 # of python, numpy, pandas, etc. integers. 

1098 # We must ensure this is not an unsigned integer. 

1099 if isinstance(testValue, np.unsignedinteger): 

1100 raise ValueError("Parquet tables may not have unsigned integer columns.") 

1101 else: 

1102 fillValue = self.config.integerFillValue 

1103 else: 

1104 fillValue = self.config.floatFillValue 

1105 dfTemp[col].values[:] = fillValue 

1106 dfDict[filt] = dfTemp 

1107 

1108 # This makes a multilevel column index, with band as first level 

1109 df = pd.concat(dfDict, axis=1, names=['band', 'column']) 

1110 

1111 if not self.config.multilevelOutput: 

1112 noDupCols = list(set.union(*[set(v.noDupCols) for v in analysisDict.values()])) 

1113 if self.config.primaryKey in noDupCols: 

1114 noDupCols.remove(self.config.primaryKey) 

1115 if dataId and self.config.columnsFromDataId: 

1116 noDupCols += self.config.columnsFromDataId 

1117 df = flattenFilters(df, noDupCols=noDupCols, camelCase=self.config.camelCase, 

1118 inputBands=inputBands) 

1119 

1120 self.log.info("Made a table of %d columns and %d rows", len(df.columns), len(df)) 

1121 

1122 return df 

1123 
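A hedged configuration sketch for the band handling above (values are illustrative): restrict the output to two bands and keep the flat, name-munged column layout.

config = TransformObjectCatalogConfig()
config.outputBands = ['g', 'r']   # listed bands missing from the input are NaN-filled
config.camelCase = False          # column names like g_PsFlux rather than gPsFlux
config.multilevelOutput = False   # flatten the (band, column) index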

1124 

1125class ConsolidateObjectTableConnections(pipeBase.PipelineTaskConnections, 

1126 dimensions=("tract", "skymap")): 

1127 inputCatalogs = connectionTypes.Input( 

1128 doc="Per-Patch objectTables conforming to the standard data model.", 

1129 name="objectTable", 

1130 storageClass="DataFrame", 

1131 dimensions=("tract", "patch", "skymap"), 

1132 multiple=True, 

1133 ) 

1134 outputCatalog = connectionTypes.Output( 

1135 doc="Pre-tract horizontal concatenation of the input objectTables", 

1136 name="objectTable_tract", 

1137 storageClass="DataFrame", 

1138 dimensions=("tract", "skymap"), 

1139 ) 

1140 

1141 

1142class ConsolidateObjectTableConfig(pipeBase.PipelineTaskConfig, 

1143 pipelineConnections=ConsolidateObjectTableConnections): 

1144 coaddName = pexConfig.Field( 

1145 dtype=str, 

1146 default="deep", 

1147 doc="Name of coadd" 

1148 ) 

1149 

1150 

1151class ConsolidateObjectTableTask(pipeBase.PipelineTask): 

1152 """Write patch-merged source tables to a tract-level parquet file. 

1153 

1154 Concatenates the per-patch `objectTable` list into a per-tract `objectTable_tract`. 

1155 """ 

1156 _DefaultName = "consolidateObjectTable" 

1157 ConfigClass = ConsolidateObjectTableConfig 

1158 

1159 inputDataset = 'objectTable' 

1160 outputDataset = 'objectTable_tract' 

1161 

1162 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

1163 inputs = butlerQC.get(inputRefs) 

1164 self.log.info("Concatenating %s per-patch Object Tables", 

1165 len(inputs['inputCatalogs'])) 

1166 df = pd.concat(inputs['inputCatalogs']) 

1167 butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs) 

1168 

1169 

1170class TransformSourceTableConnections(pipeBase.PipelineTaskConnections, 

1171 defaultTemplates={"catalogType": ""}, 

1172 dimensions=("instrument", "visit", "detector")): 

1173 

1174 inputCatalog = connectionTypes.Input( 

1175 doc="Wide input catalog of sources produced by WriteSourceTableTask", 

1176 name="{catalogType}source", 

1177 storageClass="DataFrame", 

1178 dimensions=("instrument", "visit", "detector"), 

1179 deferLoad=True 

1180 ) 

1181 outputCatalog = connectionTypes.Output( 

1182 doc="Narrower, per-detector Source Table transformed and converted per a " 

1183 "specified set of functors", 

1184 name="{catalogType}sourceTable", 

1185 storageClass="DataFrame", 

1186 dimensions=("instrument", "visit", "detector") 

1187 ) 

1188 

1189 

1190class TransformSourceTableConfig(TransformCatalogBaseConfig, 

1191 pipelineConnections=TransformSourceTableConnections): 

1192 

1193 def setDefaults(self): 

1194 super().setDefaults() 

1195 self.functorFile = os.path.join('$PIPE_TASKS_DIR', 'schemas', 'Source.yaml') 

1196 self.primaryKey = 'sourceId' 

1197 self.columnsFromDataId = ['visit', 'detector', 'band', 'physical_filter'] 

1198 

1199 

1200class TransformSourceTableTask(TransformCatalogBaseTask): 

1201 """Transform/standardize a source catalog 

1202 """ 

1203 _DefaultName = "transformSourceTable" 

1204 ConfigClass = TransformSourceTableConfig 

1205 

1206 

1207class ConsolidateVisitSummaryConnections(pipeBase.PipelineTaskConnections, 

1208 dimensions=("instrument", "visit",), 

1209 defaultTemplates={"calexpType": ""}): 

1210 calexp = connectionTypes.Input( 

1211 doc="Processed exposures used for metadata", 

1212 name="{calexpType}calexp", 

1213 storageClass="ExposureF", 

1214 dimensions=("instrument", "visit", "detector"), 

1215 deferLoad=True, 

1216 multiple=True, 

1217 ) 

1218 visitSummary = connectionTypes.Output( 

1219 doc=("Per-visit consolidated exposure metadata. These catalogs use " 

1220 "detector id for the id and are sorted for fast lookups of a " 

1221 "detector."), 

1222 name="{calexpType}visitSummary", 

1223 storageClass="ExposureCatalog", 

1224 dimensions=("instrument", "visit"), 

1225 ) 

1226 

1227 

1228class ConsolidateVisitSummaryConfig(pipeBase.PipelineTaskConfig, 

1229 pipelineConnections=ConsolidateVisitSummaryConnections): 

1230 """Config for ConsolidateVisitSummaryTask""" 

1231 pass 

1232 

1233 

1234class ConsolidateVisitSummaryTask(pipeBase.PipelineTask): 

1235 """Task to consolidate per-detector visit metadata. 

1236 

1237 This task aggregates the following metadata from all the detectors in a 

1238 single visit into an exposure catalog: 

1239 - The visitInfo. 

1240 - The wcs. 

1241 - The photoCalib. 

1242 - The physical_filter and band (if available). 

1243 - The psf size, shape, and effective area at the center of the detector. 

1244 - The corners of the bounding box in right ascension/declination. 

1245 

1246 Other quantities such as Detector, Psf, ApCorrMap, and TransmissionCurve 

1247 are not persisted here because of storage concerns, and because of their 

1248 limited utility as summary statistics. 

1249 

1250 Tests for this task are performed in ci_hsc_gen3. 

1251 """ 

1252 _DefaultName = "consolidateVisitSummary" 

1253 ConfigClass = ConsolidateVisitSummaryConfig 

1254 

1255 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

1256 dataRefs = butlerQC.get(inputRefs.calexp) 

1257 visit = dataRefs[0].dataId.byName()['visit'] 

1258 

1259 self.log.debug("Concatenating metadata from %d per-detector calexps (visit %d)", 

1260 len(dataRefs), visit) 

1261 

1262 expCatalog = self._combineExposureMetadata(visit, dataRefs) 

1263 

1264 butlerQC.put(expCatalog, outputRefs.visitSummary) 

1265 

1266 def _combineExposureMetadata(self, visit, dataRefs): 

1267 """Make a combined exposure catalog from a list of dataRefs. 

1268 These dataRefs must point to exposures with wcs, summaryStats, 

1269 and other visit metadata. 

1270 

1271 Parameters 

1272 ---------- 

1273 visit : `int` 

1274 Visit identification number. 

1275 dataRefs : `list` of `lsst.daf.butler.DeferredDatasetHandle` 

1276 List of dataRefs in visit. 

1277 

1278 Returns 

1279 ------- 

1280 visitSummary : `lsst.afw.table.ExposureCatalog` 

1281 Exposure catalog with per-detector summary information. 

1282 """ 

1283 schema = self._makeVisitSummarySchema() 

1284 cat = afwTable.ExposureCatalog(schema) 

1285 cat.resize(len(dataRefs)) 

1286 

1287 cat['visit'] = visit 

1288 

1289 for i, dataRef in enumerate(dataRefs): 

1290 visitInfo = dataRef.get(component='visitInfo') 

1291 filterLabel = dataRef.get(component='filter') 

1292 summaryStats = dataRef.get(component='summaryStats') 

1293 detector = dataRef.get(component='detector') 

1294 wcs = dataRef.get(component='wcs') 

1295 photoCalib = dataRef.get(component='photoCalib') 

1296 detector = dataRef.get(component='detector') 

1297 bbox = dataRef.get(component='bbox') 

1298 validPolygon = dataRef.get(component='validPolygon') 

1299 

1300 rec = cat[i] 

1301 rec.setBBox(bbox) 

1302 rec.setVisitInfo(visitInfo) 

1303 rec.setWcs(wcs) 

1304 rec.setPhotoCalib(photoCalib) 

1305 rec.setValidPolygon(validPolygon) 

1306 

1307 rec['physical_filter'] = filterLabel.physicalLabel if filterLabel.hasPhysicalLabel() else "" 

1308 rec['band'] = filterLabel.bandLabel if filterLabel.hasBandLabel() else "" 

1309 rec.setId(detector.getId()) 

1310 rec['psfSigma'] = summaryStats.psfSigma 

1311 rec['psfIxx'] = summaryStats.psfIxx 

1312 rec['psfIyy'] = summaryStats.psfIyy 

1313 rec['psfIxy'] = summaryStats.psfIxy 

1314 rec['psfArea'] = summaryStats.psfArea 

1315 rec['raCorners'][:] = summaryStats.raCorners 

1316 rec['decCorners'][:] = summaryStats.decCorners 

1317 rec['ra'] = summaryStats.ra 

1318 rec['decl'] = summaryStats.decl 

1319 rec['zenithDistance'] = summaryStats.zenithDistance 

1320 rec['zeroPoint'] = summaryStats.zeroPoint 

1321 rec['skyBg'] = summaryStats.skyBg 

1322 rec['skyNoise'] = summaryStats.skyNoise 

1323 rec['meanVar'] = summaryStats.meanVar 

1324 rec['astromOffsetMean'] = summaryStats.astromOffsetMean 

1325 rec['astromOffsetStd'] = summaryStats.astromOffsetStd 

1326 rec['nPsfStar'] = summaryStats.nPsfStar 

1327 rec['psfStarDeltaE1Median'] = summaryStats.psfStarDeltaE1Median 

1328 rec['psfStarDeltaE2Median'] = summaryStats.psfStarDeltaE2Median 

1329 rec['psfStarDeltaE1Scatter'] = summaryStats.psfStarDeltaE1Scatter 

1330 rec['psfStarDeltaE2Scatter'] = summaryStats.psfStarDeltaE2Scatter 

1331 rec['psfStarDeltaSizeMedian'] = summaryStats.psfStarDeltaSizeMedian 

1332 rec['psfStarDeltaSizeScatter'] = summaryStats.psfStarDeltaSizeScatter 

1333 rec['psfStarScaledDeltaSizeScatter'] = summaryStats.psfStarScaledDeltaSizeScatter 

1334 

1335 metadata = dafBase.PropertyList() 

1336 metadata.add("COMMENT", "Catalog id is detector id, sorted.") 

1337 # We are looping over existing datarefs, so the following is true 

1338 metadata.add("COMMENT", "Only detectors with data have entries.") 

1339 cat.setMetadata(metadata) 

1340 

1341 cat.sort() 

1342 return cat 

1343 

1344 def _makeVisitSummarySchema(self): 

1345 """Make the schema for the visitSummary catalog.""" 

1346 schema = afwTable.ExposureTable.makeMinimalSchema() 

1347 schema.addField('visit', type='L', doc='Visit number') 

1348 schema.addField('physical_filter', type='String', size=32, doc='Physical filter') 

1349 schema.addField('band', type='String', size=32, doc='Name of band') 

1350 schema.addField('psfSigma', type='F', 

1351 doc='PSF model second-moments determinant radius (center of chip) (pixel)') 

1352 schema.addField('psfArea', type='F', 

1353 doc='PSF model effective area (center of chip) (pixel**2)') 

1354 schema.addField('psfIxx', type='F', 

1355 doc='PSF model Ixx (center of chip) (pixel**2)') 

1356 schema.addField('psfIyy', type='F', 

1357 doc='PSF model Iyy (center of chip) (pixel**2)') 

1358 schema.addField('psfIxy', type='F', 

1359 doc='PSF model Ixy (center of chip) (pixel**2)') 

1360 schema.addField('raCorners', type='ArrayD', size=4, 

1361 doc='Right Ascension of bounding box corners (degrees)') 

1362 schema.addField('decCorners', type='ArrayD', size=4, 

1363 doc='Declination of bounding box corners (degrees)') 

1364 schema.addField('ra', type='D', 

1365 doc='Right Ascension of bounding box center (degrees)') 

1366 schema.addField('decl', type='D', 

1367 doc='Declination of bounding box center (degrees)') 

1368 schema.addField('zenithDistance', type='F', 

1369 doc='Zenith distance of bounding box center (degrees)') 

1370 schema.addField('zeroPoint', type='F', 

1371 doc='Mean zeropoint in detector (mag)') 

1372 schema.addField('skyBg', type='F', 

1373 doc='Average sky background (ADU)') 

1374 schema.addField('skyNoise', type='F', 

1375 doc='Average sky noise (ADU)') 

1376 schema.addField('meanVar', type='F', 

1377 doc='Mean variance of the weight plane (ADU**2)') 

1378 schema.addField('astromOffsetMean', type='F', 

1379 doc='Mean offset of astrometric calibration matches (arcsec)') 

1380 schema.addField('astromOffsetStd', type='F', 

1381 doc='Standard deviation of offsets of astrometric calibration matches (arcsec)') 

1382 schema.addField('nPsfStar', type='I', doc='Number of stars used for PSF model') 

1383 schema.addField('psfStarDeltaE1Median', type='F', 

1384 doc='Median E1 residual (starE1 - psfE1) for psf stars') 

1385 schema.addField('psfStarDeltaE2Median', type='F', 

1386 doc='Median E2 residual (starE2 - psfE2) for psf stars') 

1387 schema.addField('psfStarDeltaE1Scatter', type='F', 

1388 doc='Scatter (via MAD) of E1 residual (starE1 - psfE1) for psf stars') 

1389 schema.addField('psfStarDeltaE2Scatter', type='F', 

1390 doc='Scatter (via MAD) of E2 residual (starE2 - psfE2) for psf stars') 

1391 schema.addField('psfStarDeltaSizeMedian', type='F', 

1392 doc='Median size residual (starSize - psfSize) for psf stars (pixel)') 

1393 schema.addField('psfStarDeltaSizeScatter', type='F', 

1394 doc='Scatter (via MAD) of size residual (starSize - psfSize) for psf stars (pixel)') 

1395 schema.addField('psfStarScaledDeltaSizeScatter', type='F', 

1396 doc='Scatter (via MAD) of size residual scaled by median size squared') 

1397 

1398 return schema 

1399 
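A hedged sketch of consuming the visitSummary downstream (requires the LSST stack; the butler repository, instrument, visit, and detector id are assumed/illustrative):

visitSummary = butler.get('visitSummary', instrument='HSC', visit=1228)
row = visitSummary.find(50)                 # catalog id is the detector id
print(row['psfSigma'], row['zeroPoint'], row['band'])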

1400 

1401class ConsolidateSourceTableConnections(pipeBase.PipelineTaskConnections, 

1402 defaultTemplates={"catalogType": ""}, 

1403 dimensions=("instrument", "visit")): 

1404 inputCatalogs = connectionTypes.Input( 

1405 doc="Input per-detector Source Tables", 

1406 name="{catalogType}sourceTable", 

1407 storageClass="DataFrame", 

1408 dimensions=("instrument", "visit", "detector"), 

1409 multiple=True 

1410 ) 

1411 outputCatalog = connectionTypes.Output( 

1412 doc="Per-visit concatenation of Source Table", 

1413 name="{catalogType}sourceTable_visit", 

1414 storageClass="DataFrame", 

1415 dimensions=("instrument", "visit") 

1416 ) 

1417 

1418 

1419class ConsolidateSourceTableConfig(pipeBase.PipelineTaskConfig, 

1420 pipelineConnections=ConsolidateSourceTableConnections): 

1421 pass 

1422 

1423 

1424class ConsolidateSourceTableTask(pipeBase.PipelineTask): 

1425 """Concatenate `sourceTable` list into a per-visit `sourceTable_visit` 

1426 """ 

1427 _DefaultName = 'consolidateSourceTable' 

1428 ConfigClass = ConsolidateSourceTableConfig 

1429 

1430 inputDataset = 'sourceTable' 

1431 outputDataset = 'sourceTable_visit' 

1432 

1433 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

1434 from .makeWarp import reorderRefs 

1435 

1436 detectorOrder = [ref.dataId['detector'] for ref in inputRefs.inputCatalogs] 

1437 detectorOrder.sort() 

1438 inputRefs = reorderRefs(inputRefs, detectorOrder, dataIdKey='detector') 

1439 inputs = butlerQC.get(inputRefs) 

1440 self.log.info("Concatenating %s per-detector Source Tables", 

1441 len(inputs['inputCatalogs'])) 

1442 df = pd.concat(inputs['inputCatalogs']) 

1443 butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs) 

1444 

1445 
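# --- Editorial sketch (not part of the source above) -------------------------
# The per-visit consolidation above is a plain row-wise pandas concatenation of
# the per-detector source tables, loaded in sorted detector order. The toy
# frames and detector ids below are assumptions for illustration only.
def _exampleConcatenatePerDetectorTables():
    import pandas as pd

    perDetector = {
        31: pd.DataFrame({'sourceId': [1, 2], 'psfFlux': [10.0, 11.0]}),
        12: pd.DataFrame({'sourceId': [3], 'psfFlux': [9.5]}),
    }
    # Mirror the reorderRefs call: concatenate in sorted detector order.
    ordered = [perDetector[det] for det in sorted(perDetector)]
    return pd.concat(ordered)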

1446class MakeCcdVisitTableConnections(pipeBase.PipelineTaskConnections, 

1447 dimensions=("instrument",), 

1448 defaultTemplates={"calexpType": ""}): 

1449 visitSummaryRefs = connectionTypes.Input( 

1450 doc="Data references for per-visit consolidated exposure metadata from ConsolidateVisitSummaryTask", 

1451 name="{calexpType}visitSummary", 

1452 storageClass="ExposureCatalog", 

1453 dimensions=("instrument", "visit"), 

1454 multiple=True, 

1455 deferLoad=True, 

1456 ) 

1457 outputCatalog = connectionTypes.Output( 

1458 doc="CCD and Visit metadata table", 

1459 name="{calexpType}ccdVisitTable", 

1460 storageClass="DataFrame", 

1461 dimensions=("instrument",) 

1462 ) 

1463 

1464 

1465class MakeCcdVisitTableConfig(pipeBase.PipelineTaskConfig, 

1466 pipelineConnections=MakeCcdVisitTableConnections): 

1467 pass 

1468 

1469 

1470class MakeCcdVisitTableTask(pipeBase.PipelineTask): 

1471 """Produce a `ccdVisitTable` from the `visitSummary` exposure catalogs. 

1472 """ 

1473 _DefaultName = 'makeCcdVisitTable' 

1474 ConfigClass = MakeCcdVisitTableConfig 

1475 

1476 def run(self, visitSummaryRefs): 

1477 """Make a table of ccd information from the `visitSummary` catalogs. 

1478 

1479 Parameters 

1480 ---------- 

1481 visitSummaryRefs : `list` of `lsst.daf.butler.DeferredDatasetHandle` 

1482 List of DeferredDatasetHandles pointing to exposure catalogs with 

1483 per-detector summary information. 

1484 

1485 Returns 

1486 ------- 

1487 result : `lsst.pipe.base.Struct` 

1488 Results struct with attribute: 

1489 

1490 ``outputCatalog`` 

1491 Catalog of ccd and visit information. 

1492 """ 

1493 ccdEntries = [] 

1494 for visitSummaryRef in visitSummaryRefs: 

1495 visitSummary = visitSummaryRef.get() 

1496 visitInfo = visitSummary[0].getVisitInfo() 

1497 

1498 ccdEntry = {} 

1499 summaryTable = visitSummary.asAstropy() 

1500 selectColumns = ['id', 'visit', 'physical_filter', 'band', 'ra', 'decl', 'zenithDistance', 

1501 'zeroPoint', 'psfSigma', 'skyBg', 'skyNoise', 

1502 'astromOffsetMean', 'astromOffsetStd', 'nPsfStar', 

1503 'psfStarDeltaE1Median', 'psfStarDeltaE2Median', 

1504 'psfStarDeltaE1Scatter', 'psfStarDeltaE2Scatter', 

1505 'psfStarDeltaSizeMedian', 'psfStarDeltaSizeScatter', 

1506 'psfStarScaledDeltaSizeScatter'] 

1507 ccdEntry = summaryTable[selectColumns].to_pandas().set_index('id') 

1508 # 'visit' is the human readable visit number. 

1509 # 'visitId' is the key to the visitId table. They are the same. 

1510 # Technically you should join to get the visit from the visit 

1511 # table. 

1512 ccdEntry = ccdEntry.rename(columns={"visit": "visitId"}) 

1513 dataIds = [DataCoordinate.standardize(visitSummaryRef.dataId, detector=id) for id in 

1514 summaryTable['id']] 

1515 packer = visitSummaryRef.dataId.universe.makePacker('visit_detector', visitSummaryRef.dataId) 

1516 ccdVisitIds = [packer.pack(dataId) for dataId in dataIds] 

1517 ccdEntry['ccdVisitId'] = ccdVisitIds 

1518 ccdEntry['detector'] = summaryTable['id'] 

1519 pixToArcseconds = np.array([vR.getWcs().getPixelScale().asArcseconds() for vR in visitSummary]) 

1520 ccdEntry["seeing"] = visitSummary['psfSigma'] * np.sqrt(8 * np.log(2)) * pixToArcseconds 

1521 

1522 ccdEntry["skyRotation"] = visitInfo.getBoresightRotAngle().asDegrees() 

1523 ccdEntry["expMidpt"] = visitInfo.getDate().toPython() 

1524 ccdEntry["expMidptMJD"] = visitInfo.getDate().get(dafBase.DateTime.MJD) 

1525 expTime = visitInfo.getExposureTime() 

1526 ccdEntry['expTime'] = expTime 

1527 ccdEntry["obsStart"] = ccdEntry["expMidpt"] - 0.5 * pd.Timedelta(seconds=expTime) 

1528 expTime_days = expTime / (60*60*24) 

1529 ccdEntry["obsStartMJD"] = ccdEntry["expMidptMJD"] - 0.5 * expTime_days 

1530 ccdEntry['darkTime'] = visitInfo.getDarkTime() 

1531 ccdEntry['xSize'] = summaryTable['bbox_max_x'] - summaryTable['bbox_min_x'] 

1532 ccdEntry['ySize'] = summaryTable['bbox_max_y'] - summaryTable['bbox_min_y'] 

1533 ccdEntry['llcra'] = summaryTable['raCorners'][:, 0] 

1534 ccdEntry['llcdec'] = summaryTable['decCorners'][:, 0] 

1535 ccdEntry['ulcra'] = summaryTable['raCorners'][:, 1] 

1536 ccdEntry['ulcdec'] = summaryTable['decCorners'][:, 1] 

1537 ccdEntry['urcra'] = summaryTable['raCorners'][:, 2] 

1538 ccdEntry['urcdec'] = summaryTable['decCorners'][:, 2] 

1539 ccdEntry['lrcra'] = summaryTable['raCorners'][:, 3] 

1540 ccdEntry['lrcdec'] = summaryTable['decCorners'][:, 3] 

1541 # TODO: DM-30618, Add raftName, nExposures, ccdTemp, binX, binY, 

1542 # and flags, and decide if WCS, and llcx, llcy, ulcx, ulcy, etc. 

1543 # values are actually wanted. 

1544 ccdEntries.append(ccdEntry) 

1545 

1546 outputCatalog = pd.concat(ccdEntries) 

1547 outputCatalog.set_index('ccdVisitId', inplace=True, verify_integrity=True) 

1548 return pipeBase.Struct(outputCatalog=outputCatalog) 

1549 

1550 
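# --- Editorial sketch (not part of the source above) -------------------------
# The "seeing" column computed above converts the PSF determinant radius (a
# Gaussian sigma in pixels) to a FWHM in arcseconds, FWHM = sigma*sqrt(8*ln 2),
# and obsStart is the exposure midpoint shifted back by half the exposure time.
# The numbers below are assumed values for illustration only.
def _exampleSeeingAndObsStart():
    import numpy as np
    import pandas as pd

    psfSigma = 1.7      # pixels (assumed)
    pixelScale = 0.2    # arcsec / pixel (assumed)
    seeing = psfSigma * np.sqrt(8 * np.log(2)) * pixelScale  # ~0.80 arcsec

    expMidpt = pd.Timestamp('2022-01-01T00:00:30')  # assumed exposure midpoint
    expTime = 30.0                                  # seconds (assumed)
    obsStart = expMidpt - 0.5 * pd.Timedelta(seconds=expTime)
    obsStartMJDOffset = 0.5 * expTime / (60 * 60 * 24)  # days subtracted from expMidptMJD
    return seeing, obsStart, obsStartMJDOffset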

1551class MakeVisitTableConnections(pipeBase.PipelineTaskConnections, 

1552 dimensions=("instrument",), 

1553 defaultTemplates={"calexpType": ""}): 

1554 visitSummaries = connectionTypes.Input( 

1555 doc="Per-visit consolidated exposure metadata from ConsolidateVisitSummaryTask", 

1556 name="{calexpType}visitSummary", 

1557 storageClass="ExposureCatalog", 

1558 dimensions=("instrument", "visit",), 

1559 multiple=True, 

1560 deferLoad=True, 

1561 ) 

1562 outputCatalog = connectionTypes.Output( 

1563 doc="Visit metadata table", 

1564 name="{calexpType}visitTable", 

1565 storageClass="DataFrame", 

1566 dimensions=("instrument",) 

1567 ) 

1568 

1569 

1570class MakeVisitTableConfig(pipeBase.PipelineTaskConfig, 

1571 pipelineConnections=MakeVisitTableConnections): 

1572 pass 

1573 

1574 

1575class MakeVisitTableTask(pipeBase.PipelineTask): 

1576 """Produce a `visitTable` from the `visitSummary` exposure catalogs. 

1577 """ 

1578 _DefaultName = 'makeVisitTable' 

1579 ConfigClass = MakeVisitTableConfig 

1580 

1581 def run(self, visitSummaries): 

1582 """Make a table of visit information from the `visitSummary` catalogs. 

1583 

1584 Parameters 

1585 ---------- 

1586 visitSummaries : `list` of `lsst.afw.table.ExposureCatalog` 

1587 List of exposure catalogs with per-detector summary information. 

1588 Returns 

1589 ------- 

1590 result : `lsst.pipe.base.Struct` 

1591 Results struct with attribute: 

1592 

1593 ``outputCatalog`` 

1594 Catalog of visit information. 

1595 """ 

1596 visitEntries = [] 

1597 for visitSummary in visitSummaries: 

1598 visitSummary = visitSummary.get() 

1599 visitRow = visitSummary[0] 

1600 visitInfo = visitRow.getVisitInfo() 

1601 

1602 visitEntry = {} 

1603 visitEntry["visitId"] = visitRow['visit'] 

1604 visitEntry["visit"] = visitRow['visit'] 

1605 visitEntry["physical_filter"] = visitRow['physical_filter'] 

1606 visitEntry["band"] = visitRow['band'] 

1607 raDec = visitInfo.getBoresightRaDec() 

1608 visitEntry["ra"] = raDec.getRa().asDegrees() 

1609 visitEntry["decl"] = raDec.getDec().asDegrees() 

1610 visitEntry["skyRotation"] = visitInfo.getBoresightRotAngle().asDegrees() 

1611 azAlt = visitInfo.getBoresightAzAlt() 

1612 visitEntry["azimuth"] = azAlt.getLongitude().asDegrees() 

1613 visitEntry["altitude"] = azAlt.getLatitude().asDegrees() 

1614 visitEntry["zenithDistance"] = 90 - azAlt.getLatitude().asDegrees() 

1615 visitEntry["airmass"] = visitInfo.getBoresightAirmass() 

1616 expTime = visitInfo.getExposureTime() 

1617 visitEntry["expTime"] = expTime 

1618 visitEntry["expMidpt"] = visitInfo.getDate().toPython() 

1619 visitEntry["expMidptMJD"] = visitInfo.getDate().get(dafBase.DateTime.MJD) 

1620 visitEntry["obsStart"] = visitEntry["expMidpt"] - 0.5 * pd.Timedelta(seconds=expTime) 

1621 expTime_days = expTime / (60*60*24) 

1622 visitEntry["obsStartMJD"] = visitEntry["expMidptMJD"] - 0.5 * expTime_days 

1623 visitEntries.append(visitEntry) 

1624 

1625 # TODO: DM-30623, Add programId, exposureType, cameraTemp, 

1626 # mirror1Temp, mirror2Temp, mirror3Temp, domeTemp, externalTemp, 

1627 # dimmSeeing, pwvGPS, pwvMW, flags, nExposures. 

1628 

1629 outputCatalog = pd.DataFrame(data=visitEntries) 

1630 outputCatalog.set_index('visitId', inplace=True, verify_integrity=True) 

1631 return pipeBase.Struct(outputCatalog=outputCatalog) 

1632 

1633 
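# --- Editorial sketch (not part of the source above) -------------------------
# The visit table above is built from one dict per visit and indexed by
# visitId; verify_integrity raises if the same visit were to appear twice.
# Column values below are assumptions for illustration only.
def _exampleVisitTable():
    import pandas as pd

    visitEntries = [
        {'visitId': 100, 'band': 'r', 'airmass': 1.10, 'zenithDistance': 90 - 65.0},
        {'visitId': 101, 'band': 'i', 'airmass': 1.31, 'zenithDistance': 90 - 50.0},
    ]
    table = pd.DataFrame(data=visitEntries)
    table.set_index('visitId', inplace=True, verify_integrity=True)
    return table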

1634class WriteForcedSourceTableConnections(pipeBase.PipelineTaskConnections, 

1635 dimensions=("instrument", "visit", "detector", "skymap", "tract")): 

1636 

1637 inputCatalog = connectionTypes.Input( 

1638 doc="Primary per-detector, single-epoch forced-photometry catalog. " 

1639 "By default, it is the output of ForcedPhotCcdTask on calexps", 

1640 name="forced_src", 

1641 storageClass="SourceCatalog", 

1642 dimensions=("instrument", "visit", "detector", "skymap", "tract") 

1643 ) 

1644 inputCatalogDiff = connectionTypes.Input( 

1645 doc="Secondary multi-epoch, per-detector, forced photometry catalog. " 

1646 "By default, it is the output of ForcedPhotCcdTask run on image differences.", 

1647 name="forced_diff", 

1648 storageClass="SourceCatalog", 

1649 dimensions=("instrument", "visit", "detector", "skymap", "tract") 

1650 ) 

1651 outputCatalog = connectionTypes.Output( 

1652 doc="Input catalogs horizontally joined on `objectId` in Parquet format", 

1653 name="mergedForcedSource", 

1654 storageClass="DataFrame", 

1655 dimensions=("instrument", "visit", "detector", "skymap", "tract") 

1656 ) 

1657 

1658 

1659class WriteForcedSourceTableConfig(pipeBase.PipelineTaskConfig, 

1660 pipelineConnections=WriteForcedSourceTableConnections): 

1661 key = lsst.pex.config.Field( 

1662 doc="Column on which to join the two input tables and which becomes the primary key of the output", 

1663 dtype=str, 

1664 default="objectId", 

1665 ) 

1666 

1667 

1668class WriteForcedSourceTableTask(pipeBase.PipelineTask): 

1669 """Merge and convert per-detector forced source catalogs to parquet. 

1670 

1671 Because the predecessor ForcedPhotCcdTask operates per-detector, 

1672 per-tract (i.e., it has tract in its dimensions), detectors 

1673 on the tract boundary may have multiple forced source catalogs. 

1674 

1675 The successor task TransformForcedSourceTable runs per-patch 

1676 and temporally aggregates overlapping mergedForcedSource catalogs from all 

1677 available epochs. 

1678 """ 

1679 _DefaultName = "writeForcedSourceTable" 

1680 ConfigClass = WriteForcedSourceTableConfig 

1681 

1682 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

1683 inputs = butlerQC.get(inputRefs) 

1684 # Add ccdVisitId to allow joining with CcdVisitTable 

1685 inputs['ccdVisitId'] = butlerQC.quantum.dataId.pack("visit_detector") 

1686 inputs['band'] = butlerQC.quantum.dataId.full['band'] 

1687 outputs = self.run(**inputs) 

1688 butlerQC.put(outputs, outputRefs) 

1689 

1690 def run(self, inputCatalog, inputCatalogDiff, ccdVisitId=None, band=None): 

1691 dfs = [] 

1692 for table, dataset, in zip((inputCatalog, inputCatalogDiff), ('calexp', 'diff')): 

1693 df = table.asAstropy().to_pandas().set_index(self.config.key, drop=False) 

1694 df = df.reindex(sorted(df.columns), axis=1) 

1695 df['ccdVisitId'] = ccdVisitId if ccdVisitId else pd.NA 

1696 df['band'] = band if band else pd.NA 

1697 df.columns = pd.MultiIndex.from_tuples([(dataset, c) for c in df.columns], 

1698 names=('dataset', 'column')) 

1699 

1700 dfs.append(df) 

1701 

1702 outputCatalog = functools.reduce(lambda d1, d2: d1.join(d2), dfs) 

1703 return pipeBase.Struct(outputCatalog=outputCatalog) 

1704 

1705 
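# --- Editorial sketch (not part of the source above) -------------------------
# The merged forced-source table above is a horizontal join on objectId of the
# 'calexp' and 'diff' measurements, carrying a two-level ('dataset', 'column')
# column index. Toy frames stand in for the afw SourceCatalogs; the column
# names and values are assumptions for illustration only.
def _exampleForcedSourceJoin():
    import functools
    import pandas as pd

    dfs = []
    for dataset, flux in (('calexp', [10.0, 11.0]), ('diff', [0.1, -0.2])):
        df = pd.DataFrame({'objectId': [1, 2], 'psfFlux': flux})
        df = df.set_index('objectId', drop=False)
        df.columns = pd.MultiIndex.from_tuples([(dataset, c) for c in df.columns],
                                               names=('dataset', 'column'))
        dfs.append(df)
    # Join the two frames on their shared objectId index, as the task does.
    return functools.reduce(lambda d1, d2: d1.join(d2), dfs)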

1706class TransformForcedSourceTableConnections(pipeBase.PipelineTaskConnections, 

1707 dimensions=("instrument", "skymap", "patch", "tract")): 

1708 

1709 inputCatalogs = connectionTypes.Input( 

1710 doc="Parquet table of merged ForcedSources produced by WriteForcedSourceTableTask", 

1711 name="mergedForcedSource", 

1712 storageClass="DataFrame", 

1713 dimensions=("instrument", "visit", "detector", "skymap", "tract"), 

1714 multiple=True, 

1715 deferLoad=True 

1716 ) 

1717 referenceCatalog = connectionTypes.Input( 

1718 doc="Reference catalog which was used to seed the forcedPhot. Columns " 

1719 "objectId, detect_isPrimary, detect_isTractInner, detect_isPatchInner " 

1720 "are expected.", 

1721 name="objectTable", 

1722 storageClass="DataFrame", 

1723 dimensions=("tract", "patch", "skymap"), 

1724 deferLoad=True 

1725 ) 

1726 outputCatalog = connectionTypes.Output( 

1727 doc="Narrower, temporally aggregated, per-patch ForcedSource Table transformed and converted per a " 

1728 "specified set of functors", 

1729 name="forcedSourceTable", 

1730 storageClass="DataFrame", 

1731 dimensions=("tract", "patch", "skymap") 

1732 ) 

1733 

1734 

1735class TransformForcedSourceTableConfig(TransformCatalogBaseConfig, 

1736 pipelineConnections=TransformForcedSourceTableConnections): 

1737 referenceColumns = pexConfig.ListField( 

1738 dtype=str, 

1739 default=["detect_isPrimary", "detect_isTractInner", "detect_isPatchInner"], 

1740 optional=True, 

1741 doc="Columns to pull from reference catalog", 

1742 ) 

1743 keyRef = lsst.pex.config.Field( 

1744 doc="Column on which to join the two input tables and which becomes the primary key of the output", 

1745 dtype=str, 

1746 default="objectId", 

1747 ) 

1748 key = lsst.pex.config.Field( 

1749 doc="Rename the output DataFrame index to this name", 

1750 dtype=str, 

1751 default="forcedSourceId", 

1752 ) 

1753 

1754 def setDefaults(self): 

1755 super().setDefaults() 

1756 self.functorFile = os.path.join('$PIPE_TASKS_DIR', 'schemas', 'ForcedSource.yaml') 

1757 self.columnsFromDataId = ['tract', 'patch'] 

1758 

1759 

1760class TransformForcedSourceTableTask(TransformCatalogBaseTask): 

1761 """Transform/standardize a ForcedSource catalog 

1762 

1763 Transforms each wide, per-detector forcedSource parquet table per the 

1764 specification file (per-camera defaults found in ForcedSource.yaml). 

1765 All epochs that overlap the patch are aggregated into one per-patch 

1766 narrow-parquet file. 

1767 

1768 No de-duplication of rows is performed. Duplicate-resolution flags are 

1769 pulled in from the referenceCatalog: `detect_isPrimary`, 

1770 `detect_isTractInner`, `detect_isPatchInner`, so that the user may de-duplicate 

1771 for analysis or compare duplicates for QA. 

1772 

1773 The resulting table includes multiple bands. Epochs (MJDs) and other useful 

1774 per-visit metadata can be retrieved by joining with the CcdVisitTable on 

1775 ccdVisitId. 

1776 """ 

1777 _DefaultName = "transformForcedSourceTable" 

1778 ConfigClass = TransformForcedSourceTableConfig 

1779 

1780 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

1781 inputs = butlerQC.get(inputRefs) 

1782 if self.funcs is None: 

1783 raise ValueError("config.functorFile is None. " 

1784 "Must be a valid path to yaml in order to run Task as a PipelineTask.") 

1785 outputs = self.run(inputs['inputCatalogs'], inputs['referenceCatalog'], funcs=self.funcs, 

1786 dataId=outputRefs.outputCatalog.dataId.full) 

1787 

1788 butlerQC.put(outputs, outputRefs) 

1789 

1790 def run(self, inputCatalogs, referenceCatalog, funcs=None, dataId=None, band=None): 

1791 dfs = [] 

1792 ref = referenceCatalog.get(parameters={"columns": self.config.referenceColumns}) 

1793 self.log.info("Aggregating %s input catalogs", len(inputCatalogs)) 

1794 for handle in inputCatalogs: 

1795 result = self.transform(None, handle, funcs, dataId) 

1796 # Filter for only rows that were detected on (overlap) the patch 

1797 dfs.append(result.df.join(ref, how='inner')) 

1798 

1799 outputCatalog = pd.concat(dfs) 

1800 

1801 # Now that we are done joining on config.keyRef, 

1802 # rename the index to config.keyRef 

1803 outputCatalog.index.rename(self.config.keyRef, inplace=True) 

1804 # Add config.keyRef to the column list 

1805 outputCatalog.reset_index(inplace=True) 

1806 # Set the forcedSourceId to the index. This is specified in the 

1807 # ForcedSource.yaml 

1808 outputCatalog.set_index("forcedSourceId", inplace=True, verify_integrity=True) 

1809 # Rename it to the config.key 

1810 outputCatalog.index.rename(self.config.key, inplace=True) 

1811 

1812 self.log.info("Made a table of %d columns and %d rows", 

1813 len(outputCatalog.columns), len(outputCatalog)) 

1814 return pipeBase.Struct(outputCatalog=outputCatalog) 

1815 

1816 
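# --- Editorial sketch (not part of the source above) -------------------------
# Joining each transformed table against the patch's reference catalog with
# how='inner' keeps only objects seeded from that patch; the result is then
# re-indexed on forcedSourceId. The ids and columns below are assumptions for
# illustration only.
def _examplePatchFilterAndReindex():
    import pandas as pd

    transformed = pd.DataFrame({'forcedSourceId': [11, 12, 13], 'psfFlux': [1.0, 2.0, 3.0]},
                               index=pd.Index([1, 2, 3], name='objectId'))
    reference = pd.DataFrame({'detect_isPrimary': [True, False]},
                             index=pd.Index([1, 2], name='objectId'))
    kept = transformed.join(reference, how='inner')  # objectId 3 is dropped
    kept = kept.reset_index().set_index('forcedSourceId', verify_integrity=True)
    return kept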

1817class ConsolidateTractConnections(pipeBase.PipelineTaskConnections, 

1818 defaultTemplates={"catalogType": ""}, 

1819 dimensions=("instrument", "tract")): 

1820 inputCatalogs = connectionTypes.Input( 

1821 doc="Input per-patch DataFrame Tables to be concatenated", 

1822 name="{catalogType}ForcedSourceTable", 

1823 storageClass="DataFrame", 

1824 dimensions=("tract", "patch", "skymap"), 

1825 multiple=True, 

1826 ) 

1827 

1828 outputCatalog = connectionTypes.Output( 

1829 doc="Output per-tract concatenation of DataFrame Tables", 

1830 name="{catalogType}ForcedSourceTable_tract", 

1831 storageClass="DataFrame", 

1832 dimensions=("tract", "skymap"), 

1833 ) 

1834 

1835 

1836class ConsolidateTractConfig(pipeBase.PipelineTaskConfig, 

1837 pipelineConnections=ConsolidateTractConnections): 

1838 pass 

1839 

1840 

1841class ConsolidateTractTask(pipeBase.PipelineTask): 

1842 """Concatenate any per-patch, dataframe list into a single 

1843 per-tract DataFrame. 

1844 """ 

1845 _DefaultName = 'ConsolidateTract' 

1846 ConfigClass = ConsolidateTractConfig 

1847 

1848 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

1849 inputs = butlerQC.get(inputRefs) 

1850 # Not checking that at least one inputCatalog exists, because that would 

1851 # imply an empty quantum graph. 

1852 self.log.info("Concatenating %s per-patch %s Tables", 

1853 len(inputs['inputCatalogs']), 

1854 inputRefs.inputCatalogs[0].datasetType.name) 

1855 df = pd.concat(inputs['inputCatalogs']) 

1856 butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs)