Coverage for python/lsst/pipe/tasks/postprocess.py: 31%

807 statements  

coverage.py v6.4.2, created at 2022-07-16 11:48 +0000

1# This file is part of pipe_tasks 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

22import functools 

23import pandas as pd 

24from collections import defaultdict 

25import logging 

26import numpy as np 

27import numbers 

28import os 

29 

30import lsst.geom 

31import lsst.pex.config as pexConfig 

32import lsst.pipe.base as pipeBase 

33import lsst.daf.base as dafBase 

34from lsst.obs.base import ExposureIdInfo 

35from lsst.pipe.base import connectionTypes 

36import lsst.afw.table as afwTable 

37from lsst.meas.base import SingleFrameMeasurementTask 

38from lsst.pipe.base import CmdLineTask, ArgumentParser, DataIdContainer 

39from lsst.coadd.utils.coaddDataIdContainer import CoaddDataIdContainer 

40from lsst.daf.butler import DeferredDatasetHandle, DataCoordinate 

41from lsst.skymap import BaseSkyMap 

42 

43from .parquetTable import ParquetTable 

44from .multiBandUtils import makeMergeArgumentParser, MergeSourcesRunner 

45from .functors import CompositeFunctor, Column 

46 

47log = logging.getLogger(__name__) 

48 

49 

50def flattenFilters(df, noDupCols=['coord_ra', 'coord_dec'], camelCase=False, inputBands=None): 

51 """Flattens a dataframe with multilevel column index. 

52 """ 

53 newDf = pd.DataFrame() 

54 # band is the level 0 index 

55 dfBands = df.columns.unique(level=0).values 

56 for band in dfBands: 

57 subdf = df[band] 

58 columnFormat = '{0}{1}' if camelCase else '{0}_{1}' 

59 newColumns = {c: columnFormat.format(band, c) 

60 for c in subdf.columns if c not in noDupCols} 

61 cols = list(newColumns.keys()) 

62 newDf = pd.concat([newDf, subdf[cols].rename(columns=newColumns)], axis=1) 

63 

64    # Band must be present in the input and output or else the column is all NaN:

65 presentBands = dfBands if inputBands is None else list(set(inputBands).intersection(dfBands)) 

66 # Get the unexploded columns from any present band's partition 

67 noDupDf = df[presentBands[0]][noDupCols] 

68 newDf = pd.concat([noDupDf, newDf], axis=1) 

69 return newDf 

70 
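# A minimal sketch of what flattenFilters produces, using hypothetical column
# names (only coord_ra/coord_dec below come from this function's defaults):
#
#     bands = ['g', 'r']
#     cols = pd.MultiIndex.from_product([bands, ['coord_ra', 'coord_dec', 'PsFlux']])
#     df = pd.DataFrame(np.zeros((2, len(cols))), columns=cols)
#     flattenFilters(df).columns.tolist()
#     # ['coord_ra', 'coord_dec', 'g_PsFlux', 'r_PsFlux']
#     flattenFilters(df, camelCase=True).columns.tolist()
#     # ['coord_ra', 'coord_dec', 'gPsFlux', 'rPsFlux']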

71 

72class WriteObjectTableConnections(pipeBase.PipelineTaskConnections, 

73 defaultTemplates={"coaddName": "deep"}, 

74 dimensions=("tract", "patch", "skymap")): 

75 inputCatalogMeas = connectionTypes.Input( 

76 doc="Catalog of source measurements on the deepCoadd.", 

77 dimensions=("tract", "patch", "band", "skymap"), 

78 storageClass="SourceCatalog", 

79 name="{coaddName}Coadd_meas", 

80 multiple=True 

81 ) 

82 inputCatalogForcedSrc = connectionTypes.Input( 

83 doc="Catalog of forced measurements (shape and position parameters held fixed) on the deepCoadd.", 

84 dimensions=("tract", "patch", "band", "skymap"), 

85 storageClass="SourceCatalog", 

86 name="{coaddName}Coadd_forced_src", 

87 multiple=True 

88 ) 

89 inputCatalogRef = connectionTypes.Input( 

90 doc="Catalog marking the primary detection (which band provides a good shape and position)" 

91 "for each detection in deepCoadd_mergeDet.", 

92 dimensions=("tract", "patch", "skymap"), 

93 storageClass="SourceCatalog", 

94 name="{coaddName}Coadd_ref" 

95 ) 

96 outputCatalog = connectionTypes.Output( 

97 doc="A vertical concatenation of the deepCoadd_{ref|meas|forced_src} catalogs, " 

98 "stored as a DataFrame with a multi-level column index per-patch.", 

99 dimensions=("tract", "patch", "skymap"), 

100 storageClass="DataFrame", 

101 name="{coaddName}Coadd_obj" 

102 ) 

103 

104 

105class WriteObjectTableConfig(pipeBase.PipelineTaskConfig, 

106 pipelineConnections=WriteObjectTableConnections): 

107 engine = pexConfig.Field( 

108 dtype=str, 

109 default="pyarrow", 

110 doc="Parquet engine for writing (pyarrow or fastparquet)" 

111 ) 

112 coaddName = pexConfig.Field( 

113 dtype=str, 

114 default="deep", 

115 doc="Name of coadd" 

116 ) 

117 

118 

119class WriteObjectTableTask(CmdLineTask, pipeBase.PipelineTask): 

120 """Write filter-merged source tables to parquet 

121 """ 

122 _DefaultName = "writeObjectTable" 

123 ConfigClass = WriteObjectTableConfig 

124 RunnerClass = MergeSourcesRunner 

125 

126 # Names of table datasets to be merged 

127 inputDatasets = ('forced_src', 'meas', 'ref') 

128 

129 # Tag of output dataset written by `MergeSourcesTask.write` 

130 outputDataset = 'obj' 

131 

132 def __init__(self, butler=None, schema=None, **kwargs): 

133 # It is a shame that this class can't use the default init for 

134 # CmdLineTask, but to do so would require its own special task 

135 # runner, which is many more lines of specialization, so this is 

136 # how it is for now. 

137 super().__init__(**kwargs) 

138 

139 def runDataRef(self, patchRefList): 

140 """! 

141 @brief Merge coadd sources from multiple bands. Calls @ref `run` which 

142 must be defined in subclasses that inherit from MergeSourcesTask. 

143 @param[in] patchRefList list of data references for each filter 

144 """ 

145 catalogs = dict(self.readCatalog(patchRef) for patchRef in patchRefList) 

146 dataId = patchRefList[0].dataId 

147 mergedCatalog = self.run(catalogs, tract=dataId['tract'], patch=dataId['patch']) 

148 self.write(patchRefList[0], ParquetTable(dataFrame=mergedCatalog)) 

149 

150 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

151 inputs = butlerQC.get(inputRefs) 

152 

153 measDict = {ref.dataId['band']: {'meas': cat} for ref, cat in 

154 zip(inputRefs.inputCatalogMeas, inputs['inputCatalogMeas'])} 

155 forcedSourceDict = {ref.dataId['band']: {'forced_src': cat} for ref, cat in 

156 zip(inputRefs.inputCatalogForcedSrc, inputs['inputCatalogForcedSrc'])} 

157 

158 catalogs = {} 

159 for band in measDict.keys(): 

160 catalogs[band] = {'meas': measDict[band]['meas'], 

161 'forced_src': forcedSourceDict[band]['forced_src'], 

162 'ref': inputs['inputCatalogRef']} 

163 dataId = butlerQC.quantum.dataId 

164 df = self.run(catalogs=catalogs, tract=dataId['tract'], patch=dataId['patch']) 

165 outputs = pipeBase.Struct(outputCatalog=df) 

166 butlerQC.put(outputs, outputRefs) 

167 

168 @classmethod 

169 def _makeArgumentParser(cls): 

170 """Create a suitable ArgumentParser. 

171 

172 We will use the ArgumentParser to get a list of data 

173 references for patches; the RunnerClass will sort them into lists 

174 of data references for the same patch. 

175 

176        References the first of self.inputDatasets, rather than

177        self.inputDataset.

178 """ 

179 return makeMergeArgumentParser(cls._DefaultName, cls.inputDatasets[0]) 

180 

181 def readCatalog(self, patchRef): 

182 """Read input catalogs 

183 

184 Read all the input datasets given by the 'inputDatasets' 

185 attribute. 

186 

187 Parameters 

188 ---------- 

189 patchRef : `lsst.daf.persistence.ButlerDataRef` 

190 Data reference for patch. 

191 

192 Returns 

193 ------- 

194 Tuple consisting of band name and a dict of catalogs, keyed by 

195 dataset name. 

196 """ 

197 band = patchRef.get(self.config.coaddName + "Coadd_filter", immediate=True).bandLabel 

198 catalogDict = {} 

199 for dataset in self.inputDatasets: 

200 catalog = patchRef.get(self.config.coaddName + "Coadd_" + dataset, immediate=True) 

201 self.log.info("Read %d sources from %s for band %s: %s", 

202 len(catalog), dataset, band, patchRef.dataId) 

203 catalogDict[dataset] = catalog 

204 return band, catalogDict 

205 

206 def run(self, catalogs, tract, patch): 

207 """Merge multiple catalogs. 

208 

209 Parameters 

210 ---------- 

211 catalogs : `dict` 

212 Mapping from filter names to dict of catalogs. 

213        tract : `int`

214            tractId to use for the tractId column.

215        patch : `str`

216            patchId to use for the patchId column.

217 

218 Returns 

219 ------- 

220 catalog : `pandas.DataFrame` 

221 Merged dataframe. 

222 """ 

223 

224 dfs = [] 

225 for filt, tableDict in catalogs.items(): 

226 for dataset, table in tableDict.items(): 

227 # Convert afwTable to pandas DataFrame 

228 df = table.asAstropy().to_pandas().set_index('id', drop=True) 

229 

230 # Sort columns by name, to ensure matching schema among patches 

231 df = df.reindex(sorted(df.columns), axis=1) 

232 df['tractId'] = tract 

233 df['patchId'] = patch 

234 

235 # Make columns a 3-level MultiIndex 

236 df.columns = pd.MultiIndex.from_tuples([(dataset, filt, c) for c in df.columns], 

237 names=('dataset', 'band', 'column')) 

238 dfs.append(df) 

239 

240 catalog = functools.reduce(lambda d1, d2: d1.join(d2), dfs) 

241 return catalog 

242 
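# Sketch of the three-level column index produced by run() above; the
# measurement column name is illustrative, not a fixed schema:
#
#     # catalog.columns is a pandas MultiIndex with level names
#     # ('dataset', 'band', 'column') and entries such as
#     #     ('meas', 'g', 'base_PsfFlux_instFlux')
#     #     ('ref',  'g', 'tractId')
#     # so a single dataset/band slice is available as catalog['meas']['g'].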

243 def write(self, patchRef, catalog): 

244 """Write the output. 

245 

246 Parameters 

247 ---------- 

248 catalog : `ParquetTable` 

249 Catalog to write. 

250 patchRef : `lsst.daf.persistence.ButlerDataRef` 

251 Data reference for patch. 

252 """ 

253 patchRef.put(catalog, self.config.coaddName + "Coadd_" + self.outputDataset) 

254 # since the filter isn't actually part of the data ID for the dataset 

255 # we're saving, it's confusing to see it in the log message, even if 

256 # the butler simply ignores it. 

257 mergeDataId = patchRef.dataId.copy() 

258 del mergeDataId["filter"] 

259 self.log.info("Wrote merged catalog: %s", mergeDataId) 

260 

261 def writeMetadata(self, dataRefList): 

262 """No metadata to write, and not sure how to write it for a list of 

263 dataRefs. 

264 """ 

265 pass 

266 

267 

268class WriteSourceTableConnections(pipeBase.PipelineTaskConnections, 

269 defaultTemplates={"catalogType": ""}, 

270 dimensions=("instrument", "visit", "detector")): 

271 

272 catalog = connectionTypes.Input( 

273 doc="Input full-depth catalog of sources produced by CalibrateTask", 

274 name="{catalogType}src", 

275 storageClass="SourceCatalog", 

276 dimensions=("instrument", "visit", "detector") 

277 ) 

278 outputCatalog = connectionTypes.Output( 

279 doc="Catalog of sources, `src` in Parquet format. The 'id' column is " 

280 "replaced with an index; all other columns are unchanged.", 

281 name="{catalogType}source", 

282 storageClass="DataFrame", 

283 dimensions=("instrument", "visit", "detector") 

284 ) 

285 

286 

287class WriteSourceTableConfig(pipeBase.PipelineTaskConfig, 

288 pipelineConnections=WriteSourceTableConnections): 

289 pass 

290 

291 

292class WriteSourceTableTask(CmdLineTask, pipeBase.PipelineTask): 

293 """Write source table to parquet. 

294 """ 

295 _DefaultName = "writeSourceTable" 

296 ConfigClass = WriteSourceTableConfig 

297 

298 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

299 inputs = butlerQC.get(inputRefs) 

300 inputs['ccdVisitId'] = butlerQC.quantum.dataId.pack("visit_detector") 

301 result = self.run(**inputs).table 

302 outputs = pipeBase.Struct(outputCatalog=result.toDataFrame()) 

303 butlerQC.put(outputs, outputRefs) 

304 

305 def run(self, catalog, ccdVisitId=None, **kwargs): 

306 """Convert `src` catalog to parquet 

307 

308 Parameters 

309 ---------- 

310        catalog : `lsst.afw.table.SourceCatalog`

311            Catalog to be converted.

312        ccdVisitId : `int`

313            ccdVisitId to be added as a column.

314 

315 Returns 

316 ------- 

317 result : `lsst.pipe.base.Struct` 

318 ``table`` 

319 `ParquetTable` version of the input catalog 

320 """ 

321 self.log.info("Generating parquet table from src catalog ccdVisitId=%s", ccdVisitId) 

322 df = catalog.asAstropy().to_pandas().set_index('id', drop=True) 

323 df['ccdVisitId'] = ccdVisitId 

324 return pipeBase.Struct(table=ParquetTable(dataFrame=df)) 

325 
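# Minimal sketch of calling run() directly, outside runQuantum; `catalog` is
# assumed to be an lsst.afw.table.SourceCatalog already in memory and the
# ccdVisitId value is illustrative:
#
#     task = WriteSourceTableTask()
#     result = task.run(catalog, ccdVisitId=123456789)
#     df = result.table.toDataFrame()  # indexed by source 'id', with a
#                                      # constant 'ccdVisitId' column added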

326 

327class WriteRecalibratedSourceTableConnections(WriteSourceTableConnections, 

328 defaultTemplates={"catalogType": "", 

329 "skyWcsName": "jointcal", 

330 "photoCalibName": "fgcm"}, 

331 dimensions=("instrument", "visit", "detector", "skymap")): 

332 skyMap = connectionTypes.Input( 

333 doc="skyMap needed to choose which tract-level calibrations to use when multiple available", 

334 name=BaseSkyMap.SKYMAP_DATASET_TYPE_NAME, 

335 storageClass="SkyMap", 

336 dimensions=("skymap",), 

337 ) 

338 exposure = connectionTypes.Input( 

339 doc="Input exposure to perform photometry on.", 

340 name="calexp", 

341 storageClass="ExposureF", 

342 dimensions=["instrument", "visit", "detector"], 

343 ) 

344 externalSkyWcsTractCatalog = connectionTypes.Input( 

345 doc=("Per-tract, per-visit wcs calibrations. These catalogs use the detector " 

346 "id for the catalog id, sorted on id for fast lookup."), 

347 name="{skyWcsName}SkyWcsCatalog", 

348 storageClass="ExposureCatalog", 

349 dimensions=["instrument", "visit", "tract"], 

350 multiple=True 

351 ) 

352 externalSkyWcsGlobalCatalog = connectionTypes.Input( 

353 doc=("Per-visit wcs calibrations computed globally (with no tract information). " 

354 "These catalogs use the detector id for the catalog id, sorted on id for " 

355 "fast lookup."), 

356 name="{skyWcsName}SkyWcsCatalog", 

357 storageClass="ExposureCatalog", 

358 dimensions=["instrument", "visit"], 

359 ) 

360 externalPhotoCalibTractCatalog = connectionTypes.Input( 

361 doc=("Per-tract, per-visit photometric calibrations. These catalogs use the " 

362 "detector id for the catalog id, sorted on id for fast lookup."), 

363 name="{photoCalibName}PhotoCalibCatalog", 

364 storageClass="ExposureCatalog", 

365 dimensions=["instrument", "visit", "tract"], 

366 multiple=True 

367 ) 

368 externalPhotoCalibGlobalCatalog = connectionTypes.Input( 

369 doc=("Per-visit photometric calibrations computed globally (with no tract " 

370 "information). These catalogs use the detector id for the catalog id, " 

371 "sorted on id for fast lookup."), 

372 name="{photoCalibName}PhotoCalibCatalog", 

373 storageClass="ExposureCatalog", 

374 dimensions=["instrument", "visit"], 

375 ) 

376 

377 def __init__(self, *, config=None): 

378 super().__init__(config=config) 

379 # Same connection boilerplate as all other applications of 

380 # Global/Tract calibrations 

381 if config.doApplyExternalSkyWcs and config.doReevaluateSkyWcs: 

382 if config.useGlobalExternalSkyWcs: 

383 self.inputs.remove("externalSkyWcsTractCatalog") 

384 else: 

385 self.inputs.remove("externalSkyWcsGlobalCatalog") 

386 else: 

387 self.inputs.remove("externalSkyWcsTractCatalog") 

388 self.inputs.remove("externalSkyWcsGlobalCatalog") 

389 if config.doApplyExternalPhotoCalib and config.doReevaluatePhotoCalib: 

390 if config.useGlobalExternalPhotoCalib: 

391 self.inputs.remove("externalPhotoCalibTractCatalog") 

392 else: 

393 self.inputs.remove("externalPhotoCalibGlobalCatalog") 

394 else: 

395 self.inputs.remove("externalPhotoCalibTractCatalog") 

396 self.inputs.remove("externalPhotoCalibGlobalCatalog") 

397 

398 

399class WriteRecalibratedSourceTableConfig(WriteSourceTableConfig, 

400 pipelineConnections=WriteRecalibratedSourceTableConnections): 

401 

402 doReevaluatePhotoCalib = pexConfig.Field( 

403 dtype=bool, 

404 default=True, 

405 doc=("Add or replace local photoCalib columns") 

406 ) 

407 doReevaluateSkyWcs = pexConfig.Field( 

408 dtype=bool, 

409 default=True, 

410 doc=("Add or replace local WCS columns and update the coord columns, coord_ra and coord_dec") 

411 ) 

412 doApplyExternalPhotoCalib = pexConfig.Field( 

413 dtype=bool, 

414 default=True, 

415 doc=("If and only if doReevaluatePhotoCalib, apply the photometric calibrations from an external ", 

416 "algorithm such as FGCM or jointcal, else use the photoCalib already attached to the exposure."), 

417 ) 

418 doApplyExternalSkyWcs = pexConfig.Field( 

419 dtype=bool, 

420 default=True, 

421 doc=("if and only if doReevaluateSkyWcs, apply the WCS from an external algorithm such as jointcal, ", 

422 "else use the wcs already attached to the exposure."), 

423 ) 

424 useGlobalExternalPhotoCalib = pexConfig.Field( 

425 dtype=bool, 

426 default=True, 

427 doc=("When using doApplyExternalPhotoCalib, use 'global' calibrations " 

428 "that are not run per-tract. When False, use per-tract photometric " 

429 "calibration files.") 

430 ) 

431 useGlobalExternalSkyWcs = pexConfig.Field( 

432 dtype=bool, 

433 default=False, 

434 doc=("When using doApplyExternalSkyWcs, use 'global' calibrations " 

435 "that are not run per-tract. When False, use per-tract wcs " 

436 "files.") 

437 ) 

438 

439 def validate(self): 

440 super().validate() 

441 if self.doApplyExternalSkyWcs and not self.doReevaluateSkyWcs: 

442 log.warning("doApplyExternalSkyWcs=True but doReevaluateSkyWcs=False" 

443 "External SkyWcs will not be read or evaluated.") 

444 if self.doApplyExternalPhotoCalib and not self.doReevaluatePhotoCalib: 

445 log.warning("doApplyExternalPhotoCalib=True but doReevaluatePhotoCalib=False." 

446 "External PhotoCalib will not be read or evaluated.") 

447 
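# Sketch of the config switches consumed by the connection logic above; the
# values shown simply restate this config class's defaults:
#
#     config = WriteRecalibratedSourceTableConfig()
#     config.doReevaluatePhotoCalib = True       # recompute base_LocalPhotoCalib columns
#     config.doApplyExternalPhotoCalib = True    # ...using an external (e.g. FGCM) calibration
#     config.useGlobalExternalPhotoCalib = True  # ...from the global, not per-tract, catalog
#     config.doReevaluateSkyWcs = True           # recompute base_LocalWcs and coord_ra/coord_dec
#     config.doApplyExternalSkyWcs = True        # ...using an external (e.g. jointcal) WCS
#     config.useGlobalExternalSkyWcs = False     # ...taken from the per-tract catalogs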

448 

449class WriteRecalibratedSourceTableTask(WriteSourceTableTask): 

450 """Write source table to parquet 

451 """ 

452 _DefaultName = "writeRecalibratedSourceTable" 

453 ConfigClass = WriteRecalibratedSourceTableConfig 

454 

455 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

456 inputs = butlerQC.get(inputRefs) 

457 inputs['ccdVisitId'] = butlerQC.quantum.dataId.pack("visit_detector") 

458 inputs['exposureIdInfo'] = ExposureIdInfo.fromDataId(butlerQC.quantum.dataId, "visit_detector") 

459 

460 if self.config.doReevaluatePhotoCalib or self.config.doReevaluateSkyWcs: 

461 if self.config.doApplyExternalPhotoCalib or self.config.doApplyExternalSkyWcs: 

462 inputs['exposure'] = self.attachCalibs(inputRefs, **inputs) 

463 

464 inputs['catalog'] = self.addCalibColumns(**inputs) 

465 

466 result = self.run(**inputs).table 

467 outputs = pipeBase.Struct(outputCatalog=result.toDataFrame()) 

468 butlerQC.put(outputs, outputRefs) 

469 

470 def attachCalibs(self, inputRefs, skyMap, exposure, externalSkyWcsGlobalCatalog=None, 

471 externalSkyWcsTractCatalog=None, externalPhotoCalibGlobalCatalog=None, 

472 externalPhotoCalibTractCatalog=None, **kwargs): 

473 """Apply external calibrations to exposure per configuration 

474 

475        When multiple tract-level calibrations overlap, select the one whose

476        center is closest to the detector center.

477 

478 Parameters 

479 ---------- 

480        inputRefs : `lsst.pipe.base.InputQuantizedConnection`

481            Input references, used for the dataIds of the tract-level calibs.

482 skyMap : `lsst.skymap.SkyMap` 

483 exposure : `lsst.afw.image.exposure.Exposure` 

484 Input exposure to adjust calibrations. 

485 externalSkyWcsGlobalCatalog : `lsst.afw.table.ExposureCatalog`, optional 

486 Exposure catalog with external skyWcs to be applied per config 

487 externalSkyWcsTractCatalog : `lsst.afw.table.ExposureCatalog`, optional 

488 Exposure catalog with external skyWcs to be applied per config 

489 externalPhotoCalibGlobalCatalog : `lsst.afw.table.ExposureCatalog`, optional 

490 Exposure catalog with external photoCalib to be applied per config 

491        externalPhotoCalibTractCatalog : `lsst.afw.table.ExposureCatalog`, optional

492            Exposure catalog with external photoCalib to be applied per config

493 

494 Returns 

495 ------- 

496 exposure : `lsst.afw.image.exposure.Exposure` 

497 Exposure with adjusted calibrations. 

498 """ 

499 if not self.config.doApplyExternalSkyWcs: 

500 # Do not modify the exposure's SkyWcs 

501 externalSkyWcsCatalog = None 

502 elif self.config.useGlobalExternalSkyWcs: 

503 # Use the global external SkyWcs 

504 externalSkyWcsCatalog = externalSkyWcsGlobalCatalog 

505 self.log.info('Applying global SkyWcs') 

506 else: 

507 # use tract-level external SkyWcs from the closest overlapping tract 

508 inputRef = getattr(inputRefs, 'externalSkyWcsTractCatalog') 

509 tracts = [ref.dataId['tract'] for ref in inputRef] 

510 if len(tracts) == 1: 

511 ind = 0 

512 self.log.info('Applying tract-level SkyWcs from tract %s', tracts[ind]) 

513 else: 

514 ind = self.getClosestTract(tracts, skyMap, 

515 exposure.getBBox(), exposure.getWcs()) 

516 self.log.info('Multiple overlapping externalSkyWcsTractCatalogs found (%s). ' 

517 'Applying closest to detector center: tract=%s', str(tracts), tracts[ind]) 

518 

519 externalSkyWcsCatalog = externalSkyWcsTractCatalog[ind] 

520 

521 if not self.config.doApplyExternalPhotoCalib: 

522 # Do not modify the exposure's PhotoCalib 

523 externalPhotoCalibCatalog = None 

524 elif self.config.useGlobalExternalPhotoCalib: 

525 # Use the global external PhotoCalib 

526 externalPhotoCalibCatalog = externalPhotoCalibGlobalCatalog 

527 self.log.info('Applying global PhotoCalib') 

528 else: 

529 # use tract-level external PhotoCalib from the closest overlapping tract 

530 inputRef = getattr(inputRefs, 'externalPhotoCalibTractCatalog') 

531 tracts = [ref.dataId['tract'] for ref in inputRef] 

532 if len(tracts) == 1: 

533 ind = 0 

534 self.log.info('Applying tract-level PhotoCalib from tract %s', tracts[ind]) 

535 else: 

536 ind = self.getClosestTract(tracts, skyMap, 

537 exposure.getBBox(), exposure.getWcs()) 

538 self.log.info('Multiple overlapping externalPhotoCalibTractCatalogs found (%s). ' 

539 'Applying closest to detector center: tract=%s', str(tracts), tracts[ind]) 

540 

541 externalPhotoCalibCatalog = externalPhotoCalibTractCatalog[ind] 

542 

543 return self.prepareCalibratedExposure(exposure, externalSkyWcsCatalog, externalPhotoCalibCatalog) 

544 

545 def getClosestTract(self, tracts, skyMap, bbox, wcs): 

546 """Find the index of the tract closest to detector from list of tractIds 

547 

548 Parameters 

549 ---------- 

550        tracts : `list` [`int`]

551            Iterable of integer tractIds

552        skyMap : `lsst.skymap.SkyMap`

553            skyMap to look up tract geometry and wcs

554        bbox : `lsst.geom.Box2I`

555            Detector bbox, the center of which will be compared to tract centers

556 wcs : `lsst.afw.geom.SkyWcs` 

557 Detector Wcs object to map the detector center to SkyCoord 

558 

559 Returns 

560 ------- 

561 index : `int` 

562 """ 

563 if len(tracts) == 1: 

564 return 0 

565 

566 center = wcs.pixelToSky(bbox.getCenter()) 

567 sep = [] 

568 for tractId in tracts: 

569 tract = skyMap[tractId] 

570 tractCenter = tract.getWcs().pixelToSky(tract.getBBox().getCenter()) 

571 sep.append(center.separation(tractCenter)) 

572 

573 return np.argmin(sep) 

574 

575 def prepareCalibratedExposure(self, exposure, externalSkyWcsCatalog=None, externalPhotoCalibCatalog=None): 

576 """Prepare a calibrated exposure and apply external calibrations 

577 if so configured. 

578 

579 Parameters 

580 ---------- 

581 exposure : `lsst.afw.image.exposure.Exposure` 

582 Input exposure to adjust calibrations. 

583 externalSkyWcsCatalog : `lsst.afw.table.ExposureCatalog`, optional 

584 Exposure catalog with external skyWcs to be applied 

585 if config.doApplyExternalSkyWcs=True. Catalog uses the detector id 

586 for the catalog id, sorted on id for fast lookup. 

587 externalPhotoCalibCatalog : `lsst.afw.table.ExposureCatalog`, optional 

588 Exposure catalog with external photoCalib to be applied 

589 if config.doApplyExternalPhotoCalib=True. Catalog uses the detector 

590 id for the catalog id, sorted on id for fast lookup. 

591 

592 Returns 

593 ------- 

594 exposure : `lsst.afw.image.exposure.Exposure` 

595 Exposure with adjusted calibrations. 

596 """ 

597 detectorId = exposure.getInfo().getDetector().getId() 

598 

599 if externalPhotoCalibCatalog is not None: 

600 row = externalPhotoCalibCatalog.find(detectorId) 

601 if row is None: 

602 self.log.warning("Detector id %s not found in externalPhotoCalibCatalog; " 

603 "Using original photoCalib.", detectorId) 

604 else: 

605 photoCalib = row.getPhotoCalib() 

606 if photoCalib is None: 

607 self.log.warning("Detector id %s has None for photoCalib in externalPhotoCalibCatalog; " 

608 "Using original photoCalib.", detectorId) 

609 else: 

610 exposure.setPhotoCalib(photoCalib) 

611 

612 if externalSkyWcsCatalog is not None: 

613 row = externalSkyWcsCatalog.find(detectorId) 

614 if row is None: 

615 self.log.warning("Detector id %s not found in externalSkyWcsCatalog; " 

616 "Using original skyWcs.", detectorId) 

617 else: 

618 skyWcs = row.getWcs() 

619 if skyWcs is None: 

620 self.log.warning("Detector id %s has None for skyWcs in externalSkyWcsCatalog; " 

621 "Using original skyWcs.", detectorId) 

622 else: 

623 exposure.setWcs(skyWcs) 

624 

625 return exposure 

626 

627 def addCalibColumns(self, catalog, exposure, exposureIdInfo, **kwargs): 

628 """Add replace columns with calibs evaluated at each centroid 

629 

630 Add or replace 'base_LocalWcs' `base_LocalPhotoCalib' columns in a 

631 a source catalog, by rerunning the plugins. 

632 

633 Parameters 

634 ---------- 

635 catalog : `lsst.afw.table.SourceCatalog` 

636 catalog to which calib columns will be added 

637 exposure : `lsst.afw.image.exposure.Exposure` 

638 Exposure with attached PhotoCalibs and SkyWcs attributes to be 

639 reevaluated at local centroids. Pixels are not required. 

640 exposureIdInfo : `lsst.obs.base.ExposureIdInfo` 

641 

642 Returns 

643 ------- 

644        newCat : `lsst.afw.table.SourceCatalog`

645 Source Catalog with requested local calib columns 

646 """ 

647 measureConfig = SingleFrameMeasurementTask.ConfigClass() 

648 measureConfig.doReplaceWithNoise = False 

649 

650 measureConfig.plugins.names = [] 

651 if self.config.doReevaluateSkyWcs: 

652 measureConfig.plugins.names.add('base_LocalWcs') 

653 self.log.info("Re-evaluating base_LocalWcs plugin") 

654 if self.config.doReevaluatePhotoCalib: 

655 measureConfig.plugins.names.add('base_LocalPhotoCalib') 

656 self.log.info("Re-evaluating base_LocalPhotoCalib plugin") 

657 pluginsNotToCopy = tuple(measureConfig.plugins.names) 

658 

659 # Create a new schema and catalog 

660 # Copy all columns from original except for the ones to reevaluate 

661 aliasMap = catalog.schema.getAliasMap() 

662 mapper = afwTable.SchemaMapper(catalog.schema) 

663 for item in catalog.schema: 

664 if not item.field.getName().startswith(pluginsNotToCopy): 

665 mapper.addMapping(item.key) 

666 

667 schema = mapper.getOutputSchema() 

668 measurement = SingleFrameMeasurementTask(config=measureConfig, schema=schema) 

669 schema.setAliasMap(aliasMap) 

670 newCat = afwTable.SourceCatalog(schema) 

671 newCat.extend(catalog, mapper=mapper) 

672 

673 # Fluxes in sourceCatalogs are in counts, so there are no fluxes to 

674 # update here. LocalPhotoCalibs are applied during transform tasks. 

675 # Update coord_ra/coord_dec, which are expected to be positions on the 

676 # sky and are used as such in sdm tables without transform 

677 if self.config.doReevaluateSkyWcs: 

678 afwTable.updateSourceCoords(exposure.wcs, newCat) 

679 

680 measurement.run(measCat=newCat, exposure=exposure, exposureId=exposureIdInfo.expId) 

681 

682 return newCat 

683 

684 

685class PostprocessAnalysis(object): 

686 """Calculate columns from ParquetTable. 

687 

688 This object manages and organizes an arbitrary set of computations 

689 on a catalog. The catalog is defined by a 

690 `lsst.pipe.tasks.parquetTable.ParquetTable` object (or list thereof), such 

691 as a `deepCoadd_obj` dataset, and the computations are defined by a 

692    collection of `lsst.pipe.tasks.functors.Functor` objects (or, equivalently,

693 a `CompositeFunctor`). 

694 

695 After the object is initialized, accessing the `.df` attribute (which 

696 holds the `pandas.DataFrame` containing the results of the calculations) 

697 triggers computation of said dataframe. 

698 

699 One of the conveniences of using this object is the ability to define a 

700 desired common filter for all functors. This enables the same functor 

701 collection to be passed to several different `PostprocessAnalysis` objects 

702 without having to change the original functor collection, since the `filt` 

703 keyword argument of this object triggers an overwrite of the `filt` 

704 property for all functors in the collection. 

705 

706 This object also allows a list of refFlags to be passed, and defines a set 

707 of default refFlags that are always included even if not requested. 

708 

709    If a list of `ParquetTable` objects is passed, rather than a single one,

710 then the calculations will be mapped over all the input catalogs. In 

711 principle, it should be straightforward to parallelize this activity, but 

712 initial tests have failed (see TODO in code comments). 

713 

714 Parameters 

715 ---------- 

716    parq : `lsst.pipe.tasks.parquetTable.ParquetTable` (or list of such)

717 Source catalog(s) for computation. 

718 

719 functors : `list`, `dict`, or `lsst.pipe.tasks.functors.CompositeFunctor` 

720 Computations to do (functors that act on `parq`). 

721 If a dict, the output 

722 DataFrame will have columns keyed accordingly. 

723 If a list, the column keys will come from the 

724 `.shortname` attribute of each functor. 

725 

726 filt : `str`, optional 

727 Filter in which to calculate. If provided, 

728 this will overwrite any existing `.filt` attribute 

729 of the provided functors. 

730 

731 flags : `list`, optional 

732 List of flags (per-band) to include in output table. 

733 Taken from the `meas` dataset if applied to a multilevel Object Table. 

734 

735 refFlags : `list`, optional 

736 List of refFlags (only reference band) to include in output table. 

737 

738 forcedFlags : `list`, optional 

739 List of flags (per-band) to include in output table. 

740 Taken from the ``forced_src`` dataset if applied to a 

741 multilevel Object Table. Intended for flags from measurement plugins 

742 only run during multi-band forced-photometry. 

743 """ 

744 _defaultRefFlags = [] 

745 _defaultFuncs = () 

746 

747 def __init__(self, parq, functors, filt=None, flags=None, refFlags=None, forcedFlags=None): 

748 self.parq = parq 

749 self.functors = functors 

750 

751 self.filt = filt 

752 self.flags = list(flags) if flags is not None else [] 

753 self.forcedFlags = list(forcedFlags) if forcedFlags is not None else [] 

754 self.refFlags = list(self._defaultRefFlags) 

755 if refFlags is not None: 

756 self.refFlags += list(refFlags) 

757 

758 self._df = None 

759 

760 @property 

761 def defaultFuncs(self): 

762 funcs = dict(self._defaultFuncs) 

763 return funcs 

764 

765 @property 

766 def func(self): 

767 additionalFuncs = self.defaultFuncs 

768 additionalFuncs.update({flag: Column(flag, dataset='forced_src') for flag in self.forcedFlags}) 

769 additionalFuncs.update({flag: Column(flag, dataset='ref') for flag in self.refFlags}) 

770 additionalFuncs.update({flag: Column(flag, dataset='meas') for flag in self.flags}) 

771 

772 if isinstance(self.functors, CompositeFunctor): 

773 func = self.functors 

774 else: 

775 func = CompositeFunctor(self.functors) 

776 

777 func.funcDict.update(additionalFuncs) 

778 func.filt = self.filt 

779 

780 return func 

781 

782 @property 

783 def noDupCols(self): 

784 return [name for name, func in self.func.funcDict.items() if func.noDup or func.dataset == 'ref'] 

785 

786 @property 

787 def df(self): 

788 if self._df is None: 

789 self.compute() 

790 return self._df 

791 

792 def compute(self, dropna=False, pool=None): 

793 # map over multiple parquet tables 

794 if type(self.parq) in (list, tuple): 

795 if pool is None: 

796 dflist = [self.func(parq, dropna=dropna) for parq in self.parq] 

797 else: 

798 # TODO: Figure out why this doesn't work (pyarrow pickling 

799 # issues?) 

800 dflist = pool.map(functools.partial(self.func, dropna=dropna), self.parq) 

801 self._df = pd.concat(dflist) 

802 else: 

803 self._df = self.func(self.parq, dropna=dropna) 

804 

805 return self._df 

806 
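# A short usage sketch, assuming `parq` holds a ParquetTable such as a
# deepCoadd_obj patch and that the Mag functor and the flag column names below
# are available (they are illustrative, not required by this class):
#
#     from lsst.pipe.tasks.functors import Mag
#     funcs = {'psfMag': Mag('base_PsfFlux', dataset='meas')}
#     analysis = PostprocessAnalysis(parq, funcs, filt='g',
#                                    flags=['base_PixelFlags_flag_saturated'],
#                                    refFlags=['detect_isPrimary'])
#     df = analysis.df  # accessing .df triggers the computation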

807 

808class TransformCatalogBaseConnections(pipeBase.PipelineTaskConnections, 

809 dimensions=()): 

810 """Expected Connections for subclasses of TransformCatalogBaseTask. 

811 

812 Must be subclassed. 

813 """ 

814 inputCatalog = connectionTypes.Input( 

815 name="", 

816 storageClass="DataFrame", 

817 ) 

818 outputCatalog = connectionTypes.Output( 

819 name="", 

820 storageClass="DataFrame", 

821 ) 

822 

823 

824class TransformCatalogBaseConfig(pipeBase.PipelineTaskConfig, 

825 pipelineConnections=TransformCatalogBaseConnections): 

826 functorFile = pexConfig.Field( 

827 dtype=str, 

828 doc="Path to YAML file specifying Science Data Model functors to use " 

829 "when copying columns and computing calibrated values.", 

830 default=None, 

831 optional=True 

832 ) 

833 primaryKey = pexConfig.Field( 

834 dtype=str, 

835 doc="Name of column to be set as the DataFrame index. If None, the index" 

836 "will be named `id`", 

837 default=None, 

838 optional=True 

839 ) 

840 columnsFromDataId = pexConfig.ListField( 

841 dtype=str, 

842 default=None, 

843 optional=True, 

844 doc="Columns to extract from the dataId", 

845 ) 

846 

847 

848class TransformCatalogBaseTask(pipeBase.PipelineTask): 

849 """Base class for transforming/standardizing a catalog 

850 

851 by applying functors that convert units and apply calibrations. 

852 The purpose of this task is to perform a set of computations on 

853 an input `ParquetTable` dataset (such as `deepCoadd_obj`) and write the 

854 results to a new dataset (which needs to be declared in an `outputDataset` 

855 attribute). 

856 

857 The calculations to be performed are defined in a YAML file that specifies 

858 a set of functors to be computed, provided as 

859 a `--functorFile` config parameter. An example of such a YAML file 

860 is the following: 

861 

862 funcs: 

863 psfMag: 

864 functor: Mag 

865 args: 

866 - base_PsfFlux 

867 filt: HSC-G 

868 dataset: meas 

869 cmodel_magDiff: 

870 functor: MagDiff 

871 args: 

872 - modelfit_CModel 

873 - base_PsfFlux 

874 filt: HSC-G 

875 gauss_magDiff: 

876 functor: MagDiff 

877 args: 

878 - base_GaussianFlux 

879 - base_PsfFlux 

880 filt: HSC-G 

881 count: 

882 functor: Column 

883 args: 

884 - base_InputCount_value 

885 filt: HSC-G 

886 deconvolved_moments: 

887 functor: DeconvolvedMoments 

888 filt: HSC-G 

889 dataset: forced_src 

890 refFlags: 

891 - calib_psfUsed 

892 - merge_measurement_i 

893 - merge_measurement_r 

894 - merge_measurement_z 

895 - merge_measurement_y 

896 - merge_measurement_g 

897 - base_PixelFlags_flag_inexact_psfCenter 

898 - detect_isPrimary 

899 

900    The names for each entry under "funcs" will become the names of columns in

901 the output dataset. All the functors referenced are defined in 

902 `lsst.pipe.tasks.functors`. Positional arguments to be passed to each 

903 functor are in the `args` list, and any additional entries for each column 

904 other than "functor" or "args" (e.g., `'filt'`, `'dataset'`) are treated as 

905 keyword arguments to be passed to the functor initialization. 

906 

907 The "flags" entry is the default shortcut for `Column` functors. 

908 All columns listed under "flags" will be copied to the output table 

909 untransformed. They can be of any datatype. 

910    In the special case of transforming a multi-level object table with

911    band and dataset indices (deepCoadd_obj), these will be taken from the

912 `meas` dataset and exploded out per band. 

913 

914 There are two special shortcuts that only apply when transforming 

915 multi-level Object (deepCoadd_obj) tables: 

916 - The "refFlags" entry is shortcut for `Column` functor 

917 taken from the `'ref'` dataset if transforming an ObjectTable. 

918 - The "forcedFlags" entry is shortcut for `Column` functors. 

919 taken from the ``forced_src`` dataset if transforming an ObjectTable. 

920 These are expanded out per band. 

921 

922 

923 This task uses the `lsst.pipe.tasks.postprocess.PostprocessAnalysis` object 

924    to organize and execute the calculations.

925 """ 

926 @property 

927 def _DefaultName(self): 

928 raise NotImplementedError('Subclass must define "_DefaultName" attribute') 

929 

930 @property 

931 def outputDataset(self): 

932 raise NotImplementedError('Subclass must define "outputDataset" attribute') 

933 

934 @property 

935 def inputDataset(self): 

936 raise NotImplementedError('Subclass must define "inputDataset" attribute') 

937 

938 @property 

939 def ConfigClass(self): 

940 raise NotImplementedError('Subclass must define "ConfigClass" attribute') 

941 

942 def __init__(self, *args, **kwargs): 

943 super().__init__(*args, **kwargs) 

944 if self.config.functorFile: 

945            self.log.info('Loading transform functor definitions from %s',

946 self.config.functorFile) 

947 self.funcs = CompositeFunctor.from_file(self.config.functorFile) 

948 self.funcs.update(dict(PostprocessAnalysis._defaultFuncs)) 

949 else: 

950 self.funcs = None 

951 

952 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

953 inputs = butlerQC.get(inputRefs) 

954 if self.funcs is None: 

955 raise ValueError("config.functorFile is None. " 

956 "Must be a valid path to yaml in order to run Task as a PipelineTask.") 

957 result = self.run(parq=inputs['inputCatalog'], funcs=self.funcs, 

958 dataId=outputRefs.outputCatalog.dataId.full) 

959 outputs = pipeBase.Struct(outputCatalog=result) 

960 butlerQC.put(outputs, outputRefs) 

961 

962 def run(self, parq, funcs=None, dataId=None, band=None): 

963 """Do postprocessing calculations 

964 

965 Takes a `ParquetTable` object and dataId, 

966 returns a dataframe with results of postprocessing calculations. 

967 

968 Parameters 

969 ---------- 

970 parq : `lsst.pipe.tasks.parquetTable.ParquetTable` 

971 ParquetTable from which calculations are done. 

972 funcs : `lsst.pipe.tasks.functors.Functors` 

973 Functors to apply to the table's columns 

974 dataId : dict, optional 

975 Used to add a `patchId` column to the output dataframe. 

976 band : `str`, optional 

977 Filter band that is being processed. 

978 

979 Returns 

980        -------

981 df : `pandas.DataFrame` 

982 """ 

983 self.log.info("Transforming/standardizing the source table dataId: %s", dataId) 

984 

985 df = self.transform(band, parq, funcs, dataId).df 

986 self.log.info("Made a table of %d columns and %d rows", len(df.columns), len(df)) 

987 return df 

988 

989 def getFunctors(self): 

990 return self.funcs 

991 

992 def getAnalysis(self, parq, funcs=None, band=None): 

993 if funcs is None: 

994 funcs = self.funcs 

995 analysis = PostprocessAnalysis(parq, funcs, filt=band) 

996 return analysis 

997 

998 def transform(self, band, parq, funcs, dataId): 

999 analysis = self.getAnalysis(parq, funcs=funcs, band=band) 

1000 df = analysis.df 

1001 if dataId and self.config.columnsFromDataId: 

1002 for key in self.config.columnsFromDataId: 

1003 if key in dataId: 

1004 df[str(key)] = dataId[key] 

1005 else: 

1006 raise ValueError(f"'{key}' in config.columnsFromDataId not found in dataId: {dataId}") 

1007 

1008 if self.config.primaryKey: 

1009 if df.index.name != self.config.primaryKey and self.config.primaryKey in df: 

1010 df.reset_index(inplace=True, drop=True) 

1011 df.set_index(self.config.primaryKey, inplace=True) 

1012 

1013 return pipeBase.Struct( 

1014 df=df, 

1015 analysis=analysis 

1016 ) 

1017 

1018 def write(self, df, parqRef): 

1019 parqRef.put(ParquetTable(dataFrame=df), self.outputDataset) 

1020 

1021 def writeMetadata(self, dataRef): 

1022 """No metadata to write. 

1023 """ 

1024 pass 

1025 

1026 

1027class TransformObjectCatalogConnections(pipeBase.PipelineTaskConnections, 

1028 defaultTemplates={"coaddName": "deep"}, 

1029 dimensions=("tract", "patch", "skymap")): 

1030 inputCatalog = connectionTypes.Input( 

1031 doc="The vertical concatenation of the deepCoadd_{ref|meas|forced_src} catalogs, " 

1032 "stored as a DataFrame with a multi-level column index per-patch.", 

1033 dimensions=("tract", "patch", "skymap"), 

1034 storageClass="DataFrame", 

1035 name="{coaddName}Coadd_obj", 

1036 deferLoad=True, 

1037 ) 

1038 outputCatalog = connectionTypes.Output( 

1039 doc="Per-Patch Object Table of columns transformed from the deepCoadd_obj table per the standard " 

1040 "data model.", 

1041 dimensions=("tract", "patch", "skymap"), 

1042 storageClass="DataFrame", 

1043 name="objectTable" 

1044 ) 

1045 

1046 

1047class TransformObjectCatalogConfig(TransformCatalogBaseConfig, 

1048 pipelineConnections=TransformObjectCatalogConnections): 

1049 coaddName = pexConfig.Field( 

1050 dtype=str, 

1051 default="deep", 

1052 doc="Name of coadd" 

1053 ) 

1054 # TODO: remove in DM-27177 

1055 filterMap = pexConfig.DictField( 

1056 keytype=str, 

1057 itemtype=str, 

1058 default={}, 

1059 doc=("Dictionary mapping full filter name to short one for column name munging." 

1060 "These filters determine the output columns no matter what filters the " 

1061 "input data actually contain."), 

1062 deprecated=("Coadds are now identified by the band, so this transform is unused." 

1063 "Will be removed after v22.") 

1064 ) 

1065 outputBands = pexConfig.ListField( 

1066 dtype=str, 

1067 default=None, 

1068 optional=True, 

1069 doc=("These bands and only these bands will appear in the output," 

1070 " NaN-filled if the input does not include them." 

1071 " If None, then use all bands found in the input.") 

1072 ) 

1073 camelCase = pexConfig.Field( 

1074 dtype=bool, 

1075 default=False, 

1076 doc=("Write per-band columns names with camelCase, else underscore " 

1077 "For example: gPsFlux instead of g_PsFlux.") 

1078 ) 

1079 multilevelOutput = pexConfig.Field( 

1080 dtype=bool, 

1081 default=False, 

1082 doc=("Whether results dataframe should have a multilevel column index (True) or be flat " 

1083 "and name-munged (False).") 

1084 ) 

1085 goodFlags = pexConfig.ListField( 

1086 dtype=str, 

1087 default=[], 

1088 doc=("List of 'good' flags that should be set False when populating empty tables. " 

1089 "All other flags are considered to be 'bad' flags and will be set to True.") 

1090 ) 

1091 floatFillValue = pexConfig.Field( 

1092 dtype=float, 

1093 default=np.nan, 

1094 doc="Fill value for float fields when populating empty tables." 

1095 ) 

1096 integerFillValue = pexConfig.Field( 

1097 dtype=int, 

1098 default=-1, 

1099 doc="Fill value for integer fields when populating empty tables." 

1100 ) 

1101 

1102 def setDefaults(self): 

1103 super().setDefaults() 

1104 self.functorFile = os.path.join('$PIPE_TASKS_DIR', 'schemas', 'Object.yaml') 

1105 self.primaryKey = 'objectId' 

1106 self.columnsFromDataId = ['tract', 'patch'] 

1107 self.goodFlags = ['calib_astrometry_used', 

1108 'calib_photometry_reserved', 

1109 'calib_photometry_used', 

1110 'calib_psf_candidate', 

1111 'calib_psf_reserved', 

1112 'calib_psf_used'] 

1113 

1114 
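# Example pipeline-config overrides for this task; the band list and flag value
# below are illustrative, not defaults:
#
#     config.outputBands = ['g', 'r', 'i', 'z', 'y']
#     config.camelCase = True          # write gPsFlux instead of g_PsFlux
#     config.multilevelOutput = False  # flatten per-band columns via flattenFilters
#     config.goodFlags = ['calib_psf_used']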

1115class TransformObjectCatalogTask(TransformCatalogBaseTask): 

1116 """Produce a flattened Object Table to match the format specified in 

1117 sdm_schemas. 

1118 

1119 Do the same set of postprocessing calculations on all bands. 

1120 

1121    This is identical to `TransformCatalogBaseTask`, except that it does

1122    the specified functor calculations for all filters present in the

1123    input `deepCoadd_obj` table. Any specific `"filt"` keywords specified

1124    by the YAML file will be superseded.

1125 """ 

1126 _DefaultName = "transformObjectCatalog" 

1127 ConfigClass = TransformObjectCatalogConfig 

1128 

1129 def run(self, parq, funcs=None, dataId=None, band=None): 

1130 # NOTE: band kwarg is ignored here. 

1131 dfDict = {} 

1132 analysisDict = {} 

1133 templateDf = pd.DataFrame() 

1134 

1135 if isinstance(parq, DeferredDatasetHandle): 

1136 columns = parq.get(component='columns') 

1137 inputBands = columns.unique(level=1).values 

1138 else: 

1139 inputBands = parq.columnLevelNames['band'] 

1140 

1141 outputBands = self.config.outputBands if self.config.outputBands else inputBands 

1142 

1143 # Perform transform for data of filters that exist in parq. 

1144 for inputBand in inputBands: 

1145 if inputBand not in outputBands: 

1146 self.log.info("Ignoring %s band data in the input", inputBand) 

1147 continue 

1148 self.log.info("Transforming the catalog of band %s", inputBand) 

1149 result = self.transform(inputBand, parq, funcs, dataId) 

1150 dfDict[inputBand] = result.df 

1151 analysisDict[inputBand] = result.analysis 

1152 if templateDf.empty: 

1153 templateDf = result.df 

1154 

1155 # Put filler values in columns of other wanted bands 

1156 for filt in outputBands: 

1157 if filt not in dfDict: 

1158 self.log.info("Adding empty columns for band %s", filt) 

1159 dfTemp = templateDf.copy() 

1160 for col in dfTemp.columns: 

1161 testValue = dfTemp[col].values[0] 

1162 if isinstance(testValue, (np.bool_, pd.BooleanDtype)): 

1163 # Boolean flag type, check if it is a "good" flag 

1164 if col in self.config.goodFlags: 

1165 fillValue = False 

1166 else: 

1167 fillValue = True 

1168 elif isinstance(testValue, numbers.Integral): 

1169 # Checking numbers.Integral catches all flavors 

1170 # of python, numpy, pandas, etc. integers. 

1171 # We must ensure this is not an unsigned integer. 

1172 if isinstance(testValue, np.unsignedinteger): 

1173 raise ValueError("Parquet tables may not have unsigned integer columns.") 

1174 else: 

1175 fillValue = self.config.integerFillValue 

1176 else: 

1177 fillValue = self.config.floatFillValue 

1178 dfTemp[col].values[:] = fillValue 

1179 dfDict[filt] = dfTemp 

1180 

1181 # This makes a multilevel column index, with band as first level 

1182 df = pd.concat(dfDict, axis=1, names=['band', 'column']) 

1183 

1184 if not self.config.multilevelOutput: 

1185 noDupCols = list(set.union(*[set(v.noDupCols) for v in analysisDict.values()])) 

1186 if self.config.primaryKey in noDupCols: 

1187 noDupCols.remove(self.config.primaryKey) 

1188 if dataId and self.config.columnsFromDataId: 

1189 noDupCols += self.config.columnsFromDataId 

1190 df = flattenFilters(df, noDupCols=noDupCols, camelCase=self.config.camelCase, 

1191 inputBands=inputBands) 

1192 

1193 self.log.info("Made a table of %d columns and %d rows", len(df.columns), len(df)) 

1194 

1195 return df 

1196 

1197 

1198class TractObjectDataIdContainer(CoaddDataIdContainer): 

1199 

1200 def makeDataRefList(self, namespace): 

1201 """Make self.refList from self.idList 

1202 

1203 Generate a list of data references given tract and/or patch. 

1204 This was adapted from `TractQADataIdContainer`, which was 

1205        `TractDataIdContainer` modified to not require "filter".

1206 Only existing dataRefs are returned. 

1207 """ 

1208 def getPatchRefList(tract): 

1209 return [namespace.butler.dataRef(datasetType=self.datasetType, 

1210 tract=tract.getId(), 

1211 patch="%d,%d" % patch.getIndex()) for patch in tract] 

1212 

1213 tractRefs = defaultdict(list) # Data references for each tract 

1214 for dataId in self.idList: 

1215 skymap = self.getSkymap(namespace) 

1216 

1217 if "tract" in dataId: 

1218 tractId = dataId["tract"] 

1219 if "patch" in dataId: 

1220 tractRefs[tractId].append(namespace.butler.dataRef(datasetType=self.datasetType, 

1221 tract=tractId, 

1222 patch=dataId['patch'])) 

1223 else: 

1224 tractRefs[tractId] += getPatchRefList(skymap[tractId]) 

1225 else: 

1226 tractRefs = dict((tract.getId(), tractRefs.get(tract.getId(), []) + getPatchRefList(tract)) 

1227 for tract in skymap) 

1228 outputRefList = [] 

1229 for tractRefList in tractRefs.values(): 

1230 existingRefs = [ref for ref in tractRefList if ref.datasetExists()] 

1231 outputRefList.append(existingRefs) 

1232 

1233 self.refList = outputRefList 

1234 

1235 

1236class ConsolidateObjectTableConnections(pipeBase.PipelineTaskConnections, 

1237 dimensions=("tract", "skymap")): 

1238 inputCatalogs = connectionTypes.Input( 

1239 doc="Per-Patch objectTables conforming to the standard data model.", 

1240 name="objectTable", 

1241 storageClass="DataFrame", 

1242 dimensions=("tract", "patch", "skymap"), 

1243 multiple=True, 

1244 ) 

1245 outputCatalog = connectionTypes.Output( 

1246 doc="Pre-tract horizontal concatenation of the input objectTables", 

1247 name="objectTable_tract", 

1248 storageClass="DataFrame", 

1249 dimensions=("tract", "skymap"), 

1250 ) 

1251 

1252 

1253class ConsolidateObjectTableConfig(pipeBase.PipelineTaskConfig, 

1254 pipelineConnections=ConsolidateObjectTableConnections): 

1255 coaddName = pexConfig.Field( 

1256 dtype=str, 

1257 default="deep", 

1258 doc="Name of coadd" 

1259 ) 

1260 

1261 

1262class ConsolidateObjectTableTask(CmdLineTask, pipeBase.PipelineTask): 

1263 """Write patch-merged source tables to a tract-level parquet file. 

1264 

1265    Concatenates `objectTable` list into a per-tract `objectTable_tract`.

1266 """ 

1267 _DefaultName = "consolidateObjectTable" 

1268 ConfigClass = ConsolidateObjectTableConfig 

1269 

1270 inputDataset = 'objectTable' 

1271 outputDataset = 'objectTable_tract' 

1272 

1273 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

1274 inputs = butlerQC.get(inputRefs) 

1275 self.log.info("Concatenating %s per-patch Object Tables", 

1276 len(inputs['inputCatalogs'])) 

1277 df = pd.concat(inputs['inputCatalogs']) 

1278 butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs) 

1279 

1280 @classmethod 

1281 def _makeArgumentParser(cls): 

1282 parser = ArgumentParser(name=cls._DefaultName) 

1283 

1284 parser.add_id_argument("--id", cls.inputDataset, 

1285 help="data ID, e.g. --id tract=12345", 

1286 ContainerClass=TractObjectDataIdContainer) 

1287 return parser 

1288 

1289 def runDataRef(self, patchRefList): 

1290 df = pd.concat([patchRef.get().toDataFrame() for patchRef in patchRefList]) 

1291 patchRefList[0].put(ParquetTable(dataFrame=df), self.outputDataset) 

1292 

1293 def writeMetadata(self, dataRef): 

1294 """No metadata to write. 

1295 """ 

1296 pass 

1297 

1298 

1299class TransformSourceTableConnections(pipeBase.PipelineTaskConnections, 

1300 defaultTemplates={"catalogType": ""}, 

1301 dimensions=("instrument", "visit", "detector")): 

1302 

1303 inputCatalog = connectionTypes.Input( 

1304 doc="Wide input catalog of sources produced by WriteSourceTableTask", 

1305 name="{catalogType}source", 

1306 storageClass="DataFrame", 

1307 dimensions=("instrument", "visit", "detector"), 

1308 deferLoad=True 

1309 ) 

1310 outputCatalog = connectionTypes.Output( 

1311 doc="Narrower, per-detector Source Table transformed and converted per a " 

1312 "specified set of functors", 

1313 name="{catalogType}sourceTable", 

1314 storageClass="DataFrame", 

1315 dimensions=("instrument", "visit", "detector") 

1316 ) 

1317 

1318 

1319class TransformSourceTableConfig(TransformCatalogBaseConfig, 

1320 pipelineConnections=TransformSourceTableConnections): 

1321 

1322 def setDefaults(self): 

1323 super().setDefaults() 

1324 self.functorFile = os.path.join('$PIPE_TASKS_DIR', 'schemas', 'Source.yaml') 

1325 self.primaryKey = 'sourceId' 

1326 self.columnsFromDataId = ['visit', 'detector', 'band', 'physical_filter'] 

1327 

1328 

1329class TransformSourceTableTask(TransformCatalogBaseTask): 

1330 """Transform/standardize a source catalog 

1331 """ 

1332 _DefaultName = "transformSourceTable" 

1333 ConfigClass = TransformSourceTableConfig 

1334 

1335 

1336class ConsolidateVisitSummaryConnections(pipeBase.PipelineTaskConnections, 

1337 dimensions=("instrument", "visit",), 

1338 defaultTemplates={"calexpType": ""}): 

1339 calexp = connectionTypes.Input( 

1340 doc="Processed exposures used for metadata", 

1341 name="{calexpType}calexp", 

1342 storageClass="ExposureF", 

1343 dimensions=("instrument", "visit", "detector"), 

1344 deferLoad=True, 

1345 multiple=True, 

1346 ) 

1347 visitSummary = connectionTypes.Output( 

1348 doc=("Per-visit consolidated exposure metadata. These catalogs use " 

1349 "detector id for the id and are sorted for fast lookups of a " 

1350 "detector."), 

1351 name="{calexpType}visitSummary", 

1352 storageClass="ExposureCatalog", 

1353 dimensions=("instrument", "visit"), 

1354 ) 

1355 

1356 

1357class ConsolidateVisitSummaryConfig(pipeBase.PipelineTaskConfig, 

1358 pipelineConnections=ConsolidateVisitSummaryConnections): 

1359 """Config for ConsolidateVisitSummaryTask""" 

1360 pass 

1361 

1362 

1363class ConsolidateVisitSummaryTask(pipeBase.PipelineTask, pipeBase.CmdLineTask): 

1364 """Task to consolidate per-detector visit metadata. 

1365 

1366 This task aggregates the following metadata from all the detectors in a 

1367 single visit into an exposure catalog: 

1368 - The visitInfo. 

1369 - The wcs. 

1370 - The photoCalib. 

1371 - The physical_filter and band (if available). 

1372 - The psf size, shape, and effective area at the center of the detector. 

1373 - The corners of the bounding box in right ascension/declination. 

1374 

1375 Other quantities such as Detector, Psf, ApCorrMap, and TransmissionCurve 

1376 are not persisted here because of storage concerns, and because of their 

1377 limited utility as summary statistics. 

1378 

1379 Tests for this task are performed in ci_hsc_gen3. 

1380 """ 

1381 _DefaultName = "consolidateVisitSummary" 

1382 ConfigClass = ConsolidateVisitSummaryConfig 

1383 

1384 @classmethod 

1385 def _makeArgumentParser(cls): 

1386 parser = ArgumentParser(name=cls._DefaultName) 

1387 

1388 parser.add_id_argument("--id", "calexp", 

1389 help="data ID, e.g. --id visit=12345", 

1390 ContainerClass=VisitDataIdContainer) 

1391 return parser 

1392 

1393 def writeMetadata(self, dataRef): 

1394 """No metadata to persist, so override to remove metadata persistance. 

1395 """ 

1396 pass 

1397 

1398 def writeConfig(self, butler, clobber=False, doBackup=True): 

1399 """No config to persist, so override to remove config persistance. 

1400 """ 

1401 pass 

1402 

1403 def runDataRef(self, dataRefList): 

1404 visit = dataRefList[0].dataId['visit'] 

1405 

1406 self.log.debug("Concatenating metadata from %d per-detector calexps (visit %d)", 

1407 len(dataRefList), visit) 

1408 

1409 expCatalog = self._combineExposureMetadata(visit, dataRefList, isGen3=False) 

1410 

1411 dataRefList[0].put(expCatalog, 'visitSummary', visit=visit) 

1412 

1413 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

1414 dataRefs = butlerQC.get(inputRefs.calexp) 

1415 visit = dataRefs[0].dataId.byName()['visit'] 

1416 

1417 self.log.debug("Concatenating metadata from %d per-detector calexps (visit %d)", 

1418 len(dataRefs), visit) 

1419 

1420 expCatalog = self._combineExposureMetadata(visit, dataRefs) 

1421 

1422 butlerQC.put(expCatalog, outputRefs.visitSummary) 

1423 

1424 def _combineExposureMetadata(self, visit, dataRefs, isGen3=True): 

1425 """Make a combined exposure catalog from a list of dataRefs. 

1426 These dataRefs must point to exposures with wcs, summaryStats, 

1427 and other visit metadata. 

1428 

1429 Parameters 

1430 ---------- 

1431 visit : `int` 

1432 Visit identification number. 

1433 dataRefs : `list` 

1434 List of dataRefs in visit. May be list of 

1435 `lsst.daf.persistence.ButlerDataRef` (Gen2) or 

1436 `lsst.daf.butler.DeferredDatasetHandle` (Gen3). 

1437 isGen3 : `bool`, optional 

1438 Specifies if this is a Gen3 list of datarefs. 

1439 

1440 Returns 

1441 ------- 

1442 visitSummary : `lsst.afw.table.ExposureCatalog` 

1443 Exposure catalog with per-detector summary information. 

1444 """ 

1445 schema = self._makeVisitSummarySchema() 

1446 cat = afwTable.ExposureCatalog(schema) 

1447 cat.resize(len(dataRefs)) 

1448 

1449 cat['visit'] = visit 

1450 

1451 for i, dataRef in enumerate(dataRefs): 

1452 if isGen3: 

1453 visitInfo = dataRef.get(component='visitInfo') 

1454 filterLabel = dataRef.get(component='filter') 

1455 summaryStats = dataRef.get(component='summaryStats') 

1456 detector = dataRef.get(component='detector') 

1457 wcs = dataRef.get(component='wcs') 

1458 photoCalib = dataRef.get(component='photoCalib') 


1460 bbox = dataRef.get(component='bbox') 

1461 validPolygon = dataRef.get(component='validPolygon') 

1462 else: 

1463 # Note that we need to read the calexp because there is 

1464 # no magic access to the psf except through the exposure. 

1465 gen2_read_bbox = lsst.geom.BoxI(lsst.geom.PointI(0, 0), lsst.geom.PointI(1, 1)) 

1466 exp = dataRef.get(datasetType='calexp_sub', bbox=gen2_read_bbox) 

1467 visitInfo = exp.getInfo().getVisitInfo() 

1468 filterLabel = dataRef.get("calexp_filter") 

1469 summaryStats = exp.getInfo().getSummaryStats() 

1470 wcs = exp.getWcs() 

1471 photoCalib = exp.getPhotoCalib() 

1472 detector = exp.getDetector() 

1473 bbox = dataRef.get(datasetType='calexp_bbox') 

1474 validPolygon = exp.getInfo().getValidPolygon() 

1475 

1476 rec = cat[i] 

1477 rec.setBBox(bbox) 

1478 rec.setVisitInfo(visitInfo) 

1479 rec.setWcs(wcs) 

1480 rec.setPhotoCalib(photoCalib) 

1481 rec.setValidPolygon(validPolygon) 

1482 

1483 rec['physical_filter'] = filterLabel.physicalLabel if filterLabel.hasPhysicalLabel() else "" 

1484 rec['band'] = filterLabel.bandLabel if filterLabel.hasBandLabel() else "" 

1485 rec.setId(detector.getId()) 

1486 rec['psfSigma'] = summaryStats.psfSigma 

1487 rec['psfIxx'] = summaryStats.psfIxx 

1488 rec['psfIyy'] = summaryStats.psfIyy 

1489 rec['psfIxy'] = summaryStats.psfIxy 

1490 rec['psfArea'] = summaryStats.psfArea 

1491 rec['raCorners'][:] = summaryStats.raCorners 

1492 rec['decCorners'][:] = summaryStats.decCorners 

1493 rec['ra'] = summaryStats.ra 

1494 rec['decl'] = summaryStats.decl 

1495 rec['zenithDistance'] = summaryStats.zenithDistance 

1496 rec['zeroPoint'] = summaryStats.zeroPoint 

1497 rec['skyBg'] = summaryStats.skyBg 

1498 rec['skyNoise'] = summaryStats.skyNoise 

1499 rec['meanVar'] = summaryStats.meanVar 

1500 rec['astromOffsetMean'] = summaryStats.astromOffsetMean 

1501 rec['astromOffsetStd'] = summaryStats.astromOffsetStd 

1502 rec['nPsfStar'] = summaryStats.nPsfStar 

1503 rec['psfStarDeltaE1Median'] = summaryStats.psfStarDeltaE1Median 

1504 rec['psfStarDeltaE2Median'] = summaryStats.psfStarDeltaE2Median 

1505 rec['psfStarDeltaE1Scatter'] = summaryStats.psfStarDeltaE1Scatter 

1506 rec['psfStarDeltaE2Scatter'] = summaryStats.psfStarDeltaE2Scatter 

1507 rec['psfStarDeltaSizeMedian'] = summaryStats.psfStarDeltaSizeMedian 

1508 rec['psfStarDeltaSizeScatter'] = summaryStats.psfStarDeltaSizeScatter 

1509 rec['psfStarScaledDeltaSizeScatter'] = summaryStats.psfStarScaledDeltaSizeScatter 

1510 

1511 metadata = dafBase.PropertyList() 

1512 metadata.add("COMMENT", "Catalog id is detector id, sorted.") 

1513 # We are looping over existing datarefs, so the following is true 

1514 metadata.add("COMMENT", "Only detectors with data have entries.") 

1515 cat.setMetadata(metadata) 

1516 

1517 cat.sort() 

1518 return cat 

1519 

1520 def _makeVisitSummarySchema(self): 

1521 """Make the schema for the visitSummary catalog.""" 

1522 schema = afwTable.ExposureTable.makeMinimalSchema() 

1523 schema.addField('visit', type='L', doc='Visit number') 

1524 schema.addField('physical_filter', type='String', size=32, doc='Physical filter') 

1525 schema.addField('band', type='String', size=32, doc='Name of band') 

1526 schema.addField('psfSigma', type='F', 

1527 doc='PSF model second-moments determinant radius (center of chip) (pixel)') 

1528 schema.addField('psfArea', type='F', 

1529 doc='PSF model effective area (center of chip) (pixel**2)') 

1530 schema.addField('psfIxx', type='F', 

1531 doc='PSF model Ixx (center of chip) (pixel**2)') 

1532 schema.addField('psfIyy', type='F', 

1533 doc='PSF model Iyy (center of chip) (pixel**2)') 

1534 schema.addField('psfIxy', type='F', 

1535 doc='PSF model Ixy (center of chip) (pixel**2)') 

1536 schema.addField('raCorners', type='ArrayD', size=4, 

1537 doc='Right Ascension of bounding box corners (degrees)') 

1538 schema.addField('decCorners', type='ArrayD', size=4, 

1539 doc='Declination of bounding box corners (degrees)') 

1540 schema.addField('ra', type='D', 

1541 doc='Right Ascension of bounding box center (degrees)') 

1542 schema.addField('decl', type='D', 

1543 doc='Declination of bounding box center (degrees)') 

1544 schema.addField('zenithDistance', type='F', 

1545 doc='Zenith distance of bounding box center (degrees)') 

1546 schema.addField('zeroPoint', type='F', 

1547 doc='Mean zeropoint in detector (mag)') 

1548 schema.addField('skyBg', type='F', 

1549 doc='Average sky background (ADU)') 

1550 schema.addField('skyNoise', type='F', 

1551 doc='Average sky noise (ADU)') 

1552 schema.addField('meanVar', type='F', 

1553 doc='Mean variance of the weight plane (ADU**2)') 

1554 schema.addField('astromOffsetMean', type='F', 

1555 doc='Mean offset of astrometric calibration matches (arcsec)') 

1556 schema.addField('astromOffsetStd', type='F', 

1557 doc='Standard deviation of offsets of astrometric calibration matches (arcsec)') 

1558 schema.addField('nPsfStar', type='I', doc='Number of stars used for PSF model') 

1559 schema.addField('psfStarDeltaE1Median', type='F', 

1560 doc='Median E1 residual (starE1 - psfE1) for psf stars') 

1561 schema.addField('psfStarDeltaE2Median', type='F', 

1562 doc='Median E2 residual (starE2 - psfE2) for psf stars') 

1563 schema.addField('psfStarDeltaE1Scatter', type='F', 

1564 doc='Scatter (via MAD) of E1 residual (starE1 - psfE1) for psf stars') 

1565 schema.addField('psfStarDeltaE2Scatter', type='F', 

1566 doc='Scatter (via MAD) of E2 residual (starE2 - psfE2) for psf stars') 

1567 schema.addField('psfStarDeltaSizeMedian', type='F', 

1568 doc='Median size residual (starSize - psfSize) for psf stars (pixel)') 

1569 schema.addField('psfStarDeltaSizeScatter', type='F', 

1570 doc='Scatter (via MAD) of size residual (starSize - psfSize) for psf stars (pixel)') 

1571 schema.addField('psfStarScaledDeltaSizeScatter', type='F', 

1572 doc='Scatter (via MAD) of size residual scaled by median size squared') 

1573 

1574 return schema 

1575 
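
# A minimal, illustrative sketch (hypothetical helper, not used by the
# pipeline) of how a downstream consumer might read the visitSummary
# ExposureCatalog written above. `butler` is assumed to be an initialized
# Gen3 `lsst.daf.butler.Butler`; the dataId values are placeholders.
def _exampleReadVisitSummary(butler, visit, instrument="HSC"):
    """Log per-detector summary values from a visitSummary catalog."""
    visitSummary = butler.get("visitSummary", instrument=instrument, visit=visit)
    for rec in visitSummary:
        # One record per detector; the catalog id is the detector id, and
        # rec.getWcs()/rec.getPhotoCalib() return the per-detector calibrations.
        log.info("detector=%d band=%s psfSigma=%.3f zeroPoint=%.3f",
                 rec.getId(), rec["band"], rec["psfSigma"], rec["zeroPoint"])
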

1576 

1577class VisitDataIdContainer(DataIdContainer): 

1578 """DataIdContainer that groups sensor-level ids by visit. 

1579 """ 

1580 

1581 def makeDataRefList(self, namespace): 

1582 """Make self.refList from self.idList 

1583 

1584 Generate a list of data references grouped by visit. 

1585 

1586 Parameters 

1587 ---------- 

1588 namespace : `argparse.Namespace` 

1589 Namespace used by `lsst.pipe.base.CmdLineTask` to parse command 

1590 line arguments. 

1591 """ 

1592 # Group by visits 

1593 visitRefs = defaultdict(list) 

1594 for dataId in self.idList: 

1595 if "visit" in dataId: 

1596 visitId = dataId["visit"] 

1597 # Collect all dataRefs matching this visit's dataId 

1598 subset = namespace.butler.subset(self.datasetType, dataId=dataId) 

1599 visitRefs[visitId].extend([dataRef for dataRef in subset]) 

1600 

1601 outputRefList = [] 

1602 for refList in visitRefs.values(): 

1603 existingRefs = [ref for ref in refList if ref.datasetExists()] 

1604 if existingRefs: 

1605 outputRefList.append(existingRefs) 

1606 

1607 self.refList = outputRefList 

1608 
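
# A minimal sketch of the grouping pattern makeDataRefList uses above: ids
# that share a visit are collected into one list so that downstream
# processing runs once per visit. The plain-dict dataIds here stand in for
# the Gen2 dataRefs handled by the real container.
def _exampleGroupByVisit(dataIds):
    """Group a list of dataId dicts into {visit: [dataId, ...]}."""
    grouped = defaultdict(list)
    for dataId in dataIds:
        if "visit" in dataId:
            grouped[dataId["visit"]].append(dataId)
    # e.g. [{"visit": 1, "detector": 0}, {"visit": 1, "detector": 1}]
    # -> {1: [both dataIds]}
    return dict(grouped)
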

1609 

1610class ConsolidateSourceTableConnections(pipeBase.PipelineTaskConnections, 

1611 defaultTemplates={"catalogType": ""}, 

1612 dimensions=("instrument", "visit")): 

1613 inputCatalogs = connectionTypes.Input( 

1614 doc="Input per-detector Source Tables", 

1615 name="{catalogType}sourceTable", 

1616 storageClass="DataFrame", 

1617 dimensions=("instrument", "visit", "detector"), 

1618 multiple=True 

1619 ) 

1620 outputCatalog = connectionTypes.Output( 

1621 doc="Per-visit concatenation of Source Table", 

1622 name="{catalogType}sourceTable_visit", 

1623 storageClass="DataFrame", 

1624 dimensions=("instrument", "visit") 

1625 ) 

1626 

1627 

1628class ConsolidateSourceTableConfig(pipeBase.PipelineTaskConfig, 

1629 pipelineConnections=ConsolidateSourceTableConnections): 

1630 pass 

1631 

1632 

1633class ConsolidateSourceTableTask(CmdLineTask, pipeBase.PipelineTask): 

1634 """Concatenate `sourceTable` list into a per-visit `sourceTable_visit` 

1635 """ 

1636 _DefaultName = 'consolidateSourceTable' 

1637 ConfigClass = ConsolidateSourceTableConfig 

1638 

1639 inputDataset = 'sourceTable' 

1640 outputDataset = 'sourceTable_visit' 

1641 

1642 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

1643 from .makeCoaddTempExp import reorderRefs 

1644 

1645 detectorOrder = [ref.dataId['detector'] for ref in inputRefs.inputCatalogs] 

1646 detectorOrder.sort() 

1647 inputRefs = reorderRefs(inputRefs, detectorOrder, dataIdKey='detector') 

1648 inputs = butlerQC.get(inputRefs) 

1649 self.log.info("Concatenating %s per-detector Source Tables", 

1650 len(inputs['inputCatalogs'])) 

1651 df = pd.concat(inputs['inputCatalogs']) 

1652 butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs) 

1653 

1654 def runDataRef(self, dataRefList): 

1655 self.log.info("Concatenating %s per-detector Source Tables", len(dataRefList)) 

1656 df = pd.concat([dataRef.get().toDataFrame() for dataRef in dataRefList]) 

1657 dataRefList[0].put(ParquetTable(dataFrame=df), self.outputDataset) 

1658 

1659 @classmethod 

1660 def _makeArgumentParser(cls): 

1661 parser = ArgumentParser(name=cls._DefaultName) 

1662 

1663 parser.add_id_argument("--id", cls.inputDataset, 

1664 help="data ID, e.g. --id visit=12345", 

1665 ContainerClass=VisitDataIdContainer) 

1666 return parser 

1667 

1668 def writeMetadata(self, dataRef): 

1669 """No metadata to write. 

1670 """ 

1671 pass 

1672 

1673 def writeConfig(self, butler, clobber=False, doBackup=True): 

1674 """No config to write. 

1675 """ 

1676 pass 

1677 
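
# A minimal sketch of what ConsolidateSourceTableTask does with its inputs:
# per-detector DataFrames are concatenated row-wise after ordering by
# detector, giving a deterministic row order in the per-visit table. The
# dict-of-DataFrames argument is a hypothetical stand-in for the butler inputs.
def _exampleConcatSourceTables(framesByDetector):
    """Concatenate {detector: DataFrame} values in detector order."""
    ordered = [framesByDetector[det] for det in sorted(framesByDetector)]
    return pd.concat(ordered)
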

1678 

1679class MakeCcdVisitTableConnections(pipeBase.PipelineTaskConnections, 

1680 dimensions=("instrument",), 

1681 defaultTemplates={"calexpType": ""}): 

1682 visitSummaryRefs = connectionTypes.Input( 

1683 doc="Data references for per-visit consolidated exposure metadata from ConsolidateVisitSummaryTask", 

1684 name="{calexpType}visitSummary", 

1685 storageClass="ExposureCatalog", 

1686 dimensions=("instrument", "visit"), 

1687 multiple=True, 

1688 deferLoad=True, 

1689 ) 

1690 outputCatalog = connectionTypes.Output( 

1691 doc="CCD and Visit metadata table", 

1692 name="ccdVisitTable", 

1693 storageClass="DataFrame", 

1694 dimensions=("instrument",) 

1695 ) 

1696 

1697 

1698class MakeCcdVisitTableConfig(pipeBase.PipelineTaskConfig, 

1699 pipelineConnections=MakeCcdVisitTableConnections): 

1700 pass 

1701 

1702 

1703class MakeCcdVisitTableTask(CmdLineTask, pipeBase.PipelineTask): 

1704 """Produce a `ccdVisitTable` from the `visitSummary` exposure catalogs. 

1705 """ 

1706 _DefaultName = 'makeCcdVisitTable' 

1707 ConfigClass = MakeCcdVisitTableConfig 

1708 

1709 def run(self, visitSummaryRefs): 

1710 """Make a table of ccd information from the `visitSummary` catalogs. 

1711 

1712 Parameters 

1713 ---------- 

1714 visitSummaryRefs : `list` of `lsst.daf.butler.DeferredDatasetHandle` 

1715 List of DeferredDatasetHandles pointing to exposure catalogs with 

1716 per-detector summary information. 

1717 

1718 Returns 

1719 ------- 

1720 result : `lsst.pipe.base.Struct` 

1721 Results struct with attribute: 

1722 

1723 ``outputCatalog`` 

1724 Catalog of ccd and visit information. 

1725 """ 

1726 ccdEntries = [] 

1727 for visitSummaryRef in visitSummaryRefs: 

1728 visitSummary = visitSummaryRef.get() 

1729 visitInfo = visitSummary[0].getVisitInfo() 

1730 

1731 ccdEntry = {} 

1732 summaryTable = visitSummary.asAstropy() 

1733 selectColumns = ['id', 'visit', 'physical_filter', 'band', 'ra', 'decl', 'zenithDistance', 

1734 'zeroPoint', 'psfSigma', 'skyBg', 'skyNoise', 

1735 'astromOffsetMean', 'astromOffsetStd', 'nPsfStar', 

1736 'psfStarDeltaE1Median', 'psfStarDeltaE2Median', 

1737 'psfStarDeltaE1Scatter', 'psfStarDeltaE2Scatter', 

1738 'psfStarDeltaSizeMedian', 'psfStarDeltaSizeScatter', 

1739 'psfStarScaledDeltaSizeScatter'] 

1740 ccdEntry = summaryTable[selectColumns].to_pandas().set_index('id') 

1741 # 'visit' is the human readable visit number. 

1742 # 'visitId' is the key to the visit table. They are the same. 

1743 # Technically you should join to get the visit from the visit 

1744 # table. 

1745 ccdEntry = ccdEntry.rename(columns={"visit": "visitId"}) 

1746 dataIds = [DataCoordinate.standardize(visitSummaryRef.dataId, detector=id) for id in 

1747 summaryTable['id']] 

1748 packer = visitSummaryRef.dataId.universe.makePacker('visit_detector', visitSummaryRef.dataId) 

1749 ccdVisitIds = [packer.pack(dataId) for dataId in dataIds] 

1750 ccdEntry['ccdVisitId'] = ccdVisitIds 

1751 ccdEntry['detector'] = summaryTable['id'] 

1752 pixToArcseconds = np.array([vR.getWcs().getPixelScale().asArcseconds() for vR in visitSummary]) 

1753 ccdEntry["seeing"] = visitSummary['psfSigma'] * np.sqrt(8 * np.log(2)) * pixToArcseconds 

1754 

1755 ccdEntry["skyRotation"] = visitInfo.getBoresightRotAngle().asDegrees() 

1756 ccdEntry["expMidpt"] = visitInfo.getDate().toPython() 

1757 ccdEntry["expMidptMJD"] = visitInfo.getDate().get(dafBase.DateTime.MJD) 

1758 expTime = visitInfo.getExposureTime() 

1759 ccdEntry['expTime'] = expTime 

1760 ccdEntry["obsStart"] = ccdEntry["expMidpt"] - 0.5 * pd.Timedelta(seconds=expTime) 

1761 expTime_days = expTime / (60*60*24) 

1762 ccdEntry["obsStartMJD"] = ccdEntry["expMidptMJD"] - 0.5 * expTime_days 

1763 ccdEntry['darkTime'] = visitInfo.getDarkTime() 

1764 ccdEntry['xSize'] = summaryTable['bbox_max_x'] - summaryTable['bbox_min_x'] 

1765 ccdEntry['ySize'] = summaryTable['bbox_max_y'] - summaryTable['bbox_min_y'] 

1766 ccdEntry['llcra'] = summaryTable['raCorners'][:, 0] 

1767 ccdEntry['llcdec'] = summaryTable['decCorners'][:, 0] 

1768 ccdEntry['ulcra'] = summaryTable['raCorners'][:, 1] 

1769 ccdEntry['ulcdec'] = summaryTable['decCorners'][:, 1] 

1770 ccdEntry['urcra'] = summaryTable['raCorners'][:, 2] 

1771 ccdEntry['urcdec'] = summaryTable['decCorners'][:, 2] 

1772 ccdEntry['lrcra'] = summaryTable['raCorners'][:, 3] 

1773 ccdEntry['lrcdec'] = summaryTable['decCorners'][:, 3] 

1774 # TODO: DM-30618, Add raftName, nExposures, ccdTemp, binX, binY, 

1775 # and flags, and decide if WCS, and llcx, llcy, ulcx, ulcy, etc. 

1776 # values are actually wanted. 

1777 ccdEntries.append(ccdEntry) 

1778 

1779 outputCatalog = pd.concat(ccdEntries) 

1780 outputCatalog.set_index('ccdVisitId', inplace=True, verify_integrity=True) 

1781 return pipeBase.Struct(outputCatalog=outputCatalog) 

1782 
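
# A minimal numerical sketch of the "seeing" column computed above: psfSigma
# is the PSF model's Gaussian-equivalent sigma in pixels, so the FWHM in
# arcseconds is sigma * sqrt(8 ln 2) * pixelScale. The default values below
# are made-up placeholders.
def _exampleSeeingFwhm(psfSigma=2.0, pixelScaleArcsec=0.168):
    """Return the PSF FWHM in arcseconds given sigma in pixels."""
    # sqrt(8 ln 2) ~= 2.3548, so 2.0 px * 2.3548 * 0.168 arcsec/px ~= 0.79 arcsec
    return psfSigma * np.sqrt(8 * np.log(2)) * pixelScaleArcsec
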

1783 

1784class MakeVisitTableConnections(pipeBase.PipelineTaskConnections, 

1785 dimensions=("instrument",), 

1786 defaultTemplates={"calexpType": ""}): 

1787 visitSummaries = connectionTypes.Input( 

1788 doc="Per-visit consolidated exposure metadata from ConsolidateVisitSummaryTask", 

1789 name="{calexpType}visitSummary", 

1790 storageClass="ExposureCatalog", 

1791 dimensions=("instrument", "visit",), 

1792 multiple=True, 

1793 deferLoad=True, 

1794 ) 

1795 outputCatalog = connectionTypes.Output( 

1796 doc="Visit metadata table", 

1797 name="visitTable", 

1798 storageClass="DataFrame", 

1799 dimensions=("instrument",) 

1800 ) 

1801 

1802 

1803class MakeVisitTableConfig(pipeBase.PipelineTaskConfig, 

1804 pipelineConnections=MakeVisitTableConnections): 

1805 pass 

1806 

1807 

1808class MakeVisitTableTask(CmdLineTask, pipeBase.PipelineTask): 

1809 """Produce a `visitTable` from the `visitSummary` exposure catalogs. 

1810 """ 

1811 _DefaultName = 'makeVisitTable' 

1812 ConfigClass = MakeVisitTableConfig 

1813 

1814 def run(self, visitSummaries): 

1815 """Make a table of visit information from the `visitSummary` catalogs. 

1816 

1817 Parameters 

1818 ---------- 

1819 visitSummaries : `list` of `lsst.daf.butler.DeferredDatasetHandle` 

1820 List of handles to exposure catalogs with per-detector summary information. 

 

1821 Returns 

1822 ------- 

1823 result : `lsst.pipe.base.Struct` 

1824 Results struct with attribute: 

1825 

1826 ``outputCatalog`` 

1827 Catalog of visit information. 

1828 """ 

1829 visitEntries = [] 

1830 for visitSummary in visitSummaries: 

1831 visitSummary = visitSummary.get() 

1832 visitRow = visitSummary[0] 

1833 visitInfo = visitRow.getVisitInfo() 

1834 

1835 visitEntry = {} 

1836 visitEntry["visitId"] = visitRow['visit'] 

1837 visitEntry["visit"] = visitRow['visit'] 

1838 visitEntry["physical_filter"] = visitRow['physical_filter'] 

1839 visitEntry["band"] = visitRow['band'] 

1840 raDec = visitInfo.getBoresightRaDec() 

1841 visitEntry["ra"] = raDec.getRa().asDegrees() 

1842 visitEntry["decl"] = raDec.getDec().asDegrees() 

1843 visitEntry["skyRotation"] = visitInfo.getBoresightRotAngle().asDegrees() 

1844 azAlt = visitInfo.getBoresightAzAlt() 

1845 visitEntry["azimuth"] = azAlt.getLongitude().asDegrees() 

1846 visitEntry["altitude"] = azAlt.getLatitude().asDegrees() 

1847 visitEntry["zenithDistance"] = 90 - azAlt.getLatitude().asDegrees() 

1848 visitEntry["airmass"] = visitInfo.getBoresightAirmass() 

1849 expTime = visitInfo.getExposureTime() 

1850 visitEntry["expTime"] = expTime 

1851 visitEntry["expMidpt"] = visitInfo.getDate().toPython() 

1852 visitEntry["expMidptMJD"] = visitInfo.getDate().get(dafBase.DateTime.MJD) 

1853 visitEntry["obsStart"] = visitEntry["expMidpt"] - 0.5 * pd.Timedelta(seconds=expTime) 

1854 expTime_days = expTime / (60*60*24) 

1855 visitEntry["obsStartMJD"] = visitEntry["expMidptMJD"] - 0.5 * expTime_days 

1856 visitEntries.append(visitEntry) 

1857 

1858 # TODO: DM-30623, Add programId, exposureType, cameraTemp, 

1859 # mirror1Temp, mirror2Temp, mirror3Temp, domeTemp, externalTemp, 

1860 # dimmSeeing, pwvGPS, pwvMW, flags, nExposures. 

1861 

1862 outputCatalog = pd.DataFrame(data=visitEntries) 

1863 outputCatalog.set_index('visitId', inplace=True, verify_integrity=True) 

1864 return pipeBase.Struct(outputCatalog=outputCatalog) 

1865 
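
# A minimal sketch of the exposure-start bookkeeping used in the loop above:
# subtracting half of expTime (seconds) from the visit midpoint gives the
# start time, both as a datetime and in MJD (days). The arguments are
# hypothetical placeholders for the visitInfo-derived values.
def _exampleObsStart(expMidpt, expMidptMJD, expTime):
    """Return (obsStart, obsStartMJD) from midpoint values and expTime in seconds."""
    obsStart = expMidpt - 0.5 * pd.Timedelta(seconds=expTime)
    obsStartMJD = expMidptMJD - 0.5 * expTime / 86400.0  # 86400 s per day
    return obsStart, obsStartMJD
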

1866 

1867class WriteForcedSourceTableConnections(pipeBase.PipelineTaskConnections, 

1868 dimensions=("instrument", "visit", "detector", "skymap", "tract")): 

1869 

1870 inputCatalog = connectionTypes.Input( 

1871 doc="Primary per-detector, single-epoch forced-photometry catalog. " 

1872 "By default, it is the output of ForcedPhotCcdTask on calexps", 

1873 name="forced_src", 

1874 storageClass="SourceCatalog", 

1875 dimensions=("instrument", "visit", "detector", "skymap", "tract") 

1876 ) 

1877 inputCatalogDiff = connectionTypes.Input( 

1878 doc="Secondary multi-epoch, per-detector, forced photometry catalog. " 

1879 "By default, it is the output of ForcedPhotCcdTask run on image differences.", 

1880 name="forced_diff", 

1881 storageClass="SourceCatalog", 

1882 dimensions=("instrument", "visit", "detector", "skymap", "tract") 

1883 ) 

1884 outputCatalog = connectionTypes.Output( 

1885 doc="InputCatalogs horizonatally joined on `objectId` in Parquet format", 

1886 name="mergedForcedSource", 

1887 storageClass="DataFrame", 

1888 dimensions=("instrument", "visit", "detector", "skymap", "tract") 

1889 ) 

1890 

1891 

1892class WriteForcedSourceTableConfig(pipeBase.PipelineTaskConfig, 

1893 pipelineConnections=WriteForcedSourceTableConnections): 

1894 key = lsst.pex.config.Field( 

1895 doc="Column on which to join the two input tables on and make the primary key of the output", 

1896 dtype=str, 

1897 default="objectId", 

1898 ) 

1899 

1900 

1901class WriteForcedSourceTableTask(pipeBase.PipelineTask): 

1902 """Merge and convert per-detector forced source catalogs to parquet. 

1903 

1904 Because the predecessor ForcedPhotCcdTask operates per-detector and 

1905 per-tract (i.e., it has tract in its dimensions), detectors 

1906 on the tract boundary may have multiple forced source catalogs. 

1907 

1908 The successor task TransformForcedSourceTable runs per-patch 

1909 and temporally aggregates overlapping mergedForcedSource catalogs from 

1910 all available epochs. 

1911 """ 

1912 _DefaultName = "writeForcedSourceTable" 

1913 ConfigClass = WriteForcedSourceTableConfig 

1914 

1915 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

1916 inputs = butlerQC.get(inputRefs) 

1917 # Add ccdVisitId to allow joining with CcdVisitTable 

1918 inputs['ccdVisitId'] = butlerQC.quantum.dataId.pack("visit_detector") 

1919 inputs['band'] = butlerQC.quantum.dataId.full['band'] 

1920 outputs = self.run(**inputs) 

1921 butlerQC.put(outputs, outputRefs) 

1922 

1923 def run(self, inputCatalog, inputCatalogDiff, ccdVisitId=None, band=None): 

1924 dfs = [] 

1925 for table, dataset, in zip((inputCatalog, inputCatalogDiff), ('calexp', 'diff')): 

1926 df = table.asAstropy().to_pandas().set_index(self.config.key, drop=False) 

1927 df = df.reindex(sorted(df.columns), axis=1) 

1928 df['ccdVisitId'] = ccdVisitId if ccdVisitId else pd.NA 

1929 df['band'] = band if band else pd.NA 

1930 df.columns = pd.MultiIndex.from_tuples([(dataset, c) for c in df.columns], 

1931 names=('dataset', 'column')) 

1932 

1933 dfs.append(df) 

1934 

1935 outputCatalog = functools.reduce(lambda d1, d2: d1.join(d2), dfs) 

1936 return pipeBase.Struct(outputCatalog=outputCatalog) 

1937 
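
# A minimal pandas sketch of the column layout produced by run() above: each
# input catalog becomes a DataFrame indexed on objectId whose columns are
# wrapped in a ('dataset', 'column') MultiIndex, and the per-dataset frames
# are then joined on that shared index. The toy values are placeholders.
def _exampleMergeForcedSources():
    """Join toy 'calexp' and 'diff' frames the way run() joins its inputs."""
    calexp = pd.DataFrame({"psfFlux": [1.0, 2.0]},
                          index=pd.Index([10, 11], name="objectId"))
    diff = pd.DataFrame({"psfFlux": [0.1, 0.2]},
                        index=pd.Index([10, 11], name="objectId"))
    dfs = []
    for df, dataset in ((calexp, "calexp"), (diff, "diff")):
        df = df.copy()
        df.columns = pd.MultiIndex.from_tuples([(dataset, c) for c in df.columns],
                                               names=("dataset", "column"))
        dfs.append(df)
    return functools.reduce(lambda d1, d2: d1.join(d2), dfs)
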

1938 

1939class TransformForcedSourceTableConnections(pipeBase.PipelineTaskConnections, 

1940 dimensions=("instrument", "skymap", "patch", "tract")): 

1941 

1942 inputCatalogs = connectionTypes.Input( 

1943 doc="Parquet table of merged ForcedSources produced by WriteForcedSourceTableTask", 

1944 name="mergedForcedSource", 

1945 storageClass="DataFrame", 

1946 dimensions=("instrument", "visit", "detector", "skymap", "tract"), 

1947 multiple=True, 

1948 deferLoad=True 

1949 ) 

1950 referenceCatalog = connectionTypes.Input( 

1951 doc="Reference catalog which was used to seed the forcedPhot. Columns " 

1952 "objectId, detect_isPrimary, detect_isTractInner, detect_isPatchInner " 

1953 "are expected.", 

1954 name="objectTable", 

1955 storageClass="DataFrame", 

1956 dimensions=("tract", "patch", "skymap"), 

1957 deferLoad=True 

1958 ) 

1959 outputCatalog = connectionTypes.Output( 

1960 doc="Narrower, temporally-aggregated, per-patch ForcedSource Table transformed and converted per a " 

1961 "specified set of functors", 

1962 name="forcedSourceTable", 

1963 storageClass="DataFrame", 

1964 dimensions=("tract", "patch", "skymap") 

1965 ) 

1966 

1967 

1968class TransformForcedSourceTableConfig(TransformCatalogBaseConfig, 

1969 pipelineConnections=TransformForcedSourceTableConnections): 

1970 referenceColumns = pexConfig.ListField( 

1971 dtype=str, 

1972 default=["detect_isPrimary", "detect_isTractInner", "detect_isPatchInner"], 

1973 optional=True, 

1974 doc="Columns to pull from reference catalog", 

1975 ) 

1976 keyRef = lsst.pex.config.Field( 

1977 doc="Column on which to join the two input tables on and make the primary key of the output", 

1978 dtype=str, 

1979 default="objectId", 

1980 ) 

1981 key = lsst.pex.config.Field( 

1982 doc="Rename the output DataFrame index to this name", 

1983 dtype=str, 

1984 default="forcedSourceId", 

1985 ) 

1986 

1987 def setDefaults(self): 

1988 super().setDefaults() 

1989 self.functorFile = os.path.join('$PIPE_TASKS_DIR', 'schemas', 'ForcedSource.yaml') 

1990 self.columnsFromDataId = ['tract', 'patch'] 

1991 

1992 

1993class TransformForcedSourceTableTask(TransformCatalogBaseTask): 

1994 """Transform/standardize a ForcedSource catalog 

1995 

1996 Transforms each wide, per-detector forcedSource parquet table per the 

1997 specification file (per-camera defaults found in ForcedSource.yaml). 

1998 All epochs that overlap the patch are aggregated into one per-patch 

1999 narrow-parquet file. 

2000 

2001 No de-duplication of rows is performed. Duplicate-resolution flags are 

2002 pulled in from the referenceCatalog: `detect_isPrimary`, 

2003 `detect_isTractInner`, `detect_isPatchInner`, so that the user may 

2004 de-duplicate for analysis or compare duplicates for QA. 

2005 

2006 The resulting table includes multiple bands. Epochs (MJDs) and other useful 

2007 per-visit quantities can be retrieved by joining with the CcdVisitTable on 

2008 ccdVisitId. 

2009 """ 

2010 _DefaultName = "transformForcedSourceTable" 

2011 ConfigClass = TransformForcedSourceTableConfig 

2012 

2013 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

2014 inputs = butlerQC.get(inputRefs) 

2015 if self.funcs is None: 

2016 raise ValueError("config.functorFile is None. " 

2017 "Must be a valid path to yaml in order to run Task as a PipelineTask.") 

2018 outputs = self.run(inputs['inputCatalogs'], inputs['referenceCatalog'], funcs=self.funcs, 

2019 dataId=outputRefs.outputCatalog.dataId.full) 

2020 

2021 butlerQC.put(outputs, outputRefs) 

2022 

2023 def run(self, inputCatalogs, referenceCatalog, funcs=None, dataId=None, band=None): 

2024 dfs = [] 

2025 ref = referenceCatalog.get(parameters={"columns": self.config.referenceColumns}) 

2026 self.log.info("Aggregating %s input catalogs" % (len(inputCatalogs))) 

2027 for handle in inputCatalogs: 

2028 result = self.transform(None, handle, funcs, dataId) 

2029 # Filter for only rows that were detected on (overlap) the patch 

2030 dfs.append(result.df.join(ref, how='inner')) 

2031 

2032 outputCatalog = pd.concat(dfs) 

2033 

2034 # Now that we are done joining on config.keyRef, change the index 

2035 # to config.key through the following steps. 

2036 outputCatalog.index.rename(self.config.keyRef, inplace=True) 

2037 # Add config.keyRef to the column list 

2038 outputCatalog.reset_index(inplace=True) 

2039 # Set the forcedSourceId to the index. This is specified in the 

2040 # ForcedSource.yaml 

2041 outputCatalog.set_index("forcedSourceId", inplace=True, verify_integrity=True) 

2042 # Rename it to the config.key 

2043 outputCatalog.index.rename(self.config.key, inplace=True) 

2044 

2045 self.log.info("Made a table of %d columns and %d rows", 

2046 len(outputCatalog.columns), len(outputCatalog)) 

2047 return pipeBase.Struct(outputCatalog=outputCatalog) 

2048 
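
# A minimal pandas sketch of the index handling at the end of run() above:
# the join index (config.keyRef, "objectId" by default) is demoted to a
# regular column, and "forcedSourceId" becomes the index, which is then
# renamed to config.key (a no-op with the default config). The toy frame
# is a placeholder.
def _exampleReindexForcedSources():
    """Mimic the objectId -> forcedSourceId index swap performed by run()."""
    df = pd.DataFrame({"forcedSourceId": [100, 101], "psfFlux": [1.0, 2.0]},
                      index=[10, 11])
    df.index.rename("objectId", inplace=True)   # name the join index
    df.reset_index(inplace=True)                # keep objectId as a column
    df.set_index("forcedSourceId", inplace=True, verify_integrity=True)
    df.index.rename("forcedSourceId", inplace=True)
    return df
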

2049 

2050class ConsolidateTractConnections(pipeBase.PipelineTaskConnections, 

2051 defaultTemplates={"catalogType": ""}, 

2052 dimensions=("instrument", "tract")): 

2053 inputCatalogs = connectionTypes.Input( 

2054 doc="Input per-patch DataFrame Tables to be concatenated", 

2055 name="{catalogType}ForcedSourceTable", 

2056 storageClass="DataFrame", 

2057 dimensions=("tract", "patch", "skymap"), 

2058 multiple=True, 

2059 ) 

2060 

2061 outputCatalog = connectionTypes.Output( 

2062 doc="Output per-tract concatenation of DataFrame Tables", 

2063 name="{catalogType}ForcedSourceTable_tract", 

2064 storageClass="DataFrame", 

2065 dimensions=("tract", "skymap"), 

2066 ) 

2067 

2068 

2069class ConsolidateTractConfig(pipeBase.PipelineTaskConfig, 

2070 pipelineConnections=ConsolidateTractConnections): 

2071 pass 

2072 

2073 

2074class ConsolidateTractTask(CmdLineTask, pipeBase.PipelineTask): 

2075 """Concatenate any per-patch, dataframe list into a single 

2076 per-tract DataFrame. 

2077 """ 

2078 _DefaultName = 'ConsolidateTract' 

2079 ConfigClass = ConsolidateTractConfig 

2080 

2081 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

2082 inputs = butlerQC.get(inputRefs) 

2083 # Not checking that at least one inputCatalog exists, because an empty 

2084 # list would imply an empty quantum graph. 

2085 self.log.info("Concatenating %s per-patch %s Tables", 

2086 len(inputs['inputCatalogs']), 

2087 inputRefs.inputCatalogs[0].datasetType.name) 

2088 df = pd.concat(inputs['inputCatalogs']) 

2089 butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs)