Coverage for python/lsst/pipe/tasks/postprocess.py: 31%

827 statements  

1# This file is part of pipe_tasks 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

22import functools 

23import pandas as pd 

24from collections import defaultdict 

25import logging 

26import numpy as np 

27import numbers 

28import os 

29 

30import lsst.geom 

31import lsst.pex.config as pexConfig 

32import lsst.pipe.base as pipeBase 

33import lsst.daf.base as dafBase 

34from lsst.obs.base import ExposureIdInfo 

35from lsst.pipe.base import connectionTypes 

36import lsst.afw.table as afwTable 

37from lsst.meas.base import SingleFrameMeasurementTask 

38from lsst.pipe.base import CmdLineTask, ArgumentParser, DataIdContainer 

39from lsst.coadd.utils.coaddDataIdContainer import CoaddDataIdContainer 

40from lsst.daf.butler import DeferredDatasetHandle, DataCoordinate 

41from lsst.skymap import BaseSkyMap 

42 

43from .parquetTable import ParquetTable 

44from .multiBandUtils import makeMergeArgumentParser, MergeSourcesRunner 

45from .functors import CompositeFunctor, Column 

46 

47log = logging.getLogger(__name__) 

48 

49 

50def flattenFilters(df, noDupCols=['coord_ra', 'coord_dec'], camelCase=False, inputBands=None): 

51 """Flattens a dataframe with multilevel column index 

52 """ 

53 newDf = pd.DataFrame() 

54 # band is the level 0 index 

55 dfBands = df.columns.unique(level=0).values 

56 for band in dfBands: 

57 subdf = df[band] 

58 columnFormat = '{0}{1}' if camelCase else '{0}_{1}' 

59 newColumns = {c: columnFormat.format(band, c) 

60 for c in subdf.columns if c not in noDupCols} 

61 cols = list(newColumns.keys()) 

62 newDf = pd.concat([newDf, subdf[cols].rename(columns=newColumns)], axis=1) 

63 

64 # Band must be present in the input and output or else column is all NaN: 

65 presentBands = dfBands if inputBands is None else list(set(inputBands).intersection(dfBands)) 

66 # Get the unexploded columns from any present band's partition 

67 noDupDf = df[presentBands[0]][noDupCols] 

68 newDf = pd.concat([noDupDf, newDf], axis=1) 

69 return newDf 
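
# Illustrative sketch (not part of the pipeline; runs only if called by hand) of
# what flattenFilters does: a two-level ('band', 'column') DataFrame becomes a
# flat one, with per-band columns prefixed by the band name and the noDupCols
# (e.g. coord_ra/coord_dec) kept only once. With camelCase=True the band and
# column name are simply concatenated (e.g. 'gPsFlux' for a column 'PsFlux').
def _flattenFiltersExample():
    columns = pd.MultiIndex.from_tuples(
        [('g', 'coord_ra'), ('g', 'coord_dec'), ('g', 'psfFlux'),
         ('r', 'coord_ra'), ('r', 'coord_dec'), ('r', 'psfFlux')],
        names=('band', 'column'))
    df = pd.DataFrame([[10.0, -5.0, 1.0, 10.0, -5.0, 2.0]], columns=columns)
    # Expected flat columns: coord_ra, coord_dec, g_psfFlux, r_psfFlux
    return flattenFilters(df)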

70 

71 

72class WriteObjectTableConnections(pipeBase.PipelineTaskConnections, 

73 defaultTemplates={"coaddName": "deep"}, 

74 dimensions=("tract", "patch", "skymap")): 

75 inputCatalogMeas = connectionTypes.Input( 

76 doc="Catalog of source measurements on the deepCoadd.", 

77 dimensions=("tract", "patch", "band", "skymap"), 

78 storageClass="SourceCatalog", 

79 name="{coaddName}Coadd_meas", 

80 multiple=True 

81 ) 

82 inputCatalogForcedSrc = connectionTypes.Input( 

83 doc="Catalog of forced measurements (shape and position parameters held fixed) on the deepCoadd.", 

84 dimensions=("tract", "patch", "band", "skymap"), 

85 storageClass="SourceCatalog", 

86 name="{coaddName}Coadd_forced_src", 

87 multiple=True 

88 ) 

89 inputCatalogRef = connectionTypes.Input( 

90 doc="Catalog marking the primary detection (which band provides a good shape and position) " 

91 "for each detection in deepCoadd_mergeDet.", 

92 dimensions=("tract", "patch", "skymap"), 

93 storageClass="SourceCatalog", 

94 name="{coaddName}Coadd_ref" 

95 ) 

96 outputCatalog = connectionTypes.Output( 

97 doc="A vertical concatenation of the deepCoadd_{ref|meas|forced_src} catalogs, " 

98 "stored as a DataFrame with a multi-level column index per-patch.", 

99 dimensions=("tract", "patch", "skymap"), 

100 storageClass="DataFrame", 

101 name="{coaddName}Coadd_obj" 

102 ) 

103 

104 

105class WriteObjectTableConfig(pipeBase.PipelineTaskConfig, 

106 pipelineConnections=WriteObjectTableConnections): 

107 engine = pexConfig.Field( 

108 dtype=str, 

109 default="pyarrow", 

110 doc="Parquet engine for writing (pyarrow or fastparquet)" 

111 ) 

112 coaddName = pexConfig.Field( 

113 dtype=str, 

114 default="deep", 

115 doc="Name of coadd" 

116 ) 

117 

118 

119class WriteObjectTableTask(CmdLineTask, pipeBase.PipelineTask): 

120 """Write filter-merged source tables to parquet 

121 """ 

122 _DefaultName = "writeObjectTable" 

123 ConfigClass = WriteObjectTableConfig 

124 RunnerClass = MergeSourcesRunner 

125 

126 # Names of table datasets to be merged 

127 inputDatasets = ('forced_src', 'meas', 'ref') 

128 

129 # Tag of output dataset written by `MergeSourcesTask.write` 

130 outputDataset = 'obj' 

131 

132 def __init__(self, butler=None, schema=None, **kwargs): 

133 # It is a shame that this class can't use the default init for CmdLineTask 

134 # But to do so would require its own special task runner, which is many 

135 # more lines of specialization, so this is how it is for now 

136 super().__init__(**kwargs) 

137 

138 def runDataRef(self, patchRefList): 

139 """! 

140 @brief Merge coadd sources from multiple bands. Calls @ref `run` which must be defined in 

141 subclasses that inherit from MergeSourcesTask. 

142 @param[in] patchRefList list of data references for each filter 

143 """ 

144 catalogs = dict(self.readCatalog(patchRef) for patchRef in patchRefList) 

145 dataId = patchRefList[0].dataId 

146 mergedCatalog = self.run(catalogs, tract=dataId['tract'], patch=dataId['patch']) 

147 self.write(patchRefList[0], ParquetTable(dataFrame=mergedCatalog)) 

148 

149 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

150 inputs = butlerQC.get(inputRefs) 

151 

152 measDict = {ref.dataId['band']: {'meas': cat} for ref, cat in 

153 zip(inputRefs.inputCatalogMeas, inputs['inputCatalogMeas'])} 

154 forcedSourceDict = {ref.dataId['band']: {'forced_src': cat} for ref, cat in 

155 zip(inputRefs.inputCatalogForcedSrc, inputs['inputCatalogForcedSrc'])} 

156 

157 catalogs = {} 

158 for band in measDict.keys(): 

159 catalogs[band] = {'meas': measDict[band]['meas'], 

160 'forced_src': forcedSourceDict[band]['forced_src'], 

161 'ref': inputs['inputCatalogRef']} 

162 dataId = butlerQC.quantum.dataId 

163 df = self.run(catalogs=catalogs, tract=dataId['tract'], patch=dataId['patch']) 

164 outputs = pipeBase.Struct(outputCatalog=df) 

165 butlerQC.put(outputs, outputRefs) 

166 

167 @classmethod 

168 def _makeArgumentParser(cls): 

169 """Create a suitable ArgumentParser. 

170 

171 We will use the ArgumentParser to get a list of data 

172 references for patches; the RunnerClass will sort them into lists 

173 of data references for the same patch. 

174 

175 References the first of self.inputDatasets, rather than 

176 self.inputDataset. 

177 """ 

178 return makeMergeArgumentParser(cls._DefaultName, cls.inputDatasets[0]) 

179 

180 def readCatalog(self, patchRef): 

181 """Read input catalogs 

182 

183 Read all the input datasets given by the 'inputDatasets' 

184 attribute. 

185 

186 Parameters 

187 ---------- 

188 patchRef : `lsst.daf.persistence.ButlerDataRef` 

189 Data reference for patch 

190 

191 Returns 

192 ------- 

193 Tuple consisting of band name and a dict of catalogs, keyed by 

194 dataset name 

195 """ 

196 band = patchRef.get(self.config.coaddName + "Coadd_filterLabel", immediate=True).bandLabel 

197 catalogDict = {} 

198 for dataset in self.inputDatasets: 

199 catalog = patchRef.get(self.config.coaddName + "Coadd_" + dataset, immediate=True) 

200 self.log.info("Read %d sources from %s for band %s: %s", 

201 len(catalog), dataset, band, patchRef.dataId) 

202 catalogDict[dataset] = catalog 

203 return band, catalogDict 

204 

205 def run(self, catalogs, tract, patch): 

206 """Merge multiple catalogs. 

207 

208 Parameters 

209 ---------- 

210 catalogs : `dict` 

211 Mapping from filter names to dict of catalogs. 

212 tract : `int` 

213 tractId to use for the tractId column. 

214 patch : `str` 

215 patchId to use for the patchId column. 

216 

217 Returns 

218 ------- 

219 catalog : `pandas.DataFrame` 

220 Merged dataframe 

221 """ 

222 

223 dfs = [] 

224 for filt, tableDict in catalogs.items(): 

225 for dataset, table in tableDict.items(): 

226 # Convert afwTable to pandas DataFrame 

227 df = table.asAstropy().to_pandas().set_index('id', drop=True) 

228 

229 # Sort columns by name, to ensure matching schema among patches 

230 df = df.reindex(sorted(df.columns), axis=1) 

231 df['tractId'] = tract 

232 df['patchId'] = patch 

233 

234 # Make columns a 3-level MultiIndex 

235 df.columns = pd.MultiIndex.from_tuples([(dataset, filt, c) for c in df.columns], 

236 names=('dataset', 'band', 'column')) 

237 dfs.append(df) 

238 

239 catalog = functools.reduce(lambda d1, d2: d1.join(d2), dfs) 

240 return catalog 

241 

242 def write(self, patchRef, catalog): 

243 """Write the output. 

244 

245 Parameters 

246 ---------- 

247 catalog : `ParquetTable` 

248 Catalog to write 

249 patchRef : `lsst.daf.persistence.ButlerDataRef` 

250 Data reference for patch 

251 """ 

252 patchRef.put(catalog, self.config.coaddName + "Coadd_" + self.outputDataset) 

253 # since the filter isn't actually part of the data ID for the dataset we're saving, 

254 # it's confusing to see it in the log message, even if the butler simply ignores it. 

255 mergeDataId = patchRef.dataId.copy() 

256 del mergeDataId["filter"] 

257 self.log.info("Wrote merged catalog: %s", mergeDataId) 

258 

259 def writeMetadata(self, dataRefList): 

260 """No metadata to write, and not sure how to write it for a list of dataRefs. 

261 """ 

262 pass 
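
# Illustrative sketch (hypothetical values) of the column structure produced by
# WriteObjectTableTask.run above: each input catalog contributes a block of
# columns under a three-level ('dataset', 'band', 'column') MultiIndex, so the
# merged deepCoadd_obj DataFrame can be sliced per dataset and per band.
def _objectTableColumnsExample():
    df = pd.DataFrame({'base_PsfFlux_instFlux': [1.0]},
                      index=pd.Index([42], name='id'))
    df.columns = pd.MultiIndex.from_tuples(
        [('meas', 'g', c) for c in df.columns],
        names=('dataset', 'band', 'column'))
    # df['meas']['g'] recovers the per-band measurement columns.
    return df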

263 

264 

265class WriteSourceTableConnections(pipeBase.PipelineTaskConnections, 

266 defaultTemplates={"catalogType": ""}, 

267 dimensions=("instrument", "visit", "detector")): 

268 

269 catalog = connectionTypes.Input( 

270 doc="Input full-depth catalog of sources produced by CalibrateTask", 

271 name="{catalogType}src", 

272 storageClass="SourceCatalog", 

273 dimensions=("instrument", "visit", "detector") 

274 ) 

275 outputCatalog = connectionTypes.Output( 

276 doc="Catalog of sources, `src` in Parquet format. The 'id' column is " 

277 "replaced with an index; all other columns are unchanged.", 

278 name="{catalogType}source", 

279 storageClass="DataFrame", 

280 dimensions=("instrument", "visit", "detector") 

281 ) 

282 

283 

284class WriteSourceTableConfig(pipeBase.PipelineTaskConfig, 

285 pipelineConnections=WriteSourceTableConnections): 

286 pass 

287 

288 

289class WriteSourceTableTask(CmdLineTask, pipeBase.PipelineTask): 

290 """Write source table to parquet 

291 """ 

292 _DefaultName = "writeSourceTable" 

293 ConfigClass = WriteSourceTableConfig 

294 

295 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

296 inputs = butlerQC.get(inputRefs) 

297 inputs['ccdVisitId'] = butlerQC.quantum.dataId.pack("visit_detector") 

298 result = self.run(**inputs).table 

299 outputs = pipeBase.Struct(outputCatalog=result.toDataFrame()) 

300 butlerQC.put(outputs, outputRefs) 

301 

302 def run(self, catalog, ccdVisitId=None, **kwargs): 

303 """Convert `src` catalog to parquet 

304 

305 Parameters 

306 ---------- 

307 catalog : `afwTable.SourceCatalog` 

308 Catalog to be converted. 

309 ccdVisitId : `int` 

310 ccdVisitId to be added as a column. 

311 

312 Returns 

313 ------- 

314 result : `lsst.pipe.base.Struct` 

315 ``table`` 

316 `ParquetTable` version of the input catalog 

317 """ 

318 self.log.info("Generating parquet table from src catalog ccdVisitId=%s", ccdVisitId) 

319 df = catalog.asAstropy().to_pandas().set_index('id', drop=True) 

320 df['ccdVisitId'] = ccdVisitId 

321 return pipeBase.Struct(table=ParquetTable(dataFrame=df)) 
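
# Minimal sketch (hypothetical catalog and ccdVisitId) of what
# WriteSourceTableTask.run emits: the afw SourceCatalog becomes a pandas
# DataFrame indexed by 'id', with the packed ccdVisitId repeated down an extra
# column; ParquetTable is a thin wrapper around that DataFrame.
def _sourceTableRunExample(catalog, ccdVisitId=2022123100042):
    result = WriteSourceTableTask().run(catalog, ccdVisitId=ccdVisitId)
    return result.table.toDataFrame()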

322 

323 

324class WriteRecalibratedSourceTableConnections(WriteSourceTableConnections, 

325 defaultTemplates={"catalogType": "", 

326 "skyWcsName": "jointcal", 

327 "photoCalibName": "fgcm"}, 

328 dimensions=("instrument", "visit", "detector", "skymap")): 

329 skyMap = connectionTypes.Input( 

330 doc="skyMap needed to choose which tract-level calibrations to use when multiple available", 

331 name=BaseSkyMap.SKYMAP_DATASET_TYPE_NAME, 

332 storageClass="SkyMap", 

333 dimensions=("skymap",), 

334 ) 

335 exposure = connectionTypes.Input( 

336 doc="Input exposure to perform photometry on.", 

337 name="calexp", 

338 storageClass="ExposureF", 

339 dimensions=["instrument", "visit", "detector"], 

340 ) 

341 externalSkyWcsTractCatalog = connectionTypes.Input( 

342 doc=("Per-tract, per-visit wcs calibrations. These catalogs use the detector " 

343 "id for the catalog id, sorted on id for fast lookup."), 

344 name="{skyWcsName}SkyWcsCatalog", 

345 storageClass="ExposureCatalog", 

346 dimensions=["instrument", "visit", "tract"], 

347 multiple=True 

348 ) 

349 externalSkyWcsGlobalCatalog = connectionTypes.Input( 

350 doc=("Per-visit wcs calibrations computed globally (with no tract information). " 

351 "These catalogs use the detector id for the catalog id, sorted on id for " 

352 "fast lookup."), 

353 name="{skyWcsName}SkyWcsCatalog", 

354 storageClass="ExposureCatalog", 

355 dimensions=["instrument", "visit"], 

356 ) 

357 externalPhotoCalibTractCatalog = connectionTypes.Input( 

358 doc=("Per-tract, per-visit photometric calibrations. These catalogs use the " 

359 "detector id for the catalog id, sorted on id for fast lookup."), 

360 name="{photoCalibName}PhotoCalibCatalog", 

361 storageClass="ExposureCatalog", 

362 dimensions=["instrument", "visit", "tract"], 

363 multiple=True 

364 ) 

365 externalPhotoCalibGlobalCatalog = connectionTypes.Input( 

366 doc=("Per-visit photometric calibrations computed globally (with no tract " 

367 "information). These catalogs use the detector id for the catalog id, " 

368 "sorted on id for fast lookup."), 

369 name="{photoCalibName}PhotoCalibCatalog", 

370 storageClass="ExposureCatalog", 

371 dimensions=["instrument", "visit"], 

372 ) 

373 

374 def __init__(self, *, config=None): 

375 super().__init__(config=config) 

376 # Same connection boilerplate as all other applications of 

377 # Global/Tract calibrations 

378 if config.doApplyExternalSkyWcs and config.doReevaluateSkyWcs: 

379 if config.useGlobalExternalSkyWcs: 

380 self.inputs.remove("externalSkyWcsTractCatalog") 

381 else: 

382 self.inputs.remove("externalSkyWcsGlobalCatalog") 

383 else: 

384 self.inputs.remove("externalSkyWcsTractCatalog") 

385 self.inputs.remove("externalSkyWcsGlobalCatalog") 

386 if config.doApplyExternalPhotoCalib and config.doReevaluatePhotoCalib: 

387 if config.useGlobalExternalPhotoCalib: 

388 self.inputs.remove("externalPhotoCalibTractCatalog") 

389 else: 

390 self.inputs.remove("externalPhotoCalibGlobalCatalog") 

391 else: 

392 self.inputs.remove("externalPhotoCalibTractCatalog") 

393 self.inputs.remove("externalPhotoCalibGlobalCatalog") 

394 

395 

396class WriteRecalibratedSourceTableConfig(WriteSourceTableConfig, 

397 pipelineConnections=WriteRecalibratedSourceTableConnections): 

398 

399 doReevaluatePhotoCalib = pexConfig.Field( 

400 dtype=bool, 

401 default=False, 

402 doc=("Add or replace local photoCalib columns from either the calexp.photoCalib or jointcal/FGCM") 

403 ) 

404 doReevaluateSkyWcs = pexConfig.Field( 

405 dtype=bool, 

406 default=False, 

407 doc=("Add or replace local WCS columns from either the calexp.wcs or jointcal") 

408 ) 

409 doApplyExternalPhotoCalib = pexConfig.Field( 

410 dtype=bool, 

411 default=False, 

412 doc=("Whether to apply external photometric calibration via an " 

413 "`lsst.afw.image.PhotoCalib` object. Uses the " 

414 "``externalPhotoCalibName`` field to determine which calibration " 

415 "to load."), 

416 ) 

417 doApplyExternalSkyWcs = pexConfig.Field( 

418 dtype=bool, 

419 default=False, 

420 doc=("Whether to apply external astrometric calibration via an " 

421 "`lsst.afw.geom.SkyWcs` object. Uses ``externalSkyWcsName`` " 

422 "field to determine which calibration to load."), 

423 ) 

424 useGlobalExternalPhotoCalib = pexConfig.Field( 

425 dtype=bool, 

426 default=True, 

427 doc=("When using doApplyExternalPhotoCalib, use 'global' calibrations " 

428 "that are not run per-tract. When False, use per-tract photometric " 

429 "calibration files.") 

430 ) 

431 useGlobalExternalSkyWcs = pexConfig.Field( 

432 dtype=bool, 

433 default=False, 

434 doc=("When using doApplyExternalSkyWcs, use 'global' calibrations " 

435 "that are not run per-tract. When False, use per-tract wcs " 

436 "files.") 

437 ) 

438 

439 def validate(self): 

440 super().validate() 

441 if self.doApplyExternalSkyWcs and not self.doReevaluateSkyWcs: 

442 log.warning("doApplyExternalSkyWcs=True but doReevaluateSkyWcs=False. " 

443 "External SkyWcs will not be read or evaluated.") 

444 if self.doApplyExternalPhotoCalib and not self.doReevaluatePhotoCalib: 

445 log.warning("doApplyExternalPhotoCalib=True but doReevaluatePhotoCalib=False. " 

446 "External PhotoCalib will not be read or evaluated.") 

447 

448 

449class WriteRecalibratedSourceTableTask(WriteSourceTableTask): 

450 """Write source table to parquet 

451 """ 

452 _DefaultName = "writeRecalibratedSourceTable" 

453 ConfigClass = WriteRecalibratedSourceTableConfig 

454 

455 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

456 inputs = butlerQC.get(inputRefs) 

457 inputs['ccdVisitId'] = butlerQC.quantum.dataId.pack("visit_detector") 

458 inputs['exposureIdInfo'] = ExposureIdInfo.fromDataId(butlerQC.quantum.dataId, "visit_detector") 

459 

460 if self.config.doReevaluatePhotoCalib or self.config.doReevaluateSkyWcs: 

461 if self.config.doApplyExternalPhotoCalib or self.config.doApplyExternalSkyWcs: 

462 inputs['exposure'] = self.attachCalibs(inputRefs, **inputs) 

463 

464 inputs['catalog'] = self.addCalibColumns(**inputs) 

465 

466 result = self.run(**inputs).table 

467 outputs = pipeBase.Struct(outputCatalog=result.toDataFrame()) 

468 butlerQC.put(outputs, outputRefs) 

469 

470 def attachCalibs(self, inputRefs, skyMap, exposure, externalSkyWcsGlobalCatalog=None, 

471 externalSkyWcsTractCatalog=None, externalPhotoCalibGlobalCatalog=None, 

472 externalPhotoCalibTractCatalog=None, **kwargs): 

473 """Apply external calibrations to exposure per configuration 

474 

475 When multiple tract-level calibrations overlap, select the one with the 

476 center closest to detector. 

477 

478 Parameters 

479 ---------- 

480 inputRefs : `lsst.pipe.base.InputQuantizedConnection` 

481 Input references, used for the dataIds of the tract-level calibs. 

482 skyMap : `lsst.skymap.SkyMap` 

483 exposure : `lsst.afw.image.exposure.Exposure` 

484 Input exposure to adjust calibrations. 

485 externalSkyWcsGlobalCatalog : `lsst.afw.table.ExposureCatalog`, optional 

486 Exposure catalog with external skyWcs to be applied per config 

487 externalSkyWcsTractCatalog : `lsst.afw.table.ExposureCatalog`, optional 

488 Exposure catalog with external skyWcs to be applied per config 

489 externalPhotoCalibGlobalCatalog : `lsst.afw.table.ExposureCatalog`, optional 

490 Exposure catalog with external photoCalib to be applied per config 

491 externalPhotoCalibTractCatalog : `lsst.afw.table.ExposureCatalog`, optional 

492 Exposure catalog with external photoCalib to be applied per config 

493 

494 Returns 

495 ------- 

496 exposure : `lsst.afw.image.exposure.Exposure` 

497 Exposure with adjusted calibrations. 

498 """ 

499 if not self.config.doApplyExternalSkyWcs: 

500 # Do not modify the exposure's SkyWcs 

501 externalSkyWcsCatalog = None 

502 elif self.config.useGlobalExternalSkyWcs: 

503 # Use the global external SkyWcs 

504 externalSkyWcsCatalog = externalSkyWcsGlobalCatalog 

505 self.log.info('Applying global SkyWcs') 

506 else: 

507 # use tract-level external SkyWcs from the closest overlapping tract 

508 inputRef = getattr(inputRefs, 'externalSkyWcsTractCatalog') 

509 tracts = [ref.dataId['tract'] for ref in inputRef] 

510 if len(tracts) == 1: 

511 ind = 0 

512 self.log.info('Applying tract-level SkyWcs from tract %s', tracts[ind]) 

513 else: 

514 ind = self.getClosestTract(tracts, skyMap, 

515 exposure.getBBox(), exposure.getWcs()) 

516 self.log.info('Multiple overlapping externalSkyWcsTractCatalogs found (%s). ' 

517 'Applying closest to detector center: tract=%s', str(tracts), tracts[ind]) 

518 

519 externalSkyWcsCatalog = externalSkyWcsTractCatalog[ind] 

520 

521 if not self.config.doApplyExternalPhotoCalib: 

522 # Do not modify the exposure's PhotoCalib 

523 externalPhotoCalibCatalog = None 

524 elif self.config.useGlobalExternalPhotoCalib: 

525 # Use the global external PhotoCalib 

526 externalPhotoCalibCatalog = externalPhotoCalibGlobalCatalog 

527 self.log.info('Applying global PhotoCalib') 

528 else: 

529 # use tract-level external PhotoCalib from the closest overlapping tract 

530 inputRef = getattr(inputRefs, 'externalPhotoCalibTractCatalog') 

531 tracts = [ref.dataId['tract'] for ref in inputRef] 

532 if len(tracts) == 1: 

533 ind = 0 

534 self.log.info('Applying tract-level PhotoCalib from tract %s', tracts[ind]) 

535 else: 

536 ind = self.getClosestTract(tracts, skyMap, 

537 exposure.getBBox(), exposure.getWcs()) 

538 self.log.info('Multiple overlapping externalPhotoCalibTractCatalogs found (%s). ' 

539 'Applying closest to detector center: tract=%s', str(tracts), tracts[ind]) 

540 

541 externalPhotoCalibCatalog = externalPhotoCalibTractCatalog[ind] 

542 

543 return self.prepareCalibratedExposure(exposure, externalSkyWcsCatalog, externalPhotoCalibCatalog) 

544 

545 def getClosestTract(self, tracts, skyMap, bbox, wcs): 

546 """Find the index of the tract closest to detector from list of tractIds 

547 

548 Parameters 

549 ---------- 

550 tracts : `list` [`int`] 

551 Iterable of integer tractIds 

552 skyMap : `lsst.skymap.SkyMap` 

553 skyMap to lookup tract geometry and wcs 

554 bbox : `lsst.geom.Box2I` 

555 Detector bbox, the center of which will be compared to tract centers 

556 wcs : `lsst.afw.geom.SkyWcs` 

557 Detector Wcs object to map the detector center to SkyCoord 

558 

559 Returns 

560 ------- 

561 index : `int` 

562 """ 

563 if len(tracts) == 1: 

564 return 0 

565 

566 center = wcs.pixelToSky(bbox.getCenter()) 

567 sep = [] 

568 for tractId in tracts: 

569 tract = skyMap[tractId] 

570 tractCenter = tract.getWcs().pixelToSky(tract.getBBox().getCenter()) 

571 sep.append(center.separation(tractCenter)) 

572 

573 return np.argmin(sep) 

574 

575 def prepareCalibratedExposure(self, exposure, externalSkyWcsCatalog=None, externalPhotoCalibCatalog=None): 

576 """Prepare a calibrated exposure and apply external calibrations 

577 if so configured. 

578 

579 Parameters 

580 ---------- 

581 exposure : `lsst.afw.image.exposure.Exposure` 

582 Input exposure to adjust calibrations. 

583 externalSkyWcsCatalog : `lsst.afw.table.ExposureCatalog`, optional 

584 Exposure catalog with external skyWcs to be applied 

585 if config.doApplyExternalSkyWcs=True. Catalog uses the detector id 

586 for the catalog id, sorted on id for fast lookup. 

587 externalPhotoCalibCatalog : `lsst.afw.table.ExposureCatalog`, optional 

588 Exposure catalog with external photoCalib to be applied 

589 if config.doApplyExternalPhotoCalib=True. Catalog uses the detector 

590 id for the catalog id, sorted on id for fast lookup. 

591 

592 Returns 

593 ------- 

594 exposure : `lsst.afw.image.exposure.Exposure` 

595 Exposure with adjusted calibrations. 

596 """ 

597 detectorId = exposure.getInfo().getDetector().getId() 

598 

599 if externalPhotoCalibCatalog is not None: 

600 row = externalPhotoCalibCatalog.find(detectorId) 

601 if row is None: 

602 self.log.warning("Detector id %s not found in externalPhotoCalibCatalog; " 

603 "Using original photoCalib.", detectorId) 

604 else: 

605 photoCalib = row.getPhotoCalib() 

606 if photoCalib is None: 

607 self.log.warning("Detector id %s has None for photoCalib in externalPhotoCalibCatalog; " 

608 "Using original photoCalib.", detectorId) 

609 else: 

610 exposure.setPhotoCalib(photoCalib) 

611 

612 if externalSkyWcsCatalog is not None: 

613 row = externalSkyWcsCatalog.find(detectorId) 

614 if row is None: 

615 self.log.warning("Detector id %s not found in externalSkyWcsCatalog; " 

616 "Using original skyWcs.", detectorId) 

617 else: 

618 skyWcs = row.getWcs() 

619 if skyWcs is None: 

620 self.log.warning("Detector id %s has None for skyWcs in externalSkyWcsCatalog; " 

621 "Using original skyWcs.", detectorId) 

622 else: 

623 exposure.setWcs(skyWcs) 

624 

625 return exposure 

626 

627 def addCalibColumns(self, catalog, exposure, exposureIdInfo, **kwargs): 

628 """Add replace columns with calibs evaluated at each centroid 

629 

630 Add or replace 'base_LocalWcs' `base_LocalPhotoCalib' columns in a 

631 a source catalog, by rerunning the plugins. 

632 

633 Parameters 

634 ---------- 

635 catalog : `lsst.afw.table.SourceCatalog` 

636 catalog to which calib columns will be added 

637 exposure : `lsst.afw.image.exposure.Exposure` 

638 Exposure with attached PhotoCalibs and SkyWcs attributes to be 

639 reevaluated at local centroids. Pixels are not required. 

640 exposureIdInfo : `lsst.obs.base.ExposureIdInfo` 

641 

642 Returns 

643 ------- 

644 newCat : `lsst.afw.table.SourceCatalog` 

645 Source Catalog with requested local calib columns 

646 """ 

647 measureConfig = SingleFrameMeasurementTask.ConfigClass() 

648 measureConfig.doReplaceWithNoise = False 

649 

650 measureConfig.plugins.names = [] 

651 if self.config.doReevaluateSkyWcs: 

652 measureConfig.plugins.names.add('base_LocalWcs') 

653 self.log.info("Re-evaluating base_LocalWcs plugin") 

654 if self.config.doReevaluatePhotoCalib: 

655 measureConfig.plugins.names.add('base_LocalPhotoCalib') 

656 self.log.info("Re-evaluating base_LocalPhotoCalib plugin") 

657 pluginsNotToCopy = tuple(measureConfig.plugins.names) 

658 

659 # Create a new schema and catalog 

660 # Copy all columns from original except for the ones to reevaluate 

661 aliasMap = catalog.schema.getAliasMap() 

662 mapper = afwTable.SchemaMapper(catalog.schema) 

663 for item in catalog.schema: 

664 if not item.field.getName().startswith(pluginsNotToCopy): 

665 mapper.addMapping(item.key) 

666 

667 schema = mapper.getOutputSchema() 

668 measurement = SingleFrameMeasurementTask(config=measureConfig, schema=schema) 

669 schema.setAliasMap(aliasMap) 

670 newCat = afwTable.SourceCatalog(schema) 

671 newCat.extend(catalog, mapper=mapper) 

672 

673 measurement.run(measCat=newCat, exposure=exposure, exposureId=exposureIdInfo.expId) 

674 

675 return newCat 
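
# Illustrative configuration sketch (values chosen for the example, not
# defaults): to actually recalibrate, the matching doReevaluate* and
# doApplyExternal* switches must both be enabled, and the useGlobalExternal*
# flags pick between the global and per-tract external products declared in
# WriteRecalibratedSourceTableConnections.
def _recalibratedSourceTableConfigExample():
    config = WriteRecalibratedSourceTableConfig()
    config.doReevaluateSkyWcs = True
    config.doApplyExternalSkyWcs = True
    config.useGlobalExternalSkyWcs = False      # per-tract (e.g. jointcal) WCS
    config.doReevaluatePhotoCalib = True
    config.doApplyExternalPhotoCalib = True
    config.useGlobalExternalPhotoCalib = True   # global (e.g. FGCM) photoCalib
    return config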

676 

677 

678class PostprocessAnalysis(object): 

679 """Calculate columns from ParquetTable 

680 

681 This object manages and organizes an arbitrary set of computations 

682 on a catalog. The catalog is defined by a 

683 `lsst.pipe.tasks.parquetTable.ParquetTable` object (or list thereof), such as a 

684 `deepCoadd_obj` dataset, and the computations are defined by a collection 

685 of `lsst.pipe.tasks.functors.Functor` objects (or, equivalently, 

686 a `CompositeFunctor`). 

687 

688 After the object is initialized, accessing the `.df` attribute (which 

689 holds the `pandas.DataFrame` containing the results of the calculations) triggers 

690 computation of said dataframe. 

691 

692 One of the conveniences of using this object is the ability to define a desired common 

693 filter for all functors. This enables the same functor collection to be passed to 

694 several different `PostprocessAnalysis` objects without having to change the original 

695 functor collection, since the `filt` keyword argument of this object triggers an 

696 overwrite of the `filt` property for all functors in the collection. 

697 

698 This object also allows a list of refFlags to be passed, and defines a set of default 

699 refFlags that are always included even if not requested. 

700 

701 If a list of `ParquetTable` objects is passed, rather than a single one, then the 

702 calculations will be mapped over all the input catalogs. In principle, it should 

703 be straightforward to parallelize this activity, but initial tests have failed 

704 (see TODO in code comments). 

705 

706 Parameters 

707 ---------- 

708 parq : `lsst.pipe.tasks.parquetTable.ParquetTable` (or list of such) 

709 Source catalog(s) for computation 

710 

711 functors : `list`, `dict`, or `lsst.pipe.tasks.functors.CompositeFunctor` 

712 Computations to do (functors that act on `parq`). 

713 If a dict, the output 

714 DataFrame will have columns keyed accordingly. 

715 If a list, the column keys will come from the 

716 `.shortname` attribute of each functor. 

717 

718 filt : `str` (optional) 

719 Filter in which to calculate. If provided, 

720 this will overwrite any existing `.filt` attribute 

721 of the provided functors. 

722 

723 flags : `list` (optional) 

724 List of flags (per-band) to include in output table. 

725 Taken from the `meas` dataset if applied to a multilevel Object Table. 

726 

727 refFlags : `list` (optional) 

728 List of refFlags (only reference band) to include in output table. 

729 

730 forcedFlags : `list` (optional) 

731 List of flags (per-band) to include in output table. 

732 Taken from the ``forced_src`` dataset if applied to a 

733 multilevel Object Table. Intended for flags from measurement plugins 

734 only run during multi-band forced-photometry. 

735 """ 

736 _defaultRefFlags = [] 

737 _defaultFuncs = () 

738 

739 def __init__(self, parq, functors, filt=None, flags=None, refFlags=None, forcedFlags=None): 

740 self.parq = parq 

741 self.functors = functors 

742 

743 self.filt = filt 

744 self.flags = list(flags) if flags is not None else [] 

745 self.forcedFlags = list(forcedFlags) if forcedFlags is not None else [] 

746 self.refFlags = list(self._defaultRefFlags) 

747 if refFlags is not None: 

748 self.refFlags += list(refFlags) 

749 

750 self._df = None 

751 

752 @property 

753 def defaultFuncs(self): 

754 funcs = dict(self._defaultFuncs) 

755 return funcs 

756 

757 @property 

758 def func(self): 

759 additionalFuncs = self.defaultFuncs 

760 additionalFuncs.update({flag: Column(flag, dataset='forced_src') for flag in self.forcedFlags}) 

761 additionalFuncs.update({flag: Column(flag, dataset='ref') for flag in self.refFlags}) 

762 additionalFuncs.update({flag: Column(flag, dataset='meas') for flag in self.flags}) 

763 

764 if isinstance(self.functors, CompositeFunctor): 

765 func = self.functors 

766 else: 

767 func = CompositeFunctor(self.functors) 

768 

769 func.funcDict.update(additionalFuncs) 

770 func.filt = self.filt 

771 

772 return func 

773 

774 @property 

775 def noDupCols(self): 

776 return [name for name, func in self.func.funcDict.items() if func.noDup or func.dataset == 'ref'] 

777 

778 @property 

779 def df(self): 

780 if self._df is None: 

781 self.compute() 

782 return self._df 

783 

784 def compute(self, dropna=False, pool=None): 

785 # map over multiple parquet tables 

786 if type(self.parq) in (list, tuple): 

787 if pool is None: 

788 dflist = [self.func(parq, dropna=dropna) for parq in self.parq] 

789 else: 

790 # TODO: Figure out why this doesn't work (pyarrow pickling issues?) 

791 dflist = pool.map(functools.partial(self.func, dropna=dropna), self.parq) 

792 self._df = pd.concat(dflist) 

793 else: 

794 self._df = self.func(self.parq, dropna=dropna) 

795 

796 return self._df 
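
# Illustrative usage sketch of PostprocessAnalysis (hypothetical functor and
# flag names; `parq` is a ParquetTable such as a deepCoadd_obj patch): the
# functors, plus any flag/refFlag Columns, are evaluated lazily the first time
# the .df property is read.
def _postprocessAnalysisExample(parq):
    funcs = {'ra': Column('coord_ra', dataset='ref'),
             'dec': Column('coord_dec', dataset='ref')}
    analysis = PostprocessAnalysis(parq, funcs, filt='g',
                                   flags=['base_PixelFlags_flag'])
    return analysis.df  # triggers compute() on first access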

797 

798 

799class TransformCatalogBaseConnections(pipeBase.PipelineTaskConnections, 

800 dimensions=()): 

801 """Expected Connections for subclasses of TransformCatalogBaseTask. 

802 

803 Must be subclassed. 

804 """ 

805 inputCatalog = connectionTypes.Input( 

806 name="", 

807 storageClass="DataFrame", 

808 ) 

809 outputCatalog = connectionTypes.Output( 

810 name="", 

811 storageClass="DataFrame", 

812 ) 

813 

814 

815class TransformCatalogBaseConfig(pipeBase.PipelineTaskConfig, 

816 pipelineConnections=TransformCatalogBaseConnections): 

817 functorFile = pexConfig.Field( 

818 dtype=str, 

819 doc="Path to YAML file specifying Science Data Model functors to use " 

820 "when copying columns and computing calibrated values.", 

821 default=None, 

822 optional=True 

823 ) 

824 primaryKey = pexConfig.Field( 

825 dtype=str, 

826 doc="Name of column to be set as the DataFrame index. If None, the index " 

827 "will be named `id`.", 

828 default=None, 

829 optional=True 

830 ) 

831 

832 

833class TransformCatalogBaseTask(CmdLineTask, pipeBase.PipelineTask): 

834 """Base class for transforming/standardizing a catalog 

835 

836 by applying functors that convert units and apply calibrations. 

837 The purpose of this task is to perform a set of computations on 

838 an input `ParquetTable` dataset (such as `deepCoadd_obj`) and write the 

839 results to a new dataset (which needs to be declared in an `outputDataset` 

840 attribute). 

841 

842 The calculations to be performed are defined in a YAML file that specifies 

843 a set of functors to be computed, provided as 

844 a `--functorFile` config parameter. An example of such a YAML file 

845 is the following: 

846 

847 funcs: 

848 psfMag: 

849 functor: Mag 

850 args: 

851 - base_PsfFlux 

852 filt: HSC-G 

853 dataset: meas 

854 cmodel_magDiff: 

855 functor: MagDiff 

856 args: 

857 - modelfit_CModel 

858 - base_PsfFlux 

859 filt: HSC-G 

860 gauss_magDiff: 

861 functor: MagDiff 

862 args: 

863 - base_GaussianFlux 

864 - base_PsfFlux 

865 filt: HSC-G 

866 count: 

867 functor: Column 

868 args: 

869 - base_InputCount_value 

870 filt: HSC-G 

871 deconvolved_moments: 

872 functor: DeconvolvedMoments 

873 filt: HSC-G 

874 dataset: forced_src 

875 refFlags: 

876 - calib_psfUsed 

877 - merge_measurement_i 

878 - merge_measurement_r 

879 - merge_measurement_z 

880 - merge_measurement_y 

881 - merge_measurement_g 

882 - base_PixelFlags_flag_inexact_psfCenter 

883 - detect_isPrimary 

884 

885 The names for each entry under "funcs" will become the names of columns in the 

886 output dataset. All the functors referenced are defined in `lsst.pipe.tasks.functors`. 

887 Positional arguments to be passed to each functor are in the `args` list, 

888 and any additional entries for each column other than "functor" or "args" (e.g., `'filt'`, 

889 `'dataset'`) are treated as keyword arguments to be passed to the functor initialization. 

890 

891 The "flags" entry is the default shortcut for `Column` functors. 

892 All columns listed under "flags" will be copied to the output table 

893 untransformed. They can be of any datatype. 

894 In the special case of transforming a multi-level object table with 

895 band and dataset indices (deepCoadd_obj), these will be taken from the 

896 `meas` dataset and exploded out per band. 

897 

898 There are two special shortcuts that only apply when transforming 

899 multi-level Object (deepCoadd_obj) tables: 

900 - The "refFlags" entry is a shortcut for `Column` functors 

901 taken from the `'ref'` dataset if transforming an ObjectTable. 

902 - The "forcedFlags" entry is a shortcut for `Column` functors 

903 taken from the ``forced_src`` dataset if transforming an ObjectTable. 

904 These are expanded out per band. 

905 

906 

907 This task uses the `lsst.pipe.tasks.postprocess.PostprocessAnalysis` object 

908 to organize and execute the calculations. 

909 

910 """ 

911 @property 

912 def _DefaultName(self): 

913 raise NotImplementedError('Subclass must define "_DefaultName" attribute') 

914 

915 @property 

916 def outputDataset(self): 

917 raise NotImplementedError('Subclass must define "outputDataset" attribute') 

918 

919 @property 

920 def inputDataset(self): 

921 raise NotImplementedError('Subclass must define "inputDataset" attribute') 

922 

923 @property 

924 def ConfigClass(self): 

925 raise NotImplementedError('Subclass must define "ConfigClass" attribute') 

926 

927 def __init__(self, *args, **kwargs): 

928 super().__init__(*args, **kwargs) 

929 if self.config.functorFile: 

930 self.log.info('Loading transform functor definitions from %s', 

931 self.config.functorFile) 

932 self.funcs = CompositeFunctor.from_file(self.config.functorFile) 

933 self.funcs.update(dict(PostprocessAnalysis._defaultFuncs)) 

934 else: 

935 self.funcs = None 

936 

937 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

938 inputs = butlerQC.get(inputRefs) 

939 if self.funcs is None: 

940 raise ValueError("config.functorFile is None. " 

941 "Must be a valid path to yaml in order to run Task as a PipelineTask.") 

942 result = self.run(parq=inputs['inputCatalog'], funcs=self.funcs, 

943 dataId=outputRefs.outputCatalog.dataId.full) 

944 outputs = pipeBase.Struct(outputCatalog=result) 

945 butlerQC.put(outputs, outputRefs) 

946 

947 def runDataRef(self, dataRef): 

948 parq = dataRef.get() 

949 if self.funcs is None: 

950 raise ValueError("config.functorFile is None. " 

951 "Must be a valid path to yaml in order to run as a CommandlineTask.") 

952 df = self.run(parq, funcs=self.funcs, dataId=dataRef.dataId) 

953 self.write(df, dataRef) 

954 return df 

955 

956 def run(self, parq, funcs=None, dataId=None, band=None): 

957 """Do postprocessing calculations 

958 

959 Takes a `ParquetTable` object and dataId, 

960 returns a dataframe with results of postprocessing calculations. 

961 

962 Parameters 

963 ---------- 

964 parq : `lsst.pipe.tasks.parquetTable.ParquetTable` 

965 ParquetTable from which calculations are done. 

966 funcs : `lsst.pipe.tasks.functors.Functors` 

967 Functors to apply to the table's columns 

968 dataId : dict, optional 

969 Used to add a `patchId` column to the output dataframe. 

970 band : `str`, optional 

971 Filter band that is being processed. 

972 

973 Returns 

974 ------- 

975 `pandas.DataFrame` 

976 

977 """ 

978 self.log.info("Transforming/standardizing the source table dataId: %s", dataId) 

979 

980 df = self.transform(band, parq, funcs, dataId).df 

981 self.log.info("Made a table of %d columns and %d rows", len(df.columns), len(df)) 

982 return df 

983 

984 def getFunctors(self): 

985 return self.funcs 

986 

987 def getAnalysis(self, parq, funcs=None, band=None): 

988 if funcs is None: 

989 funcs = self.funcs 

990 analysis = PostprocessAnalysis(parq, funcs, filt=band) 

991 return analysis 

992 

993 def transform(self, band, parq, funcs, dataId): 

994 analysis = self.getAnalysis(parq, funcs=funcs, band=band) 

995 df = analysis.df 

996 if dataId is not None: 

997 for key, value in dataId.items(): 

998 df[str(key)] = value 

999 

1000 if self.config.primaryKey: 

1001 if df.index.name != self.config.primaryKey and self.config.primaryKey in df: 

1002 df.reset_index(inplace=True, drop=True) 

1003 df.set_index(self.config.primaryKey, inplace=True) 

1004 

1005 return pipeBase.Struct( 

1006 df=df, 

1007 analysis=analysis 

1008 ) 

1009 

1010 def write(self, df, parqRef): 

1011 parqRef.put(ParquetTable(dataFrame=df), self.outputDataset) 

1012 

1013 def writeMetadata(self, dataRef): 

1014 """No metadata to write. 

1015 """ 

1016 pass 
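
# Minimal sketch (hypothetical output column name) of what one "funcs" entry in
# the YAML shown in the TransformCatalogBaseTask docstring expands to: each
# entry becomes a functor keyed by the output column name, so the functor file
# is just a declarative way of building a CompositeFunctor.
def _functorFileEquivalentExample():
    from lsst.pipe.tasks.functors import Mag
    return CompositeFunctor({'psfMag': Mag('base_PsfFlux', dataset='meas',
                                           filt='HSC-G')})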

1017 

1018 

1019class TransformObjectCatalogConnections(pipeBase.PipelineTaskConnections, 

1020 defaultTemplates={"coaddName": "deep"}, 

1021 dimensions=("tract", "patch", "skymap")): 

1022 inputCatalog = connectionTypes.Input( 

1023 doc="The vertical concatenation of the deepCoadd_{ref|meas|forced_src} catalogs, " 

1024 "stored as a DataFrame with a multi-level column index per-patch.", 

1025 dimensions=("tract", "patch", "skymap"), 

1026 storageClass="DataFrame", 

1027 name="{coaddName}Coadd_obj", 

1028 deferLoad=True, 

1029 ) 

1030 outputCatalog = connectionTypes.Output( 

1031 doc="Per-Patch Object Table of columns transformed from the deepCoadd_obj table per the standard " 

1032 "data model.", 

1033 dimensions=("tract", "patch", "skymap"), 

1034 storageClass="DataFrame", 

1035 name="objectTable" 

1036 ) 

1037 

1038 

1039class TransformObjectCatalogConfig(TransformCatalogBaseConfig, 

1040 pipelineConnections=TransformObjectCatalogConnections): 

1041 coaddName = pexConfig.Field( 

1042 dtype=str, 

1043 default="deep", 

1044 doc="Name of coadd" 

1045 ) 

1046 # TODO: remove in DM-27177 

1047 filterMap = pexConfig.DictField( 

1048 keytype=str, 

1049 itemtype=str, 

1050 default={}, 

1051 doc=("Dictionary mapping full filter name to short one for column name munging. " 

1052 "These filters determine the output columns no matter what filters the " 

1053 "input data actually contain."), 

1054 deprecated=("Coadds are now identified by the band, so this transform is unused. " 

1055 "Will be removed after v22.") 

1056 ) 

1057 outputBands = pexConfig.ListField( 

1058 dtype=str, 

1059 default=None, 

1060 optional=True, 

1061 doc=("These bands and only these bands will appear in the output," 

1062 " NaN-filled if the input does not include them." 

1063 " If None, then use all bands found in the input.") 

1064 ) 

1065 camelCase = pexConfig.Field( 

1066 dtype=bool, 

1067 default=False, 

1068 doc=("Write per-band column names with camelCase, else underscore. " 

1069 "For example: gPsFlux instead of g_PsFlux.") 

1070 ) 

1071 multilevelOutput = pexConfig.Field( 

1072 dtype=bool, 

1073 default=False, 

1074 doc=("Whether results dataframe should have a multilevel column index (True) or be flat " 

1075 "and name-munged (False).") 

1076 ) 

1077 goodFlags = pexConfig.ListField( 

1078 dtype=str, 

1079 default=[], 

1080 doc=("List of 'good' flags that should be set False when populating empty tables. " 

1081 "All other flags are considered to be 'bad' flags and will be set to True.") 

1082 ) 

1083 floatFillValue = pexConfig.Field( 

1084 dtype=float, 

1085 default=np.nan, 

1086 doc="Fill value for float fields when populating empty tables." 

1087 ) 

1088 integerFillValue = pexConfig.Field( 

1089 dtype=int, 

1090 default=-1, 

1091 doc="Fill value for integer fields when populating empty tables." 

1092 ) 

1093 

1094 def setDefaults(self): 

1095 super().setDefaults() 

1096 self.functorFile = os.path.join('$PIPE_TASKS_DIR', 'schemas', 'Object.yaml') 

1097 self.primaryKey = 'objectId' 

1098 self.goodFlags = ['calib_astrometry_used', 

1099 'calib_photometry_reserved', 

1100 'calib_photometry_used', 

1101 'calib_psf_candidate', 

1102 'calib_psf_reserved', 

1103 'calib_psf_used'] 

1104 

1105 

1106class TransformObjectCatalogTask(TransformCatalogBaseTask): 

1107 """Produce a flattened Object Table to match the format specified in 

1108 sdm_schemas. 

1109 

1110 Do the same set of postprocessing calculations on all bands 

1111 

1112 This is identical to `TransformCatalogBaseTask`, except that it does the 

1113 specified functor calculations for all filters present in the 

1114 input `deepCoadd_obj` table. Any specific `"filt"` keywords specified 

1115 by the YAML file will be superseded. 

1116 """ 

1117 _DefaultName = "transformObjectCatalog" 

1118 ConfigClass = TransformObjectCatalogConfig 

1119 

1120 # Used by Gen 2 runDataRef only: 

1121 inputDataset = 'deepCoadd_obj' 

1122 outputDataset = 'objectTable' 

1123 

1124 @classmethod 

1125 def _makeArgumentParser(cls): 

1126 parser = ArgumentParser(name=cls._DefaultName) 

1127 parser.add_id_argument("--id", cls.inputDataset, 

1128 ContainerClass=CoaddDataIdContainer, 

1129 help="data ID, e.g. --id tract=12345 patch=1,2") 

1130 return parser 

1131 

1132 def run(self, parq, funcs=None, dataId=None, band=None): 

1133 # NOTE: band kwarg is ignored here. 

1134 dfDict = {} 

1135 analysisDict = {} 

1136 templateDf = pd.DataFrame() 

1137 

1138 if isinstance(parq, DeferredDatasetHandle): 

1139 columns = parq.get(component='columns') 

1140 inputBands = columns.unique(level=1).values 

1141 else: 

1142 inputBands = parq.columnLevelNames['band'] 

1143 

1144 outputBands = self.config.outputBands if self.config.outputBands else inputBands 

1145 

1146 # Perform transform for data of filters that exist in parq. 

1147 for inputBand in inputBands: 

1148 if inputBand not in outputBands: 

1149 self.log.info("Ignoring %s band data in the input", inputBand) 

1150 continue 

1151 self.log.info("Transforming the catalog of band %s", inputBand) 

1152 result = self.transform(inputBand, parq, funcs, dataId) 

1153 dfDict[inputBand] = result.df 

1154 analysisDict[inputBand] = result.analysis 

1155 if templateDf.empty: 

1156 templateDf = result.df 

1157 

1158 # Put filler values in columns of other wanted bands 

1159 for filt in outputBands: 

1160 if filt not in dfDict: 

1161 self.log.info("Adding empty columns for band %s", filt) 

1162 dfTemp = templateDf.copy() 

1163 for col in dfTemp.columns: 

1164 testValue = dfTemp[col].values[0] 

1165 if isinstance(testValue, (np.bool_, pd.BooleanDtype)): 

1166 # Boolean flag type, check if it is a "good" flag 

1167 if col in self.config.goodFlags: 

1168 fillValue = False 

1169 else: 

1170 fillValue = True 

1171 elif isinstance(testValue, numbers.Integral): 

1172 # Checking numbers.Integral catches all flavors 

1173 # of python, numpy, pandas, etc. integers. 

1174 # We must ensure this is not an unsigned integer. 

1175 if isinstance(testValue, np.unsignedinteger): 

1176 raise ValueError("Parquet tables may not have unsigned integer columns.") 

1177 else: 

1178 fillValue = self.config.integerFillValue 

1179 else: 

1180 fillValue = self.config.floatFillValue 

1181 dfTemp[col].values[:] = fillValue 

1182 dfDict[filt] = dfTemp 

1183 

1184 # This makes a multilevel column index, with band as first level 

1185 df = pd.concat(dfDict, axis=1, names=['band', 'column']) 

1186 

1187 if not self.config.multilevelOutput: 

1188 noDupCols = list(set.union(*[set(v.noDupCols) for v in analysisDict.values()])) 

1189 if self.config.primaryKey in noDupCols: 

1190 noDupCols.remove(self.config.primaryKey) 

1191 if dataId is not None: 

1192 noDupCols += list(dataId.keys()) 

1193 df = flattenFilters(df, noDupCols=noDupCols, camelCase=self.config.camelCase, 

1194 inputBands=inputBands) 

1195 

1196 self.log.info("Made a table of %d columns and %d rows", len(df.columns), len(df)) 

1197 

1198 return df 
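
# Illustrative standalone sketch mirroring the fill rules used in run above for
# a band that is requested in outputBands but missing from the input: "good"
# boolean flags become False, other booleans True, signed integers take
# integerFillValue, and everything else takes floatFillValue.
def _emptyBandFillValueExample(testValue, col, goodFlags,
                               integerFill=-1, floatFill=np.nan):
    if isinstance(testValue, (np.bool_, bool)):
        return False if col in goodFlags else True
    if isinstance(testValue, numbers.Integral):
        if isinstance(testValue, np.unsignedinteger):
            raise ValueError("Parquet tables may not have unsigned integer columns.")
        return integerFill
    return floatFill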

1199 

1200 

1201class TractObjectDataIdContainer(CoaddDataIdContainer): 

1202 

1203 def makeDataRefList(self, namespace): 

1204 """Make self.refList from self.idList 

1205 

1206 Generate a list of data references given tract and/or patch. 

1207 This was adapted from `TractQADataIdContainer`, which was 

1208 `TractDataIdContainer` modified to not require "filter". 

1209 Only existing dataRefs are returned. 

1210 """ 

1211 def getPatchRefList(tract): 

1212 return [namespace.butler.dataRef(datasetType=self.datasetType, 

1213 tract=tract.getId(), 

1214 patch="%d,%d" % patch.getIndex()) for patch in tract] 

1215 

1216 tractRefs = defaultdict(list) # Data references for each tract 

1217 for dataId in self.idList: 

1218 skymap = self.getSkymap(namespace) 

1219 

1220 if "tract" in dataId: 

1221 tractId = dataId["tract"] 

1222 if "patch" in dataId: 

1223 tractRefs[tractId].append(namespace.butler.dataRef(datasetType=self.datasetType, 

1224 tract=tractId, 

1225 patch=dataId['patch'])) 

1226 else: 

1227 tractRefs[tractId] += getPatchRefList(skymap[tractId]) 

1228 else: 

1229 tractRefs = dict((tract.getId(), tractRefs.get(tract.getId(), []) + getPatchRefList(tract)) 

1230 for tract in skymap) 

1231 outputRefList = [] 

1232 for tractRefList in tractRefs.values(): 

1233 existingRefs = [ref for ref in tractRefList if ref.datasetExists()] 

1234 outputRefList.append(existingRefs) 

1235 

1236 self.refList = outputRefList 

1237 

1238 

1239class ConsolidateObjectTableConnections(pipeBase.PipelineTaskConnections, 

1240 dimensions=("tract", "skymap")): 

1241 inputCatalogs = connectionTypes.Input( 

1242 doc="Per-Patch objectTables conforming to the standard data model.", 

1243 name="objectTable", 

1244 storageClass="DataFrame", 

1245 dimensions=("tract", "patch", "skymap"), 

1246 multiple=True, 

1247 ) 

1248 outputCatalog = connectionTypes.Output( 

1249 doc="Per-tract vertical concatenation of the input objectTables", 

1250 name="objectTable_tract", 

1251 storageClass="DataFrame", 

1252 dimensions=("tract", "skymap"), 

1253 ) 

1254 

1255 

1256class ConsolidateObjectTableConfig(pipeBase.PipelineTaskConfig, 

1257 pipelineConnections=ConsolidateObjectTableConnections): 

1258 coaddName = pexConfig.Field( 

1259 dtype=str, 

1260 default="deep", 

1261 doc="Name of coadd" 

1262 ) 

1263 

1264 

1265class ConsolidateObjectTableTask(CmdLineTask, pipeBase.PipelineTask): 

1266 """Write patch-merged source tables to a tract-level parquet file 

1267 

1268 Concatenates `objectTable` list into a per-tract `objectTable_tract` 

1269 """ 

1270 _DefaultName = "consolidateObjectTable" 

1271 ConfigClass = ConsolidateObjectTableConfig 

1272 

1273 inputDataset = 'objectTable' 

1274 outputDataset = 'objectTable_tract' 

1275 

1276 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

1277 inputs = butlerQC.get(inputRefs) 

1278 self.log.info("Concatenating %s per-patch Object Tables", 

1279 len(inputs['inputCatalogs'])) 

1280 df = pd.concat(inputs['inputCatalogs']) 

1281 butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs) 

1282 

1283 @classmethod 

1284 def _makeArgumentParser(cls): 

1285 parser = ArgumentParser(name=cls._DefaultName) 

1286 

1287 parser.add_id_argument("--id", cls.inputDataset, 

1288 help="data ID, e.g. --id tract=12345", 

1289 ContainerClass=TractObjectDataIdContainer) 

1290 return parser 

1291 

1292 def runDataRef(self, patchRefList): 

1293 df = pd.concat([patchRef.get().toDataFrame() for patchRef in patchRefList]) 

1294 patchRefList[0].put(ParquetTable(dataFrame=df), self.outputDataset) 

1295 

1296 def writeMetadata(self, dataRef): 

1297 """No metadata to write. 

1298 """ 

1299 pass 

1300 

1301 

1302class TransformSourceTableConnections(pipeBase.PipelineTaskConnections, 

1303 defaultTemplates={"catalogType": ""}, 

1304 dimensions=("instrument", "visit", "detector")): 

1305 

1306 inputCatalog = connectionTypes.Input( 

1307 doc="Wide input catalog of sources produced by WriteSourceTableTask", 

1308 name="{catalogType}source", 

1309 storageClass="DataFrame", 

1310 dimensions=("instrument", "visit", "detector"), 

1311 deferLoad=True 

1312 ) 

1313 outputCatalog = connectionTypes.Output( 

1314 doc="Narrower, per-detector Source Table transformed and converted per a " 

1315 "specified set of functors", 

1316 name="{catalogType}sourceTable", 

1317 storageClass="DataFrame", 

1318 dimensions=("instrument", "visit", "detector") 

1319 ) 

1320 

1321 

1322class TransformSourceTableConfig(TransformCatalogBaseConfig, 

1323 pipelineConnections=TransformSourceTableConnections): 

1324 

1325 def setDefaults(self): 

1326 super().setDefaults() 

1327 self.functorFile = os.path.join('$PIPE_TASKS_DIR', 'schemas', 'Source.yaml') 

1328 self.primaryKey = 'sourceId' 

1329 

1330 

1331class TransformSourceTableTask(TransformCatalogBaseTask): 

1332 """Transform/standardize a source catalog 

1333 """ 

1334 _DefaultName = "transformSourceTable" 

1335 ConfigClass = TransformSourceTableConfig 

1336 

1337 inputDataset = 'source' 

1338 outputDataset = 'sourceTable' 

1339 

1340 @classmethod 

1341 def _makeArgumentParser(cls): 

1342 parser = ArgumentParser(name=cls._DefaultName) 

1343 parser.add_id_argument("--id", datasetType=cls.inputDataset, 

1344 level="sensor", 

1345 help="data ID, e.g. --id visit=12345 ccd=0") 

1346 return parser 

1347 

1348 def runDataRef(self, dataRef): 

1349 """Override to specify band label to run().""" 

1350 parq = dataRef.get() 

1351 funcs = self.getFunctors() 

1352 band = dataRef.get("calexp_filterLabel", immediate=True).bandLabel 

1353 df = self.run(parq, funcs=funcs, dataId=dataRef.dataId, band=band) 

1354 self.write(df, dataRef) 

1355 return df 

1356 

1357 

1358class ConsolidateVisitSummaryConnections(pipeBase.PipelineTaskConnections, 

1359 dimensions=("instrument", "visit",), 

1360 defaultTemplates={"calexpType": ""}): 

1361 calexp = connectionTypes.Input( 

1362 doc="Processed exposures used for metadata", 

1363 name="{calexpType}calexp", 

1364 storageClass="ExposureF", 

1365 dimensions=("instrument", "visit", "detector"), 

1366 deferLoad=True, 

1367 multiple=True, 

1368 ) 

1369 visitSummary = connectionTypes.Output( 

1370 doc=("Per-visit consolidated exposure metadata. These catalogs use " 

1371 "detector id for the id and are sorted for fast lookups of a " 

1372 "detector."), 

1373 name="{calexpType}visitSummary", 

1374 storageClass="ExposureCatalog", 

1375 dimensions=("instrument", "visit"), 

1376 ) 

1377 

1378 

1379class ConsolidateVisitSummaryConfig(pipeBase.PipelineTaskConfig, 

1380 pipelineConnections=ConsolidateVisitSummaryConnections): 

1381 """Config for ConsolidateVisitSummaryTask""" 

1382 pass 

1383 

1384 

1385class ConsolidateVisitSummaryTask(pipeBase.PipelineTask, pipeBase.CmdLineTask): 

1386 """Task to consolidate per-detector visit metadata. 

1387 

1388 This task aggregates the following metadata from all the detectors in a 

1389 single visit into an exposure catalog: 

1390 - The visitInfo. 

1391 - The wcs. 

1392 - The photoCalib. 

1393 - The physical_filter and band (if available). 

1394 - The psf size, shape, and effective area at the center of the detector. 

1395 - The corners of the bounding box in right ascension/declination. 

1396 

1397 Other quantities such as Detector, Psf, ApCorrMap, and TransmissionCurve 

1398 are not persisted here because of storage concerns, and because of their 

1399 limited utility as summary statistics. 

1400 

1401 Tests for this task are performed in ci_hsc_gen3. 

1402 """ 

1403 _DefaultName = "consolidateVisitSummary" 

1404 ConfigClass = ConsolidateVisitSummaryConfig 

1405 

1406 @classmethod 

1407 def _makeArgumentParser(cls): 

1408 parser = ArgumentParser(name=cls._DefaultName) 

1409 

1410 parser.add_id_argument("--id", "calexp", 

1411 help="data ID, e.g. --id visit=12345", 

1412 ContainerClass=VisitDataIdContainer) 

1413 return parser 

1414 

1415 def writeMetadata(self, dataRef): 

1416 """No metadata to persist, so override to remove metadata persistance. 

1417 """ 

1418 pass 

1419 

1420 def writeConfig(self, butler, clobber=False, doBackup=True): 

1421 """No config to persist, so override to remove config persistance. 

1422 """ 

1423 pass 

1424 

1425 def runDataRef(self, dataRefList): 

1426 visit = dataRefList[0].dataId['visit'] 

1427 

1428 self.log.debug("Concatenating metadata from %d per-detector calexps (visit %d)", 

1429 len(dataRefList), visit) 

1430 

1431 expCatalog = self._combineExposureMetadata(visit, dataRefList, isGen3=False) 

1432 

1433 dataRefList[0].put(expCatalog, 'visitSummary', visit=visit) 

1434 

1435 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

1436 dataRefs = butlerQC.get(inputRefs.calexp) 

1437 visit = dataRefs[0].dataId.byName()['visit'] 

1438 

1439 self.log.debug("Concatenating metadata from %d per-detector calexps (visit %d)", 

1440 len(dataRefs), visit) 

1441 

1442 expCatalog = self._combineExposureMetadata(visit, dataRefs) 

1443 

1444 butlerQC.put(expCatalog, outputRefs.visitSummary) 

1445 

1446 def _combineExposureMetadata(self, visit, dataRefs, isGen3=True): 

1447 """Make a combined exposure catalog from a list of dataRefs. 

1448 These dataRefs must point to exposures with wcs, summaryStats, 

1449 and other visit metadata. 

1450 

1451 Parameters 

1452 ---------- 

1453 visit : `int` 

1454 Visit identification number. 

1455 dataRefs : `list` 

1456 List of dataRefs in visit. May be list of 

1457 `lsst.daf.persistence.ButlerDataRef` (Gen2) or 

1458 `lsst.daf.butler.DeferredDatasetHandle` (Gen3). 

1459 isGen3 : `bool`, optional 

1460 Specifies if this is a Gen3 list of datarefs. 

1461 

1462 Returns 

1463 ------- 

1464 visitSummary : `lsst.afw.table.ExposureCatalog` 

1465 Exposure catalog with per-detector summary information. 

1466 """ 

1467 schema = self._makeVisitSummarySchema() 

1468 cat = afwTable.ExposureCatalog(schema) 

1469 cat.resize(len(dataRefs)) 

1470 

1471 cat['visit'] = visit 

1472 

1473 for i, dataRef in enumerate(dataRefs): 

1474 if isGen3: 

1475 visitInfo = dataRef.get(component='visitInfo') 

1476 filterLabel = dataRef.get(component='filterLabel') 

1477 summaryStats = dataRef.get(component='summaryStats') 

1478 detector = dataRef.get(component='detector') 

1479 wcs = dataRef.get(component='wcs') 

1480 photoCalib = dataRef.get(component='photoCalib') 

1482 bbox = dataRef.get(component='bbox') 

1483 validPolygon = dataRef.get(component='validPolygon') 

1484 else: 

1485 # Note that we need to read the calexp because there is 

1486 # no magic access to the psf except through the exposure. 
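# Reading only a tiny (2x2 pixel) cutout still attaches the full exposure
# info (visitInfo, summary stats, wcs, photoCalib, detector), so the whole
# image does not need to be loaded just to reach its metadata.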

1487 gen2_read_bbox = lsst.geom.BoxI(lsst.geom.PointI(0, 0), lsst.geom.PointI(1, 1)) 

1488 exp = dataRef.get(datasetType='calexp_sub', bbox=gen2_read_bbox) 

1489 visitInfo = exp.getInfo().getVisitInfo() 

1490 filterLabel = dataRef.get("calexp_filterLabel") 

1491 summaryStats = exp.getInfo().getSummaryStats() 

1492 wcs = exp.getWcs() 

1493 photoCalib = exp.getPhotoCalib() 

1494 detector = exp.getDetector() 

1495 bbox = dataRef.get(datasetType='calexp_bbox') 

1496 validPolygon = exp.getInfo().getValidPolygon() 

1497 

1498 rec = cat[i] 

1499 rec.setBBox(bbox) 

1500 rec.setVisitInfo(visitInfo) 

1501 rec.setWcs(wcs) 

1502 rec.setPhotoCalib(photoCalib) 

1503 rec.setValidPolygon(validPolygon) 

1504 

1505 rec['physical_filter'] = filterLabel.physicalLabel if filterLabel.hasPhysicalLabel() else "" 

1506 rec['band'] = filterLabel.bandLabel if filterLabel.hasBandLabel() else "" 

1507 rec.setId(detector.getId()) 

1508 rec['psfSigma'] = summaryStats.psfSigma 

1509 rec['psfIxx'] = summaryStats.psfIxx 

1510 rec['psfIyy'] = summaryStats.psfIyy 

1511 rec['psfIxy'] = summaryStats.psfIxy 

1512 rec['psfArea'] = summaryStats.psfArea 

1513 rec['raCorners'][:] = summaryStats.raCorners 

1514 rec['decCorners'][:] = summaryStats.decCorners 

1515 rec['ra'] = summaryStats.ra 

1516 rec['decl'] = summaryStats.decl 

1517 rec['zenithDistance'] = summaryStats.zenithDistance 

1518 rec['zeroPoint'] = summaryStats.zeroPoint 

1519 rec['skyBg'] = summaryStats.skyBg 

1520 rec['skyNoise'] = summaryStats.skyNoise 

1521 rec['meanVar'] = summaryStats.meanVar 

1522 rec['astromOffsetMean'] = summaryStats.astromOffsetMean 

1523 rec['astromOffsetStd'] = summaryStats.astromOffsetStd 

1524 rec['nPsfStar'] = summaryStats.nPsfStar 

1525 rec['psfStarDeltaE1Median'] = summaryStats.psfStarDeltaE1Median 

1526 rec['psfStarDeltaE2Median'] = summaryStats.psfStarDeltaE2Median 

1527 rec['psfStarDeltaE1Scatter'] = summaryStats.psfStarDeltaE1Scatter 

1528 rec['psfStarDeltaE2Scatter'] = summaryStats.psfStarDeltaE2Scatter 

1529 rec['psfStarDeltaSizeMedian'] = summaryStats.psfStarDeltaSizeMedian 

1530 rec['psfStarDeltaSizeScatter'] = summaryStats.psfStarDeltaSizeScatter 

1531 rec['psfStarScaledDeltaSizeScatter'] = summaryStats.psfStarScaledDeltaSizeScatter 

1532 

1533 metadata = dafBase.PropertyList() 

1534 metadata.add("COMMENT", "Catalog id is detector id, sorted.") 

1535 # We are looping over existing datarefs, so the following is true 

1536 metadata.add("COMMENT", "Only detectors with data have entries.") 

1537 cat.setMetadata(metadata) 

1538 

1539 cat.sort() 

1540 return cat 

1541 

1542 def _makeVisitSummarySchema(self): 

1543 """Make the schema for the visitSummary catalog.""" 

1544 schema = afwTable.ExposureTable.makeMinimalSchema() 

1545 schema.addField('visit', type='L', doc='Visit number') 

1546 schema.addField('physical_filter', type='String', size=32, doc='Physical filter') 

1547 schema.addField('band', type='String', size=32, doc='Name of band') 

1548 schema.addField('psfSigma', type='F', 

1549 doc='PSF model second-moments determinant radius (center of chip) (pixel)') 

1550 schema.addField('psfArea', type='F', 

1551 doc='PSF model effective area (center of chip) (pixel**2)') 

1552 schema.addField('psfIxx', type='F', 

1553 doc='PSF model Ixx (center of chip) (pixel**2)') 

1554 schema.addField('psfIyy', type='F', 

1555 doc='PSF model Iyy (center of chip) (pixel**2)') 

1556 schema.addField('psfIxy', type='F', 

1557 doc='PSF model Ixy (center of chip) (pixel**2)') 

1558 schema.addField('raCorners', type='ArrayD', size=4, 

1559 doc='Right Ascension of bounding box corners (degrees)') 

1560 schema.addField('decCorners', type='ArrayD', size=4, 

1561 doc='Declination of bounding box corners (degrees)') 

1562 schema.addField('ra', type='D', 

1563 doc='Right Ascension of bounding box center (degrees)') 

1564 schema.addField('decl', type='D', 

1565 doc='Declination of bounding box center (degrees)') 

1566 schema.addField('zenithDistance', type='F', 

1567 doc='Zenith distance of bounding box center (degrees)') 

1568 schema.addField('zeroPoint', type='F', 

1569 doc='Mean zeropoint in detector (mag)') 

1570 schema.addField('skyBg', type='F', 

1571 doc='Average sky background (ADU)') 

1572 schema.addField('skyNoise', type='F', 

1573 doc='Average sky noise (ADU)') 

1574 schema.addField('meanVar', type='F', 

1575 doc='Mean variance of the weight plane (ADU**2)') 

1576 schema.addField('astromOffsetMean', type='F', 

1577 doc='Mean offset of astrometric calibration matches (arcsec)') 

1578 schema.addField('astromOffsetStd', type='F', 

1579 doc='Standard deviation of offsets of astrometric calibration matches (arcsec)') 

1580 schema.addField('nPsfStar', type='I', doc='Number of stars used for PSF model') 

1581 schema.addField('psfStarDeltaE1Median', type='F', 

1582 doc='Median E1 residual (starE1 - psfE1) for psf stars') 

1583 schema.addField('psfStarDeltaE2Median', type='F', 

1584 doc='Median E2 residual (starE2 - psfE2) for psf stars') 

1585 schema.addField('psfStarDeltaE1Scatter', type='F', 

1586 doc='Scatter (via MAD) of E1 residual (starE1 - psfE1) for psf stars') 

1587 schema.addField('psfStarDeltaE2Scatter', type='F', 

1588 doc='Scatter (via MAD) of E2 residual (starE2 - psfE2) for psf stars') 

1589 schema.addField('psfStarDeltaSizeMedian', type='F', 

1590 doc='Median size residual (starSize - psfSize) for psf stars (pixel)') 

1591 schema.addField('psfStarDeltaSizeScatter', type='F', 

1592 doc='Scatter (via MAD) of size residual (starSize - psfSize) for psf stars (pixel)') 

1593 schema.addField('psfStarScaledDeltaSizeScatter', type='F', 

1594 doc='Scatter (via MAD) of size residual scaled by median size squared') 

1595 

1596 return schema 

1597 

1598 

1599class VisitDataIdContainer(DataIdContainer): 

1600 """DataIdContainer that groups sensor-level id's by visit 

1601 """ 

1602 

1603 def makeDataRefList(self, namespace): 

1604 """Make self.refList from self.idList 

1605 

1606 Generate a list of data references grouped by visit. 

1607 

1608 Parameters 

1609 ---------- 

1610 namespace : `argparse.Namespace` 

1611 Namespace used by `lsst.pipe.base.CmdLineTask` to parse command-line arguments. 

1612 """ 

1613 # Group by visits 

1614 visitRefs = defaultdict(list) 

1615 for dataId in self.idList: 

1616 if "visit" in dataId: 

1617 visitId = dataId["visit"] 

1618 # Append every dataRef in this dataId's subset to the list for its visit. 

1619 subset = namespace.butler.subset(self.datasetType, dataId=dataId) 

1620 visitRefs[visitId].extend([dataRef for dataRef in subset]) 

1621 

1622 outputRefList = [] 

1623 for refList in visitRefs.values(): 

1624 existingRefs = [ref for ref in refList if ref.datasetExists()] 

1625 if existingRefs: 

1626 outputRefList.append(existingRefs) 

1627 

1628 self.refList = outputRefList 

1629 

1630 

1631class ConsolidateSourceTableConnections(pipeBase.PipelineTaskConnections, 

1632 defaultTemplates={"catalogType": ""}, 

1633 dimensions=("instrument", "visit")): 

1634 inputCatalogs = connectionTypes.Input( 

1635 doc="Input per-detector Source Tables", 

1636 name="{catalogType}sourceTable", 

1637 storageClass="DataFrame", 

1638 dimensions=("instrument", "visit", "detector"), 

1639 multiple=True 

1640 ) 

1641 outputCatalog = connectionTypes.Output( 

1642 doc="Per-visit concatenation of Source Table", 

1643 name="{catalogType}sourceTable_visit", 

1644 storageClass="DataFrame", 

1645 dimensions=("instrument", "visit") 

1646 ) 

1647 

1648 

1649class ConsolidateSourceTableConfig(pipeBase.PipelineTaskConfig, 

1650 pipelineConnections=ConsolidateSourceTableConnections): 

1651 pass 

1652 

1653 

1654class ConsolidateSourceTableTask(CmdLineTask, pipeBase.PipelineTask): 

1655 """Concatenate `sourceTable` list into a per-visit `sourceTable_visit` 

1656 """ 

1657 _DefaultName = 'consolidateSourceTable' 

1658 ConfigClass = ConsolidateSourceTableConfig 

1659 

1660 inputDataset = 'sourceTable' 

1661 outputDataset = 'sourceTable_visit' 

1662 

1663 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

1664 from .makeCoaddTempExp import reorderRefs 
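# Sort the input references by detector id so the concatenated per-visit
# table always comes out in a deterministic, detector-ordered row order.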

1665 

1666 detectorOrder = [ref.dataId['detector'] for ref in inputRefs.inputCatalogs] 

1667 detectorOrder.sort() 

1668 inputRefs = reorderRefs(inputRefs, detectorOrder, dataIdKey='detector') 

1669 inputs = butlerQC.get(inputRefs) 

1670 self.log.info("Concatenating %s per-detector Source Tables", 

1671 len(inputs['inputCatalogs'])) 

1672 df = pd.concat(inputs['inputCatalogs']) 

1673 butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs) 

1674 

1675 def runDataRef(self, dataRefList): 

1676 self.log.info("Concatenating %s per-detector Source Tables", len(dataRefList)) 

1677 df = pd.concat([dataRef.get().toDataFrame() for dataRef in dataRefList]) 

1678 dataRefList[0].put(ParquetTable(dataFrame=df), self.outputDataset) 

1679 

1680 @classmethod 

1681 def _makeArgumentParser(cls): 

1682 parser = ArgumentParser(name=cls._DefaultName) 

1683 

1684 parser.add_id_argument("--id", cls.inputDataset, 

1685 help="data ID, e.g. --id visit=12345", 

1686 ContainerClass=VisitDataIdContainer) 

1687 return parser 

1688 

1689 def writeMetadata(self, dataRef): 

1690 """No metadata to write. 

1691 """ 

1692 pass 

1693 

1694 def writeConfig(self, butler, clobber=False, doBackup=True): 

1695 """No config to write. 

1696 """ 

1697 pass 

1698 

1699 

1700class MakeCcdVisitTableConnections(pipeBase.PipelineTaskConnections, 

1701 dimensions=("instrument",), 

1702 defaultTemplates={"calexpType": ""}): 

1703 visitSummaryRefs = connectionTypes.Input( 

1704 doc="Data references for per-visit consolidated exposure metadata from ConsolidateVisitSummaryTask", 

1705 name="{calexpType}visitSummary", 

1706 storageClass="ExposureCatalog", 

1707 dimensions=("instrument", "visit"), 

1708 multiple=True, 

1709 deferLoad=True, 

1710 ) 

1711 outputCatalog = connectionTypes.Output( 

1712 doc="CCD and Visit metadata table", 

1713 name="ccdVisitTable", 

1714 storageClass="DataFrame", 

1715 dimensions=("instrument",) 

1716 ) 

1717 

1718 

1719class MakeCcdVisitTableConfig(pipeBase.PipelineTaskConfig, 

1720 pipelineConnections=MakeCcdVisitTableConnections): 

1721 pass 

1722 

1723 

1724class MakeCcdVisitTableTask(CmdLineTask, pipeBase.PipelineTask): 

1725 """Produce a `ccdVisitTable` from the `visitSummary` exposure catalogs. 

1726 """ 

1727 _DefaultName = 'makeCcdVisitTable' 

1728 ConfigClass = MakeCcdVisitTableConfig 

1729 

1730 def run(self, visitSummaryRefs): 

1731 """ Make a table of ccd information from the `visitSummary` catalogs. 

1732 Parameters 

1733 ---------- 

1734 visitSummaryRefs : `list` of `lsst.daf.butler.DeferredDatasetHandle` 

1735 List of DeferredDatasetHandles pointing to exposure catalogs with 

1736 per-detector summary information. 

1737 Returns 

1738 ------- 

1739 result : `lsst.pipe.base.Struct` 

1740 Results struct with attribute: 

1741 ``outputCatalog`` 

1742 Catalog of ccd and visit information. 

1743 """ 

1744 ccdEntries = [] 

1745 for visitSummaryRef in visitSummaryRefs: 

1746 visitSummary = visitSummaryRef.get() 

1747 visitInfo = visitSummary[0].getVisitInfo() 

1748 

1749 ccdEntry = {} 

1750 summaryTable = visitSummary.asAstropy() 

1751 selectColumns = ['id', 'visit', 'physical_filter', 'band', 'ra', 'decl', 'zenithDistance', 

1752 'zeroPoint', 'psfSigma', 'skyBg', 'skyNoise'] 

1753 ccdEntry = summaryTable[selectColumns].to_pandas().set_index('id') 

1754 # 'visit' is the human-readable visit number. 

1755 # 'visitId' is the key into the visit table; here they are the same value. 

1756 # Strictly, the visit number should be obtained by joining on the visit table. 

1757 ccdEntry = ccdEntry.rename(columns={"visit": "visitId"}) 
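# Pack (visit, detector) into a single integer ccdVisitId with the butler's
# dimension packer; this is the key used to join against per-source tables.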

1758 dataIds = [DataCoordinate.standardize(visitSummaryRef.dataId, detector=id) for id in 

1759 summaryTable['id']] 

1760 packer = visitSummaryRef.dataId.universe.makePacker('visit_detector', visitSummaryRef.dataId) 

1761 ccdVisitIds = [packer.pack(dataId) for dataId in dataIds] 

1762 ccdEntry['ccdVisitId'] = ccdVisitIds 

1763 ccdEntry['detector'] = summaryTable['id'] 

1764 pixToArcseconds = np.array([vR.getWcs().getPixelScale().asArcseconds() for vR in visitSummary]) 
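# Convert the PSF model sigma (pixels) to a FWHM in arcseconds:
# FWHM = sigma * 2*sqrt(2*ln 2) ≈ 2.355 * sigma, scaled by the pixel size.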

1765 ccdEntry["seeing"] = visitSummary['psfSigma'] * np.sqrt(8 * np.log(2)) * pixToArcseconds 

1766 

1767 ccdEntry["skyRotation"] = visitInfo.getBoresightRotAngle().asDegrees() 

1768 ccdEntry["expMidpt"] = visitInfo.getDate().toPython() 

1769 ccdEntry["expMidptMJD"] = visitInfo.getDate().get(dafBase.DateTime.MJD) 

1770 expTime = visitInfo.getExposureTime() 

1771 ccdEntry['expTime'] = expTime 
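# The observation start is the mid-exposure time minus half the exposure
# time; for the MJD version the exposure time (seconds) is converted to days.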

1772 ccdEntry["obsStart"] = ccdEntry["expMidpt"] - 0.5 * pd.Timedelta(seconds=expTime) 

1773 expTime_days = expTime / (60*60*24) 

1774 ccdEntry["obsStartMJD"] = ccdEntry["expMidptMJD"] - 0.5 * expTime_days 

1775 ccdEntry['darkTime'] = visitInfo.getDarkTime() 

1776 ccdEntry['xSize'] = summaryTable['bbox_max_x'] - summaryTable['bbox_min_x'] 

1777 ccdEntry['ySize'] = summaryTable['bbox_max_y'] - summaryTable['bbox_min_y'] 

1778 ccdEntry['llcra'] = summaryTable['raCorners'][:, 0] 

1779 ccdEntry['llcdec'] = summaryTable['decCorners'][:, 0] 

1780 ccdEntry['ulcra'] = summaryTable['raCorners'][:, 1] 

1781 ccdEntry['ulcdec'] = summaryTable['decCorners'][:, 1] 

1782 ccdEntry['urcra'] = summaryTable['raCorners'][:, 2] 

1783 ccdEntry['urcdec'] = summaryTable['decCorners'][:, 2] 

1784 ccdEntry['lrcra'] = summaryTable['raCorners'][:, 3] 

1785 ccdEntry['lrcdec'] = summaryTable['decCorners'][:, 3] 

1786 # TODO: DM-30618, Add raftName, nExposures, ccdTemp, binX, binY, and flags, 

1787 # and decide if WCS, and llcx, llcy, ulcx, ulcy, etc. values are actually wanted. 

1788 ccdEntries.append(ccdEntry) 

1789 

1790 outputCatalog = pd.concat(ccdEntries) 

1791 outputCatalog.set_index('ccdVisitId', inplace=True, verify_integrity=True) 

1792 return pipeBase.Struct(outputCatalog=outputCatalog) 

1793 

1794 

1795class MakeVisitTableConnections(pipeBase.PipelineTaskConnections, 

1796 dimensions=("instrument",), 

1797 defaultTemplates={"calexpType": ""}): 

1798 visitSummaries = connectionTypes.Input( 

1799 doc="Per-visit consolidated exposure metadata from ConsolidateVisitSummaryTask", 

1800 name="{calexpType}visitSummary", 

1801 storageClass="ExposureCatalog", 

1802 dimensions=("instrument", "visit",), 

1803 multiple=True, 

1804 deferLoad=True, 

1805 ) 

1806 outputCatalog = connectionTypes.Output( 

1807 doc="Visit metadata table", 

1808 name="visitTable", 

1809 storageClass="DataFrame", 

1810 dimensions=("instrument",) 

1811 ) 

1812 

1813 

1814class MakeVisitTableConfig(pipeBase.PipelineTaskConfig, 

1815 pipelineConnections=MakeVisitTableConnections): 

1816 pass 

1817 

1818 

1819class MakeVisitTableTask(CmdLineTask, pipeBase.PipelineTask): 

1820 """Produce a `visitTable` from the `visitSummary` exposure catalogs. 

1821 """ 

1822 _DefaultName = 'makeVisitTable' 

1823 ConfigClass = MakeVisitTableConfig 

1824 

1825 def run(self, visitSummaries): 

1826 """ Make a table of visit information from the `visitSummary` catalogs 

1827 

1828 Parameters 

1829 ---------- 

1830 visitSummaries : `list` of `lsst.daf.butler.DeferredDatasetHandle` 

1831 List of DeferredDatasetHandles pointing to exposure catalogs with per-detector summary information. 

1832 Returns 

1833 ------- 

1834 result : `lsst.pipe.base.Struct` 

1835 Results struct with attribute: 

1836 ``outputCatalog`` 

1837 Catalog of visit information. 

1838 """ 

1839 visitEntries = [] 

1840 for visitSummary in visitSummaries: 

1841 visitSummary = visitSummary.get() 

1842 visitRow = visitSummary[0] 

1843 visitInfo = visitRow.getVisitInfo() 

1844 

1845 visitEntry = {} 

1846 visitEntry["visitId"] = visitRow['visit'] 

1847 visitEntry["visit"] = visitRow['visit'] 

1848 visitEntry["physical_filter"] = visitRow['physical_filter'] 

1849 visitEntry["band"] = visitRow['band'] 

1850 raDec = visitInfo.getBoresightRaDec() 

1851 visitEntry["ra"] = raDec.getRa().asDegrees() 

1852 visitEntry["decl"] = raDec.getDec().asDegrees() 

1853 visitEntry["skyRotation"] = visitInfo.getBoresightRotAngle().asDegrees() 

1854 azAlt = visitInfo.getBoresightAzAlt() 

1855 visitEntry["azimuth"] = azAlt.getLongitude().asDegrees() 

1856 visitEntry["altitude"] = azAlt.getLatitude().asDegrees() 
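# Zenith distance is the complement of the boresight altitude, in degrees.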

1857 visitEntry["zenithDistance"] = 90 - azAlt.getLatitude().asDegrees() 

1858 visitEntry["airmass"] = visitInfo.getBoresightAirmass() 

1859 expTime = visitInfo.getExposureTime() 

1860 visitEntry["expTime"] = expTime 

1861 visitEntry["expMidpt"] = visitInfo.getDate().toPython() 

1862 visitEntry["expMidptMJD"] = visitInfo.getDate().get(dafBase.DateTime.MJD) 

1863 visitEntry["obsStart"] = visitEntry["expMidpt"] - 0.5 * pd.Timedelta(seconds=expTime) 

1864 expTime_days = expTime / (60*60*24) 

1865 visitEntry["obsStartMJD"] = visitEntry["expMidptMJD"] - 0.5 * expTime_days 

1866 visitEntries.append(visitEntry) 

1867 

1868 # TODO: DM-30623, Add programId, exposureType, cameraTemp, mirror1Temp, mirror2Temp, 

1869 # mirror3Temp, domeTemp, externalTemp, dimmSeeing, pwvGPS, pwvMW, flags, nExposures 

1870 

1871 outputCatalog = pd.DataFrame(data=visitEntries) 

1872 outputCatalog.set_index('visitId', inplace=True, verify_integrity=True) 

1873 return pipeBase.Struct(outputCatalog=outputCatalog) 

1874 
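# A self-contained toy sketch (not the task's real data) of how the visit and
# ccdVisit tables are meant to be combined: ccdVisitTable rows carry a visitId
# column that joins against the visitId index of visitTable.
import pandas as pd

visitTable = pd.DataFrame({"visitId": [1228], "airmass": [1.1]}).set_index("visitId")
ccdVisitTable = pd.DataFrame({"ccdVisitId": [122801, 122802],
                              "visitId": [1228, 1228],
                              "seeing": [0.7, 0.8]}).set_index("ccdVisitId")
perCcd = ccdVisitTable.join(visitTable, on="visitId")  # per-detector rows with per-visit info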

1875 

1876class WriteForcedSourceTableConnections(pipeBase.PipelineTaskConnections, 

1877 dimensions=("instrument", "visit", "detector", "skymap", "tract")): 

1878 

1879 inputCatalog = connectionTypes.Input( 

1880 doc="Primary per-detector, single-epoch forced-photometry catalog. " 

1881 "By default, it is the output of ForcedPhotCcdTask on calexps", 

1882 name="forced_src", 

1883 storageClass="SourceCatalog", 

1884 dimensions=("instrument", "visit", "detector", "skymap", "tract") 

1885 ) 

1886 inputCatalogDiff = connectionTypes.Input( 

1887 doc="Secondary multi-epoch, per-detector, forced photometry catalog. " 

1888 "By default, it is the output of ForcedPhotCcdTask run on image differences.", 

1889 name="forced_diff", 

1890 storageClass="SourceCatalog", 

1891 dimensions=("instrument", "visit", "detector", "skymap", "tract") 

1892 ) 

1893 outputCatalog = connectionTypes.Output( 

1894 doc="InputCatalogs horizonatally joined on `objectId` in Parquet format", 

1895 name="mergedForcedSource", 

1896 storageClass="DataFrame", 

1897 dimensions=("instrument", "visit", "detector", "skymap", "tract") 

1898 ) 

1899 

1900 

1901class WriteForcedSourceTableConfig(pipeBase.PipelineTaskConfig, 

1902 pipelineConnections=WriteForcedSourceTableConnections): 

1903 key = lsst.pex.config.Field( 

1904 doc="Column on which to join the two input tables on and make the primary key of the output", 

1905 dtype=str, 

1906 default="objectId", 

1907 ) 

1908 

1909 

1910class WriteForcedSourceTableTask(pipeBase.PipelineTask): 

1911 """Merge and convert per-detector forced source catalogs to parquet 

1912 

1913 Because the predecessor ForcedPhotCcdTask operates per-detector, 

1914 per-tract (i.e., it has tract in its dimensions), detectors 

1915 on the tract boundary may have multiple forced source catalogs. 

1916 

1917 The successor task TransformForcedSourceTable runs per-patch 

1918 and temporally aggregates overlapping mergedForcedSource catalogs from 

1919 all available epochs. 

1920 """ 

1921 _DefaultName = "writeForcedSourceTable" 

1922 ConfigClass = WriteForcedSourceTableConfig 

1923 

1924 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

1925 inputs = butlerQC.get(inputRefs) 

1926 # Add ccdVisitId to allow joining with CcdVisitTable 

1927 inputs['ccdVisitId'] = butlerQC.quantum.dataId.pack("visit_detector") 

1928 inputs['band'] = butlerQC.quantum.dataId.full['band'] 

1929 outputs = self.run(**inputs) 

1930 butlerQC.put(outputs, outputRefs) 

1931 

1932 def run(self, inputCatalog, inputCatalogDiff, ccdVisitId=None, band=None): 

1933 dfs = [] 

1934 for table, dataset, in zip((inputCatalog, inputCatalogDiff), ('calexp', 'diff')): 

1935 df = table.asAstropy().to_pandas().set_index(self.config.key, drop=False) 

1936 df = df.reindex(sorted(df.columns), axis=1) 

1937 df['ccdVisitId'] = ccdVisitId if ccdVisitId else pd.NA 

1938 df['band'] = band if band else pd.NA 

1939 df.columns = pd.MultiIndex.from_tuples([(dataset, c) for c in df.columns], 

1940 names=('dataset', 'column')) 

1941 

1942 dfs.append(df) 

1943 

1944 outputCatalog = functools.reduce(lambda d1, d2: d1.join(d2), dfs) 

1945 return pipeBase.Struct(outputCatalog=outputCatalog) 

1946 

1947 
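# A standalone toy sketch (not the task's real inputs) of the column layout
# produced by WriteForcedSourceTableTask.run: each input catalog gets a
# ('dataset', 'column') MultiIndex and the frames are joined on objectId.
import pandas as pd

calexp = pd.DataFrame({"objectId": [1, 2], "flux": [10.0, 20.0]}).set_index("objectId", drop=False)
diff = pd.DataFrame({"objectId": [1, 2], "flux": [0.5, -0.3]}).set_index("objectId", drop=False)
for df, dataset in ((calexp, "calexp"), (diff, "diff")):
    df.columns = pd.MultiIndex.from_tuples([(dataset, c) for c in df.columns],
                                           names=("dataset", "column"))
merged = calexp.join(diff)
print(merged[("diff", "flux")])  # select the difference-image flux column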

1948class TransformForcedSourceTableConnections(pipeBase.PipelineTaskConnections, 

1949 dimensions=("instrument", "skymap", "patch", "tract")): 

1950 

1951 inputCatalogs = connectionTypes.Input( 

1952 doc="Parquet table of merged ForcedSources produced by WriteForcedSourceTableTask", 

1953 name="mergedForcedSource", 

1954 storageClass="DataFrame", 

1955 dimensions=("instrument", "visit", "detector", "skymap", "tract"), 

1956 multiple=True, 

1957 deferLoad=True 

1958 ) 

1959 referenceCatalog = connectionTypes.Input( 

1960 doc="Reference catalog which was used to seed the forcedPhot. Columns " 

1961 "objectId, detect_isPrimary, detect_isTractInner, detect_isPatchInner " 

1962 "are expected.", 

1963 name="objectTable", 

1964 storageClass="DataFrame", 

1965 dimensions=("tract", "patch", "skymap"), 

1966 deferLoad=True 

1967 ) 

1968 outputCatalog = connectionTypes.Output( 

1969 doc="Narrower, temporally-aggregated, per-patch ForcedSource Table transformed and converted per a " 

1970 "specified set of functors", 

1971 name="forcedSourceTable", 

1972 storageClass="DataFrame", 

1973 dimensions=("tract", "patch", "skymap") 

1974 ) 

1975 

1976 

1977class TransformForcedSourceTableConfig(TransformCatalogBaseConfig, 

1978 pipelineConnections=TransformForcedSourceTableConnections): 

1979 referenceColumns = pexConfig.ListField( 

1980 dtype=str, 

1981 default=["detect_isPrimary", "detect_isTractInner", "detect_isPatchInner"], 

1982 optional=True, 

1983 doc="Columns to pull from reference catalog", 

1984 ) 

1985 keyRef = lsst.pex.config.Field( 

1986 doc="Column on which to join the two input tables on and make the primary key of the output", 

1987 dtype=str, 

1988 default="objectId", 

1989 ) 

1990 key = lsst.pex.config.Field( 

1991 doc="Rename the output DataFrame index to this name", 

1992 dtype=str, 

1993 default="forcedSourceId", 

1994 ) 

1995 

1996 def setDefaults(self): 

1997 super().setDefaults() 

1998 self.functorFile = os.path.join('$PIPE_TASKS_DIR', 'schemas', 'ForcedSource.yaml') 

1999 

2000 

2001class TransformForcedSourceTableTask(TransformCatalogBaseTask): 

2002 """Transform/standardize a ForcedSource catalog 

2003 

2004 Transforms each wide, per-detector forcedSource parquet table per the 

2005 specification file (per-camera defaults found in ForcedSource.yaml). 

2006 All epochs that overlap the patch are aggregated into one per-patch 

2007 narrow-parquet file. 

2008 

2009 No de-duplication of rows is performed. Duplicate-resolution flags are 

2010 pulled in from the referenceCatalog: `detect_isPrimary`, 

2011 `detect_isTractInner`, `detect_isPatchInner`, so that the user may de-duplicate 

2012 for analysis or compare duplicates for QA. 

2013 

2014 The resulting table includes multiple bands. Epochs (MJDs) and other useful 

2015 per-visit metadata can be retrieved by joining with the CcdVisitTable on 

2016 ccdVisitId. 
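
A hedged pandas sketch of such a join (both DataFrames assumed already read
from the butler; only the MJD column is pulled in here):

>>> withMjd = forcedSourceTable.join(ccdVisitTable["expMidptMJD"], on="ccdVisitId")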

2017 """ 

2018 _DefaultName = "transformForcedSourceTable" 

2019 ConfigClass = TransformForcedSourceTableConfig 

2020 

2021 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

2022 inputs = butlerQC.get(inputRefs) 

2023 if self.funcs is None: 

2024 raise ValueError("config.functorFile is None. " 

2025 "Must be a valid path to yaml in order to run Task as a PipelineTask.") 

2026 outputs = self.run(inputs['inputCatalogs'], inputs['referenceCatalog'], funcs=self.funcs, 

2027 dataId=outputRefs.outputCatalog.dataId.full) 

2028 

2029 butlerQC.put(outputs, outputRefs) 

2030 

2031 def run(self, inputCatalogs, referenceCatalog, funcs=None, dataId=None, band=None): 

2032 dfs = [] 

2033 ref = referenceCatalog.get(parameters={"columns": self.config.referenceColumns}) 

2034 self.log.info("Aggregating %s input catalogs", len(inputCatalogs)) 

2035 for handle in inputCatalogs: 

2036 result = self.transform(None, handle, funcs, dataId) 

2037 # Keep only the rows whose objects appear in this patch's reference catalog (inner join). 

2038 dfs.append(result.df.join(ref, how='inner')) 

2039 

2040 outputCatalog = pd.concat(dfs) 

2041 

2042 # The join above used config.keyRef as the index; give the index that name, 

2043 # move it back into the columns, then re-index on config.key below. 

2044 outputCatalog.index.rename(self.config.keyRef, inplace=True) 

2045 # Add config.keyRef to the column list 

2046 outputCatalog.reset_index(inplace=True) 

2047 # set the forcedSourceId to the index. This is specified in the ForcedSource.yaml 

2048 outputCatalog.set_index("forcedSourceId", inplace=True, verify_integrity=True) 

2049 # Rename it to the config.key 

2050 outputCatalog.index.rename(self.config.key, inplace=True) 

2051 

2052 self.log.info("Made a table of %d columns and %d rows", 

2053 len(outputCatalog.columns), len(outputCatalog)) 

2054 return pipeBase.Struct(outputCatalog=outputCatalog) 

2055 

2056 

2057class ConsolidateTractConnections(pipeBase.PipelineTaskConnections, 

2058 defaultTemplates={"catalogType": ""}, 

2059 dimensions=("instrument", "tract")): 

2060 inputCatalogs = connectionTypes.Input( 

2061 doc="Input per-patch DataFrame Tables to be concatenated", 

2062 name="{catalogType}ForcedSourceTable", 

2063 storageClass="DataFrame", 

2064 dimensions=("tract", "patch", "skymap"), 

2065 multiple=True, 

2066 ) 

2067 

2068 outputCatalog = connectionTypes.Output( 

2069 doc="Output per-tract concatenation of DataFrame Tables", 

2070 name="{catalogType}ForcedSourceTable_tract", 

2071 storageClass="DataFrame", 

2072 dimensions=("tract", "skymap"), 

2073 ) 

2074 

2075 

2076class ConsolidateTractConfig(pipeBase.PipelineTaskConfig, 

2077 pipelineConnections=ConsolidateTractConnections): 

2078 pass 

2079 

2080 

2081class ConsolidateTractTask(CmdLineTask, pipeBase.PipelineTask): 

2082 """Concatenate any per-patch, dataframe list into a single 

2083 per-tract DataFrame 

2084 """ 

2085 _DefaultName = 'ConsolidateTract' 

2086 ConfigClass = ConsolidateTractConfig 

2087 

2088 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

2089 inputs = butlerQC.get(inputRefs) 

2090 # Not checking that at least one inputCatalog exists: if none did, the quantum graph would be empty. 

2091 self.log.info("Concatenating %s per-patch %s Tables", 

2092 len(inputs['inputCatalogs']), 

2093 inputRefs.inputCatalogs[0].datasetType.name) 

2094 df = pd.concat(inputs['inputCatalogs']) 

2095 butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs)