Coverage for python/lsst/pipe/tasks/postprocess.py: 26%

763 statements  

coverage.py v6.5.0, created at 2024-02-08 07:10 +0000

1# This file is part of pipe_tasks 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

22import functools 

23import pandas as pd 

24from collections import defaultdict 

25import logging 

26import numpy as np 

27import numbers 

28import os 

29 

30import lsst.geom 

31import lsst.pex.config as pexConfig 

32import lsst.pipe.base as pipeBase 

33import lsst.daf.base as dafBase 

34from lsst.obs.base import ExposureIdInfo 

35from lsst.pipe.base import connectionTypes 

36import lsst.afw.table as afwTable 

37from lsst.afw.image import ExposureSummaryStats 

38from lsst.meas.base import SingleFrameMeasurementTask 

39from lsst.pipe.base import CmdLineTask, ArgumentParser, DataIdContainer 

40from lsst.coadd.utils.coaddDataIdContainer import CoaddDataIdContainer 

41from lsst.daf.butler import DeferredDatasetHandle, DataCoordinate 

42from lsst.skymap import BaseSkyMap 

43 

44from .parquetTable import ParquetTable 

45from .multiBandUtils import makeMergeArgumentParser, MergeSourcesRunner 

46from .functors import CompositeFunctor, Column 

47 

48log = logging.getLogger(__name__) 

49 

50 

51def flattenFilters(df, noDupCols=['coord_ra', 'coord_dec'], camelCase=False, inputBands=None): 

52 """Flattens a dataframe with multilevel column index. 

53 """ 

54 newDf = pd.DataFrame() 

55 # band is the level 0 index 

56 dfBands = df.columns.unique(level=0).values 

57 for band in dfBands: 

58 subdf = df[band] 

59 columnFormat = '{0}{1}' if camelCase else '{0}_{1}' 

60 newColumns = {c: columnFormat.format(band, c) 

61 for c in subdf.columns if c not in noDupCols} 

62 cols = list(newColumns.keys()) 

63 newDf = pd.concat([newDf, subdf[cols].rename(columns=newColumns)], axis=1) 

64 

65 # Band must be present in the input and output or else column is all NaN: 

66 presentBands = dfBands if inputBands is None else list(set(inputBands).intersection(dfBands)) 

67 # Get the unexploded columns from any present band's partition 

68 noDupDf = df[presentBands[0]][noDupCols] 

69 newDf = pd.concat([noDupDf, newDf], axis=1) 

70 return newDf 

71 
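# A minimal sketch (hypothetical values) of what flattenFilters does to a
# band-indexed DataFrame:
#
#     cols = pd.MultiIndex.from_product([['g', 'r'], ['coord_ra', 'coord_dec', 'PsFlux']])
#     df = pd.DataFrame([[1.0, 2.0, 3.0, 1.0, 2.0, 4.0]], columns=cols)
#     flattenFilters(df).columns
#     # -> ['coord_ra', 'coord_dec', 'g_PsFlux', 'r_PsFlux']
#     flattenFilters(df, camelCase=True).columns
#     # -> ['coord_ra', 'coord_dec', 'gPsFlux', 'rPsFlux']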

72 

73class WriteObjectTableConnections(pipeBase.PipelineTaskConnections, 

74 defaultTemplates={"coaddName": "deep"}, 

75 dimensions=("tract", "patch", "skymap")): 

76 inputCatalogMeas = connectionTypes.Input( 

77 doc="Catalog of source measurements on the deepCoadd.", 

78 dimensions=("tract", "patch", "band", "skymap"), 

79 storageClass="SourceCatalog", 

80 name="{coaddName}Coadd_meas", 

81 multiple=True 

82 ) 

83 inputCatalogForcedSrc = connectionTypes.Input( 

84 doc="Catalog of forced measurements (shape and position parameters held fixed) on the deepCoadd.", 

85 dimensions=("tract", "patch", "band", "skymap"), 

86 storageClass="SourceCatalog", 

87 name="{coaddName}Coadd_forced_src", 

88 multiple=True 

89 ) 

90 inputCatalogRef = connectionTypes.Input( 

91 doc="Catalog marking the primary detection (which band provides a good shape and position) "

92 "for each detection in deepCoadd_mergeDet.", 

93 dimensions=("tract", "patch", "skymap"), 

94 storageClass="SourceCatalog", 

95 name="{coaddName}Coadd_ref" 

96 ) 

97 outputCatalog = connectionTypes.Output( 

98 doc="A vertical concatenation of the deepCoadd_{ref|meas|forced_src} catalogs, " 

99 "stored as a DataFrame with a multi-level column index per-patch.", 

100 dimensions=("tract", "patch", "skymap"), 

101 storageClass="DataFrame", 

102 name="{coaddName}Coadd_obj" 

103 ) 

104 

105 

106class WriteObjectTableConfig(pipeBase.PipelineTaskConfig, 

107 pipelineConnections=WriteObjectTableConnections): 

108 engine = pexConfig.Field( 

109 dtype=str, 

110 default="pyarrow", 

111 doc="Parquet engine for writing (pyarrow or fastparquet)" 

112 ) 

113 coaddName = pexConfig.Field( 

114 dtype=str, 

115 default="deep", 

116 doc="Name of coadd" 

117 ) 

118 

119 

120class WriteObjectTableTask(CmdLineTask, pipeBase.PipelineTask): 

121 """Write filter-merged source tables to parquet 

122 """ 

123 _DefaultName = "writeObjectTable" 

124 ConfigClass = WriteObjectTableConfig 

125 RunnerClass = MergeSourcesRunner 

126 

127 # Names of table datasets to be merged 

128 inputDatasets = ('forced_src', 'meas', 'ref') 

129 

130 # Tag of output dataset written by `MergeSourcesTask.write` 

131 outputDataset = 'obj' 

132 

133 def __init__(self, butler=None, schema=None, **kwargs): 

134 # It is a shame that this class can't use the default init for 

135 # CmdLineTask, but to do so would require its own special task 

136 # runner, which is many more lines of specialization, so this is 

137 # how it is for now. 

138 super().__init__(**kwargs) 

139 

140 def runDataRef(self, patchRefList): 

141 """! 

142 @brief Merge coadd sources from multiple bands. Calls @ref `run` which 

143 must be defined in subclasses that inherit from MergeSourcesTask. 

144 @param[in] patchRefList list of data references for each filter 

145 """ 

146 catalogs = dict(self.readCatalog(patchRef) for patchRef in patchRefList) 

147 dataId = patchRefList[0].dataId 

148 mergedCatalog = self.run(catalogs, tract=dataId['tract'], patch=dataId['patch']) 

149 self.write(patchRefList[0], ParquetTable(dataFrame=mergedCatalog)) 

150 

151 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

152 inputs = butlerQC.get(inputRefs) 

153 

154 measDict = {ref.dataId['band']: {'meas': cat} for ref, cat in 

155 zip(inputRefs.inputCatalogMeas, inputs['inputCatalogMeas'])} 

156 forcedSourceDict = {ref.dataId['band']: {'forced_src': cat} for ref, cat in 

157 zip(inputRefs.inputCatalogForcedSrc, inputs['inputCatalogForcedSrc'])} 

158 

159 catalogs = {} 

160 for band in measDict.keys(): 

161 catalogs[band] = {'meas': measDict[band]['meas'], 

162 'forced_src': forcedSourceDict[band]['forced_src'], 

163 'ref': inputs['inputCatalogRef']} 

164 dataId = butlerQC.quantum.dataId 

165 df = self.run(catalogs=catalogs, tract=dataId['tract'], patch=dataId['patch']) 

166 outputs = pipeBase.Struct(outputCatalog=df) 

167 butlerQC.put(outputs, outputRefs) 

168 

169 @classmethod 

170 def _makeArgumentParser(cls): 

171 """Create a suitable ArgumentParser. 

172 

173 We will use the ArgumentParser to get a list of data 

174 references for patches; the RunnerClass will sort them into lists 

175 of data references for the same patch. 

176 

177 References the first element of self.inputDatasets, rather than

178 self.inputDataset 

179 """ 

180 return makeMergeArgumentParser(cls._DefaultName, cls.inputDatasets[0]) 

181 

182 def readCatalog(self, patchRef): 

183 """Read input catalogs 

184 

185 Read all the input datasets given by the 'inputDatasets' 

186 attribute. 

187 

188 Parameters 

189 ---------- 

190 patchRef : `lsst.daf.persistence.ButlerDataRef` 

191 Data reference for patch. 

192 

193 Returns 

194 ------- 

195 Tuple consisting of band name and a dict of catalogs, keyed by 

196 dataset name. 

197 """ 

198 band = patchRef.get(self.config.coaddName + "Coadd_filter", immediate=True).bandLabel 

199 catalogDict = {} 

200 for dataset in self.inputDatasets: 

201 catalog = patchRef.get(self.config.coaddName + "Coadd_" + dataset, immediate=True) 

202 self.log.info("Read %d sources from %s for band %s: %s", 

203 len(catalog), dataset, band, patchRef.dataId) 

204 catalogDict[dataset] = catalog 

205 return band, catalogDict 

206 

207 def run(self, catalogs, tract, patch): 

208 """Merge multiple catalogs. 

209 

210 Parameters 

211 ---------- 

212 catalogs : `dict` 

213 Mapping from filter names to dict of catalogs. 

214 tract : int 

215 tractId to use for the tractId column. 

216 patch : str 

217 patchId to use for the patchId column. 

218 

219 Returns 

220 ------- 

221 catalog : `pandas.DataFrame` 

222 Merged dataframe. 

223 """ 

224 

225 dfs = [] 

226 for filt, tableDict in catalogs.items(): 

227 for dataset, table in tableDict.items(): 

228 # Convert afwTable to pandas DataFrame 

229 df = table.asAstropy().to_pandas().set_index('id', drop=True) 

230 

231 # Sort columns by name, to ensure matching schema among patches 

232 df = df.reindex(sorted(df.columns), axis=1) 

233 df['tractId'] = tract 

234 df['patchId'] = patch 

235 

236 # Make columns a 3-level MultiIndex 

237 df.columns = pd.MultiIndex.from_tuples([(dataset, filt, c) for c in df.columns], 

238 names=('dataset', 'band', 'column')) 
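# e.g. the g-band 'meas' column 'base_PsfFlux_instFlux' becomes the
# key ('meas', 'g', 'base_PsfFlux_instFlux') in the merged table.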

239 dfs.append(df) 

240 

241 catalog = functools.reduce(lambda d1, d2: d1.join(d2), dfs) 

242 return catalog 

243 

244 def write(self, patchRef, catalog): 

245 """Write the output. 

246 

247 Parameters 

248 ---------- 

249 catalog : `ParquetTable` 

250 Catalog to write. 

251 patchRef : `lsst.daf.persistence.ButlerDataRef` 

252 Data reference for patch. 

253 """ 

254 patchRef.put(catalog, self.config.coaddName + "Coadd_" + self.outputDataset) 

255 # since the filter isn't actually part of the data ID for the dataset 

256 # we're saving, it's confusing to see it in the log message, even if 

257 # the butler simply ignores it. 

258 mergeDataId = patchRef.dataId.copy() 

259 del mergeDataId["filter"] 

260 self.log.info("Wrote merged catalog: %s", mergeDataId) 

261 

262 def writeMetadata(self, dataRefList): 

263 """No metadata to write, and not sure how to write it for a list of 

264 dataRefs. 

265 """ 

266 pass 

267 

268 

269class WriteSourceTableConnections(pipeBase.PipelineTaskConnections, 

270 defaultTemplates={"catalogType": ""}, 

271 dimensions=("instrument", "visit", "detector")): 

272 

273 catalog = connectionTypes.Input( 

274 doc="Input full-depth catalog of sources produced by CalibrateTask", 

275 name="{catalogType}src", 

276 storageClass="SourceCatalog", 

277 dimensions=("instrument", "visit", "detector") 

278 ) 

279 outputCatalog = connectionTypes.Output( 

280 doc="Catalog of sources, `src` in Parquet format. The 'id' column is " 

281 "replaced with an index; all other columns are unchanged.", 

282 name="{catalogType}source", 

283 storageClass="DataFrame", 

284 dimensions=("instrument", "visit", "detector") 

285 ) 

286 

287 

288class WriteSourceTableConfig(pipeBase.PipelineTaskConfig, 

289 pipelineConnections=WriteSourceTableConnections): 

290 pass 

291 

292 

293class WriteSourceTableTask(CmdLineTask, pipeBase.PipelineTask): 

294 """Write source table to parquet. 

295 """ 

296 _DefaultName = "writeSourceTable" 

297 ConfigClass = WriteSourceTableConfig 

298 

299 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

300 inputs = butlerQC.get(inputRefs) 
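# Pack the (visit, detector) data ID into a single integer; run() attaches
# it to every row of the output table as the ccdVisitId column.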

301 inputs['ccdVisitId'] = butlerQC.quantum.dataId.pack("visit_detector") 

302 result = self.run(**inputs).table 

303 outputs = pipeBase.Struct(outputCatalog=result.toDataFrame()) 

304 butlerQC.put(outputs, outputRefs) 

305 

306 def run(self, catalog, ccdVisitId=None, **kwargs): 

307 """Convert `src` catalog to parquet 

308 

309 Parameters 

310 ---------- 

311 catalog : `afwTable.SourceCatalog`

312 Catalog to be converted.

313 ccdVisitId : `int`

314 ccdVisitId to be added as a column.

315 

316 Returns 

317 ------- 

318 result : `lsst.pipe.base.Struct` 

319 ``table`` 

320 `ParquetTable` version of the input catalog 

321 """ 

322 self.log.info("Generating parquet table from src catalog ccdVisitId=%s", ccdVisitId) 

323 df = catalog.asAstropy().to_pandas().set_index('id', drop=True) 

324 df['ccdVisitId'] = ccdVisitId 

325 return pipeBase.Struct(table=ParquetTable(dataFrame=df)) 

326 

327 

328class WriteRecalibratedSourceTableConnections(WriteSourceTableConnections, 

329 defaultTemplates={"catalogType": "", 

330 "skyWcsName": "jointcal", 

331 "photoCalibName": "fgcm"}, 

332 dimensions=("instrument", "visit", "detector", "skymap")): 

333 skyMap = connectionTypes.Input( 

334 doc="skyMap needed to choose which tract-level calibrations to use when multiple are available",

335 name=BaseSkyMap.SKYMAP_DATASET_TYPE_NAME, 

336 storageClass="SkyMap", 

337 dimensions=("skymap",), 

338 ) 

339 exposure = connectionTypes.Input( 

340 doc="Input exposure to perform photometry on.", 

341 name="calexp", 

342 storageClass="ExposureF", 

343 dimensions=["instrument", "visit", "detector"], 

344 ) 

345 externalSkyWcsTractCatalog = connectionTypes.Input( 

346 doc=("Per-tract, per-visit wcs calibrations. These catalogs use the detector " 

347 "id for the catalog id, sorted on id for fast lookup."), 

348 name="{skyWcsName}SkyWcsCatalog", 

349 storageClass="ExposureCatalog", 

350 dimensions=["instrument", "visit", "tract"], 

351 multiple=True 

352 ) 

353 externalSkyWcsGlobalCatalog = connectionTypes.Input( 

354 doc=("Per-visit wcs calibrations computed globally (with no tract information). " 

355 "These catalogs use the detector id for the catalog id, sorted on id for " 

356 "fast lookup."), 

357 name="finalVisitSummary", 

358 storageClass="ExposureCatalog", 

359 dimensions=["instrument", "visit"], 

360 ) 

361 externalPhotoCalibTractCatalog = connectionTypes.Input( 

362 doc=("Per-tract, per-visit photometric calibrations. These catalogs use the " 

363 "detector id for the catalog id, sorted on id for fast lookup."), 

364 name="{photoCalibName}PhotoCalibCatalog", 

365 storageClass="ExposureCatalog", 

366 dimensions=["instrument", "visit", "tract"], 

367 multiple=True 

368 ) 

369 externalPhotoCalibGlobalCatalog = connectionTypes.Input( 

370 doc=("Per-visit photometric calibrations computed globally (with no tract " 

371 "information). These catalogs use the detector id for the catalog id, " 

372 "sorted on id for fast lookup."), 

373 name="finalVisitSummary", 

374 storageClass="ExposureCatalog", 

375 dimensions=["instrument", "visit"], 

376 ) 

377 

378 def __init__(self, *, config=None): 

379 super().__init__(config=config) 

380 # Same connection boilerplate as all other applications of 

381 # Global/Tract calibrations 
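# Keep only the flavor of external calibration connection (global vs.
# per-tract) that the config asks for, and drop both flavors when external
# calibrations are not being applied or re-evaluated.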

382 if config.doApplyExternalSkyWcs and config.doReevaluateSkyWcs: 

383 if config.useGlobalExternalSkyWcs: 

384 self.inputs.remove("externalSkyWcsTractCatalog") 

385 else: 

386 self.inputs.remove("externalSkyWcsGlobalCatalog") 

387 else: 

388 self.inputs.remove("externalSkyWcsTractCatalog") 

389 self.inputs.remove("externalSkyWcsGlobalCatalog") 

390 if config.doApplyExternalPhotoCalib and config.doReevaluatePhotoCalib: 

391 if config.useGlobalExternalPhotoCalib: 

392 self.inputs.remove("externalPhotoCalibTractCatalog") 

393 else: 

394 self.inputs.remove("externalPhotoCalibGlobalCatalog") 

395 else: 

396 self.inputs.remove("externalPhotoCalibTractCatalog") 

397 self.inputs.remove("externalPhotoCalibGlobalCatalog") 

398 

399 

400class WriteRecalibratedSourceTableConfig(WriteSourceTableConfig, 

401 pipelineConnections=WriteRecalibratedSourceTableConnections): 

402 

403 doReevaluatePhotoCalib = pexConfig.Field( 

404 dtype=bool, 

405 default=True, 

406 doc=("Add or replace local photoCalib columns") 

407 ) 

408 doReevaluateSkyWcs = pexConfig.Field( 

409 dtype=bool, 

410 default=True, 

411 doc=("Add or replace local WCS columns and update the coord columns, coord_ra and coord_dec") 

412 ) 

413 doApplyExternalPhotoCalib = pexConfig.Field( 

414 dtype=bool, 

415 default=True, 

416 doc=("If and only if doReevaluatePhotoCalib, apply the photometric calibrations from an external "

417 "algorithm such as FGCM or jointcal, else use the photoCalib already attached to the exposure."), 

418 ) 

419 doApplyExternalSkyWcs = pexConfig.Field( 

420 dtype=bool, 

421 default=True, 

422 doc=("If and only if doReevaluateSkyWcs, apply the WCS from an external algorithm such as jointcal, "

423 "else use the wcs already attached to the exposure."), 

424 ) 

425 useGlobalExternalPhotoCalib = pexConfig.Field( 

426 dtype=bool, 

427 default=True, 

428 doc=("When using doApplyExternalPhotoCalib, use 'global' calibrations " 

429 "that are not run per-tract. When False, use per-tract photometric " 

430 "calibration files.") 

431 ) 

432 useGlobalExternalSkyWcs = pexConfig.Field( 

433 dtype=bool, 

434 default=True, 

435 doc=("When using doApplyExternalSkyWcs, use 'global' calibrations " 

436 "that are not run per-tract. When False, use per-tract wcs " 

437 "files.") 

438 ) 

439 

440 def validate(self): 

441 super().validate() 

442 if self.doApplyExternalSkyWcs and not self.doReevaluateSkyWcs: 

443 log.warning("doApplyExternalSkyWcs=True but doReevaluateSkyWcs=False. "

444 "External SkyWcs will not be read or evaluated.") 

445 if self.doApplyExternalPhotoCalib and not self.doReevaluatePhotoCalib: 

446 log.warning("doApplyExternalPhotoCalib=True but doReevaluatePhotoCalib=False. "

447 "External PhotoCalib will not be read or evaluated.") 

448 

449 

450class WriteRecalibratedSourceTableTask(WriteSourceTableTask): 

451 """Write source table to parquet 

452 """ 

453 _DefaultName = "writeRecalibratedSourceTable" 

454 ConfigClass = WriteRecalibratedSourceTableConfig 

455 

456 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

457 inputs = butlerQC.get(inputRefs) 

458 inputs['ccdVisitId'] = butlerQC.quantum.dataId.pack("visit_detector") 

459 inputs['exposureIdInfo'] = ExposureIdInfo.fromDataId(butlerQC.quantum.dataId, "visit_detector") 

460 

461 if self.config.doReevaluatePhotoCalib or self.config.doReevaluateSkyWcs: 

462 if self.config.doApplyExternalPhotoCalib or self.config.doApplyExternalSkyWcs: 

463 inputs['exposure'] = self.attachCalibs(inputRefs, **inputs) 

464 

465 inputs['catalog'] = self.addCalibColumns(**inputs) 

466 

467 result = self.run(**inputs).table 

468 outputs = pipeBase.Struct(outputCatalog=result.toDataFrame()) 

469 butlerQC.put(outputs, outputRefs) 

470 

471 def attachCalibs(self, inputRefs, skyMap, exposure, externalSkyWcsGlobalCatalog=None, 

472 externalSkyWcsTractCatalog=None, externalPhotoCalibGlobalCatalog=None, 

473 externalPhotoCalibTractCatalog=None, **kwargs): 

474 """Apply external calibrations to exposure per configuration 

475 

476 When multiple tract-level calibrations overlap, select the one with the 

477 center closest to detector. 

478 

479 Parameters 

480 ---------- 

481 inputRefs : `lsst.pipe.base.InputQuantizedConnection`

482 Input connections, used to look up the dataIds of the tract-level calibs.

483 skyMap : `lsst.skymap.SkyMap` 

484 exposure : `lsst.afw.image.exposure.Exposure` 

485 Input exposure to adjust calibrations. 

486 externalSkyWcsGlobalCatalog : `lsst.afw.table.ExposureCatalog`, optional 

487 Exposure catalog with external skyWcs to be applied per config 

488 externalSkyWcsTractCatalog : `lsst.afw.table.ExposureCatalog`, optional 

489 Exposure catalog with external skyWcs to be applied per config 

490 externalPhotoCalibGlobalCatalog : `lsst.afw.table.ExposureCatalog`, optional 

491 Exposure catalog with external photoCalib to be applied per config 

492 externalPhotoCalibTractCatalog : `lsst.afw.table.ExposureCatalog`, optional 

493 Exposure catalog with external photoCalib to be applied per config

494 

495 Returns 

496 ------- 

497 exposure : `lsst.afw.image.exposure.Exposure` 

498 Exposure with adjusted calibrations. 

499 """ 

500 if not self.config.doApplyExternalSkyWcs: 

501 # Do not modify the exposure's SkyWcs 

502 externalSkyWcsCatalog = None 

503 elif self.config.useGlobalExternalSkyWcs: 

504 # Use the global external SkyWcs 

505 externalSkyWcsCatalog = externalSkyWcsGlobalCatalog 

506 self.log.info('Applying global SkyWcs') 

507 else: 

508 # use tract-level external SkyWcs from the closest overlapping tract 

509 inputRef = getattr(inputRefs, 'externalSkyWcsTractCatalog') 

510 tracts = [ref.dataId['tract'] for ref in inputRef] 

511 if len(tracts) == 1: 

512 ind = 0 

513 self.log.info('Applying tract-level SkyWcs from tract %s', tracts[ind]) 

514 else: 

515 ind = self.getClosestTract(tracts, skyMap, 

516 exposure.getBBox(), exposure.getWcs()) 

517 self.log.info('Multiple overlapping externalSkyWcsTractCatalogs found (%s). ' 

518 'Applying closest to detector center: tract=%s', str(tracts), tracts[ind]) 

519 

520 externalSkyWcsCatalog = externalSkyWcsTractCatalog[ind] 

521 

522 if not self.config.doApplyExternalPhotoCalib: 

523 # Do not modify the exposure's PhotoCalib 

524 externalPhotoCalibCatalog = None 

525 elif self.config.useGlobalExternalPhotoCalib: 

526 # Use the global external PhotoCalib 

527 externalPhotoCalibCatalog = externalPhotoCalibGlobalCatalog 

528 self.log.info('Applying global PhotoCalib') 

529 else: 

530 # use tract-level external PhotoCalib from the closest overlapping tract 

531 inputRef = getattr(inputRefs, 'externalPhotoCalibTractCatalog') 

532 tracts = [ref.dataId['tract'] for ref in inputRef] 

533 if len(tracts) == 1: 

534 ind = 0 

535 self.log.info('Applying tract-level PhotoCalib from tract %s', tracts[ind]) 

536 else: 

537 ind = self.getClosestTract(tracts, skyMap, 

538 exposure.getBBox(), exposure.getWcs()) 

539 self.log.info('Multiple overlapping externalPhotoCalibTractCatalogs found (%s). ' 

540 'Applying closest to detector center: tract=%s', str(tracts), tracts[ind]) 

541 

542 externalPhotoCalibCatalog = externalPhotoCalibTractCatalog[ind] 

543 

544 return self.prepareCalibratedExposure(exposure, externalSkyWcsCatalog, externalPhotoCalibCatalog) 

545 

546 def getClosestTract(self, tracts, skyMap, bbox, wcs): 

547 """Find the index of the tract closest to the detector from a list of tractIds.

548 

549 Parameters 

550 ---------- 

551 tracts: `list` [`int`] 

552 Iterable of integer tractIds 

553 skyMap : `lsst.skymap.SkyMap` 

554 skyMap to look up tract geometry and wcs

555 bbox : `lsst.geom.Box2I` 

556 Detector bbox, the center of which will be compared to tract centers.

557 wcs : `lsst.afw.geom.SkyWcs` 

558 Detector Wcs object to map the detector center to SkyCoord 

559 

560 Returns 

561 ------- 

562 index : `int` 

563 """ 

564 if len(tracts) == 1: 

565 return 0 

566 

567 center = wcs.pixelToSky(bbox.getCenter()) 

568 sep = [] 

569 for tractId in tracts: 

570 tract = skyMap[tractId] 

571 tractCenter = tract.getWcs().pixelToSky(tract.getBBox().getCenter()) 

572 sep.append(center.separation(tractCenter)) 

573 

574 return np.argmin(sep) 

575 

576 def prepareCalibratedExposure(self, exposure, externalSkyWcsCatalog=None, externalPhotoCalibCatalog=None): 

577 """Prepare a calibrated exposure and apply external calibrations 

578 if so configured. 

579 

580 Parameters 

581 ---------- 

582 exposure : `lsst.afw.image.exposure.Exposure` 

583 Input exposure to adjust calibrations. 

584 externalSkyWcsCatalog : `lsst.afw.table.ExposureCatalog`, optional 

585 Exposure catalog with external skyWcs to be applied 

586 if config.doApplyExternalSkyWcs=True. Catalog uses the detector id 

587 for the catalog id, sorted on id for fast lookup. 

588 externalPhotoCalibCatalog : `lsst.afw.table.ExposureCatalog`, optional 

589 Exposure catalog with external photoCalib to be applied 

590 if config.doApplyExternalPhotoCalib=True. Catalog uses the detector 

591 id for the catalog id, sorted on id for fast lookup. 

592 

593 Returns 

594 ------- 

595 exposure : `lsst.afw.image.exposure.Exposure` 

596 Exposure with adjusted calibrations. 

597 """ 

598 detectorId = exposure.getInfo().getDetector().getId() 

599 

600 if externalPhotoCalibCatalog is not None: 

601 row = externalPhotoCalibCatalog.find(detectorId) 

602 if row is None: 

603 self.log.warning("Detector id %s not found in externalPhotoCalibCatalog; " 

604 "Using original photoCalib.", detectorId) 

605 else: 

606 photoCalib = row.getPhotoCalib() 

607 if photoCalib is None: 

608 self.log.warning("Detector id %s has None for photoCalib in externalPhotoCalibCatalog; " 

609 "Using original photoCalib.", detectorId) 

610 else: 

611 exposure.setPhotoCalib(photoCalib) 

612 

613 if externalSkyWcsCatalog is not None: 

614 row = externalSkyWcsCatalog.find(detectorId) 

615 if row is None: 

616 self.log.warning("Detector id %s not found in externalSkyWcsCatalog; " 

617 "Using original skyWcs.", detectorId) 

618 else: 

619 skyWcs = row.getWcs() 

620 if skyWcs is None: 

621 self.log.warning("Detector id %s has None for skyWcs in externalSkyWcsCatalog; " 

622 "Using original skyWcs.", detectorId) 

623 else: 

624 exposure.setWcs(skyWcs) 

625 

626 return exposure 

627 

628 def addCalibColumns(self, catalog, exposure, exposureIdInfo, **kwargs): 

629 """Add or replace columns with calibs evaluated at each centroid.

630 

631 Add or replace ``base_LocalWcs`` and ``base_LocalPhotoCalib`` columns in

632 a source catalog by rerunning the plugins.

633 

634 Parameters 

635 ---------- 

636 catalog : `lsst.afw.table.SourceCatalog` 

637 catalog to which calib columns will be added 

638 exposure : `lsst.afw.image.exposure.Exposure` 

639 Exposure with attached PhotoCalibs and SkyWcs attributes to be 

640 reevaluated at local centroids. Pixels are not required. 

641 exposureIdInfo : `lsst.obs.base.ExposureIdInfo` 

642 

643 Returns 

644 ------- 

645 newCat: `lsst.afw.table.SourceCatalog` 

646 Source Catalog with requested local calib columns 

647 """ 

648 measureConfig = SingleFrameMeasurementTask.ConfigClass() 

649 measureConfig.doReplaceWithNoise = False 

650 

651 measureConfig.plugins.names = [] 

652 if self.config.doReevaluateSkyWcs: 

653 measureConfig.plugins.names.add('base_LocalWcs') 

654 self.log.info("Re-evaluating base_LocalWcs plugin") 

655 if self.config.doReevaluatePhotoCalib: 

656 measureConfig.plugins.names.add('base_LocalPhotoCalib') 

657 self.log.info("Re-evaluating base_LocalPhotoCalib plugin") 

658 pluginsNotToCopy = tuple(measureConfig.plugins.names) 

659 

660 # Create a new schema and catalog 

661 # Copy all columns from original except for the ones to reevaluate 

662 aliasMap = catalog.schema.getAliasMap() 

663 mapper = afwTable.SchemaMapper(catalog.schema) 

664 for item in catalog.schema: 

665 if not item.field.getName().startswith(pluginsNotToCopy): 

666 mapper.addMapping(item.key) 

667 

668 schema = mapper.getOutputSchema() 

669 measurement = SingleFrameMeasurementTask(config=measureConfig, schema=schema) 

670 schema.setAliasMap(aliasMap) 

671 newCat = afwTable.SourceCatalog(schema) 

672 newCat.extend(catalog, mapper=mapper) 

673 

674 # Fluxes in sourceCatalogs are in counts, so there are no fluxes to 

675 # update here. LocalPhotoCalibs are applied during transform tasks. 

676 # Update coord_ra/coord_dec, which are expected to be positions on the 

677 # sky and are used as such in sdm tables without transform 

678 if self.config.doReevaluateSkyWcs: 

679 afwTable.updateSourceCoords(exposure.wcs, newCat) 

680 

681 measurement.run(measCat=newCat, exposure=exposure, exposureId=exposureIdInfo.expId) 

682 

683 return newCat 

684 

685 

686class PostprocessAnalysis(object): 

687 """Calculate columns from ParquetTable. 

688 

689 This object manages and organizes an arbitrary set of computations 

690 on a catalog. The catalog is defined by a 

691 `lsst.pipe.tasks.parquetTable.ParquetTable` object (or list thereof), such 

692 as a `deepCoadd_obj` dataset, and the computations are defined by a 

693 collection of `lsst.pipe.tasks.functors.Functor` objects (or, equivalently,

694 a `CompositeFunctor`). 

695 

696 After the object is initialized, accessing the `.df` attribute (which 

697 holds the `pandas.DataFrame` containing the results of the calculations) 

698 triggers computation of said dataframe. 

699 

700 One of the conveniences of using this object is the ability to define a 

701 desired common filter for all functors. This enables the same functor 

702 collection to be passed to several different `PostprocessAnalysis` objects 

703 without having to change the original functor collection, since the `filt` 

704 keyword argument of this object triggers an overwrite of the `filt` 

705 property for all functors in the collection. 

706 

707 This object also allows a list of refFlags to be passed, and defines a set 

708 of default refFlags that are always included even if not requested. 

709 

710 If a list of `ParquetTable` objects is passed, rather than a single one,

711 then the calculations will be mapped over all the input catalogs. In 

712 principle, it should be straightforward to parallelize this activity, but 

713 initial tests have failed (see TODO in code comments). 

714 

715 Parameters 

716 ---------- 

717 parq : `lsst.pipe.tasks.ParquetTable` (or list of such) 

718 Source catalog(s) for computation. 

719 

720 functors : `list`, `dict`, or `lsst.pipe.tasks.functors.CompositeFunctor` 

721 Computations to do (functors that act on `parq`). 

722 If a dict, the output 

723 DataFrame will have columns keyed accordingly. 

724 If a list, the column keys will come from the 

725 `.shortname` attribute of each functor. 

726 

727 filt : `str`, optional 

728 Filter in which to calculate. If provided, 

729 this will overwrite any existing `.filt` attribute 

730 of the provided functors. 

731 

732 flags : `list`, optional 

733 List of flags (per-band) to include in output table. 

734 Taken from the `meas` dataset if applied to a multilevel Object Table. 

735 

736 refFlags : `list`, optional 

737 List of refFlags (only reference band) to include in output table. 

738 

739 forcedFlags : `list`, optional 

740 List of flags (per-band) to include in output table. 

741 Taken from the ``forced_src`` dataset if applied to a 

742 multilevel Object Table. Intended for flags from measurement plugins 

743 only run during multi-band forced-photometry. 

744 """ 
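# Typical usage, as a rough sketch (the parquet handle and functor YAML
# path are illustrative, not fixed names):
#
#     funcs = CompositeFunctor.from_file('Object.yaml')
#     analysis = PostprocessAnalysis(parq, funcs, filt='i',
#                                    refFlags=['detect_isPrimary'])
#     df = analysis.df   # first access triggers the computation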

745 _defaultRefFlags = [] 

746 _defaultFuncs = () 

747 

748 def __init__(self, parq, functors, filt=None, flags=None, refFlags=None, forcedFlags=None): 

749 self.parq = parq 

750 self.functors = functors 

751 

752 self.filt = filt 

753 self.flags = list(flags) if flags is not None else [] 

754 self.forcedFlags = list(forcedFlags) if forcedFlags is not None else [] 

755 self.refFlags = list(self._defaultRefFlags) 

756 if refFlags is not None: 

757 self.refFlags += list(refFlags) 

758 

759 self._df = None 

760 

761 @property 

762 def defaultFuncs(self): 

763 funcs = dict(self._defaultFuncs) 

764 return funcs 

765 

766 @property 

767 def func(self): 

768 additionalFuncs = self.defaultFuncs 
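# Requested flags become plain Column functors, drawn from the
# forced_src, ref, or meas dataset respectively.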

769 additionalFuncs.update({flag: Column(flag, dataset='forced_src') for flag in self.forcedFlags}) 

770 additionalFuncs.update({flag: Column(flag, dataset='ref') for flag in self.refFlags}) 

771 additionalFuncs.update({flag: Column(flag, dataset='meas') for flag in self.flags}) 

772 

773 if isinstance(self.functors, CompositeFunctor): 

774 func = self.functors 

775 else: 

776 func = CompositeFunctor(self.functors) 

777 

778 func.funcDict.update(additionalFuncs) 

779 func.filt = self.filt 

780 

781 return func 

782 

783 @property 

784 def noDupCols(self): 

785 return [name for name, func in self.func.funcDict.items() if func.noDup or func.dataset == 'ref'] 

786 

787 @property 

788 def df(self): 

789 if self._df is None: 

790 self.compute() 

791 return self._df 

792 

793 def compute(self, dropna=False, pool=None): 

794 # map over multiple parquet tables 

795 if type(self.parq) in (list, tuple): 

796 if pool is None: 

797 dflist = [self.func(parq, dropna=dropna) for parq in self.parq] 

798 else: 

799 # TODO: Figure out why this doesn't work (pyarrow pickling 

800 # issues?) 

801 dflist = pool.map(functools.partial(self.func, dropna=dropna), self.parq) 

802 self._df = pd.concat(dflist) 

803 else: 

804 self._df = self.func(self.parq, dropna=dropna) 

805 

806 return self._df 

807 

808 

809class TransformCatalogBaseConnections(pipeBase.PipelineTaskConnections, 

810 dimensions=()): 

811 """Expected Connections for subclasses of TransformCatalogBaseTask. 

812 

813 Must be subclassed. 

814 """ 
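# Subclasses override the empty names (and dimensions) below; see e.g.
# TransformObjectCatalogConnections, which binds inputCatalog to
# "{coaddName}Coadd_obj" and outputCatalog to "objectTable".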

815 inputCatalog = connectionTypes.Input( 

816 name="", 

817 storageClass="DataFrame", 

818 ) 

819 outputCatalog = connectionTypes.Output( 

820 name="", 

821 storageClass="DataFrame", 

822 ) 

823 

824 

825class TransformCatalogBaseConfig(pipeBase.PipelineTaskConfig, 

826 pipelineConnections=TransformCatalogBaseConnections): 

827 functorFile = pexConfig.Field( 

828 dtype=str, 

829 doc="Path to YAML file specifying Science Data Model functors to use " 

830 "when copying columns and computing calibrated values.", 

831 default=None, 

832 optional=True 

833 ) 

834 primaryKey = pexConfig.Field( 

835 dtype=str, 

836 doc="Name of column to be set as the DataFrame index. If None, the index "

837 "will be named `id`", 

838 default=None, 

839 optional=True 

840 ) 

841 columnsFromDataId = pexConfig.ListField( 

842 dtype=str, 

843 default=None, 

844 optional=True, 

845 doc="Columns to extract from the dataId", 

846 ) 

847 

848 

849class TransformCatalogBaseTask(pipeBase.PipelineTask): 

850 """Base class for transforming/standardizing a catalog.

851

852 Standardization is done by applying functors that convert units and apply calibrations.

853 The purpose of this task is to perform a set of computations on 

854 an input `ParquetTable` dataset (such as `deepCoadd_obj`) and write the 

855 results to a new dataset (which needs to be declared in an `outputDataset` 

856 attribute). 

857 

858 The calculations to be performed are defined in a YAML file that specifies 

859 a set of functors to be computed, provided as 

860 the `functorFile` config parameter. An example of such a YAML file

861 is the following: 

862 

863 funcs: 

864 psfMag: 

865 functor: Mag 

866 args: 

867 - base_PsfFlux 

868 filt: HSC-G 

869 dataset: meas 

870 cmodel_magDiff: 

871 functor: MagDiff 

872 args: 

873 - modelfit_CModel 

874 - base_PsfFlux 

875 filt: HSC-G 

876 gauss_magDiff: 

877 functor: MagDiff 

878 args: 

879 - base_GaussianFlux 

880 - base_PsfFlux 

881 filt: HSC-G 

882 count: 

883 functor: Column 

884 args: 

885 - base_InputCount_value 

886 filt: HSC-G 

887 deconvolved_moments: 

888 functor: DeconvolvedMoments 

889 filt: HSC-G 

890 dataset: forced_src 

891 refFlags: 

892 - calib_psfUsed 

893 - merge_measurement_i 

894 - merge_measurement_r 

895 - merge_measurement_z 

896 - merge_measurement_y 

897 - merge_measurement_g 

898 - base_PixelFlags_flag_inexact_psfCenter 

899 - detect_isPrimary 

900 

901 The names for each entry under "funcs" will become the names of columns in

902 the output dataset. All the functors referenced are defined in 

903 `lsst.pipe.tasks.functors`. Positional arguments to be passed to each 

904 functor are in the `args` list, and any additional entries for each column 

905 other than "functor" or "args" (e.g., `'filt'`, `'dataset'`) are treated as 

906 keyword arguments to be passed to the functor initialization. 

907 

908 The "flags" entry is the default shortcut for `Column` functors. 

909 All columns listed under "flags" will be copied to the output table 

910 untransformed. They can be of any datatype. 

911 In the special case of transforming a multi-level object table with

912 band and dataset indices (deepCoadd_obj), these will be taken from the

913 `meas` dataset and exploded out per band. 

914 

915 There are two special shortcuts that only apply when transforming 

916 multi-level Object (deepCoadd_obj) tables: 

917 - The "refFlags" entry is a shortcut for `Column` functors

918 taken from the `'ref'` dataset if transforming an ObjectTable.

919 - The "forcedFlags" entry is a shortcut for `Column` functors

920 taken from the ``forced_src`` dataset if transforming an ObjectTable.

921 These are expanded out per band. 

922 

923 

924 This task uses the `lsst.pipe.tasks.postprocess.PostprocessAnalysis` object 

925 to organize and execute the calculations.

926 """ 
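# A concrete subclass is pointed at its YAML definitions through the
# ``functorFile`` config field; for example (path shown for illustration,
# compare TransformObjectCatalogConfig.setDefaults below):
#
#     config.functorFile = os.path.join('$PIPE_TASKS_DIR', 'schemas', 'Object.yaml')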

927 @property 

928 def _DefaultName(self): 

929 raise NotImplementedError('Subclass must define "_DefaultName" attribute') 

930 

931 @property 

932 def outputDataset(self): 

933 raise NotImplementedError('Subclass must define "outputDataset" attribute') 

934 

935 @property 

936 def inputDataset(self): 

937 raise NotImplementedError('Subclass must define "inputDataset" attribute') 

938 

939 @property 

940 def ConfigClass(self): 

941 raise NotImplementedError('Subclass must define "ConfigClass" attribute') 

942 

943 def __init__(self, *args, **kwargs): 

944 super().__init__(*args, **kwargs) 

945 if self.config.functorFile: 

946 self.log.info('Loading transform functor definitions from %s',

947 self.config.functorFile) 

948 self.funcs = CompositeFunctor.from_file(self.config.functorFile) 

949 self.funcs.update(dict(PostprocessAnalysis._defaultFuncs)) 

950 else: 

951 self.funcs = None 

952 

953 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

954 inputs = butlerQC.get(inputRefs) 

955 if self.funcs is None: 

956 raise ValueError("config.functorFile is None. " 

957 "Must be a valid path to yaml in order to run Task as a PipelineTask.") 

958 result = self.run(parq=inputs['inputCatalog'], funcs=self.funcs, 

959 dataId=outputRefs.outputCatalog.dataId.full) 

960 outputs = pipeBase.Struct(outputCatalog=result) 

961 butlerQC.put(outputs, outputRefs) 

962 

963 def run(self, parq, funcs=None, dataId=None, band=None): 

964 """Do postprocessing calculations 

965 

966 Takes a `ParquetTable` object and dataId, 

967 returns a dataframe with results of postprocessing calculations. 

968 

969 Parameters 

970 ---------- 

971 parq : `lsst.pipe.tasks.parquetTable.ParquetTable` 

972 ParquetTable from which calculations are done. 

973 funcs : `lsst.pipe.tasks.functors.Functors` 

974 Functors to apply to the table's columns 

975 dataId : dict, optional 

976 Used to add the columns listed in config.columnsFromDataId to the output dataframe.

977 band : `str`, optional 

978 Filter band that is being processed. 

979 

980 Returns 

981 -------

982 df : `pandas.DataFrame` 

983 """ 

984 self.log.info("Transforming/standardizing the source table dataId: %s", dataId) 

985 

986 df = self.transform(band, parq, funcs, dataId).df 

987 self.log.info("Made a table of %d columns and %d rows", len(df.columns), len(df)) 

988 return df 

989 

990 def getFunctors(self): 

991 return self.funcs 

992 

993 def getAnalysis(self, parq, funcs=None, band=None): 

994 if funcs is None: 

995 funcs = self.funcs 

996 analysis = PostprocessAnalysis(parq, funcs, filt=band) 

997 return analysis 

998 

999 def transform(self, band, parq, funcs, dataId): 

1000 analysis = self.getAnalysis(parq, funcs=funcs, band=band) 

1001 df = analysis.df 

1002 if dataId and self.config.columnsFromDataId: 

1003 for key in self.config.columnsFromDataId: 

1004 if key in dataId: 

1005 df[str(key)] = dataId[key] 

1006 else: 

1007 raise ValueError(f"'{key}' in config.columnsFromDataId not found in dataId: {dataId}") 

1008 

1009 if self.config.primaryKey: 

1010 if df.index.name != self.config.primaryKey and self.config.primaryKey in df: 

1011 df.reset_index(inplace=True, drop=True) 

1012 df.set_index(self.config.primaryKey, inplace=True) 

1013 

1014 return pipeBase.Struct( 

1015 df=df, 

1016 analysis=analysis 

1017 ) 

1018 

1019 def write(self, df, parqRef): 

1020 parqRef.put(ParquetTable(dataFrame=df), self.outputDataset) 

1021 

1022 def writeMetadata(self, dataRef): 

1023 """No metadata to write. 

1024 """ 

1025 pass 

1026 

1027 

1028class TransformObjectCatalogConnections(pipeBase.PipelineTaskConnections, 

1029 defaultTemplates={"coaddName": "deep"}, 

1030 dimensions=("tract", "patch", "skymap")): 

1031 inputCatalog = connectionTypes.Input( 

1032 doc="The vertical concatenation of the deepCoadd_{ref|meas|forced_src} catalogs, " 

1033 "stored as a DataFrame with a multi-level column index per-patch.", 

1034 dimensions=("tract", "patch", "skymap"), 

1035 storageClass="DataFrame", 

1036 name="{coaddName}Coadd_obj", 

1037 deferLoad=True, 

1038 ) 

1039 outputCatalog = connectionTypes.Output( 

1040 doc="Per-Patch Object Table of columns transformed from the deepCoadd_obj table per the standard " 

1041 "data model.", 

1042 dimensions=("tract", "patch", "skymap"), 

1043 storageClass="DataFrame", 

1044 name="objectTable" 

1045 ) 

1046 

1047 

1048class TransformObjectCatalogConfig(TransformCatalogBaseConfig, 

1049 pipelineConnections=TransformObjectCatalogConnections): 

1050 coaddName = pexConfig.Field( 

1051 dtype=str, 

1052 default="deep", 

1053 doc="Name of coadd" 

1054 ) 

1055 # TODO: remove in DM-27177 

1056 filterMap = pexConfig.DictField( 

1057 keytype=str, 

1058 itemtype=str, 

1059 default={}, 

1060 doc=("Dictionary mapping full filter name to short one for column name munging. "

1061 "These filters determine the output columns no matter what filters the " 

1062 "input data actually contain."), 

1063 deprecated=("Coadds are now identified by the band, so this transform is unused. "

1064 "Will be removed after v22.") 

1065 ) 

1066 outputBands = pexConfig.ListField( 

1067 dtype=str, 

1068 default=None, 

1069 optional=True, 

1070 doc=("These bands and only these bands will appear in the output," 

1071 " NaN-filled if the input does not include them." 

1072 " If None, then use all bands found in the input.") 

1073 ) 

1074 camelCase = pexConfig.Field( 

1075 dtype=bool, 

1076 default=False, 

1077 doc=("Write per-band column names with camelCase, else underscore. "

1078 "For example: gPsFlux instead of g_PsFlux.") 

1079 ) 

1080 multilevelOutput = pexConfig.Field( 

1081 dtype=bool, 

1082 default=False, 

1083 doc=("Whether results dataframe should have a multilevel column index (True) or be flat " 

1084 "and name-munged (False).") 

1085 ) 

1086 goodFlags = pexConfig.ListField( 

1087 dtype=str, 

1088 default=[], 

1089 doc=("List of 'good' flags that should be set False when populating empty tables. " 

1090 "All other flags are considered to be 'bad' flags and will be set to True.") 

1091 ) 

1092 floatFillValue = pexConfig.Field( 

1093 dtype=float, 

1094 default=np.nan, 

1095 doc="Fill value for float fields when populating empty tables." 

1096 ) 

1097 integerFillValue = pexConfig.Field( 

1098 dtype=int, 

1099 default=-1, 

1100 doc="Fill value for integer fields when populating empty tables." 

1101 ) 

1102 

1103 def setDefaults(self): 

1104 super().setDefaults() 

1105 self.functorFile = os.path.join('$PIPE_TASKS_DIR', 'schemas', 'Object.yaml') 

1106 self.primaryKey = 'objectId' 

1107 self.columnsFromDataId = ['tract', 'patch'] 

1108 self.goodFlags = ['calib_astrometry_used', 

1109 'calib_photometry_reserved', 

1110 'calib_photometry_used', 

1111 'calib_psf_candidate', 

1112 'calib_psf_reserved', 

1113 'calib_psf_used'] 

1114 

1115 

1116class TransformObjectCatalogTask(TransformCatalogBaseTask): 

1117 """Produce a flattened Object Table to match the format specified in 

1118 sdm_schemas. 

1119 

1120 Do the same set of postprocessing calculations on all bands. 

1121 

1122 This is identical to `TransformCatalogBaseTask`, except that it does

1123 the specified functor calculations for all filters present in the 

1124 input `deepCoadd_obj` table. Any specific `"filt"` keywords specified 

1125 by the YAML file will be superseded.

1126 """ 

1127 _DefaultName = "transformObjectCatalog" 

1128 ConfigClass = TransformObjectCatalogConfig 

1129 

1130 def run(self, parq, funcs=None, dataId=None, band=None): 

1131 # NOTE: band kwarg is ignored here. 

1132 dfDict = {} 

1133 analysisDict = {} 

1134 templateDf = pd.DataFrame() 

1135 

1136 if isinstance(parq, DeferredDatasetHandle): 

1137 columns = parq.get(component='columns') 

1138 inputBands = columns.unique(level=1).values 

1139 else: 

1140 inputBands = parq.columnLevelNames['band'] 

1141 

1142 outputBands = self.config.outputBands if self.config.outputBands else inputBands 

1143 

1144 # Perform transform for data of filters that exist in parq. 

1145 for inputBand in inputBands: 

1146 if inputBand not in outputBands: 

1147 self.log.info("Ignoring %s band data in the input", inputBand) 

1148 continue 

1149 self.log.info("Transforming the catalog of band %s", inputBand) 

1150 result = self.transform(inputBand, parq, funcs, dataId) 

1151 dfDict[inputBand] = result.df 

1152 analysisDict[inputBand] = result.analysis 

1153 if templateDf.empty: 

1154 templateDf = result.df 

1155 

1156 # Put filler values in columns of other wanted bands 

1157 for filt in outputBands: 

1158 if filt not in dfDict: 

1159 self.log.info("Adding empty columns for band %s", filt) 

1160 dfTemp = templateDf.copy() 

1161 for col in dfTemp.columns: 

1162 testValue = dfTemp[col].values[0] 

1163 if isinstance(testValue, (np.bool_, pd.BooleanDtype)): 

1164 # Boolean flag type, check if it is a "good" flag 

1165 if col in self.config.goodFlags: 

1166 fillValue = False 

1167 else: 

1168 fillValue = True 

1169 elif isinstance(testValue, numbers.Integral): 

1170 # Checking numbers.Integral catches all flavors 

1171 # of python, numpy, pandas, etc. integers. 

1172 # We must ensure this is not an unsigned integer. 

1173 if isinstance(testValue, np.unsignedinteger): 

1174 raise ValueError("Parquet tables may not have unsigned integer columns.") 

1175 else: 

1176 fillValue = self.config.integerFillValue 

1177 else: 

1178 fillValue = self.config.floatFillValue 

1179 dfTemp[col].values[:] = fillValue 

1180 dfDict[filt] = dfTemp 

1181 

1182 # This makes a multilevel column index, with band as first level 

1183 df = pd.concat(dfDict, axis=1, names=['band', 'column']) 

1184 
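# Unless multilevelOutput is set, the band level is flattened out below,
# producing per-band column names such as g_PsFlux (or gPsFlux when
# config.camelCase is True).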

1185 if not self.config.multilevelOutput: 

1186 noDupCols = list(set.union(*[set(v.noDupCols) for v in analysisDict.values()])) 

1187 if self.config.primaryKey in noDupCols: 

1188 noDupCols.remove(self.config.primaryKey) 

1189 if dataId and self.config.columnsFromDataId: 

1190 noDupCols += self.config.columnsFromDataId 

1191 df = flattenFilters(df, noDupCols=noDupCols, camelCase=self.config.camelCase, 

1192 inputBands=inputBands) 

1193 

1194 self.log.info("Made a table of %d columns and %d rows", len(df.columns), len(df)) 

1195 

1196 return df 

1197 

1198 

1199class TractObjectDataIdContainer(CoaddDataIdContainer): 

1200 

1201 def makeDataRefList(self, namespace): 

1202 """Make self.refList from self.idList 

1203 

1204 Generate a list of data references given tract and/or patch. 

1205 This was adapted from `TractQADataIdContainer`, which was 

1206 `TractDataIdContainer` modified to not require "filter".

1207 Only existing dataRefs are returned. 

1208 """ 

1209 def getPatchRefList(tract): 

1210 return [namespace.butler.dataRef(datasetType=self.datasetType, 

1211 tract=tract.getId(), 

1212 patch="%d,%d" % patch.getIndex()) for patch in tract] 

1213 

1214 tractRefs = defaultdict(list) # Data references for each tract 

1215 for dataId in self.idList: 

1216 skymap = self.getSkymap(namespace) 

1217 

1218 if "tract" in dataId: 

1219 tractId = dataId["tract"] 

1220 if "patch" in dataId: 

1221 tractRefs[tractId].append(namespace.butler.dataRef(datasetType=self.datasetType, 

1222 tract=tractId, 

1223 patch=dataId['patch'])) 

1224 else: 

1225 tractRefs[tractId] += getPatchRefList(skymap[tractId]) 

1226 else: 

1227 tractRefs = dict((tract.getId(), tractRefs.get(tract.getId(), []) + getPatchRefList(tract)) 

1228 for tract in skymap) 

1229 outputRefList = [] 

1230 for tractRefList in tractRefs.values(): 

1231 existingRefs = [ref for ref in tractRefList if ref.datasetExists()] 

1232 outputRefList.append(existingRefs) 

1233 

1234 self.refList = outputRefList 

1235 

1236 

1237class ConsolidateObjectTableConnections(pipeBase.PipelineTaskConnections, 

1238 dimensions=("tract", "skymap")): 

1239 inputCatalogs = connectionTypes.Input( 

1240 doc="Per-Patch objectTables conforming to the standard data model.", 

1241 name="objectTable", 

1242 storageClass="DataFrame", 

1243 dimensions=("tract", "patch", "skymap"), 

1244 multiple=True, 

1245 ) 

1246 outputCatalog = connectionTypes.Output( 

1247 doc="Per-tract horizontal concatenation of the input objectTables",

1248 name="objectTable_tract", 

1249 storageClass="DataFrame", 

1250 dimensions=("tract", "skymap"), 

1251 ) 

1252 

1253 

1254class ConsolidateObjectTableConfig(pipeBase.PipelineTaskConfig, 

1255 pipelineConnections=ConsolidateObjectTableConnections): 

1256 coaddName = pexConfig.Field( 

1257 dtype=str, 

1258 default="deep", 

1259 doc="Name of coadd" 

1260 ) 

1261 

1262 

1263class ConsolidateObjectTableTask(CmdLineTask, pipeBase.PipelineTask): 

1264 """Write patch-merged source tables to a tract-level parquet file. 

1265 

1266 Concatenates the per-patch `objectTable` list into a per-tract `objectTable_tract`.

1267 """ 

1268 _DefaultName = "consolidateObjectTable" 

1269 ConfigClass = ConsolidateObjectTableConfig 

1270 

1271 inputDataset = 'objectTable' 

1272 outputDataset = 'objectTable_tract' 

1273 

1274 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

1275 inputs = butlerQC.get(inputRefs) 

1276 self.log.info("Concatenating %s per-patch Object Tables", 

1277 len(inputs['inputCatalogs'])) 

1278 df = pd.concat(inputs['inputCatalogs']) 

1279 butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs) 

1280 

1281 @classmethod 

1282 def _makeArgumentParser(cls): 

1283 parser = ArgumentParser(name=cls._DefaultName) 

1284 

1285 parser.add_id_argument("--id", cls.inputDataset, 

1286 help="data ID, e.g. --id tract=12345", 

1287 ContainerClass=TractObjectDataIdContainer) 

1288 return parser 

1289 

1290 def runDataRef(self, patchRefList): 

1291 df = pd.concat([patchRef.get().toDataFrame() for patchRef in patchRefList]) 

1292 patchRefList[0].put(ParquetTable(dataFrame=df), self.outputDataset) 

1293 

1294 def writeMetadata(self, dataRef): 

1295 """No metadata to write. 

1296 """ 

1297 pass 

1298 

1299 

1300class TransformSourceTableConnections(pipeBase.PipelineTaskConnections, 

1301 defaultTemplates={"catalogType": ""}, 

1302 dimensions=("instrument", "visit", "detector")): 

1303 

1304 inputCatalog = connectionTypes.Input( 

1305 doc="Wide input catalog of sources produced by WriteSourceTableTask", 

1306 name="{catalogType}source", 

1307 storageClass="DataFrame", 

1308 dimensions=("instrument", "visit", "detector"), 

1309 deferLoad=True 

1310 ) 

1311 outputCatalog = connectionTypes.Output( 

1312 doc="Narrower, per-detector Source Table transformed and converted per a " 

1313 "specified set of functors", 

1314 name="{catalogType}sourceTable", 

1315 storageClass="DataFrame", 

1316 dimensions=("instrument", "visit", "detector") 

1317 ) 

1318 

1319 

1320class TransformSourceTableConfig(TransformCatalogBaseConfig, 

1321 pipelineConnections=TransformSourceTableConnections): 

1322 

1323 def setDefaults(self): 

1324 super().setDefaults() 

1325 self.functorFile = os.path.join('$PIPE_TASKS_DIR', 'schemas', 'Source.yaml') 

1326 self.primaryKey = 'sourceId' 

1327 self.columnsFromDataId = ['visit', 'detector', 'band', 'physical_filter'] 

1328 

1329 

1330class TransformSourceTableTask(TransformCatalogBaseTask): 

1331 """Transform/standardize a source catalog 

1332 """ 

1333 _DefaultName = "transformSourceTable" 

1334 ConfigClass = TransformSourceTableConfig 

1335 

1336 

1337class ConsolidateVisitSummaryConnections(pipeBase.PipelineTaskConnections, 

1338 dimensions=("instrument", "visit",), 

1339 defaultTemplates={"calexpType": ""}): 

1340 calexp = connectionTypes.Input( 

1341 doc="Processed exposures used for metadata", 

1342 name="calexp", 

1343 storageClass="ExposureF", 

1344 dimensions=("instrument", "visit", "detector"), 

1345 deferLoad=True, 

1346 multiple=True, 

1347 ) 

1348 visitSummary = connectionTypes.Output( 

1349 doc=("Per-visit consolidated exposure metadata. These catalogs use " 

1350 "detector id for the id and are sorted for fast lookups of a " 

1351 "detector."), 

1352 name="visitSummary", 

1353 storageClass="ExposureCatalog", 

1354 dimensions=("instrument", "visit"), 

1355 ) 

1356 visitSummarySchema = connectionTypes.InitOutput( 

1357 doc="Schema of the visitSummary catalog", 

1358 name="visitSummary_schema", 

1359 storageClass="ExposureCatalog", 

1360 ) 

1361 

1362 

1363class ConsolidateVisitSummaryConfig(pipeBase.PipelineTaskConfig, 

1364 pipelineConnections=ConsolidateVisitSummaryConnections): 

1365 """Config for ConsolidateVisitSummaryTask""" 

1366 pass 

1367 

1368 

1369class ConsolidateVisitSummaryTask(pipeBase.PipelineTask, pipeBase.CmdLineTask): 

1370 """Task to consolidate per-detector visit metadata. 

1371 

1372 This task aggregates the following metadata from all the detectors in a 

1373 single visit into an exposure catalog: 

1374 - The visitInfo. 

1375 - The wcs. 

1376 - The photoCalib. 

1377 - The physical_filter and band (if available). 

1378 - The psf size, shape, and effective area at the center of the detector. 

1379 - The corners of the bounding box in right ascension/declination. 

1380 

1381 Other quantities such as Detector, Psf, ApCorrMap, and TransmissionCurve 

1382 are not persisted here because of storage concerns, and because of their 

1383 limited utility as summary statistics. 

1384 

1385 Tests for this task are performed in ci_hsc_gen3. 

1386 """ 
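# Because the catalog id is the detector id and the catalog is sorted, a
# single detector's summary can be looked up directly. A rough usage
# sketch (butler access shown for illustration):
#
#     visitSummary = butler.get('visitSummary', visit=visit)
#     row = visitSummary.find(detectorId)
#     wcs, photoCalib = row.getWcs(), row.getPhotoCalib()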

1387 _DefaultName = "consolidateVisitSummary" 

1388 ConfigClass = ConsolidateVisitSummaryConfig 

1389 

1390 @classmethod 

1391 def _makeArgumentParser(cls): 

1392 parser = ArgumentParser(name=cls._DefaultName) 

1393 

1394 parser.add_id_argument("--id", "calexp", 

1395 help="data ID, e.g. --id visit=12345", 

1396 ContainerClass=VisitDataIdContainer) 

1397 return parser 

1398 

1399 def __init__(self, **kwargs): 

1400 super().__init__(**kwargs) 

1401 self.schema = afwTable.ExposureTable.makeMinimalSchema() 

1402 self.schema.addField('visit', type='L', doc='Visit number') 

1403 self.schema.addField('physical_filter', type='String', size=32, doc='Physical filter') 

1404 self.schema.addField('band', type='String', size=32, doc='Name of band') 

1405 ExposureSummaryStats.update_schema(self.schema) 

1406 self.visitSummarySchema = afwTable.ExposureCatalog(self.schema) 

1407 

1408 def writeMetadata(self, dataRef): 

1409 """No metadata to persist, so override to remove metadata persistence.

1410 """ 

1411 pass 

1412 

1413 def writeConfig(self, butler, clobber=False, doBackup=True): 

1414 """No config to persist, so override to remove config persistence.

1415 """ 

1416 pass 

1417 

1418 def runDataRef(self, dataRefList): 

1419 visit = dataRefList[0].dataId['visit'] 

1420 

1421 self.log.debug("Concatenating metadata from %d per-detector calexps (visit %d)", 

1422 len(dataRefList), visit) 

1423 

1424 expCatalog = self._combineExposureMetadata(visit, dataRefList, isGen3=False) 

1425 

1426 dataRefList[0].put(expCatalog, 'visitSummary', visit=visit) 

1427 

1428 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

1429 dataRefs = butlerQC.get(inputRefs.calexp) 

1430 visit = dataRefs[0].dataId.byName()['visit'] 

1431 

1432 self.log.debug("Concatenating metadata from %d per-detector calexps (visit %d)", 

1433 len(dataRefs), visit) 

1434 

1435 expCatalog = self._combineExposureMetadata(visit, dataRefs) 

1436 

1437 butlerQC.put(expCatalog, outputRefs.visitSummary) 

1438 

1439 def _combineExposureMetadata(self, visit, dataRefs, isGen3=True): 

1440 """Make a combined exposure catalog from a list of dataRefs. 

1441 These dataRefs must point to exposures with wcs, summaryStats, 

1442 and other visit metadata. 

1443 

1444 Parameters 

1445 ---------- 

1446 visit : `int` 

1447 Visit identification number. 

1448 dataRefs : `list` 

1449 List of dataRefs in visit. May be list of 

1450 `lsst.daf.persistence.ButlerDataRef` (Gen2) or 

1451 `lsst.daf.butler.DeferredDatasetHandle` (Gen3). 

1452 isGen3 : `bool`, optional 

1453 Specifies if this is a Gen3 list of datarefs. 

1454 

1455 Returns 

1456 ------- 

1457 visitSummary : `lsst.afw.table.ExposureCatalog` 

1458 Exposure catalog with per-detector summary information. 

1459 """ 

1460 cat = afwTable.ExposureCatalog(self.schema) 

1461 cat.resize(len(dataRefs)) 

1462 

1463 cat['visit'] = visit 

1464 

1465 for i, dataRef in enumerate(dataRefs): 

1466 if isGen3: 

1467 visitInfo = dataRef.get(component='visitInfo') 

1468 filterLabel = dataRef.get(component='filter') 

1469 summaryStats = dataRef.get(component='summaryStats') 

1470 detector = dataRef.get(component='detector') 

1471 wcs = dataRef.get(component='wcs') 

1472 photoCalib = dataRef.get(component='photoCalib') 

1474 bbox = dataRef.get(component='bbox') 

1475 validPolygon = dataRef.get(component='validPolygon') 

1476 else: 

1477 # Note that we need to read the calexp because there is 

1478 # no magic access to the psf except through the exposure. 

1479 gen2_read_bbox = lsst.geom.BoxI(lsst.geom.PointI(0, 0), lsst.geom.PointI(1, 1)) 

1480 exp = dataRef.get(datasetType='calexp_sub', bbox=gen2_read_bbox) 

1481 visitInfo = exp.getInfo().getVisitInfo() 

1482 filterLabel = dataRef.get("calexp_filter") 

1483 summaryStats = exp.getInfo().getSummaryStats() 

1484 wcs = exp.getWcs() 

1485 photoCalib = exp.getPhotoCalib() 

1486 detector = exp.getDetector() 

1487 bbox = dataRef.get(datasetType='calexp_bbox') 

1488 validPolygon = exp.getInfo().getValidPolygon() 

1489 

1490 rec = cat[i] 

1491 rec.setBBox(bbox) 

1492 rec.setVisitInfo(visitInfo) 

1493 rec.setWcs(wcs) 

1494 rec.setPhotoCalib(photoCalib) 

1495 rec.setValidPolygon(validPolygon) 

1496 

1497 rec['physical_filter'] = filterLabel.physicalLabel if filterLabel.hasPhysicalLabel() else "" 

1498 rec['band'] = filterLabel.bandLabel if filterLabel.hasBandLabel() else "" 

1499 rec.setId(detector.getId()) 

1500 summaryStats.update_record(rec) 

1501 

1502 metadata = dafBase.PropertyList() 

1503 metadata.add("COMMENT", "Catalog id is detector id, sorted.") 

1504 # We are looping over existing datarefs, so the following is true 

1505 metadata.add("COMMENT", "Only detectors with data have entries.") 

1506 cat.setMetadata(metadata) 

1507 

1508 cat.sort() 

1509 return cat 

1510 

1511 
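# --- Illustrative sketch (not part of the pipeline) ---------------------
# How the per-visit exposure catalog written by ConsolidateVisitSummaryTask
# can be read back and queried with a Gen3 butler. The repository path,
# collection name, and data ID values below are placeholders, not references
# to real datasets.
def _exampleReadVisitSummary(repo="/path/to/repo", collection="example/run",
                             instrument="HSC", visit=12345):
    """Return the PSF FWHM (pixels) for each detector in one visitSummary."""
    from lsst.daf.butler import Butler

    butler = Butler(repo, collections=[collection])
    visitSummary = butler.get("visitSummary", instrument=instrument, visit=visit)
    # Each record is one detector; summary statistics are plain columns,
    # while the VisitInfo, Wcs, and PhotoCalib ride along as attached objects.
    return {rec["id"]: rec["psfSigma"] * np.sqrt(8.0 * np.log(2.0))
            for rec in visitSummary}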

1512class VisitDataIdContainer(DataIdContainer): 

1513 """DataIdContainer that groups sensor-level ids by visit. 

1514 """ 

1515 

1516 def makeDataRefList(self, namespace): 

1517 """Make self.refList from self.idList 

1518 

1519 Generate a list of data references grouped by visit. 

1520 

1521 Parameters 

1522 ---------- 

1523 namespace : `argparse.Namespace` 

1524 Namespace used by `lsst.pipe.base.CmdLineTask` to parse command 

1525 line arguments. 

1526 """ 

1527 # Group by visits 

1528 visitRefs = defaultdict(list) 

1529 for dataId in self.idList: 

1530 if "visit" in dataId: 

1531 visitId = dataId["visit"] 

1532 # Append all dataRefs in the subset to this visit's list.

1533 subset = namespace.butler.subset(self.datasetType, dataId=dataId) 

1534 visitRefs[visitId].extend([dataRef for dataRef in subset]) 

1535 

1536 outputRefList = [] 

1537 for refList in visitRefs.values(): 

1538 existingRefs = [ref for ref in refList if ref.datasetExists()] 

1539 if existingRefs: 

1540 outputRefList.append(existingRefs) 

1541 

1542 self.refList = outputRefList 

1543 

1544 
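# --- Illustrative sketch (not part of the pipeline) ---------------------
# The grouping VisitDataIdContainer performs, reduced to plain dictionaries:
# per-detector data IDs are bucketed by their visit number. The data ID
# values in the usage note are invented.
def _exampleGroupDataIdsByVisit(dataIds):
    """Group per-detector data ID dicts into one list per visit."""
    grouped = defaultdict(list)
    for dataId in dataIds:
        if "visit" in dataId:
            grouped[dataId["visit"]].append(dataId)
    return dict(grouped)

# _exampleGroupDataIdsByVisit([{"visit": 1, "detector": 10},
#                              {"visit": 1, "detector": 11},
#                              {"visit": 2, "detector": 10}])
# -> {1: [{"visit": 1, "detector": 10}, {"visit": 1, "detector": 11}],
#     2: [{"visit": 2, "detector": 10}]}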

1545class ConsolidateSourceTableConnections(pipeBase.PipelineTaskConnections, 

1546 defaultTemplates={"catalogType": ""}, 

1547 dimensions=("instrument", "visit")): 

1548 inputCatalogs = connectionTypes.Input( 

1549 doc="Input per-detector Source Tables", 

1550 name="{catalogType}sourceTable", 

1551 storageClass="DataFrame", 

1552 dimensions=("instrument", "visit", "detector"), 

1553 multiple=True 

1554 ) 

1555 outputCatalog = connectionTypes.Output( 

1556 doc="Per-visit concatenation of Source Table", 

1557 name="{catalogType}sourceTable_visit", 

1558 storageClass="DataFrame", 

1559 dimensions=("instrument", "visit") 

1560 ) 

1561 

1562 

1563class ConsolidateSourceTableConfig(pipeBase.PipelineTaskConfig, 

1564 pipelineConnections=ConsolidateSourceTableConnections): 

1565 pass 

1566 

1567 

1568class ConsolidateSourceTableTask(CmdLineTask, pipeBase.PipelineTask): 

1569 """Concatenate per-detector `sourceTable` catalogs into a per-visit `sourceTable_visit`.

1570 """ 

1571 _DefaultName = 'consolidateSourceTable' 

1572 ConfigClass = ConsolidateSourceTableConfig 

1573 

1574 inputDataset = 'sourceTable' 

1575 outputDataset = 'sourceTable_visit' 

1576 

1577 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

1578 from .makeCoaddTempExp import reorderRefs 

1579 

1580 detectorOrder = [ref.dataId['detector'] for ref in inputRefs.inputCatalogs] 

1581 detectorOrder.sort() 

1582 inputRefs = reorderRefs(inputRefs, detectorOrder, dataIdKey='detector') 

1583 inputs = butlerQC.get(inputRefs) 

1584 self.log.info("Concatenating %s per-detector Source Tables", 

1585 len(inputs['inputCatalogs'])) 

1586 df = pd.concat(inputs['inputCatalogs']) 

1587 butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs) 

1588 

1589 def runDataRef(self, dataRefList): 

1590 self.log.info("Concatenating %s per-detector Source Tables", len(dataRefList)) 

1591 df = pd.concat([dataRef.get().toDataFrame() for dataRef in dataRefList]) 

1592 dataRefList[0].put(ParquetTable(dataFrame=df), self.outputDataset) 

1593 

1594 @classmethod 

1595 def _makeArgumentParser(cls): 

1596 parser = ArgumentParser(name=cls._DefaultName) 

1597 

1598 parser.add_id_argument("--id", cls.inputDataset, 

1599 help="data ID, e.g. --id visit=12345", 

1600 ContainerClass=VisitDataIdContainer) 

1601 return parser 

1602 

1603 def writeMetadata(self, dataRef): 

1604 """No metadata to write. 

1605 """ 

1606 pass 

1607 

1608 def writeConfig(self, butler, clobber=False, doBackup=True): 

1609 """No config to write. 

1610 """ 

1611 pass 

1612 

1613 
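# --- Illustrative sketch (not part of the pipeline) ---------------------
# The deterministic ordering plus concatenation that ConsolidateSourceTableTask
# applies to its per-detector inputs, shown with plain pandas DataFrames.
# The column names in the usage note are invented.
def _exampleConcatByDetector(perDetectorFrames):
    """Concatenate a {detector: DataFrame} mapping in detector order."""
    ordered = [perDetectorFrames[det] for det in sorted(perDetectorFrames)]
    return pd.concat(ordered)

# frames = {2: pd.DataFrame({"sourceId": [3], "psfFlux": [1.5]}),
#           1: pd.DataFrame({"sourceId": [1, 2], "psfFlux": [0.7, 0.9]})}
# _exampleConcatByDetector(frames) puts detector 1's rows before detector 2's.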

1614class MakeCcdVisitTableConnections(pipeBase.PipelineTaskConnections, 

1615 dimensions=("instrument",), 

1616 defaultTemplates={"calexpType": ""}): 

1617 visitSummaryRefs = connectionTypes.Input( 

1618 doc="Data references for per-visit consolidated exposure metadata", 

1619 name="finalVisitSummary", 

1620 storageClass="ExposureCatalog", 

1621 dimensions=("instrument", "visit"), 

1622 multiple=True, 

1623 deferLoad=True, 

1624 ) 

1625 outputCatalog = connectionTypes.Output( 

1626 doc="CCD and Visit metadata table", 

1627 name="ccdVisitTable", 

1628 storageClass="DataFrame", 

1629 dimensions=("instrument",) 

1630 ) 

1631 

1632 

1633class MakeCcdVisitTableConfig(pipeBase.PipelineTaskConfig, 

1634 pipelineConnections=MakeCcdVisitTableConnections): 

1635 pass 

1636 

1637 

1638class MakeCcdVisitTableTask(CmdLineTask, pipeBase.PipelineTask): 

1639 """Produce a `ccdVisitTable` from the visit summary exposure catalogs. 

1640 """ 

1641 _DefaultName = 'makeCcdVisitTable' 

1642 ConfigClass = MakeCcdVisitTableConfig 

1643 

1644 def run(self, visitSummaryRefs): 

1645 """Make a table of ccd information from the visit summary catalogs. 

1646 

1647 Parameters 

1648 ---------- 

1649 visitSummaryRefs : `list` of `lsst.daf.butler.DeferredDatasetHandle` 

1650 List of DeferredDatasetHandles pointing to exposure catalogs with 

1651 per-detector summary information. 

1652 

1653 Returns 

1654 ------- 

1655 result : `lsst.pipe.base.Struct`

1656 Results struct with attribute: 

1657 

1658 ``outputCatalog`` 

1659 Catalog of ccd and visit information. 

1660 """ 

1661 ccdEntries = [] 

1662 for visitSummaryRef in visitSummaryRefs: 

1663 visitSummary = visitSummaryRef.get() 

1664 visitInfo = visitSummary[0].getVisitInfo() 

1665 

1666 ccdEntry = {} 

1667 summaryTable = visitSummary.asAstropy() 

1668 selectColumns = ['id', 'visit', 'physical_filter', 'band', 'ra', 'decl', 'zenithDistance', 

1669 'zeroPoint', 'psfSigma', 'skyBg', 'skyNoise', 

1670 'astromOffsetMean', 'astromOffsetStd', 'nPsfStar', 

1671 'psfStarDeltaE1Median', 'psfStarDeltaE2Median', 

1672 'psfStarDeltaE1Scatter', 'psfStarDeltaE2Scatter', 

1673 'psfStarDeltaSizeMedian', 'psfStarDeltaSizeScatter', 

1674 'psfStarScaledDeltaSizeScatter', 

1675 'psfTraceRadiusDelta', 'maxDistToNearestPsf'] 

1676 ccdEntry = summaryTable[selectColumns].to_pandas().set_index('id') 

1677 # 'visit' is the human readable visit number. 

1678 # 'visitId' is the key into the visit table. They are the same.

1679 # Technically you should join to get the visit from the visit 

1680 # table. 

1681 ccdEntry = ccdEntry.rename(columns={"visit": "visitId"}) 

1682 dataIds = [DataCoordinate.standardize(visitSummaryRef.dataId, detector=id) for id in 

1683 summaryTable['id']] 

1684 packer = visitSummaryRef.dataId.universe.makePacker('visit_detector', visitSummaryRef.dataId) 

1685 ccdVisitIds = [packer.pack(dataId) for dataId in dataIds] 

1686 ccdEntry['ccdVisitId'] = ccdVisitIds 

1687 ccdEntry['detector'] = summaryTable['id'] 

1688 pixToArcseconds = np.array([vR.getWcs().getPixelScale().asArcseconds() for vR in visitSummary]) 

1689 ccdEntry["seeing"] = visitSummary['psfSigma'] * np.sqrt(8 * np.log(2)) * pixToArcseconds 

1690 

1691 ccdEntry["skyRotation"] = visitInfo.getBoresightRotAngle().asDegrees() 

1692 ccdEntry["expMidpt"] = visitInfo.getDate().toPython() 

1693 ccdEntry["expMidptMJD"] = visitInfo.getDate().get(dafBase.DateTime.MJD) 

1694 expTime = visitInfo.getExposureTime() 

1695 ccdEntry['expTime'] = expTime 

1696 ccdEntry["obsStart"] = ccdEntry["expMidpt"] - 0.5 * pd.Timedelta(seconds=expTime) 

1697 expTime_days = expTime / (60*60*24) 

1698 ccdEntry["obsStartMJD"] = ccdEntry["expMidptMJD"] - 0.5 * expTime_days 

1699 ccdEntry['darkTime'] = visitInfo.getDarkTime() 

1700 ccdEntry['xSize'] = summaryTable['bbox_max_x'] - summaryTable['bbox_min_x'] 

1701 ccdEntry['ySize'] = summaryTable['bbox_max_y'] - summaryTable['bbox_min_y'] 

1702 ccdEntry['llcra'] = summaryTable['raCorners'][:, 0] 

1703 ccdEntry['llcdec'] = summaryTable['decCorners'][:, 0] 

1704 ccdEntry['ulcra'] = summaryTable['raCorners'][:, 1] 

1705 ccdEntry['ulcdec'] = summaryTable['decCorners'][:, 1] 

1706 ccdEntry['urcra'] = summaryTable['raCorners'][:, 2] 

1707 ccdEntry['urcdec'] = summaryTable['decCorners'][:, 2] 

1708 ccdEntry['lrcra'] = summaryTable['raCorners'][:, 3] 

1709 ccdEntry['lrcdec'] = summaryTable['decCorners'][:, 3] 

1710 # TODO: DM-30618, Add raftName, nExposures, ccdTemp, binX, binY, 

1711 # and flags, and decide whether WCS, llcx, llcy, ulcx, ulcy, etc.

1712 # values are actually wanted. 

1713 ccdEntries.append(ccdEntry) 

1714 

1715 outputCatalog = pd.concat(ccdEntries) 

1716 outputCatalog.set_index('ccdVisitId', inplace=True, verify_integrity=True) 

1717 return pipeBase.Struct(outputCatalog=outputCatalog) 

1718 

1719 
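# --- Illustrative sketch (not part of the pipeline) ---------------------
# The derived-column arithmetic used above when building the ccdVisitTable,
# pulled out as a standalone function. The numerical inputs in the usage
# note are invented.
def _exampleCcdVisitDerivedColumns(psfSigma, pixelScaleArcsec, expMidptMJD, expTime):
    """Return (seeing in arcsec, obsStartMJD) from per-detector summary values."""
    # PSF FWHM: Gaussian sigma (pixels) times sqrt(8 ln 2), scaled to arcsec.
    seeing = psfSigma * np.sqrt(8.0 * np.log(2.0)) * pixelScaleArcsec
    # The visit date is the exposure midpoint, so the start is half an
    # exposure earlier; expTime is in seconds, MJD is in days.
    obsStartMJD = expMidptMJD - 0.5 * expTime / 86400.0
    return seeing, obsStartMJD

# _exampleCcdVisitDerivedColumns(2.0, 0.168, 59000.5, 30.0)
# -> (about 0.79 arcsec, about 59000.49983)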

1720class MakeVisitTableConnections(pipeBase.PipelineTaskConnections, 

1721 dimensions=("instrument",), 

1722 defaultTemplates={"calexpType": ""}): 

1723 visitSummaries = connectionTypes.Input( 

1724 doc="Per-visit consolidated exposure metadata", 

1725 name="finalVisitSummary", 

1726 storageClass="ExposureCatalog", 

1727 dimensions=("instrument", "visit",), 

1728 multiple=True, 

1729 deferLoad=True, 

1730 ) 

1731 outputCatalog = connectionTypes.Output( 

1732 doc="Visit metadata table", 

1733 name="visitTable", 

1734 storageClass="DataFrame", 

1735 dimensions=("instrument",) 

1736 ) 

1737 

1738 

1739class MakeVisitTableConfig(pipeBase.PipelineTaskConfig, 

1740 pipelineConnections=MakeVisitTableConnections): 

1741 pass 

1742 

1743 

1744class MakeVisitTableTask(CmdLineTask, pipeBase.PipelineTask): 

1745 """Produce a `visitTable` from the visit summary exposure catalogs. 

1746 """ 

1747 _DefaultName = 'makeVisitTable' 

1748 ConfigClass = MakeVisitTableConfig 

1749 

1750 def run(self, visitSummaries): 

1751 """Make a table of visit information from the visit summary catalogs. 

1752 

1753 Parameters 

1754 ---------- 

1755 visitSummaries : `list` of `lsst.afw.table.ExposureCatalog` 

1756 List of exposure catalogs with per-detector summary information.



1757 Returns

1758 ------- 

1759 result : `lsst.pipe.base.Struct`

1760 Results struct with attribute: 

1761 

1762 ``outputCatalog`` 

1763 Catalog of visit information. 

1764 """ 

1765 visitEntries = [] 

1766 for visitSummary in visitSummaries: 

1767 visitSummary = visitSummary.get() 

1768 visitRow = visitSummary[0] 

1769 visitInfo = visitRow.getVisitInfo() 

1770 

1771 visitEntry = {} 

1772 visitEntry["visitId"] = visitRow['visit'] 

1773 visitEntry["visit"] = visitRow['visit'] 

1774 visitEntry["physical_filter"] = visitRow['physical_filter'] 

1775 visitEntry["band"] = visitRow['band'] 

1776 raDec = visitInfo.getBoresightRaDec() 

1777 visitEntry["ra"] = raDec.getRa().asDegrees() 

1778 visitEntry["decl"] = raDec.getDec().asDegrees() 

1779 visitEntry["skyRotation"] = visitInfo.getBoresightRotAngle().asDegrees() 

1780 azAlt = visitInfo.getBoresightAzAlt() 

1781 visitEntry["azimuth"] = azAlt.getLongitude().asDegrees() 

1782 visitEntry["altitude"] = azAlt.getLatitude().asDegrees() 

1783 visitEntry["zenithDistance"] = 90 - azAlt.getLatitude().asDegrees() 

1784 visitEntry["airmass"] = visitInfo.getBoresightAirmass() 

1785 expTime = visitInfo.getExposureTime() 

1786 visitEntry["expTime"] = expTime 

1787 visitEntry["expMidpt"] = visitInfo.getDate().toPython() 

1788 visitEntry["expMidptMJD"] = visitInfo.getDate().get(dafBase.DateTime.MJD) 

1789 visitEntry["obsStart"] = visitEntry["expMidpt"] - 0.5 * pd.Timedelta(seconds=expTime) 

1790 expTime_days = expTime / (60*60*24) 

1791 visitEntry["obsStartMJD"] = visitEntry["expMidptMJD"] - 0.5 * expTime_days 

1792 visitEntries.append(visitEntry) 

1793 

1794 # TODO: DM-30623, Add programId, exposureType, cameraTemp, 

1795 # mirror1Temp, mirror2Temp, mirror3Temp, domeTemp, externalTemp, 

1796 # dimmSeeing, pwvGPS, pwvMW, flags, nExposures. 

1797 

1798 outputCatalog = pd.DataFrame(data=visitEntries) 

1799 outputCatalog.set_index('visitId', inplace=True, verify_integrity=True) 

1800 return pipeBase.Struct(outputCatalog=outputCatalog) 

1801 

1802 
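# --- Illustrative sketch (not part of the pipeline) ---------------------
# The final indexing step of MakeVisitTableTask, shown on hand-written visit
# entries: the list of per-visit dicts becomes a DataFrame keyed by visitId.
# The entry values in the usage note are invented.
def _exampleBuildVisitTable(visitEntries):
    """Turn a list of per-visit dicts into a DataFrame indexed by visitId."""
    outputCatalog = pd.DataFrame(data=visitEntries)
    # verify_integrity guards against accidentally duplicated visits.
    outputCatalog.set_index("visitId", inplace=True, verify_integrity=True)
    return outputCatalog

# _exampleBuildVisitTable([{"visitId": 1, "band": "r", "airmass": 1.1},
#                          {"visitId": 2, "band": "i", "airmass": 1.3}])
# -> a two-row visit table with visitId as the index.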

1803class WriteForcedSourceTableConnections(pipeBase.PipelineTaskConnections, 

1804 dimensions=("instrument", "visit", "detector", "skymap", "tract")): 

1805 

1806 inputCatalog = connectionTypes.Input( 

1807 doc="Primary per-detector, single-epoch forced-photometry catalog. " 

1808 "By default, it is the output of ForcedPhotCcdTask on calexps", 

1809 name="forced_src", 

1810 storageClass="SourceCatalog", 

1811 dimensions=("instrument", "visit", "detector", "skymap", "tract") 

1812 ) 

1813 inputCatalogDiff = connectionTypes.Input( 

1814 doc="Secondary multi-epoch, per-detector, forced photometry catalog. " 

1815 "By default, it is the output of ForcedPhotCcdTask run on image differences.", 

1816 name="forced_diff", 

1817 storageClass="SourceCatalog", 

1818 dimensions=("instrument", "visit", "detector", "skymap", "tract") 

1819 ) 

1820 outputCatalog = connectionTypes.Output( 

1821 doc="Input catalogs horizontally joined on `objectId` in Parquet format",

1822 name="mergedForcedSource", 

1823 storageClass="DataFrame", 

1824 dimensions=("instrument", "visit", "detector", "skymap", "tract") 

1825 ) 

1826 

1827 

1828class WriteForcedSourceTableConfig(pipeBase.PipelineTaskConfig, 

1829 pipelineConnections=WriteForcedSourceTableConnections): 

1830 key = lsst.pex.config.Field( 

1831 doc="Column on which to join the two input tables and make the primary key of the output",

1832 dtype=str, 

1833 default="objectId", 

1834 ) 

1835 

1836 

1837class WriteForcedSourceTableTask(pipeBase.PipelineTask): 

1838 """Merge and convert per-detector forced source catalogs to parquet. 

1839 

1840 Because the predecessor ForcedPhotCcdTask operates per-detector,

1841 per-tract (i.e., it has tract in its dimensions), detectors

1842 on the tract boundary may have multiple forced source catalogs.

1843 

1844 The successor task TransformForcedSourceTable runs per-patch

1845 and temporally aggregates overlapping mergedForcedSource catalogs

1846 from all available epochs.

1847 """ 

1848 _DefaultName = "writeForcedSourceTable" 

1849 ConfigClass = WriteForcedSourceTableConfig 

1850 

1851 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

1852 inputs = butlerQC.get(inputRefs) 

1853 # Add ccdVisitId to allow joining with CcdVisitTable 

1854 inputs['ccdVisitId'] = butlerQC.quantum.dataId.pack("visit_detector") 

1855 inputs['band'] = butlerQC.quantum.dataId.full['band'] 

1856 outputs = self.run(**inputs) 

1857 butlerQC.put(outputs, outputRefs) 

1858 

1859 def run(self, inputCatalog, inputCatalogDiff, ccdVisitId=None, band=None): 

1860 dfs = [] 

1861 for table, dataset in zip((inputCatalog, inputCatalogDiff), ('calexp', 'diff')):

1862 df = table.asAstropy().to_pandas().set_index(self.config.key, drop=False) 

1863 df = df.reindex(sorted(df.columns), axis=1) 

1864 df['ccdVisitId'] = ccdVisitId if ccdVisitId else pd.NA 

1865 df['band'] = band if band else pd.NA 

1866 df.columns = pd.MultiIndex.from_tuples([(dataset, c) for c in df.columns], 

1867 names=('dataset', 'column')) 

1868 

1869 dfs.append(df) 

1870 

1871 outputCatalog = functools.reduce(lambda d1, d2: d1.join(d2), dfs) 

1872 return pipeBase.Struct(outputCatalog=outputCatalog) 

1873 

1874 
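# --- Illustrative sketch (not part of the pipeline) ---------------------
# The column MultiIndex plus horizontal join that WriteForcedSourceTableTask.run
# performs on the 'calexp' and 'diff' forced-source measurements, shown with
# plain DataFrames. The column names in the usage note are invented.
def _exampleJoinForcedCatalogs(calexpDf, diffDf, key="objectId"):
    """Join two measurement DataFrames on objectId under a (dataset, column) index."""
    dfs = []
    for df, dataset in zip((calexpDf, diffDf), ("calexp", "diff")):
        df = df.set_index(key, drop=False)
        df.columns = pd.MultiIndex.from_tuples(
            [(dataset, c) for c in df.columns], names=("dataset", "column"))
        dfs.append(df)
    return functools.reduce(lambda d1, d2: d1.join(d2), dfs)

# calexp = pd.DataFrame({"objectId": [1, 2], "flux": [10.0, 20.0]})
# diff = pd.DataFrame({"objectId": [1, 2], "flux": [0.1, -0.2]})
# joined = _exampleJoinForcedCatalogs(calexp, diff)
# joined[("diff", "flux")] is the difference-image flux column.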

1875class TransformForcedSourceTableConnections(pipeBase.PipelineTaskConnections, 

1876 dimensions=("instrument", "skymap", "patch", "tract")): 

1877 

1878 inputCatalogs = connectionTypes.Input( 

1879 doc="Parquet table of merged ForcedSources produced by WriteForcedSourceTableTask", 

1880 name="mergedForcedSource", 

1881 storageClass="DataFrame", 

1882 dimensions=("instrument", "visit", "detector", "skymap", "tract"), 

1883 multiple=True, 

1884 deferLoad=True 

1885 ) 

1886 referenceCatalog = connectionTypes.Input( 

1887 doc="Reference catalog which was used to seed the forcedPhot. Columns " 

1888 "objectId, detect_isPrimary, detect_isTractInner, detect_isPatchInner " 

1889 "are expected.", 

1890 name="objectTable", 

1891 storageClass="DataFrame", 

1892 dimensions=("tract", "patch", "skymap"), 

1893 deferLoad=True 

1894 ) 

1895 outputCatalog = connectionTypes.Output( 

1896 doc="Narrower, temporally-aggregated, per-patch ForcedSource Table transformed and converted per a " 

1897 "specified set of functors", 

1898 name="forcedSourceTable", 

1899 storageClass="DataFrame", 

1900 dimensions=("tract", "patch", "skymap") 

1901 ) 

1902 

1903 

1904class TransformForcedSourceTableConfig(TransformCatalogBaseConfig, 

1905 pipelineConnections=TransformForcedSourceTableConnections): 

1906 referenceColumns = pexConfig.ListField( 

1907 dtype=str, 

1908 default=["detect_isPrimary", "detect_isTractInner", "detect_isPatchInner"], 

1909 optional=True, 

1910 doc="Columns to pull from reference catalog", 

1911 ) 

1912 keyRef = lsst.pex.config.Field( 

1913 doc="Column on which to join the two input tables and make the primary key of the output",

1914 dtype=str, 

1915 default="objectId", 

1916 ) 

1917 key = lsst.pex.config.Field( 

1918 doc="Rename the output DataFrame index to this name", 

1919 dtype=str, 

1920 default="forcedSourceId", 

1921 ) 

1922 

1923 def setDefaults(self): 

1924 super().setDefaults() 

1925 self.functorFile = os.path.join('$PIPE_TASKS_DIR', 'schemas', 'ForcedSource.yaml') 

1926 self.columnsFromDataId = ['tract', 'patch'] 

1927 

1928 

1929class TransformForcedSourceTableTask(TransformCatalogBaseTask): 

1930 """Transform/standardize a ForcedSource catalog 

1931 

1932 Transforms each wide, per-detector forcedSource parquet table per the 

1933 specification file (per-camera defaults found in ForcedSource.yaml). 

1934 All epochs that overlap the patch are aggregated into one per-patch 

1935 narrow parquet file.

1936 

1937 No de-duplication of rows is performed. Duplicate-resolution flags are

1938 pulled in from the referenceCatalog: `detect_isPrimary`,

1939 `detect_isTractInner`, `detect_isPatchInner`, so that the user may

1940 de-duplicate for analysis or compare duplicates for QA.

1941 

1942 The resulting table includes multiple bands. Epochs (MJDs) and other useful

1943 per-visit data can be retrieved by joining with the CcdVisitTable on

1944 ccdVisitId.

1945 """ 

1946 _DefaultName = "transformForcedSourceTable" 

1947 ConfigClass = TransformForcedSourceTableConfig 

1948 

1949 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

1950 inputs = butlerQC.get(inputRefs) 

1951 if self.funcs is None: 

1952 raise ValueError("config.functorFile is None. " 

1953 "Must be a valid path to yaml in order to run Task as a PipelineTask.") 

1954 outputs = self.run(inputs['inputCatalogs'], inputs['referenceCatalog'], funcs=self.funcs, 

1955 dataId=outputRefs.outputCatalog.dataId.full) 

1956 

1957 butlerQC.put(outputs, outputRefs) 

1958 

1959 def run(self, inputCatalogs, referenceCatalog, funcs=None, dataId=None, band=None): 

1960 dfs = [] 

1961 ref = referenceCatalog.get(parameters={"columns": self.config.referenceColumns}) 

1962 self.log.info("Aggregating %s input catalogs", len(inputCatalogs))

1963 for handle in inputCatalogs: 

1964 result = self.transform(None, handle, funcs, dataId) 

1965 # Filter for only rows that were detected on (overlap) the patch 

1966 dfs.append(result.df.join(ref, how='inner')) 

1967 

1968 outputCatalog = pd.concat(dfs) 

1969 

1970 # Now that we are done joining on config.keyRef,

1971 # change the index to config.key:

1972 outputCatalog.index.rename(self.config.keyRef, inplace=True) 

1973 # Add config.keyRef to the column list 

1974 outputCatalog.reset_index(inplace=True) 

1975 # Set the forcedSourceId to the index. This is specified in the 

1976 # ForcedSource.yaml 

1977 outputCatalog.set_index("forcedSourceId", inplace=True, verify_integrity=True) 

1978 # Rename it to the config.key 

1979 outputCatalog.index.rename(self.config.key, inplace=True) 

1980 

1981 self.log.info("Made a table of %d columns and %d rows", 

1982 len(outputCatalog.columns), len(outputCatalog)) 

1983 return pipeBase.Struct(outputCatalog=outputCatalog) 

1984 

1985 
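# --- Illustrative sketch (not part of the pipeline) ---------------------
# The inner join against the reference (object) table that restricts each
# transformed forced-source frame to objects detected on the patch, as in
# TransformForcedSourceTableTask.run above. The values in the usage note
# are invented.
def _exampleFilterToReference(forcedDf, refDf):
    """Keep only forced-source rows whose index (objectId) appears in refDf."""
    return forcedDf.join(refDf, how="inner")

# forced = pd.DataFrame({"psfFlux": [1.0, 2.0, 3.0]}, index=[10, 11, 12])
# ref = pd.DataFrame({"detect_isPrimary": [True, False]}, index=[10, 12])
# _exampleFilterToReference(forced, ref) keeps objectIds 10 and 12 only.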

1986class ConsolidateTractConnections(pipeBase.PipelineTaskConnections, 

1987 defaultTemplates={"catalogType": ""}, 

1988 dimensions=("instrument", "tract")): 

1989 inputCatalogs = connectionTypes.Input( 

1990 doc="Input per-patch DataFrame Tables to be concatenated", 

1991 name="{catalogType}ForcedSourceTable", 

1992 storageClass="DataFrame", 

1993 dimensions=("tract", "patch", "skymap"), 

1994 multiple=True, 

1995 ) 

1996 

1997 outputCatalog = connectionTypes.Output( 

1998 doc="Output per-tract concatenation of DataFrame Tables", 

1999 name="{catalogType}ForcedSourceTable_tract", 

2000 storageClass="DataFrame", 

2001 dimensions=("tract", "skymap"), 

2002 ) 

2003 

2004 

2005class ConsolidateTractConfig(pipeBase.PipelineTaskConfig, 

2006 pipelineConnections=ConsolidateTractConnections): 

2007 pass 

2008 

2009 

2010class ConsolidateTractTask(CmdLineTask, pipeBase.PipelineTask): 

2011 """Concatenate any per-patch DataFrame list into a single

2012 per-tract DataFrame.

2013 """ 

2014 _DefaultName = 'ConsolidateTract' 

2015 ConfigClass = ConsolidateTractConfig 

2016 

2017 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

2018 inputs = butlerQC.get(inputRefs) 

2019 # Not checking at least one inputCatalog exists because that'd be an 

2020 # empty QG. 

2021 self.log.info("Concatenating %s per-patch %s Tables", 

2022 len(inputs['inputCatalogs']), 

2023 inputRefs.inputCatalogs[0].datasetType.name) 

2024 df = pd.concat(inputs['inputCatalogs']) 

2025 butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs)