Coverage for python/lsst/pipe/tasks/postprocess.py: 33%


727 statements  

1# This file is part of pipe_tasks 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

22import functools 

23import pandas as pd 

24from collections import defaultdict 

25import numpy as np 

26import numbers 

27import os 

28 

29import lsst.geom 

30import lsst.pex.config as pexConfig 

31import lsst.pipe.base as pipeBase 

32import lsst.daf.base as dafBase 

33from lsst.pipe.base import connectionTypes 

34import lsst.afw.table as afwTable 

35from lsst.meas.base import SingleFrameMeasurementTask 

36from lsst.pipe.base import CmdLineTask, ArgumentParser, DataIdContainer 

37from lsst.coadd.utils.coaddDataIdContainer import CoaddDataIdContainer 

38from lsst.daf.butler import DeferredDatasetHandle, DataCoordinate 

39 

40from .parquetTable import ParquetTable 

41from .multiBandUtils import makeMergeArgumentParser, MergeSourcesRunner 

42from .functors import CompositeFunctor, Column 

43 

44 

45def flattenFilters(df, noDupCols=['coord_ra', 'coord_dec'], camelCase=False, inputBands=None): 

46 """Flattens a dataframe with multilevel column index 

47 """ 

48 newDf = pd.DataFrame() 

49 # band is the level 0 index 

50 dfBands = df.columns.unique(level=0).values 

51 for band in dfBands: 

52 subdf = df[band] 

53 columnFormat = '{0}{1}' if camelCase else '{0}_{1}' 

54 newColumns = {c: columnFormat.format(band, c) 

55 for c in subdf.columns if c not in noDupCols} 

56 cols = list(newColumns.keys()) 

57 newDf = pd.concat([newDf, subdf[cols].rename(columns=newColumns)], axis=1) 

58 

59 # Band must be present in the input and output or else column is all NaN: 

60 presentBands = dfBands if inputBands is None else list(set(inputBands).intersection(dfBands)) 

61 # Get the unexploded columns from any present band's partition 

62 noDupDf = df[presentBands[0]][noDupCols] 

63 newDf = pd.concat([noDupDf, newDf], axis=1) 

64 return newDf 
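A minimal sketch (not part of the module) of what flattenFilters produces, using a made-up two-band DataFrame whose columns mimic the (band, column) MultiIndex built by TransformObjectCatalogTask.run:

import pandas as pd

# Toy (band, column) MultiIndex frame; the column names are illustrative only.
data = {
    ('g', 'coord_ra'): [10.0], ('g', 'coord_dec'): [-5.0], ('g', 'psfMag'): [21.3],
    ('r', 'coord_ra'): [10.0], ('r', 'coord_dec'): [-5.0], ('r', 'psfMag'): [20.7],
}
df = pd.DataFrame(data)
df.columns = pd.MultiIndex.from_tuples(df.columns, names=['band', 'column'])

flat = flattenFilters(df, camelCase=True)
# Per-band columns get the band prepended; noDupCols appear once, unprefixed:
# ['coord_ra', 'coord_dec', 'gpsfMag', 'rpsfMag']
print(list(flat.columns))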

65 

66 

67class WriteObjectTableConnections(pipeBase.PipelineTaskConnections, 

68 defaultTemplates={"coaddName": "deep"}, 

69 dimensions=("tract", "patch", "skymap")): 

70 inputCatalogMeas = connectionTypes.Input( 

71 doc="Catalog of source measurements on the deepCoadd.", 

72 dimensions=("tract", "patch", "band", "skymap"), 

73 storageClass="SourceCatalog", 

74 name="{coaddName}Coadd_meas", 

75 multiple=True 

76 ) 

77 inputCatalogForcedSrc = connectionTypes.Input( 

78 doc="Catalog of forced measurements (shape and position parameters held fixed) on the deepCoadd.", 

79 dimensions=("tract", "patch", "band", "skymap"), 

80 storageClass="SourceCatalog", 

81 name="{coaddName}Coadd_forced_src", 

82 multiple=True 

83 ) 

84 inputCatalogRef = connectionTypes.Input( 

85 doc="Catalog marking the primary detection (which band provides a good shape and position)" 

86 "for each detection in deepCoadd_mergeDet.", 

87 dimensions=("tract", "patch", "skymap"), 

88 storageClass="SourceCatalog", 

89 name="{coaddName}Coadd_ref" 

90 ) 

91 outputCatalog = connectionTypes.Output( 

92 doc="A vertical concatenation of the deepCoadd_{ref|meas|forced_src} catalogs, " 

93 "stored as a DataFrame with a multi-level column index per-patch.", 

94 dimensions=("tract", "patch", "skymap"), 

95 storageClass="DataFrame", 

96 name="{coaddName}Coadd_obj" 

97 ) 

98 

99 

100class WriteObjectTableConfig(pipeBase.PipelineTaskConfig, 

101 pipelineConnections=WriteObjectTableConnections): 

102 engine = pexConfig.Field( 

103 dtype=str, 

104 default="pyarrow", 

105 doc="Parquet engine for writing (pyarrow or fastparquet)" 

106 ) 

107 coaddName = pexConfig.Field( 

108 dtype=str, 

109 default="deep", 

110 doc="Name of coadd" 

111 ) 

112 

113 

114class WriteObjectTableTask(CmdLineTask, pipeBase.PipelineTask): 

115 """Write filter-merged source tables to parquet 

116 """ 

117 _DefaultName = "writeObjectTable" 

118 ConfigClass = WriteObjectTableConfig 

119 RunnerClass = MergeSourcesRunner 

120 

121 # Names of table datasets to be merged 

122 inputDatasets = ('forced_src', 'meas', 'ref') 

123 

124 # Tag of output dataset written by `MergeSourcesTask.write` 

125 outputDataset = 'obj' 

126 

127 def __init__(self, butler=None, schema=None, **kwargs): 

128 # This class cannot use the default CmdLineTask init: doing so would

129 # require its own specialized task runner, which would add many more

130 # lines of code, so we keep this minimal override for now.

131 super().__init__(**kwargs) 

132 

133 def runDataRef(self, patchRefList): 

134 """! 

135 @brief Merge coadd sources from multiple bands. Calls @ref `run` which must be defined in 

136 subclasses that inherit from MergeSourcesTask. 

137 @param[in] patchRefList list of data references for each filter 

138 """ 

139 catalogs = dict(self.readCatalog(patchRef) for patchRef in patchRefList) 

140 dataId = patchRefList[0].dataId 

141 mergedCatalog = self.run(catalogs, tract=dataId['tract'], patch=dataId['patch']) 

142 self.write(patchRefList[0], ParquetTable(dataFrame=mergedCatalog)) 

143 

144 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

145 inputs = butlerQC.get(inputRefs) 

146 

147 measDict = {ref.dataId['band']: {'meas': cat} for ref, cat in 

148 zip(inputRefs.inputCatalogMeas, inputs['inputCatalogMeas'])} 

149 forcedSourceDict = {ref.dataId['band']: {'forced_src': cat} for ref, cat in 

150 zip(inputRefs.inputCatalogForcedSrc, inputs['inputCatalogForcedSrc'])} 

151 

152 catalogs = {} 

153 for band in measDict.keys(): 

154 catalogs[band] = {'meas': measDict[band]['meas'], 

155 'forced_src': forcedSourceDict[band]['forced_src'], 

156 'ref': inputs['inputCatalogRef']} 

157 dataId = butlerQC.quantum.dataId 

158 df = self.run(catalogs=catalogs, tract=dataId['tract'], patch=dataId['patch']) 

159 outputs = pipeBase.Struct(outputCatalog=df) 

160 butlerQC.put(outputs, outputRefs) 

161 

162 @classmethod 

163 def _makeArgumentParser(cls): 

164 """Create a suitable ArgumentParser. 

165 

166 We will use the ArgumentParser to get a list of data 

167 references for patches; the RunnerClass will sort them into lists 

168 of data references for the same patch. 

169 

170 References the first of self.inputDatasets, rather than

171 self.inputDataset 

172 """ 

173 return makeMergeArgumentParser(cls._DefaultName, cls.inputDatasets[0]) 

174 

175 def readCatalog(self, patchRef): 

176 """Read input catalogs 

177 

178 Read all the input datasets given by the 'inputDatasets' 

179 attribute. 

180 

181 Parameters 

182 ---------- 

183 patchRef : `lsst.daf.persistence.ButlerDataRef` 

184 Data reference for patch 

185 

186 Returns 

187 ------- 

188 Tuple consisting of band name and a dict of catalogs, keyed by 

189 dataset name 

190 """ 

191 band = patchRef.get(self.config.coaddName + "Coadd_filterLabel", immediate=True).bandLabel 

192 catalogDict = {} 

193 for dataset in self.inputDatasets: 

194 catalog = patchRef.get(self.config.coaddName + "Coadd_" + dataset, immediate=True) 

195 self.log.info("Read %d sources from %s for band %s: %s", 

196 len(catalog), dataset, band, patchRef.dataId) 

197 catalogDict[dataset] = catalog 

198 return band, catalogDict 

199 

200 def run(self, catalogs, tract, patch): 

201 """Merge multiple catalogs. 

202 

203 Parameters 

204 ---------- 

205 catalogs : `dict` 

206 Mapping from filter names to dict of catalogs. 

207 tract : int 

208 tractId to use for the tractId column 

209 patch : str 

210 patchId to use for the patchId column 

211 

212 Returns 

213 ------- 

214 catalog : `pandas.DataFrame` 

215 Merged dataframe 

216 """ 

217 

218 dfs = [] 

219 for filt, tableDict in catalogs.items(): 

220 for dataset, table in tableDict.items(): 

221 # Convert afwTable to pandas DataFrame 

222 df = table.asAstropy().to_pandas().set_index('id', drop=True) 

223 

224 # Sort columns by name, to ensure matching schema among patches 

225 df = df.reindex(sorted(df.columns), axis=1) 

226 df['tractId'] = tract 

227 df['patchId'] = patch 

228 

229 # Make columns a 3-level MultiIndex 

230 df.columns = pd.MultiIndex.from_tuples([(dataset, filt, c) for c in df.columns], 

231 names=('dataset', 'band', 'column')) 

232 dfs.append(df) 

233 

234 catalog = functools.reduce(lambda d1, d2: d1.join(d2), dfs) 

235 return catalog 
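An illustrative sketch (made-up column names) of the structure run() builds before the join: each input afw table becomes a DataFrame indexed by source id whose columns are (dataset, band, column) tuples, and the frames are then joined on that shared index.

import functools
import pandas as pd

meas_g = pd.DataFrame({'base_PsfFlux_instFlux': [1.0]}, index=pd.Index([42], name='id'))
meas_g.columns = pd.MultiIndex.from_tuples(
    [('meas', 'g', c) for c in meas_g.columns], names=('dataset', 'band', 'column'))

ref_g = pd.DataFrame({'detect_isPrimary': [True]}, index=pd.Index([42], name='id'))
ref_g.columns = pd.MultiIndex.from_tuples(
    [('ref', 'g', c) for c in ref_g.columns], names=('dataset', 'band', 'column'))

# Same join pattern as the functools.reduce call above; rows align on the id index.
merged = functools.reduce(lambda d1, d2: d1.join(d2), [meas_g, ref_g])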

236 

237 def write(self, patchRef, catalog): 

238 """Write the output. 

239 

240 Parameters 

241 ---------- 

242 catalog : `ParquetTable` 

243 Catalog to write 

244 patchRef : `lsst.daf.persistence.ButlerDataRef` 

245 Data reference for patch 

246 """ 

247 patchRef.put(catalog, self.config.coaddName + "Coadd_" + self.outputDataset) 

248 # since the filter isn't actually part of the data ID for the dataset we're saving, 

249 # it's confusing to see it in the log message, even if the butler simply ignores it. 

250 mergeDataId = patchRef.dataId.copy() 

251 del mergeDataId["filter"] 

252 self.log.info("Wrote merged catalog: %s", mergeDataId) 

253 

254 def writeMetadata(self, dataRefList): 

255 """No metadata to write, and not sure how to write it for a list of dataRefs. 

256 """ 

257 pass 

258 

259 

260class WriteSourceTableConnections(pipeBase.PipelineTaskConnections, 

261 defaultTemplates={"catalogType": ""}, 

262 dimensions=("instrument", "visit", "detector")): 

263 

264 catalog = connectionTypes.Input( 

265 doc="Input full-depth catalog of sources produced by CalibrateTask", 

266 name="{catalogType}src", 

267 storageClass="SourceCatalog", 

268 dimensions=("instrument", "visit", "detector") 

269 ) 

270 outputCatalog = connectionTypes.Output( 

271 doc="Catalog of sources, `src` in Parquet format. The 'id' column is " 

272 "replaced with an index; all other columns are unchanged.", 

273 name="{catalogType}source", 

274 storageClass="DataFrame", 

275 dimensions=("instrument", "visit", "detector") 

276 ) 

277 

278 

279class WriteSourceTableConfig(pipeBase.PipelineTaskConfig, 

280 pipelineConnections=WriteSourceTableConnections): 

281 doApplyExternalPhotoCalib = pexConfig.Field( 

282 dtype=bool, 

283 default=False, 

284 doc=("Add local photoCalib columns from the calexp.photoCalib? Should only set True if " 

285 "generating Source Tables from older src tables which do not already have local calib columns") 

286 ) 

287 doApplyExternalSkyWcs = pexConfig.Field( 

288 dtype=bool, 

289 default=False, 

290 doc=("Add local WCS columns from the calexp.wcs? Should only set True if " 

291 "generating Source Tables from older src tables which do not already have local calib columns") 

292 ) 

293 

294 

295class WriteSourceTableTask(CmdLineTask, pipeBase.PipelineTask): 

296 """Write source table to parquet 

297 """ 

298 _DefaultName = "writeSourceTable" 

299 ConfigClass = WriteSourceTableConfig 

300 

301 def runDataRef(self, dataRef): 

302 src = dataRef.get('src') 

303 if self.config.doApplyExternalPhotoCalib or self.config.doApplyExternalSkyWcs: 

304 src = self.addCalibColumns(src, dataRef) 

305 

306 ccdVisitId = dataRef.get('ccdExposureId') 

307 result = self.run(src, ccdVisitId=ccdVisitId) 

308 dataRef.put(result.table, 'source') 

309 

310 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

311 inputs = butlerQC.get(inputRefs) 

312 inputs['ccdVisitId'] = butlerQC.quantum.dataId.pack("visit_detector") 

313 result = self.run(**inputs).table 

314 outputs = pipeBase.Struct(outputCatalog=result.toDataFrame()) 

315 butlerQC.put(outputs, outputRefs) 

316 

317 def run(self, catalog, ccdVisitId=None): 

318 """Convert `src` catalog to parquet 

319 

320 Parameters 

321 ---------- 

322 catalog : `afwTable.SourceCatalog`

323 Catalog to be converted.

324 ccdVisitId : `int`

325 ccdVisitId to be added as a column.

326 

327 Returns 

328 ------- 

329 result : `lsst.pipe.base.Struct` 

330 ``table`` 

331 `ParquetTable` version of the input catalog 

332 """ 

333 self.log.info("Generating parquet table from src catalog %s", ccdVisitId) 

334 df = catalog.asAstropy().to_pandas().set_index('id', drop=True) 

335 df['ccdVisitId'] = ccdVisitId 

336 return pipeBase.Struct(table=ParquetTable(dataFrame=df)) 
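A hedged usage sketch, assuming `src` is an afw SourceCatalog already in memory, the task can be built with its default config, and the ccdVisitId value is made up:

task = WriteSourceTableTask()
result = task.run(src, ccdVisitId=12345067)
df = result.table.toDataFrame()                  # ParquetTable -> pandas.DataFrame
assert (df['ccdVisitId'] == 12345067).all()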

337 

338 def addCalibColumns(self, catalog, dataRef): 

339 """Add columns with local calibration evaluated at each centroid 

340 

341 This exists for backwards compatibility with old repos: it converts old

342 src catalogs (which don't have the expected local calib columns) into

343 Source Tables.

344 

345 Parameters 

346 ---------- 

347 catalog: `afwTable.SourceCatalog` 

348 catalog to which calib columns will be added 

349 dataRef : `lsst.daf.persistence.ButlerDataRef`

350 for fetching the calibs from disk. 

351 

352 Returns 

353 ------- 

354 newCat: `afwTable.SourceCatalog` 

355 Source Catalog with requested local calib columns 

356 """ 

357 mapper = afwTable.SchemaMapper(catalog.schema) 

358 measureConfig = SingleFrameMeasurementTask.ConfigClass() 

359 measureConfig.doReplaceWithNoise = False 

360 

361 # Just need the WCS or the PhotoCalib attached to an exposure

362 exposure = dataRef.get('calexp_sub', 

363 bbox=lsst.geom.Box2I(lsst.geom.Point2I(0, 0), lsst.geom.Point2I(0, 0))) 

364 

365 mapper = afwTable.SchemaMapper(catalog.schema) 

366 mapper.addMinimalSchema(catalog.schema, True) 

367 schema = mapper.getOutputSchema() 

368 

369 exposureIdInfo = dataRef.get("expIdInfo") 

370 measureConfig.plugins.names = [] 

371 if self.config.doApplyExternalSkyWcs: 

372 plugin = 'base_LocalWcs' 

373 if plugin in schema: 

374 raise RuntimeError(f"{plugin} already in src catalog. Set doApplyExternalSkyWcs=False") 

375 else: 

376 measureConfig.plugins.names.add(plugin) 

377 

378 if self.config.doApplyExternalPhotoCalib: 

379 plugin = 'base_LocalPhotoCalib' 

380 if plugin in schema: 

381 raise RuntimeError(f"{plugin} already in src catalog. Set doApplyExternalPhotoCalib=False") 

382 else: 

383 measureConfig.plugins.names.add(plugin) 

384 

385 measurement = SingleFrameMeasurementTask(config=measureConfig, schema=schema) 

386 newCat = afwTable.SourceCatalog(schema) 

387 newCat.extend(catalog, mapper=mapper) 

388 measurement.run(measCat=newCat, exposure=exposure, exposureId=exposureIdInfo.expId) 

389 return newCat 
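A config sketch for the backwards-compatibility path above: when converting old src catalogs that lack the local calib columns, both flags are switched on so runDataRef calls addCalibColumns before writing.

config = WriteSourceTableConfig()
config.doApplyExternalPhotoCalib = True
config.doApplyExternalSkyWcs = True
task = WriteSourceTableTask(config=config)
# task.runDataRef(dataRef) now adds base_LocalWcs / base_LocalPhotoCalib columns.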

390 

391 def writeMetadata(self, dataRef): 

392 """No metadata to write. 

393 """ 

394 pass 

395 

396 @classmethod 

397 def _makeArgumentParser(cls): 

398 parser = ArgumentParser(name=cls._DefaultName) 

399 parser.add_id_argument("--id", 'src', 

400 help="data ID, e.g. --id visit=12345 ccd=0") 

401 return parser 

402 

403 

404class PostprocessAnalysis(object): 

405 """Calculate columns from ParquetTable 

406 

407 This object manages and organizes an arbitrary set of computations 

408 on a catalog. The catalog is defined by a 

409 `lsst.pipe.tasks.parquetTable.ParquetTable` object (or list thereof), such as a 

410 `deepCoadd_obj` dataset, and the computations are defined by a collection 

411 of `lsst.pipe.tasks.functors.Functor` objects (or, equivalently,

412 a `CompositeFunctor`). 

413 

414 After the object is initialized, accessing the `.df` attribute (which 

415 holds the `pandas.DataFrame` containing the results of the calculations) triggers 

416 computation of said dataframe. 

417 

418 One of the conveniences of using this object is the ability to define a desired common 

419 filter for all functors. This enables the same functor collection to be passed to 

420 several different `PostprocessAnalysis` objects without having to change the original 

421 functor collection, since the `filt` keyword argument of this object triggers an 

422 overwrite of the `filt` property for all functors in the collection. 

423 

424 This object also allows a list of refFlags to be passed, and defines a set of default 

425 refFlags that are always included even if not requested. 

426 

427 If a list of `ParquetTable` objects is passed, rather than a single one, then the

428 calculations will be mapped over all the input catalogs. In principle, it should 

429 be straightforward to parallelize this activity, but initial tests have failed 

430 (see TODO in code comments). 

431 

432 Parameters 

433 ---------- 

434 parq : `lsst.pipe.tasks.parquetTable.ParquetTable` (or list of such)

435 Source catalog(s) for computation 

436 

437 functors : `list`, `dict`, or `lsst.pipe.tasks.functors.CompositeFunctor` 

438 Computations to do (functors that act on `parq`). 

439 If a dict, the output 

440 DataFrame will have columns keyed accordingly. 

441 If a list, the column keys will come from the 

442 `.shortname` attribute of each functor. 

443 

444 filt : `str` (optional) 

445 Filter in which to calculate. If provided, 

446 this will overwrite any existing `.filt` attribute 

447 of the provided functors. 

448 

449 flags : `list` (optional) 

450 List of flags (per-band) to include in output table. 

451 Taken from the `meas` dataset if applied to a multilevel Object Table. 

452 

453 refFlags : `list` (optional) 

454 List of refFlags (only reference band) to include in output table. 

455 

456 forcedFlags : `list` (optional) 

457 List of flags (per-band) to include in output table. 

458 Taken from the ``forced_src`` dataset if applied to a 

459 multilevel Object Table. Intended for flags from measurement plugins 

460 only run during multi-band forced-photometry. 

461 """ 

462 _defaultRefFlags = [] 

463 _defaultFuncs = () 

464 

465 def __init__(self, parq, functors, filt=None, flags=None, refFlags=None, forcedFlags=None): 

466 self.parq = parq 

467 self.functors = functors 

468 

469 self.filt = filt 

470 self.flags = list(flags) if flags is not None else [] 

471 self.forcedFlags = list(forcedFlags) if forcedFlags is not None else [] 

472 self.refFlags = list(self._defaultRefFlags) 

473 if refFlags is not None: 

474 self.refFlags += list(refFlags) 

475 

476 self._df = None 

477 

478 @property 

479 def defaultFuncs(self): 

480 funcs = dict(self._defaultFuncs) 

481 return funcs 

482 

483 @property 

484 def func(self): 

485 additionalFuncs = self.defaultFuncs 

486 additionalFuncs.update({flag: Column(flag, dataset='forced_src') for flag in self.forcedFlags}) 

487 additionalFuncs.update({flag: Column(flag, dataset='ref') for flag in self.refFlags}) 

488 additionalFuncs.update({flag: Column(flag, dataset='meas') for flag in self.flags}) 

489 

490 if isinstance(self.functors, CompositeFunctor): 

491 func = self.functors 

492 else: 

493 func = CompositeFunctor(self.functors) 

494 

495 func.funcDict.update(additionalFuncs) 

496 func.filt = self.filt 

497 

498 return func 

499 

500 @property 

501 def noDupCols(self): 

502 return [name for name, func in self.func.funcDict.items() if func.noDup or func.dataset == 'ref'] 

503 

504 @property 

505 def df(self): 

506 if self._df is None: 

507 self.compute() 

508 return self._df 

509 

510 def compute(self, dropna=False, pool=None): 

511 # map over multiple parquet tables 

512 if type(self.parq) in (list, tuple): 

513 if pool is None: 

514 dflist = [self.func(parq, dropna=dropna) for parq in self.parq] 

515 else: 

516 # TODO: Figure out why this doesn't work (pyarrow pickling issues?) 

517 dflist = pool.map(functools.partial(self.func, dropna=dropna), self.parq) 

518 self._df = pd.concat(dflist) 

519 else: 

520 self._df = self.func(self.parq, dropna=dropna) 

521 

522 return self._df 
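A usage sketch under stated assumptions: `parq` is a deepCoadd_obj ParquetTable (or a deferred handle) already in hand, and 'Object.yaml' is a hypothetical functor file of the form accepted by CompositeFunctor.from_file:

from lsst.pipe.tasks.functors import CompositeFunctor

funcs = CompositeFunctor.from_file('Object.yaml')        # hypothetical functor file
analysis = PostprocessAnalysis(parq, funcs, filt='i',
                               refFlags=['detect_isPrimary'])
df = analysis.df     # first access triggers compute(); refFlags come from the 'ref' dataset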

523 

524 

525class TransformCatalogBaseConnections(pipeBase.PipelineTaskConnections, 

526 dimensions=()): 

527 """Expected Connections for subclasses of TransformCatalogBaseTask. 

528 

529 Must be subclassed. 

530 """ 

531 inputCatalog = connectionTypes.Input( 

532 name="", 

533 storageClass="DataFrame", 

534 ) 

535 outputCatalog = connectionTypes.Output( 

536 name="", 

537 storageClass="DataFrame", 

538 ) 

539 

540 

541class TransformCatalogBaseConfig(pipeBase.PipelineTaskConfig, 

542 pipelineConnections=TransformCatalogBaseConnections): 

543 functorFile = pexConfig.Field( 

544 dtype=str, 

545 doc="Path to YAML file specifying Science Data Model functors to use " 

546 "when copying columns and computing calibrated values.", 

547 default=None, 

548 optional=True 

549 ) 

550 primaryKey = pexConfig.Field( 

551 dtype=str, 

552 doc="Name of column to be set as the DataFrame index. If None, the index" 

553 "will be named `id`", 

554 default=None, 

555 optional=True 

556 ) 

557 

558 

559class TransformCatalogBaseTask(CmdLineTask, pipeBase.PipelineTask): 

560 """Base class for transforming/standardizing a catalog 

561 

562 by applying functors that convert units and apply calibrations. 

563 The purpose of this task is to perform a set of computations on 

564 an input `ParquetTable` dataset (such as `deepCoadd_obj`) and write the 

565 results to a new dataset (which needs to be declared in an `outputDataset` 

566 attribute). 

567 

568 The calculations to be performed are defined in a YAML file that specifies 

569 a set of functors to be computed, provided as 

570 a `--functorFile` config parameter. An example of such a YAML file 

571 is the following: 

572 

573 funcs: 

574 psfMag: 

575 functor: Mag 

576 args: 

577 - base_PsfFlux 

578 filt: HSC-G 

579 dataset: meas 

580 cmodel_magDiff: 

581 functor: MagDiff 

582 args: 

583 - modelfit_CModel 

584 - base_PsfFlux 

585 filt: HSC-G 

586 gauss_magDiff: 

587 functor: MagDiff 

588 args: 

589 - base_GaussianFlux 

590 - base_PsfFlux 

591 filt: HSC-G 

592 count: 

593 functor: Column 

594 args: 

595 - base_InputCount_value 

596 filt: HSC-G 

597 deconvolved_moments: 

598 functor: DeconvolvedMoments 

599 filt: HSC-G 

600 dataset: forced_src 

601 refFlags: 

602 - calib_psfUsed 

603 - merge_measurement_i 

604 - merge_measurement_r 

605 - merge_measurement_z 

606 - merge_measurement_y 

607 - merge_measurement_g 

608 - base_PixelFlags_flag_inexact_psfCenter 

609 - detect_isPrimary 

610 

611 The names for each entry under "funcs" will become the names of columns in the

612 output dataset. All the functors referenced are defined in `lsst.pipe.tasks.functors`. 

613 Positional arguments to be passed to each functor are in the `args` list, 

614 and any additional entries for each column other than "functor" or "args" (e.g., `'filt'`, 

615 `'dataset'`) are treated as keyword arguments to be passed to the functor initialization. 

616 

617 The "flags" entry is the default shortcut for `Column` functors. 

618 All columns listed under "flags" will be copied to the output table 

619 untransformed. They can be of any datatype. 

620 In the special case of transforming a multi-level object table with

621 band and dataset indices (deepCoadd_obj), these will be taken from the

622 `meas` dataset and exploded out per band. 

623 

624 There are two special shortcuts that only apply when transforming 

625 multi-level Object (deepCoadd_obj) tables: 

626 - The "refFlags" entry is shortcut for `Column` functor 

627 taken from the `'ref'` dataset if transforming an ObjectTable. 

628 - The "forcedFlags" entry is shortcut for `Column` functors. 

629 taken from the ``forced_src`` dataset if transforming an ObjectTable. 

630 These are expanded out per band. 

631 

632 

633 This task uses the `lsst.pipe.tasks.postprocess.PostprocessAnalysis` object 

634 to organize and execute the calculations.

635 

636 """ 

637 @property 

638 def _DefaultName(self): 

639 raise NotImplementedError('Subclass must define "_DefaultName" attribute') 

640 

641 @property 

642 def outputDataset(self): 

643 raise NotImplementedError('Subclass must define "outputDataset" attribute') 

644 

645 @property 

646 def inputDataset(self): 

647 raise NotImplementedError('Subclass must define "inputDataset" attribute') 

648 

649 @property 

650 def ConfigClass(self): 

651 raise NotImplementedError('Subclass must define "ConfigClass" attribute') 

652 

653 def __init__(self, *args, **kwargs): 

654 super().__init__(*args, **kwargs) 

655 if self.config.functorFile: 

656 self.log.info('Loading transform functor definitions from %s',

657 self.config.functorFile) 

658 self.funcs = CompositeFunctor.from_file(self.config.functorFile) 

659 self.funcs.update(dict(PostprocessAnalysis._defaultFuncs)) 

660 else: 

661 self.funcs = None 

662 

663 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

664 inputs = butlerQC.get(inputRefs) 

665 if self.funcs is None: 

666 raise ValueError("config.functorFile is None. " 

667 "Must be a valid path to yaml in order to run Task as a PipelineTask.") 

668 result = self.run(parq=inputs['inputCatalog'], funcs=self.funcs, 

669 dataId=outputRefs.outputCatalog.dataId.full) 

670 outputs = pipeBase.Struct(outputCatalog=result) 

671 butlerQC.put(outputs, outputRefs) 

672 

673 def runDataRef(self, dataRef): 

674 parq = dataRef.get() 

675 if self.funcs is None: 

676 raise ValueError("config.functorFile is None. " 

677 "Must be a valid path to yaml in order to run as a CommandlineTask.") 

678 df = self.run(parq, funcs=self.funcs, dataId=dataRef.dataId) 

679 self.write(df, dataRef) 

680 return df 

681 

682 def run(self, parq, funcs=None, dataId=None, band=None): 

683 """Do postprocessing calculations 

684 

685 Takes a `ParquetTable` object and dataId, 

686 returns a dataframe with results of postprocessing calculations. 

687 

688 Parameters 

689 ---------- 

690 parq : `lsst.pipe.tasks.parquetTable.ParquetTable` 

691 ParquetTable from which calculations are done. 

692 funcs : `lsst.pipe.tasks.functors.Functors` 

693 Functors to apply to the table's columns 

694 dataId : dict, optional 

695 If provided, each key/value pair is added as a column (e.g. a `patchId` column).

696 band : `str`, optional 

697 Filter band that is being processed. 

698 

699 Returns 

700 -------

701 `pandas.DataFrame` 

702 

703 """ 

704 self.log.info("Transforming/standardizing the source table dataId: %s", dataId) 

705 

706 df = self.transform(band, parq, funcs, dataId).df 

707 self.log.info("Made a table of %d columns and %d rows", len(df.columns), len(df)) 

708 return df 

709 

710 def getFunctors(self): 

711 return self.funcs 

712 

713 def getAnalysis(self, parq, funcs=None, band=None): 

714 if funcs is None: 

715 funcs = self.funcs 

716 analysis = PostprocessAnalysis(parq, funcs, filt=band) 

717 return analysis 

718 

719 def transform(self, band, parq, funcs, dataId): 

720 analysis = self.getAnalysis(parq, funcs=funcs, band=band) 

721 df = analysis.df 

722 if dataId is not None: 

723 for key, value in dataId.items(): 

724 df[str(key)] = value 

725 

726 if self.config.primaryKey: 

727 if df.index.name != self.config.primaryKey and self.config.primaryKey in df: 

728 df.reset_index(inplace=True, drop=True) 

729 df.set_index(self.config.primaryKey, inplace=True) 

730 

731 return pipeBase.Struct( 

732 df=df, 

733 analysis=analysis 

734 ) 

735 

736 def write(self, df, parqRef): 

737 parqRef.put(ParquetTable(dataFrame=df), self.outputDataset) 

738 

739 def writeMetadata(self, dataRef): 

740 """No metadata to write. 

741 """ 

742 pass 

743 

744 

745class TransformObjectCatalogConnections(pipeBase.PipelineTaskConnections, 

746 defaultTemplates={"coaddName": "deep"}, 

747 dimensions=("tract", "patch", "skymap")): 

748 inputCatalog = connectionTypes.Input( 

749 doc="The vertical concatenation of the deepCoadd_{ref|meas|forced_src} catalogs, " 

750 "stored as a DataFrame with a multi-level column index per-patch.", 

751 dimensions=("tract", "patch", "skymap"), 

752 storageClass="DataFrame", 

753 name="{coaddName}Coadd_obj", 

754 deferLoad=True, 

755 ) 

756 outputCatalog = connectionTypes.Output( 

757 doc="Per-Patch Object Table of columns transformed from the deepCoadd_obj table per the standard " 

758 "data model.", 

759 dimensions=("tract", "patch", "skymap"), 

760 storageClass="DataFrame", 

761 name="objectTable" 

762 ) 

763 

764 

765class TransformObjectCatalogConfig(TransformCatalogBaseConfig, 

766 pipelineConnections=TransformObjectCatalogConnections): 

767 coaddName = pexConfig.Field( 

768 dtype=str, 

769 default="deep", 

770 doc="Name of coadd" 

771 ) 

772 # TODO: remove in DM-27177 

773 filterMap = pexConfig.DictField( 

774 keytype=str, 

775 itemtype=str, 

776 default={}, 

777 doc=("Dictionary mapping full filter name to short one for column name munging." 

778 "These filters determine the output columns no matter what filters the " 

779 "input data actually contain."), 

780 deprecated=("Coadds are now identified by the band, so this transform is unused." 

781 "Will be removed after v22.") 

782 ) 

783 outputBands = pexConfig.ListField( 

784 dtype=str, 

785 default=None, 

786 optional=True, 

787 doc=("These bands and only these bands will appear in the output," 

788 " NaN-filled if the input does not include them." 

789 " If None, then use all bands found in the input.") 

790 ) 

791 camelCase = pexConfig.Field( 

792 dtype=bool, 

793 default=False, 

794 doc=("Write per-band columns names with camelCase, else underscore " 

795 "For example: gPsFlux instead of g_PsFlux.") 

796 ) 

797 multilevelOutput = pexConfig.Field( 

798 dtype=bool, 

799 default=False, 

800 doc=("Whether results dataframe should have a multilevel column index (True) or be flat " 

801 "and name-munged (False).") 

802 ) 

803 goodFlags = pexConfig.ListField( 

804 dtype=str, 

805 default=[], 

806 doc=("List of 'good' flags that should be set False when populating empty tables. " 

807 "All other flags are considered to be 'bad' flags and will be set to True.") 

808 ) 

809 floatFillValue = pexConfig.Field( 

810 dtype=float, 

811 default=np.nan, 

812 doc="Fill value for float fields when populating empty tables." 

813 ) 

814 integerFillValue = pexConfig.Field( 

815 dtype=int, 

816 default=-1, 

817 doc="Fill value for integer fields when populating empty tables." 

818 ) 

819 

820 def setDefaults(self): 

821 super().setDefaults() 

822 self.functorFile = os.path.join('$PIPE_TASKS_DIR', 'schemas', 'Object.yaml') 

823 self.primaryKey = 'objectId' 

824 self.goodFlags = ['calib_astrometry_used', 

825 'calib_photometry_reserved', 

826 'calib_photometry_used', 

827 'calib_psf_candidate', 

828 'calib_psf_reserved', 

829 'calib_psf_used'] 

830 

831 

832class TransformObjectCatalogTask(TransformCatalogBaseTask): 

833 """Produce a flattened Object Table to match the format specified in 

834 sdm_schemas. 

835 

836 Do the same set of postprocessing calculations on all bands 

837 

838 This is identical to `TransformCatalogBaseTask`, except that it does the

839 specified functor calculations for all filters present in the 

840 input `deepCoadd_obj` table. Any specific `"filt"` keywords specified 

841 by the YAML file will be superseded.

842 """ 

843 _DefaultName = "transformObjectCatalog" 

844 ConfigClass = TransformObjectCatalogConfig 

845 

846 # Used by Gen 2 runDataRef only: 

847 inputDataset = 'deepCoadd_obj' 

848 outputDataset = 'objectTable' 

849 

850 @classmethod 

851 def _makeArgumentParser(cls): 

852 parser = ArgumentParser(name=cls._DefaultName) 

853 parser.add_id_argument("--id", cls.inputDataset, 

854 ContainerClass=CoaddDataIdContainer, 

855 help="data ID, e.g. --id tract=12345 patch=1,2") 

856 return parser 

857 

858 def run(self, parq, funcs=None, dataId=None, band=None): 

859 # NOTE: band kwarg is ignored here. 

860 dfDict = {} 

861 analysisDict = {} 

862 templateDf = pd.DataFrame() 

863 

864 if isinstance(parq, DeferredDatasetHandle): 

865 columns = parq.get(component='columns') 

866 inputBands = columns.unique(level=1).values 

867 else: 

868 inputBands = parq.columnLevelNames['band'] 

869 

870 outputBands = self.config.outputBands if self.config.outputBands else inputBands 

871 

872 # Perform transform for data of filters that exist in parq. 

873 for inputBand in inputBands: 

874 if inputBand not in outputBands: 

875 self.log.info("Ignoring %s band data in the input", inputBand) 

876 continue 

877 self.log.info("Transforming the catalog of band %s", inputBand) 

878 result = self.transform(inputBand, parq, funcs, dataId) 

879 dfDict[inputBand] = result.df 

880 analysisDict[inputBand] = result.analysis 

881 if templateDf.empty: 

882 templateDf = result.df 

883 

884 # Put filler values in columns of other wanted bands 

885 for filt in outputBands: 

886 if filt not in dfDict: 

887 self.log.info("Adding empty columns for band %s", filt) 

888 dfTemp = templateDf.copy() 

889 for col in dfTemp.columns: 

890 testValue = dfTemp[col].values[0] 

891 if isinstance(testValue, (np.bool_, pd.BooleanDtype)): 

892 # Boolean flag type, check if it is a "good" flag 

893 if col in self.config.goodFlags: 

894 fillValue = False 

895 else: 

896 fillValue = True 

897 elif isinstance(testValue, numbers.Integral): 

898 # Checking numbers.Integral catches all flavors 

899 # of python, numpy, pandas, etc. integers. 

900 # We must ensure this is not an unsigned integer. 

901 if isinstance(testValue, np.unsignedinteger): 

902 raise ValueError("Parquet tables may not have unsigned integer columns.") 

903 else: 

904 fillValue = self.config.integerFillValue 

905 else: 

906 fillValue = self.config.floatFillValue 

907 dfTemp[col].values[:] = fillValue 

908 dfDict[filt] = dfTemp 

909 

910 # This makes a multilevel column index, with band as first level 

911 df = pd.concat(dfDict, axis=1, names=['band', 'column']) 

912 

913 if not self.config.multilevelOutput: 

914 noDupCols = list(set.union(*[set(v.noDupCols) for v in analysisDict.values()])) 

915 if self.config.primaryKey in noDupCols: 

916 noDupCols.remove(self.config.primaryKey) 

917 if dataId is not None: 

918 noDupCols += list(dataId.keys()) 

919 df = flattenFilters(df, noDupCols=noDupCols, camelCase=self.config.camelCase, 

920 inputBands=inputBands) 

921 

922 self.log.info("Made a table of %d columns and %d rows", len(df.columns), len(df)) 

923 

924 return df 
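A standalone sketch of the empty-band fill logic above (column names are made up; fill values follow the config defaults: 'good' flags become False, other flags True, integers integerFillValue, floats floatFillValue):

import numbers
import numpy as np
import pandas as pd

goodFlags = {'calib_psf_used'}
template = pd.DataFrame({'psfFlux': [1.2], 'calib_psf_used': [True], 'deblend_nChild': [2]})

filled = template.copy()
for col in filled.columns:
    testValue = filled[col].values[0]
    if isinstance(testValue, np.bool_):
        filled[col] = col not in goodFlags     # good flags -> False, bad flags -> True
    elif isinstance(testValue, numbers.Integral):
        filled[col] = -1                       # default integerFillValue
    else:
        filled[col] = np.nan                   # default floatFillValue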

925 

926 

927class TractObjectDataIdContainer(CoaddDataIdContainer): 

928 

929 def makeDataRefList(self, namespace): 

930 """Make self.refList from self.idList 

931 

932 Generate a list of data references given tract and/or patch. 

933 This was adapted from `TractQADataIdContainer`, which was 

934 `TractDataIdContainer` modified to not require "filter".

935 Only existing dataRefs are returned. 

936 """ 

937 def getPatchRefList(tract): 

938 return [namespace.butler.dataRef(datasetType=self.datasetType, 

939 tract=tract.getId(), 

940 patch="%d,%d" % patch.getIndex()) for patch in tract] 

941 

942 tractRefs = defaultdict(list) # Data references for each tract 

943 for dataId in self.idList: 

944 skymap = self.getSkymap(namespace) 

945 

946 if "tract" in dataId: 

947 tractId = dataId["tract"] 

948 if "patch" in dataId: 

949 tractRefs[tractId].append(namespace.butler.dataRef(datasetType=self.datasetType, 

950 tract=tractId, 

951 patch=dataId['patch'])) 

952 else: 

953 tractRefs[tractId] += getPatchRefList(skymap[tractId]) 

954 else: 

955 tractRefs = dict((tract.getId(), tractRefs.get(tract.getId(), []) + getPatchRefList(tract)) 

956 for tract in skymap) 

957 outputRefList = [] 

958 for tractRefList in tractRefs.values(): 

959 existingRefs = [ref for ref in tractRefList if ref.datasetExists()] 

960 outputRefList.append(existingRefs) 

961 

962 self.refList = outputRefList 

963 

964 

965class ConsolidateObjectTableConnections(pipeBase.PipelineTaskConnections, 

966 dimensions=("tract", "skymap")): 

967 inputCatalogs = connectionTypes.Input( 

968 doc="Per-Patch objectTables conforming to the standard data model.", 

969 name="objectTable", 

970 storageClass="DataFrame", 

971 dimensions=("tract", "patch", "skymap"), 

972 multiple=True, 

973 ) 

974 outputCatalog = connectionTypes.Output( 

975 doc="Pre-tract horizontal concatenation of the input objectTables", 

976 name="objectTable_tract", 

977 storageClass="DataFrame", 

978 dimensions=("tract", "skymap"), 

979 ) 

980 

981 

982class ConsolidateObjectTableConfig(pipeBase.PipelineTaskConfig, 

983 pipelineConnections=ConsolidateObjectTableConnections): 

984 coaddName = pexConfig.Field( 

985 dtype=str, 

986 default="deep", 

987 doc="Name of coadd" 

988 ) 

989 

990 

991class ConsolidateObjectTableTask(CmdLineTask, pipeBase.PipelineTask): 

992 """Write patch-merged source tables to a tract-level parquet file 

993 

994 Concatenates `objectTable` list into a per-tract `objectTable_tract`

995 """ 

996 _DefaultName = "consolidateObjectTable" 

997 ConfigClass = ConsolidateObjectTableConfig 

998 

999 inputDataset = 'objectTable' 

1000 outputDataset = 'objectTable_tract' 

1001 

1002 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

1003 inputs = butlerQC.get(inputRefs) 

1004 self.log.info("Concatenating %s per-patch Object Tables", 

1005 len(inputs['inputCatalogs'])) 

1006 df = pd.concat(inputs['inputCatalogs']) 

1007 butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs) 

1008 

1009 @classmethod 

1010 def _makeArgumentParser(cls): 

1011 parser = ArgumentParser(name=cls._DefaultName) 

1012 

1013 parser.add_id_argument("--id", cls.inputDataset, 

1014 help="data ID, e.g. --id tract=12345", 

1015 ContainerClass=TractObjectDataIdContainer) 

1016 return parser 

1017 

1018 def runDataRef(self, patchRefList): 

1019 df = pd.concat([patchRef.get().toDataFrame() for patchRef in patchRefList]) 

1020 patchRefList[0].put(ParquetTable(dataFrame=df), self.outputDataset) 

1021 

1022 def writeMetadata(self, dataRef): 

1023 """No metadata to write. 

1024 """ 

1025 pass 

1026 

1027 

1028class TransformSourceTableConnections(pipeBase.PipelineTaskConnections, 

1029 defaultTemplates={"catalogType": ""}, 

1030 dimensions=("instrument", "visit", "detector")): 

1031 

1032 inputCatalog = connectionTypes.Input( 

1033 doc="Wide input catalog of sources produced by WriteSourceTableTask", 

1034 name="{catalogType}source", 

1035 storageClass="DataFrame", 

1036 dimensions=("instrument", "visit", "detector"), 

1037 deferLoad=True 

1038 ) 

1039 outputCatalog = connectionTypes.Output( 

1040 doc="Narrower, per-detector Source Table transformed and converted per a " 

1041 "specified set of functors", 

1042 name="{catalogType}sourceTable", 

1043 storageClass="DataFrame", 

1044 dimensions=("instrument", "visit", "detector") 

1045 ) 

1046 

1047 

1048class TransformSourceTableConfig(TransformCatalogBaseConfig, 

1049 pipelineConnections=TransformSourceTableConnections): 

1050 

1051 def setDefaults(self): 

1052 super().setDefaults() 

1053 self.functorFile = os.path.join('$PIPE_TASKS_DIR', 'schemas', 'Source.yaml') 

1054 self.primaryKey = 'sourceId' 

1055 

1056 

1057class TransformSourceTableTask(TransformCatalogBaseTask): 

1058 """Transform/standardize a source catalog 

1059 """ 

1060 _DefaultName = "transformSourceTable" 

1061 ConfigClass = TransformSourceTableConfig 

1062 

1063 inputDataset = 'source' 

1064 outputDataset = 'sourceTable' 

1065 

1066 @classmethod 

1067 def _makeArgumentParser(cls): 

1068 parser = ArgumentParser(name=cls._DefaultName) 

1069 parser.add_id_argument("--id", datasetType=cls.inputDataset, 

1070 level="sensor", 

1071 help="data ID, e.g. --id visit=12345 ccd=0") 

1072 return parser 

1073 

1074 def runDataRef(self, dataRef): 

1075 """Override to specify band label to run().""" 

1076 parq = dataRef.get() 

1077 funcs = self.getFunctors() 

1078 band = dataRef.get("calexp_filterLabel", immediate=True).bandLabel 

1079 df = self.run(parq, funcs=funcs, dataId=dataRef.dataId, band=band) 

1080 self.write(df, dataRef) 

1081 return df 

1082 

1083 

1084class ConsolidateVisitSummaryConnections(pipeBase.PipelineTaskConnections, 

1085 dimensions=("instrument", "visit",), 

1086 defaultTemplates={"calexpType": ""}): 

1087 calexp = connectionTypes.Input( 

1088 doc="Processed exposures used for metadata", 

1089 name="{calexpType}calexp", 

1090 storageClass="ExposureF", 

1091 dimensions=("instrument", "visit", "detector"), 

1092 deferLoad=True, 

1093 multiple=True, 

1094 ) 

1095 visitSummary = connectionTypes.Output( 

1096 doc=("Per-visit consolidated exposure metadata. These catalogs use " 

1097 "detector id for the id and are sorted for fast lookups of a " 

1098 "detector."), 

1099 name="{calexpType}visitSummary", 

1100 storageClass="ExposureCatalog", 

1101 dimensions=("instrument", "visit"), 

1102 ) 

1103 

1104 

1105class ConsolidateVisitSummaryConfig(pipeBase.PipelineTaskConfig, 

1106 pipelineConnections=ConsolidateVisitSummaryConnections): 

1107 """Config for ConsolidateVisitSummaryTask""" 

1108 pass 

1109 

1110 

1111class ConsolidateVisitSummaryTask(pipeBase.PipelineTask, pipeBase.CmdLineTask): 

1112 """Task to consolidate per-detector visit metadata. 

1113 

1114 This task aggregates the following metadata from all the detectors in a 

1115 single visit into an exposure catalog: 

1116 - The visitInfo. 

1117 - The wcs. 

1118 - The photoCalib. 

1119 - The physical_filter and band (if available). 

1120 - The psf size, shape, and effective area at the center of the detector. 

1121 - The corners of the bounding box in right ascension/declination. 

1122 

1123 Other quantities such as Detector, Psf, ApCorrMap, and TransmissionCurve 

1124 are not persisted here because of storage concerns, and because of their 

1125 limited utility as summary statistics. 

1126 

1127 Tests for this task are performed in ci_hsc_gen3. 

1128 """ 

1129 _DefaultName = "consolidateVisitSummary" 

1130 ConfigClass = ConsolidateVisitSummaryConfig 

1131 

1132 @classmethod 

1133 def _makeArgumentParser(cls): 

1134 parser = ArgumentParser(name=cls._DefaultName) 

1135 

1136 parser.add_id_argument("--id", "calexp", 

1137 help="data ID, e.g. --id visit=12345", 

1138 ContainerClass=VisitDataIdContainer) 

1139 return parser 

1140 

1141 def writeMetadata(self, dataRef): 

1142 """No metadata to persist, so override to remove metadata persistance. 

1143 """ 

1144 pass 

1145 

1146 def writeConfig(self, butler, clobber=False, doBackup=True): 

1147 """No config to persist, so override to remove config persistance. 

1148 """ 

1149 pass 

1150 

1151 def runDataRef(self, dataRefList): 

1152 visit = dataRefList[0].dataId['visit'] 

1153 

1154 self.log.debug("Concatenating metadata from %d per-detector calexps (visit %d)", 

1155 len(dataRefList), visit) 

1156 

1157 expCatalog = self._combineExposureMetadata(visit, dataRefList, isGen3=False) 

1158 

1159 dataRefList[0].put(expCatalog, 'visitSummary', visit=visit) 

1160 

1161 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

1162 dataRefs = butlerQC.get(inputRefs.calexp) 

1163 visit = dataRefs[0].dataId.byName()['visit'] 

1164 

1165 self.log.debug("Concatenating metadata from %d per-detector calexps (visit %d)", 

1166 len(dataRefs), visit) 

1167 

1168 expCatalog = self._combineExposureMetadata(visit, dataRefs) 

1169 

1170 butlerQC.put(expCatalog, outputRefs.visitSummary) 

1171 

1172 def _combineExposureMetadata(self, visit, dataRefs, isGen3=True): 

1173 """Make a combined exposure catalog from a list of dataRefs. 

1174 These dataRefs must point to exposures with wcs, summaryStats, 

1175 and other visit metadata. 

1176 

1177 Parameters 

1178 ---------- 

1179 visit : `int` 

1180 Visit identification number. 

1181 dataRefs : `list` 

1182 List of dataRefs in visit. May be list of 

1183 `lsst.daf.persistence.ButlerDataRef` (Gen2) or 

1184 `lsst.daf.butler.DeferredDatasetHandle` (Gen3). 

1185 isGen3 : `bool`, optional 

1186 Specifies if this is a Gen3 list of datarefs. 

1187 

1188 Returns 

1189 ------- 

1190 visitSummary : `lsst.afw.table.ExposureCatalog` 

1191 Exposure catalog with per-detector summary information. 

1192 """ 

1193 schema = self._makeVisitSummarySchema() 

1194 cat = afwTable.ExposureCatalog(schema) 

1195 cat.resize(len(dataRefs)) 

1196 

1197 cat['visit'] = visit 

1198 

1199 for i, dataRef in enumerate(dataRefs): 

1200 if isGen3: 

1201 visitInfo = dataRef.get(component='visitInfo') 

1202 filterLabel = dataRef.get(component='filterLabel') 

1203 summaryStats = dataRef.get(component='summaryStats') 

1204 detector = dataRef.get(component='detector') 

1205 wcs = dataRef.get(component='wcs') 

1206 photoCalib = dataRef.get(component='photoCalib') 

1207 detector = dataRef.get(component='detector') 

1208 bbox = dataRef.get(component='bbox') 

1209 validPolygon = dataRef.get(component='validPolygon') 

1210 else: 

1211 # Note that we need to read the calexp because there is 

1212 # no magic access to the psf except through the exposure. 

1213 gen2_read_bbox = lsst.geom.BoxI(lsst.geom.PointI(0, 0), lsst.geom.PointI(1, 1)) 

1214 exp = dataRef.get(datasetType='calexp_sub', bbox=gen2_read_bbox) 

1215 visitInfo = exp.getInfo().getVisitInfo() 

1216 filterLabel = dataRef.get("calexp_filterLabel") 

1217 summaryStats = exp.getInfo().getSummaryStats() 

1218 wcs = exp.getWcs() 

1219 photoCalib = exp.getPhotoCalib() 

1220 detector = exp.getDetector() 

1221 bbox = dataRef.get(datasetType='calexp_bbox') 

1222 validPolygon = exp.getInfo().getValidPolygon() 

1223 

1224 rec = cat[i] 

1225 rec.setBBox(bbox) 

1226 rec.setVisitInfo(visitInfo) 

1227 rec.setWcs(wcs) 

1228 rec.setPhotoCalib(photoCalib) 

1229 rec.setValidPolygon(validPolygon) 

1230 

1231 rec['physical_filter'] = filterLabel.physicalLabel if filterLabel.hasPhysicalLabel() else "" 

1232 rec['band'] = filterLabel.bandLabel if filterLabel.hasBandLabel() else "" 

1233 rec.setId(detector.getId()) 

1234 rec['psfSigma'] = summaryStats.psfSigma 

1235 rec['psfIxx'] = summaryStats.psfIxx 

1236 rec['psfIyy'] = summaryStats.psfIyy 

1237 rec['psfIxy'] = summaryStats.psfIxy 

1238 rec['psfArea'] = summaryStats.psfArea 

1239 rec['raCorners'][:] = summaryStats.raCorners 

1240 rec['decCorners'][:] = summaryStats.decCorners 

1241 rec['ra'] = summaryStats.ra 

1242 rec['decl'] = summaryStats.decl 

1243 rec['zenithDistance'] = summaryStats.zenithDistance 

1244 rec['zeroPoint'] = summaryStats.zeroPoint 

1245 rec['skyBg'] = summaryStats.skyBg 

1246 rec['skyNoise'] = summaryStats.skyNoise 

1247 rec['meanVar'] = summaryStats.meanVar 

1248 rec['astromOffsetMean'] = summaryStats.astromOffsetMean 

1249 rec['astromOffsetStd'] = summaryStats.astromOffsetStd 

1250 rec['nPsfStar'] = summaryStats.nPsfStar 

1251 rec['psfStarDeltaE1Median'] = summaryStats.psfStarDeltaE1Median 

1252 rec['psfStarDeltaE2Median'] = summaryStats.psfStarDeltaE2Median 

1253 rec['psfStarDeltaE1Scatter'] = summaryStats.psfStarDeltaE1Scatter 

1254 rec['psfStarDeltaE2Scatter'] = summaryStats.psfStarDeltaE2Scatter 

1255 rec['psfStarDeltaSizeMedian'] = summaryStats.psfStarDeltaSizeMedian 

1256 rec['psfStarDeltaSizeScatter'] = summaryStats.psfStarDeltaSizeScatter 

1257 rec['psfStarScaledDeltaSizeScatter'] = summaryStats.psfStarScaledDeltaSizeScatter 

1258 

1259 metadata = dafBase.PropertyList() 

1260 metadata.add("COMMENT", "Catalog id is detector id, sorted.") 

1261 # We are looping over existing datarefs, so the following is true 

1262 metadata.add("COMMENT", "Only detectors with data have entries.") 

1263 cat.setMetadata(metadata) 

1264 

1265 cat.sort() 

1266 return cat 
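A Gen3 usage sketch (the butler, instrument, and visit values here are assumed): because the record id is the detector id and the catalog is sorted, a per-detector lookup can use find():

summary = butler.get('visitSummary', instrument='HSC', visit=1228)   # assumed data ID
rec = summary.find(50)          # ExposureCatalog is sorted by detector id
print(rec['band'], rec['psfSigma'], rec['zeroPoint'])
wcs = rec.getWcs()              # per-detector WCS carried on the ExposureRecord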

1267 

1268 def _makeVisitSummarySchema(self): 

1269 """Make the schema for the visitSummary catalog.""" 

1270 schema = afwTable.ExposureTable.makeMinimalSchema() 

1271 schema.addField('visit', type='I', doc='Visit number') 

1272 schema.addField('physical_filter', type='String', size=32, doc='Physical filter') 

1273 schema.addField('band', type='String', size=32, doc='Name of band') 

1274 schema.addField('psfSigma', type='F', 

1275 doc='PSF model second-moments determinant radius (center of chip) (pixel)') 

1276 schema.addField('psfArea', type='F', 

1277 doc='PSF model effective area (center of chip) (pixel**2)') 

1278 schema.addField('psfIxx', type='F', 

1279 doc='PSF model Ixx (center of chip) (pixel**2)') 

1280 schema.addField('psfIyy', type='F', 

1281 doc='PSF model Iyy (center of chip) (pixel**2)') 

1282 schema.addField('psfIxy', type='F', 

1283 doc='PSF model Ixy (center of chip) (pixel**2)') 

1284 schema.addField('raCorners', type='ArrayD', size=4, 

1285 doc='Right Ascension of bounding box corners (degrees)') 

1286 schema.addField('decCorners', type='ArrayD', size=4, 

1287 doc='Declination of bounding box corners (degrees)') 

1288 schema.addField('ra', type='D', 

1289 doc='Right Ascension of bounding box center (degrees)') 

1290 schema.addField('decl', type='D', 

1291 doc='Declination of bounding box center (degrees)') 

1292 schema.addField('zenithDistance', type='F', 

1293 doc='Zenith distance of bounding box center (degrees)') 

1294 schema.addField('zeroPoint', type='F', 

1295 doc='Mean zeropoint in detector (mag)') 

1296 schema.addField('skyBg', type='F', 

1297 doc='Average sky background (ADU)') 

1298 schema.addField('skyNoise', type='F', 

1299 doc='Average sky noise (ADU)') 

1300 schema.addField('meanVar', type='F', 

1301 doc='Mean variance of the weight plane (ADU**2)') 

1302 schema.addField('astromOffsetMean', type='F', 

1303 doc='Mean offset of astrometric calibration matches (arcsec)') 

1304 schema.addField('astromOffsetStd', type='F', 

1305 doc='Standard deviation of offsets of astrometric calibration matches (arcsec)') 

1306 schema.addField('nPsfStar', type='I', doc='Number of stars used for PSF model') 

1307 schema.addField('psfStarDeltaE1Median', type='F', 

1308 doc='Median E1 residual (starE1 - psfE1) for psf stars') 

1309 schema.addField('psfStarDeltaE2Median', type='F', 

1310 doc='Median E2 residual (starE2 - psfE2) for psf stars') 

1311 schema.addField('psfStarDeltaE1Scatter', type='F', 

1312 doc='Scatter (via MAD) of E1 residual (starE1 - psfE1) for psf stars') 

1313 schema.addField('psfStarDeltaE2Scatter', type='F', 

1314 doc='Scatter (via MAD) of E2 residual (starE2 - psfE2) for psf stars') 

1315 schema.addField('psfStarDeltaSizeMedian', type='F', 

1316 doc='Median size residual (starSize - psfSize) for psf stars (pixel)') 

1317 schema.addField('psfStarDeltaSizeScatter', type='F', 

1318 doc='Scatter (via MAD) of size residual (starSize - psfSize) for psf stars (pixel)') 

1319 schema.addField('psfStarScaledDeltaSizeScatter', type='F', 

1320 doc='Scatter (via MAD) of size residual scaled by median size squared') 

1321 

1322 return schema 

1323 

1324 

1325class VisitDataIdContainer(DataIdContainer): 

1326 """DataIdContainer that groups sensor-level id's by visit 

1327 """ 

1328 

1329 def makeDataRefList(self, namespace): 

1330 """Make self.refList from self.idList 

1331 

1332 Generate a list of data references grouped by visit. 

1333 

1334 Parameters 

1335 ---------- 

1336 namespace : `argparse.Namespace` 

1337 Namespace used by `lsst.pipe.base.CmdLineTask` to parse command line arguments 

1338 """ 

1339 # Group by visits 

1340 visitRefs = defaultdict(list) 

1341 for dataId in self.idList: 

1342 if "visit" in dataId: 

1343 visitId = dataId["visit"] 

1344 # Append all dataRefs in this subset to the visit's list

1345 subset = namespace.butler.subset(self.datasetType, dataId=dataId) 

1346 visitRefs[visitId].extend([dataRef for dataRef in subset]) 

1347 

1348 outputRefList = [] 

1349 for refList in visitRefs.values(): 

1350 existingRefs = [ref for ref in refList if ref.datasetExists()] 

1351 if existingRefs: 

1352 outputRefList.append(existingRefs) 

1353 

1354 self.refList = outputRefList 

1355 

1356 

1357class ConsolidateSourceTableConnections(pipeBase.PipelineTaskConnections, 

1358 defaultTemplates={"catalogType": ""}, 

1359 dimensions=("instrument", "visit")): 

1360 inputCatalogs = connectionTypes.Input( 

1361 doc="Input per-detector Source Tables", 

1362 name="{catalogType}sourceTable", 

1363 storageClass="DataFrame", 

1364 dimensions=("instrument", "visit", "detector"), 

1365 multiple=True 

1366 ) 

1367 outputCatalog = connectionTypes.Output( 

1368 doc="Per-visit concatenation of Source Table", 

1369 name="{catalogType}sourceTable_visit", 

1370 storageClass="DataFrame", 

1371 dimensions=("instrument", "visit") 

1372 ) 

1373 

1374 

1375class ConsolidateSourceTableConfig(pipeBase.PipelineTaskConfig, 

1376 pipelineConnections=ConsolidateSourceTableConnections): 

1377 pass 

1378 

1379 

1380class ConsolidateSourceTableTask(CmdLineTask, pipeBase.PipelineTask): 

1381 """Concatenate `sourceTable` list into a per-visit `sourceTable_visit` 

1382 """ 

1383 _DefaultName = 'consolidateSourceTable' 

1384 ConfigClass = ConsolidateSourceTableConfig 

1385 

1386 inputDataset = 'sourceTable' 

1387 outputDataset = 'sourceTable_visit' 

1388 

1389 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

1390 from .makeCoaddTempExp import reorderRefs 

1391 

1392 detectorOrder = [ref.dataId['detector'] for ref in inputRefs.inputCatalogs] 

1393 detectorOrder.sort() 

1394 inputRefs = reorderRefs(inputRefs, detectorOrder, dataIdKey='detector') 

1395 inputs = butlerQC.get(inputRefs) 

1396 self.log.info("Concatenating %s per-detector Source Tables", 

1397 len(inputs['inputCatalogs'])) 

1398 df = pd.concat(inputs['inputCatalogs']) 

1399 butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs) 

1400 

1401 def runDataRef(self, dataRefList): 

1402 self.log.info("Concatenating %s per-detector Source Tables", len(dataRefList)) 

1403 df = pd.concat([dataRef.get().toDataFrame() for dataRef in dataRefList]) 

1404 dataRefList[0].put(ParquetTable(dataFrame=df), self.outputDataset) 

1405 

1406 @classmethod 

1407 def _makeArgumentParser(cls): 

1408 parser = ArgumentParser(name=cls._DefaultName) 

1409 

1410 parser.add_id_argument("--id", cls.inputDataset, 

1411 help="data ID, e.g. --id visit=12345", 

1412 ContainerClass=VisitDataIdContainer) 

1413 return parser 

1414 

1415 def writeMetadata(self, dataRef): 

1416 """No metadata to write. 

1417 """ 

1418 pass 

1419 

1420 def writeConfig(self, butler, clobber=False, doBackup=True): 

1421 """No config to write. 

1422 """ 

1423 pass 

1424 

1425 

1426class MakeCcdVisitTableConnections(pipeBase.PipelineTaskConnections, 

1427 dimensions=("instrument",), 

1428 defaultTemplates={"calexpType": ""}): 

1429 visitSummaryRefs = connectionTypes.Input( 

1430 doc="Data references for per-visit consolidated exposure metadata from ConsolidateVisitSummaryTask", 

1431 name="{calexpType}visitSummary", 

1432 storageClass="ExposureCatalog", 

1433 dimensions=("instrument", "visit"), 

1434 multiple=True, 

1435 deferLoad=True, 

1436 ) 

1437 outputCatalog = connectionTypes.Output( 

1438 doc="CCD and Visit metadata table", 

1439 name="ccdVisitTable", 

1440 storageClass="DataFrame", 

1441 dimensions=("instrument",) 

1442 ) 

1443 

1444 

1445class MakeCcdVisitTableConfig(pipeBase.PipelineTaskConfig, 

1446 pipelineConnections=MakeCcdVisitTableConnections): 

1447 pass 

1448 

1449 

1450class MakeCcdVisitTableTask(CmdLineTask, pipeBase.PipelineTask): 

1451 """Produce a `ccdVisitTable` from the `visitSummary` exposure catalogs. 

1452 """ 

1453 _DefaultName = 'makeCcdVisitTable' 

1454 ConfigClass = MakeCcdVisitTableConfig 

1455 

1456 def run(self, visitSummaryRefs): 

1457 """ Make a table of ccd information from the `visitSummary` catalogs. 

1458 Parameters 

1459 ---------- 

1460 visitSummaryRefs : `list` of `lsst.daf.butler.DeferredDatasetHandle` 

1461 List of DeferredDatasetHandles pointing to exposure catalogs with 

1462 per-detector summary information. 

1463 Returns 

1464 ------- 

1465 result : `lsst.pipe.base.Struct`

1466 Results struct with attribute: 

1467 - `outputCatalog` 

1468 Catalog of ccd and visit information. 

1469 """ 

1470 ccdEntries = [] 

1471 for visitSummaryRef in visitSummaryRefs: 

1472 visitSummary = visitSummaryRef.get() 

1473 visitInfo = visitSummary[0].getVisitInfo() 

1474 

1475 ccdEntry = {} 

1476 summaryTable = visitSummary.asAstropy() 

1477 selectColumns = ['id', 'visit', 'physical_filter', 'band', 'ra', 'decl', 'zenithDistance', 

1478 'zeroPoint', 'psfSigma', 'skyBg', 'skyNoise'] 

1479 ccdEntry = summaryTable[selectColumns].to_pandas().set_index('id') 

1480            # 'visit' is the human-readable visit number 

1481            # 'visitId' is the key into the visit table; the two values are the same. 

1482            # Technically you should join with the visit table to get the visit number. 

1483 ccdEntry = ccdEntry.rename(columns={"visit": "visitId"}) 

1484 dataIds = [DataCoordinate.standardize(visitSummaryRef.dataId, detector=id) for id in 

1485 summaryTable['id']] 

1486 packer = visitSummaryRef.dataId.universe.makePacker('visit_detector', visitSummaryRef.dataId) 

1487 ccdVisitIds = [packer.pack(dataId) for dataId in dataIds] 

1488 ccdEntry['ccdVisitId'] = ccdVisitIds 

1489 ccdEntry['detector'] = summaryTable['id'] 

1490 pixToArcseconds = np.array([vR.getWcs().getPixelScale().asArcseconds() for vR in visitSummary]) 

1491 ccdEntry["seeing"] = visitSummary['psfSigma'] * np.sqrt(8 * np.log(2)) * pixToArcseconds 

1492 

1493 ccdEntry["skyRotation"] = visitInfo.getBoresightRotAngle().asDegrees() 

1494 ccdEntry["expMidpt"] = visitInfo.getDate().toPython() 

1495 expTime = visitInfo.getExposureTime() 

1496 ccdEntry['expTime'] = expTime 

1497 ccdEntry["obsStart"] = ccdEntry["expMidpt"] - 0.5 * pd.Timedelta(seconds=expTime) 

1498 ccdEntry['darkTime'] = visitInfo.getDarkTime() 

1499 ccdEntry['xSize'] = summaryTable['bbox_max_x'] - summaryTable['bbox_min_x'] 

1500 ccdEntry['ySize'] = summaryTable['bbox_max_y'] - summaryTable['bbox_min_y'] 

1501 ccdEntry['llcra'] = summaryTable['raCorners'][:, 0] 

1502 ccdEntry['llcdec'] = summaryTable['decCorners'][:, 0] 

1503 ccdEntry['ulcra'] = summaryTable['raCorners'][:, 1] 

1504 ccdEntry['ulcdec'] = summaryTable['decCorners'][:, 1] 

1505 ccdEntry['urcra'] = summaryTable['raCorners'][:, 2] 

1506 ccdEntry['urcdec'] = summaryTable['decCorners'][:, 2] 

1507 ccdEntry['lrcra'] = summaryTable['raCorners'][:, 3] 

1508 ccdEntry['lrcdec'] = summaryTable['decCorners'][:, 3] 

1509 # TODO: DM-30618, Add raftName, nExposures, ccdTemp, binX, binY, and flags, 

1510 # and decide if WCS, and llcx, llcy, ulcx, ulcy, etc. values are actually wanted. 

1511 ccdEntries.append(ccdEntry) 

1512 

1513 outputCatalog = pd.concat(ccdEntries) 

1514 outputCatalog.set_index('ccdVisitId', inplace=True, verify_integrity=True) 

1515 return pipeBase.Struct(outputCatalog=outputCatalog) 

1516 

1517 
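# --- Illustrative sketch (not part of postprocess.py above): the "seeing"
# column computed in MakeCcdVisitTableTask.run converts the Gaussian PSF width
# psfSigma (pixels) to a FWHM in arcseconds via
#     seeing = psfSigma * sqrt(8 * ln 2) * pixelScale.
# The example values below are hypothetical.
import numpy as np

psfSigma = np.array([1.7, 1.9])        # pixels, one entry per detector (made up)
pixelScale = np.array([0.2, 0.2])      # arcsec per pixel (made up)

seeing = psfSigma * np.sqrt(8 * np.log(2)) * pixelScale
# sqrt(8 ln 2) ~= 2.355, so 1.7 px * 2.355 * 0.2 "/px ~= 0.80 arcsec
print(seeing)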

1518class MakeVisitTableConnections(pipeBase.PipelineTaskConnections, 

1519 dimensions=("instrument",), 

1520 defaultTemplates={"calexpType": ""}): 

1521 visitSummaries = connectionTypes.Input( 

1522 doc="Per-visit consolidated exposure metadata from ConsolidateVisitSummaryTask", 

1523 name="{calexpType}visitSummary", 

1524 storageClass="ExposureCatalog", 

1525 dimensions=("instrument", "visit",), 

1526 multiple=True, 

1527 deferLoad=True, 

1528 ) 

1529 outputCatalog = connectionTypes.Output( 

1530 doc="Visit metadata table", 

1531 name="visitTable", 

1532 storageClass="DataFrame", 

1533 dimensions=("instrument",) 

1534 ) 

1535 

1536 

1537class MakeVisitTableConfig(pipeBase.PipelineTaskConfig, 

1538 pipelineConnections=MakeVisitTableConnections): 

1539 pass 

1540 

1541 

1542class MakeVisitTableTask(CmdLineTask, pipeBase.PipelineTask): 

1543 """Produce a `visitTable` from the `visitSummary` exposure catalogs. 

1544 """ 

1545 _DefaultName = 'makeVisitTable' 

1546 ConfigClass = MakeVisitTableConfig 

1547 

1548 def run(self, visitSummaries): 

1549        """Make a table of visit information from the `visitSummary` catalogs. 

1550 

1551 Parameters 

1552 ---------- 

1553        visitSummaries : `list` of `lsst.daf.butler.DeferredDatasetHandle` 

1554            List of DeferredDatasetHandles pointing to exposure catalogs with per-detector summary information. 

1555 Returns 

1556 ------- 

1557        result : `lsst.pipe.base.Struct` 

1558 Results struct with attribute: 

1559 ``outputCatalog`` 

1560 Catalog of visit information. 

1561 """ 

1562 visitEntries = [] 

1563 for visitSummary in visitSummaries: 

1564 visitSummary = visitSummary.get() 

1565 visitRow = visitSummary[0] 

1566 visitInfo = visitRow.getVisitInfo() 

1567 

1568 visitEntry = {} 

1569 visitEntry["visitId"] = visitRow['visit'] 

1570 visitEntry["visit"] = visitRow['visit'] 

1571 visitEntry["physical_filter"] = visitRow['physical_filter'] 

1572 visitEntry["band"] = visitRow['band'] 

1573 raDec = visitInfo.getBoresightRaDec() 

1574 visitEntry["ra"] = raDec.getRa().asDegrees() 

1575 visitEntry["decl"] = raDec.getDec().asDegrees() 

1576 visitEntry["skyRotation"] = visitInfo.getBoresightRotAngle().asDegrees() 

1577 azAlt = visitInfo.getBoresightAzAlt() 

1578 visitEntry["azimuth"] = azAlt.getLongitude().asDegrees() 

1579 visitEntry["altitude"] = azAlt.getLatitude().asDegrees() 

1580 visitEntry["zenithDistance"] = 90 - azAlt.getLatitude().asDegrees() 

1581 visitEntry["airmass"] = visitInfo.getBoresightAirmass() 

1582 visitEntry["obsStart"] = visitInfo.getDate().toPython() 

1583 visitEntry["expTime"] = visitInfo.getExposureTime() 

1584 visitEntries.append(visitEntry) 

1585 # TODO: DM-30623, Add programId, exposureType, expMidpt, cameraTemp, mirror1Temp, mirror2Temp, 

1586 # mirror3Temp, domeTemp, externalTemp, dimmSeeing, pwvGPS, pwvMW, flags, nExposures 

1587 

1588 outputCatalog = pd.DataFrame(data=visitEntries) 

1589 outputCatalog.set_index('visitId', inplace=True, verify_integrity=True) 

1590 return pipeBase.Struct(outputCatalog=outputCatalog) 

1591 

1592 
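# --- Illustrative sketch (not part of postprocess.py above): MakeVisitTableTask.run
# collects one dict per visit and turns the list into the visitTable DataFrame
# indexed by visitId; verify_integrity=True turns duplicate visit ids into an
# error instead of silent duplication. The entries below are hypothetical.
import pandas as pd

visitEntries = [
    {"visitId": 12345, "band": "r", "expTime": 30.0, "airmass": 1.12},
    {"visitId": 12346, "band": "i", "expTime": 30.0, "airmass": 1.15},
]
visitTable = pd.DataFrame(data=visitEntries)
visitTable.set_index("visitId", inplace=True, verify_integrity=True)
print(visitTable)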

1593class WriteForcedSourceTableConnections(pipeBase.PipelineTaskConnections, 

1594 dimensions=("instrument", "visit", "detector", "skymap", "tract")): 

1595 

1596 inputCatalog = connectionTypes.Input( 

1597 doc="Primary per-detector, single-epoch forced-photometry catalog. " 

1598 "By default, it is the output of ForcedPhotCcdTask on calexps", 

1599 name="forced_src", 

1600 storageClass="SourceCatalog", 

1601 dimensions=("instrument", "visit", "detector", "skymap", "tract") 

1602 ) 

1603 inputCatalogDiff = connectionTypes.Input( 

1604 doc="Secondary multi-epoch, per-detector, forced photometry catalog. " 

1605 "By default, it is the output of ForcedPhotCcdTask run on image differences.", 

1606 name="forced_diff", 

1607 storageClass="SourceCatalog", 

1608 dimensions=("instrument", "visit", "detector", "skymap", "tract") 

1609 ) 

1610 outputCatalog = connectionTypes.Output( 

1611        doc="Input catalogs horizontally joined on `objectId` in Parquet format", 

1612 name="mergedForcedSource", 

1613 storageClass="DataFrame", 

1614 dimensions=("instrument", "visit", "detector", "skymap", "tract") 

1615 ) 

1616 

1617 

1618class WriteForcedSourceTableConfig(WriteSourceTableConfig, 

1619 pipelineConnections=WriteForcedSourceTableConnections): 

1620 key = lsst.pex.config.Field( 

1621        doc="Column on which to join the two input tables and to use as the primary key of the output", 

1622 dtype=str, 

1623 default="objectId", 

1624 ) 

1625 

1626 

1627class WriteForcedSourceTableTask(pipeBase.PipelineTask): 

1628 """Merge and convert per-detector forced source catalogs to parquet 

1629 

1630    Because the predecessor ForcedPhotCcdTask operates per-detector and 

1631    per-tract (i.e., it has tract in its dimensions), detectors 

1632    on the tract boundary may have multiple forced source catalogs. 

1633 

1634    The successor task TransformForcedSourceTable runs per-patch 

1635    and temporally aggregates the overlapping mergedForcedSource catalogs 

1636    from all available epochs. 

1637 """ 

1638 _DefaultName = "writeForcedSourceTable" 

1639 ConfigClass = WriteForcedSourceTableConfig 

1640 

1641 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

1642 inputs = butlerQC.get(inputRefs) 

1643 # Add ccdVisitId to allow joining with CcdVisitTable 

1644 inputs['ccdVisitId'] = butlerQC.quantum.dataId.pack("visit_detector") 

1645 inputs['band'] = butlerQC.quantum.dataId.full['band'] 

1646 outputs = self.run(**inputs) 

1647 butlerQC.put(outputs, outputRefs) 

1648 

1649 def run(self, inputCatalog, inputCatalogDiff, ccdVisitId=None, band=None): 

1650 dfs = [] 

1651        for table, dataset in zip((inputCatalog, inputCatalogDiff), ('calexp', 'diff')): 

1652 df = table.asAstropy().to_pandas().set_index(self.config.key, drop=False) 

1653 df = df.reindex(sorted(df.columns), axis=1) 

1654 df['ccdVisitId'] = ccdVisitId if ccdVisitId else pd.NA 

1655 df['band'] = band if band else pd.NA 

1656 df.columns = pd.MultiIndex.from_tuples([(dataset, c) for c in df.columns], 

1657 names=('dataset', 'column')) 

1658 

1659 dfs.append(df) 

1660 

1661 outputCatalog = functools.reduce(lambda d1, d2: d1.join(d2), dfs) 

1662 return pipeBase.Struct(outputCatalog=outputCatalog) 

1663 

1664 
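# --- Illustrative sketch (not part of postprocess.py above): WriteForcedSourceTableTask.run
# labels each input catalog's columns with a ('dataset', 'column') MultiIndex
# ('calexp' for the direct forced photometry, 'diff' for the difference-image
# photometry) and joins the two frames horizontally on objectId. Toy values only.
import functools
import pandas as pd

calexp = pd.DataFrame({"objectId": [1, 2], "flux": [5.0, 6.0]}).set_index("objectId", drop=False)
diff = pd.DataFrame({"objectId": [1, 2], "flux": [0.1, -0.2]}).set_index("objectId", drop=False)

dfs = []
for df, dataset in ((calexp, "calexp"), (diff, "diff")):
    df = df.copy()
    df.columns = pd.MultiIndex.from_tuples([(dataset, c) for c in df.columns],
                                           names=("dataset", "column"))
    dfs.append(df)

# Horizontal join on the shared objectId index, mirroring the functools.reduce above.
merged = functools.reduce(lambda d1, d2: d1.join(d2), dfs)
print(merged)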

1665class TransformForcedSourceTableConnections(pipeBase.PipelineTaskConnections, 

1666 dimensions=("instrument", "skymap", "patch", "tract")): 

1667 

1668 inputCatalogs = connectionTypes.Input( 

1669 doc="Parquet table of merged ForcedSources produced by WriteForcedSourceTableTask", 

1670 name="mergedForcedSource", 

1671 storageClass="DataFrame", 

1672 dimensions=("instrument", "visit", "detector", "skymap", "tract"), 

1673 multiple=True, 

1674 deferLoad=True 

1675 ) 

1676 referenceCatalog = connectionTypes.Input( 

1677        doc="Reference catalog that was used to seed the forced photometry. Columns " 

1678 "objectId, detect_isPrimary, detect_isTractInner, detect_isPatchInner " 

1679 "are expected.", 

1680 name="objectTable", 

1681 storageClass="DataFrame", 

1682 dimensions=("tract", "patch", "skymap"), 

1683 deferLoad=True 

1684 ) 

1685 outputCatalog = connectionTypes.Output( 

1686        doc="Narrower, temporally-aggregated, per-patch ForcedSource Table transformed and converted " 

1687            "according to a specified set of functors", 

1688 name="forcedSourceTable", 

1689 storageClass="DataFrame", 

1690 dimensions=("tract", "patch", "skymap") 

1691 ) 

1692 

1693 

1694class TransformForcedSourceTableConfig(TransformCatalogBaseConfig, 

1695 pipelineConnections=TransformForcedSourceTableConnections): 

1696 referenceColumns = pexConfig.ListField( 

1697 dtype=str, 

1698 default=["detect_isPrimary", "detect_isTractInner", "detect_isPatchInner"], 

1699 optional=True, 

1700 doc="Columns to pull from reference catalog", 

1701 ) 

1702 keyRef = lsst.pex.config.Field( 

1703        doc="Column on which to join the two input tables and to use as the primary key of the output", 

1704 dtype=str, 

1705 default="objectId", 

1706 ) 

1707 key = lsst.pex.config.Field( 

1708 doc="Rename the output DataFrame index to this name", 

1709 dtype=str, 

1710 default="forcedSourceId", 

1711 ) 

1712 

1713 def setDefaults(self): 

1714 super().setDefaults() 

1715 self.functorFile = os.path.join('$PIPE_TASKS_DIR', 'schemas', 'ForcedSource.yaml') 

1716 

1717 

1718class TransformForcedSourceTableTask(TransformCatalogBaseTask): 

1719 """Transform/standardize a ForcedSource catalog 

1720 

1721 Transforms each wide, per-detector forcedSource parquet table per the 

1722 specification file (per-camera defaults found in ForcedSource.yaml). 

1723    All epochs that overlap the patch are aggregated into one narrow, 

1724    per-patch parquet file. 

1725 

1726    No de-duplication of rows is performed. Duplicate-resolution flags are 

1727    pulled in from the referenceCatalog: `detect_isPrimary`, 

1728    `detect_isTractInner`, `detect_isPatchInner`, so that the user may 

1729    de-duplicate for analysis or compare duplicates for QA. 

1730 

1731    The resulting table includes multiple bands. Epochs (MJDs) and other 

1732    useful per-visit metadata can be retrieved by joining with the 

1733    ccdVisitTable on ccdVisitId. 

1734 """ 

1735 _DefaultName = "transformForcedSourceTable" 

1736 ConfigClass = TransformForcedSourceTableConfig 

1737 

1738 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

1739 inputs = butlerQC.get(inputRefs) 

1740 if self.funcs is None: 

1741 raise ValueError("config.functorFile is None. " 

1742 "Must be a valid path to yaml in order to run Task as a PipelineTask.") 

1743 outputs = self.run(inputs['inputCatalogs'], inputs['referenceCatalog'], funcs=self.funcs, 

1744 dataId=outputRefs.outputCatalog.dataId.full) 

1745 

1746 butlerQC.put(outputs, outputRefs) 

1747 

1748 def run(self, inputCatalogs, referenceCatalog, funcs=None, dataId=None, band=None): 

1749 dfs = [] 

1750 ref = referenceCatalog.get(parameters={"columns": self.config.referenceColumns}) 

1751        self.log.info("Aggregating %s input catalogs", len(inputCatalogs)) 

1752 for handle in inputCatalogs: 

1753 result = self.transform(None, handle, funcs, dataId) 

1754 # Filter for only rows that were detected on (overlap) the patch 

1755 dfs.append(result.df.join(ref, how='inner')) 

1756 

1757 outputCatalog = pd.concat(dfs) 

1758 

1759        # Now that we are done joining on config.keyRef, 

1760        # name the index config.keyRef so it is kept when the index is reset 

1761        outputCatalog.index.rename(self.config.keyRef, inplace=True) 

1762        # Add config.keyRef to the column list 

1763        outputCatalog.reset_index(inplace=True) 

1764        # Set forcedSourceId (specified in ForcedSource.yaml) as the index 

1765        outputCatalog.set_index("forcedSourceId", inplace=True, verify_integrity=True) 

1766        # Rename the index to config.key 

1767        outputCatalog.index.rename(self.config.key, inplace=True) 

1768 

1769 self.log.info("Made a table of %d columns and %d rows", 

1770 len(outputCatalog.columns), len(outputCatalog)) 

1771 return pipeBase.Struct(outputCatalog=outputCatalog) 

1772 

1773 
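# --- Illustrative sketch (not part of postprocess.py above): in
# TransformForcedSourceTableTask.run, the inner join with the reference
# objectTable keeps only rows whose objectId was detected on the patch and
# attaches the duplicate-resolution flags; the index is then moved from
# objectId to forcedSourceId. All values below are hypothetical.
import pandas as pd

forced = pd.DataFrame({"objectId": [10, 11, 12],
                       "forcedSourceId": [100, 101, 102],
                       "psfFlux": [1.0, 2.0, 3.0]}).set_index("objectId")
ref = pd.DataFrame({"objectId": [10, 11],
                    "detect_isPrimary": [True, False]}).set_index("objectId")

joined = forced.join(ref, how="inner")   # objectId 12 is dropped: not in the reference
joined.reset_index(inplace=True)         # objectId becomes an ordinary column
joined.set_index("forcedSourceId", inplace=True, verify_integrity=True)
print(joined)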

1774class ConsolidateTractConnections(pipeBase.PipelineTaskConnections, 

1775 defaultTemplates={"catalogType": ""}, 

1776 dimensions=("instrument", "tract")): 

1777 inputCatalogs = connectionTypes.Input( 

1778 doc="Input per-patch DataFrame Tables to be concatenated", 

1779 name="{catalogType}ForcedSourceTable", 

1780 storageClass="DataFrame", 

1781 dimensions=("tract", "patch", "skymap"), 

1782 multiple=True, 

1783 ) 

1784 

1785 outputCatalog = connectionTypes.Output( 

1786 doc="Output per-tract concatenation of DataFrame Tables", 

1787 name="{catalogType}ForcedSourceTable_tract", 

1788 storageClass="DataFrame", 

1789 dimensions=("tract", "skymap"), 

1790 ) 

1791 

1792 

1793class ConsolidateTractConfig(pipeBase.PipelineTaskConfig, 

1794 pipelineConnections=ConsolidateTractConnections): 

1795 pass 

1796 

1797 

1798class ConsolidateTractTask(CmdLineTask, pipeBase.PipelineTask): 

1799    """Concatenate any list of per-patch DataFrames into a single 

1800    per-tract DataFrame. 

1801 """ 

1802 _DefaultName = 'ConsolidateTract' 

1803 ConfigClass = ConsolidateTractConfig 

1804 

1805 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

1806 inputs = butlerQC.get(inputRefs) 

1807        # Not checking that at least one inputCatalog exists, because an empty list would mean an empty quantum graph 

1808 self.log.info("Concatenating %s per-patch %s Tables", 

1809 len(inputs['inputCatalogs']), 

1810 inputRefs.inputCatalogs[0].datasetType.name) 

1811 df = pd.concat(inputs['inputCatalogs']) 

1812 butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs)
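# --- Illustrative sketch (not part of postprocess.py above): ConsolidateTractTask
# is a plain concatenation of the per-patch DataFrames it receives into one
# per-tract table. The per-patch frames below are hypothetical stand-ins.
import pandas as pd

patch0 = pd.DataFrame({"forcedSourceId": [100, 101], "psfFlux": [1.0, 2.0]}).set_index("forcedSourceId")
patch1 = pd.DataFrame({"forcedSourceId": [200], "psfFlux": [3.0]}).set_index("forcedSourceId")

forcedSourceTable_tract = pd.concat([patch0, patch1])
print(forcedSourceTable_tract)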