Coverage for python/lsst/pipe/tasks/postprocess.py: 33%

723 statements  

1# This file is part of pipe_tasks 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

22import functools 

23import pandas as pd 

24from collections import defaultdict 

25import numpy as np 

26import numbers 

27import os 

28 

29import lsst.geom 

30import lsst.pex.config as pexConfig 

31import lsst.pipe.base as pipeBase 

32import lsst.daf.base as dafBase 

33from lsst.pipe.base import connectionTypes 

34import lsst.afw.table as afwTable 

35from lsst.meas.base import SingleFrameMeasurementTask 

36from lsst.pipe.base import CmdLineTask, ArgumentParser, DataIdContainer 

37from lsst.coadd.utils.coaddDataIdContainer import CoaddDataIdContainer 

38from lsst.daf.butler import DeferredDatasetHandle, DataCoordinate 

39 

40from .parquetTable import ParquetTable 

41from .multiBandUtils import makeMergeArgumentParser, MergeSourcesRunner 

42from .functors import CompositeFunctor, Column 

43 

44 

45def flattenFilters(df, noDupCols=['coord_ra', 'coord_dec'], camelCase=False, inputBands=None): 

46 """Flattens a dataframe with multilevel column index 

47 """ 

48 newDf = pd.DataFrame() 

49 # band is the level 0 index 

50 dfBands = df.columns.unique(level=0).values 

51 for band in dfBands: 

52 subdf = df[band] 

53 columnFormat = '{0}{1}' if camelCase else '{0}_{1}' 

54 newColumns = {c: columnFormat.format(band, c) 

55 for c in subdf.columns if c not in noDupCols} 

56 cols = list(newColumns.keys()) 

57 newDf = pd.concat([newDf, subdf[cols].rename(columns=newColumns)], axis=1) 

58 

59 # A band must be present in both the input and the output, or else its columns are all NaN:

60 presentBands = dfBands if inputBands is None else list(set(inputBands).intersection(dfBands)) 

61 # Get the unexploded columns from any present band's partition 

62 noDupDf = df[presentBands[0]][noDupCols] 

63 newDf = pd.concat([noDupDf, newDf], axis=1) 

64 return newDf 
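
# A minimal illustrative sketch (not part of the pipeline) of what
# `flattenFilters` does to a toy band-keyed MultiIndex DataFrame; the column
# names below are hypothetical.
def _exampleFlattenFilters():
    cols = pd.MultiIndex.from_product([['g', 'r'],
                                       ['coord_ra', 'coord_dec', 'psfFlux']])
    toy = pd.DataFrame(np.ones((2, 6)), columns=cols)
    # Expected flat columns: ['coord_ra', 'coord_dec', 'g_psfFlux', 'r_psfFlux']
    return flattenFilters(toy)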

65 

66 

67class WriteObjectTableConnections(pipeBase.PipelineTaskConnections, 

68 defaultTemplates={"coaddName": "deep"}, 

69 dimensions=("tract", "patch", "skymap")): 

70 inputCatalogMeas = connectionTypes.Input( 

71 doc="Catalog of source measurements on the deepCoadd.", 

72 dimensions=("tract", "patch", "band", "skymap"), 

73 storageClass="SourceCatalog", 

74 name="{coaddName}Coadd_meas", 

75 multiple=True 

76 ) 

77 inputCatalogForcedSrc = connectionTypes.Input( 

78 doc="Catalog of forced measurements (shape and position parameters held fixed) on the deepCoadd.", 

79 dimensions=("tract", "patch", "band", "skymap"), 

80 storageClass="SourceCatalog", 

81 name="{coaddName}Coadd_forced_src", 

82 multiple=True 

83 ) 

84 inputCatalogRef = connectionTypes.Input( 

85 doc="Catalog marking the primary detection (which band provides a good shape and position)" 

86 "for each detection in deepCoadd_mergeDet.", 

87 dimensions=("tract", "patch", "skymap"), 

88 storageClass="SourceCatalog", 

89 name="{coaddName}Coadd_ref" 

90 ) 

91 outputCatalog = connectionTypes.Output( 

92 doc="A vertical concatenation of the deepCoadd_{ref|meas|forced_src} catalogs, " 

93 "stored as a DataFrame with a multi-level column index per-patch.", 

94 dimensions=("tract", "patch", "skymap"), 

95 storageClass="DataFrame", 

96 name="{coaddName}Coadd_obj" 

97 ) 

98 

99 

100class WriteObjectTableConfig(pipeBase.PipelineTaskConfig, 

101 pipelineConnections=WriteObjectTableConnections): 

102 engine = pexConfig.Field( 

103 dtype=str, 

104 default="pyarrow", 

105 doc="Parquet engine for writing (pyarrow or fastparquet)" 

106 ) 

107 coaddName = pexConfig.Field( 

108 dtype=str, 

109 default="deep", 

110 doc="Name of coadd" 

111 ) 

112 

113 

114class WriteObjectTableTask(CmdLineTask, pipeBase.PipelineTask): 

115 """Write filter-merged source tables to parquet 

116 """ 

117 _DefaultName = "writeObjectTable" 

118 ConfigClass = WriteObjectTableConfig 

119 RunnerClass = MergeSourcesRunner 

120 

121 # Names of table datasets to be merged 

122 inputDatasets = ('forced_src', 'meas', 'ref') 

123 

124 # Tag of output dataset written by `MergeSourcesTask.write` 

125 outputDataset = 'obj' 

126 

127 def __init__(self, butler=None, schema=None, **kwargs): 

128 # This class cannot use the default CmdLineTask init: doing so would

129 # require its own specialized task runner, which is many more lines of

130 # code, so we keep this simple override for now.

131 super().__init__(**kwargs) 

132 

133 def runDataRef(self, patchRefList): 

134 """! 

135 @brief Merge coadd sources from multiple bands. Calls @ref `run` which must be defined in 

136 subclasses that inherit from MergeSourcesTask. 

137 @param[in] patchRefList list of data references for each filter 

138 """ 

139 catalogs = dict(self.readCatalog(patchRef) for patchRef in patchRefList) 

140 dataId = patchRefList[0].dataId 

141 mergedCatalog = self.run(catalogs, tract=dataId['tract'], patch=dataId['patch']) 

142 self.write(patchRefList[0], ParquetTable(dataFrame=mergedCatalog)) 

143 

144 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

145 inputs = butlerQC.get(inputRefs) 

146 

147 measDict = {ref.dataId['band']: {'meas': cat} for ref, cat in 

148 zip(inputRefs.inputCatalogMeas, inputs['inputCatalogMeas'])} 

149 forcedSourceDict = {ref.dataId['band']: {'forced_src': cat} for ref, cat in 

150 zip(inputRefs.inputCatalogForcedSrc, inputs['inputCatalogForcedSrc'])} 

151 

152 catalogs = {} 

153 for band in measDict.keys(): 

154 catalogs[band] = {'meas': measDict[band]['meas'], 

155 'forced_src': forcedSourceDict[band]['forced_src'], 

156 'ref': inputs['inputCatalogRef']} 

157 dataId = butlerQC.quantum.dataId 

158 df = self.run(catalogs=catalogs, tract=dataId['tract'], patch=dataId['patch']) 

159 outputs = pipeBase.Struct(outputCatalog=df) 

160 butlerQC.put(outputs, outputRefs) 

161 

162 @classmethod 

163 def _makeArgumentParser(cls): 

164 """Create a suitable ArgumentParser. 

165 

166 We will use the ArgumentParser to get a list of data 

167 references for patches; the RunnerClass will sort them into lists 

168 of data references for the same patch. 

169 

170 References the first element of self.inputDatasets, rather than

171 self.inputDataset.

172 """ 

173 return makeMergeArgumentParser(cls._DefaultName, cls.inputDatasets[0]) 

174 

175 def readCatalog(self, patchRef): 

176 """Read input catalogs 

177 

178 Read all the input datasets given by the 'inputDatasets' 

179 attribute. 

180 

181 Parameters 

182 ---------- 

183 patchRef : `lsst.daf.persistence.ButlerDataRef` 

184 Data reference for patch 

185 

186 Returns 

187 ------- 

188 Tuple consisting of band name and a dict of catalogs, keyed by 

189 dataset name 

190 """ 

191 band = patchRef.get(self.config.coaddName + "Coadd_filterLabel", immediate=True).bandLabel 

192 catalogDict = {} 

193 for dataset in self.inputDatasets: 

194 catalog = patchRef.get(self.config.coaddName + "Coadd_" + dataset, immediate=True) 

195 self.log.info("Read %d sources from %s for band %s: %s", 

196 len(catalog), dataset, band, patchRef.dataId) 

197 catalogDict[dataset] = catalog 

198 return band, catalogDict 

199 

200 def run(self, catalogs, tract, patch): 

201 """Merge multiple catalogs. 

202 

203 Parameters 

204 ---------- 

205 catalogs : `dict` 

206 Mapping from filter names to dict of catalogs. 

207 tract : int 

208 tractId to use for the tractId column 

209 patch : str 

210 patchId to use for the patchId column 

211 

212 Returns 

213 ------- 

214 catalog : `pandas.DataFrame` 

215 Merged dataframe 

216 """ 

217 

218 dfs = [] 

219 for filt, tableDict in catalogs.items(): 

220 for dataset, table in tableDict.items(): 

221 # Convert afwTable to pandas DataFrame 

222 df = table.asAstropy().to_pandas().set_index('id', drop=True) 

223 

224 # Sort columns by name, to ensure matching schema among patches 

225 df = df.reindex(sorted(df.columns), axis=1) 

226 df['tractId'] = tract 

227 df['patchId'] = patch 

228 

229 # Make columns a 3-level MultiIndex 

230 df.columns = pd.MultiIndex.from_tuples([(dataset, filt, c) for c in df.columns], 

231 names=('dataset', 'band', 'column')) 

232 dfs.append(df) 

233 

234 catalog = functools.reduce(lambda d1, d2: d1.join(d2), dfs) 

235 return catalog 

236 

237 def write(self, patchRef, catalog): 

238 """Write the output. 

239 

240 Parameters 

241 ---------- 

242 catalog : `ParquetTable` 

243 Catalog to write 

244 patchRef : `lsst.daf.persistence.ButlerDataRef` 

245 Data reference for patch 

246 """ 

247 patchRef.put(catalog, self.config.coaddName + "Coadd_" + self.outputDataset) 

248 # since the filter isn't actually part of the data ID for the dataset we're saving, 

249 # it's confusing to see it in the log message, even if the butler simply ignores it. 

250 mergeDataId = patchRef.dataId.copy() 

251 del mergeDataId["filter"] 

252 self.log.info("Wrote merged catalog: %s", mergeDataId) 

253 

254 def writeMetadata(self, dataRefList): 

255 """No metadata to write, and not sure how to write it for a list of dataRefs. 

256 """ 

257 pass 
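
# A hedged sketch of the column layout produced by WriteObjectTableTask.run:
# every column of the merged per-patch DataFrame is keyed by a 3-tuple
# (dataset, band, column), so it can be sliced per dataset and per band.
# The dataset/band/column names below are toy values for illustration only.
def _exampleObjColumnIndex():
    cols = pd.MultiIndex.from_tuples(
        [('meas', 'g', 'flux'), ('forced_src', 'g', 'flux'), ('ref', 'g', 'flag')],
        names=('dataset', 'band', 'column'))
    toy = pd.DataFrame(np.zeros((1, 3)), columns=cols)
    return toy['meas']['g']  # per-dataset, per-band slice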

258 

259 

260class WriteSourceTableConnections(pipeBase.PipelineTaskConnections, 

261 defaultTemplates={"catalogType": ""}, 

262 dimensions=("instrument", "visit", "detector")): 

263 

264 catalog = connectionTypes.Input( 

265 doc="Input full-depth catalog of sources produced by CalibrateTask", 

266 name="{catalogType}src", 

267 storageClass="SourceCatalog", 

268 dimensions=("instrument", "visit", "detector") 

269 ) 

270 outputCatalog = connectionTypes.Output( 

271 doc="Catalog of sources, `src` in Parquet format. The 'id' column is " 

272 "replaced with an index; all other columns are unchanged.", 

273 name="{catalogType}source", 

274 storageClass="DataFrame", 

275 dimensions=("instrument", "visit", "detector") 

276 ) 

277 

278 

279class WriteSourceTableConfig(pipeBase.PipelineTaskConfig, 

280 pipelineConnections=WriteSourceTableConnections): 

281 doApplyExternalPhotoCalib = pexConfig.Field( 

282 dtype=bool, 

283 default=False, 

284 doc=("Add local photoCalib columns from the calexp.photoCalib? Should only set True if " 

285 "generating Source Tables from older src tables which do not already have local calib columns") 

286 ) 

287 doApplyExternalSkyWcs = pexConfig.Field( 

288 dtype=bool, 

289 default=False, 

290 doc=("Add local WCS columns from the calexp.wcs? Should only set True if " 

291 "generating Source Tables from older src tables which do not already have local calib columns") 

292 ) 

293 

294 

295class WriteSourceTableTask(CmdLineTask, pipeBase.PipelineTask): 

296 """Write source table to parquet 

297 """ 

298 _DefaultName = "writeSourceTable" 

299 ConfigClass = WriteSourceTableConfig 

300 

301 def runDataRef(self, dataRef): 

302 src = dataRef.get('src') 

303 if self.config.doApplyExternalPhotoCalib or self.config.doApplyExternalSkyWcs: 

304 src = self.addCalibColumns(src, dataRef) 

305 

306 ccdVisitId = dataRef.get('ccdExposureId') 

307 result = self.run(src, ccdVisitId=ccdVisitId) 

308 dataRef.put(result.table, 'source') 

309 

310 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

311 inputs = butlerQC.get(inputRefs) 

312 inputs['ccdVisitId'] = butlerQC.quantum.dataId.pack("visit_detector") 

313 result = self.run(**inputs).table 

314 outputs = pipeBase.Struct(outputCatalog=result.toDataFrame()) 

315 butlerQC.put(outputs, outputRefs) 

316 

317 def run(self, catalog, ccdVisitId=None): 

318 """Convert `src` catalog to parquet 

319 

320 Parameters 

321 ---------- 

322 catalog: `afwTable.SourceCatalog` 

323 catalog to be converted 

324 ccdVisitId: `int` 

325 ccdVisitId to be added as a column 

326 

327 Returns 

328 ------- 

329 result : `lsst.pipe.base.Struct` 

330 ``table`` 

331 `ParquetTable` version of the input catalog 

332 """ 

333 self.log.info("Generating parquet table from src catalog %s", ccdVisitId) 

334 df = catalog.asAstropy().to_pandas().set_index('id', drop=True) 

335 df['ccdVisitId'] = ccdVisitId 

336 return pipeBase.Struct(table=ParquetTable(dataFrame=df)) 

337 

338 def addCalibColumns(self, catalog, dataRef): 

339 """Add columns with local calibration evaluated at each centroid 

340 

341 for backwards compatibility with old repos. 

342 This exists for the purpose of converting old src catalogs 

343 (which don't have the expected local calib columns) to Source Tables. 

344 

345 Parameters 

346 ---------- 

347 catalog: `afwTable.SourceCatalog` 

348 catalog to which calib columns will be added 

349 dataRef : `lsst.daf.persistence.ButlerDataRef`

350 Data reference used to fetch the calibs from disk.

351 

352 Returns 

353 ------- 

354 newCat: `afwTable.SourceCatalog` 

355 Source Catalog with requested local calib columns 

356 """ 

357 mapper = afwTable.SchemaMapper(catalog.schema) 

358 measureConfig = SingleFrameMeasurementTask.ConfigClass() 

359 measureConfig.doReplaceWithNoise = False 

360 

361 # We just need the WCS or the PhotoCalib attached to an exposure.

362 exposure = dataRef.get('calexp_sub', 

363 bbox=lsst.geom.Box2I(lsst.geom.Point2I(0, 0), lsst.geom.Point2I(0, 0))) 

364 

365 mapper = afwTable.SchemaMapper(catalog.schema) 

366 mapper.addMinimalSchema(catalog.schema, True) 

367 schema = mapper.getOutputSchema() 

368 

369 exposureIdInfo = dataRef.get("expIdInfo") 

370 measureConfig.plugins.names = [] 

371 if self.config.doApplyExternalSkyWcs: 

372 plugin = 'base_LocalWcs' 

373 if plugin in schema: 

374 raise RuntimeError(f"{plugin} already in src catalog. Set doApplyExternalSkyWcs=False") 

375 else: 

376 measureConfig.plugins.names.add(plugin) 

377 

378 if self.config.doApplyExternalPhotoCalib: 

379 plugin = 'base_LocalPhotoCalib' 

380 if plugin in schema: 

381 raise RuntimeError(f"{plugin} already in src catalog. Set doApplyExternalPhotoCalib=False") 

382 else: 

383 measureConfig.plugins.names.add(plugin) 

384 

385 measurement = SingleFrameMeasurementTask(config=measureConfig, schema=schema) 

386 newCat = afwTable.SourceCatalog(schema) 

387 newCat.extend(catalog, mapper=mapper) 

388 measurement.run(measCat=newCat, exposure=exposure, exposureId=exposureIdInfo.expId) 

389 return newCat 

390 

391 def writeMetadata(self, dataRef): 

392 """No metadata to write. 

393 """ 

394 pass 

395 

396 @classmethod 

397 def _makeArgumentParser(cls): 

398 parser = ArgumentParser(name=cls._DefaultName) 

399 parser.add_id_argument("--id", 'src', 

400 help="data ID, e.g. --id visit=12345 ccd=0") 

401 return parser 

402 

403 

404class PostprocessAnalysis(object): 

405 """Calculate columns from ParquetTable 

406 

407 This object manages and organizes an arbitrary set of computations 

408 on a catalog. The catalog is defined by a 

409 `lsst.pipe.tasks.parquetTable.ParquetTable` object (or list thereof), such as a 

410 `deepCoadd_obj` dataset, and the computations are defined by a collection 

411 of `lsst.pipe.tasks.functors.Functor` objects (or, equivalently,

412 a `CompositeFunctor`). 

413 

414 After the object is initialized, accessing the `.df` attribute (which 

415 holds the `pandas.DataFrame` containing the results of the calculations) triggers 

416 computation of said dataframe. 

417 

418 One of the conveniences of using this object is the ability to define a desired common 

419 filter for all functors. This enables the same functor collection to be passed to 

420 several different `PostprocessAnalysis` objects without having to change the original 

421 functor collection, since the `filt` keyword argument of this object triggers an 

422 overwrite of the `filt` property for all functors in the collection. 

423 

424 This object also allows a list of refFlags to be passed, and defines a set of default 

425 refFlags that are always included even if not requested. 

426 

427 If a list of `ParquetTable` objects is passed, rather than a single one, then the

428 calculations will be mapped over all the input catalogs. In principle, it should 

429 be straightforward to parallelize this activity, but initial tests have failed 

430 (see TODO in code comments). 

431 

432 Parameters 

433 ---------- 

434 parq : `lsst.pipe.tasks.ParquetTable` (or list of such) 

435 Source catalog(s) for computation 

436 

437 functors : `list`, `dict`, or `lsst.pipe.tasks.functors.CompositeFunctor` 

438 Computations to do (functors that act on `parq`). 

439 If a dict, the output 

440 DataFrame will have columns keyed accordingly. 

441 If a list, the column keys will come from the 

442 `.shortname` attribute of each functor. 

443 

444 filt : `str` (optional) 

445 Filter in which to calculate. If provided, 

446 this will overwrite any existing `.filt` attribute 

447 of the provided functors. 

448 

449 flags : `list` (optional) 

450 List of flags (per-band) to include in output table. 

451 Taken from the `meas` dataset if applied to a multilevel Object Table. 

452 

453 refFlags : `list` (optional) 

454 List of refFlags (only reference band) to include in output table. 

455 

456 forcedFlags : `list` (optional) 

457 List of flags (per-band) to include in output table. 

458 Taken from the ``forced_src`` dataset if applied to a 

459 multilevel Object Table. Intended for flags from measurement plugins 

460 only run during multi-band forced-photometry. 

461 """ 

462 _defaultRefFlags = [] 

463 _defaultFuncs = () 

464 

465 def __init__(self, parq, functors, filt=None, flags=None, refFlags=None, forcedFlags=None): 

466 self.parq = parq 

467 self.functors = functors 

468 

469 self.filt = filt 

470 self.flags = list(flags) if flags is not None else [] 

471 self.forcedFlags = list(forcedFlags) if forcedFlags is not None else [] 

472 self.refFlags = list(self._defaultRefFlags) 

473 if refFlags is not None: 

474 self.refFlags += list(refFlags) 

475 

476 self._df = None 

477 

478 @property 

479 def defaultFuncs(self): 

480 funcs = dict(self._defaultFuncs) 

481 return funcs 

482 

483 @property 

484 def func(self): 

485 additionalFuncs = self.defaultFuncs 

486 additionalFuncs.update({flag: Column(flag, dataset='forced_src') for flag in self.forcedFlags}) 

487 additionalFuncs.update({flag: Column(flag, dataset='ref') for flag in self.refFlags}) 

488 additionalFuncs.update({flag: Column(flag, dataset='meas') for flag in self.flags}) 

489 

490 if isinstance(self.functors, CompositeFunctor): 

491 func = self.functors 

492 else: 

493 func = CompositeFunctor(self.functors) 

494 

495 func.funcDict.update(additionalFuncs) 

496 func.filt = self.filt 

497 

498 return func 

499 

500 @property 

501 def noDupCols(self): 

502 return [name for name, func in self.func.funcDict.items() if func.noDup or func.dataset == 'ref'] 

503 

504 @property 

505 def df(self): 

506 if self._df is None: 

507 self.compute() 

508 return self._df 

509 

510 def compute(self, dropna=False, pool=None): 

511 # map over multiple parquet tables 

512 if type(self.parq) in (list, tuple): 

513 if pool is None: 

514 dflist = [self.func(parq, dropna=dropna) for parq in self.parq] 

515 else: 

516 # TODO: Figure out why this doesn't work (pyarrow pickling issues?) 

517 dflist = pool.map(functools.partial(self.func, dropna=dropna), self.parq) 

518 self._df = pd.concat(dflist) 

519 else: 

520 self._df = self.func(self.parq, dropna=dropna) 

521 

522 return self._df 
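
# A minimal usage sketch, assuming a `deepCoadd_obj` ParquetTable `parq`;
# the functor and flag names here are illustrative, not a fixed API contract.
def _examplePostprocessAnalysis(parq):
    from .functors import Mag
    funcs = {'gPsfMag': Mag('base_PsfFlux', dataset='meas')}
    analysis = PostprocessAnalysis(parq, funcs, filt='g',
                                   refFlags=['detect_isPrimary'])
    return analysis.df  # first access triggers compute()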

523 

524 

525class TransformCatalogBaseConnections(pipeBase.PipelineTaskConnections, 

526 dimensions=()): 

527 """Expected Connections for subclasses of TransformCatalogBaseTask. 

528 

529 Must be subclassed. 

530 """ 

531 inputCatalog = connectionTypes.Input( 

532 name="", 

533 storageClass="DataFrame", 

534 ) 

535 outputCatalog = connectionTypes.Output( 

536 name="", 

537 storageClass="DataFrame", 

538 ) 

539 

540 

541class TransformCatalogBaseConfig(pipeBase.PipelineTaskConfig, 

542 pipelineConnections=TransformCatalogBaseConnections): 

543 functorFile = pexConfig.Field( 

544 dtype=str, 

545 doc="Path to YAML file specifying Science Data Model functors to use " 

546 "when copying columns and computing calibrated values.", 

547 default=None, 

548 optional=True 

549 ) 

550 primaryKey = pexConfig.Field( 

551 dtype=str, 

552 doc="Name of column to be set as the DataFrame index. If None, the index" 

553 "will be named `id`", 

554 default=None, 

555 optional=True 

556 ) 

557 

558 

559class TransformCatalogBaseTask(CmdLineTask, pipeBase.PipelineTask): 

560 """Base class for transforming/standardizing a catalog 

561 

562 by applying functors that convert units and apply calibrations. 

563 The purpose of this task is to perform a set of computations on 

564 an input `ParquetTable` dataset (such as `deepCoadd_obj`) and write the 

565 results to a new dataset (which needs to be declared in an `outputDataset` 

566 attribute). 

567 

568 The calculations to be performed are defined in a YAML file that specifies 

569 a set of functors to be computed, provided as 

570 a `--functorFile` config parameter. An example of such a YAML file 

571 is the following: 

572 

573 funcs: 

574 psfMag: 

575 functor: Mag 

576 args: 

577 - base_PsfFlux 

578 filt: HSC-G 

579 dataset: meas 

580 cmodel_magDiff: 

581 functor: MagDiff 

582 args: 

583 - modelfit_CModel 

584 - base_PsfFlux 

585 filt: HSC-G 

586 gauss_magDiff: 

587 functor: MagDiff 

588 args: 

589 - base_GaussianFlux 

590 - base_PsfFlux 

591 filt: HSC-G 

592 count: 

593 functor: Column 

594 args: 

595 - base_InputCount_value 

596 filt: HSC-G 

597 deconvolved_moments: 

598 functor: DeconvolvedMoments 

599 filt: HSC-G 

600 dataset: forced_src 

601 refFlags: 

602 - calib_psfUsed 

603 - merge_measurement_i 

604 - merge_measurement_r 

605 - merge_measurement_z 

606 - merge_measurement_y 

607 - merge_measurement_g 

608 - base_PixelFlags_flag_inexact_psfCenter 

609 - detect_isPrimary 

610 

611 The names for each entry under "funcs" will become the names of columns in the

612 output dataset. All the functors referenced are defined in `lsst.pipe.tasks.functors`. 

613 Positional arguments to be passed to each functor are in the `args` list, 

614 and any additional entries for each column other than "functor" or "args" (e.g., `'filt'`, 

615 `'dataset'`) are treated as keyword arguments to be passed to the functor initialization. 

616 

617 The "flags" entry is the default shortcut for `Column` functors. 

618 All columns listed under "flags" will be copied to the output table 

619 untransformed. They can be of any datatype. 

620 In the special case of transforming a multi-level object table with

621 band and dataset indices (deepCoadd_obj), these will be taken from the

622 `meas` dataset and exploded out per band. 

623 

624 There are two special shortcuts that only apply when transforming 

625 multi-level Object (deepCoadd_obj) tables: 

626 - The "refFlags" entry is shortcut for `Column` functor 

627 taken from the `'ref'` dataset if transforming an ObjectTable. 

628 - The "forcedFlags" entry is shortcut for `Column` functors. 

629 taken from the ``forced_src`` dataset if transforming an ObjectTable. 

630 These are expanded out per band. 

631 

632 

633 This task uses the `lsst.pipe.tasks.postprocess.PostprocessAnalysis` object 

634 to organize and execute the calculations.

635 

636 """ 

637 @property 

638 def _DefaultName(self): 

639 raise NotImplementedError('Subclass must define "_DefaultName" attribute') 

640 

641 @property 

642 def outputDataset(self): 

643 raise NotImplementedError('Subclass must define "outputDataset" attribute') 

644 

645 @property 

646 def inputDataset(self): 

647 raise NotImplementedError('Subclass must define "inputDataset" attribute') 

648 

649 @property 

650 def ConfigClass(self): 

651 raise NotImplementedError('Subclass must define "ConfigClass" attribute') 

652 

653 def __init__(self, *args, **kwargs): 

654 super().__init__(*args, **kwargs) 

655 if self.config.functorFile: 

656 self.log.info('Loading transform functor definitions from %s',

657 self.config.functorFile) 

658 self.funcs = CompositeFunctor.from_file(self.config.functorFile) 

659 self.funcs.update(dict(PostprocessAnalysis._defaultFuncs)) 

660 else: 

661 self.funcs = None 

662 

663 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

664 inputs = butlerQC.get(inputRefs) 

665 if self.funcs is None: 

666 raise ValueError("config.functorFile is None. " 

667 "Must be a valid path to yaml in order to run Task as a PipelineTask.") 

668 result = self.run(parq=inputs['inputCatalog'], funcs=self.funcs, 

669 dataId=outputRefs.outputCatalog.dataId.full) 

670 outputs = pipeBase.Struct(outputCatalog=result) 

671 butlerQC.put(outputs, outputRefs) 

672 

673 def runDataRef(self, dataRef): 

674 parq = dataRef.get() 

675 if self.funcs is None: 

676 raise ValueError("config.functorFile is None. " 

677 "Must be a valid path to yaml in order to run as a CommandlineTask.") 

678 df = self.run(parq, funcs=self.funcs, dataId=dataRef.dataId) 

679 self.write(df, dataRef) 

680 return df 

681 

682 def run(self, parq, funcs=None, dataId=None, band=None): 

683 """Do postprocessing calculations 

684 

685 Takes a `ParquetTable` object and dataId, 

686 returns a dataframe with results of postprocessing calculations. 

687 

688 Parameters 

689 ---------- 

690 parq : `lsst.pipe.tasks.parquetTable.ParquetTable` 

691 ParquetTable from which calculations are done. 

692 funcs : `lsst.pipe.tasks.functors.Functors` 

693 Functors to apply to the table's columns 

694 dataId : dict, optional 

695 Used to add a `patchId` column to the output dataframe. 

696 band : `str`, optional 

697 Filter band that is being processed. 

698 

699 Returns 

700 -------

701 `pandas.DataFrame` 

702 

703 """ 

704 self.log.info("Transforming/standardizing the source table dataId: %s", dataId) 

705 

706 df = self.transform(band, parq, funcs, dataId).df 

707 self.log.info("Made a table of %d columns and %d rows", len(df.columns), len(df)) 

708 return df 

709 

710 def getFunctors(self): 

711 return self.funcs 

712 

713 def getAnalysis(self, parq, funcs=None, band=None): 

714 if funcs is None: 

715 funcs = self.funcs 

716 analysis = PostprocessAnalysis(parq, funcs, filt=band) 

717 return analysis 

718 

719 def transform(self, band, parq, funcs, dataId): 

720 analysis = self.getAnalysis(parq, funcs=funcs, band=band) 

721 df = analysis.df 

722 if dataId is not None: 

723 for key, value in dataId.items(): 

724 df[str(key)] = value 

725 

726 if self.config.primaryKey: 

727 if df.index.name != self.config.primaryKey and self.config.primaryKey in df: 

728 df.reset_index(inplace=True, drop=True) 

729 df.set_index(self.config.primaryKey, inplace=True) 

730 

731 return pipeBase.Struct( 

732 df=df, 

733 analysis=analysis 

734 ) 

735 

736 def write(self, df, parqRef): 

737 parqRef.put(ParquetTable(dataFrame=df), self.outputDataset) 

738 

739 def writeMetadata(self, dataRef): 

740 """No metadata to write. 

741 """ 

742 pass 
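
# A minimal sketch, assuming the YAML shown in the class docstring above has
# been saved to `path`: `CompositeFunctor.from_file` parses it, and each key
# under "funcs:" becomes a column of the output DataFrame.
def _exampleLoadFunctorFile(path):
    funcs = CompositeFunctor.from_file(path)
    funcs.update(dict(PostprocessAnalysis._defaultFuncs))
    # e.g. list(funcs.funcDict) might include 'psfMag', 'cmodel_magDiff', ...
    return funcs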

743 

744 

745class TransformObjectCatalogConnections(pipeBase.PipelineTaskConnections, 

746 defaultTemplates={"coaddName": "deep"}, 

747 dimensions=("tract", "patch", "skymap")): 

748 inputCatalog = connectionTypes.Input( 

749 doc="The vertical concatenation of the deepCoadd_{ref|meas|forced_src} catalogs, " 

750 "stored as a DataFrame with a multi-level column index per-patch.", 

751 dimensions=("tract", "patch", "skymap"), 

752 storageClass="DataFrame", 

753 name="{coaddName}Coadd_obj", 

754 deferLoad=True, 

755 ) 

756 outputCatalog = connectionTypes.Output( 

757 doc="Per-Patch Object Table of columns transformed from the deepCoadd_obj table per the standard " 

758 "data model.", 

759 dimensions=("tract", "patch", "skymap"), 

760 storageClass="DataFrame", 

761 name="objectTable" 

762 ) 

763 

764 

765class TransformObjectCatalogConfig(TransformCatalogBaseConfig, 

766 pipelineConnections=TransformObjectCatalogConnections): 

767 coaddName = pexConfig.Field( 

768 dtype=str, 

769 default="deep", 

770 doc="Name of coadd" 

771 ) 

772 # TODO: remove in DM-27177 

773 filterMap = pexConfig.DictField( 

774 keytype=str, 

775 itemtype=str, 

776 default={}, 

777 doc=("Dictionary mapping full filter name to short one for column name munging." 

778 "These filters determine the output columns no matter what filters the " 

779 "input data actually contain."), 

780 deprecated=("Coadds are now identified by the band, so this transform is unused." 

781 "Will be removed after v22.") 

782 ) 

783 outputBands = pexConfig.ListField( 

784 dtype=str, 

785 default=None, 

786 optional=True, 

787 doc=("These bands and only these bands will appear in the output," 

788 " NaN-filled if the input does not include them." 

789 " If None, then use all bands found in the input.") 

790 ) 

791 camelCase = pexConfig.Field( 

792 dtype=bool, 

793 default=False, 

794 doc=("Write per-band columns names with camelCase, else underscore " 

795 "For example: gPsFlux instead of g_PsFlux.") 

796 ) 

797 multilevelOutput = pexConfig.Field( 

798 dtype=bool, 

799 default=False, 

800 doc=("Whether results dataframe should have a multilevel column index (True) or be flat " 

801 "and name-munged (False).") 

802 ) 

803 goodFlags = pexConfig.ListField( 

804 dtype=str, 

805 default=[], 

806 doc=("List of 'good' flags that should be set False when populating empty tables. " 

807 "All other flags are considered to be 'bad' flags and will be set to True.") 

808 ) 

809 floatFillValue = pexConfig.Field( 

810 dtype=float, 

811 default=np.nan, 

812 doc="Fill value for float fields when populating empty tables." 

813 ) 

814 integerFillValue = pexConfig.Field( 

815 dtype=int, 

816 default=-1, 

817 doc="Fill value for integer fields when populating empty tables." 

818 ) 

819 

820 def setDefaults(self): 

821 super().setDefaults() 

822 self.functorFile = os.path.join('$PIPE_TASKS_DIR', 'schemas', 'Object.yaml') 

823 self.primaryKey = 'objectId' 

824 self.goodFlags = ['calib_astrometry_used', 

825 'calib_photometry_reserved', 

826 'calib_photometry_used', 

827 'calib_psf_candidate', 

828 'calib_psf_reserved', 

829 'calib_psf_used'] 

830 

831 

832class TransformObjectCatalogTask(TransformCatalogBaseTask): 

833 """Produce a flattened Object Table to match the format specified in 

834 sdm_schemas. 

835 

836 Do the same set of postprocessing calculations on all bands 

837 

838 This is identical to `TransformCatalogBaseTask`, except that it performs the

839 specified functor calculations for all filters present in the

840 input `deepCoadd_obj` table. Any specific `"filt"` keywords specified

841 by the YAML file will be superseded.

842 """ 

843 _DefaultName = "transformObjectCatalog" 

844 ConfigClass = TransformObjectCatalogConfig 

845 

846 # Used by Gen 2 runDataRef only: 

847 inputDataset = 'deepCoadd_obj' 

848 outputDataset = 'objectTable' 

849 

850 @classmethod 

851 def _makeArgumentParser(cls): 

852 parser = ArgumentParser(name=cls._DefaultName) 

853 parser.add_id_argument("--id", cls.inputDataset, 

854 ContainerClass=CoaddDataIdContainer, 

855 help="data ID, e.g. --id tract=12345 patch=1,2") 

856 return parser 

857 

858 def run(self, parq, funcs=None, dataId=None, band=None): 

859 # NOTE: band kwarg is ignored here. 

860 dfDict = {} 

861 analysisDict = {} 

862 templateDf = pd.DataFrame() 

863 

864 if isinstance(parq, DeferredDatasetHandle): 

865 columns = parq.get(component='columns') 

866 inputBands = columns.unique(level=1).values 

867 else: 

868 inputBands = parq.columnLevelNames['band'] 

869 

870 outputBands = self.config.outputBands if self.config.outputBands else inputBands 

871 

872 # Perform transform for data of filters that exist in parq. 

873 for inputBand in inputBands: 

874 if inputBand not in outputBands: 

875 self.log.info("Ignoring %s band data in the input", inputBand) 

876 continue 

877 self.log.info("Transforming the catalog of band %s", inputBand) 

878 result = self.transform(inputBand, parq, funcs, dataId) 

879 dfDict[inputBand] = result.df 

880 analysisDict[inputBand] = result.analysis 

881 if templateDf.empty: 

882 templateDf = result.df 

883 

884 # Put filler values in columns of other wanted bands 

885 for filt in outputBands: 

886 if filt not in dfDict: 

887 self.log.info("Adding empty columns for band %s", filt) 

888 dfTemp = templateDf.copy() 

889 for col in dfTemp.columns: 

890 testValue = dfTemp[col].values[0] 

891 if isinstance(testValue, (np.bool_, pd.BooleanDtype)): 

892 # Boolean flag type, check if it is a "good" flag 

893 if col in self.config.goodFlags: 

894 fillValue = False 

895 else: 

896 fillValue = True 

897 elif isinstance(testValue, numbers.Integral): 

898 # Checking numbers.Integral catches all flavors 

899 # of python, numpy, pandas, etc. integers. 

900 # We must ensure this is not an unsigned integer. 

901 if isinstance(testValue, np.unsignedinteger): 

902 raise ValueError("Parquet tables may not have unsigned integer columns.") 

903 else: 

904 fillValue = self.config.integerFillValue 

905 else: 

906 fillValue = self.config.floatFillValue 

907 dfTemp[col].values[:] = fillValue 

908 dfDict[filt] = dfTemp 

909 

910 # This makes a multilevel column index, with band as first level 

911 df = pd.concat(dfDict, axis=1, names=['band', 'column']) 

912 

913 if not self.config.multilevelOutput: 

914 noDupCols = list(set.union(*[set(v.noDupCols) for v in analysisDict.values()])) 

915 if self.config.primaryKey in noDupCols: 

916 noDupCols.remove(self.config.primaryKey) 

917 if dataId is not None: 

918 noDupCols += list(dataId.keys()) 

919 df = flattenFilters(df, noDupCols=noDupCols, camelCase=self.config.camelCase, 

920 inputBands=inputBands) 

921 

922 self.log.info("Made a table of %d columns and %d rows", len(df.columns), len(df)) 

923 

924 return df 
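
# A sketch mirroring the fill rules applied above when a requested output band
# is missing from the input (helper name is illustrative, not pipeline API).
def _exampleFillValue(config, columnName, testValue):
    if isinstance(testValue, np.bool_):
        # "good" flags are filled with False, all other flags with True
        return columnName not in config.goodFlags
    if isinstance(testValue, numbers.Integral):
        if isinstance(testValue, np.unsignedinteger):
            raise ValueError("Parquet tables may not have unsigned integer columns.")
        return config.integerFillValue  # -1 by default
    return config.floatFillValue  # NaN by default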

925 

926 

927class TractObjectDataIdContainer(CoaddDataIdContainer): 

928 

929 def makeDataRefList(self, namespace): 

930 """Make self.refList from self.idList 

931 

932 Generate a list of data references given tract and/or patch. 

933 This was adapted from `TractQADataIdContainer`, which was 

934 `TractDataIdContainer` modified to not require "filter".

935 Only existing dataRefs are returned. 

936 """ 

937 def getPatchRefList(tract): 

938 return [namespace.butler.dataRef(datasetType=self.datasetType, 

939 tract=tract.getId(), 

940 patch="%d,%d" % patch.getIndex()) for patch in tract] 

941 

942 tractRefs = defaultdict(list) # Data references for each tract 

943 for dataId in self.idList: 

944 skymap = self.getSkymap(namespace) 

945 

946 if "tract" in dataId: 

947 tractId = dataId["tract"] 

948 if "patch" in dataId: 

949 tractRefs[tractId].append(namespace.butler.dataRef(datasetType=self.datasetType, 

950 tract=tractId, 

951 patch=dataId['patch'])) 

952 else: 

953 tractRefs[tractId] += getPatchRefList(skymap[tractId]) 

954 else: 

955 tractRefs = dict((tract.getId(), tractRefs.get(tract.getId(), []) + getPatchRefList(tract)) 

956 for tract in skymap) 

957 outputRefList = [] 

958 for tractRefList in tractRefs.values(): 

959 existingRefs = [ref for ref in tractRefList if ref.datasetExists()] 

960 outputRefList.append(existingRefs) 

961 

962 self.refList = outputRefList 

963 

964 

965class ConsolidateObjectTableConnections(pipeBase.PipelineTaskConnections, 

966 dimensions=("tract", "skymap")): 

967 inputCatalogs = connectionTypes.Input( 

968 doc="Per-Patch objectTables conforming to the standard data model.", 

969 name="objectTable", 

970 storageClass="DataFrame", 

971 dimensions=("tract", "patch", "skymap"), 

972 multiple=True, 

973 ) 

974 outputCatalog = connectionTypes.Output( 

975 doc="Pre-tract horizontal concatenation of the input objectTables", 

976 name="objectTable_tract", 

977 storageClass="DataFrame", 

978 dimensions=("tract", "skymap"), 

979 ) 

980 

981 

982class ConsolidateObjectTableConfig(pipeBase.PipelineTaskConfig, 

983 pipelineConnections=ConsolidateObjectTableConnections): 

984 coaddName = pexConfig.Field( 

985 dtype=str, 

986 default="deep", 

987 doc="Name of coadd" 

988 ) 

989 

990 

991class ConsolidateObjectTableTask(CmdLineTask, pipeBase.PipelineTask): 

992 """Write patch-merged source tables to a tract-level parquet file 

993 

994 Concatenates the per-patch `objectTable` list into a per-tract `objectTable_tract`.

995 """ 

996 _DefaultName = "consolidateObjectTable" 

997 ConfigClass = ConsolidateObjectTableConfig 

998 

999 inputDataset = 'objectTable' 

1000 outputDataset = 'objectTable_tract' 

1001 

1002 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

1003 inputs = butlerQC.get(inputRefs) 

1004 self.log.info("Concatenating %s per-patch Object Tables", 

1005 len(inputs['inputCatalogs'])) 

1006 df = pd.concat(inputs['inputCatalogs']) 

1007 butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs) 

1008 

1009 @classmethod 

1010 def _makeArgumentParser(cls): 

1011 parser = ArgumentParser(name=cls._DefaultName) 

1012 

1013 parser.add_id_argument("--id", cls.inputDataset, 

1014 help="data ID, e.g. --id tract=12345", 

1015 ContainerClass=TractObjectDataIdContainer) 

1016 return parser 

1017 

1018 def runDataRef(self, patchRefList): 

1019 df = pd.concat([patchRef.get().toDataFrame() for patchRef in patchRefList]) 

1020 patchRefList[0].put(ParquetTable(dataFrame=df), self.outputDataset) 

1021 

1022 def writeMetadata(self, dataRef): 

1023 """No metadata to write. 

1024 """ 

1025 pass 

1026 

1027 

1028class TransformSourceTableConnections(pipeBase.PipelineTaskConnections, 

1029 defaultTemplates={"catalogType": ""}, 

1030 dimensions=("instrument", "visit", "detector")): 

1031 

1032 inputCatalog = connectionTypes.Input( 

1033 doc="Wide input catalog of sources produced by WriteSourceTableTask", 

1034 name="{catalogType}source", 

1035 storageClass="DataFrame", 

1036 dimensions=("instrument", "visit", "detector"), 

1037 deferLoad=True 

1038 ) 

1039 outputCatalog = connectionTypes.Output( 

1040 doc="Narrower, per-detector Source Table transformed and converted per a " 

1041 "specified set of functors", 

1042 name="{catalogType}sourceTable", 

1043 storageClass="DataFrame", 

1044 dimensions=("instrument", "visit", "detector") 

1045 ) 

1046 

1047 

1048class TransformSourceTableConfig(TransformCatalogBaseConfig, 

1049 pipelineConnections=TransformSourceTableConnections): 

1050 

1051 def setDefaults(self): 

1052 super().setDefaults() 

1053 self.functorFile = os.path.join('$PIPE_TASKS_DIR', 'schemas', 'Source.yaml') 

1054 self.primaryKey = 'sourceId' 

1055 

1056 

1057class TransformSourceTableTask(TransformCatalogBaseTask): 

1058 """Transform/standardize a source catalog 

1059 """ 

1060 _DefaultName = "transformSourceTable" 

1061 ConfigClass = TransformSourceTableConfig 

1062 

1063 inputDataset = 'source' 

1064 outputDataset = 'sourceTable' 

1065 

1066 @classmethod 

1067 def _makeArgumentParser(cls): 

1068 parser = ArgumentParser(name=cls._DefaultName) 

1069 parser.add_id_argument("--id", datasetType=cls.inputDataset, 

1070 level="sensor", 

1071 help="data ID, e.g. --id visit=12345 ccd=0") 

1072 return parser 

1073 

1074 def runDataRef(self, dataRef): 

1075 """Override to specify band label to run().""" 

1076 parq = dataRef.get() 

1077 funcs = self.getFunctors() 

1078 band = dataRef.get("calexp_filterLabel", immediate=True).bandLabel 

1079 df = self.run(parq, funcs=funcs, dataId=dataRef.dataId, band=band) 

1080 self.write(df, dataRef) 

1081 return df 

1082 

1083 

1084class ConsolidateVisitSummaryConnections(pipeBase.PipelineTaskConnections, 

1085 dimensions=("instrument", "visit",), 

1086 defaultTemplates={"calexpType": ""}): 

1087 calexp = connectionTypes.Input( 

1088 doc="Processed exposures used for metadata", 

1089 name="{calexpType}calexp", 

1090 storageClass="ExposureF", 

1091 dimensions=("instrument", "visit", "detector"), 

1092 deferLoad=True, 

1093 multiple=True, 

1094 ) 

1095 visitSummary = connectionTypes.Output( 

1096 doc=("Per-visit consolidated exposure metadata. These catalogs use " 

1097 "detector id for the id and are sorted for fast lookups of a " 

1098 "detector."), 

1099 name="{calexpType}visitSummary", 

1100 storageClass="ExposureCatalog", 

1101 dimensions=("instrument", "visit"), 

1102 ) 

1103 

1104 

1105class ConsolidateVisitSummaryConfig(pipeBase.PipelineTaskConfig, 

1106 pipelineConnections=ConsolidateVisitSummaryConnections): 

1107 """Config for ConsolidateVisitSummaryTask""" 

1108 pass 

1109 

1110 

1111class ConsolidateVisitSummaryTask(pipeBase.PipelineTask, pipeBase.CmdLineTask): 

1112 """Task to consolidate per-detector visit metadata. 

1113 

1114 This task aggregates the following metadata from all the detectors in a 

1115 single visit into an exposure catalog: 

1116 - The visitInfo. 

1117 - The wcs. 

1118 - The photoCalib. 

1119 - The physical_filter and band (if available). 

1120 - The psf size, shape, and effective area at the center of the detector. 

1121 - The corners of the bounding box in right ascension/declination. 

1122 

1123 Other quantities such as Detector, Psf, ApCorrMap, and TransmissionCurve 

1124 are not persisted here because of storage concerns, and because of their 

1125 limited utility as summary statistics. 

1126 

1127 Tests for this task are performed in ci_hsc_gen3. 

1128 """ 

1129 _DefaultName = "consolidateVisitSummary" 

1130 ConfigClass = ConsolidateVisitSummaryConfig 

1131 

1132 @classmethod 

1133 def _makeArgumentParser(cls): 

1134 parser = ArgumentParser(name=cls._DefaultName) 

1135 

1136 parser.add_id_argument("--id", "calexp", 

1137 help="data ID, e.g. --id visit=12345", 

1138 ContainerClass=VisitDataIdContainer) 

1139 return parser 

1140 

1141 def writeMetadata(self, dataRef): 

1142 """No metadata to persist, so override to remove metadata persistance. 

1143 """ 

1144 pass 

1145 

1146 def writeConfig(self, butler, clobber=False, doBackup=True): 

1147 """No config to persist, so override to remove config persistance. 

1148 """ 

1149 pass 

1150 

1151 def runDataRef(self, dataRefList): 

1152 visit = dataRefList[0].dataId['visit'] 

1153 

1154 self.log.debug("Concatenating metadata from %d per-detector calexps (visit %d)", 

1155 len(dataRefList), visit) 

1156 

1157 expCatalog = self._combineExposureMetadata(visit, dataRefList, isGen3=False) 

1158 

1159 dataRefList[0].put(expCatalog, 'visitSummary', visit=visit) 

1160 

1161 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

1162 dataRefs = butlerQC.get(inputRefs.calexp) 

1163 visit = dataRefs[0].dataId.byName()['visit'] 

1164 

1165 self.log.debug("Concatenating metadata from %d per-detector calexps (visit %d)", 

1166 len(dataRefs), visit) 

1167 

1168 expCatalog = self._combineExposureMetadata(visit, dataRefs) 

1169 

1170 butlerQC.put(expCatalog, outputRefs.visitSummary) 

1171 

1172 def _combineExposureMetadata(self, visit, dataRefs, isGen3=True): 

1173 """Make a combined exposure catalog from a list of dataRefs. 

1174 These dataRefs must point to exposures with wcs, summaryStats, 

1175 and other visit metadata. 

1176 

1177 Parameters 

1178 ---------- 

1179 visit : `int` 

1180 Visit identification number. 

1181 dataRefs : `list` 

1182 List of dataRefs in visit. May be list of 

1183 `lsst.daf.persistence.ButlerDataRef` (Gen2) or 

1184 `lsst.daf.butler.DeferredDatasetHandle` (Gen3). 

1185 isGen3 : `bool`, optional 

1186 Specifies if this is a Gen3 list of datarefs. 

1187 

1188 Returns 

1189 ------- 

1190 visitSummary : `lsst.afw.table.ExposureCatalog` 

1191 Exposure catalog with per-detector summary information. 

1192 """ 

1193 schema = self._makeVisitSummarySchema() 

1194 cat = afwTable.ExposureCatalog(schema) 

1195 cat.resize(len(dataRefs)) 

1196 

1197 cat['visit'] = visit 

1198 

1199 for i, dataRef in enumerate(dataRefs): 

1200 if isGen3: 

1201 visitInfo = dataRef.get(component='visitInfo') 

1202 filterLabel = dataRef.get(component='filterLabel') 

1203 summaryStats = dataRef.get(component='summaryStats') 

1204 detector = dataRef.get(component='detector') 

1205 wcs = dataRef.get(component='wcs') 

1206 photoCalib = dataRef.get(component='photoCalib')

1208 bbox = dataRef.get(component='bbox')

1209 validPolygon = dataRef.get(component='validPolygon') 

1210 else: 

1211 # Note that we need to read the calexp because there is 

1212 # no magic access to the psf except through the exposure. 

1213 gen2_read_bbox = lsst.geom.BoxI(lsst.geom.PointI(0, 0), lsst.geom.PointI(1, 1)) 

1214 exp = dataRef.get(datasetType='calexp_sub', bbox=gen2_read_bbox) 

1215 visitInfo = exp.getInfo().getVisitInfo() 

1216 filterLabel = dataRef.get("calexp_filterLabel") 

1217 summaryStats = exp.getInfo().getSummaryStats() 

1218 wcs = exp.getWcs() 

1219 photoCalib = exp.getPhotoCalib() 

1220 detector = exp.getDetector() 

1221 bbox = dataRef.get(datasetType='calexp_bbox') 

1222 validPolygon = exp.getInfo().getValidPolygon() 

1223 

1224 rec = cat[i] 

1225 rec.setBBox(bbox) 

1226 rec.setVisitInfo(visitInfo) 

1227 rec.setWcs(wcs) 

1228 rec.setPhotoCalib(photoCalib) 

1229 rec.setValidPolygon(validPolygon) 

1230 

1231 rec['physical_filter'] = filterLabel.physicalLabel if filterLabel.hasPhysicalLabel() else "" 

1232 rec['band'] = filterLabel.bandLabel if filterLabel.hasBandLabel() else "" 

1233 rec.setId(detector.getId()) 

1234 rec['psfSigma'] = summaryStats.psfSigma 

1235 rec['psfIxx'] = summaryStats.psfIxx 

1236 rec['psfIyy'] = summaryStats.psfIyy 

1237 rec['psfIxy'] = summaryStats.psfIxy 

1238 rec['psfArea'] = summaryStats.psfArea 

1239 rec['raCorners'][:] = summaryStats.raCorners 

1240 rec['decCorners'][:] = summaryStats.decCorners 

1241 rec['ra'] = summaryStats.ra 

1242 rec['decl'] = summaryStats.decl 

1243 rec['zenithDistance'] = summaryStats.zenithDistance 

1244 rec['zeroPoint'] = summaryStats.zeroPoint 

1245 rec['skyBg'] = summaryStats.skyBg 

1246 rec['skyNoise'] = summaryStats.skyNoise 

1247 rec['meanVar'] = summaryStats.meanVar 

1248 rec['astromOffsetMean'] = summaryStats.astromOffsetMean 

1249 rec['astromOffsetStd'] = summaryStats.astromOffsetStd 

1250 rec['nPsfStar'] = summaryStats.nPsfStar 

1251 rec['psfStarDeltaE1Median'] = summaryStats.psfStarDeltaE1Median 

1252 rec['psfStarDeltaE2Median'] = summaryStats.psfStarDeltaE2Median 

1253 rec['psfStarDeltaE1Scatter'] = summaryStats.psfStarDeltaE1Scatter 

1254 rec['psfStarDeltaE2Scatter'] = summaryStats.psfStarDeltaE2Scatter 

1255 rec['psfStarDeltaSizeMedian'] = summaryStats.psfStarDeltaSizeMedian 

1256 rec['psfStarDeltaSizeScatter'] = summaryStats.psfStarDeltaSizeScatter 

1257 rec['psfStarScaledDeltaSizeScatter'] = summaryStats.psfStarScaledDeltaSizeScatter 

1258 

1259 metadata = dafBase.PropertyList() 

1260 metadata.add("COMMENT", "Catalog id is detector id, sorted.") 

1261 # We are looping over existing datarefs, so the following is true 

1262 metadata.add("COMMENT", "Only detectors with data have entries.") 

1263 cat.setMetadata(metadata) 

1264 

1265 cat.sort() 

1266 return cat 

1267 

1268 def _makeVisitSummarySchema(self): 

1269 """Make the schema for the visitSummary catalog.""" 

1270 schema = afwTable.ExposureTable.makeMinimalSchema() 

1271 schema.addField('visit', type='I', doc='Visit number') 

1272 schema.addField('physical_filter', type='String', size=32, doc='Physical filter') 

1273 schema.addField('band', type='String', size=32, doc='Name of band') 

1274 schema.addField('psfSigma', type='F', 

1275 doc='PSF model second-moments determinant radius (center of chip) (pixel)') 

1276 schema.addField('psfArea', type='F', 

1277 doc='PSF model effective area (center of chip) (pixel**2)') 

1278 schema.addField('psfIxx', type='F', 

1279 doc='PSF model Ixx (center of chip) (pixel**2)') 

1280 schema.addField('psfIyy', type='F', 

1281 doc='PSF model Iyy (center of chip) (pixel**2)') 

1282 schema.addField('psfIxy', type='F', 

1283 doc='PSF model Ixy (center of chip) (pixel**2)') 

1284 schema.addField('raCorners', type='ArrayD', size=4, 

1285 doc='Right Ascension of bounding box corners (degrees)') 

1286 schema.addField('decCorners', type='ArrayD', size=4, 

1287 doc='Declination of bounding box corners (degrees)') 

1288 schema.addField('ra', type='D', 

1289 doc='Right Ascension of bounding box center (degrees)') 

1290 schema.addField('decl', type='D', 

1291 doc='Declination of bounding box center (degrees)') 

1292 schema.addField('zenithDistance', type='F', 

1293 doc='Zenith distance of bounding box center (degrees)') 

1294 schema.addField('zeroPoint', type='F', 

1295 doc='Mean zeropoint in detector (mag)') 

1296 schema.addField('skyBg', type='F', 

1297 doc='Average sky background (ADU)') 

1298 schema.addField('skyNoise', type='F', 

1299 doc='Average sky noise (ADU)') 

1300 schema.addField('meanVar', type='F', 

1301 doc='Mean variance of the weight plane (ADU**2)') 

1302 schema.addField('astromOffsetMean', type='F', 

1303 doc='Mean offset of astrometric calibration matches (arcsec)') 

1304 schema.addField('astromOffsetStd', type='F', 

1305 doc='Standard deviation of offsets of astrometric calibration matches (arcsec)') 

1306 schema.addField('nPsfStar', type='I', doc='Number of stars used for PSF model') 

1307 schema.addField('psfStarDeltaE1Median', type='F', 

1308 doc='Median E1 residual (starE1 - psfE1) for psf stars') 

1309 schema.addField('psfStarDeltaE2Median', type='F', 

1310 doc='Median E2 residual (starE2 - psfE2) for psf stars') 

1311 schema.addField('psfStarDeltaE1Scatter', type='F', 

1312 doc='Scatter (via MAD) of E1 residual (starE1 - psfE1) for psf stars') 

1313 schema.addField('psfStarDeltaE2Scatter', type='F', 

1314 doc='Scatter (via MAD) of E2 residual (starE2 - psfE2) for psf stars') 

1315 schema.addField('psfStarDeltaSizeMedian', type='F', 

1316 doc='Median size residual (starSize - psfSize) for psf stars (pixel)') 

1317 schema.addField('psfStarDeltaSizeScatter', type='F', 

1318 doc='Scatter (via MAD) of size residual (starSize - psfSize) for psf stars (pixel)') 

1319 schema.addField('psfStarScaledDeltaSizeScatter', type='F', 

1320 doc='Scatter (via MAD) of size residual scaled by median size squared') 

1321 

1322 return schema 
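
# A hedged sketch of consuming the visitSummary catalog: records are sorted by
# detector id, so `find` gives fast per-detector lookup. The butler call and
# data ID keywords here are illustrative.
def _exampleReadVisitSummary(butler, instrument, visit, detector):
    visitSummary = butler.get('visitSummary', instrument=instrument, visit=visit)
    row = visitSummary.find(detector)
    return row['psfSigma'], row.getWcs()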

1323 

1324 

1325class VisitDataIdContainer(DataIdContainer): 

1326 """DataIdContainer that groups sensor-level id's by visit 

1327 """ 

1328 

1329 def makeDataRefList(self, namespace): 

1330 """Make self.refList from self.idList 

1331 

1332 Generate a list of data references grouped by visit. 

1333 

1334 Parameters 

1335 ---------- 

1336 namespace : `argparse.Namespace` 

1337 Namespace used by `lsst.pipe.base.CmdLineTask` to parse command line arguments 

1338 """ 

1339 # Group by visits 

1340 visitRefs = defaultdict(list) 

1341 for dataId in self.idList: 

1342 if "visit" in dataId: 

1343 visitId = dataId["visit"] 

1344 # Append every dataRef in this visit's subset

1345 subset = namespace.butler.subset(self.datasetType, dataId=dataId) 

1346 visitRefs[visitId].extend([dataRef for dataRef in subset]) 

1347 

1348 outputRefList = [] 

1349 for refList in visitRefs.values(): 

1350 existingRefs = [ref for ref in refList if ref.datasetExists()] 

1351 if existingRefs: 

1352 outputRefList.append(existingRefs) 

1353 

1354 self.refList = outputRefList 

1355 

1356 

1357class ConsolidateSourceTableConnections(pipeBase.PipelineTaskConnections, 

1358 defaultTemplates={"catalogType": ""}, 

1359 dimensions=("instrument", "visit")): 

1360 inputCatalogs = connectionTypes.Input( 

1361 doc="Input per-detector Source Tables", 

1362 name="{catalogType}sourceTable", 

1363 storageClass="DataFrame", 

1364 dimensions=("instrument", "visit", "detector"), 

1365 multiple=True 

1366 ) 

1367 outputCatalog = connectionTypes.Output( 

1368 doc="Per-visit concatenation of Source Table", 

1369 name="{catalogType}sourceTable_visit", 

1370 storageClass="DataFrame", 

1371 dimensions=("instrument", "visit") 

1372 ) 

1373 

1374 

1375class ConsolidateSourceTableConfig(pipeBase.PipelineTaskConfig, 

1376 pipelineConnections=ConsolidateSourceTableConnections): 

1377 pass 

1378 

1379 

1380class ConsolidateSourceTableTask(CmdLineTask, pipeBase.PipelineTask): 

1381 """Concatenate `sourceTable` list into a per-visit `sourceTable_visit` 

1382 """ 

1383 _DefaultName = 'consolidateSourceTable' 

1384 ConfigClass = ConsolidateSourceTableConfig 

1385 

1386 inputDataset = 'sourceTable' 

1387 outputDataset = 'sourceTable_visit' 

1388 

1389 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

1390 inputs = butlerQC.get(inputRefs) 

1391 self.log.info("Concatenating %s per-detector Source Tables", 

1392 len(inputs['inputCatalogs'])) 

1393 df = pd.concat(inputs['inputCatalogs']) 

1394 butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs) 

1395 

1396 def runDataRef(self, dataRefList): 

1397 self.log.info("Concatenating %s per-detector Source Tables", len(dataRefList)) 

1398 df = pd.concat([dataRef.get().toDataFrame() for dataRef in dataRefList]) 

1399 dataRefList[0].put(ParquetTable(dataFrame=df), self.outputDataset) 

1400 

1401 @classmethod 

1402 def _makeArgumentParser(cls): 

1403 parser = ArgumentParser(name=cls._DefaultName) 

1404 

1405 parser.add_id_argument("--id", cls.inputDataset, 

1406 help="data ID, e.g. --id visit=12345", 

1407 ContainerClass=VisitDataIdContainer) 

1408 return parser 

1409 

1410 def writeMetadata(self, dataRef): 

1411 """No metadata to write. 

1412 """ 

1413 pass 

1414 

1415 def writeConfig(self, butler, clobber=False, doBackup=True): 

1416 """No config to write. 

1417 """ 

1418 pass 

1419 

1420 

1421class MakeCcdVisitTableConnections(pipeBase.PipelineTaskConnections, 

1422 dimensions=("instrument",), 

1423 defaultTemplates={"calexpType": ""}): 

1424 visitSummaryRefs = connectionTypes.Input( 

1425 doc="Data references for per-visit consolidated exposure metadata from ConsolidateVisitSummaryTask", 

1426 name="{calexpType}visitSummary", 

1427 storageClass="ExposureCatalog", 

1428 dimensions=("instrument", "visit"), 

1429 multiple=True, 

1430 deferLoad=True, 

1431 ) 

1432 outputCatalog = connectionTypes.Output( 

1433 doc="CCD and Visit metadata table", 

1434 name="ccdVisitTable", 

1435 storageClass="DataFrame", 

1436 dimensions=("instrument",) 

1437 ) 

1438 

1439 

1440class MakeCcdVisitTableConfig(pipeBase.PipelineTaskConfig, 

1441 pipelineConnections=MakeCcdVisitTableConnections): 

1442 pass 

1443 

1444 

1445class MakeCcdVisitTableTask(CmdLineTask, pipeBase.PipelineTask): 

1446 """Produce a `ccdVisitTable` from the `visitSummary` exposure catalogs. 

1447 """ 

1448 _DefaultName = 'makeCcdVisitTable' 

1449 ConfigClass = MakeCcdVisitTableConfig 

1450 

1451 def run(self, visitSummaryRefs): 

1452 """ Make a table of ccd information from the `visitSummary` catalogs. 

1453 Parameters 

1454 ---------- 

1455 visitSummaryRefs : `list` of `lsst.daf.butler.DeferredDatasetHandle` 

1456 List of DeferredDatasetHandles pointing to exposure catalogs with 

1457 per-detector summary information. 

1458 Returns 

1459 ------- 

1460 result : `lsst.pipe.base.Struct`

1461 Results struct with attribute: 

1462 - `outputCatalog` 

1463 Catalog of ccd and visit information. 

1464 """ 

1465 ccdEntries = [] 

1466 for visitSummaryRef in visitSummaryRefs: 

1467 visitSummary = visitSummaryRef.get() 

1468 visitInfo = visitSummary[0].getVisitInfo() 

1469 

1470 ccdEntry = {} 

1471 summaryTable = visitSummary.asAstropy() 

1472 selectColumns = ['id', 'visit', 'physical_filter', 'band', 'ra', 'decl', 'zenithDistance', 

1473 'zeroPoint', 'psfSigma', 'skyBg', 'skyNoise'] 

1474 ccdEntry = summaryTable[selectColumns].to_pandas().set_index('id') 

1475 # 'visit' is the human-readable visit number. 

1476 # 'visitId' is the key into the visit table. They are currently identical; 

1477 # strictly, the visit number should be obtained by joining with the visit table. 

1478 ccdEntry = ccdEntry.rename(columns={"visit": "visitId"}) 
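# Pack each (visit, detector) data ID into a single integer, ccdVisitId,
# which serves as this table's primary key for joins with per-source tables.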

1479 dataIds = [DataCoordinate.standardize(visitSummaryRef.dataId, detector=id) for id in 

1480 summaryTable['id']] 

1481 packer = visitSummaryRef.dataId.universe.makePacker('visit_detector', visitSummaryRef.dataId) 

1482 ccdVisitIds = [packer.pack(dataId) for dataId in dataIds] 

1483 ccdEntry['ccdVisitId'] = ccdVisitIds 

1484 ccdEntry['detector'] = summaryTable['id'] 

1485 pixToArcseconds = np.array([vR.getWcs().getPixelScale().asArcseconds() for vR in visitSummary]) 
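# Convert the PSF model sigma (pixels) to a FWHM in arcseconds:
# FWHM = sigma * sqrt(8 * ln(2)) ~= 2.355 * sigma.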

1486 ccdEntry["seeing"] = visitSummary['psfSigma'] * np.sqrt(8 * np.log(2)) * pixToArcseconds 

1487 

1488 ccdEntry["skyRotation"] = visitInfo.getBoresightRotAngle().asDegrees() 

1489 ccdEntry["expMidpt"] = visitInfo.getDate().toPython() 

1490 expTime = visitInfo.getExposureTime() 

1491 ccdEntry['expTime'] = expTime 
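# Approximate the observation start as the exposure midpoint minus half the exposure time.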

1492 ccdEntry["obsStart"] = ccdEntry["expMidpt"] - 0.5 * pd.Timedelta(seconds=expTime) 

1493 ccdEntry['darkTime'] = visitInfo.getDarkTime() 

1494 ccdEntry['xSize'] = summaryTable['bbox_max_x'] - summaryTable['bbox_min_x'] 

1495 ccdEntry['ySize'] = summaryTable['bbox_max_y'] - summaryTable['bbox_min_y'] 
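# Detector corner coordinates in RA/Dec: llc, ulc, urc, lrc are the
# lower-left, upper-left, upper-right, and lower-right corners, respectively.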

1496 ccdEntry['llcra'] = summaryTable['raCorners'][:, 0] 

1497 ccdEntry['llcdec'] = summaryTable['decCorners'][:, 0] 

1498 ccdEntry['ulcra'] = summaryTable['raCorners'][:, 1] 

1499 ccdEntry['ulcdec'] = summaryTable['decCorners'][:, 1] 

1500 ccdEntry['urcra'] = summaryTable['raCorners'][:, 2] 

1501 ccdEntry['urcdec'] = summaryTable['decCorners'][:, 2] 

1502 ccdEntry['lrcra'] = summaryTable['raCorners'][:, 3] 

1503 ccdEntry['lrcdec'] = summaryTable['decCorners'][:, 3] 

1504 # TODO: DM-30618, Add raftName, nExposures, ccdTemp, binX, binY, and flags, 

1505 # and decide if WCS, and llcx, llcy, ulcx, ulcy, etc. values are actually wanted. 

1506 ccdEntries.append(ccdEntry) 

1507 

1508 outputCatalog = pd.concat(ccdEntries) 

1509 outputCatalog.set_index('ccdVisitId', inplace=True, verify_integrity=True) 

1510 return pipeBase.Struct(outputCatalog=outputCatalog) 

1511 

1512 

1513class MakeVisitTableConnections(pipeBase.PipelineTaskConnections, 

1514 dimensions=("instrument",), 

1515 defaultTemplates={"calexpType": ""}): 

1516 visitSummaries = connectionTypes.Input( 

1517 doc="Per-visit consolidated exposure metadata from ConsolidateVisitSummaryTask", 

1518 name="{calexpType}visitSummary", 

1519 storageClass="ExposureCatalog", 

1520 dimensions=("instrument", "visit",), 

1521 multiple=True, 

1522 deferLoad=True, 

1523 ) 

1524 outputCatalog = connectionTypes.Output( 

1525 doc="Visit metadata table", 

1526 name="visitTable", 

1527 storageClass="DataFrame", 

1528 dimensions=("instrument",) 

1529 ) 

1530 

1531 

1532class MakeVisitTableConfig(pipeBase.PipelineTaskConfig, 

1533 pipelineConnections=MakeVisitTableConnections): 

1534 pass 

1535 

1536 

1537class MakeVisitTableTask(CmdLineTask, pipeBase.PipelineTask): 

1538 """Produce a `visitTable` from the `visitSummary` exposure catalogs. 

1539 """ 

1540 _DefaultName = 'makeVisitTable' 

1541 ConfigClass = MakeVisitTableConfig 

1542 

1543 def run(self, visitSummaries): 

1544 """ Make a table of visit information from the `visitSummary` catalogs 

1545 

1546 Parameters 

1547 ---------- 

1548 visitSummaries : `list` of `lsst.daf.butler.DeferredDatasetHandle` 

1549 List of DeferredDatasetHandles pointing to exposure catalogs with per-detector summary information. 

1550 Returns 

1551 ------- 

1552 result : `lsst.pipe.base.Struct` 

1553 Results struct with attribute: 

1554 ``outputCatalog`` 

1555 Catalog of visit information. 

1556 """ 

1557 visitEntries = [] 

1558 for visitSummary in visitSummaries: 

1559 visitSummary = visitSummary.get() 

1560 visitRow = visitSummary[0] 

1561 visitInfo = visitRow.getVisitInfo() 

1562 

1563 visitEntry = {} 

1564 visitEntry["visitId"] = visitRow['visit'] 

1565 visitEntry["visit"] = visitRow['visit'] 

1566 visitEntry["physical_filter"] = visitRow['physical_filter'] 

1567 visitEntry["band"] = visitRow['band'] 

1568 raDec = visitInfo.getBoresightRaDec() 

1569 visitEntry["ra"] = raDec.getRa().asDegrees() 

1570 visitEntry["decl"] = raDec.getDec().asDegrees() 

1571 visitEntry["skyRotation"] = visitInfo.getBoresightRotAngle().asDegrees() 

1572 azAlt = visitInfo.getBoresightAzAlt() 

1573 visitEntry["azimuth"] = azAlt.getLongitude().asDegrees() 

1574 visitEntry["altitude"] = azAlt.getLatitude().asDegrees() 
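# Zenith distance is the complement of the boresight altitude, in degrees.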

1575 visitEntry["zenithDistance"] = 90 - azAlt.getLatitude().asDegrees() 

1576 visitEntry["airmass"] = visitInfo.getBoresightAirmass() 

1577 visitEntry["obsStart"] = visitInfo.getDate().toPython() 

1578 visitEntry["expTime"] = visitInfo.getExposureTime() 

1579 visitEntries.append(visitEntry) 

1580 # TODO: DM-30623, Add programId, exposureType, expMidpt, cameraTemp, mirror1Temp, mirror2Temp, 

1581 # mirror3Temp, domeTemp, externalTemp, dimmSeeing, pwvGPS, pwvMW, flags, nExposures 

1582 

1583 outputCatalog = pd.DataFrame(data=visitEntries) 

1584 outputCatalog.set_index('visitId', inplace=True, verify_integrity=True) 

1585 return pipeBase.Struct(outputCatalog=outputCatalog) 

1586 

1587 

1588class WriteForcedSourceTableConnections(pipeBase.PipelineTaskConnections, 

1589 dimensions=("instrument", "visit", "detector", "skymap", "tract")): 

1590 

1591 inputCatalog = connectionTypes.Input( 

1592 doc="Primary per-detector, single-epoch forced-photometry catalog. " 

1593 "By default, it is the output of ForcedPhotCcdTask on calexps", 

1594 name="forced_src", 

1595 storageClass="SourceCatalog", 

1596 dimensions=("instrument", "visit", "detector", "skymap", "tract") 

1597 ) 

1598 inputCatalogDiff = connectionTypes.Input( 

1599 doc="Secondary multi-epoch, per-detector, forced photometry catalog. " 

1600 "By default, it is the output of ForcedPhotCcdTask run on image differences.", 

1601 name="forced_diff", 

1602 storageClass="SourceCatalog", 

1603 dimensions=("instrument", "visit", "detector", "skymap", "tract") 

1604 ) 

1605 outputCatalog = connectionTypes.Output( 

1606 doc="InputCatalogs horizonatally joined on `objectId` in Parquet format", 

1607 name="mergedForcedSource", 

1608 storageClass="DataFrame", 

1609 dimensions=("instrument", "visit", "detector", "skymap", "tract") 

1610 ) 

1611 

1612 

1613class WriteForcedSourceTableConfig(WriteSourceTableConfig, 

1614 pipelineConnections=WriteForcedSourceTableConnections): 

1615 key = lsst.pex.config.Field( 

1616 doc="Column on which to join the two input tables and which becomes the primary key of the output", 

1617 dtype=str, 

1618 default="objectId", 

1619 ) 

1620 

1621 

1622class WriteForcedSourceTableTask(pipeBase.PipelineTask): 

1623 """Merge and convert per-detector forced source catalogs to parquet 

1624 

1625 Because the predecessor ForcedPhotCcdTask operates per-detector and 

1626 per-tract (i.e., it has tract in its dimensions), detectors that lie 

1627 on a tract boundary may have multiple forced source catalogs. 

1628 

1629 The successor task TransformForcedSourceTable runs per-patch and 

1630 temporally aggregates the overlapping mergedForcedSource catalogs from 

1631 all available epochs. 

1632 """ 

1633 _DefaultName = "writeForcedSourceTable" 

1634 ConfigClass = WriteForcedSourceTableConfig 

1635 

1636 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

1637 inputs = butlerQC.get(inputRefs) 

1638 # Add ccdVisitId to allow joining with CcdVisitTable 

1639 inputs['ccdVisitId'] = butlerQC.quantum.dataId.pack("visit_detector") 

1640 inputs['band'] = butlerQC.quantum.dataId.full['band'] 

1641 outputs = self.run(**inputs) 

1642 butlerQC.put(outputs, outputRefs) 

1643 

1644 def run(self, inputCatalog, inputCatalogDiff, ccdVisitId=None, band=None): 

1645 dfs = [] 

1646 for table, dataset in zip((inputCatalog, inputCatalogDiff), ('calexp', 'diff')): 

1647 df = table.asAstropy().to_pandas().set_index(self.config.key, drop=False) 

1648 df = df.reindex(sorted(df.columns), axis=1) 

1649 df['ccdVisitId'] = ccdVisitId if ccdVisitId else pd.NA 

1650 df['band'] = band if band else pd.NA 

1651 df.columns = pd.MultiIndex.from_tuples([(dataset, c) for c in df.columns], 

1652 names=('dataset', 'column')) 

1653 

1654 dfs.append(df) 

1655 

1656 outputCatalog = functools.reduce(lambda d1, d2: d1.join(d2), dfs) 

1657 return pipeBase.Struct(outputCatalog=outputCatalog) 

1658 
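# A minimal, illustrative sketch of the two-level column layout produced by
# WriteForcedSourceTableTask.run; the helper name and column values are made
# up, but the ('dataset', 'column') MultiIndex mirrors the code above.
def _exampleMergedForcedSourceLayout():
    calexp = pd.DataFrame({"objectId": [1, 2], "flux": [1.0, 2.0]}).set_index("objectId", drop=False)
    diff = pd.DataFrame({"objectId": [1, 2], "flux": [0.1, -0.2]}).set_index("objectId", drop=False)
    dfs = []
    for df, dataset in zip((calexp, diff), ("calexp", "diff")):
        df = df.copy()
        # Columns become ('calexp', 'objectId'), ('calexp', 'flux'), ('diff', ...), ...
        df.columns = pd.MultiIndex.from_tuples([(dataset, c) for c in df.columns],
                                               names=("dataset", "column"))
        dfs.append(df)
    # Horizontal join on the shared objectId index, as in run().
    return functools.reduce(lambda d1, d2: d1.join(d2), dfs)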

1659 

1660class TransformForcedSourceTableConnections(pipeBase.PipelineTaskConnections, 

1661 dimensions=("instrument", "skymap", "patch", "tract")): 

1662 

1663 inputCatalogs = connectionTypes.Input( 

1664 doc="Parquet table of merged ForcedSources produced by WriteForcedSourceTableTask", 

1665 name="mergedForcedSource", 

1666 storageClass="DataFrame", 

1667 dimensions=("instrument", "visit", "detector", "skymap", "tract"), 

1668 multiple=True, 

1669 deferLoad=True 

1670 ) 

1671 referenceCatalog = connectionTypes.Input( 

1672 doc="Reference catalog which was used to seed the forcedPhot. Columns " 

1673 "objectId, detect_isPrimary, detect_isTractInner, detect_isPatchInner " 

1674 "are expected.", 

1675 name="objectTable", 

1676 storageClass="DataFrame", 

1677 dimensions=("tract", "patch", "skymap"), 

1678 deferLoad=True 

1679 ) 

1680 outputCatalog = connectionTypes.Output( 

1681 doc="Narrower, temporally aggregated, per-patch ForcedSource table, transformed per a " 

1682 "specified set of functors", 

1683 name="forcedSourceTable", 

1684 storageClass="DataFrame", 

1685 dimensions=("tract", "patch", "skymap") 

1686 ) 

1687 

1688 

1689class TransformForcedSourceTableConfig(TransformCatalogBaseConfig, 

1690 pipelineConnections=TransformForcedSourceTableConnections): 

1691 referenceColumns = pexConfig.ListField( 

1692 dtype=str, 

1693 default=["detect_isPrimary", "detect_isTractInner", "detect_isPatchInner"], 

1694 optional=True, 

1695 doc="Columns to pull from reference catalog", 

1696 ) 

1697 keyRef = lsst.pex.config.Field( 

1698 doc="Column on which to join the two input tables and which becomes the primary key of the output", 

1699 dtype=str, 

1700 default="objectId", 

1701 ) 

1702 key = lsst.pex.config.Field( 

1703 doc="Rename the output DataFrame index to this name", 

1704 dtype=str, 

1705 default="forcedSourceId", 

1706 ) 

1707 

1708 def setDefaults(self): 

1709 super().setDefaults() 

1710 self.functorFile = os.path.join('$PIPE_TASKS_DIR', 'schemas', 'ForcedSource.yaml') 

1711 

1712 

1713class TransformForcedSourceTableTask(TransformCatalogBaseTask): 

1714 """Transform/standardize a ForcedSource catalog 

1715 

1716 Transforms each wide, per-detector forcedSource parquet table per the 

1717 specification file (per-camera defaults found in ForcedSource.yaml). 

1718 All epochs that overlap the patch are aggregated into one per-patch 

1719 narrow-parquet file. 

1720 

1721 No de-duplication of rows is performed. Duplicate-resolution flags are 

1722 pulled in from the referenceCatalog (`detect_isPrimary`, 

1723 `detect_isTractInner`, `detect_isPatchInner`) so that the user may de-duplicate 

1724 for analysis or compare duplicates for QA. 

1725 

1726 The resulting table includes multiple bands. Epochs (MJDs) and other useful 

1727 per-visit quantities can be retrieved by joining with the CcdVisitTable on 

1728 ccdVisitId; a minimal example join is sketched after this class. 

1729 """ 

1730 _DefaultName = "transformForcedSourceTable" 

1731 ConfigClass = TransformForcedSourceTableConfig 

1732 

1733 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

1734 inputs = butlerQC.get(inputRefs) 

1735 if self.funcs is None: 

1736 raise ValueError("config.functorFile is None. " 

1737 "Must be a valid path to yaml in order to run Task as a PipelineTask.") 

1738 outputs = self.run(inputs['inputCatalogs'], inputs['referenceCatalog'], funcs=self.funcs, 

1739 dataId=outputRefs.outputCatalog.dataId.full) 

1740 

1741 butlerQC.put(outputs, outputRefs) 

1742 

1743 def run(self, inputCatalogs, referenceCatalog, funcs=None, dataId=None, band=None): 

1744 dfs = [] 

1745 ref = referenceCatalog.get(parameters={"columns": self.config.referenceColumns}) 

1746 self.log.info("Aggregating %s input catalogs", len(inputCatalogs)) 

1747 for handle in inputCatalogs: 

1748 result = self.transform(None, handle, funcs, dataId) 

1749 # Filter for only rows that were detected on (overlap) the patch 

1750 dfs.append(result.df.join(ref, how='inner')) 

1751 

1752 outputCatalog = pd.concat(dfs) 

1753 

1754 # The joins above were made on config.keyRef; name the index accordingly 

1755 # so that it survives as a regular column below. 

1756 outputCatalog.index.rename(self.config.keyRef, inplace=True) 

1757 # Move config.keyRef from the index into the column list. 

1758 outputCatalog.reset_index(inplace=True) 

1759 # Set forcedSourceId (defined in ForcedSource.yaml) as the index. 

1760 outputCatalog.set_index("forcedSourceId", inplace=True, verify_integrity=True) 

1761 # Rename the index to config.key. 

1762 outputCatalog.index.rename(self.config.key, inplace=True) 

1763 

1764 self.log.info("Made a table of %d columns and %d rows", 

1765 len(outputCatalog.columns), len(outputCatalog)) 

1766 return pipeBase.Struct(outputCatalog=outputCatalog) 

1767 
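# A minimal, illustrative sketch of the join described in the
# TransformForcedSourceTableTask docstring; the helper name is made up, and
# the two DataFrames are assumed to have been read from the butler already.
def _exampleJoinForcedSourcesWithCcdVisits(forcedSourceTable, ccdVisitTable):
    # ccdVisitTable is indexed by ccdVisitId (see MakeCcdVisitTableTask), so the
    # per-visit metadata can be attached to each forced source via that key.
    return forcedSourceTable.merge(ccdVisitTable, left_on="ccdVisitId",
                                   right_index=True, how="left")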

1768 

1769class ConsolidateTractConnections(pipeBase.PipelineTaskConnections, 

1770 defaultTemplates={"catalogType": ""}, 

1771 dimensions=("instrument", "tract")): 

1772 inputCatalogs = connectionTypes.Input( 

1773 doc="Input per-patch DataFrame Tables to be concatenated", 

1774 name="{catalogType}ForcedSourceTable", 

1775 storageClass="DataFrame", 

1776 dimensions=("tract", "patch", "skymap"), 

1777 multiple=True, 

1778 ) 

1779 

1780 outputCatalog = connectionTypes.Output( 

1781 doc="Output per-tract concatenation of DataFrame Tables", 

1782 name="{catalogType}ForcedSourceTable_tract", 

1783 storageClass="DataFrame", 

1784 dimensions=("tract", "skymap"), 

1785 ) 

1786 

1787 

1788class ConsolidateTractConfig(pipeBase.PipelineTaskConfig, 

1789 pipelineConnections=ConsolidateTractConnections): 

1790 pass 

1791 

1792 

1793class ConsolidateTractTask(CmdLineTask, pipeBase.PipelineTask): 

1794 """Concatenate any per-patch, dataframe list into a single 

1795 per-tract DataFrame 

1796 """ 

1797 _DefaultName = 'ConsolidateTract' 

1798 ConfigClass = ConsolidateTractConfig 

1799 

1800 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

1801 inputs = butlerQC.get(inputRefs) 

1802 # We do not check that at least one inputCatalog exists, because an empty list would mean an empty quantum graph (QG). 

1803 self.log.info("Concatenating %s per-patch %s Tables", 

1804 len(inputs['inputCatalogs']), 

1805 inputRefs.inputCatalogs[0].datasetType.name) 

1806 df = pd.concat(inputs['inputCatalogs']) 

1807 butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs)