# This file is part of pipe_tasks
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

import functools
import pandas as pd
import numpy as np
from collections import defaultdict

import lsst.geom
import lsst.pex.config as pexConfig
import lsst.pipe.base as pipeBase
from lsst.pipe.base import connectionTypes
import lsst.afw.table as afwTable
from lsst.meas.base import SingleFrameMeasurementTask
from lsst.pipe.base import CmdLineTask, ArgumentParser, DataIdContainer
from lsst.coadd.utils.coaddDataIdContainer import CoaddDataIdContainer

from .parquetTable import ParquetTable
from .multiBandUtils import makeMergeArgumentParser, MergeSourcesRunner
from .functors import CompositeFunctor, RAColumn, DecColumn, Column


def flattenFilters(df, noDupCols=['coord_ra', 'coord_dec'], camelCase=False):
    """Flatten a dataframe with a multilevel column index.
    """
    newDf = pd.DataFrame()
    for band in set(df.columns.to_frame()['band']):
        subdf = df[band]
        columnFormat = '{0}{1}' if camelCase else '{0}_{1}'
        newColumns = {c: columnFormat.format(band, c)
                      for c in subdf.columns if c not in noDupCols}
        cols = list(newColumns.keys())
        newDf = pd.concat([newDf, subdf[cols].rename(columns=newColumns)], axis=1)

    newDf = pd.concat([subdf[noDupCols], newDf], axis=1)
    return newDf

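
# The following sketch (an illustration only, not used by the pipeline)
# shows what `flattenFilters` does with a small, hand-built two-band table.
# The column name 'PsFlux' and the zero values are invented for the example;
# the two-level ('band', 'column') index mirrors the one built by
# `TransformObjectCatalogTask.run`.
def _exampleFlattenFilters():
    """Return a flattened version of a toy two-band dataframe.

    With ``camelCase=True`` the per-band columns come out as ``gPsFlux`` and
    ``rPsFlux``, alongside a single copy of ``coord_ra`` and ``coord_dec``.
    """
    columns = pd.MultiIndex.from_tuples(
        [(band, col) for band in ('g', 'r')
         for col in ('coord_ra', 'coord_dec', 'PsFlux')],
        names=('band', 'column'))
    df = pd.DataFrame(np.zeros((3, 6)), columns=columns)
    return flattenFilters(df, camelCase=True)
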


class WriteObjectTableConfig(pexConfig.Config):
    engine = pexConfig.Field(
        dtype=str,
        default="pyarrow",
        doc="Parquet engine for writing (pyarrow or fastparquet)"
    )
    coaddName = pexConfig.Field(
        dtype=str,
        default="deep",
        doc="Name of coadd"
    )


class WriteObjectTableTask(CmdLineTask):
    """Write filter-merged source tables to parquet
    """
    _DefaultName = "writeObjectTable"
    ConfigClass = WriteObjectTableConfig
    RunnerClass = MergeSourcesRunner

    # Names of table datasets to be merged
    inputDatasets = ('forced_src', 'meas', 'ref')

    # Tag of output dataset written by `MergeSourcesTask.write`
    outputDataset = 'obj'

    def __init__(self, butler=None, schema=None, **kwargs):
        # It is a shame that this class can't use the default init for CmdLineTask,
        # but to do so would require its own special task runner, which is many
        # more lines of specialization, so this is how it is for now.
        CmdLineTask.__init__(self, **kwargs)

    def runDataRef(self, patchRefList):
        """!
        @brief Merge coadd sources from multiple bands. Calls @ref `run` which must be defined in
        subclasses that inherit from MergeSourcesTask.
        @param[in] patchRefList list of data references for each filter
        """
        catalogs = dict(self.readCatalog(patchRef) for patchRef in patchRefList)
        dataId = patchRefList[0].dataId
        mergedCatalog = self.run(catalogs, tract=dataId['tract'], patch=dataId['patch'])
        self.write(patchRefList[0], mergedCatalog)

    @classmethod
    def _makeArgumentParser(cls):
        """Create a suitable ArgumentParser.

        We will use the ArgumentParser to get a list of data
        references for patches; the RunnerClass will sort them into lists
        of data references for the same patch.

        References the first element of self.inputDatasets, rather than
        self.inputDataset.
        """
        return makeMergeArgumentParser(cls._DefaultName, cls.inputDatasets[0])

    def readCatalog(self, patchRef):
        """Read input catalogs

        Read all the input datasets given by the 'inputDatasets'
        attribute.

        Parameters
        ----------
        patchRef : `lsst.daf.persistence.ButlerDataRef`
            Data reference for patch

        Returns
        -------
        Tuple consisting of band name and a dict of catalogs, keyed by
        dataset name
        """
        band = patchRef.get(self.config.coaddName + "Coadd_filterLabel", immediate=True).bandLabel
        catalogDict = {}
        for dataset in self.inputDatasets:
            catalog = patchRef.get(self.config.coaddName + "Coadd_" + dataset, immediate=True)
            self.log.info("Read %d sources from %s for band %s: %s" %
                          (len(catalog), dataset, band, patchRef.dataId))
            catalogDict[dataset] = catalog
        return band, catalogDict

    def run(self, catalogs, tract, patch):
        """Merge multiple catalogs.

        Parameters
        ----------
        catalogs : `dict`
            Mapping from filter names to dict of catalogs.
        tract : int
            tractId to use for the tractId column
        patch : str
            patchId to use for the patchId column

        Returns
        -------
        catalog : `lsst.pipe.tasks.parquetTable.ParquetTable`
            Merged dataframe, with each column prefixed by
            `filter_tag(filt)`, wrapped in the parquet writer shim class.
        """

        dfs = []
        for filt, tableDict in catalogs.items():
            for dataset, table in tableDict.items():
                # Convert afwTable to pandas DataFrame
                df = table.asAstropy().to_pandas().set_index('id', drop=True)

                # Sort columns by name, to ensure matching schema among patches
                df = df.reindex(sorted(df.columns), axis=1)
                df['tractId'] = tract
                df['patchId'] = patch

                # Make columns a 3-level MultiIndex
                df.columns = pd.MultiIndex.from_tuples([(dataset, filt, c) for c in df.columns],
                                                       names=('dataset', 'band', 'column'))
                dfs.append(df)

        catalog = functools.reduce(lambda d1, d2: d1.join(d2), dfs)
        return ParquetTable(dataFrame=catalog)

    def write(self, patchRef, catalog):
        """Write the output.

        Parameters
        ----------
        catalog : `ParquetTable`
            Catalog to write
        patchRef : `lsst.daf.persistence.ButlerDataRef`
            Data reference for patch
        """
        patchRef.put(catalog, self.config.coaddName + "Coadd_" + self.outputDataset)
        # since the filter isn't actually part of the data ID for the dataset we're saving,
        # it's confusing to see it in the log message, even if the butler simply ignores it.
        mergeDataId = patchRef.dataId.copy()
        del mergeDataId["filter"]
        self.log.info("Wrote merged catalog: %s" % (mergeDataId,))

    def writeMetadata(self, dataRefList):
        """No metadata to write, and not sure how to write it for a list of dataRefs.
        """
        pass


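# The sketch below (illustration only, not used by the pipeline) mimics the
# column bookkeeping in `WriteObjectTableTask.run`: each per-dataset,
# per-band table gets a 3-level ('dataset', 'band', 'column') column index,
# and the tables are then joined on their common 'id' index. The column name
# 'base_PsfFlux_instFlux', the ids, and the values are invented for the
# illustration.
def _exampleMultilevelJoin():
    """Join two toy single-band tables the way `WriteObjectTableTask.run` does."""
    dfs = []
    for dataset, filt in [('meas', 'g'), ('forced_src', 'g')]:
        df = pd.DataFrame({'base_PsfFlux_instFlux': [1.0, 2.0]},
                          index=pd.Index([10, 11], name='id'))
        df.columns = pd.MultiIndex.from_tuples([(dataset, filt, c) for c in df.columns],
                                               names=('dataset', 'band', 'column'))
        dfs.append(df)
    # Join on the shared 'id' index; column tuples differ at the 'dataset'
    # level, so the join never collides.
    return functools.reduce(lambda d1, d2: d1.join(d2), dfs)

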

class WriteSourceTableConnections(pipeBase.PipelineTaskConnections,
                                  dimensions=("instrument", "visit", "detector")):

    catalog = connectionTypes.Input(
        doc="Input full-depth catalog of sources produced by CalibrateTask",
        name="src",
        storageClass="SourceCatalog",
        dimensions=("instrument", "visit", "detector")
    )
    outputCatalog = connectionTypes.Output(
        doc="Catalog of sources, `src` in Parquet format",
        name="source",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector")
    )


class WriteSourceTableConfig(pipeBase.PipelineTaskConfig,
                             pipelineConnections=WriteSourceTableConnections):
    doApplyExternalPhotoCalib = pexConfig.Field(
        dtype=bool,
        default=False,
        doc=("Add local photoCalib columns from the calexp.photoCalib? Should only be set True if "
             "generating Source Tables from older src tables which do not already have local calib columns")
    )
    doApplyExternalSkyWcs = pexConfig.Field(
        dtype=bool,
        default=False,
        doc=("Add local WCS columns from the calexp.wcs? Should only be set True if "
             "generating Source Tables from older src tables which do not already have local calib columns")
    )


class WriteSourceTableTask(CmdLineTask, pipeBase.PipelineTask):
    """Write source table to parquet
    """
    _DefaultName = "writeSourceTable"
    ConfigClass = WriteSourceTableConfig

    def runDataRef(self, dataRef):
        src = dataRef.get('src')
        if self.config.doApplyExternalPhotoCalib or self.config.doApplyExternalSkyWcs:
            src = self.addCalibColumns(src, dataRef)

        ccdVisitId = dataRef.get('ccdExposureId')
        result = self.run(src, ccdVisitId=ccdVisitId)
        dataRef.put(result.table, 'source')

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)
        inputs['ccdVisitId'] = butlerQC.quantum.dataId.pack("visit_detector")
        result = self.run(**inputs).table
        outputs = pipeBase.Struct(outputCatalog=result.toDataFrame())
        butlerQC.put(outputs, outputRefs)

    def run(self, catalog, ccdVisitId=None):
        """Convert `src` catalog to parquet

        Parameters
        ----------
        catalog: `afwTable.SourceCatalog`
            catalog to be converted
        ccdVisitId: `int`
            ccdVisitId to be added as a column

        Returns
        -------
        result : `lsst.pipe.base.Struct`
            ``table``
                `ParquetTable` version of the input catalog
        """
        self.log.info("Generating parquet table from src catalog %s", ccdVisitId)
        df = catalog.asAstropy().to_pandas().set_index('id', drop=True)
        df['ccdVisitId'] = ccdVisitId
        return pipeBase.Struct(table=ParquetTable(dataFrame=df))

    def addCalibColumns(self, catalog, dataRef):
        """Add columns with local calibration evaluated at each centroid

        for backwards compatibility with old repos.
        This exists for the purpose of converting old src catalogs
        (which don't have the expected local calib columns) to Source Tables.

        Parameters
        ----------
        catalog: `afwTable.SourceCatalog`
            catalog to which calib columns will be added
        dataRef: `lsst.daf.persistence.ButlerDataRef`
            for fetching the calibs from disk.

        Returns
        -------
        newCat: `afwTable.SourceCatalog`
            Source Catalog with requested local calib columns
        """
        mapper = afwTable.SchemaMapper(catalog.schema)
        measureConfig = SingleFrameMeasurementTask.ConfigClass()
        measureConfig.doReplaceWithNoise = False

        # Just need the WCS or the PhotoCalib attached to an exposure
        exposure = dataRef.get('calexp_sub',
                               bbox=lsst.geom.Box2I(lsst.geom.Point2I(0, 0), lsst.geom.Point2I(0, 0)))

        mapper = afwTable.SchemaMapper(catalog.schema)
        mapper.addMinimalSchema(catalog.schema, True)
        schema = mapper.getOutputSchema()

        exposureIdInfo = dataRef.get("expIdInfo")
        measureConfig.plugins.names = []
        if self.config.doApplyExternalSkyWcs:
            plugin = 'base_LocalWcs'
            if plugin in schema:
                raise RuntimeError(f"{plugin} already in src catalog. Set doApplyExternalSkyWcs=False")
            else:
                measureConfig.plugins.names.add(plugin)

        if self.config.doApplyExternalPhotoCalib:
            plugin = 'base_LocalPhotoCalib'
            if plugin in schema:
                raise RuntimeError(f"{plugin} already in src catalog. Set doApplyExternalPhotoCalib=False")
            else:
                measureConfig.plugins.names.add(plugin)

        measurement = SingleFrameMeasurementTask(config=measureConfig, schema=schema)
        newCat = afwTable.SourceCatalog(schema)
        newCat.extend(catalog, mapper=mapper)
        measurement.run(measCat=newCat, exposure=exposure, exposureId=exposureIdInfo.expId)
        return newCat

    def writeMetadata(self, dataRef):
        """No metadata to write.
        """
        pass

    @classmethod
    def _makeArgumentParser(cls):
        parser = ArgumentParser(name=cls._DefaultName)
        parser.add_id_argument("--id", 'src',
                               help="data ID, e.g. --id visit=12345 ccd=0")
        return parser



class PostprocessAnalysis(object):
    """Calculate columns from ParquetTable

    This object manages and organizes an arbitrary set of computations
    on a catalog. The catalog is defined by a
    `lsst.pipe.tasks.parquetTable.ParquetTable` object (or list thereof), such as a
    `deepCoadd_obj` dataset, and the computations are defined by a collection
    of `lsst.pipe.tasks.functor.Functor` objects (or, equivalently,
    a `CompositeFunctor`).

    After the object is initialized, accessing the `.df` attribute (which
    holds the `pandas.DataFrame` containing the results of the calculations) triggers
    computation of said dataframe.

    One of the conveniences of using this object is the ability to define a desired common
    filter for all functors. This enables the same functor collection to be passed to
    several different `PostprocessAnalysis` objects without having to change the original
    functor collection, since the `filt` keyword argument of this object triggers an
    overwrite of the `filt` property for all functors in the collection.

    This object also allows a list of refFlags to be passed, and defines a set of default
    refFlags that are always included even if not requested.

    If a list of `ParquetTable` objects is passed, rather than a single one, then the
    calculations will be mapped over all the input catalogs. In principle, it should
    be straightforward to parallelize this activity, but initial tests have failed
    (see TODO in code comments).

    Parameters
    ----------
    parq : `lsst.pipe.tasks.ParquetTable` (or list of such)
        Source catalog(s) for computation

    functors : `list`, `dict`, or `lsst.pipe.tasks.functors.CompositeFunctor`
        Computations to do (functors that act on `parq`).
        If a dict, the output
        DataFrame will have columns keyed accordingly.
        If a list, the column keys will come from the
        `.shortname` attribute of each functor.

    filt : `str` (optional)
        Filter in which to calculate. If provided,
        this will overwrite any existing `.filt` attribute
        of the provided functors.

    flags : `list` (optional)
        List of flags (per-band) to include in output table.

    refFlags : `list` (optional)
        List of refFlags (only reference band) to include in output table.

    """
    _defaultRefFlags = []
    _defaultFuncs = (('coord_ra', RAColumn()),
                     ('coord_dec', DecColumn()))

    def __init__(self, parq, functors, filt=None, flags=None, refFlags=None):
        self.parq = parq
        self.functors = functors

        self.filt = filt
        self.flags = list(flags) if flags is not None else []
        self.refFlags = list(self._defaultRefFlags)
        if refFlags is not None:
            self.refFlags += list(refFlags)

        self._df = None

    @property
    def defaultFuncs(self):
        funcs = dict(self._defaultFuncs)
        return funcs

    @property
    def func(self):
        additionalFuncs = self.defaultFuncs
        additionalFuncs.update({flag: Column(flag, dataset='ref') for flag in self.refFlags})
        additionalFuncs.update({flag: Column(flag, dataset='meas') for flag in self.flags})

        if isinstance(self.functors, CompositeFunctor):
            func = self.functors
        else:
            func = CompositeFunctor(self.functors)

        func.funcDict.update(additionalFuncs)
        func.filt = self.filt

        return func

    @property
    def noDupCols(self):
        return [name for name, func in self.func.funcDict.items() if func.noDup or func.dataset == 'ref']

    @property
    def df(self):
        if self._df is None:
            self.compute()
        return self._df

    def compute(self, dropna=False, pool=None):
        # map over multiple parquet tables
        if type(self.parq) in (list, tuple):
            if pool is None:
                dflist = [self.func(parq, dropna=dropna) for parq in self.parq]
            else:
                # TODO: Figure out why this doesn't work (pyarrow pickling issues?)
                dflist = pool.map(functools.partial(self.func, dropna=dropna), self.parq)
            self._df = pd.concat(dflist)
        else:
            self._df = self.func(self.parq, dropna=dropna)

        return self._df


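# A minimal usage sketch (not exercised by the pipeline) of how a
# `PostprocessAnalysis` is typically driven: a functor dictionary keyed by the
# desired output column names, an optional band, and lazy computation via the
# `.df` property. The `parq` argument and the 'base_PsfFlux_instFlux' column
# name are stand-ins; any `ParquetTable` containing that column would do.
def _examplePostprocessAnalysis(parq):
    """Return a dataframe with RA/Dec plus one raw column from ``parq``."""
    funcs = {'ra': RAColumn(),
             'dec': DecColumn(),
             'psfFlux': Column('base_PsfFlux_instFlux', dataset='meas')}
    analysis = PostprocessAnalysis(parq, funcs, filt='g',
                                   refFlags=['detect_isPrimary'])
    # Accessing .df triggers the (lazy) computation.
    return analysis.df

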

class TransformCatalogBaseConnections(pipeBase.PipelineTaskConnections,
                                      dimensions=()):
    """Expected Connections for subclasses of TransformCatalogBaseTask.

    Must be subclassed.
    """
    inputCatalog = connectionTypes.Input(
        name="",
        storageClass="DataFrame",
    )
    outputCatalog = connectionTypes.Output(
        name="",
        storageClass="DataFrame",
    )


class TransformCatalogBaseConfig(pipeBase.PipelineTaskConfig,
                                 pipelineConnections=TransformCatalogBaseConnections):
    functorFile = pexConfig.Field(
        dtype=str,
        doc='Path to YAML file specifying functors to be computed',
        default=None,
        optional=True
    )


class TransformCatalogBaseTask(CmdLineTask, pipeBase.PipelineTask):
    """Base class for transforming/standardizing a catalog

    by applying functors that convert units and apply calibrations.
    The purpose of this task is to perform a set of computations on
    an input `ParquetTable` dataset (such as `deepCoadd_obj`) and write the
    results to a new dataset (which needs to be declared in an `outputDataset`
    attribute).

    The calculations to be performed are defined in a YAML file that specifies
    a set of functors to be computed, provided as
    a `--functorFile` config parameter. An example of such a YAML file
    is the following:

        funcs:
            psfMag:
                functor: Mag
                args:
                    - base_PsfFlux
                filt: HSC-G
                dataset: meas
            cmodel_magDiff:
                functor: MagDiff
                args:
                    - modelfit_CModel
                    - base_PsfFlux
                filt: HSC-G
            gauss_magDiff:
                functor: MagDiff
                args:
                    - base_GaussianFlux
                    - base_PsfFlux
                filt: HSC-G
            count:
                functor: Column
                args:
                    - base_InputCount_value
                filt: HSC-G
            deconvolved_moments:
                functor: DeconvolvedMoments
                filt: HSC-G
                dataset: forced_src
        refFlags:
            - calib_psfUsed
            - merge_measurement_i
            - merge_measurement_r
            - merge_measurement_z
            - merge_measurement_y
            - merge_measurement_g
            - base_PixelFlags_flag_inexact_psfCenter
            - detect_isPrimary

    The names for each entry under "funcs" will become the names of columns in the
    output dataset. All the functors referenced are defined in `lsst.pipe.tasks.functors`.
    Positional arguments to be passed to each functor are in the `args` list,
    and any additional entries for each column other than "functor" or "args" (e.g., `'filt'`,
    `'dataset'`) are treated as keyword arguments to be passed to the functor initialization.

    The "refFlags" entry is a shortcut for a set of `Column` functors that take the
    named columns from the `'ref'` dataset.

    The "flags" entry will be expanded out per band.

    This task uses the `lsst.pipe.tasks.postprocess.PostprocessAnalysis` object
    to organize and execute the calculations.

    """
    @property
    def _DefaultName(self):
        raise NotImplementedError('Subclass must define "_DefaultName" attribute')

    @property
    def outputDataset(self):
        raise NotImplementedError('Subclass must define "outputDataset" attribute')

    @property
    def inputDataset(self):
        raise NotImplementedError('Subclass must define "inputDataset" attribute')

    @property
    def ConfigClass(self):
        raise NotImplementedError('Subclass must define "ConfigClass" attribute')

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        if self.config.functorFile:
            self.log.info('Loading transform functor definitions from %s',
                          self.config.functorFile)
            self.funcs = CompositeFunctor.from_file(self.config.functorFile)
            self.funcs.update(dict(PostprocessAnalysis._defaultFuncs))
        else:
            self.funcs = None

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)
        if self.funcs is None:
            raise ValueError("config.functorFile is None. "
                             "Must be a valid path to yaml in order to run Task as a PipelineTask.")
        result = self.run(parq=inputs['inputCatalog'], funcs=self.funcs,
                          dataId=outputRefs.outputCatalog.dataId.full)
        outputs = pipeBase.Struct(outputCatalog=result)
        butlerQC.put(outputs, outputRefs)

    def runDataRef(self, dataRef):
        parq = dataRef.get()
        if self.funcs is None:
            raise ValueError("config.functorFile is None. "
                             "Must be a valid path to yaml in order to run as a CommandlineTask.")
        df = self.run(parq, funcs=self.funcs, dataId=dataRef.dataId)
        self.write(df, dataRef)
        return df

    def run(self, parq, funcs=None, dataId=None, band=None):
        """Do postprocessing calculations

        Takes a `ParquetTable` object and dataId,
        returns a dataframe with results of postprocessing calculations.

        Parameters
        ----------
        parq : `lsst.pipe.tasks.parquetTable.ParquetTable`
            ParquetTable from which calculations are done.
        funcs : `lsst.pipe.tasks.functors.Functors`
            Functors to apply to the table's columns
        dataId : dict, optional
            Used to add a `patchId` column to the output dataframe.
        band : `str`, optional
            Filter band that is being processed.

        Returns
        -------
        `pandas.DataFrame`

        """
        self.log.info("Transforming/standardizing the source table dataId: %s", dataId)

        df = self.transform(band, parq, funcs, dataId).df
        self.log.info("Made a table of %d columns and %d rows", len(df.columns), len(df))
        return df

    def getFunctors(self):
        return self.funcs

    def getAnalysis(self, parq, funcs=None, band=None):
        if funcs is None:
            funcs = self.funcs
        analysis = PostprocessAnalysis(parq, funcs, filt=band)
        return analysis

    def transform(self, band, parq, funcs, dataId):
        analysis = self.getAnalysis(parq, funcs=funcs, band=band)
        df = analysis.df
        if dataId is not None:
            for key, value in dataId.items():
                df[key] = value

        return pipeBase.Struct(
            df=df,
            analysis=analysis
        )

    def write(self, df, parqRef):
        parqRef.put(ParquetTable(dataFrame=df), self.outputDataset)

    def writeMetadata(self, dataRef):
        """No metadata to write.
        """
        pass


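# Sketch (for illustration only) of the functor-loading path that
# `TransformCatalogBaseTask.__init__` and `PostprocessAnalysis` implement:
# a YAML functor file is turned into a `CompositeFunctor`, the default
# coordinate functors are added, and the composite is applied to a
# `ParquetTable`. The file path and the `parq` argument are placeholders.
def _exampleApplyFunctorFile(parq, functorFile="functors.yaml"):
    """Return the dataframe produced by applying a functor file to ``parq``."""
    funcs = CompositeFunctor.from_file(functorFile)
    funcs.update(dict(PostprocessAnalysis._defaultFuncs))
    # A CompositeFunctor is callable on a ParquetTable and returns a
    # DataFrame, which is how PostprocessAnalysis.compute evaluates it.
    return funcs(parq)

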

class TransformObjectCatalogConfig(TransformCatalogBaseConfig):
    coaddName = pexConfig.Field(
        dtype=str,
        default="deep",
        doc="Name of coadd"
    )
    # TODO: remove in DM-27177
    filterMap = pexConfig.DictField(
        keytype=str,
        itemtype=str,
        default={},
        doc=("Dictionary mapping full filter name to short one for column name munging. "
             "These filters determine the output columns no matter what filters the "
             "input data actually contain."),
        deprecated=("Coadds are now identified by the band, so this transform is unused. "
                    "Will be removed after v22.")
    )
    outputBands = pexConfig.ListField(
        dtype=str,
        default=None,
        optional=True,
        doc=("These bands and only these bands will appear in the output,"
             " NaN-filled if the input does not include them."
             " If None, then use all bands found in the input.")
    )
    camelCase = pexConfig.Field(
        dtype=bool,
        default=True,
        doc=("Write per-band column names with camelCase, else underscore. "
             "For example: gPsFlux instead of g_PsFlux.")
    )
    multilevelOutput = pexConfig.Field(
        dtype=bool,
        default=False,
        doc=("Whether results dataframe should have a multilevel column index (True) or be flat "
             "and name-munged (False).")
    )


class TransformObjectCatalogTask(TransformCatalogBaseTask):
    """Produce a flattened Object Table to match the format specified in
    sdm_schemas.

    Do the same set of postprocessing calculations on all bands.

    This is identical to `TransformCatalogBaseTask`, except that it does the
    specified functor calculations for all filters present in the
    input `deepCoadd_obj` table. Any specific `"filt"` keywords specified
    by the YAML file will be superseded.
    """
    _DefaultName = "transformObjectCatalog"
    ConfigClass = TransformObjectCatalogConfig

    inputDataset = 'deepCoadd_obj'
    outputDataset = 'objectTable'

    @classmethod
    def _makeArgumentParser(cls):
        parser = ArgumentParser(name=cls._DefaultName)
        parser.add_id_argument("--id", cls.inputDataset,
                               ContainerClass=CoaddDataIdContainer,
                               help="data ID, e.g. --id tract=12345 patch=1,2")
        return parser

    def run(self, parq, funcs=None, dataId=None, band=None):
        # NOTE: band kwarg is ignored here.
        dfDict = {}
        analysisDict = {}
        templateDf = pd.DataFrame()
        outputBands = parq.columnLevelNames['band'] if self.config.outputBands is None else \
            self.config.outputBands

        # Perform transform for data of filters that exist in parq.
        for inputBand in parq.columnLevelNames['band']:
            if inputBand not in outputBands:
                self.log.info("Ignoring %s band data in the input", inputBand)
                continue
            self.log.info("Transforming the catalog of band %s", inputBand)
            result = self.transform(inputBand, parq, funcs, dataId)
            dfDict[inputBand] = result.df
            analysisDict[inputBand] = result.analysis
            if templateDf.empty:
                templateDf = result.df

        # Fill NaNs in columns of other wanted bands
        for filt in outputBands:
            if filt not in dfDict:
                self.log.info("Adding empty columns for band %s", filt)
                dfDict[filt] = pd.DataFrame().reindex_like(templateDf)

        # This makes a multilevel column index, with band as first level
        df = pd.concat(dfDict, axis=1, names=['band', 'column'])

        if not self.config.multilevelOutput:
            noDupCols = list(set.union(*[set(v.noDupCols) for v in analysisDict.values()]))
            if dataId is not None:
                noDupCols += list(dataId.keys())
            df = flattenFilters(df, noDupCols=noDupCols, camelCase=self.config.camelCase)

        self.log.info("Made a table of %d columns and %d rows", len(df.columns), len(df))
        return df


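# Small pandas-only sketch (not part of the task) of the two tricks used in
# `TransformObjectCatalogTask.run`: `pd.concat(..., axis=1, names=[...])` to
# build the ('band', 'column') index from per-band frames, and
# `pd.DataFrame().reindex_like(...)` to make an all-NaN placeholder for a
# requested band that is absent from the input. Band and column names here
# are invented for the illustration.
def _exampleBandConcat():
    """Return a toy multilevel dataframe with an all-NaN 'i' band."""
    gDf = pd.DataFrame({'psfMag': [20.0, 21.0], 'coord_ra': [10.0, 10.1]})
    dfDict = {'g': gDf,
              'i': pd.DataFrame().reindex_like(gDf)}  # same shape, all NaN
    return pd.concat(dfDict, axis=1, names=['band', 'column'])

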

class TractObjectDataIdContainer(CoaddDataIdContainer):

    def makeDataRefList(self, namespace):
        """Make self.refList from self.idList

        Generate a list of data references given tract and/or patch.
        This was adapted from `TractQADataIdContainer`, which was
        `TractDataIdContainer` modified to not require "filter".
        Only existing dataRefs are returned.
        """
        def getPatchRefList(tract):
            return [namespace.butler.dataRef(datasetType=self.datasetType,
                                             tract=tract.getId(),
                                             patch="%d,%d" % patch.getIndex()) for patch in tract]

        tractRefs = defaultdict(list)  # Data references for each tract
        for dataId in self.idList:
            skymap = self.getSkymap(namespace)

            if "tract" in dataId:
                tractId = dataId["tract"]
                if "patch" in dataId:
                    tractRefs[tractId].append(namespace.butler.dataRef(datasetType=self.datasetType,
                                                                       tract=tractId,
                                                                       patch=dataId['patch']))
                else:
                    tractRefs[tractId] += getPatchRefList(skymap[tractId])
            else:
                tractRefs = dict((tract.getId(), tractRefs.get(tract.getId(), []) + getPatchRefList(tract))
                                 for tract in skymap)
        outputRefList = []
        for tractRefList in tractRefs.values():
            existingRefs = [ref for ref in tractRefList if ref.datasetExists()]
            outputRefList.append(existingRefs)

        self.refList = outputRefList


class ConsolidateObjectTableConfig(pexConfig.Config):
    coaddName = pexConfig.Field(
        dtype=str,
        default="deep",
        doc="Name of coadd"
    )


class ConsolidateObjectTableTask(CmdLineTask):
    """Write patch-merged source tables to a tract-level parquet file
    """
    _DefaultName = "consolidateObjectTable"
    ConfigClass = ConsolidateObjectTableConfig

    inputDataset = 'objectTable'
    outputDataset = 'objectTable_tract'

    @classmethod
    def _makeArgumentParser(cls):
        parser = ArgumentParser(name=cls._DefaultName)

        parser.add_id_argument("--id", cls.inputDataset,
                               help="data ID, e.g. --id tract=12345",
                               ContainerClass=TractObjectDataIdContainer)
        return parser

    def runDataRef(self, patchRefList):
        df = pd.concat([patchRef.get().toDataFrame() for patchRef in patchRefList])
        patchRefList[0].put(ParquetTable(dataFrame=df), self.outputDataset)

    def writeMetadata(self, dataRef):
        """No metadata to write.
        """
        pass



class TransformSourceTableConnections(pipeBase.PipelineTaskConnections,
                                      dimensions=("instrument", "visit", "detector")):

    inputCatalog = connectionTypes.Input(
        doc="Wide input catalog of sources produced by WriteSourceTableTask",
        name="source",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector"),
        deferLoad=True
    )
    outputCatalog = connectionTypes.Output(
        doc="Narrower, per-detector Source Table transformed and converted per a "
            "specified set of functors",
        name="sourceTable",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector")
    )


class TransformSourceTableConfig(TransformCatalogBaseConfig,
                                 pipelineConnections=TransformSourceTableConnections):
    pass


class TransformSourceTableTask(TransformCatalogBaseTask):
    """Transform/standardize a source catalog
    """
    _DefaultName = "transformSourceTable"
    ConfigClass = TransformSourceTableConfig

    inputDataset = 'source'
    outputDataset = 'sourceTable'

    @classmethod
    def _makeArgumentParser(cls):
        parser = ArgumentParser(name=cls._DefaultName)
        parser.add_id_argument("--id", datasetType=cls.inputDataset,
                               level="sensor",
                               help="data ID, e.g. --id visit=12345 ccd=0")
        return parser

    def runDataRef(self, dataRef):
        """Override to specify band label to run()."""
        parq = dataRef.get()
        funcs = self.getFunctors()
        band = dataRef.get("calexp_filterLabel", immediate=True).bandLabel
        df = self.run(parq, funcs=funcs, dataId=dataRef.dataId, band=band)
        self.write(df, dataRef)
        return df



class ConsolidateVisitSummaryConnections(pipeBase.PipelineTaskConnections,
                                         dimensions=("instrument", "visit",),
                                         defaultTemplates={}):
    calexp = connectionTypes.Input(
        doc="Processed exposures used for metadata",
        name="calexp",
        storageClass="ExposureF",
        dimensions=("instrument", "visit", "detector"),
        deferLoad=True,
        multiple=True,
    )
    visitSummary = connectionTypes.Output(
        doc="Consolidated visit-level exposure metadata",
        name="visitSummary",
        storageClass="ExposureCatalog",
        dimensions=("instrument", "visit"),
    )


class ConsolidateVisitSummaryConfig(pipeBase.PipelineTaskConfig,
                                    pipelineConnections=ConsolidateVisitSummaryConnections):
    """Config for ConsolidateVisitSummaryTask"""
    pass


class ConsolidateVisitSummaryTask(pipeBase.PipelineTask, pipeBase.CmdLineTask):
    """Task to consolidate per-detector visit metadata.

    This task aggregates the following metadata from all the detectors in a
    single visit into an exposure catalog:
    - The visitInfo.
    - The wcs.
    - The photoCalib.
    - The physical_filter and band (if available).
    - The psf size, shape, and effective area at the center of the detector.
    - The corners of the bounding box in right ascension/declination.

    Other quantities such as Psf, ApCorrMap, and TransmissionCurve are not
    persisted here because of storage concerns, and because of their limited
    utility as summary statistics.

    Tests for this task are performed in ci_hsc_gen3.
    """
    _DefaultName = "consolidateVisitSummary"
    ConfigClass = ConsolidateVisitSummaryConfig

    @classmethod
    def _makeArgumentParser(cls):
        parser = ArgumentParser(name=cls._DefaultName)

        parser.add_id_argument("--id", "calexp",
                               help="data ID, e.g. --id visit=12345",
                               ContainerClass=VisitDataIdContainer)
        return parser

    def writeMetadata(self, dataRef):
        """No metadata to persist, so override to remove metadata persistence.
        """
        pass

    def writeConfig(self, butler, clobber=False, doBackup=True):
        """No config to persist, so override to remove config persistence.
        """
        pass

    def runDataRef(self, dataRefList):
        visit = dataRefList[0].dataId['visit']

        self.log.debug("Concatenating metadata from %d per-detector calexps (visit %d)" %
                       (len(dataRefList), visit))

        expCatalog = self._combineExposureMetadata(visit, dataRefList, isGen3=False)

        dataRefList[0].put(expCatalog, 'visitSummary', visit=visit)

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        dataRefs = butlerQC.get(inputRefs.calexp)
        visit = dataRefs[0].dataId.byName()['visit']

        self.log.debug("Concatenating metadata from %d per-detector calexps (visit %d)" %
                       (len(dataRefs), visit))

        expCatalog = self._combineExposureMetadata(visit, dataRefs)

        butlerQC.put(expCatalog, outputRefs.visitSummary)

    def _combineExposureMetadata(self, visit, dataRefs, isGen3=True):
        """Make a combined exposure catalog from a list of dataRefs.

        Parameters
        ----------
        visit : `int`
            Visit identification number
        dataRefs : `list`
            List of calexp dataRefs in visit. May be list of
            `lsst.daf.persistence.ButlerDataRef` (Gen2) or
            `lsst.daf.butler.DeferredDatasetHandle` (Gen3).
        isGen3 : `bool`, optional
            Specifies if this is a Gen3 list of datarefs.

        Returns
        -------
        visitSummary : `lsst.afw.table.ExposureCatalog`
            Exposure catalog with per-detector summary information.
        """
        schema = afwTable.ExposureTable.makeMinimalSchema()
        schema.addField('visit', type='I', doc='Visit number')
        schema.addField('detector_id', type='I', doc='Detector number')
        schema.addField('physical_filter', type='String', size=32, doc='Physical filter')
        schema.addField('band', type='String', size=32, doc='Name of band')
        schema.addField('psfSigma', type='F',
                        doc='PSF model second-moments determinant radius (center of chip) (pixel)')
        schema.addField('psfArea', type='F',
                        doc='PSF model effective area (center of chip) (pixel**2)')
        schema.addField('psfIxx', type='F',
                        doc='PSF model Ixx (center of chip) (pixel**2)')
        schema.addField('psfIyy', type='F',
                        doc='PSF model Iyy (center of chip) (pixel**2)')
        schema.addField('psfIxy', type='F',
                        doc='PSF model Ixy (center of chip) (pixel**2)')
        schema.addField('raCorners', type='ArrayD', size=4,
                        doc='Right Ascension of bounding box corners (degrees)')
        schema.addField('decCorners', type='ArrayD', size=4,
                        doc='Declination of bounding box corners (degrees)')

        cat = afwTable.ExposureCatalog(schema)
        cat.resize(len(dataRefs))

        cat['visit'] = visit

        for i, dataRef in enumerate(dataRefs):
            if isGen3:
                visitInfo = dataRef.get(component='visitInfo')
                filterLabel = dataRef.get(component='filterLabel')
                psf = dataRef.get(component='psf')
                wcs = dataRef.get(component='wcs')
                photoCalib = dataRef.get(component='photoCalib')
                detector = dataRef.get(component='detector')
                bbox = dataRef.get(component='bbox')
                validPolygon = dataRef.get(component='validPolygon')
            else:
                # Note that we need to read the calexp because there is
                # no magic access to the psf except through the exposure.
                gen2_read_bbox = lsst.geom.BoxI(lsst.geom.PointI(0, 0), lsst.geom.PointI(1, 1))
                exp = dataRef.get(datasetType='calexp_sub', bbox=gen2_read_bbox)
                visitInfo = exp.getInfo().getVisitInfo()
                filterLabel = dataRef.get("calexp_filterLabel")
                psf = exp.getPsf()
                wcs = exp.getWcs()
                photoCalib = exp.getPhotoCalib()
                detector = exp.getDetector()
                bbox = dataRef.get(datasetType='calexp_bbox')
                validPolygon = exp.getInfo().getValidPolygon()

            rec = cat[i]
            rec.setBBox(bbox)
            rec.setVisitInfo(visitInfo)
            rec.setWcs(wcs)
            rec.setPhotoCalib(photoCalib)
            rec.setDetector(detector)
            rec.setValidPolygon(validPolygon)

            rec['physical_filter'] = filterLabel.physicalLabel if filterLabel.hasPhysicalLabel() else ""
            rec['band'] = filterLabel.bandLabel if filterLabel.hasBandLabel() else ""
            rec['detector_id'] = detector.getId()
            shape = psf.computeShape(bbox.getCenter())
            rec['psfSigma'] = shape.getDeterminantRadius()
            rec['psfIxx'] = shape.getIxx()
            rec['psfIyy'] = shape.getIyy()
            rec['psfIxy'] = shape.getIxy()
            im = psf.computeKernelImage(bbox.getCenter())
            # The calculation of effective psf area is taken from
            # meas_base/src/PsfFlux.cc#L112. See
            # https://github.com/lsst/meas_base/blob/
            # 750bffe6620e565bda731add1509507f5c40c8bb/src/PsfFlux.cc#L112
            rec['psfArea'] = np.sum(im.array)/np.sum(im.array**2.)

            sph_pts = wcs.pixelToSky(lsst.geom.Box2D(bbox).getCorners())
            rec['raCorners'][:] = [sph.getRa().asDegrees() for sph in sph_pts]
            rec['decCorners'][:] = [sph.getDec().asDegrees() for sph in sph_pts]

        return cat


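# A numpy-only sketch (illustration, not used by the task) of the effective
# PSF area computed above as np.sum(im)/np.sum(im**2). For a normalized
# Gaussian PSF this quantity approaches the analytic value 4*pi*sigma**2,
# which is a handy sanity check; the grid size and sigma below are arbitrary.
def _exampleEffectivePsfArea(sigma=2.0, halfSize=30):
    """Return (numerical, analytic) effective area for a Gaussian PSF."""
    y, x = np.mgrid[-halfSize:halfSize + 1, -halfSize:halfSize + 1]
    im = np.exp(-(x**2 + y**2)/(2.0*sigma**2))
    im /= np.sum(im)  # normalize like a PSF kernel image
    numerical = np.sum(im)/np.sum(im**2)
    analytic = 4.0*np.pi*sigma**2
    return numerical, analytic

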

class VisitDataIdContainer(DataIdContainer):
    """DataIdContainer that groups sensor-level ids by visit
    """

    def makeDataRefList(self, namespace):
        """Make self.refList from self.idList

        Generate a list of data references grouped by visit.

        Parameters
        ----------
        namespace : `argparse.Namespace`
            Namespace used by `lsst.pipe.base.CmdLineTask` to parse command line arguments
        """
        # Group by visits
        visitRefs = defaultdict(list)
        for dataId in self.idList:
            if "visit" in dataId:
                visitId = dataId["visit"]
                # append all dataRefs in this visit's subset to the list
                subset = namespace.butler.subset(self.datasetType, dataId=dataId)
                visitRefs[visitId].extend([dataRef for dataRef in subset])

        outputRefList = []
        for refList in visitRefs.values():
            existingRefs = [ref for ref in refList if ref.datasetExists()]
            if existingRefs:
                outputRefList.append(existingRefs)

        self.refList = outputRefList


class ConsolidateSourceTableConnections(pipeBase.PipelineTaskConnections,
                                        dimensions=("instrument", "visit")):
    inputCatalogs = connectionTypes.Input(
        doc="Input per-detector Source Tables",
        name="sourceTable",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector"),
        multiple=True
    )
    outputCatalog = connectionTypes.Output(
        doc="Per-visit concatenation of Source Table",
        name="sourceTable_visit",
        storageClass="DataFrame",
        dimensions=("instrument", "visit")
    )


class ConsolidateSourceTableConfig(pipeBase.PipelineTaskConfig,
                                   pipelineConnections=ConsolidateSourceTableConnections):
    pass


class ConsolidateSourceTableTask(CmdLineTask, pipeBase.PipelineTask):
    """Concatenate a `sourceTable` list into a per-visit `sourceTable_visit`
    """
    _DefaultName = 'consolidateSourceTable'
    ConfigClass = ConsolidateSourceTableConfig

    inputDataset = 'sourceTable'
    outputDataset = 'sourceTable_visit'

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)
        self.log.info("Concatenating %s per-detector Source Tables",
                      len(inputs['inputCatalogs']))
        df = pd.concat(inputs['inputCatalogs'])
        butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs)

    def runDataRef(self, dataRefList):
        self.log.info("Concatenating %s per-detector Source Tables", len(dataRefList))
        df = pd.concat([dataRef.get().toDataFrame() for dataRef in dataRefList])
        dataRefList[0].put(ParquetTable(dataFrame=df), self.outputDataset)

    @classmethod
    def _makeArgumentParser(cls):
        parser = ArgumentParser(name=cls._DefaultName)

        parser.add_id_argument("--id", cls.inputDataset,
                               help="data ID, e.g. --id visit=12345",
                               ContainerClass=VisitDataIdContainer)
        return parser

    def writeMetadata(self, dataRef):
        """No metadata to write.
        """
        pass

    def writeConfig(self, butler, clobber=False, doBackup=True):
        """No config to write.
        """
        pass