
1# This file is part of pipe_tasks 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

22import functools 

23import pandas as pd 

24import numpy as np 

25from collections import defaultdict 

26 

27import lsst.geom 

28import lsst.pex.config as pexConfig 

29import lsst.pipe.base as pipeBase 

30import lsst.daf.base as dafBase 

31from lsst.pipe.base import connectionTypes 

32import lsst.afw.table as afwTable 

33from lsst.meas.base import SingleFrameMeasurementTask 

34from lsst.pipe.base import CmdLineTask, ArgumentParser, DataIdContainer 

35from lsst.coadd.utils.coaddDataIdContainer import CoaddDataIdContainer 

36 

37from .parquetTable import ParquetTable 

38from .multiBandUtils import makeMergeArgumentParser, MergeSourcesRunner 

39from .functors import CompositeFunctor, RAColumn, DecColumn, Column 

40 

41 

42def flattenFilters(df, noDupCols=['coord_ra', 'coord_dec'], camelCase=False): 

43 """Flattens a dataframe with multilevel column index 

44 """ 

45 newDf = pd.DataFrame() 

46 for band in set(df.columns.to_frame()['band']): 

47 subdf = df[band] 

48 columnFormat = '{0}{1}' if camelCase else '{0}_{1}' 

49 newColumns = {c: columnFormat.format(band, c) 

50 for c in subdf.columns if c not in noDupCols} 

51 cols = list(newColumns.keys()) 

52 newDf = pd.concat([newDf, subdf[cols].rename(columns=newColumns)], axis=1) 

53 

54 newDf = pd.concat([subdf[noDupCols], newDf], axis=1) 

55 return newDf 

56 

57 
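# A minimal, illustrative sketch (not part of the pipeline): build a toy
# dataframe with the ('band', 'column') MultiIndex produced by
# TransformObjectCatalogTask.run and flatten it with flattenFilters. The
# column names and data here are hypothetical.
def _exampleFlattenFilters():
    columns = pd.MultiIndex.from_product(
        [('g', 'r'), ('coord_ra', 'coord_dec', 'PsFlux')],
        names=('band', 'column'))
    df = pd.DataFrame(np.random.rand(4, len(columns)), columns=columns)
    # Returns a flat dataframe with columns coord_ra, coord_dec, gPsFlux, rPsFlux.
    return flattenFilters(df, camelCase=True)
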

58class WriteObjectTableConfig(pexConfig.Config): 

59 engine = pexConfig.Field( 

60 dtype=str, 

61 default="pyarrow", 

62 doc="Parquet engine for writing (pyarrow or fastparquet)" 

63 ) 

64 coaddName = pexConfig.Field( 

65 dtype=str, 

66 default="deep", 

67 doc="Name of coadd" 

68 ) 

69 

70 

71class WriteObjectTableTask(CmdLineTask): 

72 """Write filter-merged source tables to parquet 

73 """ 

74 _DefaultName = "writeObjectTable" 

75 ConfigClass = WriteObjectTableConfig 

76 RunnerClass = MergeSourcesRunner 

77 

78 # Names of table datasets to be merged 

79 inputDatasets = ('forced_src', 'meas', 'ref') 

80 

81 # Tag of output dataset written by `MergeSourcesTask.write` 

82 outputDataset = 'obj' 

83 

84 def __init__(self, butler=None, schema=None, **kwargs): 

85 # It is a shame that this class can't use the default init for CmdLineTask 

86 # But to do so would require its own special task runner, which is many 

87 # more lines of specialization, so this is how it is for now 

88 CmdLineTask.__init__(self, **kwargs) 

89 

90 def runDataRef(self, patchRefList): 

91 """! 

92 @brief Merge coadd sources from multiple bands. Calls @ref `run` which must be defined in 

93 subclasses that inherit from MergeSourcesTask. 

94 @param[in] patchRefList list of data references for each filter 

95 """ 

96 catalogs = dict(self.readCatalog(patchRef) for patchRef in patchRefList) 

97 dataId = patchRefList[0].dataId 

98 mergedCatalog = self.run(catalogs, tract=dataId['tract'], patch=dataId['patch']) 

99 self.write(patchRefList[0], mergedCatalog) 

100 

101 @classmethod 

102 def _makeArgumentParser(cls): 

103 """Create a suitable ArgumentParser. 

104 

105 We will use the ArgumentParser to get a list of data 

106 references for patches; the RunnerClass will sort them into lists 

107 of data references for the same patch. 

108 

109 References the first of self.inputDatasets, rather than

110 self.inputDataset.

111 """ 

112 return makeMergeArgumentParser(cls._DefaultName, cls.inputDatasets[0]) 

113 

114 def readCatalog(self, patchRef): 

115 """Read input catalogs 

116 

117 Read all the input datasets given by the 'inputDatasets' 

118 attribute. 

119 

120 Parameters 

121 ---------- 

122 patchRef : `lsst.daf.persistence.ButlerDataRef` 

123 Data reference for patch 

124 

125 Returns 

126 ------- 

127 Tuple consisting of band name and a dict of catalogs, keyed by 

128 dataset name 

129 """ 

130 band = patchRef.get(self.config.coaddName + "Coadd_filterLabel", immediate=True).bandLabel 

131 catalogDict = {} 

132 for dataset in self.inputDatasets: 

133 catalog = patchRef.get(self.config.coaddName + "Coadd_" + dataset, immediate=True) 

134 self.log.info("Read %d sources from %s for band %s: %s" % 

135 (len(catalog), dataset, band, patchRef.dataId)) 

136 catalogDict[dataset] = catalog 

137 return band, catalogDict 

138 

139 def run(self, catalogs, tract, patch): 

140 """Merge multiple catalogs. 

141 

142 Parameters 

143 ---------- 

144 catalogs : `dict` 

145 Mapping from filter names to dict of catalogs. 

146 tract : `int`

147 tractId to use for the tractId column.

148 patch : `str`

149 patchId to use for the patchId column.

150 

151 Returns 

152 ------- 

153 catalog : `lsst.pipe.tasks.parquetTable.ParquetTable` 

154 Merged dataframe, with the columns arranged in a 3-level

155 (dataset, band, column) MultiIndex, wrapped in the parquet writer shim class.

156 """ 

157 

158 dfs = [] 

159 for filt, tableDict in catalogs.items(): 

160 for dataset, table in tableDict.items(): 

161 # Convert afwTable to pandas DataFrame 

162 df = table.asAstropy().to_pandas().set_index('id', drop=True) 

163 

164 # Sort columns by name, to ensure matching schema among patches 

165 df = df.reindex(sorted(df.columns), axis=1) 

166 df['tractId'] = tract 

167 df['patchId'] = patch 

168 

169 # Make columns a 3-level MultiIndex 

170 df.columns = pd.MultiIndex.from_tuples([(dataset, filt, c) for c in df.columns], 

171 names=('dataset', 'band', 'column')) 

172 dfs.append(df) 

173 

174 catalog = functools.reduce(lambda d1, d2: d1.join(d2), dfs) 

175 return ParquetTable(dataFrame=catalog) 

176 

177 def write(self, patchRef, catalog): 

178 """Write the output. 

179 

180 Parameters 

181 ---------- 

182 catalog : `ParquetTable` 

183 Catalog to write 

184 patchRef : `lsst.daf.persistence.ButlerDataRef` 

185 Data reference for patch 

186 """ 

187 patchRef.put(catalog, self.config.coaddName + "Coadd_" + self.outputDataset) 

188 # since the filter isn't actually part of the data ID for the dataset we're saving, 

189 # it's confusing to see it in the log message, even if the butler simply ignores it. 

190 mergeDataId = patchRef.dataId.copy() 

191 del mergeDataId["filter"] 

192 self.log.info("Wrote merged catalog: %s" % (mergeDataId,)) 

193 

194 def writeMetadata(self, dataRefList): 

195 """No metadata to write, and not sure how to write it for a list of dataRefs. 

196 """ 

197 pass 

198 

199 
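# A pure-pandas sketch of the merge performed in WriteObjectTableTask.run:
# each per-(dataset, band) catalog becomes a DataFrame whose columns are
# wrapped in a 3-level (dataset, band, column) MultiIndex, and the frames are
# then joined on the shared object-id index. The datasets, bands, ids, and
# column names below are toy stand-ins, not real pipeline outputs.
def _exampleObjectTableMerge():
    ids = [1, 2, 3]
    dfs = []
    for dataset in ('meas', 'forced_src'):
        for band in ('g', 'r'):
            df = pd.DataFrame({'instFlux': np.random.rand(len(ids))}, index=ids)
            df.columns = pd.MultiIndex.from_tuples(
                [(dataset, band, c) for c in df.columns],
                names=('dataset', 'band', 'column'))
            dfs.append(df)
    # Same reduction as in run(): successive joins on the index.
    return functools.reduce(lambda d1, d2: d1.join(d2), dfs)
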

200class WriteSourceTableConnections(pipeBase.PipelineTaskConnections, 

201 dimensions=("instrument", "visit", "detector")): 

202 

203 catalog = connectionTypes.Input( 

204 doc="Input full-depth catalog of sources produced by CalibrateTask", 

205 name="src", 

206 storageClass="SourceCatalog", 

207 dimensions=("instrument", "visit", "detector") 

208 ) 

209 outputCatalog = connectionTypes.Output( 

210 doc="Catalog of sources, `src` in Parquet format", 

211 name="source", 

212 storageClass="DataFrame", 

213 dimensions=("instrument", "visit", "detector") 

214 ) 

215 

216 

217class WriteSourceTableConfig(pipeBase.PipelineTaskConfig, 

218 pipelineConnections=WriteSourceTableConnections): 

219 doApplyExternalPhotoCalib = pexConfig.Field( 

220 dtype=bool, 

221 default=False, 

222 doc=("Add local photoCalib columns from the calexp.photoCalib? Should only set True if " 

223 "generating Source Tables from older src tables which do not already have local calib columns") 

224 ) 

225 doApplyExternalSkyWcs = pexConfig.Field( 

226 dtype=bool, 

227 default=False, 

228 doc=("Add local WCS columns from the calexp.wcs? Should only set True if " 

229 "generating Source Tables from older src tables which do not already have local calib columns") 

230 ) 

231 

232 

233class WriteSourceTableTask(CmdLineTask, pipeBase.PipelineTask): 

234 """Write source table to parquet 

235 """ 

236 _DefaultName = "writeSourceTable" 

237 ConfigClass = WriteSourceTableConfig 

238 

239 def runDataRef(self, dataRef): 

240 src = dataRef.get('src') 

241 if self.config.doApplyExternalPhotoCalib or self.config.doApplyExternalSkyWcs: 

242 src = self.addCalibColumns(src, dataRef) 

243 

244 ccdVisitId = dataRef.get('ccdExposureId') 

245 result = self.run(src, ccdVisitId=ccdVisitId) 

246 dataRef.put(result.table, 'source') 

247 

248 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

249 inputs = butlerQC.get(inputRefs) 

250 inputs['ccdVisitId'] = butlerQC.quantum.dataId.pack("visit_detector") 

251 result = self.run(**inputs).table 

252 outputs = pipeBase.Struct(outputCatalog=result.toDataFrame()) 

253 butlerQC.put(outputs, outputRefs) 

254 

255 def run(self, catalog, ccdVisitId=None): 

256 """Convert `src` catalog to parquet 

257 

258 Parameters 

259 ---------- 

260 catalog : `afwTable.SourceCatalog`

261 Catalog to be converted.

262 ccdVisitId : `int`

263 ccdVisitId to be added as a column.

264 

265 Returns 

266 ------- 

267 result : `lsst.pipe.base.Struct` 

268 ``table`` 

269 `ParquetTable` version of the input catalog 

270 """ 

271 self.log.info("Generating parquet table from src catalog %s", ccdVisitId) 

272 df = catalog.asAstropy().to_pandas().set_index('id', drop=True) 

273 df['ccdVisitId'] = ccdVisitId 

274 return pipeBase.Struct(table=ParquetTable(dataFrame=df)) 

275 

276 def addCalibColumns(self, catalog, dataRef): 

277 """Add columns with local calibration evaluated at each centroid 

278 

279 for backwards compatibility with old repos. 

280 This exists for the purpose of converting old src catalogs 

281 (which don't have the expected local calib columns) to Source Tables. 

282 

283 Parameters 

284 ---------- 

285 catalog : `afwTable.SourceCatalog`

286 Catalog to which calib columns will be added.

287 dataRef : `lsst.daf.persistence.ButlerDataRef`

288 Data reference for fetching the calibs from disk.

289 

290 Returns 

291 ------- 

292 newCat : `afwTable.SourceCatalog`

293 Source catalog with the requested local calib columns.

294 """ 

295 mapper = afwTable.SchemaMapper(catalog.schema) 

296 measureConfig = SingleFrameMeasurementTask.ConfigClass() 

297 measureConfig.doReplaceWithNoise = False 

298 

299 # Just need the WCS or the PhotoCalib attached to an exposure

300 exposure = dataRef.get('calexp_sub', 

301 bbox=lsst.geom.Box2I(lsst.geom.Point2I(0, 0), lsst.geom.Point2I(0, 0))) 

302 

303 mapper = afwTable.SchemaMapper(catalog.schema) 

304 mapper.addMinimalSchema(catalog.schema, True) 

305 schema = mapper.getOutputSchema() 

306 

307 exposureIdInfo = dataRef.get("expIdInfo") 

308 measureConfig.plugins.names = [] 

309 if self.config.doApplyExternalSkyWcs: 

310 plugin = 'base_LocalWcs' 

311 if plugin in schema: 

312 raise RuntimeError(f"{plugin} already in src catalog. Set doApplyExternalSkyWcs=False") 

313 else: 

314 measureConfig.plugins.names.add(plugin) 

315 

316 if self.config.doApplyExternalPhotoCalib: 

317 plugin = 'base_LocalPhotoCalib' 

318 if plugin in schema: 

319 raise RuntimeError(f"{plugin} already in src catalog. Set doApplyExternalPhotoCalib=False") 

320 else: 

321 measureConfig.plugins.names.add(plugin) 

322 

323 measurement = SingleFrameMeasurementTask(config=measureConfig, schema=schema) 

324 newCat = afwTable.SourceCatalog(schema) 

325 newCat.extend(catalog, mapper=mapper) 

326 measurement.run(measCat=newCat, exposure=exposure, exposureId=exposureIdInfo.expId) 

327 return newCat 

328 

329 def writeMetadata(self, dataRef): 

330 """No metadata to write. 

331 """ 

332 pass 

333 

334 @classmethod 

335 def _makeArgumentParser(cls): 

336 parser = ArgumentParser(name=cls._DefaultName) 

337 parser.add_id_argument("--id", 'src', 

338 help="data ID, e.g. --id visit=12345 ccd=0") 

339 return parser 

340 

341 
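# A minimal sketch (not used by the pipeline) of the input WriteSourceTableTask.run
# expects: any SourceCatalog with an `id` column works, since run() only converts
# it to a DataFrame indexed by `id` and appends a ccdVisitId column. The schema
# and the ccdVisitId value here are hypothetical.
def _exampleWriteSourceTableRun():
    schema = afwTable.SourceTable.makeMinimalSchema()
    catalog = afwTable.SourceCatalog(schema)
    for _ in range(3):
        catalog.addNew()
    task = WriteSourceTableTask()
    result = task.run(catalog, ccdVisitId=123456789)
    # result.table is the ParquetTable shim wrapping the converted DataFrame.
    return result.table.toDataFrame()
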

342class PostprocessAnalysis(object): 

343 """Calculate columns from ParquetTable 

344 

345 This object manages and organizes an arbitrary set of computations 

346 on a catalog. The catalog is defined by a 

347 `lsst.pipe.tasks.parquetTable.ParquetTable` object (or list thereof), such as a 

348 `deepCoadd_obj` dataset, and the computations are defined by a collection 

349 of `lsst.pipe.tasks.functors.Functor` objects (or, equivalently,

350 a `CompositeFunctor`). 

351 

352 After the object is initialized, accessing the `.df` attribute (which

353 holds the `pandas.DataFrame` containing the results of the calculations) triggers

354 the computation and caches the resulting dataframe.

355 

356 One of the conveniences of using this object is the ability to define a desired common 

357 filter for all functors. This enables the same functor collection to be passed to 

358 several different `PostprocessAnalysis` objects without having to change the original 

359 functor collection, since the `filt` keyword argument of this object triggers an 

360 overwrite of the `filt` property for all functors in the collection. 

361 

362 This object also allows a list of refFlags to be passed, and defines a set of default 

363 refFlags that are always included even if not requested. 

364 

365 If a list of `ParquetTable` objects is passed, rather than a single one, then the

366 calculations will be mapped over all the input catalogs. In principle, it should 

367 be straightforward to parallelize this activity, but initial tests have failed 

368 (see TODO in code comments). 

369 

370 Parameters 

371 ---------- 

372 parq : `lsst.pipe.tasks.parquetTable.ParquetTable` (or list of such)

373 Source catalog(s) for computation 

374 

375 functors : `list`, `dict`, or `lsst.pipe.tasks.functors.CompositeFunctor` 

376 Computations to do (functors that act on `parq`). 

377 If a dict, the output 

378 DataFrame will have columns keyed accordingly. 

379 If a list, the column keys will come from the 

380 `.shortname` attribute of each functor. 

381 

382 filt : `str` (optional) 

383 Filter in which to calculate. If provided, 

384 this will overwrite any existing `.filt` attribute 

385 of the provided functors. 

386 

387 flags : `list` (optional) 

388 List of flags (per-band) to include in output table. 

389 

390 refFlags : `list` (optional) 

391 List of refFlags (only reference band) to include in output table. 

392 

393 

394 """ 

395 _defaultRefFlags = [] 

396 _defaultFuncs = (('coord_ra', RAColumn()), 

397 ('coord_dec', DecColumn())) 

398 

399 def __init__(self, parq, functors, filt=None, flags=None, refFlags=None): 

400 self.parq = parq 

401 self.functors = functors 

402 

403 self.filt = filt 

404 self.flags = list(flags) if flags is not None else [] 

405 self.refFlags = list(self._defaultRefFlags) 

406 if refFlags is not None: 

407 self.refFlags += list(refFlags) 

408 

409 self._df = None 

410 

411 @property 

412 def defaultFuncs(self): 

413 funcs = dict(self._defaultFuncs) 

414 return funcs 

415 

416 @property 

417 def func(self): 

418 additionalFuncs = self.defaultFuncs 

419 additionalFuncs.update({flag: Column(flag, dataset='ref') for flag in self.refFlags}) 

420 additionalFuncs.update({flag: Column(flag, dataset='meas') for flag in self.flags}) 

421 

422 if isinstance(self.functors, CompositeFunctor): 

423 func = self.functors 

424 else: 

425 func = CompositeFunctor(self.functors) 

426 

427 func.funcDict.update(additionalFuncs) 

428 func.filt = self.filt 

429 

430 return func 

431 

432 @property 

433 def noDupCols(self): 

434 return [name for name, func in self.func.funcDict.items() if func.noDup or func.dataset == 'ref'] 

435 

436 @property 

437 def df(self): 

438 if self._df is None: 

439 self.compute() 

440 return self._df 

441 

442 def compute(self, dropna=False, pool=None): 

443 # map over multiple parquet tables 

444 if type(self.parq) in (list, tuple): 

445 if pool is None: 

446 dflist = [self.func(parq, dropna=dropna) for parq in self.parq] 

447 else: 

448 # TODO: Figure out why this doesn't work (pyarrow pickling issues?) 

449 dflist = pool.map(functools.partial(self.func, dropna=dropna), self.parq) 

450 self._df = pd.concat(dflist) 

451 else: 

452 self._df = self.func(self.parq, dropna=dropna) 

453 

454 return self._df 

455 

456 
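# A usage sketch for PostprocessAnalysis. `parq` is assumed to be a
# deepCoadd_obj-style ParquetTable obtained elsewhere (e.g. from the butler);
# the flux column and flag names are hypothetical.
def _examplePostprocessAnalysis(parq):
    funcs = {'ra': RAColumn(),
             'dec': DecColumn(),
             'psfFlux': Column('base_PsfFlux_instFlux', dataset='meas')}
    analysis = PostprocessAnalysis(parq, funcs, filt='i',
                                   refFlags=['detect_isPrimary'])
    # Accessing .df triggers the computation and caches the result.
    return analysis.df
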

457class TransformCatalogBaseConnections(pipeBase.PipelineTaskConnections, 

458 dimensions=()): 

459 """Expected Connections for subclasses of TransformCatalogBaseTask. 

460 

461 Must be subclassed. 

462 """ 

463 inputCatalog = connectionTypes.Input( 

464 name="", 

465 storageClass="DataFrame", 

466 ) 

467 outputCatalog = connectionTypes.Output( 

468 name="", 

469 storageClass="DataFrame", 

470 ) 

471 

472 

473class TransformCatalogBaseConfig(pipeBase.PipelineTaskConfig, 

474 pipelineConnections=TransformCatalogBaseConnections): 

475 functorFile = pexConfig.Field( 

476 dtype=str, 

477 doc='Path to YAML file specifying functors to be computed', 

478 default=None, 

479 optional=True 

480 ) 

481 

482 

483class TransformCatalogBaseTask(CmdLineTask, pipeBase.PipelineTask): 

484 """Base class for transforming/standardizing a catalog 

485 

486 by applying functors that convert units and apply calibrations. 

487 The purpose of this task is to perform a set of computations on 

488 an input `ParquetTable` dataset (such as `deepCoadd_obj`) and write the 

489 results to a new dataset (which needs to be declared in an `outputDataset` 

490 attribute). 

491 

492 The calculations to be performed are defined in a YAML file that specifies 

493 a set of functors to be computed, provided as 

494 a `--functorFile` config parameter. An example of such a YAML file 

495 is the following: 

496 

497 funcs: 

498 psfMag: 

499 functor: Mag 

500 args: 

501 - base_PsfFlux 

502 filt: HSC-G 

503 dataset: meas 

504 cmodel_magDiff: 

505 functor: MagDiff 

506 args: 

507 - modelfit_CModel 

508 - base_PsfFlux 

509 filt: HSC-G 

510 gauss_magDiff: 

511 functor: MagDiff 

512 args: 

513 - base_GaussianFlux 

514 - base_PsfFlux 

515 filt: HSC-G 

516 count: 

517 functor: Column 

518 args: 

519 - base_InputCount_value 

520 filt: HSC-G 

521 deconvolved_moments: 

522 functor: DeconvolvedMoments 

523 filt: HSC-G 

524 dataset: forced_src 

525 refFlags: 

526 - calib_psfUsed 

527 - merge_measurement_i 

528 - merge_measurement_r 

529 - merge_measurement_z 

530 - merge_measurement_y 

531 - merge_measurement_g 

532 - base_PixelFlags_flag_inexact_psfCenter 

533 - detect_isPrimary 

534 

535 The names for each entry under "funcs" will become the names of columns in the

536 output dataset. All the functors referenced are defined in `lsst.pipe.tasks.functors`. 

537 Positional arguments to be passed to each functor are in the `args` list, 

538 and any additional entries for each column other than "functor" or "args" (e.g., `'filt'`, 

539 `'dataset'`) are treated as keyword arguments to be passed to the functor initialization. 

540 

541 The "refFlags" entry is shortcut for a bunch of `Column` functors with the original column and 

542 taken from the `'ref'` dataset. 

543 

544 The "flags" entry will be expanded out per band. 

545 

546 This task uses the `lsst.pipe.tasks.postprocess.PostprocessAnalysis` object 

547 to organize and execute the calculations.

548 

549 """ 

550 @property 

551 def _DefaultName(self): 

552 raise NotImplementedError('Subclass must define "_DefaultName" attribute') 

553 

554 @property 

555 def outputDataset(self): 

556 raise NotImplementedError('Subclass must define "outputDataset" attribute') 

557 

558 @property 

559 def inputDataset(self): 

560 raise NotImplementedError('Subclass must define "inputDataset" attribute') 

561 

562 @property 

563 def ConfigClass(self): 

564 raise NotImplementedError('Subclass must define "ConfigClass" attribute') 

565 

566 def __init__(self, *args, **kwargs): 

567 super().__init__(*args, **kwargs) 

568 if self.config.functorFile: 

569 self.log.info('Loading transform functor definitions from %s',

570 self.config.functorFile) 

571 self.funcs = CompositeFunctor.from_file(self.config.functorFile) 

572 self.funcs.update(dict(PostprocessAnalysis._defaultFuncs)) 

573 else: 

574 self.funcs = None 

575 

576 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

577 inputs = butlerQC.get(inputRefs) 

578 if self.funcs is None: 

579 raise ValueError("config.functorFile is None. " 

580 "Must be a valid path to yaml in order to run Task as a PipelineTask.") 

581 result = self.run(parq=inputs['inputCatalog'], funcs=self.funcs, 

582 dataId=outputRefs.outputCatalog.dataId.full) 

583 outputs = pipeBase.Struct(outputCatalog=result) 

584 butlerQC.put(outputs, outputRefs) 

585 

586 def runDataRef(self, dataRef): 

587 parq = dataRef.get() 

588 if self.funcs is None: 

589 raise ValueError("config.functorFile is None. " 

590 "Must be a valid path to yaml in order to run as a CommandlineTask.") 

591 df = self.run(parq, funcs=self.funcs, dataId=dataRef.dataId) 

592 self.write(df, dataRef) 

593 return df 

594 

595 def run(self, parq, funcs=None, dataId=None, band=None): 

596 """Do postprocessing calculations 

597 

598 Takes a `ParquetTable` object and dataId, 

599 returns a dataframe with results of postprocessing calculations. 

600 

601 Parameters 

602 ---------- 

603 parq : `lsst.pipe.tasks.parquetTable.ParquetTable` 

604 ParquetTable from which calculations are done. 

605 funcs : `lsst.pipe.tasks.functors.Functors` 

606 Functors to apply to the table's columns 

607 dataId : dict, optional 

608 Used to add a `patchId` column to the output dataframe. 

609 band : `str`, optional 

610 Filter band that is being processed. 

611 

612 Returns 

613 -------

614 `pandas.DataFrame` 

615 

616 """ 

617 self.log.info("Transforming/standardizing the source table dataId: %s", dataId) 

618 

619 df = self.transform(band, parq, funcs, dataId).df 

620 self.log.info("Made a table of %d columns and %d rows", len(df.columns), len(df)) 

621 return df 

622 

623 def getFunctors(self): 

624 return self.funcs 

625 

626 def getAnalysis(self, parq, funcs=None, band=None): 

627 if funcs is None: 

628 funcs = self.funcs 

629 analysis = PostprocessAnalysis(parq, funcs, filt=band) 

630 return analysis 

631 

632 def transform(self, band, parq, funcs, dataId): 

633 analysis = self.getAnalysis(parq, funcs=funcs, band=band) 

634 df = analysis.df 

635 if dataId is not None: 

636 for key, value in dataId.items(): 

637 df[key] = value 

638 

639 return pipeBase.Struct( 

640 df=df, 

641 analysis=analysis 

642 ) 

643 

644 def write(self, df, parqRef): 

645 parqRef.put(ParquetTable(dataFrame=df), self.outputDataset) 

646 

647 def writeMetadata(self, dataRef): 

648 """No metadata to write. 

649 """ 

650 pass 

651 

652 
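# A sketch of how a functor YAML file like the one in the class docstring can
# be loaded and applied outside the task framework, mirroring what
# TransformCatalogBaseTask.__init__ and PostprocessAnalysis do. The file path
# and band are hypothetical; `parq` is an existing ParquetTable.
def _exampleApplyFunctorFile(parq, functorFile='functors.yaml', band='i'):
    funcs = CompositeFunctor.from_file(functorFile)
    funcs.update(dict(PostprocessAnalysis._defaultFuncs))
    return PostprocessAnalysis(parq, funcs, filt=band).df
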

653class TransformObjectCatalogConfig(TransformCatalogBaseConfig): 

654 coaddName = pexConfig.Field( 

655 dtype=str, 

656 default="deep", 

657 doc="Name of coadd" 

658 ) 

659 # TODO: remove in DM-27177 

660 filterMap = pexConfig.DictField( 

661 keytype=str, 

662 itemtype=str, 

663 default={}, 

664 doc=("Dictionary mapping full filter name to short one for column name munging." 

665 "These filters determine the output columns no matter what filters the " 

666 "input data actually contain."), 

667 deprecated=("Coadds are now identified by the band, so this transform is unused." 

668 "Will be removed after v22.") 

669 ) 

670 outputBands = pexConfig.ListField( 

671 dtype=str, 

672 default=None, 

673 optional=True, 

674 doc=("These bands and only these bands will appear in the output," 

675 " NaN-filled if the input does not include them." 

676 " If None, then use all bands found in the input.") 

677 ) 

678 camelCase = pexConfig.Field( 

679 dtype=bool, 

680 default=True, 

681 doc=("Write per-band columns names with camelCase, else underscore " 

682 "For example: gPsFlux instead of g_PsFlux.") 

683 ) 

684 multilevelOutput = pexConfig.Field( 

685 dtype=bool, 

686 default=False, 

687 doc=("Whether results dataframe should have a multilevel column index (True) or be flat " 

688 "and name-munged (False).") 

689 ) 

690 

691 

692class TransformObjectCatalogTask(TransformCatalogBaseTask): 

693 """Produce a flattened Object Table to match the format specified in 

694 sdm_schemas. 

695 

696 Do the same set of postprocessing calculations on all bands 

697 

698 This is identical to `TransformCatalogBaseTask`, except that it does the

699 specified functor calculations for all filters present in the 

700 input `deepCoadd_obj` table. Any specific `"filt"` keywords specified 

701 by the YAML file will be superseded.

702 """ 

703 _DefaultName = "transformObjectCatalog" 

704 ConfigClass = TransformObjectCatalogConfig 

705 

706 inputDataset = 'deepCoadd_obj' 

707 outputDataset = 'objectTable' 

708 

709 @classmethod 

710 def _makeArgumentParser(cls): 

711 parser = ArgumentParser(name=cls._DefaultName) 

712 parser.add_id_argument("--id", cls.inputDataset, 

713 ContainerClass=CoaddDataIdContainer, 

714 help="data ID, e.g. --id tract=12345 patch=1,2") 

715 return parser 

716 

717 def run(self, parq, funcs=None, dataId=None, band=None): 

718 # NOTE: band kwarg is ignored here. 

719 dfDict = {} 

720 analysisDict = {} 

721 templateDf = pd.DataFrame() 

722 outputBands = parq.columnLevelNames['band'] if self.config.outputBands is None else \ 

723 self.config.outputBands 

724 

725 # Perform transform for data of filters that exist in parq. 

726 for inputBand in parq.columnLevelNames['band']: 

727 if inputBand not in outputBands: 

728 self.log.info("Ignoring %s band data in the input", inputBand) 

729 continue 

730 self.log.info("Transforming the catalog of band %s", inputBand) 

731 result = self.transform(inputBand, parq, funcs, dataId) 

732 dfDict[inputBand] = result.df 

733 analysisDict[inputBand] = result.analysis 

734 if templateDf.empty: 

735 templateDf = result.df 

736 

737 # Fill NaNs in columns of other wanted bands 

738 for filt in outputBands: 

739 if filt not in dfDict: 

740 self.log.info("Adding empty columns for band %s", filt) 

741 dfDict[filt] = pd.DataFrame().reindex_like(templateDf) 

742 

743 # This makes a multilevel column index, with band as first level 

744 df = pd.concat(dfDict, axis=1, names=['band', 'column']) 

745 

746 if not self.config.multilevelOutput: 

747 noDupCols = list(set.union(*[set(v.noDupCols) for v in analysisDict.values()])) 

748 if dataId is not None: 

749 noDupCols += list(dataId.keys()) 

750 df = flattenFilters(df, noDupCols=noDupCols, camelCase=self.config.camelCase) 

751 

752 self.log.info("Made a table of %d columns and %d rows", len(df.columns), len(df)) 

753 return df 

754 

755 
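# A pandas-only sketch of the NaN filling used above for requested output
# bands that are missing from the input: an empty frame reindexed like the
# template has the same rows and columns but is entirely NaN, and
# concatenating the per-band frames with keys rebuilds the ('band', 'column')
# MultiIndex. Toy data and band names.
def _exampleEmptyBandFill():
    templateDf = pd.DataFrame({'PsFlux': [1.0, 2.0]}, index=[10, 11])
    emptyDf = pd.DataFrame().reindex_like(templateDf)
    return pd.concat({'g': templateDf, 'z': emptyDf}, axis=1,
                     names=['band', 'column'])
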

756class TractObjectDataIdContainer(CoaddDataIdContainer): 

757 

758 def makeDataRefList(self, namespace): 

759 """Make self.refList from self.idList 

760 

761 Generate a list of data references given tract and/or patch. 

762 This was adapted from `TractQADataIdContainer`, which was 

763 `TractDataIdContainer` modified to not require "filter".

764 Only existing dataRefs are returned. 

765 """ 

766 def getPatchRefList(tract): 

767 return [namespace.butler.dataRef(datasetType=self.datasetType, 

768 tract=tract.getId(), 

769 patch="%d,%d" % patch.getIndex()) for patch in tract] 

770 

771 tractRefs = defaultdict(list) # Data references for each tract 

772 for dataId in self.idList: 

773 skymap = self.getSkymap(namespace) 

774 

775 if "tract" in dataId: 

776 tractId = dataId["tract"] 

777 if "patch" in dataId: 

778 tractRefs[tractId].append(namespace.butler.dataRef(datasetType=self.datasetType, 

779 tract=tractId, 

780 patch=dataId['patch'])) 

781 else: 

782 tractRefs[tractId] += getPatchRefList(skymap[tractId]) 

783 else: 

784 tractRefs = dict((tract.getId(), tractRefs.get(tract.getId(), []) + getPatchRefList(tract)) 

785 for tract in skymap) 

786 outputRefList = [] 

787 for tractRefList in tractRefs.values(): 

788 existingRefs = [ref for ref in tractRefList if ref.datasetExists()] 

789 outputRefList.append(existingRefs) 

790 

791 self.refList = outputRefList 

792 

793 

794class ConsolidateObjectTableConfig(pexConfig.Config): 

795 coaddName = pexConfig.Field( 

796 dtype=str, 

797 default="deep", 

798 doc="Name of coadd" 

799 ) 

800 

801 

802class ConsolidateObjectTableTask(CmdLineTask): 

803 """Write patch-merged source tables to a tract-level parquet file 

804 """ 

805 _DefaultName = "consolidateObjectTable" 

806 ConfigClass = ConsolidateObjectTableConfig 

807 

808 inputDataset = 'objectTable' 

809 outputDataset = 'objectTable_tract' 

810 

811 @classmethod 

812 def _makeArgumentParser(cls): 

813 parser = ArgumentParser(name=cls._DefaultName) 

814 

815 parser.add_id_argument("--id", cls.inputDataset, 

816 help="data ID, e.g. --id tract=12345", 

817 ContainerClass=TractObjectDataIdContainer) 

818 return parser 

819 

820 def runDataRef(self, patchRefList): 

821 df = pd.concat([patchRef.get().toDataFrame() for patchRef in patchRefList]) 

822 patchRefList[0].put(ParquetTable(dataFrame=df), self.outputDataset) 

823 

824 def writeMetadata(self, dataRef): 

825 """No metadata to write. 

826 """ 

827 pass 

828 

829 

830class TransformSourceTableConnections(pipeBase.PipelineTaskConnections, 

831 dimensions=("instrument", "visit", "detector")): 

832 

833 inputCatalog = connectionTypes.Input( 

834 doc="Wide input catalog of sources produced by WriteSourceTableTask", 

835 name="source", 

836 storageClass="DataFrame", 

837 dimensions=("instrument", "visit", "detector"), 

838 deferLoad=True 

839 ) 

840 outputCatalog = connectionTypes.Output( 

841 doc="Narrower, per-detector Source Table transformed and converted per a " 

842 "specified set of functors", 

843 name="sourceTable", 

844 storageClass="DataFrame", 

845 dimensions=("instrument", "visit", "detector") 

846 ) 

847 

848 

849class TransformSourceTableConfig(TransformCatalogBaseConfig, 

850 pipelineConnections=TransformSourceTableConnections): 

851 pass 

852 

853 

854class TransformSourceTableTask(TransformCatalogBaseTask): 

855 """Transform/standardize a source catalog 

856 """ 

857 _DefaultName = "transformSourceTable" 

858 ConfigClass = TransformSourceTableConfig 

859 

860 inputDataset = 'source' 

861 outputDataset = 'sourceTable' 

862 

863 @classmethod 

864 def _makeArgumentParser(cls): 

865 parser = ArgumentParser(name=cls._DefaultName) 

866 parser.add_id_argument("--id", datasetType=cls.inputDataset, 

867 level="sensor", 

868 help="data ID, e.g. --id visit=12345 ccd=0") 

869 return parser 

870 

871 def runDataRef(self, dataRef): 

872 """Override to specify band label to run().""" 

873 parq = dataRef.get() 

874 funcs = self.getFunctors() 

875 band = dataRef.get("calexp_filterLabel", immediate=True).bandLabel 

876 df = self.run(parq, funcs=funcs, dataId=dataRef.dataId, band=band) 

877 self.write(df, dataRef) 

878 return df 

879 

880 

881class ConsolidateVisitSummaryConnections(pipeBase.PipelineTaskConnections, 

882 dimensions=("instrument", "visit",), 

883 defaultTemplates={}): 

884 calexp = connectionTypes.Input( 

885 doc="Processed exposures used for metadata", 

886 name="calexp", 

887 storageClass="ExposureF", 

888 dimensions=("instrument", "visit", "detector"), 

889 deferLoad=True, 

890 multiple=True, 

891 ) 

892 visitSummary = connectionTypes.Output( 

893 doc=("Per-visit consolidated exposure metadata. These catalogs use " 

894 "detector id for the id and are sorted for fast lookups of a " 

895 "detector."), 

896 name="visitSummary", 

897 storageClass="ExposureCatalog", 

898 dimensions=("instrument", "visit"), 

899 ) 

900 

901 

902class ConsolidateVisitSummaryConfig(pipeBase.PipelineTaskConfig, 

903 pipelineConnections=ConsolidateVisitSummaryConnections): 

904 """Config for ConsolidateVisitSummaryTask""" 

905 pass 

906 

907 

908class ConsolidateVisitSummaryTask(pipeBase.PipelineTask, pipeBase.CmdLineTask): 

909 """Task to consolidate per-detector visit metadata. 

910 

911 This task aggregates the following metadata from all the detectors in a 

912 single visit into an exposure catalog: 

913 - The visitInfo. 

914 - The wcs. 

915 - The photoCalib. 

916 - The physical_filter and band (if available). 

917 - The psf size, shape, and effective area at the center of the detector. 

918 - The corners of the bounding box in right ascension/declination. 

919 

920 Other quantities such as Psf, ApCorrMap, and TransmissionCurve are not 

921 persisted here because of storage concerns, and because of their limited 

922 utility as summary statistics. 

923 

924 Tests for this task are performed in ci_hsc_gen3. 

925 """ 

926 _DefaultName = "consolidateVisitSummary" 

927 ConfigClass = ConsolidateVisitSummaryConfig 

928 

929 @classmethod 

930 def _makeArgumentParser(cls): 

931 parser = ArgumentParser(name=cls._DefaultName) 

932 

933 parser.add_id_argument("--id", "calexp", 

934 help="data ID, e.g. --id visit=12345", 

935 ContainerClass=VisitDataIdContainer) 

936 return parser 

937 

938 def writeMetadata(self, dataRef): 

939 """No metadata to persist, so override to remove metadata persistance. 

940 """ 

941 pass 

942 

943 def writeConfig(self, butler, clobber=False, doBackup=True): 

944 """No config to persist, so override to remove config persistance. 

945 """ 

946 pass 

947 

948 def runDataRef(self, dataRefList): 

949 visit = dataRefList[0].dataId['visit'] 

950 

951 self.log.debug("Concatenating metadata from %d per-detector calexps (visit %d)" % 

952 (len(dataRefList), visit)) 

953 

954 expCatalog = self._combineExposureMetadata(visit, dataRefList, isGen3=False) 

955 

956 dataRefList[0].put(expCatalog, 'visitSummary', visit=visit) 

957 

958 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

959 dataRefs = butlerQC.get(inputRefs.calexp) 

960 visit = dataRefs[0].dataId.byName()['visit'] 

961 

962 self.log.debug("Concatenating metadata from %d per-detector calexps (visit %d)" % 

963 (len(dataRefs), visit)) 

964 

965 expCatalog = self._combineExposureMetadata(visit, dataRefs) 

966 

967 butlerQC.put(expCatalog, outputRefs.visitSummary) 

968 

969 def _combineExposureMetadata(self, visit, dataRefs, isGen3=True): 

970 """Make a combined exposure catalog from a list of dataRefs. 

971 

972 Parameters 

973 ---------- 

974 visit : `int` 

975 Visit identification number 

976 dataRefs : `list` 

977 List of calexp dataRefs in visit. May be list of 

978 `lsst.daf.persistence.ButlerDataRef` (Gen2) or 

979 `lsst.daf.butler.DeferredDatasetHandle` (Gen3). 

980 isGen3 : `bool`, optional 

981 Specifies if this is a Gen3 list of datarefs. 

982 

983 Returns 

984 ------- 

985 visitSummary : `lsst.afw.table.ExposureCatalog` 

986 Exposure catalog with per-detector summary information. 

987 """ 

988 schema = afwTable.ExposureTable.makeMinimalSchema() 

989 schema.addField('visit', type='I', doc='Visit number') 

990 schema.addField('physical_filter', type='String', size=32, doc='Physical filter') 

991 schema.addField('band', type='String', size=32, doc='Name of band') 

992 schema.addField('psfSigma', type='F', 

993 doc='PSF model second-moments determinant radius (center of chip) (pixel)') 

994 schema.addField('psfArea', type='F', 

995 doc='PSF model effective area (center of chip) (pixel**2)') 

996 schema.addField('psfIxx', type='F', 

997 doc='PSF model Ixx (center of chip) (pixel**2)') 

998 schema.addField('psfIyy', type='F', 

999 doc='PSF model Iyy (center of chip) (pixel**2)') 

1000 schema.addField('psfIxy', type='F', 

1001 doc='PSF model Ixy (center of chip) (pixel**2)') 

1002 schema.addField('raCorners', type='ArrayD', size=4, 

1003 doc='Right Ascension of bounding box corners (degrees)') 

1004 schema.addField('decCorners', type='ArrayD', size=4, 

1005 doc='Declination of bounding box corners (degrees)') 

1006 

1007 cat = afwTable.ExposureCatalog(schema) 

1008 cat.resize(len(dataRefs)) 

1009 

1010 cat['visit'] = visit 

1011 

1012 for i, dataRef in enumerate(dataRefs): 

1013 if isGen3: 

1014 visitInfo = dataRef.get(component='visitInfo') 

1015 filterLabel = dataRef.get(component='filterLabel') 

1016 psf = dataRef.get(component='psf') 

1017 wcs = dataRef.get(component='wcs') 

1018 photoCalib = dataRef.get(component='photoCalib') 

1019 detector = dataRef.get(component='detector') 

1020 bbox = dataRef.get(component='bbox') 

1021 validPolygon = dataRef.get(component='validPolygon') 

1022 else: 

1023 # Note that we need to read the calexp because there is 

1024 # no magic access to the psf except through the exposure. 

1025 gen2_read_bbox = lsst.geom.BoxI(lsst.geom.PointI(0, 0), lsst.geom.PointI(1, 1)) 

1026 exp = dataRef.get(datasetType='calexp_sub', bbox=gen2_read_bbox) 

1027 visitInfo = exp.getInfo().getVisitInfo() 

1028 filterLabel = dataRef.get("calexp_filterLabel") 

1029 psf = exp.getPsf() 

1030 wcs = exp.getWcs() 

1031 photoCalib = exp.getPhotoCalib() 

1032 detector = exp.getDetector() 

1033 bbox = dataRef.get(datasetType='calexp_bbox') 

1034 validPolygon = exp.getInfo().getValidPolygon() 

1035 

1036 rec = cat[i] 

1037 rec.setBBox(bbox) 

1038 rec.setVisitInfo(visitInfo) 

1039 rec.setWcs(wcs) 

1040 rec.setPhotoCalib(photoCalib) 

1041 rec.setDetector(detector) 

1042 rec.setValidPolygon(validPolygon) 

1043 

1044 rec['physical_filter'] = filterLabel.physicalLabel if filterLabel.hasPhysicalLabel() else "" 

1045 rec['band'] = filterLabel.bandLabel if filterLabel.hasBandLabel() else "" 

1046 rec.setId(detector.getId()) 

1047 shape = psf.computeShape(bbox.getCenter()) 

1048 rec['psfSigma'] = shape.getDeterminantRadius() 

1049 rec['psfIxx'] = shape.getIxx() 

1050 rec['psfIyy'] = shape.getIyy() 

1051 rec['psfIxy'] = shape.getIxy() 

1052 im = psf.computeKernelImage(bbox.getCenter()) 

1053 # The calculation of effective psf area is taken from 

1054 # meas_base/src/PsfFlux.cc#L112. See 

1055 # https://github.com/lsst/meas_base/blob/ 

1056 # 750bffe6620e565bda731add1509507f5c40c8bb/src/PsfFlux.cc#L112 

1057 rec['psfArea'] = np.sum(im.array)/np.sum(im.array**2.) 

1058 

1059 sph_pts = wcs.pixelToSky(lsst.geom.Box2D(bbox).getCorners()) 

1060 rec['raCorners'][:] = [sph.getRa().asDegrees() for sph in sph_pts] 

1061 rec['decCorners'][:] = [sph.getDec().asDegrees() for sph in sph_pts] 

1062 

1063 metadata = dafBase.PropertyList() 

1064 metadata.add("COMMENT", "Catalog id is detector id, sorted.") 

1065 # We are looping over existing datarefs, so the following is true 

1066 metadata.add("COMMENT", "Only detectors with data have entries.") 

1067 cat.setMetadata(metadata) 

1068 

1069 cat.sort() 

1070 return cat 

1071 

1072 
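# A numpy-only illustration of the effective-area estimate used above for
# 'psfArea': the PSF kernel image is normalized to unit sum, so
# np.sum(im)/np.sum(im**2) equals 1/np.sum(im**2), which for a Gaussian of
# width sigma (in pixels) is roughly 4*pi*sigma**2. The sigma and stamp size
# below are arbitrary.
def _examplePsfEffectiveArea(sigma=2.0, size=51):
    half = size // 2
    y, x = np.mgrid[-half:half + 1, -half:half + 1]
    im = np.exp(-0.5*(x**2 + y**2)/sigma**2)
    im /= np.sum(im)
    return np.sum(im)/np.sum(im**2)  # ~4*pi*sigma**2, about 50.3 for sigma=2
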

1073class VisitDataIdContainer(DataIdContainer): 

1074 """DataIdContainer that groups sensor-level id's by visit 

1075 """ 

1076 

1077 def makeDataRefList(self, namespace): 

1078 """Make self.refList from self.idList 

1079 

1080 Generate a list of data references grouped by visit. 

1081 

1082 Parameters 

1083 ---------- 

1084 namespace : `argparse.Namespace` 

1085 Namespace used by `lsst.pipe.base.CmdLineTask` to parse command line arguments 

1086 """ 

1087 # Group by visits 

1088 visitRefs = defaultdict(list) 

1089 for dataId in self.idList: 

1090 if "visit" in dataId: 

1091 visitId = dataId["visit"] 

1092 # Append all dataRefs in this visit's subset

1093 subset = namespace.butler.subset(self.datasetType, dataId=dataId) 

1094 visitRefs[visitId].extend([dataRef for dataRef in subset]) 

1095 

1096 outputRefList = [] 

1097 for refList in visitRefs.values(): 

1098 existingRefs = [ref for ref in refList if ref.datasetExists()] 

1099 if existingRefs: 

1100 outputRefList.append(existingRefs) 

1101 

1102 self.refList = outputRefList 

1103 

1104 
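# A toy illustration of the visit-grouping pattern used in makeDataRefList:
# dataRefs (represented here by plain dataId dicts) are bucketed by their
# 'visit' key so each output element holds one visit's worth of references.
def _exampleGroupByVisit():
    dataIds = [{'visit': 1, 'detector': 0}, {'visit': 1, 'detector': 1},
               {'visit': 2, 'detector': 0}]
    grouped = defaultdict(list)
    for dataId in dataIds:
        grouped[dataId['visit']].append(dataId)
    return list(grouped.values())
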

1105class ConsolidateSourceTableConnections(pipeBase.PipelineTaskConnections, 

1106 dimensions=("instrument", "visit")): 

1107 inputCatalogs = connectionTypes.Input( 

1108 doc="Input per-detector Source Tables", 

1109 name="sourceTable", 

1110 storageClass="DataFrame", 

1111 dimensions=("instrument", "visit", "detector"), 

1112 multiple=True 

1113 ) 

1114 outputCatalog = connectionTypes.Output( 

1115 doc="Per-visit concatenation of Source Table", 

1116 name="sourceTable_visit", 

1117 storageClass="DataFrame", 

1118 dimensions=("instrument", "visit") 

1119 ) 

1120 

1121 

1122class ConsolidateSourceTableConfig(pipeBase.PipelineTaskConfig, 

1123 pipelineConnections=ConsolidateSourceTableConnections): 

1124 pass 

1125 

1126 

1127class ConsolidateSourceTableTask(CmdLineTask, pipeBase.PipelineTask): 

1128 """Concatenate `sourceTable` list into a per-visit `sourceTable_visit` 

1129 """ 

1130 _DefaultName = 'consolidateSourceTable' 

1131 ConfigClass = ConsolidateSourceTableConfig 

1132 

1133 inputDataset = 'sourceTable' 

1134 outputDataset = 'sourceTable_visit' 

1135 

1136 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

1137 inputs = butlerQC.get(inputRefs) 

1138 self.log.info("Concatenating %s per-detector Source Tables", 

1139 len(inputs['inputCatalogs'])) 

1140 df = pd.concat(inputs['inputCatalogs']) 

1141 butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs) 

1142 

1143 def runDataRef(self, dataRefList): 

1144 self.log.info("Concatenating %s per-detector Source Tables", len(dataRefList)) 

1145 df = pd.concat([dataRef.get().toDataFrame() for dataRef in dataRefList]) 

1146 dataRefList[0].put(ParquetTable(dataFrame=df), self.outputDataset) 

1147 

1148 @classmethod 

1149 def _makeArgumentParser(cls): 

1150 parser = ArgumentParser(name=cls._DefaultName) 

1151 

1152 parser.add_id_argument("--id", cls.inputDataset, 

1153 help="data ID, e.g. --id visit=12345", 

1154 ContainerClass=VisitDataIdContainer) 

1155 return parser 

1156 

1157 def writeMetadata(self, dataRef): 

1158 """No metadata to write. 

1159 """ 

1160 pass 

1161 

1162 def writeConfig(self, butler, clobber=False, doBackup=True): 

1163 """No config to write. 

1164 """ 

1165 pass
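
# A toy sketch of the per-visit concatenation ConsolidateSourceTableTask
# performs: per-detector source tables (already indexed by source id) are
# stacked row-wise into a single per-visit table. The ids, detector numbers,
# and columns are made up.
def _exampleConsolidateSourceTables():
    perDetector = [
        pd.DataFrame({'ccdVisitId': 1234500 + det,
                      'PsFlux': np.random.rand(2)},
                     index=[100*det, 100*det + 1])
        for det in (0, 1, 2)
    ]
    return pd.concat(perDetector)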