# This file is part of pipe_tasks
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

import functools
import pandas as pd
import numpy as np
from collections import defaultdict

import lsst.geom
import lsst.pex.config as pexConfig
import lsst.pipe.base as pipeBase
from lsst.pipe.base import connectionTypes
import lsst.afw.table as afwTable
from lsst.meas.base import SingleFrameMeasurementTask
from lsst.pipe.base import CmdLineTask, ArgumentParser, DataIdContainer
from lsst.coadd.utils.coaddDataIdContainer import CoaddDataIdContainer

from .parquetTable import ParquetTable
from .multiBandUtils import makeMergeArgumentParser, MergeSourcesRunner
from .functors import CompositeFunctor, RAColumn, DecColumn, Column

def flattenFilters(df, noDupCols=['coord_ra', 'coord_dec'], camelCase=False):
    """Flatten a dataframe with a multilevel column index.

    Per-band columns are prefixed with the band name; columns listed in
    ``noDupCols`` are kept once, taken from the last band processed.
    """
    newDf = pd.DataFrame()
    for band in set(df.columns.to_frame()['band']):
        subdf = df[band]
        columnFormat = '{0}{1}' if camelCase else '{0}_{1}'
        newColumns = {c: columnFormat.format(band, c)
                      for c in subdf.columns if c not in noDupCols}
        cols = list(newColumns.keys())
        newDf = pd.concat([newDf, subdf[cols].rename(columns=newColumns)], axis=1)

    newDf = pd.concat([subdf[noDupCols], newDf], axis=1)
    return newDf

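# A minimal, illustrative sketch of what ``flattenFilters`` does. The column
# names ('PsFlux', 'coord_ra') are hypothetical; all that is assumed is a
# two-level ('band', 'column') index like the one built by
# TransformObjectCatalogTask.run below:
#
#     df = pd.concat(
#         {'g': pd.DataFrame({'PsFlux': [1.0], 'coord_ra': [10.0]}),
#          'r': pd.DataFrame({'PsFlux': [2.0], 'coord_ra': [10.0]})},
#         axis=1, names=['band', 'column'])
#     flattenFilters(df, noDupCols=['coord_ra'], camelCase=True)
#     # -> columns (in some order): coord_ra, gPsFlux, rPsFlux
#     flattenFilters(df, noDupCols=['coord_ra'], camelCase=False)
#     # -> columns (in some order): coord_ra, g_PsFlux, r_PsFlux
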

class WriteObjectTableConfig(pexConfig.Config):
    engine = pexConfig.Field(
        dtype=str,
        default="pyarrow",
        doc="Parquet engine for writing (pyarrow or fastparquet)"
    )
    coaddName = pexConfig.Field(
        dtype=str,
        default="deep",
        doc="Name of coadd"
    )


class WriteObjectTableTask(CmdLineTask):
    """Write filter-merged source tables to parquet.
    """
    _DefaultName = "writeObjectTable"
    ConfigClass = WriteObjectTableConfig
    RunnerClass = MergeSourcesRunner

    # Names of table datasets to be merged
    inputDatasets = ('forced_src', 'meas', 'ref')

    # Tag of output dataset written by `MergeSourcesTask.write`
    outputDataset = 'obj'

    def __init__(self, butler=None, schema=None, **kwargs):
        # It is a shame that this class can't use the default init for
        # CmdLineTask, but doing so would require its own special task
        # runner, which is many more lines of specialization, so this is
        # how it is for now.
        CmdLineTask.__init__(self, **kwargs)

    def runDataRef(self, patchRefList):
        """!
        @brief Merge coadd sources from multiple bands. Calls @ref `run` which
        must be defined in subclasses that inherit from MergeSourcesTask.
        @param[in] patchRefList list of data references for each filter
        """
        catalogs = dict(self.readCatalog(patchRef) for patchRef in patchRefList)
        dataId = patchRefList[0].dataId
        mergedCatalog = self.run(catalogs, tract=dataId['tract'], patch=dataId['patch'])
        self.write(patchRefList[0], mergedCatalog)

    @classmethod
    def _makeArgumentParser(cls):
        """Create a suitable ArgumentParser.

        We will use the ArgumentParser to get a list of data
        references for patches; the RunnerClass will sort them into lists
        of data references for the same patch.

        References the first element of self.inputDatasets, rather than
        self.inputDataset.
        """
        return makeMergeArgumentParser(cls._DefaultName, cls.inputDatasets[0])

    def readCatalog(self, patchRef):
        """Read input catalogs.

        Read all the input datasets given by the 'inputDatasets'
        attribute.

        Parameters
        ----------
        patchRef : `lsst.daf.persistence.ButlerDataRef`
            Data reference for patch.

        Returns
        -------
        Tuple consisting of band name and a dict of catalogs, keyed by
        dataset name.
        """
        band = patchRef.get(self.config.coaddName + "Coadd_filterLabel", immediate=True).bandLabel
        catalogDict = {}
        for dataset in self.inputDatasets:
            catalog = patchRef.get(self.config.coaddName + "Coadd_" + dataset, immediate=True)
            self.log.info("Read %d sources from %s for band %s: %s" %
                          (len(catalog), dataset, band, patchRef.dataId))
            catalogDict[dataset] = catalog
        return band, catalogDict

    def run(self, catalogs, tract, patch):
        """Merge multiple catalogs.

        Parameters
        ----------
        catalogs : `dict`
            Mapping from filter names to dict of catalogs.
        tract : `int`
            tractId to use for the tractId column.
        patch : `str`
            patchId to use for the patchId column.

        Returns
        -------
        catalog : `lsst.pipe.tasks.parquetTable.ParquetTable`
            Merged dataframe, with each column prefixed by
            `filter_tag(filt)`, wrapped in the parquet writer shim class.
        """

        dfs = []
        for filt, tableDict in catalogs.items():
            for dataset, table in tableDict.items():
                # Convert afwTable to pandas DataFrame
                df = table.asAstropy().to_pandas().set_index('id', drop=True)

                # Sort columns by name, to ensure matching schema among patches
                df = df.reindex(sorted(df.columns), axis=1)
                df['tractId'] = tract
                df['patchId'] = patch

                # Make columns a 3-level MultiIndex
                df.columns = pd.MultiIndex.from_tuples([(dataset, filt, c) for c in df.columns],
                                                       names=('dataset', 'band', 'column'))
                dfs.append(df)

        catalog = functools.reduce(lambda d1, d2: d1.join(d2), dfs)
        return ParquetTable(dataFrame=catalog)

    def write(self, patchRef, catalog):
        """Write the output.

        Parameters
        ----------
        catalog : `ParquetTable`
            Catalog to write.
        patchRef : `lsst.daf.persistence.ButlerDataRef`
            Data reference for patch.
        """
        patchRef.put(catalog, self.config.coaddName + "Coadd_" + self.outputDataset)
        # Since the filter isn't actually part of the data ID for the dataset
        # we're saving, it's confusing to see it in the log message, even if
        # the butler simply ignores it.
        mergeDataId = patchRef.dataId.copy()
        del mergeDataId["filter"]
        self.log.info("Wrote merged catalog: %s" % (mergeDataId,))

    def writeMetadata(self, dataRefList):
        """No metadata to write, and not sure how to write it for a list of dataRefs.
        """
        pass

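# Illustrative sketch of the column structure that WriteObjectTableTask.run
# produces (the specific dataset/band/column names below are examples only):
#
#     dataset:  meas                    forced_src              ref
#     band:     g                       g                       g
#     column:   base_PsfFlux_instFlux   base_PsfFlux_instFlux   detect_isPrimary
#
# i.e. a single dataframe indexed by object id whose columns form a
# ('dataset', 'band', 'column') MultiIndex; the per-dataset, per-band frames
# are joined on the id index before being wrapped in a ParquetTable.
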

class WriteSourceTableConfig(pexConfig.Config):
    doApplyExternalPhotoCalib = pexConfig.Field(
        dtype=bool,
        default=False,
        doc=("Add local photoCalib columns from the calexp.photoCalib? Should only be set True when "
             "generating Source Tables from older src tables which do not already have local calib columns")
    )
    doApplyExternalSkyWcs = pexConfig.Field(
        dtype=bool,
        default=False,
        doc=("Add local WCS columns from the calexp.wcs? Should only be set True when "
             "generating Source Tables from older src tables which do not already have local calib columns")
    )


class WriteSourceTableTask(CmdLineTask):
    """Write source table to parquet.
    """
    _DefaultName = "writeSourceTable"
    ConfigClass = WriteSourceTableConfig

    def runDataRef(self, dataRef):
        src = dataRef.get('src')
        if self.config.doApplyExternalPhotoCalib or self.config.doApplyExternalSkyWcs:
            src = self.addCalibColumns(src, dataRef)

        ccdVisitId = dataRef.get('ccdExposureId')
        result = self.run(src, ccdVisitId=ccdVisitId)
        dataRef.put(result.table, 'source')

    def run(self, catalog, ccdVisitId=None):
        """Convert `src` catalog to parquet.

        Parameters
        ----------
        catalog : `afwTable.SourceCatalog`
            Catalog to be converted.
        ccdVisitId : `int`
            ccdVisitId to be added as a column.

        Returns
        -------
        result : `lsst.pipe.base.Struct`
            ``table``
                `ParquetTable` version of the input catalog.
        """
        self.log.info("Generating parquet table from src catalog")
        df = catalog.asAstropy().to_pandas().set_index('id', drop=True)
        df['ccdVisitId'] = ccdVisitId
        return pipeBase.Struct(table=ParquetTable(dataFrame=df))

    def addCalibColumns(self, catalog, dataRef):
        """Add columns with local calibration evaluated at each centroid,
        for backwards compatibility with old repos.

        This exists for the purpose of converting old src catalogs
        (which don't have the expected local calib columns) to Source Tables.

        Parameters
        ----------
        catalog : `afwTable.SourceCatalog`
            Catalog to which calib columns will be added.
        dataRef : `lsst.daf.persistence.ButlerDataRef`
            Data reference for fetching the calibs from disk.

        Returns
        -------
        newCat : `afwTable.SourceCatalog`
            Source catalog with requested local calib columns.
        """
        mapper = afwTable.SchemaMapper(catalog.schema)
        measureConfig = SingleFrameMeasurementTask.ConfigClass()
        measureConfig.doReplaceWithNoise = False

        # Just need the WCS or the PhotoCalib attached to an exposure
        exposure = dataRef.get('calexp_sub',
                               bbox=lsst.geom.Box2I(lsst.geom.Point2I(0, 0), lsst.geom.Point2I(0, 0)))

        mapper = afwTable.SchemaMapper(catalog.schema)
        mapper.addMinimalSchema(catalog.schema, True)
        schema = mapper.getOutputSchema()

        exposureIdInfo = dataRef.get("expIdInfo")
        measureConfig.plugins.names = []
        if self.config.doApplyExternalSkyWcs:
            plugin = 'base_LocalWcs'
            if plugin in schema:
                raise RuntimeError(f"{plugin} already in src catalog. Set doApplyExternalSkyWcs=False")
            else:
                measureConfig.plugins.names.add(plugin)

        if self.config.doApplyExternalPhotoCalib:
            plugin = 'base_LocalPhotoCalib'
            if plugin in schema:
                raise RuntimeError(f"{plugin} already in src catalog. Set doApplyExternalPhotoCalib=False")
            else:
                measureConfig.plugins.names.add(plugin)

        measurement = SingleFrameMeasurementTask(config=measureConfig, schema=schema)
        newCat = afwTable.SourceCatalog(schema)
        newCat.extend(catalog, mapper=mapper)
        measurement.run(measCat=newCat, exposure=exposure, exposureId=exposureIdInfo.expId)
        return newCat

    def writeMetadata(self, dataRef):
        """No metadata to write.
        """
        pass

    @classmethod
    def _makeArgumentParser(cls):
        parser = ArgumentParser(name=cls._DefaultName)
        parser.add_id_argument("--id", 'src',
                               help="data ID, e.g. --id visit=12345 ccd=0")
        return parser


class PostprocessAnalysis(object):
    """Calculate columns from a ParquetTable.

    This object manages and organizes an arbitrary set of computations
    on a catalog. The catalog is defined by a
    `lsst.pipe.tasks.parquetTable.ParquetTable` object (or list thereof), such as a
    `deepCoadd_obj` dataset, and the computations are defined by a collection
    of `lsst.pipe.tasks.functor.Functor` objects (or, equivalently,
    a `CompositeFunctor`).

    After the object is initialized, accessing the `.df` attribute (which
    holds the `pandas.DataFrame` containing the results of the calculations)
    triggers computation of said dataframe.

    One of the conveniences of using this object is the ability to define a
    desired common filter for all functors. This enables the same functor
    collection to be passed to several different `PostprocessAnalysis` objects
    without having to change the original functor collection, since the `filt`
    keyword argument of this object triggers an overwrite of the `filt`
    property for all functors in the collection.

    This object also allows a list of refFlags to be passed, and defines a
    set of default refFlags that are always included even if not requested.

    If a list of `ParquetTable` objects is passed, rather than a single one,
    then the calculations will be mapped over all the input catalogs. In
    principle, it should be straightforward to parallelize this activity, but
    initial tests have failed (see TODO in code comments).

    Parameters
    ----------
    parq : `lsst.pipe.tasks.ParquetTable` (or list of such)
        Source catalog(s) for computation.

    functors : `list`, `dict`, or `lsst.pipe.tasks.functors.CompositeFunctor`
        Computations to do (functors that act on `parq`).
        If a dict, the output
        DataFrame will have columns keyed accordingly.
        If a list, the column keys will come from the
        `.shortname` attribute of each functor.

    filt : `str` (optional)
        Filter in which to calculate. If provided,
        this will overwrite any existing `.filt` attribute
        of the provided functors.

    flags : `list` (optional)
        List of flags (per-band) to include in output table.

    refFlags : `list` (optional)
        List of refFlags (reference band only) to include in output table.
    """
    _defaultRefFlags = []
    _defaultFuncs = (('coord_ra', RAColumn()),
                     ('coord_dec', DecColumn()))

    def __init__(self, parq, functors, filt=None, flags=None, refFlags=None):
        self.parq = parq
        self.functors = functors

        self.filt = filt
        self.flags = list(flags) if flags is not None else []
        self.refFlags = list(self._defaultRefFlags)
        if refFlags is not None:
            self.refFlags += list(refFlags)

        self._df = None

    @property
    def defaultFuncs(self):
        funcs = dict(self._defaultFuncs)
        return funcs

    @property
    def func(self):
        additionalFuncs = self.defaultFuncs
        additionalFuncs.update({flag: Column(flag, dataset='ref') for flag in self.refFlags})
        additionalFuncs.update({flag: Column(flag, dataset='meas') for flag in self.flags})

        if isinstance(self.functors, CompositeFunctor):
            func = self.functors
        else:
            func = CompositeFunctor(self.functors)

        func.funcDict.update(additionalFuncs)
        func.filt = self.filt

        return func

    @property
    def noDupCols(self):
        return [name for name, func in self.func.funcDict.items() if func.noDup or func.dataset == 'ref']

    @property
    def df(self):
        if self._df is None:
            self.compute()
        return self._df

    def compute(self, dropna=False, pool=None):
        # Map over multiple parquet tables
        if type(self.parq) in (list, tuple):
            if pool is None:
                dflist = [self.func(parq, dropna=dropna) for parq in self.parq]
            else:
                # TODO: Figure out why this doesn't work (pyarrow pickling issues?)
                dflist = pool.map(functools.partial(self.func, dropna=dropna), self.parq)
            self._df = pd.concat(dflist)
        else:
            self._df = self.func(self.parq, dropna=dropna)

        return self._df

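# A hedged usage sketch of PostprocessAnalysis. The dataset name, dataId
# values, and column names below are examples only and are not guaranteed to
# exist in every repo:
#
#     parq = butler.get('deepCoadd_obj', tract=9813, patch='4,4')
#     funcs = {'ra': RAColumn(), 'dec': DecColumn(),
#              'psfFlux': Column('base_PsfFlux_instFlux', dataset='meas')}
#     analysis = PostprocessAnalysis(parq, funcs, filt='i',
#                                    refFlags=['detect_isPrimary'])
#     df = analysis.df  # first access triggers compute(); returns a pandas DataFrame
#                       # with one column per functor plus the requested refFlags
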

class TransformCatalogBaseConfig(pexConfig.Config):
    functorFile = pexConfig.Field(
        dtype=str,
        doc='Path to YAML file specifying functors to be computed',
        default=None,
        optional=True
    )


class TransformCatalogBaseTask(CmdLineTask):
    """Base class for transforming/standardizing a catalog by applying
    functors that convert units and apply calibrations.

    The purpose of this task is to perform a set of computations on
    an input `ParquetTable` dataset (such as `deepCoadd_obj`) and write the
    results to a new dataset (which needs to be declared in an `outputDataset`
    attribute).

    The calculations to be performed are defined in a YAML file that specifies
    a set of functors to be computed, provided as
    a `--functorFile` config parameter. An example of such a YAML file
    is the following:

        funcs:
            psfMag:
                functor: Mag
                args:
                    - base_PsfFlux
                filt: HSC-G
                dataset: meas
            cmodel_magDiff:
                functor: MagDiff
                args:
                    - modelfit_CModel
                    - base_PsfFlux
                filt: HSC-G
            gauss_magDiff:
                functor: MagDiff
                args:
                    - base_GaussianFlux
                    - base_PsfFlux
                filt: HSC-G
            count:
                functor: Column
                args:
                    - base_InputCount_value
                filt: HSC-G
            deconvolved_moments:
                functor: DeconvolvedMoments
                filt: HSC-G
                dataset: forced_src
        refFlags:
            - calib_psfUsed
            - merge_measurement_i
            - merge_measurement_r
            - merge_measurement_z
            - merge_measurement_y
            - merge_measurement_g
            - base_PixelFlags_flag_inexact_psfCenter
            - detect_isPrimary

    The names for each entry under "funcs" will become the names of columns in
    the output dataset. All the functors referenced are defined in
    `lsst.pipe.tasks.functors`. Positional arguments to be passed to each
    functor are in the `args` list, and any additional entries for each column
    other than "functor" or "args" (e.g., `'filt'`, `'dataset'`) are treated
    as keyword arguments to be passed to the functor initialization.

    The "refFlags" entry is a shortcut for a set of `Column` functors that
    keep the original column names and are taken from the `'ref'` dataset.

    The "flags" entry will be expanded out per band.

    This task uses the `lsst.pipe.tasks.postprocess.PostprocessAnalysis` object
    to organize and execute the calculations.
    """
    @property
    def _DefaultName(self):
        raise NotImplementedError('Subclass must define "_DefaultName" attribute')

    @property
    def outputDataset(self):
        raise NotImplementedError('Subclass must define "outputDataset" attribute')

    @property
    def inputDataset(self):
        raise NotImplementedError('Subclass must define "inputDataset" attribute')

    @property
    def ConfigClass(self):
        raise NotImplementedError('Subclass must define "ConfigClass" attribute')

    def runDataRef(self, dataRef):
        parq = dataRef.get()
        funcs = self.getFunctors()
        df = self.run(parq, funcs=funcs, dataId=dataRef.dataId)
        self.write(df, dataRef)
        return df

    def run(self, parq, funcs=None, dataId=None, band=None):
        """Do postprocessing calculations.

        Takes a `ParquetTable` object and dataId,
        returns a dataframe with results of postprocessing calculations.

        Parameters
        ----------
        parq : `lsst.pipe.tasks.parquetTable.ParquetTable`
            ParquetTable from which calculations are done.
        funcs : `lsst.pipe.tasks.functors.Functors`
            Functors to apply to the table's columns.
        dataId : `dict`, optional
            Used to add a `patchId` column to the output dataframe.
        band : `str`, optional
            Filter band that is being processed.

        Returns
        -------
        `pandas.DataFrame`
        """
        self.log.info("Transforming/standardizing the source table dataId: %s", dataId)

        df = self.transform(band, parq, funcs, dataId).df
        self.log.info("Made a table of %d columns and %d rows", len(df.columns), len(df))
        return df

    def getFunctors(self):
        funcs = CompositeFunctor.from_file(self.config.functorFile)
        funcs.update(dict(PostprocessAnalysis._defaultFuncs))
        return funcs

    def getAnalysis(self, parq, funcs=None, band=None):
        # Avoids disk access if funcs is passed
        if funcs is None:
            funcs = self.getFunctors()
        analysis = PostprocessAnalysis(parq, funcs, filt=band)
        return analysis

    def transform(self, band, parq, funcs, dataId):
        analysis = self.getAnalysis(parq, funcs=funcs, band=band)
        df = analysis.df
        if dataId is not None:
            for key, value in dataId.items():
                df[key] = value

        return pipeBase.Struct(
            df=df,
            analysis=analysis
        )

    def write(self, df, parqRef):
        parqRef.put(ParquetTable(dataFrame=df), self.outputDataset)

    def writeMetadata(self, dataRef):
        """No metadata to write.
        """
        pass

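# Hedged sketch of how a concrete subclass (e.g. TransformObjectCatalogTask,
# defined below) is typically configured and driven from Python. The file
# path and dataId values are illustrative only:
#
#     config = TransformObjectCatalogConfig()
#     config.functorFile = '/path/to/Object.yaml'  # YAML like the example above
#     task = TransformObjectCatalogTask(config=config)
#     funcs = task.getFunctors()  # CompositeFunctor built from the YAML
#     df = task.run(parq, funcs=funcs, dataId={'tract': 9813, 'patch': '4,4'})
#     # df has one column per entry under "funcs", plus the refFlags columns
#     # and the coord_ra/coord_dec defaults from PostprocessAnalysis.
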

class TransformObjectCatalogConfig(TransformCatalogBaseConfig):
    coaddName = pexConfig.Field(
        dtype=str,
        default="deep",
        doc="Name of coadd"
    )
    # TODO: remove in DM-27177
    filterMap = pexConfig.DictField(
        keytype=str,
        itemtype=str,
        default={},
        doc=("Dictionary mapping full filter name to short one for column name munging. "
             "These filters determine the output columns no matter what filters the "
             "input data actually contain."),
        deprecated=("Coadds are now identified by the band, so this transform is unused. "
                    "Will be removed after v22.")
    )
    outputBands = pexConfig.ListField(
        dtype=str,
        default=None,
        optional=True,
        doc=("These bands and only these bands will appear in the output,"
             " NaN-filled if the input does not include them."
             " If None, then use all bands found in the input.")
    )
    camelCase = pexConfig.Field(
        dtype=bool,
        default=True,
        doc=("Write per-band column names with camelCase, else underscore. "
             "For example: gPsFlux instead of g_PsFlux.")
    )
    multilevelOutput = pexConfig.Field(
        dtype=bool,
        default=False,
        doc=("Whether the results dataframe should have a multilevel column index (True) or be flat "
             "and name-munged (False).")
    )


class TransformObjectCatalogTask(TransformCatalogBaseTask):
    """Produce a flattened Object Table to match the format specified in
    sdm_schemas.

    Do the same set of postprocessing calculations on all bands.

    This is identical to `TransformCatalogBaseTask`, except that it runs the
    specified functor calculations for all filters present in the
    input `deepCoadd_obj` table. Any specific `"filt"` keywords specified
    by the YAML file will be superseded.
    """
    _DefaultName = "transformObjectCatalog"
    ConfigClass = TransformObjectCatalogConfig

    inputDataset = 'deepCoadd_obj'
    outputDataset = 'objectTable'

    @classmethod
    def _makeArgumentParser(cls):
        parser = ArgumentParser(name=cls._DefaultName)
        parser.add_id_argument("--id", cls.inputDataset,
                               ContainerClass=CoaddDataIdContainer,
                               help="data ID, e.g. --id tract=12345 patch=1,2")
        return parser

    def run(self, parq, funcs=None, dataId=None, band=None):
        # NOTE: band kwarg is ignored here.
        dfDict = {}
        analysisDict = {}
        templateDf = pd.DataFrame()
        outputBands = parq.columnLevelNames['band'] if self.config.outputBands is None else \
            self.config.outputBands

        # Perform transform for data of filters that exist in parq.
        for inputBand in parq.columnLevelNames['band']:
            if inputBand not in outputBands:
                self.log.info("Ignoring %s band data in the input", inputBand)
                continue
            self.log.info("Transforming the catalog of band %s", inputBand)
            result = self.transform(inputBand, parq, funcs, dataId)
            dfDict[inputBand] = result.df
            analysisDict[inputBand] = result.analysis
            if templateDf.empty:
                templateDf = result.df

        # Fill NaNs in columns of other wanted bands
        for filt in outputBands:
            if filt not in dfDict:
                self.log.info("Adding empty columns for band %s", filt)
                dfDict[filt] = pd.DataFrame().reindex_like(templateDf)

        # This makes a multilevel column index, with band as first level
        df = pd.concat(dfDict, axis=1, names=['band', 'column'])

        if not self.config.multilevelOutput:
            noDupCols = list(set.union(*[set(v.noDupCols) for v in analysisDict.values()]))
            if dataId is not None:
                noDupCols += list(dataId.keys())
            df = flattenFilters(df, noDupCols=noDupCols, camelCase=self.config.camelCase)

        self.log.info("Made a table of %d columns and %d rows", len(df.columns), len(df))
        return df

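# Illustrative note on the output shape (functor name "PsFlux" is an example
# only). With multilevelOutput=False, each functor defined in the YAML file
# yields one flat column per band in outputBands: camelCase=True gives
# gPsFlux, rPsFlux, ...; camelCase=False gives g_PsFlux, r_PsFlux, ...
# Bands listed in outputBands but absent from the input deepCoadd_obj are
# present in the output as NaN-filled columns. With multilevelOutput=True,
# the ('band', 'column') MultiIndex is kept instead of being flattened.
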

class TractObjectDataIdContainer(CoaddDataIdContainer):

    def makeDataRefList(self, namespace):
        """Make self.refList from self.idList.

        Generate a list of data references given tract and/or patch.
        This was adapted from `TractQADataIdContainer`, which was
        `TractDataIdContainer` modified to not require "filter".
        Only existing dataRefs are returned.
        """
        def getPatchRefList(tract):
            return [namespace.butler.dataRef(datasetType=self.datasetType,
                                             tract=tract.getId(),
                                             patch="%d,%d" % patch.getIndex()) for patch in tract]

        tractRefs = defaultdict(list)  # Data references for each tract
        for dataId in self.idList:
            skymap = self.getSkymap(namespace)

            if "tract" in dataId:
                tractId = dataId["tract"]
                if "patch" in dataId:
                    tractRefs[tractId].append(namespace.butler.dataRef(datasetType=self.datasetType,
                                                                       tract=tractId,
                                                                       patch=dataId['patch']))
                else:
                    tractRefs[tractId] += getPatchRefList(skymap[tractId])
            else:
                tractRefs = dict((tract.getId(), tractRefs.get(tract.getId(), []) + getPatchRefList(tract))
                                 for tract in skymap)
        outputRefList = []
        for tractRefList in tractRefs.values():
            existingRefs = [ref for ref in tractRefList if ref.datasetExists()]
            outputRefList.append(existingRefs)

        self.refList = outputRefList


class ConsolidateObjectTableConfig(pexConfig.Config):
    coaddName = pexConfig.Field(
        dtype=str,
        default="deep",
        doc="Name of coadd"
    )


class ConsolidateObjectTableTask(CmdLineTask):
    """Write patch-merged source tables to a tract-level parquet file.
    """
    _DefaultName = "consolidateObjectTable"
    ConfigClass = ConsolidateObjectTableConfig

    inputDataset = 'objectTable'
    outputDataset = 'objectTable_tract'

    @classmethod
    def _makeArgumentParser(cls):
        parser = ArgumentParser(name=cls._DefaultName)

        parser.add_id_argument("--id", cls.inputDataset,
                               help="data ID, e.g. --id tract=12345",
                               ContainerClass=TractObjectDataIdContainer)
        return parser

    def runDataRef(self, patchRefList):
        df = pd.concat([patchRef.get().toDataFrame() for patchRef in patchRefList])
        patchRefList[0].put(ParquetTable(dataFrame=df), self.outputDataset)

    def writeMetadata(self, dataRef):
        """No metadata to write.
        """
        pass


class TransformSourceTableConfig(TransformCatalogBaseConfig):
    pass


class TransformSourceTableTask(TransformCatalogBaseTask):
    """Transform/standardize a source catalog.
    """
    _DefaultName = "transformSourceTable"
    ConfigClass = TransformSourceTableConfig

    inputDataset = 'source'
    outputDataset = 'sourceTable'

    def writeMetadata(self, dataRef):
        """No metadata to write.
        """
        pass

    @classmethod
    def _makeArgumentParser(cls):
        parser = ArgumentParser(name=cls._DefaultName)
        parser.add_id_argument("--id", datasetType=cls.inputDataset,
                               level="sensor",
                               help="data ID, e.g. --id visit=12345 ccd=0")
        return parser

    def runDataRef(self, dataRef):
        """Override to specify band label to run()."""
        parq = dataRef.get()
        funcs = self.getFunctors()
        band = dataRef.get("calexp_filterLabel", immediate=True).bandLabel
        df = self.run(parq, funcs=funcs, dataId=dataRef.dataId, band=band)
        self.write(df, dataRef)
        return df


class ConsolidateVisitSummaryConnections(pipeBase.PipelineTaskConnections,
                                         dimensions=("instrument", "visit",),
                                         defaultTemplates={}):
    calexp = connectionTypes.Input(
        doc="Processed exposures used for metadata",
        name="calexp",
        storageClass="ExposureF",
        dimensions=("instrument", "visit", "detector"),
        deferLoad=True,
        multiple=True,
    )
    visitSummary = connectionTypes.Output(
        doc="Consolidated visit-level exposure metadata",
        name="visitSummary",
        storageClass="ExposureCatalog",
        dimensions=("instrument", "visit"),
    )


class ConsolidateVisitSummaryConfig(pipeBase.PipelineTaskConfig,
                                    pipelineConnections=ConsolidateVisitSummaryConnections):
    """Config for ConsolidateVisitSummaryTask"""
    pass


class ConsolidateVisitSummaryTask(pipeBase.PipelineTask, pipeBase.CmdLineTask):
    """Task to consolidate per-detector visit metadata.

    This task aggregates the following metadata from all the detectors in a
    single visit into an exposure catalog:
    - The visitInfo.
    - The wcs.
    - The photoCalib.
    - The physical_filter and band (if available).
    - The psf size, shape, and effective area at the center of the detector.
    - The corners of the bounding box in right ascension/declination.

    Other quantities such as Psf, ApCorrMap, and TransmissionCurve are not
    persisted here because of storage concerns, and because of their limited
    utility as summary statistics.

    Tests for this task are performed in ci_hsc_gen3.
    """
    _DefaultName = "consolidateVisitSummary"
    ConfigClass = ConsolidateVisitSummaryConfig

    @classmethod
    def _makeArgumentParser(cls):
        parser = ArgumentParser(name=cls._DefaultName)

        parser.add_id_argument("--id", "calexp",
                               help="data ID, e.g. --id visit=12345",
                               ContainerClass=VisitDataIdContainer)
        return parser

    def writeMetadata(self, dataRef):
        """No metadata to persist, so override to remove metadata persistence.
        """
        pass

    def writeConfig(self, butler, clobber=False, doBackup=True):
        """No config to persist, so override to remove config persistence.
        """
        pass

    def runDataRef(self, dataRefList):
        visit = dataRefList[0].dataId['visit']

        self.log.debug("Concatenating metadata from %d per-detector calexps (visit %d)" %
                       (len(dataRefList), visit))

        expCatalog = self._combineExposureMetadata(visit, dataRefList, isGen3=False)

        dataRefList[0].put(expCatalog, 'visitSummary', visit=visit)

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        dataRefs = butlerQC.get(inputRefs.calexp)
        visit = dataRefs[0].dataId.byName()['visit']

        self.log.debug("Concatenating metadata from %d per-detector calexps (visit %d)" %
                       (len(dataRefs), visit))

        expCatalog = self._combineExposureMetadata(visit, dataRefs)

        butlerQC.put(expCatalog, outputRefs.visitSummary)

    def _combineExposureMetadata(self, visit, dataRefs, isGen3=True):
        """Make a combined exposure catalog from a list of dataRefs.

        Parameters
        ----------
        visit : `int`
            Visit identification number.
        dataRefs : `list`
            List of calexp dataRefs in visit. May be a list of
            `lsst.daf.persistence.ButlerDataRef` (Gen2) or
            `lsst.daf.butler.DeferredDatasetHandle` (Gen3).
        isGen3 : `bool`, optional
            Specifies if this is a Gen3 list of datarefs.

        Returns
        -------
        visitSummary : `lsst.afw.table.ExposureCatalog`
            Exposure catalog with per-detector summary information.
        """
        schema = afwTable.ExposureTable.makeMinimalSchema()
        schema.addField('visit', type='I', doc='Visit number')
        schema.addField('detector_id', type='I', doc='Detector number')
        schema.addField('physical_filter', type='String', size=32, doc='Physical filter')
        schema.addField('band', type='String', size=32, doc='Name of band')
        schema.addField('psfSigma', type='F',
                        doc='PSF model second-moments determinant radius (center of chip) (pixel)')
        schema.addField('psfArea', type='F',
                        doc='PSF model effective area (center of chip) (pixel**2)')
        schema.addField('psfIxx', type='F',
                        doc='PSF model Ixx (center of chip) (pixel**2)')
        schema.addField('psfIyy', type='F',
                        doc='PSF model Iyy (center of chip) (pixel**2)')
        schema.addField('psfIxy', type='F',
                        doc='PSF model Ixy (center of chip) (pixel**2)')
        schema.addField('raCorners', type='ArrayD', size=4,
                        doc='Right Ascension of bounding box corners (degrees)')
        schema.addField('decCorners', type='ArrayD', size=4,
                        doc='Declination of bounding box corners (degrees)')

        cat = afwTable.ExposureCatalog(schema)
        cat.resize(len(dataRefs))

        cat['visit'] = visit

        for i, dataRef in enumerate(dataRefs):
            if isGen3:
                visitInfo = dataRef.get(component='visitInfo')
                filterLabel = dataRef.get(component='filterLabel')
                psf = dataRef.get(component='psf')
                wcs = dataRef.get(component='wcs')
                photoCalib = dataRef.get(component='photoCalib')
                detector = dataRef.get(component='detector')
                bbox = dataRef.get(component='bbox')
                validPolygon = dataRef.get(component='validPolygon')
            else:
                # Note that we need to read the calexp because there is
                # no magic access to the psf except through the exposure.
                gen2_read_bbox = lsst.geom.BoxI(lsst.geom.PointI(0, 0), lsst.geom.PointI(1, 1))
                exp = dataRef.get(datasetType='calexp_sub', bbox=gen2_read_bbox)
                visitInfo = exp.getInfo().getVisitInfo()
                filterLabel = dataRef.get("calexp_filterLabel")
                psf = exp.getPsf()
                wcs = exp.getWcs()
                photoCalib = exp.getPhotoCalib()
                detector = exp.getDetector()
                bbox = dataRef.get(datasetType='calexp_bbox')
                validPolygon = exp.getInfo().getValidPolygon()

            rec = cat[i]
            rec.setBBox(bbox)
            rec.setVisitInfo(visitInfo)
            rec.setWcs(wcs)
            rec.setPhotoCalib(photoCalib)
            rec.setDetector(detector)
            rec.setValidPolygon(validPolygon)

            rec['physical_filter'] = filterLabel.physicalLabel if filterLabel.hasPhysicalLabel() else ""
            rec['band'] = filterLabel.bandLabel if filterLabel.hasBandLabel() else ""
            rec['detector_id'] = detector.getId()
            shape = psf.computeShape(bbox.getCenter())
            rec['psfSigma'] = shape.getDeterminantRadius()
            rec['psfIxx'] = shape.getIxx()
            rec['psfIyy'] = shape.getIyy()
            rec['psfIxy'] = shape.getIxy()
            im = psf.computeKernelImage(bbox.getCenter())
            # The calculation of effective psf area is taken from
            # meas_base/src/PsfFlux.cc#L112. See
            # https://github.com/lsst/meas_base/blob/
            # 750bffe6620e565bda731add1509507f5c40c8bb/src/PsfFlux.cc#L112
            rec['psfArea'] = np.sum(im.array)/np.sum(im.array**2.)

            sph_pts = wcs.pixelToSky(lsst.geom.Box2D(bbox).getCorners())
            rec['raCorners'][:] = [sph.getRa().asDegrees() for sph in sph_pts]
            rec['decCorners'][:] = [sph.getDec().asDegrees() for sph in sph_pts]

        return cat

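# Hedged sketch of consuming the visitSummary catalog produced above (Gen3
# butler shown; the instrument/visit values are illustrative only):
#
#     visitSummary = butler.get('visitSummary', instrument='HSC', visit=1228)
#     for row in visitSummary:
#         print(row['detector_id'], row['psfSigma'], row['raCorners'])
#     wcs = visitSummary[0].getWcs()  # per-detector WCS is also attached to each record
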

class VisitDataIdContainer(DataIdContainer):
    """DataIdContainer that groups sensor-level ids by visit.
    """

    def makeDataRefList(self, namespace):
        """Make self.refList from self.idList.

        Generate a list of data references grouped by visit.

        Parameters
        ----------
        namespace : `argparse.Namespace`
            Namespace used by `lsst.pipe.base.CmdLineTask` to parse command
            line arguments.
        """
        # Group by visits
        visitRefs = defaultdict(list)
        for dataId in self.idList:
            if "visit" in dataId:
                visitId = dataId["visit"]
                # Append all dataRefs in the subset matching this partial data ID
                subset = namespace.butler.subset(self.datasetType, dataId=dataId)
                visitRefs[visitId].extend([dataRef for dataRef in subset])

        outputRefList = []
        for refList in visitRefs.values():
            existingRefs = [ref for ref in refList if ref.datasetExists()]
            if existingRefs:
                outputRefList.append(existingRefs)

        self.refList = outputRefList


class ConsolidateSourceTableConfig(pexConfig.Config):
    pass


class ConsolidateSourceTableTask(CmdLineTask):
    """Concatenate a list of per-detector `sourceTable` datasets into a
    per-visit `sourceTable_visit`.
    """
    _DefaultName = 'consolidateSourceTable'
    ConfigClass = ConsolidateSourceTableConfig

    inputDataset = 'sourceTable'
    outputDataset = 'sourceTable_visit'

    def runDataRef(self, dataRefList):
        self.log.info("Concatenating %s per-detector Source Tables", len(dataRefList))
        df = pd.concat([dataRef.get().toDataFrame() for dataRef in dataRefList])
        dataRefList[0].put(ParquetTable(dataFrame=df), self.outputDataset)

    @classmethod
    def _makeArgumentParser(cls):
        parser = ArgumentParser(name=cls._DefaultName)

        parser.add_id_argument("--id", cls.inputDataset,
                               help="data ID, e.g. --id visit=12345",
                               ContainerClass=VisitDataIdContainer)
        return parser

    def writeMetadata(self, dataRef):
        """No metadata to write.
        """
        pass

    def writeConfig(self, butler, clobber=False, doBackup=True):
        """No config to write.
        """
        pass