# This file is part of pipe_tasks
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

import functools
import pandas as pd
import numpy as np
from collections import defaultdict

import lsst.geom
import lsst.pex.config as pexConfig
import lsst.pipe.base as pipeBase
from lsst.pipe.base import connectionTypes
import lsst.afw.table as afwTable
from lsst.meas.base import SingleFrameMeasurementTask
from lsst.pipe.base import CmdLineTask, ArgumentParser, DataIdContainer
from lsst.coadd.utils.coaddDataIdContainer import CoaddDataIdContainer

from .parquetTable import ParquetTable
from .multiBandUtils import makeMergeArgumentParser, MergeSourcesRunner
from .functors import CompositeFunctor, RAColumn, DecColumn, Column


def flattenFilters(df, noDupCols=['coord_ra', 'coord_dec'], camelCase=False):
    """Flatten a dataframe with a multilevel column index.
    """
    newDf = pd.DataFrame()
    for band in set(df.columns.to_frame()['band']):
        subdf = df[band]
        columnFormat = '{0}{1}' if camelCase else '{0}_{1}'
        newColumns = {c: columnFormat.format(band, c)
                      for c in subdf.columns if c not in noDupCols}
        cols = list(newColumns.keys())
        newDf = pd.concat([newDf, subdf[cols].rename(columns=newColumns)], axis=1)

    newDf = pd.concat([subdf[noDupCols], newDf], axis=1)
    return newDf
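

# A minimal, illustrative sketch (not part of the pipeline API) of what
# `flattenFilters` does to a two-band table with a ('band', 'column')
# MultiIndex. The column names below are hypothetical stand-ins for the
# real measurement schema.
def _flattenFiltersExample():
    columns = pd.MultiIndex.from_tuples(
        [('g', 'coord_ra'), ('g', 'coord_dec'), ('g', 'PsFlux'),
         ('r', 'coord_ra'), ('r', 'coord_dec'), ('r', 'PsFlux')],
        names=('band', 'column'))
    df = pd.DataFrame([[10.0, -5.0, 1.2, 10.0, -5.0, 3.4]], columns=columns)
    # With camelCase=True the per-band columns become 'gPsFlux' and 'rPsFlux',
    # while 'coord_ra' and 'coord_dec' appear once, unprefixed.
    return flattenFilters(df, camelCase=True)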


class WriteObjectTableConfig(pexConfig.Config):
    priorityList = pexConfig.ListField(
        dtype=str,
        default=[],
        doc="Priority-ordered list of bands for the merge."
    )
    engine = pexConfig.Field(
        dtype=str,
        default="pyarrow",
        doc="Parquet engine for writing (pyarrow or fastparquet)"
    )
    coaddName = pexConfig.Field(
        dtype=str,
        default="deep",
        doc="Name of coadd"
    )

    def validate(self):
        pexConfig.Config.validate(self)
        if len(self.priorityList) == 0:
            raise RuntimeError("No priority list provided")


class WriteObjectTableTask(CmdLineTask):
    """Write filter-merged source tables to parquet.
    """
    _DefaultName = "writeObjectTable"
    ConfigClass = WriteObjectTableConfig
    RunnerClass = MergeSourcesRunner

    # Names of table datasets to be merged
    inputDatasets = ('forced_src', 'meas', 'ref')

    # Tag of output dataset written by `MergeSourcesTask.write`
    outputDataset = 'obj'

    def __init__(self, butler=None, schema=None, **kwargs):
        # It is a shame that this class can't use the default init for CmdLineTask,
        # but to do so would require its own special task runner, which is many
        # more lines of specialization, so this is how it is for now.
        CmdLineTask.__init__(self, **kwargs)

    def runDataRef(self, patchRefList):
        """!
        @brief Merge coadd sources from multiple bands. Calls @ref `run` which must be defined in
        subclasses that inherit from MergeSourcesTask.
        @param[in] patchRefList list of data references for each filter
        """
        catalogs = dict(self.readCatalog(patchRef) for patchRef in patchRefList)
        dataId = patchRefList[0].dataId
        mergedCatalog = self.run(catalogs, tract=dataId['tract'], patch=dataId['patch'])
        self.write(patchRefList[0], mergedCatalog)

    @classmethod
    def _makeArgumentParser(cls):
        """Create a suitable ArgumentParser.

        We will use the ArgumentParser to get a list of data
        references for patches; the RunnerClass will sort them into lists
        of data references for the same patch.

        References the first of self.inputDatasets, rather than
        self.inputDataset.
        """
        return makeMergeArgumentParser(cls._DefaultName, cls.inputDatasets[0])

    def readCatalog(self, patchRef):
        """Read input catalogs

        Read all the input datasets given by the 'inputDatasets'
        attribute.

        Parameters
        ----------
        patchRef : `lsst.daf.persistence.ButlerDataRef`
            Data reference for patch

        Returns
        -------
        Tuple consisting of band name and a dict of catalogs, keyed by
        dataset name
        """
        band = patchRef.get(self.config.coaddName + "Coadd_filterLabel", immediate=True).bandLabel
        catalogDict = {}
        for dataset in self.inputDatasets:
            catalog = patchRef.get(self.config.coaddName + "Coadd_" + dataset, immediate=True)
            self.log.info("Read %d sources from %s for band %s: %s" %
                          (len(catalog), dataset, band, patchRef.dataId))
            catalogDict[dataset] = catalog
        return band, catalogDict

    def run(self, catalogs, tract, patch):
        """Merge multiple catalogs.

        Parameters
        ----------
        catalogs : `dict`
            Mapping from filter names to dict of catalogs.
        tract : `int`
            tractId to use for the tractId column
        patch : `str`
            patchId to use for the patchId column

        Returns
        -------
        catalog : `lsst.pipe.tasks.parquetTable.ParquetTable`
            Merged dataframe, with each column prefixed by
            `filter_tag(filt)`, wrapped in the parquet writer shim class.
        """

        dfs = []
        for filt, tableDict in catalogs.items():
            for dataset, table in tableDict.items():
                # Convert afwTable to pandas DataFrame
                df = table.asAstropy().to_pandas().set_index('id', drop=True)

                # Sort columns by name, to ensure matching schema among patches
                df = df.reindex(sorted(df.columns), axis=1)
                df['tractId'] = tract
                df['patchId'] = patch

                # Make columns a 3-level MultiIndex
                df.columns = pd.MultiIndex.from_tuples([(dataset, filt, c) for c in df.columns],
                                                       names=('dataset', 'band', 'column'))
                dfs.append(df)

        catalog = functools.reduce(lambda d1, d2: d1.join(d2), dfs)
        return ParquetTable(dataFrame=catalog)

    def write(self, patchRef, catalog):
        """Write the output.

        Parameters
        ----------
        catalog : `ParquetTable`
            Catalog to write
        patchRef : `lsst.daf.persistence.ButlerDataRef`
            Data reference for patch
        """
        patchRef.put(catalog, self.config.coaddName + "Coadd_" + self.outputDataset)
        # since the filter isn't actually part of the data ID for the dataset we're saving,
        # it's confusing to see it in the log message, even if the butler simply ignores it.
        mergeDataId = patchRef.dataId.copy()
        del mergeDataId["filter"]
        self.log.info("Wrote merged catalog: %s" % (mergeDataId,))

    def writeMetadata(self, dataRefList):
        """No metadata to write, and not sure how to write it for a list of dataRefs.
        """
        pass
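

# A minimal, illustrative sketch (not used by the task) of the column
# structure that `WriteObjectTableTask.run` builds before writing: one row
# per object, one column per (dataset, band, column). The column names
# here are hypothetical; real tables carry the full measurement schema.
def _objectTableColumnsExample():
    idx = pd.Index([1, 2], name='id')
    meas = pd.DataFrame({'base_PsfFlux_instFlux': [1.0, 2.0]}, index=idx)
    meas.columns = pd.MultiIndex.from_tuples(
        [('meas', 'g', c) for c in meas.columns],
        names=('dataset', 'band', 'column'))
    ref = pd.DataFrame({'detect_isPrimary': [True, False]}, index=idx)
    ref.columns = pd.MultiIndex.from_tuples(
        [('ref', 'g', c) for c in ref.columns],
        names=('dataset', 'band', 'column'))
    # `run` reduces over all per-(dataset, band) frames with DataFrame.join.
    return meas.join(ref)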


class WriteSourceTableConfig(pexConfig.Config):
    doApplyExternalPhotoCalib = pexConfig.Field(
        dtype=bool,
        default=False,
        doc=("Add local photoCalib columns from the calexp.photoCalib? Should only be set True if "
             "generating Source Tables from older src tables which do not already have local calib columns")
    )
    doApplyExternalSkyWcs = pexConfig.Field(
        dtype=bool,
        default=False,
        doc=("Add local WCS columns from the calexp.wcs? Should only be set True if "
             "generating Source Tables from older src tables which do not already have local calib columns")
    )


class WriteSourceTableTask(CmdLineTask):
    """Write source table to parquet.
    """
    _DefaultName = "writeSourceTable"
    ConfigClass = WriteSourceTableConfig

    def runDataRef(self, dataRef):
        src = dataRef.get('src')
        if self.config.doApplyExternalPhotoCalib or self.config.doApplyExternalSkyWcs:
            src = self.addCalibColumns(src, dataRef)

        ccdVisitId = dataRef.get('ccdExposureId')
        result = self.run(src, ccdVisitId=ccdVisitId)
        dataRef.put(result.table, 'source')

    def run(self, catalog, ccdVisitId=None):
        """Convert `src` catalog to parquet

        Parameters
        ----------
        catalog : `afwTable.SourceCatalog`
            catalog to be converted
        ccdVisitId : `int`
            ccdVisitId to be added as a column

        Returns
        -------
        result : `lsst.pipe.base.Struct`
            ``table``
                `ParquetTable` version of the input catalog
        """
        self.log.info("Generating parquet table from src catalog")
        df = catalog.asAstropy().to_pandas().set_index('id', drop=True)
        df['ccdVisitId'] = ccdVisitId
        return pipeBase.Struct(table=ParquetTable(dataFrame=df))

    def addCalibColumns(self, catalog, dataRef):
        """Add columns with local calibration evaluated at each centroid

        This exists for backwards compatibility with old repos: it converts
        old src catalogs (which don't have the expected local calib columns)
        to Source Tables.

        Parameters
        ----------
        catalog : `afwTable.SourceCatalog`
            catalog to which calib columns will be added
        dataRef : `lsst.daf.persistence.ButlerDataRef`
            for fetching the calibs from disk.

        Returns
        -------
        newCat : `afwTable.SourceCatalog`
            Source Catalog with requested local calib columns
        """
        mapper = afwTable.SchemaMapper(catalog.schema)
        measureConfig = SingleFrameMeasurementTask.ConfigClass()
        measureConfig.doReplaceWithNoise = False

        # Just need the WCS or the PhotoCalib attached to an exposure
        exposure = dataRef.get('calexp_sub',
                               bbox=lsst.geom.Box2I(lsst.geom.Point2I(0, 0), lsst.geom.Point2I(0, 0)))

        mapper = afwTable.SchemaMapper(catalog.schema)
        mapper.addMinimalSchema(catalog.schema, True)
        schema = mapper.getOutputSchema()

        exposureIdInfo = dataRef.get("expIdInfo")
        measureConfig.plugins.names = []
        if self.config.doApplyExternalSkyWcs:
            plugin = 'base_LocalWcs'
            if plugin in schema:
                raise RuntimeError(f"{plugin} already in src catalog. Set doApplyExternalSkyWcs=False")
            else:
                measureConfig.plugins.names.add(plugin)

        if self.config.doApplyExternalPhotoCalib:
            plugin = 'base_LocalPhotoCalib'
            if plugin in schema:
                raise RuntimeError(f"{plugin} already in src catalog. Set doApplyExternalPhotoCalib=False")
            else:
                measureConfig.plugins.names.add(plugin)

        measurement = SingleFrameMeasurementTask(config=measureConfig, schema=schema)
        newCat = afwTable.SourceCatalog(schema)
        newCat.extend(catalog, mapper=mapper)
        measurement.run(measCat=newCat, exposure=exposure, exposureId=exposureIdInfo.expId)
        return newCat

    def writeMetadata(self, dataRef):
        """No metadata to write.
        """
        pass

    @classmethod
    def _makeArgumentParser(cls):
        parser = ArgumentParser(name=cls._DefaultName)
        parser.add_id_argument("--id", 'src',
                               help="data ID, e.g. --id visit=12345 ccd=0")
        return parser


class PostprocessAnalysis(object):
    """Calculate columns from ParquetTable

    This object manages and organizes an arbitrary set of computations
    on a catalog. The catalog is defined by a
    `lsst.pipe.tasks.parquetTable.ParquetTable` object (or list thereof), such as a
    `deepCoadd_obj` dataset, and the computations are defined by a collection
    of `lsst.pipe.tasks.functor.Functor` objects (or, equivalently,
    a `CompositeFunctor`).

    After the object is initialized, accessing the `.df` attribute (which
    holds the `pandas.DataFrame` containing the results of the calculations) triggers
    computation of said dataframe.

    One of the conveniences of using this object is the ability to define a desired common
    filter for all functors. This enables the same functor collection to be passed to
    several different `PostprocessAnalysis` objects without having to change the original
    functor collection, since the `filt` keyword argument of this object triggers an
    overwrite of the `filt` property for all functors in the collection.

    This object also allows a list of refFlags to be passed, and defines a set of default
    refFlags that are always included even if not requested.

    If a list of `ParquetTable` objects is passed, rather than a single one, then the
    calculations will be mapped over all the input catalogs. In principle, it should
    be straightforward to parallelize this activity, but initial tests have failed
    (see TODO in code comments).

    Parameters
    ----------
    parq : `lsst.pipe.tasks.ParquetTable` (or list of such)
        Source catalog(s) for computation

    functors : `list`, `dict`, or `lsst.pipe.tasks.functors.CompositeFunctor`
        Computations to do (functors that act on `parq`).
        If a dict, the output
        DataFrame will have columns keyed accordingly.
        If a list, the column keys will come from the
        `.shortname` attribute of each functor.

    filt : `str` (optional)
        Filter in which to calculate. If provided,
        this will overwrite any existing `.filt` attribute
        of the provided functors.

    flags : `list` (optional)
        List of flags (per-band) to include in output table.

    refFlags : `list` (optional)
        List of refFlags (only reference band) to include in output table.

    """
    _defaultRefFlags = []
    _defaultFuncs = (('coord_ra', RAColumn()),
                     ('coord_dec', DecColumn()))

    def __init__(self, parq, functors, filt=None, flags=None, refFlags=None):
        self.parq = parq
        self.functors = functors

        self.filt = filt
        self.flags = list(flags) if flags is not None else []
        self.refFlags = list(self._defaultRefFlags)
        if refFlags is not None:
            self.refFlags += list(refFlags)

        self._df = None

    @property
    def defaultFuncs(self):
        funcs = dict(self._defaultFuncs)
        return funcs

    @property
    def func(self):
        additionalFuncs = self.defaultFuncs
        additionalFuncs.update({flag: Column(flag, dataset='ref') for flag in self.refFlags})
        additionalFuncs.update({flag: Column(flag, dataset='meas') for flag in self.flags})

        if isinstance(self.functors, CompositeFunctor):
            func = self.functors
        else:
            func = CompositeFunctor(self.functors)

        func.funcDict.update(additionalFuncs)
        func.filt = self.filt

        return func

    @property
    def noDupCols(self):
        return [name for name, func in self.func.funcDict.items() if func.noDup or func.dataset == 'ref']

    @property
    def df(self):
        if self._df is None:
            self.compute()
        return self._df

    def compute(self, dropna=False, pool=None):
        # map over multiple parquet tables
        if type(self.parq) in (list, tuple):
            if pool is None:
                dflist = [self.func(parq, dropna=dropna) for parq in self.parq]
            else:
                # TODO: Figure out why this doesn't work (pyarrow pickling issues?)
                dflist = pool.map(functools.partial(self.func, dropna=dropna), self.parq)
            self._df = pd.concat(dflist)
        else:
            self._df = self.func(self.parq, dropna=dropna)

        return self._df
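

# An illustrative sketch (not used by the pipeline) of driving
# `PostprocessAnalysis` directly on a deepCoadd_obj-style ParquetTable.
# The column names 'base_PsfFlux_instFlux' and 'detect_isPrimary' are
# assumptions about the input schema.
def _postprocessAnalysisExample(parq):
    funcs = {'ra': RAColumn(),
             'dec': DecColumn(),
             'psfFlux': Column('base_PsfFlux_instFlux', dataset='meas')}
    analysis = PostprocessAnalysis(parq, funcs, filt='g',
                                   refFlags=['detect_isPrimary'])
    # Accessing .df triggers the computation; the result has one column per
    # functor, plus the requested refFlags and the default coordinate columns.
    return analysis.df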


class TransformCatalogBaseConfig(pexConfig.Config):
    functorFile = pexConfig.Field(
        dtype=str,
        doc='Path to YAML file specifying functors to be computed',
        default=None,
        optional=True
    )


class TransformCatalogBaseTask(CmdLineTask):
    """Base class for transforming/standardizing a catalog

    by applying functors that convert units and apply calibrations.
    The purpose of this task is to perform a set of computations on
    an input `ParquetTable` dataset (such as `deepCoadd_obj`) and write the
    results to a new dataset (which needs to be declared in an `outputDataset`
    attribute).

    The calculations to be performed are defined in a YAML file that specifies
    a set of functors to be computed, provided as
    a `--functorFile` config parameter. An example of such a YAML file
    is the following:

        funcs:
            psfMag:
                functor: Mag
                args:
                    - base_PsfFlux
                filt: HSC-G
                dataset: meas
            cmodel_magDiff:
                functor: MagDiff
                args:
                    - modelfit_CModel
                    - base_PsfFlux
                filt: HSC-G
            gauss_magDiff:
                functor: MagDiff
                args:
                    - base_GaussianFlux
                    - base_PsfFlux
                filt: HSC-G
            count:
                functor: Column
                args:
                    - base_InputCount_value
                filt: HSC-G
            deconvolved_moments:
                functor: DeconvolvedMoments
                filt: HSC-G
                dataset: forced_src
        refFlags:
            - calib_psfUsed
            - merge_measurement_i
            - merge_measurement_r
            - merge_measurement_z
            - merge_measurement_y
            - merge_measurement_g
            - base_PixelFlags_flag_inexact_psfCenter
            - detect_isPrimary

    The names for each entry under "funcs" will become the names of columns in the
    output dataset. All the functors referenced are defined in `lsst.pipe.tasks.functors`.
    Positional arguments to be passed to each functor are in the `args` list,
    and any additional entries for each column other than "functor" or "args" (e.g., `'filt'`,
    `'dataset'`) are treated as keyword arguments to be passed to the functor initialization.

    The "refFlags" entry is a shortcut for a set of `Column` functors that keep the original
    column names and are taken from the `'ref'` dataset.

    The "flags" entry will be expanded out per band.

    This task uses the `lsst.pipe.tasks.postprocess.PostprocessAnalysis` object
    to organize and execute the calculations.

    """
    @property
    def _DefaultName(self):
        raise NotImplementedError('Subclass must define "_DefaultName" attribute')

    @property
    def outputDataset(self):
        raise NotImplementedError('Subclass must define "outputDataset" attribute')

    @property
    def inputDataset(self):
        raise NotImplementedError('Subclass must define "inputDataset" attribute')

    @property
    def ConfigClass(self):
        raise NotImplementedError('Subclass must define "ConfigClass" attribute')

    def runDataRef(self, dataRef):
        parq = dataRef.get()
        funcs = self.getFunctors()
        df = self.run(parq, funcs=funcs, dataId=dataRef.dataId)
        self.write(df, dataRef)
        return df

    def run(self, parq, funcs=None, dataId=None, band=None):
        """Do postprocessing calculations

        Takes a `ParquetTable` object and dataId,
        returns a dataframe with results of postprocessing calculations.

        Parameters
        ----------
        parq : `lsst.pipe.tasks.parquetTable.ParquetTable`
            ParquetTable from which calculations are done.
        funcs : `lsst.pipe.tasks.functors.Functors`
            Functors to apply to the table's columns
        dataId : dict, optional
            Used to add a `patchId` column to the output dataframe.
        band : `str`, optional
            Filter band that is being processed.

        Returns
        -------
        `pandas.DataFrame`

        """
        self.log.info("Transforming/standardizing the source table dataId: %s", dataId)

        df = self.transform(band, parq, funcs, dataId).df
        self.log.info("Made a table of %d columns and %d rows", len(df.columns), len(df))
        return df

    def getFunctors(self):
        funcs = CompositeFunctor.from_file(self.config.functorFile)
        funcs.update(dict(PostprocessAnalysis._defaultFuncs))
        return funcs

    def getAnalysis(self, parq, funcs=None, band=None):
        # Avoids disk access if funcs is passed
        if funcs is None:
            funcs = self.getFunctors()
        analysis = PostprocessAnalysis(parq, funcs, filt=band)
        return analysis

    def transform(self, band, parq, funcs, dataId):
        analysis = self.getAnalysis(parq, funcs=funcs, band=band)
        df = analysis.df
        if dataId is not None:
            for key, value in dataId.items():
                df[key] = value

        return pipeBase.Struct(
            df=df,
            analysis=analysis
        )

    def write(self, df, parqRef):
        parqRef.put(ParquetTable(dataFrame=df), self.outputDataset)

    def writeMetadata(self, dataRef):
        """No metadata to write.
        """
        pass
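

# A minimal sketch (hypothetical names, not a registered pipeline task) of the
# subclassing pattern described above; the concrete tasks below follow the
# same structure.
def _exampleTransformSubclass():
    class MyTransformTask(TransformCatalogBaseTask):
        _DefaultName = "myTransform"
        ConfigClass = TransformCatalogBaseConfig
        inputDataset = 'myInputParquetDataset'
        outputDataset = 'myOutputParquetDataset'
    return MyTransformTask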


class TransformObjectCatalogConfig(TransformCatalogBaseConfig):
    coaddName = pexConfig.Field(
        dtype=str,
        default="deep",
        doc="Name of coadd"
    )
    # TODO: remove in DM-27177
    filterMap = pexConfig.DictField(
        keytype=str,
        itemtype=str,
        default={},
        doc=("Dictionary mapping full filter name to short one for column name munging. "
             "These filters determine the output columns no matter what filters the "
             "input data actually contain."),
        deprecated=("Coadds are now identified by the band, so this transform is unused. "
                    "Will be removed after v22.")
    )
    outputBands = pexConfig.ListField(
        dtype=str,
        default=None,
        optional=True,
        doc=("These bands and only these bands will appear in the output,"
             " NaN-filled if the input does not include them."
             " If None, then use all bands found in the input.")
    )
    camelCase = pexConfig.Field(
        dtype=bool,
        default=True,
        doc=("Write per-band column names with camelCase, else underscores. "
             "For example: gPsFlux instead of g_PsFlux.")
    )
    multilevelOutput = pexConfig.Field(
        dtype=bool,
        default=False,
        doc=("Whether results dataframe should have a multilevel column index (True) or be flat "
             "and name-munged (False).")
    )
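

# An illustrative sketch (the values are only examples) of how the flattening
# behaviour of TransformObjectCatalogTask can be configured.
def _transformObjectCatalogConfigExample():
    config = TransformObjectCatalogConfig()
    config.outputBands = ['g', 'r', 'i']  # NaN-fill any band missing from the input
    config.camelCase = True               # e.g. gPsFlux rather than g_PsFlux
    config.multilevelOutput = False       # write a flat, name-munged table
    return config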


class TransformObjectCatalogTask(TransformCatalogBaseTask):
    """Produce a flattened Object Table to match the format specified in
    sdm_schemas.

    Do the same set of postprocessing calculations on all bands.

    This is identical to `TransformCatalogBaseTask`, except that it does the
    specified functor calculations for all filters present in the
    input `deepCoadd_obj` table. Any specific `"filt"` keywords specified
    by the YAML file will be superseded.
    """
    _DefaultName = "transformObjectCatalog"
    ConfigClass = TransformObjectCatalogConfig

    inputDataset = 'deepCoadd_obj'
    outputDataset = 'objectTable'

    @classmethod
    def _makeArgumentParser(cls):
        parser = ArgumentParser(name=cls._DefaultName)
        parser.add_id_argument("--id", cls.inputDataset,
                               ContainerClass=CoaddDataIdContainer,
                               help="data ID, e.g. --id tract=12345 patch=1,2")
        return parser

    def run(self, parq, funcs=None, dataId=None, band=None):
        # NOTE: band kwarg is ignored here.
        dfDict = {}
        analysisDict = {}
        templateDf = pd.DataFrame()
        outputBands = parq.columnLevelNames['band'] if self.config.outputBands is None else \
            self.config.outputBands

        # Perform transform for data of filters that exist in parq.
        for inputBand in parq.columnLevelNames['band']:
            if inputBand not in outputBands:
                self.log.info("Ignoring %s band data in the input", inputBand)
                continue
            self.log.info("Transforming the catalog of band %s", inputBand)
            result = self.transform(inputBand, parq, funcs, dataId)
            dfDict[inputBand] = result.df
            analysisDict[inputBand] = result.analysis
            if templateDf.empty:
                templateDf = result.df

        # Fill NaNs in columns of other wanted bands
        for filt in outputBands:
            if filt not in dfDict:
                self.log.info("Adding empty columns for band %s", filt)
                dfDict[filt] = pd.DataFrame().reindex_like(templateDf)

        # This makes a multilevel column index, with band as first level
        df = pd.concat(dfDict, axis=1, names=['band', 'column'])

        if not self.config.multilevelOutput:
            noDupCols = list(set.union(*[set(v.noDupCols) for v in analysisDict.values()]))
            if dataId is not None:
                noDupCols += list(dataId.keys())
            df = flattenFilters(df, noDupCols=noDupCols, camelCase=self.config.camelCase)

        self.log.info("Made a table of %d columns and %d rows", len(df.columns), len(df))
        return df


class TractObjectDataIdContainer(CoaddDataIdContainer):

    def makeDataRefList(self, namespace):
        """Make self.refList from self.idList

        Generate a list of data references given tract and/or patch.
        This was adapted from `TractQADataIdContainer`, which was
        `TractDataIdContainer` modified to not require "filter".
        Only existing dataRefs are returned.
        """
        def getPatchRefList(tract):
            return [namespace.butler.dataRef(datasetType=self.datasetType,
                                             tract=tract.getId(),
                                             patch="%d,%d" % patch.getIndex()) for patch in tract]

        tractRefs = defaultdict(list)  # Data references for each tract
        for dataId in self.idList:
            skymap = self.getSkymap(namespace)

            if "tract" in dataId:
                tractId = dataId["tract"]
                if "patch" in dataId:
                    tractRefs[tractId].append(namespace.butler.dataRef(datasetType=self.datasetType,
                                                                       tract=tractId,
                                                                       patch=dataId['patch']))
                else:
                    tractRefs[tractId] += getPatchRefList(skymap[tractId])
            else:
                tractRefs = dict((tract.getId(), tractRefs.get(tract.getId(), []) + getPatchRefList(tract))
                                 for tract in skymap)
        outputRefList = []
        for tractRefList in tractRefs.values():
            existingRefs = [ref for ref in tractRefList if ref.datasetExists()]
            outputRefList.append(existingRefs)

        self.refList = outputRefList


class ConsolidateObjectTableConfig(pexConfig.Config):
    coaddName = pexConfig.Field(
        dtype=str,
        default="deep",
        doc="Name of coadd"
    )


class ConsolidateObjectTableTask(CmdLineTask):
    """Write patch-merged source tables to a tract-level parquet file
    """
    _DefaultName = "consolidateObjectTable"
    ConfigClass = ConsolidateObjectTableConfig

    inputDataset = 'objectTable'
    outputDataset = 'objectTable_tract'

    @classmethod
    def _makeArgumentParser(cls):
        parser = ArgumentParser(name=cls._DefaultName)

        parser.add_id_argument("--id", cls.inputDataset,
                               help="data ID, e.g. --id tract=12345",
                               ContainerClass=TractObjectDataIdContainer)
        return parser

    def runDataRef(self, patchRefList):
        df = pd.concat([patchRef.get().toDataFrame() for patchRef in patchRefList])
        patchRefList[0].put(ParquetTable(dataFrame=df), self.outputDataset)

    def writeMetadata(self, dataRef):
        """No metadata to write.
        """
        pass


class TransformSourceTableConfig(TransformCatalogBaseConfig):
    pass


class TransformSourceTableTask(TransformCatalogBaseTask):
    """Transform/standardize a source catalog
    """
    _DefaultName = "transformSourceTable"
    ConfigClass = TransformSourceTableConfig

    inputDataset = 'source'
    outputDataset = 'sourceTable'

    def writeMetadata(self, dataRef):
        """No metadata to write.
        """
        pass

    @classmethod
    def _makeArgumentParser(cls):
        parser = ArgumentParser(name=cls._DefaultName)
        parser.add_id_argument("--id", datasetType=cls.inputDataset,
                               level="sensor",
                               help="data ID, e.g. --id visit=12345 ccd=0")
        return parser

    def runDataRef(self, dataRef):
        """Override to specify band label to run()."""
        parq = dataRef.get()
        funcs = self.getFunctors()
        band = dataRef.get("calexp_filterLabel", immediate=True).bandLabel
        df = self.run(parq, funcs=funcs, dataId=dataRef.dataId, band=band)
        self.write(df, dataRef)
        return df


class ConsolidateVisitSummaryConnections(pipeBase.PipelineTaskConnections,
                                         dimensions=("instrument", "visit",),
                                         defaultTemplates={}):
    calexp = connectionTypes.Input(
        doc="Processed exposures used for metadata",
        name="calexp",
        storageClass="ExposureF",
        dimensions=("instrument", "visit", "detector"),
        deferLoad=True,
        multiple=True,
    )
    visitSummary = connectionTypes.Output(
        doc="Consolidated visit-level exposure metadata",
        name="visitSummary",
        storageClass="ExposureCatalog",
        dimensions=("instrument", "visit"),
    )


class ConsolidateVisitSummaryConfig(pipeBase.PipelineTaskConfig,
                                    pipelineConnections=ConsolidateVisitSummaryConnections):
    """Config for ConsolidateVisitSummaryTask"""
    pass


class ConsolidateVisitSummaryTask(pipeBase.PipelineTask, pipeBase.CmdLineTask):
    """Task to consolidate per-detector visit metadata.

    This task aggregates the following metadata from all the detectors in a
    single visit into an exposure catalog:
    - The visitInfo.
    - The wcs.
    - The photoCalib.
    - The physical_filter and band (if available).
    - The psf size, shape, and effective area at the center of the detector.
    - The corners of the bounding box in right ascension/declination.

    Other quantities such as Psf, ApCorrMap, and TransmissionCurve are not
    persisted here because of storage concerns, and because of their limited
    utility as summary statistics.

    Tests for this task are performed in ci_hsc_gen3.
    """
    _DefaultName = "consolidateVisitSummary"
    ConfigClass = ConsolidateVisitSummaryConfig

    @classmethod
    def _makeArgumentParser(cls):
        parser = ArgumentParser(name=cls._DefaultName)

        parser.add_id_argument("--id", "calexp",
                               help="data ID, e.g. --id visit=12345",
                               ContainerClass=VisitDataIdContainer)
        return parser

    def writeMetadata(self, dataRef):
        """No metadata to persist, so override to remove metadata persistence.
        """
        pass

    def writeConfig(self, butler, clobber=False, doBackup=True):
        """No config to persist, so override to remove config persistence.
        """
        pass

    def runDataRef(self, dataRefList):
        visit = dataRefList[0].dataId['visit']

        self.log.debug("Concatenating metadata from %d per-detector calexps (visit %d)" %
                       (len(dataRefList), visit))

        expCatalog = self._combineExposureMetadata(visit, dataRefList, isGen3=False)

        dataRefList[0].put(expCatalog, 'visitSummary', visit=visit)

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        dataRefs = butlerQC.get(inputRefs.calexp)
        visit = dataRefs[0].dataId.byName()['visit']

        self.log.debug("Concatenating metadata from %d per-detector calexps (visit %d)" %
                       (len(dataRefs), visit))

        expCatalog = self._combineExposureMetadata(visit, dataRefs)

        butlerQC.put(expCatalog, outputRefs.visitSummary)

    def _combineExposureMetadata(self, visit, dataRefs, isGen3=True):
        """Make a combined exposure catalog from a list of dataRefs.

        Parameters
        ----------
        visit : `int`
            Visit identification number
        dataRefs : `list`
            List of calexp dataRefs in visit. May be list of
            `lsst.daf.persistence.ButlerDataRef` (Gen2) or
            `lsst.daf.butler.DeferredDatasetHandle` (Gen3).
        isGen3 : `bool`, optional
            Specifies if this is a Gen3 list of datarefs.

        Returns
        -------
        visitSummary : `lsst.afw.table.ExposureCatalog`
            Exposure catalog with per-detector summary information.
        """
        schema = afwTable.ExposureTable.makeMinimalSchema()
        schema.addField('visit', type='I', doc='Visit number')
        schema.addField('detector_id', type='I', doc='Detector number')
        schema.addField('physical_filter', type='String', size=32, doc='Physical filter')
        schema.addField('band', type='String', size=32, doc='Name of band')
        schema.addField('psfSigma', type='F',
                        doc='PSF model second-moments determinant radius (center of chip) (pixel)')
        schema.addField('psfArea', type='F',
                        doc='PSF model effective area (center of chip) (pixel**2)')
        schema.addField('psfIxx', type='F',
                        doc='PSF model Ixx (center of chip) (pixel**2)')
        schema.addField('psfIyy', type='F',
                        doc='PSF model Iyy (center of chip) (pixel**2)')
        schema.addField('psfIxy', type='F',
                        doc='PSF model Ixy (center of chip) (pixel**2)')
        schema.addField('raCorners', type='ArrayD', size=4,
                        doc='Right Ascension of bounding box corners (degrees)')
        schema.addField('decCorners', type='ArrayD', size=4,
                        doc='Declination of bounding box corners (degrees)')

        cat = afwTable.ExposureCatalog(schema)
        cat.resize(len(dataRefs))

        cat['visit'] = visit

        for i, dataRef in enumerate(dataRefs):
            if isGen3:
                visitInfo = dataRef.get(component='visitInfo')
                filterLabel = dataRef.get(component='filterLabel')
                psf = dataRef.get(component='psf')
                wcs = dataRef.get(component='wcs')
                photoCalib = dataRef.get(component='photoCalib')
                detector = dataRef.get(component='detector')
                bbox = dataRef.get(component='bbox')
                validPolygon = dataRef.get(component='validPolygon')
            else:
                # Note that we need to read the calexp because there is
                # no magic access to the psf except through the exposure.
                gen2_read_bbox = lsst.geom.BoxI(lsst.geom.PointI(0, 0), lsst.geom.PointI(1, 1))
                exp = dataRef.get(datasetType='calexp_sub', bbox=gen2_read_bbox)
                visitInfo = exp.getInfo().getVisitInfo()
                filterLabel = exp.getFilterLabel()
                psf = exp.getPsf()
                wcs = exp.getWcs()
                photoCalib = exp.getPhotoCalib()
                detector = exp.getDetector()
                bbox = dataRef.get(datasetType='calexp_bbox')
                validPolygon = exp.getInfo().getValidPolygon()

            rec = cat[i]
            rec.setBBox(bbox)
            rec.setVisitInfo(visitInfo)
            rec.setWcs(wcs)
            rec.setPhotoCalib(photoCalib)
            rec.setDetector(detector)
            rec.setValidPolygon(validPolygon)

            rec['physical_filter'] = filterLabel.physicalLabel if filterLabel.hasPhysicalLabel() else ""
            rec['band'] = filterLabel.bandLabel if filterLabel.hasBandLabel() else ""
            rec['detector_id'] = detector.getId()
            shape = psf.computeShape(bbox.getCenter())
            rec['psfSigma'] = shape.getDeterminantRadius()
            rec['psfIxx'] = shape.getIxx()
            rec['psfIyy'] = shape.getIyy()
            rec['psfIxy'] = shape.getIxy()
            im = psf.computeKernelImage(bbox.getCenter())
            # The calculation of effective psf area is taken from
            # meas_base/src/PsfFlux.cc#L112. See
            # https://github.com/lsst/meas_base/blob/
            # 750bffe6620e565bda731add1509507f5c40c8bb/src/PsfFlux.cc#L112
            rec['psfArea'] = np.sum(im.array)/np.sum(im.array**2.)

            sph_pts = wcs.pixelToSky(lsst.geom.Box2D(bbox).getCorners())
            rec['raCorners'][:] = [sph.getRa().asDegrees() for sph in sph_pts]
            rec['decCorners'][:] = [sph.getDec().asDegrees() for sph in sph_pts]

        return cat
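

# A small, illustrative numerical check (not used by the task) of the
# effective-area estimate above: for a Gaussian PSF of width sigma,
# sum(im)/sum(im**2) approaches 4*pi*sigma**2 (about 12.57 * sigma**2).
def _gaussianPsfEffectiveAreaExample(sigma=2.0, size=51):
    y, x = np.mgrid[:size, :size] - (size - 1) / 2.0
    im = np.exp(-(x**2 + y**2) / (2.0 * sigma**2))
    im /= im.sum()
    # Returns the discrete estimate and the analytic value for comparison.
    return np.sum(im) / np.sum(im**2), 4.0 * np.pi * sigma**2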


class VisitDataIdContainer(DataIdContainer):
    """DataIdContainer that groups sensor-level ids by visit.
    """

    def makeDataRefList(self, namespace):
        """Make self.refList from self.idList

        Generate a list of data references grouped by visit.

        Parameters
        ----------
        namespace : `argparse.Namespace`
            Namespace used by `lsst.pipe.base.CmdLineTask` to parse command line arguments
        """
        # Group by visits
        visitRefs = defaultdict(list)
        for dataId in self.idList:
            if "visit" in dataId:
                visitId = dataId["visit"]
                # Append all dataRefs from the subset for this visit
                subset = namespace.butler.subset(self.datasetType, dataId=dataId)
                visitRefs[visitId].extend([dataRef for dataRef in subset])

        outputRefList = []
        for refList in visitRefs.values():
            existingRefs = [ref for ref in refList if ref.datasetExists()]
            if existingRefs:
                outputRefList.append(existingRefs)

        self.refList = outputRefList


class ConsolidateSourceTableConfig(pexConfig.Config):
    pass


class ConsolidateSourceTableTask(CmdLineTask):
    """Concatenate `sourceTable` list into a per-visit `sourceTable_visit`
    """
    _DefaultName = 'consolidateSourceTable'
    ConfigClass = ConsolidateSourceTableConfig

    inputDataset = 'sourceTable'
    outputDataset = 'sourceTable_visit'

    def runDataRef(self, dataRefList):
        self.log.info("Concatenating %s per-detector Source Tables", len(dataRefList))
        df = pd.concat([dataRef.get().toDataFrame() for dataRef in dataRefList])
        dataRefList[0].put(ParquetTable(dataFrame=df), self.outputDataset)

    @classmethod
    def _makeArgumentParser(cls):
        parser = ArgumentParser(name=cls._DefaultName)

        parser.add_id_argument("--id", cls.inputDataset,
                               help="data ID, e.g. --id visit=12345",
                               ContainerClass=VisitDataIdContainer)
        return parser

    def writeMetadata(self, dataRef):
        """No metadata to write.
        """
        pass

    def writeConfig(self, butler, clobber=False, doBackup=True):
        """No config to write.
        """
        pass