
1# This file is part of pipe_tasks 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

22import functools 

23import pandas as pd 

24import numpy as np 

25from collections import defaultdict 

26 

27import lsst.geom 

28import lsst.pex.config as pexConfig 

29import lsst.pipe.base as pipeBase 

30from lsst.pipe.base import connectionTypes 

31import lsst.afw.table as afwTable 

32from lsst.meas.base import SingleFrameMeasurementTask 

33from lsst.pipe.base import CmdLineTask, ArgumentParser, DataIdContainer 

34from lsst.coadd.utils.coaddDataIdContainer import CoaddDataIdContainer 

35 

36from .parquetTable import ParquetTable 

37from .multiBandUtils import makeMergeArgumentParser, MergeSourcesRunner 

38from .functors import CompositeFunctor, RAColumn, DecColumn, Column 

39 

40 

41def flattenFilters(df, filterDict, noDupCols=['coord_ra', 'coord_dec'], camelCase=False): 

42 """Flatten a dataframe with a multilevel column index into filter-prefixed columns.

43 """

44 newDf = pd.DataFrame() 

45 for filt, filtShort in filterDict.items(): 

46 subdf = df[filt] 

47 columnFormat = '{0}{1}' if camelCase else '{0}_{1}' 

48 newColumns = {c: columnFormat.format(filtShort, c) 

49 for c in subdf.columns if c not in noDupCols} 

50 cols = list(newColumns.keys()) 

51 newDf = pd.concat([newDf, subdf[cols].rename(columns=newColumns)], axis=1) 

52 

53 newDf = pd.concat([subdf[noDupCols], newDf], axis=1) 

54 return newDf 

55 
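# Illustrative sketch (not part of the pipeline): how `flattenFilters` turns a
# two-level (filter, column) frame into flat, filter-prefixed columns.  The
# filter names, short names, and values below are invented for demonstration.
def _exampleFlattenFilters():
    # Two-level (filter, column) columns, as in a per-filter slice of deepCoadd_obj.
    df = pd.DataFrame({
        ('HSC-G', 'base_PsfFlux'): [1.0, 2.0],
        ('HSC-G', 'coord_ra'): [10.0, 11.0],
        ('HSC-G', 'coord_dec'): [-1.0, -2.0],
        ('HSC-R', 'base_PsfFlux'): [3.0, 4.0],
        ('HSC-R', 'coord_ra'): [10.0, 11.0],
        ('HSC-R', 'coord_dec'): [-1.0, -2.0],
    })
    # With camelCase=False the result has 'g_base_PsfFlux' and 'r_base_PsfFlux'
    # columns plus a single copy of 'coord_ra' / 'coord_dec'.
    return flattenFilters(df, {'HSC-G': 'g', 'HSC-R': 'r'})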

56 

57class WriteObjectTableConfig(pexConfig.Config): 

58 priorityList = pexConfig.ListField( 

59 dtype=str, 

60 default=[], 

61 doc="Priority-ordered list of bands for the merge." 

62 ) 

63 engine = pexConfig.Field( 

64 dtype=str, 

65 default="pyarrow", 

66 doc="Parquet engine for writing (pyarrow or fastparquet)" 

67 ) 

68 coaddName = pexConfig.Field( 

69 dtype=str, 

70 default="deep", 

71 doc="Name of coadd" 

72 ) 

73 

74 def validate(self): 

75 pexConfig.Config.validate(self) 

76 if len(self.priorityList) == 0: 

77 raise RuntimeError("No priority list provided") 

78 
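# Illustrative sketch (not part of the pipeline): the ``engine`` config above
# selects the parquet backend.  In plain pandas that choice looks like the
# snippet below (assuming the chosen backend is installed); in this task the
# actual I/O goes through the `ParquetTable` shim instead.
def _exampleParquetEngine(df, path, engine="pyarrow"):
    # Round-trip a DataFrame through parquet with an explicit engine.
    df.to_parquet(path, engine=engine)
    return pd.read_parquet(path, engine=engine)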

79 

80class WriteObjectTableTask(CmdLineTask): 

81 """Write filter-merged source tables to parquet 

82 """ 

83 _DefaultName = "writeObjectTable" 

84 ConfigClass = WriteObjectTableConfig 

85 RunnerClass = MergeSourcesRunner 

86 

87 # Names of table datasets to be merged 

88 inputDatasets = ('forced_src', 'meas', 'ref') 

89 

90 # Tag of output dataset written by `MergeSourcesTask.write` 

91 outputDataset = 'obj' 

92 

93 def __init__(self, butler=None, schema=None, **kwargs): 

94 # It is a shame that this class can't use the default init for CmdLineTask.

95 # But to do so would require its own special task runner, which is many

96 # more lines of specialization, so this is how it is for now.

97 CmdLineTask.__init__(self, **kwargs) 

98 

99 def runDataRef(self, patchRefList): 

100 """!

101 @brief Merge coadd sources from multiple bands by reading the per-band

102 catalogs and passing them to `run`.

103 @param[in] patchRefList list of data references, one for each filter

104 """

105 catalogs = dict(self.readCatalog(patchRef) for patchRef in patchRefList) 

106 dataId = patchRefList[0].dataId 

107 mergedCatalog = self.run(catalogs, tract=dataId['tract'], patch=dataId['patch']) 

108 self.write(patchRefList[0], mergedCatalog) 

109 

110 @classmethod 

111 def _makeArgumentParser(cls): 

112 """Create a suitable ArgumentParser. 

113 

114 We will use the ArgumentParser to get a list of data 

115 references for patches; the RunnerClass will sort them into lists 

116 of data references for the same patch. 

117 

118 References the first element of self.inputDatasets, rather than

119 a single self.inputDataset.

120 """ 

121 return makeMergeArgumentParser(cls._DefaultName, cls.inputDatasets[0]) 

122 

123 def readCatalog(self, patchRef): 

124 """Read input catalogs 

125 

126 Read all the input datasets given by the 'inputDatasets' 

127 attribute. 

128 

129 Parameters 

130 ---------- 

131 patchRef : `lsst.daf.persistence.ButlerDataRef` 

132 Data reference for patch 

133 

134 Returns 

135 ------- 

136 Tuple consisting of filter name and a dict of catalogs, keyed by 

137 dataset name 

138 """ 

139 filterName = patchRef.dataId["filter"] 

140 catalogDict = {} 

141 for dataset in self.inputDatasets: 

142 catalog = patchRef.get(self.config.coaddName + "Coadd_" + dataset, immediate=True) 

143 self.log.info("Read %d sources from %s for filter %s: %s" % 

144 (len(catalog), dataset, filterName, patchRef.dataId)) 

145 catalogDict[dataset] = catalog 

146 return filterName, catalogDict 

147 

148 def run(self, catalogs, tract, patch): 

149 """Merge multiple catalogs. 

150 

151 Parameters 

152 ---------- 

153 catalogs : `dict` 

154 Mapping from filter names to dict of catalogs. 

155 tract : int 

156 tractId to use for the tractId column 

157 patch : str 

158 patchId to use for the patchId column 

159 

160 Returns 

161 ------- 

162 catalog : `lsst.pipe.tasks.parquetTable.ParquetTable` 

163 Merged dataframe, with each column prefixed by 

164 `filter_tag(filt)`, wrapped in the parquet writer shim class. 

165 """ 

166 

167 dfs = [] 

168 for filt, tableDict in catalogs.items(): 

169 for dataset, table in tableDict.items(): 

170 # Convert afwTable to pandas DataFrame 

171 df = table.asAstropy().to_pandas().set_index('id', drop=True) 

172 

173 # Sort columns by name, to ensure matching schema among patches 

174 df = df.reindex(sorted(df.columns), axis=1) 

175 df['tractId'] = tract 

176 df['patchId'] = patch 

177 

178 # Make columns a 3-level MultiIndex 

179 df.columns = pd.MultiIndex.from_tuples([(dataset, filt, c) for c in df.columns], 

180 names=('dataset', 'filter', 'column')) 

181 dfs.append(df) 

182 

183 catalog = functools.reduce(lambda d1, d2: d1.join(d2), dfs) 

184 return ParquetTable(dataFrame=catalog) 

185 

186 def write(self, patchRef, catalog): 

187 """Write the output. 

188 

189 Parameters 

190 ---------- 

191 catalog : `ParquetTable` 

192 Catalog to write 

193 patchRef : `lsst.daf.persistence.ButlerDataRef` 

194 Data reference for patch 

195 """ 

196 patchRef.put(catalog, self.config.coaddName + "Coadd_" + self.outputDataset) 

197 # since the filter isn't actually part of the data ID for the dataset we're saving, 

198 # it's confusing to see it in the log message, even if the butler simply ignores it. 

199 mergeDataId = patchRef.dataId.copy() 

200 del mergeDataId["filter"] 

201 self.log.info("Wrote merged catalog: %s" % (mergeDataId,)) 

202 

203 def writeMetadata(self, dataRefList): 

204 """No metadata to write, and not sure how to write it for a list of dataRefs. 

205 """ 

206 pass 

207 
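# Illustrative sketch (not part of the pipeline): `WriteObjectTableTask.run`
# joins one DataFrame per (dataset, filter) pair on the object id, under a
# three-level column MultiIndex.  The dataset/filter names and values below
# are invented for demonstration.
def _exampleMergedObjectColumns():
    dfs = []
    for dataset, filt, values in [('meas', 'HSC-G', [1.0, 2.0]),
                                  ('forced_src', 'HSC-G', [3.0, 4.0])]:
        df = pd.DataFrame({'flux': values}, index=pd.Index([10, 11], name='id'))
        df.columns = pd.MultiIndex.from_tuples([(dataset, filt, c) for c in df.columns],
                                               names=('dataset', 'filter', 'column'))
        dfs.append(df)
    # Same reduction as in run(): successive joins on the shared 'id' index.
    return functools.reduce(lambda d1, d2: d1.join(d2), dfs)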

208 

209class WriteSourceTableConfig(pexConfig.Config): 

210 doApplyExternalPhotoCalib = pexConfig.Field( 

211 dtype=bool, 

212 default=False, 

213 doc=("Add local photoCalib columns from the calexp.photoCalib? Should only be set to True when "

214 "generating Source Tables from older src tables, which do not already have local calib columns.")

215 ) 

216 doApplyExternalSkyWcs = pexConfig.Field( 

217 dtype=bool, 

218 default=False, 

219 doc=("Add local WCS columns from the calexp.wcs? Should only be set to True when "

220 "generating Source Tables from older src tables, which do not already have local WCS columns.")

221 ) 

222 

223 

224class WriteSourceTableTask(CmdLineTask): 

225 """Write source table to parquet 

226 """ 

227 _DefaultName = "writeSourceTable" 

228 ConfigClass = WriteSourceTableConfig 

229 

230 def runDataRef(self, dataRef): 

231 src = dataRef.get('src') 

232 if self.config.doApplyExternalPhotoCalib or self.config.doApplyExternalSkyWcs: 

233 src = self.addCalibColumns(src, dataRef) 

234 

235 ccdVisitId = dataRef.get('ccdExposureId') 

236 result = self.run(src, ccdVisitId=ccdVisitId) 

237 dataRef.put(result.table, 'source') 

238 

239 def run(self, catalog, ccdVisitId=None): 

240 """Convert `src` catalog to parquet 

241 

242 Parameters 

243 ---------- 

244 catalog: `afwTable.SourceCatalog` 

245 catalog to be converted 

246 ccdVisitId: `int` 

247 ccdVisitId to be added as a column 

248 

249 Returns 

250 ------- 

251 result : `lsst.pipe.base.Struct` 

252 ``table`` 

253 `ParquetTable` version of the input catalog 

254 """ 

255 self.log.info("Generating parquet table from src catalog") 

256 df = catalog.asAstropy().to_pandas().set_index('id', drop=True) 

257 df['ccdVisitId'] = ccdVisitId 

258 return pipeBase.Struct(table=ParquetTable(dataFrame=df)) 

259 

260 def addCalibColumns(self, catalog, dataRef): 

261 """Add columns with local calibration evaluated at each centroid.

262

263 This is for backwards compatibility with old repos: it exists to convert

264 old src catalogs (which don't have the expected local calib columns)

265 into Source Tables.

266 

267 Parameters 

268 ---------- 

269 catalog: `afwTable.SourceCatalog` 

270 catalog to which calib columns will be added 

271 dataRef: `lsst.daf.persistence.ButlerDataRef`

272 Data reference used for fetching the calibs from disk.

273 

274 Returns 

275 ------- 

276 newCat: `afwTable.SourceCatalog` 

277 Source Catalog with requested local calib columns 

278 """ 

279 mapper = afwTable.SchemaMapper(catalog.schema) 

280 measureConfig = SingleFrameMeasurementTask.ConfigClass() 

281 measureConfig.doReplaceWithNoise = False 

282 

283 # Just need the WCS or the PhotoCalib attached to an exposure

284 exposure = dataRef.get('calexp_sub', 

285 bbox=lsst.geom.Box2I(lsst.geom.Point2I(0, 0), lsst.geom.Point2I(0, 0))) 

286 

287 mapper = afwTable.SchemaMapper(catalog.schema) 

288 mapper.addMinimalSchema(catalog.schema, True) 

289 schema = mapper.getOutputSchema() 

290 

291 exposureIdInfo = dataRef.get("expIdInfo") 

292 measureConfig.plugins.names = [] 

293 if self.config.doApplyExternalSkyWcs: 

294 plugin = 'base_LocalWcs' 

295 if plugin in schema: 

296 raise RuntimeError(f"{plugin} already in src catalog. Set doApplyExternalSkyWcs=False") 

297 else: 

298 measureConfig.plugins.names.add(plugin) 

299 

300 if self.config.doApplyExternalPhotoCalib: 

301 plugin = 'base_LocalPhotoCalib' 

302 if plugin in schema: 

303 raise RuntimeError(f"{plugin} already in src catalog. Set doApplyExternalPhotoCalib=False") 

304 else: 

305 measureConfig.plugins.names.add(plugin) 

306 

307 measurement = SingleFrameMeasurementTask(config=measureConfig, schema=schema) 

308 newCat = afwTable.SourceCatalog(schema) 

309 newCat.extend(catalog, mapper=mapper) 

310 measurement.run(measCat=newCat, exposure=exposure, exposureId=exposureIdInfo.expId) 

311 return newCat 

312 

313 def writeMetadata(self, dataRef): 

314 """No metadata to write. 

315 """ 

316 pass 

317 

318 @classmethod 

319 def _makeArgumentParser(cls): 

320 parser = ArgumentParser(name=cls._DefaultName) 

321 parser.add_id_argument("--id", 'src', 

322 help="data ID, e.g. --id visit=12345 ccd=0") 

323 return parser 

324 

325 

326class PostprocessAnalysis(object): 

327 """Calculate columns from ParquetTable 

328 

329 This object manages and organizes an arbitrary set of computations 

330 on a catalog. The catalog is defined by a 

331 `lsst.pipe.tasks.parquetTable.ParquetTable` object (or list thereof), such as a 

332 `deepCoadd_obj` dataset, and the computations are defined by a collection 

333 of `lsst.pipe.tasks.functors.Functor` objects (or, equivalently,

334 a `CompositeFunctor`). 

335 

336 After the object is initialized, accessing the `.df` attribute (which 

337 holds the `pandas.DataFrame` containing the results of the calculations) triggers 

338 computation of said dataframe. 

339 

340 One of the conveniences of using this object is the ability to define a desired common 

341 filter for all functors. This enables the same functor collection to be passed to 

342 several different `PostprocessAnalysis` objects without having to change the original 

343 functor collection, since the `filt` keyword argument of this object triggers an 

344 overwrite of the `filt` property for all functors in the collection. 

345 

346 This object also allows a list of refFlags to be passed, and defines a set of default 

347 refFlags that are always included even if not requested. 

348 

349 If a list of `ParquetTable` objects is passed, rather than a single one, then the

350 calculations will be mapped over all the input catalogs. In principle, it should 

351 be straightforward to parallelize this activity, but initial tests have failed 

352 (see TODO in code comments). 

353 

354 Parameters 

355 ---------- 

356 parq : `lsst.pipe.tasks.parquetTable.ParquetTable` (or list of such)

357 Source catalog(s) for computation 

358 

359 functors : `list`, `dict`, or `lsst.pipe.tasks.functors.CompositeFunctor` 

360 Computations to do (functors that act on `parq`). 

361 If a dict, the output 

362 DataFrame will have columns keyed accordingly. 

363 If a list, the column keys will come from the 

364 `.shortname` attribute of each functor. 

365 

366 filt : `str` (optional) 

367 Filter in which to calculate. If provided, 

368 this will overwrite any existing `.filt` attribute 

369 of the provided functors. 

370 

371 flags : `list` (optional) 

372 List of flags (per-band) to include in output table. 

373 

374 refFlags : `list` (optional) 

375 List of refFlags (only reference band) to include in output table. 

376 

377 

378 """ 

379 _defaultRefFlags = [] 

380 _defaultFuncs = (('coord_ra', RAColumn()), 

381 ('coord_dec', DecColumn())) 

382 

383 def __init__(self, parq, functors, filt=None, flags=None, refFlags=None): 

384 self.parq = parq 

385 self.functors = functors 

386 

387 self.filt = filt 

388 self.flags = list(flags) if flags is not None else [] 

389 self.refFlags = list(self._defaultRefFlags) 

390 if refFlags is not None: 

391 self.refFlags += list(refFlags) 

392 

393 self._df = None 

394 

395 @property 

396 def defaultFuncs(self): 

397 funcs = dict(self._defaultFuncs) 

398 return funcs 

399 

400 @property 

401 def func(self): 

402 additionalFuncs = self.defaultFuncs 

403 additionalFuncs.update({flag: Column(flag, dataset='ref') for flag in self.refFlags}) 

404 additionalFuncs.update({flag: Column(flag, dataset='meas') for flag in self.flags}) 

405 

406 if isinstance(self.functors, CompositeFunctor): 

407 func = self.functors 

408 else: 

409 func = CompositeFunctor(self.functors) 

410 

411 func.funcDict.update(additionalFuncs) 

412 func.filt = self.filt 

413 

414 return func 

415 

416 @property 

417 def noDupCols(self): 

418 return [name for name, func in self.func.funcDict.items() if func.noDup or func.dataset == 'ref'] 

419 

420 @property 

421 def df(self): 

422 if self._df is None: 

423 self.compute() 

424 return self._df 

425 

426 def compute(self, dropna=False, pool=None): 

427 # map over multiple parquet tables 

428 if type(self.parq) in (list, tuple): 

429 if pool is None: 

430 dflist = [self.func(parq, dropna=dropna) for parq in self.parq] 

431 else: 

432 # TODO: Figure out why this doesn't work (pyarrow pickling issues?) 

433 dflist = pool.map(functools.partial(self.func, dropna=dropna), self.parq) 

434 self._df = pd.concat(dflist) 

435 else: 

436 self._df = self.func(self.parq, dropna=dropna) 

437 

438 return self._df 

439 
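# Illustrative sketch (not part of the pipeline): typical use of
# `PostprocessAnalysis` as described in its docstring.  `parq` is assumed to
# be a deepCoadd_obj-style ParquetTable already read from the butler; the
# 'base_PsfFlux_instFlux' column name and the 'HSC-G' filter are hypothetical.
def _examplePostprocessAnalysis(parq):
    funcs = {'ra': RAColumn(),
             'dec': DecColumn(),
             'psfFlux': Column('base_PsfFlux_instFlux', dataset='meas')}
    analysis = PostprocessAnalysis(parq, funcs, filt='HSC-G',
                                   refFlags=['detect_isPrimary'])
    # Accessing .df triggers the computation and returns a pandas DataFrame.
    return analysis.df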

440 

441class TransformCatalogBaseConfig(pexConfig.Config): 

442 functorFile = pexConfig.Field( 

443 dtype=str, 

444 doc='Path to YAML file specifying functors to be computed', 

445 default=None, 

446 optional=True 

447 ) 

448 

449 

450class TransformCatalogBaseTask(CmdLineTask): 

451 """Base class for transforming/standardizing a catalog 

452 

453 by applying functors that convert units and apply calibrations. 

454 The purpose of this task is to perform a set of computations on 

455 an input `ParquetTable` dataset (such as `deepCoadd_obj`) and write the 

456 results to a new dataset (which needs to be declared in an `outputDataset` 

457 attribute). 

458 

459 The calculations to be performed are defined in a YAML file that specifies 

460 a set of functors to be computed, provided as 

461 the `functorFile` config parameter. An example of such a YAML file

462 is the following: 

463 

464 funcs: 

465 psfMag: 

466 functor: Mag 

467 args: 

468 - base_PsfFlux 

469 filt: HSC-G 

470 dataset: meas 

471 cmodel_magDiff: 

472 functor: MagDiff 

473 args: 

474 - modelfit_CModel 

475 - base_PsfFlux 

476 filt: HSC-G 

477 gauss_magDiff: 

478 functor: MagDiff 

479 args: 

480 - base_GaussianFlux 

481 - base_PsfFlux 

482 filt: HSC-G 

483 count: 

484 functor: Column 

485 args: 

486 - base_InputCount_value 

487 filt: HSC-G 

488 deconvolved_moments: 

489 functor: DeconvolvedMoments 

490 filt: HSC-G 

491 dataset: forced_src 

492 refFlags: 

493 - calib_psfUsed 

494 - merge_measurement_i 

495 - merge_measurement_r 

496 - merge_measurement_z 

497 - merge_measurement_y 

498 - merge_measurement_g 

499 - base_PixelFlags_flag_inexact_psfCenter 

500 - detect_isPrimary 

501 

502 The names for each entry under "funcs" will become the names of columns in the

503 output dataset. All the functors referenced are defined in `lsst.pipe.tasks.functors`. 

504 Positional arguments to be passed to each functor are in the `args` list, 

505 and any additional entries for each column other than "functor" or "args" (e.g., `'filt'`, 

506 `'dataset'`) are treated as keyword arguments to be passed to the functor initialization. 

507 

508 The "refFlags" entry is a shortcut for a set of `Column` functors that keep the original

509 column name and are taken from the `'ref'` dataset.

510 

511 The "flags" entry will be expanded out per band. 

512 

513 Note, if `'filter'` is provided as part of the `dataId` when running this task (even though 

514 `deepCoadd_obj` does not use `'filter'`), then this will override the `filt` kwargs 

515 provided in the YAML file, and the calculations will be done in that filter. 

516 

517 This task uses the `lsst.pipe.tasks.postprocess.PostprocessAnalysis` object 

518 to organize and execute the calculations.

519 

520 """ 

521 @property 

522 def _DefaultName(self): 

523 raise NotImplementedError('Subclass must define "_DefaultName" attribute') 

524 

525 @property 

526 def outputDataset(self): 

527 raise NotImplementedError('Subclass must define "outputDataset" attribute') 

528 

529 @property 

530 def inputDataset(self): 

531 raise NotImplementedError('Subclass must define "inputDataset" attribute') 

532 

533 @property 

534 def ConfigClass(self): 

535 raise NotImplementedError('Subclass must define "ConfigClass" attribute') 

536 

537 def runDataRef(self, dataRef): 

538 parq = dataRef.get() 

539 funcs = self.getFunctors() 

540 df = self.run(parq, funcs=funcs, dataId=dataRef.dataId) 

541 self.write(df, dataRef) 

542 return df 

543 

544 def run(self, parq, funcs=None, dataId=None): 

545 """Do postprocessing calculations 

546 

547 Takes a `ParquetTable` object and dataId, 

548 returns a dataframe with results of postprocessing calculations. 

549 

550 Parameters 

551 ---------- 

552 parq : `lsst.pipe.tasks.parquetTable.ParquetTable` 

553 ParquetTable from which calculations are done. 

554 funcs : `lsst.pipe.tasks.functors.Functors` 

555 Functors to apply to the table's columns 

556 dataId : dict, optional 

557 If provided, its keys are added as columns to the output dataframe.

558 

559 Returns 

560 -------

561 `pandas.DataFrame` 

562 

563 """ 

564 self.log.info("Transforming/standardizing the source table dataId: %s", dataId) 

565 

566 filt = dataId.get('filter', None) 

567 df = self.transform(filt, parq, funcs, dataId).df 

568 self.log.info("Made a table of %d columns and %d rows", len(df.columns), len(df)) 

569 return df 

570 

571 def getFunctors(self): 

572 funcs = CompositeFunctor.from_file(self.config.functorFile) 

573 funcs.update(dict(PostprocessAnalysis._defaultFuncs)) 

574 return funcs 

575 

576 def getAnalysis(self, parq, funcs=None, filt=None): 

577 # Avoids disk access if funcs is passed 

578 if funcs is None: 

579 funcs = self.getFunctors() 

580 analysis = PostprocessAnalysis(parq, funcs, filt=filt) 

581 return analysis 

582 

583 def transform(self, filt, parq, funcs, dataId): 

584 analysis = self.getAnalysis(parq, funcs=funcs, filt=filt) 

585 df = analysis.df 

586 if dataId is not None: 

587 for key, value in dataId.items(): 

588 df[key] = value 

589 

590 return pipeBase.Struct( 

591 df=df, 

592 analysis=analysis 

593 ) 

594 

595 def write(self, df, parqRef): 

596 parqRef.put(ParquetTable(dataFrame=df), self.outputDataset) 

597 

598 def writeMetadata(self, dataRef): 

599 """No metadata to write. 

600 """ 

601 pass 

602 
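# Illustrative sketch (not part of the pipeline): loading a functor
# specification of the form shown in the `TransformCatalogBaseTask` docstring.
# `CompositeFunctor.from_file` is what `getFunctors` calls on the configured
# `functorFile`; the YAML content and the path argument here are for
# demonstration only.
def _exampleLoadFunctorFile(path):
    yamlSpec = """
funcs:
  psfMag:
    functor: Mag
    args:
      - base_PsfFlux
    filt: HSC-G
    dataset: meas
refFlags:
  - detect_isPrimary
"""
    with open(path, 'w') as f:
        f.write(yamlSpec)
    return CompositeFunctor.from_file(path)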

603 

604class TransformObjectCatalogConfig(TransformCatalogBaseConfig): 

605 coaddName = pexConfig.Field( 

606 dtype=str, 

607 default="deep", 

608 doc="Name of coadd" 

609 ) 

610 filterMap = pexConfig.DictField( 

611 keytype=str, 

612 itemtype=str, 

613 default={}, 

614 doc=("Dictionary mapping full filter name to short one for column name munging. "

615 "These filters determine the output columns no matter what filters the " 

616 "input data actually contain.") 

617 ) 

618 camelCase = pexConfig.Field( 

619 dtype=bool, 

620 default=True, 

621 doc=("Write per-filter column names in camelCase, else with an underscore. "

622 "For example: gPsfFlux instead of g_PsfFlux.") 

623 ) 

624 multilevelOutput = pexConfig.Field( 

625 dtype=bool, 

626 default=False, 

627 doc=("Whether results dataframe should have a multilevel column index (True) or be flat " 

628 "and name-munged (False).") 

629 ) 

630 

631 

632class TransformObjectCatalogTask(TransformCatalogBaseTask): 

633 """Compute the Flattened Object Table as defined in the DPDD.

634 

635 Do the same set of postprocessing calculations on all bands 

636 

637 This is identical to `TransformCatalogBaseTask`, except that it does the

638 specified functor calculations for all filters present in the 

639 input `deepCoadd_obj` table. Any specific `"filt"` keywords specified 

640 by the YAML file will be superseded.

641 """ 

642 _DefaultName = "transformObjectCatalog" 

643 ConfigClass = TransformObjectCatalogConfig 

644 

645 inputDataset = 'deepCoadd_obj' 

646 outputDataset = 'objectTable' 

647 

648 @classmethod 

649 def _makeArgumentParser(cls): 

650 parser = ArgumentParser(name=cls._DefaultName) 

651 parser.add_id_argument("--id", cls.inputDataset, 

652 ContainerClass=CoaddDataIdContainer, 

653 help="data ID, e.g. --id tract=12345 patch=1,2") 

654 return parser 

655 

656 def run(self, parq, funcs=None, dataId=None): 

657 dfDict = {} 

658 analysisDict = {} 

659 templateDf = pd.DataFrame() 

660 # Perform transform for data of filters that exist in parq and are 

661 # specified in config.filterMap 

662 for filt in parq.columnLevelNames['filter']: 

663 if filt not in self.config.filterMap: 

664 self.log.info("Ignoring %s data in the input", filt) 

665 continue 

666 self.log.info("Transforming the catalog of filter %s", filt) 

667 result = self.transform(filt, parq, funcs, dataId) 

668 dfDict[filt] = result.df 

669 analysisDict[filt] = result.analysis 

670 if templateDf.empty: 

671 templateDf = result.df 

672 

673 # Fill NaNs in columns of other wanted filters 

674 for filt in self.config.filterMap: 

675 if filt not in dfDict: 

676 self.log.info("Adding empty columns for filter %s", filt) 

677 dfDict[filt] = pd.DataFrame().reindex_like(templateDf) 

678 

679 # This makes a multilevel column index, with filter as first level 

680 df = pd.concat(dfDict, axis=1, names=['filter', 'column']) 

681 

682 if not self.config.multilevelOutput: 

683 noDupCols = list(set.union(*[set(v.noDupCols) for v in analysisDict.values()])) 

684 if dataId is not None: 

685 noDupCols += list(dataId.keys()) 

686 df = flattenFilters(df, self.config.filterMap, noDupCols=noDupCols, 

687 camelCase=self.config.camelCase) 

688 

689 self.log.info("Made a table of %d columns and %d rows", len(df.columns), len(df)) 

690 return df 

691 
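# Illustrative sketch (not part of the pipeline): how
# `TransformObjectCatalogTask.run` pads filters that are listed in `filterMap`
# but absent from the input, then stacks everything under a (filter, column)
# MultiIndex.  Filter names, columns, and values are invented for demonstration.
def _exampleFilterPadding():
    templateDf = pd.DataFrame({'psfMag': [20.0, 21.0]}, index=[10, 11])
    dfDict = {'HSC-G': templateDf}
    for filt in ('HSC-G', 'HSC-R'):
        if filt not in dfDict:
            # Missing filters get the template's shape and index, filled with NaN.
            dfDict[filt] = pd.DataFrame().reindex_like(templateDf)
    # The filter becomes the first level of the column MultiIndex.
    return pd.concat(dfDict, axis=1, names=['filter', 'column'])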

692 

693class TractObjectDataIdContainer(CoaddDataIdContainer): 

694 

695 def makeDataRefList(self, namespace): 

696 """Make self.refList from self.idList 

697 

698 Generate a list of data references given tract and/or patch. 

699 This was adapted from `TractQADataIdContainer`, which was 

700 `TractDataIdContainer` modified to not require "filter".

701 Only existing dataRefs are returned. 

702 """ 

703 def getPatchRefList(tract): 

704 return [namespace.butler.dataRef(datasetType=self.datasetType, 

705 tract=tract.getId(), 

706 patch="%d,%d" % patch.getIndex()) for patch in tract] 

707 

708 tractRefs = defaultdict(list) # Data references for each tract 

709 for dataId in self.idList: 

710 skymap = self.getSkymap(namespace) 

711 

712 if "tract" in dataId: 

713 tractId = dataId["tract"] 

714 if "patch" in dataId: 

715 tractRefs[tractId].append(namespace.butler.dataRef(datasetType=self.datasetType, 

716 tract=tractId, 

717 patch=dataId['patch'])) 

718 else: 

719 tractRefs[tractId] += getPatchRefList(skymap[tractId]) 

720 else: 

721 tractRefs = dict((tract.getId(), tractRefs.get(tract.getId(), []) + getPatchRefList(tract)) 

722 for tract in skymap) 

723 outputRefList = [] 

724 for tractRefList in tractRefs.values(): 

725 existingRefs = [ref for ref in tractRefList if ref.datasetExists()] 

726 outputRefList.append(existingRefs) 

727 

728 self.refList = outputRefList 

729 

730 

731class ConsolidateObjectTableConfig(pexConfig.Config): 

732 coaddName = pexConfig.Field( 

733 dtype=str, 

734 default="deep", 

735 doc="Name of coadd" 

736 ) 

737 

738 

739class ConsolidateObjectTableTask(CmdLineTask): 

740 """Write patch-merged source tables to a tract-level parquet file 

741 """ 

742 _DefaultName = "consolidateObjectTable" 

743 ConfigClass = ConsolidateObjectTableConfig 

744 

745 inputDataset = 'objectTable' 

746 outputDataset = 'objectTable_tract' 

747 

748 @classmethod 

749 def _makeArgumentParser(cls): 

750 parser = ArgumentParser(name=cls._DefaultName) 

751 

752 parser.add_id_argument("--id", cls.inputDataset, 

753 help="data ID, e.g. --id tract=12345", 

754 ContainerClass=TractObjectDataIdContainer) 

755 return parser 

756 

757 def runDataRef(self, patchRefList): 

758 df = pd.concat([patchRef.get().toDataFrame() for patchRef in patchRefList]) 

759 patchRefList[0].put(ParquetTable(dataFrame=df), self.outputDataset) 

760 

761 def writeMetadata(self, dataRef): 

762 """No metadata to write. 

763 """ 

764 pass 

765 

766 

767class TransformSourceTableConfig(TransformCatalogBaseConfig): 

768 pass 

769 

770 

771class TransformSourceTableTask(TransformCatalogBaseTask): 

772 """Transform/standardize a source catalog 

773 """ 

774 _DefaultName = "transformSourceTable" 

775 ConfigClass = TransformSourceTableConfig 

776 

777 inputDataset = 'source' 

778 outputDataset = 'sourceTable' 

779 

780 def writeMetadata(self, dataRef): 

781 """No metadata to write. 

782 """ 

783 pass 

784 

785 @classmethod 

786 def _makeArgumentParser(cls): 

787 parser = ArgumentParser(name=cls._DefaultName) 

788 parser.add_id_argument("--id", datasetType=cls.inputDataset, 

789 level="sensor", 

790 help="data ID, e.g. --id visit=12345 ccd=0") 

791 return parser 

792 

793 

794class ConsolidateVisitSummaryConnections(pipeBase.PipelineTaskConnections, 

795 dimensions=("instrument", "visit",), 

796 defaultTemplates={}): 

797 calexp = connectionTypes.Input( 

798 doc="Processed exposures used for metadata", 

799 name="calexp", 

800 storageClass="ExposureF", 

801 dimensions=("instrument", "visit", "detector"), 

802 deferLoad=True, 

803 multiple=True, 

804 ) 

805 visitSummary = connectionTypes.Output( 

806 doc="Consolidated visit-level exposure metadata", 

807 name="visitSummary", 

808 storageClass="ExposureCatalog", 

809 dimensions=("instrument", "visit"), 

810 ) 

811 

812 

813class ConsolidateVisitSummaryConfig(pipeBase.PipelineTaskConfig, 

814 pipelineConnections=ConsolidateVisitSummaryConnections): 

815 """Config for ConsolidateVisitSummaryTask""" 

816 pass 

817 

818 

819class ConsolidateVisitSummaryTask(pipeBase.PipelineTask, pipeBase.CmdLineTask): 

820 """Task to consolidate per-detector visit metadata. 

821 

822 This task aggregates the following metadata from all the detectors in a 

823 single visit into an exposure catalog: 

824 - The visitInfo. 

825 - The wcs. 

826 - The photoCalib. 

827 - The physical_filter and band (if available). 

828 - The psf size, shape, and effective area at the center of the detector. 

829 - The corners of the bounding box in right ascension/declination. 

830 

831 Other quantities such as Psf, ApCorrMap, and TransmissionCurve are not 

832 persisted here because of storage concerns, and because of their limited 

833 utility as summary statistics. 

834 

835 Tests for this task are performed in ci_hsc_gen3. 

836 """ 

837 _DefaultName = "consolidateVisitSummary" 

838 ConfigClass = ConsolidateVisitSummaryConfig 

839 

840 @classmethod 

841 def _makeArgumentParser(cls): 

842 parser = ArgumentParser(name=cls._DefaultName) 

843 

844 parser.add_id_argument("--id", "calexp", 

845 help="data ID, e.g. --id visit=12345", 

846 ContainerClass=VisitDataIdContainer) 

847 return parser 

848 

849 def writeMetadata(self, dataRef): 

850 """No metadata to persist, so override to remove metadata persistence.

851 """ 

852 pass 

853 

854 def writeConfig(self, butler, clobber=False, doBackup=True): 

855 """No config to persist, so override to remove config persistence.

856 """ 

857 pass 

858 

859 def runDataRef(self, dataRefList): 

860 visit = dataRefList[0].dataId['visit'] 

861 

862 self.log.debug("Concatenating metadata from %d per-detector calexps (visit %d)" % 

863 (len(dataRefList), visit)) 

864 

865 expCatalog = self._combineExposureMetadata(visit, dataRefList, isGen3=False) 

866 

867 dataRefList[0].put(expCatalog, 'visitSummary', visit=visit) 

868 

869 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

870 dataRefs = butlerQC.get(inputRefs.calexp) 

871 visit = dataRefs[0].dataId.byName()['visit'] 

872 

873 self.log.debug("Concatenating metadata from %d per-detector calexps (visit %d)" % 

874 (len(dataRefs), visit)) 

875 

876 expCatalog = self._combineExposureMetadata(visit, dataRefs) 

877 

878 butlerQC.put(expCatalog, outputRefs.visitSummary) 

879 

880 def _combineExposureMetadata(self, visit, dataRefs, isGen3=True): 

881 """Make a combined exposure catalog from a list of dataRefs. 

882 

883 Parameters 

884 ---------- 

885 visit : `int` 

886 Visit identification number 

887 dataRefs : `list` 

888 List of calexp dataRefs in visit. May be list of 

889 `lsst.daf.persistence.ButlerDataRef` (Gen2) or 

890 `lsst.daf.butler.DeferredDatasetHandle` (Gen3). 

891 isGen3 : `bool`, optional 

892 Specifies if this is a Gen3 list of datarefs. 

893 

894 Returns 

895 ------- 

896 visitSummary : `lsst.afw.table.ExposureCatalog` 

897 Exposure catalog with per-detector summary information. 

898 """ 

899 schema = afwTable.ExposureTable.makeMinimalSchema() 

900 schema.addField('visit', type='I', doc='Visit number') 

901 schema.addField('detector_id', type='I', doc='Detector number') 

902 schema.addField('physical_filter', type='String', size=32, doc='Physical filter') 

903 schema.addField('band', type='String', size=32, doc='Name of band') 

904 schema.addField('psfSigma', type='F', 

905 doc='PSF model second-moments determinant radius (center of chip) (pixel)') 

906 schema.addField('psfArea', type='F', 

907 doc='PSF model effective area (center of chip) (pixel**2)') 

908 schema.addField('psfIxx', type='F', 

909 doc='PSF model Ixx (center of chip) (pixel**2)') 

910 schema.addField('psfIyy', type='F', 

911 doc='PSF model Iyy (center of chip) (pixel**2)') 

912 schema.addField('psfIxy', type='F', 

913 doc='PSF model Ixy (center of chip) (pixel**2)') 

914 schema.addField('raCorners', type='ArrayD', size=4, 

915 doc='Right Ascension of bounding box corners (degrees)') 

916 schema.addField('decCorners', type='ArrayD', size=4, 

917 doc='Declination of bounding box corners (degrees)') 

918 

919 cat = afwTable.ExposureCatalog(schema) 

920 cat.resize(len(dataRefs)) 

921 

922 cat['visit'] = visit 

923 

924 for i, dataRef in enumerate(dataRefs): 

925 if isGen3: 

926 visitInfo = dataRef.get(component='visitInfo') 

927 filter_ = dataRef.get(component='filter') 

928 psf = dataRef.get(component='psf') 

929 wcs = dataRef.get(component='wcs') 

930 photoCalib = dataRef.get(component='photoCalib') 

931 detector = dataRef.get(component='detector') 

932 bbox = dataRef.get(component='bbox') 

933 validPolygon = dataRef.get(component='validPolygon') 

934 else: 

935 # Note that we need to read the calexp because there is 

936 # no magic access to the psf except through the exposure. 

937 gen2_read_bbox = lsst.geom.BoxI(lsst.geom.PointI(0, 0), lsst.geom.PointI(1, 1)) 

938 exp = dataRef.get(datasetType='calexp_sub', bbox=gen2_read_bbox) 

939 visitInfo = exp.getInfo().getVisitInfo() 

940 filter_ = exp.getFilter() 

941 psf = exp.getPsf() 

942 wcs = exp.getWcs() 

943 photoCalib = exp.getPhotoCalib() 

944 detector = exp.getDetector() 

945 bbox = dataRef.get(datasetType='calexp_bbox') 

946 validPolygon = exp.getInfo().getValidPolygon() 

947 

948 rec = cat[i] 

949 rec.setBBox(bbox) 

950 rec.setVisitInfo(visitInfo) 

951 rec.setWcs(wcs) 

952 rec.setPhotoCalib(photoCalib) 

953 rec.setDetector(detector) 

954 rec.setValidPolygon(validPolygon) 

955 

956 # TODO: When RFC-730 is implemented we can fill both of these. 

957 rec['physical_filter'] = filter_.getName() 

958 rec['band'] = '' 

959 rec['detector_id'] = detector.getId() 

960 shape = psf.computeShape(bbox.getCenter()) 

961 rec['psfSigma'] = shape.getDeterminantRadius() 

962 rec['psfIxx'] = shape.getIxx() 

963 rec['psfIyy'] = shape.getIyy() 

964 rec['psfIxy'] = shape.getIxy() 

965 im = psf.computeKernelImage(bbox.getCenter()) 

966 # The calculation of effective psf area is taken from 

967 # meas_base/src/PsfFlux.cc#L112. See 

968 # https://github.com/lsst/meas_base/blob/ 

969 # 750bffe6620e565bda731add1509507f5c40c8bb/src/PsfFlux.cc#L112 

970 rec['psfArea'] = np.sum(im.array)/np.sum(im.array**2.) 

971 

972 sph_pts = wcs.pixelToSky(lsst.geom.Box2D(bbox).getCorners()) 

973 rec['raCorners'][:] = [sph.getRa().asDegrees() for sph in sph_pts] 

974 rec['decCorners'][:] = [sph.getDec().asDegrees() for sph in sph_pts] 

975 

976 return cat 

977 
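# Illustrative sketch (not part of the pipeline): the effective-area formula
# used for 'psfArea' above, sum(p) / sum(p**2), applied to a toy Gaussian
# kernel.  For a well-sampled Gaussian this is close to 4 * pi * sigma**2; the
# sigma and stamp size below are invented for demonstration.
def _exampleEffectivePsfArea(sigma=2.0, halfSize=15):
    yy, xx = np.mgrid[-halfSize:halfSize + 1, -halfSize:halfSize + 1]
    kernel = np.exp(-0.5 * (xx**2 + yy**2) / sigma**2)
    kernel /= kernel.sum()  # normalize like a PSF kernel image
    return np.sum(kernel) / np.sum(kernel**2)  # ~ 4 * np.pi * sigma**2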

978 

979class VisitDataIdContainer(DataIdContainer): 

980 """DataIdContainer that groups sensor-level data IDs by visit.

981 """ 

982 

983 def makeDataRefList(self, namespace): 

984 """Make self.refList from self.idList 

985 

986 Generate a list of data references grouped by visit. 

987 

988 Parameters 

989 ---------- 

990 namespace : `argparse.Namespace` 

991 Namespace used by `lsst.pipe.base.CmdLineTask` to parse command line arguments 

992 """ 

993 def ccdDataRefList(visitId): 

994 """Get all possible ccds for a given visit""" 

995 ccds = namespace.butler.queryMetadata('src', ['ccd'], dataId={'visit': visitId}) 

996 return [namespace.butler.dataRef(datasetType=self.datasetType, 

997 visit=visitId, 

998 ccd=ccd) for ccd in ccds] 

999 # Group by visits 

1000 visitRefs = defaultdict(list) 

1001 for dataId in self.idList: 

1002 if "visit" in dataId: 

1003 visitId = dataId["visit"] 

1004 if "ccd" in dataId: 

1005 visitRefs[visitId].append(namespace.butler.dataRef(datasetType=self.datasetType, 

1006 visit=visitId, ccd=dataId['ccd'])) 

1007 else: 

1008 visitRefs[visitId] += ccdDataRefList(visitId) 

1009 outputRefList = [] 

1010 for refList in visitRefs.values(): 

1011 existingRefs = [ref for ref in refList if ref.datasetExists()] 

1012 outputRefList.append(existingRefs) 

1013 

1014 self.refList = outputRefList 

1015 

1016 

1017class ConsolidateSourceTableConfig(pexConfig.Config): 

1018 pass 

1019 

1020 

1021class ConsolidateSourceTableTask(CmdLineTask): 

1022 """Concatenate `sourceTable` list into a per-visit `sourceTable_visit` 

1023 """ 

1024 _DefaultName = 'consolidateSourceTable' 

1025 ConfigClass = ConsolidateSourceTableConfig 

1026 

1027 inputDataset = 'sourceTable' 

1028 outputDataset = 'sourceTable_visit' 

1029 

1030 def runDataRef(self, dataRefList): 

1031 self.log.info("Concatenating %d per-detector Source Tables", len(dataRefList))

1032 df = pd.concat([dataRef.get().toDataFrame() for dataRef in dataRefList]) 

1033 dataRefList[0].put(ParquetTable(dataFrame=df), self.outputDataset) 

1034 

1035 @classmethod 

1036 def _makeArgumentParser(cls): 

1037 parser = ArgumentParser(name=cls._DefaultName) 

1038 

1039 parser.add_id_argument("--id", cls.inputDataset, 

1040 help="data ID, e.g. --id visit=12345", 

1041 ContainerClass=VisitDataIdContainer) 

1042 return parser 

1043 

1044 def writeMetadata(self, dataRef): 

1045 """No metadata to write. 

1046 """ 

1047 pass 

1048 

1049 def writeConfig(self, butler, clobber=False, doBackup=True): 

1050 """No config to write. 

1051 """ 

1052 pass