# This file is part of pipe_tasks
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

import functools
import pandas as pd
from collections import defaultdict

import lsst.pex.config as pexConfig
import lsst.pipe.base as pipeBase
from lsst.pipe.base import CmdLineTask, ArgumentParser, DataIdContainer
from lsst.coadd.utils.coaddDataIdContainer import CoaddDataIdContainer

from .parquetTable import ParquetTable
from .multiBandUtils import makeMergeArgumentParser, MergeSourcesRunner
from .functors import CompositeFunctor, RAColumn, DecColumn, Column

def flattenFilters(df, filterDict, noDupCols=['coord_ra', 'coord_dec'], camelCase=False):
    """Flatten a dataframe with a multilevel column index.

    Per-filter columns are renamed with the short filter prefix given by
    ``filterDict``; columns listed in ``noDupCols`` are kept only once.
    """
    newDf = pd.DataFrame()
    for filt, filtShort in filterDict.items():
        subdf = df[filt]
        columnFormat = '{0}{1}' if camelCase else '{0}_{1}'
        newColumns = {c: columnFormat.format(filtShort, c)
                      for c in subdf.columns if c not in noDupCols}
        cols = list(newColumns.keys())
        newDf = pd.concat([newDf, subdf[cols].rename(columns=newColumns)], axis=1)

    newDf = pd.concat([subdf[noDupCols], newDf], axis=1)
    return newDf
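# A minimal sketch of what `flattenFilters` does, using a toy dataframe with a
# (filter, column) MultiIndex; the filter and column names below are
# illustrative only, not the real Object schema:
#
#     cols = pd.MultiIndex.from_tuples(
#         [('HSC-G', 'coord_ra'), ('HSC-G', 'coord_dec'), ('HSC-G', 'PsfFlux'),
#          ('HSC-R', 'coord_ra'), ('HSC-R', 'coord_dec'), ('HSC-R', 'PsfFlux')],
#         names=('filter', 'column'))
#     df = pd.DataFrame([[1.0, 2.0, 10.0, 1.0, 2.0, 20.0]], columns=cols)
#     flattenFilters(df, {'HSC-G': 'g', 'HSC-R': 'r'}, camelCase=True)
#     # -> flat columns: coord_ra, coord_dec, gPsfFlux, rPsfFlux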

class WriteObjectTableConfig(pexConfig.Config):
    priorityList = pexConfig.ListField(
        dtype=str,
        default=[],
        doc="Priority-ordered list of bands for the merge."
    )
    engine = pexConfig.Field(
        dtype=str,
        default="pyarrow",
        doc="Parquet engine for writing (pyarrow or fastparquet)"
    )
    coaddName = pexConfig.Field(
        dtype=str,
        default="deep",
        doc="Name of coadd"
    )

    def validate(self):
        pexConfig.Config.validate(self)
        if len(self.priorityList) == 0:
            raise RuntimeError("No priority list provided")

class WriteObjectTableTask(CmdLineTask):
    """Write filter-merged source tables to parquet
    """
    _DefaultName = "writeObjectTable"
    ConfigClass = WriteObjectTableConfig
    RunnerClass = MergeSourcesRunner

    # Names of table datasets to be merged
    inputDatasets = ('forced_src', 'meas', 'ref')

    # Tag of output dataset written by `write`
    outputDataset = 'obj'

    def __init__(self, butler=None, schema=None, **kwargs):
        # It is a shame that this class can't use the default init for CmdLineTask,
        # but to do so would require its own special task runner, which is many
        # more lines of specialization, so this is how it is for now
        CmdLineTask.__init__(self, **kwargs)

    def runDataRef(self, patchRefList):
        """Merge coadd sources from multiple bands by calling `run`.

        Parameters
        ----------
        patchRefList : `list` of `lsst.daf.persistence.ButlerDataRef`
            Data references for the same patch, one per filter.
        """
        catalogs = dict(self.readCatalog(patchRef) for patchRef in patchRefList)
        dataId = patchRefList[0].dataId
        mergedCatalog = self.run(catalogs, tract=dataId['tract'], patch=dataId['patch'])
        self.write(patchRefList[0], mergedCatalog)

    @classmethod
    def _makeArgumentParser(cls):
        """Create a suitable ArgumentParser.

        We will use the ArgumentParser to get a list of data
        references for patches; the RunnerClass will sort them into lists
        of data references for the same patch.

        References the first of `self.inputDatasets`, rather than
        `self.inputDataset`.
        """
        return makeMergeArgumentParser(cls._DefaultName, cls.inputDatasets[0])

    def readCatalog(self, patchRef):
        """Read input catalogs

        Read all the input datasets given by the 'inputDatasets'
        attribute.

        Parameters
        ----------
        patchRef : `lsst.daf.persistence.ButlerDataRef`
            Data reference for patch

        Returns
        -------
        Tuple consisting of filter name and a dict of catalogs, keyed by
        dataset name
        """
        filterName = patchRef.dataId["filter"]
        catalogDict = {}
        for dataset in self.inputDatasets:
            catalog = patchRef.get(self.config.coaddName + "Coadd_" + dataset, immediate=True)
            self.log.info("Read %d sources from %s for filter %s: %s" %
                          (len(catalog), dataset, filterName, patchRef.dataId))
            catalogDict[dataset] = catalog
        return filterName, catalogDict

    def run(self, catalogs, tract, patch):
        """Merge multiple catalogs.

        Parameters
        ----------
        catalogs : `dict`
            Mapping from filter names to dict of catalogs.
        tract : `int`
            tractId to use for the tractId column.
        patch : `str`
            patchId to use for the patchId column.

        Returns
        -------
        catalog : `lsst.pipe.tasks.parquetTable.ParquetTable`
            Merged dataframe, with each column prefixed by
            `filter_tag(filt)`, wrapped in the parquet writer shim class.
        """
        dfs = []
        for filt, tableDict in catalogs.items():
            for dataset, table in tableDict.items():
                # Convert afwTable to pandas DataFrame
                df = table.asAstropy().to_pandas().set_index('id', drop=True)

                # Sort columns by name, to ensure matching schema among patches
                df = df.reindex(sorted(df.columns), axis=1)
                df['tractId'] = tract
                df['patchId'] = patch

                # Make columns a 3-level MultiIndex
                df.columns = pd.MultiIndex.from_tuples([(dataset, filt, c) for c in df.columns],
                                                       names=('dataset', 'filter', 'column'))
                dfs.append(df)

        catalog = functools.reduce(lambda d1, d2: d1.join(d2), dfs)
        return ParquetTable(dataFrame=catalog)
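    # A rough pandas-only sketch of the structure `run` builds; the dataset,
    # filter, and column names here are illustrative, not the full schema:
    #
    #     meas_g = pd.DataFrame({'PsfFlux': [1.0]}, index=pd.Index([42], name='id'))
    #     meas_g.columns = pd.MultiIndex.from_tuples(
    #         [('meas', 'HSC-G', c) for c in meas_g.columns],
    #         names=('dataset', 'filter', 'column'))
    #     ref_g = pd.DataFrame({'detect_isPrimary': [True]}, index=pd.Index([42], name='id'))
    #     ref_g.columns = pd.MultiIndex.from_tuples(
    #         [('ref', 'HSC-G', c) for c in ref_g.columns],
    #         names=('dataset', 'filter', 'column'))
    #     merged = functools.reduce(lambda d1, d2: d1.join(d2), [meas_g, ref_g])
    #     # merged has one row per object id and a (dataset, filter, column)
    #     # MultiIndex on its columns, which is what gets wrapped in ParquetTable.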

    def write(self, patchRef, catalog):
        """Write the output.

        Parameters
        ----------
        patchRef : `lsst.daf.persistence.ButlerDataRef`
            Data reference for patch
        catalog : `ParquetTable`
            Catalog to write
        """
        patchRef.put(catalog, self.config.coaddName + "Coadd_" + self.outputDataset)
        # since the filter isn't actually part of the data ID for the dataset we're saving,
        # it's confusing to see it in the log message, even if the butler simply ignores it.
        mergeDataId = patchRef.dataId.copy()
        del mergeDataId["filter"]
        self.log.info("Wrote merged catalog: %s" % (mergeDataId,))

    def writeMetadata(self, dataRefList):
        """No metadata to write, and not sure how to write it for a list of dataRefs.
        """
        pass

class WriteSourceTableConfig(pexConfig.Config):
    pass


class WriteSourceTableTask(CmdLineTask):
    """Write source table to parquet
    """
    _DefaultName = "writeSourceTable"
    ConfigClass = WriteSourceTableConfig

    def runDataRef(self, dataRef):
        src = dataRef.get('src')
        ccdVisitId = dataRef.get('ccdExposureId')
        result = self.run(src, ccdVisitId=ccdVisitId)
        dataRef.put(result.table, 'source')

    def run(self, catalog, ccdVisitId=None):
        """Convert `src` catalog to parquet

        Parameters
        ----------
        catalog : `lsst.afw.table.SourceCatalog`
            catalog to be converted
        ccdVisitId : `int`
            ccdVisitId to be added as a column

        Returns
        -------
        result : `lsst.pipe.base.Struct`
            ``table``
                `ParquetTable` version of the input catalog
        """
        self.log.info("Generating parquet table from src catalog")
        df = catalog.asAstropy().to_pandas().set_index('id', drop=True)
        df['ccdVisitId'] = ccdVisitId
        return pipeBase.Struct(table=ParquetTable(dataFrame=df))

    def writeMetadata(self, dataRef):
        """No metadata to write.
        """
        pass

    def writeConfig(self, butler, clobber=False, doBackup=True):
        """No config to write.
        """
        pass

    @classmethod
    def _makeArgumentParser(cls):
        parser = ArgumentParser(name=cls._DefaultName)
        parser.add_id_argument("--id", 'src',
                               help="data ID, e.g. --id visit=12345 ccd=0")
        return parser

class PostprocessAnalysis(object):
    """Calculate columns from ParquetTable

    This object manages and organizes an arbitrary set of computations
    on a catalog. The catalog is defined by a
    `lsst.pipe.tasks.parquetTable.ParquetTable` object (or list thereof), such as a
    `deepCoadd_obj` dataset, and the computations are defined by a collection
    of `lsst.pipe.tasks.functor.Functor` objects (or, equivalently,
    a `CompositeFunctor`).

    After the object is initialized, accessing the `.df` attribute (which
    holds the `pandas.DataFrame` containing the results of the calculations)
    triggers computation of said dataframe.

    One of the conveniences of using this object is the ability to define a
    desired common filter for all functors. This enables the same functor
    collection to be passed to several different `PostprocessAnalysis` objects
    without having to change the original functor collection, since the `filt`
    keyword argument of this object triggers an overwrite of the `filt`
    property for all functors in the collection.

    This object also allows a list of refFlags to be passed, and defines a set
    of default refFlags that are always included even if not requested.

    If a list of `ParquetTable` objects is passed, rather than a single one,
    then the calculations will be mapped over all the input catalogs. In
    principle, it should be straightforward to parallelize this activity, but
    initial tests have failed (see TODO in code comments).

    Parameters
    ----------
    parq : `lsst.pipe.tasks.ParquetTable` (or list of such)
        Source catalog(s) for computation

    functors : `list`, `dict`, or `lsst.pipe.tasks.functors.CompositeFunctor`
        Computations to do (functors that act on `parq`).
        If a dict, the output
        DataFrame will have columns keyed accordingly.
        If a list, the column keys will come from the
        `.shortname` attribute of each functor.

    filt : `str` (optional)
        Filter in which to calculate. If provided,
        this will overwrite any existing `.filt` attribute
        of the provided functors.

    flags : `list` (optional)
        List of flags (per-band) to include in output table.

    refFlags : `list` (optional)
        List of refFlags (only reference band) to include in output table.
    """
    _defaultRefFlags = []
    _defaultFuncs = (('coord_ra', RAColumn()),
                     ('coord_dec', DecColumn()))

    def __init__(self, parq, functors, filt=None, flags=None, refFlags=None):
        self.parq = parq
        self.functors = functors

        self.filt = filt
        self.flags = list(flags) if flags is not None else []
        self.refFlags = list(self._defaultRefFlags)
        if refFlags is not None:
            self.refFlags += list(refFlags)

        self._df = None

    @property
    def defaultFuncs(self):
        funcs = dict(self._defaultFuncs)
        return funcs

    @property
    def func(self):
        additionalFuncs = self.defaultFuncs
        additionalFuncs.update({flag: Column(flag, dataset='ref') for flag in self.refFlags})
        additionalFuncs.update({flag: Column(flag, dataset='meas') for flag in self.flags})

        if isinstance(self.functors, CompositeFunctor):
            func = self.functors
        else:
            func = CompositeFunctor(self.functors)

        func.funcDict.update(additionalFuncs)
        func.filt = self.filt

        return func

    @property
    def noDupCols(self):
        return [name for name, func in self.func.funcDict.items() if func.noDup or func.dataset == 'ref']

    @property
    def df(self):
        if self._df is None:
            self.compute()
        return self._df

    def compute(self, dropna=False, pool=None):
        # map over multiple parquet tables
        if type(self.parq) in (list, tuple):
            if pool is None:
                dflist = [self.func(parq, dropna=dropna) for parq in self.parq]
            else:
                # TODO: Figure out why this doesn't work (pyarrow pickling issues?)
                dflist = pool.map(functools.partial(self.func, dropna=dropna), self.parq)
            self._df = pd.concat(dflist)
        else:
            self._df = self.func(self.parq, dropna=dropna)

        return self._df
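# A minimal usage sketch of `PostprocessAnalysis`. The column name passed to
# `Column` below is an assumption for illustration; any column present in the
# 'meas' dataset of a deepCoadd_obj ParquetTable would do:
#
#     funcs = {'ra': RAColumn(),
#              'dec': DecColumn(),
#              'psfFlux': Column('base_PsfFlux_instFlux', dataset='meas')}
#     analysis = PostprocessAnalysis(parq, funcs, filt='HSC-G',
#                                    refFlags=['detect_isPrimary'])
#     df = analysis.df  # first access triggers `compute`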

class TransformCatalogBaseConfig(pexConfig.Config):
    functorFile = pexConfig.Field(
        dtype=str,
        doc='Path to YAML file specifying functors to be computed',
        default=None,
        optional=True
    )

class TransformCatalogBaseTask(CmdLineTask):
    """Base class for transforming/standardizing a catalog

    by applying functors that convert units and apply calibrations.
    The purpose of this task is to perform a set of computations on
    an input `ParquetTable` dataset (such as `deepCoadd_obj`) and write the
    results to a new dataset (which needs to be declared in an `outputDataset`
    attribute).

    The calculations to be performed are defined in a YAML file that specifies
    a set of functors to be computed, provided as
    a `--functorFile` config parameter. An example of such a YAML file
    is the following:

        funcs:
            psfMag:
                functor: Mag
                args:
                    - base_PsfFlux
                filt: HSC-G
                dataset: meas
            cmodel_magDiff:
                functor: MagDiff
                args:
                    - modelfit_CModel
                    - base_PsfFlux
                filt: HSC-G
            gauss_magDiff:
                functor: MagDiff
                args:
                    - base_GaussianFlux
                    - base_PsfFlux
                filt: HSC-G
            count:
                functor: Column
                args:
                    - base_InputCount_value
                filt: HSC-G
            deconvolved_moments:
                functor: DeconvolvedMoments
                filt: HSC-G
                dataset: forced_src
        refFlags:
            - calib_psfUsed
            - merge_measurement_i
            - merge_measurement_r
            - merge_measurement_z
            - merge_measurement_y
            - merge_measurement_g
            - base_PixelFlags_flag_inexact_psfCenter
            - detect_isPrimary

    The names for each entry under "funcs" will become the names of columns in
    the output dataset. All the functors referenced are defined in
    `lsst.pipe.tasks.functors`. Positional arguments to be passed to each
    functor are in the `args` list, and any additional entries for each column
    other than "functor" or "args" (e.g., `'filt'`, `'dataset'`) are treated as
    keyword arguments to be passed to the functor initialization.

    The "refFlags" entry is a shortcut for a set of `Column` functors that keep
    the original column name and are taken from the `'ref'` dataset.

    The "flags" entry will be expanded out per band.

    Note, if `'filter'` is provided as part of the `dataId` when running this
    task (even though `deepCoadd_obj` does not use `'filter'`), then this will
    override the `filt` kwargs provided in the YAML file, and the calculations
    will be done in that filter.

    This task uses the `lsst.pipe.tasks.postprocess.PostprocessAnalysis` object
    to organize and execute the calculations.
    """
    @property
    def _DefaultName(self):
        raise NotImplementedError('Subclass must define "_DefaultName" attribute')

    @property
    def outputDataset(self):
        raise NotImplementedError('Subclass must define "outputDataset" attribute')

    @property
    def inputDataset(self):
        raise NotImplementedError('Subclass must define "inputDataset" attribute')

    @property
    def ConfigClass(self):
        raise NotImplementedError('Subclass must define "ConfigClass" attribute')

    def runDataRef(self, dataRef):
        parq = dataRef.get()
        funcs = self.getFunctors()
        df = self.run(parq, funcs=funcs, dataId=dataRef.dataId)
        self.write(df, dataRef)
        return df

    def run(self, parq, funcs=None, dataId=None):
        """Do postprocessing calculations

        Takes a `ParquetTable` object and dataId,
        returns a dataframe with results of postprocessing calculations.

        Parameters
        ----------
        parq : `lsst.pipe.tasks.parquetTable.ParquetTable`
            ParquetTable from which calculations are done.
        funcs : `lsst.pipe.tasks.functors.Functors`
            Functors to apply to the table's columns
        dataId : dict, optional
            Used to add a `patchId` column to the output dataframe.

        Returns
        -------
        `pandas.DataFrame`
        """
        self.log.info("Transforming/standardizing the source table dataId: %s", dataId)

        filt = dataId.get('filter', None)
        df = self.transform(filt, parq, funcs, dataId).df
        self.log.info("Made a table of %d columns and %d rows", len(df.columns), len(df))
        return df

    def getFunctors(self):
        funcs = CompositeFunctor.from_file(self.config.functorFile)
        funcs.update(dict(PostprocessAnalysis._defaultFuncs))
        return funcs
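    # A sketch of what `getFunctors` returns, assuming a functor file that
    # defines a single entry named 'psfMag' (the file path and entry name are
    # hypothetical):
    #
    #     self.config.functorFile = '/path/to/functors.yaml'
    #     funcs = self.getFunctors()
    #     sorted(funcs.funcDict)   # -> ['coord_dec', 'coord_ra', 'psfMag']
    #
    # i.e. the default coordinate functors from `PostprocessAnalysis` are always
    # appended to whatever the YAML file defines.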

    def getAnalysis(self, parq, funcs=None, filt=None):
        # Avoids disk access if funcs is passed
        if funcs is None:
            funcs = self.getFunctors()
        analysis = PostprocessAnalysis(parq, funcs, filt=filt)
        return analysis

    def transform(self, filt, parq, funcs, dataId):
        analysis = self.getAnalysis(parq, funcs=funcs, filt=filt)
        df = analysis.df
        if dataId is not None:
            for key, value in dataId.items():
                df[key] = value

        return pipeBase.Struct(
            df=df,
            analysis=analysis
        )

    def write(self, df, parqRef):
        parqRef.put(ParquetTable(dataFrame=df), self.outputDataset)

    def writeMetadata(self, dataRef):
        """No metadata to write.
        """
        pass

class TransformObjectCatalogConfig(TransformCatalogBaseConfig):
    coaddName = pexConfig.Field(
        dtype=str,
        default="deep",
        doc="Name of coadd"
    )
    filterMap = pexConfig.DictField(
        keytype=str,
        itemtype=str,
        default={},
        doc=("Dictionary mapping full filter name to short one for column name munging. "
             "These filters determine the output columns no matter what filters the "
             "input data actually contain.")
    )
    camelCase = pexConfig.Field(
        dtype=bool,
        default=True,
        doc=("Write per-filter column names with camelCase, else underscore. "
             "For example: gPsfFlux instead of g_PsfFlux.")
    )
    multilevelOutput = pexConfig.Field(
        dtype=bool,
        default=False,
        doc=("Whether results dataframe should have a multilevel column index (True) or be flat "
             "and name-munged (False).")
    )

class TransformObjectCatalogTask(TransformCatalogBaseTask):
    """Compute Flattened Object Table as defined in the DPDD

    Do the same set of postprocessing calculations on all bands.

    This is identical to `TransformCatalogBaseTask`, except that it does the
    specified functor calculations for all filters present in the
    input `deepCoadd_obj` table. Any specific `"filt"` keywords specified
    by the YAML file will be superseded.
    """
    _DefaultName = "transformObjectCatalog"
    ConfigClass = TransformObjectCatalogConfig

    inputDataset = 'deepCoadd_obj'
    outputDataset = 'objectTable'

    @classmethod
    def _makeArgumentParser(cls):
        parser = ArgumentParser(name=cls._DefaultName)
        parser.add_id_argument("--id", cls.inputDataset,
                               ContainerClass=CoaddDataIdContainer,
                               help="data ID, e.g. --id tract=12345 patch=1,2")
        return parser

    def run(self, parq, funcs=None, dataId=None):
        dfDict = {}
        analysisDict = {}
        templateDf = pd.DataFrame()
        # Perform transform for data of filters that exist in parq and are
        # specified in config.filterMap
        for filt in parq.columnLevelNames['filter']:
            if filt not in self.config.filterMap:
                self.log.info("Ignoring %s data in the input", filt)
                continue
            self.log.info("Transforming the catalog of filter %s", filt)
            result = self.transform(filt, parq, funcs, dataId)
            dfDict[filt] = result.df
            analysisDict[filt] = result.analysis
            if templateDf.empty:
                templateDf = result.df

        # Fill NaNs in columns of other wanted filters
        for filt in self.config.filterMap:
            if filt not in dfDict:
                self.log.info("Adding empty columns for filter %s", filt)
                dfDict[filt] = pd.DataFrame().reindex_like(templateDf)

        # This makes a multilevel column index, with filter as first level
        df = pd.concat(dfDict, axis=1, names=['filter', 'column'])

        if not self.config.multilevelOutput:
            noDupCols = list(set.union(*[set(v.noDupCols) for v in analysisDict.values()]))
            if dataId is not None:
                noDupCols += list(dataId.keys())
            df = flattenFilters(df, self.config.filterMap, noDupCols=noDupCols,
                                camelCase=self.config.camelCase)

        self.log.info("Made a table of %d columns and %d rows", len(df.columns), len(df))
        return df
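    # A hedged sketch of driving this task outside the command-line driver;
    # the functor file path, filterMap entries, and tract/patch values are
    # hypothetical:
    #
    #     config = TransformObjectCatalogConfig()
    #     config.functorFile = '/path/to/functors.yaml'
    #     config.filterMap = {'HSC-G': 'g', 'HSC-R': 'r'}
    #     task = TransformObjectCatalogTask(config=config)
    #     df = task.run(parq, funcs=task.getFunctors(),
    #                   dataId={'tract': 9813, 'patch': '4,4'})
    #
    # With the default camelCase=True and multilevelOutput=False, a per-filter
    # column would come out flattened with a short filter prefix, e.g. gPsfFlux
    # rather than g_PsfFlux (see the camelCase config doc above).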

class TractObjectDataIdContainer(CoaddDataIdContainer):

    def makeDataRefList(self, namespace):
        """Make self.refList from self.idList

        Generate a list of data references given tract and/or patch.
        This was adapted from `TractQADataIdContainer`, which was
        `TractDataIdContainer` modified to not require "filter".
        Only existing dataRefs are returned.
        """
        def getPatchRefList(tract):
            return [namespace.butler.dataRef(datasetType=self.datasetType,
                                             tract=tract.getId(),
                                             patch="%d,%d" % patch.getIndex()) for patch in tract]

        tractRefs = defaultdict(list)  # Data references for each tract
        for dataId in self.idList:
            skymap = self.getSkymap(namespace)

            if "tract" in dataId:
                tractId = dataId["tract"]
                if "patch" in dataId:
                    tractRefs[tractId].append(namespace.butler.dataRef(datasetType=self.datasetType,
                                                                       tract=tractId,
                                                                       patch=dataId['patch']))
                else:
                    tractRefs[tractId] += getPatchRefList(skymap[tractId])
            else:
                tractRefs = dict((tract.getId(), tractRefs.get(tract.getId(), []) + getPatchRefList(tract))
                                 for tract in skymap)
        outputRefList = []
        for tractRefList in tractRefs.values():
            existingRefs = [ref for ref in tractRefList if ref.datasetExists()]
            outputRefList.append(existingRefs)

        self.refList = outputRefList

class ConsolidateObjectTableConfig(pexConfig.Config):
    coaddName = pexConfig.Field(
        dtype=str,
        default="deep",
        doc="Name of coadd"
    )


class ConsolidateObjectTableTask(CmdLineTask):
    """Write patch-merged source tables to a tract-level parquet file
    """
    _DefaultName = "consolidateObjectTable"
    ConfigClass = ConsolidateObjectTableConfig

    inputDataset = 'objectTable'
    outputDataset = 'objectTable_tract'

    @classmethod
    def _makeArgumentParser(cls):
        parser = ArgumentParser(name=cls._DefaultName)

        parser.add_id_argument("--id", cls.inputDataset,
                               help="data ID, e.g. --id tract=12345",
                               ContainerClass=TractObjectDataIdContainer)
        return parser

    def runDataRef(self, patchRefList):
        df = pd.concat([patchRef.get().toDataFrame() for patchRef in patchRefList])
        patchRefList[0].put(ParquetTable(dataFrame=df), self.outputDataset)

    def writeMetadata(self, dataRef):
        """No metadata to write.
        """
        pass

class TransformSourceTableConfig(TransformCatalogBaseConfig):
    pass


class TransformSourceTableTask(TransformCatalogBaseTask):
    """Transform/standardize a source catalog
    """
    _DefaultName = "transformSourceTable"
    ConfigClass = TransformSourceTableConfig

    inputDataset = 'source'
    outputDataset = 'sourceTable'

    def writeMetadata(self, dataRef):
        """No metadata to write.
        """
        pass

    @classmethod
    def _makeArgumentParser(cls):
        parser = ArgumentParser(name=cls._DefaultName)
        parser.add_id_argument("--id", datasetType=cls.inputDataset,
                               level="sensor",
                               help="data ID, e.g. --id visit=12345 ccd=0")
        return parser

class VisitDataIdContainer(DataIdContainer):
    """DataIdContainer that groups sensor-level IDs by visit
    """

    def makeDataRefList(self, namespace):
        """Make self.refList from self.idList

        Generate a list of data references grouped by visit.

        Parameters
        ----------
        namespace : `argparse.Namespace`
            Namespace used by `lsst.pipe.base.CmdLineTask` to parse command line arguments
        """
        def ccdDataRefList(visitId):
            """Get all possible ccds for a given visit"""
            ccds = namespace.butler.queryMetadata('src', ['ccd'], dataId={'visit': visitId})
            return [namespace.butler.dataRef(datasetType=self.datasetType,
                                             visit=visitId,
                                             ccd=ccd) for ccd in ccds]
        # Group by visits
        visitRefs = defaultdict(list)
        for dataId in self.idList:
            if "visit" in dataId:
                visitId = dataId["visit"]
                if "ccd" in dataId:
                    visitRefs[visitId].append(namespace.butler.dataRef(datasetType=self.datasetType,
                                                                       visit=visitId, ccd=dataId['ccd']))
                else:
                    visitRefs[visitId] += ccdDataRefList(visitId)
        outputRefList = []
        for refList in visitRefs.values():
            existingRefs = [ref for ref in refList if ref.datasetExists()]
            outputRefList.append(existingRefs)

        self.refList = outputRefList
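    # The grouping above follows the usual defaultdict pattern; a toy
    # illustration with plain values (the visit and ccd numbers are made up):
    #
    #     visitRefs = defaultdict(list)
    #     for dataId in [{'visit': 12345, 'ccd': 0}, {'visit': 12345, 'ccd': 1}]:
    #         visitRefs[dataId['visit']].append(dataId['ccd'])
    #     dict(visitRefs)   # -> {12345: [0, 1]}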

class ConsolidateSourceTableConfig(pexConfig.Config):
    pass


class ConsolidateSourceTableTask(CmdLineTask):
    """Concatenate `sourceTable` list into a per-visit `sourceTable_visit`
    """
    _DefaultName = 'consolidateSourceTable'
    ConfigClass = ConsolidateSourceTableConfig

    inputDataset = 'sourceTable'
    outputDataset = 'sourceTable_visit'

    def runDataRef(self, dataRefList):
        self.log.info("Concatenating %s per-detector Source Tables", len(dataRefList))
        df = pd.concat([dataRef.get().toDataFrame() for dataRef in dataRefList])
        dataRefList[0].put(ParquetTable(dataFrame=df), self.outputDataset)

    @classmethod
    def _makeArgumentParser(cls):
        parser = ArgumentParser(name=cls._DefaultName)

        parser.add_id_argument("--id", cls.inputDataset,
                               help="data ID, e.g. --id visit=12345",
                               ContainerClass=VisitDataIdContainer)
        return parser

    def writeMetadata(self, dataRef):
        """No metadata to write.
        """
        pass

    def writeConfig(self, butler, clobber=False, doBackup=True):
        """No config to write.
        """
        pass
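# The consolidation step is a plain row-wise concatenation; a toy pandas
# illustration of what `runDataRef` does with the per-detector tables (the
# values below are made up, though 'ccdVisitId' is the column added by
# WriteSourceTableTask):
#
#     det0 = pd.DataFrame({'ccdVisitId': [1], 'psfFlux': [10.0]})
#     det1 = pd.DataFrame({'ccdVisitId': [2], 'psfFlux': [20.0]})
#     visitTable = pd.concat([det0, det1])
#     # visitTable has one row per source from every detector in the visit.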