
1# This file is part of pipe_tasks 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

22import functools 

23import pandas as pd 

24from collections import defaultdict 

25 

26import lsst.geom 

27import lsst.pex.config as pexConfig 

28import lsst.pipe.base as pipeBase 

29import lsst.daf.base as dafBase 

30from lsst.pipe.base import connectionTypes 

31import lsst.afw.table as afwTable 

32from lsst.meas.base import SingleFrameMeasurementTask 

33from lsst.pipe.base import CmdLineTask, ArgumentParser, DataIdContainer 

34from lsst.coadd.utils.coaddDataIdContainer import CoaddDataIdContainer 

35from lsst.daf.butler import DeferredDatasetHandle 

36 

37from .parquetTable import ParquetTable 

38from .multiBandUtils import makeMergeArgumentParser, MergeSourcesRunner 

39from .functors import CompositeFunctor, RAColumn, DecColumn, Column 

40 

41 

42def flattenFilters(df, noDupCols=['coord_ra', 'coord_dec'], camelCase=False, inputBands=None): 

43 """Flattens a dataframe with multilevel column index 

44 """ 

45 newDf = pd.DataFrame() 

46 # band is the level 0 index 

47 dfBands = df.columns.unique(level=0).values 

48 for band in dfBands: 

49 subdf = df[band] 

50 columnFormat = '{0}{1}' if camelCase else '{0}_{1}' 

51 newColumns = {c: columnFormat.format(band, c) 

52 for c in subdf.columns if c not in noDupCols} 

53 cols = list(newColumns.keys()) 

54 newDf = pd.concat([newDf, subdf[cols].rename(columns=newColumns)], axis=1) 

55 

56 # Band must be present in the input and output or else column is all NaN: 

57 presentBands = dfBands if inputBands is None else list(set(inputBands).intersection(dfBands)) 

58 # Get the unexploded columns from any present band's partition 

59 noDupDf = df[presentBands[0]][noDupCols] 

60 newDf = pd.concat([noDupDf, newDf], axis=1) 

61 return newDf 

62 
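# A minimal, pandas-only sketch of what flattenFilters does: per-band columns of a
# band-level MultiIndex DataFrame become band-prefixed flat columns, while the
# noDupCols (coord_ra/coord_dec) are kept only once. The band names, column names,
# and values below are invented for illustration.
def _exampleFlattenFilters():
    data = {('g', 'coord_ra'): [1.0], ('g', 'coord_dec'): [2.0], ('g', 'psfFlux'): [10.0],
            ('r', 'coord_ra'): [1.0], ('r', 'coord_dec'): [2.0], ('r', 'psfFlux'): [20.0]}
    df = pd.DataFrame(data)
    df.columns = pd.MultiIndex.from_tuples(df.columns, names=('band', 'column'))
    flat = flattenFilters(df, camelCase=True)
    # Expected columns: ['coord_ra', 'coord_dec', 'gpsfFlux', 'rpsfFlux']
    return flat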

63 

64class WriteObjectTableConnections(pipeBase.PipelineTaskConnections, 

65 defaultTemplates={"coaddName": "deep"}, 

66 dimensions=("tract", "patch", "skymap")): 

67 inputCatalogMeas = connectionTypes.Input( 

68 doc="Catalog of source measurements on the deepCoadd.", 

69 dimensions=("tract", "patch", "band", "skymap"), 

70 storageClass="SourceCatalog", 

71 name="{coaddName}Coadd_meas", 

72 multiple=True 

73 ) 

74 inputCatalogForcedSrc = connectionTypes.Input( 

75 doc="Catalog of forced measurements (shape and position parameters held fixed) on the deepCoadd.", 

76 dimensions=("tract", "patch", "band", "skymap"), 

77 storageClass="SourceCatalog", 

78 name="{coaddName}Coadd_forced_src", 

79 multiple=True 

80 ) 

81 inputCatalogRef = connectionTypes.Input( 

82 doc="Catalog marking the primary detection (which band provides a good shape and position)" 

83 "for each detection in deepCoadd_mergeDet.", 

84 dimensions=("tract", "patch", "skymap"), 

85 storageClass="SourceCatalog", 

86 name="{coaddName}Coadd_ref" 

87 ) 

88 outputCatalog = connectionTypes.Output( 

89 doc="A vertical concatenation of the deepCoadd_{ref|meas|forced_src} catalogs, " 

90 "stored as a DataFrame with a multi-level column index per-patch.", 

91 dimensions=("tract", "patch", "skymap"), 

92 storageClass="DataFrame", 

93 name="{coaddName}Coadd_obj" 

94 ) 

95 

96 

97class WriteObjectTableConfig(pipeBase.PipelineTaskConfig, 

98 pipelineConnections=WriteObjectTableConnections): 

99 engine = pexConfig.Field( 

100 dtype=str, 

101 default="pyarrow", 

102 doc="Parquet engine for writing (pyarrow or fastparquet)" 

103 ) 

104 coaddName = pexConfig.Field( 

105 dtype=str, 

106 default="deep", 

107 doc="Name of coadd" 

108 ) 

109 

110 

111class WriteObjectTableTask(CmdLineTask, pipeBase.PipelineTask): 

112 """Write filter-merged source tables to parquet 

113 """ 

114 _DefaultName = "writeObjectTable" 

115 ConfigClass = WriteObjectTableConfig 

116 RunnerClass = MergeSourcesRunner 

117 

118 # Names of table datasets to be merged 

119 inputDatasets = ('forced_src', 'meas', 'ref') 

120 

121 # Tag of output dataset written by `MergeSourcesTask.write` 

122 outputDataset = 'obj' 

123 

124 def __init__(self, butler=None, schema=None, **kwargs): 

125 # It would be better to use the default CmdLineTask init here, but doing so

126 # would require a dedicated task runner, which is many more lines of

127 # specialization, so the extra butler/schema arguments are simply ignored.

128 super().__init__(**kwargs) 

129 

130 def runDataRef(self, patchRefList): 

131 """! 

132 @brief Merge coadd sources from multiple bands. Calls @ref `run` which must be defined in 

133 subclasses that inherit from MergeSourcesTask. 

134 @param[in] patchRefList list of data references for each filter 

135 """ 

136 catalogs = dict(self.readCatalog(patchRef) for patchRef in patchRefList) 

137 dataId = patchRefList[0].dataId 

138 mergedCatalog = self.run(catalogs, tract=dataId['tract'], patch=dataId['patch']) 

139 self.write(patchRefList[0], ParquetTable(dataFrame=mergedCatalog)) 

140 

141 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

142 inputs = butlerQC.get(inputRefs) 

143 

144 measDict = {ref.dataId['band']: {'meas': cat} for ref, cat in 

145 zip(inputRefs.inputCatalogMeas, inputs['inputCatalogMeas'])} 

146 forcedSourceDict = {ref.dataId['band']: {'forced_src': cat} for ref, cat in 

147 zip(inputRefs.inputCatalogForcedSrc, inputs['inputCatalogForcedSrc'])} 

148 

149 catalogs = {} 

150 for band in measDict.keys(): 

151 catalogs[band] = {'meas': measDict[band]['meas'], 

152 'forced_src': forcedSourceDict[band]['forced_src'], 

153 'ref': inputs['inputCatalogRef']} 

154 dataId = butlerQC.quantum.dataId 

155 df = self.run(catalogs=catalogs, tract=dataId['tract'], patch=dataId['patch']) 

156 outputs = pipeBase.Struct(outputCatalog=df) 

157 butlerQC.put(outputs, outputRefs) 

158 

159 @classmethod 

160 def _makeArgumentParser(cls): 

161 """Create a suitable ArgumentParser. 

162 

163 We will use the ArgumentParser to get a list of data 

164 references for patches; the RunnerClass will sort them into lists 

165 of data references for the same patch. 

166 

167 References the first of self.inputDatasets, rather than

168 self.inputDataset 

169 """ 

170 return makeMergeArgumentParser(cls._DefaultName, cls.inputDatasets[0]) 

171 

172 def readCatalog(self, patchRef): 

173 """Read input catalogs 

174 

175 Read all the input datasets given by the 'inputDatasets' 

176 attribute. 

177 

178 Parameters 

179 ---------- 

180 patchRef : `lsst.daf.persistence.ButlerDataRef` 

181 Data reference for patch 

182 

183 Returns 

184 ------- 

185 Tuple consisting of band name and a dict of catalogs, keyed by 

186 dataset name 

187 """ 

188 band = patchRef.get(self.config.coaddName + "Coadd_filterLabel", immediate=True).bandLabel 

189 catalogDict = {} 

190 for dataset in self.inputDatasets: 

191 catalog = patchRef.get(self.config.coaddName + "Coadd_" + dataset, immediate=True) 

192 self.log.info("Read %d sources from %s for band %s: %s" % 

193 (len(catalog), dataset, band, patchRef.dataId)) 

194 catalogDict[dataset] = catalog 

195 return band, catalogDict 

196 

197 def run(self, catalogs, tract, patch): 

198 """Merge multiple catalogs. 

199 

200 Parameters 

201 ---------- 

202 catalogs : `dict` 

203 Mapping from filter names to dict of catalogs. 

204 tract : int 

205 tractId to use for the tractId column 

206 patch : str 

207 patchId to use for the patchId column 

208 

209 Returns 

210 ------- 

211 catalog : `pandas.DataFrame` 

212 Merged dataframe 

213 """ 

214 

215 dfs = [] 

216 for filt, tableDict in catalogs.items(): 

217 for dataset, table in tableDict.items(): 

218 # Convert afwTable to pandas DataFrame 

219 df = table.asAstropy().to_pandas().set_index('id', drop=True) 

220 

221 # Sort columns by name, to ensure matching schema among patches 

222 df = df.reindex(sorted(df.columns), axis=1) 

223 df['tractId'] = tract 

224 df['patchId'] = patch 

225 

226 # Make columns a 3-level MultiIndex 

227 df.columns = pd.MultiIndex.from_tuples([(dataset, filt, c) for c in df.columns], 

228 names=('dataset', 'band', 'column')) 

229 dfs.append(df) 

230 

231 catalog = functools.reduce(lambda d1, d2: d1.join(d2), dfs) 

232 return catalog 

233 

234 def write(self, patchRef, catalog): 

235 """Write the output. 

236 

237 Parameters 

238 ---------- 

239 catalog : `ParquetTable` 

240 Catalog to write 

241 patchRef : `lsst.daf.persistence.ButlerDataRef` 

242 Data reference for patch 

243 """ 

244 patchRef.put(catalog, self.config.coaddName + "Coadd_" + self.outputDataset) 

245 # since the filter isn't actually part of the data ID for the dataset we're saving, 

246 # it's confusing to see it in the log message, even if the butler simply ignores it. 

247 mergeDataId = patchRef.dataId.copy() 

248 del mergeDataId["filter"] 

249 self.log.info("Wrote merged catalog: %s" % (mergeDataId,)) 

250 

251 def writeMetadata(self, dataRefList): 

252 """No metadata to write, and not sure how to write it for a list of dataRefs. 

253 """ 

254 pass 

255 
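# A pure-pandas sketch of the column structure WriteObjectTableTask.run builds:
# each per-dataset, per-band table gets a three-level (dataset, band, column)
# MultiIndex and the pieces are joined on the shared object id. The dataset,
# band, and column names and the values are invented for illustration.
def _exampleObjTableColumns():
    dfs = []
    for dataset in ('meas', 'forced_src'):
        for band in ('g', 'r'):
            df = pd.DataFrame({'base_PsfFlux_instFlux': [1.0, 2.0]},
                              index=pd.Index([10, 11], name='id'))
            df.columns = pd.MultiIndex.from_tuples([(dataset, band, c) for c in df.columns],
                                                   names=('dataset', 'band', 'column'))
            dfs.append(df)
    # Same reduction as run(): successive joins on the object id index.
    catalog = functools.reduce(lambda d1, d2: d1.join(d2), dfs)
    # catalog['meas']['g']['base_PsfFlux_instFlux'] selects a single flat column.
    return catalog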

256 

257class WriteSourceTableConnections(pipeBase.PipelineTaskConnections, 

258 dimensions=("instrument", "visit", "detector")): 

259 

260 catalog = connectionTypes.Input( 

261 doc="Input full-depth catalog of sources produced by CalibrateTask", 

262 name="src", 

263 storageClass="SourceCatalog", 

264 dimensions=("instrument", "visit", "detector") 

265 ) 

266 outputCatalog = connectionTypes.Output( 

267 doc="Catalog of sources, `src` in Parquet format", 

268 name="source", 

269 storageClass="DataFrame", 

270 dimensions=("instrument", "visit", "detector") 

271 ) 

272 

273 

274class WriteSourceTableConfig(pipeBase.PipelineTaskConfig, 

275 pipelineConnections=WriteSourceTableConnections): 

276 doApplyExternalPhotoCalib = pexConfig.Field( 

277 dtype=bool, 

278 default=False, 

279 doc=("Add local photoCalib columns from the calexp.photoCalib? Should only set True if " 

280 "generating Source Tables from older src tables which do not already have local calib columns") 

281 ) 

282 doApplyExternalSkyWcs = pexConfig.Field( 

283 dtype=bool, 

284 default=False, 

285 doc=("Add local WCS columns from the calexp.wcs? Should only set True if " 

286 "generating Source Tables from older src tables which do not already have local calib columns") 

287 ) 

288 

289 

290class WriteSourceTableTask(CmdLineTask, pipeBase.PipelineTask): 

291 """Write source table to parquet 

292 """ 

293 _DefaultName = "writeSourceTable" 

294 ConfigClass = WriteSourceTableConfig 

295 

296 def runDataRef(self, dataRef): 

297 src = dataRef.get('src') 

298 if self.config.doApplyExternalPhotoCalib or self.config.doApplyExternalSkyWcs: 

299 src = self.addCalibColumns(src, dataRef) 

300 

301 ccdVisitId = dataRef.get('ccdExposureId') 

302 result = self.run(src, ccdVisitId=ccdVisitId) 

303 dataRef.put(result.table, 'source') 

304 

305 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

306 inputs = butlerQC.get(inputRefs) 

307 inputs['ccdVisitId'] = butlerQC.quantum.dataId.pack("visit_detector") 

308 result = self.run(**inputs).table 

309 outputs = pipeBase.Struct(outputCatalog=result.toDataFrame()) 

310 butlerQC.put(outputs, outputRefs) 

311 

312 def run(self, catalog, ccdVisitId=None): 

313 """Convert `src` catalog to parquet 

314 

315 Parameters 

316 ---------- 

317 catalog: `afwTable.SourceCatalog` 

318 catalog to be converted 

319 ccdVisitId: `int` 

320 ccdVisitId to be added as a column 

321 

322 Returns 

323 ------- 

324 result : `lsst.pipe.base.Struct` 

325 ``table`` 

326 `ParquetTable` version of the input catalog 

327 """ 

328 self.log.info("Generating parquet table from src catalog %s", ccdVisitId) 

329 df = catalog.asAstropy().to_pandas().set_index('id', drop=True) 

330 df['ccdVisitId'] = ccdVisitId 

331 return pipeBase.Struct(table=ParquetTable(dataFrame=df)) 

332 

333 def addCalibColumns(self, catalog, dataRef): 

334 """Add columns with local calibration evaluated at each centroid 

335 

336 for backwards compatibility with old repos. 

337 This exists for the purpose of converting old src catalogs 

338 (which don't have the expected local calib columns) to Source Tables. 

339 

340 Parameters 

341 ---------- 

342 catalog: `afwTable.SourceCatalog` 

343 catalog to which calib columns will be added 

344 dataRef: `lsst.daf.persistence.ButlerDataRef`

345 for fetching the calibs from disk. 

346 

347 Returns 

348 ------- 

349 newCat: `afwTable.SourceCatalog` 

350 Source Catalog with requested local calib columns 

351 """ 

352 mapper = afwTable.SchemaMapper(catalog.schema) 

353 measureConfig = SingleFrameMeasurementTask.ConfigClass() 

354 measureConfig.doReplaceWithNoise = False 

355 

356 # Just need the WCS or the PhotoCalib attached to an exposure

357 exposure = dataRef.get('calexp_sub', 

358 bbox=lsst.geom.Box2I(lsst.geom.Point2I(0, 0), lsst.geom.Point2I(0, 0))) 

359 

360 mapper = afwTable.SchemaMapper(catalog.schema) 

361 mapper.addMinimalSchema(catalog.schema, True) 

362 schema = mapper.getOutputSchema() 

363 

364 exposureIdInfo = dataRef.get("expIdInfo") 

365 measureConfig.plugins.names = [] 

366 if self.config.doApplyExternalSkyWcs: 

367 plugin = 'base_LocalWcs' 

368 if plugin in schema: 

369 raise RuntimeError(f"{plugin} already in src catalog. Set doApplyExternalSkyWcs=False") 

370 else: 

371 measureConfig.plugins.names.add(plugin) 

372 

373 if self.config.doApplyExternalPhotoCalib: 

374 plugin = 'base_LocalPhotoCalib' 

375 if plugin in schema: 

376 raise RuntimeError(f"{plugin} already in src catalog. Set doApplyExternalPhotoCalib=False") 

377 else: 

378 measureConfig.plugins.names.add(plugin) 

379 

380 measurement = SingleFrameMeasurementTask(config=measureConfig, schema=schema) 

381 newCat = afwTable.SourceCatalog(schema) 

382 newCat.extend(catalog, mapper=mapper) 

383 measurement.run(measCat=newCat, exposure=exposure, exposureId=exposureIdInfo.expId) 

384 return newCat 

385 

386 def writeMetadata(self, dataRef): 

387 """No metadata to write. 

388 """ 

389 pass 

390 

391 @classmethod 

392 def _makeArgumentParser(cls): 

393 parser = ArgumentParser(name=cls._DefaultName) 

394 parser.add_id_argument("--id", 'src', 

395 help="data ID, e.g. --id visit=12345 ccd=0") 

396 return parser 

397 
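# A minimal sketch of driving WriteSourceTableTask.run directly, assuming an LSST
# stack environment and an existing `src` SourceCatalog in hand; the ccdVisitId
# value is invented for illustration.
def _exampleWriteSourceTable(src):
    task = WriteSourceTableTask()
    result = task.run(src, ccdVisitId=1234567)
    # The returned Struct holds a ParquetTable; its DataFrame is indexed by the
    # source id with a constant ccdVisitId column appended.
    return result.table.toDataFrame()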

398 

399class PostprocessAnalysis(object): 

400 """Calculate columns from ParquetTable 

401 

402 This object manages and organizes an arbitrary set of computations 

403 on a catalog. The catalog is defined by a 

404 `lsst.pipe.tasks.parquetTable.ParquetTable` object (or list thereof), such as a 

405 `deepCoadd_obj` dataset, and the computations are defined by a collection 

406 of `lsst.pipe.tasks.functor.Functor` objects (or, equivalently, 

407 a `CompositeFunctor`). 

408 

409 After the object is initialized, the first access of the `.df` attribute

410 triggers computation of the `pandas.DataFrame` containing the results of

411 the calculations, and the result is cached.

412 

413 One of the conveniences of using this object is the ability to define a desired common 

414 filter for all functors. This enables the same functor collection to be passed to 

415 several different `PostprocessAnalysis` objects without having to change the original 

416 functor collection, since the `filt` keyword argument of this object triggers an 

417 overwrite of the `filt` property for all functors in the collection. 

418 

419 This object also allows a list of refFlags to be passed, and defines a set of default 

420 refFlags that are always included even if not requested. 

421 

422 If a list of `ParquetTable` objects is passed, rather than a single one, then the

423 calculations will be mapped over all the input catalogs. In principle, it should 

424 be straightforward to parallelize this activity, but initial tests have failed 

425 (see TODO in code comments). 

426 

427 Parameters 

428 ---------- 

429 parq : `lsst.pipe.tasks.ParquetTable` (or list of such) 

430 Source catalog(s) for computation 

431 

432 functors : `list`, `dict`, or `lsst.pipe.tasks.functors.CompositeFunctor` 

433 Computations to do (functors that act on `parq`). 

434 If a dict, the output 

435 DataFrame will have columns keyed accordingly. 

436 If a list, the column keys will come from the 

437 `.shortname` attribute of each functor. 

438 

439 filt : `str` (optional) 

440 Filter in which to calculate. If provided, 

441 this will overwrite any existing `.filt` attribute 

442 of the provided functors. 

443 

444 flags : `list` (optional) 

445 List of flags (per-band) to include in output table. 

446 

447 refFlags : `list` (optional) 

448 List of refFlags (only reference band) to include in output table. 

449 

450 

451 """ 

452 _defaultRefFlags = [] 

453 _defaultFuncs = (('coord_ra', RAColumn()), 

454 ('coord_dec', DecColumn())) 

455 

456 def __init__(self, parq, functors, filt=None, flags=None, refFlags=None): 

457 self.parq = parq 

458 self.functors = functors 

459 

460 self.filt = filt 

461 self.flags = list(flags) if flags is not None else [] 

462 self.refFlags = list(self._defaultRefFlags) 

463 if refFlags is not None: 

464 self.refFlags += list(refFlags) 

465 

466 self._df = None 

467 

468 @property 

469 def defaultFuncs(self): 

470 funcs = dict(self._defaultFuncs) 

471 return funcs 

472 

473 @property 

474 def func(self): 

475 additionalFuncs = self.defaultFuncs 

476 additionalFuncs.update({flag: Column(flag, dataset='ref') for flag in self.refFlags}) 

477 additionalFuncs.update({flag: Column(flag, dataset='meas') for flag in self.flags}) 

478 

479 if isinstance(self.functors, CompositeFunctor): 

480 func = self.functors 

481 else: 

482 func = CompositeFunctor(self.functors) 

483 

484 func.funcDict.update(additionalFuncs) 

485 func.filt = self.filt 

486 

487 return func 

488 

489 @property 

490 def noDupCols(self): 

491 return [name for name, func in self.func.funcDict.items() if func.noDup or func.dataset == 'ref'] 

492 

493 @property 

494 def df(self): 

495 if self._df is None: 

496 self.compute() 

497 return self._df 

498 

499 def compute(self, dropna=False, pool=None): 

500 # map over multiple parquet tables 

501 if type(self.parq) in (list, tuple): 

502 if pool is None: 

503 dflist = [self.func(parq, dropna=dropna) for parq in self.parq] 

504 else: 

505 # TODO: Figure out why this doesn't work (pyarrow pickling issues?) 

506 dflist = pool.map(functools.partial(self.func, dropna=dropna), self.parq) 

507 self._df = pd.concat(dflist) 

508 else: 

509 self._df = self.func(self.parq, dropna=dropna) 

510 

511 return self._df 

512 
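# A minimal sketch of how PostprocessAnalysis might be driven, assuming the LSST
# stack is available and `parq` is a deepCoadd_obj ParquetTable already read from
# a butler. The functor choices, column name, band, and flag are illustrative.
def _examplePostprocessAnalysis(parq):
    funcs = {'ra': RAColumn(),
             'dec': DecColumn(),
             'psfFlux': Column('base_PsfFlux_instFlux', dataset='meas')}
    analysis = PostprocessAnalysis(parq, funcs, filt='i', refFlags=['detect_isPrimary'])
    # Accessing .df triggers the computation; refFlags are read from the 'ref'
    # dataset, the other functors from the requested band of their dataset.
    return analysis.df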

513 

514class TransformCatalogBaseConnections(pipeBase.PipelineTaskConnections, 

515 dimensions=()): 

516 """Expected Connections for subclasses of TransformCatalogBaseTask. 

517 

518 Must be subclassed. 

519 """ 

520 inputCatalog = connectionTypes.Input( 

521 name="", 

522 storageClass="DataFrame", 

523 ) 

524 outputCatalog = connectionTypes.Output( 

525 name="", 

526 storageClass="DataFrame", 

527 ) 

528 

529 

530class TransformCatalogBaseConfig(pipeBase.PipelineTaskConfig, 

531 pipelineConnections=TransformCatalogBaseConnections): 

532 functorFile = pexConfig.Field( 

533 dtype=str, 

534 doc='Path to YAML file specifying functors to be computed', 

535 default=None, 

536 optional=True 

537 ) 

538 

539 

540class TransformCatalogBaseTask(CmdLineTask, pipeBase.PipelineTask): 

541 """Base class for transforming/standardizing a catalog 

542 

543 by applying functors that convert units and apply calibrations. 

544 The purpose of this task is to perform a set of computations on 

545 an input `ParquetTable` dataset (such as `deepCoadd_obj`) and write the 

546 results to a new dataset (which needs to be declared in an `outputDataset` 

547 attribute). 

548 

549 The calculations to be performed are defined in a YAML file that specifies 

550 a set of functors to be computed, provided as 

551 a `--functorFile` config parameter. An example of such a YAML file 

552 is the following: 

553 

554 funcs: 

555 psfMag: 

556 functor: Mag 

557 args: 

558 - base_PsfFlux 

559 filt: HSC-G 

560 dataset: meas 

561 cmodel_magDiff: 

562 functor: MagDiff 

563 args: 

564 - modelfit_CModel 

565 - base_PsfFlux 

566 filt: HSC-G 

567 gauss_magDiff: 

568 functor: MagDiff 

569 args: 

570 - base_GaussianFlux 

571 - base_PsfFlux 

572 filt: HSC-G 

573 count: 

574 functor: Column 

575 args: 

576 - base_InputCount_value 

577 filt: HSC-G 

578 deconvolved_moments: 

579 functor: DeconvolvedMoments 

580 filt: HSC-G 

581 dataset: forced_src 

582 refFlags: 

583 - calib_psfUsed 

584 - merge_measurement_i 

585 - merge_measurement_r 

586 - merge_measurement_z 

587 - merge_measurement_y 

588 - merge_measurement_g 

589 - base_PixelFlags_flag_inexact_psfCenter 

590 - detect_isPrimary 

591 

592 The names for each entry under "funcs" will become the names of columns in the

593 output dataset. All the functors referenced are defined in `lsst.pipe.tasks.functors`. 

594 Positional arguments to be passed to each functor are in the `args` list, 

595 and any additional entries for each column other than "functor" or "args" (e.g., `'filt'`, 

596 `'dataset'`) are treated as keyword arguments to be passed to the functor initialization. 

597 

598 The "refFlags" entry is shortcut for a bunch of `Column` functors with the original column and 

599 taken from the `'ref'` dataset. 

600 

601 The "flags" entry will be expanded out per band. 

602 

603 This task uses the `lsst.pipe.tasks.postprocess.PostprocessAnalysis` object 

604 to organize and execute the calculations.

605 

606 """ 

607 @property 

608 def _DefaultName(self): 

609 raise NotImplementedError('Subclass must define "_DefaultName" attribute') 

610 

611 @property 

612 def outputDataset(self): 

613 raise NotImplementedError('Subclass must define "outputDataset" attribute') 

614 

615 @property 

616 def inputDataset(self): 

617 raise NotImplementedError('Subclass must define "inputDataset" attribute') 

618 

619 @property 

620 def ConfigClass(self): 

621 raise NotImplementedError('Subclass must define "ConfigClass" attribute') 

622 

623 def __init__(self, *args, **kwargs): 

624 super().__init__(*args, **kwargs) 

625 if self.config.functorFile: 

626 self.log.info('Loading transform functor definitions from %s',

627 self.config.functorFile) 

628 self.funcs = CompositeFunctor.from_file(self.config.functorFile) 

629 self.funcs.update(dict(PostprocessAnalysis._defaultFuncs)) 

630 else: 

631 self.funcs = None 

632 

633 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

634 inputs = butlerQC.get(inputRefs) 

635 if self.funcs is None: 

636 raise ValueError("config.functorFile is None. " 

637 "Must be a valid path to yaml in order to run Task as a PipelineTask.") 

638 result = self.run(parq=inputs['inputCatalog'], funcs=self.funcs, 

639 dataId=outputRefs.outputCatalog.dataId.full) 

640 outputs = pipeBase.Struct(outputCatalog=result) 

641 butlerQC.put(outputs, outputRefs) 

642 

643 def runDataRef(self, dataRef): 

644 parq = dataRef.get() 

645 if self.funcs is None: 

646 raise ValueError("config.functorFile is None. " 

647 "Must be a valid path to yaml in order to run as a CommandlineTask.") 

648 df = self.run(parq, funcs=self.funcs, dataId=dataRef.dataId) 

649 self.write(df, dataRef) 

650 return df 

651 

652 def run(self, parq, funcs=None, dataId=None, band=None): 

653 """Do postprocessing calculations 

654 

655 Takes a `ParquetTable` object and dataId, 

656 returns a dataframe with results of postprocessing calculations. 

657 

658 Parameters 

659 ---------- 

660 parq : `lsst.pipe.tasks.parquetTable.ParquetTable` 

661 ParquetTable from which calculations are done. 

662 funcs : `lsst.pipe.tasks.functors.Functors` 

663 Functors to apply to the table's columns 

664 dataId : dict, optional 

665 Used to add a `patchId` column to the output dataframe. 

666 band : `str`, optional 

667 Filter band that is being processed. 

668 

669 Returns 

670 -------

671 `pandas.DataFrame` 

672 

673 """ 

674 self.log.info("Transforming/standardizing the source table dataId: %s", dataId) 

675 

676 df = self.transform(band, parq, funcs, dataId).df 

677 self.log.info("Made a table of %d columns and %d rows", len(df.columns), len(df)) 

678 return df 

679 

680 def getFunctors(self): 

681 return self.funcs 

682 

683 def getAnalysis(self, parq, funcs=None, band=None): 

684 if funcs is None: 

685 funcs = self.funcs 

686 analysis = PostprocessAnalysis(parq, funcs, filt=band) 

687 return analysis 

688 

689 def transform(self, band, parq, funcs, dataId): 

690 analysis = self.getAnalysis(parq, funcs=funcs, band=band) 

691 df = analysis.df 

692 if dataId is not None: 

693 for key, value in dataId.items(): 

694 df[str(key)] = value 

695 

696 return pipeBase.Struct( 

697 df=df, 

698 analysis=analysis 

699 ) 

700 

701 def write(self, df, parqRef): 

702 parqRef.put(ParquetTable(dataFrame=df), self.outputDataset) 

703 

704 def writeMetadata(self, dataRef): 

705 """No metadata to write. 

706 """ 

707 pass 

708 
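# A minimal sketch of the functor file TransformCatalogBaseTask expects via
# config.functorFile, loaded with the same CompositeFunctor.from_file entry point
# used in __init__ above. The file path and the column/functor choices are
# invented; the YAML keys follow the example in the class docstring.
def _exampleLoadFunctorFile(path='/tmp/exampleFunctors.yaml'):
    yamlText = (
        "funcs:\n"
        "    psfMag:\n"
        "        functor: Mag\n"
        "        args:\n"
        "            - base_PsfFlux\n"
        "        dataset: meas\n"
        "refFlags:\n"
        "    - detect_isPrimary\n"
    )
    with open(path, 'w') as f:
        f.write(yamlText)
    funcs = CompositeFunctor.from_file(path)
    funcs.update(dict(PostprocessAnalysis._defaultFuncs))
    return funcs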

709 

710class TransformObjectCatalogConnections(pipeBase.PipelineTaskConnections, 

711 defaultTemplates={"coaddName": "deep"}, 

712 dimensions=("tract", "patch", "skymap")): 

713 inputCatalog = connectionTypes.Input( 

714 doc="The vertical concatenation of the deepCoadd_{ref|meas|forced_src} catalogs, " 

715 "stored as a DataFrame with a multi-level column index per-patch.", 

716 dimensions=("tract", "patch", "skymap"), 

717 storageClass="DataFrame", 

718 name="{coaddName}Coadd_obj", 

719 deferLoad=True, 

720 ) 

721 outputCatalog = connectionTypes.Output( 

722 doc="Per-Patch Object Table of columns transformed from the deepCoadd_obj table per the standard " 

723 "data model.", 

724 dimensions=("tract", "patch", "skymap"), 

725 storageClass="DataFrame", 

726 name="objectTable" 

727 ) 

728 

729 

730class TransformObjectCatalogConfig(TransformCatalogBaseConfig, 

731 pipelineConnections=TransformObjectCatalogConnections): 

732 coaddName = pexConfig.Field( 

733 dtype=str, 

734 default="deep", 

735 doc="Name of coadd" 

736 ) 

737 # TODO: remove in DM-27177 

738 filterMap = pexConfig.DictField( 

739 keytype=str, 

740 itemtype=str, 

741 default={}, 

742 doc=("Dictionary mapping full filter name to short one for column name munging." 

743 "These filters determine the output columns no matter what filters the " 

744 "input data actually contain."), 

745 deprecated=("Coadds are now identified by the band, so this transform is unused." 

746 "Will be removed after v22.") 

747 ) 

748 outputBands = pexConfig.ListField( 

749 dtype=str, 

750 default=None, 

751 optional=True, 

752 doc=("These bands and only these bands will appear in the output," 

753 " NaN-filled if the input does not include them." 

754 " If None, then use all bands found in the input.") 

755 ) 

756 camelCase = pexConfig.Field( 

757 dtype=bool, 

758 default=True, 

759 doc=("Write per-band columns names with camelCase, else underscore " 

760 "For example: gPsFlux instead of g_PsFlux.") 

761 ) 

762 multilevelOutput = pexConfig.Field( 

763 dtype=bool, 

764 default=False, 

765 doc=("Whether results dataframe should have a multilevel column index (True) or be flat " 

766 "and name-munged (False).") 

767 ) 

768 
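# A sketch of typical config overrides for TransformObjectCatalogTask, e.g. from a
# pipeline configuration file; the functor file path and band list are invented.
def _exampleObjectTransformConfig():
    config = TransformObjectCatalogConfig()
    config.functorFile = '/path/to/Object.yaml'  # hypothetical functor definition file
    config.outputBands = ['g', 'r', 'i']         # only these bands appear in the output
    config.camelCase = True                      # gPsFlux rather than g_PsFlux
    config.multilevelOutput = False              # flat, name-munged columns
    return config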

769 

770class TransformObjectCatalogTask(TransformCatalogBaseTask): 

771 """Produce a flattened Object Table to match the format specified in 

772 sdm_schemas. 

773 

774 Do the same set of postprocessing calculations on all bands.

775 

776 This is identical to `TransformCatalogBaseTask`, except that it does the

777 specified functor calculations for all filters present in the

778 input `deepCoadd_obj` table. Any specific `"filt"` keywords specified

779 by the YAML file will be superseded.

780 """ 

781 _DefaultName = "transformObjectCatalog" 

782 ConfigClass = TransformObjectCatalogConfig 

783 

784 # Used by Gen 2 runDataRef only: 

785 inputDataset = 'deepCoadd_obj' 

786 outputDataset = 'objectTable' 

787 

788 @classmethod 

789 def _makeArgumentParser(cls): 

790 parser = ArgumentParser(name=cls._DefaultName) 

791 parser.add_id_argument("--id", cls.inputDataset, 

792 ContainerClass=CoaddDataIdContainer, 

793 help="data ID, e.g. --id tract=12345 patch=1,2") 

794 return parser 

795 

796 def run(self, parq, funcs=None, dataId=None, band=None): 

797 # NOTE: band kwarg is ignored here. 

798 dfDict = {} 

799 analysisDict = {} 

800 templateDf = pd.DataFrame() 

801 

802 if isinstance(parq, DeferredDatasetHandle): 

803 columns = parq.get(component='columns') 

804 inputBands = columns.unique(level=1).values 

805 else: 

806 inputBands = parq.columnLevelNames['band'] 

807 

808 outputBands = self.config.outputBands if self.config.outputBands else inputBands 

809 

810 # Perform transform for data of filters that exist in parq. 

811 for inputBand in inputBands: 

812 if inputBand not in outputBands: 

813 self.log.info("Ignoring %s band data in the input", inputBand) 

814 continue 

815 self.log.info("Transforming the catalog of band %s", inputBand) 

816 result = self.transform(inputBand, parq, funcs, dataId) 

817 dfDict[inputBand] = result.df 

818 analysisDict[inputBand] = result.analysis 

819 if templateDf.empty: 

820 templateDf = result.df 

821 

822 # Fill NaNs in columns of other wanted bands 

823 for filt in outputBands: 

824 if filt not in dfDict: 

825 self.log.info("Adding empty columns for band %s", filt) 

826 dfDict[filt] = pd.DataFrame().reindex_like(templateDf) 

827 

828 # This makes a multilevel column index, with band as first level 

829 df = pd.concat(dfDict, axis=1, names=['band', 'column']) 

830 

831 if not self.config.multilevelOutput: 

832 noDupCols = list(set.union(*[set(v.noDupCols) for v in analysisDict.values()])) 

833 if dataId is not None: 

834 noDupCols += list(dataId.keys()) 

835 df = flattenFilters(df, noDupCols=noDupCols, camelCase=self.config.camelCase, 

836 inputBands=inputBands) 

837 

838 self.log.info("Made a table of %d columns and %d rows", len(df.columns), len(df)) 

839 return df 

840 
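# A pure-pandas sketch of the band handling in run() above: per-band results are
# stacked into a (band, column) MultiIndex, and a band requested via outputBands
# but missing from the input becomes an all-NaN copy of the template frame.
# The band and column names and the values are invented for illustration.
def _exampleBandFill():
    templateDf = pd.DataFrame({'psFlux': [1.0, 2.0], 'ra': [10.0, 11.0]})
    dfDict = {'g': templateDf, 'r': templateDf * 2.0}
    # 'i' is wanted in the output but absent from the input:
    dfDict['i'] = pd.DataFrame().reindex_like(templateDf)
    df = pd.concat(dfDict, axis=1, names=['band', 'column'])
    # df['i'] is all NaN; df['g']['psFlux'] is the original column.
    return df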

841 

842class TractObjectDataIdContainer(CoaddDataIdContainer): 

843 

844 def makeDataRefList(self, namespace): 

845 """Make self.refList from self.idList 

846 

847 Generate a list of data references given tract and/or patch. 

848 This was adapted from `TractQADataIdContainer`, which was 

849 `TractDataIdContainer` modified to not require "filter".

850 Only existing dataRefs are returned. 

851 """ 

852 def getPatchRefList(tract): 

853 return [namespace.butler.dataRef(datasetType=self.datasetType, 

854 tract=tract.getId(), 

855 patch="%d,%d" % patch.getIndex()) for patch in tract] 

856 

857 tractRefs = defaultdict(list) # Data references for each tract 

858 for dataId in self.idList: 

859 skymap = self.getSkymap(namespace) 

860 

861 if "tract" in dataId: 

862 tractId = dataId["tract"] 

863 if "patch" in dataId: 

864 tractRefs[tractId].append(namespace.butler.dataRef(datasetType=self.datasetType, 

865 tract=tractId, 

866 patch=dataId['patch'])) 

867 else: 

868 tractRefs[tractId] += getPatchRefList(skymap[tractId]) 

869 else: 

870 tractRefs = dict((tract.getId(), tractRefs.get(tract.getId(), []) + getPatchRefList(tract)) 

871 for tract in skymap) 

872 outputRefList = [] 

873 for tractRefList in tractRefs.values(): 

874 existingRefs = [ref for ref in tractRefList if ref.datasetExists()] 

875 outputRefList.append(existingRefs) 

876 

877 self.refList = outputRefList 

878 

879 

880class ConsolidateObjectTableConnections(pipeBase.PipelineTaskConnections, 

881 dimensions=("tract", "skymap")): 

882 inputCatalogs = connectionTypes.Input( 

883 doc="Per-Patch objectTables conforming to the standard data model.", 

884 name="objectTable", 

885 storageClass="DataFrame", 

886 dimensions=("tract", "patch", "skymap"), 

887 multiple=True, 

888 ) 

889 outputCatalog = connectionTypes.Output( 

890 doc="Pre-tract horizontal concatenation of the input objectTables", 

891 name="objectTable_tract", 

892 storageClass="DataFrame", 

893 dimensions=("tract", "skymap"), 

894 ) 

895 

896 

897class ConsolidateObjectTableConfig(pipeBase.PipelineTaskConfig, 

898 pipelineConnections=ConsolidateObjectTableConnections): 

899 coaddName = pexConfig.Field( 

900 dtype=str, 

901 default="deep", 

902 doc="Name of coadd" 

903 ) 

904 

905 

906class ConsolidateObjectTableTask(CmdLineTask, pipeBase.PipelineTask): 

907 """Write patch-merged source tables to a tract-level parquet file 

908 

909 Concatenates the `objectTable` list into a per-tract `objectTable_tract`

910 """ 

911 _DefaultName = "consolidateObjectTable" 

912 ConfigClass = ConsolidateObjectTableConfig 

913 

914 inputDataset = 'objectTable' 

915 outputDataset = 'objectTable_tract' 

916 

917 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

918 inputs = butlerQC.get(inputRefs) 

919 self.log.info("Concatenating %s per-patch Object Tables", 

920 len(inputs['inputCatalogs'])) 

921 df = pd.concat(inputs['inputCatalogs']) 

922 butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs) 

923 

924 @classmethod 

925 def _makeArgumentParser(cls): 

926 parser = ArgumentParser(name=cls._DefaultName) 

927 

928 parser.add_id_argument("--id", cls.inputDataset, 

929 help="data ID, e.g. --id tract=12345", 

930 ContainerClass=TractObjectDataIdContainer) 

931 return parser 

932 

933 def runDataRef(self, patchRefList): 

934 df = pd.concat([patchRef.get().toDataFrame() for patchRef in patchRefList]) 

935 patchRefList[0].put(ParquetTable(dataFrame=df), self.outputDataset) 

936 

937 def writeMetadata(self, dataRef): 

938 """No metadata to write. 

939 """ 

940 pass 

941 
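# A pure-pandas sketch of the consolidation step: per-patch objectTables are simply
# concatenated row-wise into one per-tract frame. The column name and values are
# invented for illustration.
def _exampleConsolidatePatches():
    patch1 = pd.DataFrame({'coord_ra': [10.0]}, index=pd.Index([1], name='objectId'))
    patch2 = pd.DataFrame({'coord_ra': [11.0]}, index=pd.Index([2], name='objectId'))
    return pd.concat([patch1, patch2])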

942 

943class TransformSourceTableConnections(pipeBase.PipelineTaskConnections, 

944 dimensions=("instrument", "visit", "detector")): 

945 

946 inputCatalog = connectionTypes.Input( 

947 doc="Wide input catalog of sources produced by WriteSourceTableTask", 

948 name="source", 

949 storageClass="DataFrame", 

950 dimensions=("instrument", "visit", "detector"), 

951 deferLoad=True 

952 ) 

953 outputCatalog = connectionTypes.Output( 

954 doc="Narrower, per-detector Source Table transformed and converted per a " 

955 "specified set of functors", 

956 name="sourceTable", 

957 storageClass="DataFrame", 

958 dimensions=("instrument", "visit", "detector") 

959 ) 

960 

961 

962class TransformSourceTableConfig(TransformCatalogBaseConfig, 

963 pipelineConnections=TransformSourceTableConnections): 

964 pass 

965 

966 

967class TransformSourceTableTask(TransformCatalogBaseTask): 

968 """Transform/standardize a source catalog 

969 """ 

970 _DefaultName = "transformSourceTable" 

971 ConfigClass = TransformSourceTableConfig 

972 

973 inputDataset = 'source' 

974 outputDataset = 'sourceTable' 

975 

976 @classmethod 

977 def _makeArgumentParser(cls): 

978 parser = ArgumentParser(name=cls._DefaultName) 

979 parser.add_id_argument("--id", datasetType=cls.inputDataset, 

980 level="sensor", 

981 help="data ID, e.g. --id visit=12345 ccd=0") 

982 return parser 

983 

984 def runDataRef(self, dataRef): 

985 """Override to specify band label to run().""" 

986 parq = dataRef.get() 

987 funcs = self.getFunctors() 

988 band = dataRef.get("calexp_filterLabel", immediate=True).bandLabel 

989 df = self.run(parq, funcs=funcs, dataId=dataRef.dataId, band=band) 

990 self.write(df, dataRef) 

991 return df 

992 

993 

994class ConsolidateVisitSummaryConnections(pipeBase.PipelineTaskConnections, 

995 dimensions=("instrument", "visit",), 

996 defaultTemplates={}): 

997 calexp = connectionTypes.Input( 

998 doc="Processed exposures used for metadata", 

999 name="calexp", 

1000 storageClass="ExposureF", 

1001 dimensions=("instrument", "visit", "detector"), 

1002 deferLoad=True, 

1003 multiple=True, 

1004 ) 

1005 visitSummary = connectionTypes.Output( 

1006 doc=("Per-visit consolidated exposure metadata. These catalogs use " 

1007 "detector id for the id and are sorted for fast lookups of a " 

1008 "detector."), 

1009 name="visitSummary", 

1010 storageClass="ExposureCatalog", 

1011 dimensions=("instrument", "visit"), 

1012 ) 

1013 

1014 

1015class ConsolidateVisitSummaryConfig(pipeBase.PipelineTaskConfig, 

1016 pipelineConnections=ConsolidateVisitSummaryConnections): 

1017 """Config for ConsolidateVisitSummaryTask""" 

1018 pass 

1019 

1020 

1021class ConsolidateVisitSummaryTask(pipeBase.PipelineTask, pipeBase.CmdLineTask): 

1022 """Task to consolidate per-detector visit metadata. 

1023 

1024 This task aggregates the following metadata from all the detectors in a 

1025 single visit into an exposure catalog: 

1026 - The visitInfo. 

1027 - The wcs. 

1028 - The photoCalib. 

1029 - The physical_filter and band (if available). 

1030 - The psf size, shape, and effective area at the center of the detector. 

1031 - The corners of the bounding box in right ascension/declination. 

1032 

1033 Other quantities such as Detector, Psf, ApCorrMap, and TransmissionCurve 

1034 are not persisted here because of storage concerns, and because of their 

1035 limited utility as summary statistics. 

1036 

1037 Tests for this task are performed in ci_hsc_gen3. 

1038 """ 

1039 _DefaultName = "consolidateVisitSummary" 

1040 ConfigClass = ConsolidateVisitSummaryConfig 

1041 

1042 @classmethod 

1043 def _makeArgumentParser(cls): 

1044 parser = ArgumentParser(name=cls._DefaultName) 

1045 

1046 parser.add_id_argument("--id", "calexp", 

1047 help="data ID, e.g. --id visit=12345", 

1048 ContainerClass=VisitDataIdContainer) 

1049 return parser 

1050 

1051 def writeMetadata(self, dataRef): 

1052 """No metadata to persist, so override to remove metadata persistance. 

1053 """ 

1054 pass 

1055 

1056 def writeConfig(self, butler, clobber=False, doBackup=True): 

1057 """No config to persist, so override to remove config persistance. 

1058 """ 

1059 pass 

1060 

1061 def runDataRef(self, dataRefList): 

1062 visit = dataRefList[0].dataId['visit'] 

1063 

1064 self.log.debug("Concatenating metadata from %d per-detector calexps (visit %d)" % 

1065 (len(dataRefList), visit)) 

1066 

1067 expCatalog = self._combineExposureMetadata(visit, dataRefList, isGen3=False) 

1068 

1069 dataRefList[0].put(expCatalog, 'visitSummary', visit=visit) 

1070 

1071 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

1072 dataRefs = butlerQC.get(inputRefs.calexp) 

1073 visit = dataRefs[0].dataId.byName()['visit'] 

1074 

1075 self.log.debug("Concatenating metadata from %d per-detector calexps (visit %d)" % 

1076 (len(dataRefs), visit)) 

1077 

1078 expCatalog = self._combineExposureMetadata(visit, dataRefs) 

1079 

1080 butlerQC.put(expCatalog, outputRefs.visitSummary) 

1081 

1082 def _combineExposureMetadata(self, visit, dataRefs, isGen3=True): 

1083 """Make a combined exposure catalog from a list of dataRefs. 

1084 These dataRefs must point to exposures with wcs, summaryStats, 

1085 and other visit metadata. 

1086 

1087 Parameters 

1088 ---------- 

1089 visit : `int` 

1090 Visit identification number. 

1091 dataRefs : `list` 

1092 List of dataRefs in visit. May be list of 

1093 `lsst.daf.persistence.ButlerDataRef` (Gen2) or 

1094 `lsst.daf.butler.DeferredDatasetHandle` (Gen3). 

1095 isGen3 : `bool`, optional 

1096 Specifies if this is a Gen3 list of datarefs. 

1097 

1098 Returns 

1099 ------- 

1100 visitSummary : `lsst.afw.table.ExposureCatalog` 

1101 Exposure catalog with per-detector summary information. 

1102 """ 

1103 schema = self._makeVisitSummarySchema() 

1104 cat = afwTable.ExposureCatalog(schema) 

1105 cat.resize(len(dataRefs)) 

1106 

1107 cat['visit'] = visit 

1108 

1109 for i, dataRef in enumerate(dataRefs): 

1110 if isGen3: 

1111 visitInfo = dataRef.get(component='visitInfo') 

1112 filterLabel = dataRef.get(component='filterLabel') 

1113 summaryStats = dataRef.get(component='summaryStats') 

1114 detector = dataRef.get(component='detector') 

1115 wcs = dataRef.get(component='wcs') 

1116 photoCalib = dataRef.get(component='photoCalib') 

1117 detector = dataRef.get(component='detector') 

1118 bbox = dataRef.get(component='bbox') 

1119 validPolygon = dataRef.get(component='validPolygon') 

1120 else: 

1121 # Note that we need to read the calexp because there is 

1122 # no magic access to the psf except through the exposure. 

1123 gen2_read_bbox = lsst.geom.BoxI(lsst.geom.PointI(0, 0), lsst.geom.PointI(1, 1)) 

1124 exp = dataRef.get(datasetType='calexp_sub', bbox=gen2_read_bbox) 

1125 visitInfo = exp.getInfo().getVisitInfo() 

1126 filterLabel = dataRef.get("calexp_filterLabel") 

1127 summaryStats = exp.getInfo().getSummaryStats() 

1128 wcs = exp.getWcs() 

1129 photoCalib = exp.getPhotoCalib() 

1130 detector = exp.getDetector() 

1131 bbox = dataRef.get(datasetType='calexp_bbox') 

1132 validPolygon = exp.getInfo().getValidPolygon() 

1133 

1134 rec = cat[i] 

1135 rec.setBBox(bbox) 

1136 rec.setVisitInfo(visitInfo) 

1137 rec.setWcs(wcs) 

1138 rec.setPhotoCalib(photoCalib) 

1139 rec.setValidPolygon(validPolygon) 

1140 

1141 rec['physical_filter'] = filterLabel.physicalLabel if filterLabel.hasPhysicalLabel() else "" 

1142 rec['band'] = filterLabel.bandLabel if filterLabel.hasBandLabel() else "" 

1143 rec.setId(detector.getId()) 

1144 rec['psfSigma'] = summaryStats.psfSigma 

1145 rec['psfIxx'] = summaryStats.psfIxx 

1146 rec['psfIyy'] = summaryStats.psfIyy 

1147 rec['psfIxy'] = summaryStats.psfIxy 

1148 rec['psfArea'] = summaryStats.psfArea 

1149 rec['raCorners'][:] = summaryStats.raCorners 

1150 rec['decCorners'][:] = summaryStats.decCorners 

1151 rec['ra'] = summaryStats.ra 

1152 rec['decl'] = summaryStats.decl 

1153 rec['zenithDistance'] = summaryStats.zenithDistance 

1154 rec['zeroPoint'] = summaryStats.zeroPoint 

1155 rec['skyBg'] = summaryStats.skyBg 

1156 rec['skyNoise'] = summaryStats.skyNoise 

1157 rec['meanVar'] = summaryStats.meanVar 

1158 

1159 metadata = dafBase.PropertyList() 

1160 metadata.add("COMMENT", "Catalog id is detector id, sorted.") 

1161 # We are looping over existing datarefs, so the following is true 

1162 metadata.add("COMMENT", "Only detectors with data have entries.") 

1163 cat.setMetadata(metadata) 

1164 

1165 cat.sort() 

1166 return cat 

1167 

1168 def _makeVisitSummarySchema(self): 

1169 """Make the schema for the visitSummary catalog.""" 

1170 schema = afwTable.ExposureTable.makeMinimalSchema() 

1171 schema.addField('visit', type='I', doc='Visit number') 

1172 schema.addField('physical_filter', type='String', size=32, doc='Physical filter') 

1173 schema.addField('band', type='String', size=32, doc='Name of band') 

1174 schema.addField('psfSigma', type='F', 

1175 doc='PSF model second-moments determinant radius (center of chip) (pixel)') 

1176 schema.addField('psfArea', type='F', 

1177 doc='PSF model effective area (center of chip) (pixel**2)') 

1178 schema.addField('psfIxx', type='F', 

1179 doc='PSF model Ixx (center of chip) (pixel**2)') 

1180 schema.addField('psfIyy', type='F', 

1181 doc='PSF model Iyy (center of chip) (pixel**2)') 

1182 schema.addField('psfIxy', type='F', 

1183 doc='PSF model Ixy (center of chip) (pixel**2)') 

1184 schema.addField('raCorners', type='ArrayD', size=4, 

1185 doc='Right Ascension of bounding box corners (degrees)') 

1186 schema.addField('decCorners', type='ArrayD', size=4, 

1187 doc='Declination of bounding box corners (degrees)') 

1188 schema.addField('ra', type='D', 

1189 doc='Right Ascension of bounding box center (degrees)') 

1190 schema.addField('decl', type='D', 

1191 doc='Declination of bounding box center (degrees)') 

1192 schema.addField('zenithDistance', type='F', 

1193 doc='Zenith distance of bounding box center (degrees)') 

1194 schema.addField('zeroPoint', type='F', 

1195 doc='Mean zeropoint in detector (mag)') 

1196 schema.addField('skyBg', type='F', 

1197 doc='Average sky background (ADU)') 

1198 schema.addField('skyNoise', type='F', 

1199 doc='Average sky noise (ADU)') 

1200 schema.addField('meanVar', type='F', 

1201 doc='Mean variance of the weight plane (ADU**2)') 

1202 

1203 return schema 

1204 
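# A sketch of inspecting a visitSummary produced by this task, assuming the LSST
# stack and a Gen3 butler that already has the dataset; the data ID values are
# invented for illustration.
def _exampleReadVisitSummary(butler):
    visitSummary = butler.get('visitSummary', visit=1228, instrument='HSC')
    for rec in visitSummary:
        # One row per detector; the record id is the detector id (see the catalog
        # metadata comments added above).
        print(rec.getId(), rec['band'], rec['psfSigma'], rec['zeroPoint'])
    # Per-detector WCS and PhotoCalib are attached to each record as well.
    return visitSummary[0].getWcs()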

1205 

1206class VisitDataIdContainer(DataIdContainer): 

1207 """DataIdContainer that groups sensor-level id's by visit 

1208 """ 

1209 

1210 def makeDataRefList(self, namespace): 

1211 """Make self.refList from self.idList 

1212 

1213 Generate a list of data references grouped by visit. 

1214 

1215 Parameters 

1216 ---------- 

1217 namespace : `argparse.Namespace` 

1218 Namespace used by `lsst.pipe.base.CmdLineTask` to parse command line arguments 

1219 """ 

1220 # Group by visits 

1221 visitRefs = defaultdict(list) 

1222 for dataId in self.idList: 

1223 if "visit" in dataId: 

1224 visitId = dataId["visit"] 

1225 # Append all dataRefs in this visit's butler subset to visitRefs.

1226 subset = namespace.butler.subset(self.datasetType, dataId=dataId) 

1227 visitRefs[visitId].extend([dataRef for dataRef in subset]) 

1228 

1229 outputRefList = [] 

1230 for refList in visitRefs.values(): 

1231 existingRefs = [ref for ref in refList if ref.datasetExists()] 

1232 if existingRefs: 

1233 outputRefList.append(existingRefs) 

1234 

1235 self.refList = outputRefList 

1236 
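# A plain-Python sketch of the grouping pattern used in makeDataRefList above,
# with dicts standing in for butler data references; the data ID values are
# invented for illustration.
def _exampleGroupByVisit():
    dataIds = [{'visit': 1, 'ccd': 0}, {'visit': 1, 'ccd': 1}, {'visit': 2, 'ccd': 0}]
    visitRefs = defaultdict(list)
    for dataId in dataIds:
        visitRefs[dataId['visit']].append(dataId)
    # One list per visit, mirroring the per-visit refLists assembled above.
    return [refs for refs in visitRefs.values() if refs]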

1237 

1238class ConsolidateSourceTableConnections(pipeBase.PipelineTaskConnections, 

1239 dimensions=("instrument", "visit")): 

1240 inputCatalogs = connectionTypes.Input( 

1241 doc="Input per-detector Source Tables", 

1242 name="sourceTable", 

1243 storageClass="DataFrame", 

1244 dimensions=("instrument", "visit", "detector"), 

1245 multiple=True 

1246 ) 

1247 outputCatalog = connectionTypes.Output( 

1248 doc="Per-visit concatenation of Source Table", 

1249 name="sourceTable_visit", 

1250 storageClass="DataFrame", 

1251 dimensions=("instrument", "visit") 

1252 ) 

1253 

1254 

1255class ConsolidateSourceTableConfig(pipeBase.PipelineTaskConfig, 

1256 pipelineConnections=ConsolidateSourceTableConnections): 

1257 pass 

1258 

1259 

1260class ConsolidateSourceTableTask(CmdLineTask, pipeBase.PipelineTask): 

1261 """Concatenate `sourceTable` list into a per-visit `sourceTable_visit` 

1262 """ 

1263 _DefaultName = 'consolidateSourceTable' 

1264 ConfigClass = ConsolidateSourceTableConfig 

1265 

1266 inputDataset = 'sourceTable' 

1267 outputDataset = 'sourceTable_visit' 

1268 

1269 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

1270 inputs = butlerQC.get(inputRefs) 

1271 self.log.info("Concatenating %s per-detector Source Tables", 

1272 len(inputs['inputCatalogs'])) 

1273 df = pd.concat(inputs['inputCatalogs']) 

1274 butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs) 

1275 

1276 def runDataRef(self, dataRefList): 

1277 self.log.info("Concatenating %s per-detector Source Tables", len(dataRefList)) 

1278 df = pd.concat([dataRef.get().toDataFrame() for dataRef in dataRefList]) 

1279 dataRefList[0].put(ParquetTable(dataFrame=df), self.outputDataset) 

1280 

1281 @classmethod 

1282 def _makeArgumentParser(cls): 

1283 parser = ArgumentParser(name=cls._DefaultName) 

1284 

1285 parser.add_id_argument("--id", cls.inputDataset, 

1286 help="data ID, e.g. --id visit=12345", 

1287 ContainerClass=VisitDataIdContainer) 

1288 return parser 

1289 

1290 def writeMetadata(self, dataRef): 

1291 """No metadata to write. 

1292 """ 

1293 pass 

1294 

1295 def writeConfig(self, butler, clobber=False, doBackup=True): 

1296 """No config to write. 

1297 """ 

1298 pass