
1# This file is part of pipe_tasks 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

22import functools 

23import pandas as pd 

24from collections import defaultdict 

25import numpy as np 

26 

27import lsst.geom 

28import lsst.pex.config as pexConfig 

29import lsst.pipe.base as pipeBase 

30import lsst.daf.base as dafBase 

31from lsst.pipe.base import connectionTypes 

32import lsst.afw.table as afwTable 

33from lsst.meas.base import SingleFrameMeasurementTask 

34from lsst.pipe.base import CmdLineTask, ArgumentParser, DataIdContainer 

35from lsst.coadd.utils.coaddDataIdContainer import CoaddDataIdContainer 

36from lsst.daf.butler import DeferredDatasetHandle, DataCoordinate 

37 

38from .parquetTable import ParquetTable 

39from .multiBandUtils import makeMergeArgumentParser, MergeSourcesRunner 

40from .functors import CompositeFunctor, Column 

41 

42 

43def flattenFilters(df, noDupCols=['coord_ra', 'coord_dec'], camelCase=False, inputBands=None): 

44 """Flattens a dataframe with multilevel column index 

45 """ 

46 newDf = pd.DataFrame() 

47 # band is the level 0 index 

48 dfBands = df.columns.unique(level=0).values 

49 for band in dfBands: 

50 subdf = df[band] 

51 columnFormat = '{0}{1}' if camelCase else '{0}_{1}' 

52 newColumns = {c: columnFormat.format(band, c) 

53 for c in subdf.columns if c not in noDupCols} 

54 cols = list(newColumns.keys()) 

55 newDf = pd.concat([newDf, subdf[cols].rename(columns=newColumns)], axis=1) 

56 

57 # Band must be present in the input and output or else column is all NaN: 

58 presentBands = dfBands if inputBands is None else list(set(inputBands).intersection(dfBands)) 

59 # Get the unexploded columns from any present band's partition 

60 noDupDf = df[presentBands[0]][noDupCols] 

61 newDf = pd.concat([noDupDf, newDf], axis=1) 

62 return newDf 

63 

64 
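
# Illustrative usage sketch (not part of the pipeline); the column names below are
# hypothetical.  With camelCase=True the per-band columns become gPsFlux, rPsFlux, etc.,
# while the noDupCols (coord_ra/coord_dec) are kept once, unprefixed.
def _exampleFlattenFilters():
    data = {(band, col): [1.0, 2.0]
            for band in ('g', 'r')
            for col in ('coord_ra', 'coord_dec', 'PsFlux')}
    df = pd.DataFrame(data)
    df.columns = pd.MultiIndex.from_tuples(df.columns, names=('band', 'column'))
    return flattenFilters(df, camelCase=True)
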

65class WriteObjectTableConnections(pipeBase.PipelineTaskConnections, 

66 defaultTemplates={"coaddName": "deep"}, 

67 dimensions=("tract", "patch", "skymap")): 

68 inputCatalogMeas = connectionTypes.Input( 

69 doc="Catalog of source measurements on the deepCoadd.", 

70 dimensions=("tract", "patch", "band", "skymap"), 

71 storageClass="SourceCatalog", 

72 name="{coaddName}Coadd_meas", 

73 multiple=True 

74 ) 

75 inputCatalogForcedSrc = connectionTypes.Input( 

76 doc="Catalog of forced measurements (shape and position parameters held fixed) on the deepCoadd.", 

77 dimensions=("tract", "patch", "band", "skymap"), 

78 storageClass="SourceCatalog", 

79 name="{coaddName}Coadd_forced_src", 

80 multiple=True 

81 ) 

82 inputCatalogRef = connectionTypes.Input( 

83 doc="Catalog marking the primary detection (which band provides a good shape and position)" 

84 "for each detection in deepCoadd_mergeDet.", 

85 dimensions=("tract", "patch", "skymap"), 

86 storageClass="SourceCatalog", 

87 name="{coaddName}Coadd_ref" 

88 ) 

89 outputCatalog = connectionTypes.Output( 

90 doc="A vertical concatenation of the deepCoadd_{ref|meas|forced_src} catalogs, " 

91 "stored as a DataFrame with a multi-level column index per-patch.", 

92 dimensions=("tract", "patch", "skymap"), 

93 storageClass="DataFrame", 

94 name="{coaddName}Coadd_obj" 

95 ) 

96 

97 

98class WriteObjectTableConfig(pipeBase.PipelineTaskConfig, 

99 pipelineConnections=WriteObjectTableConnections): 

100 engine = pexConfig.Field( 

101 dtype=str, 

102 default="pyarrow", 

103 doc="Parquet engine for writing (pyarrow or fastparquet)" 

104 ) 

105 coaddName = pexConfig.Field( 

106 dtype=str, 

107 default="deep", 

108 doc="Name of coadd" 

109 ) 

110 

111 

112class WriteObjectTableTask(CmdLineTask, pipeBase.PipelineTask): 

113 """Write filter-merged source tables to parquet 

114 """ 

115 _DefaultName = "writeObjectTable" 

116 ConfigClass = WriteObjectTableConfig 

117 RunnerClass = MergeSourcesRunner 

118 

119 # Names of table datasets to be merged 

120 inputDatasets = ('forced_src', 'meas', 'ref') 

121 

122 # Tag of output dataset written by `MergeSourcesTask.write` 

123 outputDataset = 'obj' 

124 

125 def __init__(self, butler=None, schema=None, **kwargs): 

126 # It is a shame that this class can't use the default init for CmdLineTask 

127 # But to do so would require its own special task runner, which is many 

128 # more lines of specialization, so this is how it is for now 

129 super().__init__(**kwargs) 

130 

131 def runDataRef(self, patchRefList): 

132 """! 

133 @brief Merge coadd sources from multiple bands. Calls @ref `run` which must be defined in 

134 subclasses that inherit from MergeSourcesTask. 

135 @param[in] patchRefList list of data references for each filter 

136 """ 

137 catalogs = dict(self.readCatalog(patchRef) for patchRef in patchRefList) 

138 dataId = patchRefList[0].dataId 

139 mergedCatalog = self.run(catalogs, tract=dataId['tract'], patch=dataId['patch']) 

140 self.write(patchRefList[0], ParquetTable(dataFrame=mergedCatalog)) 

141 

142 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

143 inputs = butlerQC.get(inputRefs) 

144 

145 measDict = {ref.dataId['band']: {'meas': cat} for ref, cat in 

146 zip(inputRefs.inputCatalogMeas, inputs['inputCatalogMeas'])} 

147 forcedSourceDict = {ref.dataId['band']: {'forced_src': cat} for ref, cat in 

148 zip(inputRefs.inputCatalogForcedSrc, inputs['inputCatalogForcedSrc'])} 

149 

150 catalogs = {} 

151 for band in measDict.keys(): 

152 catalogs[band] = {'meas': measDict[band]['meas'], 

153 'forced_src': forcedSourceDict[band]['forced_src'], 

154 'ref': inputs['inputCatalogRef']} 

155 dataId = butlerQC.quantum.dataId 

156 df = self.run(catalogs=catalogs, tract=dataId['tract'], patch=dataId['patch']) 

157 outputs = pipeBase.Struct(outputCatalog=df) 

158 butlerQC.put(outputs, outputRefs) 

159 

160 @classmethod 

161 def _makeArgumentParser(cls): 

162 """Create a suitable ArgumentParser. 

163 

164 We will use the ArgumentParser to get a list of data 

165 references for patches; the RunnerClass will sort them into lists 

166 of data references for the same patch. 

167 

168 References the first of self.inputDatasets, rather than

169 self.inputDataset.

170 """ 

171 return makeMergeArgumentParser(cls._DefaultName, cls.inputDatasets[0]) 

172 

173 def readCatalog(self, patchRef): 

174 """Read input catalogs 

175 

176 Read all the input datasets given by the 'inputDatasets' 

177 attribute. 

178 

179 Parameters 

180 ---------- 

181 patchRef : `lsst.daf.persistence.ButlerDataRef` 

182 Data reference for patch 

183 

184 Returns 

185 ------- 

186 Tuple consisting of band name and a dict of catalogs, keyed by 

187 dataset name 

188 """ 

189 band = patchRef.get(self.config.coaddName + "Coadd_filterLabel", immediate=True).bandLabel 

190 catalogDict = {} 

191 for dataset in self.inputDatasets: 

192 catalog = patchRef.get(self.config.coaddName + "Coadd_" + dataset, immediate=True) 

193 self.log.info("Read %d sources from %s for band %s: %s", 

194 len(catalog), dataset, band, patchRef.dataId) 

195 catalogDict[dataset] = catalog 

196 return band, catalogDict 

197 

198 def run(self, catalogs, tract, patch): 

199 """Merge multiple catalogs. 

200 

201 Parameters 

202 ---------- 

203 catalogs : `dict` 

204 Mapping from filter names to dict of catalogs. 

205 tract : `int`

206 Tract id to use for the tractId column.

207 patch : `str`

208 Patch id to use for the patchId column.

209 

210 Returns 

211 ------- 

212 catalog : `pandas.DataFrame` 

213 Merged dataframe 

214 """ 

215 

216 dfs = [] 

217 for filt, tableDict in catalogs.items(): 

218 for dataset, table in tableDict.items(): 

219 # Convert afwTable to pandas DataFrame 

220 df = table.asAstropy().to_pandas().set_index('id', drop=True) 

221 

222 # Sort columns by name, to ensure matching schema among patches 

223 df = df.reindex(sorted(df.columns), axis=1) 

224 df['tractId'] = tract 

225 df['patchId'] = patch 

226 

227 # Make columns a 3-level MultiIndex 

228 df.columns = pd.MultiIndex.from_tuples([(dataset, filt, c) for c in df.columns], 

229 names=('dataset', 'band', 'column')) 

230 dfs.append(df) 

231 

232 catalog = functools.reduce(lambda d1, d2: d1.join(d2), dfs) 

233 return catalog 

234 

235 def write(self, patchRef, catalog): 

236 """Write the output. 

237 

238 Parameters 

239 ---------- 

240 catalog : `ParquetTable` 

241 Catalog to write 

242 patchRef : `lsst.daf.persistence.ButlerDataRef` 

243 Data reference for patch 

244 """ 

245 patchRef.put(catalog, self.config.coaddName + "Coadd_" + self.outputDataset) 

246 # since the filter isn't actually part of the data ID for the dataset we're saving, 

247 # it's confusing to see it in the log message, even if the butler simply ignores it. 

248 mergeDataId = patchRef.dataId.copy() 

249 del mergeDataId["filter"] 

250 self.log.info("Wrote merged catalog: %s", mergeDataId) 

251 

252 def writeMetadata(self, dataRefList): 

253 """No metadata to write, and not sure how to write it for a list of dataRefs. 

254 """ 

255 pass 

256 

257 
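
# Illustrative sketch (not part of the pipeline) of the column layout that
# WriteObjectTableTask.run produces: the ref/meas/forced_src catalogs are joined
# horizontally on the object id index under a (dataset, band, column) MultiIndex.
# The catalog contents here are hypothetical.
def _exampleObjectTableLayout():
    meas = pd.DataFrame({'PsFlux': [1.0, 2.0]}, index=[10, 11])
    ref = pd.DataFrame({'detect_isPrimary': [True, False]}, index=[10, 11])
    dfs = []
    for dataset, df in (('meas', meas), ('ref', ref)):
        df = df.copy()
        df.columns = pd.MultiIndex.from_tuples(
            [(dataset, 'g', c) for c in df.columns],
            names=('dataset', 'band', 'column'))
        dfs.append(df)
    # df['meas']['g'] then recovers the per-band measurement columns.
    return functools.reduce(lambda d1, d2: d1.join(d2), dfs)
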

258class WriteSourceTableConnections(pipeBase.PipelineTaskConnections, 

259 defaultTemplates={"catalogType": ""}, 

260 dimensions=("instrument", "visit", "detector")): 

261 

262 catalog = connectionTypes.Input( 

263 doc="Input full-depth catalog of sources produced by CalibrateTask", 

264 name="{catalogType}src", 

265 storageClass="SourceCatalog", 

266 dimensions=("instrument", "visit", "detector") 

267 ) 

268 outputCatalog = connectionTypes.Output( 

269 doc="Catalog of sources, `src` in Parquet format. The 'id' column is " 

270 "replaced with an index; all other columns are unchanged.", 

271 name="{catalogType}source", 

272 storageClass="DataFrame", 

273 dimensions=("instrument", "visit", "detector") 

274 ) 

275 

276 

277class WriteSourceTableConfig(pipeBase.PipelineTaskConfig, 

278 pipelineConnections=WriteSourceTableConnections): 

279 doApplyExternalPhotoCalib = pexConfig.Field( 

280 dtype=bool, 

281 default=False, 

282 doc=("Add local photoCalib columns from the calexp.photoCalib? Should only set True if " 

283 "generating Source Tables from older src tables which do not already have local calib columns") 

284 ) 

285 doApplyExternalSkyWcs = pexConfig.Field( 

286 dtype=bool, 

287 default=False, 

288 doc=("Add local WCS columns from the calexp.wcs? Should only set True if " 

289 "generating Source Tables from older src tables which do not already have local calib columns") 

290 ) 

291 

292 

293class WriteSourceTableTask(CmdLineTask, pipeBase.PipelineTask): 

294 """Write source table to parquet 

295 """ 

296 _DefaultName = "writeSourceTable" 

297 ConfigClass = WriteSourceTableConfig 

298 

299 def runDataRef(self, dataRef): 

300 src = dataRef.get('src') 

301 if self.config.doApplyExternalPhotoCalib or self.config.doApplyExternalSkyWcs: 

302 src = self.addCalibColumns(src, dataRef) 

303 

304 ccdVisitId = dataRef.get('ccdExposureId') 

305 result = self.run(src, ccdVisitId=ccdVisitId) 

306 dataRef.put(result.table, 'source') 

307 

308 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

309 inputs = butlerQC.get(inputRefs) 

310 inputs['ccdVisitId'] = butlerQC.quantum.dataId.pack("visit_detector") 

311 result = self.run(**inputs).table 

312 outputs = pipeBase.Struct(outputCatalog=result.toDataFrame()) 

313 butlerQC.put(outputs, outputRefs) 

314 

315 def run(self, catalog, ccdVisitId=None): 

316 """Convert `src` catalog to parquet 

317 

318 Parameters 

319 ---------- 

320 catalog: `afwTable.SourceCatalog` 

321 catalog to be converted 

322 ccdVisitId: `int` 

323 ccdVisitId to be added as a column 

324 

325 Returns 

326 ------- 

327 result : `lsst.pipe.base.Struct` 

328 ``table`` 

329 `ParquetTable` version of the input catalog 

330 """ 

331 self.log.info("Generating parquet table from src catalog %s", ccdVisitId) 

332 df = catalog.asAstropy().to_pandas().set_index('id', drop=True) 

333 df['ccdVisitId'] = ccdVisitId 

334 return pipeBase.Struct(table=ParquetTable(dataFrame=df)) 

335 

336 def addCalibColumns(self, catalog, dataRef): 

337 """Add columns with local calibration evaluated at each centroid 

338 

339 for backwards compatibility with old repos. 

340 This exists for the purpose of converting old src catalogs 

341 (which don't have the expected local calib columns) to Source Tables. 

342 

343 Parameters 

344 ---------- 

345 catalog: `afwTable.SourceCatalog` 

346 catalog to which calib columns will be added 

347 dataRef: `lsst.daf.persistence.ButlerDataRef`

348 Data reference for fetching the calibs from disk.

349 

350 Returns 

351 ------- 

352 newCat: `afwTable.SourceCatalog` 

353 Source Catalog with requested local calib columns 

354 """ 

356 measureConfig = SingleFrameMeasurementTask.ConfigClass()

357 measureConfig.doReplaceWithNoise = False 

358 

359 # Just need the WCS or the PhotoCalib attached to an exposure

360 exposure = dataRef.get('calexp_sub', 

361 bbox=lsst.geom.Box2I(lsst.geom.Point2I(0, 0), lsst.geom.Point2I(0, 0))) 

362 

363 mapper = afwTable.SchemaMapper(catalog.schema) 

364 mapper.addMinimalSchema(catalog.schema, True) 

365 schema = mapper.getOutputSchema() 

366 

367 exposureIdInfo = dataRef.get("expIdInfo") 

368 measureConfig.plugins.names = [] 

369 if self.config.doApplyExternalSkyWcs: 

370 plugin = 'base_LocalWcs' 

371 if plugin in schema: 

372 raise RuntimeError(f"{plugin} already in src catalog. Set doApplyExternalSkyWcs=False") 

373 else: 

374 measureConfig.plugins.names.add(plugin) 

375 

376 if self.config.doApplyExternalPhotoCalib: 

377 plugin = 'base_LocalPhotoCalib' 

378 if plugin in schema: 

379 raise RuntimeError(f"{plugin} already in src catalog. Set doApplyExternalPhotoCalib=False") 

380 else: 

381 measureConfig.plugins.names.add(plugin) 

382 

383 measurement = SingleFrameMeasurementTask(config=measureConfig, schema=schema) 

384 newCat = afwTable.SourceCatalog(schema) 

385 newCat.extend(catalog, mapper=mapper) 

386 measurement.run(measCat=newCat, exposure=exposure, exposureId=exposureIdInfo.expId) 

387 return newCat 

388 

389 def writeMetadata(self, dataRef): 

390 """No metadata to write. 

391 """ 

392 pass 

393 

394 @classmethod 

395 def _makeArgumentParser(cls): 

396 parser = ArgumentParser(name=cls._DefaultName) 

397 parser.add_id_argument("--id", 'src', 

398 help="data ID, e.g. --id visit=12345 ccd=0") 

399 return parser 

400 

401 
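
# Illustrative sketch (not part of the pipeline): the Source Table written by
# WriteSourceTableTask.run is simply the src catalog as a DataFrame indexed by source
# id, plus a constant ccdVisitId column for joining against the ccdVisitTable.
# The column name and id values below are hypothetical.
def _exampleSourceTable(ccdVisitId=17249028):
    df = pd.DataFrame({'base_PsfFlux_instFlux': [10.0, 20.0]},
                      index=pd.Index([1, 2], name='id'))
    df['ccdVisitId'] = ccdVisitId
    return ParquetTable(dataFrame=df)
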

402class PostprocessAnalysis(object): 

403 """Calculate columns from ParquetTable 

404 

405 This object manages and organizes an arbitrary set of computations 

406 on a catalog. The catalog is defined by a 

407 `lsst.pipe.tasks.parquetTable.ParquetTable` object (or list thereof), such as a 

408 `deepCoadd_obj` dataset, and the computations are defined by a collection 

409 of `lsst.pipe.tasks.functor.Functor` objects (or, equivalently, 

410 a `CompositeFunctor`). 

411 

412 After the object is initialized, accessing the `.df` attribute (which 

413 holds the `pandas.DataFrame` containing the results of the calculations) triggers 

414 computation of said dataframe. 

415 

416 One of the conveniences of using this object is the ability to define a desired common 

417 filter for all functors. This enables the same functor collection to be passed to 

418 several different `PostprocessAnalysis` objects without having to change the original 

419 functor collection, since the `filt` keyword argument of this object triggers an 

420 overwrite of the `filt` property for all functors in the collection. 

421 

422 This object also allows a list of refFlags to be passed, and defines a set of default 

423 refFlags that are always included even if not requested. 

424 

425 If a list of `ParquetTable` objects is passed, rather than a single one, then the

426 calculations will be mapped over all the input catalogs. In principle, it should 

427 be straightforward to parallelize this activity, but initial tests have failed 

428 (see TODO in code comments). 

429 

430 Parameters 

431 ---------- 

432 parq : `lsst.pipe.tasks.ParquetTable` (or list of such) 

433 Source catalog(s) for computation 

434 

435 functors : `list`, `dict`, or `lsst.pipe.tasks.functors.CompositeFunctor` 

436 Computations to do (functors that act on `parq`). 

437 If a dict, the output 

438 DataFrame will have columns keyed accordingly. 

439 If a list, the column keys will come from the 

440 `.shortname` attribute of each functor. 

441 

442 filt : `str` (optional) 

443 Filter in which to calculate. If provided, 

444 this will overwrite any existing `.filt` attribute 

445 of the provided functors. 

446 

447 flags : `list` (optional) 

448 List of flags (per-band) to include in output table. 

449 Taken from the `meas` dataset if applied to a multilevel Object Table. 

450 

451 refFlags : `list` (optional) 

452 List of refFlags (only reference band) to include in output table. 

453 

454 forcedFlags : `list` (optional) 

455 List of flags (per-band) to include in output table. 

456 Taken from the ``forced_src`` dataset if applied to a 

457 multilevel Object Table. Intended for flags from measurement plugins 

458 only run during multi-band forced-photometry. 

459 """ 

460 _defaultRefFlags = [] 

461 _defaultFuncs = () 

462 

463 def __init__(self, parq, functors, filt=None, flags=None, refFlags=None, forcedFlags=None): 

464 self.parq = parq 

465 self.functors = functors 

466 

467 self.filt = filt 

468 self.flags = list(flags) if flags is not None else [] 

469 self.forcedFlags = list(forcedFlags) if forcedFlags is not None else [] 

470 self.refFlags = list(self._defaultRefFlags) 

471 if refFlags is not None: 

472 self.refFlags += list(refFlags) 

473 

474 self._df = None 

475 

476 @property 

477 def defaultFuncs(self): 

478 funcs = dict(self._defaultFuncs) 

479 return funcs 

480 

481 @property 

482 def func(self): 

483 additionalFuncs = self.defaultFuncs 

484 additionalFuncs.update({flag: Column(flag, dataset='forced_src') for flag in self.forcedFlags}) 

485 additionalFuncs.update({flag: Column(flag, dataset='ref') for flag in self.refFlags}) 

486 additionalFuncs.update({flag: Column(flag, dataset='meas') for flag in self.flags}) 

487 

488 if isinstance(self.functors, CompositeFunctor): 

489 func = self.functors 

490 else: 

491 func = CompositeFunctor(self.functors) 

492 

493 func.funcDict.update(additionalFuncs) 

494 func.filt = self.filt 

495 

496 return func 

497 

498 @property 

499 def noDupCols(self): 

500 return [name for name, func in self.func.funcDict.items() if func.noDup or func.dataset == 'ref'] 

501 

502 @property 

503 def df(self): 

504 if self._df is None: 

505 self.compute() 

506 return self._df 

507 

508 def compute(self, dropna=False, pool=None): 

509 # map over multiple parquet tables 

510 if type(self.parq) in (list, tuple): 

511 if pool is None: 

512 dflist = [self.func(parq, dropna=dropna) for parq in self.parq] 

513 else: 

514 # TODO: Figure out why this doesn't work (pyarrow pickling issues?) 

515 dflist = pool.map(functools.partial(self.func, dropna=dropna), self.parq) 

516 self._df = pd.concat(dflist) 

517 else: 

518 self._df = self.func(self.parq, dropna=dropna) 

519 

520 return self._df 

521 

522 
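
# Illustrative sketch (not part of the pipeline): driving PostprocessAnalysis directly.
# ``parq`` stands in for a ParquetTable (e.g. a deepCoadd_obj dataset) and is a
# hypothetical placeholder here, as are the column names.
def _examplePostprocessAnalysis(parq):
    funcs = {'ra': Column('coord_ra', dataset='ref'),
             'dec': Column('coord_dec', dataset='ref')}
    analysis = PostprocessAnalysis(parq, funcs, filt='g',
                                   refFlags=['detect_isPrimary'])
    # Accessing .df (or calling compute()) triggers evaluation of all functors.
    return analysis.df
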

523class TransformCatalogBaseConnections(pipeBase.PipelineTaskConnections, 

524 dimensions=()): 

525 """Expected Connections for subclasses of TransformCatalogBaseTask. 

526 

527 Must be subclassed. 

528 """ 

529 inputCatalog = connectionTypes.Input( 

530 name="", 

531 storageClass="DataFrame", 

532 ) 

533 outputCatalog = connectionTypes.Output( 

534 name="", 

535 storageClass="DataFrame", 

536 ) 

537 

538 

539class TransformCatalogBaseConfig(pipeBase.PipelineTaskConfig, 

540 pipelineConnections=TransformCatalogBaseConnections): 

541 functorFile = pexConfig.Field( 

542 dtype=str, 

543 doc='Path to YAML file specifying functors to be computed', 

544 default=None, 

545 optional=True 

546 ) 

547 

548 

549class TransformCatalogBaseTask(CmdLineTask, pipeBase.PipelineTask): 

550 """Base class for transforming/standardizing a catalog 

551 

552 by applying functors that convert units and apply calibrations. 

553 The purpose of this task is to perform a set of computations on 

554 an input `ParquetTable` dataset (such as `deepCoadd_obj`) and write the 

555 results to a new dataset (which needs to be declared in an `outputDataset` 

556 attribute). 

557 

558 The calculations to be performed are defined in a YAML file that specifies 

559 a set of functors to be computed, provided as 

560 a `--functorFile` config parameter. An example of such a YAML file 

561 is the following: 

562 

563 funcs: 

564 psfMag: 

565 functor: Mag 

566 args: 

567 - base_PsfFlux 

568 filt: HSC-G 

569 dataset: meas 

570 cmodel_magDiff: 

571 functor: MagDiff 

572 args: 

573 - modelfit_CModel 

574 - base_PsfFlux 

575 filt: HSC-G 

576 gauss_magDiff: 

577 functor: MagDiff 

578 args: 

579 - base_GaussianFlux 

580 - base_PsfFlux 

581 filt: HSC-G 

582 count: 

583 functor: Column 

584 args: 

585 - base_InputCount_value 

586 filt: HSC-G 

587 deconvolved_moments: 

588 functor: DeconvolvedMoments 

589 filt: HSC-G 

590 dataset: forced_src 

591 refFlags: 

592 - calib_psfUsed 

593 - merge_measurement_i 

594 - merge_measurement_r 

595 - merge_measurement_z 

596 - merge_measurement_y 

597 - merge_measurement_g 

598 - base_PixelFlags_flag_inexact_psfCenter 

599 - detect_isPrimary 

600 

601 The names for each entry under "funcs" will become the names of columns in the

602 output dataset. All the functors referenced are defined in `lsst.pipe.tasks.functors`. 

603 Positional arguments to be passed to each functor are in the `args` list, 

604 and any additional entries for each column other than "functor" or "args" (e.g., `'filt'`, 

605 `'dataset'`) are treated as keyword arguments to be passed to the functor initialization. 

606 

607 The "flags" entry is the default shortcut for `Column` functors. 

608 All columns listed under "flags" will be copied to the output table 

609 untransformed. They can be of any datatype. 

610 In the special case of transforming a multi-level Object table with

611 band and dataset indices (deepCoadd_obj), these will be taken from the

612 `meas` dataset and exploded out per band. 

613 

614 There are two special shortcuts that only apply when transforming 

615 multi-level Object (deepCoadd_obj) tables: 

616 - The "refFlags" entry is shortcut for `Column` functor 

617 taken from the `'ref'` dataset if transforming an ObjectTable. 

618 - The "forcedFlags" entry is shortcut for `Column` functors. 

619 taken from the ``forced_src`` dataset if transforming an ObjectTable. 

620 These are expanded out per band. 

621 

622 

623 This task uses the `lsst.pipe.tasks.postprocess.PostprocessAnalysis` object 

624 to organize and execute the calculations.

625 

626 """ 

627 @property 

628 def _DefaultName(self): 

629 raise NotImplementedError('Subclass must define "_DefaultName" attribute') 

630 

631 @property 

632 def outputDataset(self): 

633 raise NotImplementedError('Subclass must define "outputDataset" attribute') 

634 

635 @property 

636 def inputDataset(self): 

637 raise NotImplementedError('Subclass must define "inputDataset" attribute') 

638 

639 @property 

640 def ConfigClass(self): 

641 raise NotImplementedError('Subclass must define "ConfigClass" attribute') 

642 

643 def __init__(self, *args, **kwargs): 

644 super().__init__(*args, **kwargs) 

645 if self.config.functorFile: 

646 self.log.info('Loading transform functor definitions from %s',

647 self.config.functorFile) 

648 self.funcs = CompositeFunctor.from_file(self.config.functorFile) 

649 self.funcs.update(dict(PostprocessAnalysis._defaultFuncs)) 

650 else: 

651 self.funcs = None 

652 

653 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

654 inputs = butlerQC.get(inputRefs) 

655 if self.funcs is None: 

656 raise ValueError("config.functorFile is None. " 

657 "Must be a valid path to yaml in order to run Task as a PipelineTask.") 

658 result = self.run(parq=inputs['inputCatalog'], funcs=self.funcs, 

659 dataId=outputRefs.outputCatalog.dataId.full) 

660 outputs = pipeBase.Struct(outputCatalog=result) 

661 butlerQC.put(outputs, outputRefs) 

662 

663 def runDataRef(self, dataRef): 

664 parq = dataRef.get() 

665 if self.funcs is None: 

666 raise ValueError("config.functorFile is None. " 

667 "Must be a valid path to yaml in order to run as a CommandlineTask.") 

668 df = self.run(parq, funcs=self.funcs, dataId=dataRef.dataId) 

669 self.write(df, dataRef) 

670 return df 

671 

672 def run(self, parq, funcs=None, dataId=None, band=None): 

673 """Do postprocessing calculations 

674 

675 Takes a `ParquetTable` object and dataId, 

676 returns a dataframe with results of postprocessing calculations. 

677 

678 Parameters 

679 ---------- 

680 parq : `lsst.pipe.tasks.parquetTable.ParquetTable` 

681 ParquetTable from which calculations are done. 

682 funcs : `lsst.pipe.tasks.functors.Functors` 

683 Functors to apply to the table's columns 

684 dataId : dict, optional 

685 Used to add a `patchId` column to the output dataframe. 

686 band : `str`, optional 

687 Filter band that is being processed. 

688 

689 Returns 

690 -------

691 `pandas.DataFrame` 

692 

693 """ 

694 self.log.info("Transforming/standardizing the source table dataId: %s", dataId) 

695 

696 df = self.transform(band, parq, funcs, dataId).df 

697 self.log.info("Made a table of %d columns and %d rows", len(df.columns), len(df)) 

698 return df 

699 

700 def getFunctors(self): 

701 return self.funcs 

702 

703 def getAnalysis(self, parq, funcs=None, band=None): 

704 if funcs is None: 

705 funcs = self.funcs 

706 analysis = PostprocessAnalysis(parq, funcs, filt=band) 

707 return analysis 

708 

709 def transform(self, band, parq, funcs, dataId): 

710 analysis = self.getAnalysis(parq, funcs=funcs, band=band) 

711 df = analysis.df 

712 if dataId is not None: 

713 for key, value in dataId.items(): 

714 df[str(key)] = value 

715 

716 return pipeBase.Struct( 

717 df=df, 

718 analysis=analysis 

719 ) 

720 

721 def write(self, df, parqRef): 

722 parqRef.put(ParquetTable(dataFrame=df), self.outputDataset) 

723 

724 def writeMetadata(self, dataRef): 

725 """No metadata to write. 

726 """ 

727 pass 

728 

729 
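
# Illustrative sketch (not part of the pipeline): loading a functor-definition YAML like
# the one shown in the TransformCatalogBaseTask docstring, mirroring what __init__ does
# with config.functorFile.  'myFunctors.yaml' is a hypothetical path.
def _exampleLoadFunctorFile(path='myFunctors.yaml'):
    funcs = CompositeFunctor.from_file(path)
    funcs.update(dict(PostprocessAnalysis._defaultFuncs))
    # The resulting CompositeFunctor can be passed to TransformCatalogBaseTask.run
    # via its ``funcs`` argument.
    return funcs
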

730class TransformObjectCatalogConnections(pipeBase.PipelineTaskConnections, 

731 defaultTemplates={"coaddName": "deep"}, 

732 dimensions=("tract", "patch", "skymap")): 

733 inputCatalog = connectionTypes.Input( 

734 doc="The vertical concatenation of the deepCoadd_{ref|meas|forced_src} catalogs, " 

735 "stored as a DataFrame with a multi-level column index per-patch.", 

736 dimensions=("tract", "patch", "skymap"), 

737 storageClass="DataFrame", 

738 name="{coaddName}Coadd_obj", 

739 deferLoad=True, 

740 ) 

741 outputCatalog = connectionTypes.Output( 

742 doc="Per-Patch Object Table of columns transformed from the deepCoadd_obj table per the standard " 

743 "data model.", 

744 dimensions=("tract", "patch", "skymap"), 

745 storageClass="DataFrame", 

746 name="objectTable" 

747 ) 

748 

749 

750class TransformObjectCatalogConfig(TransformCatalogBaseConfig, 

751 pipelineConnections=TransformObjectCatalogConnections): 

752 coaddName = pexConfig.Field( 

753 dtype=str, 

754 default="deep", 

755 doc="Name of coadd" 

756 ) 

757 # TODO: remove in DM-27177 

758 filterMap = pexConfig.DictField( 

759 keytype=str, 

760 itemtype=str, 

761 default={}, 

762 doc=("Dictionary mapping full filter name to short one for column name munging." 

763 "These filters determine the output columns no matter what filters the " 

764 "input data actually contain."), 

765 deprecated=("Coadds are now identified by the band, so this transform is unused." 

766 "Will be removed after v22.") 

767 ) 

768 outputBands = pexConfig.ListField( 

769 dtype=str, 

770 default=None, 

771 optional=True, 

772 doc=("These bands and only these bands will appear in the output," 

773 " NaN-filled if the input does not include them." 

774 " If None, then use all bands found in the input.") 

775 ) 

776 camelCase = pexConfig.Field( 

777 dtype=bool, 

778 default=True, 

779 doc=("Write per-band columns names with camelCase, else underscore " 

780 "For example: gPsFlux instead of g_PsFlux.") 

781 ) 

782 multilevelOutput = pexConfig.Field( 

783 dtype=bool, 

784 default=False, 

785 doc=("Whether results dataframe should have a multilevel column index (True) or be flat " 

786 "and name-munged (False).") 

787 ) 

788 

789 

790class TransformObjectCatalogTask(TransformCatalogBaseTask): 

791 """Produce a flattened Object Table to match the format specified in 

792 sdm_schemas. 

793 

794 Do the same set of postprocessing calculations on all bands 

795 

796 This is identical to `TransformCatalogBaseTask`, except that it does the

797 specified functor calculations for all filters present in the 

798 input `deepCoadd_obj` table. Any specific `"filt"` keywords specified 

799 by the YAML file will be superseded.

800 """ 

801 _DefaultName = "transformObjectCatalog" 

802 ConfigClass = TransformObjectCatalogConfig 

803 

804 # Used by Gen 2 runDataRef only: 

805 inputDataset = 'deepCoadd_obj' 

806 outputDataset = 'objectTable' 

807 

808 @classmethod 

809 def _makeArgumentParser(cls): 

810 parser = ArgumentParser(name=cls._DefaultName) 

811 parser.add_id_argument("--id", cls.inputDataset, 

812 ContainerClass=CoaddDataIdContainer, 

813 help="data ID, e.g. --id tract=12345 patch=1,2") 

814 return parser 

815 

816 def run(self, parq, funcs=None, dataId=None, band=None): 

817 # NOTE: band kwarg is ignored here. 

818 dfDict = {} 

819 analysisDict = {} 

820 templateDf = pd.DataFrame() 

821 

822 if isinstance(parq, DeferredDatasetHandle): 

823 columns = parq.get(component='columns') 

824 inputBands = columns.unique(level=1).values 

825 else: 

826 inputBands = parq.columnLevelNames['band'] 

827 

828 outputBands = self.config.outputBands if self.config.outputBands else inputBands 

829 

830 # Perform transform for data of filters that exist in parq. 

831 for inputBand in inputBands: 

832 if inputBand not in outputBands: 

833 self.log.info("Ignoring %s band data in the input", inputBand) 

834 continue 

835 self.log.info("Transforming the catalog of band %s", inputBand) 

836 result = self.transform(inputBand, parq, funcs, dataId) 

837 dfDict[inputBand] = result.df 

838 analysisDict[inputBand] = result.analysis 

839 if templateDf.empty: 

840 templateDf = result.df 

841 

842 # Fill NaNs in columns of other wanted bands 

843 for filt in outputBands: 

844 if filt not in dfDict: 

845 self.log.info("Adding empty columns for band %s", filt) 

846 dfDict[filt] = pd.DataFrame().reindex_like(templateDf) 

847 

848 # This makes a multilevel column index, with band as first level 

849 df = pd.concat(dfDict, axis=1, names=['band', 'column']) 

850 

851 if not self.config.multilevelOutput: 

852 noDupCols = list(set.union(*[set(v.noDupCols) for v in analysisDict.values()])) 

853 if dataId is not None: 

854 noDupCols += list(dataId.keys()) 

855 df = flattenFilters(df, noDupCols=noDupCols, camelCase=self.config.camelCase, 

856 inputBands=inputBands) 

857 

858 self.log.info("Made a table of %d columns and %d rows", len(df.columns), len(df)) 

859 return df 

860 

861 
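
# Illustrative sketch (not part of the pipeline) of how TransformObjectCatalogTask.run
# NaN-fills a requested output band that is missing from the input, and of the
# band-first column MultiIndex it builds before optional flattening.  Contents are
# hypothetical.
def _exampleMissingBand():
    templateDf = pd.DataFrame({'PsFlux': [1.0, 2.0]}, index=[10, 11])
    dfDict = {'g': templateDf,
              'i': pd.DataFrame().reindex_like(templateDf)}  # all-NaN block for 'i'
    return pd.concat(dfDict, axis=1, names=['band', 'column'])
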

862class TractObjectDataIdContainer(CoaddDataIdContainer): 

863 

864 def makeDataRefList(self, namespace): 

865 """Make self.refList from self.idList 

866 

867 Generate a list of data references given tract and/or patch. 

868 This was adapted from `TractQADataIdContainer`, which was 

869 `TractDataIdContainer` modified to not require "filter".

870 Only existing dataRefs are returned. 

871 """ 

872 def getPatchRefList(tract): 

873 return [namespace.butler.dataRef(datasetType=self.datasetType, 

874 tract=tract.getId(), 

875 patch="%d,%d" % patch.getIndex()) for patch in tract] 

876 

877 tractRefs = defaultdict(list) # Data references for each tract 

878 for dataId in self.idList: 

879 skymap = self.getSkymap(namespace) 

880 

881 if "tract" in dataId: 

882 tractId = dataId["tract"] 

883 if "patch" in dataId: 

884 tractRefs[tractId].append(namespace.butler.dataRef(datasetType=self.datasetType, 

885 tract=tractId, 

886 patch=dataId['patch'])) 

887 else: 

888 tractRefs[tractId] += getPatchRefList(skymap[tractId]) 

889 else: 

890 tractRefs = dict((tract.getId(), tractRefs.get(tract.getId(), []) + getPatchRefList(tract)) 

891 for tract in skymap) 

892 outputRefList = [] 

893 for tractRefList in tractRefs.values(): 

894 existingRefs = [ref for ref in tractRefList if ref.datasetExists()] 

895 outputRefList.append(existingRefs) 

896 

897 self.refList = outputRefList 

898 

899 

900class ConsolidateObjectTableConnections(pipeBase.PipelineTaskConnections, 

901 dimensions=("tract", "skymap")): 

902 inputCatalogs = connectionTypes.Input( 

903 doc="Per-Patch objectTables conforming to the standard data model.", 

904 name="objectTable", 

905 storageClass="DataFrame", 

906 dimensions=("tract", "patch", "skymap"), 

907 multiple=True, 

908 ) 

909 outputCatalog = connectionTypes.Output( 

910 doc="Pre-tract horizontal concatenation of the input objectTables", 

911 name="objectTable_tract", 

912 storageClass="DataFrame", 

913 dimensions=("tract", "skymap"), 

914 ) 

915 

916 

917class ConsolidateObjectTableConfig(pipeBase.PipelineTaskConfig, 

918 pipelineConnections=ConsolidateObjectTableConnections): 

919 coaddName = pexConfig.Field( 

920 dtype=str, 

921 default="deep", 

922 doc="Name of coadd" 

923 ) 

924 

925 

926class ConsolidateObjectTableTask(CmdLineTask, pipeBase.PipelineTask): 

927 """Write patch-merged source tables to a tract-level parquet file 

928 

929 Concatenates `objectTable` list into a per-tract `objectTable_tract`

930 """ 

931 _DefaultName = "consolidateObjectTable" 

932 ConfigClass = ConsolidateObjectTableConfig 

933 

934 inputDataset = 'objectTable' 

935 outputDataset = 'objectTable_tract' 

936 

937 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

938 inputs = butlerQC.get(inputRefs) 

939 self.log.info("Concatenating %s per-patch Object Tables", 

940 len(inputs['inputCatalogs'])) 

941 df = pd.concat(inputs['inputCatalogs']) 

942 butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs) 

943 

944 @classmethod 

945 def _makeArgumentParser(cls): 

946 parser = ArgumentParser(name=cls._DefaultName) 

947 

948 parser.add_id_argument("--id", cls.inputDataset, 

949 help="data ID, e.g. --id tract=12345", 

950 ContainerClass=TractObjectDataIdContainer) 

951 return parser 

952 

953 def runDataRef(self, patchRefList): 

954 df = pd.concat([patchRef.get().toDataFrame() for patchRef in patchRefList]) 

955 patchRefList[0].put(ParquetTable(dataFrame=df), self.outputDataset) 

956 

957 def writeMetadata(self, dataRef): 

958 """No metadata to write. 

959 """ 

960 pass 

961 

962 

963class TransformSourceTableConnections(pipeBase.PipelineTaskConnections, 

964 defaultTemplates={"catalogType": ""}, 

965 dimensions=("instrument", "visit", "detector")): 

966 

967 inputCatalog = connectionTypes.Input( 

968 doc="Wide input catalog of sources produced by WriteSourceTableTask", 

969 name="{catalogType}source", 

970 storageClass="DataFrame", 

971 dimensions=("instrument", "visit", "detector"), 

972 deferLoad=True 

973 ) 

974 outputCatalog = connectionTypes.Output( 

975 doc="Narrower, per-detector Source Table transformed and converted per a " 

976 "specified set of functors", 

977 name="{catalogType}sourceTable", 

978 storageClass="DataFrame", 

979 dimensions=("instrument", "visit", "detector") 

980 ) 

981 

982 

983class TransformSourceTableConfig(TransformCatalogBaseConfig, 

984 pipelineConnections=TransformSourceTableConnections): 

985 pass 

986 

987 

988class TransformSourceTableTask(TransformCatalogBaseTask): 

989 """Transform/standardize a source catalog 

990 """ 

991 _DefaultName = "transformSourceTable" 

992 ConfigClass = TransformSourceTableConfig 

993 

994 inputDataset = 'source' 

995 outputDataset = 'sourceTable' 

996 

997 @classmethod 

998 def _makeArgumentParser(cls): 

999 parser = ArgumentParser(name=cls._DefaultName) 

1000 parser.add_id_argument("--id", datasetType=cls.inputDataset, 

1001 level="sensor", 

1002 help="data ID, e.g. --id visit=12345 ccd=0") 

1003 return parser 

1004 

1005 def runDataRef(self, dataRef): 

1006 """Override to specify band label to run().""" 

1007 parq = dataRef.get() 

1008 funcs = self.getFunctors() 

1009 band = dataRef.get("calexp_filterLabel", immediate=True).bandLabel 

1010 df = self.run(parq, funcs=funcs, dataId=dataRef.dataId, band=band) 

1011 self.write(df, dataRef) 

1012 return df 

1013 

1014 

1015class ConsolidateVisitSummaryConnections(pipeBase.PipelineTaskConnections, 

1016 dimensions=("instrument", "visit",), 

1017 defaultTemplates={"calexpType": ""}): 

1018 calexp = connectionTypes.Input( 

1019 doc="Processed exposures used for metadata", 

1020 name="{calexpType}calexp", 

1021 storageClass="ExposureF", 

1022 dimensions=("instrument", "visit", "detector"), 

1023 deferLoad=True, 

1024 multiple=True, 

1025 ) 

1026 visitSummary = connectionTypes.Output( 

1027 doc=("Per-visit consolidated exposure metadata. These catalogs use " 

1028 "detector id for the id and are sorted for fast lookups of a " 

1029 "detector."), 

1030 name="{calexpType}visitSummary", 

1031 storageClass="ExposureCatalog", 

1032 dimensions=("instrument", "visit"), 

1033 ) 

1034 

1035 

1036class ConsolidateVisitSummaryConfig(pipeBase.PipelineTaskConfig, 

1037 pipelineConnections=ConsolidateVisitSummaryConnections): 

1038 """Config for ConsolidateVisitSummaryTask""" 

1039 pass 

1040 

1041 

1042class ConsolidateVisitSummaryTask(pipeBase.PipelineTask, pipeBase.CmdLineTask): 

1043 """Task to consolidate per-detector visit metadata. 

1044 

1045 This task aggregates the following metadata from all the detectors in a 

1046 single visit into an exposure catalog: 

1047 - The visitInfo. 

1048 - The wcs. 

1049 - The photoCalib. 

1050 - The physical_filter and band (if available). 

1051 - The psf size, shape, and effective area at the center of the detector. 

1052 - The corners of the bounding box in right ascension/declination. 

1053 

1054 Other quantities such as Detector, Psf, ApCorrMap, and TransmissionCurve 

1055 are not persisted here because of storage concerns, and because of their 

1056 limited utility as summary statistics. 

1057 

1058 Tests for this task are performed in ci_hsc_gen3. 

1059 """ 

1060 _DefaultName = "consolidateVisitSummary" 

1061 ConfigClass = ConsolidateVisitSummaryConfig 

1062 

1063 @classmethod 

1064 def _makeArgumentParser(cls): 

1065 parser = ArgumentParser(name=cls._DefaultName) 

1066 

1067 parser.add_id_argument("--id", "calexp", 

1068 help="data ID, e.g. --id visit=12345", 

1069 ContainerClass=VisitDataIdContainer) 

1070 return parser 

1071 

1072 def writeMetadata(self, dataRef): 

1073 """No metadata to persist, so override to remove metadata persistance. 

1074 """ 

1075 pass 

1076 

1077 def writeConfig(self, butler, clobber=False, doBackup=True): 

1078 """No config to persist, so override to remove config persistance. 

1079 """ 

1080 pass 

1081 

1082 def runDataRef(self, dataRefList): 

1083 visit = dataRefList[0].dataId['visit'] 

1084 

1085 self.log.debug("Concatenating metadata from %d per-detector calexps (visit %d)", 

1086 len(dataRefList), visit) 

1087 

1088 expCatalog = self._combineExposureMetadata(visit, dataRefList, isGen3=False) 

1089 

1090 dataRefList[0].put(expCatalog, 'visitSummary', visit=visit) 

1091 

1092 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

1093 dataRefs = butlerQC.get(inputRefs.calexp) 

1094 visit = dataRefs[0].dataId.byName()['visit'] 

1095 

1096 self.log.debug("Concatenating metadata from %d per-detector calexps (visit %d)", 

1097 len(dataRefs), visit) 

1098 

1099 expCatalog = self._combineExposureMetadata(visit, dataRefs) 

1100 

1101 butlerQC.put(expCatalog, outputRefs.visitSummary) 

1102 

1103 def _combineExposureMetadata(self, visit, dataRefs, isGen3=True): 

1104 """Make a combined exposure catalog from a list of dataRefs. 

1105 These dataRefs must point to exposures with wcs, summaryStats, 

1106 and other visit metadata. 

1107 

1108 Parameters 

1109 ---------- 

1110 visit : `int` 

1111 Visit identification number. 

1112 dataRefs : `list` 

1113 List of dataRefs in visit. May be list of 

1114 `lsst.daf.persistence.ButlerDataRef` (Gen2) or 

1115 `lsst.daf.butler.DeferredDatasetHandle` (Gen3). 

1116 isGen3 : `bool`, optional 

1117 Specifies if this is a Gen3 list of datarefs. 

1118 

1119 Returns 

1120 ------- 

1121 visitSummary : `lsst.afw.table.ExposureCatalog` 

1122 Exposure catalog with per-detector summary information. 

1123 """ 

1124 schema = self._makeVisitSummarySchema() 

1125 cat = afwTable.ExposureCatalog(schema) 

1126 cat.resize(len(dataRefs)) 

1127 

1128 cat['visit'] = visit 

1129 

1130 for i, dataRef in enumerate(dataRefs): 

1131 if isGen3: 

1132 visitInfo = dataRef.get(component='visitInfo') 

1133 filterLabel = dataRef.get(component='filterLabel') 

1134 summaryStats = dataRef.get(component='summaryStats') 

1135 detector = dataRef.get(component='detector')

1136 wcs = dataRef.get(component='wcs')

1137 photoCalib = dataRef.get(component='photoCalib')

1139 bbox = dataRef.get(component='bbox') 

1140 validPolygon = dataRef.get(component='validPolygon') 

1141 else: 

1142 # Note that we need to read the calexp because there is 

1143 # no magic access to the psf except through the exposure. 

1144 gen2_read_bbox = lsst.geom.BoxI(lsst.geom.PointI(0, 0), lsst.geom.PointI(1, 1)) 

1145 exp = dataRef.get(datasetType='calexp_sub', bbox=gen2_read_bbox) 

1146 visitInfo = exp.getInfo().getVisitInfo() 

1147 filterLabel = dataRef.get("calexp_filterLabel") 

1148 summaryStats = exp.getInfo().getSummaryStats() 

1149 wcs = exp.getWcs() 

1150 photoCalib = exp.getPhotoCalib() 

1151 detector = exp.getDetector() 

1152 bbox = dataRef.get(datasetType='calexp_bbox') 

1153 validPolygon = exp.getInfo().getValidPolygon() 

1154 

1155 rec = cat[i] 

1156 rec.setBBox(bbox) 

1157 rec.setVisitInfo(visitInfo) 

1158 rec.setWcs(wcs) 

1159 rec.setPhotoCalib(photoCalib) 

1160 rec.setValidPolygon(validPolygon) 

1161 

1162 rec['physical_filter'] = filterLabel.physicalLabel if filterLabel.hasPhysicalLabel() else "" 

1163 rec['band'] = filterLabel.bandLabel if filterLabel.hasBandLabel() else "" 

1164 rec.setId(detector.getId()) 

1165 rec['psfSigma'] = summaryStats.psfSigma 

1166 rec['psfIxx'] = summaryStats.psfIxx 

1167 rec['psfIyy'] = summaryStats.psfIyy 

1168 rec['psfIxy'] = summaryStats.psfIxy 

1169 rec['psfArea'] = summaryStats.psfArea 

1170 rec['raCorners'][:] = summaryStats.raCorners 

1171 rec['decCorners'][:] = summaryStats.decCorners 

1172 rec['ra'] = summaryStats.ra 

1173 rec['decl'] = summaryStats.decl 

1174 rec['zenithDistance'] = summaryStats.zenithDistance 

1175 rec['zeroPoint'] = summaryStats.zeroPoint 

1176 rec['skyBg'] = summaryStats.skyBg 

1177 rec['skyNoise'] = summaryStats.skyNoise 

1178 rec['meanVar'] = summaryStats.meanVar 

1179 rec['astromOffsetMean'] = summaryStats.astromOffsetMean 

1180 rec['astromOffsetStd'] = summaryStats.astromOffsetStd 

1181 

1182 metadata = dafBase.PropertyList() 

1183 metadata.add("COMMENT", "Catalog id is detector id, sorted.") 

1184 # We are looping over existing datarefs, so the following is true 

1185 metadata.add("COMMENT", "Only detectors with data have entries.") 

1186 cat.setMetadata(metadata) 

1187 

1188 cat.sort() 

1189 return cat 

1190 

1191 def _makeVisitSummarySchema(self): 

1192 """Make the schema for the visitSummary catalog.""" 

1193 schema = afwTable.ExposureTable.makeMinimalSchema() 

1194 schema.addField('visit', type='I', doc='Visit number') 

1195 schema.addField('physical_filter', type='String', size=32, doc='Physical filter') 

1196 schema.addField('band', type='String', size=32, doc='Name of band') 

1197 schema.addField('psfSigma', type='F', 

1198 doc='PSF model second-moments determinant radius (center of chip) (pixel)') 

1199 schema.addField('psfArea', type='F', 

1200 doc='PSF model effective area (center of chip) (pixel**2)') 

1201 schema.addField('psfIxx', type='F', 

1202 doc='PSF model Ixx (center of chip) (pixel**2)') 

1203 schema.addField('psfIyy', type='F', 

1204 doc='PSF model Iyy (center of chip) (pixel**2)') 

1205 schema.addField('psfIxy', type='F', 

1206 doc='PSF model Ixy (center of chip) (pixel**2)') 

1207 schema.addField('raCorners', type='ArrayD', size=4, 

1208 doc='Right Ascension of bounding box corners (degrees)') 

1209 schema.addField('decCorners', type='ArrayD', size=4, 

1210 doc='Declination of bounding box corners (degrees)') 

1211 schema.addField('ra', type='D', 

1212 doc='Right Ascension of bounding box center (degrees)') 

1213 schema.addField('decl', type='D', 

1214 doc='Declination of bounding box center (degrees)') 

1215 schema.addField('zenithDistance', type='F', 

1216 doc='Zenith distance of bounding box center (degrees)') 

1217 schema.addField('zeroPoint', type='F', 

1218 doc='Mean zeropoint in detector (mag)') 

1219 schema.addField('skyBg', type='F', 

1220 doc='Average sky background (ADU)') 

1221 schema.addField('skyNoise', type='F', 

1222 doc='Average sky noise (ADU)') 

1223 schema.addField('meanVar', type='F', 

1224 doc='Mean variance of the weight plane (ADU**2)') 

1225 schema.addField('astromOffsetMean', type='F', 

1226 doc='Mean offset of astrometric calibration matches (arcsec)') 

1227 schema.addField('astromOffsetStd', type='F', 

1228 doc='Standard deviation of offsets of astrometric calibration matches (arcsec)') 

1229 

1230 return schema 

1231 

1232 
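
# Illustrative sketch (not part of the pipeline; Gen3 butler assumed): reading the
# per-detector rows back out of a visitSummary catalog.  The repository path,
# collection, and data ID values are hypothetical.
def _exampleReadVisitSummary():
    from lsst.daf.butler import Butler
    butler = Butler('/path/to/repo', collections=['myCollection'])
    visitSummary = butler.get('visitSummary', instrument='HSC', visit=1228)
    # Records are sorted by detector id, so find() does a fast lookup.
    rec = visitSummary.find(50)
    return rec['psfSigma'], rec['zeroPoint']
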

1233class VisitDataIdContainer(DataIdContainer): 

1234 """DataIdContainer that groups sensor-level id's by visit 

1235 """ 

1236 

1237 def makeDataRefList(self, namespace): 

1238 """Make self.refList from self.idList 

1239 

1240 Generate a list of data references grouped by visit. 

1241 

1242 Parameters 

1243 ---------- 

1244 namespace : `argparse.Namespace` 

1245 Namespace used by `lsst.pipe.base.CmdLineTask` to parse command line arguments 

1246 """ 

1247 # Group by visits 

1248 visitRefs = defaultdict(list) 

1249 for dataId in self.idList: 

1250 if "visit" in dataId: 

1251 visitId = dataId["visit"] 

1252 # append all dataRefs in this subset to the visit's list

1253 subset = namespace.butler.subset(self.datasetType, dataId=dataId) 

1254 visitRefs[visitId].extend([dataRef for dataRef in subset]) 

1255 

1256 outputRefList = [] 

1257 for refList in visitRefs.values(): 

1258 existingRefs = [ref for ref in refList if ref.datasetExists()] 

1259 if existingRefs: 

1260 outputRefList.append(existingRefs) 

1261 

1262 self.refList = outputRefList 

1263 

1264 

1265class ConsolidateSourceTableConnections(pipeBase.PipelineTaskConnections, 

1266 defaultTemplates={"catalogType": ""}, 

1267 dimensions=("instrument", "visit")): 

1268 inputCatalogs = connectionTypes.Input( 

1269 doc="Input per-detector Source Tables", 

1270 name="{catalogType}sourceTable", 

1271 storageClass="DataFrame", 

1272 dimensions=("instrument", "visit", "detector"), 

1273 multiple=True 

1274 ) 

1275 outputCatalog = connectionTypes.Output( 

1276 doc="Per-visit concatenation of Source Table", 

1277 name="{catalogType}sourceTable_visit", 

1278 storageClass="DataFrame", 

1279 dimensions=("instrument", "visit") 

1280 ) 

1281 

1282 

1283class ConsolidateSourceTableConfig(pipeBase.PipelineTaskConfig, 

1284 pipelineConnections=ConsolidateSourceTableConnections): 

1285 pass 

1286 

1287 

1288class ConsolidateSourceTableTask(CmdLineTask, pipeBase.PipelineTask): 

1289 """Concatenate `sourceTable` list into a per-visit `sourceTable_visit` 

1290 """ 

1291 _DefaultName = 'consolidateSourceTable' 

1292 ConfigClass = ConsolidateSourceTableConfig 

1293 

1294 inputDataset = 'sourceTable' 

1295 outputDataset = 'sourceTable_visit' 

1296 

1297 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

1298 inputs = butlerQC.get(inputRefs) 

1299 self.log.info("Concatenating %s per-detector Source Tables", 

1300 len(inputs['inputCatalogs'])) 

1301 df = pd.concat(inputs['inputCatalogs']) 

1302 butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs) 

1303 

1304 def runDataRef(self, dataRefList): 

1305 self.log.info("Concatenating %s per-detector Source Tables", len(dataRefList)) 

1306 df = pd.concat([dataRef.get().toDataFrame() for dataRef in dataRefList]) 

1307 dataRefList[0].put(ParquetTable(dataFrame=df), self.outputDataset) 

1308 

1309 @classmethod 

1310 def _makeArgumentParser(cls): 

1311 parser = ArgumentParser(name=cls._DefaultName) 

1312 

1313 parser.add_id_argument("--id", cls.inputDataset, 

1314 help="data ID, e.g. --id visit=12345", 

1315 ContainerClass=VisitDataIdContainer) 

1316 return parser 

1317 

1318 def writeMetadata(self, dataRef): 

1319 """No metadata to write. 

1320 """ 

1321 pass 

1322 

1323 def writeConfig(self, butler, clobber=False, doBackup=True): 

1324 """No config to write. 

1325 """ 

1326 pass 

1327 

1328 

1329class MakeCcdVisitTableConnections(pipeBase.PipelineTaskConnections, 

1330 dimensions=("instrument",), 

1331 defaultTemplates={}): 

1332 visitSummaryRefs = connectionTypes.Input( 

1333 doc="Data references for per-visit consolidated exposure metadata from ConsolidateVisitSummaryTask", 

1334 name="visitSummary", 

1335 storageClass="ExposureCatalog", 

1336 dimensions=("instrument", "visit"), 

1337 multiple=True, 

1338 deferLoad=True, 

1339 ) 

1340 outputCatalog = connectionTypes.Output( 

1341 doc="CCD and Visit metadata table", 

1342 name="CcdVisitTable", 

1343 storageClass="DataFrame", 

1344 dimensions=("instrument",) 

1345 ) 

1346 

1347 

1348class MakeCcdVisitTableConfig(pipeBase.PipelineTaskConfig, 

1349 pipelineConnections=MakeCcdVisitTableConnections): 

1350 pass 

1351 

1352 

1353class MakeCcdVisitTableTask(CmdLineTask, pipeBase.PipelineTask): 

1354 """Produce a `ccdVisitTable` from the `visitSummary` exposure catalogs. 

1355 """ 

1356 _DefaultName = 'makeCcdVisitTable' 

1357 ConfigClass = MakeCcdVisitTableConfig 

1358 

1359 def run(self, visitSummaryRefs): 

1360 """ Make a table of ccd information from the `visitSummary` catalogs. 

1361 Parameters 

1362 ---------- 

1363 visitSummaryRefs : `list` of `lsst.daf.butler.DeferredDatasetHandle` 

1364 List of DeferredDatasetHandles pointing to exposure catalogs with 

1365 per-detector summary information. 

1366 Returns 

1367 ------- 

1368 result : `lsst.pipe.base.Struct`

1369 Results struct with attribute: 

1370 - `outputCatalog` 

1371 Catalog of ccd and visit information. 

1372 """ 

1373 ccdEntries = [] 

1374 for visitSummaryRef in visitSummaryRefs: 

1375 visitSummary = visitSummaryRef.get() 

1376 visitInfo = visitSummary[0].getVisitInfo() 

1377 

1378 ccdEntry = {} 

1379 summaryTable = visitSummary.asAstropy() 

1380 selectColumns = ['id', 'visit', 'physical_filter', 'ra', 'decl', 'zenithDistance', 'zeroPoint', 

1381 'psfSigma', 'skyBg', 'skyNoise'] 

1382 ccdEntry = summaryTable[selectColumns].to_pandas().set_index('id') 

1383 ccdEntry = ccdEntry.rename(columns={"physical_filter": "filterName", "visit": "visitId"}) 

1384 

1385 dataIds = [DataCoordinate.standardize(visitSummaryRef.dataId, detector=id) for id in 

1386 summaryTable['id']] 

1387 packer = visitSummaryRef.dataId.universe.makePacker('visit_detector', visitSummaryRef.dataId) 

1388 ccdVisitIds = [packer.pack(dataId) for dataId in dataIds] 

1389 ccdEntry['ccdVisitId'] = ccdVisitIds 

1390 

1391 pixToArcseconds = np.array([vR.getWcs().getPixelScale().asArcseconds() for vR in visitSummary]) 

1392 ccdEntry["seeing"] = visitSummary['psfSigma'] * np.sqrt(8 * np.log(2)) * pixToArcseconds 

1393 

1394 ccdEntry["skyRotation"] = visitInfo.getBoresightRotAngle().asDegrees() 

1395 ccdEntry["expMidpt"] = visitInfo.getDate().toPython() 

1396 expTime = visitInfo.getExposureTime() 

1397 ccdEntry['expTime'] = expTime 

1398 ccdEntry["obsStart"] = ccdEntry["expMidpt"] - 0.5 * pd.Timedelta(seconds=expTime) 

1399 ccdEntry['darkTime'] = visitInfo.getDarkTime() 

1400 ccdEntry['xSize'] = summaryTable['bbox_max_x'] - summaryTable['bbox_min_x'] 

1401 ccdEntry['ySize'] = summaryTable['bbox_max_y'] - summaryTable['bbox_min_y'] 

1402 ccdEntry['llcra'] = summaryTable['raCorners'][:, 0] 

1403 ccdEntry['llcdec'] = summaryTable['decCorners'][:, 0] 

1404 ccdEntry['ulcra'] = summaryTable['raCorners'][:, 1] 

1405 ccdEntry['ulcdec'] = summaryTable['decCorners'][:, 1] 

1406 ccdEntry['urcra'] = summaryTable['raCorners'][:, 2] 

1407 ccdEntry['urcdec'] = summaryTable['decCorners'][:, 2] 

1408 ccdEntry['lrcra'] = summaryTable['raCorners'][:, 3] 

1409 ccdEntry['lrcdec'] = summaryTable['decCorners'][:, 3] 

1410 # TODO: DM-30618, Add raftName, nExposures, ccdTemp, binX, binY, and flags, 

1411 # and decide if WCS, and llcx, llcy, ulcx, ulcy, etc. values are actually wanted. 

1412 ccdEntries.append(ccdEntry) 

1413 

1414 outputCatalog = pd.concat(ccdEntries) 

1415 return pipeBase.Struct(outputCatalog=outputCatalog) 

1416 

1417 
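
# Illustrative sketch (not part of the pipeline): the "seeing" column above is the PSF
# FWHM in arcseconds, converted from the Gaussian-equivalent sigma (in pixels) via
# FWHM = sigma * sqrt(8 ln 2), scaled by the per-detector pixel scale.  The default
# values below are hypothetical.
def _exampleSeeing(psfSigmaPixels=2.0, pixelScaleArcsec=0.168):
    return psfSigmaPixels * np.sqrt(8 * np.log(2)) * pixelScaleArcsec
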

1418class MakeVisitTableConnections(pipeBase.PipelineTaskConnections, 

1419 dimensions=("instrument",), 

1420 defaultTemplates={}): 

1421 visitSummaries = connectionTypes.Input( 

1422 doc="Per-visit consolidated exposure metadata from ConsolidateVisitSummaryTask", 

1423 name="visitSummary", 

1424 storageClass="ExposureCatalog", 

1425 dimensions=("instrument", "visit",), 

1426 multiple=True, 

1427 deferLoad=True, 

1428 ) 

1429 outputCatalog = connectionTypes.Output( 

1430 doc="Visit metadata table", 

1431 name="visitTable", 

1432 storageClass="DataFrame", 

1433 dimensions=("instrument",) 

1434 ) 

1435 

1436 

1437class MakeVisitTableConfig(pipeBase.PipelineTaskConfig, 

1438 pipelineConnections=MakeVisitTableConnections): 

1439 pass 

1440 

1441 

1442class MakeVisitTableTask(CmdLineTask, pipeBase.PipelineTask): 

1443 """Produce a `visitTable` from the `visitSummary` exposure catalogs. 

1444 """ 

1445 _DefaultName = 'makeVisitTable' 

1446 ConfigClass = MakeVisitTableConfig 

1447 

1448 def run(self, visitSummaries): 

1449 """ Make a table of visit information from the `visitSummary` catalogs 

1450 

1451 Parameters 

1452 ---------- 

1453 visitSummaries : `list` of `lsst.daf.butler.DeferredDatasetHandle`

1454 List of handles for exposure catalogs with per-detector summary information.

1455 Returns 

1456 ------- 

1457 result : `lsst.pipe.base.Struct`

1458 Results struct with attribute: 

1459 ``outputCatalog`` 

1460 Catalog of visit information. 

1461 """ 

1462 visitEntries = [] 

1463 for visitSummary in visitSummaries: 

1464 visitSummary = visitSummary.get() 

1465 visitRow = visitSummary[0] 

1466 visitInfo = visitRow.getVisitInfo() 

1467 

1468 visitEntry = {} 

1469 visitEntry["visitId"] = visitRow['visit'] 

1470 visitEntry["filterName"] = visitRow['physical_filter'] 

1471 raDec = visitInfo.getBoresightRaDec() 

1472 visitEntry["ra"] = raDec.getRa().asDegrees() 

1473 visitEntry["decl"] = raDec.getDec().asDegrees() 

1474 visitEntry["skyRotation"] = visitInfo.getBoresightRotAngle().asDegrees() 

1475 azAlt = visitInfo.getBoresightAzAlt() 

1476 visitEntry["azimuth"] = azAlt.getLongitude().asDegrees() 

1477 visitEntry["altitude"] = azAlt.getLatitude().asDegrees() 

1478 visitEntry["zenithDistance"] = 90 - azAlt.getLatitude().asDegrees() 

1479 visitEntry["airmass"] = visitInfo.getBoresightAirmass() 

1480 visitEntry["obsStart"] = visitInfo.getDate().toPython() 

1481 visitEntry["expTime"] = visitInfo.getExposureTime() 

1482 visitEntries.append(visitEntry) 

1483 # TODO: DM-30623, Add programId, exposureType, expMidpt, cameraTemp, mirror1Temp, mirror2Temp, 

1484 # mirror3Temp, domeTemp, externalTemp, dimmSeeing, pwvGPS, pwvMW, flags, nExposures 

1485 

1486 outputCatalog = pd.DataFrame(data=visitEntries) 

1487 return pipeBase.Struct(outputCatalog=outputCatalog) 
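# A minimal usage sketch (illustrative, not executed here): assuming `handles`
# is a list of deferred dataset handles for per-visit visitSummary catalogs,
#
#     task = MakeVisitTableTask()
#     visitTable = task.run(handles).outputCatalog  # pandas.DataFrame, one row per visit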

1488 

1489 

1490class WriteForcedSourceTableConnections(pipeBase.PipelineTaskConnections, 

1491 dimensions=("instrument", "visit", "detector", "skymap", "tract")): 

1492 

1493 inputCatalog = connectionTypes.Input( 

1494 doc="Primary per-detector, single-epoch forced-photometry catalog. " 

1495 "By default, it is the output of ForcedPhotCcdTask on calexps", 

1496 name="forced_src", 

1497 storageClass="SourceCatalog", 

1498 dimensions=("instrument", "visit", "detector", "skymap", "tract") 

1499 ) 

1500 inputCatalogDiff = connectionTypes.Input( 

1501 doc="Secondary multi-epoch, per-detector, forced photometry catalog. " 

1502 "By default, it is the output of ForcedPhotCcdTask run on image differences.", 

1503 name="forced_diff", 

1504 storageClass="SourceCatalog", 

1505 dimensions=("instrument", "visit", "detector", "skymap", "tract") 

1506 ) 

1507 outputCatalog = connectionTypes.Output( 

1508 doc="InputCatalogs horizonatally joined on `objectId` in Parquet format", 

1509 name="forcedSource", 

1510 storageClass="DataFrame", 

1511 dimensions=("instrument", "visit", "detector") 

1512 ) 

1513 

1514 

1515class WriteForcedSourceTableConfig(WriteSourceTableConfig, 

1516 pipelineConnections=WriteForcedSourceTableConnections): 

1517 pass 

1518 

1519 

1520class WriteForcedSourceTableTask(pipeBase.PipelineTask): 

1521 """Merge and convert per-detector forced source catalogs to parquet 

1522 """ 

1523 _DefaultName = "writeForcedSourceTable" 

1524 ConfigClass = WriteForcedSourceTableConfig 

1525 

1526 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

1527 inputs = butlerQC.get(inputRefs) 

1528 # Add ccdVisitId to allow joining with CcdVisitTable 

1529 inputs['ccdVisitId'] = butlerQC.quantum.dataId.pack("visit_detector") 

1530 inputs['band'] = butlerQC.quantum.dataId.full['band'] 

1531 

1532 outputs = self.run(**inputs) 

1533 butlerQC.put(outputs, outputRefs) 

1534 

1535 def run(self, inputCatalog, inputCatalogDiff, ccdVisitId=None, band=None): 

1536 dfs = [] 

1537 for table, dataset in zip((inputCatalog, inputCatalogDiff), ('calexp', 'diff')):

1538 df = table.asAstropy().to_pandas().set_index('objectId', drop=False) 

1539 df = df.reindex(sorted(df.columns), axis=1) 

1540 df['ccdVisitId'] = ccdVisitId if ccdVisitId else pd.NA 

1541 df['band'] = band if band else pd.NA 

1542 df.columns = pd.MultiIndex.from_tuples([(dataset, c) for c in df.columns], 

1543 names=('dataset', 'column')) 

1544 

1545 dfs.append(df) 

1546 

1547 outputCatalog = functools.reduce(lambda d1, d2: d1.join(d2), dfs) 

1548 return pipeBase.Struct(outputCatalog=outputCatalog) 
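# Sketch of the merged layout (illustrative; the measurement column below is a
# placeholder, not guaranteed to be present in a given schema):
#
#     struct = task.run(inputCatalog, inputCatalogDiff, ccdVisitId=ccdVisitId, band='r')
#     df = struct.outputCatalog
#     # Columns form a two-level MultiIndex named ('dataset', 'column'), e.g.
#     calexpFlux = df[('calexp', 'base_PsfFlux_instFlux')]
#     diffFlux = df[('diff', 'base_PsfFlux_instFlux')]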

1549 

1550 

1551class TransformForcedSourceTableConnections(pipeBase.PipelineTaskConnections, 

1552 dimensions=("instrument", "skymap", "patch", "tract")): 

1553 

1554 inputCatalogs = connectionTypes.Input( 

1555 doc="Parquet table of merged ForcedSources produced by WriteForcedSourceTableTask", 

1556 name="forcedSource", 

1557 storageClass="DataFrame", 

1558 dimensions=("instrument", "visit", "detector"), 

1559 multiple=True, 

1560 deferLoad=True 

1561 ) 

1562 referenceCatalog = connectionTypes.Input( 

1563 doc="Reference catalog which was used to seed the forcedPhot. Columns " 

1564 "objectId, detect_isPrimary, detect_isTractInner, detect_isPatchInner " 

1565 "are expected.", 

1566 name="objectTable", 

1567 storageClass="DataFrame", 

1568 dimensions=("tract", "patch", "skymap"), 

1569 deferLoad=True 

1570 ) 

1571 outputCatalog = connectionTypes.Output( 

1572 doc="Narrower, temporally-aggregated, per-patch ForcedSource Table transformed and converted per a " 

1573 "specified set of functors", 

1574 name="ForcedSourceTable", 

1575 storageClass="DataFrame", 

1576 dimensions=("tract", "patch", "skymap") 

1577 ) 

1578 

1579 

1580class TransformForcedSourceTableConfig(TransformCatalogBaseConfig, 

1581 pipelineConnections=TransformForcedSourceTableConnections): 

1582 pass 

1583 

1584 

1585class TransformForcedSourceTableTask(TransformCatalogBaseTask): 

1586 """Transform/standardize a ForcedSource catalog 

1587 

1588 Transforms each wide, per-detector forcedSource parquet table per the 

1589 specification file (per-camera defaults found in ForcedSource.yaml). 

1590 All epochs that overlap the patch are aggregated into one per-patch 

1591 narrow-parquet file. 

1592 

1593 No de-duplication of rows is performed. Duplicate-resolution flags are

1594 pulled in from the referenceCatalog: `detect_isPrimary`,

1595 `detect_isTractInner`, `detect_isPatchInner`, so that the user may de-duplicate

1596 for analysis or compare duplicates for QA.

1597 

1598 The resulting table includes multiple bands. Epochs (MJDs) and other useful 

1599 per-visit quantities can be retrieved by joining with the CcdVisitTable on

1600 ccdVisitId. 

1601 """ 

1602 _DefaultName = "transformForcedSourceTable" 

1603 ConfigClass = TransformForcedSourceTableConfig 

1604 

1605 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

1606 inputs = butlerQC.get(inputRefs) 

1607 if self.funcs is None: 

1608 raise ValueError("config.functorFile is None. " 

1609 "Must be a valid path to yaml in order to run Task as a PipelineTask.") 

1610 outputs = self.run(inputs['inputCatalogs'], inputs['referenceCatalog'], funcs=self.funcs, 

1611 dataId=outputRefs.outputCatalog.dataId.full) 

1612 

1613 butlerQC.put(outputs, outputRefs) 

1614 

1615 def run(self, inputCatalogs, referenceCatalog, funcs=None, dataId=None, band=None): 

1616 dfs = [] 

1617 ref = referenceCatalog.get(parameters={"columns": ['detect_isPrimary', 'detect_isTractInner', 

1618 'detect_isPatchInner']}) 

1619 self.log.info("Aggregating %d input catalogs", len(inputCatalogs))

1620 for handle in inputCatalogs: 

1621 result = self.transform(None, handle, funcs, dataId) 

1622 # Filter for only rows that were detected on (overlap) the patch 

1623 dfs.append(ref.join(result.df, how='inner')) 

1624 

1625 outputCatalog = pd.concat(dfs) 

1626 self.log.info("Made a table of %d columns and %d rows", 

1627 len(outputCatalog.columns), len(outputCatalog)) 

1628 return pipeBase.Struct(outputCatalog=outputCatalog) 
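# Downstream sketch (illustrative): assuming `forcedSourceTable` is this task's
# output and `ccdVisitTable` has a `ccdVisitId` column, users can de-duplicate
# and attach per-visit metadata as described in the class docstring:
#
#     primary = forcedSourceTable[forcedSourceTable['detect_isPrimary']]
#     withVisitInfo = primary.join(ccdVisitTable.set_index('ccdVisitId'), on='ccdVisitId')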

1629 

1630 

1631class ConsolidateForcedSourceTableConnections(pipeBase.PipelineTaskConnections, 

1632 defaultTemplates={"catalogType": ""}, 

1633 dimensions=("instrument", "tract")): 

1634 inputCatalogs = connectionTypes.Input( 

1635 doc="Input per-patch ForcedSource Tables", 

1636 name="{catalogType}ForcedSourceTable", 

1637 storageClass="DataFrame", 

1638 dimensions=("tract", "patch", "skymap"), 

1639 multiple=True, 

1640 ) 

1641 

1642 outputCatalog = connectionTypes.Output( 

1643 doc="Output per-tract concatenation of ForcedSource Tables", 

1644 name="{catalogType}ForcedSourceTable_tract", 

1645 storageClass="DataFrame", 

1646 dimensions=("tract", "skymap"), 

1647 ) 

1648 

1649 

1650class ConsolidateForcedSourceTableConfig(pipeBase.PipelineTaskConfig, 

1651 pipelineConnections=ConsolidateForcedSourceTableConnections): 

1652 pass 

1653 

1654 

1655class ConsolidateForcedSourceTableTask(CmdLineTask, pipeBase.PipelineTask): 

1656 """Concatenate a per-patch `ForcedSourceTable` list into a single 

1657 per-tract `ForcedSourceTable_tract`

1658 """ 

1659 _DefaultName = 'consolidateForcedSourceTable' 

1660 ConfigClass = ConsolidateForcedSourceTableConfig 

1661 

1662 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

1663 inputs = butlerQC.get(inputRefs) 

1664 self.log.info("Concatenating %d per-patch ForcedSource Tables",

1665 len(inputs['inputCatalogs'])) 

1666 df = pd.concat(inputs['inputCatalogs']) 

1667 butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs)
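# The per-tract table is a plain row-wise concatenation of the per-patch tables,
# so (illustratively) the row counts simply add up:
#
#     tractTable = pd.concat(patchTables)
#     assert len(tractTable) == sum(len(t) for t in patchTables)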