
1# This file is part of pipe_tasks 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

22import functools 

23import pandas as pd 

24from collections import defaultdict 

25import numpy as np 

26 

27import lsst.geom 

28import lsst.pex.config as pexConfig 

29import lsst.pipe.base as pipeBase 

30import lsst.daf.base as dafBase 

31from lsst.pipe.base import connectionTypes 

32import lsst.afw.table as afwTable 

33from lsst.meas.base import SingleFrameMeasurementTask 

34from lsst.pipe.base import CmdLineTask, ArgumentParser, DataIdContainer 

35from lsst.coadd.utils.coaddDataIdContainer import CoaddDataIdContainer 

36from lsst.daf.butler import DeferredDatasetHandle, DataCoordinate 

37 

38from .parquetTable import ParquetTable 

39from .multiBandUtils import makeMergeArgumentParser, MergeSourcesRunner 

40from .functors import CompositeFunctor, RAColumn, DecColumn, Column 

41 

42 

43def flattenFilters(df, noDupCols=['coord_ra', 'coord_dec'], camelCase=False, inputBands=None): 

44 """Flattens a dataframe with multilevel column index 

45 """ 

46 newDf = pd.DataFrame() 

47 # band is the level 0 index 

48 dfBands = df.columns.unique(level=0).values 

49 for band in dfBands: 

50 subdf = df[band] 

51 columnFormat = '{0}{1}' if camelCase else '{0}_{1}' 

52 newColumns = {c: columnFormat.format(band, c) 

53 for c in subdf.columns if c not in noDupCols} 

54 cols = list(newColumns.keys()) 

55 newDf = pd.concat([newDf, subdf[cols].rename(columns=newColumns)], axis=1) 

56 

57 # Band must be present in the input and output or else column is all NaN: 

58 presentBands = dfBands if inputBands is None else list(set(inputBands).intersection(dfBands)) 

59 # Get the unexploded columns from any present band's partition 

60 noDupDf = df[presentBands[0]][noDupCols] 

61 newDf = pd.concat([noDupDf, newDf], axis=1) 

62 return newDf 
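
# A minimal, illustrative sketch of flattenFilters in isolation; the band and
# column names below are invented for the example and are not a pipeline schema.
def _flattenFiltersExample():
    bands = ['g', 'r']
    columns = pd.MultiIndex.from_product([bands, ['coord_ra', 'coord_dec', 'PsFlux']],
                                         names=['band', 'column'])
    df = pd.DataFrame(np.ones((3, len(columns))), columns=columns)
    # With camelCase=True the per-band columns come out as e.g. 'gPsFlux' and
    # 'rPsFlux', while 'coord_ra'/'coord_dec' appear once, taken from the first band.
    return flattenFilters(df, camelCase=True)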

63 

64 

65class WriteObjectTableConnections(pipeBase.PipelineTaskConnections, 

66 defaultTemplates={"coaddName": "deep"}, 

67 dimensions=("tract", "patch", "skymap")): 

68 inputCatalogMeas = connectionTypes.Input( 

69 doc="Catalog of source measurements on the deepCoadd.", 

70 dimensions=("tract", "patch", "band", "skymap"), 

71 storageClass="SourceCatalog", 

72 name="{coaddName}Coadd_meas", 

73 multiple=True 

74 ) 

75 inputCatalogForcedSrc = connectionTypes.Input( 

76 doc="Catalog of forced measurements (shape and position parameters held fixed) on the deepCoadd.", 

77 dimensions=("tract", "patch", "band", "skymap"), 

78 storageClass="SourceCatalog", 

79 name="{coaddName}Coadd_forced_src", 

80 multiple=True 

81 ) 

82 inputCatalogRef = connectionTypes.Input( 

83 doc="Catalog marking the primary detection (which band provides a good shape and position)" 

84 "for each detection in deepCoadd_mergeDet.", 

85 dimensions=("tract", "patch", "skymap"), 

86 storageClass="SourceCatalog", 

87 name="{coaddName}Coadd_ref" 

88 ) 

89 outputCatalog = connectionTypes.Output( 

90 doc="A vertical concatenation of the deepCoadd_{ref|meas|forced_src} catalogs, " 

91 "stored as a DataFrame with a multi-level column index per-patch.", 

92 dimensions=("tract", "patch", "skymap"), 

93 storageClass="DataFrame", 

94 name="{coaddName}Coadd_obj" 

95 ) 

96 

97 

98class WriteObjectTableConfig(pipeBase.PipelineTaskConfig, 

99 pipelineConnections=WriteObjectTableConnections): 

100 engine = pexConfig.Field( 

101 dtype=str, 

102 default="pyarrow", 

103 doc="Parquet engine for writing (pyarrow or fastparquet)" 

104 ) 

105 coaddName = pexConfig.Field( 

106 dtype=str, 

107 default="deep", 

108 doc="Name of coadd" 

109 ) 

110 

111 

112class WriteObjectTableTask(CmdLineTask, pipeBase.PipelineTask): 

113 """Write filter-merged source tables to parquet 

114 """ 

115 _DefaultName = "writeObjectTable" 

116 ConfigClass = WriteObjectTableConfig 

117 RunnerClass = MergeSourcesRunner 

118 

119 # Names of table datasets to be merged 

120 inputDatasets = ('forced_src', 'meas', 'ref') 

121 

122 # Tag of output dataset written by `MergeSourcesTask.write` 

123 outputDataset = 'obj' 

124 

125 def __init__(self, butler=None, schema=None, **kwargs): 

126 # It is a shame that this class can't use the default init for CmdLineTask, 

127 # but to do so would require its own special task runner, which is many 

128 # more lines of specialization, so this is how it is for now. 

129 super().__init__(**kwargs) 

130 

131 def runDataRef(self, patchRefList): 

132 """! 

133 @brief Merge coadd sources from multiple bands. Reads the per-band catalogs, calls @ref `run` 

134 to merge them, and writes the result as a ParquetTable. 

135 @param[in] patchRefList list of data references for each filter 

136 """ 

137 catalogs = dict(self.readCatalog(patchRef) for patchRef in patchRefList) 

138 dataId = patchRefList[0].dataId 

139 mergedCatalog = self.run(catalogs, tract=dataId['tract'], patch=dataId['patch']) 

140 self.write(patchRefList[0], ParquetTable(dataFrame=mergedCatalog)) 

141 

142 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

143 inputs = butlerQC.get(inputRefs) 

144 

145 measDict = {ref.dataId['band']: {'meas': cat} for ref, cat in 

146 zip(inputRefs.inputCatalogMeas, inputs['inputCatalogMeas'])} 

147 forcedSourceDict = {ref.dataId['band']: {'forced_src': cat} for ref, cat in 

148 zip(inputRefs.inputCatalogForcedSrc, inputs['inputCatalogForcedSrc'])} 

149 

150 catalogs = {} 

151 for band in measDict.keys(): 

152 catalogs[band] = {'meas': measDict[band]['meas'], 

153 'forced_src': forcedSourceDict[band]['forced_src'], 

154 'ref': inputs['inputCatalogRef']} 

155 dataId = butlerQC.quantum.dataId 

156 df = self.run(catalogs=catalogs, tract=dataId['tract'], patch=dataId['patch']) 

157 outputs = pipeBase.Struct(outputCatalog=df) 

158 butlerQC.put(outputs, outputRefs) 

159 

160 @classmethod 

161 def _makeArgumentParser(cls): 

162 """Create a suitable ArgumentParser. 

163 

164 We will use the ArgumentParser to get a list of data 

165 references for patches; the RunnerClass will sort them into lists 

166 of data references for the same patch. 

167 

168 References the first of self.inputDatasets, rather than 

169 self.inputDataset. 

170 """ 

171 return makeMergeArgumentParser(cls._DefaultName, cls.inputDatasets[0]) 

172 

173 def readCatalog(self, patchRef): 

174 """Read input catalogs 

175 

176 Read all the input datasets given by the 'inputDatasets' 

177 attribute. 

178 

179 Parameters 

180 ---------- 

181 patchRef : `lsst.daf.persistence.ButlerDataRef` 

182 Data reference for patch 

183 

184 Returns 

185 ------- 

186 Tuple consisting of band name and a dict of catalogs, keyed by 

187 dataset name 

188 """ 

189 band = patchRef.get(self.config.coaddName + "Coadd_filterLabel", immediate=True).bandLabel 

190 catalogDict = {} 

191 for dataset in self.inputDatasets: 

192 catalog = patchRef.get(self.config.coaddName + "Coadd_" + dataset, immediate=True) 

193 self.log.info("Read %d sources from %s for band %s: %s", 

194 len(catalog), dataset, band, patchRef.dataId) 

195 catalogDict[dataset] = catalog 

196 return band, catalogDict 

197 

198 def run(self, catalogs, tract, patch): 

199 """Merge multiple catalogs. 

200 

201 Parameters 

202 ---------- 

203 catalogs : `dict` 

204 Mapping from filter names to dict of catalogs. 

205 tract : `int` 

206 tractId to use for the tractId column. 

207 patch : `str` 

208 patchId to use for the patchId column. 

209 

210 Returns 

211 ------- 

212 catalog : `pandas.DataFrame` 

213 Merged dataframe 

214 """ 

215 

216 dfs = [] 

217 for filt, tableDict in catalogs.items(): 

218 for dataset, table in tableDict.items(): 

219 # Convert afwTable to pandas DataFrame 

220 df = table.asAstropy().to_pandas().set_index('id', drop=True) 

221 

222 # Sort columns by name, to ensure matching schema among patches 

223 df = df.reindex(sorted(df.columns), axis=1) 

224 df['tractId'] = tract 

225 df['patchId'] = patch 

226 

227 # Make columns a 3-level MultiIndex 

228 df.columns = pd.MultiIndex.from_tuples([(dataset, filt, c) for c in df.columns], 

229 names=('dataset', 'band', 'column')) 

230 dfs.append(df) 

231 

232 catalog = functools.reduce(lambda d1, d2: d1.join(d2), dfs) 

233 return catalog 

234 

235 def write(self, patchRef, catalog): 

236 """Write the output. 

237 

238 Parameters 

239 ---------- 

240 catalog : `ParquetTable` 

241 Catalog to write 

242 patchRef : `lsst.daf.persistence.ButlerDataRef` 

243 Data reference for patch 

244 """ 

245 patchRef.put(catalog, self.config.coaddName + "Coadd_" + self.outputDataset) 

246 # since the filter isn't actually part of the data ID for the dataset we're saving, 

247 # it's confusing to see it in the log message, even if the butler simply ignores it. 

248 mergeDataId = patchRef.dataId.copy() 

249 del mergeDataId["filter"] 

250 self.log.info("Wrote merged catalog: %s", mergeDataId) 

251 

252 def writeMetadata(self, dataRefList): 

253 """No metadata to write, and not sure how to write it for a list of dataRefs. 

254 """ 

255 pass 
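
# Sketch of how the per-patch deepCoadd_obj written above is addressed once read
# back as a DataFrame (the flux column name is hypothetical). The columns form a
# three-level MultiIndex named ('dataset', 'band', 'column'), so for example:
#
#     objDf = butler.get("deepCoadd_obj", dataId)           # Gen3 read, for illustration
#     gPsfFlux = objDf['meas']['g']['base_PsfFlux_instFlux']
#     gOnly = objDf.xs('g', level='band', axis=1)            # all datasets, g band only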

256 

257 

258class WriteSourceTableConnections(pipeBase.PipelineTaskConnections, 

259 defaultTemplates={"catalogType": ""}, 

260 dimensions=("instrument", "visit", "detector")): 

261 

262 catalog = connectionTypes.Input( 

263 doc="Input full-depth catalog of sources produced by CalibrateTask", 

264 name="{catalogType}src", 

265 storageClass="SourceCatalog", 

266 dimensions=("instrument", "visit", "detector") 

267 ) 

268 outputCatalog = connectionTypes.Output( 

269 doc="Catalog of sources, `src` in Parquet format. The 'id' column is " 

270 "replaced with an index; all other columns are unchanged.", 

271 name="{catalogType}source", 

272 storageClass="DataFrame", 

273 dimensions=("instrument", "visit", "detector") 

274 ) 

275 

276 

277class WriteSourceTableConfig(pipeBase.PipelineTaskConfig, 

278 pipelineConnections=WriteSourceTableConnections): 

279 doApplyExternalPhotoCalib = pexConfig.Field( 

280 dtype=bool, 

281 default=False, 

282 doc=("Add local photoCalib columns from the calexp.photoCalib? Should only set True if " 

283 "generating Source Tables from older src tables which do not already have local calib columns") 

284 ) 

285 doApplyExternalSkyWcs = pexConfig.Field( 

286 dtype=bool, 

287 default=False, 

288 doc=("Add local WCS columns from the calexp.wcs? Should only set True if " 

289 "generating Source Tables from older src tables which do not already have local calib columns") 

290 ) 

291 

292 

293class WriteSourceTableTask(CmdLineTask, pipeBase.PipelineTask): 

294 """Write source table to parquet 

295 """ 

296 _DefaultName = "writeSourceTable" 

297 ConfigClass = WriteSourceTableConfig 

298 

299 def runDataRef(self, dataRef): 

300 src = dataRef.get('src') 

301 if self.config.doApplyExternalPhotoCalib or self.config.doApplyExternalSkyWcs: 

302 src = self.addCalibColumns(src, dataRef) 

303 

304 ccdVisitId = dataRef.get('ccdExposureId') 

305 result = self.run(src, ccdVisitId=ccdVisitId) 

306 dataRef.put(result.table, 'source') 

307 

308 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

309 inputs = butlerQC.get(inputRefs) 

310 inputs['ccdVisitId'] = butlerQC.quantum.dataId.pack("visit_detector") 

311 result = self.run(**inputs).table 

312 outputs = pipeBase.Struct(outputCatalog=result.toDataFrame()) 

313 butlerQC.put(outputs, outputRefs) 

314 

315 def run(self, catalog, ccdVisitId=None): 

316 """Convert `src` catalog to parquet 

317 

318 Parameters 

319 ---------- 

320 catalog: `afwTable.SourceCatalog` 

321 catalog to be converted 

322 ccdVisitId: `int` 

323 ccdVisitId to be added as a column 

324 

325 Returns 

326 ------- 

327 result : `lsst.pipe.base.Struct` 

328 ``table`` 

329 `ParquetTable` version of the input catalog 

330 """ 

331 self.log.info("Generating parquet table from src catalog %s", ccdVisitId) 

332 df = catalog.asAstropy().to_pandas().set_index('id', drop=True) 

333 df['ccdVisitId'] = ccdVisitId 

334 return pipeBase.Struct(table=ParquetTable(dataFrame=df)) 

335 

336 def addCalibColumns(self, catalog, dataRef): 

337 """Add columns with local calibration evaluated at each centroid 

338 

339 This provides backwards compatibility with old repos: 

340 it exists for the purpose of converting old src catalogs 

341 (which don't have the expected local calib columns) to Source Tables. 

342 

343 Parameters 

344 ---------- 

345 catalog: `afwTable.SourceCatalog` 

346 catalog to which calib columns will be added 

347 dataRef: `lsst.daf.persistence.ButlerDataRef` 

348 Data reference for fetching the calibs from disk. 

349 

350 Returns 

351 ------- 

352 newCat: `afwTable.SourceCatalog` 

353 Source Catalog with requested local calib columns 

354 """ 

355 mapper = afwTable.SchemaMapper(catalog.schema) 

356 measureConfig = SingleFrameMeasurementTask.ConfigClass() 

357 measureConfig.doReplaceWithNoise = False 

358 

359 # Just need the WCS or the PhotoCalib attached to an exposure 

360 exposure = dataRef.get('calexp_sub', 

361 bbox=lsst.geom.Box2I(lsst.geom.Point2I(0, 0), lsst.geom.Point2I(0, 0))) 

362 

363 mapper = afwTable.SchemaMapper(catalog.schema) 

364 mapper.addMinimalSchema(catalog.schema, True) 

365 schema = mapper.getOutputSchema() 

366 

367 exposureIdInfo = dataRef.get("expIdInfo") 

368 measureConfig.plugins.names = [] 

369 if self.config.doApplyExternalSkyWcs: 

370 plugin = 'base_LocalWcs' 

371 if plugin in schema: 

372 raise RuntimeError(f"{plugin} already in src catalog. Set doApplyExternalSkyWcs=False") 

373 else: 

374 measureConfig.plugins.names.add(plugin) 

375 

376 if self.config.doApplyExternalPhotoCalib: 

377 plugin = 'base_LocalPhotoCalib' 

378 if plugin in schema: 

379 raise RuntimeError(f"{plugin} already in src catalog. Set doApplyExternalPhotoCalib=False") 

380 else: 

381 measureConfig.plugins.names.add(plugin) 

382 

383 measurement = SingleFrameMeasurementTask(config=measureConfig, schema=schema) 

384 newCat = afwTable.SourceCatalog(schema) 

385 newCat.extend(catalog, mapper=mapper) 

386 measurement.run(measCat=newCat, exposure=exposure, exposureId=exposureIdInfo.expId) 

387 return newCat 

388 

389 def writeMetadata(self, dataRef): 

390 """No metadata to write. 

391 """ 

392 pass 

393 

394 @classmethod 

395 def _makeArgumentParser(cls): 

396 parser = ArgumentParser(name=cls._DefaultName) 

397 parser.add_id_argument("--id", 'src', 

398 help="data ID, e.g. --id visit=12345 ccd=0") 

399 return parser 

400 

401 

402class PostprocessAnalysis(object): 

403 """Calculate columns from ParquetTable 

404 

405 This object manages and organizes an arbitrary set of computations 

406 on a catalog. The catalog is defined by a 

407 `lsst.pipe.tasks.parquetTable.ParquetTable` object (or list thereof), such as a 

408 `deepCoadd_obj` dataset, and the computations are defined by a collection 

409 of `lsst.pipe.tasks.functors.Functor` objects (or, equivalently, 

410 a `CompositeFunctor`). 

411 

412 After the object is initialized, accessing the `.df` attribute (which 

413 holds the `pandas.DataFrame` containing the results of the calculations) triggers 

414 computation of said dataframe. 

415 

416 One of the conveniences of using this object is the ability to define a desired common 

417 filter for all functors. This enables the same functor collection to be passed to 

418 several different `PostprocessAnalysis` objects without having to change the original 

419 functor collection, since the `filt` keyword argument of this object triggers an 

420 overwrite of the `filt` property for all functors in the collection. 

421 

422 This object also allows a list of refFlags to be passed, and defines a set of default 

423 refFlags that are always included even if not requested. 

424 

425 If a list of `ParquetTable` objects is passed, rather than a single one, then the 

426 calculations will be mapped over all the input catalogs. In principle, it should 

427 be straightforward to parallelize this activity, but initial tests have failed 

428 (see TODO in code comments). 

429 

430 Parameters 

431 ---------- 

432 parq : `lsst.pipe.tasks.parquetTable.ParquetTable` (or list of such) 

433 Source catalog(s) for computation 

434 

435 functors : `list`, `dict`, or `lsst.pipe.tasks.functors.CompositeFunctor` 

436 Computations to do (functors that act on `parq`). 

437 If a dict, the output 

438 DataFrame will have columns keyed accordingly. 

439 If a list, the column keys will come from the 

440 `.shortname` attribute of each functor. 

441 

442 filt : `str` (optional) 

443 Filter in which to calculate. If provided, 

444 this will overwrite any existing `.filt` attribute 

445 of the provided functors. 

446 

447 flags : `list` (optional) 

448 List of flags (per-band) to include in output table. 

449 Taken from the `meas` dataset if applied to a multilevel Object Table. 

450 

451 refFlags : `list` (optional) 

452 List of refFlags (only reference band) to include in output table. 

453 

454 forcedFlags : `list` (optional) 

455 List of flags (per-band) to include in output table. 

456 Taken from the ``forced_src`` dataset if applied to a 

457 multilevel Object Table. Intended for flags from measurement plugins 

458 only run during multi-band forced-photometry. 

459 """ 

460 _defaultRefFlags = [] 

461 _defaultFuncs = (('coord_ra', RAColumn()), 

462 ('coord_dec', DecColumn())) 

463 

464 def __init__(self, parq, functors, filt=None, flags=None, refFlags=None, forcedFlags=None): 

465 self.parq = parq 

466 self.functors = functors 

467 

468 self.filt = filt 

469 self.flags = list(flags) if flags is not None else [] 

470 self.forcedFlags = list(forcedFlags) if forcedFlags is not None else [] 

471 self.refFlags = list(self._defaultRefFlags) 

472 if refFlags is not None: 

473 self.refFlags += list(refFlags) 

474 

475 self._df = None 

476 

477 @property 

478 def defaultFuncs(self): 

479 funcs = dict(self._defaultFuncs) 

480 return funcs 

481 

482 @property 

483 def func(self): 

484 additionalFuncs = self.defaultFuncs 

485 additionalFuncs.update({flag: Column(flag, dataset='forced_src') for flag in self.forcedFlags}) 

486 additionalFuncs.update({flag: Column(flag, dataset='ref') for flag in self.refFlags}) 

487 additionalFuncs.update({flag: Column(flag, dataset='meas') for flag in self.flags}) 

488 

489 if isinstance(self.functors, CompositeFunctor): 

490 func = self.functors 

491 else: 

492 func = CompositeFunctor(self.functors) 

493 

494 func.funcDict.update(additionalFuncs) 

495 func.filt = self.filt 

496 

497 return func 

498 

499 @property 

500 def noDupCols(self): 

501 return [name for name, func in self.func.funcDict.items() if func.noDup or func.dataset == 'ref'] 

502 

503 @property 

504 def df(self): 

505 if self._df is None: 

506 self.compute() 

507 return self._df 

508 

509 def compute(self, dropna=False, pool=None): 

510 # map over multiple parquet tables 

511 if type(self.parq) in (list, tuple): 

512 if pool is None: 

513 dflist = [self.func(parq, dropna=dropna) for parq in self.parq] 

514 else: 

515 # TODO: Figure out why this doesn't work (pyarrow pickling issues?) 

516 dflist = pool.map(functools.partial(self.func, dropna=dropna), self.parq) 

517 self._df = pd.concat(dflist) 

518 else: 

519 self._df = self.func(self.parq, dropna=dropna) 

520 

521 return self._df 
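
# A minimal sketch of driving PostprocessAnalysis directly, assuming `parq` is a
# ParquetTable view of a deepCoadd_obj dataset; the flux column and flag names
# are illustrative rather than a pipeline standard.
def _postprocessAnalysisExample(parq):
    funcs = {'ra': RAColumn(),
             'dec': DecColumn(),
             'psfFlux': Column('base_PsfFlux_instFlux', dataset='meas')}
    analysis = PostprocessAnalysis(parq, funcs, filt='g',
                                   refFlags=['detect_isPrimary'])
    # Accessing .df triggers the computation; the result has one column per
    # functor plus the requested reference flags.
    return analysis.df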

522 

523 

524class TransformCatalogBaseConnections(pipeBase.PipelineTaskConnections, 

525 dimensions=()): 

526 """Expected Connections for subclasses of TransformCatalogBaseTask. 

527 

528 Must be subclassed. 

529 """ 

530 inputCatalog = connectionTypes.Input( 

531 name="", 

532 storageClass="DataFrame", 

533 ) 

534 outputCatalog = connectionTypes.Output( 

535 name="", 

536 storageClass="DataFrame", 

537 ) 

538 

539 

540class TransformCatalogBaseConfig(pipeBase.PipelineTaskConfig, 

541 pipelineConnections=TransformCatalogBaseConnections): 

542 functorFile = pexConfig.Field( 

543 dtype=str, 

544 doc='Path to YAML file specifying functors to be computed', 

545 default=None, 

546 optional=True 

547 ) 

548 

549 

550class TransformCatalogBaseTask(CmdLineTask, pipeBase.PipelineTask): 

551 """Base class for transforming/standardizing a catalog 

552 

553 Transformation is done by applying functors that convert units and apply calibrations. 

554 The purpose of this task is to perform a set of computations on 

555 an input `ParquetTable` dataset (such as `deepCoadd_obj`) and write the 

556 results to a new dataset (which needs to be declared in an `outputDataset` 

557 attribute). 

558 

559 The calculations to be performed are defined in a YAML file that specifies 

560 a set of functors to be computed, provided as 

561 a `--functorFile` config parameter. An example of such a YAML file 

562 is the following: 

563 

564 funcs: 

565 psfMag: 

566 functor: Mag 

567 args: 

568 - base_PsfFlux 

569 filt: HSC-G 

570 dataset: meas 

571 cmodel_magDiff: 

572 functor: MagDiff 

573 args: 

574 - modelfit_CModel 

575 - base_PsfFlux 

576 filt: HSC-G 

577 gauss_magDiff: 

578 functor: MagDiff 

579 args: 

580 - base_GaussianFlux 

581 - base_PsfFlux 

582 filt: HSC-G 

583 count: 

584 functor: Column 

585 args: 

586 - base_InputCount_value 

587 filt: HSC-G 

588 deconvolved_moments: 

589 functor: DeconvolvedMoments 

590 filt: HSC-G 

591 dataset: forced_src 

592 refFlags: 

593 - calib_psfUsed 

594 - merge_measurement_i 

595 - merge_measurement_r 

596 - merge_measurement_z 

597 - merge_measurement_y 

598 - merge_measurement_g 

599 - base_PixelFlags_flag_inexact_psfCenter 

600 - detect_isPrimary 

601 

602 The names for each entry under "funcs" will become the names of columns in the 

603 output dataset. All the functors referenced are defined in `lsst.pipe.tasks.functors`. 

604 Positional arguments to be passed to each functor are in the `args` list, 

605 and any additional entries for each column other than "functor" or "args" (e.g., `'filt'`, 

606 `'dataset'`) are treated as keyword arguments to be passed to the functor initialization. 

607 

608 The "flags" entry is the default shortcut for `Column` functors. 

609 All columns listed under "flags" will be copied to the output table 

610 untransformed. They can be of any datatype. 

611 In the special case of transforming a multi-level object table with 

612 band and dataset indices (deepCoadd_obj), these will be taken from the 

613 `meas` dataset and exploded out per band. 

614 

615 There are two special shortcuts that only apply when transforming 

616 multi-level Object (deepCoadd_obj) tables: 

617 - The "refFlags" entry is shortcut for `Column` functor 

618 taken from the `'ref'` dataset if transforming an ObjectTable. 

619 - The "forcedFlags" entry is shortcut for `Column` functors. 

620 taken from the ``forced_src`` dataset if transforming an ObjectTable. 

621 These are expanded out per band. 

622 

623 

624 This task uses the `lsst.pipe.tasks.postprocess.PostprocessAnalysis` object 

625 to organize and execute the calculations. 

626 

627 """ 

628 @property 

629 def _DefaultName(self): 

630 raise NotImplementedError('Subclass must define "_DefaultName" attribute') 

631 

632 @property 

633 def outputDataset(self): 

634 raise NotImplementedError('Subclass must define "outputDataset" attribute') 

635 

636 @property 

637 def inputDataset(self): 

638 raise NotImplementedError('Subclass must define "inputDataset" attribute') 

639 

640 @property 

641 def ConfigClass(self): 

642 raise NotImplementedError('Subclass must define "ConfigClass" attribute') 

643 

644 def __init__(self, *args, **kwargs): 

645 super().__init__(*args, **kwargs) 

646 if self.config.functorFile: 

647 self.log.info('Loading transform functor definitions from %s', 

648 self.config.functorFile) 

649 self.funcs = CompositeFunctor.from_file(self.config.functorFile) 

650 self.funcs.update(dict(PostprocessAnalysis._defaultFuncs)) 

651 else: 

652 self.funcs = None 

653 

654 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

655 inputs = butlerQC.get(inputRefs) 

656 if self.funcs is None: 

657 raise ValueError("config.functorFile is None. " 

658 "Must be a valid path to yaml in order to run Task as a PipelineTask.") 

659 result = self.run(parq=inputs['inputCatalog'], funcs=self.funcs, 

660 dataId=outputRefs.outputCatalog.dataId.full) 

661 outputs = pipeBase.Struct(outputCatalog=result) 

662 butlerQC.put(outputs, outputRefs) 

663 

664 def runDataRef(self, dataRef): 

665 parq = dataRef.get() 

666 if self.funcs is None: 

667 raise ValueError("config.functorFile is None. " 

668 "Must be a valid path to yaml in order to run as a CommandlineTask.") 

669 df = self.run(parq, funcs=self.funcs, dataId=dataRef.dataId) 

670 self.write(df, dataRef) 

671 return df 

672 

673 def run(self, parq, funcs=None, dataId=None, band=None): 

674 """Do postprocessing calculations 

675 

676 Takes a `ParquetTable` object and dataId, 

677 returns a dataframe with results of postprocessing calculations. 

678 

679 Parameters 

680 ---------- 

681 parq : `lsst.pipe.tasks.parquetTable.ParquetTable` 

682 ParquetTable from which calculations are done. 

683 funcs : `lsst.pipe.tasks.functors.Functors` 

684 Functors to apply to the table's columns 

685 dataId : dict, optional 

686 Used to add a `patchId` column to the output dataframe. 

687 band : `str`, optional 

688 Filter band that is being processed. 

689 

690 Returns 

691 ------- 

692 `pandas.DataFrame` 

693 

694 """ 

695 self.log.info("Transforming/standardizing the source table dataId: %s", dataId) 

696 

697 df = self.transform(band, parq, funcs, dataId).df 

698 self.log.info("Made a table of %d columns and %d rows", len(df.columns), len(df)) 

699 return df 

700 

701 def getFunctors(self): 

702 return self.funcs 

703 

704 def getAnalysis(self, parq, funcs=None, band=None): 

705 if funcs is None: 

706 funcs = self.funcs 

707 analysis = PostprocessAnalysis(parq, funcs, filt=band) 

708 return analysis 

709 

710 def transform(self, band, parq, funcs, dataId): 

711 analysis = self.getAnalysis(parq, funcs=funcs, band=band) 

712 df = analysis.df 

713 if dataId is not None: 

714 for key, value in dataId.items(): 

715 df[str(key)] = value 

716 

717 return pipeBase.Struct( 

718 df=df, 

719 analysis=analysis 

720 ) 

721 

722 def write(self, df, parqRef): 

723 parqRef.put(ParquetTable(dataFrame=df), self.outputDataset) 

724 

725 def writeMetadata(self, dataRef): 

726 """No metadata to write. 

727 """ 

728 pass 
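
# A hedged sketch of applying a functor YAML file outside the task framework,
# mirroring what TransformCatalogBaseTask.__init__ and getAnalysis do. `parq`
# is assumed to be a ParquetTable and `functorFile` a YAML file in the format
# shown in the class docstring above.
def _transformCatalogExample(parq, functorFile):
    funcs = CompositeFunctor.from_file(functorFile)
    funcs.update(dict(PostprocessAnalysis._defaultFuncs))
    analysis = PostprocessAnalysis(parq, funcs)
    # Each top-level entry under "funcs:" in the YAML becomes one output column.
    return analysis.df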

729 

730 

731class TransformObjectCatalogConnections(pipeBase.PipelineTaskConnections, 

732 defaultTemplates={"coaddName": "deep"}, 

733 dimensions=("tract", "patch", "skymap")): 

734 inputCatalog = connectionTypes.Input( 

735 doc="The vertical concatenation of the deepCoadd_{ref|meas|forced_src} catalogs, " 

736 "stored as a DataFrame with a multi-level column index per-patch.", 

737 dimensions=("tract", "patch", "skymap"), 

738 storageClass="DataFrame", 

739 name="{coaddName}Coadd_obj", 

740 deferLoad=True, 

741 ) 

742 outputCatalog = connectionTypes.Output( 

743 doc="Per-Patch Object Table of columns transformed from the deepCoadd_obj table per the standard " 

744 "data model.", 

745 dimensions=("tract", "patch", "skymap"), 

746 storageClass="DataFrame", 

747 name="objectTable" 

748 ) 

749 

750 

751class TransformObjectCatalogConfig(TransformCatalogBaseConfig, 

752 pipelineConnections=TransformObjectCatalogConnections): 

753 coaddName = pexConfig.Field( 

754 dtype=str, 

755 default="deep", 

756 doc="Name of coadd" 

757 ) 

758 # TODO: remove in DM-27177 

759 filterMap = pexConfig.DictField( 

760 keytype=str, 

761 itemtype=str, 

762 default={}, 

763 doc=("Dictionary mapping full filter name to short one for column name munging." 

764 "These filters determine the output columns no matter what filters the " 

765 "input data actually contain."), 

766 deprecated=("Coadds are now identified by the band, so this transform is unused." 

767 "Will be removed after v22.") 

768 ) 

769 outputBands = pexConfig.ListField( 

770 dtype=str, 

771 default=None, 

772 optional=True, 

773 doc=("These bands and only these bands will appear in the output," 

774 " NaN-filled if the input does not include them." 

775 " If None, then use all bands found in the input.") 

776 ) 

777 camelCase = pexConfig.Field( 

778 dtype=bool, 

779 default=True, 

780 doc=("Write per-band columns names with camelCase, else underscore " 

781 "For example: gPsFlux instead of g_PsFlux.") 

782 ) 

783 multilevelOutput = pexConfig.Field( 

784 dtype=bool, 

785 default=False, 

786 doc=("Whether results dataframe should have a multilevel column index (True) or be flat " 

787 "and name-munged (False).") 

788 ) 

789 

790 

791class TransformObjectCatalogTask(TransformCatalogBaseTask): 

792 """Produce a flattened Object Table to match the format specified in 

793 sdm_schemas. 

794 

795 Do the same set of postprocessing calculations on all bands. 

796 

797 This is identical to `TransformCatalogBaseTask`, except that it does the 

798 specified functor calculations for all filters present in the 

799 input `deepCoadd_obj` table. Any specific `"filt"` keywords specified 

800 by the YAML file will be superseded. 

801 """ 

802 _DefaultName = "transformObjectCatalog" 

803 ConfigClass = TransformObjectCatalogConfig 

804 

805 # Used by Gen 2 runDataRef only: 

806 inputDataset = 'deepCoadd_obj' 

807 outputDataset = 'objectTable' 

808 

809 @classmethod 

810 def _makeArgumentParser(cls): 

811 parser = ArgumentParser(name=cls._DefaultName) 

812 parser.add_id_argument("--id", cls.inputDataset, 

813 ContainerClass=CoaddDataIdContainer, 

814 help="data ID, e.g. --id tract=12345 patch=1,2") 

815 return parser 

816 

817 def run(self, parq, funcs=None, dataId=None, band=None): 

818 # NOTE: band kwarg is ignored here. 

819 dfDict = {} 

820 analysisDict = {} 

821 templateDf = pd.DataFrame() 

822 

823 if isinstance(parq, DeferredDatasetHandle): 

824 columns = parq.get(component='columns') 

825 inputBands = columns.unique(level=1).values 

826 else: 

827 inputBands = parq.columnLevelNames['band'] 

828 

829 outputBands = self.config.outputBands if self.config.outputBands else inputBands 

830 

831 # Perform transform for data of filters that exist in parq. 

832 for inputBand in inputBands: 

833 if inputBand not in outputBands: 

834 self.log.info("Ignoring %s band data in the input", inputBand) 

835 continue 

836 self.log.info("Transforming the catalog of band %s", inputBand) 

837 result = self.transform(inputBand, parq, funcs, dataId) 

838 dfDict[inputBand] = result.df 

839 analysisDict[inputBand] = result.analysis 

840 if templateDf.empty: 

841 templateDf = result.df 

842 

843 # Fill NaNs in columns of other wanted bands 

844 for filt in outputBands: 

845 if filt not in dfDict: 

846 self.log.info("Adding empty columns for band %s", filt) 

847 dfDict[filt] = pd.DataFrame().reindex_like(templateDf) 

848 

849 # This makes a multilevel column index, with band as first level 

850 df = pd.concat(dfDict, axis=1, names=['band', 'column']) 

851 

852 if not self.config.multilevelOutput: 

853 noDupCols = list(set.union(*[set(v.noDupCols) for v in analysisDict.values()])) 

854 if dataId is not None: 

855 noDupCols += list(dataId.keys()) 

856 df = flattenFilters(df, noDupCols=noDupCols, camelCase=self.config.camelCase, 

857 inputBands=inputBands) 

858 

859 self.log.info("Made a table of %d columns and %d rows", len(df.columns), len(df)) 

860 return df 

861 

862 

863class TractObjectDataIdContainer(CoaddDataIdContainer): 

864 

865 def makeDataRefList(self, namespace): 

866 """Make self.refList from self.idList 

867 

868 Generate a list of data references given tract and/or patch. 

869 This was adapted from `TractQADataIdContainer`, which was 

870 `TractDataIdContainer` modified to not require "filter". 

871 Only existing dataRefs are returned. 

872 """ 

873 def getPatchRefList(tract): 

874 return [namespace.butler.dataRef(datasetType=self.datasetType, 

875 tract=tract.getId(), 

876 patch="%d,%d" % patch.getIndex()) for patch in tract] 

877 

878 tractRefs = defaultdict(list) # Data references for each tract 

879 for dataId in self.idList: 

880 skymap = self.getSkymap(namespace) 

881 

882 if "tract" in dataId: 

883 tractId = dataId["tract"] 

884 if "patch" in dataId: 

885 tractRefs[tractId].append(namespace.butler.dataRef(datasetType=self.datasetType, 

886 tract=tractId, 

887 patch=dataId['patch'])) 

888 else: 

889 tractRefs[tractId] += getPatchRefList(skymap[tractId]) 

890 else: 

891 tractRefs = dict((tract.getId(), tractRefs.get(tract.getId(), []) + getPatchRefList(tract)) 

892 for tract in skymap) 

893 outputRefList = [] 

894 for tractRefList in tractRefs.values(): 

895 existingRefs = [ref for ref in tractRefList if ref.datasetExists()] 

896 outputRefList.append(existingRefs) 

897 

898 self.refList = outputRefList 

899 

900 

901class ConsolidateObjectTableConnections(pipeBase.PipelineTaskConnections, 

902 dimensions=("tract", "skymap")): 

903 inputCatalogs = connectionTypes.Input( 

904 doc="Per-Patch objectTables conforming to the standard data model.", 

905 name="objectTable", 

906 storageClass="DataFrame", 

907 dimensions=("tract", "patch", "skymap"), 

908 multiple=True, 

909 ) 

910 outputCatalog = connectionTypes.Output( 

911 doc="Pre-tract horizontal concatenation of the input objectTables", 

912 name="objectTable_tract", 

913 storageClass="DataFrame", 

914 dimensions=("tract", "skymap"), 

915 ) 

916 

917 

918class ConsolidateObjectTableConfig(pipeBase.PipelineTaskConfig, 

919 pipelineConnections=ConsolidateObjectTableConnections): 

920 coaddName = pexConfig.Field( 

921 dtype=str, 

922 default="deep", 

923 doc="Name of coadd" 

924 ) 

925 

926 

927class ConsolidateObjectTableTask(CmdLineTask, pipeBase.PipelineTask): 

928 """Write patch-merged source tables to a tract-level parquet file 

929 

930 Concatenates `objectTable` list into a per-tract `objectTable_tract` 

931 """ 

932 _DefaultName = "consolidateObjectTable" 

933 ConfigClass = ConsolidateObjectTableConfig 

934 

935 inputDataset = 'objectTable' 

936 outputDataset = 'objectTable_tract' 

937 

938 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

939 inputs = butlerQC.get(inputRefs) 

940 self.log.info("Concatenating %s per-patch Object Tables", 

941 len(inputs['inputCatalogs'])) 

942 df = pd.concat(inputs['inputCatalogs']) 

943 butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs) 

944 

945 @classmethod 

946 def _makeArgumentParser(cls): 

947 parser = ArgumentParser(name=cls._DefaultName) 

948 

949 parser.add_id_argument("--id", cls.inputDataset, 

950 help="data ID, e.g. --id tract=12345", 

951 ContainerClass=TractObjectDataIdContainer) 

952 return parser 

953 

954 def runDataRef(self, patchRefList): 

955 df = pd.concat([patchRef.get().toDataFrame() for patchRef in patchRefList]) 

956 patchRefList[0].put(ParquetTable(dataFrame=df), self.outputDataset) 

957 

958 def writeMetadata(self, dataRef): 

959 """No metadata to write. 

960 """ 

961 pass 

962 

963 

964class TransformSourceTableConnections(pipeBase.PipelineTaskConnections, 

965 defaultTemplates={"catalogType": ""}, 

966 dimensions=("instrument", "visit", "detector")): 

967 

968 inputCatalog = connectionTypes.Input( 

969 doc="Wide input catalog of sources produced by WriteSourceTableTask", 

970 name="{catalogType}source", 

971 storageClass="DataFrame", 

972 dimensions=("instrument", "visit", "detector"), 

973 deferLoad=True 

974 ) 

975 outputCatalog = connectionTypes.Output( 

976 doc="Narrower, per-detector Source Table transformed and converted per a " 

977 "specified set of functors", 

978 name="{catalogType}sourceTable", 

979 storageClass="DataFrame", 

980 dimensions=("instrument", "visit", "detector") 

981 ) 

982 

983 

984class TransformSourceTableConfig(TransformCatalogBaseConfig, 

985 pipelineConnections=TransformSourceTableConnections): 

986 pass 

987 

988 

989class TransformSourceTableTask(TransformCatalogBaseTask): 

990 """Transform/standardize a source catalog 

991 """ 

992 _DefaultName = "transformSourceTable" 

993 ConfigClass = TransformSourceTableConfig 

994 

995 inputDataset = 'source' 

996 outputDataset = 'sourceTable' 

997 

998 @classmethod 

999 def _makeArgumentParser(cls): 

1000 parser = ArgumentParser(name=cls._DefaultName) 

1001 parser.add_id_argument("--id", datasetType=cls.inputDataset, 

1002 level="sensor", 

1003 help="data ID, e.g. --id visit=12345 ccd=0") 

1004 return parser 

1005 

1006 def runDataRef(self, dataRef): 

1007 """Override to specify band label to run().""" 

1008 parq = dataRef.get() 

1009 funcs = self.getFunctors() 

1010 band = dataRef.get("calexp_filterLabel", immediate=True).bandLabel 

1011 df = self.run(parq, funcs=funcs, dataId=dataRef.dataId, band=band) 

1012 self.write(df, dataRef) 

1013 return df 

1014 

1015 

1016class ConsolidateVisitSummaryConnections(pipeBase.PipelineTaskConnections, 

1017 dimensions=("instrument", "visit",), 

1018 defaultTemplates={"calexpType": ""}): 

1019 calexp = connectionTypes.Input( 

1020 doc="Processed exposures used for metadata", 

1021 name="{calexpType}calexp", 

1022 storageClass="ExposureF", 

1023 dimensions=("instrument", "visit", "detector"), 

1024 deferLoad=True, 

1025 multiple=True, 

1026 ) 

1027 visitSummary = connectionTypes.Output( 

1028 doc=("Per-visit consolidated exposure metadata. These catalogs use " 

1029 "detector id for the id and are sorted for fast lookups of a " 

1030 "detector."), 

1031 name="{calexpType}visitSummary", 

1032 storageClass="ExposureCatalog", 

1033 dimensions=("instrument", "visit"), 

1034 ) 

1035 

1036 

1037class ConsolidateVisitSummaryConfig(pipeBase.PipelineTaskConfig, 

1038 pipelineConnections=ConsolidateVisitSummaryConnections): 

1039 """Config for ConsolidateVisitSummaryTask""" 

1040 pass 

1041 

1042 

1043class ConsolidateVisitSummaryTask(pipeBase.PipelineTask, pipeBase.CmdLineTask): 

1044 """Task to consolidate per-detector visit metadata. 

1045 

1046 This task aggregates the following metadata from all the detectors in a 

1047 single visit into an exposure catalog: 

1048 - The visitInfo. 

1049 - The wcs. 

1050 - The photoCalib. 

1051 - The physical_filter and band (if available). 

1052 - The psf size, shape, and effective area at the center of the detector. 

1053 - The corners of the bounding box in right ascension/declination. 

1054 

1055 Other quantities such as Detector, Psf, ApCorrMap, and TransmissionCurve 

1056 are not persisted here because of storage concerns, and because of their 

1057 limited utility as summary statistics. 

1058 

1059 Tests for this task are performed in ci_hsc_gen3. 

1060 """ 

1061 _DefaultName = "consolidateVisitSummary" 

1062 ConfigClass = ConsolidateVisitSummaryConfig 

1063 

1064 @classmethod 

1065 def _makeArgumentParser(cls): 

1066 parser = ArgumentParser(name=cls._DefaultName) 

1067 

1068 parser.add_id_argument("--id", "calexp", 

1069 help="data ID, e.g. --id visit=12345", 

1070 ContainerClass=VisitDataIdContainer) 

1071 return parser 

1072 

1073 def writeMetadata(self, dataRef): 

1074 """No metadata to persist, so override to remove metadata persistance. 

1075 """ 

1076 pass 

1077 

1078 def writeConfig(self, butler, clobber=False, doBackup=True): 

1079 """No config to persist, so override to remove config persistance. 

1080 """ 

1081 pass 

1082 

1083 def runDataRef(self, dataRefList): 

1084 visit = dataRefList[0].dataId['visit'] 

1085 

1086 self.log.debug("Concatenating metadata from %d per-detector calexps (visit %d)", 

1087 len(dataRefList), visit) 

1088 

1089 expCatalog = self._combineExposureMetadata(visit, dataRefList, isGen3=False) 

1090 

1091 dataRefList[0].put(expCatalog, 'visitSummary', visit=visit) 

1092 

1093 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

1094 dataRefs = butlerQC.get(inputRefs.calexp) 

1095 visit = dataRefs[0].dataId.byName()['visit'] 

1096 

1097 self.log.debug("Concatenating metadata from %d per-detector calexps (visit %d)", 

1098 len(dataRefs), visit) 

1099 

1100 expCatalog = self._combineExposureMetadata(visit, dataRefs) 

1101 

1102 butlerQC.put(expCatalog, outputRefs.visitSummary) 

1103 

1104 def _combineExposureMetadata(self, visit, dataRefs, isGen3=True): 

1105 """Make a combined exposure catalog from a list of dataRefs. 

1106 These dataRefs must point to exposures with wcs, summaryStats, 

1107 and other visit metadata. 

1108 

1109 Parameters 

1110 ---------- 

1111 visit : `int` 

1112 Visit identification number. 

1113 dataRefs : `list` 

1114 List of dataRefs in visit. May be list of 

1115 `lsst.daf.persistence.ButlerDataRef` (Gen2) or 

1116 `lsst.daf.butler.DeferredDatasetHandle` (Gen3). 

1117 isGen3 : `bool`, optional 

1118 Specifies if this is a Gen3 list of datarefs. 

1119 

1120 Returns 

1121 ------- 

1122 visitSummary : `lsst.afw.table.ExposureCatalog` 

1123 Exposure catalog with per-detector summary information. 

1124 """ 

1125 schema = self._makeVisitSummarySchema() 

1126 cat = afwTable.ExposureCatalog(schema) 

1127 cat.resize(len(dataRefs)) 

1128 

1129 cat['visit'] = visit 

1130 

1131 for i, dataRef in enumerate(dataRefs): 

1132 if isGen3: 

1133 visitInfo = dataRef.get(component='visitInfo') 

1134 filterLabel = dataRef.get(component='filterLabel') 

1135 summaryStats = dataRef.get(component='summaryStats') 

1136 detector = dataRef.get(component='detector') 

1137 wcs = dataRef.get(component='wcs') 

1138 photoCalib = dataRef.get(component='photoCalib') 

1139 detector = dataRef.get(component='detector') 

1140 bbox = dataRef.get(component='bbox') 

1141 validPolygon = dataRef.get(component='validPolygon') 

1142 else: 

1143 # Note that we need to read the calexp because there is 

1144 # no magic access to the psf except through the exposure. 

1145 gen2_read_bbox = lsst.geom.BoxI(lsst.geom.PointI(0, 0), lsst.geom.PointI(1, 1)) 

1146 exp = dataRef.get(datasetType='calexp_sub', bbox=gen2_read_bbox) 

1147 visitInfo = exp.getInfo().getVisitInfo() 

1148 filterLabel = dataRef.get("calexp_filterLabel") 

1149 summaryStats = exp.getInfo().getSummaryStats() 

1150 wcs = exp.getWcs() 

1151 photoCalib = exp.getPhotoCalib() 

1152 detector = exp.getDetector() 

1153 bbox = dataRef.get(datasetType='calexp_bbox') 

1154 validPolygon = exp.getInfo().getValidPolygon() 

1155 

1156 rec = cat[i] 

1157 rec.setBBox(bbox) 

1158 rec.setVisitInfo(visitInfo) 

1159 rec.setWcs(wcs) 

1160 rec.setPhotoCalib(photoCalib) 

1161 rec.setValidPolygon(validPolygon) 

1162 

1163 rec['physical_filter'] = filterLabel.physicalLabel if filterLabel.hasPhysicalLabel() else "" 

1164 rec['band'] = filterLabel.bandLabel if filterLabel.hasBandLabel() else "" 

1165 rec.setId(detector.getId()) 

1166 rec['psfSigma'] = summaryStats.psfSigma 

1167 rec['psfIxx'] = summaryStats.psfIxx 

1168 rec['psfIyy'] = summaryStats.psfIyy 

1169 rec['psfIxy'] = summaryStats.psfIxy 

1170 rec['psfArea'] = summaryStats.psfArea 

1171 rec['raCorners'][:] = summaryStats.raCorners 

1172 rec['decCorners'][:] = summaryStats.decCorners 

1173 rec['ra'] = summaryStats.ra 

1174 rec['decl'] = summaryStats.decl 

1175 rec['zenithDistance'] = summaryStats.zenithDistance 

1176 rec['zeroPoint'] = summaryStats.zeroPoint 

1177 rec['skyBg'] = summaryStats.skyBg 

1178 rec['skyNoise'] = summaryStats.skyNoise 

1179 rec['meanVar'] = summaryStats.meanVar 

1180 rec['astromOffsetMean'] = summaryStats.astromOffsetMean 

1181 rec['astromOffsetStd'] = summaryStats.astromOffsetStd 

1182 

1183 metadata = dafBase.PropertyList() 

1184 metadata.add("COMMENT", "Catalog id is detector id, sorted.") 

1185 # We are looping over existing datarefs, so the following is true 

1186 metadata.add("COMMENT", "Only detectors with data have entries.") 

1187 cat.setMetadata(metadata) 

1188 

1189 cat.sort() 

1190 return cat 

1191 

1192 def _makeVisitSummarySchema(self): 

1193 """Make the schema for the visitSummary catalog.""" 

1194 schema = afwTable.ExposureTable.makeMinimalSchema() 

1195 schema.addField('visit', type='I', doc='Visit number') 

1196 schema.addField('physical_filter', type='String', size=32, doc='Physical filter') 

1197 schema.addField('band', type='String', size=32, doc='Name of band') 

1198 schema.addField('psfSigma', type='F', 

1199 doc='PSF model second-moments determinant radius (center of chip) (pixel)') 

1200 schema.addField('psfArea', type='F', 

1201 doc='PSF model effective area (center of chip) (pixel**2)') 

1202 schema.addField('psfIxx', type='F', 

1203 doc='PSF model Ixx (center of chip) (pixel**2)') 

1204 schema.addField('psfIyy', type='F', 

1205 doc='PSF model Iyy (center of chip) (pixel**2)') 

1206 schema.addField('psfIxy', type='F', 

1207 doc='PSF model Ixy (center of chip) (pixel**2)') 

1208 schema.addField('raCorners', type='ArrayD', size=4, 

1209 doc='Right Ascension of bounding box corners (degrees)') 

1210 schema.addField('decCorners', type='ArrayD', size=4, 

1211 doc='Declination of bounding box corners (degrees)') 

1212 schema.addField('ra', type='D', 

1213 doc='Right Ascension of bounding box center (degrees)') 

1214 schema.addField('decl', type='D', 

1215 doc='Declination of bounding box center (degrees)') 

1216 schema.addField('zenithDistance', type='F', 

1217 doc='Zenith distance of bounding box center (degrees)') 

1218 schema.addField('zeroPoint', type='F', 

1219 doc='Mean zeropoint in detector (mag)') 

1220 schema.addField('skyBg', type='F', 

1221 doc='Average sky background (ADU)') 

1222 schema.addField('skyNoise', type='F', 

1223 doc='Average sky noise (ADU)') 

1224 schema.addField('meanVar', type='F', 

1225 doc='Mean variance of the weight plane (ADU**2)') 

1226 schema.addField('astromOffsetMean', type='F', 

1227 doc='Mean offset of astrometric calibration matches (arcsec)') 

1228 schema.addField('astromOffsetStd', type='F', 

1229 doc='Standard deviation of offsets of astrometric calibration matches (arcsec)') 

1230 

1231 return schema 

1232 

1233 

1234class VisitDataIdContainer(DataIdContainer): 

1235 """DataIdContainer that groups sensor-level id's by visit 

1236 """ 

1237 

1238 def makeDataRefList(self, namespace): 

1239 """Make self.refList from self.idList 

1240 

1241 Generate a list of data references grouped by visit. 

1242 

1243 Parameters 

1244 ---------- 

1245 namespace : `argparse.Namespace` 

1246 Namespace used by `lsst.pipe.base.CmdLineTask` to parse command line arguments 

1247 """ 

1248 # Group by visits 

1249 visitRefs = defaultdict(list) 

1250 for dataId in self.idList: 

1251 if "visit" in dataId: 

1252 visitId = dataId["visit"] 

1253 # Append all data references in this subset to the list for this visit 

1254 subset = namespace.butler.subset(self.datasetType, dataId=dataId) 

1255 visitRefs[visitId].extend([dataRef for dataRef in subset]) 

1256 

1257 outputRefList = [] 

1258 for refList in visitRefs.values(): 

1259 existingRefs = [ref for ref in refList if ref.datasetExists()] 

1260 if existingRefs: 

1261 outputRefList.append(existingRefs) 

1262 

1263 self.refList = outputRefList 

1264 

1265 

1266class ConsolidateSourceTableConnections(pipeBase.PipelineTaskConnections, 

1267 defaultTemplates={"catalogType": ""}, 

1268 dimensions=("instrument", "visit")): 

1269 inputCatalogs = connectionTypes.Input( 

1270 doc="Input per-detector Source Tables", 

1271 name="{catalogType}sourceTable", 

1272 storageClass="DataFrame", 

1273 dimensions=("instrument", "visit", "detector"), 

1274 multiple=True 

1275 ) 

1276 outputCatalog = connectionTypes.Output( 

1277 doc="Per-visit concatenation of Source Table", 

1278 name="{catalogType}sourceTable_visit", 

1279 storageClass="DataFrame", 

1280 dimensions=("instrument", "visit") 

1281 ) 

1282 

1283 

1284class ConsolidateSourceTableConfig(pipeBase.PipelineTaskConfig, 

1285 pipelineConnections=ConsolidateSourceTableConnections): 

1286 pass 

1287 

1288 

1289class ConsolidateSourceTableTask(CmdLineTask, pipeBase.PipelineTask): 

1290 """Concatenate `sourceTable` list into a per-visit `sourceTable_visit` 

1291 """ 

1292 _DefaultName = 'consolidateSourceTable' 

1293 ConfigClass = ConsolidateSourceTableConfig 

1294 

1295 inputDataset = 'sourceTable' 

1296 outputDataset = 'sourceTable_visit' 

1297 

1298 def runQuantum(self, butlerQC, inputRefs, outputRefs): 

1299 inputs = butlerQC.get(inputRefs) 

1300 self.log.info("Concatenating %s per-detector Source Tables", 

1301 len(inputs['inputCatalogs'])) 

1302 df = pd.concat(inputs['inputCatalogs']) 

1303 butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs) 

1304 

1305 def runDataRef(self, dataRefList): 

1306 self.log.info("Concatenating %s per-detector Source Tables", len(dataRefList)) 

1307 df = pd.concat([dataRef.get().toDataFrame() for dataRef in dataRefList]) 

1308 dataRefList[0].put(ParquetTable(dataFrame=df), self.outputDataset) 

1309 

1310 @classmethod 

1311 def _makeArgumentParser(cls): 

1312 parser = ArgumentParser(name=cls._DefaultName) 

1313 

1314 parser.add_id_argument("--id", cls.inputDataset, 

1315 help="data ID, e.g. --id visit=12345", 

1316 ContainerClass=VisitDataIdContainer) 

1317 return parser 

1318 

1319 def writeMetadata(self, dataRef): 

1320 """No metadata to write. 

1321 """ 

1322 pass 

1323 

1324 def writeConfig(self, butler, clobber=False, doBackup=True): 

1325 """No config to write. 

1326 """ 

1327 pass 

1328 

1329 

1330class MakeCcdVisitTableConnections(pipeBase.PipelineTaskConnections, 

1331 dimensions=("instrument",), 

1332 defaultTemplates={}): 

1333 visitSummaryRefs = connectionTypes.Input( 

1334 doc="Data references for per-visit consolidated exposure metadata from ConsolidateVisitSummaryTask", 

1335 name="visitSummary", 

1336 storageClass="ExposureCatalog", 

1337 dimensions=("instrument", "visit"), 

1338 multiple=True, 

1339 deferLoad=True, 

1340 ) 

1341 outputCatalog = connectionTypes.Output( 

1342 doc="CCD and Visit metadata table", 

1343 name="CcdVisitTable", 

1344 storageClass="DataFrame", 

1345 dimensions=("instrument",) 

1346 ) 

1347 

1348 

1349class MakeCcdVisitTableConfig(pipeBase.PipelineTaskConfig, 

1350 pipelineConnections=MakeCcdVisitTableConnections): 

1351 pass 

1352 

1353 

1354class MakeCcdVisitTableTask(CmdLineTask, pipeBase.PipelineTask): 

1355 """Produce a `ccdVisitTable` from the `visitSummary` exposure catalogs. 

1356 """ 

1357 _DefaultName = 'makeCcdVisitTable' 

1358 ConfigClass = MakeCcdVisitTableConfig 

1359 

1360 def run(self, visitSummaryRefs): 

1361 """ Make a table of ccd information from the `visitSummary` catalogs. 

1362 Parameters 

1363 ---------- 

1364 visitSummaryRefs : `list` of `lsst.daf.butler.DeferredDatasetHandle` 

1365 List of DeferredDatasetHandles pointing to exposure catalogs with 

1366 per-detector summary information. 

1367 Returns 

1368 ------- 

1369 result : `lsst.pipe.base.Struct` 

1370 Results struct with attribute: 

1371 - `outputCatalog` 

1372 Catalog of ccd and visit information. 

1373 """ 

1374 ccdEntries = [] 

1375 for visitSummaryRef in visitSummaryRefs: 

1376 visitSummary = visitSummaryRef.get() 

1377 visitInfo = visitSummary[0].getVisitInfo() 

1378 

1379 ccdEntry = {} 

1380 summaryTable = visitSummary.asAstropy() 

1381 selectColumns = ['id', 'visit', 'physical_filter', 'ra', 'decl', 'zenithDistance', 'zeroPoint', 

1382 'psfSigma', 'skyBg', 'skyNoise'] 

1383 ccdEntry = summaryTable[selectColumns].to_pandas().set_index('id') 

1384 ccdEntry = ccdEntry.rename(columns={"physical_filter": "filterName", "visit": "visitId"}) 

1385 

1386 dataIds = [DataCoordinate.standardize(visitSummaryRef.dataId, detector=id) for id in 

1387 summaryTable['id']] 

1388 packer = visitSummaryRef.dataId.universe.makePacker('visit_detector', visitSummaryRef.dataId) 

1389 ccdVisitIds = [packer.pack(dataId) for dataId in dataIds] 

1390 ccdEntry['ccdVisitId'] = ccdVisitIds 

1391 

1392 pixToArcseconds = np.array([vR.getWcs().getPixelScale().asArcseconds() for vR in visitSummary]) 

1393 ccdEntry["seeing"] = visitSummary['psfSigma'] * np.sqrt(8 * np.log(2)) * pixToArcseconds 

1394 

1395 ccdEntry["skyRotation"] = visitInfo.getBoresightRotAngle().asDegrees() 

1396 ccdEntry["expMidpt"] = visitInfo.getDate().toPython() 

1397 expTime = visitInfo.getExposureTime() 

1398 ccdEntry['expTime'] = expTime 

1399 ccdEntry["obsStart"] = ccdEntry["expMidpt"] - 0.5 * pd.Timedelta(seconds=expTime) 

1400 ccdEntry['darkTime'] = visitInfo.getDarkTime() 

1401 ccdEntry['xSize'] = summaryTable['bbox_max_x'] - summaryTable['bbox_min_x'] 

1402 ccdEntry['ySize'] = summaryTable['bbox_max_y'] - summaryTable['bbox_min_y'] 

1403 ccdEntry['llcra'] = summaryTable['raCorners'][:, 0] 

1404 ccdEntry['llcdec'] = summaryTable['decCorners'][:, 0] 

1405 ccdEntry['ulcra'] = summaryTable['raCorners'][:, 1] 

1406 ccdEntry['ulcdec'] = summaryTable['decCorners'][:, 1] 

1407 ccdEntry['urcra'] = summaryTable['raCorners'][:, 2] 

1408 ccdEntry['urcdec'] = summaryTable['decCorners'][:, 2] 

1409 ccdEntry['lrcra'] = summaryTable['raCorners'][:, 3] 

1410 ccdEntry['lrcdec'] = summaryTable['decCorners'][:, 3] 

1411 # TODO: DM-30618, Add raftName, nExposures, ccdTemp, binX, binY, and flags, 

1412 # and decide if WCS, and llcx, llcy, ulcx, ulcy, etc. values are actually wanted. 

1413 ccdEntries.append(ccdEntry) 

1414 

1415 outputCatalog = pd.concat(ccdEntries) 

1416 return pipeBase.Struct(outputCatalog=outputCatalog) 
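
# Sketch of the seeing calculation used in MakeCcdVisitTableTask.run above: a
# Gaussian sigma in pixels is converted to a FWHM in arcseconds via the factor
# sqrt(8 ln 2) ~= 2.355. The function name is illustrative only.
def _sigmaToFwhmArcsec(psfSigma, pixelScaleArcsec):
    return psfSigma * np.sqrt(8 * np.log(2)) * pixelScaleArcsec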

1417 

1418 

1419class MakeVisitTableConnections(pipeBase.PipelineTaskConnections, 

1420 dimensions=("instrument",), 

1421 defaultTemplates={}): 

1422 visitSummaries = connectionTypes.Input( 

1423 doc="Per-visit consolidated exposure metadata from ConsolidateVisitSummaryTask", 

1424 name="visitSummary", 

1425 storageClass="ExposureCatalog", 

1426 dimensions=("instrument", "visit",), 

1427 multiple=True, 

1428 deferLoad=True, 

1429 ) 

1430 outputCatalog = connectionTypes.Output( 

1431 doc="Visit metadata table", 

1432 name="visitTable", 

1433 storageClass="DataFrame", 

1434 dimensions=("instrument",) 

1435 ) 

1436 

1437 

1438class MakeVisitTableConfig(pipeBase.PipelineTaskConfig, 

1439 pipelineConnections=MakeVisitTableConnections): 

1440 pass 

1441 

1442 

1443class MakeVisitTableTask(CmdLineTask, pipeBase.PipelineTask): 

1444 """Produce a `visitTable` from the `visitSummary` exposure catalogs. 

1445 """ 

1446 _DefaultName = 'makeVisitTable' 

1447 ConfigClass = MakeVisitTableConfig 

1448 

1449 def run(self, visitSummaries): 

1450 """ Make a table of visit information from the `visitSummary` catalogs 

1451 

1452 Parameters 

1453 ---------- 

1454 visitSummaries : list of `lsst.afw.table.ExposureCatalog` 

1455 List of exposure catalogs with per-detector summary information. 

1456 Returns 

1457 ------- 

1458 result : `lsst.pipe.base.Struct` 

1459 Results struct with attribute: 

1460 ``outputCatalog`` 

1461 Catalog of visit information. 

1462 """ 

1463 visitEntries = [] 

1464 for visitSummary in visitSummaries: 

1465 visitSummary = visitSummary.get() 

1466 visitRow = visitSummary[0] 

1467 visitInfo = visitRow.getVisitInfo() 

1468 

1469 visitEntry = {} 

1470 visitEntry["visitId"] = visitRow['visit'] 

1471 visitEntry["filterName"] = visitRow['physical_filter'] 

1472 raDec = visitInfo.getBoresightRaDec() 

1473 visitEntry["ra"] = raDec.getRa().asDegrees() 

1474 visitEntry["decl"] = raDec.getDec().asDegrees() 

1475 visitEntry["skyRotation"] = visitInfo.getBoresightRotAngle().asDegrees() 

1476 azAlt = visitInfo.getBoresightAzAlt() 

1477 visitEntry["azimuth"] = azAlt.getLongitude().asDegrees() 

1478 visitEntry["altitude"] = azAlt.getLatitude().asDegrees() 

1479 visitEntry["zenithDistance"] = 90 - azAlt.getLatitude().asDegrees() 

1480 visitEntry["airmass"] = visitInfo.getBoresightAirmass() 

1481 visitEntry["obsStart"] = visitInfo.getDate().toPython() 

1482 visitEntry["expTime"] = visitInfo.getExposureTime() 

1483 visitEntries.append(visitEntry) 

1484 # TODO: DM-30623, Add programId, exposureType, expMidpt, cameraTemp, mirror1Temp, mirror2Temp, 

1485 # mirror3Temp, domeTemp, externalTemp, dimmSeeing, pwvGPS, pwvMW, flags, nExposures 

1486 

1487 outputCatalog = pd.DataFrame(data=visitEntries) 

1488 return pipeBase.Struct(outputCatalog=outputCatalog)