# This file is part of pipe_tasks
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.

import functools
import pandas as pd
from collections import defaultdict
import numpy as np

import lsst.geom
import lsst.pex.config as pexConfig
import lsst.pipe.base as pipeBase
import lsst.daf.base as dafBase
from lsst.pipe.base import connectionTypes
import lsst.afw.table as afwTable
from lsst.meas.base import SingleFrameMeasurementTask
from lsst.pipe.base import CmdLineTask, ArgumentParser, DataIdContainer
from lsst.coadd.utils.coaddDataIdContainer import CoaddDataIdContainer
from lsst.daf.butler import DeferredDatasetHandle, DataCoordinate

from .parquetTable import ParquetTable
from .multiBandUtils import makeMergeArgumentParser, MergeSourcesRunner
from .functors import CompositeFunctor, RAColumn, DecColumn, Column


def flattenFilters(df, noDupCols=['coord_ra', 'coord_dec'], camelCase=False, inputBands=None):
    """Flattens a dataframe with a multilevel column index.
    """
    newDf = pd.DataFrame()
    # band is the level 0 index
    dfBands = df.columns.unique(level=0).values
    for band in dfBands:
        subdf = df[band]
        columnFormat = '{0}{1}' if camelCase else '{0}_{1}'
        newColumns = {c: columnFormat.format(band, c)
                      for c in subdf.columns if c not in noDupCols}
        cols = list(newColumns.keys())
        newDf = pd.concat([newDf, subdf[cols].rename(columns=newColumns)], axis=1)

    # Band must be present in the input and output or else column is all NaN:
    presentBands = dfBands if inputBands is None else list(set(inputBands).intersection(dfBands))
    # Get the unexploded columns from any present band's partition
    noDupDf = df[presentBands[0]][noDupCols]
    newDf = pd.concat([noDupDf, newDf], axis=1)
    return newDf
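

# Illustrative sketch (not part of the pipeline and never called by it): how
# `flattenFilters` reshapes a two-band, two-level column index.  The column
# name 'psfFlux' is invented for this example only.
def _flattenFiltersExample():
    bands = ['g', 'r']
    columns = pd.MultiIndex.from_product([bands, ['coord_ra', 'coord_dec', 'psfFlux']])
    df = pd.DataFrame(np.ones((3, 6)), columns=columns)
    # With camelCase=True the result has the single-level columns
    # ['coord_ra', 'coord_dec', 'gpsfFlux', 'rpsfFlux'].
    return flattenFilters(df, camelCase=True)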


class WriteObjectTableConnections(pipeBase.PipelineTaskConnections,
                                  defaultTemplates={"coaddName": "deep"},
                                  dimensions=("tract", "patch", "skymap")):
    inputCatalogMeas = connectionTypes.Input(
        doc="Catalog of source measurements on the deepCoadd.",
        dimensions=("tract", "patch", "band", "skymap"),
        storageClass="SourceCatalog",
        name="{coaddName}Coadd_meas",
        multiple=True
    )
    inputCatalogForcedSrc = connectionTypes.Input(
        doc="Catalog of forced measurements (shape and position parameters held fixed) on the deepCoadd.",
        dimensions=("tract", "patch", "band", "skymap"),
        storageClass="SourceCatalog",
        name="{coaddName}Coadd_forced_src",
        multiple=True
    )
    inputCatalogRef = connectionTypes.Input(
        doc="Catalog marking the primary detection (which band provides a good shape and position) "
            "for each detection in deepCoadd_mergeDet.",
        dimensions=("tract", "patch", "skymap"),
        storageClass="SourceCatalog",
        name="{coaddName}Coadd_ref"
    )
    outputCatalog = connectionTypes.Output(
        doc="A vertical concatenation of the deepCoadd_{ref|meas|forced_src} catalogs, "
            "stored as a DataFrame with a multi-level column index per-patch.",
        dimensions=("tract", "patch", "skymap"),
        storageClass="DataFrame",
        name="{coaddName}Coadd_obj"
    )


class WriteObjectTableConfig(pipeBase.PipelineTaskConfig,
                             pipelineConnections=WriteObjectTableConnections):
    engine = pexConfig.Field(
        dtype=str,
        default="pyarrow",
        doc="Parquet engine for writing (pyarrow or fastparquet)"
    )
    coaddName = pexConfig.Field(
        dtype=str,
        default="deep",
        doc="Name of coadd"
    )


class WriteObjectTableTask(CmdLineTask, pipeBase.PipelineTask):
    """Write filter-merged source tables to parquet.
    """
    _DefaultName = "writeObjectTable"
    ConfigClass = WriteObjectTableConfig
    RunnerClass = MergeSourcesRunner

    # Names of table datasets to be merged
    inputDatasets = ('forced_src', 'meas', 'ref')

    # Tag of output dataset written by `MergeSourcesTask.write`
    outputDataset = 'obj'

    def __init__(self, butler=None, schema=None, **kwargs):
        # It is a shame that this class can't use the default init for
        # CmdLineTask, but doing so would require its own special task
        # runner, which is many more lines of specialization, so this is
        # how it is for now.
        super().__init__(**kwargs)

    def runDataRef(self, patchRefList):
        """!
        @brief Merge coadd sources from multiple bands. Calls @ref `run` which
        must be defined in subclasses that inherit from MergeSourcesTask.
        @param[in] patchRefList list of data references for each filter
        """
        catalogs = dict(self.readCatalog(patchRef) for patchRef in patchRefList)
        dataId = patchRefList[0].dataId
        mergedCatalog = self.run(catalogs, tract=dataId['tract'], patch=dataId['patch'])
        self.write(patchRefList[0], ParquetTable(dataFrame=mergedCatalog))

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)

        measDict = {ref.dataId['band']: {'meas': cat} for ref, cat in
                    zip(inputRefs.inputCatalogMeas, inputs['inputCatalogMeas'])}
        forcedSourceDict = {ref.dataId['band']: {'forced_src': cat} for ref, cat in
                            zip(inputRefs.inputCatalogForcedSrc, inputs['inputCatalogForcedSrc'])}

        catalogs = {}
        for band in measDict.keys():
            catalogs[band] = {'meas': measDict[band]['meas'],
                              'forced_src': forcedSourceDict[band]['forced_src'],
                              'ref': inputs['inputCatalogRef']}
        dataId = butlerQC.quantum.dataId
        df = self.run(catalogs=catalogs, tract=dataId['tract'], patch=dataId['patch'])
        outputs = pipeBase.Struct(outputCatalog=df)
        butlerQC.put(outputs, outputRefs)

    @classmethod
    def _makeArgumentParser(cls):
        """Create a suitable ArgumentParser.

        We will use the ArgumentParser to get a list of data
        references for patches; the RunnerClass will sort them into lists
        of data references for the same patch.

        References the first of self.inputDatasets, rather than
        self.inputDataset.
        """
        return makeMergeArgumentParser(cls._DefaultName, cls.inputDatasets[0])

    def readCatalog(self, patchRef):
        """Read input catalogs.

        Read all the input datasets given by the 'inputDatasets'
        attribute.

        Parameters
        ----------
        patchRef : `lsst.daf.persistence.ButlerDataRef`
            Data reference for patch.

        Returns
        -------
        Tuple consisting of band name and a dict of catalogs, keyed by
        dataset name.
        """
        band = patchRef.get(self.config.coaddName + "Coadd_filterLabel", immediate=True).bandLabel
        catalogDict = {}
        for dataset in self.inputDatasets:
            catalog = patchRef.get(self.config.coaddName + "Coadd_" + dataset, immediate=True)
            self.log.info("Read %d sources from %s for band %s: %s" %
                          (len(catalog), dataset, band, patchRef.dataId))
            catalogDict[dataset] = catalog
        return band, catalogDict

    def run(self, catalogs, tract, patch):
        """Merge multiple catalogs.

        Parameters
        ----------
        catalogs : `dict`
            Mapping from filter names to dict of catalogs.
        tract : `int`
            tractId to use for the tractId column.
        patch : `str`
            patchId to use for the patchId column.

        Returns
        -------
        catalog : `pandas.DataFrame`
            Merged dataframe.
        """

        dfs = []
        for filt, tableDict in catalogs.items():
            for dataset, table in tableDict.items():
                # Convert afwTable to pandas DataFrame
                df = table.asAstropy().to_pandas().set_index('id', drop=True)

                # Sort columns by name, to ensure matching schema among patches
                df = df.reindex(sorted(df.columns), axis=1)
                df['tractId'] = tract
                df['patchId'] = patch

                # Make columns a 3-level MultiIndex
                df.columns = pd.MultiIndex.from_tuples([(dataset, filt, c) for c in df.columns],
                                                       names=('dataset', 'band', 'column'))
                dfs.append(df)

        catalog = functools.reduce(lambda d1, d2: d1.join(d2), dfs)
        return catalog

    def write(self, patchRef, catalog):
        """Write the output.

        Parameters
        ----------
        catalog : `ParquetTable`
            Catalog to write.
        patchRef : `lsst.daf.persistence.ButlerDataRef`
            Data reference for patch.
        """
        patchRef.put(catalog, self.config.coaddName + "Coadd_" + self.outputDataset)
        # Since the filter isn't actually part of the data ID for the dataset
        # we're saving, it's confusing to see it in the log message, even if
        # the butler simply ignores it.
        mergeDataId = patchRef.dataId.copy()
        del mergeDataId["filter"]
        self.log.info("Wrote merged catalog: %s" % (mergeDataId,))

    def writeMetadata(self, dataRefList):
        """No metadata to write, and not sure how to write it for a list of
        dataRefs.
        """
        pass
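

# Illustrative sketch (not used by the pipeline): the column layout produced by
# WriteObjectTableTask.run.  Each per-band, per-dataset catalog becomes a block
# of columns under a ('dataset', 'band', 'column') MultiIndex, joined on 'id'.
# The measurement column names below are examples only.
def _objectTableColumnLayoutExample():
    df = pd.DataFrame({'base_SdssShape_xx': [1.0], 'base_SdssShape_yy': [2.0]},
                      index=pd.Index([42], name='id'))
    df.columns = pd.MultiIndex.from_tuples([('meas', 'g', c) for c in df.columns],
                                           names=('dataset', 'band', 'column'))
    return df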


class WriteSourceTableConnections(pipeBase.PipelineTaskConnections,
                                  defaultTemplates={"catalogType": ""},
                                  dimensions=("instrument", "visit", "detector")):

    catalog = connectionTypes.Input(
        doc="Input full-depth catalog of sources produced by CalibrateTask",
        name="{catalogType}src",
        storageClass="SourceCatalog",
        dimensions=("instrument", "visit", "detector")
    )
    outputCatalog = connectionTypes.Output(
        doc="Catalog of sources, `src` in Parquet format",
        name="{catalogType}source",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector")
    )


class WriteSourceTableConfig(pipeBase.PipelineTaskConfig,
                             pipelineConnections=WriteSourceTableConnections):
    doApplyExternalPhotoCalib = pexConfig.Field(
        dtype=bool,
        default=False,
        doc=("Add local photoCalib columns from the calexp.photoCalib? Should only be set True if "
             "generating Source Tables from older src tables which do not already have local calib columns")
    )
    doApplyExternalSkyWcs = pexConfig.Field(
        dtype=bool,
        default=False,
        doc=("Add local WCS columns from the calexp.wcs? Should only be set True if "
             "generating Source Tables from older src tables which do not already have local calib columns")
    )


class WriteSourceTableTask(CmdLineTask, pipeBase.PipelineTask):
    """Write source table to parquet.
    """
    _DefaultName = "writeSourceTable"
    ConfigClass = WriteSourceTableConfig

    def runDataRef(self, dataRef):
        src = dataRef.get('src')
        if self.config.doApplyExternalPhotoCalib or self.config.doApplyExternalSkyWcs:
            src = self.addCalibColumns(src, dataRef)

        ccdVisitId = dataRef.get('ccdExposureId')
        result = self.run(src, ccdVisitId=ccdVisitId)
        dataRef.put(result.table, 'source')

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)
        inputs['ccdVisitId'] = butlerQC.quantum.dataId.pack("visit_detector")
        result = self.run(**inputs).table
        outputs = pipeBase.Struct(outputCatalog=result.toDataFrame())
        butlerQC.put(outputs, outputRefs)

    def run(self, catalog, ccdVisitId=None):
        """Convert a `src` catalog to parquet.

        Parameters
        ----------
        catalog : `afwTable.SourceCatalog`
            Catalog to be converted.
        ccdVisitId : `int`
            ccdVisitId to be added as a column.

        Returns
        -------
        result : `lsst.pipe.base.Struct`
            ``table``
                `ParquetTable` version of the input catalog.
        """
        self.log.info("Generating parquet table from src catalog %s", ccdVisitId)
        df = catalog.asAstropy().to_pandas().set_index('id', drop=True)
        df['ccdVisitId'] = ccdVisitId
        return pipeBase.Struct(table=ParquetTable(dataFrame=df))

    def addCalibColumns(self, catalog, dataRef):
        """Add columns with local calibration evaluated at each centroid,
        for backwards compatibility with old repos.

        This exists for the purpose of converting old src catalogs
        (which don't have the expected local calib columns) to Source Tables.

        Parameters
        ----------
        catalog : `afwTable.SourceCatalog`
            Catalog to which calib columns will be added.
        dataRef : `lsst.daf.persistence.ButlerDataRef`
            Data reference for fetching the calibs from disk.

        Returns
        -------
        newCat : `afwTable.SourceCatalog`
            Source Catalog with requested local calib columns.
        """
        mapper = afwTable.SchemaMapper(catalog.schema)
        measureConfig = SingleFrameMeasurementTask.ConfigClass()
        measureConfig.doReplaceWithNoise = False

        # Just need the WCS or the PhotoCalib attached to an exposure
        exposure = dataRef.get('calexp_sub',
                               bbox=lsst.geom.Box2I(lsst.geom.Point2I(0, 0), lsst.geom.Point2I(0, 0)))

        mapper = afwTable.SchemaMapper(catalog.schema)
        mapper.addMinimalSchema(catalog.schema, True)
        schema = mapper.getOutputSchema()

        exposureIdInfo = dataRef.get("expIdInfo")
        measureConfig.plugins.names = []
        if self.config.doApplyExternalSkyWcs:
            plugin = 'base_LocalWcs'
            if plugin in schema:
                raise RuntimeError(f"{plugin} already in src catalog. Set doApplyExternalSkyWcs=False")
            else:
                measureConfig.plugins.names.add(plugin)

        if self.config.doApplyExternalPhotoCalib:
            plugin = 'base_LocalPhotoCalib'
            if plugin in schema:
                raise RuntimeError(f"{plugin} already in src catalog. Set doApplyExternalPhotoCalib=False")
            else:
                measureConfig.plugins.names.add(plugin)

        measurement = SingleFrameMeasurementTask(config=measureConfig, schema=schema)
        newCat = afwTable.SourceCatalog(schema)
        newCat.extend(catalog, mapper=mapper)
        measurement.run(measCat=newCat, exposure=exposure, exposureId=exposureIdInfo.expId)
        return newCat

    def writeMetadata(self, dataRef):
        """No metadata to write.
        """
        pass

    @classmethod
    def _makeArgumentParser(cls):
        parser = ArgumentParser(name=cls._DefaultName)
        parser.add_id_argument("--id", 'src',
                               help="data ID, e.g. --id visit=12345 ccd=0")
        return parser


class PostprocessAnalysis(object):
    """Calculate columns from a ParquetTable.

    This object manages and organizes an arbitrary set of computations
    on a catalog. The catalog is defined by a
    `lsst.pipe.tasks.parquetTable.ParquetTable` object (or list thereof), such
    as a `deepCoadd_obj` dataset, and the computations are defined by a
    collection of `lsst.pipe.tasks.functors.Functor` objects (or, equivalently,
    a `CompositeFunctor`).

    After the object is initialized, accessing the `.df` attribute (which
    holds the `pandas.DataFrame` containing the results of the calculations)
    triggers computation of said dataframe.

    One of the conveniences of using this object is the ability to define a
    desired common filter for all functors. This enables the same functor
    collection to be passed to several different `PostprocessAnalysis` objects
    without having to change the original functor collection, since the `filt`
    keyword argument of this object triggers an overwrite of the `filt`
    property for all functors in the collection.

    This object also allows a list of refFlags to be passed, and defines a set
    of default refFlags that are always included even if not requested.

    If a list of `ParquetTable` objects is passed, rather than a single one,
    then the calculations will be mapped over all the input catalogs. In
    principle, it should be straightforward to parallelize this activity, but
    initial tests have failed (see TODO in code comments).

    Parameters
    ----------
    parq : `lsst.pipe.tasks.ParquetTable` (or list of such)
        Source catalog(s) for computation.

    functors : `list`, `dict`, or `lsst.pipe.tasks.functors.CompositeFunctor`
        Computations to do (functors that act on `parq`).
        If a dict, the output DataFrame will have columns keyed accordingly.
        If a list, the column keys will come from the
        `.shortname` attribute of each functor.

    filt : `str`, optional
        Filter in which to calculate. If provided,
        this will overwrite any existing `.filt` attribute
        of the provided functors.

    flags : `list`, optional
        List of flags (per-band) to include in output table.

    refFlags : `list`, optional
        List of refFlags (only reference band) to include in output table.
    """
    _defaultRefFlags = []
    _defaultFuncs = (('coord_ra', RAColumn()),
                     ('coord_dec', DecColumn()))

    def __init__(self, parq, functors, filt=None, flags=None, refFlags=None):
        self.parq = parq
        self.functors = functors

        self.filt = filt
        self.flags = list(flags) if flags is not None else []
        self.refFlags = list(self._defaultRefFlags)
        if refFlags is not None:
            self.refFlags += list(refFlags)

        self._df = None

    @property
    def defaultFuncs(self):
        funcs = dict(self._defaultFuncs)
        return funcs

    @property
    def func(self):
        additionalFuncs = self.defaultFuncs
        additionalFuncs.update({flag: Column(flag, dataset='ref') for flag in self.refFlags})
        additionalFuncs.update({flag: Column(flag, dataset='meas') for flag in self.flags})

        if isinstance(self.functors, CompositeFunctor):
            func = self.functors
        else:
            func = CompositeFunctor(self.functors)

        func.funcDict.update(additionalFuncs)
        func.filt = self.filt

        return func

    @property
    def noDupCols(self):
        return [name for name, func in self.func.funcDict.items() if func.noDup or func.dataset == 'ref']

    @property
    def df(self):
        if self._df is None:
            self.compute()
        return self._df

    def compute(self, dropna=False, pool=None):
        # map over multiple parquet tables
        if type(self.parq) in (list, tuple):
            if pool is None:
                dflist = [self.func(parq, dropna=dropna) for parq in self.parq]
            else:
                # TODO: Figure out why this doesn't work (pyarrow pickling issues?)
                dflist = pool.map(functools.partial(self.func, dropna=dropna), self.parq)
            self._df = pd.concat(dflist)
        else:
            self._df = self.func(self.parq, dropna=dropna)

        return self._df
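

# Illustrative sketch (requires the LSST stack and a deepCoadd_obj ParquetTable;
# not used by the pipeline): computing a few columns with PostprocessAnalysis.
# The flux column and flag names below are examples only.
def _postprocessAnalysisExample(parq):
    funcs = {'ra': RAColumn(), 'dec': DecColumn(),
             'cModelFlux': Column('modelfit_CModel_instFlux', dataset='meas')}
    analysis = PostprocessAnalysis(parq, funcs, filt='i',
                                   refFlags=['detect_isPrimary'])
    # Accessing .df triggers the computation.
    return analysis.df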


class TransformCatalogBaseConnections(pipeBase.PipelineTaskConnections,
                                      dimensions=()):
    """Expected Connections for subclasses of TransformCatalogBaseTask.

    Must be subclassed.
    """
    inputCatalog = connectionTypes.Input(
        name="",
        storageClass="DataFrame",
    )
    outputCatalog = connectionTypes.Output(
        name="",
        storageClass="DataFrame",
    )


class TransformCatalogBaseConfig(pipeBase.PipelineTaskConfig,
                                 pipelineConnections=TransformCatalogBaseConnections):
    functorFile = pexConfig.Field(
        dtype=str,
        doc='Path to YAML file specifying functors to be computed',
        default=None,
        optional=True
    )


class TransformCatalogBaseTask(CmdLineTask, pipeBase.PipelineTask):
    """Base class for transforming/standardizing a catalog by applying functors
    that convert units and apply calibrations.

    The purpose of this task is to perform a set of computations on
    an input `ParquetTable` dataset (such as `deepCoadd_obj`) and write the
    results to a new dataset (which needs to be declared in an `outputDataset`
    attribute).

    The calculations to be performed are defined in a YAML file that specifies
    a set of functors to be computed, provided as
    a `--functorFile` config parameter. An example of such a YAML file
    is the following:

    funcs:
        psfMag:
            functor: Mag
            args:
                - base_PsfFlux
            filt: HSC-G
            dataset: meas
        cmodel_magDiff:
            functor: MagDiff
            args:
                - modelfit_CModel
                - base_PsfFlux
            filt: HSC-G
        gauss_magDiff:
            functor: MagDiff
            args:
                - base_GaussianFlux
                - base_PsfFlux
            filt: HSC-G
        count:
            functor: Column
            args:
                - base_InputCount_value
            filt: HSC-G
        deconvolved_moments:
            functor: DeconvolvedMoments
            filt: HSC-G
            dataset: forced_src
    refFlags:
        - calib_psfUsed
        - merge_measurement_i
        - merge_measurement_r
        - merge_measurement_z
        - merge_measurement_y
        - merge_measurement_g
        - base_PixelFlags_flag_inexact_psfCenter
        - detect_isPrimary

    The names for each entry under "funcs" will become the names of columns in
    the output dataset. All the functors referenced are defined in
    `lsst.pipe.tasks.functors`. Positional arguments to be passed to each
    functor are in the `args` list, and any additional entries for each column
    other than "functor" or "args" (e.g., `'filt'`, `'dataset'`) are treated as
    keyword arguments to be passed to the functor initialization.

    The "refFlags" entry is a shortcut for a set of `Column` functors that take
    the named columns unchanged from the `'ref'` dataset.

    The "flags" entry will be expanded out per band.

    This task uses the `lsst.pipe.tasks.postprocess.PostprocessAnalysis` object
    to organize and execute the calculations.
    """
    @property
    def _DefaultName(self):
        raise NotImplementedError('Subclass must define "_DefaultName" attribute')

    @property
    def outputDataset(self):
        raise NotImplementedError('Subclass must define "outputDataset" attribute')

    @property
    def inputDataset(self):
        raise NotImplementedError('Subclass must define "inputDataset" attribute')

    @property
    def ConfigClass(self):
        raise NotImplementedError('Subclass must define "ConfigClass" attribute')

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        if self.config.functorFile:
            self.log.info('Loading transform functor definitions from %s',
                          self.config.functorFile)
            self.funcs = CompositeFunctor.from_file(self.config.functorFile)
            self.funcs.update(dict(PostprocessAnalysis._defaultFuncs))
        else:
            self.funcs = None

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)
        if self.funcs is None:
            raise ValueError("config.functorFile is None. "
                             "Must be a valid path to yaml in order to run Task as a PipelineTask.")
        result = self.run(parq=inputs['inputCatalog'], funcs=self.funcs,
                          dataId=outputRefs.outputCatalog.dataId.full)
        outputs = pipeBase.Struct(outputCatalog=result)
        butlerQC.put(outputs, outputRefs)

    def runDataRef(self, dataRef):
        parq = dataRef.get()
        if self.funcs is None:
            raise ValueError("config.functorFile is None. "
                             "Must be a valid path to yaml in order to run as a CommandlineTask.")
        df = self.run(parq, funcs=self.funcs, dataId=dataRef.dataId)
        self.write(df, dataRef)
        return df

    def run(self, parq, funcs=None, dataId=None, band=None):
        """Do postprocessing calculations.

        Takes a `ParquetTable` object and dataId,
        returns a dataframe with results of postprocessing calculations.

        Parameters
        ----------
        parq : `lsst.pipe.tasks.parquetTable.ParquetTable`
            ParquetTable from which calculations are done.
        funcs : `lsst.pipe.tasks.functors.Functors`
            Functors to apply to the table's columns.
        dataId : `dict`, optional
            Used to add a `patchId` column to the output dataframe.
        band : `str`, optional
            Filter band that is being processed.

        Returns
        -------
        `pandas.DataFrame`
        """
        self.log.info("Transforming/standardizing the source table dataId: %s", dataId)

        df = self.transform(band, parq, funcs, dataId).df
        self.log.info("Made a table of %d columns and %d rows", len(df.columns), len(df))
        return df

    def getFunctors(self):
        return self.funcs

    def getAnalysis(self, parq, funcs=None, band=None):
        if funcs is None:
            funcs = self.funcs
        analysis = PostprocessAnalysis(parq, funcs, filt=band)
        return analysis

    def transform(self, band, parq, funcs, dataId):
        analysis = self.getAnalysis(parq, funcs=funcs, band=band)
        df = analysis.df
        if dataId is not None:
            for key, value in dataId.items():
                df[str(key)] = value

        return pipeBase.Struct(
            df=df,
            analysis=analysis
        )

    def write(self, df, parqRef):
        parqRef.put(ParquetTable(dataFrame=df), self.outputDataset)

    def writeMetadata(self, dataRef):
        """No metadata to write.
        """
        pass
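

# Illustrative sketch (not used by the pipeline): how a functor YAML file like
# the one shown in the TransformCatalogBaseTask docstring is loaded.  The path
# 'functors.yaml' is a placeholder.
def _loadFunctorsExample():
    funcs = CompositeFunctor.from_file('functors.yaml')
    funcs.update(dict(PostprocessAnalysis._defaultFuncs))
    # The keys of funcs.funcDict become the column names of the output table.
    return funcs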


class TransformObjectCatalogConnections(pipeBase.PipelineTaskConnections,
                                        defaultTemplates={"coaddName": "deep"},
                                        dimensions=("tract", "patch", "skymap")):
    inputCatalog = connectionTypes.Input(
        doc="The vertical concatenation of the deepCoadd_{ref|meas|forced_src} catalogs, "
            "stored as a DataFrame with a multi-level column index per-patch.",
        dimensions=("tract", "patch", "skymap"),
        storageClass="DataFrame",
        name="{coaddName}Coadd_obj",
        deferLoad=True,
    )
    outputCatalog = connectionTypes.Output(
        doc="Per-Patch Object Table of columns transformed from the deepCoadd_obj table per the standard "
            "data model.",
        dimensions=("tract", "patch", "skymap"),
        storageClass="DataFrame",
        name="objectTable"
    )


class TransformObjectCatalogConfig(TransformCatalogBaseConfig,
                                   pipelineConnections=TransformObjectCatalogConnections):
    coaddName = pexConfig.Field(
        dtype=str,
        default="deep",
        doc="Name of coadd"
    )
    # TODO: remove in DM-27177
    filterMap = pexConfig.DictField(
        keytype=str,
        itemtype=str,
        default={},
        doc=("Dictionary mapping full filter name to short one for column name munging. "
             "These filters determine the output columns no matter what filters the "
             "input data actually contain."),
        deprecated=("Coadds are now identified by the band, so this transform is unused. "
                    "Will be removed after v22.")
    )
    outputBands = pexConfig.ListField(
        dtype=str,
        default=None,
        optional=True,
        doc=("These bands and only these bands will appear in the output,"
             " NaN-filled if the input does not include them."
             " If None, then use all bands found in the input.")
    )
    camelCase = pexConfig.Field(
        dtype=bool,
        default=True,
        doc=("Write per-band column names with camelCase, else underscore. "
             "For example: gPsFlux instead of g_PsFlux.")
    )
    multilevelOutput = pexConfig.Field(
        dtype=bool,
        default=False,
        doc=("Whether results dataframe should have a multilevel column index (True) or be flat "
             "and name-munged (False).")
    )


class TransformObjectCatalogTask(TransformCatalogBaseTask):
    """Produce a flattened Object Table to match the format specified in
    sdm_schemas.

    Do the same set of postprocessing calculations on all bands.

    This is identical to `TransformCatalogBaseTask`, except that it does the
    specified functor calculations for all filters present in the
    input `deepCoadd_obj` table. Any specific `"filt"` keywords specified
    by the YAML file will be superseded.
    """
    _DefaultName = "transformObjectCatalog"
    ConfigClass = TransformObjectCatalogConfig

    # Used by Gen 2 runDataRef only:
    inputDataset = 'deepCoadd_obj'
    outputDataset = 'objectTable'

    @classmethod
    def _makeArgumentParser(cls):
        parser = ArgumentParser(name=cls._DefaultName)
        parser.add_id_argument("--id", cls.inputDataset,
                               ContainerClass=CoaddDataIdContainer,
                               help="data ID, e.g. --id tract=12345 patch=1,2")
        return parser

    def run(self, parq, funcs=None, dataId=None, band=None):
        # NOTE: band kwarg is ignored here.
        dfDict = {}
        analysisDict = {}
        templateDf = pd.DataFrame()

        if isinstance(parq, DeferredDatasetHandle):
            columns = parq.get(component='columns')
            inputBands = columns.unique(level=1).values
        else:
            inputBands = parq.columnLevelNames['band']

        outputBands = self.config.outputBands if self.config.outputBands else inputBands

        # Perform transform for data of filters that exist in parq.
        for inputBand in inputBands:
            if inputBand not in outputBands:
                self.log.info("Ignoring %s band data in the input", inputBand)
                continue
            self.log.info("Transforming the catalog of band %s", inputBand)
            result = self.transform(inputBand, parq, funcs, dataId)
            dfDict[inputBand] = result.df
            analysisDict[inputBand] = result.analysis
            if templateDf.empty:
                templateDf = result.df

        # Fill NaNs in columns of other wanted bands
        for filt in outputBands:
            if filt not in dfDict:
                self.log.info("Adding empty columns for band %s", filt)
                dfDict[filt] = pd.DataFrame().reindex_like(templateDf)

        # This makes a multilevel column index, with band as first level
        df = pd.concat(dfDict, axis=1, names=['band', 'column'])

        if not self.config.multilevelOutput:
            noDupCols = list(set.union(*[set(v.noDupCols) for v in analysisDict.values()]))
            if dataId is not None:
                noDupCols += list(dataId.keys())
            df = flattenFilters(df, noDupCols=noDupCols, camelCase=self.config.camelCase,
                                inputBands=inputBands)

        self.log.info("Made a table of %d columns and %d rows", len(df.columns), len(df))
        return df
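

# Illustrative sketch (hypothetical config override, not used by the pipeline):
# restricting the flattened Object Table to a fixed set of bands, with
# camelCase per-band column names.
def _transformObjectCatalogConfigExample():
    config = TransformObjectCatalogConfig()
    config.outputBands = ['g', 'r', 'i', 'z', 'y']  # absent bands are NaN-filled
    config.camelCase = True  # e.g. gPsFlux rather than g_PsFlux
    return config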


class TractObjectDataIdContainer(CoaddDataIdContainer):

    def makeDataRefList(self, namespace):
        """Make self.refList from self.idList.

        Generate a list of data references given tract and/or patch.
        This was adapted from `TractQADataIdContainer`, which was
        `TractDataIdContainer` modified to not require "filter".
        Only existing dataRefs are returned.
        """
        def getPatchRefList(tract):
            return [namespace.butler.dataRef(datasetType=self.datasetType,
                                             tract=tract.getId(),
                                             patch="%d,%d" % patch.getIndex()) for patch in tract]

        tractRefs = defaultdict(list)  # Data references for each tract
        for dataId in self.idList:
            skymap = self.getSkymap(namespace)

            if "tract" in dataId:
                tractId = dataId["tract"]
                if "patch" in dataId:
                    tractRefs[tractId].append(namespace.butler.dataRef(datasetType=self.datasetType,
                                                                       tract=tractId,
                                                                       patch=dataId['patch']))
                else:
                    tractRefs[tractId] += getPatchRefList(skymap[tractId])
            else:
                tractRefs = dict((tract.getId(), tractRefs.get(tract.getId(), []) + getPatchRefList(tract))
                                 for tract in skymap)
        outputRefList = []
        for tractRefList in tractRefs.values():
            existingRefs = [ref for ref in tractRefList if ref.datasetExists()]
            outputRefList.append(existingRefs)

        self.refList = outputRefList


class ConsolidateObjectTableConnections(pipeBase.PipelineTaskConnections,
                                        dimensions=("tract", "skymap")):
    inputCatalogs = connectionTypes.Input(
        doc="Per-Patch objectTables conforming to the standard data model.",
        name="objectTable",
        storageClass="DataFrame",
        dimensions=("tract", "patch", "skymap"),
        multiple=True,
    )
    outputCatalog = connectionTypes.Output(
        doc="Per-tract vertical concatenation of the input objectTables",
        name="objectTable_tract",
        storageClass="DataFrame",
        dimensions=("tract", "skymap"),
    )


class ConsolidateObjectTableConfig(pipeBase.PipelineTaskConfig,
                                   pipelineConnections=ConsolidateObjectTableConnections):
    coaddName = pexConfig.Field(
        dtype=str,
        default="deep",
        doc="Name of coadd"
    )


class ConsolidateObjectTableTask(CmdLineTask, pipeBase.PipelineTask):
    """Write patch-merged source tables to a tract-level parquet file.

    Concatenates the `objectTable` list into a per-tract `objectTable_tract`.
    """
    _DefaultName = "consolidateObjectTable"
    ConfigClass = ConsolidateObjectTableConfig

    inputDataset = 'objectTable'
    outputDataset = 'objectTable_tract'

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)
        self.log.info("Concatenating %s per-patch Object Tables",
                      len(inputs['inputCatalogs']))
        df = pd.concat(inputs['inputCatalogs'])
        butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs)

    @classmethod
    def _makeArgumentParser(cls):
        parser = ArgumentParser(name=cls._DefaultName)

        parser.add_id_argument("--id", cls.inputDataset,
                               help="data ID, e.g. --id tract=12345",
                               ContainerClass=TractObjectDataIdContainer)
        return parser

    def runDataRef(self, patchRefList):
        df = pd.concat([patchRef.get().toDataFrame() for patchRef in patchRefList])
        patchRefList[0].put(ParquetTable(dataFrame=df), self.outputDataset)

    def writeMetadata(self, dataRef):
        """No metadata to write.
        """
        pass


class TransformSourceTableConnections(pipeBase.PipelineTaskConnections,
                                      defaultTemplates={"catalogType": ""},
                                      dimensions=("instrument", "visit", "detector")):

    inputCatalog = connectionTypes.Input(
        doc="Wide input catalog of sources produced by WriteSourceTableTask",
        name="{catalogType}source",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector"),
        deferLoad=True
    )
    outputCatalog = connectionTypes.Output(
        doc="Narrower, per-detector Source Table transformed and converted per a "
            "specified set of functors",
        name="{catalogType}sourceTable",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector")
    )


class TransformSourceTableConfig(TransformCatalogBaseConfig,
                                 pipelineConnections=TransformSourceTableConnections):
    pass


class TransformSourceTableTask(TransformCatalogBaseTask):
    """Transform/standardize a source catalog.
    """
    _DefaultName = "transformSourceTable"
    ConfigClass = TransformSourceTableConfig

    inputDataset = 'source'
    outputDataset = 'sourceTable'

    @classmethod
    def _makeArgumentParser(cls):
        parser = ArgumentParser(name=cls._DefaultName)
        parser.add_id_argument("--id", datasetType=cls.inputDataset,
                               level="sensor",
                               help="data ID, e.g. --id visit=12345 ccd=0")
        return parser

    def runDataRef(self, dataRef):
        """Override to specify band label to run()."""
        parq = dataRef.get()
        funcs = self.getFunctors()
        band = dataRef.get("calexp_filterLabel", immediate=True).bandLabel
        df = self.run(parq, funcs=funcs, dataId=dataRef.dataId, band=band)
        self.write(df, dataRef)
        return df


class ConsolidateVisitSummaryConnections(pipeBase.PipelineTaskConnections,
                                         dimensions=("instrument", "visit",),
                                         defaultTemplates={"calexpType": ""}):
    calexp = connectionTypes.Input(
        doc="Processed exposures used for metadata",
        name="{calexpType}calexp",
        storageClass="ExposureF",
        dimensions=("instrument", "visit", "detector"),
        deferLoad=True,
        multiple=True,
    )
    visitSummary = connectionTypes.Output(
        doc=("Per-visit consolidated exposure metadata.  These catalogs use "
             "detector id for the id and are sorted for fast lookups of a "
             "detector."),
        name="{calexpType}visitSummary",
        storageClass="ExposureCatalog",
        dimensions=("instrument", "visit"),
    )


class ConsolidateVisitSummaryConfig(pipeBase.PipelineTaskConfig,
                                    pipelineConnections=ConsolidateVisitSummaryConnections):
    """Config for ConsolidateVisitSummaryTask"""
    pass


class ConsolidateVisitSummaryTask(pipeBase.PipelineTask, pipeBase.CmdLineTask):
    """Task to consolidate per-detector visit metadata.

    This task aggregates the following metadata from all the detectors in a
    single visit into an exposure catalog:
    - The visitInfo.
    - The wcs.
    - The photoCalib.
    - The physical_filter and band (if available).
    - The psf size, shape, and effective area at the center of the detector.
    - The corners of the bounding box in right ascension/declination.

    Other quantities such as Detector, Psf, ApCorrMap, and TransmissionCurve
    are not persisted here because of storage concerns, and because of their
    limited utility as summary statistics.

    Tests for this task are performed in ci_hsc_gen3.
    """
    _DefaultName = "consolidateVisitSummary"
    ConfigClass = ConsolidateVisitSummaryConfig

    @classmethod
    def _makeArgumentParser(cls):
        parser = ArgumentParser(name=cls._DefaultName)

        parser.add_id_argument("--id", "calexp",
                               help="data ID, e.g. --id visit=12345",
                               ContainerClass=VisitDataIdContainer)
        return parser

    def writeMetadata(self, dataRef):
        """No metadata to persist, so override to remove metadata persistence.
        """
        pass

    def writeConfig(self, butler, clobber=False, doBackup=True):
        """No config to persist, so override to remove config persistence.
        """
        pass

    def runDataRef(self, dataRefList):
        visit = dataRefList[0].dataId['visit']

        self.log.debug("Concatenating metadata from %d per-detector calexps (visit %d)" %
                       (len(dataRefList), visit))

        expCatalog = self._combineExposureMetadata(visit, dataRefList, isGen3=False)

        dataRefList[0].put(expCatalog, 'visitSummary', visit=visit)

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        dataRefs = butlerQC.get(inputRefs.calexp)
        visit = dataRefs[0].dataId.byName()['visit']

        self.log.debug("Concatenating metadata from %d per-detector calexps (visit %d)" %
                       (len(dataRefs), visit))

        expCatalog = self._combineExposureMetadata(visit, dataRefs)

        butlerQC.put(expCatalog, outputRefs.visitSummary)

    def _combineExposureMetadata(self, visit, dataRefs, isGen3=True):
        """Make a combined exposure catalog from a list of dataRefs.

        These dataRefs must point to exposures with wcs, summaryStats,
        and other visit metadata.

        Parameters
        ----------
        visit : `int`
            Visit identification number.
        dataRefs : `list`
            List of dataRefs in visit.  May be a list of
            `lsst.daf.persistence.ButlerDataRef` (Gen2) or
            `lsst.daf.butler.DeferredDatasetHandle` (Gen3).
        isGen3 : `bool`, optional
            Specifies if this is a Gen3 list of datarefs.

        Returns
        -------
        visitSummary : `lsst.afw.table.ExposureCatalog`
            Exposure catalog with per-detector summary information.
        """
        schema = self._makeVisitSummarySchema()
        cat = afwTable.ExposureCatalog(schema)
        cat.resize(len(dataRefs))

        cat['visit'] = visit

        for i, dataRef in enumerate(dataRefs):
            if isGen3:
                visitInfo = dataRef.get(component='visitInfo')
                filterLabel = dataRef.get(component='filterLabel')
                summaryStats = dataRef.get(component='summaryStats')
                detector = dataRef.get(component='detector')
                wcs = dataRef.get(component='wcs')
                photoCalib = dataRef.get(component='photoCalib')
                bbox = dataRef.get(component='bbox')
                validPolygon = dataRef.get(component='validPolygon')
            else:
                # Note that we need to read the calexp because there is
                # no magic access to the psf except through the exposure.
                gen2_read_bbox = lsst.geom.BoxI(lsst.geom.PointI(0, 0), lsst.geom.PointI(1, 1))
                exp = dataRef.get(datasetType='calexp_sub', bbox=gen2_read_bbox)
                visitInfo = exp.getInfo().getVisitInfo()
                filterLabel = dataRef.get("calexp_filterLabel")
                summaryStats = exp.getInfo().getSummaryStats()
                wcs = exp.getWcs()
                photoCalib = exp.getPhotoCalib()
                detector = exp.getDetector()
                bbox = dataRef.get(datasetType='calexp_bbox')
                validPolygon = exp.getInfo().getValidPolygon()

            rec = cat[i]
            rec.setBBox(bbox)
            rec.setVisitInfo(visitInfo)
            rec.setWcs(wcs)
            rec.setPhotoCalib(photoCalib)
            rec.setValidPolygon(validPolygon)

            rec['physical_filter'] = filterLabel.physicalLabel if filterLabel.hasPhysicalLabel() else ""
            rec['band'] = filterLabel.bandLabel if filterLabel.hasBandLabel() else ""
            rec.setId(detector.getId())
            rec['psfSigma'] = summaryStats.psfSigma
            rec['psfIxx'] = summaryStats.psfIxx
            rec['psfIyy'] = summaryStats.psfIyy
            rec['psfIxy'] = summaryStats.psfIxy
            rec['psfArea'] = summaryStats.psfArea
            rec['raCorners'][:] = summaryStats.raCorners
            rec['decCorners'][:] = summaryStats.decCorners
            rec['ra'] = summaryStats.ra
            rec['decl'] = summaryStats.decl
            rec['zenithDistance'] = summaryStats.zenithDistance
            rec['zeroPoint'] = summaryStats.zeroPoint
            rec['skyBg'] = summaryStats.skyBg
            rec['skyNoise'] = summaryStats.skyNoise
            rec['meanVar'] = summaryStats.meanVar

        metadata = dafBase.PropertyList()
        metadata.add("COMMENT", "Catalog id is detector id, sorted.")
        # We are looping over existing datarefs, so the following is true
        metadata.add("COMMENT", "Only detectors with data have entries.")
        cat.setMetadata(metadata)

        cat.sort()
        return cat

    def _makeVisitSummarySchema(self):
        """Make the schema for the visitSummary catalog."""
        schema = afwTable.ExposureTable.makeMinimalSchema()
        schema.addField('visit', type='I', doc='Visit number')
        schema.addField('physical_filter', type='String', size=32, doc='Physical filter')
        schema.addField('band', type='String', size=32, doc='Name of band')
        schema.addField('psfSigma', type='F',
                        doc='PSF model second-moments determinant radius (center of chip) (pixel)')
        schema.addField('psfArea', type='F',
                        doc='PSF model effective area (center of chip) (pixel**2)')
        schema.addField('psfIxx', type='F',
                        doc='PSF model Ixx (center of chip) (pixel**2)')
        schema.addField('psfIyy', type='F',
                        doc='PSF model Iyy (center of chip) (pixel**2)')
        schema.addField('psfIxy', type='F',
                        doc='PSF model Ixy (center of chip) (pixel**2)')
        schema.addField('raCorners', type='ArrayD', size=4,
                        doc='Right Ascension of bounding box corners (degrees)')
        schema.addField('decCorners', type='ArrayD', size=4,
                        doc='Declination of bounding box corners (degrees)')
        schema.addField('ra', type='D',
                        doc='Right Ascension of bounding box center (degrees)')
        schema.addField('decl', type='D',
                        doc='Declination of bounding box center (degrees)')
        schema.addField('zenithDistance', type='F',
                        doc='Zenith distance of bounding box center (degrees)')
        schema.addField('zeroPoint', type='F',
                        doc='Mean zeropoint in detector (mag)')
        schema.addField('skyBg', type='F',
                        doc='Average sky background (ADU)')
        schema.addField('skyNoise', type='F',
                        doc='Average sky noise (ADU)')
        schema.addField('meanVar', type='F',
                        doc='Mean variance of the weight plane (ADU**2)')

        return schema
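

# Illustrative sketch (requires a Gen3 butler repository; the repository path,
# collection, instrument, visit, and detector below are placeholders): reading
# a visitSummary written by ConsolidateVisitSummaryTask and looking up one
# detector's zero point.
def _readVisitSummaryExample():
    import lsst.daf.butler as dafButler
    butler = dafButler.Butler('REPO_PATH', collections=['SOME_COLLECTION'])
    visitSummary = butler.get('visitSummary', instrument='HSC', visit=1228)
    row = visitSummary.find(50)  # the catalog is sorted by detector id
    return row['zeroPoint']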


class VisitDataIdContainer(DataIdContainer):
    """DataIdContainer that groups sensor-level ids by visit.
    """

    def makeDataRefList(self, namespace):
        """Make self.refList from self.idList.

        Generate a list of data references grouped by visit.

        Parameters
        ----------
        namespace : `argparse.Namespace`
            Namespace used by `lsst.pipe.base.CmdLineTask` to parse command
            line arguments.
        """
        # Group by visits
        visitRefs = defaultdict(list)
        for dataId in self.idList:
            if "visit" in dataId:
                visitId = dataId["visit"]
                # Append all dataRefs in the subset to the entry for this visit
                subset = namespace.butler.subset(self.datasetType, dataId=dataId)
                visitRefs[visitId].extend([dataRef for dataRef in subset])

        outputRefList = []
        for refList in visitRefs.values():
            existingRefs = [ref for ref in refList if ref.datasetExists()]
            if existingRefs:
                outputRefList.append(existingRefs)

        self.refList = outputRefList


class ConsolidateSourceTableConnections(pipeBase.PipelineTaskConnections,
                                        defaultTemplates={"catalogType": ""},
                                        dimensions=("instrument", "visit")):
    inputCatalogs = connectionTypes.Input(
        doc="Input per-detector Source Tables",
        name="{catalogType}sourceTable",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector"),
        multiple=True
    )
    outputCatalog = connectionTypes.Output(
        doc="Per-visit concatenation of Source Table",
        name="{catalogType}sourceTable_visit",
        storageClass="DataFrame",
        dimensions=("instrument", "visit")
    )


class ConsolidateSourceTableConfig(pipeBase.PipelineTaskConfig,
                                   pipelineConnections=ConsolidateSourceTableConnections):
    pass


class ConsolidateSourceTableTask(CmdLineTask, pipeBase.PipelineTask):
    """Concatenate the `sourceTable` list into a per-visit `sourceTable_visit`.
    """
    _DefaultName = 'consolidateSourceTable'
    ConfigClass = ConsolidateSourceTableConfig

    inputDataset = 'sourceTable'
    outputDataset = 'sourceTable_visit'

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)
        self.log.info("Concatenating %s per-detector Source Tables",
                      len(inputs['inputCatalogs']))
        df = pd.concat(inputs['inputCatalogs'])
        butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs)

    def runDataRef(self, dataRefList):
        self.log.info("Concatenating %s per-detector Source Tables", len(dataRefList))
        df = pd.concat([dataRef.get().toDataFrame() for dataRef in dataRefList])
        dataRefList[0].put(ParquetTable(dataFrame=df), self.outputDataset)

    @classmethod
    def _makeArgumentParser(cls):
        parser = ArgumentParser(name=cls._DefaultName)

        parser.add_id_argument("--id", cls.inputDataset,
                               help="data ID, e.g. --id visit=12345",
                               ContainerClass=VisitDataIdContainer)
        return parser

    def writeMetadata(self, dataRef):
        """No metadata to write.
        """
        pass

    def writeConfig(self, butler, clobber=False, doBackup=True):
        """No config to write.
        """
        pass


class MakeCcdVisitTableConnections(pipeBase.PipelineTaskConnections,
                                   dimensions=("instrument",),
                                   defaultTemplates={}):
    visitSummaryRefs = pipeBase.connectionTypes.Input(
        doc="Data references for per-visit consolidated exposure metadata from ConsolidateVisitSummaryTask",
        name="visitSummary",
        storageClass="ExposureCatalog",
        dimensions=("instrument", "visit"),
        multiple=True,
        deferLoad=True,
    )
    outputCatalog = connectionTypes.Output(
        doc="CCD and Visit metadata table",
        name="CcdVisitTable",
        storageClass="DataFrame",
        dimensions=("instrument",)
    )


class MakeCcdVisitTableConfig(pipeBase.PipelineTaskConfig,
                              pipelineConnections=MakeCcdVisitTableConnections):
    pass


class MakeCcdVisitTableTask(CmdLineTask, pipeBase.PipelineTask):
    """Produce a `ccdVisitTable` from the `visitSummary` exposure catalogs.
    """
    _DefaultName = 'makeCcdVisitTable'
    ConfigClass = MakeCcdVisitTableConfig

    def run(self, visitSummaryRefs):
        """Make a table of ccd information from the `visitSummary` catalogs.

        Parameters
        ----------
        visitSummaryRefs : `list` of `lsst.daf.butler.DeferredDatasetHandle`
            List of DeferredDatasetHandles pointing to exposure catalogs with
            per-detector summary information.

        Returns
        -------
        result : `lsst.pipe.base.Struct`
            Results struct with attribute:

            ``outputCatalog``
                Catalog of ccd and visit information.
        """
        ccdEntries = []
        for visitSummaryRef in visitSummaryRefs:
            visitSummary = visitSummaryRef.get()
            visitInfo = visitSummary[0].getVisitInfo()

            ccdEntry = {}
            summaryTable = visitSummary.asAstropy()
            selectColumns = ['id', 'visit', 'physical_filter', 'ra', 'decl', 'zenithDistance', 'zeroPoint',
                             'psfSigma', 'skyBg', 'skyNoise']
            ccdEntry = summaryTable[selectColumns].to_pandas().set_index('id')
            ccdEntry = ccdEntry.rename(columns={"physical_filter": "filterName", "visit": "visitId"})

            dataIds = [DataCoordinate.standardize(visitSummaryRef.dataId, detector=id) for id in
                       summaryTable['id']]
            packer = visitSummaryRef.dataId.universe.makePacker('visit_detector', visitSummaryRef.dataId)
            ccdVisitIds = [packer.pack(dataId) for dataId in dataIds]
            ccdEntry['ccdVisitId'] = ccdVisitIds

            pixToArcseconds = np.array([vR.getWcs().getPixelScale().asArcseconds() for vR in visitSummary])
            ccdEntry["seeing"] = visitSummary['psfSigma'] * np.sqrt(8 * np.log(2)) * pixToArcseconds

            ccdEntry["skyRotation"] = visitInfo.getBoresightRotAngle().asDegrees()
            ccdEntry["expMidpt"] = visitInfo.getDate().toPython()
            expTime = visitInfo.getExposureTime()
            ccdEntry['expTime'] = expTime
            ccdEntry["obsStart"] = ccdEntry["expMidpt"] - 0.5 * pd.Timedelta(seconds=expTime)
            ccdEntry['darkTime'] = visitInfo.getDarkTime()
            ccdEntry['xSize'] = summaryTable['bbox_max_x'] - summaryTable['bbox_min_x']
            ccdEntry['ySize'] = summaryTable['bbox_max_y'] - summaryTable['bbox_min_y']
            ccdEntry['llcra'] = summaryTable['raCorners'][:, 0]
            ccdEntry['llcdec'] = summaryTable['decCorners'][:, 0]
            ccdEntry['ulcra'] = summaryTable['raCorners'][:, 1]
            ccdEntry['ulcdec'] = summaryTable['decCorners'][:, 1]
            ccdEntry['urcra'] = summaryTable['raCorners'][:, 2]
            ccdEntry['urcdec'] = summaryTable['decCorners'][:, 2]
            ccdEntry['lrcra'] = summaryTable['raCorners'][:, 3]
            ccdEntry['lrcdec'] = summaryTable['decCorners'][:, 3]
            # TODO: DM-30618, Add raftName, nExposures, ccdTemp, binX, binY,
            # and flags, and decide if WCS, and llcx, llcy, ulcx, ulcy, etc.
            # values are actually wanted.
            ccdEntries.append(ccdEntry)

        outputCatalog = pd.concat(ccdEntries)
        return pipeBase.Struct(outputCatalog=outputCatalog)
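

# Illustrative sketch of the "seeing" arithmetic above: a Gaussian PSF sigma of
# 2.0 pixels at a pixel scale of 0.2 arcsec/pixel corresponds to a FWHM of
# about 0.94 arcsec (FWHM = sigma * sqrt(8 ln 2) * pixel scale).
def _seeingExample(psfSigma=2.0, pixelScaleArcsec=0.2):
    return psfSigma * np.sqrt(8 * np.log(2)) * pixelScaleArcsec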