Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of pipe_tasks 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

22import functools 

23import pandas as pd 

24from collections import defaultdict 

25import numpy as np 

26 

27import lsst.geom 

28import lsst.pex.config as pexConfig 

29import lsst.pipe.base as pipeBase 

30import lsst.daf.base as dafBase 

31from lsst.pipe.base import connectionTypes 

32import lsst.afw.table as afwTable 

33from lsst.meas.base import SingleFrameMeasurementTask 

34from lsst.pipe.base import CmdLineTask, ArgumentParser, DataIdContainer 

35from lsst.coadd.utils.coaddDataIdContainer import CoaddDataIdContainer 

36from lsst.daf.butler import DeferredDatasetHandle, DataCoordinate 

37 

38from .parquetTable import ParquetTable 

39from .multiBandUtils import makeMergeArgumentParser, MergeSourcesRunner 

40from .functors import CompositeFunctor, RAColumn, DecColumn, Column 

41 

42 

def flattenFilters(df, noDupCols=['coord_ra', 'coord_dec'], camelCase=False, inputBands=None):
    """Flatten a dataframe with a multilevel column index into a flat one.

    Per-band columns are renamed with a band prefix (e.g. ``g_flux`` or
    ``gFlux``); columns listed in ``noDupCols`` are assumed identical across
    bands and are emitted once, un-prefixed.

    Parameters
    ----------
    df : `pandas.DataFrame`
        Input dataframe with a two-level column index: band at level 0,
        column name at level 1.
    noDupCols : `list` of `str`, optional
        Columns not to duplicate per band; taken from the first present
        band's partition.
    camelCase : `bool`, optional
        If True join band and column name directly (``gFlux``); otherwise
        use an underscore (``g_flux``).
    inputBands : iterable of `str`, optional
        Bands expected in the input.  If None, all bands found in ``df``
        are considered present.

    Returns
    -------
    `pandas.DataFrame`
        Dataframe with a single-level column index.
    """
    # band is the level 0 index
    dfBands = df.columns.unique(level=0).values
    # Loop-invariant: pick the name-munging format once.
    columnFormat = '{0}{1}' if camelCase else '{0}_{1}'
    flattened = []
    for band in dfBands:
        subdf = df[band]
        newColumns = {c: columnFormat.format(band, c)
                      for c in subdf.columns if c not in noDupCols}
        cols = list(newColumns.keys())
        flattened.append(subdf[cols].rename(columns=newColumns))

    # Band must be present in the input and output or else column is all NaN:
    presentBands = dfBands if inputBands is None else list(set(inputBands).intersection(dfBands))
    # Get the unexploded columns from any present band's partition
    noDupDf = df[presentBands[0]][noDupCols]
    # Concatenate once rather than repeatedly growing a dataframe per band
    # (repeated pd.concat in a loop copies the accumulated frame each time).
    newDf = pd.concat([noDupDf] + flattened, axis=1)
    return newDf

63 

64 

class WriteObjectTableConnections(pipeBase.PipelineTaskConnections,
                                  dimensions=("tract", "patch", "skymap"),
                                  defaultTemplates={"coaddName": "deep"}):
    # Per-band measurement catalogs; one input connection instance per band.
    inputCatalogMeas = connectionTypes.Input(
        doc="Catalog of source measurements on the deepCoadd.",
        name="{coaddName}Coadd_meas",
        storageClass="SourceCatalog",
        dimensions=("tract", "patch", "band", "skymap"),
        multiple=True
    )
    # Per-band forced-photometry catalogs.
    inputCatalogForcedSrc = connectionTypes.Input(
        doc="Catalog of forced measurements (shape and position parameters held fixed) on the deepCoadd.",
        name="{coaddName}Coadd_forced_src",
        storageClass="SourceCatalog",
        dimensions=("tract", "patch", "band", "skymap"),
        multiple=True
    )
    # Single reference catalog shared by all bands of the patch.
    inputCatalogRef = connectionTypes.Input(
        doc="Catalog marking the primary detection (which band provides a good shape and position)"
        "for each detection in deepCoadd_mergeDet.",
        name="{coaddName}Coadd_ref",
        storageClass="SourceCatalog",
        dimensions=("tract", "patch", "skymap")
    )
    # Merged multi-band, multi-dataset table for the patch.
    outputCatalog = connectionTypes.Output(
        doc="A vertical concatenation of the deepCoadd_{ref|meas|forced_src} catalogs, "
        "stored as a DataFrame with a multi-level column index per-patch.",
        name="{coaddName}Coadd_obj",
        storageClass="DataFrame",
        dimensions=("tract", "patch", "skymap")
    )

96 

97 

class WriteObjectTableConfig(pipeBase.PipelineTaskConfig,
                             pipelineConnections=WriteObjectTableConnections):
    # Backend used to serialize the merged table to parquet.
    engine = pexConfig.Field(
        doc="Parquet engine for writing (pyarrow or fastparquet)",
        dtype=str,
        default="pyarrow"
    )
    # Coadd flavor; feeds the {coaddName} dataset-name templates.
    coaddName = pexConfig.Field(
        doc="Name of coadd",
        dtype=str,
        default="deep"
    )

110 

111 

class WriteObjectTableTask(CmdLineTask, pipeBase.PipelineTask):
    """Write filter-merged source tables to parquet.

    Joins the per-band ``meas`` and ``forced_src`` catalogs with the shared
    ``ref`` catalog for a patch into one DataFrame with a three-level column
    index (``dataset``, ``band``, ``column``).
    """
    _DefaultName = "writeObjectTable"
    ConfigClass = WriteObjectTableConfig
    RunnerClass = MergeSourcesRunner

    # Names of table datasets to be merged
    inputDatasets = ('forced_src', 'meas', 'ref')

    # Tag of output dataset written by `MergeSourcesTask.write`
    outputDataset = 'obj'

    def __init__(self, butler=None, schema=None, **kwargs):
        # It is a shame that this class can't use the default init for CmdLineTask
        # But to do so would require its own special task runner, which is many
        # more lines of specialization, so this is how it is for now
        super().__init__(**kwargs)

    def runDataRef(self, patchRefList):
        """!
        @brief Merge coadd sources from multiple bands. Calls @ref `run` which must be defined in
        subclasses that inherit from MergeSourcesTask.
        @param[in] patchRefList list of data references for each filter
        """
        catalogs = dict(self.readCatalog(patchRef) for patchRef in patchRefList)
        dataId = patchRefList[0].dataId
        mergedCatalog = self.run(catalogs, tract=dataId['tract'], patch=dataId['patch'])
        self.write(patchRefList[0], ParquetTable(dataFrame=mergedCatalog))

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)

        # Key the per-band inputs by band so they can be recombined below.
        measDict = {ref.dataId['band']: {'meas': cat} for ref, cat in
                    zip(inputRefs.inputCatalogMeas, inputs['inputCatalogMeas'])}
        forcedSourceDict = {ref.dataId['band']: {'forced_src': cat} for ref, cat in
                            zip(inputRefs.inputCatalogForcedSrc, inputs['inputCatalogForcedSrc'])}

        catalogs = {}
        for band in measDict.keys():
            # The single 'ref' catalog is shared across all bands.
            catalogs[band] = {'meas': measDict[band]['meas'],
                              'forced_src': forcedSourceDict[band]['forced_src'],
                              'ref': inputs['inputCatalogRef']}
        dataId = butlerQC.quantum.dataId
        df = self.run(catalogs=catalogs, tract=dataId['tract'], patch=dataId['patch'])
        outputs = pipeBase.Struct(outputCatalog=df)
        butlerQC.put(outputs, outputRefs)

    @classmethod
    def _makeArgumentParser(cls):
        """Create a suitable ArgumentParser.

        We will use the ArgumentParser to get a list of data
        references for patches; the RunnerClass will sort them into lists
        of data references for the same patch.

        References first of self.inputDatasets, rather than
        self.inputDataset
        """
        return makeMergeArgumentParser(cls._DefaultName, cls.inputDatasets[0])

    def readCatalog(self, patchRef):
        """Read input catalogs

        Read all the input datasets given by the 'inputDatasets'
        attribute.

        Parameters
        ----------
        patchRef : `lsst.daf.persistence.ButlerDataRef`
            Data reference for patch

        Returns
        -------
        Tuple consisting of band name and a dict of catalogs, keyed by
        dataset name
        """
        band = patchRef.get(self.config.coaddName + "Coadd_filterLabel", immediate=True).bandLabel
        catalogDict = {}
        for dataset in self.inputDatasets:
            catalog = patchRef.get(self.config.coaddName + "Coadd_" + dataset, immediate=True)
            # Lazy %-args: let the logging layer do the formatting.
            self.log.info("Read %d sources from %s for band %s: %s",
                          len(catalog), dataset, band, patchRef.dataId)
            catalogDict[dataset] = catalog
        return band, catalogDict

    def run(self, catalogs, tract, patch):
        """Merge multiple catalogs.

        Parameters
        ----------
        catalogs : `dict`
            Mapping from filter names to dict of catalogs.
        tract : int
            tractId to use for the tractId column
        patch : str
            patchId to use for the patchId column

        Returns
        -------
        catalog : `pandas.DataFrame`
            Merged dataframe
        """
        dfs = []
        for filt, tableDict in catalogs.items():
            for dataset, table in tableDict.items():
                # Convert afwTable to pandas DataFrame
                df = table.asAstropy().to_pandas().set_index('id', drop=True)

                # Sort columns by name, to ensure matching schema among patches
                df = df.reindex(sorted(df.columns), axis=1)
                df['tractId'] = tract
                df['patchId'] = patch

                # Make columns a 3-level MultiIndex
                df.columns = pd.MultiIndex.from_tuples([(dataset, filt, c) for c in df.columns],
                                                       names=('dataset', 'band', 'column'))
                dfs.append(df)

        # Join all partitions on the shared 'id' index.
        catalog = functools.reduce(lambda d1, d2: d1.join(d2), dfs)
        return catalog

    def write(self, patchRef, catalog):
        """Write the output.

        Parameters
        ----------
        catalog : `ParquetTable`
            Catalog to write
        patchRef : `lsst.daf.persistence.ButlerDataRef`
            Data reference for patch
        """
        patchRef.put(catalog, self.config.coaddName + "Coadd_" + self.outputDataset)
        # since the filter isn't actually part of the data ID for the dataset we're saving,
        # it's confusing to see it in the log message, even if the butler simply ignores it.
        mergeDataId = patchRef.dataId.copy()
        del mergeDataId["filter"]
        # Lazy %-args, consistent with the rest of this module's logging.
        self.log.info("Wrote merged catalog: %s", mergeDataId)

    def writeMetadata(self, dataRefList):
        """No metadata to write, and not sure how to write it for a list of dataRefs.
        """
        pass

256 

257 

class WriteSourceTableConnections(pipeBase.PipelineTaskConnections,
                                  dimensions=("instrument", "visit", "detector"),
                                  defaultTemplates={"catalogType": ""}):

    # Detector-level src catalog from single-frame processing.
    catalog = connectionTypes.Input(
        doc="Input full-depth catalog of sources produced by CalibrateTask",
        name="{catalogType}src",
        dimensions=("instrument", "visit", "detector"),
        storageClass="SourceCatalog"
    )
    # Same catalog, persisted as a DataFrame.
    outputCatalog = connectionTypes.Output(
        doc="Catalog of sources, `src` in Parquet format",
        name="{catalogType}source",
        dimensions=("instrument", "visit", "detector"),
        storageClass="DataFrame"
    )

274 

275 

class WriteSourceTableConfig(pipeBase.PipelineTaskConfig,
                             pipelineConnections=WriteSourceTableConnections):
    # Both flags exist only for converting old repos whose src catalogs
    # predate the local calibration columns.
    doApplyExternalPhotoCalib = pexConfig.Field(
        doc=("Add local photoCalib columns from the calexp.photoCalib? Should only set True if "
             "generating Source Tables from older src tables which do not already have local calib columns"),
        dtype=bool,
        default=False
    )
    doApplyExternalSkyWcs = pexConfig.Field(
        doc=("Add local WCS columns from the calexp.wcs? Should only set True if "
             "generating Source Tables from older src tables which do not already have local calib columns"),
        dtype=bool,
        default=False
    )

290 

291 

class WriteSourceTableTask(CmdLineTask, pipeBase.PipelineTask):
    """Write source table to parquet.

    Converts a detector-level ``src`` catalog to a DataFrame-backed
    `ParquetTable`, optionally first adding local-calibration columns for
    old repositories.
    """
    _DefaultName = "writeSourceTable"
    ConfigClass = WriteSourceTableConfig

    def runDataRef(self, dataRef):
        src = dataRef.get('src')
        if self.config.doApplyExternalPhotoCalib or self.config.doApplyExternalSkyWcs:
            src = self.addCalibColumns(src, dataRef)

        ccdVisitId = dataRef.get('ccdExposureId')
        result = self.run(src, ccdVisitId=ccdVisitId)
        dataRef.put(result.table, 'source')

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)
        # Pack visit+detector into the single integer ccdVisitId column.
        inputs['ccdVisitId'] = butlerQC.quantum.dataId.pack("visit_detector")
        result = self.run(**inputs).table
        outputs = pipeBase.Struct(outputCatalog=result.toDataFrame())
        butlerQC.put(outputs, outputRefs)

    def run(self, catalog, ccdVisitId=None):
        """Convert `src` catalog to parquet

        Parameters
        ----------
        catalog: `afwTable.SourceCatalog`
            catalog to be converted
        ccdVisitId: `int`
            ccdVisitId to be added as a column

        Returns
        -------
        result : `lsst.pipe.base.Struct`
            ``table``
                `ParquetTable` version of the input catalog
        """
        self.log.info("Generating parquet table from src catalog %s", ccdVisitId)
        df = catalog.asAstropy().to_pandas().set_index('id', drop=True)
        df['ccdVisitId'] = ccdVisitId
        return pipeBase.Struct(table=ParquetTable(dataFrame=df))

    def addCalibColumns(self, catalog, dataRef):
        """Add columns with local calibration evaluated at each centroid

        for backwards compatibility with old repos.
        This exists for the purpose of converting old src catalogs
        (which don't have the expected local calib columns) to Source Tables.

        Parameters
        ----------
        catalog: `afwTable.SourceCatalog`
            catalog to which calib columns will be added
        dataRef: `lsst.daf.persistence.ButlerDataRef`
            for fetching the calibs from disk.

        Returns
        -------
        newCat: `afwTable.SourceCatalog`
            Source Catalog with requested local calib columns

        Raises
        ------
        RuntimeError
            If a requested calib column already exists in the input schema.
        """
        measureConfig = SingleFrameMeasurementTask.ConfigClass()
        measureConfig.doReplaceWithNoise = False

        # Just need the WCS or the PhotoCalib attached to an exposure;
        # a 1x1 cutout is enough to carry them.
        exposure = dataRef.get('calexp_sub',
                               bbox=lsst.geom.Box2I(lsst.geom.Point2I(0, 0), lsst.geom.Point2I(0, 0)))

        # NOTE: the original code built a second, identical SchemaMapper just
        # above this one; the duplicate (dead) construction has been removed.
        mapper = afwTable.SchemaMapper(catalog.schema)
        mapper.addMinimalSchema(catalog.schema, True)
        schema = mapper.getOutputSchema()

        exposureIdInfo = dataRef.get("expIdInfo")
        measureConfig.plugins.names = []
        if self.config.doApplyExternalSkyWcs:
            plugin = 'base_LocalWcs'
            if plugin in schema:
                raise RuntimeError(f"{plugin} already in src catalog. Set doApplyExternalSkyWcs=False")
            else:
                measureConfig.plugins.names.add(plugin)

        if self.config.doApplyExternalPhotoCalib:
            plugin = 'base_LocalPhotoCalib'
            if plugin in schema:
                raise RuntimeError(f"{plugin} already in src catalog. Set doApplyExternalPhotoCalib=False")
            else:
                measureConfig.plugins.names.add(plugin)

        measurement = SingleFrameMeasurementTask(config=measureConfig, schema=schema)
        newCat = afwTable.SourceCatalog(schema)
        newCat.extend(catalog, mapper=mapper)
        measurement.run(measCat=newCat, exposure=exposure, exposureId=exposureIdInfo.expId)
        return newCat

    def writeMetadata(self, dataRef):
        """No metadata to write.
        """
        pass

    @classmethod
    def _makeArgumentParser(cls):
        parser = ArgumentParser(name=cls._DefaultName)
        parser.add_id_argument("--id", 'src',
                               help="data ID, e.g. --id visit=12345 ccd=0")
        return parser

399 

400 

class PostprocessAnalysis(object):
    """Calculate columns from ParquetTable

    This object manages and organizes an arbitrary set of computations
    on a catalog. The catalog is defined by a
    `lsst.pipe.tasks.parquetTable.ParquetTable` object (or list thereof), such as a
    `deepCoadd_obj` dataset, and the computations are defined by a collection
    of `lsst.pipe.tasks.functor.Functor` objects (or, equivalently,
    a `CompositeFunctor`).

    After the object is initialized, accessing the `.df` attribute (which
    holds the `pandas.DataFrame` containing the results of the calculations) triggers
    computation of said dataframe.

    One of the conveniences of using this object is the ability to define a desired common
    filter for all functors. This enables the same functor collection to be passed to
    several different `PostprocessAnalysis` objects without having to change the original
    functor collection, since the `filt` keyword argument of this object triggers an
    overwrite of the `filt` property for all functors in the collection.

    This object also allows a list of refFlags to be passed, and defines a set of default
    refFlags that are always included even if not requested.

    If a list of `ParquetTable` object is passed, rather than a single one, then the
    calculations will be mapped over all the input catalogs. In principle, it should
    be straightforward to parallelize this activity, but initial tests have failed
    (see TODO in code comments).

    Parameters
    ----------
    parq : `lsst.pipe.tasks.ParquetTable` (or list of such)
        Source catalog(s) for computation

    functors : `list`, `dict`, or `lsst.pipe.tasks.functors.CompositeFunctor`
        Computations to do (functors that act on `parq`).
        If a dict, the output
        DataFrame will have columns keyed accordingly.
        If a list, the column keys will come from the
        `.shortname` attribute of each functor.

    filt : `str` (optional)
        Filter in which to calculate. If provided,
        this will overwrite any existing `.filt` attribute
        of the provided functors.

    flags : `list` (optional)
        List of flags (per-band) to include in output table.

    refFlags : `list` (optional)
        List of refFlags (only reference band) to include in output table.
    """
    # Always-included reference flags (none by default).
    _defaultRefFlags = []
    # Coordinate columns computed for every analysis.
    _defaultFuncs = (('coord_ra', RAColumn()),
                     ('coord_dec', DecColumn()))

    def __init__(self, parq, functors, filt=None, flags=None, refFlags=None):
        self.parq = parq
        self.functors = functors

        self.filt = filt
        # Copy the caller's lists so later mutation here can't surprise them.
        self.flags = list(flags) if flags is not None else []
        self.refFlags = list(self._defaultRefFlags)
        if refFlags is not None:
            self.refFlags += list(refFlags)

        # Lazily-computed result cache; populated by compute().
        self._df = None

    @property
    def defaultFuncs(self):
        # Fresh dict each call so callers may mutate it freely.
        funcs = dict(self._defaultFuncs)
        return funcs

    @property
    def func(self):
        """Assemble the full `CompositeFunctor`: defaults + flags + user functors."""
        additionalFuncs = self.defaultFuncs
        additionalFuncs.update({flag: Column(flag, dataset='ref') for flag in self.refFlags})
        additionalFuncs.update({flag: Column(flag, dataset='meas') for flag in self.flags})

        if isinstance(self.functors, CompositeFunctor):
            func = self.functors
        else:
            func = CompositeFunctor(self.functors)

        func.funcDict.update(additionalFuncs)
        # Propagate the common filter to all member functors.
        func.filt = self.filt

        return func

    @property
    def noDupCols(self):
        # Columns that should not be duplicated per band.
        return [name for name, func in self.func.funcDict.items() if func.noDup or func.dataset == 'ref']

    @property
    def df(self):
        if self._df is None:
            self.compute()
        return self._df

    def compute(self, dropna=False, pool=None):
        # map over multiple parquet tables
        # isinstance (rather than type(...) in (...)) also accepts subclasses.
        if isinstance(self.parq, (list, tuple)):
            if pool is None:
                dflist = [self.func(parq, dropna=dropna) for parq in self.parq]
            else:
                # TODO: Figure out why this doesn't work (pyarrow pickling issues?)
                dflist = pool.map(functools.partial(self.func, dropna=dropna), self.parq)
            self._df = pd.concat(dflist)
        else:
            self._df = self.func(self.parq, dropna=dropna)

        return self._df

514 

515 

class TransformCatalogBaseConnections(pipeBase.PipelineTaskConnections,
                                      dimensions=()):
    """Expected Connections for subclasses of TransformCatalogBaseTask.

    Must be subclassed.
    """
    # Placeholder connections: subclasses supply real names and dimensions.
    inputCatalog = connectionTypes.Input(
        storageClass="DataFrame",
        name="",
    )
    outputCatalog = connectionTypes.Output(
        storageClass="DataFrame",
        name="",
    )

530 

531 

class TransformCatalogBaseConfig(pipeBase.PipelineTaskConfig,
                                 pipelineConnections=TransformCatalogBaseConnections):
    # YAML definition of the functor computations; required when running
    # as a PipelineTask (see runQuantum of the base task).
    functorFile = pexConfig.Field(
        doc='Path to YAML file specifying functors to be computed',
        dtype=str,
        optional=True,
        default=None
    )

540 

541 

class TransformCatalogBaseTask(CmdLineTask, pipeBase.PipelineTask):
    """Base class for transforming/standardizing a catalog

    by applying functors that convert units and apply calibrations.
    The purpose of this task is to perform a set of computations on
    an input `ParquetTable` dataset (such as `deepCoadd_obj`) and write the
    results to a new dataset (which needs to be declared in an `outputDataset`
    attribute).

    The calculations to be performed are defined in a YAML file that specifies
    a set of functors to be computed, provided as
    a `--functorFile` config parameter. An example of such a YAML file
    is the following:

        funcs:
            psfMag:
                functor: Mag
                args:
                    - base_PsfFlux
                filt: HSC-G
                dataset: meas
            cmodel_magDiff:
                functor: MagDiff
                args:
                    - modelfit_CModel
                    - base_PsfFlux
                filt: HSC-G
            gauss_magDiff:
                functor: MagDiff
                args:
                    - base_GaussianFlux
                    - base_PsfFlux
                filt: HSC-G
            count:
                functor: Column
                args:
                    - base_InputCount_value
                filt: HSC-G
            deconvolved_moments:
                functor: DeconvolvedMoments
                filt: HSC-G
                dataset: forced_src
        refFlags:
            - calib_psfUsed
            - merge_measurement_i
            - merge_measurement_r
            - merge_measurement_z
            - merge_measurement_y
            - merge_measurement_g
            - base_PixelFlags_flag_inexact_psfCenter
            - detect_isPrimary

    The names for each entry under "func" will become the names of columns in the
    output dataset. All the functors referenced are defined in `lsst.pipe.tasks.functors`.
    Positional arguments to be passed to each functor are in the `args` list,
    and any additional entries for each column other than "functor" or "args" (e.g., `'filt'`,
    `'dataset'`) are treated as keyword arguments to be passed to the functor initialization.

    The "refFlags" entry is shortcut for a bunch of `Column` functors with the original column and
    taken from the `'ref'` dataset.

    The "flags" entry will be expanded out per band.

    This task uses the `lsst.pipe.tasks.postprocess.PostprocessAnalysis` object
    to organize and execute the calculations.
    """
    # The following properties force subclasses to define the corresponding
    # class attributes; accessing them on this base class raises.
    @property
    def _DefaultName(self):
        raise NotImplementedError('Subclass must define "_DefaultName" attribute')

    @property
    def outputDataset(self):
        raise NotImplementedError('Subclass must define "outputDataset" attribute')

    @property
    def inputDataset(self):
        raise NotImplementedError('Subclass must define "inputDataset" attribute')

    @property
    def ConfigClass(self):
        raise NotImplementedError('Subclass must define "ConfigClass" attribute')

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        if self.config.functorFile:
            # Typo fix: log message previously read "tranform".
            self.log.info('Loading transform functor definitions from %s',
                          self.config.functorFile)
            self.funcs = CompositeFunctor.from_file(self.config.functorFile)
            self.funcs.update(dict(PostprocessAnalysis._defaultFuncs))
        else:
            self.funcs = None

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)
        if self.funcs is None:
            raise ValueError("config.functorFile is None. "
                             "Must be a valid path to yaml in order to run Task as a PipelineTask.")
        result = self.run(parq=inputs['inputCatalog'], funcs=self.funcs,
                          dataId=outputRefs.outputCatalog.dataId.full)
        outputs = pipeBase.Struct(outputCatalog=result)
        butlerQC.put(outputs, outputRefs)

    def runDataRef(self, dataRef):
        parq = dataRef.get()
        if self.funcs is None:
            raise ValueError("config.functorFile is None. "
                             "Must be a valid path to yaml in order to run as a CommandlineTask.")
        df = self.run(parq, funcs=self.funcs, dataId=dataRef.dataId)
        self.write(df, dataRef)
        return df

    def run(self, parq, funcs=None, dataId=None, band=None):
        """Do postprocessing calculations

        Takes a `ParquetTable` object and dataId,
        returns a dataframe with results of postprocessing calculations.

        Parameters
        ----------
        parq : `lsst.pipe.tasks.parquetTable.ParquetTable`
            ParquetTable from which calculations are done.
        funcs : `lsst.pipe.tasks.functors.Functors`
            Functors to apply to the table's columns
        dataId : dict, optional
            Used to add a `patchId` column to the output dataframe.
        band : `str`, optional
            Filter band that is being processed.

        Returns
        -------
        `pandas.DataFrame`
        """
        self.log.info("Transforming/standardizing the source table dataId: %s", dataId)

        df = self.transform(band, parq, funcs, dataId).df
        self.log.info("Made a table of %d columns and %d rows", len(df.columns), len(df))
        return df

    def getFunctors(self):
        return self.funcs

    def getAnalysis(self, parq, funcs=None, band=None):
        if funcs is None:
            funcs = self.funcs
        analysis = PostprocessAnalysis(parq, funcs, filt=band)
        return analysis

    def transform(self, band, parq, funcs, dataId):
        analysis = self.getAnalysis(parq, funcs=funcs, band=band)
        df = analysis.df
        if dataId is not None:
            # Stamp every dataId key (tract, patch, ...) into the output.
            for key, value in dataId.items():
                df[str(key)] = value

        return pipeBase.Struct(
            df=df,
            analysis=analysis
        )

    def write(self, df, parqRef):
        parqRef.put(ParquetTable(dataFrame=df), self.outputDataset)

    def writeMetadata(self, dataRef):
        """No metadata to write.
        """
        pass

710 

711 

class TransformObjectCatalogConnections(pipeBase.PipelineTaskConnections,
                                        dimensions=("tract", "patch", "skymap"),
                                        defaultTemplates={"coaddName": "deep"}):
    # Deferred load: run() may read only the column index up front.
    inputCatalog = connectionTypes.Input(
        doc="The vertical concatenation of the deepCoadd_{ref|meas|forced_src} catalogs, "
        "stored as a DataFrame with a multi-level column index per-patch.",
        name="{coaddName}Coadd_obj",
        storageClass="DataFrame",
        dimensions=("tract", "patch", "skymap"),
        deferLoad=True,
    )
    outputCatalog = connectionTypes.Output(
        doc="Per-Patch Object Table of columns transformed from the deepCoadd_obj table per the standard "
        "data model.",
        name="objectTable",
        storageClass="DataFrame",
        dimensions=("tract", "patch", "skymap"),
    )

730 

731 

class TransformObjectCatalogConfig(TransformCatalogBaseConfig,
                                   pipelineConnections=TransformObjectCatalogConnections):
    coaddName = pexConfig.Field(
        doc="Name of coadd",
        dtype=str,
        default="deep",
    )
    # TODO: remove in DM-27177
    filterMap = pexConfig.DictField(
        doc=("Dictionary mapping full filter name to short one for column name munging."
             "These filters determine the output columns no matter what filters the "
             "input data actually contain."),
        keytype=str,
        itemtype=str,
        default={},
        deprecated=("Coadds are now identified by the band, so this transform is unused."
                    "Will be removed after v22.")
    )
    outputBands = pexConfig.ListField(
        doc=("These bands and only these bands will appear in the output,"
             " NaN-filled if the input does not include them."
             " If None, then use all bands found in the input."),
        dtype=str,
        optional=True,
        default=None,
    )
    camelCase = pexConfig.Field(
        doc=("Write per-band columns names with camelCase, else underscore "
             "For example: gPsFlux instead of g_PsFlux."),
        dtype=bool,
        default=True,
    )
    multilevelOutput = pexConfig.Field(
        doc=("Whether results dataframe should have a multilevel column index (True) or be flat "
             "and name-munged (False)."),
        dtype=bool,
        default=False,
    )

770 

771 

class TransformObjectCatalogTask(TransformCatalogBaseTask):
    """Produce a flattened Object Table to match the format specified in
    sdm_schemas.

    Do the same set of postprocessing calculations on all bands

    This is identical to `TransformCatalogBaseTask`, except for that it does the
    specified functor calculations for all filters present in the
    input `deepCoadd_obj` table. Any specific `"filt"` keywords specified
    by the YAML file will be superseded.
    """
    _DefaultName = "transformObjectCatalog"
    ConfigClass = TransformObjectCatalogConfig

    # Used by Gen 2 runDataRef only:
    inputDataset = 'deepCoadd_obj'
    outputDataset = 'objectTable'

    @classmethod
    def _makeArgumentParser(cls):
        parser = ArgumentParser(name=cls._DefaultName)
        parser.add_id_argument("--id", cls.inputDataset,
                               ContainerClass=CoaddDataIdContainer,
                               help="data ID, e.g. --id tract=12345 patch=1,2")
        return parser

    def run(self, parq, funcs=None, dataId=None, band=None):
        """Transform the catalog for every band present in the input.

        See `TransformCatalogBaseTask.run` for parameter descriptions;
        the ``band`` argument is ignored here because all bands are
        processed.
        """
        # NOTE: band kwarg is ignored here.
        dfDict = {}
        analysisDict = {}
        templateDf = pd.DataFrame()

        if isinstance(parq, DeferredDatasetHandle):
            # Read only the column index to discover the bands present.
            columns = parq.get(component='columns')
            inputBands = columns.unique(level=1).values
        else:
            inputBands = parq.columnLevelNames['band']

        outputBands = self.config.outputBands if self.config.outputBands else inputBands

        # Perform transform for data of filters that exist in parq.
        for inputBand in inputBands:
            if inputBand not in outputBands:
                self.log.info("Ignoring %s band data in the input", inputBand)
                continue
            self.log.info("Transforming the catalog of band %s", inputBand)
            result = self.transform(inputBand, parq, funcs, dataId)
            dfDict[inputBand] = result.df
            analysisDict[inputBand] = result.analysis
            if templateDf.empty:
                # First transformed band becomes the template for NaN fill.
                templateDf = result.df

        # Fill NaNs in columns of other wanted bands
        for filt in outputBands:
            if filt not in dfDict:
                self.log.info("Adding empty columns for band %s", filt)
                dfDict[filt] = pd.DataFrame().reindex_like(templateDf)

        # This makes a multilevel column index, with band as first level
        df = pd.concat(dfDict, axis=1, names=['band', 'column'])

        if not self.config.multilevelOutput:
            noDupCols = list(set.union(*[set(v.noDupCols) for v in analysisDict.values()]))
            if dataId is not None:
                noDupCols += list(dataId.keys())
            df = flattenFilters(df, noDupCols=noDupCols, camelCase=self.config.camelCase,
                                inputBands=inputBands)

        self.log.info("Made a table of %d columns and %d rows", len(df.columns), len(df))
        return df

842 

843 

class TractObjectDataIdContainer(CoaddDataIdContainer):

    def makeDataRefList(self, namespace):
        """Make self.refList from self.idList

        Generate a list of data references given tract and/or patch.
        This was adapted from `TractQADataIdContainer`, which was
        `TractDataIdContainer` modified to not require "filter".
        Only existing dataRefs are returned.
        """
        def getPatchRefList(tract):
            # One dataRef per patch of the tract.
            return [namespace.butler.dataRef(datasetType=self.datasetType,
                                             tract=tract.getId(),
                                             patch="%d,%d" % patch.getIndex()) for patch in tract]

        tractRefs = defaultdict(list)  # Data references for each tract
        # The skymap does not depend on the data ID; fetch it once up front
        # instead of once per id (it was previously re-fetched every loop
        # iteration).  Guarded so an empty idList still fetches nothing.
        skymap = self.getSkymap(namespace) if self.idList else None
        for dataId in self.idList:
            if "tract" in dataId:
                tractId = dataId["tract"]
                if "patch" in dataId:
                    tractRefs[tractId].append(namespace.butler.dataRef(datasetType=self.datasetType,
                                                                       tract=tractId,
                                                                       patch=dataId['patch']))
                else:
                    tractRefs[tractId] += getPatchRefList(skymap[tractId])
            else:
                # No tract given: expand to every tract in the skymap.
                tractRefs = dict((tract.getId(), tractRefs.get(tract.getId(), []) + getPatchRefList(tract))
                                 for tract in skymap)
        outputRefList = []
        for tractRefList in tractRefs.values():
            # Keep only dataRefs whose dataset actually exists on disk.
            existingRefs = [ref for ref in tractRefList if ref.datasetExists()]
            outputRefList.append(existingRefs)

        self.refList = outputRefList

881 

class ConsolidateObjectTableConnections(pipeBase.PipelineTaskConnections,
                                        dimensions=("tract", "skymap")):
    # One DataFrame per patch in the tract; all are read in one quantum.
    inputCatalogs = connectionTypes.Input(
        doc="Per-Patch objectTables conforming to the standard data model.",
        name="objectTable",
        storageClass="DataFrame",
        dimensions=("tract", "patch", "skymap"),
        multiple=True,
    )
    # Single tract-level DataFrame produced by concatenating the inputs.
    outputCatalog = connectionTypes.Output(
        doc="Pre-tract horizontal concatenation of the input objectTables",
        name="objectTable_tract",
        storageClass="DataFrame",
        dimensions=("tract", "skymap"),
    )

897 

898 

class ConsolidateObjectTableConfig(pipeBase.PipelineTaskConfig,
                                   pipelineConnections=ConsolidateObjectTableConnections):
    """Config for ConsolidateObjectTableTask."""
    # Coadd flavor prefix used by the Gen2 code path (e.g. "deep").
    coaddName = pexConfig.Field(
        dtype=str,
        default="deep",
        doc="Name of coadd"
    )

906 

907 

class ConsolidateObjectTableTask(CmdLineTask, pipeBase.PipelineTask):
    """Write patch-merged source tables to a tract-level parquet file

    Concatenates `objectTable` list into a per-visit `objectTable_tract`
    """
    _DefaultName = "consolidateObjectTable"
    ConfigClass = ConsolidateObjectTableConfig

    inputDataset = 'objectTable'
    outputDataset = 'objectTable_tract'

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        # Gen3 entry point: read all per-patch tables, stack them, and
        # persist the tract-level result.
        quantumInputs = butlerQC.get(inputRefs)
        catalogs = quantumInputs['inputCatalogs']
        self.log.info("Concatenating %s per-patch Object Tables", len(catalogs))
        combined = pd.concat(catalogs)
        butlerQC.put(pipeBase.Struct(outputCatalog=combined), outputRefs)

    @classmethod
    def _makeArgumentParser(cls):
        # Gen2 command-line parser; tract/patch ids are expanded by the
        # tract-level DataIdContainer.
        parser = ArgumentParser(name=cls._DefaultName)
        parser.add_id_argument("--id", cls.inputDataset,
                               help="data ID, e.g. --id tract=12345",
                               ContainerClass=TractObjectDataIdContainer)
        return parser

    def runDataRef(self, patchRefList):
        # Gen2 entry point: read each patch table, stack them, and write
        # via the first data reference.
        frames = [patchRef.get().toDataFrame() for patchRef in patchRefList]
        combined = pd.concat(frames)
        patchRefList[0].put(ParquetTable(dataFrame=combined), self.outputDataset)

    def writeMetadata(self, dataRef):
        """No metadata to write.
        """
        pass

943 

944 

class TransformSourceTableConnections(pipeBase.PipelineTaskConnections,
                                      defaultTemplates={"catalogType": ""},
                                      dimensions=("instrument", "visit", "detector")):

    # Deferred-load handle: the task reads columns/metadata on demand.
    inputCatalog = connectionTypes.Input(
        doc="Wide input catalog of sources produced by WriteSourceTableTask",
        name="{catalogType}source",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector"),
        deferLoad=True
    )
    # Functor-transformed per-detector table.
    outputCatalog = connectionTypes.Output(
        doc="Narrower, per-detector Source Table transformed and converted per a "
            "specified set of functors",
        name="{catalogType}sourceTable",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector")
    )

963 

964 

class TransformSourceTableConfig(TransformCatalogBaseConfig,
                                 pipelineConnections=TransformSourceTableConnections):
    """Config for TransformSourceTableTask; all options are inherited."""
    pass

968 

969 

class TransformSourceTableTask(TransformCatalogBaseTask):
    """Transform/standardize a source catalog
    """
    _DefaultName = "transformSourceTable"
    ConfigClass = TransformSourceTableConfig

    inputDataset = 'source'
    outputDataset = 'sourceTable'

    @classmethod
    def _makeArgumentParser(cls):
        # Gen2 command-line parser at the sensor (detector) level.
        parser = ArgumentParser(name=cls._DefaultName)
        parser.add_id_argument("--id", datasetType=cls.inputDataset,
                               level="sensor",
                               help="data ID, e.g. --id visit=12345 ccd=0")
        return parser

    def runDataRef(self, dataRef):
        """Override to specify band label to run()."""
        parquetTable = dataRef.get()
        functors = self.getFunctors()
        # The band is looked up from the calexp's filter label.
        band = dataRef.get("calexp_filterLabel", immediate=True).bandLabel
        transformed = self.run(parquetTable, funcs=functors, dataId=dataRef.dataId, band=band)
        self.write(transformed, dataRef)
        return transformed

995 

996 

class ConsolidateVisitSummaryConnections(pipeBase.PipelineTaskConnections,
                                         dimensions=("instrument", "visit",),
                                         defaultTemplates={"calexpType": ""}):
    # Per-detector calexps; only metadata components are read, hence the
    # deferred load.
    calexp = connectionTypes.Input(
        doc="Processed exposures used for metadata",
        name="{calexpType}calexp",
        storageClass="ExposureF",
        dimensions=("instrument", "visit", "detector"),
        deferLoad=True,
        multiple=True,
    )
    # One ExposureCatalog row per detector, keyed/sorted by detector id.
    visitSummary = connectionTypes.Output(
        doc=("Per-visit consolidated exposure metadata.  These catalogs use "
             "detector id for the id and are sorted for fast lookups of a "
             "detector."),
        name="{calexpType}visitSummary",
        storageClass="ExposureCatalog",
        dimensions=("instrument", "visit"),
    )

1016 

1017 

class ConsolidateVisitSummaryConfig(pipeBase.PipelineTaskConfig,
                                    pipelineConnections=ConsolidateVisitSummaryConnections):
    """Config for ConsolidateVisitSummaryTask"""
    # No configurable options; connections carry all the information.
    pass

1022 

1023 

class ConsolidateVisitSummaryTask(pipeBase.PipelineTask, pipeBase.CmdLineTask):
    """Task to consolidate per-detector visit metadata.

    This task aggregates the following metadata from all the detectors in a
    single visit into an exposure catalog:
    - The visitInfo.
    - The wcs.
    - The photoCalib.
    - The physical_filter and band (if available).
    - The psf size, shape, and effective area at the center of the detector.
    - The corners of the bounding box in right ascension/declination.

    Other quantities such as Detector, Psf, ApCorrMap, and TransmissionCurve
    are not persisted here because of storage concerns, and because of their
    limited utility as summary statistics.

    Tests for this task are performed in ci_hsc_gen3.
    """
    _DefaultName = "consolidateVisitSummary"
    ConfigClass = ConsolidateVisitSummaryConfig

    @classmethod
    def _makeArgumentParser(cls):
        # Gen2 command-line parser; detector-level ids are grouped by visit.
        parser = ArgumentParser(name=cls._DefaultName)

        parser.add_id_argument("--id", "calexp",
                               help="data ID, e.g. --id visit=12345",
                               ContainerClass=VisitDataIdContainer)
        return parser

    def writeMetadata(self, dataRef):
        """No metadata to persist, so override to remove metadata persistance.
        """
        pass

    def writeConfig(self, butler, clobber=False, doBackup=True):
        """No config to persist, so override to remove config persistance.
        """
        pass

    def runDataRef(self, dataRefList):
        """Gen2 entry point: combine per-detector metadata and persist it."""
        visit = dataRefList[0].dataId['visit']

        self.log.debug("Concatenating metadata from %d per-detector calexps (visit %d)" %
                       (len(dataRefList), visit))

        expCatalog = self._combineExposureMetadata(visit, dataRefList, isGen3=False)

        dataRefList[0].put(expCatalog, 'visitSummary', visit=visit)

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        """Gen3 entry point: combine per-detector metadata and persist it."""
        dataRefs = butlerQC.get(inputRefs.calexp)
        visit = dataRefs[0].dataId.byName()['visit']

        self.log.debug("Concatenating metadata from %d per-detector calexps (visit %d)" %
                       (len(dataRefs), visit))

        expCatalog = self._combineExposureMetadata(visit, dataRefs)

        butlerQC.put(expCatalog, outputRefs.visitSummary)

    def _combineExposureMetadata(self, visit, dataRefs, isGen3=True):
        """Make a combined exposure catalog from a list of dataRefs.
        These dataRefs must point to exposures with wcs, summaryStats,
        and other visit metadata.

        Parameters
        ----------
        visit : `int`
            Visit identification number.
        dataRefs : `list`
            List of dataRefs in visit.  May be list of
            `lsst.daf.persistence.ButlerDataRef` (Gen2) or
            `lsst.daf.butler.DeferredDatasetHandle` (Gen3).
        isGen3 : `bool`, optional
            Specifies if this is a Gen3 list of datarefs.

        Returns
        -------
        visitSummary : `lsst.afw.table.ExposureCatalog`
            Exposure catalog with per-detector summary information.
        """
        schema = self._makeVisitSummarySchema()
        cat = afwTable.ExposureCatalog(schema)
        cat.resize(len(dataRefs))

        cat['visit'] = visit

        for i, dataRef in enumerate(dataRefs):
            if isGen3:
                # Read only the components needed; the pixels are never loaded.
                visitInfo = dataRef.get(component='visitInfo')
                filterLabel = dataRef.get(component='filterLabel')
                summaryStats = dataRef.get(component='summaryStats')
                detector = dataRef.get(component='detector')
                wcs = dataRef.get(component='wcs')
                photoCalib = dataRef.get(component='photoCalib')
                bbox = dataRef.get(component='bbox')
                validPolygon = dataRef.get(component='validPolygon')
            else:
                # Note that we need to read the calexp because there is
                # no magic access to the psf except through the exposure.
                # A 2x2-pixel sub-region keeps the read cheap.
                gen2_read_bbox = lsst.geom.BoxI(lsst.geom.PointI(0, 0), lsst.geom.PointI(1, 1))
                exp = dataRef.get(datasetType='calexp_sub', bbox=gen2_read_bbox)
                visitInfo = exp.getInfo().getVisitInfo()
                filterLabel = dataRef.get("calexp_filterLabel")
                summaryStats = exp.getInfo().getSummaryStats()
                wcs = exp.getWcs()
                photoCalib = exp.getPhotoCalib()
                detector = exp.getDetector()
                bbox = dataRef.get(datasetType='calexp_bbox')
                validPolygon = exp.getInfo().getValidPolygon()

            rec = cat[i]
            rec.setBBox(bbox)
            rec.setVisitInfo(visitInfo)
            rec.setWcs(wcs)
            rec.setPhotoCalib(photoCalib)
            rec.setValidPolygon(validPolygon)

            # Filter labels may lack either component; store "" in that case.
            rec['physical_filter'] = filterLabel.physicalLabel if filterLabel.hasPhysicalLabel() else ""
            rec['band'] = filterLabel.bandLabel if filterLabel.hasBandLabel() else ""
            rec.setId(detector.getId())
            rec['psfSigma'] = summaryStats.psfSigma
            rec['psfIxx'] = summaryStats.psfIxx
            rec['psfIyy'] = summaryStats.psfIyy
            rec['psfIxy'] = summaryStats.psfIxy
            rec['psfArea'] = summaryStats.psfArea
            rec['raCorners'][:] = summaryStats.raCorners
            rec['decCorners'][:] = summaryStats.decCorners
            rec['ra'] = summaryStats.ra
            rec['decl'] = summaryStats.decl
            rec['zenithDistance'] = summaryStats.zenithDistance
            rec['zeroPoint'] = summaryStats.zeroPoint
            rec['skyBg'] = summaryStats.skyBg
            rec['skyNoise'] = summaryStats.skyNoise
            rec['meanVar'] = summaryStats.meanVar
            rec['astromOffsetMean'] = summaryStats.astromOffsetMean
            rec['astromOffsetStd'] = summaryStats.astromOffsetStd

        metadata = dafBase.PropertyList()
        metadata.add("COMMENT", "Catalog id is detector id, sorted.")
        # We are looping over existing datarefs, so the following is true
        metadata.add("COMMENT", "Only detectors with data have entries.")
        cat.setMetadata(metadata)

        # Sort by id (detector id) for fast per-detector lookups.
        cat.sort()
        return cat

    def _makeVisitSummarySchema(self):
        """Make the schema for the visitSummary catalog."""
        schema = afwTable.ExposureTable.makeMinimalSchema()
        schema.addField('visit', type='I', doc='Visit number')
        schema.addField('physical_filter', type='String', size=32, doc='Physical filter')
        schema.addField('band', type='String', size=32, doc='Name of band')
        schema.addField('psfSigma', type='F',
                        doc='PSF model second-moments determinant radius (center of chip) (pixel)')
        schema.addField('psfArea', type='F',
                        doc='PSF model effective area (center of chip) (pixel**2)')
        schema.addField('psfIxx', type='F',
                        doc='PSF model Ixx (center of chip) (pixel**2)')
        schema.addField('psfIyy', type='F',
                        doc='PSF model Iyy (center of chip) (pixel**2)')
        schema.addField('psfIxy', type='F',
                        doc='PSF model Ixy (center of chip) (pixel**2)')
        schema.addField('raCorners', type='ArrayD', size=4,
                        doc='Right Ascension of bounding box corners (degrees)')
        schema.addField('decCorners', type='ArrayD', size=4,
                        doc='Declination of bounding box corners (degrees)')
        schema.addField('ra', type='D',
                        doc='Right Ascension of bounding box center (degrees)')
        schema.addField('decl', type='D',
                        doc='Declination of bounding box center (degrees)')
        schema.addField('zenithDistance', type='F',
                        doc='Zenith distance of bounding box center (degrees)')
        schema.addField('zeroPoint', type='F',
                        doc='Mean zeropoint in detector (mag)')
        schema.addField('skyBg', type='F',
                        doc='Average sky background (ADU)')
        schema.addField('skyNoise', type='F',
                        doc='Average sky noise (ADU)')
        schema.addField('meanVar', type='F',
                        doc='Mean variance of the weight plane (ADU**2)')
        schema.addField('astromOffsetMean', type='F',
                        doc='Mean offset of astrometric calibration matches (arcsec)')
        schema.addField('astromOffsetStd', type='F',
                        doc='Standard deviation of offsets of astrometric calibration matches (arcsec)')

        return schema

1213 

1214 

class VisitDataIdContainer(DataIdContainer):
    """DataIdContainer that groups sensor-level id's by visit
    """

    def makeDataRefList(self, namespace):
        """Make self.refList from self.idList

        Generate a list of data references grouped by visit.

        Parameters
        ----------
        namespace : `argparse.Namespace`
            Namespace used by `lsst.pipe.base.CmdLineTask` to parse command line arguments
        """
        # Expand every id into concrete dataRefs, bucketed by visit number.
        refsByVisit = defaultdict(list)
        for dataId in self.idList:
            if "visit" not in dataId:
                continue
            subset = namespace.butler.subset(self.datasetType, dataId=dataId)
            refsByVisit[dataId["visit"]].extend(subset)

        # Keep one sub-list per visit, dropping refs whose datasets do not
        # exist and visits that end up with no existing refs at all.
        self.refList = [
            existing
            for existing in (
                [ref for ref in refs if ref.datasetExists()]
                for refs in refsByVisit.values()
            )
            if existing
        ]

1245 

1246 

class ConsolidateSourceTableConnections(pipeBase.PipelineTaskConnections,
                                        defaultTemplates={"catalogType": ""},
                                        dimensions=("instrument", "visit")):
    # One DataFrame per detector in the visit; all are read in one quantum.
    inputCatalogs = connectionTypes.Input(
        doc="Input per-detector Source Tables",
        name="{catalogType}sourceTable",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector"),
        multiple=True
    )
    # Single visit-level DataFrame produced by concatenating the inputs.
    outputCatalog = connectionTypes.Output(
        doc="Per-visit concatenation of Source Table",
        name="{catalogType}sourceTable_visit",
        storageClass="DataFrame",
        dimensions=("instrument", "visit")
    )

1263 

1264 

class ConsolidateSourceTableConfig(pipeBase.PipelineTaskConfig,
                                   pipelineConnections=ConsolidateSourceTableConnections):
    """Config for ConsolidateSourceTableTask; no configurable options."""
    pass

1268 

1269 

class ConsolidateSourceTableTask(CmdLineTask, pipeBase.PipelineTask):
    """Concatenate `sourceTable` list into a per-visit `sourceTable_visit`
    """
    _DefaultName = 'consolidateSourceTable'
    ConfigClass = ConsolidateSourceTableConfig

    inputDataset = 'sourceTable'
    outputDataset = 'sourceTable_visit'

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        # Gen3 entry point: fetch all per-detector tables, stack them, and
        # persist the visit-level result.
        quantumInputs = butlerQC.get(inputRefs)
        catalogs = quantumInputs['inputCatalogs']
        self.log.info("Concatenating %s per-detector Source Tables", len(catalogs))
        combined = pd.concat(catalogs)
        butlerQC.put(pipeBase.Struct(outputCatalog=combined), outputRefs)

    def runDataRef(self, dataRefList):
        # Gen2 entry point: read each detector table, stack, and write via
        # the first data reference.
        self.log.info("Concatenating %s per-detector Source Tables", len(dataRefList))
        frames = [dataRef.get().toDataFrame() for dataRef in dataRefList]
        combined = pd.concat(frames)
        dataRefList[0].put(ParquetTable(dataFrame=combined), self.outputDataset)

    @classmethod
    def _makeArgumentParser(cls):
        # Gen2 command-line parser; detector-level ids are grouped by visit.
        parser = ArgumentParser(name=cls._DefaultName)
        parser.add_id_argument("--id", cls.inputDataset,
                               help="data ID, e.g. --id visit=12345",
                               ContainerClass=VisitDataIdContainer)
        return parser

    def writeMetadata(self, dataRef):
        """No metadata to write.
        """
        pass

    def writeConfig(self, butler, clobber=False, doBackup=True):
        """No config to write.
        """
        pass

1309 

1310 

class MakeCcdVisitTableConnections(pipeBase.PipelineTaskConnections,
                                   dimensions=("instrument",),
                                   defaultTemplates={}):
    # Deferred handles: the task reads one visitSummary catalog at a time.
    visitSummaryRefs = connectionTypes.Input(
        doc="Data references for per-visit consolidated exposure metadata from ConsolidateVisitSummaryTask",
        name="visitSummary",
        storageClass="ExposureCatalog",
        dimensions=("instrument", "visit"),
        multiple=True,
        deferLoad=True,
    )
    # Instrument-wide table with one row per (visit, detector).
    outputCatalog = connectionTypes.Output(
        doc="CCD and Visit metadata table",
        name="CcdVisitTable",
        storageClass="DataFrame",
        dimensions=("instrument",)
    )

1328 

1329 

class MakeCcdVisitTableConfig(pipeBase.PipelineTaskConfig,
                              pipelineConnections=MakeCcdVisitTableConnections):
    """Config for MakeCcdVisitTableTask; no configurable options."""
    pass

1333 

1334 

class MakeCcdVisitTableTask(CmdLineTask, pipeBase.PipelineTask):
    """Produce a `ccdVisitTable` from the `visitSummary` exposure catalogs.
    """
    _DefaultName = 'makeCcdVisitTable'
    ConfigClass = MakeCcdVisitTableConfig

    def run(self, visitSummaryRefs):
        """ Make a table of ccd information from the `visitSummary` catalogs.

        Parameters
        ----------
        visitSummaryRefs : `list` of `lsst.daf.butler.DeferredDatasetHandle`
            List of DeferredDatasetHandles pointing to exposure catalogs with
            per-detector summary information.

        Returns
        -------
        result : `lsst.pipe.Base.Struct`
            Results struct with attribute:

            ``outputCatalog``
                Catalog of ccd and visit information.
        """
        ccdEntries = []
        for visitSummaryRef in visitSummaryRefs:
            visitSummary = visitSummaryRef.get()
            # The visitInfo is shared by every detector row in the catalog.
            visitInfo = visitSummary[0].getVisitInfo()

            summaryTable = visitSummary.asAstropy()
            selectColumns = ['id', 'visit', 'physical_filter', 'ra', 'decl', 'zenithDistance', 'zeroPoint',
                             'psfSigma', 'skyBg', 'skyNoise']
            ccdEntry = summaryTable[selectColumns].to_pandas().set_index('id')
            ccdEntry = ccdEntry.rename(columns={"physical_filter": "filterName", "visit": "visitId"})

            # Use `detectorId` rather than shadowing the builtin `id`.
            dataIds = [DataCoordinate.standardize(visitSummaryRef.dataId, detector=detectorId)
                       for detectorId in summaryTable['id']]
            packer = visitSummaryRef.dataId.universe.makePacker('visit_detector', visitSummaryRef.dataId)
            ccdVisitIds = [packer.pack(dataId) for dataId in dataIds]
            ccdEntry['ccdVisitId'] = ccdVisitIds

            # Convert the PSF determinant radius (pixels) to a Gaussian FWHM
            # in arcseconds: FWHM = sigma * sqrt(8 ln 2) * pixel scale.
            pixToArcseconds = np.array([vR.getWcs().getPixelScale().asArcseconds() for vR in visitSummary])
            ccdEntry["seeing"] = visitSummary['psfSigma'] * np.sqrt(8 * np.log(2)) * pixToArcseconds

            ccdEntry["skyRotation"] = visitInfo.getBoresightRotAngle().asDegrees()
            ccdEntry["expMidpt"] = visitInfo.getDate().toPython()
            expTime = visitInfo.getExposureTime()
            ccdEntry['expTime'] = expTime
            # Observation start is half the exposure time before the midpoint.
            ccdEntry["obsStart"] = ccdEntry["expMidpt"] - 0.5 * pd.Timedelta(seconds=expTime)
            ccdEntry['darkTime'] = visitInfo.getDarkTime()
            ccdEntry['xSize'] = summaryTable['bbox_max_x'] - summaryTable['bbox_min_x']
            ccdEntry['ySize'] = summaryTable['bbox_max_y'] - summaryTable['bbox_min_y']
            # Corner columns: index 0=lower-left, 1=upper-left, 2=upper-right,
            # 3=lower-right, matching the stored corner arrays.
            ccdEntry['llcra'] = summaryTable['raCorners'][:, 0]
            ccdEntry['llcdec'] = summaryTable['decCorners'][:, 0]
            ccdEntry['ulcra'] = summaryTable['raCorners'][:, 1]
            ccdEntry['ulcdec'] = summaryTable['decCorners'][:, 1]
            ccdEntry['urcra'] = summaryTable['raCorners'][:, 2]
            ccdEntry['urcdec'] = summaryTable['decCorners'][:, 2]
            ccdEntry['lrcra'] = summaryTable['raCorners'][:, 3]
            ccdEntry['lrcdec'] = summaryTable['decCorners'][:, 3]
            # TODO: DM-30618, Add raftName, nExposures, ccdTemp, binX, binY, and flags,
            # and decide if WCS, and llcx, llcy, ulcx, ulcy, etc. values are actually wanted.
            ccdEntries.append(ccdEntry)

        outputCatalog = pd.concat(ccdEntries)
        return pipeBase.Struct(outputCatalog=outputCatalog)

1398 

1399 

class MakeVisitTableConnections(pipeBase.PipelineTaskConnections,
                                dimensions=("instrument",),
                                defaultTemplates={}):
    # Deferred handles: the task reads one visitSummary catalog at a time.
    visitSummaries = connectionTypes.Input(
        doc="Per-visit consolidated exposure metadata from ConsolidateVisitSummaryTask",
        name="visitSummary",
        storageClass="ExposureCatalog",
        dimensions=("instrument", "visit",),
        multiple=True,
        deferLoad=True,
    )
    # Instrument-wide table with one row per visit.
    outputCatalog = connectionTypes.Output(
        doc="Visit metadata table",
        name="visitTable",
        storageClass="DataFrame",
        dimensions=("instrument",)
    )

1417 

1418 

class MakeVisitTableConfig(pipeBase.PipelineTaskConfig,
                           pipelineConnections=MakeVisitTableConnections):
    """Config for MakeVisitTableTask; no configurable options."""
    pass

1422 

1423 

class MakeVisitTableTask(CmdLineTask, pipeBase.PipelineTask):
    """Produce a `visitTable` from the `visitSummary` exposure catalogs.
    """
    _DefaultName = 'makeVisitTable'
    ConfigClass = MakeVisitTableConfig

    def run(self, visitSummaries):
        """ Make a table of visit information from the `visitSummary` catalogs

        Parameters
        ----------
        visitSummaries : list of `lsst.afw.table.ExposureCatalog`
            List of exposure catalogs with per-detector summary information.
        Returns
        -------
        result : `lsst.pipe.Base.Struct`
            Results struct with attribute:
            ``outputCatalog``
                Catalog of visit information.
        """
        visitEntries = []
        for summaryHandle in visitSummaries:
            # Visit-level quantities are taken from the first detector row;
            # they are shared across the catalog.
            summaryCatalog = summaryHandle.get()
            firstRow = summaryCatalog[0]
            visitInfo = firstRow.getVisitInfo()

            boresight = visitInfo.getBoresightRaDec()
            azAlt = visitInfo.getBoresightAzAlt()
            altitudeDeg = azAlt.getLatitude().asDegrees()

            # Key insertion order fixes the output DataFrame column order.
            visitEntries.append({
                "visitId": firstRow['visit'],
                "filterName": firstRow['physical_filter'],
                "ra": boresight.getRa().asDegrees(),
                "decl": boresight.getDec().asDegrees(),
                "skyRotation": visitInfo.getBoresightRotAngle().asDegrees(),
                "azimuth": azAlt.getLongitude().asDegrees(),
                "altitude": altitudeDeg,
                "zenithDistance": 90 - altitudeDeg,
                "airmass": visitInfo.getBoresightAirmass(),
                "obsStart": visitInfo.getDate().toPython(),
                "expTime": visitInfo.getExposureTime(),
            })
            # TODO: DM-30623, Add programId, exposureType, expMidpt, cameraTemp, mirror1Temp, mirror2Temp,
            # mirror3Temp, domeTemp, externalTemp, dimmSeeing, pwvGPS, pwvMW, flags, nExposures

        return pipeBase.Struct(outputCatalog=pd.DataFrame(data=visitEntries))