Coverage for python/lsst/pipe/tasks/postprocess.py: 26%
653 statements
1# This file is part of pipe_tasks.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <https://www.gnu.org/licenses/>.
22__all__ = ["WriteObjectTableConfig", "WriteObjectTableTask",
23 "WriteSourceTableConfig", "WriteSourceTableTask",
24 "WriteRecalibratedSourceTableConfig", "WriteRecalibratedSourceTableTask",
25 "PostprocessAnalysis",
26 "TransformCatalogBaseConfig", "TransformCatalogBaseTask",
27 "TransformObjectCatalogConfig", "TransformObjectCatalogTask",
28 "ConsolidateObjectTableConfig", "ConsolidateObjectTableTask",
29 "TransformSourceTableConfig", "TransformSourceTableTask",
30 "ConsolidateVisitSummaryConfig", "ConsolidateVisitSummaryTask",
31 "ConsolidateSourceTableConfig", "ConsolidateSourceTableTask",
32 "MakeCcdVisitTableConfig", "MakeCcdVisitTableTask",
33 "MakeVisitTableConfig", "MakeVisitTableTask",
34 "WriteForcedSourceTableConfig", "WriteForcedSourceTableTask",
35 "TransformForcedSourceTableConfig", "TransformForcedSourceTableTask",
36 "ConsolidateTractConfig", "ConsolidateTractTask"]
38import functools
39import pandas as pd
40import logging
41import numpy as np
42import numbers
43import os
45import lsst.geom
46import lsst.pex.config as pexConfig
47import lsst.pipe.base as pipeBase
48import lsst.daf.base as dafBase
49from lsst.obs.base import ExposureIdInfo
50from lsst.pipe.base import connectionTypes
51import lsst.afw.table as afwTable
52from lsst.afw.image import ExposureSummaryStats
53from lsst.meas.base import SingleFrameMeasurementTask
54from lsst.daf.butler import DeferredDatasetHandle, DataCoordinate
55from lsst.skymap import BaseSkyMap
57from .parquetTable import ParquetTable
58from .functors import CompositeFunctor, Column
60log = logging.getLogger(__name__)
63def flattenFilters(df, noDupCols=['coord_ra', 'coord_dec'], camelCase=False, inputBands=None):
64 """Flattens a dataframe with multilevel column index.
65 """
66 newDf = pd.DataFrame()
67 # band is the level 0 index
68 dfBands = df.columns.unique(level=0).values
69 for band in dfBands:
70 subdf = df[band]
71 columnFormat = '{0}{1}' if camelCase else '{0}_{1}'
72 newColumns = {c: columnFormat.format(band, c)
73 for c in subdf.columns if c not in noDupCols}
74 cols = list(newColumns.keys())
75 newDf = pd.concat([newDf, subdf[cols].rename(columns=newColumns)], axis=1)
77 # A band must be present in both the input and the output, or else its columns are all NaN:
78 presentBands = dfBands if inputBands is None else list(set(inputBands).intersection(dfBands))
79 # Get the unexploded columns from any present band's partition
80 noDupDf = df[presentBands[0]][noDupCols]
81 newDf = pd.concat([noDupDf, newDf], axis=1)
82 return newDf
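# A minimal sketch of what ``flattenFilters`` produces (illustrative only;
# the toy column names below are hypothetical, not pipeline columns):
#
#     cols = pd.MultiIndex.from_tuples(
#         [('g', 'psfFlux'), ('r', 'psfFlux'),
#          ('g', 'coord_ra'), ('g', 'coord_dec')],
#         names=('band', 'column'))
#     df = pd.DataFrame([[1.0, 2.0, 10.0, 20.0]], columns=cols)
#     flattenFilters(df).columns.tolist()
#     # -> ['coord_ra', 'coord_dec', 'g_psfFlux', 'r_psfFlux']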
85class WriteObjectTableConnections(pipeBase.PipelineTaskConnections,
86 defaultTemplates={"coaddName": "deep"},
87 dimensions=("tract", "patch", "skymap")):
88 inputCatalogMeas = connectionTypes.Input(
89 doc="Catalog of source measurements on the deepCoadd.",
90 dimensions=("tract", "patch", "band", "skymap"),
91 storageClass="SourceCatalog",
92 name="{coaddName}Coadd_meas",
93 multiple=True
94 )
95 inputCatalogForcedSrc = connectionTypes.Input(
96 doc="Catalog of forced measurements (shape and position parameters held fixed) on the deepCoadd.",
97 dimensions=("tract", "patch", "band", "skymap"),
98 storageClass="SourceCatalog",
99 name="{coaddName}Coadd_forced_src",
100 multiple=True
101 )
102 inputCatalogRef = connectionTypes.Input(
103 doc="Catalog marking the primary detection (which band provides a good shape and position)"
104 "for each detection in deepCoadd_mergeDet.",
105 dimensions=("tract", "patch", "skymap"),
106 storageClass="SourceCatalog",
107 name="{coaddName}Coadd_ref"
108 )
109 outputCatalog = connectionTypes.Output(
110 doc="A vertical concatenation of the deepCoadd_{ref|meas|forced_src} catalogs, "
111 "stored as a DataFrame with a multi-level column index per-patch.",
112 dimensions=("tract", "patch", "skymap"),
113 storageClass="DataFrame",
114 name="{coaddName}Coadd_obj"
115 )
118class WriteObjectTableConfig(pipeBase.PipelineTaskConfig,
119 pipelineConnections=WriteObjectTableConnections):
120 engine = pexConfig.Field(
121 dtype=str,
122 default="pyarrow",
123 doc="Parquet engine for writing (pyarrow or fastparquet)"
124 )
125 coaddName = pexConfig.Field(
126 dtype=str,
127 default="deep",
128 doc="Name of coadd"
129 )
132class WriteObjectTableTask(pipeBase.PipelineTask):
133 """Write filter-merged source tables to parquet
134 """
135 _DefaultName = "writeObjectTable"
136 ConfigClass = WriteObjectTableConfig
138 # Names of table datasets to be merged
139 inputDatasets = ('forced_src', 'meas', 'ref')
141 # Tag of output dataset written by `MergeSourcesTask.write`
142 outputDataset = 'obj'
144 def runQuantum(self, butlerQC, inputRefs, outputRefs):
145 inputs = butlerQC.get(inputRefs)
147 measDict = {ref.dataId['band']: {'meas': cat} for ref, cat in
148 zip(inputRefs.inputCatalogMeas, inputs['inputCatalogMeas'])}
149 forcedSourceDict = {ref.dataId['band']: {'forced_src': cat} for ref, cat in
150 zip(inputRefs.inputCatalogForcedSrc, inputs['inputCatalogForcedSrc'])}
152 catalogs = {}
153 for band in measDict.keys():
154 catalogs[band] = {'meas': measDict[band]['meas'],
155 'forced_src': forcedSourceDict[band]['forced_src'],
156 'ref': inputs['inputCatalogRef']}
157 dataId = butlerQC.quantum.dataId
158 df = self.run(catalogs=catalogs, tract=dataId['tract'], patch=dataId['patch'])
159 outputs = pipeBase.Struct(outputCatalog=df)
160 butlerQC.put(outputs, outputRefs)
162 def run(self, catalogs, tract, patch):
163 """Merge multiple catalogs.
165 Parameters
166 ----------
167 catalogs : `dict`
168 Mapping from filter names to dict of catalogs.
169 tract : int
170 tractId to use for the tractId column.
171 patch : str
172 patchId to use for the patchId column.
174 Returns
175 -------
176 catalog : `pandas.DataFrame`
177 Merged dataframe.
178 """
180 dfs = []
181 for filt, tableDict in catalogs.items():
182 for dataset, table in tableDict.items():
183 # Convert afwTable to pandas DataFrame
184 df = table.asAstropy().to_pandas().set_index('id', drop=True)
186 # Sort columns by name, to ensure matching schema among patches
187 df = df.reindex(sorted(df.columns), axis=1)
188 df['tractId'] = tract
189 df['patchId'] = patch
191 # Make columns a 3-level MultiIndex
192 df.columns = pd.MultiIndex.from_tuples([(dataset, filt, c) for c in df.columns],
193 names=('dataset', 'band', 'column'))
194 dfs.append(df)
196 catalog = functools.reduce(lambda d1, d2: d1.join(d2), dfs)
197 return catalog
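# Hedged sketch of the ``catalogs`` structure that ``run`` expects (variable
# names here are hypothetical; in the pipeline they come from ``runQuantum``):
#
#     catalogs = {
#         'g': {'meas': gMeas, 'forced_src': gForced, 'ref': refCat},
#         'r': {'meas': rMeas, 'forced_src': rForced, 'ref': refCat},
#     }
#     df = WriteObjectTableTask().run(catalogs, tract=9813, patch=42)
#     # df.columns is a 3-level MultiIndex: (dataset, band, column)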
200class WriteSourceTableConnections(pipeBase.PipelineTaskConnections,
201 defaultTemplates={"catalogType": ""},
202 dimensions=("instrument", "visit", "detector")):
204 catalog = connectionTypes.Input(
205 doc="Input full-depth catalog of sources produced by CalibrateTask",
206 name="{catalogType}src",
207 storageClass="SourceCatalog",
208 dimensions=("instrument", "visit", "detector")
209 )
210 outputCatalog = connectionTypes.Output(
211 doc="Catalog of sources, `src` in Parquet format. The 'id' column is "
212 "replaced with an index; all other columns are unchanged.",
213 name="{catalogType}source",
214 storageClass="DataFrame",
215 dimensions=("instrument", "visit", "detector")
216 )
219class WriteSourceTableConfig(pipeBase.PipelineTaskConfig,
220 pipelineConnections=WriteSourceTableConnections):
221 pass
224class WriteSourceTableTask(pipeBase.PipelineTask):
225 """Write source table to parquet.
226 """
227 _DefaultName = "writeSourceTable"
228 ConfigClass = WriteSourceTableConfig
230 def runQuantum(self, butlerQC, inputRefs, outputRefs):
231 inputs = butlerQC.get(inputRefs)
232 inputs['ccdVisitId'] = butlerQC.quantum.dataId.pack("visit_detector")
233 result = self.run(**inputs).table
234 outputs = pipeBase.Struct(outputCatalog=result.toDataFrame())
235 butlerQC.put(outputs, outputRefs)
237 def run(self, catalog, ccdVisitId=None, **kwargs):
238 """Convert `src` catalog to parquet
240 Parameters
241 ----------
242 catalog: `afwTable.SourceCatalog`
243 catalog to be converted
244 ccdVisitId: `int`
245 ccdVisitId to be added as a column
247 Returns
248 -------
249 result : `lsst.pipe.base.Struct`
250 ``table``
251 `ParquetTable` version of the input catalog
252 """
253 self.log.info("Generating parquet table from src catalog ccdVisitId=%s", ccdVisitId)
254 df = catalog.asAstropy().to_pandas().set_index('id', drop=True)
255 df['ccdVisitId'] = ccdVisitId
256 return pipeBase.Struct(table=ParquetTable(dataFrame=df))
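# Illustrative use of ``run`` outside of ``runQuantum`` (a sketch; ``srcCat``
# and ``ccdVisitId`` are assumed to be supplied by the caller):
#
#     result = WriteSourceTableTask().run(srcCat, ccdVisitId=ccdVisitId)
#     df = result.table.toDataFrame()   # pandas view of the ParquetTable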
259class WriteRecalibratedSourceTableConnections(WriteSourceTableConnections,
260 defaultTemplates={"catalogType": "",
261 "skyWcsName": "jointcal",
262 "photoCalibName": "fgcm"},
263 dimensions=("instrument", "visit", "detector", "skymap")):
264 skyMap = connectionTypes.Input(
265 doc="skyMap needed to choose which tract-level calibrations to use when multiple available",
266 name=BaseSkyMap.SKYMAP_DATASET_TYPE_NAME,
267 storageClass="SkyMap",
268 dimensions=("skymap",),
269 )
270 exposure = connectionTypes.Input(
271 doc="Input exposure to perform photometry on.",
272 name="calexp",
273 storageClass="ExposureF",
274 dimensions=["instrument", "visit", "detector"],
275 )
276 externalSkyWcsTractCatalog = connectionTypes.Input(
277 doc=("Per-tract, per-visit wcs calibrations. These catalogs use the detector "
278 "id for the catalog id, sorted on id for fast lookup."),
279 name="{skyWcsName}SkyWcsCatalog",
280 storageClass="ExposureCatalog",
281 dimensions=["instrument", "visit", "tract"],
282 multiple=True
283 )
284 externalSkyWcsGlobalCatalog = connectionTypes.Input(
285 doc=("Per-visit wcs calibrations computed globally (with no tract information). "
286 "These catalogs use the detector id for the catalog id, sorted on id for "
287 "fast lookup."),
288 name="{skyWcsName}SkyWcsCatalog",
289 storageClass="ExposureCatalog",
290 dimensions=["instrument", "visit"],
291 )
292 externalPhotoCalibTractCatalog = connectionTypes.Input(
293 doc=("Per-tract, per-visit photometric calibrations. These catalogs use the "
294 "detector id for the catalog id, sorted on id for fast lookup."),
295 name="{photoCalibName}PhotoCalibCatalog",
296 storageClass="ExposureCatalog",
297 dimensions=["instrument", "visit", "tract"],
298 multiple=True
299 )
300 externalPhotoCalibGlobalCatalog = connectionTypes.Input(
301 doc=("Per-visit photometric calibrations computed globally (with no tract "
302 "information). These catalogs use the detector id for the catalog id, "
303 "sorted on id for fast lookup."),
304 name="{photoCalibName}PhotoCalibCatalog",
305 storageClass="ExposureCatalog",
306 dimensions=["instrument", "visit"],
307 )
309 def __init__(self, *, config=None):
310 super().__init__(config=config)
311 # Same connection boilerplate as all other applications of
312 # Global/Tract calibrations
313 if config.doApplyExternalSkyWcs and config.doReevaluateSkyWcs:
314 if config.useGlobalExternalSkyWcs:
315 self.inputs.remove("externalSkyWcsTractCatalog")
316 else:
317 self.inputs.remove("externalSkyWcsGlobalCatalog")
318 else:
319 self.inputs.remove("externalSkyWcsTractCatalog")
320 self.inputs.remove("externalSkyWcsGlobalCatalog")
321 if config.doApplyExternalPhotoCalib and config.doReevaluatePhotoCalib:
322 if config.useGlobalExternalPhotoCalib:
323 self.inputs.remove("externalPhotoCalibTractCatalog")
324 else:
325 self.inputs.remove("externalPhotoCalibGlobalCatalog")
326 else:
327 self.inputs.remove("externalPhotoCalibTractCatalog")
328 self.inputs.remove("externalPhotoCalibGlobalCatalog")
331class WriteRecalibratedSourceTableConfig(WriteSourceTableConfig,
332 pipelineConnections=WriteRecalibratedSourceTableConnections):
334 doReevaluatePhotoCalib = pexConfig.Field(
335 dtype=bool,
336 default=True,
337 doc=("Add or replace local photoCalib columns")
338 )
339 doReevaluateSkyWcs = pexConfig.Field(
340 dtype=bool,
341 default=True,
342 doc=("Add or replace local WCS columns and update the coord columns, coord_ra and coord_dec")
343 )
344 doApplyExternalPhotoCalib = pexConfig.Field(
345 dtype=bool,
346 default=True,
347 doc=("If and only if doReevaluatePhotoCalib, apply the photometric calibrations from an external ",
348 "algorithm such as FGCM or jointcal, else use the photoCalib already attached to the exposure."),
349 )
350 doApplyExternalSkyWcs = pexConfig.Field(
351 dtype=bool,
352 default=True,
353 doc=("if and only if doReevaluateSkyWcs, apply the WCS from an external algorithm such as jointcal, ",
354 "else use the wcs already attached to the exposure."),
355 )
356 useGlobalExternalPhotoCalib = pexConfig.Field(
357 dtype=bool,
358 default=True,
359 doc=("When using doApplyExternalPhotoCalib, use 'global' calibrations "
360 "that are not run per-tract. When False, use per-tract photometric "
361 "calibration files.")
362 )
363 useGlobalExternalSkyWcs = pexConfig.Field(
364 dtype=bool,
365 default=False,
366 doc=("When using doApplyExternalSkyWcs, use 'global' calibrations "
367 "that are not run per-tract. When False, use per-tract wcs "
368 "files.")
369 )
371 def validate(self):
372 super().validate()
373 if self.doApplyExternalSkyWcs and not self.doReevaluateSkyWcs:
374 log.warning("doApplyExternalSkyWcs=True but doReevaluateSkyWcs=False"
375 "External SkyWcs will not be read or evaluated.")
376 if self.doApplyExternalPhotoCalib and not self.doReevaluatePhotoCalib:
377 log.warning("doApplyExternalPhotoCalib=True but doReevaluatePhotoCalib=False."
378 "External PhotoCalib will not be read or evaluated.")
381class WriteRecalibratedSourceTableTask(WriteSourceTableTask):
382 """Write source table to parquet
383 """
384 _DefaultName = "writeRecalibratedSourceTable"
385 ConfigClass = WriteRecalibratedSourceTableConfig
387 def runQuantum(self, butlerQC, inputRefs, outputRefs):
388 inputs = butlerQC.get(inputRefs)
389 inputs['ccdVisitId'] = butlerQC.quantum.dataId.pack("visit_detector")
390 inputs['exposureIdInfo'] = ExposureIdInfo.fromDataId(butlerQC.quantum.dataId, "visit_detector")
392 if self.config.doReevaluatePhotoCalib or self.config.doReevaluateSkyWcs:
393 if self.config.doApplyExternalPhotoCalib or self.config.doApplyExternalSkyWcs:
394 inputs['exposure'] = self.attachCalibs(inputRefs, **inputs)
396 inputs['catalog'] = self.addCalibColumns(**inputs)
398 result = self.run(**inputs).table
399 outputs = pipeBase.Struct(outputCatalog=result.toDataFrame())
400 butlerQC.put(outputs, outputRefs)
402 def attachCalibs(self, inputRefs, skyMap, exposure, externalSkyWcsGlobalCatalog=None,
403 externalSkyWcsTractCatalog=None, externalPhotoCalibGlobalCatalog=None,
404 externalPhotoCalibTractCatalog=None, **kwargs):
405 """Apply external calibrations to exposure per configuration
407 When multiple tract-level calibrations overlap, select the one with the
408 center closest to detector.
410 Parameters
411 ----------
412 inputRefs : `lsst.pipe.base.InputQuantizedConnection`
413 Quantized connections, used to look up the dataIds of the tract-level calibs.
414 skyMap : `lsst.skymap.SkyMap`
415 exposure : `lsst.afw.image.exposure.Exposure`
416 Input exposure to adjust calibrations.
417 externalSkyWcsGlobalCatalog : `lsst.afw.table.ExposureCatalog`, optional
418 Exposure catalog with external skyWcs to be applied per config
419 externalSkyWcsTractCatalog : `lsst.afw.table.ExposureCatalog`, optional
420 Exposure catalog with external skyWcs to be applied per config
421 externalPhotoCalibGlobalCatalog : `lsst.afw.table.ExposureCatalog`, optional
422 Exposure catalog with external photoCalib to be applied per config
423 externalPhotoCalibTractCatalog : `lsst.afw.table.ExposureCatalog`, optional
    Exposure catalog with external photoCalib to be applied per config
426 Returns
427 -------
428 exposure : `lsst.afw.image.exposure.Exposure`
429 Exposure with adjusted calibrations.
430 """
431 if not self.config.doApplyExternalSkyWcs:
432 # Do not modify the exposure's SkyWcs
433 externalSkyWcsCatalog = None
434 elif self.config.useGlobalExternalSkyWcs:
435 # Use the global external SkyWcs
436 externalSkyWcsCatalog = externalSkyWcsGlobalCatalog
437 self.log.info('Applying global SkyWcs')
438 else:
439 # use tract-level external SkyWcs from the closest overlapping tract
440 inputRef = getattr(inputRefs, 'externalSkyWcsTractCatalog')
441 tracts = [ref.dataId['tract'] for ref in inputRef]
442 if len(tracts) == 1:
443 ind = 0
444 self.log.info('Applying tract-level SkyWcs from tract %s', tracts[ind])
445 else:
446 if exposure.getWcs() is None: # TODO: could this look-up use the externalPhotoCalib?
447 raise ValueError("Trying to locate nearest tract, but exposure.wcs is None.")
448 ind = self.getClosestTract(tracts, skyMap,
449 exposure.getBBox(), exposure.getWcs())
450 self.log.info('Multiple overlapping externalSkyWcsTractCatalogs found (%s). '
451 'Applying closest to detector center: tract=%s', str(tracts), tracts[ind])
453 externalSkyWcsCatalog = externalSkyWcsTractCatalog[ind]
455 if not self.config.doApplyExternalPhotoCalib:
456 # Do not modify the exposure's PhotoCalib
457 externalPhotoCalibCatalog = None
458 elif self.config.useGlobalExternalPhotoCalib:
459 # Use the global external PhotoCalib
460 externalPhotoCalibCatalog = externalPhotoCalibGlobalCatalog
461 self.log.info('Applying global PhotoCalib')
462 else:
463 # use tract-level external PhotoCalib from the closest overlapping tract
464 inputRef = getattr(inputRefs, 'externalPhotoCalibTractCatalog')
465 tracts = [ref.dataId['tract'] for ref in inputRef]
466 if len(tracts) == 1:
467 ind = 0
468 self.log.info('Applying tract-level PhotoCalib from tract %s', tracts[ind])
469 else:
470 ind = self.getClosestTract(tracts, skyMap,
471 exposure.getBBox(), exposure.getWcs())
472 self.log.info('Multiple overlapping externalPhotoCalibTractCatalogs found (%s). '
473 'Applying closest to detector center: tract=%s', str(tracts), tracts[ind])
475 externalPhotoCalibCatalog = externalPhotoCalibTractCatalog[ind]
477 return self.prepareCalibratedExposure(exposure, externalSkyWcsCatalog, externalPhotoCalibCatalog)
479 def getClosestTract(self, tracts, skyMap, bbox, wcs):
480 """Find the index of the tract closest to detector from list of tractIds
482 Parameters
483 ----------
484 tracts: `list` [`int`]
485 Iterable of integer tractIds
486 skyMap : `lsst.skymap.SkyMap`
487 skyMap to lookup tract geometry and wcs
488 bbox : `lsst.geom.Box2I`
489 Detector bbox, the center of which will be compared to tract centers.
490 wcs : `lsst.afw.geom.SkyWcs`
491 Detector Wcs object to map the detector center to SkyCoord
493 Returns
494 -------
495 index : `int`
496 """
497 if len(tracts) == 1:
498 return 0
500 center = wcs.pixelToSky(bbox.getCenter())
501 sep = []
502 for tractId in tracts:
503 tract = skyMap[tractId]
504 tractCenter = tract.getWcs().pixelToSky(tract.getBBox().getCenter())
505 sep.append(center.separation(tractCenter))
507 return np.argmin(sep)
509 def prepareCalibratedExposure(self, exposure, externalSkyWcsCatalog=None, externalPhotoCalibCatalog=None):
510 """Prepare a calibrated exposure and apply external calibrations
511 if so configured.
513 Parameters
514 ----------
515 exposure : `lsst.afw.image.exposure.Exposure`
516 Input exposure to adjust calibrations.
517 externalSkyWcsCatalog : `lsst.afw.table.ExposureCatalog`, optional
518 Exposure catalog with external skyWcs to be applied
519 if config.doApplyExternalSkyWcs=True. Catalog uses the detector id
520 for the catalog id, sorted on id for fast lookup.
521 externalPhotoCalibCatalog : `lsst.afw.table.ExposureCatalog`, optional
522 Exposure catalog with external photoCalib to be applied
523 if config.doApplyExternalPhotoCalib=True. Catalog uses the detector
524 id for the catalog id, sorted on id for fast lookup.
526 Returns
527 -------
528 exposure : `lsst.afw.image.exposure.Exposure`
529 Exposure with adjusted calibrations.
530 """
531 detectorId = exposure.getInfo().getDetector().getId()
533 if externalPhotoCalibCatalog is not None:
534 row = externalPhotoCalibCatalog.find(detectorId)
535 if row is None:
536 self.log.warning("Detector id %s not found in externalPhotoCalibCatalog; "
537 "Using original photoCalib.", detectorId)
538 else:
539 photoCalib = row.getPhotoCalib()
540 if photoCalib is None:
541 self.log.warning("Detector id %s has None for photoCalib in externalPhotoCalibCatalog; "
542 "Using original photoCalib.", detectorId)
543 else:
544 exposure.setPhotoCalib(photoCalib)
546 if externalSkyWcsCatalog is not None:
547 row = externalSkyWcsCatalog.find(detectorId)
548 if row is None:
549 self.log.warning("Detector id %s not found in externalSkyWcsCatalog; "
550 "Using original skyWcs.", detectorId)
551 else:
552 skyWcs = row.getWcs()
553 if skyWcs is None:
554 self.log.warning("Detector id %s has None for skyWcs in externalSkyWcsCatalog; "
555 "Using original skyWcs.", detectorId)
556 else:
557 exposure.setWcs(skyWcs)
559 return exposure
561 def addCalibColumns(self, catalog, exposure, exposureIdInfo, **kwargs):
562 """Add replace columns with calibs evaluated at each centroid
564 Add or replace 'base_LocalWcs' `base_LocalPhotoCalib' columns in a
565 a source catalog, by rerunning the plugins.
567 Parameters
568 ----------
569 catalog : `lsst.afw.table.SourceCatalog`
570 catalog to which calib columns will be added
571 exposure : `lsst.afw.image.exposure.Exposure`
572 Exposure with attached PhotoCalibs and SkyWcs attributes to be
573 reevaluated at local centroids. Pixels are not required.
574 exposureIdInfo : `lsst.obs.base.ExposureIdInfo`
576 Returns
577 -------
578 newCat: `lsst.afw.table.SourceCatalog`
579 Source Catalog with requested local calib columns
580 """
581 measureConfig = SingleFrameMeasurementTask.ConfigClass()
582 measureConfig.doReplaceWithNoise = False
584 # Clear all slots, because we aren't running the relevant plugins.
585 for slot in measureConfig.slots:
586 setattr(measureConfig.slots, slot, None)
588 measureConfig.plugins.names = []
589 if self.config.doReevaluateSkyWcs:
590 measureConfig.plugins.names.add('base_LocalWcs')
591 self.log.info("Re-evaluating base_LocalWcs plugin")
592 if self.config.doReevaluatePhotoCalib:
593 measureConfig.plugins.names.add('base_LocalPhotoCalib')
594 self.log.info("Re-evaluating base_LocalPhotoCalib plugin")
595 pluginsNotToCopy = tuple(measureConfig.plugins.names)
597 # Create a new schema and catalog
598 # Copy all columns from original except for the ones to reevaluate
599 aliasMap = catalog.schema.getAliasMap()
600 mapper = afwTable.SchemaMapper(catalog.schema)
601 for item in catalog.schema:
602 if not item.field.getName().startswith(pluginsNotToCopy):
603 mapper.addMapping(item.key)
605 schema = mapper.getOutputSchema()
606 measurement = SingleFrameMeasurementTask(config=measureConfig, schema=schema)
607 schema.setAliasMap(aliasMap)
608 newCat = afwTable.SourceCatalog(schema)
609 newCat.extend(catalog, mapper=mapper)
611 # Fluxes in sourceCatalogs are in counts, so there are no fluxes to
612 # update here. LocalPhotoCalibs are applied during transform tasks.
613 # Update coord_ra/coord_dec, which are expected to be positions on the
614 # sky and are used as such in sdm tables without transform
615 if self.config.doReevaluateSkyWcs and exposure.wcs is not None:
616 afwTable.updateSourceCoords(exposure.wcs, newCat)
618 measurement.run(measCat=newCat, exposure=exposure, exposureId=exposureIdInfo.expId)
620 return newCat
623class PostprocessAnalysis(object):
624 """Calculate columns from ParquetTable.
626 This object manages and organizes an arbitrary set of computations
627 on a catalog. The catalog is defined by a
628 `~lsst.pipe.tasks.parquetTable.ParquetTable` object (or list thereof), such
629 as a ``deepCoadd_obj`` dataset, and the computations are defined by a
630 collection of `lsst.pipe.tasks.functors.Functor` objects (or, equivalently,
631 a ``CompositeFunctor``).
633 After the object is initialized, accessing the ``.df`` attribute (which
634 holds the `pandas.DataFrame` containing the results of the calculations)
635 triggers computation of said dataframe.
637 One of the conveniences of using this object is the ability to define a
638 desired common filter for all functors. This enables the same functor
639 collection to be passed to several different `PostprocessAnalysis` objects
640 without having to change the original functor collection, since the ``filt``
641 keyword argument of this object triggers an overwrite of the ``filt``
642 property for all functors in the collection.
644 This object also allows a list of refFlags to be passed, and defines a set
645 of default refFlags that are always included even if not requested.
647 If a list of `~lsst.pipe.tasks.ParquetTable` objects is passed, rather than a single one,
648 then the calculations will be mapped over all the input catalogs. In
649 principle, it should be straightforward to parallelize this activity, but
650 initial tests have failed (see TODO in code comments).
652 Parameters
653 ----------
654 parq : `~lsst.pipe.tasks.ParquetTable` (or list of such)
655 Source catalog(s) for computation.
657 functors : `list`, `dict`, or `~lsst.pipe.tasks.functors.CompositeFunctor`
658 Computations to do (functors that act on ``parq``).
659 If a dict, the output
660 DataFrame will have columns keyed accordingly.
661 If a list, the column keys will come from the
662 ``.shortname`` attribute of each functor.
664 filt : `str`, optional
665 Filter in which to calculate. If provided,
666 this will overwrite any existing ``.filt`` attribute
667 of the provided functors.
669 flags : `list`, optional
670 List of flags (per-band) to include in output table.
671 Taken from the ``meas`` dataset if applied to a multilevel Object Table.
673 refFlags : `list`, optional
674 List of refFlags (only reference band) to include in output table.
676 forcedFlags : `list`, optional
677 List of flags (per-band) to include in output table.
678 Taken from the ``forced_src`` dataset if applied to a
679 multilevel Object Table. Intended for flags from measurement plugins
680 only run during multi-band forced-photometry.
681 """
682 _defaultRefFlags = []
683 _defaultFuncs = ()
685 def __init__(self, parq, functors, filt=None, flags=None, refFlags=None, forcedFlags=None):
686 self.parq = parq
687 self.functors = functors
689 self.filt = filt
690 self.flags = list(flags) if flags is not None else []
691 self.forcedFlags = list(forcedFlags) if forcedFlags is not None else []
692 self.refFlags = list(self._defaultRefFlags)
693 if refFlags is not None:
694 self.refFlags += list(refFlags)
696 self._df = None
698 @property
699 def defaultFuncs(self):
700 funcs = dict(self._defaultFuncs)
701 return funcs
703 @property
704 def func(self):
705 additionalFuncs = self.defaultFuncs
706 additionalFuncs.update({flag: Column(flag, dataset='forced_src') for flag in self.forcedFlags})
707 additionalFuncs.update({flag: Column(flag, dataset='ref') for flag in self.refFlags})
708 additionalFuncs.update({flag: Column(flag, dataset='meas') for flag in self.flags})
710 if isinstance(self.functors, CompositeFunctor):
711 func = self.functors
712 else:
713 func = CompositeFunctor(self.functors)
715 func.funcDict.update(additionalFuncs)
716 func.filt = self.filt
718 return func
720 @property
721 def noDupCols(self):
722 return [name for name, func in self.func.funcDict.items() if func.noDup or func.dataset == 'ref']
724 @property
725 def df(self):
726 if self._df is None:
727 self.compute()
728 return self._df
730 def compute(self, dropna=False, pool=None):
731 # map over multiple parquet tables
732 if type(self.parq) in (list, tuple):
733 if pool is None:
734 dflist = [self.func(parq, dropna=dropna) for parq in self.parq]
735 else:
736 # TODO: Figure out why this doesn't work (pyarrow pickling
737 # issues?)
738 dflist = pool.map(functools.partial(self.func, dropna=dropna), self.parq)
739 self._df = pd.concat(dflist)
740 else:
741 self._df = self.func(self.parq, dropna=dropna)
743 return self._df
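# Illustrative sketch of driving PostprocessAnalysis directly (``parq`` is
# assumed to be a ParquetTable, e.g. a deepCoadd_obj read from the butler):
#
#     from lsst.pipe.tasks.functors import Column
#     funcs = {'ra': Column('coord_ra'), 'dec': Column('coord_dec')}
#     analysis = PostprocessAnalysis(parq, funcs, filt='g',
#                                    refFlags=['detect_isPrimary'])
#     df = analysis.df   # accessing .df triggers compute()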
746class TransformCatalogBaseConnections(pipeBase.PipelineTaskConnections,
747 dimensions=()):
748 """Expected Connections for subclasses of TransformCatalogBaseTask.
750 Must be subclassed.
751 """
752 inputCatalog = connectionTypes.Input(
753 name="",
754 storageClass="DataFrame",
755 )
756 outputCatalog = connectionTypes.Output(
757 name="",
758 storageClass="DataFrame",
759 )
762class TransformCatalogBaseConfig(pipeBase.PipelineTaskConfig,
763 pipelineConnections=TransformCatalogBaseConnections):
764 functorFile = pexConfig.Field(
765 dtype=str,
766 doc="Path to YAML file specifying Science Data Model functors to use "
767 "when copying columns and computing calibrated values.",
768 default=None,
769 optional=True
770 )
771 primaryKey = pexConfig.Field(
772 dtype=str,
773 doc="Name of column to be set as the DataFrame index. If None, the index"
774 "will be named `id`",
775 default=None,
776 optional=True
777 )
778 columnsFromDataId = pexConfig.ListField(
779 dtype=str,
780 default=None,
781 optional=True,
782 doc="Columns to extract from the dataId",
783 )
786class TransformCatalogBaseTask(pipeBase.PipelineTask):
787 """Base class for transforming/standardizing a catalog
789 by applying functors that convert units and apply calibrations.
790 The purpose of this task is to perform a set of computations on
791 an input `ParquetTable` dataset (such as ``deepCoadd_obj``) and write the
792 results to a new dataset (which needs to be declared in an ``outputDataset``
793 attribute).
795 The calculations to be performed are defined in a YAML file that specifies
796 a set of functors to be computed, provided as
797 a ``--functorFile`` config parameter. An example of such a YAML file
798 is the following:
800 funcs:
801 psfMag:
802 functor: Mag
803 args:
804 - base_PsfFlux
805 filt: HSC-G
806 dataset: meas
807 cmodel_magDiff:
808 functor: MagDiff
809 args:
810 - modelfit_CModel
811 - base_PsfFlux
812 filt: HSC-G
813 gauss_magDiff:
814 functor: MagDiff
815 args:
816 - base_GaussianFlux
817 - base_PsfFlux
818 filt: HSC-G
819 count:
820 functor: Column
821 args:
822 - base_InputCount_value
823 filt: HSC-G
824 deconvolved_moments:
825 functor: DeconvolvedMoments
826 filt: HSC-G
827 dataset: forced_src
828 refFlags:
829 - calib_psfUsed
830 - merge_measurement_i
831 - merge_measurement_r
832 - merge_measurement_z
833 - merge_measurement_y
834 - merge_measurement_g
835 - base_PixelFlags_flag_inexact_psfCenter
836 - detect_isPrimary
838 The names for each entry under "funcs" will become the names of columns in
839 the output dataset. All the functors referenced are defined in
840 `lsst.pipe.tasks.functors`. Positional arguments to be passed to each
841 functor are in the `args` list, and any additional entries for each column
842 other than "functor" or "args" (e.g., ``'filt'``, ``'dataset'``) are treated as
843 keyword arguments to be passed to the functor initialization.
845 The "flags" entry is the default shortcut for `Column` functors.
846 All columns listed under "flags" will be copied to the output table
847 untransformed. They can be of any datatype.
848 In the special case of transforming a multi-level object table with
849 band and dataset indices (deepCoadd_obj), these will be taken from the
850 `meas` dataset and exploded out per band.
852 There are two special shortcuts that only apply when transforming
853 multi-level Object (deepCoadd_obj) tables:
854 - The "refFlags" entry is shortcut for `Column` functor
855 taken from the `'ref'` dataset if transforming an ObjectTable.
856 - The "forcedFlags" entry is shortcut for `Column` functors.
857 taken from the ``forced_src`` dataset if transforming an ObjectTable.
858 These are expanded out per band.
861 This task uses the `lsst.pipe.tasks.postprocess.PostprocessAnalysis` object
862 to organize and execute the calculations.
863 """
864 @property
865 def _DefaultName(self):
866 raise NotImplementedError('Subclass must define "_DefaultName" attribute')
868 @property
869 def outputDataset(self):
870 raise NotImplementedError('Subclass must define "outputDataset" attribute')
872 @property
873 def inputDataset(self):
874 raise NotImplementedError('Subclass must define "inputDataset" attribute')
876 @property
877 def ConfigClass(self):
878 raise NotImplementedError('Subclass must define "ConfigClass" attribute')
880 def __init__(self, *args, **kwargs):
881 super().__init__(*args, **kwargs)
882 if self.config.functorFile:
883 self.log.info('Loading transform functor definitions from %s',
884 self.config.functorFile)
885 self.funcs = CompositeFunctor.from_file(self.config.functorFile)
886 self.funcs.update(dict(PostprocessAnalysis._defaultFuncs))
887 else:
888 self.funcs = None
890 def runQuantum(self, butlerQC, inputRefs, outputRefs):
891 inputs = butlerQC.get(inputRefs)
892 if self.funcs is None:
893 raise ValueError("config.functorFile is None. "
894 "Must be a valid path to yaml in order to run Task as a PipelineTask.")
895 result = self.run(parq=inputs['inputCatalog'], funcs=self.funcs,
896 dataId=outputRefs.outputCatalog.dataId.full)
897 outputs = pipeBase.Struct(outputCatalog=result)
898 butlerQC.put(outputs, outputRefs)
900 def run(self, parq, funcs=None, dataId=None, band=None):
901 """Do postprocessing calculations
903 Takes a `ParquetTable` object and dataId,
904 returns a dataframe with results of postprocessing calculations.
906 Parameters
907 ----------
908 parq : `lsst.pipe.tasks.parquetTable.ParquetTable`
909 ParquetTable from which calculations are done.
910 funcs : `lsst.pipe.tasks.functors.Functors`
911 Functors to apply to the table's columns
912 dataId : dict, optional
913 Used to add a `patchId` column to the output dataframe.
914 band : `str`, optional
915 Filter band that is being processed.
917 Returns
918 -------
919 df : `pandas.DataFrame`
920 """
921 self.log.info("Transforming/standardizing the source table dataId: %s", dataId)
923 df = self.transform(band, parq, funcs, dataId).df
924 self.log.info("Made a table of %d columns and %d rows", len(df.columns), len(df))
925 return df
927 def getFunctors(self):
928 return self.funcs
930 def getAnalysis(self, parq, funcs=None, band=None):
931 if funcs is None:
932 funcs = self.funcs
933 analysis = PostprocessAnalysis(parq, funcs, filt=band)
934 return analysis
936 def transform(self, band, parq, funcs, dataId):
937 analysis = self.getAnalysis(parq, funcs=funcs, band=band)
938 df = analysis.df
939 if dataId and self.config.columnsFromDataId:
940 for key in self.config.columnsFromDataId:
941 if key in dataId:
942 df[str(key)] = dataId[key]
943 else:
944 raise ValueError(f"'{key}' in config.columnsFromDataId not found in dataId: {dataId}")
946 if self.config.primaryKey:
947 if df.index.name != self.config.primaryKey and self.config.primaryKey in df:
948 df.reset_index(inplace=True, drop=True)
949 df.set_index(self.config.primaryKey, inplace=True)
951 return pipeBase.Struct(
952 df=df,
953 analysis=analysis
954 )
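# Hedged sketch of calling a concrete subclass outside the pipeline framework
# (the ``parq`` handle and dataId values below are assumptions, not prescribed):
#
#     config = TransformSourceTableConfig()   # functorFile defaults to Source.yaml
#     task = TransformSourceTableTask(config=config)
#     df = task.run(parq, funcs=task.getFunctors(),
#                   dataId={'visit': 1234, 'detector': 42,
#                           'band': 'g', 'physical_filter': 'HSC-G'})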
957class TransformObjectCatalogConnections(pipeBase.PipelineTaskConnections,
958 defaultTemplates={"coaddName": "deep"},
959 dimensions=("tract", "patch", "skymap")):
960 inputCatalog = connectionTypes.Input(
961 doc="The vertical concatenation of the deepCoadd_{ref|meas|forced_src} catalogs, "
962 "stored as a DataFrame with a multi-level column index per-patch.",
963 dimensions=("tract", "patch", "skymap"),
964 storageClass="DataFrame",
965 name="{coaddName}Coadd_obj",
966 deferLoad=True,
967 )
968 outputCatalog = connectionTypes.Output(
969 doc="Per-Patch Object Table of columns transformed from the deepCoadd_obj table per the standard "
970 "data model.",
971 dimensions=("tract", "patch", "skymap"),
972 storageClass="DataFrame",
973 name="objectTable"
974 )
977class TransformObjectCatalogConfig(TransformCatalogBaseConfig,
978 pipelineConnections=TransformObjectCatalogConnections):
979 coaddName = pexConfig.Field(
980 dtype=str,
981 default="deep",
982 doc="Name of coadd"
983 )
984 # TODO: remove in DM-27177
985 filterMap = pexConfig.DictField(
986 keytype=str,
987 itemtype=str,
988 default={},
989 doc=("Dictionary mapping full filter name to short one for column name munging."
990 "These filters determine the output columns no matter what filters the "
991 "input data actually contain."),
992 deprecated=("Coadds are now identified by the band, so this transform is unused."
993 "Will be removed after v22.")
994 )
995 outputBands = pexConfig.ListField(
996 dtype=str,
997 default=None,
998 optional=True,
999 doc=("These bands and only these bands will appear in the output,"
1000 " NaN-filled if the input does not include them."
1001 " If None, then use all bands found in the input.")
1002 )
1003 camelCase = pexConfig.Field(
1004 dtype=bool,
1005 default=False,
1006 doc=("Write per-band columns names with camelCase, else underscore "
1007 "For example: gPsFlux instead of g_PsFlux.")
1008 )
1009 multilevelOutput = pexConfig.Field(
1010 dtype=bool,
1011 default=False,
1012 doc=("Whether results dataframe should have a multilevel column index (True) or be flat "
1013 "and name-munged (False).")
1014 )
1015 goodFlags = pexConfig.ListField(
1016 dtype=str,
1017 default=[],
1018 doc=("List of 'good' flags that should be set False when populating empty tables. "
1019 "All other flags are considered to be 'bad' flags and will be set to True.")
1020 )
1021 floatFillValue = pexConfig.Field(
1022 dtype=float,
1023 default=np.nan,
1024 doc="Fill value for float fields when populating empty tables."
1025 )
1026 integerFillValue = pexConfig.Field(
1027 dtype=int,
1028 default=-1,
1029 doc="Fill value for integer fields when populating empty tables."
1030 )
1032 def setDefaults(self):
1033 super().setDefaults()
1034 self.functorFile = os.path.join('$PIPE_TASKS_DIR', 'schemas', 'Object.yaml')
1035 self.primaryKey = 'objectId'
1036 self.columnsFromDataId = ['tract', 'patch']
1037 self.goodFlags = ['calib_astrometry_used',
1038 'calib_photometry_reserved',
1039 'calib_photometry_used',
1040 'calib_psf_candidate',
1041 'calib_psf_reserved',
1042 'calib_psf_used']
1045class TransformObjectCatalogTask(TransformCatalogBaseTask):
1046 """Produce a flattened Object Table to match the format specified in
1047 sdm_schemas.
1049 Do the same set of postprocessing calculations on all bands.
1051 This is identical to `TransformCatalogBaseTask`, except that it does
1052 the specified functor calculations for all filters present in the
1053 input `deepCoadd_obj` table. Any specific ``"filt"`` keywords specified
1054 by the YAML file will be superseded.
1055 """
1056 _DefaultName = "transformObjectCatalog"
1057 ConfigClass = TransformObjectCatalogConfig
1059 def run(self, parq, funcs=None, dataId=None, band=None):
1060 # NOTE: band kwarg is ignored here.
1061 dfDict = {}
1062 analysisDict = {}
1063 templateDf = pd.DataFrame()
1065 if isinstance(parq, DeferredDatasetHandle):
1066 columns = parq.get(component='columns')
1067 inputBands = columns.unique(level=1).values
1068 else:
1069 inputBands = parq.columnLevelNames['band']
1071 outputBands = self.config.outputBands if self.config.outputBands else inputBands
1073 # Perform transform for data of filters that exist in parq.
1074 for inputBand in inputBands:
1075 if inputBand not in outputBands:
1076 self.log.info("Ignoring %s band data in the input", inputBand)
1077 continue
1078 self.log.info("Transforming the catalog of band %s", inputBand)
1079 result = self.transform(inputBand, parq, funcs, dataId)
1080 dfDict[inputBand] = result.df
1081 analysisDict[inputBand] = result.analysis
1082 if templateDf.empty:
1083 templateDf = result.df
1085 # Put filler values in columns of other wanted bands
1086 for filt in outputBands:
1087 if filt not in dfDict:
1088 self.log.info("Adding empty columns for band %s", filt)
1089 dfTemp = templateDf.copy()
1090 for col in dfTemp.columns:
1091 testValue = dfTemp[col].values[0]
1092 if isinstance(testValue, (np.bool_, pd.BooleanDtype)):
1093 # Boolean flag type, check if it is a "good" flag
1094 if col in self.config.goodFlags:
1095 fillValue = False
1096 else:
1097 fillValue = True
1098 elif isinstance(testValue, numbers.Integral):
1099 # Checking numbers.Integral catches all flavors
1100 # of python, numpy, pandas, etc. integers.
1101 # We must ensure this is not an unsigned integer.
1102 if isinstance(testValue, np.unsignedinteger):
1103 raise ValueError("Parquet tables may not have unsigned integer columns.")
1104 else:
1105 fillValue = self.config.integerFillValue
1106 else:
1107 fillValue = self.config.floatFillValue
1108 dfTemp[col].values[:] = fillValue
1109 dfDict[filt] = dfTemp
1111 # This makes a multilevel column index, with band as first level
1112 df = pd.concat(dfDict, axis=1, names=['band', 'column'])
1114 if not self.config.multilevelOutput:
1115 noDupCols = list(set.union(*[set(v.noDupCols) for v in analysisDict.values()]))
1116 if self.config.primaryKey in noDupCols:
1117 noDupCols.remove(self.config.primaryKey)
1118 if dataId and self.config.columnsFromDataId:
1119 noDupCols += self.config.columnsFromDataId
1120 df = flattenFilters(df, noDupCols=noDupCols, camelCase=self.config.camelCase,
1121 inputBands=inputBands)
1123 self.log.info("Made a table of %d columns and %d rows", len(df.columns), len(df))
1125 return df
1128class ConsolidateObjectTableConnections(pipeBase.PipelineTaskConnections,
1129 dimensions=("tract", "skymap")):
1130 inputCatalogs = connectionTypes.Input(
1131 doc="Per-Patch objectTables conforming to the standard data model.",
1132 name="objectTable",
1133 storageClass="DataFrame",
1134 dimensions=("tract", "patch", "skymap"),
1135 multiple=True,
1136 )
1137 outputCatalog = connectionTypes.Output(
1138 doc="Pre-tract horizontal concatenation of the input objectTables",
1139 name="objectTable_tract",
1140 storageClass="DataFrame",
1141 dimensions=("tract", "skymap"),
1142 )
1145class ConsolidateObjectTableConfig(pipeBase.PipelineTaskConfig,
1146 pipelineConnections=ConsolidateObjectTableConnections):
1147 coaddName = pexConfig.Field(
1148 dtype=str,
1149 default="deep",
1150 doc="Name of coadd"
1151 )
1154class ConsolidateObjectTableTask(pipeBase.PipelineTask):
1155 """Write patch-merged source tables to a tract-level parquet file.
1157 Concatenates `objectTable` list into a per-visit `objectTable_tract`.
1158 """
1159 _DefaultName = "consolidateObjectTable"
1160 ConfigClass = ConsolidateObjectTableConfig
1162 inputDataset = 'objectTable'
1163 outputDataset = 'objectTable_tract'
1165 def runQuantum(self, butlerQC, inputRefs, outputRefs):
1166 inputs = butlerQC.get(inputRefs)
1167 self.log.info("Concatenating %s per-patch Object Tables",
1168 len(inputs['inputCatalogs']))
1169 df = pd.concat(inputs['inputCatalogs'])
1170 butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs)
1173class TransformSourceTableConnections(pipeBase.PipelineTaskConnections,
1174 defaultTemplates={"catalogType": ""},
1175 dimensions=("instrument", "visit", "detector")):
1177 inputCatalog = connectionTypes.Input(
1178 doc="Wide input catalog of sources produced by WriteSourceTableTask",
1179 name="{catalogType}source",
1180 storageClass="DataFrame",
1181 dimensions=("instrument", "visit", "detector"),
1182 deferLoad=True
1183 )
1184 outputCatalog = connectionTypes.Output(
1185 doc="Narrower, per-detector Source Table transformed and converted per a "
1186 "specified set of functors",
1187 name="{catalogType}sourceTable",
1188 storageClass="DataFrame",
1189 dimensions=("instrument", "visit", "detector")
1190 )
1193class TransformSourceTableConfig(TransformCatalogBaseConfig,
1194 pipelineConnections=TransformSourceTableConnections):
1196 def setDefaults(self):
1197 super().setDefaults()
1198 self.functorFile = os.path.join('$PIPE_TASKS_DIR', 'schemas', 'Source.yaml')
1199 self.primaryKey = 'sourceId'
1200 self.columnsFromDataId = ['visit', 'detector', 'band', 'physical_filter']
1203class TransformSourceTableTask(TransformCatalogBaseTask):
1204 """Transform/standardize a source catalog
1205 """
1206 _DefaultName = "transformSourceTable"
1207 ConfigClass = TransformSourceTableConfig
1210class ConsolidateVisitSummaryConnections(pipeBase.PipelineTaskConnections,
1211 dimensions=("instrument", "visit",),
1212 defaultTemplates={"calexpType": ""}):
1213 calexp = connectionTypes.Input(
1214 doc="Processed exposures used for metadata",
1215 name="{calexpType}calexp",
1216 storageClass="ExposureF",
1217 dimensions=("instrument", "visit", "detector"),
1218 deferLoad=True,
1219 multiple=True,
1220 )
1221 visitSummary = connectionTypes.Output(
1222 doc=("Per-visit consolidated exposure metadata. These catalogs use "
1223 "detector id for the id and are sorted for fast lookups of a "
1224 "detector."),
1225 name="{calexpType}visitSummary",
1226 storageClass="ExposureCatalog",
1227 dimensions=("instrument", "visit"),
1228 )
1231class ConsolidateVisitSummaryConfig(pipeBase.PipelineTaskConfig,
1232 pipelineConnections=ConsolidateVisitSummaryConnections):
1233 """Config for ConsolidateVisitSummaryTask"""
1234 pass
1237class ConsolidateVisitSummaryTask(pipeBase.PipelineTask):
1238 """Task to consolidate per-detector visit metadata.
1240 This task aggregates the following metadata from all the detectors in a
1241 single visit into an exposure catalog:
1242 - The visitInfo.
1243 - The wcs.
1244 - The photoCalib.
1245 - The physical_filter and band (if available).
1246 - The psf size, shape, and effective area at the center of the detector.
1247 - The corners of the bounding box in right ascension/declination.
1249 Other quantities such as Detector, Psf, ApCorrMap, and TransmissionCurve
1250 are not persisted here because of storage concerns, and because of their
1251 limited utility as summary statistics.
1253 Tests for this task are performed in ci_hsc_gen3.
1254 """
1255 _DefaultName = "consolidateVisitSummary"
1256 ConfigClass = ConsolidateVisitSummaryConfig
1258 def runQuantum(self, butlerQC, inputRefs, outputRefs):
1259 dataRefs = butlerQC.get(inputRefs.calexp)
1260 visit = dataRefs[0].dataId.byName()['visit']
1262 self.log.debug("Concatenating metadata from %d per-detector calexps (visit %d)",
1263 len(dataRefs), visit)
1265 expCatalog = self._combineExposureMetadata(visit, dataRefs)
1267 butlerQC.put(expCatalog, outputRefs.visitSummary)
1269 def _combineExposureMetadata(self, visit, dataRefs):
1270 """Make a combined exposure catalog from a list of dataRefs.
1271 These dataRefs must point to exposures with wcs, summaryStats,
1272 and other visit metadata.
1274 Parameters
1275 ----------
1276 visit : `int`
1277 Visit identification number.
1278 dataRefs : `list` of `lsst.daf.butler.DeferredDatasetHandle`
1279 List of dataRefs in visit.
1281 Returns
1282 -------
1283 visitSummary : `lsst.afw.table.ExposureCatalog`
1284 Exposure catalog with per-detector summary information.
1285 """
1286 schema = self._makeVisitSummarySchema()
1287 cat = afwTable.ExposureCatalog(schema)
1288 cat.resize(len(dataRefs))
1290 cat['visit'] = visit
1292 for i, dataRef in enumerate(dataRefs):
1293 visitInfo = dataRef.get(component='visitInfo')
1294 filterLabel = dataRef.get(component='filter')
1295 summaryStats = dataRef.get(component='summaryStats')
1296 detector = dataRef.get(component='detector')
1297 wcs = dataRef.get(component='wcs')
1298 photoCalib = dataRef.get(component='photoCalib')
1300 bbox = dataRef.get(component='bbox')
1301 validPolygon = dataRef.get(component='validPolygon')
1303 rec = cat[i]
1304 rec.setBBox(bbox)
1305 rec.setVisitInfo(visitInfo)
1306 rec.setWcs(wcs)
1307 rec.setPhotoCalib(photoCalib)
1308 rec.setValidPolygon(validPolygon)
1310 rec['physical_filter'] = filterLabel.physicalLabel if filterLabel.hasPhysicalLabel() else ""
1311 rec['band'] = filterLabel.bandLabel if filterLabel.hasBandLabel() else ""
1312 rec.setId(detector.getId())
1313 summaryStats.update_record(rec)
1315 metadata = dafBase.PropertyList()
1316 metadata.add("COMMENT", "Catalog id is detector id, sorted.")
1317 # We are looping over existing datarefs, so the following is true
1318 metadata.add("COMMENT", "Only detectors with data have entries.")
1319 cat.setMetadata(metadata)
1321 cat.sort()
1322 return cat
1324 def _makeVisitSummarySchema(self):
1325 """Make the schema for the visitSummary catalog."""
1326 schema = afwTable.ExposureTable.makeMinimalSchema()
1327 schema.addField('visit', type='L', doc='Visit number')
1328 schema.addField('physical_filter', type='String', size=32, doc='Physical filter')
1329 schema.addField('band', type='String', size=32, doc='Name of band')
1330 ExposureSummaryStats.update_schema(schema)
1331 return schema
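# The output visitSummary is keyed and sorted by detector id, so downstream
# code can look up a detector row directly (a sketch; names are illustrative):
#
#     visitSummary = butler.get('visitSummary', visit=1234, instrument='HSC')
#     row = visitSummary.find(detectorId)   # None if the detector is absent
#     wcs = row.getWcs()
#     photoCalib = row.getPhotoCalib()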
1334class ConsolidateSourceTableConnections(pipeBase.PipelineTaskConnections,
1335 defaultTemplates={"catalogType": ""},
1336 dimensions=("instrument", "visit")):
1337 inputCatalogs = connectionTypes.Input(
1338 doc="Input per-detector Source Tables",
1339 name="{catalogType}sourceTable",
1340 storageClass="DataFrame",
1341 dimensions=("instrument", "visit", "detector"),
1342 multiple=True
1343 )
1344 outputCatalog = connectionTypes.Output(
1345 doc="Per-visit concatenation of Source Table",
1346 name="{catalogType}sourceTable_visit",
1347 storageClass="DataFrame",
1348 dimensions=("instrument", "visit")
1349 )
1352class ConsolidateSourceTableConfig(pipeBase.PipelineTaskConfig,
1353 pipelineConnections=ConsolidateSourceTableConnections):
1354 pass
1357class ConsolidateSourceTableTask(pipeBase.PipelineTask):
1358 """Concatenate `sourceTable` list into a per-visit `sourceTable_visit`
1359 """
1360 _DefaultName = 'consolidateSourceTable'
1361 ConfigClass = ConsolidateSourceTableConfig
1363 inputDataset = 'sourceTable'
1364 outputDataset = 'sourceTable_visit'
1366 def runQuantum(self, butlerQC, inputRefs, outputRefs):
1367 from .makeWarp import reorderRefs
1369 detectorOrder = [ref.dataId['detector'] for ref in inputRefs.inputCatalogs]
1370 detectorOrder.sort()
1371 inputRefs = reorderRefs(inputRefs, detectorOrder, dataIdKey='detector')
1372 inputs = butlerQC.get(inputRefs)
1373 self.log.info("Concatenating %s per-detector Source Tables",
1374 len(inputs['inputCatalogs']))
1375 df = pd.concat(inputs['inputCatalogs'])
1376 butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs)
1379class MakeCcdVisitTableConnections(pipeBase.PipelineTaskConnections,
1380 dimensions=("instrument",),
1381 defaultTemplates={"calexpType": ""}):
1382 visitSummaryRefs = connectionTypes.Input(
1383 doc="Data references for per-visit consolidated exposure metadata from ConsolidateVisitSummaryTask",
1384 name="{calexpType}visitSummary",
1385 storageClass="ExposureCatalog",
1386 dimensions=("instrument", "visit"),
1387 multiple=True,
1388 deferLoad=True,
1389 )
1390 outputCatalog = connectionTypes.Output(
1391 doc="CCD and Visit metadata table",
1392 name="{calexpType}ccdVisitTable",
1393 storageClass="DataFrame",
1394 dimensions=("instrument",)
1395 )
1398class MakeCcdVisitTableConfig(pipeBase.PipelineTaskConfig,
1399 pipelineConnections=MakeCcdVisitTableConnections):
1400 pass
1403class MakeCcdVisitTableTask(pipeBase.PipelineTask):
1404 """Produce a `ccdVisitTable` from the `visitSummary` exposure catalogs.
1405 """
1406 _DefaultName = 'makeCcdVisitTable'
1407 ConfigClass = MakeCcdVisitTableConfig
1409 def run(self, visitSummaryRefs):
1410 """Make a table of ccd information from the `visitSummary` catalogs.
1412 Parameters
1413 ----------
1414 visitSummaryRefs : `list` of `lsst.daf.butler.DeferredDatasetHandle`
1415 List of DeferredDatasetHandles pointing to exposure catalogs with
1416 per-detector summary information.
1418 Returns
1419 -------
1420 result : `lsst.pipe.base.Struct`
1421 Results struct with attribute:
1423 ``outputCatalog``
1424 Catalog of ccd and visit information.
1425 """
1426 ccdEntries = []
1427 for visitSummaryRef in visitSummaryRefs:
1428 visitSummary = visitSummaryRef.get()
1429 visitInfo = visitSummary[0].getVisitInfo()
1431 ccdEntry = {}
1432 summaryTable = visitSummary.asAstropy()
1433 selectColumns = ['id', 'visit', 'physical_filter', 'band', 'ra', 'decl', 'zenithDistance',
1434 'zeroPoint', 'psfSigma', 'skyBg', 'skyNoise',
1435 'astromOffsetMean', 'astromOffsetStd', 'nPsfStar',
1436 'psfStarDeltaE1Median', 'psfStarDeltaE2Median',
1437 'psfStarDeltaE1Scatter', 'psfStarDeltaE2Scatter',
1438 'psfStarDeltaSizeMedian', 'psfStarDeltaSizeScatter',
1439 'psfStarScaledDeltaSizeScatter']
1440 ccdEntry = summaryTable[selectColumns].to_pandas().set_index('id')
1441 # 'visit' is the human readable visit number.
1442 # 'visitId' is the key to the visitId table. They are the same.
1443 # Technically you should join to get the visit from the visit
1444 # table.
1445 ccdEntry = ccdEntry.rename(columns={"visit": "visitId"})
1446 dataIds = [DataCoordinate.standardize(visitSummaryRef.dataId, detector=id) for id in
1447 summaryTable['id']]
1448 packer = visitSummaryRef.dataId.universe.makePacker('visit_detector', visitSummaryRef.dataId)
1449 ccdVisitIds = [packer.pack(dataId) for dataId in dataIds]
1450 ccdEntry['ccdVisitId'] = ccdVisitIds
1451 ccdEntry['detector'] = summaryTable['id']
1452 pixToArcseconds = np.array([vR.getWcs().getPixelScale().asArcseconds() if vR.getWcs()
1453 else np.nan for vR in visitSummary])
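            # np.sqrt(8*np.log(2)) ~ 2.355 converts the Gaussian sigma of the PSF
            # model to a FWHM; multiplying by the pixel scale gives seeing in arcseconds.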
1454 ccdEntry["seeing"] = visitSummary['psfSigma'] * np.sqrt(8 * np.log(2)) * pixToArcseconds
1456 ccdEntry["skyRotation"] = visitInfo.getBoresightRotAngle().asDegrees()
1457 ccdEntry["expMidpt"] = visitInfo.getDate().toPython()
1458 ccdEntry["expMidptMJD"] = visitInfo.getDate().get(dafBase.DateTime.MJD)
1459 expTime = visitInfo.getExposureTime()
1460 ccdEntry['expTime'] = expTime
1461 ccdEntry["obsStart"] = ccdEntry["expMidpt"] - 0.5 * pd.Timedelta(seconds=expTime)
1462 expTime_days = expTime / (60*60*24)
1463 ccdEntry["obsStartMJD"] = ccdEntry["expMidptMJD"] - 0.5 * expTime_days
1464 ccdEntry['darkTime'] = visitInfo.getDarkTime()
1465 ccdEntry['xSize'] = summaryTable['bbox_max_x'] - summaryTable['bbox_min_x']
1466 ccdEntry['ySize'] = summaryTable['bbox_max_y'] - summaryTable['bbox_min_y']
1467 ccdEntry['llcra'] = summaryTable['raCorners'][:, 0]
1468 ccdEntry['llcdec'] = summaryTable['decCorners'][:, 0]
1469 ccdEntry['ulcra'] = summaryTable['raCorners'][:, 1]
1470 ccdEntry['ulcdec'] = summaryTable['decCorners'][:, 1]
1471 ccdEntry['urcra'] = summaryTable['raCorners'][:, 2]
1472 ccdEntry['urcdec'] = summaryTable['decCorners'][:, 2]
1473 ccdEntry['lrcra'] = summaryTable['raCorners'][:, 3]
1474 ccdEntry['lrcdec'] = summaryTable['decCorners'][:, 3]
1475 # TODO: DM-30618, Add raftName, nExposures, ccdTemp, binX, binY,
1476 # and flags, and decide if WCS, and llcx, llcy, ulcx, ulcy, etc.
1477 # values are actually wanted.
1478 ccdEntries.append(ccdEntry)
1480 outputCatalog = pd.concat(ccdEntries)
1481 outputCatalog.set_index('ccdVisitId', inplace=True, verify_integrity=True)
1482 return pipeBase.Struct(outputCatalog=outputCatalog)
1485class MakeVisitTableConnections(pipeBase.PipelineTaskConnections,
1486 dimensions=("instrument",),
1487 defaultTemplates={"calexpType": ""}):
1488 visitSummaries = connectionTypes.Input(
1489 doc="Per-visit consolidated exposure metadata from ConsolidateVisitSummaryTask",
1490 name="{calexpType}visitSummary",
1491 storageClass="ExposureCatalog",
1492 dimensions=("instrument", "visit",),
1493 multiple=True,
1494 deferLoad=True,
1495 )
1496 outputCatalog = connectionTypes.Output(
1497 doc="Visit metadata table",
1498 name="{calexpType}visitTable",
1499 storageClass="DataFrame",
1500 dimensions=("instrument",)
1501 )
1504class MakeVisitTableConfig(pipeBase.PipelineTaskConfig,
1505 pipelineConnections=MakeVisitTableConnections):
1506 pass
1509class MakeVisitTableTask(pipeBase.PipelineTask):
1510 """Produce a `visitTable` from the `visitSummary` exposure catalogs.
1511 """
1512 _DefaultName = 'makeVisitTable'
1513 ConfigClass = MakeVisitTableConfig
1515 def run(self, visitSummaries):
1516 """Make a table of visit information from the `visitSummary` catalogs.
1518 Parameters
1519 ----------
1520 visitSummaries : `list` of `lsst.afw.table.ExposureCatalog`
1521 List of exposure catalogs with per-detector summary information.
1522 Returns
1523 -------
1524 result : `lsst.pipe.base.Struct`
1525 Results struct with attribute:
1527 ``outputCatalog``
1528 Catalog of visit information.
1529 """
1530 visitEntries = []
1531 for visitSummary in visitSummaries:
1532 visitSummary = visitSummary.get()
1533 visitRow = visitSummary[0]
1534 visitInfo = visitRow.getVisitInfo()
1536 visitEntry = {}
1537 visitEntry["visitId"] = visitRow['visit']
1538 visitEntry["visit"] = visitRow['visit']
1539 visitEntry["physical_filter"] = visitRow['physical_filter']
1540 visitEntry["band"] = visitRow['band']
1541 raDec = visitInfo.getBoresightRaDec()
1542 visitEntry["ra"] = raDec.getRa().asDegrees()
1543 visitEntry["decl"] = raDec.getDec().asDegrees()
1544 visitEntry["skyRotation"] = visitInfo.getBoresightRotAngle().asDegrees()
1545 azAlt = visitInfo.getBoresightAzAlt()
1546 visitEntry["azimuth"] = azAlt.getLongitude().asDegrees()
1547 visitEntry["altitude"] = azAlt.getLatitude().asDegrees()
1548 visitEntry["zenithDistance"] = 90 - azAlt.getLatitude().asDegrees()
1549 visitEntry["airmass"] = visitInfo.getBoresightAirmass()
1550 expTime = visitInfo.getExposureTime()
1551 visitEntry["expTime"] = expTime
1552 visitEntry["expMidpt"] = visitInfo.getDate().toPython()
1553 visitEntry["expMidptMJD"] = visitInfo.getDate().get(dafBase.DateTime.MJD)
1554 visitEntry["obsStart"] = visitEntry["expMidpt"] - 0.5 * pd.Timedelta(seconds=expTime)
1555 expTime_days = expTime / (60*60*24)
1556 visitEntry["obsStartMJD"] = visitEntry["expMidptMJD"] - 0.5 * expTime_days
1557 visitEntries.append(visitEntry)
1559 # TODO: DM-30623, Add programId, exposureType, cameraTemp,
1560 # mirror1Temp, mirror2Temp, mirror3Temp, domeTemp, externalTemp,
1561 # dimmSeeing, pwvGPS, pwvMW, flags, nExposures.
1563 outputCatalog = pd.DataFrame(data=visitEntries)
1564 outputCatalog.set_index('visitId', inplace=True, verify_integrity=True)
1565 return pipeBase.Struct(outputCatalog=outputCatalog)
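# Editor's sketch (not pipeline code): why verify_integrity=True is passed when the
# visit table is indexed above. With unique visitIds the call succeeds; a duplicate
# visitId raises ValueError instead of silently producing an ambiguous index.
import pandas as pd

ok = pd.DataFrame({"visitId": [1001, 1002], "band": ["r", "i"]})
ok.set_index("visitId", inplace=True, verify_integrity=True)      # unique keys: fine

bad = pd.DataFrame({"visitId": [1001, 1001], "band": ["r", "r"]})
try:
    bad.set_index("visitId", inplace=True, verify_integrity=True)
except ValueError as err:
    print("duplicate visitId rejected:", err)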
1568class WriteForcedSourceTableConnections(pipeBase.PipelineTaskConnections,
1569 dimensions=("instrument", "visit", "detector", "skymap", "tract")):
1571 inputCatalog = connectionTypes.Input(
1572 doc="Primary per-detector, single-epoch forced-photometry catalog. "
1573 "By default, it is the output of ForcedPhotCcdTask on calexps",
1574 name="forced_src",
1575 storageClass="SourceCatalog",
1576 dimensions=("instrument", "visit", "detector", "skymap", "tract")
1577 )
1578 inputCatalogDiff = connectionTypes.Input(
1579 doc="Secondary multi-epoch, per-detector, forced photometry catalog. "
1580 "By default, it is the output of ForcedPhotCcdTask run on image differences.",
1581 name="forced_diff",
1582 storageClass="SourceCatalog",
1583 dimensions=("instrument", "visit", "detector", "skymap", "tract")
1584 )
1585 outputCatalog = connectionTypes.Output(
1586 doc="InputCatalogs horizonatally joined on `objectId` in Parquet format",
1587 name="mergedForcedSource",
1588 storageClass="DataFrame",
1589 dimensions=("instrument", "visit", "detector", "skymap", "tract")
1590 )
1593class WriteForcedSourceTableConfig(pipeBase.PipelineTaskConfig,
1594 pipelineConnections=WriteForcedSourceTableConnections):
1595 key = lsst.pex.config.Field(
1596 doc="Column on which to join the two input tables on and make the primary key of the output",
1597 dtype=str,
1598 default="objectId",
1599 )
1602class WriteForcedSourceTableTask(pipeBase.PipelineTask):
1603 """Merge and convert per-detector forced source catalogs to parquet.
1605 Because the predecessor ForcedPhotCcdTask operates per-detector and
1606 per-tract (i.e., it has tract in its dimensions), detectors on the
1607 tract boundary may have multiple forced source catalogs.
1609 The successor task TransformForcedSourceTable runs per-patch and
1610 temporally aggregates the overlapping mergedForcedSource catalogs
1611 from all available epochs.
1612 """
1613 _DefaultName = "writeForcedSourceTable"
1614 ConfigClass = WriteForcedSourceTableConfig
1616 def runQuantum(self, butlerQC, inputRefs, outputRefs):
1617 inputs = butlerQC.get(inputRefs)
1618 # Add ccdVisitId to allow joining with CcdVisitTable
1619 inputs['ccdVisitId'] = butlerQC.quantum.dataId.pack("visit_detector")
1620 inputs['band'] = butlerQC.quantum.dataId.full['band']
1621 outputs = self.run(**inputs)
1622 butlerQC.put(outputs, outputRefs)
1624 def run(self, inputCatalog, inputCatalogDiff, ccdVisitId=None, band=None):
1625 dfs = []
1626 for table, dataset, in zip((inputCatalog, inputCatalogDiff), ('calexp', 'diff')):
1627 df = table.asAstropy().to_pandas().set_index(self.config.key, drop=False)
1628 df = df.reindex(sorted(df.columns), axis=1)
1629 df['ccdVisitId'] = ccdVisitId if ccdVisitId else pd.NA
1630 df['band'] = band if band else pd.NA
1631 df.columns = pd.MultiIndex.from_tuples([(dataset, c) for c in df.columns],
1632 names=('dataset', 'column'))
1634 dfs.append(df)
1636 outputCatalog = functools.reduce(lambda d1, d2: d1.join(d2), dfs)
1637 return pipeBase.Struct(outputCatalog=outputCatalog)
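# Editor's sketch (not pipeline code): the column structure produced by run() above,
# on a toy pair of catalogs. Each input becomes a DataFrame indexed by objectId whose
# columns are wrapped in a ('dataset', 'column') MultiIndex; the frames are then
# joined on that shared index. Column names and values here are made up.
import functools
import pandas as pd

calexp = pd.DataFrame({"objectId": [1, 2], "psfFlux": [10.0, 12.0]})
diff = pd.DataFrame({"objectId": [1, 2], "psfFlux": [0.3, -0.1]})

dfs = []
for table, dataset in ((calexp, "calexp"), (diff, "diff")):
    df = table.set_index("objectId", drop=False)
    df.columns = pd.MultiIndex.from_tuples([(dataset, c) for c in df.columns],
                                           names=("dataset", "column"))
    dfs.append(df)

merged = functools.reduce(lambda d1, d2: d1.join(d2), dfs)
# merged columns: ('calexp', 'objectId'), ('calexp', 'psfFlux'),
#                 ('diff', 'objectId'), ('diff', 'psfFlux')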
1640class TransformForcedSourceTableConnections(pipeBase.PipelineTaskConnections,
1641 dimensions=("instrument", "skymap", "patch", "tract")):
1643 inputCatalogs = connectionTypes.Input(
1644 doc="Parquet table of merged ForcedSources produced by WriteForcedSourceTableTask",
1645 name="mergedForcedSource",
1646 storageClass="DataFrame",
1647 dimensions=("instrument", "visit", "detector", "skymap", "tract"),
1648 multiple=True,
1649 deferLoad=True
1650 )
1651 referenceCatalog = connectionTypes.Input(
1652 doc="Reference catalog which was used to seed the forcedPhot. Columns "
1653 "objectId, detect_isPrimary, detect_isTractInner, detect_isPatchInner "
1654 "are expected.",
1655 name="objectTable",
1656 storageClass="DataFrame",
1657 dimensions=("tract", "patch", "skymap"),
1658 deferLoad=True
1659 )
1660 outputCatalog = connectionTypes.Output(
1661 doc="Narrower, temporally-aggregated, per-patch ForcedSource Table transformed and converted per a "
1662 "specified set of functors",
1663 name="forcedSourceTable",
1664 storageClass="DataFrame",
1665 dimensions=("tract", "patch", "skymap")
1666 )
1669class TransformForcedSourceTableConfig(TransformCatalogBaseConfig,
1670 pipelineConnections=TransformForcedSourceTableConnections):
1671 referenceColumns = pexConfig.ListField(
1672 dtype=str,
1673 default=["detect_isPrimary", "detect_isTractInner", "detect_isPatchInner"],
1674 optional=True,
1675 doc="Columns to pull from reference catalog",
1676 )
1677 keyRef = lsst.pex.config.Field(
1678 doc="Column on which to join the two input tables on and make the primary key of the output",
1679 dtype=str,
1680 default="objectId",
1681 )
1682 key = lsst.pex.config.Field(
1683 doc="Rename the output DataFrame index to this name",
1684 dtype=str,
1685 default="forcedSourceId",
1686 )
1688 def setDefaults(self):
1689 super().setDefaults()
1690 self.functorFile = os.path.join('$PIPE_TASKS_DIR', 'schemas', 'ForcedSource.yaml')
1691 self.columnsFromDataId = ['tract', 'patch']
1694class TransformForcedSourceTableTask(TransformCatalogBaseTask):
1695 """Transform/standardize a ForcedSource catalog
1697 Transforms each wide, per-detector forcedSource parquet table per the
1698 specification file (per-camera defaults found in ForcedSource.yaml).
1699 All epochs that overlap the patch are aggregated into one per-patch
1700 narrow-parquet file.
1702 No de-duplication of rows is performed. Duplicate-resolution flags are
1703 pulled in from the referenceCatalog (`detect_isPrimary`,
1704 `detect_isTractInner`, `detect_isPatchInner`) so that the user may
1705 de-duplicate for analysis or compare duplicates for QA.
1707 The resulting table includes multiple bands. Epochs (MJDs) and other
1708 useful per-visit metadata can be retrieved by joining with the
1709 CcdVisitTable on ccdVisitId.
1710 """
1711 _DefaultName = "transformForcedSourceTable"
1712 ConfigClass = TransformForcedSourceTableConfig
1714 def runQuantum(self, butlerQC, inputRefs, outputRefs):
1715 inputs = butlerQC.get(inputRefs)
1716 if self.funcs is None:
1717 raise ValueError("config.functorFile is None. "
1718 "Must be a valid path to yaml in order to run Task as a PipelineTask.")
1719 outputs = self.run(inputs['inputCatalogs'], inputs['referenceCatalog'], funcs=self.funcs,
1720 dataId=outputRefs.outputCatalog.dataId.full)
1722 butlerQC.put(outputs, outputRefs)
1724 def run(self, inputCatalogs, referenceCatalog, funcs=None, dataId=None, band=None):
1725 dfs = []
1726 ref = referenceCatalog.get(parameters={"columns": self.config.referenceColumns})
1727 self.log.info("Aggregating %s input catalogs" % (len(inputCatalogs)))
1728 for handle in inputCatalogs:
1729 result = self.transform(None, handle, funcs, dataId)
1730 # Filter for only rows that were detected on (overlap) the patch
1731 dfs.append(result.df.join(ref, how='inner'))
1733 outputCatalog = pd.concat(dfs)
1735 # Now that we are done joining on config.keyRef,
1736 # change the index from config.keyRef to config.key:
1737 outputCatalog.index.rename(self.config.keyRef, inplace=True)
1738 # Add config.keyRef to the column list
1739 outputCatalog.reset_index(inplace=True)
1740 # Set the forcedSourceId to the index. This is specified in the
1741 # ForcedSource.yaml
1742 outputCatalog.set_index("forcedSourceId", inplace=True, verify_integrity=True)
1743 # Rename it to the config.key
1744 outputCatalog.index.rename(self.config.key, inplace=True)
1746 self.log.info("Made a table of %d columns and %d rows",
1747 len(outputCatalog.columns), len(outputCatalog))
1748 return pipeBase.Struct(outputCatalog=outputCatalog)
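# Editor's sketch (not pipeline code): the re-indexing sequence in run() above, on a
# toy frame with made-up ids. After joining on the reference catalog the index holds
# objectId (config.keyRef); it is named, pushed back into a column, and the unique
# forcedSourceId becomes the index, renamed to config.key (the same name by default).
import pandas as pd

cat = pd.DataFrame({"forcedSourceId": [501, 502, 503], "psfFlux": [1.0, 2.0, 3.0]},
                   index=pd.Index([11, 11, 12]))            # objectId may repeat
cat.index.rename("objectId", inplace=True)                  # name the join key
cat.reset_index(inplace=True)                               # objectId becomes a column
cat.set_index("forcedSourceId", inplace=True, verify_integrity=True)
cat.index.rename("forcedSourceId", inplace=True)            # config.key (default)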
1751class ConsolidateTractConnections(pipeBase.PipelineTaskConnections,
1752 defaultTemplates={"catalogType": ""},
1753 dimensions=("instrument", "tract")):
1754 inputCatalogs = connectionTypes.Input(
1755 doc="Input per-patch DataFrame Tables to be concatenated",
1756 name="{catalogType}ForcedSourceTable",
1757 storageClass="DataFrame",
1758 dimensions=("tract", "patch", "skymap"),
1759 multiple=True,
1760 )
1762 outputCatalog = connectionTypes.Output(
1763 doc="Output per-tract concatenation of DataFrame Tables",
1764 name="{catalogType}ForcedSourceTable_tract",
1765 storageClass="DataFrame",
1766 dimensions=("tract", "skymap"),
1767 )
1770class ConsolidateTractConfig(pipeBase.PipelineTaskConfig,
1771 pipelineConnections=ConsolidateTractConnections):
1772 pass
1775class ConsolidateTractTask(pipeBase.PipelineTask):
1776 """Concatenate any per-patch, dataframe list into a single
1777 per-tract DataFrame.
1778 """
1779 _DefaultName = 'ConsolidateTract'
1780 ConfigClass = ConsolidateTractConfig
1782 def runQuantum(self, butlerQC, inputRefs, outputRefs):
1783 inputs = butlerQC.get(inputRefs)
1784 # Not checking that at least one inputCatalog exists because that
1785 # would be an empty QG (quantum graph).
1786 self.log.info("Concatenating %s per-patch %s Tables",
1787 len(inputs['inputCatalogs']),
1788 inputRefs.inputCatalogs[0].datasetType.name)
1789 df = pd.concat(inputs['inputCatalogs'])
1790 butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs)
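# Editor's sketch (not pipeline code): the per-tract concatenation performed above,
# on two toy per-patch frames with made-up ids. pd.concat stacks the rows and keeps
# each patch table's existing forcedSourceId index unchanged.
import pandas as pd

patch41 = pd.DataFrame({"psfFlux": [1.0, 2.0]},
                       index=pd.Index([501, 502], name="forcedSourceId"))
patch42 = pd.DataFrame({"psfFlux": [3.0]},
                       index=pd.Index([503], name="forcedSourceId"))
tract = pd.concat([patch41, patch42])       # three rows, indexed by forcedSourceId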