Coverage for python/lsst/pipe/tasks/postprocess.py: 26%
651 statements
1# This file is part of pipe_tasks.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <https://www.gnu.org/licenses/>.
22__all__ = ["WriteObjectTableConfig", "WriteObjectTableTask",
23 "WriteSourceTableConfig", "WriteSourceTableTask",
24 "WriteRecalibratedSourceTableConfig", "WriteRecalibratedSourceTableTask",
25 "PostprocessAnalysis",
26 "TransformCatalogBaseConfig", "TransformCatalogBaseTask",
27 "TransformObjectCatalogConfig", "TransformObjectCatalogTask",
28 "ConsolidateObjectTableConfig", "ConsolidateObjectTableTask",
29 "TransformSourceTableConfig", "TransformSourceTableTask",
30 "ConsolidateVisitSummaryConfig", "ConsolidateVisitSummaryTask",
31 "ConsolidateSourceTableConfig", "ConsolidateSourceTableTask",
32 "MakeCcdVisitTableConfig", "MakeCcdVisitTableTask",
33 "MakeVisitTableConfig", "MakeVisitTableTask",
34 "WriteForcedSourceTableConfig", "WriteForcedSourceTableTask",
35 "TransformForcedSourceTableConfig", "TransformForcedSourceTableTask",
36 "ConsolidateTractConfig", "ConsolidateTractTask"]
38import functools
39import pandas as pd
40import logging
41import numpy as np
42import numbers
43import os
45import lsst.geom
46import lsst.pex.config as pexConfig
47import lsst.pipe.base as pipeBase
48import lsst.daf.base as dafBase
49from lsst.pipe.base import connectionTypes
50import lsst.afw.table as afwTable
51from lsst.afw.image import ExposureSummaryStats
52from lsst.meas.base import SingleFrameMeasurementTask, DetectorVisitIdGeneratorConfig
53from lsst.skymap import BaseSkyMap
55from .functors import CompositeFunctor, Column
57log = logging.getLogger(__name__)
60def flattenFilters(df, noDupCols=['coord_ra', 'coord_dec'], camelCase=False, inputBands=None):
61 """Flattens a dataframe with multilevel column index.
62 """
63 newDf = pd.DataFrame()
64 # band is the level 0 index
65 dfBands = df.columns.unique(level=0).values
66 for band in dfBands:
67 subdf = df[band]
68 columnFormat = '{0}{1}' if camelCase else '{0}_{1}'
69 newColumns = {c: columnFormat.format(band, c)
70 for c in subdf.columns if c not in noDupCols}
71 cols = list(newColumns.keys())
72 newDf = pd.concat([newDf, subdf[cols].rename(columns=newColumns)], axis=1)
74 # Band must be present in the input and output or else column is all NaN:
75 presentBands = dfBands if inputBands is None else list(set(inputBands).intersection(dfBands))
76 # Get the unexploded columns from any present band's partition
77 noDupDf = df[presentBands[0]][noDupCols]
78 newDf = pd.concat([noDupDf, newDf], axis=1)
79 return newDf
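# Illustrative sketch (not part of pipe_tasks; the toy column names are assumptions):
# how flattenFilters renames a two-level (band, column) index into flat, per-band
# column names while keeping the noDupCols only once.
def _exampleFlattenFilters():
    import pandas as pd
    columns = pd.MultiIndex.from_tuples(
        [("g", "coord_ra"), ("g", "coord_dec"), ("g", "psfFlux"),
         ("r", "coord_ra"), ("r", "coord_dec"), ("r", "psfFlux")],
        names=("band", "column"))
    df = pd.DataFrame([[1.0, 2.0, 10.0, 1.0, 2.0, 20.0]], columns=columns)
    flat = flattenFilters(df)
    # flat.columns -> ['coord_ra', 'coord_dec', 'g_psfFlux', 'r_psfFlux']
    return flat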
82class WriteObjectTableConnections(pipeBase.PipelineTaskConnections,
83 defaultTemplates={"coaddName": "deep"},
84 dimensions=("tract", "patch", "skymap")):
85 inputCatalogMeas = connectionTypes.Input(
86 doc="Catalog of source measurements on the deepCoadd.",
87 dimensions=("tract", "patch", "band", "skymap"),
88 storageClass="SourceCatalog",
89 name="{coaddName}Coadd_meas",
90 multiple=True
91 )
92 inputCatalogForcedSrc = connectionTypes.Input(
93 doc="Catalog of forced measurements (shape and position parameters held fixed) on the deepCoadd.",
94 dimensions=("tract", "patch", "band", "skymap"),
95 storageClass="SourceCatalog",
96 name="{coaddName}Coadd_forced_src",
97 multiple=True
98 )
99 inputCatalogRef = connectionTypes.Input(
100 doc="Catalog marking the primary detection (which band provides a good shape and position)"
101 "for each detection in deepCoadd_mergeDet.",
102 dimensions=("tract", "patch", "skymap"),
103 storageClass="SourceCatalog",
104 name="{coaddName}Coadd_ref"
105 )
106 outputCatalog = connectionTypes.Output(
107 doc="A vertical concatenation of the deepCoadd_{ref|meas|forced_src} catalogs, "
108 "stored as a DataFrame with a multi-level column index per-patch.",
109 dimensions=("tract", "patch", "skymap"),
110 storageClass="DataFrame",
111 name="{coaddName}Coadd_obj"
112 )
115class WriteObjectTableConfig(pipeBase.PipelineTaskConfig,
116 pipelineConnections=WriteObjectTableConnections):
117 engine = pexConfig.Field(
118 dtype=str,
119 default="pyarrow",
120 doc="Parquet engine for writing (pyarrow or fastparquet)",
121 deprecated="This config is no longer used, and will be removed after v26."
122 )
123 coaddName = pexConfig.Field(
124 dtype=str,
125 default="deep",
126 doc="Name of coadd"
127 )
130class WriteObjectTableTask(pipeBase.PipelineTask):
131 """Write filter-merged source tables as a DataFrame in parquet format.
132 """
133 _DefaultName = "writeObjectTable"
134 ConfigClass = WriteObjectTableConfig
136 # Names of table datasets to be merged
137 inputDatasets = ('forced_src', 'meas', 'ref')
139 # Tag of output dataset written by `MergeSourcesTask.write`
140 outputDataset = 'obj'
142 def runQuantum(self, butlerQC, inputRefs, outputRefs):
143 inputs = butlerQC.get(inputRefs)
145 measDict = {ref.dataId['band']: {'meas': cat} for ref, cat in
146 zip(inputRefs.inputCatalogMeas, inputs['inputCatalogMeas'])}
147 forcedSourceDict = {ref.dataId['band']: {'forced_src': cat} for ref, cat in
148 zip(inputRefs.inputCatalogForcedSrc, inputs['inputCatalogForcedSrc'])}
150 catalogs = {}
151 for band in measDict.keys():
152 catalogs[band] = {'meas': measDict[band]['meas'],
153 'forced_src': forcedSourceDict[band]['forced_src'],
154 'ref': inputs['inputCatalogRef']}
155 dataId = butlerQC.quantum.dataId
156 df = self.run(catalogs=catalogs, tract=dataId['tract'], patch=dataId['patch'])
157 outputs = pipeBase.Struct(outputCatalog=df)
158 butlerQC.put(outputs, outputRefs)
160 def run(self, catalogs, tract, patch):
161 """Merge multiple catalogs.
163 Parameters
164 ----------
165 catalogs : `dict`
166 Mapping from filter names to dict of catalogs.
167 tract : int
168 tractId to use for the tractId column.
169 patch : str
170 patchId to use for the patchId column.
172 Returns
173 -------
174 catalog : `pandas.DataFrame`
175 Merged dataframe.
176 """
177 dfs = []
178 for filt, tableDict in catalogs.items():
179 for dataset, table in tableDict.items():
180 # Convert afwTable to pandas DataFrame
181 df = table.asAstropy().to_pandas().set_index('id', drop=True)
183 # Sort columns by name, to ensure matching schema among patches
184 df = df.reindex(sorted(df.columns), axis=1)
185 df = df.assign(tractId=tract, patchId=patch)
187 # Make columns a 3-level MultiIndex
188 df.columns = pd.MultiIndex.from_tuples([(dataset, filt, c) for c in df.columns],
189 names=('dataset', 'band', 'column'))
190 dfs.append(df)
192 # We do this dance and not `pd.concat(dfs)` because the pandas
193 # concatenation uses infinite memory.
194 catalog = functools.reduce(lambda d1, d2: d1.join(d2), dfs)
195 return catalog
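# Illustrative sketch (toy frames, not butler datasets): the column-wise merge
# pattern used in run() above, joining per-(dataset, band) frames on their shared
# object id index rather than concatenating everything in a single pd.concat call.
def _exampleMultiIndexJoin():
    import functools
    import pandas as pd
    idx = pd.Index([1, 2, 3], name="id")
    gCols = pd.MultiIndex.from_tuples([("meas", "g", "flux")],
                                      names=("dataset", "band", "column"))
    rCols = pd.MultiIndex.from_tuples([("meas", "r", "flux")],
                                      names=("dataset", "band", "column"))
    dfs = [pd.DataFrame([[1.0], [2.0], [3.0]], index=idx, columns=gCols),
           pd.DataFrame([[4.0], [5.0], [6.0]], index=idx, columns=rCols)]
    # Each join adds the columns for one band/dataset combination.
    return functools.reduce(lambda d1, d2: d1.join(d2), dfs)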
198class WriteSourceTableConnections(pipeBase.PipelineTaskConnections,
199 defaultTemplates={"catalogType": ""},
200 dimensions=("instrument", "visit", "detector")):
202 catalog = connectionTypes.Input(
203 doc="Input full-depth catalog of sources produced by CalibrateTask",
204 name="{catalogType}src",
205 storageClass="SourceCatalog",
206 dimensions=("instrument", "visit", "detector")
207 )
208 outputCatalog = connectionTypes.Output(
209 doc="Catalog of sources, `src` in DataFrame/Parquet format. The 'id' column is "
210 "replaced with an index; all other columns are unchanged.",
211 name="{catalogType}source",
212 storageClass="DataFrame",
213 dimensions=("instrument", "visit", "detector")
214 )
217class WriteSourceTableConfig(pipeBase.PipelineTaskConfig,
218 pipelineConnections=WriteSourceTableConnections):
219 idGenerator = DetectorVisitIdGeneratorConfig.make_field()
222class WriteSourceTableTask(pipeBase.PipelineTask):
223 """Write source table to DataFrame Parquet format.
224 """
225 _DefaultName = "writeSourceTable"
226 ConfigClass = WriteSourceTableConfig
228 def runQuantum(self, butlerQC, inputRefs, outputRefs):
229 inputs = butlerQC.get(inputRefs)
230 inputs['ccdVisitId'] = self.config.idGenerator.apply(butlerQC.quantum.dataId).catalog_id
231 result = self.run(**inputs)
232 outputs = pipeBase.Struct(outputCatalog=result.table)
233 butlerQC.put(outputs, outputRefs)
235 def run(self, catalog, ccdVisitId=None, **kwargs):
236 """Convert `src` catalog to DataFrame
238 Parameters
239 ----------
240 catalog: `afwTable.SourceCatalog`
241 catalog to be converted
242 ccdVisitId: `int`
243 ccdVisitId to be added as a column
244 **kwargs
245 Additional keyword arguments are ignored as a convenience for
246 subclasses that pass the same arguments to several different
247 methods.
249 Returns
250 -------
251 result : `lsst.pipe.base.Struct`
252 ``table``
253 `DataFrame` version of the input catalog
254 """
255 self.log.info("Generating DataFrame from src catalog ccdVisitId=%s", ccdVisitId)
256 df = catalog.asAstropy().to_pandas().set_index('id', drop=True)
257 df['ccdVisitId'] = ccdVisitId
259 return pipeBase.Struct(table=df)
262class WriteRecalibratedSourceTableConnections(WriteSourceTableConnections,
263 defaultTemplates={"catalogType": "",
264 "skyWcsName": "gbdesAstrometricFit",
265 "photoCalibName": "fgcm"},
266 dimensions=("instrument", "visit", "detector", "skymap")):
267 skyMap = connectionTypes.Input(
268 doc="skyMap needed to choose which tract-level calibrations to use when multiple available",
269 name=BaseSkyMap.SKYMAP_DATASET_TYPE_NAME,
270 storageClass="SkyMap",
271 dimensions=("skymap",),
272 )
273 exposure = connectionTypes.Input(
274 doc="Input exposure to perform photometry on.",
275 name="calexp",
276 storageClass="ExposureF",
277 dimensions=["instrument", "visit", "detector"],
278 )
279 externalSkyWcsTractCatalog = connectionTypes.Input(
280 doc=("Per-tract, per-visit wcs calibrations. These catalogs use the detector "
281 "id for the catalog id, sorted on id for fast lookup."),
282 name="{skyWcsName}SkyWcsCatalog",
283 storageClass="ExposureCatalog",
284 dimensions=["instrument", "visit", "tract"],
285 multiple=True
286 )
287 externalSkyWcsGlobalCatalog = connectionTypes.Input(
288 doc=("Per-visit wcs calibrations computed globally (with no tract information). "
289 "These catalogs use the detector id for the catalog id, sorted on id for "
290 "fast lookup."),
291 name="finalVisitSummary",
292 storageClass="ExposureCatalog",
293 dimensions=["instrument", "visit"],
294 )
295 externalPhotoCalibTractCatalog = connectionTypes.Input(
296 doc=("Per-tract, per-visit photometric calibrations. These catalogs use the "
297 "detector id for the catalog id, sorted on id for fast lookup."),
298 name="{photoCalibName}PhotoCalibCatalog",
299 storageClass="ExposureCatalog",
300 dimensions=["instrument", "visit", "tract"],
301 multiple=True
302 )
303 externalPhotoCalibGlobalCatalog = connectionTypes.Input(
304 doc=("Per-visit photometric calibrations computed globally (with no tract "
305 "information). These catalogs use the detector id for the catalog id, "
306 "sorted on id for fast lookup."),
307 name="finalVisitSummary",
308 storageClass="ExposureCatalog",
309 dimensions=["instrument", "visit"],
310 )
312 def __init__(self, *, config=None):
313 super().__init__(config=config)
314 # Same connection boilerplate as all other applications of
315 # Global/Tract calibrations
316 if config.doApplyExternalSkyWcs and config.doReevaluateSkyWcs:
317 if config.useGlobalExternalSkyWcs:
318 self.inputs.remove("externalSkyWcsTractCatalog")
319 else:
320 self.inputs.remove("externalSkyWcsGlobalCatalog")
321 else:
322 self.inputs.remove("externalSkyWcsTractCatalog")
323 self.inputs.remove("externalSkyWcsGlobalCatalog")
324 if config.doApplyExternalPhotoCalib and config.doReevaluatePhotoCalib:
325 if config.useGlobalExternalPhotoCalib:
326 self.inputs.remove("externalPhotoCalibTractCatalog")
327 else:
328 self.inputs.remove("externalPhotoCalibGlobalCatalog")
329 else:
330 self.inputs.remove("externalPhotoCalibTractCatalog")
331 self.inputs.remove("externalPhotoCalibGlobalCatalog")
334class WriteRecalibratedSourceTableConfig(WriteSourceTableConfig,
335 pipelineConnections=WriteRecalibratedSourceTableConnections):
337 doReevaluatePhotoCalib = pexConfig.Field(
338 dtype=bool,
339 default=True,
340 doc=("Add or replace local photoCalib columns")
341 )
342 doReevaluateSkyWcs = pexConfig.Field(
343 dtype=bool,
344 default=True,
345 doc=("Add or replace local WCS columns and update the coord columns, coord_ra and coord_dec")
346 )
347 doApplyExternalPhotoCalib = pexConfig.Field(
348 dtype=bool,
349 default=True,
350 doc=("If and only if doReevaluatePhotoCalib, apply the photometric calibrations from an external ",
351 "algorithm such as FGCM or jointcal, else use the photoCalib already attached to the exposure."),
352 )
353 doApplyExternalSkyWcs = pexConfig.Field(
354 dtype=bool,
355 default=True,
356 doc=("if and only if doReevaluateSkyWcs, apply the WCS from an external algorithm such as jointcal, ",
357 "else use the wcs already attached to the exposure."),
358 )
359 useGlobalExternalPhotoCalib = pexConfig.Field(
360 dtype=bool,
361 default=True,
362 doc=("When using doApplyExternalPhotoCalib, use 'global' calibrations "
363 "that are not run per-tract. When False, use per-tract photometric "
364 "calibration files.")
365 )
366 useGlobalExternalSkyWcs = pexConfig.Field(
367 dtype=bool,
368 default=True,
369 doc=("When using doApplyExternalSkyWcs, use 'global' calibrations "
370 "that are not run per-tract. When False, use per-tract wcs "
371 "files.")
372 )
373 idGenerator = DetectorVisitIdGeneratorConfig.make_field()
375 def validate(self):
376 super().validate()
377 if self.doApplyExternalSkyWcs and not self.doReevaluateSkyWcs:
378 log.warning("doApplyExternalSkyWcs=True but doReevaluateSkyWcs=False"
379 "External SkyWcs will not be read or evaluated.")
380 if self.doApplyExternalPhotoCalib and not self.doReevaluatePhotoCalib:
381 log.warning("doApplyExternalPhotoCalib=True but doReevaluatePhotoCalib=False."
382 "External PhotoCalib will not be read or evaluated.")
385class WriteRecalibratedSourceTableTask(WriteSourceTableTask):
386 """Write source table to DataFrame Parquet format.
387 """
388 _DefaultName = "writeRecalibratedSourceTable"
389 ConfigClass = WriteRecalibratedSourceTableConfig
391 def runQuantum(self, butlerQC, inputRefs, outputRefs):
392 inputs = butlerQC.get(inputRefs)
394 idGenerator = self.config.idGenerator.apply(butlerQC.quantum.dataId)
395 inputs['idGenerator'] = idGenerator
396 inputs['ccdVisitId'] = idGenerator.catalog_id
398 if self.config.doReevaluatePhotoCalib or self.config.doReevaluateSkyWcs:
399 if self.config.doApplyExternalPhotoCalib or self.config.doApplyExternalSkyWcs:
400 inputs['exposure'] = self.attachCalibs(inputRefs, **inputs)
402 inputs['catalog'] = self.addCalibColumns(**inputs)
404 result = self.run(**inputs)
405 outputs = pipeBase.Struct(outputCatalog=result.table)
406 butlerQC.put(outputs, outputRefs)
408 def attachCalibs(self, inputRefs, skyMap, exposure, externalSkyWcsGlobalCatalog=None,
409 externalSkyWcsTractCatalog=None, externalPhotoCalibGlobalCatalog=None,
410 externalPhotoCalibTractCatalog=None, **kwargs):
411 """Apply external calibrations to exposure per configuration
413 When multiple tract-level calibrations overlap, select the one with the
414 center closest to detector.
416 Parameters
417 ----------
418 inputRefs : `lsst.pipe.base.InputQuantizedConnection`
419 Input connections, used for the dataIds of tract-level calibs.
420 skyMap : `lsst.skymap.SkyMap`
421 exposure : `lsst.afw.image.exposure.Exposure`
422 Input exposure to adjust calibrations.
423 externalSkyWcsGlobalCatalog : `lsst.afw.table.ExposureCatalog`, optional
424 Exposure catalog with external skyWcs to be applied per config
425 externalSkyWcsTractCatalog : `lsst.afw.table.ExposureCatalog`, optional
426 Exposure catalog with external skyWcs to be applied per config
427 externalPhotoCalibGlobalCatalog : `lsst.afw.table.ExposureCatalog`, optional
428 Exposure catalog with external photoCalib to be applied per config
429 externalPhotoCalibTractCatalog : `lsst.afw.table.ExposureCatalog`, optional
430 Exposure catalog with external photoCalib to be applied per config
431 **kwargs
432 Additional keyword arguments are ignored to facilitate passing the
433 same arguments to several methods.
435 Returns
436 -------
437 exposure : `lsst.afw.image.exposure.Exposure`
438 Exposure with adjusted calibrations.
439 """
440 if not self.config.doApplyExternalSkyWcs:
441 # Do not modify the exposure's SkyWcs
442 externalSkyWcsCatalog = None
443 elif self.config.useGlobalExternalSkyWcs:
444 # Use the global external SkyWcs
445 externalSkyWcsCatalog = externalSkyWcsGlobalCatalog
446 self.log.info('Applying global SkyWcs')
447 else:
448 # use tract-level external SkyWcs from the closest overlapping tract
449 inputRef = getattr(inputRefs, 'externalSkyWcsTractCatalog')
450 tracts = [ref.dataId['tract'] for ref in inputRef]
451 if len(tracts) == 1:
452 ind = 0
453 self.log.info('Applying tract-level SkyWcs from tract %s', tracts[ind])
454 else:
455 if exposure.getWcs() is None: # TODO: could this look-up use the externalPhotoCalib?
456 raise ValueError("Trying to locate nearest tract, but exposure.wcs is None.")
457 ind = self.getClosestTract(tracts, skyMap,
458 exposure.getBBox(), exposure.getWcs())
459 self.log.info('Multiple overlapping externalSkyWcsTractCatalogs found (%s). '
460 'Applying closest to detector center: tract=%s', str(tracts), tracts[ind])
462 externalSkyWcsCatalog = externalSkyWcsTractCatalog[ind]
464 if not self.config.doApplyExternalPhotoCalib:
465 # Do not modify the exposure's PhotoCalib
466 externalPhotoCalibCatalog = None
467 elif self.config.useGlobalExternalPhotoCalib:
468 # Use the global external PhotoCalib
469 externalPhotoCalibCatalog = externalPhotoCalibGlobalCatalog
470 self.log.info('Applying global PhotoCalib')
471 else:
472 # use tract-level external PhotoCalib from the closest overlapping tract
473 inputRef = getattr(inputRefs, 'externalPhotoCalibTractCatalog')
474 tracts = [ref.dataId['tract'] for ref in inputRef]
475 if len(tracts) == 1:
476 ind = 0
477 self.log.info('Applying tract-level PhotoCalib from tract %s', tracts[ind])
478 else:
479 ind = self.getClosestTract(tracts, skyMap,
480 exposure.getBBox(), exposure.getWcs())
481 self.log.info('Multiple overlapping externalPhotoCalibTractCatalogs found (%s). '
482 'Applying closest to detector center: tract=%s', str(tracts), tracts[ind])
484 externalPhotoCalibCatalog = externalPhotoCalibTractCatalog[ind]
486 return self.prepareCalibratedExposure(exposure, externalSkyWcsCatalog, externalPhotoCalibCatalog)
488 def getClosestTract(self, tracts, skyMap, bbox, wcs):
489 """Find the index of the tract closest to detector from list of tractIds
491 Parameters
492 ----------
493 tracts : `list` [`int`]
494 Iterable of integer tractIds
495 skyMap : `lsst.skymap.SkyMap`
496 skyMap to lookup tract geometry and wcs
497 bbox : `lsst.geom.Box2I`
498 Detector bbox, the center of which will be compared to tract centers.
499 wcs : `lsst.afw.geom.SkyWcs`
500 Detector Wcs object to map the detector center to SkyCoord
502 Returns
503 -------
504 index : `int`
505 """
506 if len(tracts) == 1:
507 return 0
509 center = wcs.pixelToSky(bbox.getCenter())
510 sep = []
511 for tractId in tracts:
512 tract = skyMap[tractId]
513 tractCenter = tract.getWcs().pixelToSky(tract.getBBox().getCenter())
514 sep.append(center.separation(tractCenter))
516 return np.argmin(sep)
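# Illustrative sketch (hard-coded toy coordinates): the nearest-center selection
# performed by getClosestTract, reduced to comparing angular separations between
# a detector center and candidate tract centers.
def _exampleClosestCenter():
    detCenter = lsst.geom.SpherePoint(150.0, 2.0, lsst.geom.degrees)
    tractCenters = [lsst.geom.SpherePoint(149.5, 2.1, lsst.geom.degrees),
                    lsst.geom.SpherePoint(151.2, 1.0, lsst.geom.degrees)]
    sep = [detCenter.separation(c).asDegrees() for c in tractCenters]
    return int(np.argmin(sep))  # 0: the first candidate is closest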
518 def prepareCalibratedExposure(self, exposure, externalSkyWcsCatalog=None, externalPhotoCalibCatalog=None):
519 """Prepare a calibrated exposure and apply external calibrations
520 if so configured.
522 Parameters
523 ----------
524 exposure : `lsst.afw.image.exposure.Exposure`
525 Input exposure to adjust calibrations.
526 externalSkyWcsCatalog : `lsst.afw.table.ExposureCatalog`, optional
527 Exposure catalog with external skyWcs to be applied
528 if config.doApplyExternalSkyWcs=True. Catalog uses the detector id
529 for the catalog id, sorted on id for fast lookup.
530 externalPhotoCalibCatalog : `lsst.afw.table.ExposureCatalog`, optional
531 Exposure catalog with external photoCalib to be applied
532 if config.doApplyExternalPhotoCalib=True. Catalog uses the detector
533 id for the catalog id, sorted on id for fast lookup.
535 Returns
536 -------
537 exposure : `lsst.afw.image.exposure.Exposure`
538 Exposure with adjusted calibrations.
539 """
540 detectorId = exposure.getInfo().getDetector().getId()
542 if externalPhotoCalibCatalog is not None:
543 row = externalPhotoCalibCatalog.find(detectorId)
544 if row is None:
545 self.log.warning("Detector id %s not found in externalPhotoCalibCatalog; "
546 "Using original photoCalib.", detectorId)
547 else:
548 photoCalib = row.getPhotoCalib()
549 if photoCalib is None:
550 self.log.warning("Detector id %s has None for photoCalib in externalPhotoCalibCatalog; "
551 "Using original photoCalib.", detectorId)
552 else:
553 exposure.setPhotoCalib(photoCalib)
555 if externalSkyWcsCatalog is not None:
556 row = externalSkyWcsCatalog.find(detectorId)
557 if row is None:
558 self.log.warning("Detector id %s not found in externalSkyWcsCatalog; "
559 "Using original skyWcs.", detectorId)
560 else:
561 skyWcs = row.getWcs()
562 if skyWcs is None:
563 self.log.warning("Detector id %s has None for skyWcs in externalSkyWcsCatalog; "
564 "Using original skyWcs.", detectorId)
565 else:
566 exposure.setWcs(skyWcs)
568 return exposure
570 def addCalibColumns(self, catalog, exposure, idGenerator, **kwargs):
571 """Add replace columns with calibs evaluated at each centroid
573 Add or replace 'base_LocalWcs' `base_LocalPhotoCalib' columns in a
574 a source catalog, by rerunning the plugins.
576 Parameters
577 ----------
578 catalog : `lsst.afw.table.SourceCatalog`
579 catalog to which calib columns will be added
580 exposure : `lsst.afw.image.exposure.Exposure`
581 Exposure with attached PhotoCalibs and SkyWcs attributes to be
582 reevaluated at local centroids. Pixels are not required.
583 idGenerator : `lsst.meas.base.IdGenerator`
584 Object that generates Source IDs and random seeds.
585 **kwargs
586 Additional keyword arguments are ignored to facilitate passing the
587 same arguments to several methods.
589 Returns
590 -------
591 newCat: `lsst.afw.table.SourceCatalog`
592 Source Catalog with requested local calib columns
593 """
594 measureConfig = SingleFrameMeasurementTask.ConfigClass()
595 measureConfig.doReplaceWithNoise = False
597 # Clear all slots, because we aren't running the relevant plugins.
598 for slot in measureConfig.slots:
599 setattr(measureConfig.slots, slot, None)
601 measureConfig.plugins.names = []
602 if self.config.doReevaluateSkyWcs:
603 measureConfig.plugins.names.add('base_LocalWcs')
604 self.log.info("Re-evaluating base_LocalWcs plugin")
605 if self.config.doReevaluatePhotoCalib:
606 measureConfig.plugins.names.add('base_LocalPhotoCalib')
607 self.log.info("Re-evaluating base_LocalPhotoCalib plugin")
608 pluginsNotToCopy = tuple(measureConfig.plugins.names)
610 # Create a new schema and catalog
611 # Copy all columns from original except for the ones to reevaluate
612 aliasMap = catalog.schema.getAliasMap()
613 mapper = afwTable.SchemaMapper(catalog.schema)
614 for item in catalog.schema:
615 if not item.field.getName().startswith(pluginsNotToCopy):
616 mapper.addMapping(item.key)
618 schema = mapper.getOutputSchema()
619 measurement = SingleFrameMeasurementTask(config=measureConfig, schema=schema)
620 schema.setAliasMap(aliasMap)
621 newCat = afwTable.SourceCatalog(schema)
622 newCat.extend(catalog, mapper=mapper)
624 # Fluxes in sourceCatalogs are in counts, so there are no fluxes to
625 # update here. LocalPhotoCalibs are applied during transform tasks.
626 # Update coord_ra/coord_dec, which are expected to be positions on the
627 # sky and are used as such in sdm tables without transform
628 if self.config.doReevaluateSkyWcs and exposure.wcs is not None:
629 afwTable.updateSourceCoords(exposure.wcs, newCat)
631 measurement.run(measCat=newCat, exposure=exposure, exposureId=idGenerator.catalog_id)
633 return newCat
636class PostprocessAnalysis(object):
637 """Calculate columns from DataFrames or handles storing DataFrames.
639 This object manages and organizes an arbitrary set of computations
640 on a catalog. The catalog is defined by a
641 `DeferredDatasetHandle` or `InMemoryDatasetHandle` object
642 (or list thereof), such as a ``deepCoadd_obj`` dataset, and the
643 computations are defined by a collection of `lsst.pipe.tasks.functors.Functor`
644 objects (or, equivalently, a ``CompositeFunctor``).
646 After the object is initialized, accessing the ``.df`` attribute (which
647 holds the `pandas.DataFrame` containing the results of the calculations)
648 triggers computation of said dataframe.
650 One of the conveniences of using this object is the ability to define a
651 desired common filter for all functors. This enables the same functor
652 collection to be passed to several different `PostprocessAnalysis` objects
653 without having to change the original functor collection, since the ``filt``
654 keyword argument of this object triggers an overwrite of the ``filt``
655 property for all functors in the collection.
657 This object also allows a list of refFlags to be passed, and defines a set
658 of default refFlags that are always included even if not requested.
660 If a list of DataFrames or Handles is passed, rather than a single one,
661 then the calculations will be mapped over all the input catalogs. In
662 principle, it should be straightforward to parallelize this activity, but
663 initial tests have failed (see TODO in code comments).
665 Parameters
666 ----------
667 handles : `lsst.daf.butler.DeferredDatasetHandle` or
668 `lsst.pipe.base.InMemoryDatasetHandle` or
669 list of these.
670 Source catalog(s) for computation.
671 functors : `list`, `dict`, or `~lsst.pipe.tasks.functors.CompositeFunctor`
672 Computations to do (functors that act on ``handles``).
673 If a dict, the output
674 DataFrame will have columns keyed accordingly.
675 If a list, the column keys will come from the
676 ``.shortname`` attribute of each functor.
678 filt : `str`, optional
679 Filter in which to calculate. If provided,
680 this will overwrite any existing ``.filt`` attribute
681 of the provided functors.
683 flags : `list`, optional
684 List of flags (per-band) to include in output table.
685 Taken from the ``meas`` dataset if applied to a multilevel Object Table.
687 refFlags : `list`, optional
688 List of refFlags (only reference band) to include in output table.
690 forcedFlags : `list`, optional
691 List of flags (per-band) to include in output table.
692 Taken from the ``forced_src`` dataset if applied to a
693 multilevel Object Table. Intended for flags from measurement plugins
694 only run during multi-band forced-photometry.
695 """
696 _defaultRefFlags = []
697 _defaultFuncs = ()
699 def __init__(self, handles, functors, filt=None, flags=None, refFlags=None, forcedFlags=None):
700 self.handles = handles
701 self.functors = functors
703 self.filt = filt
704 self.flags = list(flags) if flags is not None else []
705 self.forcedFlags = list(forcedFlags) if forcedFlags is not None else []
706 self.refFlags = list(self._defaultRefFlags)
707 if refFlags is not None:
708 self.refFlags += list(refFlags)
710 self._df = None
712 @property
713 def defaultFuncs(self):
714 funcs = dict(self._defaultFuncs)
715 return funcs
717 @property
718 def func(self):
719 additionalFuncs = self.defaultFuncs
720 additionalFuncs.update({flag: Column(flag, dataset='forced_src') for flag in self.forcedFlags})
721 additionalFuncs.update({flag: Column(flag, dataset='ref') for flag in self.refFlags})
722 additionalFuncs.update({flag: Column(flag, dataset='meas') for flag in self.flags})
724 if isinstance(self.functors, CompositeFunctor):
725 func = self.functors
726 else:
727 func = CompositeFunctor(self.functors)
729 func.funcDict.update(additionalFuncs)
730 func.filt = self.filt
732 return func
734 @property
735 def noDupCols(self):
736 return [name for name, func in self.func.funcDict.items() if func.noDup or func.dataset == 'ref']
738 @property
739 def df(self):
740 if self._df is None:
741 self.compute()
742 return self._df
744 def compute(self, dropna=False, pool=None):
745 # map over multiple handles
746 if type(self.handles) in (list, tuple):
747 if pool is None:
748 dflist = [self.func(handle, dropna=dropna) for handle in self.handles]
749 else:
750 # TODO: Figure out why this doesn't work (pyarrow pickling
751 # issues?)
752 dflist = pool.map(functools.partial(self.func, dropna=dropna), self.handles)
753 self._df = pd.concat(dflist)
754 else:
755 self._df = self.func(self.handles, dropna=dropna)
757 return self._df
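# Illustrative sketch (assumptions: a flat toy DataFrame wrapped in
# lsst.pipe.base.InMemoryDatasetHandle, and invented column names): wiring a
# functor dictionary into PostprocessAnalysis and triggering the computation
# through the .df property.
def _examplePostprocessAnalysis():
    import pandas as pd
    from lsst.pipe.base import InMemoryDatasetHandle
    data = pd.DataFrame({"coord_ra": [1.0, 2.0], "coord_dec": [0.5, 0.6]})
    handle = InMemoryDatasetHandle(data, storageClass="DataFrame")
    funcs = {"ra": Column("coord_ra"), "dec": Column("coord_dec")}
    analysis = PostprocessAnalysis(handle, funcs)
    return analysis.df  # DataFrame with the computed 'ra' and 'dec' columns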
760class TransformCatalogBaseConnections(pipeBase.PipelineTaskConnections,
761 dimensions=()):
762 """Expected Connections for subclasses of TransformCatalogBaseTask.
764 Must be subclassed.
765 """
766 inputCatalog = connectionTypes.Input(
767 name="",
768 storageClass="DataFrame",
769 )
770 outputCatalog = connectionTypes.Output(
771 name="",
772 storageClass="DataFrame",
773 )
776class TransformCatalogBaseConfig(pipeBase.PipelineTaskConfig,
777 pipelineConnections=TransformCatalogBaseConnections):
778 functorFile = pexConfig.Field(
779 dtype=str,
780 doc="Path to YAML file specifying Science Data Model functors to use "
781 "when copying columns and computing calibrated values.",
782 default=None,
783 optional=True
784 )
785 primaryKey = pexConfig.Field(
786 dtype=str,
787 doc="Name of column to be set as the DataFrame index. If None, the index"
788 "will be named `id`",
789 default=None,
790 optional=True
791 )
792 columnsFromDataId = pexConfig.ListField(
793 dtype=str,
794 default=None,
795 optional=True,
796 doc="Columns to extract from the dataId",
797 )
800class TransformCatalogBaseTask(pipeBase.PipelineTask):
801 """Base class for transforming/standardizing a catalog
803 by applying functors that convert units and apply calibrations.
804 The purpose of this task is to perform a set of computations on
805 an input ``DeferredDatasetHandle`` or ``InMemoryDatasetHandle`` that holds
806 a ``DataFrame`` dataset (such as ``deepCoadd_obj``), and write the
807 results to a new dataset (which needs to be declared in an ``outputDataset``
808 attribute).
810 The calculations to be performed are defined in a YAML file that specifies
811 a set of functors to be computed, provided as
812 a ``--functorFile`` config parameter. An example of such a YAML file
813 is the following:
815 funcs:
816 psfMag:
817 functor: Mag
818 args:
819 - base_PsfFlux
820 filt: HSC-G
821 dataset: meas
822 cmodel_magDiff:
823 functor: MagDiff
824 args:
825 - modelfit_CModel
826 - base_PsfFlux
827 filt: HSC-G
828 gauss_magDiff:
829 functor: MagDiff
830 args:
831 - base_GaussianFlux
832 - base_PsfFlux
833 filt: HSC-G
834 count:
835 functor: Column
836 args:
837 - base_InputCount_value
838 filt: HSC-G
839 deconvolved_moments:
840 functor: DeconvolvedMoments
841 filt: HSC-G
842 dataset: forced_src
843 refFlags:
844 - calib_psfUsed
845 - merge_measurement_i
846 - merge_measurement_r
847 - merge_measurement_z
848 - merge_measurement_y
849 - merge_measurement_g
850 - base_PixelFlags_flag_inexact_psfCenter
851 - detect_isPrimary
853 The names for each entry under "funcs" will become the names of columns in
854 the output dataset. All the functors referenced are defined in
855 `lsst.pipe.tasks.functors`. Positional arguments to be passed to each
856 functor are in the `args` list, and any additional entries for each column
857 other than "functor" or "args" (e.g., ``'filt'``, ``'dataset'``) are treated as
858 keyword arguments to be passed to the functor initialization.
860 The "flags" entry is the default shortcut for `Column` functors.
861 All columns listed under "flags" will be copied to the output table
862 untransformed. They can be of any datatype.
863 In the special case of transforming a multi-level object table with
864 band and dataset indices (deepCoadd_obj), these will be taken from the
865 `meas` dataset and exploded out per band.
867 There are two special shortcuts that only apply when transforming
868 multi-level Object (deepCoadd_obj) tables:
869 - The "refFlags" entry is shortcut for `Column` functor
870 taken from the `'ref'` dataset if transforming an ObjectTable.
871 - The "forcedFlags" entry is shortcut for `Column` functors.
872 taken from the ``forced_src`` dataset if transforming an ObjectTable.
873 These are expanded out per band.
876 This task uses the `lsst.pipe.tasks.postprocess.PostprocessAnalysis` object
877 to organize and execute the calculations.
878 """
879 @property
880 def _DefaultName(self):
881 raise NotImplementedError('Subclass must define "_DefaultName" attribute')
883 @property
884 def outputDataset(self):
885 raise NotImplementedError('Subclass must define "outputDataset" attribute')
887 @property
888 def inputDataset(self):
889 raise NotImplementedError('Subclass must define "inputDataset" attribute')
891 @property
892 def ConfigClass(self):
893 raise NotImplementedError('Subclass must define "ConfigClass" attribute')
895 def __init__(self, *args, **kwargs):
896 super().__init__(*args, **kwargs)
897 if self.config.functorFile:
898 self.log.info('Loading transform functor definitions from %s',
899 self.config.functorFile)
900 self.funcs = CompositeFunctor.from_file(self.config.functorFile)
901 self.funcs.update(dict(PostprocessAnalysis._defaultFuncs))
902 else:
903 self.funcs = None
905 def runQuantum(self, butlerQC, inputRefs, outputRefs):
906 inputs = butlerQC.get(inputRefs)
907 if self.funcs is None:
908 raise ValueError("config.functorFile is None. "
909 "Must be a valid path to yaml in order to run Task as a PipelineTask.")
910 result = self.run(handle=inputs['inputCatalog'], funcs=self.funcs,
911 dataId=outputRefs.outputCatalog.dataId.full)
912 outputs = pipeBase.Struct(outputCatalog=result)
913 butlerQC.put(outputs, outputRefs)
915 def run(self, handle, funcs=None, dataId=None, band=None):
916 """Do postprocessing calculations
918 Takes a ``DeferredDatasetHandle`` or ``InMemoryDatasetHandle`` or
919 ``DataFrame`` object and dataId,
920 returns a dataframe with results of postprocessing calculations.
922 Parameters
923 ----------
924 handle : `lsst.daf.butler.DeferredDatasetHandle` or
925 `lsst.pipe.base.InMemoryDatasetHandle` or
926 `pandas.DataFrame`, or list of these.
927 DataFrames from which calculations are done.
928 funcs : `lsst.pipe.tasks.functors.Functors`
929 Functors to apply to the table's columns
930 dataId : dict, optional
931 Used to add a `patchId` column to the output dataframe.
932 band : `str`, optional
933 Filter band that is being processed.
935 Returns
936 -------
937 df : `pandas.DataFrame`
938 """
939 self.log.info("Transforming/standardizing the source table dataId: %s", dataId)
941 df = self.transform(band, handle, funcs, dataId).df
942 self.log.info("Made a table of %d columns and %d rows", len(df.columns), len(df))
943 return df
945 def getFunctors(self):
946 return self.funcs
948 def getAnalysis(self, handles, funcs=None, band=None):
949 if funcs is None:
950 funcs = self.funcs
951 analysis = PostprocessAnalysis(handles, funcs, filt=band)
952 return analysis
954 def transform(self, band, handles, funcs, dataId):
955 analysis = self.getAnalysis(handles, funcs=funcs, band=band)
956 df = analysis.df
957 if dataId and self.config.columnsFromDataId:
958 for key in self.config.columnsFromDataId:
959 if key in dataId:
960 df[str(key)] = dataId[key]
961 else:
962 raise ValueError(f"'{key}' in config.columnsFromDataId not found in dataId: {dataId}")
964 if self.config.primaryKey:
965 if df.index.name != self.config.primaryKey and self.config.primaryKey in df:
966 df.reset_index(inplace=True, drop=True)
967 df.set_index(self.config.primaryKey, inplace=True)
969 return pipeBase.Struct(
970 df=df,
971 analysis=analysis
972 )
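# Illustrative sketch (assumption: the Mag and MagDiff functors live in
# lsst.pipe.tasks.functors alongside CompositeFunctor): the Python-side
# equivalent of the "funcs" section of the YAML example in the class
# docstring above.
def _exampleFunctorCollection():
    from lsst.pipe.tasks.functors import Mag, MagDiff
    return CompositeFunctor({
        "psfMag": Mag("base_PsfFlux", dataset="meas", filt="HSC-G"),
        "cmodel_magDiff": MagDiff("modelfit_CModel", "base_PsfFlux", filt="HSC-G"),
    })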
975class TransformObjectCatalogConnections(pipeBase.PipelineTaskConnections,
976 defaultTemplates={"coaddName": "deep"},
977 dimensions=("tract", "patch", "skymap")):
978 inputCatalog = connectionTypes.Input(
979 doc="The vertical concatenation of the deepCoadd_{ref|meas|forced_src} catalogs, "
980 "stored as a DataFrame with a multi-level column index per-patch.",
981 dimensions=("tract", "patch", "skymap"),
982 storageClass="DataFrame",
983 name="{coaddName}Coadd_obj",
984 deferLoad=True,
985 )
986 outputCatalog = connectionTypes.Output(
987 doc="Per-Patch Object Table of columns transformed from the deepCoadd_obj table per the standard "
988 "data model.",
989 dimensions=("tract", "patch", "skymap"),
990 storageClass="DataFrame",
991 name="objectTable"
992 )
995class TransformObjectCatalogConfig(TransformCatalogBaseConfig,
996 pipelineConnections=TransformObjectCatalogConnections):
997 coaddName = pexConfig.Field(
998 dtype=str,
999 default="deep",
1000 doc="Name of coadd"
1001 )
1002 # TODO: remove in DM-27177
1003 filterMap = pexConfig.DictField(
1004 keytype=str,
1005 itemtype=str,
1006 default={},
1007 doc=("Dictionary mapping full filter name to short one for column name munging."
1008 "These filters determine the output columns no matter what filters the "
1009 "input data actually contain."),
1010 deprecated=("Coadds are now identified by the band, so this transform is unused."
1011 "Will be removed after v22.")
1012 )
1013 outputBands = pexConfig.ListField(
1014 dtype=str,
1015 default=None,
1016 optional=True,
1017 doc=("These bands and only these bands will appear in the output,"
1018 " NaN-filled if the input does not include them."
1019 " If None, then use all bands found in the input.")
1020 )
1021 camelCase = pexConfig.Field(
1022 dtype=bool,
1023 default=False,
1024 doc=("Write per-band columns names with camelCase, else underscore "
1025 "For example: gPsFlux instead of g_PsFlux.")
1026 )
1027 multilevelOutput = pexConfig.Field(
1028 dtype=bool,
1029 default=False,
1030 doc=("Whether results dataframe should have a multilevel column index (True) or be flat "
1031 "and name-munged (False).")
1032 )
1033 goodFlags = pexConfig.ListField(
1034 dtype=str,
1035 default=[],
1036 doc=("List of 'good' flags that should be set False when populating empty tables. "
1037 "All other flags are considered to be 'bad' flags and will be set to True.")
1038 )
1039 floatFillValue = pexConfig.Field(
1040 dtype=float,
1041 default=np.nan,
1042 doc="Fill value for float fields when populating empty tables."
1043 )
1044 integerFillValue = pexConfig.Field(
1045 dtype=int,
1046 default=-1,
1047 doc="Fill value for integer fields when populating empty tables."
1048 )
1050 def setDefaults(self):
1051 super().setDefaults()
1052 self.functorFile = os.path.join('$PIPE_TASKS_DIR', 'schemas', 'Object.yaml')
1053 self.primaryKey = 'objectId'
1054 self.columnsFromDataId = ['tract', 'patch']
1055 self.goodFlags = ['calib_astrometry_used',
1056 'calib_photometry_reserved',
1057 'calib_photometry_used',
1058 'calib_psf_candidate',
1059 'calib_psf_reserved',
1060 'calib_psf_used']
1063class TransformObjectCatalogTask(TransformCatalogBaseTask):
1064 """Produce a flattened Object Table to match the format specified in
1065 sdm_schemas.
1067 Do the same set of postprocessing calculations on all bands.
1069 This is identical to `TransformCatalogBaseTask`, except that it does
1070 the specified functor calculations for all filters present in the
1071 input `deepCoadd_obj` table. Any specific ``"filt"`` keywords specified
1072 by the YAML file will be superseded.
1073 """
1074 _DefaultName = "transformObjectCatalog"
1075 ConfigClass = TransformObjectCatalogConfig
1077 def run(self, handle, funcs=None, dataId=None, band=None):
1078 # NOTE: band kwarg is ignored here.
1079 dfDict = {}
1080 analysisDict = {}
1081 templateDf = pd.DataFrame()
1083 columns = handle.get(component='columns')
1084 inputBands = columns.unique(level=1).values
1086 outputBands = self.config.outputBands if self.config.outputBands else inputBands
1088 # Perform transform for data of filters that exist in the handle dataframe.
1089 for inputBand in inputBands:
1090 if inputBand not in outputBands:
1091 self.log.info("Ignoring %s band data in the input", inputBand)
1092 continue
1093 self.log.info("Transforming the catalog of band %s", inputBand)
1094 result = self.transform(inputBand, handle, funcs, dataId)
1095 dfDict[inputBand] = result.df
1096 analysisDict[inputBand] = result.analysis
1097 if templateDf.empty:
1098 templateDf = result.df
1100 # Put filler values in columns of other wanted bands
1101 for filt in outputBands:
1102 if filt not in dfDict:
1103 self.log.info("Adding empty columns for band %s", filt)
1104 dfTemp = templateDf.copy()
1105 for col in dfTemp.columns:
1106 testValue = dfTemp[col].values[0]
1107 if isinstance(testValue, (np.bool_, pd.BooleanDtype)):
1108 # Boolean flag type, check if it is a "good" flag
1109 if col in self.config.goodFlags:
1110 fillValue = False
1111 else:
1112 fillValue = True
1113 elif isinstance(testValue, numbers.Integral):
1114 # Checking numbers.Integral catches all flavors
1115 # of python, numpy, pandas, etc. integers.
1116 # We must ensure this is not an unsigned integer.
1117 if isinstance(testValue, np.unsignedinteger):
1118 raise ValueError("Parquet tables may not have unsigned integer columns.")
1119 else:
1120 fillValue = self.config.integerFillValue
1121 else:
1122 fillValue = self.config.floatFillValue
1123 dfTemp[col].values[:] = fillValue
1124 dfDict[filt] = dfTemp
1126 # This makes a multilevel column index, with band as first level
1127 df = pd.concat(dfDict, axis=1, names=['band', 'column'])
1129 if not self.config.multilevelOutput:
1130 noDupCols = list(set.union(*[set(v.noDupCols) for v in analysisDict.values()]))
1131 if self.config.primaryKey in noDupCols:
1132 noDupCols.remove(self.config.primaryKey)
1133 if dataId and self.config.columnsFromDataId:
1134 noDupCols += self.config.columnsFromDataId
1135 df = flattenFilters(df, noDupCols=noDupCols, camelCase=self.config.camelCase,
1136 inputBands=inputBands)
1138 self.log.info("Made a table of %d columns and %d rows", len(df.columns), len(df))
1140 return df
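# Illustrative sketch (toy per-band frames with invented columns): how the
# per-band results are stacked into a band-keyed column MultiIndex and then
# optionally flattened, as in run() above.
def _exampleBandConcat():
    import pandas as pd
    g = pd.DataFrame({"psfFlux": [1.0], "coord_ra": [10.0], "coord_dec": [0.0]})
    r = pd.DataFrame({"psfFlux": [2.0], "coord_ra": [10.0], "coord_dec": [0.0]})
    df = pd.concat({"g": g, "r": r}, axis=1, names=["band", "column"])
    # Flattened output columns: ['coord_ra', 'coord_dec', 'g_psfFlux', 'r_psfFlux']
    return flattenFilters(df, noDupCols=["coord_ra", "coord_dec"])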
1143class ConsolidateObjectTableConnections(pipeBase.PipelineTaskConnections,
1144 dimensions=("tract", "skymap")):
1145 inputCatalogs = connectionTypes.Input(
1146 doc="Per-Patch objectTables conforming to the standard data model.",
1147 name="objectTable",
1148 storageClass="DataFrame",
1149 dimensions=("tract", "patch", "skymap"),
1150 multiple=True,
1151 )
1152 outputCatalog = connectionTypes.Output(
1153 doc="Pre-tract horizontal concatenation of the input objectTables",
1154 name="objectTable_tract",
1155 storageClass="DataFrame",
1156 dimensions=("tract", "skymap"),
1157 )
1160class ConsolidateObjectTableConfig(pipeBase.PipelineTaskConfig,
1161 pipelineConnections=ConsolidateObjectTableConnections):
1162 coaddName = pexConfig.Field(
1163 dtype=str,
1164 default="deep",
1165 doc="Name of coadd"
1166 )
1169class ConsolidateObjectTableTask(pipeBase.PipelineTask):
1170 """Write patch-merged source tables to a tract-level DataFrame Parquet file.
1172 Concatenates `objectTable` list into a per-tract `objectTable_tract`.
1173 """
1174 _DefaultName = "consolidateObjectTable"
1175 ConfigClass = ConsolidateObjectTableConfig
1177 inputDataset = 'objectTable'
1178 outputDataset = 'objectTable_tract'
1180 def runQuantum(self, butlerQC, inputRefs, outputRefs):
1181 inputs = butlerQC.get(inputRefs)
1182 self.log.info("Concatenating %s per-patch Object Tables",
1183 len(inputs['inputCatalogs']))
1184 df = pd.concat(inputs['inputCatalogs'])
1185 butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs)
1188class TransformSourceTableConnections(pipeBase.PipelineTaskConnections,
1189 defaultTemplates={"catalogType": ""},
1190 dimensions=("instrument", "visit", "detector")):
1192 inputCatalog = connectionTypes.Input(
1193 doc="Wide input catalog of sources produced by WriteSourceTableTask",
1194 name="{catalogType}source",
1195 storageClass="DataFrame",
1196 dimensions=("instrument", "visit", "detector"),
1197 deferLoad=True
1198 )
1199 outputCatalog = connectionTypes.Output(
1200 doc="Narrower, per-detector Source Table transformed and converted per a "
1201 "specified set of functors",
1202 name="{catalogType}sourceTable",
1203 storageClass="DataFrame",
1204 dimensions=("instrument", "visit", "detector")
1205 )
1208class TransformSourceTableConfig(TransformCatalogBaseConfig,
1209 pipelineConnections=TransformSourceTableConnections):
1211 def setDefaults(self):
1212 super().setDefaults()
1213 self.functorFile = os.path.join('$PIPE_TASKS_DIR', 'schemas', 'Source.yaml')
1214 self.primaryKey = 'sourceId'
1215 self.columnsFromDataId = ['visit', 'detector', 'band', 'physical_filter']
1218class TransformSourceTableTask(TransformCatalogBaseTask):
1219 """Transform/standardize a source catalog
1220 """
1221 _DefaultName = "transformSourceTable"
1222 ConfigClass = TransformSourceTableConfig
1225class ConsolidateVisitSummaryConnections(pipeBase.PipelineTaskConnections,
1226 dimensions=("instrument", "visit",),
1227 defaultTemplates={"calexpType": ""}):
1228 calexp = connectionTypes.Input(
1229 doc="Processed exposures used for metadata",
1230 name="calexp",
1231 storageClass="ExposureF",
1232 dimensions=("instrument", "visit", "detector"),
1233 deferLoad=True,
1234 multiple=True,
1235 )
1236 visitSummary = connectionTypes.Output(
1237 doc=("Per-visit consolidated exposure metadata. These catalogs use "
1238 "detector id for the id and are sorted for fast lookups of a "
1239 "detector."),
1240 name="visitSummary",
1241 storageClass="ExposureCatalog",
1242 dimensions=("instrument", "visit"),
1243 )
1244 visitSummarySchema = connectionTypes.InitOutput(
1245 doc="Schema of the visitSummary catalog",
1246 name="visitSummary_schema",
1247 storageClass="ExposureCatalog",
1248 )
1251class ConsolidateVisitSummaryConfig(pipeBase.PipelineTaskConfig,
1252 pipelineConnections=ConsolidateVisitSummaryConnections):
1253 """Config for ConsolidateVisitSummaryTask"""
1254 pass
1257class ConsolidateVisitSummaryTask(pipeBase.PipelineTask):
1258 """Task to consolidate per-detector visit metadata.
1260 This task aggregates the following metadata from all the detectors in a
1261 single visit into an exposure catalog:
1262 - The visitInfo.
1263 - The wcs.
1264 - The photoCalib.
1265 - The physical_filter and band (if available).
1266 - The psf size, shape, and effective area at the center of the detector.
1267 - The corners of the bounding box in right ascension/declination.
1269 Other quantities such as Detector, Psf, ApCorrMap, and TransmissionCurve
1270 are not persisted here because of storage concerns, and because of their
1271 limited utility as summary statistics.
1273 Tests for this task are performed in ci_hsc_gen3.
1274 """
1275 _DefaultName = "consolidateVisitSummary"
1276 ConfigClass = ConsolidateVisitSummaryConfig
1278 def __init__(self, **kwargs):
1279 super().__init__(**kwargs)
1280 self.schema = afwTable.ExposureTable.makeMinimalSchema()
1281 self.schema.addField('visit', type='L', doc='Visit number')
1282 self.schema.addField('physical_filter', type='String', size=32, doc='Physical filter')
1283 self.schema.addField('band', type='String', size=32, doc='Name of band')
1284 ExposureSummaryStats.update_schema(self.schema)
1285 self.visitSummarySchema = afwTable.ExposureCatalog(self.schema)
1287 def runQuantum(self, butlerQC, inputRefs, outputRefs):
1288 dataRefs = butlerQC.get(inputRefs.calexp)
1289 visit = dataRefs[0].dataId.byName()['visit']
1291 self.log.debug("Concatenating metadata from %d per-detector calexps (visit %d)",
1292 len(dataRefs), visit)
1294 expCatalog = self._combineExposureMetadata(visit, dataRefs)
1296 butlerQC.put(expCatalog, outputRefs.visitSummary)
1298 def _combineExposureMetadata(self, visit, dataRefs):
1299 """Make a combined exposure catalog from a list of dataRefs.
1300 These dataRefs must point to exposures with wcs, summaryStats,
1301 and other visit metadata.
1303 Parameters
1304 ----------
1305 visit : `int`
1306 Visit identification number.
1307 dataRefs : `list` of `lsst.daf.butler.DeferredDatasetHandle`
1308 List of dataRefs in visit.
1310 Returns
1311 -------
1312 visitSummary : `lsst.afw.table.ExposureCatalog`
1313 Exposure catalog with per-detector summary information.
1314 """
1315 cat = afwTable.ExposureCatalog(self.schema)
1316 cat.resize(len(dataRefs))
1318 cat['visit'] = visit
1320 for i, dataRef in enumerate(dataRefs):
1321 visitInfo = dataRef.get(component='visitInfo')
1322 filterLabel = dataRef.get(component='filter')
1323 summaryStats = dataRef.get(component='summaryStats')
1324 detector = dataRef.get(component='detector')
1325 wcs = dataRef.get(component='wcs')
1326 photoCalib = dataRef.get(component='photoCalib')
1328 bbox = dataRef.get(component='bbox')
1329 validPolygon = dataRef.get(component='validPolygon')
1331 rec = cat[i]
1332 rec.setBBox(bbox)
1333 rec.setVisitInfo(visitInfo)
1334 rec.setWcs(wcs)
1335 rec.setPhotoCalib(photoCalib)
1336 rec.setValidPolygon(validPolygon)
1338 rec['physical_filter'] = filterLabel.physicalLabel if filterLabel.hasPhysicalLabel() else ""
1339 rec['band'] = filterLabel.bandLabel if filterLabel.hasBandLabel() else ""
1340 rec.setId(detector.getId())
1341 summaryStats.update_record(rec)
1343 metadata = dafBase.PropertyList()
1344 metadata.add("COMMENT", "Catalog id is detector id, sorted.")
1345 # We are looping over existing datarefs, so the following is true
1346 metadata.add("COMMENT", "Only detectors with data have entries.")
1347 cat.setMetadata(metadata)
1349 cat.sort()
1350 return cat
1353class ConsolidateSourceTableConnections(pipeBase.PipelineTaskConnections,
1354 defaultTemplates={"catalogType": ""},
1355 dimensions=("instrument", "visit")):
1356 inputCatalogs = connectionTypes.Input(
1357 doc="Input per-detector Source Tables",
1358 name="{catalogType}sourceTable",
1359 storageClass="DataFrame",
1360 dimensions=("instrument", "visit", "detector"),
1361 multiple=True
1362 )
1363 outputCatalog = connectionTypes.Output(
1364 doc="Per-visit concatenation of Source Table",
1365 name="{catalogType}sourceTable_visit",
1366 storageClass="DataFrame",
1367 dimensions=("instrument", "visit")
1368 )
1371class ConsolidateSourceTableConfig(pipeBase.PipelineTaskConfig,
1372 pipelineConnections=ConsolidateSourceTableConnections):
1373 pass
1376class ConsolidateSourceTableTask(pipeBase.PipelineTask):
1377 """Concatenate `sourceTable` list into a per-visit `sourceTable_visit`
1378 """
1379 _DefaultName = 'consolidateSourceTable'
1380 ConfigClass = ConsolidateSourceTableConfig
1382 inputDataset = 'sourceTable'
1383 outputDataset = 'sourceTable_visit'
1385 def runQuantum(self, butlerQC, inputRefs, outputRefs):
1386 from .makeWarp import reorderRefs
1388 detectorOrder = [ref.dataId['detector'] for ref in inputRefs.inputCatalogs]
1389 detectorOrder.sort()
1390 inputRefs = reorderRefs(inputRefs, detectorOrder, dataIdKey='detector')
1391 inputs = butlerQC.get(inputRefs)
1392 self.log.info("Concatenating %s per-detector Source Tables",
1393 len(inputs['inputCatalogs']))
1394 df = pd.concat(inputs['inputCatalogs'])
1395 butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs)
1398class MakeCcdVisitTableConnections(pipeBase.PipelineTaskConnections,
1399 dimensions=("instrument",),
1400 defaultTemplates={"calexpType": ""}):
1401 visitSummaryRefs = connectionTypes.Input(
1402 doc="Data references for per-visit consolidated exposure metadata",
1403 name="finalVisitSummary",
1404 storageClass="ExposureCatalog",
1405 dimensions=("instrument", "visit"),
1406 multiple=True,
1407 deferLoad=True,
1408 )
1409 outputCatalog = connectionTypes.Output(
1410 doc="CCD and Visit metadata table",
1411 name="ccdVisitTable",
1412 storageClass="DataFrame",
1413 dimensions=("instrument",)
1414 )
1417class MakeCcdVisitTableConfig(pipeBase.PipelineTaskConfig,
1418 pipelineConnections=MakeCcdVisitTableConnections):
1419 idGenerator = DetectorVisitIdGeneratorConfig.make_field()
1422class MakeCcdVisitTableTask(pipeBase.PipelineTask):
1423 """Produce a `ccdVisitTable` from the visit summary exposure catalogs.
1424 """
1425 _DefaultName = 'makeCcdVisitTable'
1426 ConfigClass = MakeCcdVisitTableConfig
1428 def run(self, visitSummaryRefs):
1429 """Make a table of ccd information from the visit summary catalogs.
1431 Parameters
1432 ----------
1433 visitSummaryRefs : `list` of `lsst.daf.butler.DeferredDatasetHandle`
1434 List of DeferredDatasetHandles pointing to exposure catalogs with
1435 per-detector summary information.
1437 Returns
1438 -------
1439 result : `lsst.pipe.base.Struct`
1440 Results struct with attribute:
1442 ``outputCatalog``
1443 Catalog of ccd and visit information.
1444 """
1445 ccdEntries = []
1446 for visitSummaryRef in visitSummaryRefs:
1447 visitSummary = visitSummaryRef.get()
1448 visitInfo = visitSummary[0].getVisitInfo()
1450 ccdEntry = {}
1451 summaryTable = visitSummary.asAstropy()
1452 selectColumns = ['id', 'visit', 'physical_filter', 'band', 'ra', 'dec', 'zenithDistance',
1453 'zeroPoint', 'psfSigma', 'skyBg', 'skyNoise',
1454 'astromOffsetMean', 'astromOffsetStd', 'nPsfStar',
1455 'psfStarDeltaE1Median', 'psfStarDeltaE2Median',
1456 'psfStarDeltaE1Scatter', 'psfStarDeltaE2Scatter',
1457 'psfStarDeltaSizeMedian', 'psfStarDeltaSizeScatter',
1458 'psfStarScaledDeltaSizeScatter',
1459 'psfTraceRadiusDelta', 'maxDistToNearestPsf']
1460 ccdEntry = summaryTable[selectColumns].to_pandas().set_index('id')
1461 # 'visit' is the human readable visit number.
1462 # 'visitId' is the key to the visitId table. They are the same.
1463 # Technically you should join to get the visit from the visit
1464 # table.
1465 ccdEntry = ccdEntry.rename(columns={"visit": "visitId"})
1467 # RFC-924: Temporarily keep a duplicate "decl" entry for backwards
1468 # compatibility. To be removed after September 2023.
1469 ccdEntry["decl"] = ccdEntry.loc[:, "dec"]
1471 ccdEntry['ccdVisitId'] = [
1472 self.config.idGenerator.apply(
1473 visitSummaryRef.dataId,
1474 detector=detector_id,
1475 is_exposure=False,
1476 ).catalog_id # The "catalog ID" here is the ccdVisit ID
1477 # because it's usually the ID for a whole catalog
1478 # with a {visit, detector}, and that's the main
1479 # use case for IdGenerator. This usage for a
1480 # summary table is rare.
1481 for detector_id in summaryTable['id']
1482 ]
1483 ccdEntry['detector'] = summaryTable['id']
1484 pixToArcseconds = np.array([vR.getWcs().getPixelScale().asArcseconds() if vR.getWcs()
1485 else np.nan for vR in visitSummary])
1486 ccdEntry["seeing"] = visitSummary['psfSigma'] * np.sqrt(8 * np.log(2)) * pixToArcseconds
1488 ccdEntry["skyRotation"] = visitInfo.getBoresightRotAngle().asDegrees()
1489 ccdEntry["expMidpt"] = visitInfo.getDate().toPython()
1490 ccdEntry["expMidptMJD"] = visitInfo.getDate().get(dafBase.DateTime.MJD)
1491 expTime = visitInfo.getExposureTime()
1492 ccdEntry['expTime'] = expTime
1493 ccdEntry["obsStart"] = ccdEntry["expMidpt"] - 0.5 * pd.Timedelta(seconds=expTime)
1494 expTime_days = expTime / (60*60*24)
1495 ccdEntry["obsStartMJD"] = ccdEntry["expMidptMJD"] - 0.5 * expTime_days
1496 ccdEntry['darkTime'] = visitInfo.getDarkTime()
1497 ccdEntry['xSize'] = summaryTable['bbox_max_x'] - summaryTable['bbox_min_x']
1498 ccdEntry['ySize'] = summaryTable['bbox_max_y'] - summaryTable['bbox_min_y']
1499 ccdEntry['llcra'] = summaryTable['raCorners'][:, 0]
1500 ccdEntry['llcdec'] = summaryTable['decCorners'][:, 0]
1501 ccdEntry['ulcra'] = summaryTable['raCorners'][:, 1]
1502 ccdEntry['ulcdec'] = summaryTable['decCorners'][:, 1]
1503 ccdEntry['urcra'] = summaryTable['raCorners'][:, 2]
1504 ccdEntry['urcdec'] = summaryTable['decCorners'][:, 2]
1505 ccdEntry['lrcra'] = summaryTable['raCorners'][:, 3]
1506 ccdEntry['lrcdec'] = summaryTable['decCorners'][:, 3]
1507 # TODO: DM-30618, Add raftName, nExposures, ccdTemp, binX, binY,
1508 # and flags, and decide if WCS, and llcx, llcy, ulcx, ulcy, etc.
1509 # values are actually wanted.
1510 ccdEntries.append(ccdEntry)
1512 outputCatalog = pd.concat(ccdEntries)
1513 outputCatalog.set_index('ccdVisitId', inplace=True, verify_integrity=True)
1514 return pipeBase.Struct(outputCatalog=outputCatalog)
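# A minimal usage sketch (not part of the pipeline): the per-detector rows
# of the ccdVisitTable produced above can be joined to the per-visit rows
# produced by MakeVisitTableTask below. The butler retrieval and the
# instrument value are illustrative assumptions:
#
#     ccdVisits = butler.get("ccdVisitTable", instrument="HSC")
#     visits = butler.get("visitTable", instrument="HSC")
#     joined = ccdVisits.join(visits, on="visitId", rsuffix="_visit")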
1517class MakeVisitTableConnections(pipeBase.PipelineTaskConnections,
1518 dimensions=("instrument",),
1519 defaultTemplates={"calexpType": ""}):
1520 visitSummaries = connectionTypes.Input(
1521 doc="Per-visit consolidated exposure metadata",
1522 name="finalVisitSummary",
1523 storageClass="ExposureCatalog",
1524 dimensions=("instrument", "visit",),
1525 multiple=True,
1526 deferLoad=True,
1527 )
1528 outputCatalog = connectionTypes.Output(
1529 doc="Visit metadata table",
1530 name="visitTable",
1531 storageClass="DataFrame",
1532 dimensions=("instrument",)
1533 )
1536class MakeVisitTableConfig(pipeBase.PipelineTaskConfig,
1537 pipelineConnections=MakeVisitTableConnections):
1538 pass
1541class MakeVisitTableTask(pipeBase.PipelineTask):
1542 """Produce a `visitTable` from the visit summary exposure catalogs.
1543 """
1544 _DefaultName = 'makeVisitTable'
1545 ConfigClass = MakeVisitTableConfig
1547 def run(self, visitSummaries):
1548 """Make a table of visit information from the visit summary catalogs.
1550 Parameters
1551 ----------
1552 visitSummaries : `list` of `lsst.daf.butler.DeferredDatasetHandle`
1553 List of DeferredDatasetHandles pointing to exposure catalogs with per-detector summary information.

1554 Returns
1555 -------
1556 result : `lsst.pipe.base.Struct`
1557 Results struct with attribute:
1559 ``outputCatalog``
1560 Catalog of visit information.
1561 """
1562 visitEntries = []
1563 for visitSummary in visitSummaries:
1564 visitSummary = visitSummary.get()
1565 visitRow = visitSummary[0]
1566 visitInfo = visitRow.getVisitInfo()
1568 visitEntry = {}
1569 visitEntry["visitId"] = visitRow['visit']
1570 visitEntry["visit"] = visitRow['visit']
1571 visitEntry["physical_filter"] = visitRow['physical_filter']
1572 visitEntry["band"] = visitRow['band']
1573 raDec = visitInfo.getBoresightRaDec()
1574 visitEntry["ra"] = raDec.getRa().asDegrees()
1575 visitEntry["dec"] = raDec.getDec().asDegrees()
1577 # RFC-924: Temporarily keep a duplicate "decl" entry for backwards
1578 # compatibility. To be removed after September 2023.
1579 visitEntry["decl"] = visitEntry["dec"]
1581 visitEntry["skyRotation"] = visitInfo.getBoresightRotAngle().asDegrees()
1582 azAlt = visitInfo.getBoresightAzAlt()
1583 visitEntry["azimuth"] = azAlt.getLongitude().asDegrees()
1584 visitEntry["altitude"] = azAlt.getLatitude().asDegrees()
1585 visitEntry["zenithDistance"] = 90 - azAlt.getLatitude().asDegrees()
1586 visitEntry["airmass"] = visitInfo.getBoresightAirmass()
1587 expTime = visitInfo.getExposureTime()
1588 visitEntry["expTime"] = expTime
1589 visitEntry["expMidpt"] = visitInfo.getDate().toPython()
1590 visitEntry["expMidptMJD"] = visitInfo.getDate().get(dafBase.DateTime.MJD)
1591 visitEntry["obsStart"] = visitEntry["expMidpt"] - 0.5 * pd.Timedelta(seconds=expTime)
1592 expTime_days = expTime / (60*60*24)
1593 visitEntry["obsStartMJD"] = visitEntry["expMidptMJD"] - 0.5 * expTime_days
1594 visitEntries.append(visitEntry)
1596 # TODO: DM-30623, Add programId, exposureType, cameraTemp,
1597 # mirror1Temp, mirror2Temp, mirror3Temp, domeTemp, externalTemp,
1598 # dimmSeeing, pwvGPS, pwvMW, flags, nExposures.
1600 outputCatalog = pd.DataFrame(data=visitEntries)
1601 outputCatalog.set_index('visitId', inplace=True, verify_integrity=True)
1602 return pipeBase.Struct(outputCatalog=outputCatalog)
1605class WriteForcedSourceTableConnections(pipeBase.PipelineTaskConnections,
1606 dimensions=("instrument", "visit", "detector", "skymap", "tract")):
1608 inputCatalog = connectionTypes.Input(
1609 doc="Primary per-detector, single-epoch forced-photometry catalog. "
1610 "By default, it is the output of ForcedPhotCcdTask on calexps",
1611 name="forced_src",
1612 storageClass="SourceCatalog",
1613 dimensions=("instrument", "visit", "detector", "skymap", "tract")
1614 )
1615 inputCatalogDiff = connectionTypes.Input(
1616 doc="Secondary multi-epoch, per-detector, forced photometry catalog. "
1617 "By default, it is the output of ForcedPhotCcdTask run on image differences.",
1618 name="forced_diff",
1619 storageClass="SourceCatalog",
1620 dimensions=("instrument", "visit", "detector", "skymap", "tract")
1621 )
1622 outputCatalog = connectionTypes.Output(
1623 doc="InputCatalogs horizonatally joined on `objectId` in DataFrame parquet format",
1624 name="mergedForcedSource",
1625 storageClass="DataFrame",
1626 dimensions=("instrument", "visit", "detector", "skymap", "tract")
1627 )
1630class WriteForcedSourceTableConfig(pipeBase.PipelineTaskConfig,
1631 pipelineConnections=WriteForcedSourceTableConnections):
1632 key = lsst.pex.config.Field(
1633 doc="Column on which to join the two input tables on and make the primary key of the output",
1634 dtype=str,
1635 default="objectId",
1636 )
1637 idGenerator = DetectorVisitIdGeneratorConfig.make_field()
1640class WriteForcedSourceTableTask(pipeBase.PipelineTask):
1641 """Merge and convert per-detector forced source catalogs to DataFrame Parquet format.
1643 Because the predecessor ForcedPhotCcdTask operates per detector and
1644 per tract (i.e., it has tract in its dimensions), detectors
1645 on the tract boundary may have multiple forced source catalogs.
1647 The successor task TransformForcedSourceTable runs per patch and
1648 temporally aggregates the overlapping mergedForcedSource catalogs
1649 from all available epochs.
1650 """
1651 _DefaultName = "writeForcedSourceTable"
1652 ConfigClass = WriteForcedSourceTableConfig
1654 def runQuantum(self, butlerQC, inputRefs, outputRefs):
1655 inputs = butlerQC.get(inputRefs)
1656 # Add ccdVisitId to allow joining with CcdVisitTable
1657 idGenerator = self.config.idGenerator.apply(butlerQC.quantum.dataId)
1658 inputs['ccdVisitId'] = idGenerator.catalog_id
1659 inputs['band'] = butlerQC.quantum.dataId.full['band']
1660 outputs = self.run(**inputs)
1661 butlerQC.put(outputs, outputRefs)
1663 def run(self, inputCatalog, inputCatalogDiff, ccdVisitId=None, band=None):
1664 dfs = []
1665 for table, dataset, in zip((inputCatalog, inputCatalogDiff), ('calexp', 'diff')):
1666 df = table.asAstropy().to_pandas().set_index(self.config.key, drop=False)
1667 df = df.reindex(sorted(df.columns), axis=1)
1668 df['ccdVisitId'] = ccdVisitId if ccdVisitId else pd.NA
1669 df['band'] = band if band else pd.NA
1670 df.columns = pd.MultiIndex.from_tuples([(dataset, c) for c in df.columns],
1671 names=('dataset', 'column'))
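# The output columns form a two-level MultiIndex: level 0 ('dataset') is
# either 'calexp' or 'diff', and level 1 ('column') is the original
# column name.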
1673 dfs.append(df)
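# Horizontally join the per-dataset frames on their shared index
# (config.key, 'objectId' by default).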
1675 outputCatalog = functools.reduce(lambda d1, d2: d1.join(d2), dfs)
1676 return pipeBase.Struct(outputCatalog=outputCatalog)
1679class TransformForcedSourceTableConnections(pipeBase.PipelineTaskConnections,
1680 dimensions=("instrument", "skymap", "patch", "tract")):
1682 inputCatalogs = connectionTypes.Input(
1683 doc="DataFrames of merged ForcedSources produced by WriteForcedSourceTableTask",
1684 name="mergedForcedSource",
1685 storageClass="DataFrame",
1686 dimensions=("instrument", "visit", "detector", "skymap", "tract"),
1687 multiple=True,
1688 deferLoad=True
1689 )
1690 referenceCatalog = connectionTypes.Input(
1691 doc="Reference catalog which was used to seed the forcedPhot. Columns "
1692 "objectId, detect_isPrimary, detect_isTractInner, detect_isPatchInner "
1693 "are expected.",
1694 name="objectTable",
1695 storageClass="DataFrame",
1696 dimensions=("tract", "patch", "skymap"),
1697 deferLoad=True
1698 )
1699 outputCatalog = connectionTypes.Output(
1700 doc="Narrower, temporally-aggregated, per-patch ForcedSource Table transformed and converted per a "
1701 "specified set of functors",
1702 name="forcedSourceTable",
1703 storageClass="DataFrame",
1704 dimensions=("tract", "patch", "skymap")
1705 )
1708class TransformForcedSourceTableConfig(TransformCatalogBaseConfig,
1709 pipelineConnections=TransformForcedSourceTableConnections):
1710 referenceColumns = pexConfig.ListField(
1711 dtype=str,
1712 default=["detect_isPrimary", "detect_isTractInner", "detect_isPatchInner"],
1713 optional=True,
1714 doc="Columns to pull from reference catalog",
1715 )
1716 keyRef = lsst.pex.config.Field(
1717 doc="Column on which to join the two input tables on and make the primary key of the output",
1718 dtype=str,
1719 default="objectId",
1720 )
1721 key = lsst.pex.config.Field(
1722 doc="Rename the output DataFrame index to this name",
1723 dtype=str,
1724 default="forcedSourceId",
1725 )
1727 def setDefaults(self):
1728 super().setDefaults()
1729 self.functorFile = os.path.join('$PIPE_TASKS_DIR', 'schemas', 'ForcedSource.yaml')
1730 self.columnsFromDataId = ['tract', 'patch']
1733class TransformForcedSourceTableTask(TransformCatalogBaseTask):
1734 """Transform/standardize a ForcedSource catalog
1736 Transforms each wide, per-detector forcedSource DataFrame per the
1737 specification file (per-camera defaults found in ForcedSource.yaml).
1738 All epochs that overlap the patch are aggregated into one per-patch
1739 narrow-DataFrame file.
1741 No de-duplication of rows is performed. Duplicate resolutions flags are
1742 pulled in from the referenceCatalog: `detect_isPrimary`,
1743 `detect_isTractInner`,`detect_isPatchInner`, so that user may de-duplicate
1744 for analysis or compare duplicates for QA.
1746 The resulting table includes multiple bands. Epochs (MJDs) and other useful
1747 per-visit rows can be retreived by joining with the CcdVisitTable on
1748 ccdVisitId.
1749 """
1750 _DefaultName = "transformForcedSourceTable"
1751 ConfigClass = TransformForcedSourceTableConfig
1753 def runQuantum(self, butlerQC, inputRefs, outputRefs):
1754 inputs = butlerQC.get(inputRefs)
1755 if self.funcs is None:
1756 raise ValueError("config.functorFile is None. "
1757 "It must be a valid path to a functor YAML file in order to run this Task as a PipelineTask.")
1758 outputs = self.run(inputs['inputCatalogs'], inputs['referenceCatalog'], funcs=self.funcs,
1759 dataId=outputRefs.outputCatalog.dataId.full)
1761 butlerQC.put(outputs, outputRefs)
1763 def run(self, inputCatalogs, referenceCatalog, funcs=None, dataId=None, band=None):
1764 dfs = []
1765 ref = referenceCatalog.get(parameters={"columns": self.config.referenceColumns})
1766 self.log.info("Aggregating %s input catalogs", len(inputCatalogs))
1767 for handle in inputCatalogs:
1768 result = self.transform(None, handle, funcs, dataId)
1769 # Keep only the rows for objects that were detected on (i.e., overlap) the patch
1770 dfs.append(result.df.join(ref, how='inner'))
1772 outputCatalog = pd.concat(dfs)
1774 # Now that we are done joining on config.keyRef, switch the index
1775 # over to config.key:
1776 outputCatalog.index.rename(self.config.keyRef, inplace=True)
1777 # Add config.keyRef to the column list
1778 outputCatalog.reset_index(inplace=True)
1779 # Set forcedSourceId as the index; this column is specified in
1780 # ForcedSource.yaml.
1781 outputCatalog.set_index("forcedSourceId", inplace=True, verify_integrity=True)
1782 # Rename it to the config.key
1783 outputCatalog.index.rename(self.config.key, inplace=True)
1785 self.log.info("Made a table of %d columns and %d rows",
1786 len(outputCatalog.columns), len(outputCatalog))
1787 return pipeBase.Struct(outputCatalog=outputCatalog)
1790class ConsolidateTractConnections(pipeBase.PipelineTaskConnections,
1791 defaultTemplates={"catalogType": ""},
1792 dimensions=("instrument", "tract")):
1793 inputCatalogs = connectionTypes.Input(
1794 doc="Input per-patch DataFrame Tables to be concatenated",
1795 name="{catalogType}ForcedSourceTable",
1796 storageClass="DataFrame",
1797 dimensions=("tract", "patch", "skymap"),
1798 multiple=True,
1799 )
1801 outputCatalog = connectionTypes.Output(
1802 doc="Output per-tract concatenation of DataFrame Tables",
1803 name="{catalogType}ForcedSourceTable_tract",
1804 storageClass="DataFrame",
1805 dimensions=("tract", "skymap"),
1806 )
1809class ConsolidateTractConfig(pipeBase.PipelineTaskConfig,
1810 pipelineConnections=ConsolidateTractConnections):
1811 pass
1814class ConsolidateTractTask(pipeBase.PipelineTask):
1815 """Concatenate any per-patch, dataframe list into a single
1816 per-tract DataFrame.
1817 """
1818 _DefaultName = 'ConsolidateTract'
1819 ConfigClass = ConsolidateTractConfig
1821 def runQuantum(self, butlerQC, inputRefs, outputRefs):
1822 inputs = butlerQC.get(inputRefs)
1823 # We do not check that at least one inputCatalog exists; if none did,
1824 # the quantum graph (QG) would be empty.
1825 self.log.info("Concatenating %s per-patch %s Tables",
1826 len(inputs['inputCatalogs']),
1827 inputRefs.inputCatalogs[0].datasetType.name)
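# Concatenate the per-patch DataFrames row-wise into one per-tract table.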
1828 df = pd.concat(inputs['inputCatalogs'])
1829 butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs)