Coverage for python/lsst/pipe/tasks/postprocess.py: 27%
649 statements
coverage.py v7.2.5, created at 2023-05-13 02:56 -0700
1# This file is part of pipe_tasks.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <https://www.gnu.org/licenses/>.
22__all__ = ["WriteObjectTableConfig", "WriteObjectTableTask",
23 "WriteSourceTableConfig", "WriteSourceTableTask",
24 "WriteRecalibratedSourceTableConfig", "WriteRecalibratedSourceTableTask",
25 "PostprocessAnalysis",
26 "TransformCatalogBaseConfig", "TransformCatalogBaseTask",
27 "TransformObjectCatalogConfig", "TransformObjectCatalogTask",
28 "ConsolidateObjectTableConfig", "ConsolidateObjectTableTask",
29 "TransformSourceTableConfig", "TransformSourceTableTask",
30 "ConsolidateVisitSummaryConfig", "ConsolidateVisitSummaryTask",
31 "ConsolidateSourceTableConfig", "ConsolidateSourceTableTask",
32 "MakeCcdVisitTableConfig", "MakeCcdVisitTableTask",
33 "MakeVisitTableConfig", "MakeVisitTableTask",
34 "WriteForcedSourceTableConfig", "WriteForcedSourceTableTask",
35 "TransformForcedSourceTableConfig", "TransformForcedSourceTableTask",
36 "ConsolidateTractConfig", "ConsolidateTractTask"]
38import functools
39import pandas as pd
40import logging
41import numpy as np
42import numbers
43import os
45import lsst.geom
46import lsst.pex.config as pexConfig
47import lsst.pipe.base as pipeBase
48import lsst.daf.base as dafBase
49from lsst.pipe.base import connectionTypes
50import lsst.afw.table as afwTable
51from lsst.afw.image import ExposureSummaryStats
52from lsst.meas.base import SingleFrameMeasurementTask, DetectorVisitIdGeneratorConfig
53from lsst.skymap import BaseSkyMap
55from .functors import CompositeFunctor, Column
57log = logging.getLogger(__name__)
60def flattenFilters(df, noDupCols=['coord_ra', 'coord_dec'], camelCase=False, inputBands=None):
61 """Flatten a dataframe with a multilevel column index.
62 """
63 newDf = pd.DataFrame()
64 # band is the level 0 index
65 dfBands = df.columns.unique(level=0).values
66 for band in dfBands:
67 subdf = df[band]
68 columnFormat = '{0}{1}' if camelCase else '{0}_{1}'
69 newColumns = {c: columnFormat.format(band, c)
70 for c in subdf.columns if c not in noDupCols}
71 cols = list(newColumns.keys())
72 newDf = pd.concat([newDf, subdf[cols].rename(columns=newColumns)], axis=1)
74 # Band must be present in the input and output or else column is all NaN:
75 presentBands = dfBands if inputBands is None else list(set(inputBands).intersection(dfBands))
76 # Get the unexploded columns from any present band's partition
77 noDupDf = df[presentBands[0]][noDupCols]
78 newDf = pd.concat([noDupDf, newDf], axis=1)
79 return newDf
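# A minimal illustrative sketch of flattenFilters (hypothetical column names,
# not executed anywhere in the pipeline): given a two-band input whose
# multilevel columns are
#     [('g', 'coord_ra'), ('g', 'coord_dec'), ('g', 'PsfFlux'),
#      ('r', 'coord_ra'), ('r', 'coord_dec'), ('r', 'PsfFlux')]
# the default call flattenFilters(df) returns a flat table with columns
#     ['coord_ra', 'coord_dec', 'g_PsfFlux', 'r_PsfFlux']
# while camelCase=True would name the per-band columns 'gPsfFlux' and 'rPsfFlux'.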
82class WriteObjectTableConnections(pipeBase.PipelineTaskConnections,
83 defaultTemplates={"coaddName": "deep"},
84 dimensions=("tract", "patch", "skymap")):
85 inputCatalogMeas = connectionTypes.Input(
86 doc="Catalog of source measurements on the deepCoadd.",
87 dimensions=("tract", "patch", "band", "skymap"),
88 storageClass="SourceCatalog",
89 name="{coaddName}Coadd_meas",
90 multiple=True
91 )
92 inputCatalogForcedSrc = connectionTypes.Input(
93 doc="Catalog of forced measurements (shape and position parameters held fixed) on the deepCoadd.",
94 dimensions=("tract", "patch", "band", "skymap"),
95 storageClass="SourceCatalog",
96 name="{coaddName}Coadd_forced_src",
97 multiple=True
98 )
99 inputCatalogRef = connectionTypes.Input(
100 doc="Catalog marking the primary detection (which band provides a good shape and position)"
101 "for each detection in deepCoadd_mergeDet.",
102 dimensions=("tract", "patch", "skymap"),
103 storageClass="SourceCatalog",
104 name="{coaddName}Coadd_ref"
105 )
106 outputCatalog = connectionTypes.Output(
107 doc="A vertical concatenation of the deepCoadd_{ref|meas|forced_src} catalogs, "
108 "stored as a DataFrame with a multi-level column index per-patch.",
109 dimensions=("tract", "patch", "skymap"),
110 storageClass="DataFrame",
111 name="{coaddName}Coadd_obj"
112 )
115class WriteObjectTableConfig(pipeBase.PipelineTaskConfig,
116 pipelineConnections=WriteObjectTableConnections):
117 engine = pexConfig.Field(
118 dtype=str,
119 default="pyarrow",
120 doc="Parquet engine for writing (pyarrow or fastparquet)",
121 deprecated="This config is no longer used, and will be removed after v26."
122 )
123 coaddName = pexConfig.Field(
124 dtype=str,
125 default="deep",
126 doc="Name of coadd"
127 )
130class WriteObjectTableTask(pipeBase.PipelineTask):
131 """Write filter-merged source tables as a DataFrame in parquet format.
132 """
133 _DefaultName = "writeObjectTable"
134 ConfigClass = WriteObjectTableConfig
136 # Names of table datasets to be merged
137 inputDatasets = ('forced_src', 'meas', 'ref')
139 # Tag of output dataset written by `MergeSourcesTask.write`
140 outputDataset = 'obj'
142 def runQuantum(self, butlerQC, inputRefs, outputRefs):
143 inputs = butlerQC.get(inputRefs)
145 measDict = {ref.dataId['band']: {'meas': cat} for ref, cat in
146 zip(inputRefs.inputCatalogMeas, inputs['inputCatalogMeas'])}
147 forcedSourceDict = {ref.dataId['band']: {'forced_src': cat} for ref, cat in
148 zip(inputRefs.inputCatalogForcedSrc, inputs['inputCatalogForcedSrc'])}
150 catalogs = {}
151 for band in measDict.keys():
152 catalogs[band] = {'meas': measDict[band]['meas'],
153 'forced_src': forcedSourceDict[band]['forced_src'],
154 'ref': inputs['inputCatalogRef']}
155 dataId = butlerQC.quantum.dataId
156 df = self.run(catalogs=catalogs, tract=dataId['tract'], patch=dataId['patch'])
157 outputs = pipeBase.Struct(outputCatalog=df)
158 butlerQC.put(outputs, outputRefs)
160 def run(self, catalogs, tract, patch):
161 """Merge multiple catalogs.
163 Parameters
164 ----------
165 catalogs : `dict`
166 Mapping from filter names to dict of catalogs.
167 tract : `int`
168 tractId to use for the tractId column.
169 patch : `str`
170 patchId to use for the patchId column.
172 Returns
173 -------
174 catalog : `pandas.DataFrame`
175 Merged dataframe.
176 """
177 dfs = []
178 for filt, tableDict in catalogs.items():
179 for dataset, table in tableDict.items():
180 # Convert afwTable to pandas DataFrame
181 df = table.asAstropy().to_pandas().set_index('id', drop=True)
183 # Sort columns by name, to ensure matching schema among patches
184 df = df.reindex(sorted(df.columns), axis=1)
185 df = df.assign(tractId=tract, patchId=patch)
187 # Make columns a 3-level MultiIndex
188 df.columns = pd.MultiIndex.from_tuples([(dataset, filt, c) for c in df.columns],
189 names=('dataset', 'band', 'column'))
190 dfs.append(df)
192 # We do this dance and not `pd.concat(dfs)` because the pandas
193 # concatenation uses infinite memory.
194 catalog = functools.reduce(lambda d1, d2: d1.join(d2), dfs)
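# Illustrative shape of the merged result (hypothetical column names): the
# columns form a 3-level (dataset, band, column) MultiIndex such as
#     ('meas', 'g', 'base_PsfFlux_instFlux')
#     ('forced_src', 'r', 'base_PsfFlux_instFlux')
#     ('ref', 'g', 'detect_isPrimary')
# with one row per object id, as consumed by the downstream transform tasks.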
195 return catalog
198class WriteSourceTableConnections(pipeBase.PipelineTaskConnections,
199 defaultTemplates={"catalogType": ""},
200 dimensions=("instrument", "visit", "detector")):
202 catalog = connectionTypes.Input(
203 doc="Input full-depth catalog of sources produced by CalibrateTask",
204 name="{catalogType}src",
205 storageClass="SourceCatalog",
206 dimensions=("instrument", "visit", "detector")
207 )
208 outputCatalog = connectionTypes.Output(
209 doc="Catalog of sources, `src` in DataFrame/Parquet format. The 'id' column is "
210 "replaced with an index; all other columns are unchanged.",
211 name="{catalogType}source",
212 storageClass="DataFrame",
213 dimensions=("instrument", "visit", "detector")
214 )
217class WriteSourceTableConfig(pipeBase.PipelineTaskConfig,
218 pipelineConnections=WriteSourceTableConnections):
219 idGenerator = DetectorVisitIdGeneratorConfig.make_field()
222class WriteSourceTableTask(pipeBase.PipelineTask):
223 """Write source table to DataFrame Parquet format.
224 """
225 _DefaultName = "writeSourceTable"
226 ConfigClass = WriteSourceTableConfig
228 def runQuantum(self, butlerQC, inputRefs, outputRefs):
229 inputs = butlerQC.get(inputRefs)
230 inputs['ccdVisitId'] = self.config.idGenerator.apply(butlerQC.quantum.dataId).catalog_id
231 result = self.run(**inputs)
232 outputs = pipeBase.Struct(outputCatalog=result.table)
233 butlerQC.put(outputs, outputRefs)
235 def run(self, catalog, ccdVisitId=None, **kwargs):
236 """Convert `src` catalog to DataFrame.
238 Parameters
239 ----------
240 catalog : `afwTable.SourceCatalog`
241 Catalog to be converted.
242 ccdVisitId : `int`, optional
243 ccdVisitId to be added as a column.
244 **kwargs
245 Additional keyword arguments are ignored as a convenience for
246 subclasses that pass the same arguments to several different
247 methods.
249 Returns
250 -------
251 result : `lsst.pipe.base.Struct`
252 ``table``
253 `DataFrame` version of the input catalog
254 """
255 self.log.info("Generating DataFrame from src catalog ccdVisitId=%s", ccdVisitId)
256 df = catalog.asAstropy().to_pandas().set_index('id', drop=True)
257 df['ccdVisitId'] = ccdVisitId
258 return pipeBase.Struct(table=df)
261class WriteRecalibratedSourceTableConnections(WriteSourceTableConnections,
262 defaultTemplates={"catalogType": "",
263 "skyWcsName": "gbdesAstrometricFit",
264 "photoCalibName": "fgcm"},
265 dimensions=("instrument", "visit", "detector", "skymap")):
266 skyMap = connectionTypes.Input(
267 doc="skyMap needed to choose which tract-level calibrations to use when multiple available",
268 name=BaseSkyMap.SKYMAP_DATASET_TYPE_NAME,
269 storageClass="SkyMap",
270 dimensions=("skymap",),
271 )
272 exposure = connectionTypes.Input(
273 doc="Input exposure to perform photometry on.",
274 name="calexp",
275 storageClass="ExposureF",
276 dimensions=["instrument", "visit", "detector"],
277 )
278 externalSkyWcsTractCatalog = connectionTypes.Input(
279 doc=("Per-tract, per-visit wcs calibrations. These catalogs use the detector "
280 "id for the catalog id, sorted on id for fast lookup."),
281 name="{skyWcsName}SkyWcsCatalog",
282 storageClass="ExposureCatalog",
283 dimensions=["instrument", "visit", "tract"],
284 multiple=True
285 )
286 externalSkyWcsGlobalCatalog = connectionTypes.Input(
287 doc=("Per-visit wcs calibrations computed globally (with no tract information). "
288 "These catalogs use the detector id for the catalog id, sorted on id for "
289 "fast lookup."),
290 name="finalVisitSummary",
291 storageClass="ExposureCatalog",
292 dimensions=["instrument", "visit"],
293 )
294 externalPhotoCalibTractCatalog = connectionTypes.Input(
295 doc=("Per-tract, per-visit photometric calibrations. These catalogs use the "
296 "detector id for the catalog id, sorted on id for fast lookup."),
297 name="{photoCalibName}PhotoCalibCatalog",
298 storageClass="ExposureCatalog",
299 dimensions=["instrument", "visit", "tract"],
300 multiple=True
301 )
302 externalPhotoCalibGlobalCatalog = connectionTypes.Input(
303 doc=("Per-visit photometric calibrations computed globally (with no tract "
304 "information). These catalogs use the detector id for the catalog id, "
305 "sorted on id for fast lookup."),
306 name="finalVisitSummary",
307 storageClass="ExposureCatalog",
308 dimensions=["instrument", "visit"],
309 )
311 def __init__(self, *, config=None):
312 super().__init__(config=config)
313 # Same connection boilerplate as all other applications of
314 # Global/Tract calibrations
315 if config.doApplyExternalSkyWcs and config.doReevaluateSkyWcs:
316 if config.useGlobalExternalSkyWcs:
317 self.inputs.remove("externalSkyWcsTractCatalog")
318 else:
319 self.inputs.remove("externalSkyWcsGlobalCatalog")
320 else:
321 self.inputs.remove("externalSkyWcsTractCatalog")
322 self.inputs.remove("externalSkyWcsGlobalCatalog")
323 if config.doApplyExternalPhotoCalib and config.doReevaluatePhotoCalib:
324 if config.useGlobalExternalPhotoCalib:
325 self.inputs.remove("externalPhotoCalibTractCatalog")
326 else:
327 self.inputs.remove("externalPhotoCalibGlobalCatalog")
328 else:
329 self.inputs.remove("externalPhotoCalibTractCatalog")
330 self.inputs.remove("externalPhotoCalibGlobalCatalog")
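# Summary of the pruning above: at most one external catalog of each kind
# remains requested. With doReevaluateSkyWcs and doApplyExternalSkyWcs both
# True, useGlobalExternalSkyWcs selects externalSkyWcsGlobalCatalog, otherwise
# externalSkyWcsTractCatalog; if either flag is False, neither SkyWcs catalog
# is requested. The PhotoCalib connections are pruned in exactly the same way.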
333class WriteRecalibratedSourceTableConfig(WriteSourceTableConfig,
334 pipelineConnections=WriteRecalibratedSourceTableConnections):
336 doReevaluatePhotoCalib = pexConfig.Field(
337 dtype=bool,
338 default=True,
339 doc=("Add or replace local photoCalib columns")
340 )
341 doReevaluateSkyWcs = pexConfig.Field(
342 dtype=bool,
343 default=True,
344 doc=("Add or replace local WCS columns and update the coord columns, coord_ra and coord_dec")
345 )
346 doApplyExternalPhotoCalib = pexConfig.Field(
347 dtype=bool,
348 default=True,
349 doc=("If and only if doReevaluatePhotoCalib, apply the photometric calibrations from an external "
350 "algorithm such as FGCM or jointcal, else use the photoCalib already attached to the exposure."),
351 )
352 doApplyExternalSkyWcs = pexConfig.Field(
353 dtype=bool,
354 default=True,
355 doc=("If and only if doReevaluateSkyWcs, apply the WCS from an external algorithm such as jointcal, "
356 "else use the wcs already attached to the exposure."),
357 )
358 useGlobalExternalPhotoCalib = pexConfig.Field(
359 dtype=bool,
360 default=True,
361 doc=("When using doApplyExternalPhotoCalib, use 'global' calibrations "
362 "that are not run per-tract. When False, use per-tract photometric "
363 "calibration files.")
364 )
365 useGlobalExternalSkyWcs = pexConfig.Field(
366 dtype=bool,
367 default=True,
368 doc=("When using doApplyExternalSkyWcs, use 'global' calibrations "
369 "that are not run per-tract. When False, use per-tract wcs "
370 "files.")
371 )
372 idGenerator = DetectorVisitIdGeneratorConfig.make_field()
374 def validate(self):
375 super().validate()
376 if self.doApplyExternalSkyWcs and not self.doReevaluateSkyWcs:
377 log.warning("doApplyExternalSkyWcs=True but doReevaluateSkyWcs=False. "
378 "External SkyWcs will not be read or evaluated.")
379 if self.doApplyExternalPhotoCalib and not self.doReevaluatePhotoCalib:
380 log.warning("doApplyExternalPhotoCalib=True but doReevaluatePhotoCalib=False. "
381 "External PhotoCalib will not be read or evaluated.")
384class WriteRecalibratedSourceTableTask(WriteSourceTableTask):
385 """Write source table to DataFrame Parquet format.
386 """
387 _DefaultName = "writeRecalibratedSourceTable"
388 ConfigClass = WriteRecalibratedSourceTableConfig
390 def runQuantum(self, butlerQC, inputRefs, outputRefs):
391 inputs = butlerQC.get(inputRefs)
393 idGenerator = self.config.idGenerator.apply(butlerQC.quantum.dataId)
394 inputs['idGenerator'] = idGenerator
395 inputs['ccdVisitId'] = idGenerator.catalog_id
397 if self.config.doReevaluatePhotoCalib or self.config.doReevaluateSkyWcs:
398 if self.config.doApplyExternalPhotoCalib or self.config.doApplyExternalSkyWcs:
399 inputs['exposure'] = self.attachCalibs(inputRefs, **inputs)
401 inputs['catalog'] = self.addCalibColumns(**inputs)
403 result = self.run(**inputs)
404 outputs = pipeBase.Struct(outputCatalog=result.table)
405 butlerQC.put(outputs, outputRefs)
407 def attachCalibs(self, inputRefs, skyMap, exposure, externalSkyWcsGlobalCatalog=None,
408 externalSkyWcsTractCatalog=None, externalPhotoCalibGlobalCatalog=None,
409 externalPhotoCalibTractCatalog=None, **kwargs):
410 """Apply external calibrations to the exposure per configuration.
412 When multiple tract-level calibrations overlap, select the one whose
413 tract center is closest to the detector center.
415 Parameters
416 ----------
417 inputRefs : `lsst.pipe.base.InputQuantizedConnection`
418 Input references, used for the dataIds of the tract-level calibs.
419 skyMap : `lsst.skymap.SkyMap`
420 exposure : `lsst.afw.image.exposure.Exposure`
421 Input exposure to adjust calibrations.
422 externalSkyWcsGlobalCatalog : `lsst.afw.table.ExposureCatalog`, optional
423 Exposure catalog with external skyWcs to be applied per config
424 externalSkyWcsTractCatalog : `lsst.afw.table.ExposureCatalog`, optional
425 Exposure catalog with external skyWcs to be applied per config
426 externalPhotoCalibGlobalCatalog : `lsst.afw.table.ExposureCatalog`, optional
427 Exposure catalog with external photoCalib to be applied per config
428 externalPhotoCalibTractCatalog : `lsst.afw.table.ExposureCatalog`, optional
429 Exposure catalog with external photoCalib to be applied per config
430 **kwargs
431 Additional keyword arguments are ignored to facilitate passing the
432 same arguments to several methods.
434 Returns
435 -------
436 exposure : `lsst.afw.image.exposure.Exposure`
437 Exposure with adjusted calibrations.
438 """
439 if not self.config.doApplyExternalSkyWcs:
440 # Do not modify the exposure's SkyWcs
441 externalSkyWcsCatalog = None
442 elif self.config.useGlobalExternalSkyWcs:
443 # Use the global external SkyWcs
444 externalSkyWcsCatalog = externalSkyWcsGlobalCatalog
445 self.log.info('Applying global SkyWcs')
446 else:
447 # use tract-level external SkyWcs from the closest overlapping tract
448 inputRef = getattr(inputRefs, 'externalSkyWcsTractCatalog')
449 tracts = [ref.dataId['tract'] for ref in inputRef]
450 if len(tracts) == 1:
451 ind = 0
452 self.log.info('Applying tract-level SkyWcs from tract %s', tracts[ind])
453 else:
454 if exposure.getWcs() is None: # TODO: could this look-up use the externalPhotoCalib?
455 raise ValueError("Trying to locate nearest tract, but exposure.wcs is None.")
456 ind = self.getClosestTract(tracts, skyMap,
457 exposure.getBBox(), exposure.getWcs())
458 self.log.info('Multiple overlapping externalSkyWcsTractCatalogs found (%s). '
459 'Applying closest to detector center: tract=%s', str(tracts), tracts[ind])
461 externalSkyWcsCatalog = externalSkyWcsTractCatalog[ind]
463 if not self.config.doApplyExternalPhotoCalib:
464 # Do not modify the exposure's PhotoCalib
465 externalPhotoCalibCatalog = None
466 elif self.config.useGlobalExternalPhotoCalib:
467 # Use the global external PhotoCalib
468 externalPhotoCalibCatalog = externalPhotoCalibGlobalCatalog
469 self.log.info('Applying global PhotoCalib')
470 else:
471 # use tract-level external PhotoCalib from the closest overlapping tract
472 inputRef = getattr(inputRefs, 'externalPhotoCalibTractCatalog')
473 tracts = [ref.dataId['tract'] for ref in inputRef]
474 if len(tracts) == 1:
475 ind = 0
476 self.log.info('Applying tract-level PhotoCalib from tract %s', tracts[ind])
477 else:
478 ind = self.getClosestTract(tracts, skyMap,
479 exposure.getBBox(), exposure.getWcs())
480 self.log.info('Multiple overlapping externalPhotoCalibTractCatalogs found (%s). '
481 'Applying closest to detector center: tract=%s', str(tracts), tracts[ind])
483 externalPhotoCalibCatalog = externalPhotoCalibTractCatalog[ind]
485 return self.prepareCalibratedExposure(exposure, externalSkyWcsCatalog, externalPhotoCalibCatalog)
487 def getClosestTract(self, tracts, skyMap, bbox, wcs):
488 """Find the index of the tract closest to the detector from a list of tractIds.
490 Parameters
491 ----------
492 tracts: `list` [`int`]
493 Iterable of integer tractIds
494 skyMap : `lsst.skymap.SkyMap`
495 skyMap to lookup tract geometry and wcs
496 bbox : `lsst.geom.Box2I`
497 Detector bbox, the center of which will be compared to the tract centers.
498 wcs : `lsst.afw.geom.SkyWcs`
499 Detector Wcs object to map the detector center to SkyCoord
501 Returns
502 -------
503 index : `int`
504 """
505 if len(tracts) == 1:
506 return 0
508 center = wcs.pixelToSky(bbox.getCenter())
509 sep = []
510 for tractId in tracts:
511 tract = skyMap[tractId]
512 tractCenter = tract.getWcs().pixelToSky(tract.getBBox().getCenter())
513 sep.append(center.separation(tractCenter))
515 return np.argmin(sep)
517 def prepareCalibratedExposure(self, exposure, externalSkyWcsCatalog=None, externalPhotoCalibCatalog=None):
518 """Prepare a calibrated exposure and apply external calibrations
519 if so configured.
521 Parameters
522 ----------
523 exposure : `lsst.afw.image.exposure.Exposure`
524 Input exposure to adjust calibrations.
525 externalSkyWcsCatalog : `lsst.afw.table.ExposureCatalog`, optional
526 Exposure catalog with external skyWcs to be applied
527 if config.doApplyExternalSkyWcs=True. Catalog uses the detector id
528 for the catalog id, sorted on id for fast lookup.
529 externalPhotoCalibCatalog : `lsst.afw.table.ExposureCatalog`, optional
530 Exposure catalog with external photoCalib to be applied
531 if config.doApplyExternalPhotoCalib=True. Catalog uses the detector
532 id for the catalog id, sorted on id for fast lookup.
534 Returns
535 -------
536 exposure : `lsst.afw.image.exposure.Exposure`
537 Exposure with adjusted calibrations.
538 """
539 detectorId = exposure.getInfo().getDetector().getId()
541 if externalPhotoCalibCatalog is not None:
542 row = externalPhotoCalibCatalog.find(detectorId)
543 if row is None:
544 self.log.warning("Detector id %s not found in externalPhotoCalibCatalog; "
545 "Using original photoCalib.", detectorId)
546 else:
547 photoCalib = row.getPhotoCalib()
548 if photoCalib is None:
549 self.log.warning("Detector id %s has None for photoCalib in externalPhotoCalibCatalog; "
550 "Using original photoCalib.", detectorId)
551 else:
552 exposure.setPhotoCalib(photoCalib)
554 if externalSkyWcsCatalog is not None:
555 row = externalSkyWcsCatalog.find(detectorId)
556 if row is None:
557 self.log.warning("Detector id %s not found in externalSkyWcsCatalog; "
558 "Using original skyWcs.", detectorId)
559 else:
560 skyWcs = row.getWcs()
561 if skyWcs is None:
562 self.log.warning("Detector id %s has None for skyWcs in externalSkyWcsCatalog; "
563 "Using original skyWcs.", detectorId)
564 else:
565 exposure.setWcs(skyWcs)
567 return exposure
569 def addCalibColumns(self, catalog, exposure, idGenerator, **kwargs):
570 """Add or replace columns with calibs evaluated at each centroid.
572 Add or replace ``base_LocalWcs`` and ``base_LocalPhotoCalib`` columns in
573 a source catalog, by rerunning the plugins.
575 Parameters
576 ----------
577 catalog : `lsst.afw.table.SourceCatalog`
578 catalog to which calib columns will be added
579 exposure : `lsst.afw.image.exposure.Exposure`
580 Exposure with attached PhotoCalibs and SkyWcs attributes to be
581 reevaluated at local centroids. Pixels are not required.
582 idGenerator : `lsst.meas.base.IdGenerator`
583 Object that generates Source IDs and random seeds.
584 **kwargs
585 Additional keyword arguments are ignored to facilitate passing the
586 same arguments to several methods.
588 Returns
589 -------
590 newCat : `lsst.afw.table.SourceCatalog`
591 Source catalog with the requested local calib columns.
592 """
593 measureConfig = SingleFrameMeasurementTask.ConfigClass()
594 measureConfig.doReplaceWithNoise = False
596 # Clear all slots, because we aren't running the relevant plugins.
597 for slot in measureConfig.slots:
598 setattr(measureConfig.slots, slot, None)
600 measureConfig.plugins.names = []
601 if self.config.doReevaluateSkyWcs:
602 measureConfig.plugins.names.add('base_LocalWcs')
603 self.log.info("Re-evaluating base_LocalWcs plugin")
604 if self.config.doReevaluatePhotoCalib:
605 measureConfig.plugins.names.add('base_LocalPhotoCalib')
606 self.log.info("Re-evaluating base_LocalPhotoCalib plugin")
607 pluginsNotToCopy = tuple(measureConfig.plugins.names)
609 # Create a new schema and catalog
610 # Copy all columns from original except for the ones to reevaluate
611 aliasMap = catalog.schema.getAliasMap()
612 mapper = afwTable.SchemaMapper(catalog.schema)
613 for item in catalog.schema:
614 if not item.field.getName().startswith(pluginsNotToCopy):
615 mapper.addMapping(item.key)
617 schema = mapper.getOutputSchema()
618 measurement = SingleFrameMeasurementTask(config=measureConfig, schema=schema)
619 schema.setAliasMap(aliasMap)
620 newCat = afwTable.SourceCatalog(schema)
621 newCat.extend(catalog, mapper=mapper)
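# newCat now holds copies of every input column except those produced by the
# plugins being re-run; measurement.run below re-fills those columns using the
# (possibly updated) SkyWcs and PhotoCalib attached to the exposure.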
623 # Fluxes in sourceCatalogs are in counts, so there are no fluxes to
624 # update here. LocalPhotoCalibs are applied during transform tasks.
625 # Update coord_ra/coord_dec, which are expected to be positions on the
626 # sky and are used as such in sdm tables without transform
627 if self.config.doReevaluateSkyWcs and exposure.wcs is not None:
628 afwTable.updateSourceCoords(exposure.wcs, newCat)
630 measurement.run(measCat=newCat, exposure=exposure, exposureId=idGenerator.catalog_id)
632 return newCat
635class PostprocessAnalysis(object):
636 """Calculate columns from DataFrames or handles storing DataFrames.
638 This object manages and organizes an arbitrary set of computations
639 on a catalog. The catalog is defined by a
640 `DeferredDatasetHandle` or `InMemoryDatasetHandle` object
641 (or list thereof), such as a ``deepCoadd_obj`` dataset, and the
642 computations are defined by a collection of `lsst.pipe.tasks.functors.Functor`
643 objects (or, equivalently, a ``CompositeFunctor``).
645 After the object is initialized, accessing the ``.df`` attribute (which
646 holds the `pandas.DataFrame` containing the results of the calculations)
647 triggers computation of said dataframe.
649 One of the conveniences of using this object is the ability to define a
650 desired common filter for all functors. This enables the same functor
651 collection to be passed to several different `PostprocessAnalysis` objects
652 without having to change the original functor collection, since the ``filt``
653 keyword argument of this object triggers an overwrite of the ``filt``
654 property for all functors in the collection.
656 This object also allows a list of refFlags to be passed, and defines a set
657 of default refFlags that are always included even if not requested.
659 If a list of DataFrames or Handles is passed, rather than a single one,
660 then the calculations will be mapped over all the input catalogs. In
661 principle, it should be straightforward to parallelize this activity, but
662 initial tests have failed (see TODO in code comments).
664 Parameters
665 ----------
666 handles : `lsst.daf.butler.DeferredDatasetHandle` or
667 `lsst.pipe.base.InMemoryDatasetHandle` or
668 list of these.
669 Source catalog(s) for computation.
670 functors : `list`, `dict`, or `~lsst.pipe.tasks.functors.CompositeFunctor`
671 Computations to do (functors that act on ``handles``).
672 If a dict, the output
673 DataFrame will have columns keyed accordingly.
674 If a list, the column keys will come from the
675 ``.shortname`` attribute of each functor.
677 filt : `str`, optional
678 Filter in which to calculate. If provided,
679 this will overwrite any existing ``.filt`` attribute
680 of the provided functors.
682 flags : `list`, optional
683 List of flags (per-band) to include in output table.
684 Taken from the ``meas`` dataset if applied to a multilevel Object Table.
686 refFlags : `list`, optional
687 List of refFlags (only reference band) to include in output table.
689 forcedFlags : `list`, optional
690 List of flags (per-band) to include in output table.
691 Taken from the ``forced_src`` dataset if applied to a
692 multilevel Object Table. Intended for flags from measurement plugins
693 only run during multi-band forced-photometry.
694 """
695 _defaultRefFlags = []
696 _defaultFuncs = ()
698 def __init__(self, handles, functors, filt=None, flags=None, refFlags=None, forcedFlags=None):
699 self.handles = handles
700 self.functors = functors
702 self.filt = filt
703 self.flags = list(flags) if flags is not None else []
704 self.forcedFlags = list(forcedFlags) if forcedFlags is not None else []
705 self.refFlags = list(self._defaultRefFlags)
706 if refFlags is not None:
707 self.refFlags += list(refFlags)
709 self._df = None
711 @property
712 def defaultFuncs(self):
713 funcs = dict(self._defaultFuncs)
714 return funcs
716 @property
717 def func(self):
718 additionalFuncs = self.defaultFuncs
719 additionalFuncs.update({flag: Column(flag, dataset='forced_src') for flag in self.forcedFlags})
720 additionalFuncs.update({flag: Column(flag, dataset='ref') for flag in self.refFlags})
721 additionalFuncs.update({flag: Column(flag, dataset='meas') for flag in self.flags})
723 if isinstance(self.functors, CompositeFunctor):
724 func = self.functors
725 else:
726 func = CompositeFunctor(self.functors)
728 func.funcDict.update(additionalFuncs)
729 func.filt = self.filt
731 return func
733 @property
734 def noDupCols(self):
735 return [name for name, func in self.func.funcDict.items() if func.noDup or func.dataset == 'ref']
737 @property
738 def df(self):
739 if self._df is None:
740 self.compute()
741 return self._df
743 def compute(self, dropna=False, pool=None):
744 # map over multiple handles
745 if type(self.handles) in (list, tuple):
746 if pool is None:
747 dflist = [self.func(handle, dropna=dropna) for handle in self.handles]
748 else:
749 # TODO: Figure out why this doesn't work (pyarrow pickling
750 # issues?)
751 dflist = pool.map(functools.partial(self.func, dropna=dropna), self.handles)
752 self._df = pd.concat(dflist)
753 else:
754 self._df = self.func(self.handles, dropna=dropna)
756 return self._df
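# A minimal usage sketch (hypothetical functor choices; `handle` stands for a
# DeferredDatasetHandle or InMemoryDatasetHandle of a deepCoadd_obj-style
# DataFrame, and in production the functor collection normally comes from a
# YAML file via CompositeFunctor.from_file):
#
#     from lsst.pipe.tasks.functors import Column, Mag
#     funcs = {'ra': Column('coord_ra', dataset='ref'),
#              'psfMag': Mag('base_PsfFlux', dataset='meas')}
#     analysis = PostprocessAnalysis(handle, funcs, filt='r',
#                                    refFlags=['detect_isPrimary'])
#     df = analysis.df  # first access triggers compute()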
759class TransformCatalogBaseConnections(pipeBase.PipelineTaskConnections,
760 dimensions=()):
761 """Expected Connections for subclasses of TransformCatalogBaseTask.
763 Must be subclassed.
764 """
765 inputCatalog = connectionTypes.Input(
766 name="",
767 storageClass="DataFrame",
768 )
769 outputCatalog = connectionTypes.Output(
770 name="",
771 storageClass="DataFrame",
772 )
775class TransformCatalogBaseConfig(pipeBase.PipelineTaskConfig,
776 pipelineConnections=TransformCatalogBaseConnections):
777 functorFile = pexConfig.Field(
778 dtype=str,
779 doc="Path to YAML file specifying Science Data Model functors to use "
780 "when copying columns and computing calibrated values.",
781 default=None,
782 optional=True
783 )
784 primaryKey = pexConfig.Field(
785 dtype=str,
786 doc="Name of column to be set as the DataFrame index. If None, the index "
787 "will be named `id`",
788 default=None,
789 optional=True
790 )
791 columnsFromDataId = pexConfig.ListField(
792 dtype=str,
793 default=None,
794 optional=True,
795 doc="Columns to extract from the dataId",
796 )
799class TransformCatalogBaseTask(pipeBase.PipelineTask):
800 """Base class for transforming/standardizing a catalog
802 by applying functors that convert units and apply calibrations.
803 The purpose of this task is to perform a set of computations on
804 an input ``DeferredDatasetHandle`` or ``InMemoryDatasetHandle`` that holds
805 a ``DataFrame`` dataset (such as ``deepCoadd_obj``), and write the
806 results to a new dataset (which needs to be declared in an ``outputDataset``
807 attribute).
809 The calculations to be performed are defined in a YAML file that specifies
810 a set of functors to be computed, provided as
811 a ``--functorFile`` config parameter. An example of such a YAML file
812 is the following:
814 funcs:
815 psfMag:
816 functor: Mag
817 args:
818 - base_PsfFlux
819 filt: HSC-G
820 dataset: meas
821 cmodel_magDiff:
822 functor: MagDiff
823 args:
824 - modelfit_CModel
825 - base_PsfFlux
826 filt: HSC-G
827 gauss_magDiff:
828 functor: MagDiff
829 args:
830 - base_GaussianFlux
831 - base_PsfFlux
832 filt: HSC-G
833 count:
834 functor: Column
835 args:
836 - base_InputCount_value
837 filt: HSC-G
838 deconvolved_moments:
839 functor: DeconvolvedMoments
840 filt: HSC-G
841 dataset: forced_src
842 refFlags:
843 - calib_psfUsed
844 - merge_measurement_i
845 - merge_measurement_r
846 - merge_measurement_z
847 - merge_measurement_y
848 - merge_measurement_g
849 - base_PixelFlags_flag_inexact_psfCenter
850 - detect_isPrimary
852 The names for each entry under "funcs" will become the names of columns in
853 the output dataset. All the functors referenced are defined in
854 `lsst.pipe.tasks.functors`. Positional arguments to be passed to each
855 functor are in the `args` list, and any additional entries for each column
856 other than "functor" or "args" (e.g., ``'filt'``, ``'dataset'``) are treated as
857 keyword arguments to be passed to the functor initialization.
859 The "flags" entry is the default shortcut for `Column` functors.
860 All columns listed under "flags" will be copied to the output table
861 untransformed. They can be of any datatype.
862 In the special case of transforming a multi-level object table with
863 band and dataset indices (deepCoadd_obj), these will be taken from the
864 `meas` dataset and exploded out per band.
866 There are two special shortcuts that only apply when transforming
867 multi-level Object (deepCoadd_obj) tables:
868 - The "refFlags" entry is a shortcut for `Column` functors
869 taken from the `'ref'` dataset if transforming an ObjectTable.
870 - The "forcedFlags" entry is a shortcut for `Column` functors
871 taken from the ``forced_src`` dataset if transforming an ObjectTable.
872 These are expanded out per band.
875 This task uses the `lsst.pipe.tasks.postprocess.PostprocessAnalysis` object
876 to organize and execute the calculations.
877 """
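# A minimal configuration sketch for a concrete subclass (illustrative only;
# the functor file path here is hypothetical, and in production it is set in
# the subclass setDefaults or via pipeline configuration):
#
#     config = TransformSourceTableConfig()
#     config.functorFile = '/path/to/Source.yaml'
#     task = TransformSourceTableTask(config=config)
#     df = task.run(handle, funcs=task.funcs, dataId=dataId)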
878 @property
879 def _DefaultName(self):
880 raise NotImplementedError('Subclass must define "_DefaultName" attribute')
882 @property
883 def outputDataset(self):
884 raise NotImplementedError('Subclass must define "outputDataset" attribute')
886 @property
887 def inputDataset(self):
888 raise NotImplementedError('Subclass must define "inputDataset" attribute')
890 @property
891 def ConfigClass(self):
892 raise NotImplementedError('Subclass must define "ConfigClass" attribute')
894 def __init__(self, *args, **kwargs):
895 super().__init__(*args, **kwargs)
896 if self.config.functorFile:
897 self.log.info('Loading transform functor definitions from %s',
898 self.config.functorFile)
899 self.funcs = CompositeFunctor.from_file(self.config.functorFile)
900 self.funcs.update(dict(PostprocessAnalysis._defaultFuncs))
901 else:
902 self.funcs = None
904 def runQuantum(self, butlerQC, inputRefs, outputRefs):
905 inputs = butlerQC.get(inputRefs)
906 if self.funcs is None:
907 raise ValueError("config.functorFile is None. "
908 "Must be a valid path to yaml in order to run Task as a PipelineTask.")
909 result = self.run(handle=inputs['inputCatalog'], funcs=self.funcs,
910 dataId=outputRefs.outputCatalog.dataId.full)
911 outputs = pipeBase.Struct(outputCatalog=result)
912 butlerQC.put(outputs, outputRefs)
914 def run(self, handle, funcs=None, dataId=None, band=None):
915 """Do postprocessing calculations.
917 Takes a ``DeferredDatasetHandle`` or ``InMemoryDatasetHandle`` or
918 ``DataFrame`` object and dataId,
919 returns a dataframe with results of postprocessing calculations.
921 Parameters
922 ----------
923 handle : `lsst.daf.butler.DeferredDatasetHandle` or
924 `lsst.pipe.base.InMemoryDatasetHandle` or
925 `pandas.DataFrame`, or list of these.
926 DataFrames from which calculations are done.
927 funcs : `lsst.pipe.tasks.functors.Functors`
928 Functors to apply to the table's columns
929 dataId : `dict`, optional
930 Used to add the columns named in ``columnsFromDataId`` to the output dataframe.
931 band : `str`, optional
932 Filter band that is being processed.
934 Returns
935 -------
936 df : `pandas.DataFrame`
937 """
938 self.log.info("Transforming/standardizing the source table dataId: %s", dataId)
940 df = self.transform(band, handle, funcs, dataId).df
941 self.log.info("Made a table of %d columns and %d rows", len(df.columns), len(df))
942 return df
944 def getFunctors(self):
945 return self.funcs
947 def getAnalysis(self, handles, funcs=None, band=None):
948 if funcs is None:
949 funcs = self.funcs
950 analysis = PostprocessAnalysis(handles, funcs, filt=band)
951 return analysis
953 def transform(self, band, handles, funcs, dataId):
954 analysis = self.getAnalysis(handles, funcs=funcs, band=band)
955 df = analysis.df
956 if dataId and self.config.columnsFromDataId:
957 for key in self.config.columnsFromDataId:
958 if key in dataId:
959 df[str(key)] = dataId[key]
960 else:
961 raise ValueError(f"'{key}' in config.columnsFromDataId not found in dataId: {dataId}")
963 if self.config.primaryKey:
964 if df.index.name != self.config.primaryKey and self.config.primaryKey in df:
965 df.reset_index(inplace=True, drop=True)
966 df.set_index(self.config.primaryKey, inplace=True)
968 return pipeBase.Struct(
969 df=df,
970 analysis=analysis
971 )
974class TransformObjectCatalogConnections(pipeBase.PipelineTaskConnections,
975 defaultTemplates={"coaddName": "deep"},
976 dimensions=("tract", "patch", "skymap")):
977 inputCatalog = connectionTypes.Input(
978 doc="The vertical concatenation of the deepCoadd_{ref|meas|forced_src} catalogs, "
979 "stored as a DataFrame with a multi-level column index per-patch.",
980 dimensions=("tract", "patch", "skymap"),
981 storageClass="DataFrame",
982 name="{coaddName}Coadd_obj",
983 deferLoad=True,
984 )
985 outputCatalog = connectionTypes.Output(
986 doc="Per-Patch Object Table of columns transformed from the deepCoadd_obj table per the standard "
987 "data model.",
988 dimensions=("tract", "patch", "skymap"),
989 storageClass="DataFrame",
990 name="objectTable"
991 )
994class TransformObjectCatalogConfig(TransformCatalogBaseConfig,
995 pipelineConnections=TransformObjectCatalogConnections):
996 coaddName = pexConfig.Field(
997 dtype=str,
998 default="deep",
999 doc="Name of coadd"
1000 )
1001 # TODO: remove in DM-27177
1002 filterMap = pexConfig.DictField(
1003 keytype=str,
1004 itemtype=str,
1005 default={},
1006 doc=("Dictionary mapping full filter name to short one for column name munging. "
1007 "These filters determine the output columns no matter what filters the "
1008 "input data actually contain."),
1009 deprecated=("Coadds are now identified by the band, so this transform is unused. "
1010 "Will be removed after v22.")
1011 )
1012 outputBands = pexConfig.ListField(
1013 dtype=str,
1014 default=None,
1015 optional=True,
1016 doc=("These bands and only these bands will appear in the output,"
1017 " NaN-filled if the input does not include them."
1018 " If None, then use all bands found in the input.")
1019 )
1020 camelCase = pexConfig.Field(
1021 dtype=bool,
1022 default=False,
1023 doc=("Write per-band column names with camelCase, else underscore. "
1024 "For example: gPsFlux instead of g_PsFlux.")
1025 )
1026 multilevelOutput = pexConfig.Field(
1027 dtype=bool,
1028 default=False,
1029 doc=("Whether results dataframe should have a multilevel column index (True) or be flat "
1030 "and name-munged (False).")
1031 )
1032 goodFlags = pexConfig.ListField(
1033 dtype=str,
1034 default=[],
1035 doc=("List of 'good' flags that should be set False when populating empty tables. "
1036 "All other flags are considered to be 'bad' flags and will be set to True.")
1037 )
1038 floatFillValue = pexConfig.Field(
1039 dtype=float,
1040 default=np.nan,
1041 doc="Fill value for float fields when populating empty tables."
1042 )
1043 integerFillValue = pexConfig.Field(
1044 dtype=int,
1045 default=-1,
1046 doc="Fill value for integer fields when populating empty tables."
1047 )
1049 def setDefaults(self):
1050 super().setDefaults()
1051 self.functorFile = os.path.join('$PIPE_TASKS_DIR', 'schemas', 'Object.yaml')
1052 self.primaryKey = 'objectId'
1053 self.columnsFromDataId = ['tract', 'patch']
1054 self.goodFlags = ['calib_astrometry_used',
1055 'calib_photometry_reserved',
1056 'calib_photometry_used',
1057 'calib_psf_candidate',
1058 'calib_psf_reserved',
1059 'calib_psf_used']
1062class TransformObjectCatalogTask(TransformCatalogBaseTask):
1063 """Produce a flattened Object Table to match the format specified in
1064 sdm_schemas.
1066 Do the same set of postprocessing calculations on all bands.
1068 This is identical to `TransformCatalogBaseTask`, except that it does
1069 the specified functor calculations for all filters present in the
1070 input `deepCoadd_obj` table. Any specific ``"filt"`` keywords specified
1071 by the YAML file will be superseded.
1072 """
1073 _DefaultName = "transformObjectCatalog"
1074 ConfigClass = TransformObjectCatalogConfig
1076 def run(self, handle, funcs=None, dataId=None, band=None):
1077 # NOTE: band kwarg is ignored here.
1078 dfDict = {}
1079 analysisDict = {}
1080 templateDf = pd.DataFrame()
1082 columns = handle.get(component='columns')
1083 inputBands = columns.unique(level=1).values
1085 outputBands = self.config.outputBands if self.config.outputBands else inputBands
1087 # Perform transform for data of filters that exist in the handle dataframe.
1088 for inputBand in inputBands:
1089 if inputBand not in outputBands:
1090 self.log.info("Ignoring %s band data in the input", inputBand)
1091 continue
1092 self.log.info("Transforming the catalog of band %s", inputBand)
1093 result = self.transform(inputBand, handle, funcs, dataId)
1094 dfDict[inputBand] = result.df
1095 analysisDict[inputBand] = result.analysis
1096 if templateDf.empty:
1097 templateDf = result.df
1099 # Put filler values in columns of other wanted bands
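# For example (illustrative): with outputBands=['g', 'r', 'i'] and only g and
# r present in the input, the i columns are synthesized from the template band
# with NaN for floats, config.integerFillValue for integers, True for "bad"
# flags and False for flags listed in config.goodFlags.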
1100 for filt in outputBands:
1101 if filt not in dfDict:
1102 self.log.info("Adding empty columns for band %s", filt)
1103 dfTemp = templateDf.copy()
1104 for col in dfTemp.columns:
1105 testValue = dfTemp[col].values[0]
1106 if isinstance(testValue, (np.bool_, pd.BooleanDtype)):
1107 # Boolean flag type, check if it is a "good" flag
1108 if col in self.config.goodFlags:
1109 fillValue = False
1110 else:
1111 fillValue = True
1112 elif isinstance(testValue, numbers.Integral):
1113 # Checking numbers.Integral catches all flavors
1114 # of python, numpy, pandas, etc. integers.
1115 # We must ensure this is not an unsigned integer.
1116 if isinstance(testValue, np.unsignedinteger):
1117 raise ValueError("Parquet tables may not have unsigned integer columns.")
1118 else:
1119 fillValue = self.config.integerFillValue
1120 else:
1121 fillValue = self.config.floatFillValue
1122 dfTemp[col].values[:] = fillValue
1123 dfDict[filt] = dfTemp
1125 # This makes a multilevel column index, with band as first level
1126 df = pd.concat(dfDict, axis=1, names=['band', 'column'])
1128 if not self.config.multilevelOutput:
1129 noDupCols = list(set.union(*[set(v.noDupCols) for v in analysisDict.values()]))
1130 if self.config.primaryKey in noDupCols:
1131 noDupCols.remove(self.config.primaryKey)
1132 if dataId and self.config.columnsFromDataId:
1133 noDupCols += self.config.columnsFromDataId
1134 df = flattenFilters(df, noDupCols=noDupCols, camelCase=self.config.camelCase,
1135 inputBands=inputBands)
1137 self.log.info("Made a table of %d columns and %d rows", len(df.columns), len(df))
1139 return df
1142class ConsolidateObjectTableConnections(pipeBase.PipelineTaskConnections,
1143 dimensions=("tract", "skymap")):
1144 inputCatalogs = connectionTypes.Input(
1145 doc="Per-Patch objectTables conforming to the standard data model.",
1146 name="objectTable",
1147 storageClass="DataFrame",
1148 dimensions=("tract", "patch", "skymap"),
1149 multiple=True,
1150 )
1151 outputCatalog = connectionTypes.Output(
1152 doc="Per-tract vertical concatenation of the input objectTables",
1153 name="objectTable_tract",
1154 storageClass="DataFrame",
1155 dimensions=("tract", "skymap"),
1156 )
1159class ConsolidateObjectTableConfig(pipeBase.PipelineTaskConfig,
1160 pipelineConnections=ConsolidateObjectTableConnections):
1161 coaddName = pexConfig.Field(
1162 dtype=str,
1163 default="deep",
1164 doc="Name of coadd"
1165 )
1168class ConsolidateObjectTableTask(pipeBase.PipelineTask):
1169 """Write patch-merged source tables to a tract-level DataFrame Parquet file.
1171 Concatenates `objectTable` list into a per-tract `objectTable_tract`.
1172 """
1173 _DefaultName = "consolidateObjectTable"
1174 ConfigClass = ConsolidateObjectTableConfig
1176 inputDataset = 'objectTable'
1177 outputDataset = 'objectTable_tract'
1179 def runQuantum(self, butlerQC, inputRefs, outputRefs):
1180 inputs = butlerQC.get(inputRefs)
1181 self.log.info("Concatenating %s per-patch Object Tables",
1182 len(inputs['inputCatalogs']))
1183 df = pd.concat(inputs['inputCatalogs'])
1184 butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs)
1187class TransformSourceTableConnections(pipeBase.PipelineTaskConnections,
1188 defaultTemplates={"catalogType": ""},
1189 dimensions=("instrument", "visit", "detector")):
1191 inputCatalog = connectionTypes.Input(
1192 doc="Wide input catalog of sources produced by WriteSourceTableTask",
1193 name="{catalogType}source",
1194 storageClass="DataFrame",
1195 dimensions=("instrument", "visit", "detector"),
1196 deferLoad=True
1197 )
1198 outputCatalog = connectionTypes.Output(
1199 doc="Narrower, per-detector Source Table transformed and converted per a "
1200 "specified set of functors",
1201 name="{catalogType}sourceTable",
1202 storageClass="DataFrame",
1203 dimensions=("instrument", "visit", "detector")
1204 )
1207class TransformSourceTableConfig(TransformCatalogBaseConfig,
1208 pipelineConnections=TransformSourceTableConnections):
1210 def setDefaults(self):
1211 super().setDefaults()
1212 self.functorFile = os.path.join('$PIPE_TASKS_DIR', 'schemas', 'Source.yaml')
1213 self.primaryKey = 'sourceId'
1214 self.columnsFromDataId = ['visit', 'detector', 'band', 'physical_filter']
1217class TransformSourceTableTask(TransformCatalogBaseTask):
1218 """Transform/standardize a source catalog
1219 """
1220 _DefaultName = "transformSourceTable"
1221 ConfigClass = TransformSourceTableConfig
1224class ConsolidateVisitSummaryConnections(pipeBase.PipelineTaskConnections,
1225 dimensions=("instrument", "visit",),
1226 defaultTemplates={"calexpType": ""}):
1227 calexp = connectionTypes.Input(
1228 doc="Processed exposures used for metadata",
1229 name="calexp",
1230 storageClass="ExposureF",
1231 dimensions=("instrument", "visit", "detector"),
1232 deferLoad=True,
1233 multiple=True,
1234 )
1235 visitSummary = connectionTypes.Output(
1236 doc=("Per-visit consolidated exposure metadata. These catalogs use "
1237 "detector id for the id and are sorted for fast lookups of a "
1238 "detector."),
1239 name="visitSummary",
1240 storageClass="ExposureCatalog",
1241 dimensions=("instrument", "visit"),
1242 )
1243 visitSummarySchema = connectionTypes.InitOutput(
1244 doc="Schema of the visitSummary catalog",
1245 name="visitSummary_schema",
1246 storageClass="ExposureCatalog",
1247 )
1250class ConsolidateVisitSummaryConfig(pipeBase.PipelineTaskConfig,
1251 pipelineConnections=ConsolidateVisitSummaryConnections):
1252 """Config for ConsolidateVisitSummaryTask"""
1253 pass
1256class ConsolidateVisitSummaryTask(pipeBase.PipelineTask):
1257 """Task to consolidate per-detector visit metadata.
1259 This task aggregates the following metadata from all the detectors in a
1260 single visit into an exposure catalog:
1261 - The visitInfo.
1262 - The wcs.
1263 - The photoCalib.
1264 - The physical_filter and band (if available).
1265 - The psf size, shape, and effective area at the center of the detector.
1266 - The corners of the bounding box in right ascension/declination.
1268 Other quantities such as Detector, Psf, ApCorrMap, and TransmissionCurve
1269 are not persisted here because of storage concerns, and because of their
1270 limited utility as summary statistics.
1272 Tests for this task are performed in ci_hsc_gen3.
1273 """
1274 _DefaultName = "consolidateVisitSummary"
1275 ConfigClass = ConsolidateVisitSummaryConfig
1277 def __init__(self, **kwargs):
1278 super().__init__(**kwargs)
1279 self.schema = afwTable.ExposureTable.makeMinimalSchema()
1280 self.schema.addField('visit', type='L', doc='Visit number')
1281 self.schema.addField('physical_filter', type='String', size=32, doc='Physical filter')
1282 self.schema.addField('band', type='String', size=32, doc='Name of band')
1283 ExposureSummaryStats.update_schema(self.schema)
1284 self.visitSummarySchema = afwTable.ExposureCatalog(self.schema)
1286 def runQuantum(self, butlerQC, inputRefs, outputRefs):
1287 dataRefs = butlerQC.get(inputRefs.calexp)
1288 visit = dataRefs[0].dataId.byName()['visit']
1290 self.log.debug("Concatenating metadata from %d per-detector calexps (visit %d)",
1291 len(dataRefs), visit)
1293 expCatalog = self._combineExposureMetadata(visit, dataRefs)
1295 butlerQC.put(expCatalog, outputRefs.visitSummary)
1297 def _combineExposureMetadata(self, visit, dataRefs):
1298 """Make a combined exposure catalog from a list of dataRefs.
1299 These dataRefs must point to exposures with wcs, summaryStats,
1300 and other visit metadata.
1302 Parameters
1303 ----------
1304 visit : `int`
1305 Visit identification number.
1306 dataRefs : `list` of `lsst.daf.butler.DeferredDatasetHandle`
1307 List of dataRefs in visit.
1309 Returns
1310 -------
1311 visitSummary : `lsst.afw.table.ExposureCatalog`
1312 Exposure catalog with per-detector summary information.
1313 """
1314 cat = afwTable.ExposureCatalog(self.schema)
1315 cat.resize(len(dataRefs))
1317 cat['visit'] = visit
1319 for i, dataRef in enumerate(dataRefs):
1320 visitInfo = dataRef.get(component='visitInfo')
1321 filterLabel = dataRef.get(component='filter')
1322 summaryStats = dataRef.get(component='summaryStats')
1323 detector = dataRef.get(component='detector')
1324 wcs = dataRef.get(component='wcs')
1325 photoCalib = dataRef.get(component='photoCalib')
1327 bbox = dataRef.get(component='bbox')
1328 validPolygon = dataRef.get(component='validPolygon')
1330 rec = cat[i]
1331 rec.setBBox(bbox)
1332 rec.setVisitInfo(visitInfo)
1333 rec.setWcs(wcs)
1334 rec.setPhotoCalib(photoCalib)
1335 rec.setValidPolygon(validPolygon)
1337 rec['physical_filter'] = filterLabel.physicalLabel if filterLabel.hasPhysicalLabel() else ""
1338 rec['band'] = filterLabel.bandLabel if filterLabel.hasBandLabel() else ""
1339 rec.setId(detector.getId())
1340 summaryStats.update_record(rec)
1342 metadata = dafBase.PropertyList()
1343 metadata.add("COMMENT", "Catalog id is detector id, sorted.")
1344 # We are looping over existing datarefs, so the following is true
1345 metadata.add("COMMENT", "Only detectors with data have entries.")
1346 cat.setMetadata(metadata)
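# Sort the catalog by id (= detector id) so that downstream code can use
# ExposureCatalog.find(detectorId) for fast lookups.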
1348 cat.sort()
1349 return cat
1352class ConsolidateSourceTableConnections(pipeBase.PipelineTaskConnections,
1353 defaultTemplates={"catalogType": ""},
1354 dimensions=("instrument", "visit")):
1355 inputCatalogs = connectionTypes.Input(
1356 doc="Input per-detector Source Tables",
1357 name="{catalogType}sourceTable",
1358 storageClass="DataFrame",
1359 dimensions=("instrument", "visit", "detector"),
1360 multiple=True
1361 )
1362 outputCatalog = connectionTypes.Output(
1363 doc="Per-visit concatenation of Source Table",
1364 name="{catalogType}sourceTable_visit",
1365 storageClass="DataFrame",
1366 dimensions=("instrument", "visit")
1367 )
1370class ConsolidateSourceTableConfig(pipeBase.PipelineTaskConfig,
1371 pipelineConnections=ConsolidateSourceTableConnections):
1372 pass
1375class ConsolidateSourceTableTask(pipeBase.PipelineTask):
1376 """Concatenate `sourceTable` list into a per-visit `sourceTable_visit`
1377 """
1378 _DefaultName = 'consolidateSourceTable'
1379 ConfigClass = ConsolidateSourceTableConfig
1381 inputDataset = 'sourceTable'
1382 outputDataset = 'sourceTable_visit'
1384 def runQuantum(self, butlerQC, inputRefs, outputRefs):
1385 from .makeWarp import reorderRefs
1387 detectorOrder = [ref.dataId['detector'] for ref in inputRefs.inputCatalogs]
1388 detectorOrder.sort()
1389 inputRefs = reorderRefs(inputRefs, detectorOrder, dataIdKey='detector')
1390 inputs = butlerQC.get(inputRefs)
1391 self.log.info("Concatenating %s per-detector Source Tables",
1392 len(inputs['inputCatalogs']))
1393 df = pd.concat(inputs['inputCatalogs'])
1394 butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs)
1397class MakeCcdVisitTableConnections(pipeBase.PipelineTaskConnections,
1398 dimensions=("instrument",),
1399 defaultTemplates={"calexpType": ""}):
1400 visitSummaryRefs = connectionTypes.Input(
1401 doc="Data references for per-visit consolidated exposure metadata",
1402 name="finalVisitSummary",
1403 storageClass="ExposureCatalog",
1404 dimensions=("instrument", "visit"),
1405 multiple=True,
1406 deferLoad=True,
1407 )
1408 outputCatalog = connectionTypes.Output(
1409 doc="CCD and Visit metadata table",
1410 name="ccdVisitTable",
1411 storageClass="DataFrame",
1412 dimensions=("instrument",)
1413 )
1416class MakeCcdVisitTableConfig(pipeBase.PipelineTaskConfig,
1417 pipelineConnections=MakeCcdVisitTableConnections):
1418 idGenerator = DetectorVisitIdGeneratorConfig.make_field()
1421class MakeCcdVisitTableTask(pipeBase.PipelineTask):
1422 """Produce a `ccdVisitTable` from the visit summary exposure catalogs.
1423 """
1424 _DefaultName = 'makeCcdVisitTable'
1425 ConfigClass = MakeCcdVisitTableConfig
1427 def run(self, visitSummaryRefs):
1428 """Make a table of ccd information from the visit summary catalogs.
1430 Parameters
1431 ----------
1432 visitSummaryRefs : `list` of `lsst.daf.butler.DeferredDatasetHandle`
1433 List of DeferredDatasetHandles pointing to exposure catalogs with
1434 per-detector summary information.
1436 Returns
1437 -------
1438 result : `lsst.pipe.base.Struct`
1439 Results struct with attribute:
1441 ``outputCatalog``
1442 Catalog of ccd and visit information.
1443 """
1444 ccdEntries = []
1445 for visitSummaryRef in visitSummaryRefs:
1446 visitSummary = visitSummaryRef.get()
1447 visitInfo = visitSummary[0].getVisitInfo()
1449 ccdEntry = {}
1450 summaryTable = visitSummary.asAstropy()
1451 selectColumns = ['id', 'visit', 'physical_filter', 'band', 'ra', 'decl', 'zenithDistance',
1452 'zeroPoint', 'psfSigma', 'skyBg', 'skyNoise',
1453 'astromOffsetMean', 'astromOffsetStd', 'nPsfStar',
1454 'psfStarDeltaE1Median', 'psfStarDeltaE2Median',
1455 'psfStarDeltaE1Scatter', 'psfStarDeltaE2Scatter',
1456 'psfStarDeltaSizeMedian', 'psfStarDeltaSizeScatter',
1457 'psfStarScaledDeltaSizeScatter',
1458 'psfTraceRadiusDelta', 'maxDistToNearestPsf']
1459 ccdEntry = summaryTable[selectColumns].to_pandas().set_index('id')
1460 # 'visit' is the human readable visit number.
1461 # 'visitId' is the key to the visitId table. They are the same.
1462 # Technically you should join to get the visit from the visit
1463 # table.
1464 ccdEntry = ccdEntry.rename(columns={"visit": "visitId"})
1465 ccdEntry['ccdVisitId'] = [
1466 self.config.idGenerator.apply(
1467 visitSummaryRef.dataId,
1468 detector=detector_id,
1469 is_exposure=False,
1470 ).catalog_id # The "catalog ID" here is the ccdVisit ID
1471 # because it's usually the ID for a whole catalog
1472 # with a {visit, detector}, and that's the main
1473 # use case for IdGenerator. This usage for a
1474 # summary table is rare.
1475 for detector_id in summaryTable['id']
1476 ]
1477 ccdEntry['detector'] = summaryTable['id']
1478 pixToArcseconds = np.array([vR.getWcs().getPixelScale().asArcseconds() if vR.getWcs()
1479 else np.nan for vR in visitSummary])
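# Convert the PSF model's Gaussian sigma (pixels) to a FWHM in arcseconds:
# FWHM = sigma * sqrt(8 ln 2) ~ 2.355 * sigma.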
1480 ccdEntry["seeing"] = visitSummary['psfSigma'] * np.sqrt(8 * np.log(2)) * pixToArcseconds
1482 ccdEntry["skyRotation"] = visitInfo.getBoresightRotAngle().asDegrees()
1483 ccdEntry["expMidpt"] = visitInfo.getDate().toPython()
1484 ccdEntry["expMidptMJD"] = visitInfo.getDate().get(dafBase.DateTime.MJD)
1485 expTime = visitInfo.getExposureTime()
1486 ccdEntry['expTime'] = expTime
1487 ccdEntry["obsStart"] = ccdEntry["expMidpt"] - 0.5 * pd.Timedelta(seconds=expTime)
1488 expTime_days = expTime / (60*60*24)
1489 ccdEntry["obsStartMJD"] = ccdEntry["expMidptMJD"] - 0.5 * expTime_days
1490 ccdEntry['darkTime'] = visitInfo.getDarkTime()
1491 ccdEntry['xSize'] = summaryTable['bbox_max_x'] - summaryTable['bbox_min_x']
1492 ccdEntry['ySize'] = summaryTable['bbox_max_y'] - summaryTable['bbox_min_y']
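# The llc/ulc/urc/lrc prefixes below stand for the lower-left, upper-left,
# upper-right and lower-right corners; the raCorners and decCorners arrays
# are assumed to be stored in that order.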
1493 ccdEntry['llcra'] = summaryTable['raCorners'][:, 0]
1494 ccdEntry['llcdec'] = summaryTable['decCorners'][:, 0]
1495 ccdEntry['ulcra'] = summaryTable['raCorners'][:, 1]
1496 ccdEntry['ulcdec'] = summaryTable['decCorners'][:, 1]
1497 ccdEntry['urcra'] = summaryTable['raCorners'][:, 2]
1498 ccdEntry['urcdec'] = summaryTable['decCorners'][:, 2]
1499 ccdEntry['lrcra'] = summaryTable['raCorners'][:, 3]
1500 ccdEntry['lrcdec'] = summaryTable['decCorners'][:, 3]
1501 # TODO: DM-30618, Add raftName, nExposures, ccdTemp, binX, binY,
1502 # and flags, and decide if WCS, and llcx, llcy, ulcx, ulcy, etc.
1503 # values are actually wanted.
1504 ccdEntries.append(ccdEntry)
1506 outputCatalog = pd.concat(ccdEntries)
1507 outputCatalog.set_index('ccdVisitId', inplace=True, verify_integrity=True)
1508 return pipeBase.Struct(outputCatalog=outputCatalog)
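# Example (illustrative only, not part of the task): the resulting table can
# be summarized per visit with pandas, e.g. the median delivered seeing,
# where ``task`` is a configured instance of this class:
#
#     ccdVisits = task.run(visitSummaryRefs).outputCatalog
#     medianSeeing = ccdVisits.groupby("visitId")["seeing"].median()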
1511class MakeVisitTableConnections(pipeBase.PipelineTaskConnections,
1512 dimensions=("instrument",),
1513 defaultTemplates={"calexpType": ""}):
1514 visitSummaries = connectionTypes.Input(
1515 doc="Per-visit consolidated exposure metadata",
1516 name="finalVisitSummary",
1517 storageClass="ExposureCatalog",
1518 dimensions=("instrument", "visit",),
1519 multiple=True,
1520 deferLoad=True,
1521 )
1522 outputCatalog = connectionTypes.Output(
1523 doc="Visit metadata table",
1524 name="visitTable",
1525 storageClass="DataFrame",
1526 dimensions=("instrument",)
1527 )
1530class MakeVisitTableConfig(pipeBase.PipelineTaskConfig,
1531 pipelineConnections=MakeVisitTableConnections):
1532 pass
1535class MakeVisitTableTask(pipeBase.PipelineTask):
1536 """Produce a `visitTable` from the visit summary exposure catalogs.
1537 """
1538 _DefaultName = 'makeVisitTable'
1539 ConfigClass = MakeVisitTableConfig
1541 def run(self, visitSummaries):
1542 """Make a table of visit information from the visit summary catalogs.
1544 Parameters
1545 ----------
1546 visitSummaries : `list` of `lsst.daf.butler.DeferredDatasetHandle`
1547 List of DeferredDatasetHandles pointing to exposure catalogs with per-detector summary information.
1548 Returns
1549 -------
1550 result : `lsst.pipe.base.Struct`
1551 Results struct with attribute:
1553 ``outputCatalog``
1554 Catalog of visit information.
1555 """
1556 visitEntries = []
1557 for visitSummary in visitSummaries:
1558 visitSummary = visitSummary.get()
1559 visitRow = visitSummary[0]
1560 visitInfo = visitRow.getVisitInfo()
1562 visitEntry = {}
1563 visitEntry["visitId"] = visitRow['visit']
1564 visitEntry["visit"] = visitRow['visit']
1565 visitEntry["physical_filter"] = visitRow['physical_filter']
1566 visitEntry["band"] = visitRow['band']
1567 raDec = visitInfo.getBoresightRaDec()
1568 visitEntry["ra"] = raDec.getRa().asDegrees()
1569 visitEntry["decl"] = raDec.getDec().asDegrees()
1570 visitEntry["skyRotation"] = visitInfo.getBoresightRotAngle().asDegrees()
1571 azAlt = visitInfo.getBoresightAzAlt()
1572 visitEntry["azimuth"] = azAlt.getLongitude().asDegrees()
1573 visitEntry["altitude"] = azAlt.getLatitude().asDegrees()
1574 visitEntry["zenithDistance"] = 90 - azAlt.getLatitude().asDegrees()
1575 visitEntry["airmass"] = visitInfo.getBoresightAirmass()
1576 expTime = visitInfo.getExposureTime()
1577 visitEntry["expTime"] = expTime
1578 visitEntry["expMidpt"] = visitInfo.getDate().toPython()
1579 visitEntry["expMidptMJD"] = visitInfo.getDate().get(dafBase.DateTime.MJD)
1580 visitEntry["obsStart"] = visitEntry["expMidpt"] - 0.5 * pd.Timedelta(seconds=expTime)
1581 expTime_days = expTime / (60*60*24)
1582 visitEntry["obsStartMJD"] = visitEntry["expMidptMJD"] - 0.5 * expTime_days
1583 visitEntries.append(visitEntry)
1585 # TODO: DM-30623, Add programId, exposureType, cameraTemp,
1586 # mirror1Temp, mirror2Temp, mirror3Temp, domeTemp, externalTemp,
1587 # dimmSeeing, pwvGPS, pwvMW, flags, nExposures.
1589 outputCatalog = pd.DataFrame(data=visitEntries)
1590 outputCatalog.set_index('visitId', inplace=True, verify_integrity=True)
1591 return pipeBase.Struct(outputCatalog=outputCatalog)
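# Example (illustrative only, not part of the task): select visits from the
# resulting table by band and airmass with pandas, where ``task`` is a
# configured instance of this class:
#
#     visits = task.run(visitSummaries).outputCatalog
#     lowAirmass = visits[(visits["band"] == "i") & (visits["airmass"] < 1.2)]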
1594class WriteForcedSourceTableConnections(pipeBase.PipelineTaskConnections,
1595 dimensions=("instrument", "visit", "detector", "skymap", "tract")):
1597 inputCatalog = connectionTypes.Input(
1598 doc="Primary per-detector, single-epoch forced-photometry catalog. "
1599 "By default, it is the output of ForcedPhotCcdTask on calexps",
1600 name="forced_src",
1601 storageClass="SourceCatalog",
1602 dimensions=("instrument", "visit", "detector", "skymap", "tract")
1603 )
1604 inputCatalogDiff = connectionTypes.Input(
1605 doc="Secondary multi-epoch, per-detector, forced photometry catalog. "
1606 "By default, it is the output of ForcedPhotCcdTask run on image differences.",
1607 name="forced_diff",
1608 storageClass="SourceCatalog",
1609 dimensions=("instrument", "visit", "detector", "skymap", "tract")
1610 )
1611 outputCatalog = connectionTypes.Output(
1612 doc="InputCatalogs horizonatally joined on `objectId` in DataFrame parquet format",
1613 name="mergedForcedSource",
1614 storageClass="DataFrame",
1615 dimensions=("instrument", "visit", "detector", "skymap", "tract")
1616 )
1619class WriteForcedSourceTableConfig(pipeBase.PipelineTaskConfig,
1620 pipelineConnections=WriteForcedSourceTableConnections):
1621 key = lsst.pex.config.Field(
1622 doc="Column on which to join the two input tables on and make the primary key of the output",
1623 dtype=str,
1624 default="objectId",
1625 )
1626 idGenerator = DetectorVisitIdGeneratorConfig.make_field()
1629class WriteForcedSourceTableTask(pipeBase.PipelineTask):
1630 """Merge and convert per-detector forced source catalogs to DataFrame Parquet format.
1632 Because the predecessor ForcedPhotCcdTask operates per-detector and
1633 per-tract (i.e., it has tract in its dimensions), detectors
1634 on the tract boundary may have multiple forced source catalogs.
1636 The successor task TransformForcedSourceTable runs per-patch
1637 and temporally aggregates overlapping mergedForcedSource catalogs from
1638 all available epochs.
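The merged output carries a two-level column index whose first level is the
dataset label ('calexp' for the direct-image measurements, 'diff' for the
difference-image measurements) and whose second level is the original column
name. For example (a sketch; ``task`` is a configured instance of this class
and the flux column name is illustrative, depending on the input schema)::

    merged = task.run(inputCatalog, inputCatalogDiff).outputCatalog
    directFlux = merged[("calexp", "base_PsfFlux_instFlux")]
    diffFlux = merged[("diff", "base_PsfFlux_instFlux")]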
1639 """
1640 _DefaultName = "writeForcedSourceTable"
1641 ConfigClass = WriteForcedSourceTableConfig
1643 def runQuantum(self, butlerQC, inputRefs, outputRefs):
1644 inputs = butlerQC.get(inputRefs)
1645 # Add ccdVisitId to allow joining with CcdVisitTable
1646 idGenerator = self.config.idGenerator.apply(butlerQC.quantum.dataId)
1647 inputs['ccdVisitId'] = idGenerator.catalog_id
1648 inputs['band'] = butlerQC.quantum.dataId.full['band']
1649 outputs = self.run(**inputs)
1650 butlerQC.put(outputs, outputRefs)
1652 def run(self, inputCatalog, inputCatalogDiff, ccdVisitId=None, band=None):
1653 dfs = []
1654 for table, dataset in zip((inputCatalog, inputCatalogDiff), ('calexp', 'diff')):
1655 df = table.asAstropy().to_pandas().set_index(self.config.key, drop=False)
1656 df = df.reindex(sorted(df.columns), axis=1)
1657 df['ccdVisitId'] = ccdVisitId if ccdVisitId else pd.NA
1658 df['band'] = band if band else pd.NA
1659 df.columns = pd.MultiIndex.from_tuples([(dataset, c) for c in df.columns],
1660 names=('dataset', 'column'))
1662 dfs.append(df)
1664 outputCatalog = functools.reduce(lambda d1, d2: d1.join(d2), dfs)
1665 return pipeBase.Struct(outputCatalog=outputCatalog)
1668class TransformForcedSourceTableConnections(pipeBase.PipelineTaskConnections,
1669 dimensions=("instrument", "skymap", "patch", "tract")):
1671 inputCatalogs = connectionTypes.Input(
1672 doc="DataFrames of merged ForcedSources produced by WriteForcedSourceTableTask",
1673 name="mergedForcedSource",
1674 storageClass="DataFrame",
1675 dimensions=("instrument", "visit", "detector", "skymap", "tract"),
1676 multiple=True,
1677 deferLoad=True
1678 )
1679 referenceCatalog = connectionTypes.Input(
1680 doc="Reference catalog which was used to seed the forcedPhot. Columns "
1681 "objectId, detect_isPrimary, detect_isTractInner, detect_isPatchInner "
1682 "are expected.",
1683 name="objectTable",
1684 storageClass="DataFrame",
1685 dimensions=("tract", "patch", "skymap"),
1686 deferLoad=True
1687 )
1688 outputCatalog = connectionTypes.Output(
1689 doc="Narrower, temporally-aggregated, per-patch ForcedSource Table transformed and converted per a "
1690 "specified set of functors",
1691 name="forcedSourceTable",
1692 storageClass="DataFrame",
1693 dimensions=("tract", "patch", "skymap")
1694 )
1697class TransformForcedSourceTableConfig(TransformCatalogBaseConfig,
1698 pipelineConnections=TransformForcedSourceTableConnections):
1699 referenceColumns = pexConfig.ListField(
1700 dtype=str,
1701 default=["detect_isPrimary", "detect_isTractInner", "detect_isPatchInner"],
1702 optional=True,
1703 doc="Columns to pull from reference catalog",
1704 )
1705 keyRef = lsst.pex.config.Field(
1706 doc="Column on which to join the two input tables on and make the primary key of the output",
1707 dtype=str,
1708 default="objectId",
1709 )
1710 key = lsst.pex.config.Field(
1711 doc="Rename the output DataFrame index to this name",
1712 dtype=str,
1713 default="forcedSourceId",
1714 )
1716 def setDefaults(self):
1717 super().setDefaults()
1718 self.functorFile = os.path.join('$PIPE_TASKS_DIR', 'schemas', 'ForcedSource.yaml')
1719 self.columnsFromDataId = ['tract', 'patch']
1722class TransformForcedSourceTableTask(TransformCatalogBaseTask):
1723 """Transform/standardize a ForcedSource catalog
1725 Transforms each wide, per-detector forcedSource DataFrame per the
1726 specification file (per-camera defaults found in ForcedSource.yaml).
1727 All epochs that overlap the patch are aggregated into a single
1728 narrow, per-patch DataFrame.
1730 No de-duplication of rows is performed. Duplicate-resolution flags are
1731 pulled in from the referenceCatalog: `detect_isPrimary`,
1732 `detect_isTractInner`, `detect_isPatchInner`, so that the user may
1733 de-duplicate for analysis or compare duplicates for QA.
1735 The resulting table includes multiple bands. Epochs (MJDs) and other
1736 useful per-visit quantities can be retrieved by joining with the
1737 CcdVisitTable on ccdVisitId.
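For example, mid-exposure times can be attached to each forced source with a
pandas join (a sketch only; butler usage and dataset names are shown
schematically)::

    forced = butler.get("forcedSourceTable", tract=..., patch=..., skymap=...)
    ccdVisits = butler.get("ccdVisitTable", instrument=...)
    forced = forced.join(ccdVisits["expMidptMJD"], on="ccdVisitId")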
1738 """
1739 _DefaultName = "transformForcedSourceTable"
1740 ConfigClass = TransformForcedSourceTableConfig
1742 def runQuantum(self, butlerQC, inputRefs, outputRefs):
1743 inputs = butlerQC.get(inputRefs)
1744 if self.funcs is None:
1745 raise ValueError("config.functorFile is None. "
1746 "Must be a valid path to yaml in order to run Task as a PipelineTask.")
1747 outputs = self.run(inputs['inputCatalogs'], inputs['referenceCatalog'], funcs=self.funcs,
1748 dataId=outputRefs.outputCatalog.dataId.full)
1750 butlerQC.put(outputs, outputRefs)
1752 def run(self, inputCatalogs, referenceCatalog, funcs=None, dataId=None, band=None):
1753 dfs = []
1754 ref = referenceCatalog.get(parameters={"columns": self.config.referenceColumns})
1755 self.log.info("Aggregating %d input catalogs", len(inputCatalogs))
1756 for handle in inputCatalogs:
1757 result = self.transform(None, handle, funcs, dataId)
1758 # Keep only rows whose objectId appears in the reference (patch) catalog (inner join).
1759 dfs.append(result.df.join(ref, how='inner'))
1761 outputCatalog = pd.concat(dfs)
1763 # Now that we are done joining on config.keyRef,
1764 # change the index over to config.key in several steps:
1765 outputCatalog.index.rename(self.config.keyRef, inplace=True)
1766 # Demote the old index (config.keyRef) to an ordinary column
1767 outputCatalog.reset_index(inplace=True)
1768 # Set the forcedSourceId to the index. This is specified in the
1769 # ForcedSource.yaml
1770 outputCatalog.set_index("forcedSourceId", inplace=True, verify_integrity=True)
1771 # Rename the new index to config.key
1772 outputCatalog.index.rename(self.config.key, inplace=True)
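# Net effect: the output is indexed by forcedSourceId (exposed under the
# name config.key), while objectId (config.keyRef) remains an ordinary column.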
1774 self.log.info("Made a table of %d columns and %d rows",
1775 len(outputCatalog.columns), len(outputCatalog))
1776 return pipeBase.Struct(outputCatalog=outputCatalog)
1779class ConsolidateTractConnections(pipeBase.PipelineTaskConnections,
1780 defaultTemplates={"catalogType": ""},
1781 dimensions=("instrument", "tract")):
1782 inputCatalogs = connectionTypes.Input(
1783 doc="Input per-patch DataFrame Tables to be concatenated",
1784 name="{catalogType}ForcedSourceTable",
1785 storageClass="DataFrame",
1786 dimensions=("tract", "patch", "skymap"),
1787 multiple=True,
1788 )
1790 outputCatalog = connectionTypes.Output(
1791 doc="Output per-tract concatenation of DataFrame Tables",
1792 name="{catalogType}ForcedSourceTable_tract",
1793 storageClass="DataFrame",
1794 dimensions=("tract", "skymap"),
1795 )
1798class ConsolidateTractConfig(pipeBase.PipelineTaskConfig,
1799 pipelineConnections=ConsolidateTractConnections):
1800 pass
1803class ConsolidateTractTask(pipeBase.PipelineTask):
1804 """Concatenate any per-patch, dataframe list into a single
1805 per-tract DataFrame.
1806 """
1807 _DefaultName = 'ConsolidateTract'
1808 ConfigClass = ConsolidateTractConfig
1810 def runQuantum(self, butlerQC, inputRefs, outputRefs):
1811 inputs = butlerQC.get(inputRefs)
1812 # Not checking that at least one inputCatalog exists, because that
1813 # would imply an empty QuantumGraph.
1814 self.log.info("Concatenating %d per-patch %s tables",
1815 len(inputs['inputCatalogs']),
1816 inputRefs.inputCatalogs[0].datasetType.name)
1817 df = pd.concat(inputs['inputCatalogs'])
1818 butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs)