Coverage for python/lsst/pipe/tasks/postprocess.py: 28%
651 statements
coverage.py v7.2.7, created at 2023-07-13 11:43 +0000
1# This file is part of pipe_tasks.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <https://www.gnu.org/licenses/>.
22__all__ = ["WriteObjectTableConfig", "WriteObjectTableTask",
23 "WriteSourceTableConfig", "WriteSourceTableTask",
24 "WriteRecalibratedSourceTableConfig", "WriteRecalibratedSourceTableTask",
25 "PostprocessAnalysis",
26 "TransformCatalogBaseConfig", "TransformCatalogBaseTask",
27 "TransformObjectCatalogConfig", "TransformObjectCatalogTask",
28 "ConsolidateObjectTableConfig", "ConsolidateObjectTableTask",
29 "TransformSourceTableConfig", "TransformSourceTableTask",
30 "ConsolidateVisitSummaryConfig", "ConsolidateVisitSummaryTask",
31 "ConsolidateSourceTableConfig", "ConsolidateSourceTableTask",
32 "MakeCcdVisitTableConfig", "MakeCcdVisitTableTask",
33 "MakeVisitTableConfig", "MakeVisitTableTask",
34 "WriteForcedSourceTableConfig", "WriteForcedSourceTableTask",
35 "TransformForcedSourceTableConfig", "TransformForcedSourceTableTask",
36 "ConsolidateTractConfig", "ConsolidateTractTask"]
38import functools
39import pandas as pd
40import logging
41import numpy as np
42import numbers
43import os
45import lsst.geom
46import lsst.pex.config as pexConfig
47import lsst.pipe.base as pipeBase
48import lsst.daf.base as dafBase
49from lsst.pipe.base import connectionTypes
50import lsst.afw.table as afwTable
51from lsst.afw.image import ExposureSummaryStats
52from lsst.meas.base import SingleFrameMeasurementTask, DetectorVisitIdGeneratorConfig
53from lsst.skymap import BaseSkyMap
55from .functors import CompositeFunctor, Column
57log = logging.getLogger(__name__)
60def flattenFilters(df, noDupCols=['coord_ra', 'coord_dec'], camelCase=False, inputBands=None):
61 """Flattens a dataframe with multilevel column index.
62 """
63 newDf = pd.DataFrame()
64 # band is the level 0 index
65 dfBands = df.columns.unique(level=0).values
66 for band in dfBands:
67 subdf = df[band]
68 columnFormat = '{0}{1}' if camelCase else '{0}_{1}'
69 newColumns = {c: columnFormat.format(band, c)
70 for c in subdf.columns if c not in noDupCols}
71 cols = list(newColumns.keys())
72 newDf = pd.concat([newDf, subdf[cols].rename(columns=newColumns)], axis=1)
74 # Band must be present in the input and output, or else the column is all NaN:
75 presentBands = dfBands if inputBands is None else list(set(inputBands).intersection(dfBands))
76 # Get the unexploded columns from any present band's partition
77 noDupDf = df[presentBands[0]][noDupCols]
78 newDf = pd.concat([noDupDf, newDf], axis=1)
79 return newDf
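# Illustrative sketch (not part of the pipeline): how flattenFilters reshapes a
# two-level (band, column) DataFrame into flat, band-prefixed columns. The toy
# column names below are hypothetical; it relies on the module-level pandas import.
def _exampleFlattenFilters():
    bands = ["g", "r"]
    columns = pd.MultiIndex.from_product([bands, ["coord_ra", "coord_dec", "psfFlux"]])
    toy = pd.DataFrame([[10.0, -5.0, 1.0, 10.0, -5.0, 2.0]], columns=columns)
    flat = flattenFilters(toy)
    # flat.columns is now ['coord_ra', 'coord_dec', 'g_psfFlux', 'r_psfFlux']:
    # the noDupCols are taken once, all other columns are prefixed per band.
    return flat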
82class WriteObjectTableConnections(pipeBase.PipelineTaskConnections,
83 defaultTemplates={"coaddName": "deep"},
84 dimensions=("tract", "patch", "skymap")):
85 inputCatalogMeas = connectionTypes.Input(
86 doc="Catalog of source measurements on the deepCoadd.",
87 dimensions=("tract", "patch", "band", "skymap"),
88 storageClass="SourceCatalog",
89 name="{coaddName}Coadd_meas",
90 multiple=True
91 )
92 inputCatalogForcedSrc = connectionTypes.Input(
93 doc="Catalog of forced measurements (shape and position parameters held fixed) on the deepCoadd.",
94 dimensions=("tract", "patch", "band", "skymap"),
95 storageClass="SourceCatalog",
96 name="{coaddName}Coadd_forced_src",
97 multiple=True
98 )
99 inputCatalogRef = connectionTypes.Input(
100 doc="Catalog marking the primary detection (which band provides a good shape and position)"
101 "for each detection in deepCoadd_mergeDet.",
102 dimensions=("tract", "patch", "skymap"),
103 storageClass="SourceCatalog",
104 name="{coaddName}Coadd_ref"
105 )
106 outputCatalog = connectionTypes.Output(
107 doc="A vertical concatenation of the deepCoadd_{ref|meas|forced_src} catalogs, "
108 "stored as a DataFrame with a multi-level column index per-patch.",
109 dimensions=("tract", "patch", "skymap"),
110 storageClass="DataFrame",
111 name="{coaddName}Coadd_obj"
112 )
115class WriteObjectTableConfig(pipeBase.PipelineTaskConfig,
116 pipelineConnections=WriteObjectTableConnections):
117 engine = pexConfig.Field(
118 dtype=str,
119 default="pyarrow",
120 doc="Parquet engine for writing (pyarrow or fastparquet)",
121 deprecated="This config is no longer used, and will be removed after v26."
122 )
123 coaddName = pexConfig.Field(
124 dtype=str,
125 default="deep",
126 doc="Name of coadd"
127 )
130class WriteObjectTableTask(pipeBase.PipelineTask):
131 """Write filter-merged source tables as a DataFrame in parquet format.
132 """
133 _DefaultName = "writeObjectTable"
134 ConfigClass = WriteObjectTableConfig
136 # Names of table datasets to be merged
137 inputDatasets = ('forced_src', 'meas', 'ref')
139 # Tag of output dataset written by `MergeSourcesTask.write`
140 outputDataset = 'obj'
142 def runQuantum(self, butlerQC, inputRefs, outputRefs):
143 inputs = butlerQC.get(inputRefs)
145 measDict = {ref.dataId['band']: {'meas': cat} for ref, cat in
146 zip(inputRefs.inputCatalogMeas, inputs['inputCatalogMeas'])}
147 forcedSourceDict = {ref.dataId['band']: {'forced_src': cat} for ref, cat in
148 zip(inputRefs.inputCatalogForcedSrc, inputs['inputCatalogForcedSrc'])}
150 catalogs = {}
151 for band in measDict.keys():
152 catalogs[band] = {'meas': measDict[band]['meas'],
153 'forced_src': forcedSourceDict[band]['forced_src'],
154 'ref': inputs['inputCatalogRef']}
155 dataId = butlerQC.quantum.dataId
156 df = self.run(catalogs=catalogs, tract=dataId['tract'], patch=dataId['patch'])
157 outputs = pipeBase.Struct(outputCatalog=df)
158 butlerQC.put(outputs, outputRefs)
160 def run(self, catalogs, tract, patch):
161 """Merge multiple catalogs.
163 Parameters
164 ----------
165 catalogs : `dict`
166 Mapping from filter names to dict of catalogs.
167 tract : `int`
168 tractId to use for the tractId column.
169 patch : `str`
170 patchId to use for the patchId column.
172 Returns
173 -------
174 catalog : `pandas.DataFrame`
175 Merged dataframe.
176 """
177 dfs = []
178 for filt, tableDict in catalogs.items():
179 for dataset, table in tableDict.items():
180 # Convert afwTable to pandas DataFrame
181 df = table.asAstropy().to_pandas().set_index('id', drop=True)
183 # Sort columns by name, to ensure matching schema among patches
184 df = df.reindex(sorted(df.columns), axis=1)
185 df = df.assign(tractId=tract, patchId=patch)
187 # Make columns a 3-level MultiIndex
188 df.columns = pd.MultiIndex.from_tuples([(dataset, filt, c) for c in df.columns],
189 names=('dataset', 'band', 'column'))
190 dfs.append(df)
192 # We do this dance and not `pd.concat(dfs)` because the pandas
193 # concatenation uses infinite memory.
194 catalog = functools.reduce(lambda d1, d2: d1.join(d2), dfs)
195 return catalog
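# Note (illustrative): the merged catalog carries a three-level column MultiIndex
# (dataset, band, column), so downstream code can slice it like
# ``catalog["meas"]["g"]`` for one band's measurement columns, or ``catalog["ref"]``
# for the band-independent reference columns. The band name here is a hypothetical
# example.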
198class WriteSourceTableConnections(pipeBase.PipelineTaskConnections,
199 defaultTemplates={"catalogType": ""},
200 dimensions=("instrument", "visit", "detector")):
202 catalog = connectionTypes.Input(
203 doc="Input full-depth catalog of sources produced by CalibrateTask",
204 name="{catalogType}src",
205 storageClass="SourceCatalog",
206 dimensions=("instrument", "visit", "detector")
207 )
208 outputCatalog = connectionTypes.Output(
209 doc="Catalog of sources, `src` in DataFrame/Parquet format. The 'id' column is "
210 "replaced with an index; all other columns are unchanged.",
211 name="{catalogType}source",
212 storageClass="DataFrame",
213 dimensions=("instrument", "visit", "detector")
214 )
217class WriteSourceTableConfig(pipeBase.PipelineTaskConfig,
218 pipelineConnections=WriteSourceTableConnections):
219 idGenerator = DetectorVisitIdGeneratorConfig.make_field()
222class WriteSourceTableTask(pipeBase.PipelineTask):
223 """Write source table to DataFrame Parquet format.
224 """
225 _DefaultName = "writeSourceTable"
226 ConfigClass = WriteSourceTableConfig
228 def runQuantum(self, butlerQC, inputRefs, outputRefs):
229 inputs = butlerQC.get(inputRefs)
230 inputs['ccdVisitId'] = self.config.idGenerator.apply(butlerQC.quantum.dataId).catalog_id
231 result = self.run(**inputs)
232 outputs = pipeBase.Struct(outputCatalog=result.table)
233 butlerQC.put(outputs, outputRefs)
235 def run(self, catalog, ccdVisitId=None, **kwargs):
236 """Convert `src` catalog to DataFrame
238 Parameters
239 ----------
240 catalog : `afwTable.SourceCatalog`
241 Catalog to be converted.
242 ccdVisitId : `int`
243 ccdVisitId to be added as a column.
244 **kwargs
245 Additional keyword arguments are ignored as a convenience for
246 subclasses that pass the same arguments to several different
247 methods.
249 Returns
250 -------
251 result : `~lsst.pipe.base.Struct`
252 ``table``
253 `DataFrame` version of the input catalog
254 """
255 self.log.info("Generating DataFrame from src catalog ccdVisitId=%s", ccdVisitId)
256 df = catalog.asAstropy().to_pandas().set_index('id', drop=True)
257 df['ccdVisitId'] = ccdVisitId
259 return pipeBase.Struct(table=df)
262class WriteRecalibratedSourceTableConnections(WriteSourceTableConnections,
263 defaultTemplates={"catalogType": "",
264 "skyWcsName": "gbdesAstrometricFit",
265 "photoCalibName": "fgcm"},
266 dimensions=("instrument", "visit", "detector", "skymap")):
267 skyMap = connectionTypes.Input(
268 doc="skyMap needed to choose which tract-level calibrations to use when multiple available",
269 name=BaseSkyMap.SKYMAP_DATASET_TYPE_NAME,
270 storageClass="SkyMap",
271 dimensions=("skymap",),
272 )
273 exposure = connectionTypes.Input(
274 doc="Input exposure to perform photometry on.",
275 name="calexp",
276 storageClass="ExposureF",
277 dimensions=["instrument", "visit", "detector"],
278 )
279 externalSkyWcsTractCatalog = connectionTypes.Input(
280 doc=("Per-tract, per-visit wcs calibrations. These catalogs use the detector "
281 "id for the catalog id, sorted on id for fast lookup."),
282 name="{skyWcsName}SkyWcsCatalog",
283 storageClass="ExposureCatalog",
284 dimensions=["instrument", "visit", "tract"],
285 multiple=True
286 )
287 externalSkyWcsGlobalCatalog = connectionTypes.Input(
288 doc=("Per-visit wcs calibrations computed globally (with no tract information). "
289 "These catalogs use the detector id for the catalog id, sorted on id for "
290 "fast lookup."),
291 name="finalVisitSummary",
292 storageClass="ExposureCatalog",
293 dimensions=["instrument", "visit"],
294 )
295 externalPhotoCalibTractCatalog = connectionTypes.Input(
296 doc=("Per-tract, per-visit photometric calibrations. These catalogs use the "
297 "detector id for the catalog id, sorted on id for fast lookup."),
298 name="{photoCalibName}PhotoCalibCatalog",
299 storageClass="ExposureCatalog",
300 dimensions=["instrument", "visit", "tract"],
301 multiple=True
302 )
303 externalPhotoCalibGlobalCatalog = connectionTypes.Input(
304 doc=("Per-visit photometric calibrations computed globally (with no tract "
305 "information). These catalogs use the detector id for the catalog id, "
306 "sorted on id for fast lookup."),
307 name="finalVisitSummary",
308 storageClass="ExposureCatalog",
309 dimensions=["instrument", "visit"],
310 )
312 def __init__(self, *, config=None):
313 super().__init__(config=config)
314 # Same connection boilerplate as all other applications of
315 # Global/Tract calibrations
316 if config.doApplyExternalSkyWcs and config.doReevaluateSkyWcs:
317 if config.useGlobalExternalSkyWcs:
318 self.inputs.remove("externalSkyWcsTractCatalog")
319 else:
320 self.inputs.remove("externalSkyWcsGlobalCatalog")
321 else:
322 self.inputs.remove("externalSkyWcsTractCatalog")
323 self.inputs.remove("externalSkyWcsGlobalCatalog")
324 if config.doApplyExternalPhotoCalib and config.doReevaluatePhotoCalib:
325 if config.useGlobalExternalPhotoCalib:
326 self.inputs.remove("externalPhotoCalibTractCatalog")
327 else:
328 self.inputs.remove("externalPhotoCalibGlobalCatalog")
329 else:
330 self.inputs.remove("externalPhotoCalibTractCatalog")
331 self.inputs.remove("externalPhotoCalibGlobalCatalog")
334class WriteRecalibratedSourceTableConfig(WriteSourceTableConfig,
335 pipelineConnections=WriteRecalibratedSourceTableConnections):
337 doReevaluatePhotoCalib = pexConfig.Field(
338 dtype=bool,
339 default=True,
340 doc=("Add or replace local photoCalib columns")
341 )
342 doReevaluateSkyWcs = pexConfig.Field(
343 dtype=bool,
344 default=True,
345 doc=("Add or replace local WCS columns and update the coord columns, coord_ra and coord_dec")
346 )
347 doApplyExternalPhotoCalib = pexConfig.Field(
348 dtype=bool,
349 default=True,
350 doc=("If and only if doReevaluatePhotoCalib, apply the photometric calibrations from an external ",
351 "algorithm such as FGCM or jointcal, else use the photoCalib already attached to the exposure."),
352 )
353 doApplyExternalSkyWcs = pexConfig.Field(
354 dtype=bool,
355 default=True,
356 doc=("if and only if doReevaluateSkyWcs, apply the WCS from an external algorithm such as jointcal, ",
357 "else use the wcs already attached to the exposure."),
358 )
359 useGlobalExternalPhotoCalib = pexConfig.Field(
360 dtype=bool,
361 default=True,
362 doc=("When using doApplyExternalPhotoCalib, use 'global' calibrations "
363 "that are not run per-tract. When False, use per-tract photometric "
364 "calibration files.")
365 )
366 useGlobalExternalSkyWcs = pexConfig.Field(
367 dtype=bool,
368 default=True,
369 doc=("When using doApplyExternalSkyWcs, use 'global' calibrations "
370 "that are not run per-tract. When False, use per-tract wcs "
371 "files.")
372 )
373 idGenerator = DetectorVisitIdGeneratorConfig.make_field()
375 def validate(self):
376 super().validate()
377 if self.doApplyExternalSkyWcs and not self.doReevaluateSkyWcs:
378 log.warning("doApplyExternalSkyWcs=True but doReevaluateSkyWcs=False. "
379 "External SkyWcs will not be read or evaluated.")
380 if self.doApplyExternalPhotoCalib and not self.doReevaluatePhotoCalib:
381 log.warning("doApplyExternalPhotoCalib=True but doReevaluatePhotoCalib=False. "
382 "External PhotoCalib will not be read or evaluated.")
385class WriteRecalibratedSourceTableTask(WriteSourceTableTask):
386 """Write source table to DataFrame Parquet format.
387 """
388 _DefaultName = "writeRecalibratedSourceTable"
389 ConfigClass = WriteRecalibratedSourceTableConfig
391 def runQuantum(self, butlerQC, inputRefs, outputRefs):
392 inputs = butlerQC.get(inputRefs)
394 idGenerator = self.config.idGenerator.apply(butlerQC.quantum.dataId)
395 inputs['idGenerator'] = idGenerator
396 inputs['ccdVisitId'] = idGenerator.catalog_id
398 if self.config.doReevaluatePhotoCalib or self.config.doReevaluateSkyWcs:
399 if self.config.doApplyExternalPhotoCalib or self.config.doApplyExternalSkyWcs:
400 inputs['exposure'] = self.attachCalibs(inputRefs, **inputs)
402 inputs['catalog'] = self.addCalibColumns(**inputs)
404 result = self.run(**inputs)
405 outputs = pipeBase.Struct(outputCatalog=result.table)
406 butlerQC.put(outputs, outputRefs)
408 def attachCalibs(self, inputRefs, skyMap, exposure, externalSkyWcsGlobalCatalog=None,
409 externalSkyWcsTractCatalog=None, externalPhotoCalibGlobalCatalog=None,
410 externalPhotoCalibTractCatalog=None, **kwargs):
411 """Apply external calibrations to exposure per configuration
413 When multiple tract-level calibrations overlap, select the one with the
414 center closest to detector.
416 Parameters
417 ----------
418 inputRefs : `~lsst.pipe.base.InputQuantizedConnection`
419 Input connections, used for the dataIds of tract-level calibs.
420 skyMap : `~lsst.skymap.BaseSkyMap`
421 skyMap to lookup tract geometry and WCS.
422 exposure : `lsst.afw.image.exposure.Exposure`
423 Input exposure to adjust calibrations.
424 externalSkyWcsGlobalCatalog : `~lsst.afw.table.ExposureCatalog`, optional
425 Exposure catalog with external skyWcs to be applied per config
426 externalSkyWcsTractCatalog : `~lsst.afw.table.ExposureCatalog`, optional
427 Exposure catalog with external skyWcs to be applied per config
428 externalPhotoCalibGlobalCatalog : `~lsst.afw.table.ExposureCatalog`, optional
429 Exposure catalog with external photoCalib to be applied per config
430 externalPhotoCalibTractCatalog : `~lsst.afw.table.ExposureCatalog`, optional
431 Exposure catalog with external photoCalib to be applied per config
432 **kwargs
433 Additional keyword arguments are ignored to facilitate passing the
434 same arguments to several methods.
436 Returns
437 -------
438 exposure : `lsst.afw.image.exposure.Exposure`
439 Exposure with adjusted calibrations.
440 """
441 if not self.config.doApplyExternalSkyWcs:
442 # Do not modify the exposure's SkyWcs
443 externalSkyWcsCatalog = None
444 elif self.config.useGlobalExternalSkyWcs:
445 # Use the global external SkyWcs
446 externalSkyWcsCatalog = externalSkyWcsGlobalCatalog
447 self.log.info('Applying global SkyWcs')
448 else:
449 # use tract-level external SkyWcs from the closest overlapping tract
450 inputRef = getattr(inputRefs, 'externalSkyWcsTractCatalog')
451 tracts = [ref.dataId['tract'] for ref in inputRef]
452 if len(tracts) == 1:
453 ind = 0
454 self.log.info('Applying tract-level SkyWcs from tract %s', tracts[ind])
455 else:
456 if exposure.getWcs() is None: # TODO: could this look-up use the externalPhotoCalib?
457 raise ValueError("Trying to locate nearest tract, but exposure.wcs is None.")
458 ind = self.getClosestTract(tracts, skyMap,
459 exposure.getBBox(), exposure.getWcs())
460 self.log.info('Multiple overlapping externalSkyWcsTractCatalogs found (%s). '
461 'Applying closest to detector center: tract=%s', str(tracts), tracts[ind])
463 externalSkyWcsCatalog = externalSkyWcsTractCatalog[ind]
465 if not self.config.doApplyExternalPhotoCalib:
466 # Do not modify the exposure's PhotoCalib
467 externalPhotoCalibCatalog = None
468 elif self.config.useGlobalExternalPhotoCalib:
469 # Use the global external PhotoCalib
470 externalPhotoCalibCatalog = externalPhotoCalibGlobalCatalog
471 self.log.info('Applying global PhotoCalib')
472 else:
473 # use tract-level external PhotoCalib from the closest overlapping tract
474 inputRef = getattr(inputRefs, 'externalPhotoCalibTractCatalog')
475 tracts = [ref.dataId['tract'] for ref in inputRef]
476 if len(tracts) == 1:
477 ind = 0
478 self.log.info('Applying tract-level PhotoCalib from tract %s', tracts[ind])
479 else:
480 ind = self.getClosestTract(tracts, skyMap,
481 exposure.getBBox(), exposure.getWcs())
482 self.log.info('Multiple overlapping externalPhotoCalibTractCatalogs found (%s). '
483 'Applying closest to detector center: tract=%s', str(tracts), tracts[ind])
485 externalPhotoCalibCatalog = externalPhotoCalibTractCatalog[ind]
487 return self.prepareCalibratedExposure(exposure, externalSkyWcsCatalog, externalPhotoCalibCatalog)
489 def getClosestTract(self, tracts, skyMap, bbox, wcs):
490 """Find the index of the tract closest to detector from list of tractIds
492 Parameters
493 ----------
494 tracts : `list` [`int`]
495 Iterable of integer tractIds.
496 skyMap : `~lsst.skymap.BaseSkyMap`
497 skyMap to look up tract geometry and wcs.
498 bbox : `~lsst.geom.Box2I`
499 Detector bbox, the center of which will be compared to tract centers.
500 wcs : `~lsst.afw.geom.SkyWcs`
501 Detector Wcs object to map the detector center to a SkyCoord.
503 Returns
504 -------
505 index : `int`
506 """
507 if len(tracts) == 1:
508 return 0
510 center = wcs.pixelToSky(bbox.getCenter())
511 sep = []
512 for tractId in tracts:
513 tract = skyMap[tractId]
514 tractCenter = tract.getWcs().pixelToSky(tract.getBBox().getCenter())
515 sep.append(center.separation(tractCenter))
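# The tract whose center has the smallest angular separation from the
# detector center is the one selected below.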
517 return np.argmin(sep)
519 def prepareCalibratedExposure(self, exposure, externalSkyWcsCatalog=None, externalPhotoCalibCatalog=None):
520 """Prepare a calibrated exposure and apply external calibrations
521 if so configured.
523 Parameters
524 ----------
525 exposure : `lsst.afw.image.exposure.Exposure`
526 Input exposure to adjust calibrations.
527 externalSkyWcsCatalog : `lsst.afw.table.ExposureCatalog`, optional
528 Exposure catalog with external skyWcs to be applied
529 if config.doApplyExternalSkyWcs=True. Catalog uses the detector id
530 for the catalog id, sorted on id for fast lookup.
531 externalPhotoCalibCatalog : `lsst.afw.table.ExposureCatalog`, optional
532 Exposure catalog with external photoCalib to be applied
533 if config.doApplyExternalPhotoCalib=True. Catalog uses the detector
534 id for the catalog id, sorted on id for fast lookup.
536 Returns
537 -------
538 exposure : `lsst.afw.image.exposure.Exposure`
539 Exposure with adjusted calibrations.
540 """
541 detectorId = exposure.getInfo().getDetector().getId()
543 if externalPhotoCalibCatalog is not None:
544 row = externalPhotoCalibCatalog.find(detectorId)
545 if row is None:
546 self.log.warning("Detector id %s not found in externalPhotoCalibCatalog; "
547 "Using original photoCalib.", detectorId)
548 else:
549 photoCalib = row.getPhotoCalib()
550 if photoCalib is None:
551 self.log.warning("Detector id %s has None for photoCalib in externalPhotoCalibCatalog; "
552 "Using original photoCalib.", detectorId)
553 else:
554 exposure.setPhotoCalib(photoCalib)
556 if externalSkyWcsCatalog is not None:
557 row = externalSkyWcsCatalog.find(detectorId)
558 if row is None:
559 self.log.warning("Detector id %s not found in externalSkyWcsCatalog; "
560 "Using original skyWcs.", detectorId)
561 else:
562 skyWcs = row.getWcs()
563 if skyWcs is None:
564 self.log.warning("Detector id %s has None for skyWcs in externalSkyWcsCatalog; "
565 "Using original skyWcs.", detectorId)
566 else:
567 exposure.setWcs(skyWcs)
569 return exposure
571 def addCalibColumns(self, catalog, exposure, idGenerator, **kwargs):
572 """Add replace columns with calibs evaluated at each centroid
574 Add or replace 'base_LocalWcs' `base_LocalPhotoCalib' columns in a
575 a source catalog, by rerunning the plugins.
577 Parameters
578 ----------
579 catalog : `lsst.afw.table.SourceCatalog`
580 catalog to which calib columns will be added
581 exposure : `lsst.afw.image.exposure.Exposure`
582 Exposure with attached PhotoCalibs and SkyWcs attributes to be
583 reevaluated at local centroids. Pixels are not required.
584 idGenerator : `lsst.meas.base.IdGenerator`
585 Object that generates Source IDs and random seeds.
586 **kwargs
587 Additional keyword arguments are ignored to facilitate passing the
588 same arguments to several methods.
590 Returns
591 -------
592 newCat: `lsst.afw.table.SourceCatalog`
593 Source Catalog with requested local calib columns
594 """
595 measureConfig = SingleFrameMeasurementTask.ConfigClass()
596 measureConfig.doReplaceWithNoise = False
598 # Clear all slots, because we aren't running the relevant plugins.
599 for slot in measureConfig.slots:
600 setattr(measureConfig.slots, slot, None)
602 measureConfig.plugins.names = []
603 if self.config.doReevaluateSkyWcs:
604 measureConfig.plugins.names.add('base_LocalWcs')
605 self.log.info("Re-evaluating base_LocalWcs plugin")
606 if self.config.doReevaluatePhotoCalib:
607 measureConfig.plugins.names.add('base_LocalPhotoCalib')
608 self.log.info("Re-evaluating base_LocalPhotoCalib plugin")
609 pluginsNotToCopy = tuple(measureConfig.plugins.names)
611 # Create a new schema and catalog
612 # Copy all columns from original except for the ones to reevaluate
613 aliasMap = catalog.schema.getAliasMap()
614 mapper = afwTable.SchemaMapper(catalog.schema)
615 for item in catalog.schema:
616 if not item.field.getName().startswith(pluginsNotToCopy):
617 mapper.addMapping(item.key)
619 schema = mapper.getOutputSchema()
620 measurement = SingleFrameMeasurementTask(config=measureConfig, schema=schema)
621 schema.setAliasMap(aliasMap)
622 newCat = afwTable.SourceCatalog(schema)
623 newCat.extend(catalog, mapper=mapper)
625 # Fluxes in sourceCatalogs are in counts, so there are no fluxes to
626 # update here. LocalPhotoCalibs are applied during transform tasks.
627 # Update coord_ra/coord_dec, which are expected to be positions on the
628 # sky and are used as such in sdm tables without transform
629 if self.config.doReevaluateSkyWcs and exposure.wcs is not None:
630 afwTable.updateSourceCoords(exposure.wcs, newCat)
632 measurement.run(measCat=newCat, exposure=exposure, exposureId=idGenerator.catalog_id)
634 return newCat
637class PostprocessAnalysis(object):
638 """Calculate columns from DataFrames or handles storing DataFrames.
640 This object manages and organizes an arbitrary set of computations
641 on a catalog. The catalog is defined by a
642 `DeferredDatasetHandle` or `InMemoryDatasetHandle` object
643 (or list thereof), such as a ``deepCoadd_obj`` dataset, and the
644 computations are defined by a collection of
645 `~lsst.pipe.tasks.functors.Functor` objects (or, equivalently, a
646 ``CompositeFunctor``).
648 After the object is initialized, accessing the ``.df`` attribute (which
649 holds the `pandas.DataFrame` containing the results of the calculations)
650 triggers computation of said dataframe.
652 One of the conveniences of using this object is the ability to define a
653 desired common filter for all functors. This enables the same functor
654 collection to be passed to several different `PostprocessAnalysis` objects
655 without having to change the original functor collection, since the ``filt``
656 keyword argument of this object triggers an overwrite of the ``filt``
657 property for all functors in the collection.
659 This object also allows a list of refFlags to be passed, and defines a set
660 of default refFlags that are always included even if not requested.
662 If a list of DataFrames or Handles is passed, rather than a single one,
663 then the calculations will be mapped over all the input catalogs. In
664 principle, it should be straightforward to parallelize this activity, but
665 initial tests have failed (see TODO in code comments).
667 Parameters
668 ----------
669 handles : `~lsst.daf.butler.DeferredDatasetHandle` or
670 `~lsst.pipe.base.InMemoryDatasetHandle` or
671 list of these.
672 Source catalog(s) for computation.
673 functors : `list`, `dict`, or `~lsst.pipe.tasks.functors.CompositeFunctor`
674 Computations to do (functors that act on ``handles``).
675 If a dict, the output
676 DataFrame will have columns keyed accordingly.
677 If a list, the column keys will come from the
678 ``.shortname`` attribute of each functor.
680 filt : `str`, optional
681 Filter in which to calculate. If provided,
682 this will overwrite any existing ``.filt`` attribute
683 of the provided functors.
685 flags : `list`, optional
686 List of flags (per-band) to include in output table.
687 Taken from the ``meas`` dataset if applied to a multilevel Object Table.
689 refFlags : `list`, optional
690 List of refFlags (only reference band) to include in output table.
692 forcedFlags : `list`, optional
693 List of flags (per-band) to include in output table.
694 Taken from the ``forced_src`` dataset if applied to a
695 multilevel Object Table. Intended for flags from measurement plugins
696 only run during multi-band forced-photometry.
697 """
698 _defaultRefFlags = []
699 _defaultFuncs = ()
701 def __init__(self, handles, functors, filt=None, flags=None, refFlags=None, forcedFlags=None):
702 self.handles = handles
703 self.functors = functors
705 self.filt = filt
706 self.flags = list(flags) if flags is not None else []
707 self.forcedFlags = list(forcedFlags) if forcedFlags is not None else []
708 self.refFlags = list(self._defaultRefFlags)
709 if refFlags is not None:
710 self.refFlags += list(refFlags)
712 self._df = None
714 @property
715 def defaultFuncs(self):
716 funcs = dict(self._defaultFuncs)
717 return funcs
719 @property
720 def func(self):
721 additionalFuncs = self.defaultFuncs
722 additionalFuncs.update({flag: Column(flag, dataset='forced_src') for flag in self.forcedFlags})
723 additionalFuncs.update({flag: Column(flag, dataset='ref') for flag in self.refFlags})
724 additionalFuncs.update({flag: Column(flag, dataset='meas') for flag in self.flags})
726 if isinstance(self.functors, CompositeFunctor):
727 func = self.functors
728 else:
729 func = CompositeFunctor(self.functors)
731 func.funcDict.update(additionalFuncs)
732 func.filt = self.filt
734 return func
736 @property
737 def noDupCols(self):
738 return [name for name, func in self.func.funcDict.items() if func.noDup or func.dataset == 'ref']
740 @property
741 def df(self):
742 if self._df is None:
743 self.compute()
744 return self._df
746 def compute(self, dropna=False, pool=None):
747 # map over multiple handles
748 if type(self.handles) in (list, tuple):
749 if pool is None:
750 dflist = [self.func(handle, dropna=dropna) for handle in self.handles]
751 else:
752 # TODO: Figure out why this doesn't work (pyarrow pickling
753 # issues?)
754 dflist = pool.map(functools.partial(self.func, dropna=dropna), self.handles)
755 self._df = pd.concat(dflist)
756 else:
757 self._df = self.func(self.handles, dropna=dropna)
759 return self._df
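# Illustrative usage sketch (hypothetical column names; not part of the pipeline):
# a PostprocessAnalysis wraps a handle plus a functor collection, and the first
# access of ``.df`` runs the functors and caches the resulting DataFrame.
def _examplePostprocessAnalysis(handle):
    funcs = {"ra": Column("coord_ra", dataset="ref"),
             "dec": Column("coord_dec", dataset="ref")}
    analysis = PostprocessAnalysis(handle, funcs, filt="r")
    return analysis.df  # triggers compute() on first access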
762class TransformCatalogBaseConnections(pipeBase.PipelineTaskConnections,
763 dimensions=()):
764 """Expected Connections for subclasses of TransformCatalogBaseTask.
766 Must be subclassed.
767 """
768 inputCatalog = connectionTypes.Input(
769 name="",
770 storageClass="DataFrame",
771 )
772 outputCatalog = connectionTypes.Output(
773 name="",
774 storageClass="DataFrame",
775 )
778class TransformCatalogBaseConfig(pipeBase.PipelineTaskConfig,
779 pipelineConnections=TransformCatalogBaseConnections):
780 functorFile = pexConfig.Field(
781 dtype=str,
782 doc="Path to YAML file specifying Science Data Model functors to use "
783 "when copying columns and computing calibrated values.",
784 default=None,
785 optional=True
786 )
787 primaryKey = pexConfig.Field(
788 dtype=str,
789 doc="Name of column to be set as the DataFrame index. If None, the index"
790 "will be named `id`",
791 default=None,
792 optional=True
793 )
794 columnsFromDataId = pexConfig.ListField(
795 dtype=str,
796 default=None,
797 optional=True,
798 doc="Columns to extract from the dataId",
799 )
802class TransformCatalogBaseTask(pipeBase.PipelineTask):
803 """Base class for transforming/standardizing a catalog by applying functors
804 that convert units and apply calibrations.
806 The purpose of this task is to perform a set of computations on an input
807 ``DeferredDatasetHandle`` or ``InMemoryDatasetHandle`` that holds a
808 ``DataFrame`` dataset (such as ``deepCoadd_obj``), and write the results to
809 a new dataset (which needs to be declared in an ``outputDataset``
810 attribute).
812 The calculations to be performed are defined in a YAML file that specifies
813 a set of functors to be computed, provided as a ``--functorFile`` config
814 parameter. An example of such a YAML file is the following:
816 funcs:
817 sourceId:
818 functor: Index
819 x:
820 functor: Column
821 args: slot_Centroid_x
822 y:
823 functor: Column
824 args: slot_Centroid_y
825 psfFlux:
826 functor: LocalNanojansky
827 args:
828 - slot_PsfFlux_instFlux
829 - slot_PsfFlux_instFluxErr
830 - base_LocalPhotoCalib
831 - base_LocalPhotoCalibErr
832 psfFluxErr:
833 functor: LocalNanojanskyErr
834 args:
835 - slot_PsfFlux_instFlux
836 - slot_PsfFlux_instFluxErr
837 - base_LocalPhotoCalib
838 - base_LocalPhotoCalibErr
839 flags:
840 - detect_isPrimary
842 The names for each entry under "funcs" will become the names of columns in
843 the output dataset. All the functors referenced are defined in
844 `~lsst.pipe.tasks.functors`. Positional arguments to be passed to each
845 functor are in the `args` list, and any additional entries for each column
846 other than "functor" or "args" (e.g., ``'filt'``, ``'dataset'``) are
847 treated as keyword arguments to be passed to the functor initialization.
849 The "flags" entry is the default shortcut for `Column` functors.
850 All columns listed under "flags" will be copied to the output table
851 untransformed. They can be of any datatype.
852 In the special case of transforming a multi-level object table with
853 band and dataset indices (deepCoadd_obj), these will be taken from the
854 `meas` dataset and exploded out per band.
856 There are two special shortcuts that only apply when transforming
857 multi-level Object (deepCoadd_obj) tables:
858 - The "refFlags" entry is a shortcut for `Column` functors
859 taken from the ``ref`` dataset if transforming an ObjectTable.
860 - The "forcedFlags" entry is a shortcut for `Column` functors
861 taken from the ``forced_src`` dataset if transforming an ObjectTable.
862 These are expanded out per band.
865 This task uses the `lsst.pipe.tasks.postprocess.PostprocessAnalysis` object
866 to organize and execute the calculations.
867 """
868 @property
869 def _DefaultName(self):
870 raise NotImplementedError('Subclass must define "_DefaultName" attribute')
872 @property
873 def outputDataset(self):
874 raise NotImplementedError('Subclass must define "outputDataset" attribute')
876 @property
877 def inputDataset(self):
878 raise NotImplementedError('Subclass must define "inputDataset" attribute')
880 @property
881 def ConfigClass(self):
882 raise NotImplementedError('Subclass must define "ConfigClass" attribute')
884 def __init__(self, *args, **kwargs):
885 super().__init__(*args, **kwargs)
886 if self.config.functorFile:
887 self.log.info('Loading transform functor definitions from %s',
888 self.config.functorFile)
889 self.funcs = CompositeFunctor.from_file(self.config.functorFile)
890 self.funcs.update(dict(PostprocessAnalysis._defaultFuncs))
891 else:
892 self.funcs = None
894 def runQuantum(self, butlerQC, inputRefs, outputRefs):
895 inputs = butlerQC.get(inputRefs)
896 if self.funcs is None:
897 raise ValueError("config.functorFile is None. "
898 "Must be a valid path to yaml in order to run Task as a PipelineTask.")
899 result = self.run(handle=inputs['inputCatalog'], funcs=self.funcs,
900 dataId=outputRefs.outputCatalog.dataId.full)
901 outputs = pipeBase.Struct(outputCatalog=result)
902 butlerQC.put(outputs, outputRefs)
904 def run(self, handle, funcs=None, dataId=None, band=None):
905 """Do postprocessing calculations
907 Takes a ``DeferredDatasetHandle`` or ``InMemoryDatasetHandle`` or
908 ``DataFrame`` object and dataId,
909 returns a dataframe with results of postprocessing calculations.
911 Parameters
912 ----------
913 handle : `~lsst.daf.butler.DeferredDatasetHandle` or
914 `~lsst.pipe.base.InMemoryDatasetHandle` or
915 `~pandas.DataFrame`, or list of these.
916 DataFrames from which calculations are done.
917 funcs : `~lsst.pipe.tasks.functors.Functor`
918 Functors to apply to the table's columns
919 dataId : dict, optional
920 Used to add a `patchId` column to the output dataframe.
921 band : `str`, optional
922 Filter band that is being processed.
924 Returns
925 -------
926 df : `pandas.DataFrame`
927 """
928 self.log.info("Transforming/standardizing the source table dataId: %s", dataId)
930 df = self.transform(band, handle, funcs, dataId).df
931 self.log.info("Made a table of %d columns and %d rows", len(df.columns), len(df))
932 return df
934 def getFunctors(self):
935 return self.funcs
937 def getAnalysis(self, handles, funcs=None, band=None):
938 if funcs is None:
939 funcs = self.funcs
940 analysis = PostprocessAnalysis(handles, funcs, filt=band)
941 return analysis
943 def transform(self, band, handles, funcs, dataId):
944 analysis = self.getAnalysis(handles, funcs=funcs, band=band)
945 df = analysis.df
946 if dataId and self.config.columnsFromDataId:
947 for key in self.config.columnsFromDataId:
948 if key in dataId:
949 df[str(key)] = dataId[key]
950 else:
951 raise ValueError(f"'{key}' in config.columnsFromDataId not found in dataId: {dataId}")
953 if self.config.primaryKey:
954 if df.index.name != self.config.primaryKey and self.config.primaryKey in df:
955 df.reset_index(inplace=True, drop=True)
956 df.set_index(self.config.primaryKey, inplace=True)
958 return pipeBase.Struct(
959 df=df,
960 analysis=analysis
961 )
964class TransformObjectCatalogConnections(pipeBase.PipelineTaskConnections,
965 defaultTemplates={"coaddName": "deep"},
966 dimensions=("tract", "patch", "skymap")):
967 inputCatalog = connectionTypes.Input(
968 doc="The vertical concatenation of the deepCoadd_{ref|meas|forced_src} catalogs, "
969 "stored as a DataFrame with a multi-level column index per-patch.",
970 dimensions=("tract", "patch", "skymap"),
971 storageClass="DataFrame",
972 name="{coaddName}Coadd_obj",
973 deferLoad=True,
974 )
975 outputCatalog = connectionTypes.Output(
976 doc="Per-Patch Object Table of columns transformed from the deepCoadd_obj table per the standard "
977 "data model.",
978 dimensions=("tract", "patch", "skymap"),
979 storageClass="DataFrame",
980 name="objectTable"
981 )
984class TransformObjectCatalogConfig(TransformCatalogBaseConfig,
985 pipelineConnections=TransformObjectCatalogConnections):
986 coaddName = pexConfig.Field(
987 dtype=str,
988 default="deep",
989 doc="Name of coadd"
990 )
991 # TODO: remove in DM-27177
992 filterMap = pexConfig.DictField(
993 keytype=str,
994 itemtype=str,
995 default={},
996 doc=("Dictionary mapping full filter name to short one for column name munging."
997 "These filters determine the output columns no matter what filters the "
998 "input data actually contain."),
999 deprecated=("Coadds are now identified by the band, so this transform is unused."
1000 "Will be removed after v22.")
1001 )
1002 outputBands = pexConfig.ListField(
1003 dtype=str,
1004 default=None,
1005 optional=True,
1006 doc=("These bands and only these bands will appear in the output,"
1007 " NaN-filled if the input does not include them."
1008 " If None, then use all bands found in the input.")
1009 )
1010 camelCase = pexConfig.Field(
1011 dtype=bool,
1012 default=False,
1013 doc=("Write per-band columns names with camelCase, else underscore "
1014 "For example: gPsFlux instead of g_PsFlux.")
1015 )
1016 multilevelOutput = pexConfig.Field(
1017 dtype=bool,
1018 default=False,
1019 doc=("Whether results dataframe should have a multilevel column index (True) or be flat "
1020 "and name-munged (False).")
1021 )
1022 goodFlags = pexConfig.ListField(
1023 dtype=str,
1024 default=[],
1025 doc=("List of 'good' flags that should be set False when populating empty tables. "
1026 "All other flags are considered to be 'bad' flags and will be set to True.")
1027 )
1028 floatFillValue = pexConfig.Field(
1029 dtype=float,
1030 default=np.nan,
1031 doc="Fill value for float fields when populating empty tables."
1032 )
1033 integerFillValue = pexConfig.Field(
1034 dtype=int,
1035 default=-1,
1036 doc="Fill value for integer fields when populating empty tables."
1037 )
1039 def setDefaults(self):
1040 super().setDefaults()
1041 self.functorFile = os.path.join('$PIPE_TASKS_DIR', 'schemas', 'Object.yaml')
1042 self.primaryKey = 'objectId'
1043 self.columnsFromDataId = ['tract', 'patch']
1044 self.goodFlags = ['calib_astrometry_used',
1045 'calib_photometry_reserved',
1046 'calib_photometry_used',
1047 'calib_psf_candidate',
1048 'calib_psf_reserved',
1049 'calib_psf_used']
1052class TransformObjectCatalogTask(TransformCatalogBaseTask):
1053 """Produce a flattened Object Table to match the format specified in
1054 sdm_schemas.
1056 Do the same set of postprocessing calculations on all bands.
1058 This is identical to `TransformCatalogBaseTask`, except that it does
1059 the specified functor calculations for all filters present in the
1060 input `deepCoadd_obj` table. Any specific ``"filt"`` keywords specified
1061 by the YAML file will be superseded.
1062 """
1063 _DefaultName = "transformObjectCatalog"
1064 ConfigClass = TransformObjectCatalogConfig
1066 def run(self, handle, funcs=None, dataId=None, band=None):
1067 # NOTE: band kwarg is ignored here.
1068 dfDict = {}
1069 analysisDict = {}
1070 templateDf = pd.DataFrame()
1072 columns = handle.get(component='columns')
1073 inputBands = columns.unique(level=1).values
1075 outputBands = self.config.outputBands if self.config.outputBands else inputBands
1077 # Perform the transform for the filters present in the handle's dataframe.
1078 for inputBand in inputBands:
1079 if inputBand not in outputBands:
1080 self.log.info("Ignoring %s band data in the input", inputBand)
1081 continue
1082 self.log.info("Transforming the catalog of band %s", inputBand)
1083 result = self.transform(inputBand, handle, funcs, dataId)
1084 dfDict[inputBand] = result.df
1085 analysisDict[inputBand] = result.analysis
1086 if templateDf.empty:
1087 templateDf = result.df
1089 # Put filler values in columns of other wanted bands
1090 for filt in outputBands:
1091 if filt not in dfDict:
1092 self.log.info("Adding empty columns for band %s", filt)
1093 dfTemp = templateDf.copy()
1094 for col in dfTemp.columns:
1095 testValue = dfTemp[col].values[0]
1096 if isinstance(testValue, (np.bool_, pd.BooleanDtype)):
1097 # Boolean flag type, check if it is a "good" flag
1098 if col in self.config.goodFlags:
1099 fillValue = False
1100 else:
1101 fillValue = True
1102 elif isinstance(testValue, numbers.Integral):
1103 # Checking numbers.Integral catches all flavors
1104 # of python, numpy, pandas, etc. integers.
1105 # We must ensure this is not an unsigned integer.
1106 if isinstance(testValue, np.unsignedinteger):
1107 raise ValueError("Parquet tables may not have unsigned integer columns.")
1108 else:
1109 fillValue = self.config.integerFillValue
1110 else:
1111 fillValue = self.config.floatFillValue
1112 dfTemp[col].values[:] = fillValue
1113 dfDict[filt] = dfTemp
1115 # This makes a multilevel column index, with band as first level
1116 df = pd.concat(dfDict, axis=1, names=['band', 'column'])
1118 if not self.config.multilevelOutput:
1119 noDupCols = list(set.union(*[set(v.noDupCols) for v in analysisDict.values()]))
1120 if self.config.primaryKey in noDupCols:
1121 noDupCols.remove(self.config.primaryKey)
1122 if dataId and self.config.columnsFromDataId:
1123 noDupCols += self.config.columnsFromDataId
1124 df = flattenFilters(df, noDupCols=noDupCols, camelCase=self.config.camelCase,
1125 inputBands=inputBands)
1127 self.log.info("Made a table of %d columns and %d rows", len(df.columns), len(df))
1129 return df
1132class ConsolidateObjectTableConnections(pipeBase.PipelineTaskConnections,
1133 dimensions=("tract", "skymap")):
1134 inputCatalogs = connectionTypes.Input(
1135 doc="Per-Patch objectTables conforming to the standard data model.",
1136 name="objectTable",
1137 storageClass="DataFrame",
1138 dimensions=("tract", "patch", "skymap"),
1139 multiple=True,
1140 )
1141 outputCatalog = connectionTypes.Output(
1142 doc="Pre-tract horizontal concatenation of the input objectTables",
1143 name="objectTable_tract",
1144 storageClass="DataFrame",
1145 dimensions=("tract", "skymap"),
1146 )
1149class ConsolidateObjectTableConfig(pipeBase.PipelineTaskConfig,
1150 pipelineConnections=ConsolidateObjectTableConnections):
1151 coaddName = pexConfig.Field(
1152 dtype=str,
1153 default="deep",
1154 doc="Name of coadd"
1155 )
1158class ConsolidateObjectTableTask(pipeBase.PipelineTask):
1159 """Write patch-merged source tables to a tract-level DataFrame Parquet file.
1161 Concatenates `objectTable` list into a per-tract `objectTable_tract`.
1162 """
1163 _DefaultName = "consolidateObjectTable"
1164 ConfigClass = ConsolidateObjectTableConfig
1166 inputDataset = 'objectTable'
1167 outputDataset = 'objectTable_tract'
1169 def runQuantum(self, butlerQC, inputRefs, outputRefs):
1170 inputs = butlerQC.get(inputRefs)
1171 self.log.info("Concatenating %s per-patch Object Tables",
1172 len(inputs['inputCatalogs']))
1173 df = pd.concat(inputs['inputCatalogs'])
1174 butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs)
1177class TransformSourceTableConnections(pipeBase.PipelineTaskConnections,
1178 defaultTemplates={"catalogType": ""},
1179 dimensions=("instrument", "visit", "detector")):
1181 inputCatalog = connectionTypes.Input(
1182 doc="Wide input catalog of sources produced by WriteSourceTableTask",
1183 name="{catalogType}source",
1184 storageClass="DataFrame",
1185 dimensions=("instrument", "visit", "detector"),
1186 deferLoad=True
1187 )
1188 outputCatalog = connectionTypes.Output(
1189 doc="Narrower, per-detector Source Table transformed and converted per a "
1190 "specified set of functors",
1191 name="{catalogType}sourceTable",
1192 storageClass="DataFrame",
1193 dimensions=("instrument", "visit", "detector")
1194 )
1197class TransformSourceTableConfig(TransformCatalogBaseConfig,
1198 pipelineConnections=TransformSourceTableConnections):
1200 def setDefaults(self):
1201 super().setDefaults()
1202 self.functorFile = os.path.join('$PIPE_TASKS_DIR', 'schemas', 'Source.yaml')
1203 self.primaryKey = 'sourceId'
1204 self.columnsFromDataId = ['visit', 'detector', 'band', 'physical_filter']
1207class TransformSourceTableTask(TransformCatalogBaseTask):
1208 """Transform/standardize a source catalog
1209 """
1210 _DefaultName = "transformSourceTable"
1211 ConfigClass = TransformSourceTableConfig
1214class ConsolidateVisitSummaryConnections(pipeBase.PipelineTaskConnections,
1215 dimensions=("instrument", "visit",),
1216 defaultTemplates={"calexpType": ""}):
1217 calexp = connectionTypes.Input(
1218 doc="Processed exposures used for metadata",
1219 name="calexp",
1220 storageClass="ExposureF",
1221 dimensions=("instrument", "visit", "detector"),
1222 deferLoad=True,
1223 multiple=True,
1224 )
1225 visitSummary = connectionTypes.Output(
1226 doc=("Per-visit consolidated exposure metadata. These catalogs use "
1227 "detector id for the id and are sorted for fast lookups of a "
1228 "detector."),
1229 name="visitSummary",
1230 storageClass="ExposureCatalog",
1231 dimensions=("instrument", "visit"),
1232 )
1233 visitSummarySchema = connectionTypes.InitOutput(
1234 doc="Schema of the visitSummary catalog",
1235 name="visitSummary_schema",
1236 storageClass="ExposureCatalog",
1237 )
1240class ConsolidateVisitSummaryConfig(pipeBase.PipelineTaskConfig,
1241 pipelineConnections=ConsolidateVisitSummaryConnections):
1242 """Config for ConsolidateVisitSummaryTask"""
1243 pass
1246class ConsolidateVisitSummaryTask(pipeBase.PipelineTask):
1247 """Task to consolidate per-detector visit metadata.
1249 This task aggregates the following metadata from all the detectors in a
1250 single visit into an exposure catalog:
1251 - The visitInfo.
1252 - The wcs.
1253 - The photoCalib.
1254 - The physical_filter and band (if available).
1255 - The psf size, shape, and effective area at the center of the detector.
1256 - The corners of the bounding box in right ascension/declination.
1258 Other quantities such as Detector, Psf, ApCorrMap, and TransmissionCurve
1259 are not persisted here because of storage concerns, and because of their
1260 limited utility as summary statistics.
1262 Tests for this task are performed in ci_hsc_gen3.
1263 """
1264 _DefaultName = "consolidateVisitSummary"
1265 ConfigClass = ConsolidateVisitSummaryConfig
1267 def __init__(self, **kwargs):
1268 super().__init__(**kwargs)
1269 self.schema = afwTable.ExposureTable.makeMinimalSchema()
1270 self.schema.addField('visit', type='L', doc='Visit number')
1271 self.schema.addField('physical_filter', type='String', size=32, doc='Physical filter')
1272 self.schema.addField('band', type='String', size=32, doc='Name of band')
1273 ExposureSummaryStats.update_schema(self.schema)
1274 self.visitSummarySchema = afwTable.ExposureCatalog(self.schema)
1276 def runQuantum(self, butlerQC, inputRefs, outputRefs):
1277 dataRefs = butlerQC.get(inputRefs.calexp)
1278 visit = dataRefs[0].dataId.byName()['visit']
1280 self.log.debug("Concatenating metadata from %d per-detector calexps (visit %d)",
1281 len(dataRefs), visit)
1283 expCatalog = self._combineExposureMetadata(visit, dataRefs)
1285 butlerQC.put(expCatalog, outputRefs.visitSummary)
1287 def _combineExposureMetadata(self, visit, dataRefs):
1288 """Make a combined exposure catalog from a list of dataRefs.
1289 These dataRefs must point to exposures with wcs, summaryStats,
1290 and other visit metadata.
1292 Parameters
1293 ----------
1294 visit : `int`
1295 Visit identification number.
1296 dataRefs : `list` of `lsst.daf.butler.DeferredDatasetHandle`
1297 List of dataRefs in visit.
1299 Returns
1300 -------
1301 visitSummary : `lsst.afw.table.ExposureCatalog`
1302 Exposure catalog with per-detector summary information.
1303 """
1304 cat = afwTable.ExposureCatalog(self.schema)
1305 cat.resize(len(dataRefs))
1307 cat['visit'] = visit
1309 for i, dataRef in enumerate(dataRefs):
1310 visitInfo = dataRef.get(component='visitInfo')
1311 filterLabel = dataRef.get(component='filter')
1312 summaryStats = dataRef.get(component='summaryStats')
1313 detector = dataRef.get(component='detector')
1314 wcs = dataRef.get(component='wcs')
1315 photoCalib = dataRef.get(component='photoCalib')
1317 bbox = dataRef.get(component='bbox')
1318 validPolygon = dataRef.get(component='validPolygon')
1320 rec = cat[i]
1321 rec.setBBox(bbox)
1322 rec.setVisitInfo(visitInfo)
1323 rec.setWcs(wcs)
1324 rec.setPhotoCalib(photoCalib)
1325 rec.setValidPolygon(validPolygon)
1327 rec['physical_filter'] = filterLabel.physicalLabel if filterLabel.hasPhysicalLabel() else ""
1328 rec['band'] = filterLabel.bandLabel if filterLabel.hasBandLabel() else ""
1329 rec.setId(detector.getId())
1330 summaryStats.update_record(rec)
1332 metadata = dafBase.PropertyList()
1333 metadata.add("COMMENT", "Catalog id is detector id, sorted.")
1334 # We are looping over existing datarefs, so the following is true
1335 metadata.add("COMMENT", "Only detectors with data have entries.")
1336 cat.setMetadata(metadata)
1338 cat.sort()
1339 return cat
1342class ConsolidateSourceTableConnections(pipeBase.PipelineTaskConnections,
1343 defaultTemplates={"catalogType": ""},
1344 dimensions=("instrument", "visit")):
1345 inputCatalogs = connectionTypes.Input(
1346 doc="Input per-detector Source Tables",
1347 name="{catalogType}sourceTable",
1348 storageClass="DataFrame",
1349 dimensions=("instrument", "visit", "detector"),
1350 multiple=True
1351 )
1352 outputCatalog = connectionTypes.Output(
1353 doc="Per-visit concatenation of Source Table",
1354 name="{catalogType}sourceTable_visit",
1355 storageClass="DataFrame",
1356 dimensions=("instrument", "visit")
1357 )
1360class ConsolidateSourceTableConfig(pipeBase.PipelineTaskConfig,
1361 pipelineConnections=ConsolidateSourceTableConnections):
1362 pass
1365class ConsolidateSourceTableTask(pipeBase.PipelineTask):
1366 """Concatenate `sourceTable` list into a per-visit `sourceTable_visit`
1367 """
1368 _DefaultName = 'consolidateSourceTable'
1369 ConfigClass = ConsolidateSourceTableConfig
1371 inputDataset = 'sourceTable'
1372 outputDataset = 'sourceTable_visit'
1374 def runQuantum(self, butlerQC, inputRefs, outputRefs):
1375 from .makeWarp import reorderRefs
1377 detectorOrder = [ref.dataId['detector'] for ref in inputRefs.inputCatalogs]
1378 detectorOrder.sort()
1379 inputRefs = reorderRefs(inputRefs, detectorOrder, dataIdKey='detector')
1380 inputs = butlerQC.get(inputRefs)
1381 self.log.info("Concatenating %s per-detector Source Tables",
1382 len(inputs['inputCatalogs']))
1383 df = pd.concat(inputs['inputCatalogs'])
1384 butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs)
1387class MakeCcdVisitTableConnections(pipeBase.PipelineTaskConnections,
1388 dimensions=("instrument",),
1389 defaultTemplates={"calexpType": ""}):
1390 visitSummaryRefs = connectionTypes.Input(
1391 doc="Data references for per-visit consolidated exposure metadata",
1392 name="finalVisitSummary",
1393 storageClass="ExposureCatalog",
1394 dimensions=("instrument", "visit"),
1395 multiple=True,
1396 deferLoad=True,
1397 )
1398 outputCatalog = connectionTypes.Output(
1399 doc="CCD and Visit metadata table",
1400 name="ccdVisitTable",
1401 storageClass="DataFrame",
1402 dimensions=("instrument",)
1403 )
1406class MakeCcdVisitTableConfig(pipeBase.PipelineTaskConfig,
1407 pipelineConnections=MakeCcdVisitTableConnections):
1408 idGenerator = DetectorVisitIdGeneratorConfig.make_field()
1411class MakeCcdVisitTableTask(pipeBase.PipelineTask):
1412 """Produce a `ccdVisitTable` from the visit summary exposure catalogs.
1413 """
1414 _DefaultName = 'makeCcdVisitTable'
1415 ConfigClass = MakeCcdVisitTableConfig
1417 def run(self, visitSummaryRefs):
1418 """Make a table of ccd information from the visit summary catalogs.
1420 Parameters
1421 ----------
1422 visitSummaryRefs : `list` of `lsst.daf.butler.DeferredDatasetHandle`
1423 List of DeferredDatasetHandles pointing to exposure catalogs with
1424 per-detector summary information.
1426 Returns
1427 -------
1428 result : `~lsst.pipe.base.Struct`
1429 Results struct with attribute:
1431 ``outputCatalog``
1432 Catalog of ccd and visit information.
1433 """
1434 ccdEntries = []
1435 for visitSummaryRef in visitSummaryRefs:
1436 visitSummary = visitSummaryRef.get()
1437 visitInfo = visitSummary[0].getVisitInfo()
1439 ccdEntry = {}
1440 summaryTable = visitSummary.asAstropy()
1441 selectColumns = ['id', 'visit', 'physical_filter', 'band', 'ra', 'dec', 'zenithDistance',
1442 'zeroPoint', 'psfSigma', 'skyBg', 'skyNoise',
1443 'astromOffsetMean', 'astromOffsetStd', 'nPsfStar',
1444 'psfStarDeltaE1Median', 'psfStarDeltaE2Median',
1445 'psfStarDeltaE1Scatter', 'psfStarDeltaE2Scatter',
1446 'psfStarDeltaSizeMedian', 'psfStarDeltaSizeScatter',
1447 'psfStarScaledDeltaSizeScatter',
1448 'psfTraceRadiusDelta', 'maxDistToNearestPsf']
1449 ccdEntry = summaryTable[selectColumns].to_pandas().set_index('id')
1450 # 'visit' is the human readable visit number.
1451 # 'visitId' is the key to the visitId table. They are the same.
1452 # Technically you should join to get the visit from the visit
1453 # table.
1454 ccdEntry = ccdEntry.rename(columns={"visit": "visitId"})
1456 # RFC-924: Temporarily keep a duplicate "decl" entry for backwards
1457 # compatibility. To be removed after September 2023.
1458 ccdEntry["decl"] = ccdEntry.loc[:, "dec"]
1460 ccdEntry['ccdVisitId'] = [
1461 self.config.idGenerator.apply(
1462 visitSummaryRef.dataId,
1463 detector=detector_id,
1464 is_exposure=False,
1465 ).catalog_id # The "catalog ID" here is the ccdVisit ID
1466 # because it's usually the ID for a whole catalog
1467 # with a {visit, detector}, and that's the main
1468 # use case for IdGenerator. This usage for a
1469 # summary table is rare.
1470 for detector_id in summaryTable['id']
1471 ]
1472 ccdEntry['detector'] = summaryTable['id']
1473 pixToArcseconds = np.array([vR.getWcs().getPixelScale().asArcseconds() if vR.getWcs()
1474 else np.nan for vR in visitSummary])
1475 ccdEntry["seeing"] = visitSummary['psfSigma'] * np.sqrt(8 * np.log(2)) * pixToArcseconds
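# (sigma -> FWHM for a Gaussian PSF: FWHM = 2*sqrt(2*ln 2)*sigma = sqrt(8*ln 2)*sigma
#  ~= 2.355*sigma, converted to arcseconds with the per-detector pixel scale.)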
1477 ccdEntry["skyRotation"] = visitInfo.getBoresightRotAngle().asDegrees()
1478 ccdEntry["expMidpt"] = visitInfo.getDate().toPython()
1479 ccdEntry["expMidptMJD"] = visitInfo.getDate().get(dafBase.DateTime.MJD)
1480 expTime = visitInfo.getExposureTime()
1481 ccdEntry['expTime'] = expTime
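# expTime is in seconds; obsStart backs off half the exposure time from the
# midpoint, and the MJD version below converts that half-exposure to days.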
1482 ccdEntry["obsStart"] = ccdEntry["expMidpt"] - 0.5 * pd.Timedelta(seconds=expTime)
1483 expTime_days = expTime / (60*60*24)
1484 ccdEntry["obsStartMJD"] = ccdEntry["expMidptMJD"] - 0.5 * expTime_days
1485 ccdEntry['darkTime'] = visitInfo.getDarkTime()
1486 ccdEntry['xSize'] = summaryTable['bbox_max_x'] - summaryTable['bbox_min_x']
1487 ccdEntry['ySize'] = summaryTable['bbox_max_y'] - summaryTable['bbox_min_y']
1488 ccdEntry['llcra'] = summaryTable['raCorners'][:, 0]
1489 ccdEntry['llcdec'] = summaryTable['decCorners'][:, 0]
1490 ccdEntry['ulcra'] = summaryTable['raCorners'][:, 1]
1491 ccdEntry['ulcdec'] = summaryTable['decCorners'][:, 1]
1492 ccdEntry['urcra'] = summaryTable['raCorners'][:, 2]
1493 ccdEntry['urcdec'] = summaryTable['decCorners'][:, 2]
1494 ccdEntry['lrcra'] = summaryTable['raCorners'][:, 3]
1495 ccdEntry['lrcdec'] = summaryTable['decCorners'][:, 3]
1496 # TODO: DM-30618, Add raftName, nExposures, ccdTemp, binX, binY,
1497 # and flags, and decide if WCS, and llcx, llcy, ulcx, ulcy, etc.
1498 # values are actually wanted.
1499 ccdEntries.append(ccdEntry)
1501 outputCatalog = pd.concat(ccdEntries)
1502 outputCatalog.set_index('ccdVisitId', inplace=True, verify_integrity=True)
1503 return pipeBase.Struct(outputCatalog=outputCatalog)
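# Illustrative sketch (not part of this module): the "seeing" column above is
# the PSF width converted from a Gaussian sigma in pixels to a FWHM in
# arcseconds. All values below are made up.
import numpy as np

SIGMA_TO_FWHM = np.sqrt(8 * np.log(2))     # ~2.355, Gaussian sigma -> FWHM

psfSigma = np.array([1.8, 2.1])            # per-detector PSF sigma [pixel]
pixelScale = np.array([0.2, 0.2])          # per-detector pixel scale [arcsec/pixel]

seeing = psfSigma * SIGMA_TO_FWHM * pixelScale   # FWHM [arcsec], as stored in the "seeing" column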
1506class MakeVisitTableConnections(pipeBase.PipelineTaskConnections,
1507 dimensions=("instrument",),
1508 defaultTemplates={"calexpType": ""}):
1509 visitSummaries = connectionTypes.Input(
1510 doc="Per-visit consolidated exposure metadata",
1511 name="finalVisitSummary",
1512 storageClass="ExposureCatalog",
1513 dimensions=("instrument", "visit",),
1514 multiple=True,
1515 deferLoad=True,
1516 )
1517 outputCatalog = connectionTypes.Output(
1518 doc="Visit metadata table",
1519 name="visitTable",
1520 storageClass="DataFrame",
1521 dimensions=("instrument",)
1522 )
1525class MakeVisitTableConfig(pipeBase.PipelineTaskConfig,
1526 pipelineConnections=MakeVisitTableConnections):
1527 pass
1530class MakeVisitTableTask(pipeBase.PipelineTask):
1531 """Produce a `visitTable` from the visit summary exposure catalogs.
1532 """
1533 _DefaultName = 'makeVisitTable'
1534 ConfigClass = MakeVisitTableConfig
1536 def run(self, visitSummaries):
1537 """Make a table of visit information from the visit summary catalogs.
1539 Parameters
1540 ----------
1541 visitSummaries : `list` of `lsst.daf.butler.DeferredDatasetHandle`
1542 List of handles to exposure catalogs with per-detector summary information.
1543 Returns
1544 -------
1545 result : `~lsst.pipe.base.Struct`
1546 Results struct with attribute:
1548 ``outputCatalog``
1549 Catalog of visit information.
1550 """
1551 visitEntries = []
1552 for visitSummary in visitSummaries:
1553 visitSummary = visitSummary.get()
1554 visitRow = visitSummary[0]
1555 visitInfo = visitRow.getVisitInfo()
1557 visitEntry = {}
1558 visitEntry["visitId"] = visitRow['visit']
1559 visitEntry["visit"] = visitRow['visit']
1560 visitEntry["physical_filter"] = visitRow['physical_filter']
1561 visitEntry["band"] = visitRow['band']
1562 raDec = visitInfo.getBoresightRaDec()
1563 visitEntry["ra"] = raDec.getRa().asDegrees()
1564 visitEntry["dec"] = raDec.getDec().asDegrees()
1566 # RFC-924: Temporarily keep a duplicate "decl" entry for backwards
1567 # compatibility. To be removed after September 2023.
1568 visitEntry["decl"] = visitEntry["dec"]
1570 visitEntry["skyRotation"] = visitInfo.getBoresightRotAngle().asDegrees()
1571 azAlt = visitInfo.getBoresightAzAlt()
1572 visitEntry["azimuth"] = azAlt.getLongitude().asDegrees()
1573 visitEntry["altitude"] = azAlt.getLatitude().asDegrees()
1574 visitEntry["zenithDistance"] = 90 - azAlt.getLatitude().asDegrees()
1575 visitEntry["airmass"] = visitInfo.getBoresightAirmass()
1576 expTime = visitInfo.getExposureTime()
1577 visitEntry["expTime"] = expTime
1578 visitEntry["expMidpt"] = visitInfo.getDate().toPython()
1579 visitEntry["expMidptMJD"] = visitInfo.getDate().get(dafBase.DateTime.MJD)
1580 visitEntry["obsStart"] = visitEntry["expMidpt"] - 0.5 * pd.Timedelta(seconds=expTime)
1581 expTime_days = expTime / (60*60*24)
1582 visitEntry["obsStartMJD"] = visitEntry["expMidptMJD"] - 0.5 * expTime_days
1583 visitEntries.append(visitEntry)
1585 # TODO: DM-30623, Add programId, exposureType, cameraTemp,
1586 # mirror1Temp, mirror2Temp, mirror3Temp, domeTemp, externalTemp,
1587 # dimmSeeing, pwvGPS, pwvMW, flags, nExposures.
1589 outputCatalog = pd.DataFrame(data=visitEntries)
1590 outputCatalog.set_index('visitId', inplace=True, verify_integrity=True)
1591 return pipeBase.Struct(outputCatalog=outputCatalog)
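# Illustrative sketch (not part of this module): obsStart/obsStartMJD above are
# the exposure midpoint stepped back by half the exposure time, once as a
# datetime and once in days (MJD). The timestamp and MJD below are made-up,
# approximate values for the same instant.
import pandas as pd

expMidpt = pd.Timestamp("2023-07-13T03:00:15")   # exposure midpoint (made up)
expMidptMJD = 60138.125                          # roughly the same instant in MJD (made up)
expTime = 30.0                                   # exposure time [s] (made up)

obsStart = expMidpt - 0.5 * pd.Timedelta(seconds=expTime)
obsStartMJD = expMidptMJD - 0.5 * expTime / 86400.0   # half the exposure time, in days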
1594class WriteForcedSourceTableConnections(pipeBase.PipelineTaskConnections,
1595 dimensions=("instrument", "visit", "detector", "skymap", "tract")):
1597 inputCatalog = connectionTypes.Input(
1598 doc="Primary per-detector, single-epoch forced-photometry catalog. "
1599 "By default, it is the output of ForcedPhotCcdTask on calexps",
1600 name="forced_src",
1601 storageClass="SourceCatalog",
1602 dimensions=("instrument", "visit", "detector", "skymap", "tract")
1603 )
1604 inputCatalogDiff = connectionTypes.Input(
1605 doc="Secondary multi-epoch, per-detector, forced photometry catalog. "
1606 "By default, it is the output of ForcedPhotCcdTask run on image differences.",
1607 name="forced_diff",
1608 storageClass="SourceCatalog",
1609 dimensions=("instrument", "visit", "detector", "skymap", "tract")
1610 )
1611 outputCatalog = connectionTypes.Output(
1612 doc="InputCatalogs horizonatally joined on `objectId` in DataFrame parquet format",
1613 name="mergedForcedSource",
1614 storageClass="DataFrame",
1615 dimensions=("instrument", "visit", "detector", "skymap", "tract")
1616 )
1619class WriteForcedSourceTableConfig(pipeBase.PipelineTaskConfig,
1620 pipelineConnections=WriteForcedSourceTableConnections):
1621 key = lsst.pex.config.Field(
1622 doc="Column on which to join the two input tables on and make the primary key of the output",
1623 dtype=str,
1624 default="objectId",
1625 )
1626 idGenerator = DetectorVisitIdGeneratorConfig.make_field()
1629class WriteForcedSourceTableTask(pipeBase.PipelineTask):
1630 """Merge and convert per-detector forced source catalogs to DataFrame Parquet format.
1632 Because the predecessor ForcedPhotCcdTask operates per-detector and
1633 per-tract (i.e., it has tract in its dimensions), detectors on the
1634 tract boundary may have multiple forced source catalogs.
1636 The successor task TransformForcedSourceTable runs per-patch and
1637 temporally aggregates the overlapping mergedForcedSource catalogs
1638 from all available epochs.
1639 """
1640 _DefaultName = "writeForcedSourceTable"
1641 ConfigClass = WriteForcedSourceTableConfig
1643 def runQuantum(self, butlerQC, inputRefs, outputRefs):
1644 inputs = butlerQC.get(inputRefs)
1645 # Add ccdVisitId to allow joining with CcdVisitTable
1646 idGenerator = self.config.idGenerator.apply(butlerQC.quantum.dataId)
1647 inputs['ccdVisitId'] = idGenerator.catalog_id
1648 inputs['band'] = butlerQC.quantum.dataId.full['band']
1649 outputs = self.run(**inputs)
1650 butlerQC.put(outputs, outputRefs)
1652 def run(self, inputCatalog, inputCatalogDiff, ccdVisitId=None, band=None):
1653 dfs = []
1654 for table, dataset in zip((inputCatalog, inputCatalogDiff), ('calexp', 'diff')):
1655 df = table.asAstropy().to_pandas().set_index(self.config.key, drop=False)
1656 df = df.reindex(sorted(df.columns), axis=1)
1657 df['ccdVisitId'] = ccdVisitId if ccdVisitId is not None else pd.NA
1658 df['band'] = band if band is not None else pd.NA
1659 df.columns = pd.MultiIndex.from_tuples([(dataset, c) for c in df.columns],
1660 names=('dataset', 'column'))
1662 dfs.append(df)
1664 outputCatalog = functools.reduce(lambda d1, d2: d1.join(d2), dfs)
1665 return pipeBase.Struct(outputCatalog=outputCatalog)
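# Illustrative sketch (not part of this module) of the join pattern used in
# run() above: each catalog's columns get a ('dataset', 'column') MultiIndex,
# and the frames are then joined on the shared objectId index so that the
# calexp and diff measurements for one object land in one row. The toy values
# and column names below are made up.
import functools
import pandas as pd

calexp = pd.DataFrame({"objectId": [1, 2], "psfFlux": [10.0, 20.0]}).set_index("objectId", drop=False)
diff = pd.DataFrame({"objectId": [1, 2], "psfFlux": [0.5, -0.3]}).set_index("objectId", drop=False)

dfs = []
for df, dataset in zip((calexp, diff), ("calexp", "diff")):
    df = df.reindex(sorted(df.columns), axis=1)
    # Prefix every column with the dataset name via a two-level column index.
    df.columns = pd.MultiIndex.from_tuples([(dataset, c) for c in df.columns],
                                           names=("dataset", "column"))
    dfs.append(df)

merged = functools.reduce(lambda d1, d2: d1.join(d2), dfs)
# merged has one row per objectId and columns ('calexp', 'psfFlux'), ('diff', 'psfFlux'), etc.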
1668class TransformForcedSourceTableConnections(pipeBase.PipelineTaskConnections,
1669 dimensions=("instrument", "skymap", "patch", "tract")):
1671 inputCatalogs = connectionTypes.Input(
1672 doc="DataFrames of merged ForcedSources produced by WriteForcedSourceTableTask",
1673 name="mergedForcedSource",
1674 storageClass="DataFrame",
1675 dimensions=("instrument", "visit", "detector", "skymap", "tract"),
1676 multiple=True,
1677 deferLoad=True
1678 )
1679 referenceCatalog = connectionTypes.Input(
1680 doc="Reference catalog which was used to seed the forcedPhot. Columns "
1681 "objectId, detect_isPrimary, detect_isTractInner, detect_isPatchInner "
1682 "are expected.",
1683 name="objectTable",
1684 storageClass="DataFrame",
1685 dimensions=("tract", "patch", "skymap"),
1686 deferLoad=True
1687 )
1688 outputCatalog = connectionTypes.Output(
1689 doc="Narrower, temporally-aggregated, per-patch ForcedSource Table transformed and converted per a "
1690 "specified set of functors",
1691 name="forcedSourceTable",
1692 storageClass="DataFrame",
1693 dimensions=("tract", "patch", "skymap")
1694 )
1697class TransformForcedSourceTableConfig(TransformCatalogBaseConfig,
1698 pipelineConnections=TransformForcedSourceTableConnections):
1699 referenceColumns = pexConfig.ListField(
1700 dtype=str,
1701 default=["detect_isPrimary", "detect_isTractInner", "detect_isPatchInner"],
1702 optional=True,
1703 doc="Columns to pull from reference catalog",
1704 )
1705 keyRef = lsst.pex.config.Field(
1706 doc="Column on which to join the two input tables on and make the primary key of the output",
1707 dtype=str,
1708 default="objectId",
1709 )
1710 key = lsst.pex.config.Field(
1711 doc="Rename the output DataFrame index to this name",
1712 dtype=str,
1713 default="forcedSourceId",
1714 )
1716 def setDefaults(self):
1717 super().setDefaults()
1718 self.functorFile = os.path.join('$PIPE_TASKS_DIR', 'schemas', 'ForcedSource.yaml')
1719 self.columnsFromDataId = ['tract', 'patch']
1722class TransformForcedSourceTableTask(TransformCatalogBaseTask):
1723 """Transform/standardize a ForcedSource catalog
1725 Transforms each wide, per-detector forcedSource DataFrame per the
1726 specification file (per-camera defaults found in ForcedSource.yaml).
1727 All epochs that overlap the patch are aggregated into a single,
1728 narrow, per-patch DataFrame.
1730 No de-duplication of rows is performed. Duplicate-resolution flags are
1731 pulled in from the referenceCatalog: `detect_isPrimary`,
1732 `detect_isTractInner`, `detect_isPatchInner`, so that the user may
1733 de-duplicate for analysis or compare duplicates for QA.
1735 The resulting table includes multiple bands. Epochs (MJDs) and other
1736 useful per-visit quantities can be retrieved by joining with the
1737 CcdVisitTable on ccdVisitId.
1738 """
1739 _DefaultName = "transformForcedSourceTable"
1740 ConfigClass = TransformForcedSourceTableConfig
1742 def runQuantum(self, butlerQC, inputRefs, outputRefs):
1743 inputs = butlerQC.get(inputRefs)
1744 if self.funcs is None:
1745 raise ValueError("config.functorFile is None. "
1746 "Must be a valid path to yaml in order to run Task as a PipelineTask.")
1747 outputs = self.run(inputs['inputCatalogs'], inputs['referenceCatalog'], funcs=self.funcs,
1748 dataId=outputRefs.outputCatalog.dataId.full)
1750 butlerQC.put(outputs, outputRefs)
1752 def run(self, inputCatalogs, referenceCatalog, funcs=None, dataId=None, band=None):
1753 dfs = []
1754 ref = referenceCatalog.get(parameters={"columns": self.config.referenceColumns})
1755 self.log.info("Aggregating %s input catalogs" % (len(inputCatalogs)))
1756 for handle in inputCatalogs:
1757 result = self.transform(None, handle, funcs, dataId)
1758 # Filter for only rows that were detected on (overlap) the patch
1759 dfs.append(result.df.join(ref, how='inner'))
1761 outputCatalog = pd.concat(dfs)
1763 # The join above was done on config.keyRef; now switch the index over
1764 # to config.key: name the join index config.keyRef,
1765 outputCatalog.index.rename(self.config.keyRef, inplace=True)
1766 # move config.keyRef into the columns,
1767 outputCatalog.reset_index(inplace=True)
1768 # set forcedSourceId (computed per the specification in
1769 # ForcedSource.yaml) as the new, unique index,
1770 outputCatalog.set_index("forcedSourceId", inplace=True, verify_integrity=True)
1771 # and finally rename the index to config.key.
1772 outputCatalog.index.rename(self.config.key, inplace=True)
1774 self.log.info("Made a table of %d columns and %d rows",
1775 len(outputCatalog.columns), len(outputCatalog))
1776 return pipeBase.Struct(outputCatalog=outputCatalog)
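# Illustrative sketch (not part of this module) of the index handling at the
# end of run() above: the objectId join key is demoted to an ordinary column
# and the (unique) forcedSourceId is promoted to the index. Toy values only.
import pandas as pd

df = pd.DataFrame({"forcedSourceId": [101, 102, 103], "psfFlux": [1.0, 2.0, 3.0]},
                  index=[1, 1, 2])               # the index plays the role of objectId

df.index.rename("objectId", inplace=True)        # name the join key ...
df.reset_index(inplace=True)                     # ... and keep it as a column
df.set_index("forcedSourceId", inplace=True, verify_integrity=True)
df.index.rename("forcedSourceId", inplace=True)  # a no-op here; the task renames to config.key
# df now has a unique forcedSourceId index and columns ['objectId', 'psfFlux']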
1779class ConsolidateTractConnections(pipeBase.PipelineTaskConnections,
1780 defaultTemplates={"catalogType": ""},
1781 dimensions=("instrument", "tract")):
1782 inputCatalogs = connectionTypes.Input(
1783 doc="Input per-patch DataFrame Tables to be concatenated",
1784 name="{catalogType}ForcedSourceTable",
1785 storageClass="DataFrame",
1786 dimensions=("tract", "patch", "skymap"),
1787 multiple=True,
1788 )
1790 outputCatalog = connectionTypes.Output(
1791 doc="Output per-tract concatenation of DataFrame Tables",
1792 name="{catalogType}ForcedSourceTable_tract",
1793 storageClass="DataFrame",
1794 dimensions=("tract", "skymap"),
1795 )
1798class ConsolidateTractConfig(pipeBase.PipelineTaskConfig,
1799 pipelineConnections=ConsolidateTractConnections):
1800 pass
1803class ConsolidateTractTask(pipeBase.PipelineTask):
1804 """Concatenate any per-patch, dataframe list into a single
1805 per-tract DataFrame.
1806 """
1807 _DefaultName = 'ConsolidateTract'
1808 ConfigClass = ConsolidateTractConfig
1810 def runQuantum(self, butlerQC, inputRefs, outputRefs):
1811 inputs = butlerQC.get(inputRefs)
1812 # No check that at least one inputCatalog exists: if none did, the
1813 # quantum graph would be empty and this quantum would never run.
1814 self.log.info("Concatenating %s per-patch %s Tables",
1815 len(inputs['inputCatalogs']),
1816 inputRefs.inputCatalogs[0].datasetType.name)
1817 df = pd.concat(inputs['inputCatalogs'])
1818 butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs)
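# Illustrative sketch (not part of this module): the consolidation above is a
# plain row-wise concatenation of the per-patch DataFrames. Toy values only.
import pandas as pd

patch0 = pd.DataFrame({"forcedSourceId": [1, 2], "psfFlux": [1.0, 2.0]}).set_index("forcedSourceId")
patch1 = pd.DataFrame({"forcedSourceId": [3, 4], "psfFlux": [3.0, 4.0]}).set_index("forcedSourceId")

tractTable = pd.concat([patch0, patch1])   # 4 rows; the per-patch indexes are stacked unchanged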