Coverage for python/lsst/pipe/tasks/postprocess.py: 26%
763 statements
coverage.py v7.2.2, created at 2023-03-24 00:14 +0000
1# This file is part of pipe_tasks
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <https://www.gnu.org/licenses/>.
22import functools
23import pandas as pd
24from collections import defaultdict
25import logging
26import numpy as np
27import numbers
28import os
30import lsst.geom
31import lsst.pex.config as pexConfig
32import lsst.pipe.base as pipeBase
33import lsst.daf.base as dafBase
34from lsst.obs.base import ExposureIdInfo
35from lsst.pipe.base import connectionTypes
36import lsst.afw.table as afwTable
37from lsst.afw.image import ExposureSummaryStats
38from lsst.meas.base import SingleFrameMeasurementTask
39from lsst.pipe.base import CmdLineTask, ArgumentParser, DataIdContainer
40from lsst.coadd.utils.coaddDataIdContainer import CoaddDataIdContainer
41from lsst.daf.butler import DeferredDatasetHandle, DataCoordinate
42from lsst.skymap import BaseSkyMap
44from .parquetTable import ParquetTable
45from .multiBandUtils import makeMergeArgumentParser, MergeSourcesRunner
46from .functors import CompositeFunctor, Column
48log = logging.getLogger(__name__)
51def flattenFilters(df, noDupCols=['coord_ra', 'coord_dec'], camelCase=False, inputBands=None):
52 """Flattens a dataframe with multilevel column index.
53 """
54 newDf = pd.DataFrame()
55 # band is the level 0 index
56 dfBands = df.columns.unique(level=0).values
57 for band in dfBands:
58 subdf = df[band]
59 columnFormat = '{0}{1}' if camelCase else '{0}_{1}'
60 newColumns = {c: columnFormat.format(band, c)
61 for c in subdf.columns if c not in noDupCols}
62 cols = list(newColumns.keys())
63 newDf = pd.concat([newDf, subdf[cols].rename(columns=newColumns)], axis=1)
65 # Band must be present in the input and output or else column is all NaN:
66 presentBands = dfBands if inputBands is None else list(set(inputBands).intersection(dfBands))
67 # Get the unexploded columns from any present band's partition
68 noDupDf = df[presentBands[0]][noDupCols]
69 newDf = pd.concat([noDupDf, newDf], axis=1)
70 return newDf
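# Illustrative example (not part of the original module): a minimal sketch of
# how flattenFilters reshapes a two-level (band, column) DataFrame into flat,
# band-prefixed columns. The toy band and column names below are hypothetical.
def _flattenFiltersExample():
    cols = pd.MultiIndex.from_tuples(
        [("g", "coord_ra"), ("g", "coord_dec"), ("g", "PsfFlux"),
         ("r", "coord_ra"), ("r", "coord_dec"), ("r", "PsfFlux")],
        names=("band", "column"))
    df = pd.DataFrame([[10.0, -5.0, 1.2, 10.0, -5.0, 3.4]], columns=cols)
    # camelCase=True prepends the band directly (gPsfFlux, rPsfFlux), while the
    # noDupCols (coord_ra, coord_dec) are kept once, un-prefixed, taken from the
    # first band present.
    return flattenFilters(df, camelCase=True)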
73class WriteObjectTableConnections(pipeBase.PipelineTaskConnections,
74 defaultTemplates={"coaddName": "deep"},
75 dimensions=("tract", "patch", "skymap")):
76 inputCatalogMeas = connectionTypes.Input(
77 doc="Catalog of source measurements on the deepCoadd.",
78 dimensions=("tract", "patch", "band", "skymap"),
79 storageClass="SourceCatalog",
80 name="{coaddName}Coadd_meas",
81 multiple=True
82 )
83 inputCatalogForcedSrc = connectionTypes.Input(
84 doc="Catalog of forced measurements (shape and position parameters held fixed) on the deepCoadd.",
85 dimensions=("tract", "patch", "band", "skymap"),
86 storageClass="SourceCatalog",
87 name="{coaddName}Coadd_forced_src",
88 multiple=True
89 )
90 inputCatalogRef = connectionTypes.Input(
91 doc="Catalog marking the primary detection (which band provides a good shape and position)"
92 "for each detection in deepCoadd_mergeDet.",
93 dimensions=("tract", "patch", "skymap"),
94 storageClass="SourceCatalog",
95 name="{coaddName}Coadd_ref"
96 )
97 outputCatalog = connectionTypes.Output(
98 doc="A vertical concatenation of the deepCoadd_{ref|meas|forced_src} catalogs, "
99 "stored as a DataFrame with a multi-level column index per-patch.",
100 dimensions=("tract", "patch", "skymap"),
101 storageClass="DataFrame",
102 name="{coaddName}Coadd_obj"
103 )
106class WriteObjectTableConfig(pipeBase.PipelineTaskConfig,
107 pipelineConnections=WriteObjectTableConnections):
108 engine = pexConfig.Field(
109 dtype=str,
110 default="pyarrow",
111 doc="Parquet engine for writing (pyarrow or fastparquet)"
112 )
113 coaddName = pexConfig.Field(
114 dtype=str,
115 default="deep",
116 doc="Name of coadd"
117 )
120class WriteObjectTableTask(CmdLineTask, pipeBase.PipelineTask):
121 """Write filter-merged source tables to parquet
122 """
123 _DefaultName = "writeObjectTable"
124 ConfigClass = WriteObjectTableConfig
125 RunnerClass = MergeSourcesRunner
127 # Names of table datasets to be merged
128 inputDatasets = ('forced_src', 'meas', 'ref')
130 # Tag of output dataset written by `MergeSourcesTask.write`
131 outputDataset = 'obj'
133 def __init__(self, butler=None, schema=None, **kwargs):
134 # It is a shame that this class can't use the default init for
135 # CmdLineTask, but to do so would require its own special task
136 # runner, which is many more lines of specialization, so this is
137 # how it is for now.
138 super().__init__(**kwargs)
140 def runDataRef(self, patchRefList):
141 """!
142 @brief Merge coadd sources from multiple bands. Calls @ref `run` which
143 must be defined in subclasses that inherit from MergeSourcesTask.
144 @param[in] patchRefList list of data references for each filter
145 """
146 catalogs = dict(self.readCatalog(patchRef) for patchRef in patchRefList)
147 dataId = patchRefList[0].dataId
148 mergedCatalog = self.run(catalogs, tract=dataId['tract'], patch=dataId['patch'])
149 self.write(patchRefList[0], ParquetTable(dataFrame=mergedCatalog))
151 def runQuantum(self, butlerQC, inputRefs, outputRefs):
152 inputs = butlerQC.get(inputRefs)
154 measDict = {ref.dataId['band']: {'meas': cat} for ref, cat in
155 zip(inputRefs.inputCatalogMeas, inputs['inputCatalogMeas'])}
156 forcedSourceDict = {ref.dataId['band']: {'forced_src': cat} for ref, cat in
157 zip(inputRefs.inputCatalogForcedSrc, inputs['inputCatalogForcedSrc'])}
159 catalogs = {}
160 for band in measDict.keys():
161 catalogs[band] = {'meas': measDict[band]['meas'],
162 'forced_src': forcedSourceDict[band]['forced_src'],
163 'ref': inputs['inputCatalogRef']}
164 dataId = butlerQC.quantum.dataId
165 df = self.run(catalogs=catalogs, tract=dataId['tract'], patch=dataId['patch'])
166 outputs = pipeBase.Struct(outputCatalog=df)
167 butlerQC.put(outputs, outputRefs)
169 @classmethod
170 def _makeArgumentParser(cls):
171 """Create a suitable ArgumentParser.
173 We will use the ArgumentParser to get a list of data
174 references for patches; the RunnerClass will sort them into lists
175 of data references for the same patch.
177 References the first of self.inputDatasets, rather than
178 self.inputDataset.
179 """
180 return makeMergeArgumentParser(cls._DefaultName, cls.inputDatasets[0])
182 def readCatalog(self, patchRef):
183 """Read input catalogs
185 Read all the input datasets given by the 'inputDatasets'
186 attribute.
188 Parameters
189 ----------
190 patchRef : `lsst.daf.persistence.ButlerDataRef`
191 Data reference for patch.
193 Returns
194 -------
195 Tuple consisting of band name and a dict of catalogs, keyed by
196 dataset name.
197 """
198 band = patchRef.get(self.config.coaddName + "Coadd_filter", immediate=True).bandLabel
199 catalogDict = {}
200 for dataset in self.inputDatasets:
201 catalog = patchRef.get(self.config.coaddName + "Coadd_" + dataset, immediate=True)
202 self.log.info("Read %d sources from %s for band %s: %s",
203 len(catalog), dataset, band, patchRef.dataId)
204 catalogDict[dataset] = catalog
205 return band, catalogDict
207 def run(self, catalogs, tract, patch):
208 """Merge multiple catalogs.
210 Parameters
211 ----------
212 catalogs : `dict`
213 Mapping from filter names to dict of catalogs.
214 tract : int
215 tractId to use for the tractId column.
216 patch : str
217 patchId to use for the patchId column.
219 Returns
220 -------
221 catalog : `pandas.DataFrame`
222 Merged dataframe.
223 """
225 dfs = []
226 for filt, tableDict in catalogs.items():
227 for dataset, table in tableDict.items():
228 # Convert afwTable to pandas DataFrame
229 df = table.asAstropy().to_pandas().set_index('id', drop=True)
231 # Sort columns by name, to ensure matching schema among patches
232 df = df.reindex(sorted(df.columns), axis=1)
233 df['tractId'] = tract
234 df['patchId'] = patch
236 # Make columns a 3-level MultiIndex
237 df.columns = pd.MultiIndex.from_tuples([(dataset, filt, c) for c in df.columns],
238 names=('dataset', 'band', 'column'))
239 dfs.append(df)
241 catalog = functools.reduce(lambda d1, d2: d1.join(d2), dfs)
242 return catalog
244 def write(self, patchRef, catalog):
245 """Write the output.
247 Parameters
248 ----------
249 catalog : `ParquetTable`
250 Catalog to write.
251 patchRef : `lsst.daf.persistence.ButlerDataRef`
252 Data reference for patch.
253 """
254 patchRef.put(catalog, self.config.coaddName + "Coadd_" + self.outputDataset)
255 # since the filter isn't actually part of the data ID for the dataset
256 # we're saving, it's confusing to see it in the log message, even if
257 # the butler simply ignores it.
258 mergeDataId = patchRef.dataId.copy()
259 del mergeDataId["filter"]
260 self.log.info("Wrote merged catalog: %s", mergeDataId)
262 def writeMetadata(self, dataRefList):
263 """No metadata to write, and not sure how to write it for a list of
264 dataRefs.
265 """
266 pass
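# Illustrative example (not part of the original module): a pandas-only sketch
# of the (dataset, band, column) MultiIndex layout that WriteObjectTableTask.run
# builds before joining. The tiny frames stand in for the afw catalogs that
# run() converts with asAstropy().to_pandas(); the column names are hypothetical.
def _writeObjectTableLayoutExample():
    meas_g = pd.DataFrame({"PsfFlux": [1.2]}, index=pd.Index([42], name="id"))
    ref = pd.DataFrame({"detect_isPrimary": [True]}, index=pd.Index([42], name="id"))
    dfs = []
    for dataset, band, df in [("meas", "g", meas_g), ("ref", "g", ref)]:
        df = df.copy()
        df.columns = pd.MultiIndex.from_tuples(
            [(dataset, band, c) for c in df.columns],
            names=("dataset", "band", "column"))
        dfs.append(df)
    # run() reduces the per-catalog frames with a column-wise join on the id index.
    return functools.reduce(lambda d1, d2: d1.join(d2), dfs)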
269class WriteSourceTableConnections(pipeBase.PipelineTaskConnections,
270 defaultTemplates={"catalogType": ""},
271 dimensions=("instrument", "visit", "detector")):
273 catalog = connectionTypes.Input(
274 doc="Input full-depth catalog of sources produced by CalibrateTask",
275 name="{catalogType}src",
276 storageClass="SourceCatalog",
277 dimensions=("instrument", "visit", "detector")
278 )
279 outputCatalog = connectionTypes.Output(
280 doc="Catalog of sources, `src` in Parquet format. The 'id' column is "
281 "replaced with an index; all other columns are unchanged.",
282 name="{catalogType}source",
283 storageClass="DataFrame",
284 dimensions=("instrument", "visit", "detector")
285 )
288class WriteSourceTableConfig(pipeBase.PipelineTaskConfig,
289 pipelineConnections=WriteSourceTableConnections):
290 pass
293class WriteSourceTableTask(CmdLineTask, pipeBase.PipelineTask):
294 """Write source table to parquet.
295 """
296 _DefaultName = "writeSourceTable"
297 ConfigClass = WriteSourceTableConfig
299 def runQuantum(self, butlerQC, inputRefs, outputRefs):
300 inputs = butlerQC.get(inputRefs)
301 inputs['ccdVisitId'] = butlerQC.quantum.dataId.pack("visit_detector")
302 result = self.run(**inputs).table
303 outputs = pipeBase.Struct(outputCatalog=result.toDataFrame())
304 butlerQC.put(outputs, outputRefs)
306 def run(self, catalog, ccdVisitId=None, **kwargs):
307 """Convert `src` catalog to parquet
309 Parameters
310 ----------
311 catalog: `afwTable.SourceCatalog`
312 catalog to be converted
313 ccdVisitId: `int`
314 ccdVisitId to be added as a column
316 Returns
317 -------
318 result : `lsst.pipe.base.Struct`
319 ``table``
320 `ParquetTable` version of the input catalog
321 """
322 self.log.info("Generating parquet table from src catalog ccdVisitId=%s", ccdVisitId)
323 df = catalog.asAstropy().to_pandas().set_index('id', drop=True)
324 df['ccdVisitId'] = ccdVisitId
325 return pipeBase.Struct(table=ParquetTable(dataFrame=df))
328class WriteRecalibratedSourceTableConnections(WriteSourceTableConnections,
329 defaultTemplates={"catalogType": "",
330 "skyWcsName": "jointcal",
331 "photoCalibName": "fgcm"},
332 dimensions=("instrument", "visit", "detector", "skymap")):
333 skyMap = connectionTypes.Input(
334 doc="skyMap needed to choose which tract-level calibrations to use when multiple available",
335 name=BaseSkyMap.SKYMAP_DATASET_TYPE_NAME,
336 storageClass="SkyMap",
337 dimensions=("skymap",),
338 )
339 exposure = connectionTypes.Input(
340 doc="Input exposure to perform photometry on.",
341 name="calexp",
342 storageClass="ExposureF",
343 dimensions=["instrument", "visit", "detector"],
344 )
345 externalSkyWcsTractCatalog = connectionTypes.Input(
346 doc=("Per-tract, per-visit wcs calibrations. These catalogs use the detector "
347 "id for the catalog id, sorted on id for fast lookup."),
348 name="{skyWcsName}SkyWcsCatalog",
349 storageClass="ExposureCatalog",
350 dimensions=["instrument", "visit", "tract"],
351 multiple=True
352 )
353 externalSkyWcsGlobalCatalog = connectionTypes.Input(
354 doc=("Per-visit wcs calibrations computed globally (with no tract information). "
355 "These catalogs use the detector id for the catalog id, sorted on id for "
356 "fast lookup."),
357 name="finalVisitSummary",
358 storageClass="ExposureCatalog",
359 dimensions=["instrument", "visit"],
360 )
361 externalPhotoCalibTractCatalog = connectionTypes.Input(
362 doc=("Per-tract, per-visit photometric calibrations. These catalogs use the "
363 "detector id for the catalog id, sorted on id for fast lookup."),
364 name="{photoCalibName}PhotoCalibCatalog",
365 storageClass="ExposureCatalog",
366 dimensions=["instrument", "visit", "tract"],
367 multiple=True
368 )
369 externalPhotoCalibGlobalCatalog = connectionTypes.Input(
370 doc=("Per-visit photometric calibrations computed globally (with no tract "
371 "information). These catalogs use the detector id for the catalog id, "
372 "sorted on id for fast lookup."),
373 name="finalVisitSummary",
374 storageClass="ExposureCatalog",
375 dimensions=["instrument", "visit"],
376 )
378 def __init__(self, *, config=None):
379 super().__init__(config=config)
380 # Same connection boilerplate as all other applications of
381 # Global/Tract calibrations
382 if config.doApplyExternalSkyWcs and config.doReevaluateSkyWcs:
383 if config.useGlobalExternalSkyWcs:
384 self.inputs.remove("externalSkyWcsTractCatalog")
385 else:
386 self.inputs.remove("externalSkyWcsGlobalCatalog")
387 else:
388 self.inputs.remove("externalSkyWcsTractCatalog")
389 self.inputs.remove("externalSkyWcsGlobalCatalog")
390 if config.doApplyExternalPhotoCalib and config.doReevaluatePhotoCalib:
391 if config.useGlobalExternalPhotoCalib:
392 self.inputs.remove("externalPhotoCalibTractCatalog")
393 else:
394 self.inputs.remove("externalPhotoCalibGlobalCatalog")
395 else:
396 self.inputs.remove("externalPhotoCalibTractCatalog")
397 self.inputs.remove("externalPhotoCalibGlobalCatalog")
400class WriteRecalibratedSourceTableConfig(WriteSourceTableConfig,
401 pipelineConnections=WriteRecalibratedSourceTableConnections):
403 doReevaluatePhotoCalib = pexConfig.Field(
404 dtype=bool,
405 default=True,
406 doc=("Add or replace local photoCalib columns")
407 )
408 doReevaluateSkyWcs = pexConfig.Field(
409 dtype=bool,
410 default=True,
411 doc=("Add or replace local WCS columns and update the coord columns, coord_ra and coord_dec")
412 )
413 doApplyExternalPhotoCalib = pexConfig.Field(
414 dtype=bool,
415 default=True,
416 doc=("If and only if doReevaluatePhotoCalib, apply the photometric calibrations from an external ",
417 "algorithm such as FGCM or jointcal, else use the photoCalib already attached to the exposure."),
418 )
419 doApplyExternalSkyWcs = pexConfig.Field(
420 dtype=bool,
421 default=True,
422 doc=("if and only if doReevaluateSkyWcs, apply the WCS from an external algorithm such as jointcal, ",
423 "else use the wcs already attached to the exposure."),
424 )
425 useGlobalExternalPhotoCalib = pexConfig.Field(
426 dtype=bool,
427 default=True,
428 doc=("When using doApplyExternalPhotoCalib, use 'global' calibrations "
429 "that are not run per-tract. When False, use per-tract photometric "
430 "calibration files.")
431 )
432 useGlobalExternalSkyWcs = pexConfig.Field(
433 dtype=bool,
434 default=True,
435 doc=("When using doApplyExternalSkyWcs, use 'global' calibrations "
436 "that are not run per-tract. When False, use per-tract wcs "
437 "files.")
438 )
440 def validate(self):
441 super().validate()
442 if self.doApplyExternalSkyWcs and not self.doReevaluateSkyWcs:
443 log.warning("doApplyExternalSkyWcs=True but doReevaluateSkyWcs=False"
444 "External SkyWcs will not be read or evaluated.")
445 if self.doApplyExternalPhotoCalib and not self.doReevaluatePhotoCalib:
446 log.warning("doApplyExternalPhotoCalib=True but doReevaluatePhotoCalib=False."
447 "External PhotoCalib will not be read or evaluated.")
450class WriteRecalibratedSourceTableTask(WriteSourceTableTask):
451 """Write source table to parquet
452 """
453 _DefaultName = "writeRecalibratedSourceTable"
454 ConfigClass = WriteRecalibratedSourceTableConfig
456 def runQuantum(self, butlerQC, inputRefs, outputRefs):
457 inputs = butlerQC.get(inputRefs)
458 inputs['ccdVisitId'] = butlerQC.quantum.dataId.pack("visit_detector")
459 inputs['exposureIdInfo'] = ExposureIdInfo.fromDataId(butlerQC.quantum.dataId, "visit_detector")
461 if self.config.doReevaluatePhotoCalib or self.config.doReevaluateSkyWcs:
462 if self.config.doApplyExternalPhotoCalib or self.config.doApplyExternalSkyWcs:
463 inputs['exposure'] = self.attachCalibs(inputRefs, **inputs)
465 inputs['catalog'] = self.addCalibColumns(**inputs)
467 result = self.run(**inputs).table
468 outputs = pipeBase.Struct(outputCatalog=result.toDataFrame())
469 butlerQC.put(outputs, outputRefs)
471 def attachCalibs(self, inputRefs, skyMap, exposure, externalSkyWcsGlobalCatalog=None,
472 externalSkyWcsTractCatalog=None, externalPhotoCalibGlobalCatalog=None,
473 externalPhotoCalibTractCatalog=None, **kwargs):
474 """Apply external calibrations to exposure per configuration
476 When multiple tract-level calibrations overlap, select the one with the
477 center closest to detector.
479 Parameters
480 ----------
481 inputRefs : `lsst.pipe.base.InputQuantizedConnection`
482 Input connections, used to get the dataIds of tract-level calibs.
483 skyMap : `lsst.skymap.SkyMap`
484 exposure : `lsst.afw.image.exposure.Exposure`
485 Input exposure to adjust calibrations.
486 externalSkyWcsGlobalCatalog : `lsst.afw.table.ExposureCatalog`, optional
487 Exposure catalog with external skyWcs to be applied per config
488 externalSkyWcsTractCatalog : `lsst.afw.table.ExposureCatalog`, optional
489 Exposure catalog with external skyWcs to be applied per config
490 externalPhotoCalibGlobalCatalog : `lsst.afw.table.ExposureCatalog`, optional
491 Exposure catalog with external photoCalib to be applied per config
492 externalPhotoCalibTractCatalog : `lsst.afw.table.ExposureCatalog`, optional
493 Exposure catalog with external photoCalib to be applied per config
495 Returns
496 -------
497 exposure : `lsst.afw.image.exposure.Exposure`
498 Exposure with adjusted calibrations.
499 """
500 if not self.config.doApplyExternalSkyWcs:
501 # Do not modify the exposure's SkyWcs
502 externalSkyWcsCatalog = None
503 elif self.config.useGlobalExternalSkyWcs:
504 # Use the global external SkyWcs
505 externalSkyWcsCatalog = externalSkyWcsGlobalCatalog
506 self.log.info('Applying global SkyWcs')
507 else:
508 # use tract-level external SkyWcs from the closest overlapping tract
509 inputRef = getattr(inputRefs, 'externalSkyWcsTractCatalog')
510 tracts = [ref.dataId['tract'] for ref in inputRef]
511 if len(tracts) == 1:
512 ind = 0
513 self.log.info('Applying tract-level SkyWcs from tract %s', tracts[ind])
514 else:
515 ind = self.getClosestTract(tracts, skyMap,
516 exposure.getBBox(), exposure.getWcs())
517 self.log.info('Multiple overlapping externalSkyWcsTractCatalogs found (%s). '
518 'Applying closest to detector center: tract=%s', str(tracts), tracts[ind])
520 externalSkyWcsCatalog = externalSkyWcsTractCatalog[ind]
522 if not self.config.doApplyExternalPhotoCalib:
523 # Do not modify the exposure's PhotoCalib
524 externalPhotoCalibCatalog = None
525 elif self.config.useGlobalExternalPhotoCalib:
526 # Use the global external PhotoCalib
527 externalPhotoCalibCatalog = externalPhotoCalibGlobalCatalog
528 self.log.info('Applying global PhotoCalib')
529 else:
530 # use tract-level external PhotoCalib from the closest overlapping tract
531 inputRef = getattr(inputRefs, 'externalPhotoCalibTractCatalog')
532 tracts = [ref.dataId['tract'] for ref in inputRef]
533 if len(tracts) == 1:
534 ind = 0
535 self.log.info('Applying tract-level PhotoCalib from tract %s', tracts[ind])
536 else:
537 ind = self.getClosestTract(tracts, skyMap,
538 exposure.getBBox(), exposure.getWcs())
539 self.log.info('Multiple overlapping externalPhotoCalibTractCatalogs found (%s). '
540 'Applying closest to detector center: tract=%s', str(tracts), tracts[ind])
542 externalPhotoCalibCatalog = externalPhotoCalibTractCatalog[ind]
544 return self.prepareCalibratedExposure(exposure, externalSkyWcsCatalog, externalPhotoCalibCatalog)
546 def getClosestTract(self, tracts, skyMap, bbox, wcs):
547 """Find the index of the tract closest to detector from list of tractIds
549 Parameters
550 ----------
551 tracts: `list` [`int`]
552 Iterable of integer tractIds
553 skyMap : `lsst.skymap.SkyMap`
554 skyMap to lookup tract geometry and wcs
555 bbox : `lsst.geom.Box2I`
556 Detector bbox, the center of which will be compared to tract centers
557 wcs : `lsst.afw.geom.SkyWcs`
558 Detector Wcs object to map the detector center to SkyCoord
560 Returns
561 -------
562 index : `int`
563 """
564 if len(tracts) == 1:
565 return 0
567 center = wcs.pixelToSky(bbox.getCenter())
568 sep = []
569 for tractId in tracts:
570 tract = skyMap[tractId]
571 tractCenter = tract.getWcs().pixelToSky(tract.getBBox().getCenter())
572 sep.append(center.separation(tractCenter))
574 return np.argmin(sep)
576 def prepareCalibratedExposure(self, exposure, externalSkyWcsCatalog=None, externalPhotoCalibCatalog=None):
577 """Prepare a calibrated exposure and apply external calibrations
578 if so configured.
580 Parameters
581 ----------
582 exposure : `lsst.afw.image.exposure.Exposure`
583 Input exposure to adjust calibrations.
584 externalSkyWcsCatalog : `lsst.afw.table.ExposureCatalog`, optional
585 Exposure catalog with external skyWcs to be applied
586 if config.doApplyExternalSkyWcs=True. Catalog uses the detector id
587 for the catalog id, sorted on id for fast lookup.
588 externalPhotoCalibCatalog : `lsst.afw.table.ExposureCatalog`, optional
589 Exposure catalog with external photoCalib to be applied
590 if config.doApplyExternalPhotoCalib=True. Catalog uses the detector
591 id for the catalog id, sorted on id for fast lookup.
593 Returns
594 -------
595 exposure : `lsst.afw.image.exposure.Exposure`
596 Exposure with adjusted calibrations.
597 """
598 detectorId = exposure.getInfo().getDetector().getId()
600 if externalPhotoCalibCatalog is not None:
601 row = externalPhotoCalibCatalog.find(detectorId)
602 if row is None:
603 self.log.warning("Detector id %s not found in externalPhotoCalibCatalog; "
604 "Using original photoCalib.", detectorId)
605 else:
606 photoCalib = row.getPhotoCalib()
607 if photoCalib is None:
608 self.log.warning("Detector id %s has None for photoCalib in externalPhotoCalibCatalog; "
609 "Using original photoCalib.", detectorId)
610 else:
611 exposure.setPhotoCalib(photoCalib)
613 if externalSkyWcsCatalog is not None:
614 row = externalSkyWcsCatalog.find(detectorId)
615 if row is None:
616 self.log.warning("Detector id %s not found in externalSkyWcsCatalog; "
617 "Using original skyWcs.", detectorId)
618 else:
619 skyWcs = row.getWcs()
620 if skyWcs is None:
621 self.log.warning("Detector id %s has None for skyWcs in externalSkyWcsCatalog; "
622 "Using original skyWcs.", detectorId)
623 else:
624 exposure.setWcs(skyWcs)
626 return exposure
628 def addCalibColumns(self, catalog, exposure, exposureIdInfo, **kwargs):
629 """Add replace columns with calibs evaluated at each centroid
631 Add or replace 'base_LocalWcs' `base_LocalPhotoCalib' columns in a
632 a source catalog, by rerunning the plugins.
634 Parameters
635 ----------
636 catalog : `lsst.afw.table.SourceCatalog`
637 catalog to which calib columns will be added
638 exposure : `lsst.afw.image.exposure.Exposure`
639 Exposure with attached PhotoCalibs and SkyWcs attributes to be
640 reevaluated at local centroids. Pixels are not required.
641 exposureIdInfo : `lsst.obs.base.ExposureIdInfo`
643 Returns
644 -------
645 newCat: `lsst.afw.table.SourceCatalog`
646 Source Catalog with requested local calib columns
647 """
648 measureConfig = SingleFrameMeasurementTask.ConfigClass()
649 measureConfig.doReplaceWithNoise = False
651 measureConfig.plugins.names = []
652 if self.config.doReevaluateSkyWcs:
653 measureConfig.plugins.names.add('base_LocalWcs')
654 self.log.info("Re-evaluating base_LocalWcs plugin")
655 if self.config.doReevaluatePhotoCalib:
656 measureConfig.plugins.names.add('base_LocalPhotoCalib')
657 self.log.info("Re-evaluating base_LocalPhotoCalib plugin")
658 pluginsNotToCopy = tuple(measureConfig.plugins.names)
660 # Create a new schema and catalog
661 # Copy all columns from original except for the ones to reevaluate
662 aliasMap = catalog.schema.getAliasMap()
663 mapper = afwTable.SchemaMapper(catalog.schema)
664 for item in catalog.schema:
665 if not item.field.getName().startswith(pluginsNotToCopy):
666 mapper.addMapping(item.key)
668 schema = mapper.getOutputSchema()
669 measurement = SingleFrameMeasurementTask(config=measureConfig, schema=schema)
670 schema.setAliasMap(aliasMap)
671 newCat = afwTable.SourceCatalog(schema)
672 newCat.extend(catalog, mapper=mapper)
674 # Fluxes in sourceCatalogs are in counts, so there are no fluxes to
675 # update here. LocalPhotoCalibs are applied during transform tasks.
676 # Update coord_ra/coord_dec, which are expected to be positions on the
677 # sky and are used as such in sdm tables without transform
678 if self.config.doReevaluateSkyWcs:
679 afwTable.updateSourceCoords(exposure.wcs, newCat)
681 measurement.run(measCat=newCat, exposure=exposure, exposureId=exposureIdInfo.expId)
683 return newCat
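# Illustrative example (not part of the original module): the external
# calibration catalogs consumed above are ExposureCatalogs keyed by detector
# id and sorted so that find() is fast. A hypothetical single-detector
# photoCalib catalog could be assembled like this:
def _externalPhotoCalibCatalogExample(detectorId, photoCalib):
    schema = afwTable.ExposureTable.makeMinimalSchema()
    cat = afwTable.ExposureCatalog(schema)
    rec = cat.addNew()
    rec.setId(detectorId)
    rec.setPhotoCalib(photoCalib)
    cat.sort()  # sorted on id, so prepareCalibratedExposure's find(detectorId) is fast
    return cat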
686class PostprocessAnalysis(object):
687 """Calculate columns from ParquetTable.
689 This object manages and organizes an arbitrary set of computations
690 on a catalog. The catalog is defined by a
691 `lsst.pipe.tasks.parquetTable.ParquetTable` object (or list thereof), such
692 as a `deepCoadd_obj` dataset, and the computations are defined by a
693 collection of `lsst.pipe.tasks.functors.Functor` objects (or, equivalently,
694 a `CompositeFunctor`).
696 After the object is initialized, accessing the `.df` attribute (which
697 holds the `pandas.DataFrame` containing the results of the calculations)
698 triggers computation of said dataframe.
700 One of the conveniences of using this object is the ability to define a
701 desired common filter for all functors. This enables the same functor
702 collection to be passed to several different `PostprocessAnalysis` objects
703 without having to change the original functor collection, since the `filt`
704 keyword argument of this object triggers an overwrite of the `filt`
705 property for all functors in the collection.
707 This object also allows a list of refFlags to be passed, and defines a set
708 of default refFlags that are always included even if not requested.
710 If a list of `ParquetTable` objects is passed, rather than a single one,
711 then the calculations will be mapped over all the input catalogs. In
712 principle, it should be straightforward to parallelize this activity, but
713 initial tests have failed (see TODO in code comments).
715 Parameters
716 ----------
717 parq : `lsst.pipe.tasks.ParquetTable` (or list of such)
718 Source catalog(s) for computation.
720 functors : `list`, `dict`, or `lsst.pipe.tasks.functors.CompositeFunctor`
721 Computations to do (functors that act on `parq`).
722 If a dict, the output
723 DataFrame will have columns keyed accordingly.
724 If a list, the column keys will come from the
725 `.shortname` attribute of each functor.
727 filt : `str`, optional
728 Filter in which to calculate. If provided,
729 this will overwrite any existing `.filt` attribute
730 of the provided functors.
732 flags : `list`, optional
733 List of flags (per-band) to include in output table.
734 Taken from the `meas` dataset if applied to a multilevel Object Table.
736 refFlags : `list`, optional
737 List of refFlags (only reference band) to include in output table.
739 forcedFlags : `list`, optional
740 List of flags (per-band) to include in output table.
741 Taken from the ``forced_src`` dataset if applied to a
742 multilevel Object Table. Intended for flags from measurement plugins
743 only run during multi-band forced-photometry.
744 """
745 _defaultRefFlags = []
746 _defaultFuncs = ()
748 def __init__(self, parq, functors, filt=None, flags=None, refFlags=None, forcedFlags=None):
749 self.parq = parq
750 self.functors = functors
752 self.filt = filt
753 self.flags = list(flags) if flags is not None else []
754 self.forcedFlags = list(forcedFlags) if forcedFlags is not None else []
755 self.refFlags = list(self._defaultRefFlags)
756 if refFlags is not None:
757 self.refFlags += list(refFlags)
759 self._df = None
761 @property
762 def defaultFuncs(self):
763 funcs = dict(self._defaultFuncs)
764 return funcs
766 @property
767 def func(self):
768 additionalFuncs = self.defaultFuncs
769 additionalFuncs.update({flag: Column(flag, dataset='forced_src') for flag in self.forcedFlags})
770 additionalFuncs.update({flag: Column(flag, dataset='ref') for flag in self.refFlags})
771 additionalFuncs.update({flag: Column(flag, dataset='meas') for flag in self.flags})
773 if isinstance(self.functors, CompositeFunctor):
774 func = self.functors
775 else:
776 func = CompositeFunctor(self.functors)
778 func.funcDict.update(additionalFuncs)
779 func.filt = self.filt
781 return func
783 @property
784 def noDupCols(self):
785 return [name for name, func in self.func.funcDict.items() if func.noDup or func.dataset == 'ref']
787 @property
788 def df(self):
789 if self._df is None:
790 self.compute()
791 return self._df
793 def compute(self, dropna=False, pool=None):
794 # map over multiple parquet tables
795 if type(self.parq) in (list, tuple):
796 if pool is None:
797 dflist = [self.func(parq, dropna=dropna) for parq in self.parq]
798 else:
799 # TODO: Figure out why this doesn't work (pyarrow pickling
800 # issues?)
801 dflist = pool.map(functools.partial(self.func, dropna=dropna), self.parq)
802 self._df = pd.concat(dflist)
803 else:
804 self._df = self.func(self.parq, dropna=dropna)
806 return self._df
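# Illustrative example (not part of the original module): a hedged sketch of
# typical PostprocessAnalysis use. `parq` is assumed to be a deepCoadd_obj
# ParquetTable loaded elsewhere; the flag and column names are hypothetical
# apart from detect_isPrimary.
def _postprocessAnalysisExample(parq):
    funcs = {'ra': Column('coord_ra', dataset='ref'),
             'dec': Column('coord_dec', dataset='ref')}
    analysis = PostprocessAnalysis(parq, funcs, filt='i',
                                   flags=['base_PixelFlags_flag'],
                                   refFlags=['detect_isPrimary'])
    # Accessing .df triggers compute() and returns a pandas DataFrame with one
    # column per functor or flag.
    return analysis.df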
809class TransformCatalogBaseConnections(pipeBase.PipelineTaskConnections,
810 dimensions=()):
811 """Expected Connections for subclasses of TransformCatalogBaseTask.
813 Must be subclassed.
814 """
815 inputCatalog = connectionTypes.Input(
816 name="",
817 storageClass="DataFrame",
818 )
819 outputCatalog = connectionTypes.Output(
820 name="",
821 storageClass="DataFrame",
822 )
825class TransformCatalogBaseConfig(pipeBase.PipelineTaskConfig,
826 pipelineConnections=TransformCatalogBaseConnections):
827 functorFile = pexConfig.Field(
828 dtype=str,
829 doc="Path to YAML file specifying Science Data Model functors to use "
830 "when copying columns and computing calibrated values.",
831 default=None,
832 optional=True
833 )
834 primaryKey = pexConfig.Field(
835 dtype=str,
836 doc="Name of column to be set as the DataFrame index. If None, the index"
837 "will be named `id`",
838 default=None,
839 optional=True
840 )
841 columnsFromDataId = pexConfig.ListField(
842 dtype=str,
843 default=None,
844 optional=True,
845 doc="Columns to extract from the dataId",
846 )
849class TransformCatalogBaseTask(pipeBase.PipelineTask):
850 """Base class for transforming/standardizing a catalog
852 by applying functors that convert units and apply calibrations.
853 The purpose of this task is to perform a set of computations on
854 an input `ParquetTable` dataset (such as `deepCoadd_obj`) and write the
855 results to a new dataset (which needs to be declared in an `outputDataset`
856 attribute).
858 The calculations to be performed are defined in a YAML file that specifies
859 a set of functors to be computed, provided as
860 the `functorFile` config parameter. An example of such a YAML file
861 is the following:
863 funcs:
864 psfMag:
865 functor: Mag
866 args:
867 - base_PsfFlux
868 filt: HSC-G
869 dataset: meas
870 cmodel_magDiff:
871 functor: MagDiff
872 args:
873 - modelfit_CModel
874 - base_PsfFlux
875 filt: HSC-G
876 gauss_magDiff:
877 functor: MagDiff
878 args:
879 - base_GaussianFlux
880 - base_PsfFlux
881 filt: HSC-G
882 count:
883 functor: Column
884 args:
885 - base_InputCount_value
886 filt: HSC-G
887 deconvolved_moments:
888 functor: DeconvolvedMoments
889 filt: HSC-G
890 dataset: forced_src
891 refFlags:
892 - calib_psfUsed
893 - merge_measurement_i
894 - merge_measurement_r
895 - merge_measurement_z
896 - merge_measurement_y
897 - merge_measurement_g
898 - base_PixelFlags_flag_inexact_psfCenter
899 - detect_isPrimary
901 The names for each entry under "funcs" will become the names of columns in
902 the output dataset. All the functors referenced are defined in
903 `lsst.pipe.tasks.functors`. Positional arguments to be passed to each
904 functor are in the `args` list, and any additional entries for each column
905 other than "functor" or "args" (e.g., `'filt'`, `'dataset'`) are treated as
906 keyword arguments to be passed to the functor initialization.
908 The "flags" entry is the default shortcut for `Column` functors.
909 All columns listed under "flags" will be copied to the output table
910 untransformed. They can be of any datatype.
911 In the special case of transforming a multi-level object table with
912 band and dataset indices (deepCoadd_obj), these will be taken from the
913 `meas` dataset and exploded out per band.
915 There are two special shortcuts that only apply when transforming
916 multi-level Object (deepCoadd_obj) tables:
917 - The "refFlags" entry is shortcut for `Column` functor
918 taken from the `'ref'` dataset if transforming an ObjectTable.
919 - The "forcedFlags" entry is shortcut for `Column` functors.
920 taken from the ``forced_src`` dataset if transforming an ObjectTable.
921 These are expanded out per band.
924 This task uses the `lsst.pipe.tasks.postprocess.PostprocessAnalysis` object
925 to organize and execute the calculations.
926 """
927 @property
928 def _DefaultName(self):
929 raise NotImplementedError('Subclass must define "_DefaultName" attribute')
931 @property
932 def outputDataset(self):
933 raise NotImplementedError('Subclass must define "outputDataset" attribute')
935 @property
936 def inputDataset(self):
937 raise NotImplementedError('Subclass must define "inputDataset" attribute')
939 @property
940 def ConfigClass(self):
941 raise NotImplementedError('Subclass must define "ConfigClass" attribute')
943 def __init__(self, *args, **kwargs):
944 super().__init__(*args, **kwargs)
945 if self.config.functorFile:
946 self.log.info('Loading transform functor definitions from %s',
947 self.config.functorFile)
948 self.funcs = CompositeFunctor.from_file(self.config.functorFile)
949 self.funcs.update(dict(PostprocessAnalysis._defaultFuncs))
950 else:
951 self.funcs = None
953 def runQuantum(self, butlerQC, inputRefs, outputRefs):
954 inputs = butlerQC.get(inputRefs)
955 if self.funcs is None:
956 raise ValueError("config.functorFile is None. "
957 "Must be a valid path to yaml in order to run Task as a PipelineTask.")
958 result = self.run(parq=inputs['inputCatalog'], funcs=self.funcs,
959 dataId=outputRefs.outputCatalog.dataId.full)
960 outputs = pipeBase.Struct(outputCatalog=result)
961 butlerQC.put(outputs, outputRefs)
963 def run(self, parq, funcs=None, dataId=None, band=None):
964 """Do postprocessing calculations
966 Takes a `ParquetTable` object and dataId,
967 returns a dataframe with results of postprocessing calculations.
969 Parameters
970 ----------
971 parq : `lsst.pipe.tasks.parquetTable.ParquetTable`
972 ParquetTable from which calculations are done.
973 funcs : `lsst.pipe.tasks.functors.Functors`
974 Functors to apply to the table's columns
975 dataId : dict, optional
976 Used to add a `patchId` column to the output dataframe.
977 band : `str`, optional
978 Filter band that is being processed.
980 Returns
981 -------
982 df : `pandas.DataFrame`
983 """
984 self.log.info("Transforming/standardizing the source table dataId: %s", dataId)
986 df = self.transform(band, parq, funcs, dataId).df
987 self.log.info("Made a table of %d columns and %d rows", len(df.columns), len(df))
988 return df
990 def getFunctors(self):
991 return self.funcs
993 def getAnalysis(self, parq, funcs=None, band=None):
994 if funcs is None:
995 funcs = self.funcs
996 analysis = PostprocessAnalysis(parq, funcs, filt=band)
997 return analysis
999 def transform(self, band, parq, funcs, dataId):
1000 analysis = self.getAnalysis(parq, funcs=funcs, band=band)
1001 df = analysis.df
1002 if dataId and self.config.columnsFromDataId:
1003 for key in self.config.columnsFromDataId:
1004 if key in dataId:
1005 df[str(key)] = dataId[key]
1006 else:
1007 raise ValueError(f"'{key}' in config.columnsFromDataId not found in dataId: {dataId}")
1009 if self.config.primaryKey:
1010 if df.index.name != self.config.primaryKey and self.config.primaryKey in df:
1011 df.reset_index(inplace=True, drop=True)
1012 df.set_index(self.config.primaryKey, inplace=True)
1014 return pipeBase.Struct(
1015 df=df,
1016 analysis=analysis
1017 )
1019 def write(self, df, parqRef):
1020 parqRef.put(ParquetTable(dataFrame=df), self.outputDataset)
1022 def writeMetadata(self, dataRef):
1023 """No metadata to write.
1024 """
1025 pass
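# Illustrative example (not part of the original module): the YAML functor
# file documented in the TransformCatalogBaseTask docstring can be exercised
# directly with CompositeFunctor.from_file, which is exactly what __init__
# does with config.functorFile. The path and the `parq` argument are hypothetical.
def _functorFileExample(parq, functorFile='Object.yaml'):
    funcs = CompositeFunctor.from_file(functorFile)
    # Calling the composite functor on a (multilevel) ParquetTable yields a
    # DataFrame with one column per entry under funcs/flags/refFlags.
    return funcs(parq)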
1028class TransformObjectCatalogConnections(pipeBase.PipelineTaskConnections,
1029 defaultTemplates={"coaddName": "deep"},
1030 dimensions=("tract", "patch", "skymap")):
1031 inputCatalog = connectionTypes.Input(
1032 doc="The vertical concatenation of the deepCoadd_{ref|meas|forced_src} catalogs, "
1033 "stored as a DataFrame with a multi-level column index per-patch.",
1034 dimensions=("tract", "patch", "skymap"),
1035 storageClass="DataFrame",
1036 name="{coaddName}Coadd_obj",
1037 deferLoad=True,
1038 )
1039 outputCatalog = connectionTypes.Output(
1040 doc="Per-Patch Object Table of columns transformed from the deepCoadd_obj table per the standard "
1041 "data model.",
1042 dimensions=("tract", "patch", "skymap"),
1043 storageClass="DataFrame",
1044 name="objectTable"
1045 )
1048class TransformObjectCatalogConfig(TransformCatalogBaseConfig,
1049 pipelineConnections=TransformObjectCatalogConnections):
1050 coaddName = pexConfig.Field(
1051 dtype=str,
1052 default="deep",
1053 doc="Name of coadd"
1054 )
1055 # TODO: remove in DM-27177
1056 filterMap = pexConfig.DictField(
1057 keytype=str,
1058 itemtype=str,
1059 default={},
1060 doc=("Dictionary mapping full filter name to short one for column name munging."
1061 "These filters determine the output columns no matter what filters the "
1062 "input data actually contain."),
1063 deprecated=("Coadds are now identified by the band, so this transform is unused."
1064 "Will be removed after v22.")
1065 )
1066 outputBands = pexConfig.ListField(
1067 dtype=str,
1068 default=None,
1069 optional=True,
1070 doc=("These bands and only these bands will appear in the output,"
1071 " NaN-filled if the input does not include them."
1072 " If None, then use all bands found in the input.")
1073 )
1074 camelCase = pexConfig.Field(
1075 dtype=bool,
1076 default=False,
1077 doc=("Write per-band columns names with camelCase, else underscore "
1078 "For example: gPsFlux instead of g_PsFlux.")
1079 )
1080 multilevelOutput = pexConfig.Field(
1081 dtype=bool,
1082 default=False,
1083 doc=("Whether results dataframe should have a multilevel column index (True) or be flat "
1084 "and name-munged (False).")
1085 )
1086 goodFlags = pexConfig.ListField(
1087 dtype=str,
1088 default=[],
1089 doc=("List of 'good' flags that should be set False when populating empty tables. "
1090 "All other flags are considered to be 'bad' flags and will be set to True.")
1091 )
1092 floatFillValue = pexConfig.Field(
1093 dtype=float,
1094 default=np.nan,
1095 doc="Fill value for float fields when populating empty tables."
1096 )
1097 integerFillValue = pexConfig.Field(
1098 dtype=int,
1099 default=-1,
1100 doc="Fill value for integer fields when populating empty tables."
1101 )
1103 def setDefaults(self):
1104 super().setDefaults()
1105 self.functorFile = os.path.join('$PIPE_TASKS_DIR', 'schemas', 'Object.yaml')
1106 self.primaryKey = 'objectId'
1107 self.columnsFromDataId = ['tract', 'patch']
1108 self.goodFlags = ['calib_astrometry_used',
1109 'calib_photometry_reserved',
1110 'calib_photometry_used',
1111 'calib_psf_candidate',
1112 'calib_psf_reserved',
1113 'calib_psf_used']
1116class TransformObjectCatalogTask(TransformCatalogBaseTask):
1117 """Produce a flattened Object Table to match the format specified in
1118 sdm_schemas.
1120 Do the same set of postprocessing calculations on all bands.
1122 This is identical to `TransformCatalogBaseTask`, except that it does
1123 the specified functor calculations for all filters present in the
1124 input `deepCoadd_obj` table. Any specific `"filt"` keywords specified
1125 by the YAML file will be superseded.
1126 """
1127 _DefaultName = "transformObjectCatalog"
1128 ConfigClass = TransformObjectCatalogConfig
1130 def run(self, parq, funcs=None, dataId=None, band=None):
1131 # NOTE: band kwarg is ignored here.
1132 dfDict = {}
1133 analysisDict = {}
1134 templateDf = pd.DataFrame()
1136 if isinstance(parq, DeferredDatasetHandle):
1137 columns = parq.get(component='columns')
1138 inputBands = columns.unique(level=1).values
1139 else:
1140 inputBands = parq.columnLevelNames['band']
1142 outputBands = self.config.outputBands if self.config.outputBands else inputBands
1144 # Perform transform for data of filters that exist in parq.
1145 for inputBand in inputBands:
1146 if inputBand not in outputBands:
1147 self.log.info("Ignoring %s band data in the input", inputBand)
1148 continue
1149 self.log.info("Transforming the catalog of band %s", inputBand)
1150 result = self.transform(inputBand, parq, funcs, dataId)
1151 dfDict[inputBand] = result.df
1152 analysisDict[inputBand] = result.analysis
1153 if templateDf.empty:
1154 templateDf = result.df
1156 # Put filler values in columns of other wanted bands
1157 for filt in outputBands:
1158 if filt not in dfDict:
1159 self.log.info("Adding empty columns for band %s", filt)
1160 dfTemp = templateDf.copy()
1161 for col in dfTemp.columns:
1162 testValue = dfTemp[col].values[0]
1163 if isinstance(testValue, (np.bool_, pd.BooleanDtype)):
1164 # Boolean flag type, check if it is a "good" flag
1165 if col in self.config.goodFlags:
1166 fillValue = False
1167 else:
1168 fillValue = True
1169 elif isinstance(testValue, numbers.Integral):
1170 # Checking numbers.Integral catches all flavors
1171 # of python, numpy, pandas, etc. integers.
1172 # We must ensure this is not an unsigned integer.
1173 if isinstance(testValue, np.unsignedinteger):
1174 raise ValueError("Parquet tables may not have unsigned integer columns.")
1175 else:
1176 fillValue = self.config.integerFillValue
1177 else:
1178 fillValue = self.config.floatFillValue
1179 dfTemp[col].values[:] = fillValue
1180 dfDict[filt] = dfTemp
1182 # This makes a multilevel column index, with band as first level
1183 df = pd.concat(dfDict, axis=1, names=['band', 'column'])
1185 if not self.config.multilevelOutput:
1186 noDupCols = list(set.union(*[set(v.noDupCols) for v in analysisDict.values()]))
1187 if self.config.primaryKey in noDupCols:
1188 noDupCols.remove(self.config.primaryKey)
1189 if dataId and self.config.columnsFromDataId:
1190 noDupCols += self.config.columnsFromDataId
1191 df = flattenFilters(df, noDupCols=noDupCols, camelCase=self.config.camelCase,
1192 inputBands=inputBands)
1194 self.log.info("Made a table of %d columns and %d rows", len(df.columns), len(df))
1196 return df
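# Illustrative example (not part of the original module): typical config
# overrides for TransformObjectCatalogTask, e.g. from a pipeline config file.
# The band list and flag choice are hypothetical; `config` is a
# TransformObjectCatalogConfig instance.
def _transformObjectCatalogConfigExample(config):
    config.outputBands = ['g', 'r', 'i']    # always emit these bands, filler-valued if absent
    config.camelCase = True                 # gPsFlux instead of g_PsFlux
    config.multilevelOutput = False         # flatten to the band-prefixed wide table
    config.goodFlags = ['calib_psf_used']   # fill these flags with False in empty bands
    return config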
1199class TractObjectDataIdContainer(CoaddDataIdContainer):
1201 def makeDataRefList(self, namespace):
1202 """Make self.refList from self.idList
1204 Generate a list of data references given tract and/or patch.
1205 This was adapted from `TractQADataIdContainer`, which was
1206 `TractDataIdContainer` modified to not require "filter".
1207 Only existing dataRefs are returned.
1208 """
1209 def getPatchRefList(tract):
1210 return [namespace.butler.dataRef(datasetType=self.datasetType,
1211 tract=tract.getId(),
1212 patch="%d,%d" % patch.getIndex()) for patch in tract]
1214 tractRefs = defaultdict(list) # Data references for each tract
1215 for dataId in self.idList:
1216 skymap = self.getSkymap(namespace)
1218 if "tract" in dataId:
1219 tractId = dataId["tract"]
1220 if "patch" in dataId:
1221 tractRefs[tractId].append(namespace.butler.dataRef(datasetType=self.datasetType,
1222 tract=tractId,
1223 patch=dataId['patch']))
1224 else:
1225 tractRefs[tractId] += getPatchRefList(skymap[tractId])
1226 else:
1227 tractRefs = dict((tract.getId(), tractRefs.get(tract.getId(), []) + getPatchRefList(tract))
1228 for tract in skymap)
1229 outputRefList = []
1230 for tractRefList in tractRefs.values():
1231 existingRefs = [ref for ref in tractRefList if ref.datasetExists()]
1232 outputRefList.append(existingRefs)
1234 self.refList = outputRefList
1237class ConsolidateObjectTableConnections(pipeBase.PipelineTaskConnections,
1238 dimensions=("tract", "skymap")):
1239 inputCatalogs = connectionTypes.Input(
1240 doc="Per-Patch objectTables conforming to the standard data model.",
1241 name="objectTable",
1242 storageClass="DataFrame",
1243 dimensions=("tract", "patch", "skymap"),
1244 multiple=True,
1245 )
1246 outputCatalog = connectionTypes.Output(
1247 doc="Pre-tract horizontal concatenation of the input objectTables",
1248 name="objectTable_tract",
1249 storageClass="DataFrame",
1250 dimensions=("tract", "skymap"),
1251 )
1254class ConsolidateObjectTableConfig(pipeBase.PipelineTaskConfig,
1255 pipelineConnections=ConsolidateObjectTableConnections):
1256 coaddName = pexConfig.Field(
1257 dtype=str,
1258 default="deep",
1259 doc="Name of coadd"
1260 )
1263class ConsolidateObjectTableTask(CmdLineTask, pipeBase.PipelineTask):
1264 """Write patch-merged source tables to a tract-level parquet file.
1266 Concatenates the `objectTable` list into a per-tract `objectTable_tract`.
1267 """
1268 _DefaultName = "consolidateObjectTable"
1269 ConfigClass = ConsolidateObjectTableConfig
1271 inputDataset = 'objectTable'
1272 outputDataset = 'objectTable_tract'
1274 def runQuantum(self, butlerQC, inputRefs, outputRefs):
1275 inputs = butlerQC.get(inputRefs)
1276 self.log.info("Concatenating %s per-patch Object Tables",
1277 len(inputs['inputCatalogs']))
1278 df = pd.concat(inputs['inputCatalogs'])
1279 butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs)
1281 @classmethod
1282 def _makeArgumentParser(cls):
1283 parser = ArgumentParser(name=cls._DefaultName)
1285 parser.add_id_argument("--id", cls.inputDataset,
1286 help="data ID, e.g. --id tract=12345",
1287 ContainerClass=TractObjectDataIdContainer)
1288 return parser
1290 def runDataRef(self, patchRefList):
1291 df = pd.concat([patchRef.get().toDataFrame() for patchRef in patchRefList])
1292 patchRefList[0].put(ParquetTable(dataFrame=df), self.outputDataset)
1294 def writeMetadata(self, dataRef):
1295 """No metadata to write.
1296 """
1297 pass
1300class TransformSourceTableConnections(pipeBase.PipelineTaskConnections,
1301 defaultTemplates={"catalogType": ""},
1302 dimensions=("instrument", "visit", "detector")):
1304 inputCatalog = connectionTypes.Input(
1305 doc="Wide input catalog of sources produced by WriteSourceTableTask",
1306 name="{catalogType}source",
1307 storageClass="DataFrame",
1308 dimensions=("instrument", "visit", "detector"),
1309 deferLoad=True
1310 )
1311 outputCatalog = connectionTypes.Output(
1312 doc="Narrower, per-detector Source Table transformed and converted per a "
1313 "specified set of functors",
1314 name="{catalogType}sourceTable",
1315 storageClass="DataFrame",
1316 dimensions=("instrument", "visit", "detector")
1317 )
1320class TransformSourceTableConfig(TransformCatalogBaseConfig,
1321 pipelineConnections=TransformSourceTableConnections):
1323 def setDefaults(self):
1324 super().setDefaults()
1325 self.functorFile = os.path.join('$PIPE_TASKS_DIR', 'schemas', 'Source.yaml')
1326 self.primaryKey = 'sourceId'
1327 self.columnsFromDataId = ['visit', 'detector', 'band', 'physical_filter']
1330class TransformSourceTableTask(TransformCatalogBaseTask):
1331 """Transform/standardize a source catalog
1332 """
1333 _DefaultName = "transformSourceTable"
1334 ConfigClass = TransformSourceTableConfig
1337class ConsolidateVisitSummaryConnections(pipeBase.PipelineTaskConnections,
1338 dimensions=("instrument", "visit",),
1339 defaultTemplates={"calexpType": ""}):
1340 calexp = connectionTypes.Input(
1341 doc="Processed exposures used for metadata",
1342 name="calexp",
1343 storageClass="ExposureF",
1344 dimensions=("instrument", "visit", "detector"),
1345 deferLoad=True,
1346 multiple=True,
1347 )
1348 visitSummary = connectionTypes.Output(
1349 doc=("Per-visit consolidated exposure metadata. These catalogs use "
1350 "detector id for the id and are sorted for fast lookups of a "
1351 "detector."),
1352 name="visitSummary",
1353 storageClass="ExposureCatalog",
1354 dimensions=("instrument", "visit"),
1355 )
1356 visitSummarySchema = connectionTypes.InitOutput(
1357 doc="Schema of the visitSummary catalog",
1358 name="visitSummary_schema",
1359 storageClass="ExposureCatalog",
1360 )
1363class ConsolidateVisitSummaryConfig(pipeBase.PipelineTaskConfig,
1364 pipelineConnections=ConsolidateVisitSummaryConnections):
1365 """Config for ConsolidateVisitSummaryTask"""
1366 pass
1369class ConsolidateVisitSummaryTask(pipeBase.PipelineTask, pipeBase.CmdLineTask):
1370 """Task to consolidate per-detector visit metadata.
1372 This task aggregates the following metadata from all the detectors in a
1373 single visit into an exposure catalog:
1374 - The visitInfo.
1375 - The wcs.
1376 - The photoCalib.
1377 - The physical_filter and band (if available).
1378 - The psf size, shape, and effective area at the center of the detector.
1379 - The corners of the bounding box in right ascension/declination.
1381 Other quantities such as Detector, Psf, ApCorrMap, and TransmissionCurve
1382 are not persisted here because of storage concerns, and because of their
1383 limited utility as summary statistics.
1385 Tests for this task are performed in ci_hsc_gen3.
1386 """
1387 _DefaultName = "consolidateVisitSummary"
1388 ConfigClass = ConsolidateVisitSummaryConfig
1390 @classmethod
1391 def _makeArgumentParser(cls):
1392 parser = ArgumentParser(name=cls._DefaultName)
1394 parser.add_id_argument("--id", "calexp",
1395 help="data ID, e.g. --id visit=12345",
1396 ContainerClass=VisitDataIdContainer)
1397 return parser
1399 def __init__(self, **kwargs):
1400 super().__init__(**kwargs)
1401 self.schema = afwTable.ExposureTable.makeMinimalSchema()
1402 self.schema.addField('visit', type='L', doc='Visit number')
1403 self.schema.addField('physical_filter', type='String', size=32, doc='Physical filter')
1404 self.schema.addField('band', type='String', size=32, doc='Name of band')
1405 ExposureSummaryStats.update_schema(self.schema)
1406 self.visitSummarySchema = afwTable.ExposureCatalog(self.schema)
1408 def writeMetadata(self, dataRef):
1409 """No metadata to persist, so override to remove metadata persistance.
1410 """
1411 pass
1413 def writeConfig(self, butler, clobber=False, doBackup=True):
1414 """No config to persist, so override to remove config persistance.
1415 """
1416 pass
1418 def runDataRef(self, dataRefList):
1419 visit = dataRefList[0].dataId['visit']
1421 self.log.debug("Concatenating metadata from %d per-detector calexps (visit %d)",
1422 len(dataRefList), visit)
1424 expCatalog = self._combineExposureMetadata(visit, dataRefList, isGen3=False)
1426 dataRefList[0].put(expCatalog, 'visitSummary', visit=visit)
1428 def runQuantum(self, butlerQC, inputRefs, outputRefs):
1429 dataRefs = butlerQC.get(inputRefs.calexp)
1430 visit = dataRefs[0].dataId.byName()['visit']
1432 self.log.debug("Concatenating metadata from %d per-detector calexps (visit %d)",
1433 len(dataRefs), visit)
1435 expCatalog = self._combineExposureMetadata(visit, dataRefs)
1437 butlerQC.put(expCatalog, outputRefs.visitSummary)
1439 def _combineExposureMetadata(self, visit, dataRefs, isGen3=True):
1440 """Make a combined exposure catalog from a list of dataRefs.
1441 These dataRefs must point to exposures with wcs, summaryStats,
1442 and other visit metadata.
1444 Parameters
1445 ----------
1446 visit : `int`
1447 Visit identification number.
1448 dataRefs : `list`
1449 List of dataRefs in visit. May be list of
1450 `lsst.daf.persistence.ButlerDataRef` (Gen2) or
1451 `lsst.daf.butler.DeferredDatasetHandle` (Gen3).
1452 isGen3 : `bool`, optional
1453 Specifies if this is a Gen3 list of datarefs.
1455 Returns
1456 -------
1457 visitSummary : `lsst.afw.table.ExposureCatalog`
1458 Exposure catalog with per-detector summary information.
1459 """
1460 cat = afwTable.ExposureCatalog(self.schema)
1461 cat.resize(len(dataRefs))
1463 cat['visit'] = visit
1465 for i, dataRef in enumerate(dataRefs):
1466 if isGen3:
1467 visitInfo = dataRef.get(component='visitInfo')
1468 filterLabel = dataRef.get(component='filter')
1469 summaryStats = dataRef.get(component='summaryStats')
1470 detector = dataRef.get(component='detector')
1471 wcs = dataRef.get(component='wcs')
1472 photoCalib = dataRef.get(component='photoCalib')
1473 detector = dataRef.get(component='detector')
1474 bbox = dataRef.get(component='bbox')
1475 validPolygon = dataRef.get(component='validPolygon')
1476 else:
1477 # Note that we need to read the calexp because there is
1478 # no magic access to the psf except through the exposure.
1479 gen2_read_bbox = lsst.geom.BoxI(lsst.geom.PointI(0, 0), lsst.geom.PointI(1, 1))
1480 exp = dataRef.get(datasetType='calexp_sub', bbox=gen2_read_bbox)
1481 visitInfo = exp.getInfo().getVisitInfo()
1482 filterLabel = dataRef.get("calexp_filter")
1483 summaryStats = exp.getInfo().getSummaryStats()
1484 wcs = exp.getWcs()
1485 photoCalib = exp.getPhotoCalib()
1486 detector = exp.getDetector()
1487 bbox = dataRef.get(datasetType='calexp_bbox')
1488 validPolygon = exp.getInfo().getValidPolygon()
1490 rec = cat[i]
1491 rec.setBBox(bbox)
1492 rec.setVisitInfo(visitInfo)
1493 rec.setWcs(wcs)
1494 rec.setPhotoCalib(photoCalib)
1495 rec.setValidPolygon(validPolygon)
1497 rec['physical_filter'] = filterLabel.physicalLabel if filterLabel.hasPhysicalLabel() else ""
1498 rec['band'] = filterLabel.bandLabel if filterLabel.hasBandLabel() else ""
1499 rec.setId(detector.getId())
1500 summaryStats.update_record(rec)
1502 metadata = dafBase.PropertyList()
1503 metadata.add("COMMENT", "Catalog id is detector id, sorted.")
1504 # We are looping over existing datarefs, so the following is true
1505 metadata.add("COMMENT", "Only detectors with data have entries.")
1506 cat.setMetadata(metadata)
1508 cat.sort()
1509 return cat
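# Illustrative example (not part of the original module): reading back the
# per-visit ExposureCatalog written by ConsolidateVisitSummaryTask and looking
# up one detector's summary row. The butler and the dataId values are
# hypothetical and schematic (a real dataId may also need e.g. instrument).
def _visitSummaryLookupExample(butler, visit=12345, detectorId=10):
    visitSummary = butler.get('visitSummary', visit=visit)
    # Records are keyed and sorted by detector id, so find() is fast.
    row = visitSummary.find(detectorId)
    return row.getVisitInfo(), row.getWcs(), row.getPhotoCalib()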
1512class VisitDataIdContainer(DataIdContainer):
1513 """DataIdContainer that groups sensor-level ids by visit.
1514 """
1516 def makeDataRefList(self, namespace):
1517 """Make self.refList from self.idList
1519 Generate a list of data references grouped by visit.
1521 Parameters
1522 ----------
1523 namespace : `argparse.Namespace`
1524 Namespace used by `lsst.pipe.base.CmdLineTask` to parse command
1525 line arguments.
1526 """
1527 # Group by visits
1528 visitRefs = defaultdict(list)
1529 for dataId in self.idList:
1530 if "visit" in dataId:
1531 visitId = dataId["visit"]
1532 # Append all dataRefs from this dataId's subset to the visit's list.
1533 subset = namespace.butler.subset(self.datasetType, dataId=dataId)
1534 visitRefs[visitId].extend([dataRef for dataRef in subset])
1536 outputRefList = []
1537 for refList in visitRefs.values():
1538 existingRefs = [ref for ref in refList if ref.datasetExists()]
1539 if existingRefs:
1540 outputRefList.append(existingRefs)
1542 self.refList = outputRefList
1545class ConsolidateSourceTableConnections(pipeBase.PipelineTaskConnections,
1546 defaultTemplates={"catalogType": ""},
1547 dimensions=("instrument", "visit")):
1548 inputCatalogs = connectionTypes.Input(
1549 doc="Input per-detector Source Tables",
1550 name="{catalogType}sourceTable",
1551 storageClass="DataFrame",
1552 dimensions=("instrument", "visit", "detector"),
1553 multiple=True
1554 )
1555 outputCatalog = connectionTypes.Output(
1556 doc="Per-visit concatenation of Source Table",
1557 name="{catalogType}sourceTable_visit",
1558 storageClass="DataFrame",
1559 dimensions=("instrument", "visit")
1560 )
1563class ConsolidateSourceTableConfig(pipeBase.PipelineTaskConfig,
1564 pipelineConnections=ConsolidateSourceTableConnections):
1565 pass
1568class ConsolidateSourceTableTask(CmdLineTask, pipeBase.PipelineTask):
1569 """Concatenate `sourceTable` list into a per-visit `sourceTable_visit`
1570 """
1571 _DefaultName = 'consolidateSourceTable'
1572 ConfigClass = ConsolidateSourceTableConfig
1574 inputDataset = 'sourceTable'
1575 outputDataset = 'sourceTable_visit'
1577 def runQuantum(self, butlerQC, inputRefs, outputRefs):
1578 from .makeCoaddTempExp import reorderRefs
1580 detectorOrder = [ref.dataId['detector'] for ref in inputRefs.inputCatalogs]
1581 detectorOrder.sort()
1582 inputRefs = reorderRefs(inputRefs, detectorOrder, dataIdKey='detector')
1583 inputs = butlerQC.get(inputRefs)
1584 self.log.info("Concatenating %s per-detector Source Tables",
1585 len(inputs['inputCatalogs']))
1586 df = pd.concat(inputs['inputCatalogs'])
1587 butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs)
1589 def runDataRef(self, dataRefList):
1590 self.log.info("Concatenating %s per-detector Source Tables", len(dataRefList))
1591 df = pd.concat([dataRef.get().toDataFrame() for dataRef in dataRefList])
1592 dataRefList[0].put(ParquetTable(dataFrame=df), self.outputDataset)
1594 @classmethod
1595 def _makeArgumentParser(cls):
1596 parser = ArgumentParser(name=cls._DefaultName)
1598 parser.add_id_argument("--id", cls.inputDataset,
1599 help="data ID, e.g. --id visit=12345",
1600 ContainerClass=VisitDataIdContainer)
1601 return parser
1603 def writeMetadata(self, dataRef):
1604 """No metadata to write.
1605 """
1606 pass
1608 def writeConfig(self, butler, clobber=False, doBackup=True):
1609 """No config to write.
1610 """
1611 pass
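# Illustrative sketch: the per-visit consolidation above is, at its core, a
# pandas concatenation of per-detector DataFrames taken in sorted detector
# order (runQuantum reorders the input references by their 'detector' data ID
# key before loading). The toy frames below stand in for real sourceTables.
def exampleConcatenateDetectors():
    import pandas as pd

    perDetector = {
        2: pd.DataFrame({"sourceId": [10, 11], "psfFlux": [1.0, 2.0]}),
        0: pd.DataFrame({"sourceId": [1, 2], "psfFlux": [3.0, 4.0]}),
        1: pd.DataFrame({"sourceId": [5], "psfFlux": [5.0]}),
    }
    # Concatenate in ascending detector order.
    return pd.concat([perDetector[det] for det in sorted(perDetector)])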
1614class MakeCcdVisitTableConnections(pipeBase.PipelineTaskConnections,
1615 dimensions=("instrument",),
1616 defaultTemplates={"calexpType": ""}):
1617 visitSummaryRefs = connectionTypes.Input(
1618 doc="Data references for per-visit consolidated exposure metadata",
1619 name="finalVisitSummary",
1620 storageClass="ExposureCatalog",
1621 dimensions=("instrument", "visit"),
1622 multiple=True,
1623 deferLoad=True,
1624 )
1625 outputCatalog = connectionTypes.Output(
1626 doc="CCD and Visit metadata table",
1627 name="ccdVisitTable",
1628 storageClass="DataFrame",
1629 dimensions=("instrument",)
1630 )
1633class MakeCcdVisitTableConfig(pipeBase.PipelineTaskConfig,
1634 pipelineConnections=MakeCcdVisitTableConnections):
1635 pass
1638class MakeCcdVisitTableTask(CmdLineTask, pipeBase.PipelineTask):
1639 """Produce a `ccdVisitTable` from the visit summary exposure catalogs.
1640 """
1641 _DefaultName = 'makeCcdVisitTable'
1642 ConfigClass = MakeCcdVisitTableConfig
1644 def run(self, visitSummaryRefs):
1645 """Make a table of ccd information from the visit summary catalogs.
1647 Parameters
1648 ----------
1649 visitSummaryRefs : `list` of `lsst.daf.butler.DeferredDatasetHandle`
1650 List of DeferredDatasetHandles pointing to exposure catalogs with
1651 per-detector summary information.
1653 Returns
1654 -------
1655 result : `lsst.pipe.base.Struct`
1656 Results struct with attribute:
1658 ``outputCatalog``
1659 Catalog of ccd and visit information.
1660 """
1661 ccdEntries = []
1662 for visitSummaryRef in visitSummaryRefs:
1663 visitSummary = visitSummaryRef.get()
1664 visitInfo = visitSummary[0].getVisitInfo()
1666 ccdEntry = {}
1667 summaryTable = visitSummary.asAstropy()
1668 selectColumns = ['id', 'visit', 'physical_filter', 'band', 'ra', 'decl', 'zenithDistance',
1669 'zeroPoint', 'psfSigma', 'skyBg', 'skyNoise',
1670 'astromOffsetMean', 'astromOffsetStd', 'nPsfStar',
1671 'psfStarDeltaE1Median', 'psfStarDeltaE2Median',
1672 'psfStarDeltaE1Scatter', 'psfStarDeltaE2Scatter',
1673 'psfStarDeltaSizeMedian', 'psfStarDeltaSizeScatter',
1674 'psfStarScaledDeltaSizeScatter',
1675 'psfTraceRadiusDelta', 'maxDistToNearestPsf']
1676 ccdEntry = summaryTable[selectColumns].to_pandas().set_index('id')
1677 # 'visit' is the human-readable visit number.
1678 # 'visitId' is the key into the visitTable. They are the same.
1679 # Strictly speaking, one should join with the visit table to get
1680 # the visit number.
1681 ccdEntry = ccdEntry.rename(columns={"visit": "visitId"})
1682 dataIds = [DataCoordinate.standardize(visitSummaryRef.dataId, detector=id) for id in
1683 summaryTable['id']]
1684 packer = visitSummaryRef.dataId.universe.makePacker('visit_detector', visitSummaryRef.dataId)
1685 ccdVisitIds = [packer.pack(dataId) for dataId in dataIds]
1686 ccdEntry['ccdVisitId'] = ccdVisitIds
1687 ccdEntry['detector'] = summaryTable['id']
1688 pixToArcseconds = np.array([vR.getWcs().getPixelScale().asArcseconds() for vR in visitSummary])
1689 ccdEntry["seeing"] = visitSummary['psfSigma'] * np.sqrt(8 * np.log(2)) * pixToArcseconds
1691 ccdEntry["skyRotation"] = visitInfo.getBoresightRotAngle().asDegrees()
1692 ccdEntry["expMidpt"] = visitInfo.getDate().toPython()
1693 ccdEntry["expMidptMJD"] = visitInfo.getDate().get(dafBase.DateTime.MJD)
1694 expTime = visitInfo.getExposureTime()
1695 ccdEntry['expTime'] = expTime
1696 ccdEntry["obsStart"] = ccdEntry["expMidpt"] - 0.5 * pd.Timedelta(seconds=expTime)
1697 expTime_days = expTime / (60*60*24)
1698 ccdEntry["obsStartMJD"] = ccdEntry["expMidptMJD"] - 0.5 * expTime_days
1699 ccdEntry['darkTime'] = visitInfo.getDarkTime()
1700 ccdEntry['xSize'] = summaryTable['bbox_max_x'] - summaryTable['bbox_min_x']
1701 ccdEntry['ySize'] = summaryTable['bbox_max_y'] - summaryTable['bbox_min_y']
1702 ccdEntry['llcra'] = summaryTable['raCorners'][:, 0]
1703 ccdEntry['llcdec'] = summaryTable['decCorners'][:, 0]
1704 ccdEntry['ulcra'] = summaryTable['raCorners'][:, 1]
1705 ccdEntry['ulcdec'] = summaryTable['decCorners'][:, 1]
1706 ccdEntry['urcra'] = summaryTable['raCorners'][:, 2]
1707 ccdEntry['urcdec'] = summaryTable['decCorners'][:, 2]
1708 ccdEntry['lrcra'] = summaryTable['raCorners'][:, 3]
1709 ccdEntry['lrcdec'] = summaryTable['decCorners'][:, 3]
1710 # TODO: DM-30618, Add raftName, nExposures, ccdTemp, binX, binY,
1711 # and flags, and decide if WCS, and llcx, llcy, ulcx, ulcy, etc.
1712 # values are actually wanted.
1713 ccdEntries.append(ccdEntry)
1715 outputCatalog = pd.concat(ccdEntries)
1716 outputCatalog.set_index('ccdVisitId', inplace=True, verify_integrity=True)
1717 return pipeBase.Struct(outputCatalog=outputCatalog)
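# Illustrative sketch of the "seeing" conversion used above: psfSigma is a
# Gaussian sigma in pixels, so multiplying by sqrt(8 ln 2) converts it to a
# FWHM in pixels, and multiplying by the pixel scale converts that to
# arcseconds. The default values below are made-up, HSC-like numbers.
def exampleSeeingArcsec(psfSigmaPixels=1.7, pixelScaleArcsec=0.168):
    import numpy as np

    sigmaToFwhm = np.sqrt(8 * np.log(2))  # ~2.355
    return psfSigmaPixels * sigmaToFwhm * pixelScaleArcsec  # ~0.67 arcsec here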
1720class MakeVisitTableConnections(pipeBase.PipelineTaskConnections,
1721 dimensions=("instrument",),
1722 defaultTemplates={"calexpType": ""}):
1723 visitSummaries = connectionTypes.Input(
1724 doc="Per-visit consolidated exposure metadata",
1725 name="finalVisitSummary",
1726 storageClass="ExposureCatalog",
1727 dimensions=("instrument", "visit",),
1728 multiple=True,
1729 deferLoad=True,
1730 )
1731 outputCatalog = connectionTypes.Output(
1732 doc="Visit metadata table",
1733 name="visitTable",
1734 storageClass="DataFrame",
1735 dimensions=("instrument",)
1736 )
1739class MakeVisitTableConfig(pipeBase.PipelineTaskConfig,
1740 pipelineConnections=MakeVisitTableConnections):
1741 pass
1744class MakeVisitTableTask(CmdLineTask, pipeBase.PipelineTask):
1745 """Produce a `visitTable` from the visit summary exposure catalogs.
1746 """
1747 _DefaultName = 'makeVisitTable'
1748 ConfigClass = MakeVisitTableConfig
1750 def run(self, visitSummaries):
1751 """Make a table of visit information from the visit summary catalogs.
1753 Parameters
1754 ----------
1755 visitSummaries : `list` of `lsst.afw.table.ExposureCatalog`
1756 List of exposure catalogs with per-detector summary information.
1757 Returns
1758 -------
1759 result : `lsst.pipe.base.Struct`
1760 Results struct with attribute:
1762 ``outputCatalog``
1763 Catalog of visit information.
1764 """
1765 visitEntries = []
1766 for visitSummary in visitSummaries:
1767 visitSummary = visitSummary.get()
1768 visitRow = visitSummary[0]
1769 visitInfo = visitRow.getVisitInfo()
1771 visitEntry = {}
1772 visitEntry["visitId"] = visitRow['visit']
1773 visitEntry["visit"] = visitRow['visit']
1774 visitEntry["physical_filter"] = visitRow['physical_filter']
1775 visitEntry["band"] = visitRow['band']
1776 raDec = visitInfo.getBoresightRaDec()
1777 visitEntry["ra"] = raDec.getRa().asDegrees()
1778 visitEntry["decl"] = raDec.getDec().asDegrees()
1779 visitEntry["skyRotation"] = visitInfo.getBoresightRotAngle().asDegrees()
1780 azAlt = visitInfo.getBoresightAzAlt()
1781 visitEntry["azimuth"] = azAlt.getLongitude().asDegrees()
1782 visitEntry["altitude"] = azAlt.getLatitude().asDegrees()
1783 visitEntry["zenithDistance"] = 90 - azAlt.getLatitude().asDegrees()
1784 visitEntry["airmass"] = visitInfo.getBoresightAirmass()
1785 expTime = visitInfo.getExposureTime()
1786 visitEntry["expTime"] = expTime
1787 visitEntry["expMidpt"] = visitInfo.getDate().toPython()
1788 visitEntry["expMidptMJD"] = visitInfo.getDate().get(dafBase.DateTime.MJD)
1789 visitEntry["obsStart"] = visitEntry["expMidpt"] - 0.5 * pd.Timedelta(seconds=expTime)
1790 expTime_days = expTime / (60*60*24)
1791 visitEntry["obsStartMJD"] = visitEntry["expMidptMJD"] - 0.5 * expTime_days
1792 visitEntries.append(visitEntry)
1794 # TODO: DM-30623, Add programId, exposureType, cameraTemp,
1795 # mirror1Temp, mirror2Temp, mirror3Temp, domeTemp, externalTemp,
1796 # dimmSeeing, pwvGPS, pwvMW, flags, nExposures.
1798 outputCatalog = pd.DataFrame(data=visitEntries)
1799 outputCatalog.set_index('visitId', inplace=True, verify_integrity=True)
1800 return pipeBase.Struct(outputCatalog=outputCatalog)
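# Illustrative sketch of the exposure-start arithmetic used above: the visit
# date is the exposure midpoint, so the start is half an exposure earlier,
# both as a timestamp and in MJD (seconds converted to days). The values
# below are made up for illustration only.
def exampleObsStart(expMidptMJD=59580.5, expTimeSeconds=30.0):
    import pandas as pd

    expMidpt = pd.Timestamp("2022-01-01T12:00:00")  # corresponds to MJD 59580.5
    obsStart = expMidpt - 0.5 * pd.Timedelta(seconds=expTimeSeconds)
    obsStartMJD = expMidptMJD - 0.5 * expTimeSeconds / 86400.0
    return obsStart, obsStartMJD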
1803class WriteForcedSourceTableConnections(pipeBase.PipelineTaskConnections,
1804 dimensions=("instrument", "visit", "detector", "skymap", "tract")):
1806 inputCatalog = connectionTypes.Input(
1807 doc="Primary per-detector, single-epoch forced-photometry catalog. "
1808 "By default, it is the output of ForcedPhotCcdTask on calexps",
1809 name="forced_src",
1810 storageClass="SourceCatalog",
1811 dimensions=("instrument", "visit", "detector", "skymap", "tract")
1812 )
1813 inputCatalogDiff = connectionTypes.Input(
1814 doc="Secondary multi-epoch, per-detector, forced photometry catalog. "
1815 "By default, it is the output of ForcedPhotCcdTask run on image differences.",
1816 name="forced_diff",
1817 storageClass="SourceCatalog",
1818 dimensions=("instrument", "visit", "detector", "skymap", "tract")
1819 )
1820 outputCatalog = connectionTypes.Output(
1821 doc="InputCatalogs horizonatally joined on `objectId` in Parquet format",
1822 name="mergedForcedSource",
1823 storageClass="DataFrame",
1824 dimensions=("instrument", "visit", "detector", "skymap", "tract")
1825 )
1828class WriteForcedSourceTableConfig(pipeBase.PipelineTaskConfig,
1829 pipelineConnections=WriteForcedSourceTableConnections):
1830 key = lsst.pex.config.Field(
1831 doc="Column on which to join the two input tables on and make the primary key of the output",
1832 dtype=str,
1833 default="objectId",
1834 )
1837class WriteForcedSourceTableTask(pipeBase.PipelineTask):
1838 """Merge and convert per-detector forced source catalogs to parquet.
1840 Because the predecessor ForcedPhotCcdTask operates per-detector and
1841 per-tract (i.e., it has tract in its dimensions), detectors
1842 on the tract boundary may have multiple forced source catalogs.
1844 The successor task TransformForcedSourceTable runs per-patch
1845 and temporally aggregates the overlapping mergedForcedSource
1846 catalogs from all available epochs.
1847 """
1848 _DefaultName = "writeForcedSourceTable"
1849 ConfigClass = WriteForcedSourceTableConfig
1851 def runQuantum(self, butlerQC, inputRefs, outputRefs):
1852 inputs = butlerQC.get(inputRefs)
1853 # Add ccdVisitId to allow joining with CcdVisitTable
1854 inputs['ccdVisitId'] = butlerQC.quantum.dataId.pack("visit_detector")
1855 inputs['band'] = butlerQC.quantum.dataId.full['band']
1856 outputs = self.run(**inputs)
1857 butlerQC.put(outputs, outputRefs)
1859 def run(self, inputCatalog, inputCatalogDiff, ccdVisitId=None, band=None):
1860 dfs = []
1861 for table, dataset in zip((inputCatalog, inputCatalogDiff), ('calexp', 'diff')):
1862 df = table.asAstropy().to_pandas().set_index(self.config.key, drop=False)
1863 df = df.reindex(sorted(df.columns), axis=1)
1864 df['ccdVisitId'] = ccdVisitId if ccdVisitId else pd.NA
1865 df['band'] = band if band else pd.NA
1866 df.columns = pd.MultiIndex.from_tuples([(dataset, c) for c in df.columns],
1867 names=('dataset', 'column'))
1869 dfs.append(df)
1871 outputCatalog = functools.reduce(lambda d1, d2: d1.join(d2), dfs)
1872 return pipeBase.Struct(outputCatalog=outputCatalog)
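# Illustrative sketch of the column-MultiIndex join performed above: each
# input catalog becomes a DataFrame indexed by objectId with a two-level
# ('dataset', 'column') column index, and the frames are then joined
# horizontally on that shared index. Toy data for illustration only.
def exampleMultiIndexJoin():
    import functools
    import pandas as pd

    calexp = pd.DataFrame({"objectId": [1, 2], "flux": [10.0, 20.0]}).set_index("objectId", drop=False)
    diff = pd.DataFrame({"objectId": [1, 2], "flux": [0.1, -0.2]}).set_index("objectId", drop=False)

    dfs = []
    for df, dataset in ((calexp, "calexp"), (diff, "diff")):
        df = df.copy()
        df.columns = pd.MultiIndex.from_tuples([(dataset, c) for c in df.columns],
                                               names=("dataset", "column"))
        dfs.append(df)
    return functools.reduce(lambda d1, d2: d1.join(d2), dfs)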
1875class TransformForcedSourceTableConnections(pipeBase.PipelineTaskConnections,
1876 dimensions=("instrument", "skymap", "patch", "tract")):
1878 inputCatalogs = connectionTypes.Input(
1879 doc="Parquet table of merged ForcedSources produced by WriteForcedSourceTableTask",
1880 name="mergedForcedSource",
1881 storageClass="DataFrame",
1882 dimensions=("instrument", "visit", "detector", "skymap", "tract"),
1883 multiple=True,
1884 deferLoad=True
1885 )
1886 referenceCatalog = connectionTypes.Input(
1887 doc="Reference catalog which was used to seed the forcedPhot. Columns "
1888 "objectId, detect_isPrimary, detect_isTractInner, detect_isPatchInner "
1889 "are expected.",
1890 name="objectTable",
1891 storageClass="DataFrame",
1892 dimensions=("tract", "patch", "skymap"),
1893 deferLoad=True
1894 )
1895 outputCatalog = connectionTypes.Output(
1896 doc="Narrower, temporally-aggregated, per-patch ForcedSource Table transformed and converted per a "
1897 "specified set of functors",
1898 name="forcedSourceTable",
1899 storageClass="DataFrame",
1900 dimensions=("tract", "patch", "skymap")
1901 )
1904class TransformForcedSourceTableConfig(TransformCatalogBaseConfig,
1905 pipelineConnections=TransformForcedSourceTableConnections):
1906 referenceColumns = pexConfig.ListField(
1907 dtype=str,
1908 default=["detect_isPrimary", "detect_isTractInner", "detect_isPatchInner"],
1909 optional=True,
1910 doc="Columns to pull from reference catalog",
1911 )
1912 keyRef = lsst.pex.config.Field(
1913 doc="Column on which to join the two input tables on and make the primary key of the output",
1914 dtype=str,
1915 default="objectId",
1916 )
1917 key = lsst.pex.config.Field(
1918 doc="Rename the output DataFrame index to this name",
1919 dtype=str,
1920 default="forcedSourceId",
1921 )
1923 def setDefaults(self):
1924 super().setDefaults()
1925 self.functorFile = os.path.join('$PIPE_TASKS_DIR', 'schemas', 'ForcedSource.yaml')
1926 self.columnsFromDataId = ['tract', 'patch']
1929class TransformForcedSourceTableTask(TransformCatalogBaseTask):
1930 """Transform/standardize a ForcedSource catalog
1932 Transforms each wide, per-detector forcedSource parquet table per the
1933 specification file (per-camera defaults found in ForcedSource.yaml).
1934 All epochs that overlap the patch are aggregated into one narrow,
1935 per-patch parquet file.
1937 No de-duplication of rows is performed. Duplicate-resolution flags are
1938 pulled in from the referenceCatalog (`detect_isPrimary`,
1939 `detect_isTractInner`, `detect_isPatchInner`) so that the user may
1940 de-duplicate for analysis or compare duplicates for QA.
1942 The resulting table includes multiple bands. Epochs (MJDs) and other
1943 useful per-visit quantities can be retrieved by joining with the
1944 CcdVisitTable on ccdVisitId.
1945 """
1946 _DefaultName = "transformForcedSourceTable"
1947 ConfigClass = TransformForcedSourceTableConfig
1949 def runQuantum(self, butlerQC, inputRefs, outputRefs):
1950 inputs = butlerQC.get(inputRefs)
1951 if self.funcs is None:
1952 raise ValueError("config.functorFile is None. "
1953 "Must be a valid path to yaml in order to run Task as a PipelineTask.")
1954 outputs = self.run(inputs['inputCatalogs'], inputs['referenceCatalog'], funcs=self.funcs,
1955 dataId=outputRefs.outputCatalog.dataId.full)
1957 butlerQC.put(outputs, outputRefs)
1959 def run(self, inputCatalogs, referenceCatalog, funcs=None, dataId=None, band=None):
1960 dfs = []
1961 ref = referenceCatalog.get(parameters={"columns": self.config.referenceColumns})
1962 self.log.info("Aggregating %s input catalogs", len(inputCatalogs))
1963 for handle in inputCatalogs:
1964 result = self.transform(None, handle, funcs, dataId)
1965 # Filter for only rows that were detected on (overlap) the patch
1966 dfs.append(result.df.join(ref, how='inner'))
1968 outputCatalog = pd.concat(dfs)
1970 # Now that we are done joining on config.keyRef,
1971 # give the index that name:
1972 outputCatalog.index.rename(self.config.keyRef, inplace=True)
1973 # Add config.keyRef to the column list
1974 outputCatalog.reset_index(inplace=True)
1975 # Set the forcedSourceId to the index. This is specified in the
1976 # ForcedSource.yaml
1977 outputCatalog.set_index("forcedSourceId", inplace=True, verify_integrity=True)
1978 # Rename it to the config.key
1979 outputCatalog.index.rename(self.config.key, inplace=True)
1981 self.log.info("Made a table of %d columns and %d rows",
1982 len(outputCatalog.columns), len(outputCatalog))
1983 return pipeBase.Struct(outputCatalog=outputCatalog)
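# Illustrative sketch of the reference-catalog filtering and re-indexing done
# above: an inner join on objectId keeps only forced sources whose object
# appears in the per-patch reference table, after which the row index is
# switched from objectId to forcedSourceId. Toy data for illustration only.
def exampleReferenceJoin():
    import pandas as pd

    forced = pd.DataFrame({"forcedSourceId": [100, 101, 102],
                           "psfFlux": [1.0, 2.0, 3.0]},
                          index=pd.Index([1, 1, 2], name="objectId"))
    reference = pd.DataFrame({"detect_isPrimary": [True, False]},
                             index=pd.Index([1, 3], name="objectId"))

    joined = forced.join(reference, how="inner")  # drops objectId 2
    joined.reset_index(inplace=True)              # objectId becomes a column
    joined.set_index("forcedSourceId", inplace=True, verify_integrity=True)
    return joined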
1986class ConsolidateTractConnections(pipeBase.PipelineTaskConnections,
1987 defaultTemplates={"catalogType": ""},
1988 dimensions=("instrument", "tract")):
1989 inputCatalogs = connectionTypes.Input(
1990 doc="Input per-patch DataFrame Tables to be concatenated",
1991 name="{catalogType}ForcedSourceTable",
1992 storageClass="DataFrame",
1993 dimensions=("tract", "patch", "skymap"),
1994 multiple=True,
1995 )
1997 outputCatalog = connectionTypes.Output(
1998 doc="Output per-tract concatenation of DataFrame Tables",
1999 name="{catalogType}ForcedSourceTable_tract",
2000 storageClass="DataFrame",
2001 dimensions=("tract", "skymap"),
2002 )
2005class ConsolidateTractConfig(pipeBase.PipelineTaskConfig,
2006 pipelineConnections=ConsolidateTractConnections):
2007 pass
2010class ConsolidateTractTask(CmdLineTask, pipeBase.PipelineTask):
2011 """Concatenate any per-patch, dataframe list into a single
2012 per-tract DataFrame.
2013 """
2014 _DefaultName = 'ConsolidateTract'
2015 ConfigClass = ConsolidateTractConfig
2017 def runQuantum(self, butlerQC, inputRefs, outputRefs):
2018 inputs = butlerQC.get(inputRefs)
2019 # Not checking at least one inputCatalog exists because that'd be an
2020 # empty QG.
2021 self.log.info("Concatenating %s per-patch %s Tables",
2022 len(inputs['inputCatalogs']),
2023 inputRefs.inputCatalogs[0].datasetType.name)
2024 df = pd.concat(inputs['inputCatalogs'])
2025 butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs)