Coverage for python/lsst/pipe/tasks/postprocess.py: 31%
807 statements
1# This file is part of pipe_tasks
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <https://www.gnu.org/licenses/>.
22import functools
23import pandas as pd
24from collections import defaultdict
25import logging
26import numpy as np
27import numbers
28import os
30import lsst.geom
31import lsst.pex.config as pexConfig
32import lsst.pipe.base as pipeBase
33import lsst.daf.base as dafBase
34from lsst.obs.base import ExposureIdInfo
35from lsst.pipe.base import connectionTypes
36import lsst.afw.table as afwTable
37from lsst.meas.base import SingleFrameMeasurementTask
38from lsst.pipe.base import CmdLineTask, ArgumentParser, DataIdContainer
39from lsst.coadd.utils.coaddDataIdContainer import CoaddDataIdContainer
40from lsst.daf.butler import DeferredDatasetHandle, DataCoordinate
41from lsst.skymap import BaseSkyMap
43from .parquetTable import ParquetTable
44from .multiBandUtils import makeMergeArgumentParser, MergeSourcesRunner
45from .functors import CompositeFunctor, Column
47log = logging.getLogger(__name__)
50def flattenFilters(df, noDupCols=['coord_ra', 'coord_dec'], camelCase=False, inputBands=None):
51 """Flattens a dataframe with multilevel column index.
52 """
53 newDf = pd.DataFrame()
54 # band is the level 0 index
55 dfBands = df.columns.unique(level=0).values
56 for band in dfBands:
57 subdf = df[band]
58 columnFormat = '{0}{1}' if camelCase else '{0}_{1}'
59 newColumns = {c: columnFormat.format(band, c)
60 for c in subdf.columns if c not in noDupCols}
61 cols = list(newColumns.keys())
62 newDf = pd.concat([newDf, subdf[cols].rename(columns=newColumns)], axis=1)
64 # Band must be present in the input and output or else the column is all NaN:
65 presentBands = dfBands if inputBands is None else list(set(inputBands).intersection(dfBands))
66 # Get the unexploded columns from any present band's partition
67 noDupDf = df[presentBands[0]][noDupCols]
68 newDf = pd.concat([noDupDf, newDf], axis=1)
69 return newDf
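# Hedged usage sketch for ``flattenFilters``: ``multiDf`` below is a hypothetical
# DataFrame whose columns form a (band, column) MultiIndex, mimicking one
# dataset slice of a deepCoadd_obj table.
#
#     import pandas as pd
#     columns = pd.MultiIndex.from_tuples(
#         [('g', 'coord_ra'), ('g', 'coord_dec'), ('g', 'PsfFlux'),
#          ('r', 'coord_ra'), ('r', 'coord_dec'), ('r', 'PsfFlux')])
#     multiDf = pd.DataFrame([[150.1, 2.2, 100.0, 150.1, 2.2, 80.0]], columns=columns)
#     flat = flattenFilters(multiDf)
#     # flat.columns -> ['coord_ra', 'coord_dec', 'g_PsfFlux', 'r_PsfFlux']
#     # With camelCase=True the per-band columns become 'gPsfFlux', 'rPsfFlux'.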
72class WriteObjectTableConnections(pipeBase.PipelineTaskConnections,
73 defaultTemplates={"coaddName": "deep"},
74 dimensions=("tract", "patch", "skymap")):
75 inputCatalogMeas = connectionTypes.Input(
76 doc="Catalog of source measurements on the deepCoadd.",
77 dimensions=("tract", "patch", "band", "skymap"),
78 storageClass="SourceCatalog",
79 name="{coaddName}Coadd_meas",
80 multiple=True
81 )
82 inputCatalogForcedSrc = connectionTypes.Input(
83 doc="Catalog of forced measurements (shape and position parameters held fixed) on the deepCoadd.",
84 dimensions=("tract", "patch", "band", "skymap"),
85 storageClass="SourceCatalog",
86 name="{coaddName}Coadd_forced_src",
87 multiple=True
88 )
89 inputCatalogRef = connectionTypes.Input(
90 doc="Catalog marking the primary detection (which band provides a good shape and position)"
91 "for each detection in deepCoadd_mergeDet.",
92 dimensions=("tract", "patch", "skymap"),
93 storageClass="SourceCatalog",
94 name="{coaddName}Coadd_ref"
95 )
96 outputCatalog = connectionTypes.Output(
97 doc="A vertical concatenation of the deepCoadd_{ref|meas|forced_src} catalogs, "
98 "stored as a DataFrame with a multi-level column index per-patch.",
99 dimensions=("tract", "patch", "skymap"),
100 storageClass="DataFrame",
101 name="{coaddName}Coadd_obj"
102 )
105class WriteObjectTableConfig(pipeBase.PipelineTaskConfig,
106 pipelineConnections=WriteObjectTableConnections):
107 engine = pexConfig.Field(
108 dtype=str,
109 default="pyarrow",
110 doc="Parquet engine for writing (pyarrow or fastparquet)"
111 )
112 coaddName = pexConfig.Field(
113 dtype=str,
114 default="deep",
115 doc="Name of coadd"
116 )
119class WriteObjectTableTask(CmdLineTask, pipeBase.PipelineTask):
120 """Write filter-merged source tables to parquet
121 """
122 _DefaultName = "writeObjectTable"
123 ConfigClass = WriteObjectTableConfig
124 RunnerClass = MergeSourcesRunner
126 # Names of table datasets to be merged
127 inputDatasets = ('forced_src', 'meas', 'ref')
129 # Tag of output dataset written by `MergeSourcesTask.write`
130 outputDataset = 'obj'
132 def __init__(self, butler=None, schema=None, **kwargs):
133 # It is a shame that this class can't use the default init for
134 # CmdLineTask, but to do so would require its own special task
135 # runner, which is many more lines of specialization, so this is
136 # how it is for now.
137 super().__init__(**kwargs)
139 def runDataRef(self, patchRefList):
140 """!
141 @brief Merge coadd sources from multiple bands. Calls @ref `run` to do
142 the actual merging.
143 @param[in] patchRefList list of data references for each filter
144 """
145 catalogs = dict(self.readCatalog(patchRef) for patchRef in patchRefList)
146 dataId = patchRefList[0].dataId
147 mergedCatalog = self.run(catalogs, tract=dataId['tract'], patch=dataId['patch'])
148 self.write(patchRefList[0], ParquetTable(dataFrame=mergedCatalog))
150 def runQuantum(self, butlerQC, inputRefs, outputRefs):
151 inputs = butlerQC.get(inputRefs)
153 measDict = {ref.dataId['band']: {'meas': cat} for ref, cat in
154 zip(inputRefs.inputCatalogMeas, inputs['inputCatalogMeas'])}
155 forcedSourceDict = {ref.dataId['band']: {'forced_src': cat} for ref, cat in
156 zip(inputRefs.inputCatalogForcedSrc, inputs['inputCatalogForcedSrc'])}
158 catalogs = {}
159 for band in measDict.keys():
160 catalogs[band] = {'meas': measDict[band]['meas'],
161 'forced_src': forcedSourceDict[band]['forced_src'],
162 'ref': inputs['inputCatalogRef']}
163 dataId = butlerQC.quantum.dataId
164 df = self.run(catalogs=catalogs, tract=dataId['tract'], patch=dataId['patch'])
165 outputs = pipeBase.Struct(outputCatalog=df)
166 butlerQC.put(outputs, outputRefs)
168 @classmethod
169 def _makeArgumentParser(cls):
170 """Create a suitable ArgumentParser.
172 We will use the ArgumentParser to get a list of data
173 references for patches; the RunnerClass will sort them into lists
174 of data references for the same patch.
176 References the first of self.inputDatasets, rather than
177 self.inputDataset.
178 """
179 return makeMergeArgumentParser(cls._DefaultName, cls.inputDatasets[0])
181 def readCatalog(self, patchRef):
182 """Read input catalogs
184 Read all the input datasets given by the 'inputDatasets'
185 attribute.
187 Parameters
188 ----------
189 patchRef : `lsst.daf.persistence.ButlerDataRef`
190 Data reference for patch.
192 Returns
193 -------
194 Tuple consisting of band name and a dict of catalogs, keyed by
195 dataset name.
196 """
197 band = patchRef.get(self.config.coaddName + "Coadd_filter", immediate=True).bandLabel
198 catalogDict = {}
199 for dataset in self.inputDatasets:
200 catalog = patchRef.get(self.config.coaddName + "Coadd_" + dataset, immediate=True)
201 self.log.info("Read %d sources from %s for band %s: %s",
202 len(catalog), dataset, band, patchRef.dataId)
203 catalogDict[dataset] = catalog
204 return band, catalogDict
206 def run(self, catalogs, tract, patch):
207 """Merge multiple catalogs.
209 Parameters
210 ----------
211 catalogs : `dict`
212 Mapping from filter names to dict of catalogs.
213 tract : int
214 tractId to use for the tractId column.
215 patch : str
216 patchId to use for the patchId column.
218 Returns
219 -------
220 catalog : `pandas.DataFrame`
221 Merged dataframe.
222 """
224 dfs = []
225 for filt, tableDict in catalogs.items():
226 for dataset, table in tableDict.items():
227 # Convert afwTable to pandas DataFrame
228 df = table.asAstropy().to_pandas().set_index('id', drop=True)
230 # Sort columns by name, to ensure matching schema among patches
231 df = df.reindex(sorted(df.columns), axis=1)
232 df['tractId'] = tract
233 df['patchId'] = patch
235 # Make columns a 3-level MultiIndex
236 df.columns = pd.MultiIndex.from_tuples([(dataset, filt, c) for c in df.columns],
237 names=('dataset', 'band', 'column'))
238 dfs.append(df)
240 catalog = functools.reduce(lambda d1, d2: d1.join(d2), dfs)
241 return catalog
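# Hedged sketch of the structure ``run`` returns: the per-dataset, per-band
# DataFrames all share the object ``id`` index, so the successive joins above
# align row-by-row, and the result carries a three-level column MultiIndex
# (dataset, band, column). The tract/patch values below are hypothetical.
#
#     merged = task.run(catalogs, tract=9813, patch='4,4')
#     fluxes = merged[('meas', 'g', 'base_PsfFlux_instFlux')]
#     primary = merged[('ref', 'g', 'detect_isPrimary')]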
243 def write(self, patchRef, catalog):
244 """Write the output.
246 Parameters
247 ----------
248 catalog : `ParquetTable`
249 Catalog to write.
250 patchRef : `lsst.daf.persistence.ButlerDataRef`
251 Data reference for patch.
252 """
253 patchRef.put(catalog, self.config.coaddName + "Coadd_" + self.outputDataset)
254 # since the filter isn't actually part of the data ID for the dataset
255 # we're saving, it's confusing to see it in the log message, even if
256 # the butler simply ignores it.
257 mergeDataId = patchRef.dataId.copy()
258 del mergeDataId["filter"]
259 self.log.info("Wrote merged catalog: %s", mergeDataId)
261 def writeMetadata(self, dataRefList):
262 """No metadata to write, and not sure how to write it for a list of
263 dataRefs.
264 """
265 pass
268class WriteSourceTableConnections(pipeBase.PipelineTaskConnections,
269 defaultTemplates={"catalogType": ""},
270 dimensions=("instrument", "visit", "detector")):
272 catalog = connectionTypes.Input(
273 doc="Input full-depth catalog of sources produced by CalibrateTask",
274 name="{catalogType}src",
275 storageClass="SourceCatalog",
276 dimensions=("instrument", "visit", "detector")
277 )
278 outputCatalog = connectionTypes.Output(
279 doc="Catalog of sources, `src` in Parquet format. The 'id' column is "
280 "replaced with an index; all other columns are unchanged.",
281 name="{catalogType}source",
282 storageClass="DataFrame",
283 dimensions=("instrument", "visit", "detector")
284 )
287class WriteSourceTableConfig(pipeBase.PipelineTaskConfig,
288 pipelineConnections=WriteSourceTableConnections):
289 pass
292class WriteSourceTableTask(CmdLineTask, pipeBase.PipelineTask):
293 """Write source table to parquet.
294 """
295 _DefaultName = "writeSourceTable"
296 ConfigClass = WriteSourceTableConfig
298 def runQuantum(self, butlerQC, inputRefs, outputRefs):
299 inputs = butlerQC.get(inputRefs)
300 inputs['ccdVisitId'] = butlerQC.quantum.dataId.pack("visit_detector")
301 result = self.run(**inputs).table
302 outputs = pipeBase.Struct(outputCatalog=result.toDataFrame())
303 butlerQC.put(outputs, outputRefs)
305 def run(self, catalog, ccdVisitId=None, **kwargs):
306 """Convert `src` catalog to parquet
308 Parameters
309 ----------
310 catalog : `afwTable.SourceCatalog`
311 catalog to be converted
312 ccdVisitId : `int`
313 ccdVisitId to be added as a column
315 Returns
316 -------
317 result : `lsst.pipe.base.Struct`
318 ``table``
319 `ParquetTable` version of the input catalog
320 """
321 self.log.info("Generating parquet table from src catalog ccdVisitId=%s", ccdVisitId)
322 df = catalog.asAstropy().to_pandas().set_index('id', drop=True)
323 df['ccdVisitId'] = ccdVisitId
324 return pipeBase.Struct(table=ParquetTable(dataFrame=df))
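# Hedged usage sketch outside the butler, assuming ``srcCat`` is an
# ``lsst.afw.table.SourceCatalog`` already in memory; the ccdVisitId value is
# hypothetical:
#
#     task = WriteSourceTableTask()
#     parq = task.run(catalog=srcCat, ccdVisitId=20220701001230042).table
#     df = parq.toDataFrame()   # indexed by source 'id', with a ccdVisitId column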
327class WriteRecalibratedSourceTableConnections(WriteSourceTableConnections,
328 defaultTemplates={"catalogType": "",
329 "skyWcsName": "jointcal",
330 "photoCalibName": "fgcm"},
331 dimensions=("instrument", "visit", "detector", "skymap")):
332 skyMap = connectionTypes.Input(
333 doc="skyMap needed to choose which tract-level calibrations to use when multiple available",
334 name=BaseSkyMap.SKYMAP_DATASET_TYPE_NAME,
335 storageClass="SkyMap",
336 dimensions=("skymap",),
337 )
338 exposure = connectionTypes.Input(
339 doc="Input exposure to perform photometry on.",
340 name="calexp",
341 storageClass="ExposureF",
342 dimensions=["instrument", "visit", "detector"],
343 )
344 externalSkyWcsTractCatalog = connectionTypes.Input(
345 doc=("Per-tract, per-visit wcs calibrations. These catalogs use the detector "
346 "id for the catalog id, sorted on id for fast lookup."),
347 name="{skyWcsName}SkyWcsCatalog",
348 storageClass="ExposureCatalog",
349 dimensions=["instrument", "visit", "tract"],
350 multiple=True
351 )
352 externalSkyWcsGlobalCatalog = connectionTypes.Input(
353 doc=("Per-visit wcs calibrations computed globally (with no tract information). "
354 "These catalogs use the detector id for the catalog id, sorted on id for "
355 "fast lookup."),
356 name="{skyWcsName}SkyWcsCatalog",
357 storageClass="ExposureCatalog",
358 dimensions=["instrument", "visit"],
359 )
360 externalPhotoCalibTractCatalog = connectionTypes.Input(
361 doc=("Per-tract, per-visit photometric calibrations. These catalogs use the "
362 "detector id for the catalog id, sorted on id for fast lookup."),
363 name="{photoCalibName}PhotoCalibCatalog",
364 storageClass="ExposureCatalog",
365 dimensions=["instrument", "visit", "tract"],
366 multiple=True
367 )
368 externalPhotoCalibGlobalCatalog = connectionTypes.Input(
369 doc=("Per-visit photometric calibrations computed globally (with no tract "
370 "information). These catalogs use the detector id for the catalog id, "
371 "sorted on id for fast lookup."),
372 name="{photoCalibName}PhotoCalibCatalog",
373 storageClass="ExposureCatalog",
374 dimensions=["instrument", "visit"],
375 )
377 def __init__(self, *, config=None):
378 super().__init__(config=config)
379 # Same connection boilerplate as all other applications of
380 # Global/Tract calibrations
381 if config.doApplyExternalSkyWcs and config.doReevaluateSkyWcs:
382 if config.useGlobalExternalSkyWcs:
383 self.inputs.remove("externalSkyWcsTractCatalog")
384 else:
385 self.inputs.remove("externalSkyWcsGlobalCatalog")
386 else:
387 self.inputs.remove("externalSkyWcsTractCatalog")
388 self.inputs.remove("externalSkyWcsGlobalCatalog")
389 if config.doApplyExternalPhotoCalib and config.doReevaluatePhotoCalib:
390 if config.useGlobalExternalPhotoCalib:
391 self.inputs.remove("externalPhotoCalibTractCatalog")
392 else:
393 self.inputs.remove("externalPhotoCalibGlobalCatalog")
394 else:
395 self.inputs.remove("externalPhotoCalibTractCatalog")
396 self.inputs.remove("externalPhotoCalibGlobalCatalog")
399class WriteRecalibratedSourceTableConfig(WriteSourceTableConfig,
400 pipelineConnections=WriteRecalibratedSourceTableConnections):
402 doReevaluatePhotoCalib = pexConfig.Field(
403 dtype=bool,
404 default=True,
405 doc=("Add or replace local photoCalib columns")
406 )
407 doReevaluateSkyWcs = pexConfig.Field(
408 dtype=bool,
409 default=True,
410 doc=("Add or replace local WCS columns and update the coord columns, coord_ra and coord_dec")
411 )
412 doApplyExternalPhotoCalib = pexConfig.Field(
413 dtype=bool,
414 default=True,
415 doc=("If and only if doReevaluatePhotoCalib, apply the photometric calibrations from an external ",
416 "algorithm such as FGCM or jointcal, else use the photoCalib already attached to the exposure."),
417 )
418 doApplyExternalSkyWcs = pexConfig.Field(
419 dtype=bool,
420 default=True,
421 doc=("if and only if doReevaluateSkyWcs, apply the WCS from an external algorithm such as jointcal, ",
422 "else use the wcs already attached to the exposure."),
423 )
424 useGlobalExternalPhotoCalib = pexConfig.Field(
425 dtype=bool,
426 default=True,
427 doc=("When using doApplyExternalPhotoCalib, use 'global' calibrations "
428 "that are not run per-tract. When False, use per-tract photometric "
429 "calibration files.")
430 )
431 useGlobalExternalSkyWcs = pexConfig.Field(
432 dtype=bool,
433 default=False,
434 doc=("When using doApplyExternalSkyWcs, use 'global' calibrations "
435 "that are not run per-tract. When False, use per-tract wcs "
436 "files.")
437 )
439 def validate(self):
440 super().validate()
441 if self.doApplyExternalSkyWcs and not self.doReevaluateSkyWcs:
442 log.warning("doApplyExternalSkyWcs=True but doReevaluateSkyWcs=False"
443 "External SkyWcs will not be read or evaluated.")
444 if self.doApplyExternalPhotoCalib and not self.doReevaluatePhotoCalib:
445 log.warning("doApplyExternalPhotoCalib=True but doReevaluatePhotoCalib=False."
446 "External PhotoCalib will not be read or evaluated.")
449class WriteRecalibratedSourceTableTask(WriteSourceTableTask):
450 """Write source table to parquet
451 """
452 _DefaultName = "writeRecalibratedSourceTable"
453 ConfigClass = WriteRecalibratedSourceTableConfig
455 def runQuantum(self, butlerQC, inputRefs, outputRefs):
456 inputs = butlerQC.get(inputRefs)
457 inputs['ccdVisitId'] = butlerQC.quantum.dataId.pack("visit_detector")
458 inputs['exposureIdInfo'] = ExposureIdInfo.fromDataId(butlerQC.quantum.dataId, "visit_detector")
460 if self.config.doReevaluatePhotoCalib or self.config.doReevaluateSkyWcs:
461 if self.config.doApplyExternalPhotoCalib or self.config.doApplyExternalSkyWcs:
462 inputs['exposure'] = self.attachCalibs(inputRefs, **inputs)
464 inputs['catalog'] = self.addCalibColumns(**inputs)
466 result = self.run(**inputs).table
467 outputs = pipeBase.Struct(outputCatalog=result.toDataFrame())
468 butlerQC.put(outputs, outputRefs)
470 def attachCalibs(self, inputRefs, skyMap, exposure, externalSkyWcsGlobalCatalog=None,
471 externalSkyWcsTractCatalog=None, externalPhotoCalibGlobalCatalog=None,
472 externalPhotoCalibTractCatalog=None, **kwargs):
473 """Apply external calibrations to exposure per configuration
475 When multiple tract-level calibrations overlap, select the one whose
476 tract center is closest to the detector center.
478 Parameters
479 ----------
480 inputRefs : `lsst.pipe.base.InputQuantizedConnection`
481 Input references, used for the dataIds of the tract-level calibs.
482 skyMap : `lsst.skymap.SkyMap`
483 exposure : `lsst.afw.image.exposure.Exposure`
484 Input exposure to adjust calibrations.
485 externalSkyWcsGlobalCatalog : `lsst.afw.table.ExposureCatalog`, optional
486 Exposure catalog with external skyWcs to be applied per config
487 externalSkyWcsTractCatalog : `lsst.afw.table.ExposureCatalog`, optional
488 Exposure catalog with external skyWcs to be applied per config
489 externalPhotoCalibGlobalCatalog : `lsst.afw.table.ExposureCatalog`, optional
490 Exposure catalog with external photoCalib to be applied per config
491 externalPhotoCalibTractCatalog : `lsst.afw.table.ExposureCatalog`, optional
Exposure catalog with external photoCalib to be applied per config
494 Returns
495 -------
496 exposure : `lsst.afw.image.exposure.Exposure`
497 Exposure with adjusted calibrations.
498 """
499 if not self.config.doApplyExternalSkyWcs:
500 # Do not modify the exposure's SkyWcs
501 externalSkyWcsCatalog = None
502 elif self.config.useGlobalExternalSkyWcs:
503 # Use the global external SkyWcs
504 externalSkyWcsCatalog = externalSkyWcsGlobalCatalog
505 self.log.info('Applying global SkyWcs')
506 else:
507 # use tract-level external SkyWcs from the closest overlapping tract
508 inputRef = getattr(inputRefs, 'externalSkyWcsTractCatalog')
509 tracts = [ref.dataId['tract'] for ref in inputRef]
510 if len(tracts) == 1:
511 ind = 0
512 self.log.info('Applying tract-level SkyWcs from tract %s', tracts[ind])
513 else:
514 ind = self.getClosestTract(tracts, skyMap,
515 exposure.getBBox(), exposure.getWcs())
516 self.log.info('Multiple overlapping externalSkyWcsTractCatalogs found (%s). '
517 'Applying closest to detector center: tract=%s', str(tracts), tracts[ind])
519 externalSkyWcsCatalog = externalSkyWcsTractCatalog[ind]
521 if not self.config.doApplyExternalPhotoCalib:
522 # Do not modify the exposure's PhotoCalib
523 externalPhotoCalibCatalog = None
524 elif self.config.useGlobalExternalPhotoCalib:
525 # Use the global external PhotoCalib
526 externalPhotoCalibCatalog = externalPhotoCalibGlobalCatalog
527 self.log.info('Applying global PhotoCalib')
528 else:
529 # use tract-level external PhotoCalib from the closest overlapping tract
530 inputRef = getattr(inputRefs, 'externalPhotoCalibTractCatalog')
531 tracts = [ref.dataId['tract'] for ref in inputRef]
532 if len(tracts) == 1:
533 ind = 0
534 self.log.info('Applying tract-level PhotoCalib from tract %s', tracts[ind])
535 else:
536 ind = self.getClosestTract(tracts, skyMap,
537 exposure.getBBox(), exposure.getWcs())
538 self.log.info('Multiple overlapping externalPhotoCalibTractCatalogs found (%s). '
539 'Applying closest to detector center: tract=%s', str(tracts), tracts[ind])
541 externalPhotoCalibCatalog = externalPhotoCalibTractCatalog[ind]
543 return self.prepareCalibratedExposure(exposure, externalSkyWcsCatalog, externalPhotoCalibCatalog)
545 def getClosestTract(self, tracts, skyMap, bbox, wcs):
546 """Find the index of the tract closest to detector from list of tractIds
548 Parameters
549 ----------
550 tracts : `list` [`int`]
551 Iterable of integer tractIds
552 skyMap : `lsst.skymap.SkyMap`
553 skyMap to look up tract geometry and wcs
554 bbox : `lsst.geom.Box2I`
555 Detector bbox, the center of which will be compared to tract centers
556 wcs : `lsst.afw.geom.SkyWcs`
557 Detector Wcs object to map the detector center to SkyCoord
559 Returns
560 -------
561 index : `int`
562 """
563 if len(tracts) == 1:
564 return 0
566 center = wcs.pixelToSky(bbox.getCenter())
567 sep = []
568 for tractId in tracts:
569 tract = skyMap[tractId]
570 tractCenter = tract.getWcs().pixelToSky(tract.getBBox().getCenter())
571 sep.append(center.separation(tractCenter))
573 return np.argmin(sep)
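# Hedged sketch of how this tract selection is used (it mirrors attachCalibs
# above; the tract ids are hypothetical):
#
#     tracts = [9812, 9813]
#     ind = self.getClosestTract(tracts, skyMap, exposure.getBBox(), exposure.getWcs())
#     externalSkyWcsCatalog = externalSkyWcsTractCatalog[ind]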
575 def prepareCalibratedExposure(self, exposure, externalSkyWcsCatalog=None, externalPhotoCalibCatalog=None):
576 """Prepare a calibrated exposure and apply external calibrations
577 if so configured.
579 Parameters
580 ----------
581 exposure : `lsst.afw.image.exposure.Exposure`
582 Input exposure to adjust calibrations.
583 externalSkyWcsCatalog : `lsst.afw.table.ExposureCatalog`, optional
584 Exposure catalog with external skyWcs to be applied
585 if config.doApplyExternalSkyWcs=True. Catalog uses the detector id
586 for the catalog id, sorted on id for fast lookup.
587 externalPhotoCalibCatalog : `lsst.afw.table.ExposureCatalog`, optional
588 Exposure catalog with external photoCalib to be applied
589 if config.doApplyExternalPhotoCalib=True. Catalog uses the detector
590 id for the catalog id, sorted on id for fast lookup.
592 Returns
593 -------
594 exposure : `lsst.afw.image.exposure.Exposure`
595 Exposure with adjusted calibrations.
596 """
597 detectorId = exposure.getInfo().getDetector().getId()
599 if externalPhotoCalibCatalog is not None:
600 row = externalPhotoCalibCatalog.find(detectorId)
601 if row is None:
602 self.log.warning("Detector id %s not found in externalPhotoCalibCatalog; "
603 "Using original photoCalib.", detectorId)
604 else:
605 photoCalib = row.getPhotoCalib()
606 if photoCalib is None:
607 self.log.warning("Detector id %s has None for photoCalib in externalPhotoCalibCatalog; "
608 "Using original photoCalib.", detectorId)
609 else:
610 exposure.setPhotoCalib(photoCalib)
612 if externalSkyWcsCatalog is not None:
613 row = externalSkyWcsCatalog.find(detectorId)
614 if row is None:
615 self.log.warning("Detector id %s not found in externalSkyWcsCatalog; "
616 "Using original skyWcs.", detectorId)
617 else:
618 skyWcs = row.getWcs()
619 if skyWcs is None:
620 self.log.warning("Detector id %s has None for skyWcs in externalSkyWcsCatalog; "
621 "Using original skyWcs.", detectorId)
622 else:
623 exposure.setWcs(skyWcs)
625 return exposure
627 def addCalibColumns(self, catalog, exposure, exposureIdInfo, **kwargs):
628 """Add replace columns with calibs evaluated at each centroid
630 Add or replace 'base_LocalWcs' `base_LocalPhotoCalib' columns in a
631 a source catalog, by rerunning the plugins.
633 Parameters
634 ----------
635 catalog : `lsst.afw.table.SourceCatalog`
636 catalog to which calib columns will be added
637 exposure : `lsst.afw.image.exposure.Exposure`
638 Exposure with attached PhotoCalibs and SkyWcs attributes to be
639 reevaluated at local centroids. Pixels are not required.
640 exposureIdInfo : `lsst.obs.base.ExposureIdInfo`
642 Returns
643 -------
644 newCat : `lsst.afw.table.SourceCatalog`
645 Source Catalog with requested local calib columns
646 """
647 measureConfig = SingleFrameMeasurementTask.ConfigClass()
648 measureConfig.doReplaceWithNoise = False
650 measureConfig.plugins.names = []
651 if self.config.doReevaluateSkyWcs:
652 measureConfig.plugins.names.add('base_LocalWcs')
653 self.log.info("Re-evaluating base_LocalWcs plugin")
654 if self.config.doReevaluatePhotoCalib:
655 measureConfig.plugins.names.add('base_LocalPhotoCalib')
656 self.log.info("Re-evaluating base_LocalPhotoCalib plugin")
657 pluginsNotToCopy = tuple(measureConfig.plugins.names)
659 # Create a new schema and catalog
660 # Copy all columns from original except for the ones to reevaluate
661 aliasMap = catalog.schema.getAliasMap()
662 mapper = afwTable.SchemaMapper(catalog.schema)
663 for item in catalog.schema:
664 if not item.field.getName().startswith(pluginsNotToCopy):
665 mapper.addMapping(item.key)
667 schema = mapper.getOutputSchema()
668 measurement = SingleFrameMeasurementTask(config=measureConfig, schema=schema)
669 schema.setAliasMap(aliasMap)
670 newCat = afwTable.SourceCatalog(schema)
671 newCat.extend(catalog, mapper=mapper)
673 # Fluxes in sourceCatalogs are in counts, so there are no fluxes to
674 # update here. LocalPhotoCalibs are applied during transform tasks.
675 # Update coord_ra/coord_dec, which are expected to be positions on the
676 # sky and are used as such in sdm tables without transform
677 if self.config.doReevaluateSkyWcs:
678 afwTable.updateSourceCoords(exposure.wcs, newCat)
680 measurement.run(measCat=newCat, exposure=exposure, exposureId=exposureIdInfo.expId)
682 return newCat
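# Hedged sketch of the effect of ``addCalibColumns``: the returned catalog
# carries the columns produced by the re-run plugins, i.e. names starting with
# ``base_LocalWcs`` and/or ``base_LocalPhotoCalib``, evaluated per source
# against the (possibly externally recalibrated) exposure:
#
#     newCat = self.addCalibColumns(catalog, exposure, exposureIdInfo)
#     localCalib = newCat['base_LocalPhotoCalib']   # plugin output column; exact name assumed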
685class PostprocessAnalysis(object):
686 """Calculate columns from ParquetTable.
688 This object manages and organizes an arbitrary set of computations
689 on a catalog. The catalog is defined by a
690 `lsst.pipe.tasks.parquetTable.ParquetTable` object (or list thereof), such
691 as a `deepCoadd_obj` dataset, and the computations are defined by a
692 collection of `lsst.pipe.tasks.functor.Functor` objects (or, equivalently,
693 a `CompositeFunctor`).
695 After the object is initialized, accessing the `.df` attribute (which
696 holds the `pandas.DataFrame` containing the results of the calculations)
697 triggers computation of said dataframe.
699 One of the conveniences of using this object is the ability to define a
700 desired common filter for all functors. This enables the same functor
701 collection to be passed to several different `PostprocessAnalysis` objects
702 without having to change the original functor collection, since the `filt`
703 keyword argument of this object triggers an overwrite of the `filt`
704 property for all functors in the collection.
706 This object also allows a list of refFlags to be passed, and defines a set
707 of default refFlags that are always included even if not requested.
709 If a list of `ParquetTable` objects is passed, rather than a single one,
710 then the calculations will be mapped over all the input catalogs. In
711 principle, it should be straightforward to parallelize this activity, but
712 initial tests have failed (see TODO in code comments).
714 Parameters
715 ----------
716 parq : `lsst.pipe.tasks.ParquetTable` (or list of such)
717 Source catalog(s) for computation.
719 functors : `list`, `dict`, or `lsst.pipe.tasks.functors.CompositeFunctor`
720 Computations to do (functors that act on `parq`).
721 If a dict, the output
722 DataFrame will have columns keyed accordingly.
723 If a list, the column keys will come from the
724 `.shortname` attribute of each functor.
726 filt : `str`, optional
727 Filter in which to calculate. If provided,
728 this will overwrite any existing `.filt` attribute
729 of the provided functors.
731 flags : `list`, optional
732 List of flags (per-band) to include in output table.
733 Taken from the `meas` dataset if applied to a multilevel Object Table.
735 refFlags : `list`, optional
736 List of refFlags (only reference band) to include in output table.
738 forcedFlags : `list`, optional
739 List of flags (per-band) to include in output table.
740 Taken from the ``forced_src`` dataset if applied to a
741 multilevel Object Table. Intended for flags from measurement plugins
742 only run during multi-band forced-photometry.
743 """
744 _defaultRefFlags = []
745 _defaultFuncs = ()
747 def __init__(self, parq, functors, filt=None, flags=None, refFlags=None, forcedFlags=None):
748 self.parq = parq
749 self.functors = functors
751 self.filt = filt
752 self.flags = list(flags) if flags is not None else []
753 self.forcedFlags = list(forcedFlags) if forcedFlags is not None else []
754 self.refFlags = list(self._defaultRefFlags)
755 if refFlags is not None:
756 self.refFlags += list(refFlags)
758 self._df = None
760 @property
761 def defaultFuncs(self):
762 funcs = dict(self._defaultFuncs)
763 return funcs
765 @property
766 def func(self):
767 additionalFuncs = self.defaultFuncs
768 additionalFuncs.update({flag: Column(flag, dataset='forced_src') for flag in self.forcedFlags})
769 additionalFuncs.update({flag: Column(flag, dataset='ref') for flag in self.refFlags})
770 additionalFuncs.update({flag: Column(flag, dataset='meas') for flag in self.flags})
772 if isinstance(self.functors, CompositeFunctor):
773 func = self.functors
774 else:
775 func = CompositeFunctor(self.functors)
777 func.funcDict.update(additionalFuncs)
778 func.filt = self.filt
780 return func
782 @property
783 def noDupCols(self):
784 return [name for name, func in self.func.funcDict.items() if func.noDup or func.dataset == 'ref']
786 @property
787 def df(self):
788 if self._df is None:
789 self.compute()
790 return self._df
792 def compute(self, dropna=False, pool=None):
793 # map over multiple parquet tables
794 if type(self.parq) in (list, tuple):
795 if pool is None:
796 dflist = [self.func(parq, dropna=dropna) for parq in self.parq]
797 else:
798 # TODO: Figure out why this doesn't work (pyarrow pickling
799 # issues?)
800 dflist = pool.map(functools.partial(self.func, dropna=dropna), self.parq)
801 self._df = pd.concat(dflist)
802 else:
803 self._df = self.func(self.parq, dropna=dropna)
805 return self._df
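# Hedged usage sketch for PostprocessAnalysis, following the functor keywords
# shown in the TransformCatalogBaseTask docstring below (``parq`` is assumed
# to be a ParquetTable for one deepCoadd_obj patch):
#
#     from lsst.pipe.tasks.functors import Mag
#     funcs = {'psfMag': Mag('base_PsfFlux', dataset='meas', filt='HSC-G')}
#     analysis = PostprocessAnalysis(parq, funcs, refFlags=['detect_isPrimary'])
#     df = analysis.df    # lazily computed on first access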
808class TransformCatalogBaseConnections(pipeBase.PipelineTaskConnections,
809 dimensions=()):
810 """Expected Connections for subclasses of TransformCatalogBaseTask.
812 Must be subclassed.
813 """
814 inputCatalog = connectionTypes.Input(
815 name="",
816 storageClass="DataFrame",
817 )
818 outputCatalog = connectionTypes.Output(
819 name="",
820 storageClass="DataFrame",
821 )
824class TransformCatalogBaseConfig(pipeBase.PipelineTaskConfig,
825 pipelineConnections=TransformCatalogBaseConnections):
826 functorFile = pexConfig.Field(
827 dtype=str,
828 doc="Path to YAML file specifying Science Data Model functors to use "
829 "when copying columns and computing calibrated values.",
830 default=None,
831 optional=True
832 )
833 primaryKey = pexConfig.Field(
834 dtype=str,
835 doc="Name of column to be set as the DataFrame index. If None, the index"
836 "will be named `id`",
837 default=None,
838 optional=True
839 )
840 columnsFromDataId = pexConfig.ListField(
841 dtype=str,
842 default=None,
843 optional=True,
844 doc="Columns to extract from the dataId",
845 )
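# Hedged configuration sketch for a concrete subclass (the values mirror
# TransformSourceTableConfig.setDefaults further down in this module):
#
#     config.functorFile = os.path.join('$PIPE_TASKS_DIR', 'schemas', 'Source.yaml')
#     config.primaryKey = 'sourceId'
#     config.columnsFromDataId = ['visit', 'detector', 'band', 'physical_filter']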
848class TransformCatalogBaseTask(pipeBase.PipelineTask):
849 """Base class for transforming/standardizing a catalog
851 by applying functors that convert units and apply calibrations.
852 The purpose of this task is to perform a set of computations on
853 an input `ParquetTable` dataset (such as `deepCoadd_obj`) and write the
854 results to a new dataset (which needs to be declared in an `outputDataset`
855 attribute).
857 The calculations to be performed are defined in a YAML file that specifies
858 a set of functors to be computed, provided as
859 the `functorFile` config parameter. An example of such a YAML file
860 is the following:
862 funcs:
863 psfMag:
864 functor: Mag
865 args:
866 - base_PsfFlux
867 filt: HSC-G
868 dataset: meas
869 cmodel_magDiff:
870 functor: MagDiff
871 args:
872 - modelfit_CModel
873 - base_PsfFlux
874 filt: HSC-G
875 gauss_magDiff:
876 functor: MagDiff
877 args:
878 - base_GaussianFlux
879 - base_PsfFlux
880 filt: HSC-G
881 count:
882 functor: Column
883 args:
884 - base_InputCount_value
885 filt: HSC-G
886 deconvolved_moments:
887 functor: DeconvolvedMoments
888 filt: HSC-G
889 dataset: forced_src
890 refFlags:
891 - calib_psfUsed
892 - merge_measurement_i
893 - merge_measurement_r
894 - merge_measurement_z
895 - merge_measurement_y
896 - merge_measurement_g
897 - base_PixelFlags_flag_inexact_psfCenter
898 - detect_isPrimary
900 The names for each entry under "func" will become the names of columns in
901 the output dataset. All the functors referenced are defined in
902 `lsst.pipe.tasks.functors`. Positional arguments to be passed to each
903 functor are in the `args` list, and any additional entries for each column
904 other than "functor" or "args" (e.g., `'filt'`, `'dataset'`) are treated as
905 keyword arguments to be passed to the functor initialization.
907 The "flags" entry is the default shortcut for `Column` functors.
908 All columns listed under "flags" will be copied to the output table
909 untransformed. They can be of any datatype.
910 In the special case of transforming a multi-level object table with
911 band and dataset indices (deepCoadd_obj), these will be taken from the
912 `meas` dataset and exploded out per band.
914 There are two special shortcuts that only apply when transforming
915 multi-level Object (deepCoadd_obj) tables:
916 - The "refFlags" entry is shortcut for `Column` functor
917 taken from the `'ref'` dataset if transforming an ObjectTable.
918 - The "forcedFlags" entry is shortcut for `Column` functors.
919 taken from the ``forced_src`` dataset if transforming an ObjectTable.
920 These are expanded out per band.
923 This task uses the `lsst.pipe.tasks.postprocess.PostprocessAnalysis` object
924 to organize and execute the calculations.
925 """
926 @property
927 def _DefaultName(self):
928 raise NotImplementedError('Subclass must define "_DefaultName" attribute')
930 @property
931 def outputDataset(self):
932 raise NotImplementedError('Subclass must define "outputDataset" attribute')
934 @property
935 def inputDataset(self):
936 raise NotImplementedError('Subclass must define "inputDataset" attribute')
938 @property
939 def ConfigClass(self):
940 raise NotImplementedError('Subclass must define "ConfigClass" attribute')
942 def __init__(self, *args, **kwargs):
943 super().__init__(*args, **kwargs)
944 if self.config.functorFile:
945 self.log.info('Loading transform functor definitions from %s',
946 self.config.functorFile)
947 self.funcs = CompositeFunctor.from_file(self.config.functorFile)
948 self.funcs.update(dict(PostprocessAnalysis._defaultFuncs))
949 else:
950 self.funcs = None
952 def runQuantum(self, butlerQC, inputRefs, outputRefs):
953 inputs = butlerQC.get(inputRefs)
954 if self.funcs is None:
955 raise ValueError("config.functorFile is None. "
956 "Must be a valid path to yaml in order to run Task as a PipelineTask.")
957 result = self.run(parq=inputs['inputCatalog'], funcs=self.funcs,
958 dataId=outputRefs.outputCatalog.dataId.full)
959 outputs = pipeBase.Struct(outputCatalog=result)
960 butlerQC.put(outputs, outputRefs)
962 def run(self, parq, funcs=None, dataId=None, band=None):
963 """Do postprocessing calculations
965 Takes a `ParquetTable` object and dataId,
966 returns a dataframe with results of postprocessing calculations.
968 Parameters
969 ----------
970 parq : `lsst.pipe.tasks.parquetTable.ParquetTable`
971 ParquetTable from which calculations are done.
972 funcs : `lsst.pipe.tasks.functors.Functors`
973 Functors to apply to the table's columns
974 dataId : dict, optional
975 Used to add a `patchId` column to the output dataframe.
976 band : `str`, optional
977 Filter band that is being processed.
979 Returns
980 -------
981 df : `pandas.DataFrame`
982 """
983 self.log.info("Transforming/standardizing the source table dataId: %s", dataId)
985 df = self.transform(band, parq, funcs, dataId).df
986 self.log.info("Made a table of %d columns and %d rows", len(df.columns), len(df))
987 return df
989 def getFunctors(self):
990 return self.funcs
992 def getAnalysis(self, parq, funcs=None, band=None):
993 if funcs is None:
994 funcs = self.funcs
995 analysis = PostprocessAnalysis(parq, funcs, filt=band)
996 return analysis
998 def transform(self, band, parq, funcs, dataId):
999 analysis = self.getAnalysis(parq, funcs=funcs, band=band)
1000 df = analysis.df
1001 if dataId and self.config.columnsFromDataId:
1002 for key in self.config.columnsFromDataId:
1003 if key in dataId:
1004 df[str(key)] = dataId[key]
1005 else:
1006 raise ValueError(f"'{key}' in config.columnsFromDataId not found in dataId: {dataId}")
1008 if self.config.primaryKey:
1009 if df.index.name != self.config.primaryKey and self.config.primaryKey in df:
1010 df.reset_index(inplace=True, drop=True)
1011 df.set_index(self.config.primaryKey, inplace=True)
1013 return pipeBase.Struct(
1014 df=df,
1015 analysis=analysis
1016 )
1018 def write(self, df, parqRef):
1019 parqRef.put(ParquetTable(dataFrame=df), self.outputDataset)
1021 def writeMetadata(self, dataRef):
1022 """No metadata to write.
1023 """
1024 pass
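# Hedged sketch of the minimal subclass boilerplate (the task name is
# hypothetical; TransformObjectCatalogTask and TransformSourceTableTask below
# are the real examples):
#
#     class TransformFooCatalogTask(TransformCatalogBaseTask):
#         _DefaultName = 'transformFooCatalog'
#         ConfigClass = TransformFooCatalogConfig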
1027class TransformObjectCatalogConnections(pipeBase.PipelineTaskConnections,
1028 defaultTemplates={"coaddName": "deep"},
1029 dimensions=("tract", "patch", "skymap")):
1030 inputCatalog = connectionTypes.Input(
1031 doc="The vertical concatenation of the deepCoadd_{ref|meas|forced_src} catalogs, "
1032 "stored as a DataFrame with a multi-level column index per-patch.",
1033 dimensions=("tract", "patch", "skymap"),
1034 storageClass="DataFrame",
1035 name="{coaddName}Coadd_obj",
1036 deferLoad=True,
1037 )
1038 outputCatalog = connectionTypes.Output(
1039 doc="Per-Patch Object Table of columns transformed from the deepCoadd_obj table per the standard "
1040 "data model.",
1041 dimensions=("tract", "patch", "skymap"),
1042 storageClass="DataFrame",
1043 name="objectTable"
1044 )
1047class TransformObjectCatalogConfig(TransformCatalogBaseConfig,
1048 pipelineConnections=TransformObjectCatalogConnections):
1049 coaddName = pexConfig.Field(
1050 dtype=str,
1051 default="deep",
1052 doc="Name of coadd"
1053 )
1054 # TODO: remove in DM-27177
1055 filterMap = pexConfig.DictField(
1056 keytype=str,
1057 itemtype=str,
1058 default={},
1059 doc=("Dictionary mapping full filter name to short one for column name munging."
1060 "These filters determine the output columns no matter what filters the "
1061 "input data actually contain."),
1062 deprecated=("Coadds are now identified by the band, so this transform is unused."
1063 "Will be removed after v22.")
1064 )
1065 outputBands = pexConfig.ListField(
1066 dtype=str,
1067 default=None,
1068 optional=True,
1069 doc=("These bands and only these bands will appear in the output,"
1070 " NaN-filled if the input does not include them."
1071 " If None, then use all bands found in the input.")
1072 )
1073 camelCase = pexConfig.Field(
1074 dtype=bool,
1075 default=False,
1076 doc=("Write per-band columns names with camelCase, else underscore "
1077 "For example: gPsFlux instead of g_PsFlux.")
1078 )
1079 multilevelOutput = pexConfig.Field(
1080 dtype=bool,
1081 default=False,
1082 doc=("Whether results dataframe should have a multilevel column index (True) or be flat "
1083 "and name-munged (False).")
1084 )
1085 goodFlags = pexConfig.ListField(
1086 dtype=str,
1087 default=[],
1088 doc=("List of 'good' flags that should be set False when populating empty tables. "
1089 "All other flags are considered to be 'bad' flags and will be set to True.")
1090 )
1091 floatFillValue = pexConfig.Field(
1092 dtype=float,
1093 default=np.nan,
1094 doc="Fill value for float fields when populating empty tables."
1095 )
1096 integerFillValue = pexConfig.Field(
1097 dtype=int,
1098 default=-1,
1099 doc="Fill value for integer fields when populating empty tables."
1100 )
1102 def setDefaults(self):
1103 super().setDefaults()
1104 self.functorFile = os.path.join('$PIPE_TASKS_DIR', 'schemas', 'Object.yaml')
1105 self.primaryKey = 'objectId'
1106 self.columnsFromDataId = ['tract', 'patch']
1107 self.goodFlags = ['calib_astrometry_used',
1108 'calib_photometry_reserved',
1109 'calib_photometry_used',
1110 'calib_psf_candidate',
1111 'calib_psf_reserved',
1112 'calib_psf_used']
1115class TransformObjectCatalogTask(TransformCatalogBaseTask):
1116 """Produce a flattened Object Table to match the format specified in
1117 sdm_schemas.
1119 Do the same set of postprocessing calculations on all bands.
1121 This is identical to `TransformCatalogBaseTask`, except that it does
1122 the specified functor calculations for all filters present in the
1123 input `deepCoadd_obj` table. Any specific `"filt"` keywords specified
1124 by the YAML file will be superseded.
1125 """
1126 _DefaultName = "transformObjectCatalog"
1127 ConfigClass = TransformObjectCatalogConfig
1129 def run(self, parq, funcs=None, dataId=None, band=None):
1130 # NOTE: band kwarg is ignored here.
1131 dfDict = {}
1132 analysisDict = {}
1133 templateDf = pd.DataFrame()
1135 if isinstance(parq, DeferredDatasetHandle):
1136 columns = parq.get(component='columns')
1137 inputBands = columns.unique(level=1).values
1138 else:
1139 inputBands = parq.columnLevelNames['band']
1141 outputBands = self.config.outputBands if self.config.outputBands else inputBands
1143 # Perform transform for data of filters that exist in parq.
1144 for inputBand in inputBands:
1145 if inputBand not in outputBands:
1146 self.log.info("Ignoring %s band data in the input", inputBand)
1147 continue
1148 self.log.info("Transforming the catalog of band %s", inputBand)
1149 result = self.transform(inputBand, parq, funcs, dataId)
1150 dfDict[inputBand] = result.df
1151 analysisDict[inputBand] = result.analysis
1152 if templateDf.empty:
1153 templateDf = result.df
1155 # Put filler values in columns of other wanted bands
1156 for filt in outputBands:
1157 if filt not in dfDict:
1158 self.log.info("Adding empty columns for band %s", filt)
1159 dfTemp = templateDf.copy()
1160 for col in dfTemp.columns:
1161 testValue = dfTemp[col].values[0]
1162 if isinstance(testValue, (np.bool_, pd.BooleanDtype)):
1163 # Boolean flag type, check if it is a "good" flag
1164 if col in self.config.goodFlags:
1165 fillValue = False
1166 else:
1167 fillValue = True
1168 elif isinstance(testValue, numbers.Integral):
1169 # Checking numbers.Integral catches all flavors
1170 # of python, numpy, pandas, etc. integers.
1171 # We must ensure this is not an unsigned integer.
1172 if isinstance(testValue, np.unsignedinteger):
1173 raise ValueError("Parquet tables may not have unsigned integer columns.")
1174 else:
1175 fillValue = self.config.integerFillValue
1176 else:
1177 fillValue = self.config.floatFillValue
1178 dfTemp[col].values[:] = fillValue
1179 dfDict[filt] = dfTemp
1181 # This makes a multilevel column index, with band as first level
1182 df = pd.concat(dfDict, axis=1, names=['band', 'column'])
1184 if not self.config.multilevelOutput:
1185 noDupCols = list(set.union(*[set(v.noDupCols) for v in analysisDict.values()]))
1186 if self.config.primaryKey in noDupCols:
1187 noDupCols.remove(self.config.primaryKey)
1188 if dataId and self.config.columnsFromDataId:
1189 noDupCols += self.config.columnsFromDataId
1190 df = flattenFilters(df, noDupCols=noDupCols, camelCase=self.config.camelCase,
1191 inputBands=inputBands)
1193 self.log.info("Made a table of %d columns and %d rows", len(df.columns), len(df))
1195 return df
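# Hedged configuration sketch for the flattened output (the band list is
# hypothetical; the naming follows the camelCase config doc above):
#
#     config.outputBands = ['g', 'r', 'i']   # missing bands get fill values per goodFlags etc.
#     config.camelCase = True                # 'gPsFlux' instead of 'g_PsFlux'
#     config.multilevelOutput = False        # flat, name-munged columns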
1198class TractObjectDataIdContainer(CoaddDataIdContainer):
1200 def makeDataRefList(self, namespace):
1201 """Make self.refList from self.idList
1203 Generate a list of data references given tract and/or patch.
1204 This was adapted from `TractQADataIdContainer`, which was
1205 `TractDataIdContainer` modified to not require "filter".
1206 Only existing dataRefs are returned.
1207 """
1208 def getPatchRefList(tract):
1209 return [namespace.butler.dataRef(datasetType=self.datasetType,
1210 tract=tract.getId(),
1211 patch="%d,%d" % patch.getIndex()) for patch in tract]
1213 tractRefs = defaultdict(list) # Data references for each tract
1214 for dataId in self.idList:
1215 skymap = self.getSkymap(namespace)
1217 if "tract" in dataId:
1218 tractId = dataId["tract"]
1219 if "patch" in dataId:
1220 tractRefs[tractId].append(namespace.butler.dataRef(datasetType=self.datasetType,
1221 tract=tractId,
1222 patch=dataId['patch']))
1223 else:
1224 tractRefs[tractId] += getPatchRefList(skymap[tractId])
1225 else:
1226 tractRefs = dict((tract.getId(), tractRefs.get(tract.getId(), []) + getPatchRefList(tract))
1227 for tract in skymap)
1228 outputRefList = []
1229 for tractRefList in tractRefs.values():
1230 existingRefs = [ref for ref in tractRefList if ref.datasetExists()]
1231 outputRefList.append(existingRefs)
1233 self.refList = outputRefList
1236class ConsolidateObjectTableConnections(pipeBase.PipelineTaskConnections,
1237 dimensions=("tract", "skymap")):
1238 inputCatalogs = connectionTypes.Input(
1239 doc="Per-Patch objectTables conforming to the standard data model.",
1240 name="objectTable",
1241 storageClass="DataFrame",
1242 dimensions=("tract", "patch", "skymap"),
1243 multiple=True,
1244 )
1245 outputCatalog = connectionTypes.Output(
1246 doc="Pre-tract horizontal concatenation of the input objectTables",
1247 name="objectTable_tract",
1248 storageClass="DataFrame",
1249 dimensions=("tract", "skymap"),
1250 )
1253class ConsolidateObjectTableConfig(pipeBase.PipelineTaskConfig,
1254 pipelineConnections=ConsolidateObjectTableConnections):
1255 coaddName = pexConfig.Field(
1256 dtype=str,
1257 default="deep",
1258 doc="Name of coadd"
1259 )
1262class ConsolidateObjectTableTask(CmdLineTask, pipeBase.PipelineTask):
1263 """Write patch-merged source tables to a tract-level parquet file.
1265 Concatenates the `objectTable` list into a per-tract `objectTable_tract`.
1266 """
1267 _DefaultName = "consolidateObjectTable"
1268 ConfigClass = ConsolidateObjectTableConfig
1270 inputDataset = 'objectTable'
1271 outputDataset = 'objectTable_tract'
1273 def runQuantum(self, butlerQC, inputRefs, outputRefs):
1274 inputs = butlerQC.get(inputRefs)
1275 self.log.info("Concatenating %s per-patch Object Tables",
1276 len(inputs['inputCatalogs']))
1277 df = pd.concat(inputs['inputCatalogs'])
1278 butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs)
1280 @classmethod
1281 def _makeArgumentParser(cls):
1282 parser = ArgumentParser(name=cls._DefaultName)
1284 parser.add_id_argument("--id", cls.inputDataset,
1285 help="data ID, e.g. --id tract=12345",
1286 ContainerClass=TractObjectDataIdContainer)
1287 return parser
1289 def runDataRef(self, patchRefList):
1290 df = pd.concat([patchRef.get().toDataFrame() for patchRef in patchRefList])
1291 patchRefList[0].put(ParquetTable(dataFrame=df), self.outputDataset)
1293 def writeMetadata(self, dataRef):
1294 """No metadata to write.
1295 """
1296 pass
1299class TransformSourceTableConnections(pipeBase.PipelineTaskConnections,
1300 defaultTemplates={"catalogType": ""},
1301 dimensions=("instrument", "visit", "detector")):
1303 inputCatalog = connectionTypes.Input(
1304 doc="Wide input catalog of sources produced by WriteSourceTableTask",
1305 name="{catalogType}source",
1306 storageClass="DataFrame",
1307 dimensions=("instrument", "visit", "detector"),
1308 deferLoad=True
1309 )
1310 outputCatalog = connectionTypes.Output(
1311 doc="Narrower, per-detector Source Table transformed and converted per a "
1312 "specified set of functors",
1313 name="{catalogType}sourceTable",
1314 storageClass="DataFrame",
1315 dimensions=("instrument", "visit", "detector")
1316 )
1319class TransformSourceTableConfig(TransformCatalogBaseConfig,
1320 pipelineConnections=TransformSourceTableConnections):
1322 def setDefaults(self):
1323 super().setDefaults()
1324 self.functorFile = os.path.join('$PIPE_TASKS_DIR', 'schemas', 'Source.yaml')
1325 self.primaryKey = 'sourceId'
1326 self.columnsFromDataId = ['visit', 'detector', 'band', 'physical_filter']
1329class TransformSourceTableTask(TransformCatalogBaseTask):
1330 """Transform/standardize a source catalog
1331 """
1332 _DefaultName = "transformSourceTable"
1333 ConfigClass = TransformSourceTableConfig
1336class ConsolidateVisitSummaryConnections(pipeBase.PipelineTaskConnections,
1337 dimensions=("instrument", "visit",),
1338 defaultTemplates={"calexpType": ""}):
1339 calexp = connectionTypes.Input(
1340 doc="Processed exposures used for metadata",
1341 name="{calexpType}calexp",
1342 storageClass="ExposureF",
1343 dimensions=("instrument", "visit", "detector"),
1344 deferLoad=True,
1345 multiple=True,
1346 )
1347 visitSummary = connectionTypes.Output(
1348 doc=("Per-visit consolidated exposure metadata. These catalogs use "
1349 "detector id for the id and are sorted for fast lookups of a "
1350 "detector."),
1351 name="{calexpType}visitSummary",
1352 storageClass="ExposureCatalog",
1353 dimensions=("instrument", "visit"),
1354 )
1357class ConsolidateVisitSummaryConfig(pipeBase.PipelineTaskConfig,
1358 pipelineConnections=ConsolidateVisitSummaryConnections):
1359 """Config for ConsolidateVisitSummaryTask"""
1360 pass
1363class ConsolidateVisitSummaryTask(pipeBase.PipelineTask, pipeBase.CmdLineTask):
1364 """Task to consolidate per-detector visit metadata.
1366 This task aggregates the following metadata from all the detectors in a
1367 single visit into an exposure catalog:
1368 - The visitInfo.
1369 - The wcs.
1370 - The photoCalib.
1371 - The physical_filter and band (if available).
1372 - The psf size, shape, and effective area at the center of the detector.
1373 - The corners of the bounding box in right ascension/declination.
1375 Other quantities such as Detector, Psf, ApCorrMap, and TransmissionCurve
1376 are not persisted here because of storage concerns, and because of their
1377 limited utility as summary statistics.
1379 Tests for this task are performed in ci_hsc_gen3.
1380 """
1381 _DefaultName = "consolidateVisitSummary"
1382 ConfigClass = ConsolidateVisitSummaryConfig
1384 @classmethod
1385 def _makeArgumentParser(cls):
1386 parser = ArgumentParser(name=cls._DefaultName)
1388 parser.add_id_argument("--id", "calexp",
1389 help="data ID, e.g. --id visit=12345",
1390 ContainerClass=VisitDataIdContainer)
1391 return parser
1393 def writeMetadata(self, dataRef):
1394 """No metadata to persist, so override to remove metadata persistance.
1395 """
1396 pass
1398 def writeConfig(self, butler, clobber=False, doBackup=True):
1399 """No config to persist, so override to remove config persistance.
1400 """
1401 pass
1403 def runDataRef(self, dataRefList):
1404 visit = dataRefList[0].dataId['visit']
1406 self.log.debug("Concatenating metadata from %d per-detector calexps (visit %d)",
1407 len(dataRefList), visit)
1409 expCatalog = self._combineExposureMetadata(visit, dataRefList, isGen3=False)
1411 dataRefList[0].put(expCatalog, 'visitSummary', visit=visit)
1413 def runQuantum(self, butlerQC, inputRefs, outputRefs):
1414 dataRefs = butlerQC.get(inputRefs.calexp)
1415 visit = dataRefs[0].dataId.byName()['visit']
1417 self.log.debug("Concatenating metadata from %d per-detector calexps (visit %d)",
1418 len(dataRefs), visit)
1420 expCatalog = self._combineExposureMetadata(visit, dataRefs)
1422 butlerQC.put(expCatalog, outputRefs.visitSummary)
1424 def _combineExposureMetadata(self, visit, dataRefs, isGen3=True):
1425 """Make a combined exposure catalog from a list of dataRefs.
1426 These dataRefs must point to exposures with wcs, summaryStats,
1427 and other visit metadata.
1429 Parameters
1430 ----------
1431 visit : `int`
1432 Visit identification number.
1433 dataRefs : `list`
1434 List of dataRefs in visit. May be list of
1435 `lsst.daf.persistence.ButlerDataRef` (Gen2) or
1436 `lsst.daf.butler.DeferredDatasetHandle` (Gen3).
1437 isGen3 : `bool`, optional
1438 Specifies if this is a Gen3 list of datarefs.
1440 Returns
1441 -------
1442 visitSummary : `lsst.afw.table.ExposureCatalog`
1443 Exposure catalog with per-detector summary information.
1444 """
1445 schema = self._makeVisitSummarySchema()
1446 cat = afwTable.ExposureCatalog(schema)
1447 cat.resize(len(dataRefs))
1449 cat['visit'] = visit
1451 for i, dataRef in enumerate(dataRefs):
1452 if isGen3:
1453 visitInfo = dataRef.get(component='visitInfo')
1454 filterLabel = dataRef.get(component='filter')
1455 summaryStats = dataRef.get(component='summaryStats')
1456 detector = dataRef.get(component='detector')
1457 wcs = dataRef.get(component='wcs')
1458 photoCalib = dataRef.get(component='photoCalib')
1460 bbox = dataRef.get(component='bbox')
1461 validPolygon = dataRef.get(component='validPolygon')
1462 else:
1463 # Note that we need to read the calexp because there is
1464 # no magic access to the psf except through the exposure.
1465 gen2_read_bbox = lsst.geom.BoxI(lsst.geom.PointI(0, 0), lsst.geom.PointI(1, 1))
1466 exp = dataRef.get(datasetType='calexp_sub', bbox=gen2_read_bbox)
1467 visitInfo = exp.getInfo().getVisitInfo()
1468 filterLabel = dataRef.get("calexp_filter")
1469 summaryStats = exp.getInfo().getSummaryStats()
1470 wcs = exp.getWcs()
1471 photoCalib = exp.getPhotoCalib()
1472 detector = exp.getDetector()
1473 bbox = dataRef.get(datasetType='calexp_bbox')
1474 validPolygon = exp.getInfo().getValidPolygon()
1476 rec = cat[i]
1477 rec.setBBox(bbox)
1478 rec.setVisitInfo(visitInfo)
1479 rec.setWcs(wcs)
1480 rec.setPhotoCalib(photoCalib)
1481 rec.setValidPolygon(validPolygon)
1483 rec['physical_filter'] = filterLabel.physicalLabel if filterLabel.hasPhysicalLabel() else ""
1484 rec['band'] = filterLabel.bandLabel if filterLabel.hasBandLabel() else ""
1485 rec.setId(detector.getId())
1486 rec['psfSigma'] = summaryStats.psfSigma
1487 rec['psfIxx'] = summaryStats.psfIxx
1488 rec['psfIyy'] = summaryStats.psfIyy
1489 rec['psfIxy'] = summaryStats.psfIxy
1490 rec['psfArea'] = summaryStats.psfArea
1491 rec['raCorners'][:] = summaryStats.raCorners
1492 rec['decCorners'][:] = summaryStats.decCorners
1493 rec['ra'] = summaryStats.ra
1494 rec['decl'] = summaryStats.decl
1495 rec['zenithDistance'] = summaryStats.zenithDistance
1496 rec['zeroPoint'] = summaryStats.zeroPoint
1497 rec['skyBg'] = summaryStats.skyBg
1498 rec['skyNoise'] = summaryStats.skyNoise
1499 rec['meanVar'] = summaryStats.meanVar
1500 rec['astromOffsetMean'] = summaryStats.astromOffsetMean
1501 rec['astromOffsetStd'] = summaryStats.astromOffsetStd
1502 rec['nPsfStar'] = summaryStats.nPsfStar
1503 rec['psfStarDeltaE1Median'] = summaryStats.psfStarDeltaE1Median
1504 rec['psfStarDeltaE2Median'] = summaryStats.psfStarDeltaE2Median
1505 rec['psfStarDeltaE1Scatter'] = summaryStats.psfStarDeltaE1Scatter
1506 rec['psfStarDeltaE2Scatter'] = summaryStats.psfStarDeltaE2Scatter
1507 rec['psfStarDeltaSizeMedian'] = summaryStats.psfStarDeltaSizeMedian
1508 rec['psfStarDeltaSizeScatter'] = summaryStats.psfStarDeltaSizeScatter
1509 rec['psfStarScaledDeltaSizeScatter'] = summaryStats.psfStarScaledDeltaSizeScatter
1511 metadata = dafBase.PropertyList()
1512 metadata.add("COMMENT", "Catalog id is detector id, sorted.")
1513 # We are looping over existing datarefs, so the following is true
1514 metadata.add("COMMENT", "Only detectors with data have entries.")
1515 cat.setMetadata(metadata)
1517 cat.sort()
1518 return cat
1520 def _makeVisitSummarySchema(self):
1521 """Make the schema for the visitSummary catalog."""
1522 schema = afwTable.ExposureTable.makeMinimalSchema()
1523 schema.addField('visit', type='L', doc='Visit number')
1524 schema.addField('physical_filter', type='String', size=32, doc='Physical filter')
1525 schema.addField('band', type='String', size=32, doc='Name of band')
1526 schema.addField('psfSigma', type='F',
1527 doc='PSF model second-moments determinant radius (center of chip) (pixel)')
1528 schema.addField('psfArea', type='F',
1529 doc='PSF model effective area (center of chip) (pixel**2)')
1530 schema.addField('psfIxx', type='F',
1531 doc='PSF model Ixx (center of chip) (pixel**2)')
1532 schema.addField('psfIyy', type='F',
1533 doc='PSF model Iyy (center of chip) (pixel**2)')
1534 schema.addField('psfIxy', type='F',
1535 doc='PSF model Ixy (center of chip) (pixel**2)')
1536 schema.addField('raCorners', type='ArrayD', size=4,
1537 doc='Right Ascension of bounding box corners (degrees)')
1538 schema.addField('decCorners', type='ArrayD', size=4,
1539 doc='Declination of bounding box corners (degrees)')
1540 schema.addField('ra', type='D',
1541 doc='Right Ascension of bounding box center (degrees)')
1542 schema.addField('decl', type='D',
1543 doc='Declination of bounding box center (degrees)')
1544 schema.addField('zenithDistance', type='F',
1545 doc='Zenith distance of bounding box center (degrees)')
1546 schema.addField('zeroPoint', type='F',
1547 doc='Mean zeropoint in detector (mag)')
1548 schema.addField('skyBg', type='F',
1549 doc='Average sky background (ADU)')
1550 schema.addField('skyNoise', type='F',
1551 doc='Average sky noise (ADU)')
1552 schema.addField('meanVar', type='F',
1553 doc='Mean variance of the weight plane (ADU**2)')
1554 schema.addField('astromOffsetMean', type='F',
1555 doc='Mean offset of astrometric calibration matches (arcsec)')
1556 schema.addField('astromOffsetStd', type='F',
1557 doc='Standard deviation of offsets of astrometric calibration matches (arcsec)')
1558 schema.addField('nPsfStar', type='I', doc='Number of stars used for PSF model')
1559 schema.addField('psfStarDeltaE1Median', type='F',
1560 doc='Median E1 residual (starE1 - psfE1) for psf stars')
1561 schema.addField('psfStarDeltaE2Median', type='F',
1562 doc='Median E2 residual (starE2 - psfE2) for psf stars')
1563 schema.addField('psfStarDeltaE1Scatter', type='F',
1564 doc='Scatter (via MAD) of E1 residual (starE1 - psfE1) for psf stars')
1565 schema.addField('psfStarDeltaE2Scatter', type='F',
1566 doc='Scatter (via MAD) of E2 residual (starE2 - psfE2) for psf stars')
1567 schema.addField('psfStarDeltaSizeMedian', type='F',
1568 doc='Median size residual (starSize - psfSize) for psf stars (pixel)')
1569 schema.addField('psfStarDeltaSizeScatter', type='F',
1570 doc='Scatter (via MAD) of size residual (starSize - psfSize) for psf stars (pixel)')
1571 schema.addField('psfStarScaledDeltaSizeScatter', type='F',
1572 doc='Scatter (via MAD) of size residual scaled by median size squared')
1574 return schema
1577class VisitDataIdContainer(DataIdContainer):
1578 """DataIdContainer that groups sensor-level ids by visit.
1579 """
1581 def makeDataRefList(self, namespace):
1582 """Make self.refList from self.idList
1584 Generate a list of data references grouped by visit.
1586 Parameters
1587 ----------
1588 namespace : `argparse.Namespace`
1589 Namespace used by `lsst.pipe.base.CmdLineTask` to parse command
1590 line arguments.
1591 """
1592 # Group by visits
1593 visitRefs = defaultdict(list)
1594 for dataId in self.idList:
1595 if "visit" in dataId:
1596 visitId = dataId["visit"]
1597 # Append all data references from this subset to the visit's list.
1598 subset = namespace.butler.subset(self.datasetType, dataId=dataId)
1599 visitRefs[visitId].extend([dataRef for dataRef in subset])
1601 outputRefList = []
1602 for refList in visitRefs.values():
1603 existingRefs = [ref for ref in refList if ref.datasetExists()]
1604 if existingRefs:
1605 outputRefList.append(existingRefs)
1607 self.refList = outputRefList
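# Illustrative example of the grouping performed above (data ID values are
# made up): idList entries {visit: 1228, detector: 10} and
# {visit: 1228, detector: 11} end up as a single refList entry for
# visit 1228 containing both detector-level data references.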
1610class ConsolidateSourceTableConnections(pipeBase.PipelineTaskConnections,
1611 defaultTemplates={"catalogType": ""},
1612 dimensions=("instrument", "visit")):
1613 inputCatalogs = connectionTypes.Input(
1614 doc="Input per-detector Source Tables",
1615 name="{catalogType}sourceTable",
1616 storageClass="DataFrame",
1617 dimensions=("instrument", "visit", "detector"),
1618 multiple=True
1619 )
1620 outputCatalog = connectionTypes.Output(
1621 doc="Per-visit concatenation of Source Table",
1622 name="{catalogType}sourceTable_visit",
1623 storageClass="DataFrame",
1624 dimensions=("instrument", "visit")
1625 )
1628class ConsolidateSourceTableConfig(pipeBase.PipelineTaskConfig,
1629 pipelineConnections=ConsolidateSourceTableConnections):
1630 pass
1633class ConsolidateSourceTableTask(CmdLineTask, pipeBase.PipelineTask):
1634 """Concatenate `sourceTable` list into a per-visit `sourceTable_visit`
1635 """
1636 _DefaultName = 'consolidateSourceTable'
1637 ConfigClass = ConsolidateSourceTableConfig
1639 inputDataset = 'sourceTable'
1640 outputDataset = 'sourceTable_visit'
1642 def runQuantum(self, butlerQC, inputRefs, outputRefs):
1643 from .makeCoaddTempExp import reorderRefs
1645 detectorOrder = [ref.dataId['detector'] for ref in inputRefs.inputCatalogs]
1646 detectorOrder.sort()
1647 inputRefs = reorderRefs(inputRefs, detectorOrder, dataIdKey='detector')
1648 inputs = butlerQC.get(inputRefs)
1649 self.log.info("Concatenating %s per-detector Source Tables",
1650 len(inputs['inputCatalogs']))
1651 df = pd.concat(inputs['inputCatalogs'])
1652 butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs)
1654 def runDataRef(self, dataRefList):
1655 self.log.info("Concatenating %s per-detector Source Tables", len(dataRefList))
1656 df = pd.concat([dataRef.get().toDataFrame() for dataRef in dataRefList])
1657 dataRefList[0].put(ParquetTable(dataFrame=df), self.outputDataset)
1659 @classmethod
1660 def _makeArgumentParser(cls):
1661 parser = ArgumentParser(name=cls._DefaultName)
1663 parser.add_id_argument("--id", cls.inputDataset,
1664 help="data ID, e.g. --id visit=12345",
1665 ContainerClass=VisitDataIdContainer)
1666 return parser
1668 def writeMetadata(self, dataRef):
1669 """No metadata to write.
1670 """
1671 pass
1673 def writeConfig(self, butler, clobber=False, doBackup=True):
1674 """No config to write.
1675 """
1676 pass
1679class MakeCcdVisitTableConnections(pipeBase.PipelineTaskConnections,
1680 dimensions=("instrument",),
1681 defaultTemplates={"calexpType": ""}):
1682 visitSummaryRefs = connectionTypes.Input(
1683 doc="Data references for per-visit consolidated exposure metadata from ConsolidateVisitSummaryTask",
1684 name="{calexpType}visitSummary",
1685 storageClass="ExposureCatalog",
1686 dimensions=("instrument", "visit"),
1687 multiple=True,
1688 deferLoad=True,
1689 )
1690 outputCatalog = connectionTypes.Output(
1691 doc="CCD and Visit metadata table",
1692 name="ccdVisitTable",
1693 storageClass="DataFrame",
1694 dimensions=("instrument",)
1695 )
1698class MakeCcdVisitTableConfig(pipeBase.PipelineTaskConfig,
1699 pipelineConnections=MakeCcdVisitTableConnections):
1700 pass
1703class MakeCcdVisitTableTask(CmdLineTask, pipeBase.PipelineTask):
1704 """Produce a `ccdVisitTable` from the `visitSummary` exposure catalogs.
1705 """
1706 _DefaultName = 'makeCcdVisitTable'
1707 ConfigClass = MakeCcdVisitTableConfig
1709 def run(self, visitSummaryRefs):
1710 """Make a table of ccd information from the `visitSummary` catalogs.
1712 Parameters
1713 ----------
1714 visitSummaryRefs : `list` of `lsst.daf.butler.DeferredDatasetHandle`
1715 List of DeferredDatasetHandles pointing to exposure catalogs with
1716 per-detector summary information.
1718 Returns
1719 -------
1720 result : `lsst.pipe.base.Struct`
1721 Results struct with attribute:
1723 ``outputCatalog``
1724 Catalog of ccd and visit information.
1725 """
1726 ccdEntries = []
1727 for visitSummaryRef in visitSummaryRefs:
1728 visitSummary = visitSummaryRef.get()
1729 visitInfo = visitSummary[0].getVisitInfo()
1731 ccdEntry = {}
1732 summaryTable = visitSummary.asAstropy()
1733 selectColumns = ['id', 'visit', 'physical_filter', 'band', 'ra', 'decl', 'zenithDistance',
1734 'zeroPoint', 'psfSigma', 'skyBg', 'skyNoise',
1735 'astromOffsetMean', 'astromOffsetStd', 'nPsfStar',
1736 'psfStarDeltaE1Median', 'psfStarDeltaE2Median',
1737 'psfStarDeltaE1Scatter', 'psfStarDeltaE2Scatter',
1738 'psfStarDeltaSizeMedian', 'psfStarDeltaSizeScatter',
1739 'psfStarScaledDeltaSizeScatter']
1740 ccdEntry = summaryTable[selectColumns].to_pandas().set_index('id')
1741 # 'visit' is the human-readable visit number.
1742 # 'visitId' is the key into the visit table; here they are identical.
1743 # Strictly, 'visit' should be obtained by joining to the visit
1744 # table on 'visitId'.
1745 ccdEntry = ccdEntry.rename(columns={"visit": "visitId"})
1746 dataIds = [DataCoordinate.standardize(visitSummaryRef.dataId, detector=id) for id in
1747 summaryTable['id']]
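# Pack each (visit, detector) data ID into a single integer ccdVisitId.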
1748 packer = visitSummaryRef.dataId.universe.makePacker('visit_detector', visitSummaryRef.dataId)
1749 ccdVisitIds = [packer.pack(dataId) for dataId in dataIds]
1750 ccdEntry['ccdVisitId'] = ccdVisitIds
1751 ccdEntry['detector'] = summaryTable['id']
1752 pixToArcseconds = np.array([vR.getWcs().getPixelScale().asArcseconds() for vR in visitSummary])
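# sqrt(8 ln 2) ~= 2.355 converts a Gaussian sigma to a FWHM, so the
# 'seeing' column below is the PSF model FWHM in arcseconds.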
1753 ccdEntry["seeing"] = visitSummary['psfSigma'] * np.sqrt(8 * np.log(2)) * pixToArcseconds
1755 ccdEntry["skyRotation"] = visitInfo.getBoresightRotAngle().asDegrees()
1756 ccdEntry["expMidpt"] = visitInfo.getDate().toPython()
1757 ccdEntry["expMidptMJD"] = visitInfo.getDate().get(dafBase.DateTime.MJD)
1758 expTime = visitInfo.getExposureTime()
1759 ccdEntry['expTime'] = expTime
1760 ccdEntry["obsStart"] = ccdEntry["expMidpt"] - 0.5 * pd.Timedelta(seconds=expTime)
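# Convert the exposure time from seconds to days to offset the midpoint MJD.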
1761 expTime_days = expTime / (60*60*24)
1762 ccdEntry["obsStartMJD"] = ccdEntry["expMidptMJD"] - 0.5 * expTime_days
1763 ccdEntry['darkTime'] = visitInfo.getDarkTime()
1764 ccdEntry['xSize'] = summaryTable['bbox_max_x'] - summaryTable['bbox_min_x']
1765 ccdEntry['ySize'] = summaryTable['bbox_max_y'] - summaryTable['bbox_min_y']
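# Unpack the four bounding-box corners: indices 0-3 are taken as lower-left,
# upper-left, upper-right, and lower-right, respectively.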
1766 ccdEntry['llcra'] = summaryTable['raCorners'][:, 0]
1767 ccdEntry['llcdec'] = summaryTable['decCorners'][:, 0]
1768 ccdEntry['ulcra'] = summaryTable['raCorners'][:, 1]
1769 ccdEntry['ulcdec'] = summaryTable['decCorners'][:, 1]
1770 ccdEntry['urcra'] = summaryTable['raCorners'][:, 2]
1771 ccdEntry['urcdec'] = summaryTable['decCorners'][:, 2]
1772 ccdEntry['lrcra'] = summaryTable['raCorners'][:, 3]
1773 ccdEntry['lrcdec'] = summaryTable['decCorners'][:, 3]
1774 # TODO: DM-30618, Add raftName, nExposures, ccdTemp, binX, binY,
1775 # and flags, and decide if WCS, and llcx, llcy, ulcx, ulcy, etc.
1776 # values are actually wanted.
1777 ccdEntries.append(ccdEntry)
1779 outputCatalog = pd.concat(ccdEntries)
1780 outputCatalog.set_index('ccdVisitId', inplace=True, verify_integrity=True)
1781 return pipeBase.Struct(outputCatalog=outputCatalog)
1784class MakeVisitTableConnections(pipeBase.PipelineTaskConnections,
1785 dimensions=("instrument",),
1786 defaultTemplates={"calexpType": ""}):
1787 visitSummaries = connectionTypes.Input(
1788 doc="Per-visit consolidated exposure metadata from ConsolidateVisitSummaryTask",
1789 name="{calexpType}visitSummary",
1790 storageClass="ExposureCatalog",
1791 dimensions=("instrument", "visit",),
1792 multiple=True,
1793 deferLoad=True,
1794 )
1795 outputCatalog = connectionTypes.Output(
1796 doc="Visit metadata table",
1797 name="visitTable",
1798 storageClass="DataFrame",
1799 dimensions=("instrument",)
1800 )
1803class MakeVisitTableConfig(pipeBase.PipelineTaskConfig,
1804 pipelineConnections=MakeVisitTableConnections):
1805 pass
1808class MakeVisitTableTask(CmdLineTask, pipeBase.PipelineTask):
1809 """Produce a `visitTable` from the `visitSummary` exposure catalogs.
1810 """
1811 _DefaultName = 'makeVisitTable'
1812 ConfigClass = MakeVisitTableConfig
1814 def run(self, visitSummaries):
1815 """Make a table of visit information from the `visitSummary` catalogs.
1817 Parameters
1818 ----------
1819 visitSummaries : `list` of `lsst.afw.table.ExposureCatalog`
1820 List of exposure catalogs with per-detector summary information.
1821 Returns
1822 -------
1823 result : `lsst.pipe.base.Struct`
1824 Results struct with attribute:
1826 ``outputCatalog``
1827 Catalog of visit information.
1828 """
1829 visitEntries = []
1830 for visitSummary in visitSummaries:
1831 visitSummary = visitSummary.get()
1832 visitRow = visitSummary[0]
1833 visitInfo = visitRow.getVisitInfo()
1835 visitEntry = {}
1836 visitEntry["visitId"] = visitRow['visit']
1837 visitEntry["visit"] = visitRow['visit']
1838 visitEntry["physical_filter"] = visitRow['physical_filter']
1839 visitEntry["band"] = visitRow['band']
1840 raDec = visitInfo.getBoresightRaDec()
1841 visitEntry["ra"] = raDec.getRa().asDegrees()
1842 visitEntry["decl"] = raDec.getDec().asDegrees()
1843 visitEntry["skyRotation"] = visitInfo.getBoresightRotAngle().asDegrees()
1844 azAlt = visitInfo.getBoresightAzAlt()
1845 visitEntry["azimuth"] = azAlt.getLongitude().asDegrees()
1846 visitEntry["altitude"] = azAlt.getLatitude().asDegrees()
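# Zenith distance is the complement of the boresight altitude.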
1847 visitEntry["zenithDistance"] = 90 - azAlt.getLatitude().asDegrees()
1848 visitEntry["airmass"] = visitInfo.getBoresightAirmass()
1849 expTime = visitInfo.getExposureTime()
1850 visitEntry["expTime"] = expTime
1851 visitEntry["expMidpt"] = visitInfo.getDate().toPython()
1852 visitEntry["expMidptMJD"] = visitInfo.getDate().get(dafBase.DateTime.MJD)
1853 visitEntry["obsStart"] = visitEntry["expMidpt"] - 0.5 * pd.Timedelta(seconds=expTime)
1854 expTime_days = expTime / (60*60*24)
1855 visitEntry["obsStartMJD"] = visitEntry["expMidptMJD"] - 0.5 * expTime_days
1856 visitEntries.append(visitEntry)
1858 # TODO: DM-30623, Add programId, exposureType, cameraTemp,
1859 # mirror1Temp, mirror2Temp, mirror3Temp, domeTemp, externalTemp,
1860 # dimmSeeing, pwvGPS, pwvMW, flags, nExposures.
1862 outputCatalog = pd.DataFrame(data=visitEntries)
1863 outputCatalog.set_index('visitId', inplace=True, verify_integrity=True)
1864 return pipeBase.Struct(outputCatalog=outputCatalog)
1867class WriteForcedSourceTableConnections(pipeBase.PipelineTaskConnections,
1868 dimensions=("instrument", "visit", "detector", "skymap", "tract")):
1870 inputCatalog = connectionTypes.Input(
1871 doc="Primary per-detector, single-epoch forced-photometry catalog. "
1872 "By default, it is the output of ForcedPhotCcdTask on calexps",
1873 name="forced_src",
1874 storageClass="SourceCatalog",
1875 dimensions=("instrument", "visit", "detector", "skymap", "tract")
1876 )
1877 inputCatalogDiff = connectionTypes.Input(
1878 doc="Secondary multi-epoch, per-detector, forced photometry catalog. "
1879 "By default, it is the output of ForcedPhotCcdTask run on image differences.",
1880 name="forced_diff",
1881 storageClass="SourceCatalog",
1882 dimensions=("instrument", "visit", "detector", "skymap", "tract")
1883 )
1884 outputCatalog = connectionTypes.Output(
1885 doc="InputCatalogs horizonatally joined on `objectId` in Parquet format",
1886 name="mergedForcedSource",
1887 storageClass="DataFrame",
1888 dimensions=("instrument", "visit", "detector", "skymap", "tract")
1889 )
1892class WriteForcedSourceTableConfig(pipeBase.PipelineTaskConfig,
1893 pipelineConnections=WriteForcedSourceTableConnections):
1894 key = lsst.pex.config.Field(
1895 doc="Column on which to join the two input tables on and make the primary key of the output",
1896 dtype=str,
1897 default="objectId",
1898 )
1901class WriteForcedSourceTableTask(pipeBase.PipelineTask):
1902 """Merge and convert per-detector forced source catalogs to parquet.
1904 Because the predecessor ForcedPhotCcdTask operates per-detector and
1905 per-tract (i.e., it has tract in its dimensions), detectors
1906 on the tract boundary may have multiple forced source catalogs.
1908 The successor task TransformForcedSourceTable runs per-patch
1909 and temporally aggregates overlapping mergedForcedSource catalogs from
1910 all available epochs.
1911 """
1912 _DefaultName = "writeForcedSourceTable"
1913 ConfigClass = WriteForcedSourceTableConfig
1915 def runQuantum(self, butlerQC, inputRefs, outputRefs):
1916 inputs = butlerQC.get(inputRefs)
1917 # Add ccdVisitId to allow joining with CcdVisitTable
1918 inputs['ccdVisitId'] = butlerQC.quantum.dataId.pack("visit_detector")
1919 inputs['band'] = butlerQC.quantum.dataId.full['band']
1920 outputs = self.run(**inputs)
1921 butlerQC.put(outputs, outputRefs)
1923 def run(self, inputCatalog, inputCatalogDiff, ccdVisitId=None, band=None):
1924 dfs = []
1925 for table, dataset, in zip((inputCatalog, inputCatalogDiff), ('calexp', 'diff')):
1926 df = table.asAstropy().to_pandas().set_index(self.config.key, drop=False)
1927 df = df.reindex(sorted(df.columns), axis=1)
1928 df['ccdVisitId'] = ccdVisitId if ccdVisitId else pd.NA
1929 df['band'] = band if band else pd.NA
1930 df.columns = pd.MultiIndex.from_tuples([(dataset, c) for c in df.columns],
1931 names=('dataset', 'column'))
1933 dfs.append(df)
1935 outputCatalog = functools.reduce(lambda d1, d2: d1.join(d2), dfs)
1936 return pipeBase.Struct(outputCatalog=outputCatalog)
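    # The merged catalog has a two-level column index with level names
    # ('dataset', 'column'): outputCatalog['calexp'] selects the
    # direct-image measurements and outputCatalog['diff'] the
    # difference-image measurements, both indexed by the join key
    # (objectId by default).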
1939class TransformForcedSourceTableConnections(pipeBase.PipelineTaskConnections,
1940 dimensions=("instrument", "skymap", "patch", "tract")):
1942 inputCatalogs = connectionTypes.Input(
1943 doc="Parquet table of merged ForcedSources produced by WriteForcedSourceTableTask",
1944 name="mergedForcedSource",
1945 storageClass="DataFrame",
1946 dimensions=("instrument", "visit", "detector", "skymap", "tract"),
1947 multiple=True,
1948 deferLoad=True
1949 )
1950 referenceCatalog = connectionTypes.Input(
1951 doc="Reference catalog which was used to seed the forcedPhot. Columns "
1952 "objectId, detect_isPrimary, detect_isTractInner, detect_isPatchInner "
1953 "are expected.",
1954 name="objectTable",
1955 storageClass="DataFrame",
1956 dimensions=("tract", "patch", "skymap"),
1957 deferLoad=True
1958 )
1959 outputCatalog = connectionTypes.Output(
1960 doc="Narrower, temporally-aggregated, per-patch ForcedSource Table transformed and converted per a "
1961 "specified set of functors",
1962 name="forcedSourceTable",
1963 storageClass="DataFrame",
1964 dimensions=("tract", "patch", "skymap")
1965 )
1968class TransformForcedSourceTableConfig(TransformCatalogBaseConfig,
1969 pipelineConnections=TransformForcedSourceTableConnections):
1970 referenceColumns = pexConfig.ListField(
1971 dtype=str,
1972 default=["detect_isPrimary", "detect_isTractInner", "detect_isPatchInner"],
1973 optional=True,
1974 doc="Columns to pull from reference catalog",
1975 )
1976 keyRef = lsst.pex.config.Field(
1977 doc="Column on which to join the two input tables on and make the primary key of the output",
1978 dtype=str,
1979 default="objectId",
1980 )
1981 key = lsst.pex.config.Field(
1982 doc="Rename the output DataFrame index to this name",
1983 dtype=str,
1984 default="forcedSourceId",
1985 )
1987 def setDefaults(self):
1988 super().setDefaults()
1989 self.functorFile = os.path.join('$PIPE_TASKS_DIR', 'schemas', 'ForcedSource.yaml')
1990 self.columnsFromDataId = ['tract', 'patch']
1993class TransformForcedSourceTableTask(TransformCatalogBaseTask):
1994 """Transform/standardize a ForcedSource catalog
1996 Transforms each wide, per-detector forcedSource parquet table per the
1997 specification file (per-camera defaults found in ForcedSource.yaml).
1998 All epochs that overlap the patch are aggregated into one per-patch
1999 narrow-parquet file.
2001 No de-duplication of rows is performed. Duplicate resolutions flags are
2002 pulled in from the referenceCatalog: `detect_isPrimary`,
2003 `detect_isTractInner`,`detect_isPatchInner`, so that user may de-duplicate
2004 for analysis or compare duplicates for QA.
2006 The resulting table includes multiple bands. Epochs (MJDs) and other useful
2007 per-visit rows can be retreived by joining with the CcdVisitTable on
2008 ccdVisitId.
2009 """
2010 _DefaultName = "transformForcedSourceTable"
2011 ConfigClass = TransformForcedSourceTableConfig
2013 def runQuantum(self, butlerQC, inputRefs, outputRefs):
2014 inputs = butlerQC.get(inputRefs)
2015 if self.funcs is None:
2016 raise ValueError("config.functorFile is None. "
2017 "Must be a valid path to yaml in order to run Task as a PipelineTask.")
2018 outputs = self.run(inputs['inputCatalogs'], inputs['referenceCatalog'], funcs=self.funcs,
2019 dataId=outputRefs.outputCatalog.dataId.full)
2021 butlerQC.put(outputs, outputRefs)
2023 def run(self, inputCatalogs, referenceCatalog, funcs=None, dataId=None, band=None):
2024 dfs = []
2025 ref = referenceCatalog.get(parameters={"columns": self.config.referenceColumns})
2026 self.log.info("Aggregating %s input catalogs", len(inputCatalogs))
2027 for handle in inputCatalogs:
2028 result = self.transform(None, handle, funcs, dataId)
2029 # Filter for only rows that were detected on (overlap) the patch
2030 dfs.append(result.df.join(ref, how='inner'))
2032 outputCatalog = pd.concat(dfs)
2034 # Now that the join on config.keyRef is done, change the index to
2035 # config.key:
2036 outputCatalog.index.rename(self.config.keyRef, inplace=True)
2037 # Move config.keyRef from the index back into a regular column.
2038 outputCatalog.reset_index(inplace=True)
2039 # Set forcedSourceId as the index; this column is produced by the
2040 # functors specified in ForcedSource.yaml.
2041 outputCatalog.set_index("forcedSourceId", inplace=True, verify_integrity=True)
2042 # Rename the index to config.key.
2043 outputCatalog.index.rename(self.config.key, inplace=True)
2045 self.log.info("Made a table of %d columns and %d rows",
2046 len(outputCatalog.columns), len(outputCatalog))
2047 return pipeBase.Struct(outputCatalog=outputCatalog)
2050class ConsolidateTractConnections(pipeBase.PipelineTaskConnections,
2051 defaultTemplates={"catalogType": ""},
2052 dimensions=("instrument", "tract")):
2053 inputCatalogs = connectionTypes.Input(
2054 doc="Input per-patch DataFrame Tables to be concatenated",
2055 name="{catalogType}ForcedSourceTable",
2056 storageClass="DataFrame",
2057 dimensions=("tract", "patch", "skymap"),
2058 multiple=True,
2059 )
2061 outputCatalog = connectionTypes.Output(
2062 doc="Output per-tract concatenation of DataFrame Tables",
2063 name="{catalogType}ForcedSourceTable_tract",
2064 storageClass="DataFrame",
2065 dimensions=("tract", "skymap"),
2066 )
2069class ConsolidateTractConfig(pipeBase.PipelineTaskConfig,
2070 pipelineConnections=ConsolidateTractConnections):
2071 pass
2074class ConsolidateTractTask(CmdLineTask, pipeBase.PipelineTask):
2075 """Concatenate any per-patch, dataframe list into a single
2076 per-tract DataFrame.
2077 """
2078 _DefaultName = 'ConsolidateTract'
2079 ConfigClass = ConsolidateTractConfig
2081 def runQuantum(self, butlerQC, inputRefs, outputRefs):
2082 inputs = butlerQC.get(inputRefs)
2083 # We do not check that at least one inputCatalog exists: if none did,
2084 # the quantum graph would be empty and this task would never run.
2085 self.log.info("Concatenating %s per-patch %s Tables",
2086 len(inputs['inputCatalogs']),
2087 inputRefs.inputCatalogs[0].datasetType.name)
2088 df = pd.concat(inputs['inputCatalogs'])
2089 butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs)