import functools
import logging
import numbers
import os

import numpy as np
import pandas as pd

from collections import defaultdict

import lsst.geom
import lsst.pex.config as pexConfig
import lsst.pipe.base as pipeBase
import lsst.daf.base as dafBase
import lsst.afw.table as afwTable
from lsst.afw.image import ExposureSummaryStats
from lsst.meas.base import SingleFrameMeasurementTask
from lsst.obs.base import ExposureIdInfo
from lsst.pipe.base import CmdLineTask, ArgumentParser, DataIdContainer
from lsst.pipe.base import connectionTypes
from lsst.daf.butler import DeferredDatasetHandle, DataCoordinate
from lsst.skymap import BaseSkyMap
from lsst.coadd.utils.coaddDataIdContainer import CoaddDataIdContainer

from .parquetTable import ParquetTable
from .multiBandUtils import makeMergeArgumentParser, MergeSourcesRunner
from .functors import CompositeFunctor, Column

log = logging.getLogger(__name__)
def flattenFilters(df, noDupCols=['coord_ra', 'coord_dec'], camelCase=False, inputBands=None):
    """Flattens a dataframe with multilevel column index.
    """
    newDf = pd.DataFrame()
    # band is the level 0 index
    dfBands = df.columns.unique(level=0).values
    for band in dfBands:
        subdf = df[band]
        columnFormat = '{0}{1}' if camelCase else '{0}_{1}'
        newColumns = {c: columnFormat.format(band, c)
                      for c in subdf.columns if c not in noDupCols}
        cols = list(newColumns.keys())
        newDf = pd.concat([newDf, subdf[cols].rename(columns=newColumns)], axis=1)

    # Only consider bands that are present in both the input data and the
    # requested inputBands.
    presentBands = dfBands if inputBands is None else list(set(inputBands).intersection(dfBands))
    # Copy the non-duplicated columns (e.g. coordinates) from one band only.
    noDupDf = df[presentBands[0]][noDupCols]
    newDf = pd.concat([noDupDf, newDf], axis=1)
    return newDf
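# Illustrative usage sketch for flattenFilters (the column names below are
# hypothetical, not taken from this module): given a DataFrame ``df`` whose
# columns form a two-level index such as ('g', 'PsFlux'), ('r', 'PsFlux'),
# ('g', 'coord_ra'), ('r', 'coord_ra'), then
#
#     flattenFilters(df, camelCase=True, inputBands=['g', 'r'])
#
# returns a single-level DataFrame with columns 'gPsFlux' and 'rPsFlux', plus a
# single copy of the noDupCols ('coord_ra', 'coord_dec') taken from the first
# band present in both the input and ``inputBands``.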
class WriteObjectTableConnections(pipeBase.PipelineTaskConnections,
                                  defaultTemplates={"coaddName": "deep"},
                                  dimensions=("tract", "patch", "skymap")):
    inputCatalogMeas = connectionTypes.Input(
        doc="Catalog of source measurements on the deepCoadd.",
        dimensions=("tract", "patch", "band", "skymap"),
        storageClass="SourceCatalog",
        name="{coaddName}Coadd_meas",
        multiple=True
    )
    inputCatalogForcedSrc = connectionTypes.Input(
        doc="Catalog of forced measurements (shape and position parameters held fixed) on the deepCoadd.",
        dimensions=("tract", "patch", "band", "skymap"),
        storageClass="SourceCatalog",
        name="{coaddName}Coadd_forced_src",
        multiple=True
    )
    inputCatalogRef = connectionTypes.Input(
        doc="Catalog marking the primary detection (which band provides a good shape and position) "
            "for each detection in deepCoadd_mergeDet.",
        dimensions=("tract", "patch", "skymap"),
        storageClass="SourceCatalog",
        name="{coaddName}Coadd_ref"
    )
    outputCatalog = connectionTypes.Output(
        doc="A vertical concatenation of the deepCoadd_{ref|meas|forced_src} catalogs, "
            "stored as a DataFrame with a multi-level column index per-patch.",
        dimensions=("tract", "patch", "skymap"),
        storageClass="DataFrame",
        name="{coaddName}Coadd_obj"
    )
class WriteObjectTableConfig(pipeBase.PipelineTaskConfig,
                             pipelineConnections=WriteObjectTableConnections):
    engine = pexConfig.Field(
        dtype=str,
        default="pyarrow",
        doc="Parquet engine for writing (pyarrow or fastparquet)"
    )
    coaddName = pexConfig.Field(
        dtype=str,
        default="deep",
        doc="Name of coadd"
    )
class WriteObjectTableTask(CmdLineTask, pipeBase.PipelineTask):
    """Write filter-merged source tables to parquet.
    """
    _DefaultName = "writeObjectTable"
    ConfigClass = WriteObjectTableConfig
    RunnerClass = MergeSourcesRunner

    # Names of table datasets to be merged
    inputDatasets = ('forced_src', 'meas', 'ref')

    # Name of the output dataset
    outputDataset = 'obj'

    def __init__(self, butler=None, schema=None, **kwargs):
        super().__init__(**kwargs)

    def runDataRef(self, patchRefList):
        """!
        @brief Merge coadd sources from multiple bands. Calls @ref `run` which
        must be defined in subclasses that inherit from MergeSourcesTask.

        @param[in] patchRefList list of data references for each filter
        """
        catalogs = dict(self.readCatalog(patchRef) for patchRef in patchRefList)
        dataId = patchRefList[0].dataId
        mergedCatalog = self.run(catalogs, tract=dataId['tract'], patch=dataId['patch'])
        self.write(patchRefList[0], ParquetTable(dataFrame=mergedCatalog))
    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)

        measDict = {ref.dataId['band']: {'meas': cat} for ref, cat in
                    zip(inputRefs.inputCatalogMeas, inputs['inputCatalogMeas'])}
        forcedSourceDict = {ref.dataId['band']: {'forced_src': cat} for ref, cat in
                            zip(inputRefs.inputCatalogForcedSrc, inputs['inputCatalogForcedSrc'])}

        catalogs = {}
        for band in measDict.keys():
            catalogs[band] = {'meas': measDict[band]['meas'],
                              'forced_src': forcedSourceDict[band]['forced_src'],
                              'ref': inputs['inputCatalogRef']}
        dataId = butlerQC.quantum.dataId
        df = self.run(catalogs=catalogs, tract=dataId['tract'], patch=dataId['patch'])
        outputs = pipeBase.Struct(outputCatalog=df)
        butlerQC.put(outputs, outputRefs)
    @classmethod
    def _makeArgumentParser(cls):
        """Create a suitable ArgumentParser.

        We will use the ArgumentParser to get a list of data
        references for patches; the RunnerClass will sort them into lists
        of data references for the same patch.

        References first of self.inputDatasets, rather than
        self.inputDataset.
        """
        return makeMergeArgumentParser(cls._DefaultName, cls.inputDatasets[0])
    def readCatalog(self, patchRef):
        """Read input catalogs.

        Read all the input datasets given by the 'inputDatasets'
        attribute.

        Parameters
        ----------
        patchRef : `lsst.daf.persistence.ButlerDataRef`
            Data reference for patch.

        Returns
        -------
        Tuple consisting of band name and a dict of catalogs, keyed by
        dataset name.
        """
        band = patchRef.get(self.config.coaddName + "Coadd_filter", immediate=True).bandLabel
        catalogDict = {}
        for dataset in self.inputDatasets:
            catalog = patchRef.get(self.config.coaddName + "Coadd_" + dataset, immediate=True)
            self.log.info("Read %d sources from %s for band %s: %s",
                          len(catalog), dataset, band, patchRef.dataId)
            catalogDict[dataset] = catalog
        return band, catalogDict
    def run(self, catalogs, tract, patch):
        """Merge multiple catalogs.

        Parameters
        ----------
        catalogs : `dict`
            Mapping from filter names to dict of catalogs.
        tract : `int`
            tractId to use for the tractId column.
        patch : `str`
            patchId to use for the patchId column.

        Returns
        -------
        catalog : `pandas.DataFrame`
            Merged dataframe.
        """
        dfs = []
        for filt, tableDict in catalogs.items():
            for dataset, table in tableDict.items():
                # Convert afwTable to pandas DataFrame, indexed by source id.
                df = table.asAstropy().to_pandas().set_index('id', drop=True)

                # Sort columns by name, to ensure matching schema among patches.
                df = df.reindex(sorted(df.columns), axis=1)
                df['tractId'] = tract
                df['patchId'] = patch

                # Make columns a 3-level MultiIndex.
                df.columns = pd.MultiIndex.from_tuples([(dataset, filt, c) for c in df.columns],
                                                       names=('dataset', 'band', 'column'))
                dfs.append(df)

        catalog = functools.reduce(lambda d1, d2: d1.join(d2), dfs)
        return catalog
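    # For reference, the DataFrame returned by ``run`` carries a three-level
    # column MultiIndex (dataset, band, column); a hypothetical entry would be
    # ('meas', 'i', 'base_PsfFlux_instFlux'), with one row per source id and
    # the 'tractId'/'patchId' columns repeated under each (dataset, band) pair.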
    def write(self, patchRef, catalog):
        """Write the output.

        Parameters
        ----------
        catalog : `ParquetTable`
            Catalog to write.
        patchRef : `lsst.daf.persistence.ButlerDataRef`
            Data reference for patch.
        """
        patchRef.put(catalog, self.config.coaddName + "Coadd_" + self.outputDataset)
        # The filter is not part of the output data ID, so drop it from the
        # log message to avoid confusion.
        mergeDataId = patchRef.dataId.copy()
        del mergeDataId["filter"]
        self.log.info("Wrote merged catalog: %s", mergeDataId)

    def writeMetadata(self, dataRefList):
        """No metadata to write, and not sure how to write it for a list of
        dataRefs.
        """
        pass
class WriteSourceTableConnections(pipeBase.PipelineTaskConnections,
                                  defaultTemplates={"catalogType": ""},
                                  dimensions=("instrument", "visit", "detector")):

    catalog = connectionTypes.Input(
        doc="Input full-depth catalog of sources produced by CalibrateTask",
        name="{catalogType}src",
        storageClass="SourceCatalog",
        dimensions=("instrument", "visit", "detector")
    )
    outputCatalog = connectionTypes.Output(
        doc="Catalog of sources, `src` in Parquet format. The 'id' column is "
            "replaced with an index; all other columns are unchanged.",
        name="{catalogType}source",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector")
    )
class WriteSourceTableConfig(pipeBase.PipelineTaskConfig,
                             pipelineConnections=WriteSourceTableConnections):
    pass


class WriteSourceTableTask(CmdLineTask, pipeBase.PipelineTask):
    """Write source table to parquet.
    """
    _DefaultName = "writeSourceTable"
    ConfigClass = WriteSourceTableConfig

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)
        inputs['ccdVisitId'] = butlerQC.quantum.dataId.pack("visit_detector")
        result = self.run(**inputs).table
        outputs = pipeBase.Struct(outputCatalog=result.toDataFrame())
        butlerQC.put(outputs, outputRefs)

    def run(self, catalog, ccdVisitId=None, **kwargs):
        """Convert `src` catalog to parquet.

        Parameters
        ----------
        catalog : `afwTable.SourceCatalog`
            Catalog to be converted.
        ccdVisitId : `int`
            ccdVisitId to be added as a column.

        Returns
        -------
        result : `lsst.pipe.base.Struct`
            ``table``
                `ParquetTable` version of the input catalog.
        """
        self.log.info("Generating parquet table from src catalog ccdVisitId=%s", ccdVisitId)
        df = catalog.asAstropy().to_pandas().set_index('id', drop=True)
        df['ccdVisitId'] = ccdVisitId
        return pipeBase.Struct(table=ParquetTable(dataFrame=df))
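# A minimal usage sketch for WriteSourceTableTask.run (assumes ``catalog`` is an
# existing lsst.afw.table.SourceCatalog; the ccdVisitId value is made up):
#
#     task = WriteSourceTableTask()
#     struct = task.run(catalog, ccdVisitId=123456789)
#     df = struct.table.toDataFrame()  # indexed by 'id', with a 'ccdVisitId' column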
class WriteRecalibratedSourceTableConnections(WriteSourceTableConnections,
                                              defaultTemplates={"catalogType": "",
                                                                "skyWcsName": "jointcal",
                                                                "photoCalibName": "fgcm"},
                                              dimensions=("instrument", "visit", "detector", "skymap")):
    skyMap = connectionTypes.Input(
        doc="skyMap needed to choose which tract-level calibrations to use when multiple available",
        name=BaseSkyMap.SKYMAP_DATASET_TYPE_NAME,
        storageClass="SkyMap",
        dimensions=("skymap",),
    )
    exposure = connectionTypes.Input(
        doc="Input exposure to perform photometry on.",
        name="calexp",
        storageClass="ExposureF",
        dimensions=["instrument", "visit", "detector"],
    )
    externalSkyWcsTractCatalog = connectionTypes.Input(
        doc=("Per-tract, per-visit wcs calibrations. These catalogs use the detector "
             "id for the catalog id, sorted on id for fast lookup."),
        name="{skyWcsName}SkyWcsCatalog",
        storageClass="ExposureCatalog",
        dimensions=["instrument", "visit", "tract"],
    )
    externalSkyWcsGlobalCatalog = connectionTypes.Input(
        doc=("Per-visit wcs calibrations computed globally (with no tract information). "
             "These catalogs use the detector id for the catalog id, sorted on id for "
             "fast lookup."),
        name="finalVisitSummary",
        storageClass="ExposureCatalog",
        dimensions=["instrument", "visit"],
    )
    externalPhotoCalibTractCatalog = connectionTypes.Input(
        doc=("Per-tract, per-visit photometric calibrations. These catalogs use the "
             "detector id for the catalog id, sorted on id for fast lookup."),
        name="{photoCalibName}PhotoCalibCatalog",
        storageClass="ExposureCatalog",
        dimensions=["instrument", "visit", "tract"],
    )
    externalPhotoCalibGlobalCatalog = connectionTypes.Input(
        doc=("Per-visit photometric calibrations computed globally (with no tract "
             "information). These catalogs use the detector id for the catalog id, "
             "sorted on id for fast lookup."),
        name="finalVisitSummary",
        storageClass="ExposureCatalog",
        dimensions=["instrument", "visit"],
    )
    def __init__(self, *, config=None):
        super().__init__(config=config)
        # Remove the external calibration inputs that the configuration says
        # will not be used.
        if config.doApplyExternalSkyWcs and config.doReevaluateSkyWcs:
            if config.useGlobalExternalSkyWcs:
                self.inputs.remove("externalSkyWcsTractCatalog")
            else:
                self.inputs.remove("externalSkyWcsGlobalCatalog")
        else:
            self.inputs.remove("externalSkyWcsTractCatalog")
            self.inputs.remove("externalSkyWcsGlobalCatalog")
        if config.doApplyExternalPhotoCalib and config.doReevaluatePhotoCalib:
            if config.useGlobalExternalPhotoCalib:
                self.inputs.remove("externalPhotoCalibTractCatalog")
            else:
                self.inputs.remove("externalPhotoCalibGlobalCatalog")
        else:
            self.inputs.remove("externalPhotoCalibTractCatalog")
            self.inputs.remove("externalPhotoCalibGlobalCatalog")
class WriteRecalibratedSourceTableConfig(WriteSourceTableConfig,
                                         pipelineConnections=WriteRecalibratedSourceTableConnections):

    doReevaluatePhotoCalib = pexConfig.Field(
        dtype=bool,
        default=False,
        doc=("Add or replace local photoCalib columns"),
    )
    doReevaluateSkyWcs = pexConfig.Field(
        dtype=bool,
        default=False,
        doc=("Add or replace local WCS columns and update the coord columns, coord_ra and coord_dec"),
    )
    doApplyExternalPhotoCalib = pexConfig.Field(
        dtype=bool,
        default=False,
        doc=("If and only if doReevaluatePhotoCalib, apply the photometric calibrations from an external "
             "algorithm such as FGCM or jointcal, else use the photoCalib already attached to the exposure."),
    )
    doApplyExternalSkyWcs = pexConfig.Field(
        dtype=bool,
        default=False,
        doc=("If and only if doReevaluateSkyWcs, apply the WCS from an external algorithm such as jointcal, "
             "else use the wcs already attached to the exposure."),
    )
    useGlobalExternalPhotoCalib = pexConfig.Field(
        dtype=bool,
        default=True,
        doc=("When using doApplyExternalPhotoCalib, use 'global' calibrations "
             "that are not run per-tract. When False, use per-tract photometric "
             "calibration files.")
    )
    useGlobalExternalSkyWcs = pexConfig.Field(
        dtype=bool,
        default=True,
        doc=("When using doApplyExternalSkyWcs, use 'global' calibrations "
             "that are not run per-tract. When False, use per-tract wcs "
             "files.")
    )

    def validate(self):
        super().validate()
        if self.doApplyExternalSkyWcs and not self.doReevaluateSkyWcs:
            log.warning("doApplyExternalSkyWcs=True but doReevaluateSkyWcs=False. "
                        "External SkyWcs will not be read or evaluated.")
        if self.doApplyExternalPhotoCalib and not self.doReevaluatePhotoCalib:
            log.warning("doApplyExternalPhotoCalib=True but doReevaluatePhotoCalib=False. "
                        "External PhotoCalib will not be read or evaluated.")
class WriteRecalibratedSourceTableTask(WriteSourceTableTask):
    """Write source table to parquet.
    """
    _DefaultName = "writeRecalibratedSourceTable"
    ConfigClass = WriteRecalibratedSourceTableConfig

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)
        inputs['ccdVisitId'] = butlerQC.quantum.dataId.pack("visit_detector")
        inputs['exposureIdInfo'] = ExposureIdInfo.fromDataId(butlerQC.quantum.dataId, "visit_detector")

        if self.config.doReevaluatePhotoCalib or self.config.doReevaluateSkyWcs:
            if self.config.doApplyExternalPhotoCalib or self.config.doApplyExternalSkyWcs:
                inputs['exposure'] = self.attachCalibs(inputRefs, **inputs)

            inputs['catalog'] = self.addCalibColumns(**inputs)

        result = self.run(**inputs).table
        outputs = pipeBase.Struct(outputCatalog=result.toDataFrame())
        butlerQC.put(outputs, outputRefs)
    def attachCalibs(self, inputRefs, skyMap, exposure, externalSkyWcsGlobalCatalog=None,
                     externalSkyWcsTractCatalog=None, externalPhotoCalibGlobalCatalog=None,
                     externalPhotoCalibTractCatalog=None, **kwargs):
        """Apply external calibrations to exposure per configuration.

        When multiple tract-level calibrations overlap, select the one with the
        center closest to detector.

        Parameters
        ----------
        inputRefs : `lsst.pipe.base.InputQuantizedConnection`, for dataIds of
            tract-level calibrations.
        skyMap : `lsst.skymap.SkyMap`
        exposure : `lsst.afw.image.exposure.Exposure`
            Input exposure to adjust calibrations.
        externalSkyWcsGlobalCatalog : `lsst.afw.table.ExposureCatalog`, optional
            Exposure catalog with external skyWcs to be applied per config.
        externalSkyWcsTractCatalog : `lsst.afw.table.ExposureCatalog`, optional
            Exposure catalog with external skyWcs to be applied per config.
        externalPhotoCalibGlobalCatalog : `lsst.afw.table.ExposureCatalog`, optional
            Exposure catalog with external photoCalib to be applied per config.
        externalPhotoCalibTractCatalog : `lsst.afw.table.ExposureCatalog`, optional
            Exposure catalog with external photoCalib to be applied per config.

        Returns
        -------
        exposure : `lsst.afw.image.exposure.Exposure`
            Exposure with adjusted calibrations.
        """
        if not self.config.doApplyExternalSkyWcs:
            # Do not modify the exposure's SkyWcs.
            externalSkyWcsCatalog = None
        elif self.config.useGlobalExternalSkyWcs:
            # Use the global external SkyWcs.
            externalSkyWcsCatalog = externalSkyWcsGlobalCatalog
            self.log.info('Applying global SkyWcs')
        else:
            # Use the tract-level external SkyWcs from the closest overlapping tract.
            inputRef = getattr(inputRefs, 'externalSkyWcsTractCatalog')
            tracts = [ref.dataId['tract'] for ref in inputRef]
            if len(tracts) == 1:
                ind = 0
                self.log.info('Applying tract-level SkyWcs from tract %s', tracts[ind])
            else:
                ind = self.getClosestTract(tracts, skyMap,
                                           exposure.getBBox(), exposure.getWcs())
                self.log.info('Multiple overlapping externalSkyWcsTractCatalogs found (%s). '
                              'Applying closest to detector center: tract=%s', str(tracts), tracts[ind])

            externalSkyWcsCatalog = externalSkyWcsTractCatalog[ind]

        if not self.config.doApplyExternalPhotoCalib:
            # Do not modify the exposure's PhotoCalib.
            externalPhotoCalibCatalog = None
        elif self.config.useGlobalExternalPhotoCalib:
            # Use the global external PhotoCalib.
            externalPhotoCalibCatalog = externalPhotoCalibGlobalCatalog
            self.log.info('Applying global PhotoCalib')
        else:
            # Use the tract-level external PhotoCalib from the closest overlapping tract.
            inputRef = getattr(inputRefs, 'externalPhotoCalibTractCatalog')
            tracts = [ref.dataId['tract'] for ref in inputRef]
            if len(tracts) == 1:
                ind = 0
                self.log.info('Applying tract-level PhotoCalib from tract %s', tracts[ind])
            else:
                ind = self.getClosestTract(tracts, skyMap,
                                           exposure.getBBox(), exposure.getWcs())
                self.log.info('Multiple overlapping externalPhotoCalibTractCatalogs found (%s). '
                              'Applying closest to detector center: tract=%s', str(tracts), tracts[ind])

            externalPhotoCalibCatalog = externalPhotoCalibTractCatalog[ind]

        return self.prepareCalibratedExposure(exposure, externalSkyWcsCatalog, externalPhotoCalibCatalog)
    def getClosestTract(self, tracts, skyMap, bbox, wcs):
        """Find the index of the tract closest to detector from list of tractIds.

        Parameters
        ----------
        tracts : `list` [`int`]
            Iterable of integer tractIds.
        skyMap : `lsst.skymap.SkyMap`
            skyMap to lookup tract geometry and wcs.
        bbox : `lsst.geom.Box2I`
            Detector bbox, center of which will be compared to tract centers.
        wcs : `lsst.afw.geom.SkyWcs`
            Detector Wcs object to map the detector center to SkyCoord.

        Returns
        -------
        index : `int`
            Index of the closest tract in ``tracts``.
        """
        center = wcs.pixelToSky(bbox.getCenter())
        sep = []
        for tractId in tracts:
            tract = skyMap[tractId]
            tractCenter = tract.getWcs().pixelToSky(tract.getBBox().getCenter())
            sep.append(center.separation(tractCenter))

        return np.argmin(sep)
    def prepareCalibratedExposure(self, exposure, externalSkyWcsCatalog=None, externalPhotoCalibCatalog=None):
        """Prepare a calibrated exposure and apply external calibrations
        if so configured.

        Parameters
        ----------
        exposure : `lsst.afw.image.exposure.Exposure`
            Input exposure to adjust calibrations.
        externalSkyWcsCatalog : `lsst.afw.table.ExposureCatalog`, optional
            Exposure catalog with external skyWcs to be applied
            if config.doApplyExternalSkyWcs=True. Catalog uses the detector id
            for the catalog id, sorted on id for fast lookup.
        externalPhotoCalibCatalog : `lsst.afw.table.ExposureCatalog`, optional
            Exposure catalog with external photoCalib to be applied
            if config.doApplyExternalPhotoCalib=True. Catalog uses the detector
            id for the catalog id, sorted on id for fast lookup.

        Returns
        -------
        exposure : `lsst.afw.image.exposure.Exposure`
            Exposure with adjusted calibrations.
        """
        detectorId = exposure.getInfo().getDetector().getId()

        if externalPhotoCalibCatalog is not None:
            row = externalPhotoCalibCatalog.find(detectorId)
            if row is None:
                self.log.warning("Detector id %s not found in externalPhotoCalibCatalog; "
                                 "Using original photoCalib.", detectorId)
            else:
                photoCalib = row.getPhotoCalib()
                if photoCalib is None:
                    self.log.warning("Detector id %s has None for photoCalib in externalPhotoCalibCatalog; "
                                     "Using original photoCalib.", detectorId)
                else:
                    exposure.setPhotoCalib(photoCalib)

        if externalSkyWcsCatalog is not None:
            row = externalSkyWcsCatalog.find(detectorId)
            if row is None:
                self.log.warning("Detector id %s not found in externalSkyWcsCatalog; "
                                 "Using original skyWcs.", detectorId)
            else:
                skyWcs = row.getWcs()
                if skyWcs is None:
                    self.log.warning("Detector id %s has None for skyWcs in externalSkyWcsCatalog; "
                                     "Using original skyWcs.", detectorId)
                else:
                    exposure.setWcs(skyWcs)

        return exposure
    def addCalibColumns(self, catalog, exposure, exposureIdInfo, **kwargs):
        """Add or replace columns with calibs evaluated at each centroid.

        Add or replace 'base_LocalWcs' and 'base_LocalPhotoCalib' columns in
        a source catalog, by rerunning the plugins.

        Parameters
        ----------
        catalog : `lsst.afw.table.SourceCatalog`
            Catalog to which calib columns will be added.
        exposure : `lsst.afw.image.exposure.Exposure`
            Exposure with attached PhotoCalibs and SkyWcs attributes to be
            reevaluated at local centroids. Pixels are not required.
        exposureIdInfo : `lsst.obs.base.ExposureIdInfo`

        Returns
        -------
        newCat : `lsst.afw.table.SourceCatalog`
            Source Catalog with requested local calib columns.
        """
        measureConfig = SingleFrameMeasurementTask.ConfigClass()
        measureConfig.doReplaceWithNoise = False

        # Run only the requested plugins.
        measureConfig.plugins.names = []
        if self.config.doReevaluateSkyWcs:
            measureConfig.plugins.names.add('base_LocalWcs')
            self.log.info("Re-evaluating base_LocalWcs plugin")
        if self.config.doReevaluatePhotoCalib:
            measureConfig.plugins.names.add('base_LocalPhotoCalib')
            self.log.info("Re-evaluating base_LocalPhotoCalib plugin")
        pluginsNotToCopy = tuple(measureConfig.plugins.names)

        # Create a new schema and catalog, copying all columns from the
        # original except the ones that will be re-evaluated.
        aliasMap = catalog.schema.getAliasMap()
        mapper = afwTable.SchemaMapper(catalog.schema)
        for item in catalog.schema:
            if not item.field.getName().startswith(pluginsNotToCopy):
                mapper.addMapping(item.key)

        schema = mapper.getOutputSchema()
        measurement = SingleFrameMeasurementTask(config=measureConfig, schema=schema)
        schema.setAliasMap(aliasMap)
        newCat = afwTable.SourceCatalog(schema)
        newCat.extend(catalog, mapper=mapper)

        # Update coord_ra/coord_dec, which are expected to be positions on the
        # sky and are used as such in sdm tables without transform.
        if self.config.doReevaluateSkyWcs:
            afwTable.updateSourceCoords(exposure.wcs, newCat)

        measurement.run(measCat=newCat, exposure=exposure, exposureId=exposureIdInfo.expId)

        return newCat
class PostprocessAnalysis(object):
    """Calculate columns from ParquetTable.

    This object manages and organizes an arbitrary set of computations
    on a catalog. The catalog is defined by a `ParquetTable` object, such
    as a `deepCoadd_obj` dataset, and the computations are defined by a
    collection of `lsst.pipe.tasks.functor.Functor` objects (or, equivalently,
    a `CompositeFunctor`).

    After the object is initialized, accessing the `.df` attribute (which
    holds the `pandas.DataFrame` containing the results of the calculations)
    triggers computation of said dataframe.

    One of the conveniences of using this object is the ability to define a
    desired common filter for all functors. This enables the same functor
    collection to be passed to several different `PostprocessAnalysis` objects
    without having to change the original functor collection, since the `filt`
    keyword argument of this object triggers an overwrite of the `filt`
    property for all functors in the collection.

    This object also allows a list of refFlags to be passed, and defines a set
    of default refFlags that are always included even if not requested.

    If a list of `ParquetTable` objects is passed, rather than a single one,
    then the calculations will be mapped over all the input catalogs. In
    principle, it should be straightforward to parallelize this activity, but
    initial tests have failed (see TODO in code comments).

    Parameters
    ----------
    parq : `lsst.pipe.tasks.ParquetTable` (or list of such)
        Source catalog(s) for computation.

    functors : `list`, `dict`, or `lsst.pipe.tasks.functors.CompositeFunctor`
        Computations to do (functors that act on `parq`).
        If a dict, the output
        DataFrame will have columns keyed accordingly.
        If a list, the column keys will come from the
        `.shortname` attribute of each functor.

    filt : `str`, optional
        Filter in which to calculate. If provided,
        this will overwrite any existing `.filt` attribute
        of the provided functors.

    flags : `list`, optional
        List of flags (per-band) to include in output table.
        Taken from the `meas` dataset if applied to a multilevel Object Table.

    refFlags : `list`, optional
        List of refFlags (only reference band) to include in output table.

    forcedFlags : `list`, optional
        List of flags (per-band) to include in output table.
        Taken from the ``forced_src`` dataset if applied to a
        multilevel Object Table. Intended for flags from measurement plugins
        only run during multi-band forced-photometry.
    """
    _defaultRefFlags = []

    def __init__(self, parq, functors, filt=None, flags=None, refFlags=None, forcedFlags=None):
        self.parq = parq
        self.functors = functors

        self.filt = filt
        self.flags = list(flags) if flags is not None else []
        self.forcedFlags = list(forcedFlags) if forcedFlags is not None else []
        self.refFlags = list(self._defaultRefFlags)
        if refFlags is not None:
            self.refFlags += list(refFlags)

        self._df = None

    @property
    def defaultFuncs(self):
        funcs = dict(self._defaultFuncs)
        return funcs

    @property
    def func(self):
        additionalFuncs = self.defaultFuncs
        additionalFuncs.update({flag: Column(flag, dataset='forced_src') for flag in self.forcedFlags})
        additionalFuncs.update({flag: Column(flag, dataset='ref') for flag in self.refFlags})
        additionalFuncs.update({flag: Column(flag, dataset='meas') for flag in self.flags})

        if isinstance(self.functors, CompositeFunctor):
            func = self.functors
        else:
            func = CompositeFunctor(self.functors)

        func.funcDict.update(additionalFuncs)
        func.filt = self.filt

        return func

    @property
    def noDupCols(self):
        return [name for name, func in self.func.funcDict.items() if func.noDup or func.dataset == 'ref']

    @property
    def df(self):
        if self._df is None:
            self.compute()
        return self._df

    def compute(self, dropna=False, pool=None):
        # Map over multiple parquet tables.
        if type(self.parq) in (list, tuple):
            if pool is None:
                dflist = [self.func(parq, dropna=dropna) for parq in self.parq]
            else:
                # TODO: Figure out why this doesn't work.
                dflist = pool.map(functools.partial(self.func, dropna=dropna), self.parq)
            self._df = pd.concat(dflist)
        else:
            self._df = self.func(self.parq, dropna=dropna)

        return self._df
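# Illustrative sketch of how PostprocessAnalysis is typically driven (the
# functor and flag names here are hypothetical placeholders):
#
#     funcs = {'ra': Column('coord_ra', dataset='ref')}
#     analysis = PostprocessAnalysis(parq, funcs, filt='i',
#                                    flags=['base_PixelFlags_flag'])
#     df = analysis.df  # first access triggers compute()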
811 """Expected Connections for subclasses of TransformCatalogBaseTask.
815 inputCatalog = connectionTypes.Input(
817 storageClass=
"DataFrame",
819 outputCatalog = connectionTypes.Output(
821 storageClass=
"DataFrame",
826 pipelineConnections=TransformCatalogBaseConnections):
827 functorFile = pexConfig.Field(
829 doc=
"Path to YAML file specifying Science Data Model functors to use "
830 "when copying columns and computing calibrated values.",
834 primaryKey = pexConfig.Field(
836 doc=
"Name of column to be set as the DataFrame index. If None, the index"
837 "will be named `id`",
841 columnsFromDataId = pexConfig.ListField(
845 doc=
"Columns to extract from the dataId",
850 """Base class for transforming/standardizing a catalog
852 by applying functors that convert units and apply calibrations.
853 The purpose of this task
is to perform a set of computations on
854 an input `ParquetTable` dataset (such
as `deepCoadd_obj`)
and write the
855 results to a new dataset (which needs to be declared
in an `outputDataset`
858 The calculations to be performed are defined
in a YAML file that specifies
859 a set of functors to be computed, provided
as
860 a `--functorFile` config parameter. An example of such a YAML file
885 - base_InputCount_value
888 functor: DeconvolvedMoments
893 - merge_measurement_i
894 - merge_measurement_r
895 - merge_measurement_z
896 - merge_measurement_y
897 - merge_measurement_g
898 - base_PixelFlags_flag_inexact_psfCenter
901 The names
for each entry under
"func" will become the names of columns
in
902 the output dataset. All the functors referenced are defined
in
904 functor are
in the `args` list,
and any additional entries
for each column
905 other than
"functor" or "args" (e.g., `
'filt'`, `
'dataset'`) are treated
as
906 keyword arguments to be passed to the functor initialization.
908 The
"flags" entry
is the default shortcut
for `Column` functors.
909 All columns listed under
"flags" will be copied to the output table
910 untransformed. They can be of any datatype.
911 In the special case of transforming a multi-level oject table
with
912 band
and dataset indices (deepCoadd_obj), these will be taked
from the
913 `meas` dataset
and exploded out per band.
915 There are two special shortcuts that only apply when transforming
916 multi-level Object (deepCoadd_obj) tables:
917 - The
"refFlags" entry
is shortcut
for `Column` functor
918 taken
from the `
'ref'` dataset
if transforming an ObjectTable.
919 - The
"forcedFlags" entry
is shortcut
for `Column` functors.
920 taken
from the ``forced_src`` dataset
if transforming an ObjectTable.
921 These are expanded out per band.
924 This task uses the `lsst.pipe.tasks.postprocess.PostprocessAnalysis` object
925 to organize
and excecute the calculations.
929 raise NotImplementedError(
'Subclass must define "_DefaultName" attribute')
933 raise NotImplementedError(
'Subclass must define "outputDataset" attribute')
937 raise NotImplementedError(
'Subclass must define "inputDataset" attribute')
940 def ConfigClass(self):
941 raise NotImplementedError(
'Subclass must define "ConfigClass" attribute')
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        if self.config.functorFile:
            self.log.info('Loading transform functor definitions from %s',
                          self.config.functorFile)
            self.funcs = CompositeFunctor.from_file(self.config.functorFile)
            self.funcs.update(dict(PostprocessAnalysis._defaultFuncs))
        else:
            self.funcs = None

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)
        if self.funcs is None:
            raise ValueError("config.functorFile is None. "
                             "Must be a valid path to yaml in order to run Task as a PipelineTask.")
        result = self.run(parq=inputs['inputCatalog'], funcs=self.funcs,
                          dataId=outputRefs.outputCatalog.dataId.full)
        outputs = pipeBase.Struct(outputCatalog=result)
        butlerQC.put(outputs, outputRefs)
    def run(self, parq, funcs=None, dataId=None, band=None):
        """Do postprocessing calculations.

        Takes a `ParquetTable` object and dataId,
        returns a dataframe with results of postprocessing calculations.

        Parameters
        ----------
        parq : `lsst.pipe.tasks.parquetTable.ParquetTable`
            ParquetTable from which calculations are done.
        funcs : `lsst.pipe.tasks.functors.Functors`
            Functors to apply to the table's columns.
        dataId : dict, optional
            Used to add a `patchId` column to the output dataframe.
        band : `str`, optional
            Filter band that is being processed.

        Returns
        -------
        df : `pandas.DataFrame`
        """
        self.log.info("Transforming/standardizing the source table dataId: %s", dataId)

        df = self.transform(band, parq, funcs, dataId).df
        self.log.info("Made a table of %d columns and %d rows", len(df.columns), len(df))
        return df

    def getAnalysis(self, parq, funcs=None, band=None):
        analysis = PostprocessAnalysis(parq, funcs, filt=band)
        return analysis
    def transform(self, band, parq, funcs, dataId):
        analysis = self.getAnalysis(parq, funcs=funcs, band=band)
        df = analysis.df
        if dataId and self.config.columnsFromDataId:
            for key in self.config.columnsFromDataId:
                if key in dataId:
                    df[str(key)] = dataId[key]
                else:
                    raise ValueError(f"'{key}' in config.columnsFromDataId not found in dataId: {dataId}")

        if self.config.primaryKey:
            if df.index.name != self.config.primaryKey and self.config.primaryKey in df:
                df.reset_index(inplace=True, drop=True)
                df.set_index(self.config.primaryKey, inplace=True)

        return pipeBase.Struct(
            df=df,
            analysis=analysis
        )

    def write(self, df, parqRef):
        parqRef.put(ParquetTable(dataFrame=df), self.outputDataset)

    def writeMetadata(self, dataRef):
        """No metadata to write.
        """
        pass
class TransformObjectCatalogConnections(pipeBase.PipelineTaskConnections,
                                        defaultTemplates={"coaddName": "deep"},
                                        dimensions=("tract", "patch", "skymap")):
    inputCatalog = connectionTypes.Input(
        doc="The vertical concatenation of the deepCoadd_{ref|meas|forced_src} catalogs, "
            "stored as a DataFrame with a multi-level column index per-patch.",
        dimensions=("tract", "patch", "skymap"),
        storageClass="DataFrame",
        name="{coaddName}Coadd_obj",
        deferLoad=True,
    )
    outputCatalog = connectionTypes.Output(
        doc="Per-Patch Object Table of columns transformed from the deepCoadd_obj table per the standard "
            "data model.",
        dimensions=("tract", "patch", "skymap"),
        storageClass="DataFrame",
        name="objectTable",
    )
class TransformObjectCatalogConfig(TransformCatalogBaseConfig,
                                   pipelineConnections=TransformObjectCatalogConnections):
    coaddName = pexConfig.Field(
        dtype=str,
        default="deep",
        doc="Name of coadd"
    )
    filterMap = pexConfig.DictField(
        keytype=str,
        itemtype=str,
        default={},
        doc=("Dictionary mapping full filter name to short one for column name munging. "
             "These filters determine the output columns no matter what filters the "
             "input data actually contain."),
        deprecated=("Coadds are now identified by the band, so this transform is unused. "
                    "Will be removed after v22.")
    )
    outputBands = pexConfig.ListField(
        dtype=str,
        default=None,
        optional=True,
        doc=("These bands and only these bands will appear in the output,"
             " NaN-filled if the input does not include them."
             " If None, then use all bands found in the input.")
    )
    camelCase = pexConfig.Field(
        dtype=bool,
        default=False,
        doc=("Write per-band columns names with camelCase, else underscore. "
             "For example: gPsFlux instead of g_PsFlux.")
    )
    multilevelOutput = pexConfig.Field(
        dtype=bool,
        default=False,
        doc=("Whether results dataframe should have a multilevel column index (True) or be flat "
             "and name-munged (False).")
    )
    goodFlags = pexConfig.ListField(
        dtype=str,
        default=[],
        doc=("List of 'good' flags that should be set False when populating empty tables. "
             "All other flags are considered to be 'bad' flags and will be set to True.")
    )
    floatFillValue = pexConfig.Field(
        dtype=float,
        default=np.nan,
        doc="Fill value for float fields when populating empty tables."
    )
    integerFillValue = pexConfig.Field(
        dtype=int,
        default=-1,
        doc="Fill value for integer fields when populating empty tables."
    )

    def setDefaults(self):
        super().setDefaults()
        self.functorFile = os.path.join('$PIPE_TASKS_DIR', 'schemas', 'Object.yaml')
        self.primaryKey = 'objectId'
        self.columnsFromDataId = ['tract', 'patch']
        self.goodFlags = ['calib_astrometry_used',
                          'calib_photometry_reserved',
                          'calib_photometry_used',
                          'calib_psf_candidate',
                          'calib_psf_reserved']
1117 """Produce a flattened Object Table to match the format specified in
1120 Do the same set of postprocessing calculations on all bands.
1122 This is identical to `TransformCatalogBaseTask`,
except for that it does
1123 the specified functor calculations
for all filters present
in the
1124 input `deepCoadd_obj` table. Any specific `
"filt"` keywords specified
1125 by the YAML file will be superceded.
1127 _DefaultName = "transformObjectCatalog"
1128 ConfigClass = TransformObjectCatalogConfig
1130 def run(self, parq, funcs=None, dataId=None, band=None):
1134 templateDf = pd.DataFrame()
1136 if isinstance(parq, DeferredDatasetHandle):
1137 columns = parq.get(component=
'columns')
1138 inputBands = columns.unique(level=1).values
1140 inputBands = parq.columnLevelNames[
'band']
1142 outputBands = self.config.outputBands
if self.config.outputBands
else inputBands
1145 for inputBand
in inputBands:
1146 if inputBand
not in outputBands:
1147 self.log.info(
"Ignoring %s band data in the input", inputBand)
1149 self.log.info(
"Transforming the catalog of band %s", inputBand)
1150 result = self.transform(inputBand, parq, funcs, dataId)
1151 dfDict[inputBand] = result.df
1152 analysisDict[inputBand] = result.analysis
1153 if templateDf.empty:
1154 templateDf = result.df
1157 for filt
in outputBands:
1158 if filt
not in dfDict:
1159 self.log.info(
"Adding empty columns for band %s", filt)
1160 dfTemp = templateDf.copy()
1161 for col
in dfTemp.columns:
1162 testValue = dfTemp[col].values[0]
1163 if isinstance(testValue, (np.bool_, pd.BooleanDtype)):
1165 if col
in self.config.goodFlags:
1169 elif isinstance(testValue, numbers.Integral):
1173 if isinstance(testValue, np.unsignedinteger):
1174 raise ValueError(
"Parquet tables may not have unsigned integer columns.")
1176 fillValue = self.config.integerFillValue
1178 fillValue = self.config.floatFillValue
1179 dfTemp[col].values[:] = fillValue
1180 dfDict[filt] = dfTemp
1183 df = pd.concat(dfDict, axis=1, names=[
'band',
'column'])
1185 if not self.config.multilevelOutput:
1186 noDupCols = list(set.union(*[set(v.noDupCols)
for v
in analysisDict.values()]))
1187 if self.config.primaryKey
in noDupCols:
1188 noDupCols.remove(self.config.primaryKey)
1189 if dataId
and self.config.columnsFromDataId:
1190 noDupCols += self.config.columnsFromDataId
1191 df =
flattenFilters(df, noDupCols=noDupCols, camelCase=self.config.camelCase,
1192 inputBands=inputBands)
1194 self.log.info(
"Made a table of %d columns and %d rows", len(df.columns), len(df))
class TractObjectDataIdContainer(CoaddDataIdContainer):

    def makeDataRefList(self, namespace):
        """Make self.refList from self.idList.

        Generate a list of data references given tract and/or patch.
        This was adapted from `TractQADataIdContainer`, which was
        `TractDataIdContainer` modified to not require "filter".
        Only existing dataRefs are returned.
        """
        def getPatchRefList(tract):
            return [namespace.butler.dataRef(datasetType=self.datasetType,
                                             tract=tract.getId(),
                                             patch="%d,%d" % patch.getIndex())
                    for patch in tract]

        tractRefs = defaultdict(list)  # Data references for each tract
        for dataId in self.idList:
            skymap = self.getSkymap(namespace)

            if "tract" in dataId:
                tractId = dataId["tract"]
                if "patch" in dataId:
                    tractRefs[tractId].append(namespace.butler.dataRef(datasetType=self.datasetType,
                                                                       tract=tractId,
                                                                       patch=dataId['patch']))
                else:
                    tractRefs[tractId] += getPatchRefList(skymap[tractId])
            else:
                tractRefs = dict((tract.getId(), tractRefs.get(tract.getId(), []) + getPatchRefList(tract))
                                 for tract in skymap)

        outputRefList = []
        for tractRefList in tractRefs.values():
            existingRefs = [ref for ref in tractRefList if ref.datasetExists()]
            outputRefList.append(existingRefs)

        self.refList = outputRefList
class ConsolidateObjectTableConnections(pipeBase.PipelineTaskConnections,
                                        dimensions=("tract", "skymap")):
    inputCatalogs = connectionTypes.Input(
        doc="Per-Patch objectTables conforming to the standard data model.",
        name="objectTable",
        storageClass="DataFrame",
        dimensions=("tract", "patch", "skymap"),
        multiple=True,
    )
    outputCatalog = connectionTypes.Output(
        doc="Per-tract horizontal concatenation of the input objectTables",
        name="objectTable_tract",
        storageClass="DataFrame",
        dimensions=("tract", "skymap"),
    )


class ConsolidateObjectTableConfig(pipeBase.PipelineTaskConfig,
                                   pipelineConnections=ConsolidateObjectTableConnections):
    coaddName = pexConfig.Field(
        dtype=str,
        default="deep",
        doc="Name of coadd"
    )
class ConsolidateObjectTableTask(CmdLineTask, pipeBase.PipelineTask):
    """Write patch-merged source tables to a tract-level parquet file.

    Concatenates the `objectTable` list into a per-tract `objectTable_tract`.
    """
    _DefaultName = "consolidateObjectTable"
    ConfigClass = ConsolidateObjectTableConfig

    inputDataset = 'objectTable'
    outputDataset = 'objectTable_tract'

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)
        self.log.info("Concatenating %s per-patch Object Tables",
                      len(inputs['inputCatalogs']))
        df = pd.concat(inputs['inputCatalogs'])
        butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs)

    @classmethod
    def _makeArgumentParser(cls):
        parser = ArgumentParser(name=cls._DefaultName)

        parser.add_id_argument("--id", cls.inputDataset,
                               help="data ID, e.g. --id tract=12345",
                               ContainerClass=TractObjectDataIdContainer)
        return parser

    def runDataRef(self, patchRefList):
        df = pd.concat([patchRef.get().toDataFrame() for patchRef in patchRefList])
        patchRefList[0].put(ParquetTable(dataFrame=df), self.outputDataset)

    def writeMetadata(self, dataRef):
        """No metadata to write.
        """
        pass
class TransformSourceTableConnections(pipeBase.PipelineTaskConnections,
                                      defaultTemplates={"catalogType": ""},
                                      dimensions=("instrument", "visit", "detector")):

    inputCatalog = connectionTypes.Input(
        doc="Wide input catalog of sources produced by WriteSourceTableTask",
        name="{catalogType}source",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector"),
        deferLoad=True
    )
    outputCatalog = connectionTypes.Output(
        doc="Narrower, per-detector Source Table transformed and converted per a "
            "specified set of functors",
        name="{catalogType}sourceTable",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector")
    )


class TransformSourceTableConfig(TransformCatalogBaseConfig,
                                 pipelineConnections=TransformSourceTableConnections):

    def setDefaults(self):
        super().setDefaults()
        self.functorFile = os.path.join('$PIPE_TASKS_DIR', 'schemas', 'Source.yaml')
        self.primaryKey = 'sourceId'
        self.columnsFromDataId = ['visit', 'detector', 'band', 'physical_filter']


class TransformSourceTableTask(TransformCatalogBaseTask):
    """Transform/standardize a source catalog.
    """
    _DefaultName = "transformSourceTable"
    ConfigClass = TransformSourceTableConfig
class ConsolidateVisitSummaryConnections(pipeBase.PipelineTaskConnections,
                                         dimensions=("instrument", "visit",),
                                         defaultTemplates={"calexpType": ""}):
    calexp = connectionTypes.Input(
        doc="Processed exposures used for metadata",
        name="{calexpType}calexp",
        storageClass="ExposureF",
        dimensions=("instrument", "visit", "detector"),
        deferLoad=True,
        multiple=True,
    )
    visitSummary = connectionTypes.Output(
        doc=("Per-visit consolidated exposure metadata. These catalogs use "
             "detector id for the id and are sorted for fast lookups of a "
             "detector."),
        name="visitSummary",
        storageClass="ExposureCatalog",
        dimensions=("instrument", "visit"),
    )
    visitSummarySchema = connectionTypes.InitOutput(
        doc="Schema of the visitSummary catalog",
        name="visitSummary_schema",
        storageClass="ExposureCatalog",
    )


class ConsolidateVisitSummaryConfig(pipeBase.PipelineTaskConfig,
                                    pipelineConnections=ConsolidateVisitSummaryConnections):
    """Config for ConsolidateVisitSummaryTask"""
    pass
class ConsolidateVisitSummaryTask(pipeBase.PipelineTask, pipeBase.CmdLineTask):
    """Task to consolidate per-detector visit metadata.

    This task aggregates the following metadata from all the detectors in a
    single visit into an exposure catalog:
    - The visitInfo.
    - The wcs.
    - The photoCalib.
    - The physical_filter and band (if available).
    - The psf size, shape, and effective area at the center of the detector.
    - The corners of the bounding box in right ascension/declination.

    Other quantities such as Detector, Psf, ApCorrMap, and TransmissionCurve
    are not persisted here because of storage concerns, and because of their
    limited utility as summary statistics.

    Tests for this task are performed in ci_hsc_gen3.
    """
    _DefaultName = "consolidateVisitSummary"
    ConfigClass = ConsolidateVisitSummaryConfig

    @classmethod
    def _makeArgumentParser(cls):
        parser = ArgumentParser(name=cls._DefaultName)

        parser.add_id_argument("--id", "calexp",
                               help="data ID, e.g. --id visit=12345",
                               ContainerClass=VisitDataIdContainer)
        return parser

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.schema = afwTable.ExposureTable.makeMinimalSchema()
        self.schema.addField('visit', type='L', doc='Visit number')
        self.schema.addField('physical_filter', type='String', size=32, doc='Physical filter')
        self.schema.addField('band', type='String', size=32, doc='Name of band')
        ExposureSummaryStats.update_schema(self.schema)
        self.visitSummarySchema = afwTable.ExposureCatalog(self.schema)

    def writeMetadata(self, dataRef):
        """No metadata to persist, so override to remove metadata persistence.
        """
        pass

    def writeConfig(self, butler, clobber=False, doBackup=True):
        """No config to persist, so override to remove config persistence.
        """
        pass

    def runDataRef(self, dataRefList):
        visit = dataRefList[0].dataId['visit']

        self.log.debug("Concatenating metadata from %d per-detector calexps (visit %d)",
                       len(dataRefList), visit)

        expCatalog = self._combineExposureMetadata(visit, dataRefList, isGen3=False)

        dataRefList[0].put(expCatalog, 'visitSummary', visit=visit)
    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        dataRefs = butlerQC.get(inputRefs.calexp)
        visit = dataRefs[0].dataId.byName()['visit']

        self.log.debug("Concatenating metadata from %d per-detector calexps (visit %d)",
                       len(dataRefs), visit)

        expCatalog = self._combineExposureMetadata(visit, dataRefs)

        butlerQC.put(expCatalog, outputRefs.visitSummary)

    def _combineExposureMetadata(self, visit, dataRefs, isGen3=True):
        """Make a combined exposure catalog from a list of dataRefs.
        These dataRefs must point to exposures with wcs, summaryStats,
        and other visit metadata.

        Parameters
        ----------
        visit : `int`
            Visit identification number.
        dataRefs : `list`
            List of dataRefs in visit. May be list of
            `lsst.daf.persistence.ButlerDataRef` (Gen2) or
            `lsst.daf.butler.DeferredDatasetHandle` (Gen3).
        isGen3 : `bool`, optional
            Specifies if this is a Gen3 list of datarefs.

        Returns
        -------
        visitSummary : `lsst.afw.table.ExposureCatalog`
            Exposure catalog with per-detector summary information.
        """
        cat = afwTable.ExposureCatalog(self.schema)
        cat.resize(len(dataRefs))

        cat['visit'] = visit

        for i, dataRef in enumerate(dataRefs):
            if isGen3:
                visitInfo = dataRef.get(component='visitInfo')
                filterLabel = dataRef.get(component='filter')
                summaryStats = dataRef.get(component='summaryStats')
                detector = dataRef.get(component='detector')
                wcs = dataRef.get(component='wcs')
                photoCalib = dataRef.get(component='photoCalib')
                bbox = dataRef.get(component='bbox')
                validPolygon = dataRef.get(component='validPolygon')
            else:
                # Gen2: read a tiny sub-region of the calexp to get at the
                # attached metadata without pulling full pixels.
                gen2_read_bbox = lsst.geom.BoxI(lsst.geom.PointI(0, 0), lsst.geom.PointI(1, 1))
                exp = dataRef.get(datasetType='calexp_sub', bbox=gen2_read_bbox)
                visitInfo = exp.getInfo().getVisitInfo()
                filterLabel = dataRef.get("calexp_filter")
                summaryStats = exp.getInfo().getSummaryStats()
                wcs = exp.getWcs()
                photoCalib = exp.getPhotoCalib()
                detector = exp.getDetector()
                bbox = dataRef.get(datasetType='calexp_bbox')
                validPolygon = exp.getInfo().getValidPolygon()

            rec = cat[i]
            rec.setBBox(bbox)
            rec.setVisitInfo(visitInfo)
            rec.setWcs(wcs)
            rec.setPhotoCalib(photoCalib)
            rec.setValidPolygon(validPolygon)

            rec['physical_filter'] = filterLabel.physicalLabel if filterLabel.hasPhysicalLabel() else ""
            rec['band'] = filterLabel.bandLabel if filterLabel.hasBandLabel() else ""
            rec.setId(detector.getId())
            summaryStats.update_record(rec)

        metadata = dafBase.PropertyList()
        metadata.add("COMMENT", "Catalog id is detector id, sorted.")
        # We are looping over existing datarefs, so the following is true.
        metadata.add("COMMENT", "Only detectors with data have entries.")
        cat.setMetadata(metadata)

        cat.sort()
        return cat
class VisitDataIdContainer(DataIdContainer):
    """DataIdContainer that groups sensor-level ids by visit.
    """

    def makeDataRefList(self, namespace):
        """Make self.refList from self.idList.

        Generate a list of data references grouped by visit.

        Parameters
        ----------
        namespace : `argparse.Namespace`
            Namespace used by `lsst.pipe.base.CmdLineTask` to parse command
            line arguments.
        """
        # Group by visits.
        visitRefs = defaultdict(list)
        for dataId in self.idList:
            if "visit" in dataId:
                visitId = dataId["visit"]
                # Append all matching subsets for this visit.
                subset = namespace.butler.subset(self.datasetType, dataId=dataId)
                visitRefs[visitId].extend([dataRef for dataRef in subset])

        outputRefList = []
        for refList in visitRefs.values():
            existingRefs = [ref for ref in refList if ref.datasetExists()]
            if existingRefs:
                outputRefList.append(existingRefs)

        self.refList = outputRefList
class ConsolidateSourceTableConnections(pipeBase.PipelineTaskConnections,
                                        defaultTemplates={"catalogType": ""},
                                        dimensions=("instrument", "visit")):
    inputCatalogs = connectionTypes.Input(
        doc="Input per-detector Source Tables",
        name="{catalogType}sourceTable",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector"),
        multiple=True
    )
    outputCatalog = connectionTypes.Output(
        doc="Per-visit concatenation of Source Table",
        name="{catalogType}sourceTable_visit",
        storageClass="DataFrame",
        dimensions=("instrument", "visit")
    )


class ConsolidateSourceTableConfig(pipeBase.PipelineTaskConfig,
                                   pipelineConnections=ConsolidateSourceTableConnections):
    pass


class ConsolidateSourceTableTask(CmdLineTask, pipeBase.PipelineTask):
    """Concatenate the `sourceTable` list into a per-visit `sourceTable_visit`.
    """
    _DefaultName = 'consolidateSourceTable'
    ConfigClass = ConsolidateSourceTableConfig

    inputDataset = 'sourceTable'
    outputDataset = 'sourceTable_visit'
    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        from .makeCoaddTempExp import reorderRefs

        detectorOrder = [ref.dataId['detector'] for ref in inputRefs.inputCatalogs]
        detectorOrder.sort()
        inputRefs = reorderRefs(inputRefs, detectorOrder, dataIdKey='detector')
        inputs = butlerQC.get(inputRefs)
        self.log.info("Concatenating %s per-detector Source Tables",
                      len(inputs['inputCatalogs']))
        df = pd.concat(inputs['inputCatalogs'])
        butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs)

    def runDataRef(self, dataRefList):
        self.log.info("Concatenating %s per-detector Source Tables", len(dataRefList))
        df = pd.concat([dataRef.get().toDataFrame() for dataRef in dataRefList])
        dataRefList[0].put(ParquetTable(dataFrame=df), self.outputDataset)

    @classmethod
    def _makeArgumentParser(cls):
        parser = ArgumentParser(name=cls._DefaultName)

        parser.add_id_argument("--id", cls.inputDataset,
                               help="data ID, e.g. --id visit=12345",
                               ContainerClass=VisitDataIdContainer)
        return parser

    def writeMetadata(self, dataRef):
        """No metadata to write.
        """
        pass

    def writeConfig(self, butler, clobber=False, doBackup=True):
        """No config to write.
        """
        pass
class MakeCcdVisitTableConnections(pipeBase.PipelineTaskConnections,
                                   dimensions=("instrument",),
                                   defaultTemplates={"calexpType": ""}):
    visitSummaryRefs = connectionTypes.Input(
        doc="Data references for per-visit consolidated exposure metadata",
        name="finalVisitSummary",
        storageClass="ExposureCatalog",
        dimensions=("instrument", "visit"),
        multiple=True,
        deferLoad=True,
    )
    outputCatalog = connectionTypes.Output(
        doc="CCD and Visit metadata table",
        name="ccdVisitTable",
        storageClass="DataFrame",
        dimensions=("instrument",)
    )


class MakeCcdVisitTableConfig(pipeBase.PipelineTaskConfig,
                              pipelineConnections=MakeCcdVisitTableConnections):
    pass


class MakeCcdVisitTableTask(CmdLineTask, pipeBase.PipelineTask):
    """Produce a `ccdVisitTable` from the visit summary exposure catalogs.
    """
    _DefaultName = 'makeCcdVisitTable'
    ConfigClass = MakeCcdVisitTableConfig
    def run(self, visitSummaryRefs):
        """Make a table of ccd information from the visit summary catalogs.

        Parameters
        ----------
        visitSummaryRefs : `list` of `lsst.daf.butler.DeferredDatasetHandle`
            List of DeferredDatasetHandles pointing to exposure catalogs with
            per-detector summary information.

        Returns
        -------
        result : `lsst.pipe.base.Struct`
            Results struct with attribute:

            ``outputCatalog``
                Catalog of ccd and visit information.
        """
        ccdEntries = []
        for visitSummaryRef in visitSummaryRefs:
            visitSummary = visitSummaryRef.get()
            visitInfo = visitSummary[0].getVisitInfo()

            summaryTable = visitSummary.asAstropy()
            selectColumns = ['id', 'visit', 'physical_filter', 'band', 'ra', 'decl', 'zenithDistance',
                             'zeroPoint', 'psfSigma', 'skyBg', 'skyNoise',
                             'astromOffsetMean', 'astromOffsetStd', 'nPsfStar',
                             'psfStarDeltaE1Median', 'psfStarDeltaE2Median',
                             'psfStarDeltaE1Scatter', 'psfStarDeltaE2Scatter',
                             'psfStarDeltaSizeMedian', 'psfStarDeltaSizeScatter',
                             'psfStarScaledDeltaSizeScatter',
                             'psfTraceRadiusDelta', 'maxDistToNearestPsf']
            ccdEntry = summaryTable[selectColumns].to_pandas().set_index('id')
            # 'visit' is renamed to 'visitId' to match the output data model.
            ccdEntry = ccdEntry.rename(columns={"visit": "visitId"})
            dataIds = [DataCoordinate.standardize(visitSummaryRef.dataId, detector=id) for id in
                       summaryTable['id']]
            packer = visitSummaryRef.dataId.universe.makePacker('visit_detector', visitSummaryRef.dataId)
            ccdVisitIds = [packer.pack(dataId) for dataId in dataIds]
            ccdEntry['ccdVisitId'] = ccdVisitIds
            ccdEntry['detector'] = summaryTable['id']
            pixToArcseconds = np.array([vR.getWcs().getPixelScale().asArcseconds() for vR in visitSummary])
            ccdEntry["seeing"] = visitSummary['psfSigma'] * np.sqrt(8 * np.log(2)) * pixToArcseconds

            ccdEntry["skyRotation"] = visitInfo.getBoresightRotAngle().asDegrees()
            ccdEntry["expMidpt"] = visitInfo.getDate().toPython()
            ccdEntry["expMidptMJD"] = visitInfo.getDate().get(dafBase.DateTime.MJD)
            expTime = visitInfo.getExposureTime()
            ccdEntry['expTime'] = expTime
            ccdEntry["obsStart"] = ccdEntry["expMidpt"] - 0.5 * pd.Timedelta(seconds=expTime)
            expTime_days = expTime / (60*60*24)
            ccdEntry["obsStartMJD"] = ccdEntry["expMidptMJD"] - 0.5 * expTime_days
            ccdEntry['darkTime'] = visitInfo.getDarkTime()
            ccdEntry['xSize'] = summaryTable['bbox_max_x'] - summaryTable['bbox_min_x']
            ccdEntry['ySize'] = summaryTable['bbox_max_y'] - summaryTable['bbox_min_y']
            ccdEntry['llcra'] = summaryTable['raCorners'][:, 0]
            ccdEntry['llcdec'] = summaryTable['decCorners'][:, 0]
            ccdEntry['ulcra'] = summaryTable['raCorners'][:, 1]
            ccdEntry['ulcdec'] = summaryTable['decCorners'][:, 1]
            ccdEntry['urcra'] = summaryTable['raCorners'][:, 2]
            ccdEntry['urcdec'] = summaryTable['decCorners'][:, 2]
            ccdEntry['lrcra'] = summaryTable['raCorners'][:, 3]
            ccdEntry['lrcdec'] = summaryTable['decCorners'][:, 3]
            ccdEntries.append(ccdEntry)

        outputCatalog = pd.concat(ccdEntries)
        outputCatalog.set_index('ccdVisitId', inplace=True, verify_integrity=True)
        return pipeBase.Struct(outputCatalog=outputCatalog)
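    # Note on the "seeing" column above: psfSigma is the PSF Gaussian sigma in
    # pixels, and sqrt(8*ln(2)) ~= 2.355 converts a Gaussian sigma to FWHM, so
    # seeing [arcsec] = psfSigma [pix] * 2.355 * pixelScale [arcsec/pix].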
class MakeVisitTableConnections(pipeBase.PipelineTaskConnections,
                                dimensions=("instrument",),
                                defaultTemplates={"calexpType": ""}):
    visitSummaries = connectionTypes.Input(
        doc="Per-visit consolidated exposure metadata",
        name="finalVisitSummary",
        storageClass="ExposureCatalog",
        dimensions=("instrument", "visit",),
        multiple=True,
        deferLoad=True,
    )
    outputCatalog = connectionTypes.Output(
        doc="Visit metadata table",
        name="visitTable",
        storageClass="DataFrame",
        dimensions=("instrument",)
    )


class MakeVisitTableConfig(pipeBase.PipelineTaskConfig,
                           pipelineConnections=MakeVisitTableConnections):
    pass


class MakeVisitTableTask(CmdLineTask, pipeBase.PipelineTask):
    """Produce a `visitTable` from the visit summary exposure catalogs.
    """
    _DefaultName = 'makeVisitTable'
    ConfigClass = MakeVisitTableConfig
    def run(self, visitSummaries):
        """Make a table of visit information from the visit summary catalogs.

        Parameters
        ----------
        visitSummaries : `list` of `lsst.daf.butler.DeferredDatasetHandle`
            List of exposure catalogs with per-detector summary information.

        Returns
        -------
        result : `lsst.pipe.base.Struct`
            Results struct with attribute:

            ``outputCatalog``
                Catalog of visit information.
        """
        visitEntries = []
        for visitSummary in visitSummaries:
            visitSummary = visitSummary.get()
            visitRow = visitSummary[0]
            visitInfo = visitRow.getVisitInfo()

            visitEntry = {}
            visitEntry["visitId"] = visitRow['visit']
            visitEntry["visit"] = visitRow['visit']
            visitEntry["physical_filter"] = visitRow['physical_filter']
            visitEntry["band"] = visitRow['band']
            raDec = visitInfo.getBoresightRaDec()
            visitEntry["ra"] = raDec.getRa().asDegrees()
            visitEntry["decl"] = raDec.getDec().asDegrees()
            visitEntry["skyRotation"] = visitInfo.getBoresightRotAngle().asDegrees()
            azAlt = visitInfo.getBoresightAzAlt()
            visitEntry["azimuth"] = azAlt.getLongitude().asDegrees()
            visitEntry["altitude"] = azAlt.getLatitude().asDegrees()
            visitEntry["zenithDistance"] = 90 - azAlt.getLatitude().asDegrees()
            visitEntry["airmass"] = visitInfo.getBoresightAirmass()
            expTime = visitInfo.getExposureTime()
            visitEntry["expTime"] = expTime
            visitEntry["expMidpt"] = visitInfo.getDate().toPython()
            visitEntry["expMidptMJD"] = visitInfo.getDate().get(dafBase.DateTime.MJD)
            visitEntry["obsStart"] = visitEntry["expMidpt"] - 0.5 * pd.Timedelta(seconds=expTime)
            expTime_days = expTime / (60*60*24)
            visitEntry["obsStartMJD"] = visitEntry["expMidptMJD"] - 0.5 * expTime_days
            visitEntries.append(visitEntry)

        outputCatalog = pd.DataFrame(data=visitEntries)
        outputCatalog.set_index('visitId', inplace=True, verify_integrity=True)
        return pipeBase.Struct(outputCatalog=outputCatalog)
class WriteForcedSourceTableConnections(pipeBase.PipelineTaskConnections,
                                        dimensions=("instrument", "visit", "detector", "skymap", "tract")):

    inputCatalog = connectionTypes.Input(
        doc="Primary per-detector, single-epoch forced-photometry catalog. "
            "By default, it is the output of ForcedPhotCcdTask on calexps",
        name="forced_src",
        storageClass="SourceCatalog",
        dimensions=("instrument", "visit", "detector", "skymap", "tract")
    )
    inputCatalogDiff = connectionTypes.Input(
        doc="Secondary multi-epoch, per-detector, forced photometry catalog. "
            "By default, it is the output of ForcedPhotCcdTask run on image differences.",
        name="forced_diff",
        storageClass="SourceCatalog",
        dimensions=("instrument", "visit", "detector", "skymap", "tract")
    )
    outputCatalog = connectionTypes.Output(
        doc="InputCatalogs horizontally joined on `objectId` in Parquet format",
        name="mergedForcedSource",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector", "skymap", "tract")
    )


class WriteForcedSourceTableConfig(pipeBase.PipelineTaskConfig,
                                   pipelineConnections=WriteForcedSourceTableConnections):
    key = lsst.pex.config.Field(
        doc="Column on which to join the two input tables and make the primary key of the output",
        dtype=str,
        default="objectId",
    )
class WriteForcedSourceTableTask(pipeBase.PipelineTask):
    """Merge and convert per-detector forced source catalogs to parquet.

    Because the predecessor ForcedPhotCcdTask operates per-detector,
    per-tract (i.e., it has tract in its dimensions), detectors
    on the tract boundary may have multiple forced source catalogs.

    The successor task TransformForcedSourceTable runs per-patch
    and temporally aggregates overlapping mergedForcedSource catalogs from all
    available epochs.
    """
    _DefaultName = "writeForcedSourceTable"
    ConfigClass = WriteForcedSourceTableConfig

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)
        # Add ccdVisitId to allow joining with CcdVisitTable
        inputs['ccdVisitId'] = butlerQC.quantum.dataId.pack("visit_detector")
        inputs['band'] = butlerQC.quantum.dataId.full['band']
        outputs = self.run(**inputs)
        butlerQC.put(outputs, outputRefs)

    def run(self, inputCatalog, inputCatalogDiff, ccdVisitId=None, band=None):
        dfs = []
        for table, dataset in zip((inputCatalog, inputCatalogDiff), ('calexp', 'diff')):
            df = table.asAstropy().to_pandas().set_index(self.config.key, drop=False)
            df = df.reindex(sorted(df.columns), axis=1)
            df['ccdVisitId'] = ccdVisitId if ccdVisitId else pd.NA
            df['band'] = band if band else pd.NA
            df.columns = pd.MultiIndex.from_tuples([(dataset, c) for c in df.columns],
                                                   names=('dataset', 'column'))

            dfs.append(df)

        outputCatalog = functools.reduce(lambda d1, d2: d1.join(d2), dfs)
        return pipeBase.Struct(outputCatalog=outputCatalog)
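    # The resulting DataFrame carries a two-level column index; a hypothetical
    # entry is ('calexp', 'psfFlux') alongside its image-difference twin
    # ('diff', 'psfFlux'), with both tables joined row-wise on config.key
    # (objectId by default).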
class TransformForcedSourceTableConnections(pipeBase.PipelineTaskConnections,
                                            dimensions=("instrument", "skymap", "patch", "tract")):

    inputCatalogs = connectionTypes.Input(
        doc="Parquet table of merged ForcedSources produced by WriteForcedSourceTableTask",
        name="mergedForcedSource",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector", "skymap", "tract"),
        multiple=True,
        deferLoad=True
    )
    referenceCatalog = connectionTypes.Input(
        doc="Reference catalog which was used to seed the forcedPhot. Columns "
            "objectId, detect_isPrimary, detect_isTractInner, detect_isPatchInner "
            "are expected.",
        name="objectTable",
        storageClass="DataFrame",
        dimensions=("tract", "patch", "skymap"),
        deferLoad=True
    )
    outputCatalog = connectionTypes.Output(
        doc="Narrower, temporally-aggregated, per-patch ForcedSource Table transformed and converted per a "
            "specified set of functors",
        name="forcedSourceTable",
        storageClass="DataFrame",
        dimensions=("tract", "patch", "skymap")
    )
class TransformForcedSourceTableConfig(TransformCatalogBaseConfig,
                                       pipelineConnections=TransformForcedSourceTableConnections):
    referenceColumns = pexConfig.ListField(
        dtype=str,
        default=["detect_isPrimary", "detect_isTractInner", "detect_isPatchInner"],
        optional=True,
        doc="Columns to pull from reference catalog",
    )
    keyRef = lsst.pex.config.Field(
        doc="Column on which to join the two input tables and make the primary key of the output",
        dtype=str,
        default="objectId",
    )
    key = lsst.pex.config.Field(
        doc="Rename the output DataFrame index to this name",
        dtype=str,
        default="forcedSourceId",
    )

    def setDefaults(self):
        super().setDefaults()
        self.functorFile = os.path.join('$PIPE_TASKS_DIR', 'schemas', 'ForcedSource.yaml')
        self.columnsFromDataId = ['tract', 'patch']
1930 """Transform/standardize a ForcedSource catalog
1932 Transforms each wide, per-detector forcedSource parquet table per the
1933 specification file (per-camera defaults found in ForcedSource.yaml).
1934 All epochs that overlap the patch are aggregated into one per-patch
1935 narrow-parquet file.
1937 No de-duplication of rows
is performed. Duplicate resolutions flags are
1938 pulled
in from the referenceCatalog: `detect_isPrimary`,
1939 `detect_isTractInner`,`detect_isPatchInner`, so that user may de-duplicate
1940 for analysis
or compare duplicates
for QA.
1942 The resulting table includes multiple bands. Epochs (MJDs)
and other useful
1943 per-visit rows can be retreived by joining
with the CcdVisitTable on
1946 _DefaultName = "transformForcedSourceTable"
1947 ConfigClass = TransformForcedSourceTableConfig
1949 def runQuantum(self, butlerQC, inputRefs, outputRefs):
1950 inputs = butlerQC.get(inputRefs)
1951 if self.funcs
is None:
1952 raise ValueError(
"config.functorFile is None. "
1953 "Must be a valid path to yaml in order to run Task as a PipelineTask.")
1954 outputs = self.run(inputs[
'inputCatalogs'], inputs[
'referenceCatalog'], funcs=self.funcs,
1955 dataId=outputRefs.outputCatalog.dataId.full)
1957 butlerQC.put(outputs, outputRefs)
1959 def run(self, inputCatalogs, referenceCatalog, funcs=None, dataId=None, band=None):
1961 ref = referenceCatalog.get(parameters={
"columns": self.config.referenceColumns})
1962 self.log.info(
"Aggregating %s input catalogs" % (len(inputCatalogs)))
1963 for handle
in inputCatalogs:
1964 result = self.transform(
None, handle, funcs, dataId)
1966 dfs.append(result.df.join(ref, how=
'inner'))
1968 outputCatalog = pd.concat(dfs)
1972 outputCatalog.index.rename(self.config.keyRef, inplace=
True)
1974 outputCatalog.reset_index(inplace=
True)
1977 outputCatalog.set_index(
"forcedSourceId", inplace=
True, verify_integrity=
True)
1979 outputCatalog.index.rename(self.config.key, inplace=
True)
1981 self.log.info(
"Made a table of %d columns and %d rows",
1982 len(outputCatalog.columns), len(outputCatalog))
1983 return pipeBase.Struct(outputCatalog=outputCatalog)
class ConsolidateTractConnections(pipeBase.PipelineTaskConnections,
                                  defaultTemplates={"catalogType": ""},
                                  dimensions=("instrument", "tract")):
    inputCatalogs = connectionTypes.Input(
        doc="Input per-patch DataFrame Tables to be concatenated",
        name="{catalogType}ForcedSourceTable",
        storageClass="DataFrame",
        dimensions=("tract", "patch", "skymap"),
        multiple=True,
    )
    outputCatalog = connectionTypes.Output(
        doc="Output per-tract concatenation of DataFrame Tables",
        name="{catalogType}ForcedSourceTable_tract",
        storageClass="DataFrame",
        dimensions=("tract", "skymap"),
    )


class ConsolidateTractConfig(pipeBase.PipelineTaskConfig,
                             pipelineConnections=ConsolidateTractConnections):
    pass


class ConsolidateTractTask(CmdLineTask, pipeBase.PipelineTask):
    """Concatenate any per-patch, dataframe list into a single
    per-tract DataFrame.
    """
    _DefaultName = 'ConsolidateTract'
    ConfigClass = ConsolidateTractConfig

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)
        # Not checking that at least one inputCatalog exists, because an empty
        # list would mean an empty quantum graph.
        self.log.info("Concatenating %s per-patch %s Tables",
                      len(inputs['inputCatalogs']),
                      inputRefs.inputCatalogs[0].datasetType.name)
        df = pd.concat(inputs['inputCatalogs'])
        butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs)