import functools
import pandas as pd
from collections import defaultdict

import lsst.geom
import lsst.pex.config as pexConfig
import lsst.pipe.base as pipeBase
import lsst.daf.base as dafBase
import lsst.afw.table as afwTable
from lsst.meas.base import SingleFrameMeasurementTask
from lsst.pipe.base import CmdLineTask, ArgumentParser, DataIdContainer
from lsst.pipe.base import connectionTypes
from lsst.daf.butler import DeferredDatasetHandle

from .parquetTable import ParquetTable
from .multiBandUtils import makeMergeArgumentParser, MergeSourcesRunner
from .coaddBase import CoaddDataIdContainer
from .functors import CompositeFunctor, RAColumn, DecColumn, Column


def flattenFilters(df, noDupCols=['coord_ra', 'coord_dec'], camelCase=False, inputBands=None):
    """Flattens a dataframe with multilevel column index.
    """
    newDf = pd.DataFrame()
    # band is the level-0 index
    dfBands = df.columns.unique(level=0).values
    for band in dfBands:
        subdf = df[band]
        columnFormat = '{0}{1}' if camelCase else '{0}_{1}'
        newColumns = {c: columnFormat.format(band, c)
                      for c in subdf.columns if c not in noDupCols}
        cols = list(newColumns.keys())
        newDf = pd.concat([newDf, subdf[cols].rename(columns=newColumns)], axis=1)

    # Band must be present in the input and requested output, else columns are all NaN
    presentBands = dfBands if inputBands is None else list(set(inputBands).intersection(dfBands))
    # Get the unexploded columns from any present band's partition
    noDupDf = df[presentBands[0]][noDupCols]
    newDf = pd.concat([noDupDf, newDf], axis=1)
    return newDf
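

# Illustrative sketch (not part of the pipeline): how `flattenFilters` turns a
# band-keyed multilevel column index into flat, band-prefixed column names.
# The band and column names below are made up for demonstration only.
def _exampleFlattenFilters():
    bands = ['g', 'r']
    data = {(band, col): [1.0, 2.0]
            for band in bands
            for col in ['coord_ra', 'coord_dec', 'PsFlux']}
    df = pd.DataFrame(data)
    df.columns = df.columns.set_names(['band', 'column'])
    # Result keeps a single coord_ra/coord_dec pair and produces 'g_PsFlux'
    # and 'r_PsFlux' (or 'gPsFlux'/'rPsFlux' with camelCase=True).
    return flattenFilters(df)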


class WriteObjectTableConnections(pipeBase.PipelineTaskConnections,
                                  defaultTemplates={"coaddName": "deep"},
                                  dimensions=("tract", "patch", "skymap")):
    inputCatalogMeas = connectionTypes.Input(
        doc="Catalog of source measurements on the deepCoadd.",
        dimensions=("tract", "patch", "band", "skymap"),
        storageClass="SourceCatalog",
        name="{coaddName}Coadd_meas",
        multiple=True
    )
    inputCatalogForcedSrc = connectionTypes.Input(
        doc="Catalog of forced measurements (shape and position parameters held fixed) on the deepCoadd.",
        dimensions=("tract", "patch", "band", "skymap"),
        storageClass="SourceCatalog",
        name="{coaddName}Coadd_forced_src",
        multiple=True
    )
    inputCatalogRef = connectionTypes.Input(
        doc="Catalog marking the primary detection (which band provides a good shape and position) "
            "for each detection in deepCoadd_mergeDet.",
        dimensions=("tract", "patch", "skymap"),
        storageClass="SourceCatalog",
        name="{coaddName}Coadd_ref"
    )
    outputCatalog = connectionTypes.Output(
        doc="A vertical concatenation of the deepCoadd_{ref|meas|forced_src} catalogs, "
            "stored as a DataFrame with a multi-level column index per-patch.",
        dimensions=("tract", "patch", "skymap"),
        storageClass="DataFrame",
        name="{coaddName}Coadd_obj"
    )


class WriteObjectTableConfig(pipeBase.PipelineTaskConfig,
                             pipelineConnections=WriteObjectTableConnections):
    engine = pexConfig.Field(
        dtype=str,
        default="pyarrow",
        doc="Parquet engine for writing (pyarrow or fastparquet)"
    )
    coaddName = pexConfig.Field(
        dtype=str,
        default="deep",
        doc="Name of coadd"
    )


class WriteObjectTableTask(CmdLineTask, pipeBase.PipelineTask):
    """Write filter-merged source tables to parquet
    """
    _DefaultName = "writeObjectTable"
    ConfigClass = WriteObjectTableConfig
    RunnerClass = MergeSourcesRunner

    # Names of table datasets to be merged
    inputDatasets = ('forced_src', 'meas', 'ref')

    # Tag of output dataset written by `MergeSourcesTask.write`
    outputDataset = 'obj'

    def __init__(self, butler=None, schema=None, **kwargs):
        # butler and schema are accepted only for compatibility with the
        # CmdLineTask runner; neither is used here.
        super().__init__(**kwargs)

    def runDataRef(self, patchRefList):
        """!
        @brief Merge coadd sources from multiple bands. Calls @ref `run` which
        must be defined in subclasses that inherit from MergeSourcesTask.

        @param[in] patchRefList list of data references for each filter
        """
        catalogs = dict(self.readCatalog(patchRef) for patchRef in patchRefList)
        dataId = patchRefList[0].dataId
        mergedCatalog = self.run(catalogs, tract=dataId['tract'], patch=dataId['patch'])
        self.write(patchRefList[0], ParquetTable(dataFrame=mergedCatalog))

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)

        measDict = {ref.dataId['band']: {'meas': cat} for ref, cat in
                    zip(inputRefs.inputCatalogMeas, inputs['inputCatalogMeas'])}
        forcedSourceDict = {ref.dataId['band']: {'forced_src': cat} for ref, cat in
                            zip(inputRefs.inputCatalogForcedSrc, inputs['inputCatalogForcedSrc'])}

        catalogs = {}
        for band in measDict.keys():
            catalogs[band] = {'meas': measDict[band]['meas'],
                              'forced_src': forcedSourceDict[band]['forced_src'],
                              'ref': inputs['inputCatalogRef']}
        dataId = butlerQC.quantum.dataId
        df = self.run(catalogs=catalogs, tract=dataId['tract'], patch=dataId['patch'])
        outputs = pipeBase.Struct(outputCatalog=df)
        butlerQC.put(outputs, outputRefs)

    @classmethod
    def _makeArgumentParser(cls):
        """Create a suitable ArgumentParser.

        We will use the ArgumentParser to get a list of data
        references for patches; the RunnerClass will sort them into lists
        of data references for the same patch.

        References first of self.inputDatasets, rather than
        self.inputDataset
        """
        return makeMergeArgumentParser(cls._DefaultName, cls.inputDatasets[0])

    def readCatalog(self, patchRef):
        """Read input catalogs

        Read all the input datasets given by the 'inputDatasets'
        attribute.

        Parameters
        ----------
        patchRef : `lsst.daf.persistence.ButlerDataRef`
            Data reference for patch

        Returns
        -------
        Tuple consisting of band name and a dict of catalogs, keyed by
        dataset name
        """
        band = patchRef.get(self.config.coaddName + "Coadd_filterLabel", immediate=True).bandLabel
        catalogDict = {}
        for dataset in self.inputDatasets:
            catalog = patchRef.get(self.config.coaddName + "Coadd_" + dataset, immediate=True)
            self.log.info("Read %d sources from %s for band %s: %s" %
                          (len(catalog), dataset, band, patchRef.dataId))
            catalogDict[dataset] = catalog
        return band, catalogDict

    def run(self, catalogs, tract, patch):
        """Merge multiple catalogs.

        Parameters
        ----------
        catalogs : `dict`
            Mapping from filter names to dict of catalogs.
        tract : `int`
            tractId to use for the tractId column
        patch : `str`
            patchId to use for the patchId column

        Returns
        -------
        catalog : `pandas.DataFrame`
            Merged dataframe
        """
        dfs = []
        for filt, tableDict in catalogs.items():
            for dataset, table in tableDict.items():
                # Convert afwTable to pandas DataFrame
                df = table.asAstropy().to_pandas().set_index('id', drop=True)

                # Sort columns by name, to ensure matching schema among patches
                df = df.reindex(sorted(df.columns), axis=1)
                df['tractId'] = tract
                df['patchId'] = patch

                # Make columns a 3-level MultiIndex
                df.columns = pd.MultiIndex.from_tuples([(dataset, filt, c) for c in df.columns],
                                                       names=('dataset', 'band', 'column'))
                dfs.append(df)

        catalog = functools.reduce(lambda d1, d2: d1.join(d2), dfs)
        return catalog

    def write(self, patchRef, catalog):
        """Write the output.

        Parameters
        ----------
        catalog : `ParquetTable`
            Catalog to write
        patchRef : `lsst.daf.persistence.ButlerDataRef`
            Data reference for patch
        """
        patchRef.put(catalog, self.config.coaddName + "Coadd_" + self.outputDataset)
        # The "filter" key is not part of the merged dataset's data ID, so drop
        # it before logging to avoid confusion.
        mergeDataId = patchRef.dataId.copy()
        del mergeDataId["filter"]
        self.log.info("Wrote merged catalog: %s" % (mergeDataId,))

    def writeMetadata(self, dataRefList):
        """No metadata to write, and not sure how to write it for a list of dataRefs.
        """
        pass
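

# Illustrative sketch (not part of the pipeline): the dataset/band/column
# MultiIndex that WriteObjectTableTask.run builds, using plain pandas frames
# in place of afw catalogs. All names and values here are made up.
def _exampleObjectTableMerge():
    dfs = []
    for dataset in ('meas', 'forced_src'):
        for band in ('g', 'r'):
            df = pd.DataFrame({'flux': [1.0, 2.0]}, index=pd.Index([10, 11], name='id'))
            df.columns = pd.MultiIndex.from_tuples([(dataset, band, c) for c in df.columns],
                                                   names=('dataset', 'band', 'column'))
            dfs.append(df)
    # Horizontal join on the shared object id index, as in run()
    return functools.reduce(lambda d1, d2: d1.join(d2), dfs)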


class WriteSourceTableConnections(pipeBase.PipelineTaskConnections,
                                  dimensions=("instrument", "visit", "detector")):
    catalog = connectionTypes.Input(
        doc="Input full-depth catalog of sources produced by CalibrateTask",
        name="src",
        storageClass="SourceCatalog",
        dimensions=("instrument", "visit", "detector")
    )
    outputCatalog = connectionTypes.Output(
        doc="Catalog of sources, `src` in Parquet format",
        name="source",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector")
    )


class WriteSourceTableConfig(pipeBase.PipelineTaskConfig,
                             pipelineConnections=WriteSourceTableConnections):
    doApplyExternalPhotoCalib = pexConfig.Field(
        dtype=bool,
        default=False,
        doc=("Add local photoCalib columns from the calexp.photoCalib? Should only set True if "
             "generating Source Tables from older src tables which do not already have local calib columns")
    )
    doApplyExternalSkyWcs = pexConfig.Field(
        dtype=bool,
        default=False,
        doc=("Add local WCS columns from the calexp.wcs? Should only set True if "
             "generating Source Tables from older src tables which do not already have local calib columns")
    )


class WriteSourceTableTask(CmdLineTask, pipeBase.PipelineTask):
    """Write source table to parquet
    """
    _DefaultName = "writeSourceTable"
    ConfigClass = WriteSourceTableConfig

    def runDataRef(self, dataRef):
        src = dataRef.get('src')
        if self.config.doApplyExternalPhotoCalib or self.config.doApplyExternalSkyWcs:
            src = self.addCalibColumns(src, dataRef)

        ccdVisitId = dataRef.get('ccdExposureId')
        result = self.run(src, ccdVisitId=ccdVisitId)
        dataRef.put(result.table, 'source')

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)
        inputs['ccdVisitId'] = butlerQC.quantum.dataId.pack("visit_detector")
        result = self.run(**inputs).table
        outputs = pipeBase.Struct(outputCatalog=result.toDataFrame())
        butlerQC.put(outputs, outputRefs)

    def run(self, catalog, ccdVisitId=None):
        """Convert `src` catalog to parquet

        Parameters
        ----------
        catalog : `afwTable.SourceCatalog`
            catalog to be converted
        ccdVisitId : `int`
            ccdVisitId to be added as a column

        Returns
        -------
        result : `lsst.pipe.base.Struct`
            ``table``
                `ParquetTable` version of the input catalog
        """
        self.log.info("Generating parquet table from src catalog %s", ccdVisitId)
        df = catalog.asAstropy().to_pandas().set_index('id', drop=True)
        df['ccdVisitId'] = ccdVisitId
        return pipeBase.Struct(table=ParquetTable(dataFrame=df))

    def addCalibColumns(self, catalog, dataRef):
        """Add columns with local calibration evaluated at each centroid

        for backwards compatibility with old repos.
        This exists for the purpose of converting old src catalogs
        (which don't have the expected local calib columns) to Source Tables.

        Parameters
        ----------
        catalog : `afwTable.SourceCatalog`
            catalog to which calib columns will be added
        dataRef : `lsst.daf.persistence.ButlerDataRef`
            for fetching the calibs from disk.

        Returns
        -------
        newCat : `afwTable.SourceCatalog`
            Source Catalog with requested local calib columns
        """
        mapper = afwTable.SchemaMapper(catalog.schema)
        measureConfig = SingleFrameMeasurementTask.ConfigClass()
        measureConfig.doReplaceWithNoise = False

        # Only the calibs attached to the exposure are needed, so read a
        # minimal (single-pixel) cutout rather than the full image.
        exposure = dataRef.get('calexp_sub',
                               bbox=lsst.geom.Box2I(lsst.geom.Point2I(0, 0), lsst.geom.Point2I(0, 0)))

        mapper = afwTable.SchemaMapper(catalog.schema)
        mapper.addMinimalSchema(catalog.schema, True)
        schema = mapper.getOutputSchema()

        exposureIdInfo = dataRef.get("expIdInfo")
        measureConfig.plugins.names = []
        if self.config.doApplyExternalSkyWcs:
            plugin = 'base_LocalWcs'
            if plugin in catalog.schema:
                raise RuntimeError(f"{plugin} already in src catalog. Set doApplyExternalSkyWcs=False")
            else:
                measureConfig.plugins.names.add(plugin)

        if self.config.doApplyExternalPhotoCalib:
            plugin = 'base_LocalPhotoCalib'
            if plugin in catalog.schema:
                raise RuntimeError(f"{plugin} already in src catalog. Set doApplyExternalPhotoCalib=False")
            else:
                measureConfig.plugins.names.add(plugin)

        measurement = SingleFrameMeasurementTask(config=measureConfig, schema=schema)
        newCat = afwTable.SourceCatalog(schema)
        newCat.extend(catalog, mapper=mapper)
        measurement.run(measCat=newCat, exposure=exposure, exposureId=exposureIdInfo.expId)
        return newCat

    def writeMetadata(self, dataRef):
        """No metadata to write.
        """
        pass

    @classmethod
    def _makeArgumentParser(cls):
        parser = ArgumentParser(name=cls._DefaultName)
        parser.add_id_argument("--id", 'src',
                               help="data ID, e.g. --id visit=12345 ccd=0")
        return parser


class PostprocessAnalysis(object):
    """Calculate columns from ParquetTable

    This object manages and organizes an arbitrary set of computations
    on a catalog. The catalog is defined by a
    `lsst.pipe.tasks.parquetTable.ParquetTable` object (or list thereof), such as a
    `deepCoadd_obj` dataset, and the computations are defined by a collection
    of `lsst.pipe.tasks.functor.Functor` objects (or, equivalently,
    a `CompositeFunctor`).

    After the object is initialized, accessing the `.df` attribute (which
    holds the `pandas.DataFrame` containing the results of the calculations)
    triggers computation of said dataframe.

    One of the conveniences of using this object is the ability to define a
    desired common filter for all functors. This enables the same functor
    collection to be passed to several different `PostprocessAnalysis` objects
    without having to change the original functor collection, since the `filt`
    keyword argument of this object triggers an overwrite of the `filt`
    property for all functors in the collection.

    This object also allows a list of refFlags to be passed, and defines a set
    of default refFlags that are always included even if not requested.

    If a list of `ParquetTable` objects is passed, rather than a single one,
    then the calculations will be mapped over all the input catalogs. In
    principle, it should be straightforward to parallelize this activity, but
    initial tests have failed (see TODO in code comments).

    Parameters
    ----------
    parq : `lsst.pipe.tasks.ParquetTable` (or list of such)
        Source catalog(s) for computation.
    functors : `list`, `dict`, or `lsst.pipe.tasks.functors.CompositeFunctor`
        Computations to do (functors that act on `parq`).
        If a dict, the output DataFrame will have columns keyed accordingly.
        If a list, the column keys will come from the `.shortname` attribute
        of each functor.
    filt : `str`, optional
        Filter in which to calculate. If provided, this will overwrite any
        existing `.filt` attribute of the provided functors.
    flags : `list`, optional
        List of flags (per-band) to include in output table.
    refFlags : `list`, optional
        List of refFlags (only reference band) to include in output table.
    """
    _defaultRefFlags = []
    _defaultFuncs = (('coord_ra', RAColumn()),
                     ('coord_dec', DecColumn()))

    def __init__(self, parq, functors, filt=None, flags=None, refFlags=None):
        self.parq = parq
        self.functors = functors

        self.filt = filt
        self.flags = list(flags) if flags is not None else []
        self.refFlags = list(self._defaultRefFlags)
        if refFlags is not None:
            self.refFlags += list(refFlags)

        self._df = None

    @property
    def defaultFuncs(self):
        funcs = dict(self._defaultFuncs)
        return funcs

    @property
    def func(self):
        additionalFuncs = self.defaultFuncs
        additionalFuncs.update({flag: Column(flag, dataset='ref') for flag in self.refFlags})
        additionalFuncs.update({flag: Column(flag, dataset='meas') for flag in self.flags})

        if isinstance(self.functors, CompositeFunctor):
            func = self.functors
        else:
            func = CompositeFunctor(self.functors)

        func.funcDict.update(additionalFuncs)
        func.filt = self.filt
        return func

    @property
    def noDupCols(self):
        return [name for name, func in self.func.funcDict.items()
                if func.noDup or func.dataset == 'ref']

    @property
    def df(self):
        if self._df is None:
            self.compute()
        return self._df

    def compute(self, dropna=False, pool=None):
        # Map over multiple parquet tables if a list was passed
        if type(self.parq) in (list, tuple):
            if pool is None:
                dflist = [self.func(parq, dropna=dropna) for parq in self.parq]
            else:
                # TODO: parallel mapping has not worked in initial tests
                dflist = pool.map(functools.partial(self.func, dropna=dropna), self.parq)
            self._df = pd.concat(dflist)
        else:
            self._df = self.func(self.parq, dropna=dropna)

        return self._df
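

# Illustrative sketch (not part of the pipeline): the functor pattern that
# PostprocessAnalysis manages, with plain callables standing in for
# lsst.pipe.tasks.functors objects. Column names here are made up.
def _examplePostprocessPattern(df):
    # Each "functor" maps the input table to one output column; the dict keys
    # become the output column names, as described in the class docstring.
    funcs = {'ra': lambda d: d['coord_ra'],
             'dec': lambda d: d['coord_dec']}
    return pd.DataFrame({name: func(df) for name, func in funcs.items()})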


class TransformCatalogBaseConnections(pipeBase.PipelineTaskConnections,
                                      dimensions=()):
    """Expected Connections for subclasses of TransformCatalogBaseTask.

    Must be subclassed.
    """
    inputCatalog = connectionTypes.Input(
        name="",
        storageClass="DataFrame",
    )
    outputCatalog = connectionTypes.Output(
        name="",
        storageClass="DataFrame",
    )


class TransformCatalogBaseConfig(pipeBase.PipelineTaskConfig,
                                 pipelineConnections=TransformCatalogBaseConnections):
    functorFile = pexConfig.Field(
        dtype=str,
        doc='Path to YAML file specifying functors to be computed',
        default=None,
        optional=True
    )


class TransformCatalogBaseTask(CmdLineTask, pipeBase.PipelineTask):
    """Base class for transforming/standardizing a catalog

    by applying functors that convert units and apply calibrations.
    The purpose of this task is to perform a set of computations on
    an input `ParquetTable` dataset (such as `deepCoadd_obj`) and write the
    results to a new dataset (which needs to be declared in an `outputDataset`
    attribute).

    The calculations to be performed are defined in a YAML file that specifies
    a set of functors to be computed, provided as
    a `--functorFile` config parameter. An example of such a YAML file
    is the following:

        funcs:
            count:
                functor: Column
                args:
                    - base_InputCount_value
            deconvolvedMoments:
                functor: DeconvolvedMoments
        refFlags:
            - merge_measurement_i
            - merge_measurement_r
            - merge_measurement_z
            - merge_measurement_y
            - merge_measurement_g
            - base_PixelFlags_flag_inexact_psfCenter

    The names for each entry under "func" will become the names of columns in
    the output dataset. All the functors referenced are defined in
    `lsst.pipe.tasks.functors`. Positional arguments to be passed to each
    functor are in the `args` list, and any additional entries for each column
    other than "functor" or "args" (e.g., `'filt'`, `'dataset'`) are treated as
    keyword arguments to be passed to the functor initialization.

    The "refFlags" entry is a shortcut for a set of `Column` functors that keep
    the original column name and are taken from the `'ref'` dataset.

    The "flags" entry will be expanded out per band.

    This task uses the `lsst.pipe.tasks.postprocess.PostprocessAnalysis` object
    to organize and execute the calculations.
    """

    @property
    def _DefaultName(self):
        raise NotImplementedError('Subclass must define "_DefaultName" attribute')

    @property
    def outputDataset(self):
        raise NotImplementedError('Subclass must define "outputDataset" attribute')

    @property
    def inputDataset(self):
        raise NotImplementedError('Subclass must define "inputDataset" attribute')

    @property
    def ConfigClass(self):
        raise NotImplementedError('Subclass must define "ConfigClass" attribute')

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        if self.config.functorFile:
            self.log.info('Loading transform functor definitions from %s',
                          self.config.functorFile)
            self.funcs = CompositeFunctor.from_file(self.config.functorFile)
            self.funcs.update(dict(PostprocessAnalysis._defaultFuncs))
        else:
            self.funcs = None

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)
        if self.funcs is None:
            raise ValueError("config.functorFile is None. "
                             "Must be a valid path to yaml in order to run Task as a PipelineTask.")
        result = self.run(parq=inputs['inputCatalog'], funcs=self.funcs,
                          dataId=outputRefs.outputCatalog.dataId.full)
        outputs = pipeBase.Struct(outputCatalog=result)
        butlerQC.put(outputs, outputRefs)

    def runDataRef(self, dataRef):
        parq = dataRef.get(self.inputDataset)
        if self.funcs is None:
            raise ValueError("config.functorFile is None. "
                             "Must be a valid path to yaml in order to run as a CommandlineTask.")
        df = self.run(parq, funcs=self.funcs, dataId=dataRef.dataId)
        self.write(df, dataRef)
        return df

    def run(self, parq, funcs=None, dataId=None, band=None):
        """Do postprocessing calculations

        Takes a `ParquetTable` object and dataId,
        returns a dataframe with results of postprocessing calculations.

        Parameters
        ----------
        parq : `lsst.pipe.tasks.parquetTable.ParquetTable`
            ParquetTable from which calculations are done.
        funcs : `lsst.pipe.tasks.functors.Functors`
            Functors to apply to the table's columns
        dataId : `dict`, optional
            Used to add a `patchId` column to the output dataframe.
        band : `str`, optional
            Filter band that is being processed.

        Returns
        -------
        df : `pandas.DataFrame`
        """
        self.log.info("Transforming/standardizing the source table dataId: %s", dataId)

        df = self.transform(band, parq, funcs, dataId).df
        self.log.info("Made a table of %d columns and %d rows", len(df.columns), len(df))
        return df

    def getFunctors(self):
        return self.funcs

    def getAnalysis(self, parq, funcs=None, band=None):
        if funcs is None:
            funcs = self.funcs
        analysis = PostprocessAnalysis(parq, funcs, filt=band)
        return analysis

    def transform(self, band, parq, funcs, dataId):
        analysis = self.getAnalysis(parq, funcs=funcs, band=band)
        df = analysis.df
        if dataId is not None:
            for key, value in dataId.items():
                df[str(key)] = value

        return pipeBase.Struct(
            df=df,
            analysis=analysis
        )

    def write(self, df, parqRef):
        parqRef.put(ParquetTable(dataFrame=df), self.outputDataset)

    def writeMetadata(self, dataRef):
        """No metadata to write.
        """
        pass


class TransformObjectCatalogConnections(pipeBase.PipelineTaskConnections,
                                        defaultTemplates={"coaddName": "deep"},
                                        dimensions=("tract", "patch", "skymap")):
    inputCatalog = connectionTypes.Input(
        doc="The vertical concatenation of the deepCoadd_{ref|meas|forced_src} catalogs, "
            "stored as a DataFrame with a multi-level column index per-patch.",
        dimensions=("tract", "patch", "skymap"),
        storageClass="DataFrame",
        name="{coaddName}Coadd_obj",
        deferLoad=True,
    )
    outputCatalog = connectionTypes.Output(
        doc="Per-Patch Object Table of columns transformed from the deepCoadd_obj table per the standard "
            "Data Model.",
        dimensions=("tract", "patch", "skymap"),
        storageClass="DataFrame",
        name="objectTable"
    )


class TransformObjectCatalogConfig(TransformCatalogBaseConfig,
                                   pipelineConnections=TransformObjectCatalogConnections):
    coaddName = pexConfig.Field(
        dtype=str,
        default="deep",
        doc="Name of coadd"
    )
    filterMap = pexConfig.DictField(
        keytype=str,
        itemtype=str,
        default={},
        doc=("Dictionary mapping full filter name to short one for column name munging. "
             "These filters determine the output columns no matter what filters the "
             "input data actually contain."),
        deprecated=("Coadds are now identified by the band, so this transform is unused. "
                    "Will be removed after v22.")
    )
    outputBands = pexConfig.ListField(
        dtype=str,
        default=None,
        optional=True,
        doc=("These bands and only these bands will appear in the output,"
             " NaN-filled if the input does not include them."
             " If None, then use all bands found in the input.")
    )
    camelCase = pexConfig.Field(
        dtype=bool,
        doc=("Write per-band column names with camelCase, else underscore. "
             "For example: gPsFlux instead of g_PsFlux.")
    )
    multilevelOutput = pexConfig.Field(
        dtype=bool,
        doc=("Whether results dataframe should have a multilevel column index (True) or be flat "
             "and name-munged (False).")
    )


class TransformObjectCatalogTask(TransformCatalogBaseTask):
    """Produce a flattened Object Table to match the format specified in
    the DPDD.

    Do the same set of postprocessing calculations on all bands.

    This is identical to `TransformCatalogBaseTask`, except that it does the
    specified functor calculations for all filters present in the
    input `deepCoadd_obj` table. Any specific `"filt"` keywords specified
    by the YAML file will be superseded.
    """
    _DefaultName = "transformObjectCatalog"
    ConfigClass = TransformObjectCatalogConfig

    # Used by the Gen 2 runDataRef path only
    inputDataset = 'deepCoadd_obj'
    outputDataset = 'objectTable'

    @classmethod
    def _makeArgumentParser(cls):
        parser = ArgumentParser(name=cls._DefaultName)
        parser.add_id_argument("--id", cls.inputDataset,
                               ContainerClass=CoaddDataIdContainer,
                               help="data ID, e.g. --id tract=12345 patch=1,2")
        return parser

    def run(self, parq, funcs=None, dataId=None, band=None):
        # NOTE: the band kwarg is ignored here; all input bands are processed.
        dfDict = {}
        analysisDict = {}
        templateDf = pd.DataFrame()

        if isinstance(parq, DeferredDatasetHandle):
            columns = parq.get(component='columns')
            inputBands = columns.unique(level=1).values
        else:
            inputBands = parq.columnLevelNames['band']

        outputBands = self.config.outputBands if self.config.outputBands else inputBands

        # Perform transform for data of filters that exist in parq
        for inputBand in inputBands:
            if inputBand not in outputBands:
                self.log.info("Ignoring %s band data in the input", inputBand)
                continue
            self.log.info("Transforming the catalog of band %s", inputBand)
            result = self.transform(inputBand, parq, funcs, dataId)
            dfDict[inputBand] = result.df
            analysisDict[inputBand] = result.analysis
            if templateDf.empty:
                templateDf = result.df

        # Fill NaNs in columns of other wanted bands
        for filt in outputBands:
            if filt not in dfDict:
                self.log.info("Adding empty columns for band %s", filt)
                dfDict[filt] = pd.DataFrame().reindex_like(templateDf)

        # This makes a multilevel column index, with band as the first level
        df = pd.concat(dfDict, axis=1, names=['band', 'column'])

        if not self.config.multilevelOutput:
            noDupCols = list(set.union(*[set(v.noDupCols) for v in analysisDict.values()]))
            if dataId is not None:
                noDupCols += list(dataId.keys())
            df = flattenFilters(df, noDupCols=noDupCols, camelCase=self.config.camelCase,
                                inputBands=inputBands)

        self.log.info("Made a table of %d columns and %d rows", len(df.columns), len(df))

        return df


class TractObjectDataIdContainer(CoaddDataIdContainer):

    def makeDataRefList(self, namespace):
        """Make self.refList from self.idList

        Generate a list of data references given tract and/or patch.
        This was adapted from `TractQADataIdContainer`, which was
        `TractDataIdContainer` modified to not require "filter".
        Only existing dataRefs are returned.
        """
        def getPatchRefList(tract):
            return [namespace.butler.dataRef(datasetType=self.datasetType,
                                             tract=tract.getId(),
                                             patch="%d,%d" % patch.getIndex())
                    for patch in tract]

        tractRefs = defaultdict(list)  # Data references for each tract
        for dataId in self.idList:
            skymap = self.getSkymap(namespace)

            if "tract" in dataId:
                tractId = dataId["tract"]
                if "patch" in dataId:
                    tractRefs[tractId].append(namespace.butler.dataRef(datasetType=self.datasetType,
                                                                       tract=tractId,
                                                                       patch=dataId['patch']))
                else:
                    tractRefs[tractId] += getPatchRefList(skymap[tractId])
            else:
                tractRefs = dict((tract.getId(), tractRefs.get(tract.getId(), []) + getPatchRefList(tract))
                                 for tract in skymap)

        outputRefList = []
        for tractRefList in tractRefs.values():
            existingRefs = [ref for ref in tractRefList if ref.datasetExists()]
            outputRefList.append(existingRefs)

        self.refList = outputRefList
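

# Illustrative sketch (not part of the pipeline): how TransformObjectCatalogTask.run
# NaN-fills missing output bands and flattens the result. Band and column names
# below are made up for demonstration only.
def _exampleBandFill():
    template = pd.DataFrame({'coord_ra': [1.0], 'coord_dec': [2.0], 'PsFlux': [3.0]})
    dfDict = {'g': template}
    for band in ('g', 'r', 'i'):
        if band not in dfDict:
            # Same index and columns as the template, filled with NaN
            dfDict[band] = pd.DataFrame().reindex_like(template)
    df = pd.concat(dfDict, axis=1, names=['band', 'column'])
    return flattenFilters(df, noDupCols=['coord_ra', 'coord_dec'])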


class ConsolidateObjectTableConnections(pipeBase.PipelineTaskConnections,
                                        dimensions=("tract", "skymap")):
    inputCatalogs = connectionTypes.Input(
        doc="Per-Patch objectTables conforming to the standard data model.",
        name="objectTable",
        storageClass="DataFrame",
        dimensions=("tract", "patch", "skymap"),
        multiple=True
    )
    outputCatalog = connectionTypes.Output(
        doc="Per-tract concatenation of the input objectTables",
        name="objectTable_tract",
        storageClass="DataFrame",
        dimensions=("tract", "skymap"),
    )


class ConsolidateObjectTableConfig(pipeBase.PipelineTaskConfig,
                                   pipelineConnections=ConsolidateObjectTableConnections):
    coaddName = pexConfig.Field(
        dtype=str,
        default="deep",
        doc="Name of coadd"
    )


class ConsolidateObjectTableTask(CmdLineTask, pipeBase.PipelineTask):
    """Write patch-merged source tables to a tract-level parquet file

    Concatenates the `objectTable` list into a per-tract `objectTable_tract`.
    """
    _DefaultName = "consolidateObjectTable"
    ConfigClass = ConsolidateObjectTableConfig

    inputDataset = 'objectTable'
    outputDataset = 'objectTable_tract'

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)
        self.log.info("Concatenating %s per-patch Object Tables",
                      len(inputs['inputCatalogs']))
        df = pd.concat(inputs['inputCatalogs'])
        butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs)

    @classmethod
    def _makeArgumentParser(cls):
        parser = ArgumentParser(name=cls._DefaultName)

        parser.add_id_argument("--id", cls.inputDataset,
                               help="data ID, e.g. --id tract=12345",
                               ContainerClass=TractObjectDataIdContainer)
        return parser

    def runDataRef(self, patchRefList):
        df = pd.concat([patchRef.get().toDataFrame() for patchRef in patchRefList])
        patchRefList[0].put(ParquetTable(dataFrame=df), self.outputDataset)

    def writeMetadata(self, dataRef):
        """No metadata to write.
        """
        pass
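

# Illustrative sketch (not part of the pipeline): the Gen2 consolidation path
# stacks the per-patch tables row-wise and wraps the result back into a
# ParquetTable before putting it, as in runDataRef above.
def _exampleConsolidatePatches(perPatchDataFrames):
    df = pd.concat(perPatchDataFrames)
    return ParquetTable(dataFrame=df)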


class TransformSourceTableConnections(pipeBase.PipelineTaskConnections,
                                      dimensions=("instrument", "visit", "detector")):
    inputCatalog = connectionTypes.Input(
        doc="Wide input catalog of sources produced by WriteSourceTableTask",
        name="source",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector"),
    )
    outputCatalog = connectionTypes.Output(
        doc="Narrower, per-detector Source Table transformed and converted per a "
            "specified set of functors",
        name="sourceTable",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector")
    )


class TransformSourceTableConfig(TransformCatalogBaseConfig,
                                 pipelineConnections=TransformSourceTableConnections):
    pass


class TransformSourceTableTask(TransformCatalogBaseTask):
    """Transform/standardize a source catalog
    """
    _DefaultName = "transformSourceTable"
    ConfigClass = TransformSourceTableConfig

    inputDataset = 'source'
    outputDataset = 'sourceTable'

    @classmethod
    def _makeArgumentParser(cls):
        parser = ArgumentParser(name=cls._DefaultName)
        parser.add_id_argument("--id", datasetType=cls.inputDataset,
                               help="data ID, e.g. --id visit=12345 ccd=0")
        return parser

    def runDataRef(self, dataRef):
        """Override to specify band label to run()."""
        parq = dataRef.get(self.inputDataset)
        funcs = self.getFunctors()
        band = dataRef.get("calexp_filterLabel", immediate=True).bandLabel
        df = self.run(parq, funcs=funcs, dataId=dataRef.dataId, band=band)
        self.write(df, dataRef)
        return df
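

# Illustrative sketch (not part of the pipeline): how transform() stamps the
# dataId onto the output table as extra columns. The dataId below is made up.
def _exampleStampDataId():
    df = pd.DataFrame({'ra': [1.0], 'dec': [2.0]})
    dataId = {'visit': 12345, 'detector': 42}
    for key, value in dataId.items():
        df[str(key)] = value
    return df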


class ConsolidateVisitSummaryConnections(pipeBase.PipelineTaskConnections,
                                         dimensions=("instrument", "visit",),
                                         defaultTemplates={}):
    calexp = connectionTypes.Input(
        doc="Processed exposures used for metadata",
        name="calexp",
        storageClass="ExposureF",
        dimensions=("instrument", "visit", "detector"),
        deferLoad=True,
        multiple=True,
    )
    visitSummary = connectionTypes.Output(
        doc=("Per-visit consolidated exposure metadata. These catalogs use "
             "detector id for the id and are sorted for fast lookups of a "
             "detector."),
        name="visitSummary",
        storageClass="ExposureCatalog",
        dimensions=("instrument", "visit"),
    )


class ConsolidateVisitSummaryConfig(pipeBase.PipelineTaskConfig,
                                    pipelineConnections=ConsolidateVisitSummaryConnections):
    """Config for ConsolidateVisitSummaryTask"""
    pass


class ConsolidateVisitSummaryTask(pipeBase.PipelineTask, pipeBase.CmdLineTask):
    """Task to consolidate per-detector visit metadata.

    This task aggregates the following metadata from all the detectors in a
    single visit into an exposure catalog:
    - The physical_filter and band (if available).
    - The psf size, shape, and effective area at the center of the detector.
    - The corners of the bounding box in right ascension/declination.

    Other quantities such as Detector, Psf, ApCorrMap, and TransmissionCurve
    are not persisted here because of storage concerns, and because of their
    limited utility as summary statistics.

    Tests for this task are performed in ci_hsc_gen3.
    """
    _DefaultName = "consolidateVisitSummary"
    ConfigClass = ConsolidateVisitSummaryConfig

    @classmethod
    def _makeArgumentParser(cls):
        parser = ArgumentParser(name=cls._DefaultName)

        parser.add_id_argument("--id", "calexp",
                               help="data ID, e.g. --id visit=12345",
                               ContainerClass=VisitDataIdContainer)
        return parser

    def writeMetadata(self, dataRef):
        """No metadata to persist, so override to remove metadata persistence.
        """
        pass

    def writeConfig(self, butler, clobber=False, doBackup=True):
        """No config to persist, so override to remove config persistence.
        """
        pass

    def runDataRef(self, dataRefList):
        visit = dataRefList[0].dataId['visit']

        self.log.debug("Concatenating metadata from %d per-detector calexps (visit %d)" %
                       (len(dataRefList), visit))

        expCatalog = self._combineExposureMetadata(visit, dataRefList, isGen3=False)

        dataRefList[0].put(expCatalog, 'visitSummary', visit=visit)

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        dataRefs = butlerQC.get(inputRefs.calexp)
        visit = dataRefs[0].dataId.byName()['visit']

        self.log.debug("Concatenating metadata from %d per-detector calexps (visit %d)" %
                       (len(dataRefs), visit))

        expCatalog = self._combineExposureMetadata(visit, dataRefs)

        butlerQC.put(expCatalog, outputRefs.visitSummary)

    def _combineExposureMetadata(self, visit, dataRefs, isGen3=True):
        """Make a combined exposure catalog from a list of dataRefs.

        These dataRefs must point to exposures with wcs, summaryStats,
        and other visit metadata.

        Parameters
        ----------
        visit : `int`
            Visit identification number.
        dataRefs : `list`
            List of dataRefs in visit.  May be list of
            `lsst.daf.persistence.ButlerDataRef` (Gen2) or
            `lsst.daf.butler.DeferredDatasetHandle` (Gen3).
        isGen3 : `bool`, optional
            Specifies if this is a Gen3 list of datarefs.

        Returns
        -------
        visitSummary : `lsst.afw.table.ExposureCatalog`
            Exposure catalog with per-detector summary information.
        """
        schema = self._makeVisitSummarySchema()
        cat = afwTable.ExposureCatalog(schema)
        cat.resize(len(dataRefs))

        cat['visit'] = visit

        for i, dataRef in enumerate(dataRefs):
            if isGen3:
                visitInfo = dataRef.get(component='visitInfo')
                filterLabel = dataRef.get(component='filterLabel')
                summaryStats = dataRef.get(component='summaryStats')
                detector = dataRef.get(component='detector')
                wcs = dataRef.get(component='wcs')
                photoCalib = dataRef.get(component='photoCalib')
                bbox = dataRef.get(component='bbox')
                validPolygon = dataRef.get(component='validPolygon')
            else:
                # Gen2: all of this info is attached to the calexp, so read a
                # minimal (single-pixel) cutout to get at the metadata.
                gen2_read_bbox = lsst.geom.Box2I(lsst.geom.Point2I(0, 0), lsst.geom.Point2I(0, 0))
                exp = dataRef.get(datasetType='calexp_sub', bbox=gen2_read_bbox)
                visitInfo = exp.getInfo().getVisitInfo()
                filterLabel = dataRef.get("calexp_filterLabel")
                summaryStats = exp.getInfo().getSummaryStats()
                wcs = exp.getWcs()
                photoCalib = exp.getPhotoCalib()
                detector = exp.getDetector()
                bbox = dataRef.get(datasetType='calexp_bbox')
                validPolygon = exp.getInfo().getValidPolygon()

            rec = cat[i]
            rec.setBBox(bbox)
            rec.setVisitInfo(visitInfo)
            rec.setWcs(wcs)
            rec.setPhotoCalib(photoCalib)
            rec.setValidPolygon(validPolygon)

            rec['physical_filter'] = filterLabel.physicalLabel if filterLabel.hasPhysicalLabel() else ""
            rec['band'] = filterLabel.bandLabel if filterLabel.hasBandLabel() else ""
            rec.setId(detector.getId())
            rec['psfSigma'] = summaryStats.psfSigma
            rec['psfIxx'] = summaryStats.psfIxx
            rec['psfIyy'] = summaryStats.psfIyy
            rec['psfIxy'] = summaryStats.psfIxy
            rec['psfArea'] = summaryStats.psfArea
            rec['raCorners'][:] = summaryStats.raCorners
            rec['decCorners'][:] = summaryStats.decCorners
            rec['ra'] = summaryStats.ra
            rec['decl'] = summaryStats.decl
            rec['zenithDistance'] = summaryStats.zenithDistance
            rec['zeroPoint'] = summaryStats.zeroPoint
            rec['skyBg'] = summaryStats.skyBg
            rec['skyNoise'] = summaryStats.skyNoise
            rec['meanVar'] = summaryStats.meanVar

        metadata = dafBase.PropertyList()
        metadata.add("COMMENT", "Catalog id is detector id, sorted.")
        # We are looping over existing datarefs, so the following is true
        metadata.add("COMMENT", "Only detectors with data have entries.")
        cat.setMetadata(metadata)

        cat.sort()
        return cat

    def _makeVisitSummarySchema(self):
        """Make the schema for the visitSummary catalog."""
        schema = afwTable.ExposureTable.makeMinimalSchema()
        schema.addField('visit', type='I', doc='Visit number')
        schema.addField('physical_filter', type='String', size=32, doc='Physical filter')
        schema.addField('band', type='String', size=32, doc='Name of band')
        schema.addField('psfSigma', type='F',
                        doc='PSF model second-moments determinant radius (center of chip) (pixel)')
        schema.addField('psfArea', type='F',
                        doc='PSF model effective area (center of chip) (pixel**2)')
        schema.addField('psfIxx', type='F',
                        doc='PSF model Ixx (center of chip) (pixel**2)')
        schema.addField('psfIyy', type='F',
                        doc='PSF model Iyy (center of chip) (pixel**2)')
        schema.addField('psfIxy', type='F',
                        doc='PSF model Ixy (center of chip) (pixel**2)')
        schema.addField('raCorners', type='ArrayD', size=4,
                        doc='Right Ascension of bounding box corners (degrees)')
        schema.addField('decCorners', type='ArrayD', size=4,
                        doc='Declination of bounding box corners (degrees)')
        schema.addField('ra', type='D',
                        doc='Right Ascension of bounding box center (degrees)')
        schema.addField('decl', type='D',
                        doc='Declination of bounding box center (degrees)')
        schema.addField('zenithDistance', type='F',
                        doc='Zenith distance of bounding box center (degrees)')
        schema.addField('zeroPoint', type='F',
                        doc='Mean zeropoint in detector (mag)')
        schema.addField('skyBg', type='F',
                        doc='Average sky background (ADU)')
        schema.addField('skyNoise', type='F',
                        doc='Average sky noise (ADU)')
        schema.addField('meanVar', type='F',
                        doc='Mean variance of the weight plane (ADU**2)')

        return schema
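

# Illustrative sketch (not part of the pipeline): building a tiny catalog from
# an ExposureTable schema and reading a per-detector column back out, using
# only afw.table calls already used in this module.
def _exampleVisitSummaryColumn():
    schema = afwTable.ExposureTable.makeMinimalSchema()
    schema.addField('psfSigma', type='F', doc='PSF model determinant radius (pixel)')
    cat = afwTable.ExposureCatalog(schema)
    rec = cat.addNew()
    rec['psfSigma'] = 2.5
    # Column access on the (contiguous) catalog returns an array of values
    return cat['psfSigma']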


class VisitDataIdContainer(DataIdContainer):
    """DataIdContainer that groups sensor-level ids by visit.
    """

    def makeDataRefList(self, namespace):
        """Make self.refList from self.idList

        Generate a list of data references grouped by visit.

        Parameters
        ----------
        namespace : `argparse.Namespace`
            Namespace used by `lsst.pipe.base.CmdLineTask` to parse command
            line arguments.
        """
        # Group refs by visit
        visitRefs = defaultdict(list)
        for dataId in self.idList:
            if "visit" in dataId:
                visitId = dataId["visit"]
                subset = namespace.butler.subset(self.datasetType, dataId=dataId)
                visitRefs[visitId].extend([dataRef for dataRef in subset])

        outputRefList = []
        for refList in visitRefs.values():
            existingRefs = [ref for ref in refList if ref.datasetExists()]
            if existingRefs:
                outputRefList.append(existingRefs)

        self.refList = outputRefList
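

# Illustrative sketch (not part of the pipeline): the grouping pattern used by
# VisitDataIdContainer, with plain dicts standing in for data references.
def _exampleGroupByVisit(dataIds):
    refsByVisit = defaultdict(list)
    for dataId in dataIds:
        refsByVisit[dataId['visit']].append(dataId)
    # One list of "references" per visit
    return list(refsByVisit.values())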


class ConsolidateSourceTableConnections(pipeBase.PipelineTaskConnections,
                                        dimensions=("instrument", "visit")):
    inputCatalogs = connectionTypes.Input(
        doc="Input per-detector Source Tables",
        name="sourceTable",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector"),
        multiple=True
    )
    outputCatalog = connectionTypes.Output(
        doc="Per-visit concatenation of Source Table",
        name="sourceTable_visit",
        storageClass="DataFrame",
        dimensions=("instrument", "visit")
    )


class ConsolidateSourceTableConfig(pipeBase.PipelineTaskConfig,
                                   pipelineConnections=ConsolidateSourceTableConnections):
    pass


class ConsolidateSourceTableTask(CmdLineTask, pipeBase.PipelineTask):
    """Concatenate `sourceTable` list into a per-visit `sourceTable_visit`
    """
    _DefaultName = 'consolidateSourceTable'
    ConfigClass = ConsolidateSourceTableConfig

    inputDataset = 'sourceTable'
    outputDataset = 'sourceTable_visit'

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)
        self.log.info("Concatenating %s per-detector Source Tables",
                      len(inputs['inputCatalogs']))
        df = pd.concat(inputs['inputCatalogs'])
        butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs)

    def runDataRef(self, dataRefList):
        self.log.info("Concatenating %s per-detector Source Tables", len(dataRefList))
        df = pd.concat([dataRef.get().toDataFrame() for dataRef in dataRefList])
        dataRefList[0].put(ParquetTable(dataFrame=df), self.outputDataset)

    @classmethod
    def _makeArgumentParser(cls):
        parser = ArgumentParser(name=cls._DefaultName)

        parser.add_id_argument("--id", cls.inputDataset,
                               help="data ID, e.g. --id visit=12345",
                               ContainerClass=VisitDataIdContainer)
        return parser

    def writeMetadata(self, dataRef):
        """No metadata to write.
        """
        pass

    def writeConfig(self, butler, clobber=False, doBackup=True):
        """No config to write.
        """
        pass