import functools
from collections import defaultdict

import numpy as np
import pandas as pd

import lsst.geom
import lsst.pex.config as pexConfig
import lsst.pipe.base as pipeBase
import lsst.daf.base as dafBase
import lsst.afw.table as afwTable
from lsst.meas.base import SingleFrameMeasurementTask
from lsst.pipe.base import connectionTypes
from lsst.pipe.base import CmdLineTask, ArgumentParser, DataIdContainer
from lsst.daf.butler import DeferredDatasetHandle, DataCoordinate

from .parquetTable import ParquetTable
from .multiBandUtils import makeMergeArgumentParser, MergeSourcesRunner
from .coaddBase import CoaddDataIdContainer
from .functors import CompositeFunctor, Column
def flattenFilters(df, noDupCols=['coord_ra', 'coord_dec'], camelCase=False, inputBands=None):
    """Flattens a dataframe with a multilevel column index.
    """
    newDf = pd.DataFrame()
    # band is the level-0 index of the input columns
    dfBands = df.columns.unique(level=0).values
    for band in dfBands:
        subdf = df[band]
        columnFormat = '{0}{1}' if camelCase else '{0}_{1}'
        newColumns = {c: columnFormat.format(band, c)
                      for c in subdf.columns if c not in noDupCols}
        cols = list(newColumns.keys())
        newDf = pd.concat([newDf, subdf[cols].rename(columns=newColumns)], axis=1)

    # Restrict to bands that are actually present in the input
    presentBands = dfBands if inputBands is None else list(set(inputBands).intersection(dfBands))
    # The noDupCols are taken once, from the first present band's partition
    noDupDf = df[presentBands[0]][noDupCols]
    newDf = pd.concat([noDupDf, newDf], axis=1)
    return newDf
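
# A minimal usage sketch of ``flattenFilters`` (not called by the pipeline); the
# band and column names below are made up for illustration only.
def _demoFlattenFilters():
    columns = pd.MultiIndex.from_product([["g", "r"], ["coord_ra", "coord_dec", "PsFlux"]],
                                         names=("band", "column"))
    df = pd.DataFrame([[1.0, 2.0, 10.0, 1.0, 2.0, 20.0]], columns=columns)
    # With camelCase=True the per-band columns become e.g. 'gPsFlux' and 'rPsFlux',
    # while 'coord_ra'/'coord_dec' appear once, taken from the first present band.
    return flattenFilters(df, camelCase=True)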
class WriteObjectTableConnections(pipeBase.PipelineTaskConnections,
                                  defaultTemplates={"coaddName": "deep"},
                                  dimensions=("tract", "patch", "skymap")):
    inputCatalogMeas = connectionTypes.Input(
        doc="Catalog of source measurements on the deepCoadd.",
        dimensions=("tract", "patch", "band", "skymap"),
        storageClass="SourceCatalog",
        name="{coaddName}Coadd_meas",
    )
    inputCatalogForcedSrc = connectionTypes.Input(
        doc="Catalog of forced measurements (shape and position parameters held fixed) on the deepCoadd.",
        dimensions=("tract", "patch", "band", "skymap"),
        storageClass="SourceCatalog",
        name="{coaddName}Coadd_forced_src",
    )
    inputCatalogRef = connectionTypes.Input(
        doc="Catalog marking the primary detection (which band provides a good shape and position) "
            "for each detection in deepCoadd_mergeDet.",
        dimensions=("tract", "patch", "skymap"),
        storageClass="SourceCatalog",
        name="{coaddName}Coadd_ref",
    )
    outputCatalog = connectionTypes.Output(
        doc="A vertical concatenation of the deepCoadd_{ref|meas|forced_src} catalogs, "
            "stored as a DataFrame with a multi-level column index per-patch.",
        dimensions=("tract", "patch", "skymap"),
        storageClass="DataFrame",
        name="{coaddName}Coadd_obj",
    )
class WriteObjectTableConfig(pipeBase.PipelineTaskConfig,
                             pipelineConnections=WriteObjectTableConnections):
    engine = pexConfig.Field(
        dtype=str,
        default="pyarrow",
        doc="Parquet engine for writing (pyarrow or fastparquet)"
    )
    coaddName = pexConfig.Field(
        dtype=str,
        default="deep",
        doc="Name of coadd"
    )
class WriteObjectTableTask(CmdLineTask, pipeBase.PipelineTask):
    """Write filter-merged source tables to parquet.
    """
    _DefaultName = "writeObjectTable"
    ConfigClass = WriteObjectTableConfig
    RunnerClass = MergeSourcesRunner

    # Names of table datasets to be merged
    inputDatasets = ('forced_src', 'meas', 'ref')

    # Name of the output dataset
    outputDataset = 'obj'

    def __init__(self, butler=None, schema=None, **kwargs):
        # The butler and schema arguments are accepted only for compatibility
        # with the MergeSourcesRunner; they are not used here.
        super().__init__(**kwargs)

    def runDataRef(self, patchRefList):
        """Merge coadd sources from multiple bands.

        Calls `run`, which must be defined in subclasses that inherit from
        MergeSourcesTask.

        Parameters
        ----------
        patchRefList : `list`
            List of data references, one for each filter.
        """
        catalogs = dict(self.readCatalog(patchRef) for patchRef in patchRefList)
        dataId = patchRefList[0].dataId
        mergedCatalog = self.run(catalogs, tract=dataId['tract'], patch=dataId['patch'])
        self.write(patchRefList[0], ParquetTable(dataFrame=mergedCatalog))
    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)
        measDict = {ref.dataId['band']: {'meas': cat} for ref, cat in
                    zip(inputRefs.inputCatalogMeas, inputs['inputCatalogMeas'])}
        forcedSourceDict = {ref.dataId['band']: {'forced_src': cat} for ref, cat in
                            zip(inputRefs.inputCatalogForcedSrc, inputs['inputCatalogForcedSrc'])}

        catalogs = {}
        for band in measDict.keys():
            catalogs[band] = {'meas': measDict[band]['meas'],
                              'forced_src': forcedSourceDict[band]['forced_src'],
                              'ref': inputs['inputCatalogRef']}
        dataId = butlerQC.quantum.dataId
        df = self.run(catalogs=catalogs, tract=dataId['tract'], patch=dataId['patch'])
        outputs = pipeBase.Struct(outputCatalog=df)
        butlerQC.put(outputs, outputRefs)
    @classmethod
    def _makeArgumentParser(cls):
        """Create a suitable ArgumentParser.

        We will use the ArgumentParser to get a list of data
        references for patches; the RunnerClass will sort them into lists
        of data references for the same patch.

        References the first of self.inputDatasets, rather than
        self.inputDataset.
        """
        return makeMergeArgumentParser(cls._DefaultName, cls.inputDatasets[0])
    def readCatalog(self, patchRef):
        """Read input catalogs.

        Read all the input datasets given by the 'inputDatasets'
        attribute.

        Parameters
        ----------
        patchRef : `lsst.daf.persistence.ButlerDataRef`
            Data reference for patch.

        Returns
        -------
        Tuple consisting of band name and a dict of catalogs, keyed by
        dataset name.
        """
        band = patchRef.get(self.config.coaddName + "Coadd_filterLabel", immediate=True).bandLabel
        catalogDict = {}
        for dataset in self.inputDatasets:
            catalog = patchRef.get(self.config.coaddName + "Coadd_" + dataset, immediate=True)
            self.log.info("Read %d sources from %s for band %s: %s",
                          len(catalog), dataset, band, patchRef.dataId)
            catalogDict[dataset] = catalog
        return band, catalogDict
    def run(self, catalogs, tract, patch):
        """Merge multiple catalogs.

        Parameters
        ----------
        catalogs : `dict`
            Mapping from filter names to dict of catalogs.
        tract : `int`
            tractId to use for the tractId column.
        patch : `str`
            patchId to use for the patchId column.

        Returns
        -------
        catalog : `pandas.DataFrame`
            Merged dataframe.
        """
        dfs = []
        for filt, tableDict in catalogs.items():
            for dataset, table in tableDict.items():
                # Convert afwTable to pandas DataFrame, indexed by source id
                df = table.asAstropy().to_pandas().set_index('id', drop=True)

                # Sort columns by name, to ensure matching schema among patches
                df = df.reindex(sorted(df.columns), axis=1)
                df['tractId'] = tract
                df['patchId'] = patch

                # Make columns a 3-level MultiIndex
                df.columns = pd.MultiIndex.from_tuples([(dataset, filt, c) for c in df.columns],
                                                       names=('dataset', 'band', 'column'))
                dfs.append(df)

        catalog = functools.reduce(lambda d1, d2: d1.join(d2), dfs)
        return catalog
    def write(self, patchRef, catalog):
        """Write the output.

        Parameters
        ----------
        catalog : `ParquetTable`
            Catalog to write.
        patchRef : `lsst.daf.persistence.ButlerDataRef`
            Data reference for patch.
        """
        patchRef.put(catalog, self.config.coaddName + "Coadd_" + self.outputDataset)
        # The filter isn't part of the data ID for the dataset we are saving,
        # so drop it from the logged data ID to avoid confusion.
        mergeDataId = patchRef.dataId.copy()
        del mergeDataId["filter"]
        self.log.info("Wrote merged catalog: %s", mergeDataId)

    def writeMetadata(self, dataRefList):
        """No metadata to write, and not sure how to write it for a list of dataRefs.
        """
        pass
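
# A toy, pandas-only illustration (with made-up column values) of how
# WriteObjectTableTask.run stacks the per-band, per-dataset catalogs side by
# side: each piece gets a ('dataset', 'band', 'column') MultiIndex and the
# pieces are joined on the shared source-id index.
def _demoMergeMultilevel():
    dfs = []
    for dataset, band in (("meas", "g"), ("forced_src", "g")):
        df = pd.DataFrame({"id": [1, 2], "flux": [10.0, 20.0]}).set_index("id", drop=True)
        df.columns = pd.MultiIndex.from_tuples([(dataset, band, c) for c in df.columns],
                                               names=("dataset", "band", "column"))
        dfs.append(df)
    return functools.reduce(lambda d1, d2: d1.join(d2), dfs)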
class WriteSourceTableConnections(pipeBase.PipelineTaskConnections,
                                  defaultTemplates={"catalogType": ""},
                                  dimensions=("instrument", "visit", "detector")):

    catalog = connectionTypes.Input(
        doc="Input full-depth catalog of sources produced by CalibrateTask",
        name="{catalogType}src",
        storageClass="SourceCatalog",
        dimensions=("instrument", "visit", "detector")
    )
    outputCatalog = connectionTypes.Output(
        doc="Catalog of sources, `src` in Parquet format. The 'id' column is "
            "replaced with an index; all other columns are unchanged.",
        name="{catalogType}source",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector")
    )
class WriteSourceTableConfig(pipeBase.PipelineTaskConfig,
                             pipelineConnections=WriteSourceTableConnections):
    doApplyExternalPhotoCalib = pexConfig.Field(
        dtype=bool,
        default=False,
        doc=("Add local photoCalib columns from the calexp.photoCalib? Should only be set True when "
             "generating Source Tables from older src tables which do not already have local calib columns.")
    )
    doApplyExternalSkyWcs = pexConfig.Field(
        dtype=bool,
        default=False,
        doc=("Add local WCS columns from the calexp.wcs? Should only be set True when "
             "generating Source Tables from older src tables which do not already have local calib columns.")
    )
class WriteSourceTableTask(CmdLineTask, pipeBase.PipelineTask):
    """Write source table to parquet.
    """
    _DefaultName = "writeSourceTable"
    ConfigClass = WriteSourceTableConfig

    def runDataRef(self, dataRef):
        src = dataRef.get('src')
        if self.config.doApplyExternalPhotoCalib or self.config.doApplyExternalSkyWcs:
            src = self.addCalibColumns(src, dataRef)

        ccdVisitId = dataRef.get('ccdExposureId')
        result = self.run(src, ccdVisitId=ccdVisitId)
        dataRef.put(result.table, 'source')

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)
        inputs['ccdVisitId'] = butlerQC.quantum.dataId.pack("visit_detector")
        result = self.run(**inputs).table
        outputs = pipeBase.Struct(outputCatalog=result.toDataFrame())
        butlerQC.put(outputs, outputRefs)
    def run(self, catalog, ccdVisitId=None):
        """Convert `src` catalog to parquet.

        Parameters
        ----------
        catalog : `afwTable.SourceCatalog`
            Catalog to be converted.
        ccdVisitId : `int`
            ccdVisitId to be added as a column.

        Returns
        -------
        result : `lsst.pipe.base.Struct`
            ``table``
                `ParquetTable` version of the input catalog.
        """
        self.log.info("Generating parquet table from src catalog %s", ccdVisitId)
        df = catalog.asAstropy().to_pandas().set_index('id', drop=True)
        df['ccdVisitId'] = ccdVisitId
        return pipeBase.Struct(table=ParquetTable(dataFrame=df))
    def addCalibColumns(self, catalog, dataRef):
        """Add columns with local calibration evaluated at each centroid,
        for backwards compatibility with old repos.

        This exists for the purpose of converting old src catalogs
        (which don't have the expected local calib columns) to Source Tables.

        Parameters
        ----------
        catalog : `afwTable.SourceCatalog`
            Catalog to which calib columns will be added.
        dataRef : `lsst.daf.persistence.ButlerDataRef`
            Data reference for fetching the calibs from disk.

        Returns
        -------
        newCat : `afwTable.SourceCatalog`
            Source Catalog with requested local calib columns.
        """
        mapper = afwTable.SchemaMapper(catalog.schema)
        measureConfig = SingleFrameMeasurementTask.ConfigClass()
        measureConfig.doReplaceWithNoise = False

        # Read a tiny cutout of the calexp; we only need the attached
        # WCS and PhotoCalib, not the pixels.
        exposure = dataRef.get('calexp_sub',
                               bbox=lsst.geom.Box2I(lsst.geom.Point2I(0, 0), lsst.geom.Point2I(0, 0)))

        mapper = afwTable.SchemaMapper(catalog.schema)
        mapper.addMinimalSchema(catalog.schema, True)
        schema = mapper.getOutputSchema()

        exposureIdInfo = dataRef.get("expIdInfo")
        measureConfig.plugins.names = []
        if self.config.doApplyExternalSkyWcs:
            plugin = 'base_LocalWcs'
            if plugin in schema:
                raise RuntimeError(f"{plugin} already in src catalog. Set doApplyExternalSkyWcs=False")
            else:
                measureConfig.plugins.names.add(plugin)

        if self.config.doApplyExternalPhotoCalib:
            plugin = 'base_LocalPhotoCalib'
            if plugin in schema:
                raise RuntimeError(f"{plugin} already in src catalog. Set doApplyExternalPhotoCalib=False")
            else:
                measureConfig.plugins.names.add(plugin)

        measurement = SingleFrameMeasurementTask(config=measureConfig, schema=schema)
        newCat = afwTable.SourceCatalog(schema)
        newCat.extend(catalog, mapper=mapper)
        measurement.run(measCat=newCat, exposure=exposure, exposureId=exposureIdInfo.expId)
        return newCat
    def writeMetadata(self, dataRef):
        """No metadata to write.
        """
        pass

    @classmethod
    def _makeArgumentParser(cls):
        parser = ArgumentParser(name=cls._DefaultName)
        parser.add_id_argument("--id", 'src',
                               help="data ID, e.g. --id visit=12345 ccd=0")
        return parser
class PostprocessAnalysis(object):
    """Calculate columns from a ParquetTable.

    This object manages and organizes an arbitrary set of computations
    on a catalog. The catalog is defined by a
    `lsst.pipe.tasks.parquetTable.ParquetTable` object (or list thereof), such as a
    `deepCoadd_obj` dataset, and the computations are defined by a collection
    of `lsst.pipe.tasks.functors.Functor` objects (or, equivalently,
    a `CompositeFunctor`).

    After the object is initialized, accessing the `.df` attribute (which
    holds the `pandas.DataFrame` containing the results of the calculations)
    triggers computation of said dataframe.

    One of the conveniences of using this object is the ability to define a
    desired common filter for all functors. This enables the same functor
    collection to be passed to several different `PostprocessAnalysis` objects
    without having to change the original functor collection, since the `filt`
    keyword argument of this object triggers an overwrite of the `filt`
    property for all functors in the collection.

    This object also allows a list of refFlags to be passed, and defines a set
    of default refFlags that are always included even if not requested.

    If a list of `ParquetTable` objects is passed, rather than a single one,
    then the calculations will be mapped over all the input catalogs. In
    principle, it should be straightforward to parallelize this activity, but
    initial tests have failed (see TODO in code comments).

    Parameters
    ----------
    parq : `lsst.pipe.tasks.ParquetTable` (or list of such)
        Source catalog(s) for computation.

    functors : `list`, `dict`, or `lsst.pipe.tasks.functors.CompositeFunctor`
        Computations to do (functors that act on `parq`).
        If a dict, the output DataFrame will have columns keyed accordingly.
        If a list, the column keys will come from the
        `.shortname` attribute of each functor.

    filt : `str`, optional
        Filter in which to calculate. If provided,
        this will overwrite any existing `.filt` attribute
        of the provided functors.

    flags : `list`, optional
        List of flags (per-band) to include in output table.
        Taken from the `meas` dataset if applied to a multilevel Object Table.

    refFlags : `list`, optional
        List of refFlags (only reference band) to include in output table.

    forcedFlags : `list`, optional
        List of flags (per-band) to include in output table.
        Taken from the ``forced_src`` dataset if applied to a
        multilevel Object Table. Intended for flags from measurement plugins
        only run during multi-band forced photometry.
    """
    _defaultRefFlags = []
    _defaultFuncs = ()
    def __init__(self, parq, functors, filt=None, flags=None, refFlags=None, forcedFlags=None):
        self.parq = parq
        self.functors = functors

        self.filt = filt
        self.flags = list(flags) if flags is not None else []
        self.forcedFlags = list(forcedFlags) if forcedFlags is not None else []
        self.refFlags = list(self._defaultRefFlags)
        if refFlags is not None:
            self.refFlags += list(refFlags)

        self._df = None

    @property
    def defaultFuncs(self):
        funcs = dict(self._defaultFuncs)
        return funcs

    @property
    def func(self):
        additionalFuncs = self.defaultFuncs
        additionalFuncs.update({flag: Column(flag, dataset='forced_src') for flag in self.forcedFlags})
        additionalFuncs.update({flag: Column(flag, dataset='ref') for flag in self.refFlags})
        additionalFuncs.update({flag: Column(flag, dataset='meas') for flag in self.flags})

        if isinstance(self.functors, CompositeFunctor):
            func = self.functors
        else:
            func = CompositeFunctor(self.functors)

        func.funcDict.update(additionalFuncs)
        func.filt = self.filt

        return func

    @property
    def noDupCols(self):
        return [name for name, func in self.func.funcDict.items() if func.noDup or func.dataset == 'ref']

    @property
    def df(self):
        if self._df is None:
            self.compute()
        return self._df

    def compute(self, dropna=False, pool=None):
        # Map over multiple parquet tables
        if type(self.parq) in (list, tuple):
            if pool is None:
                dflist = [self.func(parq, dropna=dropna) for parq in self.parq]
            else:
                # TODO: Figure out why this doesn't work
                dflist = pool.map(functools.partial(self.func, dropna=dropna), self.parq)
            self._df = pd.concat(dflist)
        else:
            self._df = self.func(self.parq, dropna=dropna)

        return self._df
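
# A minimal, hypothetical driver for PostprocessAnalysis (not used by the tasks
# below): the functor keys, column names, and flag names are illustrative and
# assume a multilevel deepCoadd_obj-style input table.
def _demoPostprocessAnalysis(parq):
    funcs = {"ra": Column("coord_ra", dataset="ref"),
             "dec": Column("coord_dec", dataset="ref")}
    analysis = PostprocessAnalysis(parq, funcs, filt="i",
                                   flags=["base_PixelFlags_flag"])
    # Accessing .df triggers the (lazy) computation and returns a pandas DataFrame.
    return analysis.df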
class TransformCatalogBaseConnections(pipeBase.PipelineTaskConnections,
                                      dimensions=()):
    """Expected Connections for subclasses of TransformCatalogBaseTask.

    Must be subclassed.
    """
    inputCatalog = connectionTypes.Input(
        name="",
        storageClass="DataFrame",
    )
    outputCatalog = connectionTypes.Output(
        name="",
        storageClass="DataFrame",
    )


class TransformCatalogBaseConfig(pipeBase.PipelineTaskConfig,
                                 pipelineConnections=TransformCatalogBaseConnections):
    functorFile = pexConfig.Field(
        dtype=str,
        doc='Path to YAML file specifying functors to be computed',
        default=None,
        optional=True
    )
class TransformCatalogBaseTask(CmdLineTask, pipeBase.PipelineTask):
    """Base class for transforming/standardizing a catalog by applying functors
    that convert units and apply calibrations.

    The purpose of this task is to perform a set of computations on
    an input `ParquetTable` dataset (such as `deepCoadd_obj`) and write the
    results to a new dataset (which needs to be declared in an `outputDataset`
    attribute).

    The calculations to be performed are defined in a YAML file that specifies
    a set of functors to be computed, provided as
    a `--functorFile` config parameter. Such a YAML file looks, in part, like
    the following:

        funcs:
            count:
                functor: Column
                args:
                    - base_InputCount_value
            deconvolved_moments:
                functor: DeconvolvedMoments
        refFlags:
            - merge_measurement_i
            - merge_measurement_r
            - merge_measurement_z
            - merge_measurement_y
            - merge_measurement_g
            - base_PixelFlags_flag_inexact_psfCenter

    The names for each entry under "funcs" will become the names of columns in
    the output dataset. All the functors referenced are defined in
    `lsst.pipe.tasks.functors`. Positional arguments to be passed to each
    functor are in the `args` list, and any additional entries for each column
    other than "functor" or "args" (e.g., `'filt'`, `'dataset'`) are treated as
    keyword arguments to be passed to the functor initialization.

    The "flags" entry is the default shortcut for `Column` functors.
    All columns listed under "flags" will be copied to the output table
    untransformed. They can be of any datatype.
    In the special case of transforming a multi-level object table with
    band and dataset indices (deepCoadd_obj), these will be taken from the
    `meas` dataset and exploded out per band.

    There are two special shortcuts that only apply when transforming
    multi-level Object (deepCoadd_obj) tables:
     - The "refFlags" entry is a shortcut for `Column` functors
       taken from the `'ref'` dataset if transforming an ObjectTable.
     - The "forcedFlags" entry is a shortcut for `Column` functors
       taken from the ``forced_src`` dataset if transforming an ObjectTable.
       These are expanded out per band.

    This task uses the `lsst.pipe.tasks.postprocess.PostprocessAnalysis` object
    to organize and execute the calculations.
    """
    @property
    def _DefaultName(self):
        raise NotImplementedError('Subclass must define "_DefaultName" attribute')

    @property
    def outputDataset(self):
        raise NotImplementedError('Subclass must define "outputDataset" attribute')

    @property
    def inputDataset(self):
        raise NotImplementedError('Subclass must define "inputDataset" attribute')

    @property
    def ConfigClass(self):
        raise NotImplementedError('Subclass must define "ConfigClass" attribute')

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        if self.config.functorFile:
            self.log.info('Loading transform functor definitions from %s',
                          self.config.functorFile)
            self.funcs = CompositeFunctor.from_file(self.config.functorFile)
            self.funcs.update(dict(PostprocessAnalysis._defaultFuncs))
        else:
            self.funcs = None
    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)
        if self.funcs is None:
            raise ValueError("config.functorFile is None. "
                             "Must be a valid path to yaml in order to run Task as a PipelineTask.")
        result = self.run(parq=inputs['inputCatalog'], funcs=self.funcs,
                          dataId=outputRefs.outputCatalog.dataId.full)
        outputs = pipeBase.Struct(outputCatalog=result)
        butlerQC.put(outputs, outputRefs)

    def runDataRef(self, dataRef):
        parq = dataRef.get()
        if self.funcs is None:
            raise ValueError("config.functorFile is None. "
                             "Must be a valid path to yaml in order to run as a CommandlineTask.")
        df = self.run(parq, funcs=self.funcs, dataId=dataRef.dataId)
        self.write(df, dataRef)
        return df
    def run(self, parq, funcs=None, dataId=None, band=None):
        """Do postprocessing calculations.

        Takes a `ParquetTable` object and a dataId, and returns a dataframe
        with the results of the postprocessing calculations.

        Parameters
        ----------
        parq : `lsst.pipe.tasks.parquetTable.ParquetTable`
            ParquetTable from which calculations are done.
        funcs : `lsst.pipe.tasks.functors.Functors`
            Functors to apply to the table's columns.
        dataId : `dict`, optional
            Used to add a `patchId` column to the output dataframe.
        band : `str`, optional
            Filter band that is being processed.

        Returns
        -------
        df : `pandas.DataFrame`
        """
        self.log.info("Transforming/standardizing the source table dataId: %s", dataId)

        df = self.transform(band, parq, funcs, dataId).df
        self.log.info("Made a table of %d columns and %d rows", len(df.columns), len(df))
        return df

    def getFunctors(self):
        return self.funcs

    def getAnalysis(self, parq, funcs=None, band=None):
        if funcs is None:
            funcs = self.funcs
        analysis = PostprocessAnalysis(parq, funcs, filt=band)
        return analysis

    def transform(self, band, parq, funcs, dataId):
        analysis = self.getAnalysis(parq, funcs=funcs, band=band)
        df = analysis.df
        if dataId is not None:
            for key, value in dataId.items():
                df[str(key)] = value

        return pipeBase.Struct(
            df=df,
            analysis=analysis
        )

    def write(self, df, parqRef):
        parqRef.put(ParquetTable(dataFrame=df), self.outputDataset)

    def writeMetadata(self, dataRef):
        """No metadata to write.
        """
        pass
class TransformObjectCatalogConnections(pipeBase.PipelineTaskConnections,
                                        defaultTemplates={"coaddName": "deep"},
                                        dimensions=("tract", "patch", "skymap")):
    inputCatalog = connectionTypes.Input(
        doc="The vertical concatenation of the deepCoadd_{ref|meas|forced_src} catalogs, "
            "stored as a DataFrame with a multi-level column index per-patch.",
        dimensions=("tract", "patch", "skymap"),
        storageClass="DataFrame",
        name="{coaddName}Coadd_obj",
        deferLoad=True,
    )
    outputCatalog = connectionTypes.Output(
        doc="Per-Patch Object Table of columns transformed from the deepCoadd_obj table per the standard "
            "data model.",
        dimensions=("tract", "patch", "skymap"),
        storageClass="DataFrame",
        name="objectTable"
    )
class TransformObjectCatalogConfig(TransformCatalogBaseConfig,
                                   pipelineConnections=TransformObjectCatalogConnections):
    coaddName = pexConfig.Field(
        dtype=str,
        default="deep",
        doc="Name of coadd"
    )
    filterMap = pexConfig.DictField(
        keytype=str,
        itemtype=str,
        default={},
        doc=("Dictionary mapping full filter name to short one for column name munging. "
             "These filters determine the output columns no matter what filters the "
             "input data actually contain."),
        deprecated=("Coadds are now identified by the band, so this transform is unused. "
                    "Will be removed after v22.")
    )
    outputBands = pexConfig.ListField(
        dtype=str,
        default=None,
        optional=True,
        doc=("These bands and only these bands will appear in the output,"
             " NaN-filled if the input does not include them."
             " If None, then use all bands found in the input.")
    )
    camelCase = pexConfig.Field(
        dtype=bool,
        default=False,
        doc=("Write per-band column names with camelCase, else underscore. "
             "For example: gPsFlux instead of g_PsFlux.")
    )
    multilevelOutput = pexConfig.Field(
        dtype=bool,
        default=True,
        doc=("Whether the results dataframe should have a multilevel column index (True) or be flat "
             "and name-munged (False).")
    )
class TransformObjectCatalogTask(TransformCatalogBaseTask):
    """Produce a flattened Object Table to match the format specified in
    the data model.

    Do the same set of postprocessing calculations on all bands.

    This is identical to `TransformCatalogBaseTask`, except that it does the
    specified functor calculations for all filters present in the
    input `deepCoadd_obj` table. Any specific `"filt"` keywords specified
    by the YAML file will be superseded.
    """
    _DefaultName = "transformObjectCatalog"
    ConfigClass = TransformObjectCatalogConfig

    # Used by the Gen 2 command-line interface:
    inputDataset = 'deepCoadd_obj'
    outputDataset = 'objectTable'

    @classmethod
    def _makeArgumentParser(cls):
        parser = ArgumentParser(name=cls._DefaultName)
        parser.add_id_argument("--id", cls.inputDataset,
                               ContainerClass=CoaddDataIdContainer,
                               help="data ID, e.g. --id tract=12345 patch=1,2")
        return parser
    def run(self, parq, funcs=None, dataId=None, band=None):
        # NOTE: the band kwarg is ignored here.
        dfDict = {}
        analysisDict = {}
        templateDf = pd.DataFrame()

        if isinstance(parq, DeferredDatasetHandle):
            columns = parq.get(component='columns')
            inputBands = columns.unique(level=1).values
        else:
            inputBands = parq.columnLevelNames['band']

        outputBands = self.config.outputBands if self.config.outputBands else inputBands

        # Perform the transform for the bands that exist in the input.
        for inputBand in inputBands:
            if inputBand not in outputBands:
                self.log.info("Ignoring %s band data in the input", inputBand)
                continue
            self.log.info("Transforming the catalog of band %s", inputBand)
            result = self.transform(inputBand, parq, funcs, dataId)
            dfDict[inputBand] = result.df
            analysisDict[inputBand] = result.analysis
            templateDf = result.df

        # Fill NaNs in columns of bands that are requested in the output but
        # not present in the input.
        for filt in outputBands:
            if filt not in dfDict:
                self.log.info("Adding empty columns for band %s", filt)
                dfDict[filt] = pd.DataFrame().reindex_like(templateDf)

        # This makes a multilevel column index, with band as the first level.
        df = pd.concat(dfDict, axis=1, names=['band', 'column'])

        if not self.config.multilevelOutput:
            noDupCols = list(set.union(*[set(v.noDupCols) for v in analysisDict.values()]))
            if dataId is not None:
                noDupCols += list(dataId.keys())
            df = flattenFilters(df, noDupCols=noDupCols, camelCase=self.config.camelCase,
                                inputBands=inputBands)

        self.log.info("Made a table of %d columns and %d rows", len(df.columns), len(df))

        return df
class TractObjectDataIdContainer(CoaddDataIdContainer):

    def makeDataRefList(self, namespace):
        """Make self.refList from self.idList.

        Generate a list of data references given tract and/or patch.
        This was adapted from `TractQADataIdContainer`, which was
        `TractDataIdContainer` modified to not require "filter".
        Only existing dataRefs are returned.
        """
        def getPatchRefList(tract):
            return [namespace.butler.dataRef(datasetType=self.datasetType,
                                             tract=tract.getId(),
                                             patch="%d,%d" % patch.getIndex())
                    for patch in tract]

        tractRefs = defaultdict(list)  # Data references for each tract
        for dataId in self.idList:
            skymap = self.getSkymap(namespace)

            if "tract" in dataId:
                tractId = dataId["tract"]
                if "patch" in dataId:
                    tractRefs[tractId].append(namespace.butler.dataRef(datasetType=self.datasetType,
                                                                       tract=tractId,
                                                                       patch=dataId['patch']))
                else:
                    tractRefs[tractId] += getPatchRefList(skymap[tractId])
            else:
                tractRefs = dict((tract.getId(), tractRefs.get(tract.getId(), []) + getPatchRefList(tract))
                                 for tract in skymap)

        outputRefList = []
        for tractRefList in tractRefs.values():
            existingRefs = [ref for ref in tractRefList if ref.datasetExists()]
            outputRefList.append(existingRefs)

        self.refList = outputRefList
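
# A pandas-only sketch (not part of the pipeline) of the NaN-filling step in
# TransformObjectCatalogTask.run above: bands requested in the output but absent
# from the input get empty columns shaped like a band that was processed. Band
# and column names here are illustrative.
def _demoFillMissingBands():
    templateDf = pd.DataFrame({"PsFlux": [1.0, 2.0]}, index=[10, 11])
    dfDict = {"g": templateDf}
    for band in ("g", "r"):
        if band not in dfDict:
            # Same index and columns as the template, but every value is NaN.
            dfDict[band] = pd.DataFrame().reindex_like(templateDf)
    return pd.concat(dfDict, axis=1, names=["band", "column"])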
class ConsolidateObjectTableConnections(pipeBase.PipelineTaskConnections,
                                        dimensions=("tract", "skymap")):
    inputCatalogs = connectionTypes.Input(
        doc="Per-Patch objectTables conforming to the standard data model.",
        name="objectTable",
        storageClass="DataFrame",
        dimensions=("tract", "patch", "skymap"),
        multiple=True,
    )
    outputCatalog = connectionTypes.Output(
        doc="Per-tract concatenation of the input objectTables",
        name="objectTable_tract",
        storageClass="DataFrame",
        dimensions=("tract", "skymap"),
    )
class ConsolidateObjectTableConfig(pipeBase.PipelineTaskConfig,
                                   pipelineConnections=ConsolidateObjectTableConnections):
    coaddName = pexConfig.Field(
        dtype=str,
        default="deep",
        doc="Name of coadd"
    )
class ConsolidateObjectTableTask(CmdLineTask, pipeBase.PipelineTask):
    """Write patch-merged source tables to a tract-level parquet file.

    Concatenates the `objectTable` list into a per-tract `objectTable_tract`.
    """
    _DefaultName = "consolidateObjectTable"
    ConfigClass = ConsolidateObjectTableConfig

    inputDataset = 'objectTable'
    outputDataset = 'objectTable_tract'

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)
        self.log.info("Concatenating %s per-patch Object Tables",
                      len(inputs['inputCatalogs']))
        df = pd.concat(inputs['inputCatalogs'])
        butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs)

    @classmethod
    def _makeArgumentParser(cls):
        parser = ArgumentParser(name=cls._DefaultName)

        parser.add_id_argument("--id", cls.inputDataset,
                               help="data ID, e.g. --id tract=12345",
                               ContainerClass=TractObjectDataIdContainer)
        return parser

    def runDataRef(self, patchRefList):
        df = pd.concat([patchRef.get().toDataFrame() for patchRef in patchRefList])
        patchRefList[0].put(ParquetTable(dataFrame=df), self.outputDataset)

    def writeMetadata(self, dataRef):
        """No metadata to write.
        """
        pass
class TransformSourceTableConnections(pipeBase.PipelineTaskConnections,
                                      defaultTemplates={"catalogType": ""},
                                      dimensions=("instrument", "visit", "detector")):

    inputCatalog = connectionTypes.Input(
        doc="Wide input catalog of sources produced by WriteSourceTableTask",
        name="{catalogType}source",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector"),
    )
    outputCatalog = connectionTypes.Output(
        doc="Narrower, per-detector Source Table transformed and converted per a "
            "specified set of functors",
        name="{catalogType}sourceTable",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector")
    )
class TransformSourceTableConfig(TransformCatalogBaseConfig,
                                 pipelineConnections=TransformSourceTableConnections):
    pass


class TransformSourceTableTask(TransformCatalogBaseTask):
    """Transform/standardize a source catalog.
    """
    _DefaultName = "transformSourceTable"
    ConfigClass = TransformSourceTableConfig

    inputDataset = 'source'
    outputDataset = 'sourceTable'

    @classmethod
    def _makeArgumentParser(cls):
        parser = ArgumentParser(name=cls._DefaultName)
        parser.add_id_argument("--id", datasetType=cls.inputDataset,
                               help="data ID, e.g. --id visit=12345 ccd=0")
        return parser

    def runDataRef(self, dataRef):
        """Override to specify band label to run()."""
        parq = dataRef.get()
        funcs = self.getFunctors()
        band = dataRef.get("calexp_filterLabel", immediate=True).bandLabel
        df = self.run(parq, funcs=funcs, dataId=dataRef.dataId, band=band)
        self.write(df, dataRef)
        return df
class ConsolidateVisitSummaryConnections(pipeBase.PipelineTaskConnections,
                                         dimensions=("instrument", "visit",),
                                         defaultTemplates={"calexpType": ""}):
    calexp = connectionTypes.Input(
        doc="Processed exposures used for metadata",
        name="{calexpType}calexp",
        storageClass="ExposureF",
        dimensions=("instrument", "visit", "detector"),
        deferLoad=True,
        multiple=True,
    )
    visitSummary = connectionTypes.Output(
        doc=("Per-visit consolidated exposure metadata. These catalogs use "
             "detector id for the id and are sorted for fast lookups of a "
             "detector."),
        name="{calexpType}visitSummary",
        storageClass="ExposureCatalog",
        dimensions=("instrument", "visit"),
    )


class ConsolidateVisitSummaryConfig(pipeBase.PipelineTaskConfig,
                                    pipelineConnections=ConsolidateVisitSummaryConnections):
    """Config for ConsolidateVisitSummaryTask"""
    pass
class ConsolidateVisitSummaryTask(pipeBase.PipelineTask, pipeBase.CmdLineTask):
    """Task to consolidate per-detector visit metadata.

    This task aggregates the following metadata from all the detectors in a
    single visit into an exposure catalog:
    - The physical_filter and band (if available).
    - The psf size, shape, and effective area at the center of the detector.
    - The corners of the bounding box in right ascension/declination.

    Other quantities such as Detector, Psf, ApCorrMap, and TransmissionCurve
    are not persisted here because of storage concerns, and because of their
    limited utility as summary statistics.

    Tests for this task are performed in ci_hsc_gen3.
    """
    _DefaultName = "consolidateVisitSummary"
    ConfigClass = ConsolidateVisitSummaryConfig

    @classmethod
    def _makeArgumentParser(cls):
        parser = ArgumentParser(name=cls._DefaultName)

        parser.add_id_argument("--id", "calexp",
                               help="data ID, e.g. --id visit=12345",
                               ContainerClass=VisitDataIdContainer)
        return parser

    def writeMetadata(self, dataRef):
        """No metadata to persist, so override to remove metadata persistence.
        """
        pass

    def writeConfig(self, butler, clobber=False, doBackup=True):
        """No config to persist, so override to remove config persistence.
        """
        pass

    def runDataRef(self, dataRefList):
        visit = dataRefList[0].dataId['visit']

        self.log.debug("Concatenating metadata from %d per-detector calexps (visit %d)",
                       len(dataRefList), visit)

        expCatalog = self._combineExposureMetadata(visit, dataRefList, isGen3=False)

        dataRefList[0].put(expCatalog, 'visitSummary', visit=visit)

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        dataRefs = butlerQC.get(inputRefs.calexp)
        visit = dataRefs[0].dataId.byName()['visit']

        self.log.debug("Concatenating metadata from %d per-detector calexps (visit %d)",
                       len(dataRefs), visit)

        expCatalog = self._combineExposureMetadata(visit, dataRefs)

        butlerQC.put(expCatalog, outputRefs.visitSummary)
    def _combineExposureMetadata(self, visit, dataRefs, isGen3=True):
        """Make a combined exposure catalog from a list of dataRefs.

        These dataRefs must point to exposures with wcs, summaryStats,
        and other visit metadata.

        Parameters
        ----------
        visit : `int`
            Visit identification number.
        dataRefs : `list`
            List of dataRefs in visit. May be a list of
            `lsst.daf.persistence.ButlerDataRef` (Gen2) or
            `lsst.daf.butler.DeferredDatasetHandle` (Gen3).
        isGen3 : `bool`, optional
            Specifies if this is a Gen3 list of datarefs.

        Returns
        -------
        visitSummary : `lsst.afw.table.ExposureCatalog`
            Exposure catalog with per-detector summary information.
        """
        schema = self._makeVisitSummarySchema()
        cat = afwTable.ExposureCatalog(schema)
        cat.resize(len(dataRefs))

        cat['visit'] = visit

        for i, dataRef in enumerate(dataRefs):
            if isGen3:
                visitInfo = dataRef.get(component='visitInfo')
                filterLabel = dataRef.get(component='filterLabel')
                summaryStats = dataRef.get(component='summaryStats')
                detector = dataRef.get(component='detector')
                wcs = dataRef.get(component='wcs')
                photoCalib = dataRef.get(component='photoCalib')
                bbox = dataRef.get(component='bbox')
                validPolygon = dataRef.get(component='validPolygon')
            else:
                # Gen2: read a small sub-image to get the attached metadata
                # without loading the full pixel data.
                gen2_read_bbox = lsst.geom.Box2I(lsst.geom.Point2I(0, 0), lsst.geom.Point2I(1, 1))
                exp = dataRef.get(datasetType='calexp_sub', bbox=gen2_read_bbox)
                visitInfo = exp.getInfo().getVisitInfo()
                filterLabel = dataRef.get("calexp_filterLabel")
                summaryStats = exp.getInfo().getSummaryStats()
                wcs = exp.getWcs()
                photoCalib = exp.getPhotoCalib()
                detector = exp.getDetector()
                bbox = dataRef.get(datasetType='calexp_bbox')
                validPolygon = exp.getInfo().getValidPolygon()

            rec = cat[i]
            rec.setBBox(bbox)
            rec.setVisitInfo(visitInfo)
            rec.setWcs(wcs)
            rec.setPhotoCalib(photoCalib)
            rec.setValidPolygon(validPolygon)

            rec['physical_filter'] = filterLabel.physicalLabel if filterLabel.hasPhysicalLabel() else ""
            rec['band'] = filterLabel.bandLabel if filterLabel.hasBandLabel() else ""
            rec.setId(detector.getId())
            rec['psfSigma'] = summaryStats.psfSigma
            rec['psfIxx'] = summaryStats.psfIxx
            rec['psfIyy'] = summaryStats.psfIyy
            rec['psfIxy'] = summaryStats.psfIxy
            rec['psfArea'] = summaryStats.psfArea
            rec['raCorners'][:] = summaryStats.raCorners
            rec['decCorners'][:] = summaryStats.decCorners
            rec['ra'] = summaryStats.ra
            rec['decl'] = summaryStats.decl
            rec['zenithDistance'] = summaryStats.zenithDistance
            rec['zeroPoint'] = summaryStats.zeroPoint
            rec['skyBg'] = summaryStats.skyBg
            rec['skyNoise'] = summaryStats.skyNoise
            rec['meanVar'] = summaryStats.meanVar
            rec['astromOffsetMean'] = summaryStats.astromOffsetMean
            rec['astromOffsetStd'] = summaryStats.astromOffsetStd

        metadata = dafBase.PropertyList()
        metadata.add("COMMENT", "Catalog id is detector id, sorted.")
        metadata.add("COMMENT", "Only detectors with data have entries.")
        cat.setMetadata(metadata)

        cat.sort()
        return cat
    def _makeVisitSummarySchema(self):
        """Make the schema for the visitSummary catalog."""
        schema = afwTable.ExposureTable.makeMinimalSchema()
        schema.addField('visit', type='I', doc='Visit number')
        schema.addField('physical_filter', type='String', size=32, doc='Physical filter')
        schema.addField('band', type='String', size=32, doc='Name of band')
        schema.addField('psfSigma', type='F',
                        doc='PSF model second-moments determinant radius (center of chip) (pixel)')
        schema.addField('psfArea', type='F',
                        doc='PSF model effective area (center of chip) (pixel**2)')
        schema.addField('psfIxx', type='F',
                        doc='PSF model Ixx (center of chip) (pixel**2)')
        schema.addField('psfIyy', type='F',
                        doc='PSF model Iyy (center of chip) (pixel**2)')
        schema.addField('psfIxy', type='F',
                        doc='PSF model Ixy (center of chip) (pixel**2)')
        schema.addField('raCorners', type='ArrayD', size=4,
                        doc='Right Ascension of bounding box corners (degrees)')
        schema.addField('decCorners', type='ArrayD', size=4,
                        doc='Declination of bounding box corners (degrees)')
        schema.addField('ra', type='D',
                        doc='Right Ascension of bounding box center (degrees)')
        schema.addField('decl', type='D',
                        doc='Declination of bounding box center (degrees)')
        schema.addField('zenithDistance', type='F',
                        doc='Zenith distance of bounding box center (degrees)')
        schema.addField('zeroPoint', type='F',
                        doc='Mean zeropoint in detector (mag)')
        schema.addField('skyBg', type='F',
                        doc='Average sky background (ADU)')
        schema.addField('skyNoise', type='F',
                        doc='Average sky noise (ADU)')
        schema.addField('meanVar', type='F',
                        doc='Mean variance of the weight plane (ADU**2)')
        schema.addField('astromOffsetMean', type='F',
                        doc='Mean offset of astrometric calibration matches (arcsec)')
        schema.addField('astromOffsetStd', type='F',
                        doc='Standard deviation of offsets of astrometric calibration matches (arcsec)')

        return schema
class VisitDataIdContainer(DataIdContainer):
    """DataIdContainer that groups sensor-level ids by visit.
    """

    def makeDataRefList(self, namespace):
        """Make self.refList from self.idList.

        Generate a list of data references grouped by visit.

        Parameters
        ----------
        namespace : `argparse.Namespace`
            Namespace used by `lsst.pipe.base.CmdLineTask` to parse command
            line arguments.
        """
        # Group by visit
        visitRefs = defaultdict(list)
        for dataId in self.idList:
            if "visit" in dataId:
                visitId = dataId["visit"]
                subset = namespace.butler.subset(self.datasetType, dataId=dataId)
                visitRefs[visitId].extend([dataRef for dataRef in subset])

        outputRefList = []
        for refList in visitRefs.values():
            existingRefs = [ref for ref in refList if ref.datasetExists()]
            outputRefList.append(existingRefs)

        self.refList = outputRefList
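
# A stripped-down illustration of the grouping VisitDataIdContainer performs:
# sensor-level data IDs (plain dicts here, for illustration) are bucketed by
# their 'visit' key.
def _demoGroupByVisit():
    dataIds = [{"visit": 1, "detector": 0}, {"visit": 1, "detector": 1},
               {"visit": 2, "detector": 0}]
    visitRefs = defaultdict(list)
    for dataId in dataIds:
        visitRefs[dataId["visit"]].append(dataId)
    return dict(visitRefs)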
class ConsolidateSourceTableConnections(pipeBase.PipelineTaskConnections,
                                        defaultTemplates={"catalogType": ""},
                                        dimensions=("instrument", "visit")):
    inputCatalogs = connectionTypes.Input(
        doc="Input per-detector Source Tables",
        name="{catalogType}sourceTable",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector"),
        multiple=True,
    )
    outputCatalog = connectionTypes.Output(
        doc="Per-visit concatenation of Source Table",
        name="{catalogType}sourceTable_visit",
        storageClass="DataFrame",
        dimensions=("instrument", "visit")
    )


class ConsolidateSourceTableConfig(pipeBase.PipelineTaskConfig,
                                   pipelineConnections=ConsolidateSourceTableConnections):
    pass
class ConsolidateSourceTableTask(CmdLineTask, pipeBase.PipelineTask):
    """Concatenate the `sourceTable` list into a per-visit `sourceTable_visit`.
    """
    _DefaultName = 'consolidateSourceTable'
    ConfigClass = ConsolidateSourceTableConfig

    inputDataset = 'sourceTable'
    outputDataset = 'sourceTable_visit'

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)
        self.log.info("Concatenating %s per-detector Source Tables",
                      len(inputs['inputCatalogs']))
        df = pd.concat(inputs['inputCatalogs'])
        butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs)

    def runDataRef(self, dataRefList):
        self.log.info("Concatenating %s per-detector Source Tables", len(dataRefList))
        df = pd.concat([dataRef.get().toDataFrame() for dataRef in dataRefList])
        dataRefList[0].put(ParquetTable(dataFrame=df), self.outputDataset)

    @classmethod
    def _makeArgumentParser(cls):
        parser = ArgumentParser(name=cls._DefaultName)

        parser.add_id_argument("--id", cls.inputDataset,
                               help="data ID, e.g. --id visit=12345",
                               ContainerClass=VisitDataIdContainer)
        return parser

    def writeMetadata(self, dataRef):
        """No metadata to write.
        """
        pass

    def writeConfig(self, butler, clobber=False, doBackup=True):
        """No config to write.
        """
        pass
class MakeCcdVisitTableConnections(pipeBase.PipelineTaskConnections,
                                   dimensions=("instrument",),
                                   defaultTemplates={}):
    visitSummaryRefs = connectionTypes.Input(
        doc="Data references for per-visit consolidated exposure metadata from ConsolidateVisitSummaryTask",
        name="visitSummary",
        storageClass="ExposureCatalog",
        dimensions=("instrument", "visit"),
        multiple=True,
        deferLoad=True,
    )
    outputCatalog = connectionTypes.Output(
        doc="CCD and Visit metadata table",
        name="CcdVisitTable",
        storageClass="DataFrame",
        dimensions=("instrument",)
    )


class MakeCcdVisitTableConfig(pipeBase.PipelineTaskConfig,
                              pipelineConnections=MakeCcdVisitTableConnections):
    pass
class MakeCcdVisitTableTask(CmdLineTask, pipeBase.PipelineTask):
    """Produce a `ccdVisitTable` from the `visitSummary` exposure catalogs.
    """
    _DefaultName = 'makeCcdVisitTable'
    ConfigClass = MakeCcdVisitTableConfig

    def run(self, visitSummaryRefs):
        """Make a table of ccd information from the `visitSummary` catalogs.

        Parameters
        ----------
        visitSummaryRefs : `list` of `lsst.daf.butler.DeferredDatasetHandle`
            List of DeferredDatasetHandles pointing to exposure catalogs with
            per-detector summary information.

        Returns
        -------
        result : `lsst.pipe.base.Struct`
            Results struct with attribute:
            ``outputCatalog``
                Catalog of ccd and visit information.
        """
        ccdEntries = []
        for visitSummaryRef in visitSummaryRefs:
            visitSummary = visitSummaryRef.get()
            visitInfo = visitSummary[0].getVisitInfo()

            summaryTable = visitSummary.asAstropy()
            selectColumns = ['id', 'visit', 'physical_filter', 'ra', 'decl', 'zenithDistance', 'zeroPoint',
                             'psfSigma', 'skyBg', 'skyNoise']
            ccdEntry = summaryTable[selectColumns].to_pandas().set_index('id')
            ccdEntry = ccdEntry.rename(columns={"physical_filter": "filterName", "visit": "visitId"})

            dataIds = [DataCoordinate.standardize(visitSummaryRef.dataId, detector=id) for id in
                       summaryTable['id']]
            packer = visitSummaryRef.dataId.universe.makePacker('visit_detector', visitSummaryRef.dataId)
            ccdVisitIds = [packer.pack(dataId) for dataId in dataIds]
            ccdEntry['ccdVisitId'] = ccdVisitIds

            pixToArcseconds = np.array([vR.getWcs().getPixelScale().asArcseconds() for vR in visitSummary])
            ccdEntry["seeing"] = visitSummary['psfSigma'] * np.sqrt(8 * np.log(2)) * pixToArcseconds

            ccdEntry["skyRotation"] = visitInfo.getBoresightRotAngle().asDegrees()
            ccdEntry["expMidpt"] = visitInfo.getDate().toPython()
            expTime = visitInfo.getExposureTime()
            ccdEntry['expTime'] = expTime
            ccdEntry["obsStart"] = ccdEntry["expMidpt"] - 0.5 * pd.Timedelta(seconds=expTime)
            ccdEntry['darkTime'] = visitInfo.getDarkTime()
            ccdEntry['xSize'] = summaryTable['bbox_max_x'] - summaryTable['bbox_min_x']
            ccdEntry['ySize'] = summaryTable['bbox_max_y'] - summaryTable['bbox_min_y']
            ccdEntry['llcra'] = summaryTable['raCorners'][:, 0]
            ccdEntry['llcdec'] = summaryTable['decCorners'][:, 0]
            ccdEntry['ulcra'] = summaryTable['raCorners'][:, 1]
            ccdEntry['ulcdec'] = summaryTable['decCorners'][:, 1]
            ccdEntry['urcra'] = summaryTable['raCorners'][:, 2]
            ccdEntry['urcdec'] = summaryTable['decCorners'][:, 2]
            ccdEntry['lrcra'] = summaryTable['raCorners'][:, 3]
            ccdEntry['lrcdec'] = summaryTable['decCorners'][:, 3]

            ccdEntries.append(ccdEntry)

        outputCatalog = pd.concat(ccdEntries)
        return pipeBase.Struct(outputCatalog=outputCatalog)
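
# A numpy-only check of the seeing column computed in MakeCcdVisitTableTask.run:
# the PSF determinant radius (sigma, in pixels) is converted to a FWHM in
# arcseconds. The numbers below are illustrative.
def _demoSeeing():
    psfSigma = np.array([1.8, 2.0])          # pixels
    pixToArcseconds = np.array([0.2, 0.2])   # arcsec / pixel
    # FWHM = sigma * sqrt(8 ln 2), then converted from pixels to arcseconds.
    return psfSigma * np.sqrt(8 * np.log(2)) * pixToArcseconds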
class MakeVisitTableConnections(pipeBase.PipelineTaskConnections,
                                dimensions=("instrument",),
                                defaultTemplates={}):
    visitSummaries = connectionTypes.Input(
        doc="Per-visit consolidated exposure metadata from ConsolidateVisitSummaryTask",
        name="visitSummary",
        storageClass="ExposureCatalog",
        dimensions=("instrument", "visit",),
        multiple=True,
        deferLoad=True,
    )
    outputCatalog = connectionTypes.Output(
        doc="Visit metadata table",
        name="visitTable",
        storageClass="DataFrame",
        dimensions=("instrument",)
    )


class MakeVisitTableConfig(pipeBase.PipelineTaskConfig,
                           pipelineConnections=MakeVisitTableConnections):
    pass
class MakeVisitTableTask(CmdLineTask, pipeBase.PipelineTask):
    """Produce a `visitTable` from the `visitSummary` exposure catalogs.
    """
    _DefaultName = 'makeVisitTable'
    ConfigClass = MakeVisitTableConfig

    def run(self, visitSummaries):
        """Make a table of visit information from the `visitSummary` catalogs.

        Parameters
        ----------
        visitSummaries : `list` of `lsst.afw.table.ExposureCatalog`
            List of exposure catalogs with per-detector summary information.

        Returns
        -------
        result : `lsst.pipe.base.Struct`
            Results struct with attribute:
            ``outputCatalog``
                Catalog of visit information.
        """
        visitEntries = []
        for visitSummary in visitSummaries:
            visitSummary = visitSummary.get()
            visitRow = visitSummary[0]
            visitInfo = visitRow.getVisitInfo()

            visitEntry = {}
            visitEntry["visitId"] = visitRow['visit']
            visitEntry["filterName"] = visitRow['physical_filter']
            raDec = visitInfo.getBoresightRaDec()
            visitEntry["ra"] = raDec.getRa().asDegrees()
            visitEntry["decl"] = raDec.getDec().asDegrees()
            visitEntry["skyRotation"] = visitInfo.getBoresightRotAngle().asDegrees()
            azAlt = visitInfo.getBoresightAzAlt()
            visitEntry["azimuth"] = azAlt.getLongitude().asDegrees()
            visitEntry["altitude"] = azAlt.getLatitude().asDegrees()
            visitEntry["zenithDistance"] = 90 - azAlt.getLatitude().asDegrees()
            visitEntry["airmass"] = visitInfo.getBoresightAirmass()
            visitEntry["obsStart"] = visitInfo.getDate().toPython()
            visitEntry["expTime"] = visitInfo.getExposureTime()
            visitEntries.append(visitEntry)

        outputCatalog = pd.DataFrame(data=visitEntries)
        return pipeBase.Struct(outputCatalog=outputCatalog)
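
# A small sketch of how MakeVisitTableTask.run assembles its output: one dict per
# visit, collected into a DataFrame. The values are illustrative; note that the
# zenithDistance column is simply 90 degrees minus the boresight altitude.
def _demoVisitTable():
    visitEntries = [{"visitId": 1, "filterName": "HSC-G", "altitude": 60.0},
                    {"visitId": 2, "filterName": "HSC-R", "altitude": 45.0}]
    for entry in visitEntries:
        entry["zenithDistance"] = 90 - entry["altitude"]
    return pd.DataFrame(data=visitEntries)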
class WriteForcedSourceTableConnections(pipeBase.PipelineTaskConnections,
                                        dimensions=("instrument", "visit", "detector", "skymap", "tract")):

    inputCatalog = connectionTypes.Input(
        doc="Primary per-detector, single-epoch forced-photometry catalog. "
            "By default, it is the output of ForcedPhotCcdTask on calexps",
        name="forced_src",
        storageClass="SourceCatalog",
        dimensions=("instrument", "visit", "detector", "skymap", "tract")
    )
    inputCatalogDiff = connectionTypes.Input(
        doc="Secondary multi-epoch, per-detector, forced-photometry catalog. "
            "By default, it is the output of ForcedPhotCcdTask run on image differences.",
        name="forced_diff",
        storageClass="SourceCatalog",
        dimensions=("instrument", "visit", "detector", "skymap", "tract")
    )
    outputCatalog = connectionTypes.Output(
        doc="Input catalogs horizontally joined on `objectId` in Parquet format",
        name="forcedSource",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector")
    )


class WriteForcedSourceTableConfig(WriteSourceTableConfig,
                                   pipelineConnections=WriteForcedSourceTableConnections):
    key = lsst.pex.config.Field(
        doc="Column on which to join the two input tables and make the primary key of the output",
        dtype=str,
        default="objectId",
    )
class WriteForcedSourceTableTask(pipeBase.PipelineTask):
    """Merge and convert per-detector forced source catalogs to parquet.
    """
    _DefaultName = "writeForcedSourceTable"
    ConfigClass = WriteForcedSourceTableConfig

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)
        # Add ccdVisitId to allow joining with the CcdVisitTable
        inputs['ccdVisitId'] = butlerQC.quantum.dataId.pack("visit_detector")
        inputs['band'] = butlerQC.quantum.dataId.full['band']
        outputs = self.run(**inputs)
        butlerQC.put(outputs, outputRefs)

    def run(self, inputCatalog, inputCatalogDiff, ccdVisitId=None, band=None):
        dfs = []
        for table, dataset in zip((inputCatalog, inputCatalogDiff), ('calexp', 'diff')):
            df = table.asAstropy().to_pandas().set_index(self.config.key, drop=False)
            df = df.reindex(sorted(df.columns), axis=1)
            df['ccdVisitId'] = ccdVisitId if ccdVisitId else pd.NA
            df['band'] = band if band else pd.NA
            df.columns = pd.MultiIndex.from_tuples([(dataset, c) for c in df.columns],
                                                   names=('dataset', 'column'))
            dfs.append(df)

        outputCatalog = functools.reduce(lambda d1, d2: d1.join(d2), dfs)
        return pipeBase.Struct(outputCatalog=outputCatalog)
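
# A pandas-only sketch of WriteForcedSourceTableTask.run: the calexp and diff
# forced-photometry tables are keyed on objectId, given a ('dataset', 'column')
# MultiIndex, and joined horizontally. The values are illustrative.
def _demoJoinForcedSources():
    dfs = []
    for dataset in ("calexp", "diff"):
        df = pd.DataFrame({"objectId": [100, 101], "instFlux": [1.0, 2.0]})
        df = df.set_index("objectId", drop=False)
        df.columns = pd.MultiIndex.from_tuples([(dataset, c) for c in df.columns],
                                               names=("dataset", "column"))
        dfs.append(df)
    return functools.reduce(lambda d1, d2: d1.join(d2), dfs)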
class TransformForcedSourceTableConnections(pipeBase.PipelineTaskConnections,
                                            dimensions=("instrument", "skymap", "patch", "tract")):

    inputCatalogs = connectionTypes.Input(
        doc="Parquet table of merged ForcedSources produced by WriteForcedSourceTableTask",
        name="forcedSource",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector"),
        multiple=True,
        deferLoad=True,
    )
    referenceCatalog = connectionTypes.Input(
        doc="Reference catalog which was used to seed the forcedPhot. Columns "
            "objectId, detect_isPrimary, detect_isTractInner, detect_isPatchInner "
            "are expected.",
        name="objectTable",
        storageClass="DataFrame",
        dimensions=("tract", "patch", "skymap"),
        deferLoad=True,
    )
    outputCatalog = connectionTypes.Output(
        doc="Narrower, temporally-aggregated, per-patch ForcedSource Table transformed and converted per a "
            "specified set of functors",
        name="ForcedSourceTable",
        storageClass="DataFrame",
        dimensions=("tract", "patch", "skymap")
    )


class TransformForcedSourceTableConfig(TransformCatalogBaseConfig,
                                       pipelineConnections=TransformForcedSourceTableConnections):
    referenceColumns = pexConfig.ListField(
        dtype=str,
        default=["detect_isPrimary", "detect_isTractInner", "detect_isPatchInner"],
        doc="Columns to pull from reference catalog",
    )
class TransformForcedSourceTableTask(TransformCatalogBaseTask):
    """Transform/standardize a ForcedSource catalog.

    Transforms each wide, per-detector forcedSource parquet table per the
    specification file (per-camera defaults found in ForcedSource.yaml).
    All epochs that overlap the patch are aggregated into one per-patch,
    narrow parquet file.

    No de-duplication of rows is performed. Duplicate resolution flags are
    pulled in from the referenceCatalog: `detect_isPrimary`,
    `detect_isTractInner`, `detect_isPatchInner`, so that the user may
    de-duplicate for analysis or compare duplicates for QA.

    The resulting table includes multiple bands. Epochs (MJDs) and other useful
    per-visit rows can be retrieved by joining with the CcdVisitTable on
    ccdVisitId.
    """
    _DefaultName = "transformForcedSourceTable"
    ConfigClass = TransformForcedSourceTableConfig

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)
        if self.funcs is None:
            raise ValueError("config.functorFile is None. "
                             "Must be a valid path to yaml in order to run Task as a PipelineTask.")
        outputs = self.run(inputs['inputCatalogs'], inputs['referenceCatalog'], funcs=self.funcs,
                           dataId=outputRefs.outputCatalog.dataId.full)

        butlerQC.put(outputs, outputRefs)

    def run(self, inputCatalogs, referenceCatalog, funcs=None, dataId=None, band=None):
        dfs = []
        ref = referenceCatalog.get(parameters={"columns": self.config.referenceColumns})
        self.log.info("Aggregating %s input catalogs" % (len(inputCatalogs)))
        for handle in inputCatalogs:
            result = self.transform(None, handle, funcs, dataId)
            # Keep only rows whose objectId appears in the patch's reference catalog.
            dfs.append(ref.join(result.df, how='inner'))

        outputCatalog = pd.concat(dfs)
        self.log.info("Made a table of %d columns and %d rows",
                      len(outputCatalog.columns), len(outputCatalog))
        return pipeBase.Struct(outputCatalog=outputCatalog)
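
# A pandas-only sketch of the filtering step in TransformForcedSourceTableTask.run:
# joining each transformed per-detector table against the patch's reference index
# with how='inner' keeps only rows whose objectId appears in the reference catalog.
# The IDs and flag values are illustrative.
def _demoReferenceJoin():
    ref = pd.DataFrame({"detect_isPrimary": [True, False]}, index=[100, 101])
    ref.index.name = "objectId"
    perDetector = pd.DataFrame({"psfFlux": [1.0, 2.0, 3.0]}, index=[100, 101, 999])
    perDetector.index.name = "objectId"
    # objectId 999 is not in the reference catalog, so it is dropped.
    return ref.join(perDetector, how="inner")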
class ConsolidateForcedSourceTableConnections(pipeBase.PipelineTaskConnections,
                                              defaultTemplates={"catalogType": ""},
                                              dimensions=("instrument", "tract")):
    inputCatalogs = connectionTypes.Input(
        doc="Input per-patch ForcedSource Tables",
        name="{catalogType}ForcedSourceTable",
        storageClass="DataFrame",
        dimensions=("tract", "patch", "skymap"),
        multiple=True,
    )
    outputCatalog = connectionTypes.Output(
        doc="Output per-tract concatenation of ForcedSource Tables",
        name="{catalogType}ForcedSourceTable_tract",
        storageClass="DataFrame",
        dimensions=("tract", "skymap"),
    )


class ConsolidateForcedSourceTableConfig(pipeBase.PipelineTaskConfig,
                                         pipelineConnections=ConsolidateForcedSourceTableConnections):
    pass


class ConsolidateForcedSourceTableTask(CmdLineTask, pipeBase.PipelineTask):
    """Concatenate a per-patch `ForcedSourceTable` list into a single
    per-tract `forcedSourceTable_tract`.
    """
    _DefaultName = 'consolidateForcedSourceTable'
    ConfigClass = ConsolidateForcedSourceTableConfig

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)
        self.log.info("Concatenating %s per-patch ForcedSource Tables",
                      len(inputs['inputCatalogs']))
        df = pd.concat(inputs['inputCatalogs'])
        butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs)