import functools
import os
import numbers

import numpy as np
import pandas as pd

from collections import defaultdict

import lsst.geom
import lsst.pex.config as pexConfig
import lsst.pipe.base as pipeBase
import lsst.daf.base as dafBase
import lsst.afw.table as afwTable
from lsst.meas.base import SingleFrameMeasurementTask
from lsst.pipe.base import connectionTypes
from lsst.pipe.base import CmdLineTask, ArgumentParser, DataIdContainer
from lsst.daf.butler import DeferredDatasetHandle, DataCoordinate

from .parquetTable import ParquetTable
from .multiBandUtils import makeMergeArgumentParser, MergeSourcesRunner
from .functors import CompositeFunctor, Column
def flattenFilters(df, noDupCols=['coord_ra', 'coord_dec'], camelCase=False, inputBands=None):
    """Flattens a dataframe with multilevel column index.
    """
    newDf = pd.DataFrame()
    # band is the level-0 entry of the multilevel column index
    dfBands = df.columns.unique(level=0).values
    for band in dfBands:
        subdf = df[band]
        columnFormat = '{0}{1}' if camelCase else '{0}_{1}'
        newColumns = {c: columnFormat.format(band, c)
                      for c in subdf.columns if c not in noDupCols}
        cols = list(newColumns.keys())
        newDf = pd.concat([newDf, subdf[cols].rename(columns=newColumns)], axis=1)

    # Only bands present in both the input and the requested output bands are used
    # when picking the partition that supplies the non-duplicated columns.
    presentBands = dfBands if inputBands is None else list(set(inputBands).intersection(dfBands))
    noDupDf = df[presentBands[0]][noDupCols]
    newDf = pd.concat([noDupDf, newDf], axis=1)
    return newDf
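
# Minimal usage sketch of flattenFilters (illustrative toy data; the column names
# below are assumptions, not pipeline schema):
#
#   cols = pd.MultiIndex.from_product([['g', 'r'], ['coord_ra', 'coord_dec', 'PsfFlux']])
#   df = pd.DataFrame([[10.0, -2.0, 3.1, 10.0, -2.0, 4.2]], columns=cols)
#   flat = flattenFilters(df, camelCase=True)
#   # flat.columns -> ['coord_ra', 'coord_dec', 'gPsfFlux', 'rPsfFlux']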
class WriteObjectTableConnections(pipeBase.PipelineTaskConnections,
                                  defaultTemplates={"coaddName": "deep"},
                                  dimensions=("tract", "patch", "skymap")):
    inputCatalogMeas = connectionTypes.Input(
        doc="Catalog of source measurements on the deepCoadd.",
        dimensions=("tract", "patch", "band", "skymap"),
        storageClass="SourceCatalog",
        name="{coaddName}Coadd_meas",
    )
    inputCatalogForcedSrc = connectionTypes.Input(
        doc="Catalog of forced measurements (shape and position parameters held fixed) on the deepCoadd.",
        dimensions=("tract", "patch", "band", "skymap"),
        storageClass="SourceCatalog",
        name="{coaddName}Coadd_forced_src",
    )
    inputCatalogRef = connectionTypes.Input(
        doc="Catalog marking the primary detection (which band provides a good shape and position) "
            "for each detection in deepCoadd_mergeDet.",
        dimensions=("tract", "patch", "skymap"),
        storageClass="SourceCatalog",
        name="{coaddName}Coadd_ref"
    )
    outputCatalog = connectionTypes.Output(
        doc="A vertical concatenation of the deepCoadd_{ref|meas|forced_src} catalogs, "
            "stored as a DataFrame with a multi-level column index per-patch.",
        dimensions=("tract", "patch", "skymap"),
        storageClass="DataFrame",
        name="{coaddName}Coadd_obj"
    )
class WriteObjectTableConfig(pipeBase.PipelineTaskConfig,
                             pipelineConnections=WriteObjectTableConnections):
    engine = pexConfig.Field(
        dtype=str,
        default="pyarrow",
        doc="Parquet engine for writing (pyarrow or fastparquet)"
    )
    coaddName = pexConfig.Field(
        dtype=str,
        default="deep",
        doc="Name of coadd"
    )
class WriteObjectTableTask(CmdLineTask, pipeBase.PipelineTask):
    """Write filter-merged source tables to parquet.
    """
    _DefaultName = "writeObjectTable"
    ConfigClass = WriteObjectTableConfig
    RunnerClass = MergeSourcesRunner

    # Names of the input table datasets to be merged
    inputDatasets = ('forced_src', 'meas', 'ref')

    # Suffix of the output dataset (written as "{coaddName}Coadd_obj")
    outputDataset = 'obj'

    def __init__(self, butler=None, schema=None, **kwargs):
        super().__init__(**kwargs)

    def runDataRef(self, patchRefList):
        """!
        @brief Merge coadd sources from multiple bands. Calls @ref run which must be defined in
        subclasses that inherit from MergeSourcesTask.
        @param[in] patchRefList  list of data references for each filter
        """
        catalogs = dict(self.readCatalog(patchRef) for patchRef in patchRefList)
        dataId = patchRefList[0].dataId
        mergedCatalog = self.run(catalogs, tract=dataId['tract'], patch=dataId['patch'])
        self.write(patchRefList[0], ParquetTable(dataFrame=mergedCatalog))
    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)

        measDict = {ref.dataId['band']: {'meas': cat} for ref, cat in
                    zip(inputRefs.inputCatalogMeas, inputs['inputCatalogMeas'])}
        forcedSourceDict = {ref.dataId['band']: {'forced_src': cat} for ref, cat in
                            zip(inputRefs.inputCatalogForcedSrc, inputs['inputCatalogForcedSrc'])}

        catalogs = {}
        for band in measDict.keys():
            catalogs[band] = {'meas': measDict[band]['meas'],
                              'forced_src': forcedSourceDict[band]['forced_src'],
                              'ref': inputs['inputCatalogRef']}
        dataId = butlerQC.quantum.dataId
        df = self.run(catalogs=catalogs, tract=dataId['tract'], patch=dataId['patch'])
        outputs = pipeBase.Struct(outputCatalog=df)
        butlerQC.put(outputs, outputRefs)
    @classmethod
    def _makeArgumentParser(cls):
        """Create a suitable ArgumentParser.

        We will use the ArgumentParser to get a list of data
        references for patches; the RunnerClass will sort them into lists
        of data references for the same patch.

        The parser references the first entry of ``self.inputDatasets``.
        """
        return makeMergeArgumentParser(cls._DefaultName, cls.inputDatasets[0])

    def readCatalog(self, patchRef):
        """Read input catalogs.

        Read all the input datasets given by the 'inputDatasets'
        attribute.

        Parameters
        ----------
        patchRef : `lsst.daf.persistence.ButlerDataRef`
            Data reference for patch.

        Returns
        -------
        Tuple consisting of band name and a dict of catalogs, keyed by
        dataset name.
        """
        band = patchRef.get(self.config.coaddName + "Coadd_filterLabel", immediate=True).bandLabel
        catalogDict = {}
        for dataset in self.inputDatasets:
            catalog = patchRef.get(self.config.coaddName + "Coadd_" + dataset, immediate=True)
            self.log.info("Read %d sources from %s for band %s: %s",
                          len(catalog), dataset, band, patchRef.dataId)
            catalogDict[dataset] = catalog
        return band, catalogDict
    def run(self, catalogs, tract, patch):
        """Merge multiple catalogs.

        Parameters
        ----------
        catalogs : `dict`
            Mapping from filter names to dict of catalogs.
        tract : `int`
            tractId to use for the tractId column.
        patch : `str`
            patchId to use for the patchId column.

        Returns
        -------
        catalog : `pandas.DataFrame`
            Merged dataframe.
        """
        dfs = []
        for filt, tableDict in catalogs.items():
            for dataset, table in tableDict.items():
                # Convert afwTable to pandas DataFrame
                df = table.asAstropy().to_pandas().set_index('id', drop=True)

                # Sort columns by name, to ensure matching schema among patches
                df = df.reindex(sorted(df.columns), axis=1)
                df['tractId'] = tract
                df['patchId'] = patch

                # Make columns a 3-level MultiIndex
                df.columns = pd.MultiIndex.from_tuples([(dataset, filt, c) for c in df.columns],
                                                       names=('dataset', 'band', 'column'))
                dfs.append(df)

        catalog = functools.reduce(lambda d1, d2: d1.join(d2), dfs)
        return catalog
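
    # Illustrative sketch (toy column name, an assumption): the merged per-patch frame
    # returned by `run` carries a 3-level column index such as
    #   ('meas', 'g', 'base_PsfFlux_instFlux')
    # so a single dataset/band slice can be recovered with plain pandas indexing:
    #   measG = mergedCatalog['meas']['g']
    #   refOnly = mergedCatalog.xs('ref', level='dataset', axis=1)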
    def write(self, patchRef, catalog):
        """Write the output.

        Parameters
        ----------
        catalog : `ParquetTable`
            Catalog to write.
        patchRef : `lsst.daf.persistence.ButlerDataRef`
            Data reference for patch.
        """
        patchRef.put(catalog, self.config.coaddName + "Coadd_" + self.outputDataset)
        # The filter is not part of the output data ID, so drop it from the log message.
        mergeDataId = patchRef.dataId.copy()
        del mergeDataId["filter"]
        self.log.info("Wrote merged catalog: %s", mergeDataId)

    def writeMetadata(self, dataRefList):
        """No metadata to write, and not sure how to write it for a list of dataRefs.
        """
        pass
class WriteSourceTableConnections(pipeBase.PipelineTaskConnections,
                                  defaultTemplates={"catalogType": ""},
                                  dimensions=("instrument", "visit", "detector")):
    catalog = connectionTypes.Input(
        doc="Input full-depth catalog of sources produced by CalibrateTask",
        name="{catalogType}src",
        storageClass="SourceCatalog",
        dimensions=("instrument", "visit", "detector")
    )
    outputCatalog = connectionTypes.Output(
        doc="Catalog of sources, `src` in Parquet format. The 'id' column is "
            "replaced with an index; all other columns are unchanged.",
        name="{catalogType}source",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector")
    )
class WriteSourceTableConfig(pipeBase.PipelineTaskConfig,
                             pipelineConnections=WriteSourceTableConnections):
    doApplyExternalPhotoCalib = pexConfig.Field(
        dtype=bool,
        default=False,
        doc=("Add local photoCalib columns from the calexp.photoCalib? Should only set True if "
             "generating Source Tables from older src tables which do not already have local calib columns")
    )
    doApplyExternalSkyWcs = pexConfig.Field(
        dtype=bool,
        default=False,
        doc=("Add local WCS columns from the calexp.wcs? Should only set True if "
             "generating Source Tables from older src tables which do not already have local calib columns")
    )
class WriteSourceTableTask(CmdLineTask, pipeBase.PipelineTask):
    """Write source table to parquet.
    """
    _DefaultName = "writeSourceTable"
    ConfigClass = WriteSourceTableConfig

    def runDataRef(self, dataRef):
        src = dataRef.get('src')
        if self.config.doApplyExternalPhotoCalib or self.config.doApplyExternalSkyWcs:
            src = self.addCalibColumns(src, dataRef)

        ccdVisitId = dataRef.get('ccdExposureId')
        result = self.run(src, ccdVisitId=ccdVisitId)
        dataRef.put(result.table, 'source')

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)
        inputs['ccdVisitId'] = butlerQC.quantum.dataId.pack("visit_detector")
        result = self.run(**inputs).table
        outputs = pipeBase.Struct(outputCatalog=result.toDataFrame())
        butlerQC.put(outputs, outputRefs)
    def run(self, catalog, ccdVisitId=None):
        """Convert `src` catalog to parquet.

        Parameters
        ----------
        catalog : `afwTable.SourceCatalog`
            catalog to be converted
        ccdVisitId : `int`
            ccdVisitId to be added as a column

        Returns
        -------
        result : `lsst.pipe.base.Struct`
            ``table``
                `ParquetTable` version of the input catalog
        """
        self.log.info("Generating parquet table from src catalog %s", ccdVisitId)
        df = catalog.asAstropy().to_pandas().set_index('id', drop=True)
        df['ccdVisitId'] = ccdVisitId
        return pipeBase.Struct(table=ParquetTable(dataFrame=df))
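
    # Minimal usage sketch (the catalog and id below are assumptions): converting an
    # in-memory afw `src` catalog without going through the butler.
    #   result = WriteSourceTableTask().run(srcCatalog, ccdVisitId=123456)
    #   df = result.table.toDataFrame()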
    def addCalibColumns(self, catalog, dataRef):
        """Add columns with local calibration evaluated at each centroid,
        for backwards compatibility with old repos.

        This exists for the purpose of converting old src catalogs
        (which don't have the expected local calib columns) to Source Tables.

        Parameters
        ----------
        catalog : `afwTable.SourceCatalog`
            catalog to which calib columns will be added
        dataRef : `lsst.daf.persistence.ButlerDataRef`
            for fetching the calibs from disk.

        Returns
        -------
        newCat : `afwTable.SourceCatalog`
            Source Catalog with requested local calib columns
        """
        measureConfig = SingleFrameMeasurementTask.ConfigClass()
        measureConfig.doReplaceWithNoise = False

        # Only the exposure metadata (wcs, photoCalib) is needed, not the pixels,
        # so read a minimal sub-image.
        exposure = dataRef.get('calexp_sub',
                               bbox=lsst.geom.Box2I(lsst.geom.Point2I(0, 0), lsst.geom.Point2I(0, 0)))

        mapper = afwTable.SchemaMapper(catalog.schema)
        mapper.addMinimalSchema(catalog.schema, True)
        schema = mapper.getOutputSchema()

        exposureIdInfo = dataRef.get("expIdInfo")
        measureConfig.plugins.names = []
        if self.config.doApplyExternalSkyWcs:
            plugin = 'base_LocalWcs'
            if plugin in catalog.schema:
                raise RuntimeError(f"{plugin} already in src catalog. Set doApplyExternalSkyWcs=False")
            measureConfig.plugins.names.add(plugin)

        if self.config.doApplyExternalPhotoCalib:
            plugin = 'base_LocalPhotoCalib'
            if plugin in catalog.schema:
                raise RuntimeError(f"{plugin} already in src catalog. Set doApplyExternalPhotoCalib=False")
            measureConfig.plugins.names.add(plugin)

        measurement = SingleFrameMeasurementTask(config=measureConfig, schema=schema)
        newCat = afwTable.SourceCatalog(schema)
        newCat.extend(catalog, mapper=mapper)
        measurement.run(measCat=newCat, exposure=exposure, exposureId=exposureIdInfo.expId)
        return newCat
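
    # Usage sketch (Gen2-style, assumed inputs): only needed when the input `src`
    # catalog predates the local calibration columns.
    #   config = WriteSourceTableConfig()
    #   config.doApplyExternalSkyWcs = True
    #   config.doApplyExternalPhotoCalib = True
    #   task = WriteSourceTableTask(config=config)
    #   srcWithLocalCalibs = task.addCalibColumns(src, dataRef)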
    def writeMetadata(self, dataRef):
        """No metadata to write.
        """
        pass

    @classmethod
    def _makeArgumentParser(cls):
        parser = ArgumentParser(name=cls._DefaultName)
        parser.add_id_argument("--id", 'src',
                               help="data ID, e.g. --id visit=12345 ccd=0")
        return parser
class PostprocessAnalysis(object):
    """Calculate columns from ParquetTable.

    This object manages and organizes an arbitrary set of computations
    on a catalog.  The catalog is defined by a
    `deepCoadd_obj` dataset, and the computations are defined by a collection
    of `lsst.pipe.tasks.functors.Functor` objects (or, equivalently,
    a `CompositeFunctor`).

    After the object is initialized, accessing the `.df` attribute (which
    holds the `pandas.DataFrame` containing the results of the calculations)
    triggers computation of said dataframe.

    One of the conveniences of using this object is the ability to define a
    desired common filter for all functors.  This enables the same functor
    collection to be passed to several different `PostprocessAnalysis` objects
    without having to change the original functor collection, since the `filt`
    keyword argument of this object triggers an overwrite of the `filt`
    property for all functors in the collection.

    This object also allows a list of refFlags to be passed, and defines a set
    of default refFlags that are always included even if not requested.

    If a list of `ParquetTable` objects is passed, rather than a single one,
    then the calculations will be mapped over all the input catalogs.  In
    principle, it should be straightforward to parallelize this activity, but
    initial tests have failed (see TODO in code comments).

    Parameters
    ----------
    parq : `lsst.pipe.tasks.ParquetTable` (or list of such)
        Source catalog(s) for computation.

    functors : `list`, `dict`, or `CompositeFunctor`
        Computations to do (functors that act on `parq`).
        If a dict, the output DataFrame will have columns keyed accordingly.
        If a list, the column keys will come from the
        `.shortname` attribute of each functor.

    filt : `str` (optional)
        Filter in which to calculate.  If provided,
        this will overwrite any existing `.filt` attribute
        of the provided functors.

    flags : `list` (optional)
        List of flags (per-band) to include in output table.
        Taken from the `meas` dataset if applied to a multilevel Object Table.

    refFlags : `list` (optional)
        List of refFlags (only reference band) to include in output table.

    forcedFlags : `list` (optional)
        List of flags (per-band) to include in output table.
        Taken from the ``forced_src`` dataset if applied to a
        multilevel Object Table.  Intended for flags from measurement plugins
        only run during multi-band forced-photometry.
    """
    _defaultRefFlags = []
    _defaultFuncs = ()
    def __init__(self, parq, functors, filt=None, flags=None, refFlags=None, forcedFlags=None):
        self.parq = parq
        self.functors = functors

        self.filt = filt
        self.flags = list(flags) if flags is not None else []
        self.forcedFlags = list(forcedFlags) if forcedFlags is not None else []
        self.refFlags = list(self._defaultRefFlags)
        if refFlags is not None:
            self.refFlags += list(refFlags)

        self._df = None

    @property
    def defaultFuncs(self):
        funcs = dict(self._defaultFuncs)
        return funcs

    @property
    def func(self):
        additionalFuncs = self.defaultFuncs
        additionalFuncs.update({flag: Column(flag, dataset='forced_src') for flag in self.forcedFlags})
        additionalFuncs.update({flag: Column(flag, dataset='ref') for flag in self.refFlags})
        additionalFuncs.update({flag: Column(flag, dataset='meas') for flag in self.flags})

        if isinstance(self.functors, CompositeFunctor):
            func = self.functors
        else:
            func = CompositeFunctor(self.functors)

        func.funcDict.update(additionalFuncs)
        func.filt = self.filt

        return func

    @property
    def noDupCols(self):
        return [name for name, func in self.func.funcDict.items() if func.noDup or func.dataset == 'ref']

    @property
    def df(self):
        if self._df is None:
            self.compute()
        return self._df

    def compute(self, dropna=False, pool=None):
        # map over multiple ParquetTables
        if type(self.parq) in (list, tuple):
            if pool is None:
                dflist = [self.func(parq, dropna=dropna) for parq in self.parq]
            else:
                # TODO: Figure out why this doesn't work
                dflist = pool.map(functools.partial(self.func, dropna=dropna), self.parq)
            self._df = pd.concat(dflist)
        else:
            self._df = self.func(self.parq, dropna=dropna)

        return self._df
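
# Minimal usage sketch of PostprocessAnalysis (illustrative; the functor keys and flag
# names below are assumptions, not a schema guarantee):
#
#   funcs = CompositeFunctor({'ra': Column('coord_ra', dataset='ref'),
#                             'dec': Column('coord_dec', dataset='ref')})
#   analysis = PostprocessAnalysis(parq, funcs, filt='r',
#                                  flags=['base_PixelFlags_flag'])
#   df = analysis.df   # first access triggers compute()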
class TransformCatalogBaseConnections(pipeBase.PipelineTaskConnections,
                                      dimensions=()):
    """Expected Connections for subclasses of TransformCatalogBaseTask.

    Must be subclassed.
    """
    inputCatalog = connectionTypes.Input(
        name="",
        storageClass="DataFrame",
    )
    outputCatalog = connectionTypes.Output(
        name="",
        storageClass="DataFrame",
    )
class TransformCatalogBaseConfig(pipeBase.PipelineTaskConfig,
                                 pipelineConnections=TransformCatalogBaseConnections):
    functorFile = pexConfig.Field(
        dtype=str,
        doc="Path to YAML file specifying Science Data Model functors to use "
            "when copying columns and computing calibrated values.",
        default=None,
        optional=True
    )
    primaryKey = pexConfig.Field(
        dtype=str,
        doc="Name of column to be set as the DataFrame index. If None, the index "
            "will be named `id`.",
        default=None,
        optional=True
    )
class TransformCatalogBaseTask(CmdLineTask, pipeBase.PipelineTask):
    """Base class for transforming/standardizing a catalog

    by applying functors that convert units and apply calibrations.
    The purpose of this task is to perform a set of computations on
    an input `ParquetTable` dataset (such as `deepCoadd_obj`) and write the
    results to a new dataset (which needs to be declared in an `outputDataset`
    attribute).

    The calculations to be performed are defined in a YAML file that specifies
    a set of functors to be computed, provided as
    a `--functorFile` config parameter.  An abridged example of such a YAML
    file::

        funcs:
            count:
                functor: Column
                args:
                    - base_InputCount_value
            deconvolved_moments:
                functor: DeconvolvedMoments
        refFlags:
            - merge_measurement_i
            - merge_measurement_r
            - merge_measurement_z
            - merge_measurement_y
            - merge_measurement_g
            - base_PixelFlags_flag_inexact_psfCenter

    The names for each entry under "func" will become the names of columns in
    the output dataset.
    Positional arguments to be passed to each functor are in the `args` list,
    and any additional entries for each column other than "functor" or "args"
    (e.g., `'filt'`, `'dataset'`) are treated as keyword arguments to be passed
    to the functor initialization.

    The "flags" entry is the default shortcut for `Column` functors.
    All columns listed under "flags" will be copied to the output table
    untransformed.  They can be of any datatype.
    In the special case of transforming a multi-level object table with
    band and dataset indices (deepCoadd_obj), these will be taken from the
    `meas` dataset and exploded out per band.

    There are two special shortcuts that only apply when transforming
    multi-level Object (deepCoadd_obj) tables:

    - The "refFlags" entry is a shortcut for `Column` functors
      taken from the `'ref'` dataset if transforming an ObjectTable.
    - The "forcedFlags" entry is a shortcut for `Column` functors
      taken from the ``forced_src`` dataset if transforming an ObjectTable.
      These are expanded out per band.

    This task uses the `lsst.pipe.tasks.postprocess.PostprocessAnalysis` object
    to organize and execute the calculations.
    """
    @property
    def _DefaultName(self):
        raise NotImplementedError('Subclass must define "_DefaultName" attribute')

    @property
    def outputDataset(self):
        raise NotImplementedError('Subclass must define "outputDataset" attribute')

    @property
    def inputDataset(self):
        raise NotImplementedError('Subclass must define "inputDataset" attribute')

    @property
    def ConfigClass(self):
        raise NotImplementedError('Subclass must define "ConfigClass" attribute')
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        if self.config.functorFile:
            self.log.info('Loading transform functor definitions from %s',
                          self.config.functorFile)
            self.funcs = CompositeFunctor.from_file(self.config.functorFile)
            self.funcs.update(dict(PostprocessAnalysis._defaultFuncs))
        else:
            self.funcs = None

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)
        if self.funcs is None:
            raise ValueError("config.functorFile is None. "
                             "Must be a valid path to yaml in order to run Task as a PipelineTask.")
        result = self.run(parq=inputs['inputCatalog'], funcs=self.funcs,
                          dataId=outputRefs.outputCatalog.dataId.full)
        outputs = pipeBase.Struct(outputCatalog=result)
        butlerQC.put(outputs, outputRefs)

    def runDataRef(self, dataRef):
        parq = dataRef.get()
        if self.funcs is None:
            raise ValueError("config.functorFile is None. "
                             "Must be a valid path to yaml in order to run as a CommandlineTask.")
        df = self.run(parq, funcs=self.funcs, dataId=dataRef.dataId)
        self.write(df, dataRef)
        return df
    def run(self, parq, funcs=None, dataId=None, band=None):
        """Do postprocessing calculations.

        Takes a `ParquetTable` object and dataId,
        returns a dataframe with results of postprocessing calculations.

        Parameters
        ----------
        parq : `lsst.pipe.tasks.ParquetTable`
            ParquetTable from which calculations are done.
        funcs : `lsst.pipe.tasks.functors.Functors`
            Functors to apply to the table's columns.
        dataId : `dict`, optional
            Used to add a `patchId` column to the output dataframe.
        band : `str`, optional
            Filter band that is being processed.

        Returns
        -------
        df : `pandas.DataFrame`
        """
        self.log.info("Transforming/standardizing the source table dataId: %s", dataId)

        df = self.transform(band, parq, funcs, dataId).df
        self.log.info("Made a table of %d columns and %d rows", len(df.columns), len(df))
        return df

    def getFunctors(self):
        return self.funcs

    def getAnalysis(self, parq, funcs=None, band=None):
        if funcs is None:
            funcs = self.funcs
        analysis = PostprocessAnalysis(parq, funcs, filt=band)
        return analysis

    def transform(self, band, parq, funcs, dataId):
        analysis = self.getAnalysis(parq, funcs=funcs, band=band)
        df = analysis.df
        if dataId is not None:
            for key, value in dataId.items():
                df[str(key)] = value

        if self.config.primaryKey:
            if df.index.name != self.config.primaryKey and self.config.primaryKey in df:
                df.reset_index(inplace=True, drop=True)
                df.set_index(self.config.primaryKey, inplace=True)

        return pipeBase.Struct(
            df=df,
            analysis=analysis
        )
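
    # Usage sketch for subclasses (dataId values are assumptions): `run` applies the
    # functors loaded from config.functorFile via PostprocessAnalysis, appends each
    # dataId key as a column, and re-indexes on config.primaryKey when that column is
    # present in the result.
    #   df = task.run(parq, funcs=task.getFunctors(),
    #                 dataId={'tract': 9813, 'patch': '4,4'})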
    def writeMetadata(self, dataRef):
        """No metadata to write.
        """
        pass
class TransformObjectCatalogConnections(pipeBase.PipelineTaskConnections,
                                        defaultTemplates={"coaddName": "deep"},
                                        dimensions=("tract", "patch", "skymap")):
    inputCatalog = connectionTypes.Input(
        doc="The vertical concatenation of the deepCoadd_{ref|meas|forced_src} catalogs, "
            "stored as a DataFrame with a multi-level column index per-patch.",
        dimensions=("tract", "patch", "skymap"),
        storageClass="DataFrame",
        name="{coaddName}Coadd_obj",
        deferLoad=True,
    )
    outputCatalog = connectionTypes.Output(
        doc="Per-Patch Object Table of columns transformed from the deepCoadd_obj table per the standard "
            "Data Model.",
        dimensions=("tract", "patch", "skymap"),
        storageClass="DataFrame",
        name="objectTable"
    )
class TransformObjectCatalogConfig(TransformCatalogBaseConfig,
                                   pipelineConnections=TransformObjectCatalogConnections):
    coaddName = pexConfig.Field(
        dtype=str,
        default="deep",
        doc="Name of coadd"
    )
    filterMap = pexConfig.DictField(
        keytype=str,
        itemtype=str,
        default={},
        doc=("Dictionary mapping full filter name to short one for column name munging. "
             "These filters determine the output columns no matter what filters the "
             "input data actually contain."),
        deprecated=("Coadds are now identified by the band, so this transform is unused. "
                    "Will be removed after v22.")
    )
    outputBands = pexConfig.ListField(
        dtype=str,
        default=None,
        optional=True,
        doc=("These bands and only these bands will appear in the output,"
             " NaN-filled if the input does not include them."
             " If None, then use all bands found in the input.")
    )
    camelCase = pexConfig.Field(
        dtype=bool,
        default=False,
        doc=("Write per-band column names with camelCase, else underscore. "
             "For example: gPsFlux instead of g_PsFlux.")
    )
    multilevelOutput = pexConfig.Field(
        dtype=bool,
        default=False,
        doc=("Whether the results dataframe should have a multilevel column index (True) or be flat "
             "and name-munged (False).")
    )
    goodFlags = pexConfig.ListField(
        dtype=str,
        default=[],
        doc=("List of 'good' flags that should be set False when populating empty tables. "
             "All other flags are considered to be 'bad' flags and will be set to True.")
    )
    floatFillValue = pexConfig.Field(
        dtype=float,
        default=np.nan,
        doc="Fill value for float fields when populating empty tables."
    )
    integerFillValue = pexConfig.Field(
        dtype=int,
        default=-1,
        doc="Fill value for integer fields when populating empty tables."
    )

    def setDefaults(self):
        super().setDefaults()
        self.functorFile = os.path.join('$PIPE_TASKS_DIR', 'schemas', 'Object.yaml')
        self.primaryKey = 'objectId'
        self.goodFlags = ['calib_astrometry_used',
                          'calib_photometry_reserved',
                          'calib_photometry_used',
                          'calib_psf_candidate',
                          'calib_psf_reserved']
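
# Behavior sketch for TransformObjectCatalogTask.run (defined below): bands listed in
# config.outputBands but missing from the input deepCoadd_obj are added as empty
# columns, filled according to the config above:
#   float columns   -> config.floatFillValue
#   integer columns -> config.integerFillValue (unsigned integers raise ValueError)
#   flag columns    -> True, except flags named in config.goodFlags, which become False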
class TransformObjectCatalogTask(TransformCatalogBaseTask):
    """Produce a flattened Object Table to match the format specified in
    the Science Data Model.

    Do the same set of postprocessing calculations on all bands.

    This is identical to `TransformCatalogBaseTask`, except that it does the
    specified functor calculations for all filters present in the
    input `deepCoadd_obj` table.  Any specific `"filt"` keywords specified
    by the YAML file will be superseded.
    """
    _DefaultName = "transformObjectCatalog"
    ConfigClass = TransformObjectCatalogConfig

    inputDataset = 'deepCoadd_obj'
    outputDataset = 'objectTable'

    @classmethod
    def _makeArgumentParser(cls):
        parser = ArgumentParser(name=cls._DefaultName)
        parser.add_id_argument("--id", cls.inputDataset,
                               ContainerClass=CoaddDataIdContainer,
                               help="data ID, e.g. --id tract=12345 patch=1,2")
        return parser
    def run(self, parq, funcs=None, dataId=None, band=None):
        # NOTE: the band kwarg is ignored here; all bands are processed.
        dfDict = {}
        analysisDict = {}
        templateDf = pd.DataFrame()

        if isinstance(parq, DeferredDatasetHandle):
            columns = parq.get(component='columns')
            inputBands = columns.unique(level=1).values
        else:
            inputBands = parq.columnLevelNames['band']

        outputBands = self.config.outputBands if self.config.outputBands else inputBands

        # Perform transform for data of filters that exist in parq.
        for inputBand in inputBands:
            if inputBand not in outputBands:
                self.log.info("Ignoring %s band data in the input", inputBand)
                continue
            self.log.info("Transforming the catalog of band %s", inputBand)
            result = self.transform(inputBand, parq, funcs, dataId)
            dfDict[inputBand] = result.df
            analysisDict[inputBand] = result.analysis
            if templateDf.empty:
                templateDf = result.df

        # Put filler values in columns of other wanted bands.
        for filt in outputBands:
            if filt not in dfDict:
                self.log.info("Adding empty columns for band %s", filt)
                dfTemp = templateDf.copy()
                for col in dfTemp.columns:
                    testValue = dfTemp[col].values[0]
                    if isinstance(testValue, (np.bool_, pd.BooleanDtype)):
                        # Boolean flag type: check whether it is a "good" flag.
                        if col in self.config.goodFlags:
                            fillValue = False
                        else:
                            fillValue = True
                    elif isinstance(testValue, numbers.Integral):
                        # numbers.Integral catches both numpy and python integers.
                        if isinstance(testValue, np.unsignedinteger):
                            raise ValueError("Parquet tables may not have unsigned integer columns.")
                        else:
                            fillValue = self.config.integerFillValue
                    else:
                        fillValue = self.config.floatFillValue
                    dfTemp[col].values[:] = fillValue
                dfDict[filt] = dfTemp

        # This makes a multilevel column index, with band as first level.
        df = pd.concat(dfDict, axis=1, names=['band', 'column'])

        if not self.config.multilevelOutput:
            noDupCols = list(set.union(*[set(v.noDupCols) for v in analysisDict.values()]))
            if self.config.primaryKey in noDupCols:
                noDupCols.remove(self.config.primaryKey)
            if dataId is not None:
                noDupCols += list(dataId.keys())
            df = flattenFilters(df, noDupCols=noDupCols, camelCase=self.config.camelCase,
                                inputBands=inputBands)

        self.log.info("Made a table of %d columns and %d rows", len(df.columns), len(df))

        return df
class TractObjectDataIdContainer(CoaddDataIdContainer):

    def makeDataRefList(self, namespace):
        """Make self.refList from self.idList.

        Generate a list of data references given tract and/or patch.
        This was adapted from `TractQADataIdContainer`, which was
        `TractDataIdContainer` modified to not require "filter".
        Only existing dataRefs are returned.
        """
        def getPatchRefList(tract):
            return [namespace.butler.dataRef(datasetType=self.datasetType,
                                             tract=tract.getId(),
                                             patch="%d,%d" % patch.getIndex())
                    for patch in tract]

        tractRefs = defaultdict(list)  # Data references for each tract
        for dataId in self.idList:
            skymap = self.getSkymap(namespace)

            if "tract" in dataId:
                tractId = dataId["tract"]
                if "patch" in dataId:
                    tractRefs[tractId].append(namespace.butler.dataRef(datasetType=self.datasetType,
                                                                       tract=tractId,
                                                                       patch=dataId['patch']))
                else:
                    tractRefs[tractId] += getPatchRefList(skymap[tractId])
            else:
                tractRefs = dict((tract.getId(), tractRefs.get(tract.getId(), []) + getPatchRefList(tract))
                                 for tract in skymap)
        outputRefList = []
        for tractRefList in tractRefs.values():
            existingRefs = [ref for ref in tractRefList if ref.datasetExists()]
            outputRefList.append(existingRefs)

        self.refList = outputRefList
class ConsolidateObjectTableConnections(pipeBase.PipelineTaskConnections,
                                        dimensions=("tract", "skymap")):
    inputCatalogs = connectionTypes.Input(
        doc="Per-Patch objectTables conforming to the standard data model.",
        name="objectTable",
        storageClass="DataFrame",
        dimensions=("tract", "patch", "skymap"),
        multiple=True
    )
    outputCatalog = connectionTypes.Output(
        doc="Per-tract horizontal concatenation of the input objectTables",
        name="objectTable_tract",
        storageClass="DataFrame",
        dimensions=("tract", "skymap"),
    )
class ConsolidateObjectTableConfig(pipeBase.PipelineTaskConfig,
                                   pipelineConnections=ConsolidateObjectTableConnections):
    coaddName = pexConfig.Field(
        dtype=str,
        default="deep",
        doc="Name of coadd"
    )


class ConsolidateObjectTableTask(CmdLineTask, pipeBase.PipelineTask):
    """Write patch-merged source tables to a tract-level parquet file.

    Concatenates the `objectTable` list into a per-tract `objectTable_tract`.
    """
    _DefaultName = "consolidateObjectTable"
    ConfigClass = ConsolidateObjectTableConfig

    inputDataset = 'objectTable'
    outputDataset = 'objectTable_tract'

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)
        self.log.info("Concatenating %s per-patch Object Tables",
                      len(inputs['inputCatalogs']))
        df = pd.concat(inputs['inputCatalogs'])
        butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs)

    @classmethod
    def _makeArgumentParser(cls):
        parser = ArgumentParser(name=cls._DefaultName)
        parser.add_id_argument("--id", cls.inputDataset,
                               help="data ID, e.g. --id tract=12345",
                               ContainerClass=TractObjectDataIdContainer)
        return parser

    def runDataRef(self, patchRefList):
        df = pd.concat([patchRef.get().toDataFrame() for patchRef in patchRefList])
        patchRefList[0].put(ParquetTable(dataFrame=df), self.outputDataset)
    def writeMetadata(self, dataRef):
        """No metadata to write.
        """
        pass
class TransformSourceTableConnections(pipeBase.PipelineTaskConnections,
                                      defaultTemplates={"catalogType": ""},
                                      dimensions=("instrument", "visit", "detector")):
    inputCatalog = connectionTypes.Input(
        doc="Wide input catalog of sources produced by WriteSourceTableTask",
        name="{catalogType}source",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector"),
        deferLoad=True
    )
    outputCatalog = connectionTypes.Output(
        doc="Narrower, per-detector Source Table transformed and converted per a "
            "specified set of functors",
        name="{catalogType}sourceTable",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector")
    )


class TransformSourceTableConfig(TransformCatalogBaseConfig,
                                 pipelineConnections=TransformSourceTableConnections):

    def setDefaults(self):
        super().setDefaults()
        self.functorFile = os.path.join('$PIPE_TASKS_DIR', 'schemas', 'Source.yaml')
        self.primaryKey = 'sourceId'
class TransformSourceTableTask(TransformCatalogBaseTask):
    """Transform/standardize a source catalog.
    """
    _DefaultName = "transformSourceTable"
    ConfigClass = TransformSourceTableConfig

    inputDataset = 'source'
    outputDataset = 'sourceTable'

    @classmethod
    def _makeArgumentParser(cls):
        parser = ArgumentParser(name=cls._DefaultName)
        parser.add_id_argument("--id", datasetType=cls.inputDataset,
                               help="data ID, e.g. --id visit=12345 ccd=0")
        return parser

    def runDataRef(self, dataRef):
        """Override to specify band label to run()."""
        parq = dataRef.get()
        funcs = self.getFunctors()
        band = dataRef.get("calexp_filterLabel", immediate=True).bandLabel
        df = self.run(parq, funcs=funcs, dataId=dataRef.dataId, band=band)
        self.write(df, dataRef)
        return df
class ConsolidateVisitSummaryConnections(pipeBase.PipelineTaskConnections,
                                         dimensions=("instrument", "visit",),
                                         defaultTemplates={"calexpType": ""}):
    calexp = connectionTypes.Input(
        doc="Processed exposures used for metadata",
        name="{calexpType}calexp",
        storageClass="ExposureF",
        dimensions=("instrument", "visit", "detector"),
        deferLoad=True,
        multiple=True,
    )
    visitSummary = connectionTypes.Output(
        doc=("Per-visit consolidated exposure metadata.  These catalogs use "
             "detector id for the id and are sorted for fast lookups of a "
             "detector."),
        name="{calexpType}visitSummary",
        storageClass="ExposureCatalog",
        dimensions=("instrument", "visit"),
    )


class ConsolidateVisitSummaryConfig(pipeBase.PipelineTaskConfig,
                                    pipelineConnections=ConsolidateVisitSummaryConnections):
    """Config for ConsolidateVisitSummaryTask"""
    pass
class ConsolidateVisitSummaryTask(pipeBase.PipelineTask, pipeBase.CmdLineTask):
    """Task to consolidate per-detector visit metadata.

    This task aggregates the following metadata from all the detectors in a
    single visit into an exposure catalog:

    - The visitInfo.
    - The wcs.
    - The photoCalib.
    - The physical_filter and band (if available).
    - The psf size, shape, and effective area at the center of the detector.
    - The corners of the bounding box in right ascension/declination.

    Other quantities such as Detector, Psf, ApCorrMap, and TransmissionCurve
    are not persisted here because of storage concerns, and because of their
    limited utility as summary statistics.

    Tests for this task are performed in ci_hsc_gen3.
    """
    _DefaultName = "consolidateVisitSummary"
    ConfigClass = ConsolidateVisitSummaryConfig

    @classmethod
    def _makeArgumentParser(cls):
        parser = ArgumentParser(name=cls._DefaultName)
        parser.add_id_argument("--id", "calexp",
                               help="data ID, e.g. --id visit=12345",
                               ContainerClass=VisitDataIdContainer)
        return parser

    def writeMetadata(self, dataRef):
        """No metadata to persist, so override to remove metadata persistence.
        """
        pass

    def writeConfig(self, butler, clobber=False, doBackup=True):
        """No config to persist, so override to remove config persistence.
        """
        pass

    def runDataRef(self, dataRefList):
        visit = dataRefList[0].dataId['visit']

        self.log.debug("Concatenating metadata from %d per-detector calexps (visit %d)",
                       len(dataRefList), visit)

        expCatalog = self._combineExposureMetadata(visit, dataRefList, isGen3=False)

        dataRefList[0].put(expCatalog, 'visitSummary', visit=visit)

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        dataRefs = butlerQC.get(inputRefs.calexp)
        visit = dataRefs[0].dataId.byName()['visit']

        self.log.debug("Concatenating metadata from %d per-detector calexps (visit %d)",
                       len(dataRefs), visit)

        expCatalog = self._combineExposureMetadata(visit, dataRefs)

        butlerQC.put(expCatalog, outputRefs.visitSummary)
    def _combineExposureMetadata(self, visit, dataRefs, isGen3=True):
        """Make a combined exposure catalog from a list of dataRefs.

        These dataRefs must point to exposures with wcs, summaryStats,
        and other visit metadata.

        Parameters
        ----------
        visit : `int`
            Visit identification number.
        dataRefs : `list`
            List of dataRefs in visit.  May be a list of
            `lsst.daf.persistence.ButlerDataRef` (Gen2) or
            `lsst.daf.butler.DeferredDatasetHandle` (Gen3).
        isGen3 : `bool`, optional
            Specifies if this is a Gen3 list of datarefs.

        Returns
        -------
        visitSummary : `lsst.afw.table.ExposureCatalog`
            Exposure catalog with per-detector summary information.
        """
        schema = self._makeVisitSummarySchema()
        cat = afwTable.ExposureCatalog(schema)
        cat.resize(len(dataRefs))

        cat['visit'] = visit

        for i, dataRef in enumerate(dataRefs):
            if isGen3:
                visitInfo = dataRef.get(component='visitInfo')
                filterLabel = dataRef.get(component='filterLabel')
                summaryStats = dataRef.get(component='summaryStats')
                detector = dataRef.get(component='detector')
                wcs = dataRef.get(component='wcs')
                photoCalib = dataRef.get(component='photoCalib')
                bbox = dataRef.get(component='bbox')
                validPolygon = dataRef.get(component='validPolygon')
            else:
                # Gen2: read a minimal sub-image, since only the metadata is needed.
                gen2_read_bbox = lsst.geom.BoxI(lsst.geom.PointI(0, 0), lsst.geom.PointI(1, 1))
                exp = dataRef.get(datasetType='calexp_sub', bbox=gen2_read_bbox)
                visitInfo = exp.getInfo().getVisitInfo()
                filterLabel = dataRef.get("calexp_filterLabel")
                summaryStats = exp.getInfo().getSummaryStats()
                wcs = exp.getWcs()
                photoCalib = exp.getPhotoCalib()
                detector = exp.getDetector()
                bbox = dataRef.get(datasetType='calexp_bbox')
                validPolygon = exp.getInfo().getValidPolygon()

            rec = cat[i]
            rec.setBBox(bbox)
            rec.setVisitInfo(visitInfo)
            rec.setWcs(wcs)
            rec.setPhotoCalib(photoCalib)
            rec.setValidPolygon(validPolygon)

            rec['physical_filter'] = filterLabel.physicalLabel if filterLabel.hasPhysicalLabel() else ""
            rec['band'] = filterLabel.bandLabel if filterLabel.hasBandLabel() else ""
            rec.setId(detector.getId())
            rec['psfSigma'] = summaryStats.psfSigma
            rec['psfIxx'] = summaryStats.psfIxx
            rec['psfIyy'] = summaryStats.psfIyy
            rec['psfIxy'] = summaryStats.psfIxy
            rec['psfArea'] = summaryStats.psfArea
            rec['raCorners'][:] = summaryStats.raCorners
            rec['decCorners'][:] = summaryStats.decCorners
            rec['ra'] = summaryStats.ra
            rec['decl'] = summaryStats.decl
            rec['zenithDistance'] = summaryStats.zenithDistance
            rec['zeroPoint'] = summaryStats.zeroPoint
            rec['skyBg'] = summaryStats.skyBg
            rec['skyNoise'] = summaryStats.skyNoise
            rec['meanVar'] = summaryStats.meanVar
            rec['astromOffsetMean'] = summaryStats.astromOffsetMean
            rec['astromOffsetStd'] = summaryStats.astromOffsetStd
            rec['nPsfStar'] = summaryStats.nPsfStar
            rec['psfStarDeltaE1Median'] = summaryStats.psfStarDeltaE1Median
            rec['psfStarDeltaE2Median'] = summaryStats.psfStarDeltaE2Median
            rec['psfStarDeltaE1Scatter'] = summaryStats.psfStarDeltaE1Scatter
            rec['psfStarDeltaE2Scatter'] = summaryStats.psfStarDeltaE2Scatter
            rec['psfStarDeltaSizeMedian'] = summaryStats.psfStarDeltaSizeMedian
            rec['psfStarDeltaSizeScatter'] = summaryStats.psfStarDeltaSizeScatter
            rec['psfStarScaledDeltaSizeScatter'] = summaryStats.psfStarScaledDeltaSizeScatter

        metadata = dafBase.PropertyList()
        metadata.add("COMMENT", "Catalog id is detector id, sorted.")
        metadata.add("COMMENT", "Only detectors with data have entries.")
        cat.setMetadata(metadata)

        cat.sort()
        return cat
    def _makeVisitSummarySchema(self):
        """Make the schema for the visitSummary catalog."""
        schema = afwTable.ExposureTable.makeMinimalSchema()
        schema.addField('visit', type='L', doc='Visit number')
        schema.addField('physical_filter', type='String', size=32, doc='Physical filter')
        schema.addField('band', type='String', size=32, doc='Name of band')
        schema.addField('psfSigma', type='F',
                        doc='PSF model second-moments determinant radius (center of chip) (pixel)')
        schema.addField('psfArea', type='F',
                        doc='PSF model effective area (center of chip) (pixel**2)')
        schema.addField('psfIxx', type='F',
                        doc='PSF model Ixx (center of chip) (pixel**2)')
        schema.addField('psfIyy', type='F',
                        doc='PSF model Iyy (center of chip) (pixel**2)')
        schema.addField('psfIxy', type='F',
                        doc='PSF model Ixy (center of chip) (pixel**2)')
        schema.addField('raCorners', type='ArrayD', size=4,
                        doc='Right Ascension of bounding box corners (degrees)')
        schema.addField('decCorners', type='ArrayD', size=4,
                        doc='Declination of bounding box corners (degrees)')
        schema.addField('ra', type='D',
                        doc='Right Ascension of bounding box center (degrees)')
        schema.addField('decl', type='D',
                        doc='Declination of bounding box center (degrees)')
        schema.addField('zenithDistance', type='F',
                        doc='Zenith distance of bounding box center (degrees)')
        schema.addField('zeroPoint', type='F',
                        doc='Mean zeropoint in detector (mag)')
        schema.addField('skyBg', type='F',
                        doc='Average sky background (ADU)')
        schema.addField('skyNoise', type='F',
                        doc='Average sky noise (ADU)')
        schema.addField('meanVar', type='F',
                        doc='Mean variance of the weight plane (ADU**2)')
        schema.addField('astromOffsetMean', type='F',
                        doc='Mean offset of astrometric calibration matches (arcsec)')
        schema.addField('astromOffsetStd', type='F',
                        doc='Standard deviation of offsets of astrometric calibration matches (arcsec)')
        schema.addField('nPsfStar', type='I', doc='Number of stars used for PSF model')
        schema.addField('psfStarDeltaE1Median', type='F',
                        doc='Median E1 residual (starE1 - psfE1) for psf stars')
        schema.addField('psfStarDeltaE2Median', type='F',
                        doc='Median E2 residual (starE2 - psfE2) for psf stars')
        schema.addField('psfStarDeltaE1Scatter', type='F',
                        doc='Scatter (via MAD) of E1 residual (starE1 - psfE1) for psf stars')
        schema.addField('psfStarDeltaE2Scatter', type='F',
                        doc='Scatter (via MAD) of E2 residual (starE2 - psfE2) for psf stars')
        schema.addField('psfStarDeltaSizeMedian', type='F',
                        doc='Median size residual (starSize - psfSize) for psf stars (pixel)')
        schema.addField('psfStarDeltaSizeScatter', type='F',
                        doc='Scatter (via MAD) of size residual (starSize - psfSize) for psf stars (pixel)')
        schema.addField('psfStarScaledDeltaSizeScatter', type='F',
                        doc='Scatter (via MAD) of size residual scaled by median size squared')

        return schema
class VisitDataIdContainer(DataIdContainer):
    """DataIdContainer that groups sensor-level ids by visit.
    """

    def makeDataRefList(self, namespace):
        """Make self.refList from self.idList.

        Generate a list of data references grouped by visit.

        Parameters
        ----------
        namespace : `argparse.Namespace`
            Namespace used by `lsst.pipe.base.CmdLineTask` to parse command line arguments.
        """
        # Group by visits
        visitRefs = defaultdict(list)
        for dataId in self.idList:
            if "visit" in dataId:
                visitId = dataId["visit"]
                subset = namespace.butler.subset(self.datasetType, dataId=dataId)
                visitRefs[visitId].extend([dataRef for dataRef in subset])

        outputRefList = []
        for refList in visitRefs.values():
            existingRefs = [ref for ref in refList if ref.datasetExists()]
            if existingRefs:
                outputRefList.append(existingRefs)

        self.refList = outputRefList
class ConsolidateSourceTableConnections(pipeBase.PipelineTaskConnections,
                                        defaultTemplates={"catalogType": ""},
                                        dimensions=("instrument", "visit")):
    inputCatalogs = connectionTypes.Input(
        doc="Input per-detector Source Tables",
        name="{catalogType}sourceTable",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector"),
        multiple=True
    )
    outputCatalog = connectionTypes.Output(
        doc="Per-visit concatenation of Source Table",
        name="{catalogType}sourceTable_visit",
        storageClass="DataFrame",
        dimensions=("instrument", "visit")
    )


class ConsolidateSourceTableConfig(pipeBase.PipelineTaskConfig,
                                   pipelineConnections=ConsolidateSourceTableConnections):
    pass
class ConsolidateSourceTableTask(CmdLineTask, pipeBase.PipelineTask):
    """Concatenate `sourceTable` list into a per-visit `sourceTable_visit`.
    """
    _DefaultName = 'consolidateSourceTable'
    ConfigClass = ConsolidateSourceTableConfig

    inputDataset = 'sourceTable'
    outputDataset = 'sourceTable_visit'

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        from .makeCoaddTempExp import reorderRefs

        detectorOrder = [ref.dataId['detector'] for ref in inputRefs.inputCatalogs]
        detectorOrder.sort()
        inputRefs = reorderRefs(inputRefs, detectorOrder, dataIdKey='detector')
        inputs = butlerQC.get(inputRefs)
        self.log.info("Concatenating %s per-detector Source Tables",
                      len(inputs['inputCatalogs']))
        df = pd.concat(inputs['inputCatalogs'])
        butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs)

    def runDataRef(self, dataRefList):
        self.log.info("Concatenating %s per-detector Source Tables", len(dataRefList))
        df = pd.concat([dataRef.get().toDataFrame() for dataRef in dataRefList])
        dataRefList[0].put(ParquetTable(dataFrame=df), self.outputDataset)
    @classmethod
    def _makeArgumentParser(cls):
        parser = ArgumentParser(name=cls._DefaultName)
        parser.add_id_argument("--id", cls.inputDataset,
                               help="data ID, e.g. --id visit=12345",
                               ContainerClass=VisitDataIdContainer)
        return parser

    def writeMetadata(self, dataRef):
        """No metadata to write.
        """
        pass

    def writeConfig(self, butler, clobber=False, doBackup=True):
        """No config to write.
        """
        pass
class MakeCcdVisitTableConnections(pipeBase.PipelineTaskConnections,
                                   dimensions=("instrument",),
                                   defaultTemplates={"calexpType": ""}):
    visitSummaryRefs = connectionTypes.Input(
        doc="Data references for per-visit consolidated exposure metadata from ConsolidateVisitSummaryTask",
        name="{calexpType}visitSummary",
        storageClass="ExposureCatalog",
        dimensions=("instrument", "visit"),
        multiple=True,
        deferLoad=True,
    )
    outputCatalog = connectionTypes.Output(
        doc="CCD and Visit metadata table",
        name="ccdVisitTable",
        storageClass="DataFrame",
        dimensions=("instrument",)
    )
class MakeCcdVisitTableConfig(pipeBase.PipelineTaskConfig,
                              pipelineConnections=MakeCcdVisitTableConnections):
    pass


class MakeCcdVisitTableTask(CmdLineTask, pipeBase.PipelineTask):
    """Produce a `ccdVisitTable` from the `visitSummary` exposure catalogs.
    """
    _DefaultName = 'makeCcdVisitTable'
    ConfigClass = MakeCcdVisitTableConfig
    def run(self, visitSummaryRefs):
        """Make a table of ccd information from the `visitSummary` catalogs.

        Parameters
        ----------
        visitSummaryRefs : `list` of `lsst.daf.butler.DeferredDatasetHandle`
            List of DeferredDatasetHandles pointing to exposure catalogs with
            per-detector summary information.

        Returns
        -------
        result : `lsst.pipe.base.Struct`
            Results struct with attribute:

            ``outputCatalog``
                Catalog of ccd and visit information.
        """
        ccdEntries = []
        for visitSummaryRef in visitSummaryRefs:
            visitSummary = visitSummaryRef.get()
            visitInfo = visitSummary[0].getVisitInfo()

            summaryTable = visitSummary.asAstropy()
            selectColumns = ['id', 'visit', 'physical_filter', 'band', 'ra', 'decl', 'zenithDistance',
                             'zeroPoint', 'psfSigma', 'skyBg', 'skyNoise']
            ccdEntry = summaryTable[selectColumns].to_pandas().set_index('id')
            # Rename 'visit' to 'visitId' to match the output data model.
            ccdEntry = ccdEntry.rename(columns={"visit": "visitId"})
            dataIds = [DataCoordinate.standardize(visitSummaryRef.dataId, detector=id) for id in
                       summaryTable['id']]
            packer = visitSummaryRef.dataId.universe.makePacker('visit_detector', visitSummaryRef.dataId)
            ccdVisitIds = [packer.pack(dataId) for dataId in dataIds]
            ccdEntry['ccdVisitId'] = ccdVisitIds
            ccdEntry['detector'] = summaryTable['id']
            pixToArcseconds = np.array([vR.getWcs().getPixelScale().asArcseconds() for vR in visitSummary])
            ccdEntry["seeing"] = visitSummary['psfSigma'] * np.sqrt(8 * np.log(2)) * pixToArcseconds

            ccdEntry["skyRotation"] = visitInfo.getBoresightRotAngle().asDegrees()
            ccdEntry["expMidpt"] = visitInfo.getDate().toPython()
            ccdEntry["expMidptMJD"] = visitInfo.getDate().get(dafBase.DateTime.MJD)
            expTime = visitInfo.getExposureTime()
            ccdEntry['expTime'] = expTime
            ccdEntry["obsStart"] = ccdEntry["expMidpt"] - 0.5 * pd.Timedelta(seconds=expTime)
            expTime_days = expTime / (60*60*24)
            ccdEntry["obsStartMJD"] = ccdEntry["expMidptMJD"] - 0.5 * expTime_days
            ccdEntry['darkTime'] = visitInfo.getDarkTime()
            ccdEntry['xSize'] = summaryTable['bbox_max_x'] - summaryTable['bbox_min_x']
            ccdEntry['ySize'] = summaryTable['bbox_max_y'] - summaryTable['bbox_min_y']
            ccdEntry['llcra'] = summaryTable['raCorners'][:, 0]
            ccdEntry['llcdec'] = summaryTable['decCorners'][:, 0]
            ccdEntry['ulcra'] = summaryTable['raCorners'][:, 1]
            ccdEntry['ulcdec'] = summaryTable['decCorners'][:, 1]
            ccdEntry['urcra'] = summaryTable['raCorners'][:, 2]
            ccdEntry['urcdec'] = summaryTable['decCorners'][:, 2]
            ccdEntry['lrcra'] = summaryTable['raCorners'][:, 3]
            ccdEntry['lrcdec'] = summaryTable['decCorners'][:, 3]
            ccdEntries.append(ccdEntry)

        outputCatalog = pd.concat(ccdEntries)
        outputCatalog.set_index('ccdVisitId', inplace=True, verify_integrity=True)
        return pipeBase.Struct(outputCatalog=outputCatalog)
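
# Worked example for the 'seeing' column computed in MakeCcdVisitTableTask.run above
# (the numbers are illustrative assumptions): psfSigma is the PSF determinant radius
# in pixels, so the FWHM in arcseconds is
#   seeing = psfSigma * sqrt(8 * ln 2) * pixelScale
# e.g. psfSigma = 1.7 pix and pixelScale = 0.168 arcsec/pix give
#   1.7 * 2.355 * 0.168 ~ 0.67 arcsec.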
class MakeVisitTableConnections(pipeBase.PipelineTaskConnections,
                                dimensions=("instrument",),
                                defaultTemplates={"calexpType": ""}):
    visitSummaries = connectionTypes.Input(
        doc="Per-visit consolidated exposure metadata from ConsolidateVisitSummaryTask",
        name="{calexpType}visitSummary",
        storageClass="ExposureCatalog",
        dimensions=("instrument", "visit",),
        multiple=True,
        deferLoad=True,
    )
    outputCatalog = connectionTypes.Output(
        doc="Visit metadata table",
        name="visitTable",
        storageClass="DataFrame",
        dimensions=("instrument",)
    )


class MakeVisitTableConfig(pipeBase.PipelineTaskConfig,
                           pipelineConnections=MakeVisitTableConnections):
    pass


class MakeVisitTableTask(CmdLineTask, pipeBase.PipelineTask):
    """Produce a `visitTable` from the `visitSummary` exposure catalogs.
    """
    _DefaultName = 'makeVisitTable'
    ConfigClass = MakeVisitTableConfig
    def run(self, visitSummaries):
        """Make a table of visit information from the `visitSummary` catalogs.

        Parameters
        ----------
        visitSummaries : `list` of `lsst.afw.table.ExposureCatalog`
            List of exposure catalogs with per-detector summary information.

        Returns
        -------
        result : `lsst.pipe.base.Struct`
            Results struct with attribute:

            ``outputCatalog``
                Catalog of visit information.
        """
        visitEntries = []
        for visitSummary in visitSummaries:
            visitSummary = visitSummary.get()
            visitRow = visitSummary[0]
            visitInfo = visitRow.getVisitInfo()

            visitEntry = {}
            visitEntry["visitId"] = visitRow['visit']
            visitEntry["visit"] = visitRow['visit']
            visitEntry["physical_filter"] = visitRow['physical_filter']
            visitEntry["band"] = visitRow['band']
            raDec = visitInfo.getBoresightRaDec()
            visitEntry["ra"] = raDec.getRa().asDegrees()
            visitEntry["decl"] = raDec.getDec().asDegrees()
            visitEntry["skyRotation"] = visitInfo.getBoresightRotAngle().asDegrees()
            azAlt = visitInfo.getBoresightAzAlt()
            visitEntry["azimuth"] = azAlt.getLongitude().asDegrees()
            visitEntry["altitude"] = azAlt.getLatitude().asDegrees()
            visitEntry["zenithDistance"] = 90 - azAlt.getLatitude().asDegrees()
            visitEntry["airmass"] = visitInfo.getBoresightAirmass()
            expTime = visitInfo.getExposureTime()
            visitEntry["expTime"] = expTime
            visitEntry["expMidpt"] = visitInfo.getDate().toPython()
            visitEntry["expMidptMJD"] = visitInfo.getDate().get(dafBase.DateTime.MJD)
            visitEntry["obsStart"] = visitEntry["expMidpt"] - 0.5 * pd.Timedelta(seconds=expTime)
            expTime_days = expTime / (60*60*24)
            visitEntry["obsStartMJD"] = visitEntry["expMidptMJD"] - 0.5 * expTime_days
            visitEntries.append(visitEntry)

        outputCatalog = pd.DataFrame(data=visitEntries)
        outputCatalog.set_index('visitId', inplace=True, verify_integrity=True)
        return pipeBase.Struct(outputCatalog=outputCatalog)
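
# Worked example for the obsStart columns computed in MakeVisitTableTask.run above
# (the expTime value is an assumption): with expTime = 30 s, the exposure starts half
# the exposure time before the midpoint:
#   obsStart    = expMidpt - pd.Timedelta(seconds=15)
#   obsStartMJD = expMidptMJD - 15 / 86400  ~ expMidptMJD - 1.74e-4 days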
class WriteForcedSourceTableConnections(pipeBase.PipelineTaskConnections,
                                        dimensions=("instrument", "visit", "detector", "skymap", "tract")):
    inputCatalog = connectionTypes.Input(
        doc="Primary per-detector, single-epoch forced-photometry catalog. "
            "By default, it is the output of ForcedPhotCcdTask on calexps",
        name="forced_src",
        storageClass="SourceCatalog",
        dimensions=("instrument", "visit", "detector", "skymap", "tract")
    )
    inputCatalogDiff = connectionTypes.Input(
        doc="Secondary multi-epoch, per-detector, forced photometry catalog. "
            "By default, it is the output of ForcedPhotCcdTask run on image differences.",
        name="forced_diff",
        storageClass="SourceCatalog",
        dimensions=("instrument", "visit", "detector", "skymap", "tract")
    )
    outputCatalog = connectionTypes.Output(
        doc="Input catalogs horizontally joined on `objectId` in Parquet format",
        name="mergedForcedSource",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector", "skymap", "tract")
    )
class WriteForcedSourceTableConfig(WriteSourceTableConfig,
                                   pipelineConnections=WriteForcedSourceTableConnections):
    key = lsst.pex.config.Field(
        doc="Column on which to join the two input tables and make the primary key of the output",
        dtype=str,
        default="objectId",
    )
class WriteForcedSourceTableTask(pipeBase.PipelineTask):
    """Merge and convert per-detector forced source catalogs to parquet.

    Because the predecessor ForcedPhotCcdTask operates per-detector,
    per-tract (i.e., it has tract in its dimensions), detectors
    on the tract boundary may have multiple forced source catalogs.

    The successor task TransformForcedSourceTable runs per-patch
    and temporally aggregates overlapping mergedForcedSource catalogs from all
    available epochs.
    """
    _DefaultName = "writeForcedSourceTable"
    ConfigClass = WriteForcedSourceTableConfig

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)
        # Add ccdVisitId to allow joining with CcdVisitTable
        inputs['ccdVisitId'] = butlerQC.quantum.dataId.pack("visit_detector")
        inputs['band'] = butlerQC.quantum.dataId.full['band']
        outputs = self.run(**inputs)
        butlerQC.put(outputs, outputRefs)

    def run(self, inputCatalog, inputCatalogDiff, ccdVisitId=None, band=None):
        dfs = []
        for table, dataset in zip((inputCatalog, inputCatalogDiff), ('calexp', 'diff')):
            df = table.asAstropy().to_pandas().set_index(self.config.key, drop=False)
            df = df.reindex(sorted(df.columns), axis=1)
            df['ccdVisitId'] = ccdVisitId if ccdVisitId else pd.NA
            df['band'] = band if band else pd.NA
            df.columns = pd.MultiIndex.from_tuples([(dataset, c) for c in df.columns],
                                                   names=('dataset', 'column'))
            dfs.append(df)

        outputCatalog = functools.reduce(lambda d1, d2: d1.join(d2), dfs)
        return pipeBase.Struct(outputCatalog=outputCatalog)
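
# Illustrative sketch (the column name is an assumption): the merged output of
# WriteForcedSourceTableTask.run has a two-level ('dataset', 'column') index, so the
# same measurement can be compared between the direct image and the difference image:
#   flux_calexp = outputCatalog['calexp']['base_PsfFlux_instFlux']
#   flux_diff   = outputCatalog['diff']['base_PsfFlux_instFlux']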
class TransformForcedSourceTableConnections(pipeBase.PipelineTaskConnections,
                                            dimensions=("instrument", "skymap", "patch", "tract")):
    inputCatalogs = connectionTypes.Input(
        doc="Parquet table of merged ForcedSources produced by WriteForcedSourceTableTask",
        name="mergedForcedSource",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector", "skymap", "tract"),
        multiple=True,
        deferLoad=True
    )
    referenceCatalog = connectionTypes.Input(
        doc="Reference catalog which was used to seed the forcedPhot. Columns "
            "objectId, detect_isPrimary, detect_isTractInner, detect_isPatchInner "
            "are expected.",
        name="objectTable",
        storageClass="DataFrame",
        dimensions=("tract", "patch", "skymap"),
        deferLoad=True
    )
    outputCatalog = connectionTypes.Output(
        doc="Narrower, temporally-aggregated, per-patch ForcedSource Table transformed and converted per a "
            "specified set of functors",
        name="forcedSourceTable",
        storageClass="DataFrame",
        dimensions=("tract", "patch", "skymap")
    )
class TransformForcedSourceTableConfig(TransformCatalogBaseConfig,
                                       pipelineConnections=TransformForcedSourceTableConnections):
    referenceColumns = pexConfig.ListField(
        dtype=str,
        default=["detect_isPrimary", "detect_isTractInner", "detect_isPatchInner"],
        doc="Columns to pull from reference catalog",
    )
    keyRef = lsst.pex.config.Field(
        doc="Column on which to join the two input tables and make the primary key of the output",
        dtype=str,
        default="objectId",
    )
    key = lsst.pex.config.Field(
        doc="Rename the output DataFrame index to this name",
        dtype=str,
        default="forcedSourceId",
    )

    def setDefaults(self):
        super().setDefaults()
        self.functorFile = os.path.join('$PIPE_TASKS_DIR', 'schemas', 'ForcedSource.yaml')
class TransformForcedSourceTableTask(TransformCatalogBaseTask):
    """Transform/standardize a ForcedSource catalog.

    Transforms each wide, per-detector forcedSource parquet table per the
    specification file (per-camera defaults found in ForcedSource.yaml).
    All epochs that overlap the patch are aggregated into one per-patch
    narrow-parquet file.

    No de-duplication of rows is performed. Duplicate-resolution flags are
    pulled in from the referenceCatalog: `detect_isPrimary`,
    `detect_isTractInner`, `detect_isPatchInner`, so that the user may
    de-duplicate for analysis or compare duplicates for QA.

    The resulting table includes multiple bands. Epochs (MJDs) and other useful
    per-visit information can be retrieved by joining with the CcdVisitTable on
    ccdVisitId.
    """
    _DefaultName = "transformForcedSourceTable"
    ConfigClass = TransformForcedSourceTableConfig

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)
        if self.funcs is None:
            raise ValueError("config.functorFile is None. "
                             "Must be a valid path to yaml in order to run Task as a PipelineTask.")
        outputs = self.run(inputs['inputCatalogs'], inputs['referenceCatalog'], funcs=self.funcs,
                           dataId=outputRefs.outputCatalog.dataId.full)

        butlerQC.put(outputs, outputRefs)

    def run(self, inputCatalogs, referenceCatalog, funcs=None, dataId=None, band=None):
        dfs = []
        ref = referenceCatalog.get(parameters={"columns": self.config.referenceColumns})
        self.log.info("Aggregating %s input catalogs", len(inputCatalogs))
        for handle in inputCatalogs:
            result = self.transform(None, handle, funcs, dataId)
            # Keep only rows whose objectId appears in the reference catalog (patch overlap)
            dfs.append(result.df.join(ref, how='inner'))

        outputCatalog = pd.concat(dfs)

        # Name the (objectId) index after config.keyRef, then move it into the columns
        outputCatalog.index.rename(self.config.keyRef, inplace=True)
        outputCatalog.reset_index(inplace=True)
        # Set the forcedSourceId (specified in ForcedSource.yaml) as the unique index
        outputCatalog.set_index("forcedSourceId", inplace=True, verify_integrity=True)
        # Rename the index to config.key
        outputCatalog.index.rename(self.config.key, inplace=True)

        self.log.info("Made a table of %d columns and %d rows",
                      len(outputCatalog.columns), len(outputCatalog))
        return pipeBase.Struct(outputCatalog=outputCatalog)
class ConsolidateTractConnections(pipeBase.PipelineTaskConnections,
                                  defaultTemplates={"catalogType": ""},
                                  dimensions=("instrument", "tract")):
    inputCatalogs = connectionTypes.Input(
        doc="Input per-patch DataFrame Tables to be concatenated",
        name="{catalogType}ForcedSourceTable",
        storageClass="DataFrame",
        dimensions=("tract", "patch", "skymap"),
        multiple=True,
    )
    outputCatalog = connectionTypes.Output(
        doc="Output per-tract concatenation of DataFrame Tables",
        name="{catalogType}ForcedSourceTable_tract",
        storageClass="DataFrame",
        dimensions=("tract", "skymap"),
    )
class ConsolidateTractConfig(pipeBase.PipelineTaskConfig,
                             pipelineConnections=ConsolidateTractConnections):
    pass


class ConsolidateTractTask(CmdLineTask, pipeBase.PipelineTask):
    """Concatenate any per-patch, dataframe list into a single
    per-tract DataFrame.
    """
    _DefaultName = 'ConsolidateTract'
    ConfigClass = ConsolidateTractConfig

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)
        self.log.info("Concatenating %s per-patch %s Tables",
                      len(inputs['inputCatalogs']),
                      inputRefs.inputCatalogs[0].datasetType.name)
        df = pd.concat(inputs['inputCatalogs'])
        butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs)