import functools
import os
import numbers
from collections import defaultdict

import numpy as np
import pandas as pd

import lsst.geom
import lsst.pex.config as pexConfig
import lsst.pipe.base as pipeBase
import lsst.daf.base as dafBase
import lsst.afw.table as afwTable
from lsst.meas.base import SingleFrameMeasurementTask
from lsst.pipe.base import CmdLineTask, ArgumentParser, DataIdContainer, connectionTypes
from lsst.daf.butler import DeferredDatasetHandle, DataCoordinate

from .parquetTable import ParquetTable
from .multiBandUtils import makeMergeArgumentParser, MergeSourcesRunner
from .coaddBase import CoaddDataIdContainer
from .functors import CompositeFunctor, Column, RAColumn, DecColumn
def flattenFilters(df, noDupCols=['coord_ra', 'coord_dec'], camelCase=False, inputBands=None):
    """Flattens a dataframe with multilevel column index.
    """
    newDf = pd.DataFrame()
    # band is the level 0 index
    dfBands = df.columns.unique(level=0).values
    for band in dfBands:
        subdf = df[band]
        columnFormat = '{0}{1}' if camelCase else '{0}_{1}'
        newColumns = {c: columnFormat.format(band, c)
                      for c in subdf.columns if c not in noDupCols}
        cols = list(newColumns.keys())
        newDf = pd.concat([newDf, subdf[cols].rename(columns=newColumns)], axis=1)

    # Band must be present in the input and output or else column is all NaN:
    presentBands = dfBands if inputBands is None else list(set(inputBands).intersection(dfBands))
    # Get the unexploded columns from any present band's partition
    noDupDf = df[presentBands[0]][noDupCols]
    newDf = pd.concat([noDupDf, newDf], axis=1)
    return newDf
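
# Illustrative sketch (not pipeline code): shows what `flattenFilters` does to
# a small, hand-built DataFrame with a (band, column) MultiIndex. The band and
# column names below are hypothetical example values.
def _exampleFlattenFilters():
    import pandas as pd

    columns = pd.MultiIndex.from_tuples(
        [("g", "coord_ra"), ("g", "coord_dec"), ("g", "PsfFlux"),
         ("r", "coord_ra"), ("r", "coord_dec"), ("r", "PsfFlux")],
        names=("band", "column"))
    df = pd.DataFrame([[10.0, -5.0, 1.2, 10.0, -5.0, 3.4]], columns=columns)

    flat = flattenFilters(df, camelCase=True)
    # Per-band columns are prefixed with the band ("gPsfFlux", "rPsfFlux"),
    # while the noDupCols ("coord_ra", "coord_dec") appear once, unprefixed.
    return flat
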
class WriteObjectTableConnections(pipeBase.PipelineTaskConnections,
                                  defaultTemplates={"coaddName": "deep"},
                                  dimensions=("tract", "patch", "skymap")):
    inputCatalogMeas = connectionTypes.Input(
        doc="Catalog of source measurements on the deepCoadd.",
        dimensions=("tract", "patch", "band", "skymap"),
        storageClass="SourceCatalog",
        name="{coaddName}Coadd_meas",
        multiple=True
    )
    inputCatalogForcedSrc = connectionTypes.Input(
        doc="Catalog of forced measurements (shape and position parameters held fixed) on the deepCoadd.",
        dimensions=("tract", "patch", "band", "skymap"),
        storageClass="SourceCatalog",
        name="{coaddName}Coadd_forced_src",
        multiple=True
    )
    inputCatalogRef = connectionTypes.Input(
        doc="Catalog marking the primary detection (which band provides a good shape and position) "
            "for each detection in deepCoadd_mergeDet.",
        dimensions=("tract", "patch", "skymap"),
        storageClass="SourceCatalog",
        name="{coaddName}Coadd_ref"
    )
    outputCatalog = connectionTypes.Output(
        doc="A vertical concatenation of the deepCoadd_{ref|meas|forced_src} catalogs, "
            "stored as a DataFrame with a multi-level column index per-patch.",
        dimensions=("tract", "patch", "skymap"),
        storageClass="DataFrame",
        name="{coaddName}Coadd_obj"
    )
class WriteObjectTableConfig(pipeBase.PipelineTaskConfig,
                             pipelineConnections=WriteObjectTableConnections):
    engine = pexConfig.Field(
        dtype=str,
        default="pyarrow",
        doc="Parquet engine for writing (pyarrow or fastparquet)"
    )
    coaddName = pexConfig.Field(
        dtype=str,
        default="deep",
        doc="Name of coadd"
    )
class WriteObjectTableTask(CmdLineTask, pipeBase.PipelineTask):
    """Write filter-merged source tables to parquet.
    """
    _DefaultName = "writeObjectTable"
    ConfigClass = WriteObjectTableConfig
    RunnerClass = MergeSourcesRunner

    # Names of table datasets to be merged
    inputDatasets = ('forced_src', 'meas', 'ref')

    # Name of the output table dataset
    outputDataset = 'obj'

    def __init__(self, butler=None, schema=None, **kwargs):
        # butler and schema are accepted for Gen2 runner compatibility but are not used here.
        super().__init__(**kwargs)

    def runDataRef(self, patchRefList):
        """!
        @brief Merge coadd sources from multiple bands. Calls @ref `run` which must be defined in
        subclasses that inherit from MergeSourcesTask.

        @param[in] patchRefList list of data references for each filter
        """
        catalogs = dict(self.readCatalog(patchRef) for patchRef in patchRefList)
        dataId = patchRefList[0].dataId
        mergedCatalog = self.run(catalogs, tract=dataId['tract'], patch=dataId['patch'])
        self.write(patchRefList[0], ParquetTable(dataFrame=mergedCatalog))
    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)

        measDict = {ref.dataId['band']: {'meas': cat} for ref, cat in
                    zip(inputRefs.inputCatalogMeas, inputs['inputCatalogMeas'])}
        forcedSourceDict = {ref.dataId['band']: {'forced_src': cat} for ref, cat in
                            zip(inputRefs.inputCatalogForcedSrc, inputs['inputCatalogForcedSrc'])}

        catalogs = {}
        for band in measDict.keys():
            catalogs[band] = {'meas': measDict[band]['meas'],
                              'forced_src': forcedSourceDict[band]['forced_src'],
                              'ref': inputs['inputCatalogRef']}
        dataId = butlerQC.quantum.dataId
        df = self.run(catalogs=catalogs, tract=dataId['tract'], patch=dataId['patch'])
        outputs = pipeBase.Struct(outputCatalog=df)
        butlerQC.put(outputs, outputRefs)
    @classmethod
    def _makeArgumentParser(cls):
        """Create a suitable ArgumentParser.

        We will use the ArgumentParser to get a list of data
        references for patches; the RunnerClass will sort them into lists
        of data references for the same patch.

        References first of self.inputDatasets, rather than
        self.inputDataset.
        """
        return makeMergeArgumentParser(cls._DefaultName, cls.inputDatasets[0])
176 """Read input catalogs
178 Read all the input datasets given by the 'inputDatasets'
183 patchRef : `lsst.daf.persistence.ButlerDataRef`
184 Data reference
for patch
188 Tuple consisting of band name
and a dict of catalogs, keyed by
191 band = patchRef.get(self.config.coaddName + "Coadd_filterLabel", immediate=
True).bandLabel
193 for dataset
in self.inputDatasets:
194 catalog = patchRef.get(self.config.coaddName +
"Coadd_" + dataset, immediate=
True)
195 self.log.info(
"Read %d sources from %s for band %s: %s",
196 len(catalog), dataset, band, patchRef.dataId)
197 catalogDict[dataset] = catalog
198 return band, catalogDict
    def run(self, catalogs, tract, patch):
        """Merge multiple catalogs.

        Parameters
        ----------
        catalogs : `dict`
            Mapping from filter names to dict of catalogs.
        tract : `int`
            tractId to use for the tractId column.
        patch : `str`
            patchId to use for the patchId column.

        Returns
        -------
        catalog : `pandas.DataFrame`
            Merged dataframe.
        """
        dfs = []
        for filt, tableDict in catalogs.items():
            for dataset, table in tableDict.items():
                # Convert afwTable to pandas DataFrame
                df = table.asAstropy().to_pandas().set_index('id', drop=True)

                # Sort columns by name, to ensure matching schema among patches
                df = df.reindex(sorted(df.columns), axis=1)
                df['tractId'] = tract
                df['patchId'] = patch

                # Make columns a 3-level MultiIndex
                df.columns = pd.MultiIndex.from_tuples([(dataset, filt, c) for c in df.columns],
                                                       names=('dataset', 'band', 'column'))
                dfs.append(df)

        catalog = functools.reduce(lambda d1, d2: d1.join(d2), dfs)
        return catalog
    def write(self, patchRef, catalog):
        """Write the output.

        Parameters
        ----------
        catalog : `ParquetTable`
            Catalog to write.
        patchRef : `lsst.daf.persistence.ButlerDataRef`
            Data reference for patch.
        """
        patchRef.put(catalog, self.config.coaddName + "Coadd_" + self.outputDataset)
        # since the filter isn't actually part of the data ID for the dataset we're saving,
        # it's confusing to see it in the log message, even if the butler simply ignores it.
        mergeDataId = patchRef.dataId.copy()
        del mergeDataId["filter"]
        self.log.info("Wrote merged catalog: %s", mergeDataId)

    def writeMetadata(self, dataRefList):
        """No metadata to write, and not sure how to write it for a list of dataRefs.
        """
        pass
class WriteSourceTableConnections(pipeBase.PipelineTaskConnections,
                                  defaultTemplates={"catalogType": ""},
                                  dimensions=("instrument", "visit", "detector")):

    catalog = connectionTypes.Input(
        doc="Input full-depth catalog of sources produced by CalibrateTask",
        name="{catalogType}src",
        storageClass="SourceCatalog",
        dimensions=("instrument", "visit", "detector")
    )
    outputCatalog = connectionTypes.Output(
        doc="Catalog of sources, `src` in Parquet format. The 'id' column is "
            "replaced with an index; all other columns are unchanged.",
        name="{catalogType}source",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector")
    )
class WriteSourceTableConfig(pipeBase.PipelineTaskConfig,
                             pipelineConnections=WriteSourceTableConnections):
    doApplyExternalPhotoCalib = pexConfig.Field(
        dtype=bool,
        default=False,
        doc=("Add local photoCalib columns from the calexp.photoCalib? Should only set True if "
             "generating Source Tables from older src tables which do not already have local calib columns")
    )
    doApplyExternalSkyWcs = pexConfig.Field(
        dtype=bool,
        default=False,
        doc=("Add local WCS columns from the calexp.wcs? Should only set True if "
             "generating Source Tables from older src tables which do not already have local calib columns")
    )
class WriteSourceTableTask(CmdLineTask, pipeBase.PipelineTask):
    """Write source table to parquet.
    """
    _DefaultName = "writeSourceTable"
    ConfigClass = WriteSourceTableConfig

    def runDataRef(self, dataRef):
        src = dataRef.get('src')
        if self.config.doApplyExternalPhotoCalib or self.config.doApplyExternalSkyWcs:
            src = self.addCalibColumns(src, dataRef)

        ccdVisitId = dataRef.get('ccdExposureId')
        result = self.run(src, ccdVisitId=ccdVisitId)
        dataRef.put(result.table, 'source')

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)
        inputs['ccdVisitId'] = butlerQC.quantum.dataId.pack("visit_detector")
        result = self.run(**inputs).table
        outputs = pipeBase.Struct(outputCatalog=result.toDataFrame())
        butlerQC.put(outputs, outputRefs)

    def run(self, catalog, ccdVisitId=None):
        """Convert `src` catalog to parquet.

        Parameters
        ----------
        catalog : `afwTable.SourceCatalog`
            Catalog to be converted.
        ccdVisitId : `int`
            ccdVisitId to be added as a column.

        Returns
        -------
        result : `lsst.pipe.base.Struct`
            ``table``
                `ParquetTable` version of the input catalog.
        """
        self.log.info("Generating parquet table from src catalog %s", ccdVisitId)
        df = catalog.asAstropy().to_pandas().set_index('id', drop=True)
        df['ccdVisitId'] = ccdVisitId
        return pipeBase.Struct(table=ParquetTable(dataFrame=df))
    def addCalibColumns(self, catalog, dataRef):
        """Add columns with local calibration evaluated at each centroid

        for backwards compatibility with old repos.
        This exists for the purpose of converting old src catalogs
        (which don't have the expected local calib columns) to Source Tables.

        Parameters
        ----------
        catalog : `afwTable.SourceCatalog`
            Catalog to which calib columns will be added.
        dataRef : `lsst.daf.persistence.ButlerDataRef`
            Data reference for fetching the calibs from disk.

        Returns
        -------
        newCat : `afwTable.SourceCatalog`
            Source Catalog with requested local calib columns.
        """
        mapper = afwTable.SchemaMapper(catalog.schema)
        measureConfig = SingleFrameMeasurementTask.ConfigClass()
        measureConfig.doReplaceWithNoise = False

        # Only need a tiny sub-exposure to get at the attached photoCalib and wcs
        exposure = dataRef.get('calexp_sub',
                               bbox=lsst.geom.Box2I(lsst.geom.Point2I(0, 0), lsst.geom.Point2I(0, 0)))

        mapper = afwTable.SchemaMapper(catalog.schema)
        mapper.addMinimalSchema(catalog.schema, True)
        schema = mapper.getOutputSchema()

        exposureIdInfo = dataRef.get("expIdInfo")
        measureConfig.plugins.names = []
        if self.config.doApplyExternalSkyWcs:
            plugin = 'base_LocalWcs'
            if plugin in catalog.schema:
                raise RuntimeError(f"{plugin} already in src catalog. Set doApplyExternalSkyWcs=False")
            else:
                measureConfig.plugins.names.add(plugin)

        if self.config.doApplyExternalPhotoCalib:
            plugin = 'base_LocalPhotoCalib'
            if plugin in catalog.schema:
                raise RuntimeError(f"{plugin} already in src catalog. Set doApplyExternalPhotoCalib=False")
            else:
                measureConfig.plugins.names.add(plugin)

        measurement = SingleFrameMeasurementTask(config=measureConfig, schema=schema)
        newCat = afwTable.SourceCatalog(schema)
        newCat.extend(catalog, mapper=mapper)
        measurement.run(measCat=newCat, exposure=exposure, exposureId=exposureIdInfo.expId)
        return newCat
392 """No metadata to write.
397 def _makeArgumentParser(cls):
398 parser = ArgumentParser(name=cls._DefaultName)
399 parser.add_id_argument(
"--id",
'src',
400 help=
"data ID, e.g. --id visit=12345 ccd=0")
class PostprocessAnalysis(object):
    """Calculate columns from ParquetTable.

    This object manages and organizes an arbitrary set of computations
    on a catalog. The catalog is defined by a
    `deepCoadd_obj` dataset, and the computations are defined by a collection
    of `lsst.pipe.tasks.functor.Functor` objects (or, equivalently,
    a `CompositeFunctor`).

    After the object is initialized, accessing the `.df` attribute (which
    holds the `pandas.DataFrame` containing the results of the calculations)
    triggers computation of said dataframe.

    One of the conveniences of using this object is the ability to define a
    desired common filter for all functors. This enables the same functor
    collection to be passed to several different `PostprocessAnalysis` objects
    without having to change the original functor collection, since the `filt`
    keyword argument of this object triggers an overwrite of the `filt`
    property for all functors in the collection.

    This object also allows a list of refFlags to be passed, and defines a
    set of default refFlags that are always included even if not requested.

    If a list of `ParquetTable` objects is passed, rather than a single one,
    then the calculations will be mapped over all the input catalogs. In
    principle, it should be straightforward to parallelize this activity, but
    initial tests have failed (see TODO in code comments).

    Parameters
    ----------
    parq : `lsst.pipe.tasks.ParquetTable` (or list of such)
        Source catalog(s) for computation.

    functors : `list`, `dict`, or `CompositeFunctor`
        Computations to do (functors that act on `parq`).
        If a dict, the output DataFrame will have columns keyed accordingly.
        If a list, the column keys will come from the
        `.shortname` attribute of each functor.

    filt : `str`, optional
        Filter in which to calculate. If provided,
        this will overwrite any existing `.filt` attribute
        of the provided functors.

    flags : `list`, optional
        List of flags (per-band) to include in output table.
        Taken from the `meas` dataset if applied to a multilevel Object Table.

    refFlags : `list`, optional
        List of refFlags (only reference band) to include in output table.

    forcedFlags : `list`, optional
        List of flags (per-band) to include in output table.
        Taken from the ``forced_src`` dataset if applied to a
        multilevel Object Table. Intended for flags from measurement plugins
        only run during multi-band forced-photometry.
    """
    _defaultRefFlags = []
    _defaultFuncs = (('coord_ra', RAColumn()),
                     ('coord_dec', DecColumn()))
    def __init__(self, parq, functors, filt=None, flags=None, refFlags=None, forcedFlags=None):
        self.parq = parq
        self.functors = functors

        self.filt = filt
        self.flags = list(flags) if flags is not None else []
        self.forcedFlags = list(forcedFlags) if forcedFlags is not None else []
        self.refFlags = list(self._defaultRefFlags)
        if refFlags is not None:
            self.refFlags += list(refFlags)

        self._df = None
    @property
    def defaultFuncs(self):
        funcs = dict(self._defaultFuncs)
        return funcs

    @property
    def func(self):
        additionalFuncs = self.defaultFuncs
        additionalFuncs.update({flag: Column(flag, dataset='forced_src') for flag in self.forcedFlags})
        additionalFuncs.update({flag: Column(flag, dataset='ref') for flag in self.refFlags})
        additionalFuncs.update({flag: Column(flag, dataset='meas') for flag in self.flags})

        if isinstance(self.functors, CompositeFunctor):
            func = self.functors
        else:
            func = CompositeFunctor(self.functors)

        func.funcDict.update(additionalFuncs)
        func.filt = self.filt

        return func

    @property
    def noDupCols(self):
        return [name for name, func in self.func.funcDict.items() if func.noDup or func.dataset == 'ref']
    @property
    def df(self):
        if self._df is None:
            self.compute()
        return self._df

    def compute(self, dropna=False, pool=None):
        # map over multiple parquet tables
        if type(self.parq) in (list, tuple):
            if pool is None:
                dflist = [self.func(parq, dropna=dropna) for parq in self.parq]
            else:
                # TODO: Figure out why this doesn't work
                dflist = pool.map(functools.partial(self.func, dropna=dropna), self.parq)
            self._df = pd.concat(dflist)
        else:
            self._df = self.func(self.parq, dropna=dropna)

        return self._df
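
# Illustrative sketch (not pipeline code): how PostprocessAnalysis is meant to
# be driven. `parq` stands for a ParquetTable holding a deepCoadd_obj-style
# catalog; the column, flag, and band names below are hypothetical examples.
def _examplePostprocessAnalysis(parq):
    funcs = {"ra": Column("coord_ra", dataset="ref"),
             "dec": Column("coord_dec", dataset="ref")}
    analysis = PostprocessAnalysis(parq, funcs,
                                   filt="g",
                                   flags=["base_PixelFlags_flag"],
                                   refFlags=["detect_isPrimary"])
    # Accessing .df triggers the lazy computation of all requested columns.
    return analysis.df
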
527 """Expected Connections for subclasses of TransformCatalogBaseTask.
531 inputCatalog = connectionTypes.Input(
533 storageClass=
"DataFrame",
535 outputCatalog = connectionTypes.Output(
537 storageClass=
"DataFrame",
class TransformCatalogBaseConfig(pipeBase.PipelineTaskConfig,
                                 pipelineConnections=TransformCatalogBaseConnections):
    functorFile = pexConfig.Field(
        dtype=str,
        doc="Path to YAML file specifying Science Data Model functors to use "
            "when copying columns and computing calibrated values.",
        default=None,
        optional=True
    )
    primaryKey = pexConfig.Field(
        dtype=str,
        doc="Name of column to be set as the DataFrame index. If None, the index "
            "will be named `id`",
        default=None,
        optional=True
    )
560 """Base class for transforming/standardizing a catalog
562 by applying functors that convert units and apply calibrations.
563 The purpose of this task
is to perform a set of computations on
564 an input `ParquetTable` dataset (such
as `deepCoadd_obj`)
and write the
565 results to a new dataset (which needs to be declared
in an `outputDataset`
568 The calculations to be performed are defined
in a YAML file that specifies
569 a set of functors to be computed, provided
as
570 a `--functorFile` config parameter. An example of such a YAML file
595 - base_InputCount_value
598 functor: DeconvolvedMoments
603 - merge_measurement_i
604 - merge_measurement_r
605 - merge_measurement_z
606 - merge_measurement_y
607 - merge_measurement_g
608 - base_PixelFlags_flag_inexact_psfCenter
611 The names
for each entry under
"func" will become the names of columns
in the
613 Positional arguments to be passed to each functor are
in the `args` list,
614 and any additional entries
for each column other than
"functor" or "args" (e.g., `
'filt'`,
615 `
'dataset'`) are treated
as keyword arguments to be passed to the functor initialization.
617 The
"flags" entry
is the default shortcut
for `Column` functors.
618 All columns listed under
"flags" will be copied to the output table
619 untransformed. They can be of any datatype.
620 In the special case of transforming a multi-level oject table
with
621 band
and dataset indices (deepCoadd_obj), these will be taked
from the
622 `meas` dataset
and exploded out per band.
624 There are two special shortcuts that only apply when transforming
625 multi-level Object (deepCoadd_obj) tables:
626 - The
"refFlags" entry
is shortcut
for `Column` functor
627 taken
from the `
'ref'` dataset
if transforming an ObjectTable.
628 - The
"forcedFlags" entry
is shortcut
for `Column` functors.
629 taken
from the ``forced_src`` dataset
if transforming an ObjectTable.
630 These are expanded out per band.
633 This task uses the `lsst.pipe.tasks.postprocess.PostprocessAnalysis` object
634 to organize
and excecute the calculations.
    @property
    def _DefaultName(self):
        raise NotImplementedError('Subclass must define "_DefaultName" attribute')

    @property
    def outputDataset(self):
        raise NotImplementedError('Subclass must define "outputDataset" attribute')

    @property
    def inputDataset(self):
        raise NotImplementedError('Subclass must define "inputDataset" attribute')

    @property
    def ConfigClass(self):
        raise NotImplementedError('Subclass must define "ConfigClass" attribute')
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        if self.config.functorFile:
            self.log.info('Loading transform functor definitions from %s',
                          self.config.functorFile)
            self.funcs = CompositeFunctor.from_file(self.config.functorFile)
            self.funcs.update(dict(PostprocessAnalysis._defaultFuncs))
        else:
            self.funcs = None
    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)
        if self.funcs is None:
            raise ValueError("config.functorFile is None. "
                             "Must be a valid path to yaml in order to run Task as a PipelineTask.")
        result = self.run(parq=inputs['inputCatalog'], funcs=self.funcs,
                          dataId=outputRefs.outputCatalog.dataId.full)
        outputs = pipeBase.Struct(outputCatalog=result)
        butlerQC.put(outputs, outputRefs)
    def runDataRef(self, dataRef):
        parq = dataRef.get()
        if self.funcs is None:
            raise ValueError("config.functorFile is None. "
                             "Must be a valid path to yaml in order to run as a CommandlineTask.")
        df = self.run(parq, funcs=self.funcs, dataId=dataRef.dataId)
        self.write(df, dataRef)
        return df
    def run(self, parq, funcs=None, dataId=None, band=None):
        """Do postprocessing calculations

        Takes a `ParquetTable` object and dataId,
        returns a dataframe with results of postprocessing calculations.

        Parameters
        ----------
        parq : `lsst.pipe.tasks.parquetTable.ParquetTable`
            ParquetTable from which calculations are done.
        funcs : `lsst.pipe.tasks.functors.Functors`
            Functors to apply to the table's columns.
        dataId : dict, optional
            Used to add a `patchId` column to the output dataframe.
        band : `str`, optional
            Filter band that is being processed.

        Returns
        -------
        df : `pandas.DataFrame`
        """
        self.log.info("Transforming/standardizing the source table dataId: %s", dataId)

        df = self.transform(band, parq, funcs, dataId).df
        self.log.info("Made a table of %d columns and %d rows", len(df.columns), len(df))
        return df
    def getFunctors(self):
        return self.funcs

    def getAnalysis(self, parq, funcs=None, band=None):
        if funcs is None:
            funcs = self.funcs
        analysis = PostprocessAnalysis(parq, funcs, filt=band)
        return analysis

    def transform(self, band, parq, funcs, dataId):
        analysis = self.getAnalysis(parq, funcs=funcs, band=band)
        df = analysis.df
        if dataId is not None:
            for key, value in dataId.items():
                df[str(key)] = value

        if self.config.primaryKey:
            if df.index.name != self.config.primaryKey and self.config.primaryKey in df:
                df.reset_index(inplace=True, drop=True)
                df.set_index(self.config.primaryKey, inplace=True)

        return pipeBase.Struct(
            df=df,
            analysis=analysis
        )
740 """No metadata to write.
class TransformObjectCatalogConnections(pipeBase.PipelineTaskConnections,
                                        defaultTemplates={"coaddName": "deep"},
                                        dimensions=("tract", "patch", "skymap")):
    inputCatalog = connectionTypes.Input(
        doc="The vertical concatenation of the deepCoadd_{ref|meas|forced_src} catalogs, "
            "stored as a DataFrame with a multi-level column index per-patch.",
        dimensions=("tract", "patch", "skymap"),
        storageClass="DataFrame",
        name="{coaddName}Coadd_obj",
        deferLoad=True,
    )
    outputCatalog = connectionTypes.Output(
        doc="Per-Patch Object Table of columns transformed from the deepCoadd_obj table per the standard "
            "data model.",
        dimensions=("tract", "patch", "skymap"),
        storageClass="DataFrame",
        name="objectTable",
    )
class TransformObjectCatalogConfig(TransformCatalogBaseConfig,
                                   pipelineConnections=TransformObjectCatalogConnections):
    coaddName = pexConfig.Field(
        dtype=str,
        default="deep",
        doc="Name of coadd"
    )
    filterMap = pexConfig.DictField(
        keytype=str,
        itemtype=str,
        default={},
        doc=("Dictionary mapping full filter name to short one for column name munging. "
             "These filters determine the output columns no matter what filters the "
             "input data actually contain."),
        deprecated=("Coadds are now identified by the band, so this transform is unused. "
                    "Will be removed after v22.")
    )
    outputBands = pexConfig.ListField(
        dtype=str,
        default=None,
        optional=True,
        doc=("These bands and only these bands will appear in the output,"
             " NaN-filled if the input does not include them."
             " If None, then use all bands found in the input.")
    )
    camelCase = pexConfig.Field(
        dtype=bool,
        default=False,
        doc=("Write per-band columns names with camelCase, else underscore. "
             "For example: gPsFlux instead of g_PsFlux.")
    )
    multilevelOutput = pexConfig.Field(
        dtype=bool,
        default=False,
        doc=("Whether results dataframe should have a multilevel column index (True) or be flat "
             "and name-munged (False).")
    )
    goodFlags = pexConfig.ListField(
        dtype=str,
        default=[],
        doc=("List of 'good' flags that should be set False when populating empty tables. "
             "All other flags are considered to be 'bad' flags and will be set to True.")
    )
    floatFillValue = pexConfig.Field(
        dtype=float,
        default=np.nan,
        doc="Fill value for float fields when populating empty tables."
    )
    integerFillValue = pexConfig.Field(
        dtype=int,
        default=-1,
        doc="Fill value for integer fields when populating empty tables."
    )

    def setDefaults(self):
        super().setDefaults()
        self.functorFile = os.path.join('$PIPE_TASKS_DIR', 'schemas', 'Object.yaml')
        self.primaryKey = 'objectId'
        self.goodFlags = ['calib_astrometry_used',
                          'calib_photometry_reserved',
                          'calib_photometry_used',
                          'calib_psf_candidate',
                          'calib_psf_reserved',
                          'calib_psf_used']
833 """Produce a flattened Object Table to match the format specified in
836 Do the same set of postprocessing calculations on all bands
838 This is identical to `TransformCatalogBaseTask`,
except for that it does the
839 specified functor calculations
for all filters present
in the
840 input `deepCoadd_obj` table. Any specific `
"filt"` keywords specified
841 by the YAML file will be superceded.
843 _DefaultName = "transformObjectCatalog"
844 ConfigClass = TransformObjectCatalogConfig
847 inputDataset =
'deepCoadd_obj'
848 outputDataset =
'objectTable'
851 def _makeArgumentParser(cls):
852 parser = ArgumentParser(name=cls._DefaultName)
853 parser.add_id_argument(
"--id", cls.inputDataset,
854 ContainerClass=CoaddDataIdContainer,
855 help=
"data ID, e.g. --id tract=12345 patch=1,2")
    def run(self, parq, funcs=None, dataId=None, band=None):
        # NOTE: the `band` kwarg is ignored here; all input bands are processed.
        dfDict = {}
        analysisDict = {}
        templateDf = pd.DataFrame()

        if isinstance(parq, DeferredDatasetHandle):
            columns = parq.get(component='columns')
            inputBands = columns.unique(level=1).values
        else:
            inputBands = parq.columnLevelNames['band']

        outputBands = self.config.outputBands if self.config.outputBands else inputBands

        # Perform transform for data of filters that exist in parq.
        for inputBand in inputBands:
            if inputBand not in outputBands:
                self.log.info("Ignoring %s band data in the input", inputBand)
                continue
            self.log.info("Transforming the catalog of band %s", inputBand)
            result = self.transform(inputBand, parq, funcs, dataId)
            dfDict[inputBand] = result.df
            analysisDict[inputBand] = result.analysis
            if templateDf.empty:
                templateDf = result.df
        # Fill NaNs in columns of other wanted bands
        for filt in outputBands:
            if filt not in dfDict:
                self.log.info("Adding empty columns for band %s", filt)
                dfTemp = templateDf.copy()
                for col in dfTemp.columns:
                    testValue = dfTemp[col].values[0]
                    if isinstance(testValue, (np.bool_, pd.BooleanDtype)):
                        # Boolean flag type: "good" flags fill with False, all others with True
                        if col in self.config.goodFlags:
                            fillValue = False
                        else:
                            fillValue = True
                    elif isinstance(testValue, numbers.Integral):
                        # numbers.Integral catches python, numpy, and pandas integers,
                        # but unsigned integers cannot be stored in Parquet.
                        if isinstance(testValue, np.unsignedinteger):
                            raise ValueError("Parquet tables may not have unsigned integer columns.")
                        else:
                            fillValue = self.config.integerFillValue
                    else:
                        fillValue = self.config.floatFillValue
                    dfTemp[col].values[:] = fillValue
                dfDict[filt] = dfTemp

        # This makes a multilevel column index, with band as first level
        df = pd.concat(dfDict, axis=1, names=['band', 'column'])
        if not self.config.multilevelOutput:
            noDupCols = list(set.union(*[set(v.noDupCols) for v in analysisDict.values()]))
            if self.config.primaryKey in noDupCols:
                noDupCols.remove(self.config.primaryKey)
            if dataId is not None:
                noDupCols += list(dataId.keys())
            df = flattenFilters(df, noDupCols=noDupCols, camelCase=self.config.camelCase,
                                inputBands=inputBands)

        self.log.info("Made a table of %d columns and %d rows", len(df.columns), len(df))

        return df
class TractObjectDataIdContainer(CoaddDataIdContainer):

    def makeDataRefList(self, namespace):
        """Make self.refList from self.idList

        Generate a list of data references given tract and/or patch.
        This was adapted from `TractQADataIdContainer`, which was
        `TractDataIdContainer` modified to not require "filter".
        Only existing dataRefs are returned.
        """
        def getPatchRefList(tract):
            return [namespace.butler.dataRef(datasetType=self.datasetType,
                                             tract=tract.getId(),
                                             patch="%d,%d" % patch.getIndex()) for patch in tract]

        tractRefs = defaultdict(list)  # Data references for each tract
        for dataId in self.idList:
            skymap = self.getSkymap(namespace)

            if "tract" in dataId:
                tractId = dataId["tract"]
                if "patch" in dataId:
                    tractRefs[tractId].append(namespace.butler.dataRef(datasetType=self.datasetType,
                                                                       tract=tractId,
                                                                       patch=dataId['patch']))
                else:
                    tractRefs[tractId] += getPatchRefList(skymap[tractId])
            else:
                tractRefs = dict((tract.getId(), tractRefs.get(tract.getId(), []) + getPatchRefList(tract))
                                 for tract in skymap)

        outputRefList = []
        for tractRefList in tractRefs.values():
            existingRefs = [ref for ref in tractRefList if ref.datasetExists()]
            outputRefList.append(existingRefs)

        self.refList = outputRefList
class ConsolidateObjectTableConnections(pipeBase.PipelineTaskConnections,
                                        dimensions=("tract", "skymap")):
    inputCatalogs = connectionTypes.Input(
        doc="Per-Patch objectTables conforming to the standard data model.",
        name="objectTable",
        storageClass="DataFrame",
        dimensions=("tract", "patch", "skymap"),
        multiple=True,
    )
    outputCatalog = connectionTypes.Output(
        doc="Per-tract concatenation of the input objectTables",
        name="objectTable_tract",
        storageClass="DataFrame",
        dimensions=("tract", "skymap"),
    )
class ConsolidateObjectTableConfig(pipeBase.PipelineTaskConfig,
                                   pipelineConnections=ConsolidateObjectTableConnections):
    coaddName = pexConfig.Field(
        dtype=str,
        default="deep",
        doc="Name of coadd"
    )
class ConsolidateObjectTableTask(CmdLineTask, pipeBase.PipelineTask):
    """Write patch-merged source tables to a tract-level parquet file.

    Concatenates the `objectTable` list into a per-tract `objectTable_tract`.
    """
    _DefaultName = "consolidateObjectTable"
    ConfigClass = ConsolidateObjectTableConfig

    inputDataset = 'objectTable'
    outputDataset = 'objectTable_tract'

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)
        self.log.info("Concatenating %s per-patch Object Tables",
                      len(inputs['inputCatalogs']))
        df = pd.concat(inputs['inputCatalogs'])
        butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs)

    @classmethod
    def _makeArgumentParser(cls):
        parser = ArgumentParser(name=cls._DefaultName)

        parser.add_id_argument("--id", cls.inputDataset,
                               help="data ID, e.g. --id tract=12345",
                               ContainerClass=TractObjectDataIdContainer)
        return parser

    def runDataRef(self, patchRefList):
        df = pd.concat([patchRef.get().toDataFrame() for patchRef in patchRefList])
        patchRefList[0].put(ParquetTable(dataFrame=df), self.outputDataset)
1023 """No metadata to write.
class TransformSourceTableConnections(pipeBase.PipelineTaskConnections,
                                      defaultTemplates={"catalogType": ""},
                                      dimensions=("instrument", "visit", "detector")):

    inputCatalog = connectionTypes.Input(
        doc="Wide input catalog of sources produced by WriteSourceTableTask",
        name="{catalogType}source",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector"),
    )
    outputCatalog = connectionTypes.Output(
        doc="Narrower, per-detector Source Table transformed and converted per a "
            "specified set of functors",
        name="{catalogType}sourceTable",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector")
    )
class TransformSourceTableConfig(TransformCatalogBaseConfig,
                                 pipelineConnections=TransformSourceTableConnections):

    def setDefaults(self):
        super().setDefaults()
        self.functorFile = os.path.join('$PIPE_TASKS_DIR', 'schemas', 'Source.yaml')
        self.primaryKey = 'sourceId'
1058 """Transform/standardize a source catalog
1060 _DefaultName = "transformSourceTable"
1061 ConfigClass = TransformSourceTableConfig
1063 inputDataset =
'source'
1064 outputDataset =
'sourceTable'
1067 def _makeArgumentParser(cls):
1068 parser = ArgumentParser(name=cls._DefaultName)
1069 parser.add_id_argument(
"--id", datasetType=cls.inputDataset,
1071 help=
"data ID, e.g. --id visit=12345 ccd=0")
1074 def runDataRef(self, dataRef):
1075 """Override to specify band label to run()."""
1076 parq = dataRef.get()
1077 funcs = self.getFunctors()
1078 band = dataRef.get(
"calexp_filterLabel", immediate=
True).bandLabel
1079 df = self.run(parq, funcs=funcs, dataId=dataRef.dataId, band=band)
1080 self.write(df, dataRef)
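
# Illustrative sketch (not pipeline code): the functorFile defaults set above
# are stored with a literal '$PIPE_TASKS_DIR' prefix. That prefix is an
# environment-variable reference; os.path.expandvars is one way to resolve it
# to a real path (the resulting path shown in the comment is hypothetical).
def _exampleExpandFunctorFile():
    import os

    functorFile = os.path.join('$PIPE_TASKS_DIR', 'schemas', 'Source.yaml')
    # e.g. '/opt/lsst/pipe_tasks/schemas/Source.yaml' once PIPE_TASKS_DIR is set
    return os.path.expandvars(functorFile)
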
class ConsolidateVisitSummaryConnections(pipeBase.PipelineTaskConnections,
                                         dimensions=("instrument", "visit",),
                                         defaultTemplates={"calexpType": ""}):
    calexp = connectionTypes.Input(
        doc="Processed exposures used for metadata",
        name="{calexpType}calexp",
        storageClass="ExposureF",
        dimensions=("instrument", "visit", "detector"),
        deferLoad=True,
        multiple=True,
    )
    visitSummary = connectionTypes.Output(
        doc=("Per-visit consolidated exposure metadata. These catalogs use "
             "detector id for the id and are sorted for fast lookups of a "
             "detector."),
        name="{calexpType}visitSummary",
        storageClass="ExposureCatalog",
        dimensions=("instrument", "visit"),
    )


class ConsolidateVisitSummaryConfig(pipeBase.PipelineTaskConfig,
                                    pipelineConnections=ConsolidateVisitSummaryConnections):
    """Config for ConsolidateVisitSummaryTask"""
    pass
class ConsolidateVisitSummaryTask(pipeBase.PipelineTask, pipeBase.CmdLineTask):
    """Task to consolidate per-detector visit metadata.

    This task aggregates the following metadata from all the detectors in a
    single visit into an exposure catalog:

    - The visitInfo.
    - The wcs.
    - The photoCalib.
    - The physical_filter and band (if available).
    - The psf size, shape, and effective area at the center of the detector.
    - The corners of the bounding box in right ascension/declination.

    Other quantities such as Detector, Psf, ApCorrMap, and TransmissionCurve
    are not persisted here because of storage concerns, and because of their
    limited utility as summary statistics.

    Tests for this task are performed in ci_hsc_gen3.
    """
    _DefaultName = "consolidateVisitSummary"
    ConfigClass = ConsolidateVisitSummaryConfig
    @classmethod
    def _makeArgumentParser(cls):
        parser = ArgumentParser(name=cls._DefaultName)

        parser.add_id_argument("--id", "calexp",
                               help="data ID, e.g. --id visit=12345",
                               ContainerClass=VisitDataIdContainer)
        return parser
1142 """No metadata to persist, so override to remove metadata persistance.
1146 def writeConfig(self, butler, clobber=False, doBackup=True):
1147 """No config to persist, so override to remove config persistance.
    def runDataRef(self, dataRefList):
        visit = dataRefList[0].dataId['visit']

        self.log.debug("Concatenating metadata from %d per-detector calexps (visit %d)",
                       len(dataRefList), visit)

        expCatalog = self._combineExposureMetadata(visit, dataRefList, isGen3=False)

        dataRefList[0].put(expCatalog, 'visitSummary', visit=visit)
    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        dataRefs = butlerQC.get(inputRefs.calexp)
        visit = dataRefs[0].dataId.byName()['visit']

        self.log.debug("Concatenating metadata from %d per-detector calexps (visit %d)",
                       len(dataRefs), visit)

        expCatalog = self._combineExposureMetadata(visit, dataRefs)

        butlerQC.put(expCatalog, outputRefs.visitSummary)
    def _combineExposureMetadata(self, visit, dataRefs, isGen3=True):
        """Make a combined exposure catalog from a list of dataRefs.

        These dataRefs must point to exposures with wcs, summaryStats,
        and other visit metadata.

        Parameters
        ----------
        visit : `int`
            Visit identification number.
        dataRefs : `list`
            List of dataRefs in visit. May be list of
            `lsst.daf.persistence.ButlerDataRef` (Gen2) or
            `lsst.daf.butler.DeferredDatasetHandle` (Gen3).
        isGen3 : `bool`, optional
            Specifies if this is a Gen3 list of datarefs.

        Returns
        -------
        visitSummary : `lsst.afw.table.ExposureCatalog`
            Exposure catalog with per-detector summary information.
        """
        schema = self._makeVisitSummarySchema()
        cat = afwTable.ExposureCatalog(schema)
        cat.resize(len(dataRefs))

        cat['visit'] = visit
        for i, dataRef in enumerate(dataRefs):
            if isGen3:
                visitInfo = dataRef.get(component='visitInfo')
                filterLabel = dataRef.get(component='filterLabel')
                summaryStats = dataRef.get(component='summaryStats')
                detector = dataRef.get(component='detector')
                wcs = dataRef.get(component='wcs')
                photoCalib = dataRef.get(component='photoCalib')
                detector = dataRef.get(component='detector')
                bbox = dataRef.get(component='bbox')
                validPolygon = dataRef.get(component='validPolygon')
            else:
                # Gen2: read only a tiny sub-image so that just the exposure
                # metadata and components are materialized.
                gen2_read_bbox = lsst.geom.Box2I(lsst.geom.Point2I(0, 0), lsst.geom.Point2I(1, 1))
                exp = dataRef.get(datasetType='calexp_sub', bbox=gen2_read_bbox)
                visitInfo = exp.getInfo().getVisitInfo()
                filterLabel = dataRef.get("calexp_filterLabel")
                summaryStats = exp.getInfo().getSummaryStats()
                wcs = exp.getWcs()
                photoCalib = exp.getPhotoCalib()
                detector = exp.getDetector()
                bbox = dataRef.get(datasetType='calexp_bbox')
                validPolygon = exp.getInfo().getValidPolygon()

            rec = cat[i]
            rec.setBBox(bbox)
            rec.setVisitInfo(visitInfo)
            rec.setWcs(wcs)
            rec.setPhotoCalib(photoCalib)
            rec.setValidPolygon(validPolygon)
            rec['physical_filter'] = filterLabel.physicalLabel if filterLabel.hasPhysicalLabel() else ""
            rec['band'] = filterLabel.bandLabel if filterLabel.hasBandLabel() else ""
            rec.setId(detector.getId())
            rec['psfSigma'] = summaryStats.psfSigma
            rec['psfIxx'] = summaryStats.psfIxx
            rec['psfIyy'] = summaryStats.psfIyy
            rec['psfIxy'] = summaryStats.psfIxy
            rec['psfArea'] = summaryStats.psfArea
            rec['raCorners'][:] = summaryStats.raCorners
            rec['decCorners'][:] = summaryStats.decCorners
            rec['ra'] = summaryStats.ra
            rec['decl'] = summaryStats.decl
            rec['zenithDistance'] = summaryStats.zenithDistance
            rec['zeroPoint'] = summaryStats.zeroPoint
            rec['skyBg'] = summaryStats.skyBg
            rec['skyNoise'] = summaryStats.skyNoise
            rec['meanVar'] = summaryStats.meanVar
            rec['astromOffsetMean'] = summaryStats.astromOffsetMean
            rec['astromOffsetStd'] = summaryStats.astromOffsetStd
            rec['nPsfStar'] = summaryStats.nPsfStar
            rec['psfStarDeltaE1Median'] = summaryStats.psfStarDeltaE1Median
            rec['psfStarDeltaE2Median'] = summaryStats.psfStarDeltaE2Median
            rec['psfStarDeltaE1Scatter'] = summaryStats.psfStarDeltaE1Scatter
            rec['psfStarDeltaE2Scatter'] = summaryStats.psfStarDeltaE2Scatter
            rec['psfStarDeltaSizeMedian'] = summaryStats.psfStarDeltaSizeMedian
            rec['psfStarDeltaSizeScatter'] = summaryStats.psfStarDeltaSizeScatter
            rec['psfStarScaledDeltaSizeScatter'] = summaryStats.psfStarScaledDeltaSizeScatter

        metadata = dafBase.PropertyList()
        metadata.add("COMMENT", "Catalog id is detector id, sorted.")
        metadata.add("COMMENT", "Only detectors with data have entries.")
        cat.setMetadata(metadata)

        cat.sort()
        return cat
    def _makeVisitSummarySchema(self):
        """Make the schema for the visitSummary catalog."""
        schema = afwTable.ExposureTable.makeMinimalSchema()
        schema.addField('visit', type='I', doc='Visit number')
        schema.addField('physical_filter', type='String', size=32, doc='Physical filter')
        schema.addField('band', type='String', size=32, doc='Name of band')
        schema.addField('psfSigma', type='F',
                        doc='PSF model second-moments determinant radius (center of chip) (pixel)')
        schema.addField('psfArea', type='F',
                        doc='PSF model effective area (center of chip) (pixel**2)')
        schema.addField('psfIxx', type='F',
                        doc='PSF model Ixx (center of chip) (pixel**2)')
        schema.addField('psfIyy', type='F',
                        doc='PSF model Iyy (center of chip) (pixel**2)')
        schema.addField('psfIxy', type='F',
                        doc='PSF model Ixy (center of chip) (pixel**2)')
        schema.addField('raCorners', type='ArrayD', size=4,
                        doc='Right Ascension of bounding box corners (degrees)')
        schema.addField('decCorners', type='ArrayD', size=4,
                        doc='Declination of bounding box corners (degrees)')
        schema.addField('ra', type='D',
                        doc='Right Ascension of bounding box center (degrees)')
        schema.addField('decl', type='D',
                        doc='Declination of bounding box center (degrees)')
        schema.addField('zenithDistance', type='F',
                        doc='Zenith distance of bounding box center (degrees)')
        schema.addField('zeroPoint', type='F',
                        doc='Mean zeropoint in detector (mag)')
        schema.addField('skyBg', type='F',
                        doc='Average sky background (ADU)')
        schema.addField('skyNoise', type='F',
                        doc='Average sky noise (ADU)')
        schema.addField('meanVar', type='F',
                        doc='Mean variance of the weight plane (ADU**2)')
        schema.addField('astromOffsetMean', type='F',
                        doc='Mean offset of astrometric calibration matches (arcsec)')
        schema.addField('astromOffsetStd', type='F',
                        doc='Standard deviation of offsets of astrometric calibration matches (arcsec)')
        schema.addField('nPsfStar', type='I', doc='Number of stars used for PSF model')
        schema.addField('psfStarDeltaE1Median', type='F',
                        doc='Median E1 residual (starE1 - psfE1) for psf stars')
        schema.addField('psfStarDeltaE2Median', type='F',
                        doc='Median E2 residual (starE2 - psfE2) for psf stars')
        schema.addField('psfStarDeltaE1Scatter', type='F',
                        doc='Scatter (via MAD) of E1 residual (starE1 - psfE1) for psf stars')
        schema.addField('psfStarDeltaE2Scatter', type='F',
                        doc='Scatter (via MAD) of E2 residual (starE2 - psfE2) for psf stars')
        schema.addField('psfStarDeltaSizeMedian', type='F',
                        doc='Median size residual (starSize - psfSize) for psf stars (pixel)')
        schema.addField('psfStarDeltaSizeScatter', type='F',
                        doc='Scatter (via MAD) of size residual (starSize - psfSize) for psf stars (pixel)')
        schema.addField('psfStarScaledDeltaSizeScatter', type='F',
                        doc='Scatter (via MAD) of size residual scaled by median size squared')

        return schema
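
# Illustrative sketch (not pipeline code): inspecting a consolidated
# visitSummary catalog once it exists in a Gen3 repo. The repo path,
# collection name, and data ID below are hypothetical.
def _exampleReadVisitSummary():
    from lsst.daf.butler import Butler

    butler = Butler("/path/to/repo", collections="hypothetical/collection")
    visitSummary = butler.get("visitSummary", instrument="HSC", visit=12345)

    # One row per detector with data; the id of each record is the detector id.
    for rec in visitSummary:
        print(rec.getId(), rec["psfSigma"], rec["zeroPoint"])
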
class VisitDataIdContainer(DataIdContainer):
    """DataIdContainer that groups sensor-level IDs by visit.
    """

    def makeDataRefList(self, namespace):
        """Make self.refList from self.idList

        Generate a list of data references grouped by visit.

        Parameters
        ----------
        namespace : `argparse.Namespace`
            Namespace used by `lsst.pipe.base.CmdLineTask` to parse command
            line arguments.
        """
        # Group by visits
        visitRefs = defaultdict(list)
        for dataId in self.idList:
            if "visit" in dataId:
                visitId = dataId["visit"]
                # Collect all refs matching this data ID
                subset = namespace.butler.subset(self.datasetType, dataId=dataId)
                visitRefs[visitId].extend([dataRef for dataRef in subset])

        outputRefList = []
        for refList in visitRefs.values():
            existingRefs = [ref for ref in refList if ref.datasetExists()]
            if existingRefs:
                outputRefList.append(existingRefs)

        self.refList = outputRefList
class ConsolidateSourceTableConnections(pipeBase.PipelineTaskConnections,
                                        defaultTemplates={"catalogType": ""},
                                        dimensions=("instrument", "visit")):
    inputCatalogs = connectionTypes.Input(
        doc="Input per-detector Source Tables",
        name="{catalogType}sourceTable",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector"),
        multiple=True
    )
    outputCatalog = connectionTypes.Output(
        doc="Per-visit concatenation of Source Table",
        name="{catalogType}sourceTable_visit",
        storageClass="DataFrame",
        dimensions=("instrument", "visit")
    )
class ConsolidateSourceTableConfig(pipeBase.PipelineTaskConfig,
                                   pipelineConnections=ConsolidateSourceTableConnections):
    pass
class ConsolidateSourceTableTask(CmdLineTask, pipeBase.PipelineTask):
    """Concatenate `sourceTable` list into a per-visit `sourceTable_visit`.
    """
    _DefaultName = 'consolidateSourceTable'
    ConfigClass = ConsolidateSourceTableConfig

    inputDataset = 'sourceTable'
    outputDataset = 'sourceTable_visit'

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        from .makeCoaddTempExp import reorderRefs

        detectorOrder = [ref.dataId['detector'] for ref in inputRefs.inputCatalogs]
        detectorOrder.sort()
        inputRefs = reorderRefs(inputRefs, detectorOrder, dataIdKey='detector')
        inputs = butlerQC.get(inputRefs)
        self.log.info("Concatenating %s per-detector Source Tables",
                      len(inputs['inputCatalogs']))
        df = pd.concat(inputs['inputCatalogs'])
        butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs)

    def runDataRef(self, dataRefList):
        self.log.info("Concatenating %s per-detector Source Tables", len(dataRefList))
        df = pd.concat([dataRef.get().toDataFrame() for dataRef in dataRefList])
        dataRefList[0].put(ParquetTable(dataFrame=df), self.outputDataset)

    @classmethod
    def _makeArgumentParser(cls):
        parser = ArgumentParser(name=cls._DefaultName)

        parser.add_id_argument("--id", cls.inputDataset,
                               help="data ID, e.g. --id visit=12345",
                               ContainerClass=VisitDataIdContainer)
        return parser
1416 """No metadata to write.
1420 def writeConfig(self, butler, clobber=False, doBackup=True):
1421 """No config to write.
class MakeCcdVisitTableConnections(pipeBase.PipelineTaskConnections,
                                   dimensions=("instrument",),
                                   defaultTemplates={"calexpType": ""}):
    visitSummaryRefs = connectionTypes.Input(
        doc="Data references for per-visit consolidated exposure metadata from ConsolidateVisitSummaryTask",
        name="{calexpType}visitSummary",
        storageClass="ExposureCatalog",
        dimensions=("instrument", "visit"),
        multiple=True,
        deferLoad=True,
    )
    outputCatalog = connectionTypes.Output(
        doc="CCD and Visit metadata table",
        name="ccdVisitTable",
        storageClass="DataFrame",
        dimensions=("instrument",)
    )
class MakeCcdVisitTableConfig(pipeBase.PipelineTaskConfig,
                              pipelineConnections=MakeCcdVisitTableConnections):
    pass
class MakeCcdVisitTableTask(CmdLineTask, pipeBase.PipelineTask):
    """Produce a `ccdVisitTable` from the `visitSummary` exposure catalogs.
    """
    _DefaultName = 'makeCcdVisitTable'
    ConfigClass = MakeCcdVisitTableConfig
    def run(self, visitSummaryRefs):
        """Make a table of ccd information from the `visitSummary` catalogs.

        Parameters
        ----------
        visitSummaryRefs : `list` of `lsst.daf.butler.DeferredDatasetHandle`
            List of DeferredDatasetHandles pointing to exposure catalogs with
            per-detector summary information.

        Returns
        -------
        result : `lsst.pipe.base.Struct`
            Results struct with attribute:

            ``outputCatalog``
                Catalog of ccd and visit information.
        """
        ccdEntries = []
        for visitSummaryRef in visitSummaryRefs:
            visitSummary = visitSummaryRef.get()
            visitInfo = visitSummary[0].getVisitInfo()

            summaryTable = visitSummary.asAstropy()
            selectColumns = ['id', 'visit', 'physical_filter', 'band', 'ra', 'decl', 'zenithDistance',
                             'zeroPoint', 'psfSigma', 'skyBg', 'skyNoise']
            ccdEntry = summaryTable[selectColumns].to_pandas().set_index('id')
            # Rename 'visit' to 'visitId' to match the output data model
            ccdEntry = ccdEntry.rename(columns={"visit": "visitId"})
            dataIds = [DataCoordinate.standardize(visitSummaryRef.dataId, detector=id) for id in
                       summaryTable['id']]
            packer = visitSummaryRef.dataId.universe.makePacker('visit_detector', visitSummaryRef.dataId)
            ccdVisitIds = [packer.pack(dataId) for dataId in dataIds]
            ccdEntry['ccdVisitId'] = ccdVisitIds
            ccdEntry['detector'] = summaryTable['id']
            pixToArcseconds = np.array([vR.getWcs().getPixelScale().asArcseconds() for vR in visitSummary])
            ccdEntry["seeing"] = visitSummary['psfSigma'] * np.sqrt(8 * np.log(2)) * pixToArcseconds
            ccdEntry["skyRotation"] = visitInfo.getBoresightRotAngle().asDegrees()
            ccdEntry["expMidpt"] = visitInfo.getDate().toPython()
            expTime = visitInfo.getExposureTime()
            ccdEntry['expTime'] = expTime
            ccdEntry["obsStart"] = ccdEntry["expMidpt"] - 0.5 * pd.Timedelta(seconds=expTime)
            ccdEntry['darkTime'] = visitInfo.getDarkTime()
            ccdEntry['xSize'] = summaryTable['bbox_max_x'] - summaryTable['bbox_min_x']
            ccdEntry['ySize'] = summaryTable['bbox_max_y'] - summaryTable['bbox_min_y']
            ccdEntry['llcra'] = summaryTable['raCorners'][:, 0]
            ccdEntry['llcdec'] = summaryTable['decCorners'][:, 0]
            ccdEntry['ulcra'] = summaryTable['raCorners'][:, 1]
            ccdEntry['ulcdec'] = summaryTable['decCorners'][:, 1]
            ccdEntry['urcra'] = summaryTable['raCorners'][:, 2]
            ccdEntry['urcdec'] = summaryTable['decCorners'][:, 2]
            ccdEntry['lrcra'] = summaryTable['raCorners'][:, 3]
            ccdEntry['lrcdec'] = summaryTable['decCorners'][:, 3]
            ccdEntries.append(ccdEntry)

        outputCatalog = pd.concat(ccdEntries)
        outputCatalog.set_index('ccdVisitId', inplace=True, verify_integrity=True)
        return pipeBase.Struct(outputCatalog=outputCatalog)
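
# Worked example of the seeing conversion used in run() above: the PSF model
# determinant radius psfSigma (pixels) is converted to a FWHM in arcseconds
# via FWHM = psfSigma * sqrt(8 * ln 2) * pixelScale. The input numbers below
# are hypothetical.
def _exampleSeeing(psfSigma=1.7, pixelScaleArcsec=0.168):
    import numpy as np

    # sqrt(8 ln 2) ~= 2.3548 converts a Gaussian sigma to a FWHM
    return psfSigma * np.sqrt(8 * np.log(2)) * pixelScaleArcsec  # ~0.67 arcsec here
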
class MakeVisitTableConnections(pipeBase.PipelineTaskConnections,
                                dimensions=("instrument",),
                                defaultTemplates={"calexpType": ""}):
    visitSummaries = connectionTypes.Input(
        doc="Per-visit consolidated exposure metadata from ConsolidateVisitSummaryTask",
        name="{calexpType}visitSummary",
        storageClass="ExposureCatalog",
        dimensions=("instrument", "visit",),
        multiple=True,
        deferLoad=True,
    )
    outputCatalog = connectionTypes.Output(
        doc="Visit metadata table",
        name="visitTable",
        storageClass="DataFrame",
        dimensions=("instrument",)
    )
class MakeVisitTableConfig(pipeBase.PipelineTaskConfig,
                           pipelineConnections=MakeVisitTableConnections):
    pass
class MakeVisitTableTask(CmdLineTask, pipeBase.PipelineTask):
    """Produce a `visitTable` from the `visitSummary` exposure catalogs.
    """
    _DefaultName = 'makeVisitTable'
    ConfigClass = MakeVisitTableConfig
    def run(self, visitSummaries):
        """Make a table of visit information from the `visitSummary` catalogs.

        Parameters
        ----------
        visitSummaries : `list` of `lsst.daf.butler.DeferredDatasetHandle`
            List of exposure catalogs with per-detector summary information.

        Returns
        -------
        result : `lsst.pipe.base.Struct`
            Results struct with attribute:

            ``outputCatalog``
                Catalog of visit information.
        """
        visitEntries = []
        for visitSummary in visitSummaries:
            visitSummary = visitSummary.get()
            visitRow = visitSummary[0]
            visitInfo = visitRow.getVisitInfo()

            visitEntry = {}
            visitEntry["visitId"] = visitRow['visit']
            visitEntry["visit"] = visitRow['visit']
            visitEntry["physical_filter"] = visitRow['physical_filter']
            visitEntry["band"] = visitRow['band']
            raDec = visitInfo.getBoresightRaDec()
            visitEntry["ra"] = raDec.getRa().asDegrees()
            visitEntry["decl"] = raDec.getDec().asDegrees()
            visitEntry["skyRotation"] = visitInfo.getBoresightRotAngle().asDegrees()
            azAlt = visitInfo.getBoresightAzAlt()
            visitEntry["azimuth"] = azAlt.getLongitude().asDegrees()
            visitEntry["altitude"] = azAlt.getLatitude().asDegrees()
            visitEntry["zenithDistance"] = 90 - azAlt.getLatitude().asDegrees()
            visitEntry["airmass"] = visitInfo.getBoresightAirmass()
            visitEntry["obsStart"] = visitInfo.getDate().toPython()
            visitEntry["expTime"] = visitInfo.getExposureTime()
            visitEntries.append(visitEntry)

        outputCatalog = pd.DataFrame(data=visitEntries)
        outputCatalog.set_index('visitId', inplace=True, verify_integrity=True)
        return pipeBase.Struct(outputCatalog=outputCatalog)
class WriteForcedSourceTableConnections(pipeBase.PipelineTaskConnections,
                                        dimensions=("instrument", "visit", "detector", "skymap", "tract")):

    inputCatalog = connectionTypes.Input(
        doc="Primary per-detector, single-epoch forced-photometry catalog. "
            "By default, it is the output of ForcedPhotCcdTask on calexps",
        name="forced_src",
        storageClass="SourceCatalog",
        dimensions=("instrument", "visit", "detector", "skymap", "tract")
    )
    inputCatalogDiff = connectionTypes.Input(
        doc="Secondary multi-epoch, per-detector, forced photometry catalog. "
            "By default, it is the output of ForcedPhotCcdTask run on image differences.",
        name="forced_diff",
        storageClass="SourceCatalog",
        dimensions=("instrument", "visit", "detector", "skymap", "tract")
    )
    outputCatalog = connectionTypes.Output(
        doc="InputCatalogs horizontally joined on `objectId` in Parquet format",
        name="mergedForcedSource",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector", "skymap", "tract")
    )
class WriteForcedSourceTableConfig(WriteSourceTableConfig,
                                   pipelineConnections=WriteForcedSourceTableConnections):
    key = lsst.pex.config.Field(
        doc="Column on which to join the two input tables on and make the primary key of the output",
        dtype=str,
        default="objectId",
    )
class WriteForcedSourceTableTask(pipeBase.PipelineTask):
    """Merge and convert per-detector forced source catalogs to parquet.

    Because the predecessor ForcedPhotCcdTask operates per-detector,
    per-tract (i.e., it has tract in its dimensions), detectors
    on the tract boundary may have multiple forced source catalogs.

    The successor task TransformForcedSourceTable runs per-patch
    and temporally aggregates overlapping mergedForcedSource catalogs from all
    available epochs.
    """
    _DefaultName = "writeForcedSourceTable"
    ConfigClass = WriteForcedSourceTableConfig

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)
        # Add ccdVisitId and band columns so the output can be joined with the CcdVisitTable
        inputs['ccdVisitId'] = butlerQC.quantum.dataId.pack("visit_detector")
        inputs['band'] = butlerQC.quantum.dataId.full['band']
        outputs = self.run(**inputs)
        butlerQC.put(outputs, outputRefs)
    def run(self, inputCatalog, inputCatalogDiff, ccdVisitId=None, band=None):
        dfs = []
        for table, dataset in zip((inputCatalog, inputCatalogDiff), ('calexp', 'diff')):
            df = table.asAstropy().to_pandas().set_index(self.config.key, drop=False)
            df = df.reindex(sorted(df.columns), axis=1)
            df['ccdVisitId'] = ccdVisitId if ccdVisitId else pd.NA
            df['band'] = band if band else pd.NA
            df.columns = pd.MultiIndex.from_tuples([(dataset, c) for c in df.columns],
                                                   names=('dataset', 'column'))

            dfs.append(df)

        outputCatalog = functools.reduce(lambda d1, d2: d1.join(d2), dfs)
        return pipeBase.Struct(outputCatalog=outputCatalog)
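
# Illustrative sketch (not pipeline code): the merged forced-source DataFrame
# produced by run() carries a two-level (dataset, column) column index, with
# the calexp and difference-image measurements side by side for each objectId.
# The tiny frame below mimics that layout; column names and values are made up.
def _exampleMergedForcedSourceLayout():
    import pandas as pd

    columns = pd.MultiIndex.from_tuples(
        [("calexp", "base_PsfFlux_instFlux"), ("diff", "base_PsfFlux_instFlux")],
        names=("dataset", "column"))
    return pd.DataFrame([[1.0, 0.1]], columns=columns,
                        index=pd.Index([42], name="objectId"))
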
class TransformForcedSourceTableConnections(pipeBase.PipelineTaskConnections,
                                            dimensions=("instrument", "skymap", "patch", "tract")):

    inputCatalogs = connectionTypes.Input(
        doc="Parquet table of merged ForcedSources produced by WriteForcedSourceTableTask",
        name="mergedForcedSource",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector", "skymap", "tract"),
        multiple=True,
        deferLoad=True
    )
    referenceCatalog = connectionTypes.Input(
        doc="Reference catalog which was used to seed the forcedPhot. Columns "
            "objectId, detect_isPrimary, detect_isTractInner, detect_isPatchInner "
            "are expected.",
        name="objectTable",
        storageClass="DataFrame",
        dimensions=("tract", "patch", "skymap"),
        deferLoad=True
    )
    outputCatalog = connectionTypes.Output(
        doc="Narrower, temporally-aggregated, per-patch ForcedSource Table transformed and converted per a "
            "specified set of functors",
        name="forcedSourceTable",
        storageClass="DataFrame",
        dimensions=("tract", "patch", "skymap")
    )
class TransformForcedSourceTableConfig(TransformCatalogBaseConfig,
                                       pipelineConnections=TransformForcedSourceTableConnections):
    referenceColumns = pexConfig.ListField(
        dtype=str,
        default=["detect_isPrimary", "detect_isTractInner", "detect_isPatchInner"],
        doc="Columns to pull from reference catalog",
    )
    keyRef = lsst.pex.config.Field(
        doc="Column on which to join the two input tables on and make the primary key of the output",
        dtype=str,
        default="objectId",
    )
    key = lsst.pex.config.Field(
        doc="Rename the output DataFrame index to this name",
        dtype=str,
        default="forcedSourceId",
    )

    def setDefaults(self):
        super().setDefaults()
        self.functorFile = os.path.join('$PIPE_TASKS_DIR', 'schemas', 'ForcedSource.yaml')
1719 """Transform/standardize a ForcedSource catalog
1721 Transforms each wide, per-detector forcedSource parquet table per the
1722 specification file (per-camera defaults found in ForcedSource.yaml).
1723 All epochs that overlap the patch are aggregated into one per-patch
1724 narrow-parquet file.
1726 No de-duplication of rows
is performed. Duplicate resolutions flags are
1727 pulled
in from the referenceCatalog: `detect_isPrimary`,
1728 `detect_isTractInner`,`detect_isPatchInner`, so that user may de-duplicate
1729 for analysis
or compare duplicates
for QA.
1731 The resulting table includes multiple bands. Epochs (MJDs)
and other useful
1732 per-visit rows can be retreived by joining
with the CcdVisitTable on
1735 _DefaultName = "transformForcedSourceTable"
1736 ConfigClass = TransformForcedSourceTableConfig
1738 def runQuantum(self, butlerQC, inputRefs, outputRefs):
1739 inputs = butlerQC.get(inputRefs)
1740 if self.funcs
is None:
1741 raise ValueError(
"config.functorFile is None. "
1742 "Must be a valid path to yaml in order to run Task as a PipelineTask.")
1743 outputs = self.run(inputs[
'inputCatalogs'], inputs[
'referenceCatalog'], funcs=self.funcs,
1744 dataId=outputRefs.outputCatalog.dataId.full)
1746 butlerQC.put(outputs, outputRefs)
    def run(self, inputCatalogs, referenceCatalog, funcs=None, dataId=None, band=None):
        dfs = []
        ref = referenceCatalog.get(parameters={"columns": self.config.referenceColumns})
        self.log.info("Aggregating %s input catalogs", len(inputCatalogs))
        for handle in inputCatalogs:
            result = self.transform(None, handle, funcs, dataId)
            # Keep only rows whose objectId appears in this patch's reference catalog
            dfs.append(result.df.join(ref, how='inner'))

        outputCatalog = pd.concat(dfs)

        # The index currently carries the reference-catalog key; name it keyRef
        outputCatalog.index.rename(self.config.keyRef, inplace=True)
        # Move keyRef into a regular column
        outputCatalog.reset_index(inplace=True)
        # Use the per-row forcedSourceId column as the (unique) index
        outputCatalog.set_index("forcedSourceId", inplace=True, verify_integrity=True)
        # Rename the index to the configured key
        outputCatalog.index.rename(self.config.key, inplace=True)

        self.log.info("Made a table of %d columns and %d rows",
                      len(outputCatalog.columns), len(outputCatalog))
        return pipeBase.Struct(outputCatalog=outputCatalog)
class ConsolidateTractConnections(pipeBase.PipelineTaskConnections,
                                  defaultTemplates={"catalogType": ""},
                                  dimensions=("instrument", "tract")):
    inputCatalogs = connectionTypes.Input(
        doc="Input per-patch DataFrame Tables to be concatenated",
        name="{catalogType}ForcedSourceTable",
        storageClass="DataFrame",
        dimensions=("tract", "patch", "skymap"),
        multiple=True,
    )
    outputCatalog = connectionTypes.Output(
        doc="Output per-tract concatenation of DataFrame Tables",
        name="{catalogType}ForcedSourceTable_tract",
        storageClass="DataFrame",
        dimensions=("tract", "skymap"),
    )


class ConsolidateTractConfig(pipeBase.PipelineTaskConfig,
                             pipelineConnections=ConsolidateTractConnections):
    pass


class ConsolidateTractTask(CmdLineTask, pipeBase.PipelineTask):
    """Concatenate any per-patch, dataframe list into a single
    per-tract DataFrame.
    """
    _DefaultName = 'ConsolidateTract'
    ConfigClass = ConsolidateTractConfig

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)

        self.log.info("Concatenating %s per-patch %s Tables",
                      len(inputs['inputCatalogs']),
                      inputRefs.inputCatalogs[0].datasetType.name)
        df = pd.concat(inputs['inputCatalogs'])
        butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs)