import functools
import os
import numbers
from collections import defaultdict

import numpy as np
import pandas as pd

import lsst.afw.table as afwTable
import lsst.daf.base as dafBase
import lsst.pex.config          # referenced below as lsst.pex.config.Field
import lsst.pex.config as pexConfig
import lsst.pipe.base as pipeBase
from lsst.meas.base import SingleFrameMeasurementTask
from lsst.pipe.base import connectionTypes
from lsst.pipe.base import CmdLineTask, ArgumentParser, DataIdContainer
from lsst.daf.butler import DeferredDatasetHandle, DataCoordinate

from .parquetTable import ParquetTable
from .multiBandUtils import makeMergeArgumentParser, MergeSourcesRunner
from .functors import CompositeFunctor, Column
def flattenFilters(df, noDupCols=['coord_ra', 'coord_dec'], camelCase=False, inputBands=None):
    """Flattens a dataframe with multilevel column index.
    """
    newDf = pd.DataFrame()
    # band is the level 0 index
    dfBands = df.columns.unique(level=0).values
    for band in dfBands:
        subdf = df[band]
        columnFormat = '{0}{1}' if camelCase else '{0}_{1}'
        newColumns = {c: columnFormat.format(band, c)
                      for c in subdf.columns if c not in noDupCols}
        cols = list(newColumns.keys())
        newDf = pd.concat([newDf, subdf[cols].rename(columns=newColumns)], axis=1)

    # Restrict to bands that are actually present in the input
    presentBands = dfBands if inputBands is None else list(set(inputBands).intersection(dfBands))
    # Get the unexploded columns from any present band's partition
    noDupDf = df[presentBands[0]][noDupCols]
    newDf = pd.concat([noDupDf, newDf], axis=1)
    return newDf
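# A minimal usage sketch of flattenFilters (synthetic data, not part of the
# pipeline): a two-level (band, column) DataFrame is flattened into per-band,
# name-munged columns, while the noDupCols appear only once.
def _exampleFlattenFilters():
    columns = pd.MultiIndex.from_tuples(
        [('g', 'coord_ra'), ('g', 'PsFlux'), ('r', 'coord_ra'), ('r', 'PsFlux')],
        names=('band', 'column'))
    df = pd.DataFrame([[10.0, 1.2, 10.0, 2.3]], columns=columns)
    flat = flattenFilters(df, noDupCols=['coord_ra'])
    # flat.columns -> ['coord_ra', 'g_PsFlux', 'r_PsFlux']
    return flat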
class WriteObjectTableConnections(pipeBase.PipelineTaskConnections,
                                  defaultTemplates={"coaddName": "deep"},
                                  dimensions=("tract", "patch", "skymap")):
    inputCatalogMeas = connectionTypes.Input(
        doc="Catalog of source measurements on the deepCoadd.",
        dimensions=("tract", "patch", "band", "skymap"),
        storageClass="SourceCatalog",
        name="{coaddName}Coadd_meas",
    )
    inputCatalogForcedSrc = connectionTypes.Input(
        doc="Catalog of forced measurements (shape and position parameters held fixed) on the deepCoadd.",
        dimensions=("tract", "patch", "band", "skymap"),
        storageClass="SourceCatalog",
        name="{coaddName}Coadd_forced_src",
    )
    inputCatalogRef = connectionTypes.Input(
        doc="Catalog marking the primary detection (which band provides a good shape and position) "
            "for each detection in deepCoadd_mergeDet.",
        dimensions=("tract", "patch", "skymap"),
        storageClass="SourceCatalog",
        name="{coaddName}Coadd_ref"
    )
    outputCatalog = connectionTypes.Output(
        doc="A vertical concatenation of the deepCoadd_{ref|meas|forced_src} catalogs, "
            "stored as a DataFrame with a multi-level column index per-patch.",
        dimensions=("tract", "patch", "skymap"),
        storageClass="DataFrame",
        name="{coaddName}Coadd_obj"
    )
class WriteObjectTableConfig(pipeBase.PipelineTaskConfig,
                             pipelineConnections=WriteObjectTableConnections):
    engine = pexConfig.Field(
        dtype=str,
        doc="Parquet engine for writing (pyarrow or fastparquet)"
    )
    coaddName = pexConfig.Field(
        dtype=str,
        doc="Name of coadd"
    )
class WriteObjectTableTask(CmdLineTask, pipeBase.PipelineTask):
    """Write filter-merged source tables to parquet.
    """
    _DefaultName = "writeObjectTable"
    ConfigClass = WriteObjectTableConfig
    RunnerClass = MergeSourcesRunner

    # Names of table datasets to be merged
    inputDatasets = ('forced_src', 'meas', 'ref')

    # Name of the output dataset tag
    outputDataset = 'obj'

    def __init__(self, butler=None, schema=None, **kwargs):
        # butler and schema are accepted for compatibility with the runner
        # call signature; they are not used here.
        super().__init__(**kwargs)

    def runDataRef(self, patchRefList):
        """
        @brief Merge coadd sources from multiple bands. Calls @ref `run` which must be defined in
        subclasses that inherit from MergeSourcesTask.
        @param[in] patchRefList list of data references for each filter
        """
        catalogs = dict(self.readCatalog(patchRef) for patchRef in patchRefList)
        dataId = patchRefList[0].dataId
        mergedCatalog = self.run(catalogs, tract=dataId['tract'], patch=dataId['patch'])
        self.write(patchRefList[0], ParquetTable(dataFrame=mergedCatalog))
    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)
        measDict = {ref.dataId['band']: {'meas': cat} for ref, cat in
                    zip(inputRefs.inputCatalogMeas, inputs['inputCatalogMeas'])}
        forcedSourceDict = {ref.dataId['band']: {'forced_src': cat} for ref, cat in
                            zip(inputRefs.inputCatalogForcedSrc, inputs['inputCatalogForcedSrc'])}

        catalogs = {}
        for band in measDict.keys():
            catalogs[band] = {'meas': measDict[band]['meas'],
                              'forced_src': forcedSourceDict[band]['forced_src'],
                              'ref': inputs['inputCatalogRef']}
        dataId = butlerQC.quantum.dataId
        df = self.run(catalogs=catalogs, tract=dataId['tract'], patch=dataId['patch'])
        outputs = pipeBase.Struct(outputCatalog=df)
        butlerQC.put(outputs, outputRefs)
    @classmethod
    def _makeArgumentParser(cls):
        """Create a suitable ArgumentParser.

        We will use the ArgumentParser to get a list of data
        references for patches; the RunnerClass will sort them into lists
        of data references for the same patch.

        References first of self.inputDatasets, rather than
        self.outputDataset.
        """
        return makeMergeArgumentParser(cls._DefaultName, cls.inputDatasets[0])

    def readCatalog(self, patchRef):
        """Read input catalogs.

        Read all the input datasets given by the 'inputDatasets'
        attribute.

        Parameters
        ----------
        patchRef : `lsst.daf.persistence.ButlerDataRef`
            Data reference for patch.

        Returns
        -------
        Tuple consisting of band name and a dict of catalogs, keyed by
        dataset name.
        """
        band = patchRef.get(self.config.coaddName + "Coadd_filterLabel", immediate=True).bandLabel
        catalogDict = {}
        for dataset in self.inputDatasets:
            catalog = patchRef.get(self.config.coaddName + "Coadd_" + dataset, immediate=True)
            self.log.info("Read %d sources from %s for band %s: %s",
                          len(catalog), dataset, band, patchRef.dataId)
            catalogDict[dataset] = catalog
        return band, catalogDict
    def run(self, catalogs, tract, patch):
        """Merge multiple catalogs.

        Parameters
        ----------
        catalogs : `dict`
            Mapping from filter names to dict of catalogs.
        tract : `int`
            tractId to use for the tractId column.
        patch : `str`
            patchId to use for the patchId column.

        Returns
        -------
        catalog : `pandas.DataFrame`
            Merged dataframe.
        """
        dfs = []
        for filt, tableDict in catalogs.items():
            for dataset, table in tableDict.items():
                # Convert afwTable to pandas DataFrame
                df = table.asAstropy().to_pandas().set_index('id', drop=True)

                # Sort columns by name, to ensure matching schema among patches
                df = df.reindex(sorted(df.columns), axis=1)
                df['tractId'] = tract
                df['patchId'] = patch

                # Make columns a 3-level MultiIndex
                df.columns = pd.MultiIndex.from_tuples([(dataset, filt, c) for c in df.columns],
                                                       names=('dataset', 'band', 'column'))
                dfs.append(df)

        catalog = functools.reduce(lambda d1, d2: d1.join(d2), dfs)
        return catalog
    def write(self, patchRef, catalog):
        """Write the output.

        Parameters
        ----------
        catalog : `ParquetTable`
            Catalog to write.
        patchRef : `lsst.daf.persistence.ButlerDataRef`
            Data reference for patch.
        """
        patchRef.put(catalog, self.config.coaddName + "Coadd_" + self.outputDataset)
        mergeDataId = patchRef.dataId.copy()
        del mergeDataId["filter"]
        self.log.info("Wrote merged catalog: %s", mergeDataId)

    def writeMetadata(self, dataRefList):
        """No metadata to write, and not sure how to write it for a list of dataRefs.
        """
        pass
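# A small sketch (synthetic single-row tables, not part of the pipeline) of the
# DataFrame layout built by WriteObjectTableTask.run: each per-band, per-dataset
# table gets a (dataset, band, column) MultiIndex and the pieces are joined
# horizontally on the object id index.
def _exampleObjMultiIndex():
    dfs = []
    for band in ('g', 'r'):
        for dataset in ('meas', 'forced_src'):
            df = pd.DataFrame({'id': [42], 'PsFlux': [1.0]}).set_index('id', drop=True)
            df.columns = pd.MultiIndex.from_tuples([(dataset, band, c) for c in df.columns],
                                                   names=('dataset', 'band', 'column'))
            dfs.append(df)
    return functools.reduce(lambda d1, d2: d1.join(d2), dfs)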
class WriteSourceTableConnections(pipeBase.PipelineTaskConnections,
                                  defaultTemplates={"catalogType": ""},
                                  dimensions=("instrument", "visit", "detector")):

    catalog = connectionTypes.Input(
        doc="Input full-depth catalog of sources produced by CalibrateTask",
        name="{catalogType}src",
        storageClass="SourceCatalog",
        dimensions=("instrument", "visit", "detector")
    )
    outputCatalog = connectionTypes.Output(
        doc="Catalog of sources, `src` in Parquet format. The 'id' column is "
            "replaced with an index; all other columns are unchanged.",
        name="{catalogType}source",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector")
    )
class WriteSourceTableConfig(pipeBase.PipelineTaskConfig,
                             pipelineConnections=WriteSourceTableConnections):
    doApplyExternalPhotoCalib = pexConfig.Field(
        dtype=bool,
        default=False,
        doc=("Add local photoCalib columns from the calexp.photoCalib? Should only set True if "
             "generating Source Tables from older src tables which do not already have local calib columns")
    )
    doApplyExternalSkyWcs = pexConfig.Field(
        dtype=bool,
        default=False,
        doc=("Add local WCS columns from the calexp.wcs? Should only set True if "
             "generating Source Tables from older src tables which do not already have local calib columns")
    )
class WriteSourceTableTask(CmdLineTask, pipeBase.PipelineTask):
    """Write source table to parquet.
    """
    _DefaultName = "writeSourceTable"
    ConfigClass = WriteSourceTableConfig

    def runDataRef(self, dataRef):
        src = dataRef.get('src')
        if self.config.doApplyExternalPhotoCalib or self.config.doApplyExternalSkyWcs:
            src = self.addCalibColumns(src, dataRef)

        ccdVisitId = dataRef.get('ccdExposureId')
        result = self.run(src, ccdVisitId=ccdVisitId)
        dataRef.put(result.table, 'source')

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)
        inputs['ccdVisitId'] = butlerQC.quantum.dataId.pack("visit_detector")
        result = self.run(**inputs).table
        outputs = pipeBase.Struct(outputCatalog=result.toDataFrame())
        butlerQC.put(outputs, outputRefs)

    def run(self, catalog, ccdVisitId=None):
        """Convert `src` catalog to parquet.

        Parameters
        ----------
        catalog : `afwTable.SourceCatalog`
            catalog to be converted
        ccdVisitId : `int`
            ccdVisitId to be added as a column

        Returns
        -------
        result : `lsst.pipe.base.Struct`
            ``table``
                `ParquetTable` version of the input catalog
        """
        self.log.info("Generating parquet table from src catalog %s", ccdVisitId)
        df = catalog.asAstropy().to_pandas().set_index('id', drop=True)
        df['ccdVisitId'] = ccdVisitId
        return pipeBase.Struct(table=ParquetTable(dataFrame=df))
    def addCalibColumns(self, catalog, dataRef):
        """Add columns with local calibration evaluated at each centroid,
        for backwards compatibility with old repos.

        This exists for the purpose of converting old src catalogs
        (which don't have the expected local calib columns) to Source Tables.

        Parameters
        ----------
        catalog : `afwTable.SourceCatalog`
            catalog to which calib columns will be added
        dataRef : `lsst.daf.persistence.ButlerDataRef`
            for fetching the calibs from disk.

        Returns
        -------
        newCat : `afwTable.SourceCatalog`
            Source Catalog with requested local calib columns
        """
        mapper = afwTable.SchemaMapper(catalog.schema)
        measureConfig = SingleFrameMeasurementTask.ConfigClass()
        measureConfig.doReplaceWithNoise = False

        # Only the photoCalib/wcs attached to the exposure are needed
        exposure = dataRef.get('calexp_sub')

        mapper = afwTable.SchemaMapper(catalog.schema)
        mapper.addMinimalSchema(catalog.schema, True)
        schema = mapper.getOutputSchema()

        exposureIdInfo = dataRef.get("expIdInfo")
        measureConfig.plugins.names = []
        if self.config.doApplyExternalSkyWcs:
            plugin = 'base_LocalWcs'
            if plugin in schema:
                raise RuntimeError(f"{plugin} already in src catalog. Set doApplyExternalSkyWcs=False")
            else:
                measureConfig.plugins.names.add(plugin)

        if self.config.doApplyExternalPhotoCalib:
            plugin = 'base_LocalPhotoCalib'
            if plugin in schema:
                raise RuntimeError(f"{plugin} already in src catalog. Set doApplyExternalPhotoCalib=False")
            else:
                measureConfig.plugins.names.add(plugin)

        measurement = SingleFrameMeasurementTask(config=measureConfig, schema=schema)
        newCat = afwTable.SourceCatalog(schema)
        newCat.extend(catalog, mapper=mapper)
        measurement.run(measCat=newCat, exposure=exposure, exposureId=exposureIdInfo.expId)
        return newCat

    def writeMetadata(self, dataRef):
        """No metadata to write.
        """
        pass

    @classmethod
    def _makeArgumentParser(cls):
        parser = ArgumentParser(name=cls._DefaultName)
        parser.add_id_argument("--id", 'src',
                               help="data ID, e.g. --id visit=12345 ccd=0")
        return parser
class PostprocessAnalysis(object):
    """Calculate columns from ParquetTable.

    This object manages and organizes an arbitrary set of computations
    on a catalog. The catalog is defined by a
    `deepCoadd_obj` dataset, and the computations are defined by a collection
    of `lsst.pipe.tasks.functor.Functor` objects (or, equivalently,
    a `CompositeFunctor`).

    After the object is initialized, accessing the `.df` attribute (which
    holds the `pandas.DataFrame` containing the results of the calculations)
    triggers computation of said dataframe.

    One of the conveniences of using this object is the ability to define a
    desired common filter for all functors. This enables the same functor
    collection to be passed to several different `PostprocessAnalysis` objects
    without having to change the original functor collection, since the `filt`
    keyword argument of this object triggers an overwrite of the `filt`
    property for all functors in the collection.

    This object also allows a list of refFlags to be passed, and defines a set
    of default refFlags that are always included even if not requested.

    If a list of `ParquetTable` objects is passed, rather than a single one,
    then the calculations will be mapped over all the input catalogs. In
    principle, it should be straightforward to parallelize this activity, but
    initial tests have failed (see TODO in code comments).

    Parameters
    ----------
    parq : `lsst.pipe.tasks.ParquetTable` (or list of such)
        Source catalog(s) for computation.

    functors : `list`, `dict`, or `CompositeFunctor`
        Computations to do (functors that act on `parq`).
        If a dict, the output
        DataFrame will have columns keyed accordingly.
        If a list, the column keys will come from the
        `.shortname` attribute of each functor.

    filt : `str` (optional)
        Filter in which to calculate. If provided,
        this will overwrite any existing `.filt` attribute
        of the provided functors.

    flags : `list` (optional)
        List of flags (per-band) to include in output table.
        Taken from the `meas` dataset if applied to a multilevel Object Table.

    refFlags : `list` (optional)
        List of refFlags (only reference band) to include in output table.

    forcedFlags : `list` (optional)
        List of flags (per-band) to include in output table.
        Taken from the ``forced_src`` dataset if applied to a
        multilevel Object Table. Intended for flags from measurement plugins
        only run during multi-band forced-photometry.
    """
    _defaultRefFlags = []
    def __init__(self, parq, functors, filt=None, flags=None, refFlags=None, forcedFlags=None):
        self.parq = parq
        self.functors = functors

        self.filt = filt
        self.flags = list(flags) if flags is not None else []
        self.forcedFlags = list(forcedFlags) if forcedFlags is not None else []
        self.refFlags = list(self._defaultRefFlags)
        if refFlags is not None:
            self.refFlags += list(refFlags)

        self._df = None

    @property
    def defaultFuncs(self):
        funcs = dict(self._defaultFuncs)
        return funcs

    @property
    def func(self):
        additionalFuncs = self.defaultFuncs
        additionalFuncs.update({flag: Column(flag, dataset='forced_src') for flag in self.forcedFlags})
        additionalFuncs.update({flag: Column(flag, dataset='ref') for flag in self.refFlags})
        additionalFuncs.update({flag: Column(flag, dataset='meas') for flag in self.flags})

        if isinstance(self.functors, CompositeFunctor):
            func = self.functors
        else:
            func = CompositeFunctor(self.functors)

        func.funcDict.update(additionalFuncs)
        func.filt = self.filt

        return func

    @property
    def noDupCols(self):
        return [name for name, func in self.func.funcDict.items() if func.noDup or func.dataset == 'ref']

    @property
    def df(self):
        if self._df is None:
            self.compute()
        return self._df

    def compute(self, dropna=False, pool=None):
        # Map over multiple parquet tables if a list was passed
        if type(self.parq) in (list, tuple):
            if pool is None:
                dflist = [self.func(parq, dropna=dropna) for parq in self.parq]
            else:
                # TODO: parallel computation with pool currently fails
                # (see class docstring)
                dflist = pool.map(functools.partial(self.func, dropna=dropna), self.parq)
            self._df = pd.concat(dflist)
        else:
            self._df = self.func(self.parq, dropna=dropna)
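# A minimal usage sketch of PostprocessAnalysis (not part of the pipeline),
# assuming `parq` is a ParquetTable (e.g. one deepCoadd_obj patch) supplied by
# the caller; the functor dict keys become the output DataFrame columns.
def _examplePostprocessAnalysis(parq, band='g'):
    funcs = {'ra': Column('coord_ra', dataset='ref'),
             'dec': Column('coord_dec', dataset='ref')}
    analysis = PostprocessAnalysis(parq, funcs, filt=band,
                                   refFlags=['detect_isPrimary'])
    # Accessing .df triggers the computation
    return analysis.df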
class TransformCatalogBaseConnections(pipeBase.PipelineTaskConnections,
                                      dimensions=()):
    """Expected Connections for subclasses of TransformCatalogBaseTask.
    """
    inputCatalog = connectionTypes.Input(
        storageClass="DataFrame",
    )
    outputCatalog = connectionTypes.Output(
        storageClass="DataFrame",
    )


class TransformCatalogBaseConfig(pipeBase.PipelineTaskConfig,
                                 pipelineConnections=TransformCatalogBaseConnections):
    functorFile = pexConfig.Field(
        dtype=str,
        doc="Path to YAML file specifying Science Data Model functors to use "
            "when copying columns and computing calibrated values.",
        default=None,
        optional=True
    )
    primaryKey = pexConfig.Field(
        dtype=str,
        doc="Name of column to be set as the DataFrame index. If None, the index"
            " will be named `id`",
        default=None,
        optional=True
    )
class TransformCatalogBaseTask(CmdLineTask, pipeBase.PipelineTask):
    """Base class for transforming/standardizing a catalog by applying functors
    that convert units and apply calibrations.

    The purpose of this task is to perform a set of computations on
    an input `ParquetTable` dataset (such as `deepCoadd_obj`) and write the
    results to a new dataset (which needs to be declared in an `outputDataset`
    attribute).

    The calculations to be performed are defined in a YAML file that specifies
    a set of functors to be computed, provided as
    a `--functorFile` config parameter. An example of such a YAML file
    looks like the following (abridged)::

        funcs:
            ...
            count:
                functor: Column
                args:
                    - base_InputCount_value
            deconvolved_moments:
                functor: DeconvolvedMoments
            ...
        refFlags:
            ...
            - merge_measurement_i
            - merge_measurement_r
            - merge_measurement_z
            - merge_measurement_y
            - merge_measurement_g
            - base_PixelFlags_flag_inexact_psfCenter

    The names for each entry under "funcs" will become the names of columns in
    the output dataset.
    Positional arguments to be passed to each functor are in the `args` list,
    and any additional entries for each column other than "functor" or "args"
    (e.g., `'filt'`, `'dataset'`) are treated as keyword arguments to be passed
    to the functor initialization.

    The "flags" entry is the default shortcut for `Column` functors.
    All columns listed under "flags" will be copied to the output table
    untransformed. They can be of any datatype.
    In the special case of transforming a multi-level object table with
    band and dataset indices (deepCoadd_obj), these will be taken from the
    `meas` dataset and exploded out per band.

    There are two special shortcuts that only apply when transforming
    multi-level Object (deepCoadd_obj) tables:

    - The "refFlags" entry is a shortcut for `Column` functors
      taken from the `'ref'` dataset if transforming an ObjectTable.
    - The "forcedFlags" entry is a shortcut for `Column` functors
      taken from the ``forced_src`` dataset if transforming an ObjectTable.
      These are expanded out per band.

    This task uses the `lsst.pipe.tasks.postprocess.PostprocessAnalysis` object
    to organize and execute the calculations.
    """
    @property
    def _DefaultName(self):
        raise NotImplementedError('Subclass must define "_DefaultName" attribute')

    @property
    def outputDataset(self):
        raise NotImplementedError('Subclass must define "outputDataset" attribute')

    @property
    def inputDataset(self):
        raise NotImplementedError('Subclass must define "inputDataset" attribute')

    @property
    def ConfigClass(self):
        raise NotImplementedError('Subclass must define "ConfigClass" attribute')
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        if self.config.functorFile:
            self.log.info('Loading transform functor definitions from %s',
                          self.config.functorFile)
            self.funcs = CompositeFunctor.from_file(self.config.functorFile)
            self.funcs.update(dict(PostprocessAnalysis._defaultFuncs))
        else:
            self.funcs = None

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)
        if self.funcs is None:
            raise ValueError("config.functorFile is None. "
                             "Must be a valid path to yaml in order to run Task as a PipelineTask.")
        result = self.run(parq=inputs['inputCatalog'], funcs=self.funcs,
                          dataId=outputRefs.outputCatalog.dataId.full)
        outputs = pipeBase.Struct(outputCatalog=result)
        butlerQC.put(outputs, outputRefs)

    def runDataRef(self, dataRef):
        parq = dataRef.get()
        if self.funcs is None:
            raise ValueError("config.functorFile is None. "
                             "Must be a valid path to yaml in order to run as a CommandlineTask.")
        df = self.run(parq, funcs=self.funcs, dataId=dataRef.dataId)
        self.write(df, dataRef)
        return df
    def run(self, parq, funcs=None, dataId=None, band=None):
        """Do postprocessing calculations.

        Takes a `ParquetTable` object and dataId,
        returns a dataframe with results of postprocessing calculations.

        Parameters
        ----------
        parq : `ParquetTable`
            ParquetTable from which calculations are done.
        funcs : `lsst.pipe.tasks.functors.Functors`
            Functors to apply to the table's columns
        dataId : dict, optional
            Used to add a `patchId` column to the output dataframe.
        band : `str`, optional
            Filter band that is being processed.

        Returns
        -------
        df : `pandas.DataFrame`
        """
        self.log.info("Transforming/standardizing the source table dataId: %s", dataId)

        df = self.transform(band, parq, funcs, dataId).df
        self.log.info("Made a table of %d columns and %d rows", len(df.columns), len(df))
        return df

    def getFunctors(self):
        return self.funcs

    def getAnalysis(self, parq, funcs=None, band=None):
        if funcs is None:
            funcs = self.funcs
        analysis = PostprocessAnalysis(parq, funcs, filt=band)
        return analysis

    def transform(self, band, parq, funcs, dataId):
        analysis = self.getAnalysis(parq, funcs=funcs, band=band)
        df = analysis.df
        if dataId is not None:
            for key, value in dataId.items():
                df[str(key)] = value

        if self.config.primaryKey:
            if df.index.name != self.config.primaryKey and self.config.primaryKey in df:
                df.reset_index(inplace=True, drop=True)
                df.set_index(self.config.primaryKey, inplace=True)

        return pipeBase.Struct(
            df=df,
            analysis=analysis
        )

    def write(self, df, parqRef):
        parqRef.put(ParquetTable(dataFrame=df), self.outputDataset)

    def writeMetadata(self, dataRef):
        """No metadata to write.
        """
        pass
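# A minimal sketch of the functor-file mechanism described in the class
# docstring above (not part of the pipeline), assuming 'myFunctors.yaml' (a
# hypothetical path) follows the layout shown there and `parq` is a
# ParquetTable: the parsed CompositeFunctor is evaluated through
# PostprocessAnalysis, one output column per entry.
def _exampleApplyFunctorFile(parq, functorFile='myFunctors.yaml', band=None):
    funcs = CompositeFunctor.from_file(functorFile)
    analysis = PostprocessAnalysis(parq, funcs, filt=band)
    return analysis.df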
class TransformObjectCatalogConnections(pipeBase.PipelineTaskConnections,
                                        defaultTemplates={"coaddName": "deep"},
                                        dimensions=("tract", "patch", "skymap")):
    inputCatalog = connectionTypes.Input(
        doc="The vertical concatenation of the deepCoadd_{ref|meas|forced_src} catalogs, "
            "stored as a DataFrame with a multi-level column index per-patch.",
        dimensions=("tract", "patch", "skymap"),
        storageClass="DataFrame",
        name="{coaddName}Coadd_obj",
    )
    outputCatalog = connectionTypes.Output(
        doc="Per-Patch Object Table of columns transformed from the deepCoadd_obj table per the standard "
            "data model.",
        dimensions=("tract", "patch", "skymap"),
        storageClass="DataFrame",
        name="objectTable",
    )
class TransformObjectCatalogConfig(TransformCatalogBaseConfig,
                                   pipelineConnections=TransformObjectCatalogConnections):
    coaddName = pexConfig.Field(
        dtype=str,
        default="deep",
        doc="Name of coadd"
    )
    filterMap = pexConfig.DictField(
        keytype=str,
        itemtype=str,
        doc=("Dictionary mapping full filter name to short one for column name munging. "
             "These filters determine the output columns no matter what filters the "
             "input data actually contain."),
        deprecated=("Coadds are now identified by the band, so this transform is unused. "
                    "Will be removed after v22.")
    )
    outputBands = pexConfig.ListField(
        dtype=str,
        default=None,
        optional=True,
        doc=("These bands and only these bands will appear in the output,"
             " NaN-filled if the input does not include them."
             " If None, then use all bands found in the input.")
    )
    camelCase = pexConfig.Field(
        dtype=bool,
        doc=("Write per-band columns names with camelCase, else underscore "
             "For example: gPsFlux instead of g_PsFlux.")
    )
    multilevelOutput = pexConfig.Field(
        dtype=bool,
        doc=("Whether results dataframe should have a multilevel column index (True) or be flat "
             "and name-munged (False).")
    )
    goodFlags = pexConfig.ListField(
        dtype=str,
        doc=("List of 'good' flags that should be set False when populating empty tables. "
             "All other flags are considered to be 'bad' flags and will be set to True.")
    )
    floatFillValue = pexConfig.Field(
        dtype=float,
        default=np.nan,
        doc="Fill value for float fields when populating empty tables."
    )
    integerFillValue = pexConfig.Field(
        dtype=int,
        doc="Fill value for integer fields when populating empty tables."
    )

    def setDefaults(self):
        super().setDefaults()
        self.functorFile = os.path.join('$PIPE_TASKS_DIR', 'schemas', 'Object.yaml')
        self.primaryKey = 'objectId'
        self.goodFlags = ['calib_astrometry_used',
                          'calib_photometry_reserved',
                          'calib_photometry_used',
                          'calib_psf_candidate',
                          'calib_psf_reserved']
class TransformObjectCatalogTask(TransformCatalogBaseTask):
    """Produce a flattened Object Table to match the format specified in
    the standard data model.

    Do the same set of postprocessing calculations on all bands.

    This is identical to `TransformCatalogBaseTask`, except that it does the
    specified functor calculations for all filters present in the
    input `deepCoadd_obj` table. Any specific `"filt"` keywords specified
    by the YAML file will be superseded.
    """
    _DefaultName = "transformObjectCatalog"
    ConfigClass = TransformObjectCatalogConfig

    inputDataset = 'deepCoadd_obj'
    outputDataset = 'objectTable'

    @classmethod
    def _makeArgumentParser(cls):
        parser = ArgumentParser(name=cls._DefaultName)
        parser.add_id_argument("--id", cls.inputDataset,
                               ContainerClass=CoaddDataIdContainer,
                               help="data ID, e.g. --id tract=12345 patch=1,2")
        return parser
    def run(self, parq, funcs=None, dataId=None, band=None):
        dfDict = {}
        analysisDict = {}
        templateDf = pd.DataFrame()

        if isinstance(parq, DeferredDatasetHandle):
            columns = parq.get(component='columns')
            inputBands = columns.unique(level=1).values
        else:
            inputBands = parq.columnLevelNames['band']

        outputBands = self.config.outputBands if self.config.outputBands else inputBands

        # Perform transform for data of filters that exist in parq.
        for inputBand in inputBands:
            if inputBand not in outputBands:
                self.log.info("Ignoring %s band data in the input", inputBand)
                continue
            self.log.info("Transforming the catalog of band %s", inputBand)
            result = self.transform(inputBand, parq, funcs, dataId)
            dfDict[inputBand] = result.df
            analysisDict[inputBand] = result.analysis
            if templateDf.empty:
                templateDf = result.df

        # Fill in columns for requested bands that are missing from the input.
        for filt in outputBands:
            if filt not in dfDict:
                self.log.info("Adding empty columns for band %s", filt)
                dfTemp = templateDf.copy()
                for col in dfTemp.columns:
                    testValue = dfTemp[col].values[0]
                    if isinstance(testValue, (np.bool_, pd.BooleanDtype)):
                        # Flag columns: 'good' flags fill with False, all
                        # others with True.
                        if col in self.config.goodFlags:
                            fillValue = False
                        else:
                            fillValue = True
                    elif isinstance(testValue, numbers.Integral):
                        # Integer columns (numpy, pandas, or python ints)
                        if isinstance(testValue, np.unsignedinteger):
                            raise ValueError("Parquet tables may not have unsigned integer columns.")
                        else:
                            fillValue = self.config.integerFillValue
                    else:
                        fillValue = self.config.floatFillValue
                    dfTemp[col].values[:] = fillValue
                dfDict[filt] = dfTemp

        # This makes a multilevel column index, with band as first level
        df = pd.concat(dfDict, axis=1, names=['band', 'column'])

        if not self.config.multilevelOutput:
            noDupCols = list(set.union(*[set(v.noDupCols) for v in analysisDict.values()]))
            if self.config.primaryKey in noDupCols:
                noDupCols.remove(self.config.primaryKey)
            if dataId is not None:
                noDupCols += list(dataId.keys())
            df = flattenFilters(df, noDupCols=noDupCols, camelCase=self.config.camelCase,
                                inputBands=inputBands)

        self.log.info("Made a table of %d columns and %d rows", len(df.columns), len(df))

        return df
class TractObjectDataIdContainer(CoaddDataIdContainer):

    def makeDataRefList(self, namespace):
        """Make self.refList from self.idList

        Generate a list of data references given tract and/or patch.
        This was adapted from `TractQADataIdContainer`, which was
        `TractDataIdContainer` modified to not require "filter".
        Only existing dataRefs are returned.
        """
        def getPatchRefList(tract):
            return [namespace.butler.dataRef(datasetType=self.datasetType,
                                             tract=tract.getId(),
                                             patch="%d,%d" % patch.getIndex()) for patch in tract]

        tractRefs = defaultdict(list)  # Data references for each tract
        for dataId in self.idList:
            skymap = self.getSkymap(namespace)

            if "tract" in dataId:
                tractId = dataId["tract"]
                if "patch" in dataId:
                    tractRefs[tractId].append(namespace.butler.dataRef(datasetType=self.datasetType,
                                                                       tract=tractId,
                                                                       patch=dataId['patch']))
                else:
                    tractRefs[tractId] += getPatchRefList(skymap[tractId])
            else:
                tractRefs = dict((tract.getId(), tractRefs.get(tract.getId(), []) + getPatchRefList(tract))
                                 for tract in skymap)
        outputRefList = []
        for tractRefList in tractRefs.values():
            existingRefs = [ref for ref in tractRefList if ref.datasetExists()]
            outputRefList.append(existingRefs)

        self.refList = outputRefList
class ConsolidateObjectTableConnections(pipeBase.PipelineTaskConnections,
                                        dimensions=("tract", "skymap")):
    inputCatalogs = connectionTypes.Input(
        doc="Per-Patch objectTables conforming to the standard data model.",
        name="objectTable",
        storageClass="DataFrame",
        dimensions=("tract", "patch", "skymap"),
        multiple=True,
    )
    outputCatalog = connectionTypes.Output(
        doc="Per-tract horizontal concatenation of the input objectTables",
        name="objectTable_tract",
        storageClass="DataFrame",
        dimensions=("tract", "skymap"),
    )
class ConsolidateObjectTableConfig(pipeBase.PipelineTaskConfig,
                                   pipelineConnections=ConsolidateObjectTableConnections):
    coaddName = pexConfig.Field(
        dtype=str,
        doc="Name of coadd"
    )
class ConsolidateObjectTableTask(CmdLineTask, pipeBase.PipelineTask):
    """Write patch-merged source tables to a tract-level parquet file.

    Concatenates the `objectTable` list into a per-tract `objectTable_tract`.
    """
    _DefaultName = "consolidateObjectTable"
    ConfigClass = ConsolidateObjectTableConfig

    inputDataset = 'objectTable'
    outputDataset = 'objectTable_tract'

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)
        self.log.info("Concatenating %s per-patch Object Tables",
                      len(inputs['inputCatalogs']))
        df = pd.concat(inputs['inputCatalogs'])
        butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs)
    @classmethod
    def _makeArgumentParser(cls):
        parser = ArgumentParser(name=cls._DefaultName)

        parser.add_id_argument("--id", cls.inputDataset,
                               help="data ID, e.g. --id tract=12345",
                               ContainerClass=TractObjectDataIdContainer)
        return parser

    def runDataRef(self, patchRefList):
        df = pd.concat([patchRef.get().toDataFrame() for patchRef in patchRefList])
        patchRefList[0].put(ParquetTable(dataFrame=df), self.outputDataset)

    def writeMetadata(self, dataRef):
        """No metadata to write.
        """
        pass
class TransformSourceTableConnections(pipeBase.PipelineTaskConnections,
                                      defaultTemplates={"catalogType": ""},
                                      dimensions=("instrument", "visit", "detector")):

    inputCatalog = connectionTypes.Input(
        doc="Wide input catalog of sources produced by WriteSourceTableTask",
        name="{catalogType}source",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector"),
    )
    outputCatalog = connectionTypes.Output(
        doc="Narrower, per-detector Source Table transformed and converted per a "
            "specified set of functors",
        name="{catalogType}sourceTable",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector")
    )


class TransformSourceTableConfig(TransformCatalogBaseConfig,
                                 pipelineConnections=TransformSourceTableConnections):

    def setDefaults(self):
        super().setDefaults()
        self.functorFile = os.path.join('$PIPE_TASKS_DIR', 'schemas', 'Source.yaml')
        self.primaryKey = 'sourceId'
class TransformSourceTableTask(TransformCatalogBaseTask):
    """Transform/standardize a source catalog.
    """
    _DefaultName = "transformSourceTable"
    ConfigClass = TransformSourceTableConfig

    inputDataset = 'source'
    outputDataset = 'sourceTable'

    @classmethod
    def _makeArgumentParser(cls):
        parser = ArgumentParser(name=cls._DefaultName)
        parser.add_id_argument("--id", datasetType=cls.inputDataset,
                               help="data ID, e.g. --id visit=12345 ccd=0")
        return parser

    def runDataRef(self, dataRef):
        """Override to specify band label to run()."""
        parq = dataRef.get()
        funcs = self.getFunctors()
        band = dataRef.get("calexp_filterLabel", immediate=True).bandLabel
        df = self.run(parq, funcs=funcs, dataId=dataRef.dataId, band=band)
        self.write(df, dataRef)
        return df
class ConsolidateVisitSummaryConnections(pipeBase.PipelineTaskConnections,
                                         dimensions=("instrument", "visit",),
                                         defaultTemplates={"calexpType": ""}):
    calexp = connectionTypes.Input(
        doc="Processed exposures used for metadata",
        name="{calexpType}calexp",
        storageClass="ExposureF",
        dimensions=("instrument", "visit", "detector"),
        deferLoad=True,
        multiple=True,
    )
    visitSummary = connectionTypes.Output(
        doc=("Per-visit consolidated exposure metadata. These catalogs use "
             "detector id for the id and are sorted for fast lookups of a "
             "detector."),
        name="{calexpType}visitSummary",
        storageClass="ExposureCatalog",
        dimensions=("instrument", "visit"),
    )


class ConsolidateVisitSummaryConfig(pipeBase.PipelineTaskConfig,
                                    pipelineConnections=ConsolidateVisitSummaryConnections):
    """Config for ConsolidateVisitSummaryTask"""
    pass
class ConsolidateVisitSummaryTask(pipeBase.PipelineTask, pipeBase.CmdLineTask):
    """Task to consolidate per-detector visit metadata.

    This task aggregates the following metadata from all the detectors in a
    single visit into an exposure catalog:

    - The physical_filter and band (if available).
    - The psf size, shape, and effective area at the center of the detector.
    - The corners of the bounding box in right ascension/declination.

    Other quantities such as Detector, Psf, ApCorrMap, and TransmissionCurve
    are not persisted here because of storage concerns, and because of their
    limited utility as summary statistics.

    Tests for this task are performed in ci_hsc_gen3.
    """
    _DefaultName = "consolidateVisitSummary"
    ConfigClass = ConsolidateVisitSummaryConfig
    @classmethod
    def _makeArgumentParser(cls):
        parser = ArgumentParser(name=cls._DefaultName)

        parser.add_id_argument("--id", "calexp",
                               help="data ID, e.g. --id visit=12345",
                               ContainerClass=VisitDataIdContainer)
        return parser

    def writeMetadata(self, dataRef):
        """No metadata to persist, so override to remove metadata persistence.
        """
        pass

    def writeConfig(self, butler, clobber=False, doBackup=True):
        """No config to persist, so override to remove config persistence.
        """
        pass
    def runDataRef(self, dataRefList):
        visit = dataRefList[0].dataId['visit']

        self.log.debug("Concatenating metadata from %d per-detector calexps (visit %d)",
                       len(dataRefList), visit)

        expCatalog = self._combineExposureMetadata(visit, dataRefList, isGen3=False)

        dataRefList[0].put(expCatalog, 'visitSummary', visit=visit)

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        dataRefs = butlerQC.get(inputRefs.calexp)
        visit = dataRefs[0].dataId.byName()['visit']

        self.log.debug("Concatenating metadata from %d per-detector calexps (visit %d)",
                       len(dataRefs), visit)

        expCatalog = self._combineExposureMetadata(visit, dataRefs)

        butlerQC.put(expCatalog, outputRefs.visitSummary)
    def _combineExposureMetadata(self, visit, dataRefs, isGen3=True):
        """Make a combined exposure catalog from a list of dataRefs.
        These dataRefs must point to exposures with wcs, summaryStats,
        and other visit metadata.

        Parameters
        ----------
        visit : `int`
            Visit identification number.
        dataRefs : `list`
            List of dataRefs in visit. May be list of
            `lsst.daf.persistence.ButlerDataRef` (Gen2) or
            `lsst.daf.butler.DeferredDatasetHandle` (Gen3).
        isGen3 : `bool`, optional
            Specifies if this is a Gen3 list of datarefs.

        Returns
        -------
        visitSummary : `lsst.afw.table.ExposureCatalog`
            Exposure catalog with per-detector summary information.
        """
        schema = self._makeVisitSummarySchema()
        cat = afwTable.ExposureCatalog(schema)
        cat.resize(len(dataRefs))

        cat['visit'] = visit

        for i, dataRef in enumerate(dataRefs):
            if isGen3:
                visitInfo = dataRef.get(component='visitInfo')
                filterLabel = dataRef.get(component='filterLabel')
                summaryStats = dataRef.get(component='summaryStats')
                detector = dataRef.get(component='detector')
                wcs = dataRef.get(component='wcs')
                photoCalib = dataRef.get(component='photoCalib')
                detector = dataRef.get(component='detector')
                bbox = dataRef.get(component='bbox')
                validPolygon = dataRef.get(component='validPolygon')
            else:
                # Gen2: the metadata comes attached to the calexp itself.
                exp = dataRef.get(datasetType='calexp_sub', bbox=gen2_read_bbox)
                visitInfo = exp.getInfo().getVisitInfo()
                filterLabel = dataRef.get("calexp_filterLabel")
                summaryStats = exp.getInfo().getSummaryStats()
                wcs = exp.getWcs()
                photoCalib = exp.getPhotoCalib()
                detector = exp.getDetector()
                bbox = dataRef.get(datasetType='calexp_bbox')
                validPolygon = exp.getInfo().getValidPolygon()

            rec = cat[i]
            rec.setBBox(bbox)
            rec.setVisitInfo(visitInfo)
            rec.setWcs(wcs)
            rec.setPhotoCalib(photoCalib)
            rec.setValidPolygon(validPolygon)

            rec['physical_filter'] = filterLabel.physicalLabel if filterLabel.hasPhysicalLabel() else ""
            rec['band'] = filterLabel.bandLabel if filterLabel.hasBandLabel() else ""
            rec.setId(detector.getId())
            rec['psfSigma'] = summaryStats.psfSigma
            rec['psfIxx'] = summaryStats.psfIxx
            rec['psfIyy'] = summaryStats.psfIyy
            rec['psfIxy'] = summaryStats.psfIxy
            rec['psfArea'] = summaryStats.psfArea
            rec['raCorners'][:] = summaryStats.raCorners
            rec['decCorners'][:] = summaryStats.decCorners
            rec['ra'] = summaryStats.ra
            rec['decl'] = summaryStats.decl
            rec['zenithDistance'] = summaryStats.zenithDistance
            rec['zeroPoint'] = summaryStats.zeroPoint
            rec['skyBg'] = summaryStats.skyBg
            rec['skyNoise'] = summaryStats.skyNoise
            rec['meanVar'] = summaryStats.meanVar
            rec['astromOffsetMean'] = summaryStats.astromOffsetMean
            rec['astromOffsetStd'] = summaryStats.astromOffsetStd
            rec['nPsfStar'] = summaryStats.nPsfStar
            rec['psfStarDeltaE1Median'] = summaryStats.psfStarDeltaE1Median
            rec['psfStarDeltaE2Median'] = summaryStats.psfStarDeltaE2Median
            rec['psfStarDeltaE1Scatter'] = summaryStats.psfStarDeltaE1Scatter
            rec['psfStarDeltaE2Scatter'] = summaryStats.psfStarDeltaE2Scatter
            rec['psfStarDeltaSizeMedian'] = summaryStats.psfStarDeltaSizeMedian
            rec['psfStarDeltaSizeScatter'] = summaryStats.psfStarDeltaSizeScatter
            rec['psfStarScaledDeltaSizeScatter'] = summaryStats.psfStarScaledDeltaSizeScatter

        metadata = dafBase.PropertyList()
        metadata.add("COMMENT", "Catalog id is detector id, sorted.")
        metadata.add("COMMENT", "Only detectors with data have entries.")
        cat.setMetadata(metadata)

        cat.sort()
        return cat
    def _makeVisitSummarySchema(self):
        """Make the schema for the visitSummary catalog."""
        schema = afwTable.ExposureTable.makeMinimalSchema()
        schema.addField('visit', type='I', doc='Visit number')
        schema.addField('physical_filter', type='String', size=32, doc='Physical filter')
        schema.addField('band', type='String', size=32, doc='Name of band')
        schema.addField('psfSigma', type='F',
                        doc='PSF model second-moments determinant radius (center of chip) (pixel)')
        schema.addField('psfArea', type='F',
                        doc='PSF model effective area (center of chip) (pixel**2)')
        schema.addField('psfIxx', type='F',
                        doc='PSF model Ixx (center of chip) (pixel**2)')
        schema.addField('psfIyy', type='F',
                        doc='PSF model Iyy (center of chip) (pixel**2)')
        schema.addField('psfIxy', type='F',
                        doc='PSF model Ixy (center of chip) (pixel**2)')
        schema.addField('raCorners', type='ArrayD', size=4,
                        doc='Right Ascension of bounding box corners (degrees)')
        schema.addField('decCorners', type='ArrayD', size=4,
                        doc='Declination of bounding box corners (degrees)')
        schema.addField('ra', type='D',
                        doc='Right Ascension of bounding box center (degrees)')
        schema.addField('decl', type='D',
                        doc='Declination of bounding box center (degrees)')
        schema.addField('zenithDistance', type='F',
                        doc='Zenith distance of bounding box center (degrees)')
        schema.addField('zeroPoint', type='F',
                        doc='Mean zeropoint in detector (mag)')
        schema.addField('skyBg', type='F',
                        doc='Average sky background (ADU)')
        schema.addField('skyNoise', type='F',
                        doc='Average sky noise (ADU)')
        schema.addField('meanVar', type='F',
                        doc='Mean variance of the weight plane (ADU**2)')
        schema.addField('astromOffsetMean', type='F',
                        doc='Mean offset of astrometric calibration matches (arcsec)')
        schema.addField('astromOffsetStd', type='F',
                        doc='Standard deviation of offsets of astrometric calibration matches (arcsec)')
        schema.addField('nPsfStar', type='I', doc='Number of stars used for PSF model')
        schema.addField('psfStarDeltaE1Median', type='F',
                        doc='Median E1 residual (starE1 - psfE1) for psf stars')
        schema.addField('psfStarDeltaE2Median', type='F',
                        doc='Median E2 residual (starE2 - psfE2) for psf stars')
        schema.addField('psfStarDeltaE1Scatter', type='F',
                        doc='Scatter (via MAD) of E1 residual (starE1 - psfE1) for psf stars')
        schema.addField('psfStarDeltaE2Scatter', type='F',
                        doc='Scatter (via MAD) of E2 residual (starE2 - psfE2) for psf stars')
        schema.addField('psfStarDeltaSizeMedian', type='F',
                        doc='Median size residual (starSize - psfSize) for psf stars (pixel)')
        schema.addField('psfStarDeltaSizeScatter', type='F',
                        doc='Scatter (via MAD) of size residual (starSize - psfSize) for psf stars (pixel)')
        schema.addField('psfStarScaledDeltaSizeScatter', type='F',
                        doc='Scatter (via MAD) of size residual scaled by median size squared')
        return schema
class VisitDataIdContainer(DataIdContainer):
    """DataIdContainer that groups sensor-level ids by visit.
    """

    def makeDataRefList(self, namespace):
        """Make self.refList from self.idList

        Generate a list of data references grouped by visit.

        Parameters
        ----------
        namespace : `argparse.Namespace`
            Namespace used by `lsst.pipe.base.CmdLineTask` to parse command line arguments
        """
        # Group by visits
        visitRefs = defaultdict(list)
        for dataId in self.idList:
            if "visit" in dataId:
                visitId = dataId["visit"]
                subset = namespace.butler.subset(self.datasetType, dataId=dataId)
                visitRefs[visitId].extend([dataRef for dataRef in subset])

        outputRefList = []
        for refList in visitRefs.values():
            existingRefs = [ref for ref in refList if ref.datasetExists()]
            if existingRefs:
                outputRefList.append(existingRefs)

        self.refList = outputRefList
class ConsolidateSourceTableConnections(pipeBase.PipelineTaskConnections,
                                        defaultTemplates={"catalogType": ""},
                                        dimensions=("instrument", "visit")):
    inputCatalogs = connectionTypes.Input(
        doc="Input per-detector Source Tables",
        name="{catalogType}sourceTable",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector"),
        multiple=True
    )
    outputCatalog = connectionTypes.Output(
        doc="Per-visit concatenation of Source Table",
        name="{catalogType}sourceTable_visit",
        storageClass="DataFrame",
        dimensions=("instrument", "visit")
    )


class ConsolidateSourceTableConfig(pipeBase.PipelineTaskConfig,
                                   pipelineConnections=ConsolidateSourceTableConnections):
    pass
class ConsolidateSourceTableTask(CmdLineTask, pipeBase.PipelineTask):
    """Concatenate `sourceTable` list into a per-visit `sourceTable_visit`.
    """
    _DefaultName = 'consolidateSourceTable'
    ConfigClass = ConsolidateSourceTableConfig

    inputDataset = 'sourceTable'
    outputDataset = 'sourceTable_visit'

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)
        self.log.info("Concatenating %s per-detector Source Tables",
                      len(inputs['inputCatalogs']))
        df = pd.concat(inputs['inputCatalogs'])
        butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs)

    def runDataRef(self, dataRefList):
        self.log.info("Concatenating %s per-detector Source Tables", len(dataRefList))
        df = pd.concat([dataRef.get().toDataFrame() for dataRef in dataRefList])
        dataRefList[0].put(ParquetTable(dataFrame=df), self.outputDataset)

    @classmethod
    def _makeArgumentParser(cls):
        parser = ArgumentParser(name=cls._DefaultName)

        parser.add_id_argument("--id", cls.inputDataset,
                               help="data ID, e.g. --id visit=12345",
                               ContainerClass=VisitDataIdContainer)
        return parser

    def writeMetadata(self, dataRef):
        """No metadata to write.
        """
        pass

    def writeConfig(self, butler, clobber=False, doBackup=True):
        """No config to write.
        """
        pass
class MakeCcdVisitTableConnections(pipeBase.PipelineTaskConnections,
                                   dimensions=("instrument",),
                                   defaultTemplates={"calexpType": ""}):
    visitSummaryRefs = connectionTypes.Input(
        doc="Data references for per-visit consolidated exposure metadata from ConsolidateVisitSummaryTask",
        name="{calexpType}visitSummary",
        storageClass="ExposureCatalog",
        dimensions=("instrument", "visit"),
        multiple=True,
        deferLoad=True,
    )
    outputCatalog = connectionTypes.Output(
        doc="CCD and Visit metadata table",
        name="ccdVisitTable",
        storageClass="DataFrame",
        dimensions=("instrument",)
    )


class MakeCcdVisitTableConfig(pipeBase.PipelineTaskConfig,
                              pipelineConnections=MakeCcdVisitTableConnections):
    pass
class MakeCcdVisitTableTask(CmdLineTask, pipeBase.PipelineTask):
    """Produce a `ccdVisitTable` from the `visitSummary` exposure catalogs.
    """
    _DefaultName = 'makeCcdVisitTable'
    ConfigClass = MakeCcdVisitTableConfig

    def run(self, visitSummaryRefs):
        """Make a table of ccd information from the `visitSummary` catalogs.

        Parameters
        ----------
        visitSummaryRefs : `list` of `lsst.daf.butler.DeferredDatasetHandle`
            List of DeferredDatasetHandles pointing to exposure catalogs with
            per-detector summary information.

        Returns
        -------
        result : `lsst.pipe.base.Struct`
            Results struct with attribute:

            ``outputCatalog``
                Catalog of ccd and visit information.
        """
        ccdEntries = []
        for visitSummaryRef in visitSummaryRefs:
            visitSummary = visitSummaryRef.get()
            visitInfo = visitSummary[0].getVisitInfo()

            summaryTable = visitSummary.asAstropy()
            selectColumns = ['id', 'visit', 'physical_filter', 'band', 'ra', 'decl', 'zenithDistance',
                             'zeroPoint', 'psfSigma', 'skyBg', 'skyNoise']
            ccdEntry = summaryTable[selectColumns].to_pandas().set_index('id')
            # 'visit' is the human-readable visit number; rename to 'visitId'
            # to match the data model.
            ccdEntry = ccdEntry.rename(columns={"visit": "visitId"})
            dataIds = [DataCoordinate.standardize(visitSummaryRef.dataId, detector=id) for id in
                       summaryTable['id']]
            packer = visitSummaryRef.dataId.universe.makePacker('visit_detector', visitSummaryRef.dataId)
            ccdVisitIds = [packer.pack(dataId) for dataId in dataIds]
            ccdEntry['ccdVisitId'] = ccdVisitIds
            ccdEntry['detector'] = summaryTable['id']
            pixToArcseconds = np.array([vR.getWcs().getPixelScale().asArcseconds() for vR in visitSummary])
            ccdEntry["seeing"] = visitSummary['psfSigma'] * np.sqrt(8 * np.log(2)) * pixToArcseconds

            ccdEntry["skyRotation"] = visitInfo.getBoresightRotAngle().asDegrees()
            ccdEntry["expMidpt"] = visitInfo.getDate().toPython()
            expTime = visitInfo.getExposureTime()
            ccdEntry['expTime'] = expTime
            ccdEntry["obsStart"] = ccdEntry["expMidpt"] - 0.5 * pd.Timedelta(seconds=expTime)
            ccdEntry['darkTime'] = visitInfo.getDarkTime()
            ccdEntry['xSize'] = summaryTable['bbox_max_x'] - summaryTable['bbox_min_x']
            ccdEntry['ySize'] = summaryTable['bbox_max_y'] - summaryTable['bbox_min_y']
            ccdEntry['llcra'] = summaryTable['raCorners'][:, 0]
            ccdEntry['llcdec'] = summaryTable['decCorners'][:, 0]
            ccdEntry['ulcra'] = summaryTable['raCorners'][:, 1]
            ccdEntry['ulcdec'] = summaryTable['decCorners'][:, 1]
            ccdEntry['urcra'] = summaryTable['raCorners'][:, 2]
            ccdEntry['urcdec'] = summaryTable['decCorners'][:, 2]
            ccdEntry['lrcra'] = summaryTable['raCorners'][:, 3]
            ccdEntry['lrcdec'] = summaryTable['decCorners'][:, 3]

            ccdEntries.append(ccdEntry)

        outputCatalog = pd.concat(ccdEntries)
        outputCatalog.set_index('ccdVisitId', inplace=True, verify_integrity=True)
        return pipeBase.Struct(outputCatalog=outputCatalog)
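# A short numeric sketch (illustrative values, not part of the pipeline) of the
# seeing column computed above: the per-detector PSF model sigma (pixels) is
# converted to a FWHM in arcseconds using FWHM = sigma * sqrt(8 ln 2) times the
# detector pixel scale.
def _exampleSeeing(psfSigma=1.7, pixelScaleArcsec=0.2):
    return psfSigma * np.sqrt(8 * np.log(2)) * pixelScaleArcsec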
class MakeVisitTableConnections(pipeBase.PipelineTaskConnections,
                                dimensions=("instrument",),
                                defaultTemplates={"calexpType": ""}):
    visitSummaries = connectionTypes.Input(
        doc="Per-visit consolidated exposure metadata from ConsolidateVisitSummaryTask",
        name="{calexpType}visitSummary",
        storageClass="ExposureCatalog",
        dimensions=("instrument", "visit",),
        multiple=True,
        deferLoad=True,
    )
    outputCatalog = connectionTypes.Output(
        doc="Visit metadata table",
        name="visitTable",
        storageClass="DataFrame",
        dimensions=("instrument",)
    )


class MakeVisitTableConfig(pipeBase.PipelineTaskConfig,
                           pipelineConnections=MakeVisitTableConnections):
    pass
class MakeVisitTableTask(CmdLineTask, pipeBase.PipelineTask):
    """Produce a `visitTable` from the `visitSummary` exposure catalogs.
    """
    _DefaultName = 'makeVisitTable'
    ConfigClass = MakeVisitTableConfig

    def run(self, visitSummaries):
        """Make a table of visit information from the `visitSummary` catalogs.

        Parameters
        ----------
        visitSummaries : `list`
            List of exposure catalogs with per-detector summary information.

        Returns
        -------
        result : `lsst.pipe.base.Struct`
            Results struct with attribute:

            ``outputCatalog``
                Catalog of visit information.
        """
        visitEntries = []
        for visitSummary in visitSummaries:
            visitSummary = visitSummary.get()
            visitRow = visitSummary[0]
            visitInfo = visitRow.getVisitInfo()

            visitEntry = {}
            visitEntry["visitId"] = visitRow['visit']
            visitEntry["visit"] = visitRow['visit']
            visitEntry["physical_filter"] = visitRow['physical_filter']
            visitEntry["band"] = visitRow['band']
            raDec = visitInfo.getBoresightRaDec()
            visitEntry["ra"] = raDec.getRa().asDegrees()
            visitEntry["decl"] = raDec.getDec().asDegrees()
            visitEntry["skyRotation"] = visitInfo.getBoresightRotAngle().asDegrees()
            azAlt = visitInfo.getBoresightAzAlt()
            visitEntry["azimuth"] = azAlt.getLongitude().asDegrees()
            visitEntry["altitude"] = azAlt.getLatitude().asDegrees()
            visitEntry["zenithDistance"] = 90 - azAlt.getLatitude().asDegrees()
            visitEntry["airmass"] = visitInfo.getBoresightAirmass()
            visitEntry["obsStart"] = visitInfo.getDate().toPython()
            visitEntry["expTime"] = visitInfo.getExposureTime()
            visitEntries.append(visitEntry)

        outputCatalog = pd.DataFrame(data=visitEntries)
        outputCatalog.set_index('visitId', inplace=True, verify_integrity=True)
        return pipeBase.Struct(outputCatalog=outputCatalog)
class WriteForcedSourceTableConnections(pipeBase.PipelineTaskConnections,
                                        dimensions=("instrument", "visit", "detector", "skymap", "tract")):

    inputCatalog = connectionTypes.Input(
        doc="Primary per-detector, single-epoch forced-photometry catalog. "
            "By default, it is the output of ForcedPhotCcdTask on calexps",
        storageClass="SourceCatalog",
        dimensions=("instrument", "visit", "detector", "skymap", "tract")
    )
    inputCatalogDiff = connectionTypes.Input(
        doc="Secondary multi-epoch, per-detector, forced photometry catalog. "
            "By default, it is the output of ForcedPhotCcdTask run on image differences.",
        storageClass="SourceCatalog",
        dimensions=("instrument", "visit", "detector", "skymap", "tract")
    )
    outputCatalog = connectionTypes.Output(
        doc="InputCatalogs horizontally joined on `objectId` in Parquet format",
        name="mergedForcedSource",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector", "skymap", "tract")
    )


class WriteForcedSourceTableConfig(WriteSourceTableConfig,
                                   pipelineConnections=WriteForcedSourceTableConnections):
    key = lsst.pex.config.Field(
        doc="Column on which to join the two input tables on and make the primary key of the output",
        dtype=str,
        default="objectId",
    )
class WriteForcedSourceTableTask(pipeBase.PipelineTask):
    """Merge and convert per-detector forced source catalogs to parquet.

    Because the predecessor ForcedPhotCcdTask operates per-detector,
    per-tract, (i.e., it has tract in its dimensions), detectors
    on the tract boundary may have multiple forced source catalogs.

    The successor task TransformForcedSourceTable runs per-patch
    and temporally-aggregates overlapping mergedForcedSource catalogs from all
    available epochs.
    """
    _DefaultName = "writeForcedSourceTable"
    ConfigClass = WriteForcedSourceTableConfig

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)
        # Add ccdVisitId and band so the output can be joined with CcdVisitTable
        inputs['ccdVisitId'] = butlerQC.quantum.dataId.pack("visit_detector")
        inputs['band'] = butlerQC.quantum.dataId.full['band']
        outputs = self.run(**inputs)
        butlerQC.put(outputs, outputRefs)

    def run(self, inputCatalog, inputCatalogDiff, ccdVisitId=None, band=None):
        dfs = []
        for table, dataset in zip((inputCatalog, inputCatalogDiff), ('calexp', 'diff')):
            df = table.asAstropy().to_pandas().set_index(self.config.key, drop=False)
            df = df.reindex(sorted(df.columns), axis=1)
            df['ccdVisitId'] = ccdVisitId if ccdVisitId else pd.NA
            df['band'] = band if band else pd.NA
            df.columns = pd.MultiIndex.from_tuples([(dataset, c) for c in df.columns],
                                                   names=('dataset', 'column'))

            dfs.append(df)

        outputCatalog = functools.reduce(lambda d1, d2: d1.join(d2), dfs)
        return pipeBase.Struct(outputCatalog=outputCatalog)
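# A small sketch (synthetic tables, not part of the pipeline) of the horizontal
# join performed by WriteForcedSourceTableTask.run: the calexp and diff
# measurements share the objectId index and are distinguished by the first
# ('dataset') column level.
def _exampleMergedForcedSource():
    calexp = pd.DataFrame({'objectId': [1, 2], 'psfFlux': [3.0, 4.0]}).set_index('objectId', drop=False)
    diff = pd.DataFrame({'objectId': [1, 2], 'psfFlux': [0.1, -0.2]}).set_index('objectId', drop=False)
    dfs = []
    for table, dataset in zip((calexp, diff), ('calexp', 'diff')):
        df = table.copy()
        df.columns = pd.MultiIndex.from_tuples([(dataset, c) for c in df.columns],
                                               names=('dataset', 'column'))
        dfs.append(df)
    return functools.reduce(lambda d1, d2: d1.join(d2), dfs)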
class TransformForcedSourceTableConnections(pipeBase.PipelineTaskConnections,
                                            dimensions=("instrument", "skymap", "patch", "tract")):

    inputCatalogs = connectionTypes.Input(
        doc="Parquet table of merged ForcedSources produced by WriteForcedSourceTableTask",
        name="mergedForcedSource",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector", "skymap", "tract"),
        multiple=True,
        deferLoad=True,
    )
    referenceCatalog = connectionTypes.Input(
        doc="Reference catalog which was used to seed the forcedPhot. Columns "
            "objectId, detect_isPrimary, detect_isTractInner, detect_isPatchInner "
            "are expected.",
        storageClass="DataFrame",
        dimensions=("tract", "patch", "skymap"),
        deferLoad=True,
    )
    outputCatalog = connectionTypes.Output(
        doc="Narrower, temporally-aggregated, per-patch ForcedSource Table transformed and converted per a "
            "specified set of functors",
        name="forcedSourceTable",
        storageClass="DataFrame",
        dimensions=("tract", "patch", "skymap")
    )


class TransformForcedSourceTableConfig(TransformCatalogBaseConfig,
                                       pipelineConnections=TransformForcedSourceTableConnections):
    referenceColumns = pexConfig.ListField(
        dtype=str,
        default=["detect_isPrimary", "detect_isTractInner", "detect_isPatchInner"],
        doc="Columns to pull from reference catalog",
    )
    keyRef = lsst.pex.config.Field(
        doc="Column on which to join the two input tables on and make the primary key of the output",
        dtype=str,
        default="objectId",
    )
    key = lsst.pex.config.Field(
        doc="Rename the output DataFrame index to this name",
        dtype=str,
        default="forcedSourceId",
    )

    def setDefaults(self):
        super().setDefaults()
        self.functorFile = os.path.join('$PIPE_TASKS_DIR', 'schemas', 'ForcedSource.yaml')
class TransformForcedSourceTableTask(TransformCatalogBaseTask):
    """Transform/standardize a ForcedSource catalog.

    Transforms each wide, per-detector forcedSource parquet table per the
    specification file (per-camera defaults found in ForcedSource.yaml).
    All epochs that overlap the patch are aggregated into one per-patch
    narrow-parquet file.

    No de-duplication of rows is performed. Duplicate resolution flags are
    pulled in from the referenceCatalog (`detect_isPrimary`,
    `detect_isTractInner`, `detect_isPatchInner`), so that the user may
    de-duplicate for analysis or compare duplicates for QA.

    The resulting table includes multiple bands. Epochs (MJDs) and other useful
    per-visit rows can be retrieved by joining with the CcdVisitTable on
    ccdVisitId.
    """
    _DefaultName = "transformForcedSourceTable"
    ConfigClass = TransformForcedSourceTableConfig

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)
        if self.funcs is None:
            raise ValueError("config.functorFile is None. "
                             "Must be a valid path to yaml in order to run Task as a PipelineTask.")
        outputs = self.run(inputs['inputCatalogs'], inputs['referenceCatalog'], funcs=self.funcs,
                           dataId=outputRefs.outputCatalog.dataId.full)

        butlerQC.put(outputs, outputRefs)

    def run(self, inputCatalogs, referenceCatalog, funcs=None, dataId=None, band=None):
        dfs = []
        ref = referenceCatalog.get(parameters={"columns": self.config.referenceColumns})
        self.log.info("Aggregating %s input catalogs" % (len(inputCatalogs)))
        for handle in inputCatalogs:
            result = self.transform(None, handle, funcs, dataId)
            # Keep only rows that also appear in the reference catalog for this patch
            dfs.append(result.df.join(ref, how='inner'))

        outputCatalog = pd.concat(dfs)

        # The index is currently the keyRef column (e.g. objectId) used for the
        # join; demote it to a regular column and index on the per-row key.
        outputCatalog.index.rename(self.config.keyRef, inplace=True)
        outputCatalog.reset_index(inplace=True)
        outputCatalog.set_index("forcedSourceId", inplace=True, verify_integrity=True)
        outputCatalog.index.rename(self.config.key, inplace=True)

        self.log.info("Made a table of %d columns and %d rows",
                      len(outputCatalog.columns), len(outputCatalog))
        return pipeBase.Struct(outputCatalog=outputCatalog)
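# A small sketch (synthetic frame, not part of the pipeline) of the index
# handling above: the join index (config.keyRef, e.g. objectId) is demoted to a
# regular column and the forcedSourceId column becomes the new index named
# config.key.
def _exampleForcedSourceIndex():
    df = pd.DataFrame({'forcedSourceId': [11, 12], 'psfFlux': [1.0, 2.0]},
                      index=pd.Index([1, 1], name='objectId'))
    df.reset_index(inplace=True)
    df.set_index('forcedSourceId', inplace=True, verify_integrity=True)
    df.index.rename('forcedSourceId', inplace=True)
    return df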
class ConsolidateTractConnections(pipeBase.PipelineTaskConnections,
                                  defaultTemplates={"catalogType": ""},
                                  dimensions=("instrument", "tract")):
    inputCatalogs = connectionTypes.Input(
        doc="Input per-patch DataFrame Tables to be concatenated",
        name="{catalogType}ForcedSourceTable",
        storageClass="DataFrame",
        dimensions=("tract", "patch", "skymap"),
        multiple=True,
    )
    outputCatalog = connectionTypes.Output(
        doc="Output per-tract concatenation of DataFrame Tables",
        name="{catalogType}ForcedSourceTable_tract",
        storageClass="DataFrame",
        dimensions=("tract", "skymap"),
    )


class ConsolidateTractConfig(pipeBase.PipelineTaskConfig,
                             pipelineConnections=ConsolidateTractConnections):
    pass


class ConsolidateTractTask(CmdLineTask, pipeBase.PipelineTask):
    """Concatenate any per-patch, dataframe list into a single
    per-tract DataFrame.
    """
    _DefaultName = 'ConsolidateTract'
    ConfigClass = ConsolidateTractConfig

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)

        self.log.info("Concatenating %s per-patch %s Tables",
                      len(inputs['inputCatalogs']),
                      inputRefs.inputCatalogs[0].datasetType.name)
        df = pd.concat(inputs['inputCatalogs'])
        butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs)