import functools
from collections import defaultdict

import numpy as np
import pandas as pd

import lsst.geom
import lsst.pex.config as pexConfig
import lsst.pipe.base as pipeBase
import lsst.afw.table as afwTable
from lsst.meas.base import SingleFrameMeasurementTask
from lsst.pipe.base import CmdLineTask, ArgumentParser, DataIdContainer
from lsst.pipe.base import connectionTypes
from lsst.coadd.utils.coaddDataIdContainer import CoaddDataIdContainer

from .parquetTable import ParquetTable
from .multiBandUtils import makeMergeArgumentParser, MergeSourcesRunner
from .functors import CompositeFunctor, RAColumn, DecColumn, Column
def flattenFilters(df, noDupCols=['coord_ra', 'coord_dec'], camelCase=False):
    """Flatten a dataframe with a multilevel column index.
    """
    newDf = pd.DataFrame()
    for band in set(df.columns.to_frame()['band']):
        subdf = df[band]
        columnFormat = '{0}{1}' if camelCase else '{0}_{1}'
        newColumns = {c: columnFormat.format(band, c)
                      for c in subdf.columns if c not in noDupCols}
        cols = list(newColumns.keys())
        newDf = pd.concat([newDf, subdf[cols].rename(columns=newColumns)], axis=1)

    newDf = pd.concat([subdf[noDupCols], newDf], axis=1)
    return newDf
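
# Illustrative sketch (plain pandas, invented values): flattenFilters turns a
# band-keyed multilevel column index into flat, band-prefixed column names.
# The column names and numbers below are made up purely for demonstration.
def _flattenFiltersExample():
    bands = ['g', 'r']
    columns = pd.MultiIndex.from_product([bands, ['coord_ra', 'coord_dec', 'PsFlux']],
                                          names=['band', 'column'])
    df = pd.DataFrame(np.ones((3, len(columns))), columns=columns)
    flat = flattenFilters(df, camelCase=True)
    # flat.columns: coord_ra and coord_dec once, plus gPsFlux and rPsFlux
    # (band order follows set iteration, so it may vary).
    return flat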
class WriteObjectTableConfig(pexConfig.Config):
    engine = pexConfig.Field(
        dtype=str,
        default="pyarrow",
        doc="Parquet engine for writing (pyarrow or fastparquet)"
    )
    coaddName = pexConfig.Field(
        dtype=str,
        default="deep",
        doc="Name of coadd"
    )


class WriteObjectTableTask(CmdLineTask):
    """Write filter-merged source tables to parquet.
    """
    _DefaultName = "writeObjectTable"
    ConfigClass = WriteObjectTableConfig
    RunnerClass = MergeSourcesRunner

    # Names of table datasets to be merged
    inputDatasets = ('forced_src', 'meas', 'ref')

    def __init__(self, butler=None, schema=None, **kwargs):
        # butler and schema are accepted for compatibility with the task
        # runner's call signature, but are unused here.
        CmdLineTask.__init__(self, **kwargs)
    def runDataRef(self, patchRefList):
        """!
        @brief Merge coadd sources from multiple bands. Calls @ref run which must be defined in
        subclasses that inherit from MergeSourcesTask.
        @param[in] patchRefList list of data references for each filter
        """
        catalogs = dict(self.readCatalog(patchRef) for patchRef in patchRefList)
        dataId = patchRefList[0].dataId
        mergedCatalog = self.run(catalogs, tract=dataId['tract'], patch=dataId['patch'])
        self.write(patchRefList[0], mergedCatalog)
    @classmethod
    def _makeArgumentParser(cls):
        """Create a suitable ArgumentParser.

        We will use the ArgumentParser to get a list of data
        references for patches; the RunnerClass will sort them into lists
        of data references for the same patch.

        References first of self.inputDatasets, rather than
        self.inputDataset.
        """
        return makeMergeArgumentParser(cls._DefaultName, cls.inputDatasets[0])
114 """Read input catalogs
116 Read all the input datasets given by the 'inputDatasets'
121 patchRef : `lsst.daf.persistence.ButlerDataRef`
122 Data reference for patch
126 Tuple consisting of band name and a dict of catalogs, keyed by
129 band = patchRef.get(self.config.coaddName +
"Coadd_filterLabel", immediate=
True).bandLabel
132 catalog = patchRef.get(self.config.coaddName +
"Coadd_" + dataset, immediate=
True)
133 self.log.info(
"Read %d sources from %s for band %s: %s" %
134 (len(catalog), dataset, band, patchRef.dataId))
135 catalogDict[dataset] = catalog
136 return band, catalogDict
    def run(self, catalogs, tract, patch):
        """Merge multiple catalogs.

        Parameters
        ----------
        catalogs : `dict`
            Mapping from filter names to dict of catalogs.
        tract : `int`
            tractId to use for the tractId column.
        patch : `str`
            patchId to use for the patchId column.

        Returns
        -------
        catalog : `lsst.pipe.tasks.parquetTable.ParquetTable`
            Merged dataframe, with each column prefixed by
            `filter_tag(filt)`, wrapped in the parquet writer shim class.
        """
        dfs = []
        for filt, tableDict in catalogs.items():
            for dataset, table in tableDict.items():
                # Convert afwTable to pandas DataFrame, indexed by source id
                df = table.asAstropy().to_pandas().set_index('id', drop=True)

                # Sort columns by name, to ensure matching schema among patches
                df = df.reindex(sorted(df.columns), axis=1)
                df['tractId'] = tract
                df['patchId'] = patch

                # Make columns a 3-level MultiIndex
                df.columns = pd.MultiIndex.from_tuples([(dataset, filt, c) for c in df.columns],
                                                       names=('dataset', 'band', 'column'))
                dfs.append(df)

        catalog = functools.reduce(lambda d1, d2: d1.join(d2), dfs)
        return ParquetTable(dataFrame=catalog)
    def write(self, patchRef, catalog):
        """Write the output.

        Parameters
        ----------
        catalog : `ParquetTable`
            Catalog to write.
        patchRef : `lsst.daf.persistence.ButlerDataRef`
            Data reference for patch.
        """
        patchRef.put(catalog, self.config.coaddName + "Coadd_" + self.outputDataset)
        # The filter is not part of the output data ID, so drop it from the log message.
        mergeDataId = patchRef.dataId.copy()
        del mergeDataId["filter"]
        self.log.info("Wrote merged catalog: %s" % (mergeDataId,))

    def writeMetadata(self, dataRefList):
        """No metadata to write, and not sure how to write it for a list of dataRefs.
        """
        pass
class WriteSourceTableConnections(pipeBase.PipelineTaskConnections,
                                  dimensions=("instrument", "visit", "detector")):
    catalog = connectionTypes.Input(
        doc="Input full-depth catalog of sources produced by CalibrateTask",
        storageClass="SourceCatalog",
        dimensions=("instrument", "visit", "detector")
    )
    outputCatalog = connectionTypes.Output(
        doc="Catalog of sources, `src` in Parquet format",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector")
    )


class WriteSourceTableConfig(pipeBase.PipelineTaskConfig,
                             pipelineConnections=WriteSourceTableConnections):
    doApplyExternalPhotoCalib = pexConfig.Field(
        dtype=bool,
        default=False,
        doc=("Add local photoCalib columns from the calexp.photoCalib? Should only be set True if "
             "generating Source Tables from older src tables which do not already have local calib columns")
    )
    doApplyExternalSkyWcs = pexConfig.Field(
        dtype=bool,
        default=False,
        doc=("Add local WCS columns from the calexp.wcs? Should only be set True if "
             "generating Source Tables from older src tables which do not already have local calib columns")
    )
233 """Write source table to parquet
235 _DefaultName =
"writeSourceTable"
236 ConfigClass = WriteSourceTableConfig
239 src = dataRef.get(
'src')
240 if self.config.doApplyExternalPhotoCalib
or self.config.doApplyExternalSkyWcs:
243 ccdVisitId = dataRef.get(
'ccdExposureId')
244 result = self.
runrun(src, ccdVisitId=ccdVisitId)
245 dataRef.put(result.table,
'source')
248 inputs = butlerQC.get(inputRefs)
249 inputs[
'ccdVisitId'] = butlerQC.quantum.dataId.pack(
"visit_detector")
250 result = self.
runrun(**inputs).table
251 outputs = pipeBase.Struct(outputCatalog=result.toDataFrame())
252 butlerQC.put(outputs, outputRefs)
254 def run(self, catalog, ccdVisitId=None):
255 """Convert `src` catalog to parquet
259 catalog: `afwTable.SourceCatalog`
260 catalog to be converted
262 ccdVisitId to be added as a column
266 result : `lsst.pipe.base.Struct`
268 `ParquetTable` version of the input catalog
270 self.log.info(
"Generating parquet table from src catalog %s", ccdVisitId)
271 df = catalog.asAstropy().to_pandas().set_index(
'id', drop=
True)
272 df[
'ccdVisitId'] = ccdVisitId
273 return pipeBase.Struct(table=
ParquetTable(dataFrame=df))
276 """Add columns with local calibration evaluated at each centroid
278 for backwards compatibility with old repos.
279 This exists for the purpose of converting old src catalogs
280 (which don't have the expected local calib columns) to Source Tables.
284 catalog: `afwTable.SourceCatalog`
285 catalog to which calib columns will be added
286 dataRef: `lsst.daf.persistence.ButlerDataRef
287 for fetching the calibs from disk.
291 newCat: `afwTable.SourceCatalog`
292 Source Catalog with requested local calib columns
294 mapper = afwTable.SchemaMapper(catalog.schema)
295 measureConfig = SingleFrameMeasurementTask.ConfigClass()
296 measureConfig.doReplaceWithNoise =
False
299 exposure = dataRef.get(
'calexp_sub',
302 mapper = afwTable.SchemaMapper(catalog.schema)
303 mapper.addMinimalSchema(catalog.schema,
True)
304 schema = mapper.getOutputSchema()
306 exposureIdInfo = dataRef.get(
"expIdInfo")
307 measureConfig.plugins.names = []
308 if self.config.doApplyExternalSkyWcs:
309 plugin =
'base_LocalWcs'
311 raise RuntimeError(f
"{plugin} already in src catalog. Set doApplyExternalSkyWcs=False")
313 measureConfig.plugins.names.add(plugin)
315 if self.config.doApplyExternalPhotoCalib:
316 plugin =
'base_LocalPhotoCalib'
318 raise RuntimeError(f
"{plugin} already in src catalog. Set doApplyExternalPhotoCalib=False")
320 measureConfig.plugins.names.add(plugin)
322 measurement = SingleFrameMeasurementTask(config=measureConfig, schema=schema)
323 newCat = afwTable.SourceCatalog(schema)
324 newCat.extend(catalog, mapper=mapper)
325 measurement.run(measCat=newCat, exposure=exposure, exposureId=exposureIdInfo.expId)
329 """No metadata to write.
334 def _makeArgumentParser(cls):
335 parser = ArgumentParser(name=cls.
_DefaultName_DefaultName)
336 parser.add_id_argument(
"--id",
'src',
337 help=
"data ID, e.g. --id visit=12345 ccd=0")
342 """Calculate columns from ParquetTable
344 This object manages and organizes an arbitrary set of computations
345 on a catalog. The catalog is defined by a
346 `lsst.pipe.tasks.parquetTable.ParquetTable` object (or list thereof), such as a
347 `deepCoadd_obj` dataset, and the computations are defined by a collection
348 of `lsst.pipe.tasks.functor.Functor` objects (or, equivalently,
349 a `CompositeFunctor`).
351 After the object is initialized, accessing the `.df` attribute (which
352 holds the `pandas.DataFrame` containing the results of the calculations) triggers
353 computation of said dataframe.
355 One of the conveniences of using this object is the ability to define a desired common
356 filter for all functors. This enables the same functor collection to be passed to
357 several different `PostprocessAnalysis` objects without having to change the original
358 functor collection, since the `filt` keyword argument of this object triggers an
359 overwrite of the `filt` property for all functors in the collection.
361 This object also allows a list of refFlags to be passed, and defines a set of default
362 refFlags that are always included even if not requested.
364 If a list of `ParquetTable` object is passed, rather than a single one, then the
365 calculations will be mapped over all the input catalogs. In principle, it should
366 be straightforward to parallelize this activity, but initial tests have failed
367 (see TODO in code comments).
371 parq : `lsst.pipe.tasks.ParquetTable` (or list of such)
372 Source catalog(s) for computation
374 functors : `list`, `dict`, or `lsst.pipe.tasks.functors.CompositeFunctor`
375 Computations to do (functors that act on `parq`).
376 If a dict, the output
377 DataFrame will have columns keyed accordingly.
378 If a list, the column keys will come from the
379 `.shortname` attribute of each functor.
381 filt : `str` (optional)
382 Filter in which to calculate. If provided,
383 this will overwrite any existing `.filt` attribute
384 of the provided functors.
386 flags : `list` (optional)
387 List of flags (per-band) to include in output table.
389 refFlags : `list` (optional)
390 List of refFlags (only reference band) to include in output table.
    _defaultRefFlags = []
    _defaultFuncs = (('coord_ra', RAColumn()),
                     ('coord_dec', DecColumn()))

    def __init__(self, parq, functors, filt=None, flags=None, refFlags=None):
        self.parq = parq
        self.functors = functors

        self.filt = filt
        self.flags = list(flags) if flags is not None else []
        self.refFlags = list(self._defaultRefFlags)
        if refFlags is not None:
            self.refFlags += list(refFlags)

        self._df = None
    @property
    def func(self):
        additionalFuncs = dict(self._defaultFuncs)
        additionalFuncs.update({flag: Column(flag, dataset='ref') for flag in self.refFlags})
        additionalFuncs.update({flag: Column(flag, dataset='meas') for flag in self.flags})

        if isinstance(self.functors, CompositeFunctor):
            func = self.functors
        else:
            func = CompositeFunctor(self.functors)

        func.funcDict.update(additionalFuncs)
        func.filt = self.filt

        return func
    @property
    def noDupCols(self):
        return [name for name, func in self.func.funcDict.items()
                if func.noDup or func.dataset == 'ref']
    @property
    def df(self):
        if self._df is None:
            self.compute()
        return self._df

    def compute(self, dropna=False, pool=None):
        # Map the functor collection over one or several ParquetTables
        if type(self.parq) in (list, tuple):
            if pool is None:
                dflist = [self.func(parq, dropna=dropna) for parq in self.parq]
            else:
                # TODO: Figure out why this fails in initial tests
                dflist = pool.map(functools.partial(self.func, dropna=dropna), self.parq)
            self._df = pd.concat(dflist)
        else:
            self._df = self.func(self.parq, dropna=dropna)

        return self._df
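
# Illustrative sketch (hypothetical functor choices): how a PostprocessAnalysis
# might be driven by hand. `parq` is assumed to be a `deepCoadd_obj`-style
# ParquetTable obtained from the butler; the column names are taken from the
# examples in this module and are not guaranteed to exist in every repo.
def _postprocessAnalysisExample(parq):
    funcs = {
        'ra': RAColumn(),
        'dec': DecColumn(),
        # Column pulls a named column; dataset selects the 'meas' or 'ref' sub-table.
        'inputCount': Column('base_InputCount_value', dataset='meas'),
    }
    analysis = PostprocessAnalysis(parq, funcs, filt='i',
                                   refFlags=['merge_measurement_i'])
    return analysis.df  # accessing .df triggers the computation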
458 """Expected Connections for subclasses of TransformCatalogBaseTask.
462 inputCatalog = connectionTypes.Input(
464 storageClass=
"DataFrame",
466 outputCatalog = connectionTypes.Output(
468 storageClass=
"DataFrame",
473 pipelineConnections=TransformCatalogBaseConnections):
474 functorFile = pexConfig.Field(
476 doc=
'Path to YAML file specifying functors to be computed',
483 """Base class for transforming/standardizing a catalog
485 by applying functors that convert units and apply calibrations.
486 The purpose of this task is to perform a set of computations on
487 an input `ParquetTable` dataset (such as `deepCoadd_obj`) and write the
488 results to a new dataset (which needs to be declared in an `outputDataset`
491 The calculations to be performed are defined in a YAML file that specifies
492 a set of functors to be computed, provided as
493 a `--functorFile` config parameter. An example of such a YAML file
518 - base_InputCount_value
521 functor: DeconvolvedMoments
526 - merge_measurement_i
527 - merge_measurement_r
528 - merge_measurement_z
529 - merge_measurement_y
530 - merge_measurement_g
531 - base_PixelFlags_flag_inexact_psfCenter
534 The names for each entry under "func" will become the names of columns in the
535 output dataset. All the functors referenced are defined in `lsst.pipe.tasks.functors`.
536 Positional arguments to be passed to each functor are in the `args` list,
537 and any additional entries for each column other than "functor" or "args" (e.g., `'filt'`,
538 `'dataset'`) are treated as keyword arguments to be passed to the functor initialization.
540 The "refFlags" entry is shortcut for a bunch of `Column` functors with the original column and
541 taken from the `'ref'` dataset.
543 The "flags" entry will be expanded out per band.
545 This task uses the `lsst.pipe.tasks.postprocess.PostprocessAnalysis` object
546 to organize and excecute the calculations.
    @property
    def _DefaultName(self):
        raise NotImplementedError('Subclass must define "_DefaultName" attribute')

    @property
    def outputDataset(self):
        raise NotImplementedError('Subclass must define "outputDataset" attribute')

    @property
    def inputDataset(self):
        raise NotImplementedError('Subclass must define "inputDataset" attribute')

    @property
    def ConfigClass(self):
        raise NotImplementedError('Subclass must define "ConfigClass" attribute')

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        if self.config.functorFile:
            self.log.info('Loading transform functor definitions from %s',
                          self.config.functorFile)
            self.funcs = CompositeFunctor.from_file(self.config.functorFile)
            self.funcs.update(dict(PostprocessAnalysis._defaultFuncs))
        else:
            self.funcs = None
    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)
        if self.funcs is None:
            raise ValueError("config.functorFile is None. "
                             "Must be a valid path to yaml in order to run Task as a PipelineTask.")
        result = self.run(parq=inputs['inputCatalog'], funcs=self.funcs,
                          dataId=outputRefs.outputCatalog.dataId.full)
        outputs = pipeBase.Struct(outputCatalog=result)
        butlerQC.put(outputs, outputRefs)

    def runDataRef(self, dataRef):
        parq = dataRef.get(self.inputDataset)
        if self.funcs is None:
            raise ValueError("config.functorFile is None. "
                             "Must be a valid path to yaml in order to run as a CommandlineTask.")
        df = self.run(parq, funcs=self.funcs, dataId=dataRef.dataId)
        self.write(df, dataRef)
    def run(self, parq, funcs=None, dataId=None, band=None):
        """Do postprocessing calculations.

        Takes a `ParquetTable` object and dataId,
        returns a dataframe with results of postprocessing calculations.

        Parameters
        ----------
        parq : `lsst.pipe.tasks.parquetTable.ParquetTable`
            ParquetTable from which calculations are done.
        funcs : `lsst.pipe.tasks.functors.Functors`
            Functors to apply to the table's columns.
        dataId : dict, optional
            Used to add a `patchId` column to the output dataframe.
        band : `str`, optional
            Filter band that is being processed.

        Returns
        -------
        df : `pandas.DataFrame`
        """
        self.log.info("Transforming/standardizing the source table dataId: %s", dataId)

        df = self.transform(band, parq, funcs, dataId).df
        self.log.info("Made a table of %d columns and %d rows", len(df.columns), len(df))
        return df

    def getFunctors(self):
        return self.funcs

    def getAnalysis(self, parq, funcs=None, band=None):
        if funcs is None:
            funcs = self.funcs
        analysis = PostprocessAnalysis(parq, funcs, filt=band)
        return analysis

    def transform(self, band, parq, funcs, dataId):
        analysis = self.getAnalysis(parq, funcs=funcs, band=band)
        df = analysis.df
        if dataId is not None:
            for key, value in dataId.items():
                df[str(key)] = value

        return pipeBase.Struct(
            df=df,
            analysis=analysis
        )
647 """No metadata to write.
class TransformObjectCatalogConfig(TransformCatalogBaseConfig):
    coaddName = pexConfig.Field(
        dtype=str,
        default="deep",
        doc="Name of coadd"
    )
    filterMap = pexConfig.DictField(
        keytype=str,
        itemtype=str,
        doc=("Dictionary mapping full filter name to short one for column name munging. "
             "These filters determine the output columns no matter what filters the "
             "input data actually contain."),
        deprecated=("Coadds are now identified by the band, so this transform is unused. "
                    "Will be removed after v22.")
    )
    outputBands = pexConfig.ListField(
        dtype=str,
        default=None,
        optional=True,
        doc=("These bands and only these bands will appear in the output,"
             " NaN-filled if the input does not include them."
             " If None, then use all bands found in the input.")
    )
    camelCase = pexConfig.Field(
        dtype=bool,
        default=False,
        doc=("Write per-band column names with camelCase, else underscore. "
             "For example: gPsFlux instead of g_PsFlux.")
    )
    multilevelOutput = pexConfig.Field(
        dtype=bool,
        doc=("Whether results dataframe should have a multilevel column index (True) or be flat "
             "and name-munged (False).")
    )
692 """Produce a flattened Object Table to match the format specified in
695 Do the same set of postprocessing calculations on all bands
697 This is identical to `TransformCatalogBaseTask`, except for that it does the
698 specified functor calculations for all filters present in the
699 input `deepCoadd_obj` table. Any specific `"filt"` keywords specified
700 by the YAML file will be superceded.
702 _DefaultName =
"transformObjectCatalog"
703 ConfigClass = TransformObjectCatalogConfig
705 inputDataset =
'deepCoadd_obj'
706 outputDataset =
'objectTable'
709 def _makeArgumentParser(cls):
712 ContainerClass=CoaddDataIdContainer,
713 help=
"data ID, e.g. --id tract=12345 patch=1,2")
716 def run(self, parq, funcs=None, dataId=None, band=None):
720 templateDf = pd.DataFrame()
721 outputBands = parq.columnLevelNames[
'band']
if self.config.outputBands
is None else \
722 self.config.outputBands
725 for inputBand
in parq.columnLevelNames[
'band']:
726 if inputBand
not in outputBands:
727 self.log.info(
"Ignoring %s band data in the input", inputBand)
729 self.log.info(
"Transforming the catalog of band %s", inputBand)
730 result = self.
transformtransform(inputBand, parq, funcs, dataId)
731 dfDict[inputBand] = result.df
732 analysisDict[inputBand] = result.analysis
734 templateDf = result.df
737 for filt
in outputBands:
738 if filt
not in dfDict:
739 self.log.info(
"Adding empty columns for band %s", filt)
740 dfDict[filt] = pd.DataFrame().reindex_like(templateDf)
743 df = pd.concat(dfDict, axis=1, names=[
'band',
'column'])
745 if not self.config.multilevelOutput:
746 noDupCols = list(set.union(*[set(v.noDupCols)
for v
in analysisDict.values()]))
747 if dataId
is not None:
748 noDupCols += list(dataId.keys())
749 df =
flattenFilters(df, noDupCols=noDupCols, camelCase=self.config.camelCase)
751 self.log.info(
"Made a table of %d columns and %d rows", len(df.columns), len(df))
758 """Make self.refList from self.idList
760 Generate a list of data references given tract and/or patch.
761 This was adapted from `TractQADataIdContainer`, which was
762 `TractDataIdContainer` modifie to not require "filter".
763 Only existing dataRefs are returned.
765 def getPatchRefList(tract):
766 return [namespace.butler.dataRef(datasetType=self.datasetType,
768 patch=
"%d,%d" % patch.getIndex())
for patch
in tract]
770 tractRefs = defaultdict(list)
771 for dataId
in self.idList:
772 skymap = self.
getSkymapgetSkymap(namespace)
774 if "tract" in dataId:
775 tractId = dataId[
"tract"]
776 if "patch" in dataId:
777 tractRefs[tractId].append(namespace.butler.dataRef(datasetType=self.datasetType,
779 patch=dataId[
'patch']))
781 tractRefs[tractId] += getPatchRefList(skymap[tractId])
783 tractRefs = dict((tract.getId(), tractRefs.get(tract.getId(), []) + getPatchRefList(tract))
786 for tractRefList
in tractRefs.values():
787 existingRefs = [ref
for ref
in tractRefList
if ref.datasetExists()]
788 outputRefList.append(existingRefs)
class ConsolidateObjectTableConfig(pexConfig.Config):
    coaddName = pexConfig.Field(
        dtype=str,
        default="deep",
        doc="Name of coadd"
    )


class ConsolidateObjectTableTask(CmdLineTask):
    """Write patch-merged source tables to a tract-level parquet file.
    """
    _DefaultName = "consolidateObjectTable"
    ConfigClass = ConsolidateObjectTableConfig

    inputDataset = 'objectTable'
    outputDataset = 'objectTable_tract'

    @classmethod
    def _makeArgumentParser(cls):
        parser = ArgumentParser(name=cls._DefaultName)

        parser.add_id_argument("--id", cls.inputDataset,
                               help="data ID, e.g. --id tract=12345",
                               ContainerClass=TractObjectDataIdContainer)
        return parser

    def runDataRef(self, patchRefList):
        df = pd.concat([patchRef.get().toDataFrame() for patchRef in patchRefList])
        patchRefList[0].put(ParquetTable(dataFrame=df), self.outputDataset)

    def writeMetadata(self, dataRef):
        """No metadata to write.
        """
        pass
class TransformSourceTableConnections(pipeBase.PipelineTaskConnections,
                                      dimensions=("instrument", "visit", "detector")):
    inputCatalog = connectionTypes.Input(
        doc="Wide input catalog of sources produced by WriteSourceTableTask",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector"),
    )
    outputCatalog = connectionTypes.Output(
        doc="Narrower, per-detector Source Table transformed and converted per a "
            "specified set of functors",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector")
    )


class TransformSourceTableConfig(TransformCatalogBaseConfig,
                                 pipelineConnections=TransformSourceTableConnections):
    pass


class TransformSourceTableTask(TransformCatalogBaseTask):
    """Transform/standardize a source catalog.
    """
    _DefaultName = "transformSourceTable"
    ConfigClass = TransformSourceTableConfig

    inputDataset = 'source'
    outputDataset = 'sourceTable'

    @classmethod
    def _makeArgumentParser(cls):
        parser = ArgumentParser(name=cls._DefaultName)
        parser.add_id_argument("--id", cls.inputDataset,
                               help="data ID, e.g. --id visit=12345 ccd=0")
        return parser

    def runDataRef(self, dataRef):
        """Override to specify band label to run()."""
        parq = dataRef.get(self.inputDataset)
        funcs = self.getFunctors()
        band = dataRef.get("calexp_filterLabel", immediate=True).bandLabel
        df = self.run(parq, funcs=funcs, dataId=dataRef.dataId, band=band)
        self.write(df, dataRef)
class ConsolidateVisitSummaryConnections(pipeBase.PipelineTaskConnections,
                                         dimensions=("instrument", "visit",),
                                         defaultTemplates={}):
    calexp = connectionTypes.Input(
        doc="Processed exposures used for metadata",
        storageClass="ExposureF",
        dimensions=("instrument", "visit", "detector"),
    )
    visitSummary = connectionTypes.Output(
        doc="Consolidated visit-level exposure metadata",
        storageClass="ExposureCatalog",
        dimensions=("instrument", "visit"),
    )


class ConsolidateVisitSummaryConfig(pipeBase.PipelineTaskConfig,
                                    pipelineConnections=ConsolidateVisitSummaryConnections):
    """Config for ConsolidateVisitSummaryTask"""
    pass
906 """Task to consolidate per-detector visit metadata.
908 This task aggregates the following metadata from all the detectors in a
909 single visit into an exposure catalog:
913 - The physical_filter and band (if available).
914 - The psf size, shape, and effective area at the center of the detector.
915 - The corners of the bounding box in right ascension/declination.
917 Other quantities such as Psf, ApCorrMap, and TransmissionCurve are not
918 persisted here because of storage concerns, and because of their limited
919 utility as summary statistics.
921 Tests for this task are performed in ci_hsc_gen3.
923 _DefaultName =
"consolidateVisitSummary"
924 ConfigClass = ConsolidateVisitSummaryConfig
927 def _makeArgumentParser(cls):
928 parser = ArgumentParser(name=cls.
_DefaultName_DefaultName)
930 parser.add_id_argument(
"--id",
"calexp",
931 help=
"data ID, e.g. --id visit=12345",
932 ContainerClass=VisitDataIdContainer)
936 """No metadata to persist, so override to remove metadata persistance.
941 """No config to persist, so override to remove config persistance.
946 visit = dataRefList[0].dataId[
'visit']
948 self.log.debug(
"Concatenating metadata from %d per-detector calexps (visit %d)" %
949 (len(dataRefList), visit))
951 expCatalog = self._combineExposureMetadata(visit, dataRefList, isGen3=
False)
953 dataRefList[0].put(expCatalog,
'visitSummary', visit=visit)
956 dataRefs = butlerQC.get(inputRefs.calexp)
957 visit = dataRefs[0].dataId.byName()[
'visit']
959 self.log.debug(
"Concatenating metadata from %d per-detector calexps (visit %d)" %
960 (len(dataRefs), visit))
964 butlerQC.put(expCatalog, outputRefs.visitSummary)
    def _combineExposureMetadata(self, visit, dataRefs, isGen3=True):
        """Make a combined exposure catalog from a list of dataRefs.

        Parameters
        ----------
        visit : `int`
            Visit identification number.
        dataRefs : `list`
            List of calexp dataRefs in visit. May be a list of
            `lsst.daf.persistence.ButlerDataRef` (Gen2) or
            `lsst.daf.butler.DeferredDatasetHandle` (Gen3).
        isGen3 : `bool`, optional
            Specifies if this is a Gen3 list of datarefs.

        Returns
        -------
        visitSummary : `lsst.afw.table.ExposureCatalog`
            Exposure catalog with per-detector summary information.
        """
        schema = afwTable.ExposureTable.makeMinimalSchema()
        schema.addField('visit', type='I', doc='Visit number')
        schema.addField('detector_id', type='I', doc='Detector number')
        schema.addField('physical_filter', type='String', size=32, doc='Physical filter')
        schema.addField('band', type='String', size=32, doc='Name of band')
        schema.addField('psfSigma', type='F',
                        doc='PSF model second-moments determinant radius (center of chip) (pixel)')
        schema.addField('psfArea', type='F',
                        doc='PSF model effective area (center of chip) (pixel**2)')
        schema.addField('psfIxx', type='F',
                        doc='PSF model Ixx (center of chip) (pixel**2)')
        schema.addField('psfIyy', type='F',
                        doc='PSF model Iyy (center of chip) (pixel**2)')
        schema.addField('psfIxy', type='F',
                        doc='PSF model Ixy (center of chip) (pixel**2)')
        schema.addField('raCorners', type='ArrayD', size=4,
                        doc='Right Ascension of bounding box corners (degrees)')
        schema.addField('decCorners', type='ArrayD', size=4,
                        doc='Declination of bounding box corners (degrees)')

        cat = afwTable.ExposureCatalog(schema)
        cat.resize(len(dataRefs))

        cat['visit'] = visit

        for i, dataRef in enumerate(dataRefs):
            if isGen3:
                visitInfo = dataRef.get(component='visitInfo')
                filterLabel = dataRef.get(component='filterLabel')
                psf = dataRef.get(component='psf')
                wcs = dataRef.get(component='wcs')
                photoCalib = dataRef.get(component='photoCalib')
                detector = dataRef.get(component='detector')
                bbox = dataRef.get(component='bbox')
                validPolygon = dataRef.get(component='validPolygon')
            else:
                # Gen2: read only a tiny sub-region to get the attached metadata.
                gen2_read_bbox = lsst.geom.Box2I(lsst.geom.Point2I(0, 0), lsst.geom.Point2I(1, 1))
                exp = dataRef.get(datasetType='calexp_sub', bbox=gen2_read_bbox)
                visitInfo = exp.getInfo().getVisitInfo()
                filterLabel = dataRef.get("calexp_filterLabel")
                psf = exp.getPsf()
                wcs = exp.getWcs()
                photoCalib = exp.getPhotoCalib()
                detector = exp.getDetector()
                bbox = dataRef.get(datasetType='calexp_bbox')
                validPolygon = exp.getInfo().getValidPolygon()

            rec = cat[i]
            rec.setVisitInfo(visitInfo)
            rec.setWcs(wcs)
            rec.setPhotoCalib(photoCalib)
            rec.setDetector(detector)
            rec.setValidPolygon(validPolygon)

            rec['physical_filter'] = filterLabel.physicalLabel if filterLabel.hasPhysicalLabel() else ""
            rec['band'] = filterLabel.bandLabel if filterLabel.hasBandLabel() else ""
            rec['detector_id'] = detector.getId()
            shape = psf.computeShape(bbox.getCenter())
            rec['psfSigma'] = shape.getDeterminantRadius()
            rec['psfIxx'] = shape.getIxx()
            rec['psfIyy'] = shape.getIyy()
            rec['psfIxy'] = shape.getIxy()
            im = psf.computeKernelImage(bbox.getCenter())
            rec['psfArea'] = np.sum(im.array)/np.sum(im.array**2.)

            sph_pts = wcs.pixelToSky(lsst.geom.Box2D(bbox).getCorners())
            rec['raCorners'][:] = [sph.getRa().asDegrees() for sph in sph_pts]
            rec['decCorners'][:] = [sph.getDec().asDegrees() for sph in sph_pts]

        return cat
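
# Illustrative sketch (pure numpy, synthetic PSF): the statistic stored in
# 'psfArea' above is sum(p) / sum(p**2); for a PSF image normalized to unit sum
# this equals 1 / sum(p**2), i.e. the effective area (N_eff) of the profile.
def _effectivePsfArea(kernelImage):
    arr = np.asarray(kernelImage, dtype=float)
    return np.sum(arr) / np.sum(arr**2)

def _effectivePsfAreaExample():
    # For a Gaussian PSF of width sigma (pixels), N_eff is about 4*pi*sigma**2.
    yy, xx = np.mgrid[-25:26, -25:26]
    sigma = 2.0
    gauss = np.exp(-(xx**2 + yy**2) / (2 * sigma**2))
    gauss /= gauss.sum()
    return _effectivePsfArea(gauss)   # ~ 4 * np.pi * sigma**2 ~= 50.3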
1065 """DataIdContainer that groups sensor-level id's by visit
1069 """Make self.refList from self.idList
1071 Generate a list of data references grouped by visit.
1075 namespace : `argparse.Namespace`
1076 Namespace used by `lsst.pipe.base.CmdLineTask` to parse command line arguments
1079 visitRefs = defaultdict(list)
1080 for dataId
in self.idList:
1081 if "visit" in dataId:
1082 visitId = dataId[
"visit"]
1084 subset = namespace.butler.subset(self.datasetType, dataId=dataId)
1085 visitRefs[visitId].extend([dataRef
for dataRef
in subset])
1088 for refList
in visitRefs.values():
1089 existingRefs = [ref
for ref
in refList
if ref.datasetExists()]
1091 outputRefList.append(existingRefs)
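
# Illustrative sketch (plain Python, invented records): the same defaultdict
# grouping pattern used above, showing how per-detector entries collect under
# their visit before being filtered for existence.
def _groupByVisitExample():
    records = [{'visit': 100, 'detector': d} for d in range(3)] + [{'visit': 101, 'detector': 0}]
    byVisit = defaultdict(list)
    for rec in records:
        byVisit[rec['visit']].append(rec)
    # byVisit -> {100: [three records], 101: [one record]}
    return byVisit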
class ConsolidateSourceTableConnections(pipeBase.PipelineTaskConnections,
                                        dimensions=("instrument", "visit")):
    inputCatalogs = connectionTypes.Input(
        doc="Input per-detector Source Tables",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector"),
    )
    outputCatalog = connectionTypes.Output(
        doc="Per-visit concatenation of Source Table",
        name="sourceTable_visit",
        storageClass="DataFrame",
        dimensions=("instrument", "visit")
    )


class ConsolidateSourceTableConfig(pipeBase.PipelineTaskConfig,
                                   pipelineConnections=ConsolidateSourceTableConnections):
    pass


class ConsolidateSourceTableTask(CmdLineTask, pipeBase.PipelineTask):
    """Concatenate a `sourceTable` list into a per-visit `sourceTable_visit`.
    """
    _DefaultName = 'consolidateSourceTable'
    ConfigClass = ConsolidateSourceTableConfig

    inputDataset = 'sourceTable'
    outputDataset = 'sourceTable_visit'

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)
        self.log.info("Concatenating %s per-detector Source Tables",
                      len(inputs['inputCatalogs']))
        df = pd.concat(inputs['inputCatalogs'])
        butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs)

    def runDataRef(self, dataRefList):
        self.log.info("Concatenating %s per-detector Source Tables", len(dataRefList))
        df = pd.concat([dataRef.get().toDataFrame() for dataRef in dataRefList])
        dataRefList[0].put(ParquetTable(dataFrame=df), self.outputDataset)

    @classmethod
    def _makeArgumentParser(cls):
        parser = ArgumentParser(name=cls._DefaultName)

        parser.add_id_argument("--id", cls.inputDataset,
                               help="data ID, e.g. --id visit=12345",
                               ContainerClass=VisitDataIdContainer)
        return parser

    def writeMetadata(self, dataRef):
        """No metadata to write.
        """
        pass

    def writeConfig(self, butler, clobber=False, doBackup=True):
        """No config to write.
        """
        pass