import functools
from collections import defaultdict

import numpy as np
import pandas as pd

import lsst.geom
import lsst.pex.config as pexConfig
import lsst.pipe.base as pipeBase
import lsst.daf.base as dafBase
import lsst.afw.table as afwTable
from lsst.meas.base import SingleFrameMeasurementTask
from lsst.pipe.base import CmdLineTask, ArgumentParser, DataIdContainer
from lsst.pipe.base import connectionTypes

from .parquetTable import ParquetTable
from .multiBandUtils import makeMergeArgumentParser, MergeSourcesRunner
from .functors import CompositeFunctor, RAColumn, DecColumn, Column
def flattenFilters(df, noDupCols=['coord_ra', 'coord_dec'], camelCase=False):
    """Flatten a dataframe with a multilevel column index.
    """
    newDf = pd.DataFrame()
    for band in set(df.columns.to_frame()['band']):
        subdf = df[band]
        columnFormat = '{0}{1}' if camelCase else '{0}_{1}'
        newColumns = {c: columnFormat.format(band, c)
                      for c in subdf.columns if c not in noDupCols}
        cols = list(newColumns.keys())
        newDf = pd.concat([newDf, subdf[cols].rename(columns=newColumns)], axis=1)

    newDf = pd.concat([subdf[noDupCols], newDf], axis=1)
    return newDf
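# Illustrative sketch (column names invented, not from the pipeline): given a
# band-keyed multilevel column index, flattenFilters produces flat, prefixed names.
#
#   >>> cols = pd.MultiIndex.from_tuples(
#   ...     [('g', 'coord_ra'), ('g', 'PsFlux'), ('r', 'coord_ra'), ('r', 'PsFlux')],
#   ...     names=('band', 'column'))
#   >>> df = pd.DataFrame([[1.0, 10.0, 1.0, 20.0]], columns=cols)
#   >>> flattenFilters(df, noDupCols=['coord_ra'], camelCase=True).columns
#   # -> 'coord_ra', 'gPsFlux', 'rPsFlux' (band order depends on set iteration)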
class WriteObjectTableConfig(pexConfig.Config):
    engine = pexConfig.Field(
        dtype=str,
        default="pyarrow",
        doc="Parquet engine for writing (pyarrow or fastparquet)",
    )
    coaddName = pexConfig.Field()


class WriteObjectTableTask(CmdLineTask):
    """Write filter-merged source tables to parquet
    """
    _DefaultName = "writeObjectTable"
    ConfigClass = WriteObjectTableConfig
    RunnerClass = MergeSourcesRunner

    # Names of the table datasets merged for each patch
    inputDatasets = ('forced_src', 'meas', 'ref')

    def __init__(self, butler=None, schema=None, **kwargs):
        CmdLineTask.__init__(self, **kwargs)
    def runDataRef(self, patchRefList):
        """!
        @brief Merge coadd sources from multiple bands. Calls @ref run, which
        must be defined in subclasses that inherit from MergeSourcesTask.

        @param[in] patchRefList list of data references for each filter
        """
        catalogs = dict(self.readCatalog(patchRef) for patchRef in patchRefList)
        dataId = patchRefList[0].dataId
        mergedCatalog = self.run(catalogs, tract=dataId['tract'], patch=dataId['patch'])
        self.write(patchRefList[0], mergedCatalog)
    @classmethod
    def _makeArgumentParser(cls):
        """Create a suitable ArgumentParser.

        We will use the ArgumentParser to get a list of data
        references for patches; the RunnerClass will sort them into lists
        of data references for the same patch.

        References first of self.inputDatasets, rather than
        self.inputDataset
        """
        return makeMergeArgumentParser(cls._DefaultName, cls.inputDatasets[0])
    def readCatalog(self, patchRef):
        """Read input catalogs

        Read all the input datasets given by the 'inputDatasets'
        attribute.

        Parameters
        ----------
        patchRef : `lsst.daf.persistence.ButlerDataRef`
            Data reference for patch

        Returns
        -------
        Tuple consisting of band name and a dict of catalogs, keyed by
        dataset name
        """
        band = patchRef.get(self.config.coaddName + "Coadd_filterLabel", immediate=True).bandLabel
        catalogDict = {}
        for dataset in self.inputDatasets:
            catalog = patchRef.get(self.config.coaddName + "Coadd_" + dataset, immediate=True)
            self.log.info("Read %d sources from %s for band %s: %s" %
                          (len(catalog), dataset, band, patchRef.dataId))
            catalogDict[dataset] = catalog
        return band, catalogDict
    def run(self, catalogs, tract, patch):
        """Merge multiple catalogs.

        Parameters
        ----------
        catalogs : `dict`
            Mapping from filter names to dict of catalogs.
        tract : `int`
            tractId to use for the tractId column
        patch : `str`
            patchId to use for the patchId column

        Returns
        -------
        catalog : `lsst.pipe.tasks.parquetTable.ParquetTable`
            Merged dataframe, with each column prefixed by
            `filter_tag(filt)`, wrapped in the parquet writer shim class.
        """
        dfs = []
        for filt, tableDict in catalogs.items():
            for dataset, table in tableDict.items():
                # Convert the afw table to a pandas DataFrame indexed by source id
                df = table.asAstropy().to_pandas().set_index('id', drop=True)

                # Sort columns by name, to ensure matching schema among patches
                df = df.reindex(sorted(df.columns), axis=1)
                df['tractId'] = tract
                df['patchId'] = patch

                # Make columns a 3-level MultiIndex
                df.columns = pd.MultiIndex.from_tuples([(dataset, filt, c) for c in df.columns],
                                                       names=('dataset', 'band', 'column'))
                dfs.append(df)

        catalog = functools.reduce(lambda d1, d2: d1.join(d2), dfs)
        return ParquetTable(dataFrame=catalog)
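    # Illustrative note (column names invented): the merged dataframe built in
    # `run` is indexed by object id with a three-level column index, e.g.
    #   ('meas', 'g', 'base_PsfFlux_instFlux')
    #   ('forced_src', 'r', 'base_PsfFlux_instFlux')
    #   ('ref', 'g', 'coord_ra')
    # so a single dataset/band slice of the returned ParquetTable can be pulled
    # out with something like catalog.toDataFrame()['meas']['g'].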
    def write(self, patchRef, catalog):
        """Write the output.

        Parameters
        ----------
        catalog : `ParquetTable`
            Catalog to write
        patchRef : `lsst.daf.persistence.ButlerDataRef`
            Data reference for patch
        """
        patchRef.put(catalog, self.config.coaddName + "Coadd_" + self.outputDataset)
        # The filter is not part of the saved dataset's data ID, so drop it from the log message
        mergeDataId = patchRef.dataId.copy()
        del mergeDataId["filter"]
        self.log.info("Wrote merged catalog: %s" % (mergeDataId,))

    def writeMetadata(self, dataRefList):
        """No metadata to write, and not sure how to write it for a list of dataRefs.
        """
        pass
class WriteSourceTableConnections(pipeBase.PipelineTaskConnections,
                                  dimensions=("instrument", "visit", "detector")):
    catalog = connectionTypes.Input(
        doc="Input full-depth catalog of sources produced by CalibrateTask",
        storageClass="SourceCatalog",
        dimensions=("instrument", "visit", "detector")
    )
    outputCatalog = connectionTypes.Output(
        doc="Catalog of sources, `src` in Parquet format",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector")
    )
class WriteSourceTableConfig(pipeBase.PipelineTaskConfig,
                             pipelineConnections=WriteSourceTableConnections):
    doApplyExternalPhotoCalib = pexConfig.Field(
        dtype=bool,
        default=False,
        doc=("Add local photoCalib columns from the calexp.photoCalib? Should only set True if "
             "generating Source Tables from older src tables which do not already have local calib columns")
    )
    doApplyExternalSkyWcs = pexConfig.Field(
        dtype=bool,
        default=False,
        doc=("Add local WCS columns from the calexp.wcs? Should only set True if "
             "generating Source Tables from older src tables which do not already have local calib columns")
    )
class WriteSourceTableTask(CmdLineTask, pipeBase.PipelineTask):
    """Write source table to parquet
    """
    _DefaultName = "writeSourceTable"
    ConfigClass = WriteSourceTableConfig

    def runDataRef(self, dataRef):
        src = dataRef.get('src')
        if self.config.doApplyExternalPhotoCalib or self.config.doApplyExternalSkyWcs:
            src = self.addCalibColumns(src, dataRef)

        ccdVisitId = dataRef.get('ccdExposureId')
        result = self.run(src, ccdVisitId=ccdVisitId)
        dataRef.put(result.table, 'source')

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)
        inputs['ccdVisitId'] = butlerQC.quantum.dataId.pack("visit_detector")
        result = self.run(**inputs).table
        outputs = pipeBase.Struct(outputCatalog=result.toDataFrame())
        butlerQC.put(outputs, outputRefs)
    def run(self, catalog, ccdVisitId=None):
        """Convert `src` catalog to parquet

        Parameters
        ----------
        catalog : `afwTable.SourceCatalog`
            catalog to be converted
        ccdVisitId : `int`
            ccdVisitId to be added as a column

        Returns
        -------
        result : `lsst.pipe.base.Struct`
            `ParquetTable` version of the input catalog
        """
        self.log.info("Generating parquet table from src catalog %s", ccdVisitId)
        df = catalog.asAstropy().to_pandas().set_index('id', drop=True)
        df['ccdVisitId'] = ccdVisitId
        return pipeBase.Struct(table=ParquetTable(dataFrame=df))
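    # Hypothetical Gen2 read-back sketch (repo and data ID values invented):
    # the 'source' dataset written above comes back as a ParquetTable, e.g.
    #   parq = butler.get('source', visit=12345, ccd=0)
    #   df = parq.toDataFrame(columns=['ccdVisitId'])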
    def addCalibColumns(self, catalog, dataRef):
        """Add columns with local calibration evaluated at each centroid

        for backwards compatibility with old repos.
        This exists for the purpose of converting old src catalogs
        (which don't have the expected local calib columns) to Source Tables.

        Parameters
        ----------
        catalog : `afwTable.SourceCatalog`
            catalog to which calib columns will be added
        dataRef : `lsst.daf.persistence.ButlerDataRef`
            for fetching the calibs from disk.

        Returns
        -------
        newCat : `afwTable.SourceCatalog`
            Source Catalog with requested local calib columns
        """
        mapper = afwTable.SchemaMapper(catalog.schema)
        measureConfig = SingleFrameMeasurementTask.ConfigClass()
        measureConfig.doReplaceWithNoise = False

        exposure = dataRef.get('calexp_sub',

        mapper = afwTable.SchemaMapper(catalog.schema)
        mapper.addMinimalSchema(catalog.schema, True)
        schema = mapper.getOutputSchema()

        exposureIdInfo = dataRef.get("expIdInfo")
        measureConfig.plugins.names = []
        if self.config.doApplyExternalSkyWcs:
            plugin = 'base_LocalWcs'
            if plugin in catalog.schema:
                raise RuntimeError(f"{plugin} already in src catalog. Set doApplyExternalSkyWcs=False")
            else:
                measureConfig.plugins.names.add(plugin)

        if self.config.doApplyExternalPhotoCalib:
            plugin = 'base_LocalPhotoCalib'
            if plugin in catalog.schema:
                raise RuntimeError(f"{plugin} already in src catalog. Set doApplyExternalPhotoCalib=False")
            else:
                measureConfig.plugins.names.add(plugin)

        measurement = SingleFrameMeasurementTask(config=measureConfig, schema=schema)
        newCat = afwTable.SourceCatalog(schema)
        newCat.extend(catalog, mapper=mapper)
        measurement.run(measCat=newCat, exposure=exposure, exposureId=exposureIdInfo.expId)
        return newCat
    def writeMetadata(self, dataRef):
        """No metadata to write.
        """
        pass

    @classmethod
    def _makeArgumentParser(cls):
        parser = ArgumentParser(name=cls._DefaultName)
        parser.add_id_argument("--id", 'src',
                               help="data ID, e.g. --id visit=12345 ccd=0")
        return parser
class PostprocessAnalysis(object):
    """Calculate columns from ParquetTable

    This object manages and organizes an arbitrary set of computations
    on a catalog. The catalog is defined by a
    `lsst.pipe.tasks.parquetTable.ParquetTable` object (or list thereof), such as a
    `deepCoadd_obj` dataset, and the computations are defined by a collection
    of `lsst.pipe.tasks.functor.Functor` objects (or, equivalently,
    a `CompositeFunctor`).

    After the object is initialized, accessing the `.df` attribute (which
    holds the `pandas.DataFrame` containing the results of the calculations) triggers
    computation of said dataframe.

    One of the conveniences of using this object is the ability to define a desired common
    filter for all functors. This enables the same functor collection to be passed to
    several different `PostprocessAnalysis` objects without having to change the original
    functor collection, since the `filt` keyword argument of this object triggers an
    overwrite of the `filt` property for all functors in the collection.

    This object also allows a list of refFlags to be passed, and defines a set of default
    refFlags that are always included even if not requested.

    If a list of `ParquetTable` objects is passed, rather than a single one, then the
    calculations will be mapped over all the input catalogs. In principle, it should
    be straightforward to parallelize this activity, but initial tests have failed
    (see TODO in code comments).

    Parameters
    ----------
    parq : `lsst.pipe.tasks.ParquetTable` (or list of such)
        Source catalog(s) for computation

    functors : `list`, `dict`, or `lsst.pipe.tasks.functors.CompositeFunctor`
        Computations to do (functors that act on `parq`).
        If a dict, the output
        DataFrame will have columns keyed accordingly.
        If a list, the column keys will come from the
        `.shortname` attribute of each functor.

    filt : `str` (optional)
        Filter in which to calculate. If provided,
        this will overwrite any existing `.filt` attribute
        of the provided functors.

    flags : `list` (optional)
        List of flags (per-band) to include in output table.

    refFlags : `list` (optional)
        List of refFlags (only reference band) to include in output table.
    """
    _defaultRefFlags = []
    _defaultFuncs = (('coord_ra', RAColumn()),
                     ('coord_dec', DecColumn()))
    def __init__(self, parq, functors, filt=None, flags=None, refFlags=None):
        self.parq = parq
        self.functors = functors

        self.filt = filt
        self.flags = list(flags) if flags is not None else []
        self.refFlags = list(self._defaultRefFlags)
        if refFlags is not None:
            self.refFlags += list(refFlags)

        self._df = None

    @property
    def func(self):
        additionalFuncs = dict(self._defaultFuncs)
        additionalFuncs.update({flag: Column(flag, dataset='ref') for flag in self.refFlags})
        additionalFuncs.update({flag: Column(flag, dataset='meas') for flag in self.flags})

        if isinstance(self.functors, CompositeFunctor):
            func = self.functors
        else:
            func = CompositeFunctor(self.functors)

        func.funcDict.update(additionalFuncs)
        func.filt = self.filt
        return func

    @property
    def noDupCols(self):
        return [name for name, func in self.func.funcDict.items()
                if func.noDup or func.dataset == 'ref']

    @property
    def df(self):
        if self._df is None:
            self.compute()
        return self._df

    def compute(self, dropna=False, pool=None):
        # Map the computation over multiple input catalogs if a list is given
        if type(self.parq) in (list, tuple):
            if pool is None:
                dflist = [self.func(parq, dropna=dropna) for parq in self.parq]
            else:
                # TODO: parallel map over the input catalogs has not worked so far
                dflist = pool.map(functools.partial(self.func, dropna=dropna), self.parq)
            self._df = pd.concat(dflist)
        else:
            self._df = self.func(self.parq, dropna=dropna)

        return self._df
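# Illustrative sketch (functor and flag names are examples, not a tested recipe):
# build a small functor dict and let PostprocessAnalysis evaluate it on a
# deepCoadd_obj ParquetTable; accessing `.df` triggers the computation.
#
#   funcs = {'ra': RAColumn(), 'dec': DecColumn()}
#   analysis = PostprocessAnalysis(parq, funcs, filt='i', refFlags=['detect_isPrimary'])
#   df = analysis.df   # pandas.DataFrame with 'ra', 'dec' and the requested flags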
class TransformCatalogBaseConnections(pipeBase.PipelineTaskConnections,
                                      dimensions=()):
    """Expected Connections for subclasses of TransformCatalogBaseTask.
    """
    inputCatalog = connectionTypes.Input(
        storageClass="DataFrame",
    )
    outputCatalog = connectionTypes.Output(
        storageClass="DataFrame",
    )


class TransformCatalogBaseConfig(pipeBase.PipelineTaskConfig,
                                 pipelineConnections=TransformCatalogBaseConnections):
    functorFile = pexConfig.Field(
        dtype=str,
        doc='Path to YAML file specifying functors to be computed',
        default=None,
        optional=True
    )
class TransformCatalogBaseTask(CmdLineTask, pipeBase.PipelineTask):
    """Base class for transforming/standardizing a catalog
    by applying functors that convert units and apply calibrations.

    The purpose of this task is to perform a set of computations on
    an input `ParquetTable` dataset (such as `deepCoadd_obj`) and write the
    results to a new dataset (which needs to be declared in an `outputDataset`
    attribute).

    The calculations to be performed are defined in a YAML file that specifies
    a set of functors to be computed, provided as
    a `--functorFile` config parameter. An example of such a YAML file
    is the following:

        funcs:
            count:
                functor: Column
                args:
                    - base_InputCount_value
                dataset: meas
            deconvolved_moments:
                functor: DeconvolvedMoments
                dataset: forced_src
        refFlags:
            - merge_measurement_i
            - merge_measurement_r
            - merge_measurement_z
            - merge_measurement_y
            - merge_measurement_g
            - base_PixelFlags_flag_inexact_psfCenter

    The names for each entry under "funcs" will become the names of columns in the
    output dataset. All the functors referenced are defined in `lsst.pipe.tasks.functors`.
    Positional arguments to be passed to each functor are in the `args` list,
    and any additional entries for each column other than "functor" or "args" (e.g., `'filt'`,
    `'dataset'`) are treated as keyword arguments to be passed to the functor initialization.

    The "refFlags" entry is a shortcut for a set of `Column` functors that keep the
    original column names and take their values from the `'ref'` dataset.

    The "flags" entry will be expanded out per band.

    This task uses the `lsst.pipe.tasks.postprocess.PostprocessAnalysis` object
    to organize and execute the calculations.
    """
    @property
    def _DefaultName(self):
        raise NotImplementedError('Subclass must define "_DefaultName" attribute')

    @property
    def outputDataset(self):
        raise NotImplementedError('Subclass must define "outputDataset" attribute')

    @property
    def inputDataset(self):
        raise NotImplementedError('Subclass must define "inputDataset" attribute')

    @property
    def ConfigClass(self):
        raise NotImplementedError('Subclass must define "ConfigClass" attribute')
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        if self.config.functorFile:
            self.log.info('Loading transform functor definitions from %s',
                          self.config.functorFile)
            self.funcs = CompositeFunctor.from_file(self.config.functorFile)
            self.funcs.update(dict(PostprocessAnalysis._defaultFuncs))
        else:
            self.funcs = None

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)
        if self.funcs is None:
            raise ValueError("config.functorFile is None. "
                             "Must be a valid path to yaml in order to run Task as a PipelineTask.")
        result = self.run(parq=inputs['inputCatalog'], funcs=self.funcs,
                          dataId=outputRefs.outputCatalog.dataId.full)
        outputs = pipeBase.Struct(outputCatalog=result)
        butlerQC.put(outputs, outputRefs)
    def runDataRef(self, dataRef):
        parq = dataRef.get()
        if self.funcs is None:
            raise ValueError("config.functorFile is None. "
                             "Must be a valid path to yaml in order to run as a CommandlineTask.")
        df = self.run(parq, funcs=self.funcs, dataId=dataRef.dataId)
        self.write(df, dataRef)
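    # Hypothetical config sketch (path invented): subclasses are normally pointed
    # at a functor definition file through the functorFile config field, e.g.
    #   config.functorFile = '/path/to/Object.yaml'
    # With functorFile left as None the task still constructs, but running it
    # (runQuantum or runDataRef above) raises ValueError.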
    def run(self, parq, funcs=None, dataId=None, band=None):
        """Do postprocessing calculations

        Takes a `ParquetTable` object and dataId,
        returns a dataframe with results of postprocessing calculations.

        Parameters
        ----------
        parq : `lsst.pipe.tasks.parquetTable.ParquetTable`
            ParquetTable from which calculations are done.
        funcs : `lsst.pipe.tasks.functors.Functors`
            Functors to apply to the table's columns
        dataId : dict, optional
            Used to add a `patchId` column to the output dataframe.
        band : `str`, optional
            Filter band that is being processed.

        Returns
        -------
        df : `pandas.DataFrame`
        """
        self.log.info("Transforming/standardizing the source table dataId: %s", dataId)

        df = self.transform(band, parq, funcs, dataId).df
        self.log.info("Made a table of %d columns and %d rows", len(df.columns), len(df))
        return df

    def getFunctors(self):
        return self.funcs

    def getAnalysis(self, parq, funcs=None, band=None):
        if funcs is None:
            funcs = self.funcs
        analysis = PostprocessAnalysis(parq, funcs, filt=band)
        return analysis

    def transform(self, band, parq, funcs, dataId):
        analysis = self.getAnalysis(parq, funcs=funcs, band=band)
        df = analysis.df
        if dataId is not None:
            for key, value in dataId.items():
                df[str(key)] = value

        return pipeBase.Struct(
            df=df,
            analysis=analysis
        )
    def writeMetadata(self, dataRef):
        """No metadata to write.
        """
        pass
class TransformObjectCatalogConfig(TransformCatalogBaseConfig):
    coaddName = pexConfig.Field()
    filterMap = pexConfig.DictField(
        doc=("Dictionary mapping full filter name to short one for column name munging. "
             "These filters determine the output columns no matter what filters the "
             "input data actually contain."),
        deprecated=("Coadds are now identified by the band, so this transform is unused. "
                    "Will be removed after v22.")
    )
    outputBands = pexConfig.ListField(
        doc=("These bands and only these bands will appear in the output,"
             " NaN-filled if the input does not include them."
             " If None, then use all bands found in the input.")
    )
    camelCase = pexConfig.Field(
        doc=("Write per-band column names with camelCase, else underscore. "
             "For example: gPsFlux instead of g_PsFlux.")
    )
    multilevelOutput = pexConfig.Field(
        doc=("Whether results dataframe should have a multilevel column index (True) or be flat "
             "and name-munged (False).")
    )
class TransformObjectCatalogTask(TransformCatalogBaseTask):
    """Produce a flattened Object Table to match the format specified in

    Do the same set of postprocessing calculations on all bands.

    This is identical to `TransformCatalogBaseTask`, except that it does the
    specified functor calculations for all filters present in the
    input `deepCoadd_obj` table. Any specific `"filt"` keywords specified
    by the YAML file will be superseded.
    """
    _DefaultName = "transformObjectCatalog"
    ConfigClass = TransformObjectCatalogConfig

    inputDataset = 'deepCoadd_obj'
    outputDataset = 'objectTable'

    @classmethod
    def _makeArgumentParser(cls):
        parser = ArgumentParser(name=cls._DefaultName)
        parser.add_id_argument("--id", cls.inputDataset,
                               ContainerClass=CoaddDataIdContainer,
                               help="data ID, e.g. --id tract=12345 patch=1,2")
        return parser
    def run(self, parq, funcs=None, dataId=None, band=None):
        dfDict = {}
        analysisDict = {}
        templateDf = pd.DataFrame()
        outputBands = parq.columnLevelNames['band'] if self.config.outputBands is None else \
            self.config.outputBands

        # Perform the transform for each band present in the input
        for inputBand in parq.columnLevelNames['band']:
            if inputBand not in outputBands:
                self.log.info("Ignoring %s band data in the input", inputBand)
                continue
            self.log.info("Transforming the catalog of band %s", inputBand)
            result = self.transform(inputBand, parq, funcs, dataId)
            dfDict[inputBand] = result.df
            analysisDict[inputBand] = result.analysis
            if templateDf.empty:
                templateDf = result.df

        # Add empty (NaN-filled) columns for requested bands not in the input
        for filt in outputBands:
            if filt not in dfDict:
                self.log.info("Adding empty columns for band %s", filt)
                dfDict[filt] = pd.DataFrame().reindex_like(templateDf)

        # This makes a multilevel column index, with band as the first level
        df = pd.concat(dfDict, axis=1, names=['band', 'column'])

        if not self.config.multilevelOutput:
            noDupCols = list(set.union(*[set(v.noDupCols) for v in analysisDict.values()]))
            if dataId is not None:
                noDupCols += list(dataId.keys())
            df = flattenFilters(df, noDupCols=noDupCols, camelCase=self.config.camelCase)

        self.log.info("Made a table of %d columns and %d rows", len(df.columns), len(df))

        return df
class TractObjectDataIdContainer(CoaddDataIdContainer):

    def makeDataRefList(self, namespace):
        """Make self.refList from self.idList

        Generate a list of data references given tract and/or patch.
        This was adapted from `TractQADataIdContainer`, which was
        `TractDataIdContainer` modified to not require "filter".
        Only existing dataRefs are returned.
        """
        def getPatchRefList(tract):
            return [namespace.butler.dataRef(datasetType=self.datasetType,
                                             patch="%d,%d" % patch.getIndex())
                    for patch in tract]

        tractRefs = defaultdict(list)  # Data references for each tract
        for dataId in self.idList:
            skymap = self.getSkymap(namespace)

            if "tract" in dataId:
                tractId = dataId["tract"]
                if "patch" in dataId:
                    tractRefs[tractId].append(namespace.butler.dataRef(datasetType=self.datasetType,
                                                                       patch=dataId['patch']))
                else:
                    tractRefs[tractId] += getPatchRefList(skymap[tractId])
            else:
                tractRefs = dict((tract.getId(), tractRefs.get(tract.getId(), []) + getPatchRefList(tract))
                                 for tract in skymap)

        outputRefList = []
        for tractRefList in tractRefs.values():
            existingRefs = [ref for ref in tractRefList if ref.datasetExists()]
            outputRefList.append(existingRefs)

        self.refList = outputRefList
class ConsolidateObjectTableConfig(pexConfig.Config):
    coaddName = pexConfig.Field()


class ConsolidateObjectTableTask(CmdLineTask):
    """Write patch-merged source tables to a tract-level parquet file
    """
    _DefaultName = "consolidateObjectTable"
    ConfigClass = ConsolidateObjectTableConfig

    inputDataset = 'objectTable'
    outputDataset = 'objectTable_tract'

    @classmethod
    def _makeArgumentParser(cls):
        parser = ArgumentParser(name=cls._DefaultName)

        parser.add_id_argument("--id", cls.inputDataset,
                               help="data ID, e.g. --id tract=12345",
                               ContainerClass=TractObjectDataIdContainer)
        return parser

    def runDataRef(self, patchRefList):
        df = pd.concat([patchRef.get().toDataFrame() for patchRef in patchRefList])
        patchRefList[0].put(ParquetTable(dataFrame=df), self.outputDataset)

    def writeMetadata(self, dataRef):
        """No metadata to write.
        """
        pass
class TransformSourceTableConnections(pipeBase.PipelineTaskConnections,
                                      dimensions=("instrument", "visit", "detector")):
    inputCatalog = connectionTypes.Input(
        doc="Wide input catalog of sources produced by WriteSourceTableTask",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector"),
    )
    outputCatalog = connectionTypes.Output(
        doc=("Narrower, per-detector Source Table transformed and converted per a "
             "specified set of functors"),
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector")
    )
class TransformSourceTableConfig(TransformCatalogBaseConfig,
                                 pipelineConnections=TransformSourceTableConnections):
    pass


class TransformSourceTableTask(TransformCatalogBaseTask):
    """Transform/standardize a source catalog
    """
    _DefaultName = "transformSourceTable"
    ConfigClass = TransformSourceTableConfig

    inputDataset = 'source'
    outputDataset = 'sourceTable'

    @classmethod
    def _makeArgumentParser(cls):
        parser = ArgumentParser(name=cls._DefaultName)
        parser.add_id_argument("--id", cls.inputDataset,
                               help="data ID, e.g. --id visit=12345 ccd=0")
        return parser

    def runDataRef(self, dataRef):
        """Override to specify band label to run()."""
        parq = dataRef.get()
        funcs = self.getFunctors()
        band = dataRef.get("calexp_filterLabel", immediate=True).bandLabel
        df = self.run(parq, funcs=funcs, dataId=dataRef.dataId, band=band)
        self.write(df, dataRef)
class ConsolidateVisitSummaryConnections(pipeBase.PipelineTaskConnections,
                                          dimensions=("instrument", "visit",),
                                          defaultTemplates={}):
    calexp = connectionTypes.Input(
        doc="Processed exposures used for metadata",
        storageClass="ExposureF",
        dimensions=("instrument", "visit", "detector"),
    )
    visitSummary = connectionTypes.Output(
        doc=("Per-visit consolidated exposure metadata.  These catalogs use "
             "detector id for the id and are sorted for fast lookups of a "
             "detector."),
        storageClass="ExposureCatalog",
        dimensions=("instrument", "visit"),
    )
class ConsolidateVisitSummaryConfig(pipeBase.PipelineTaskConfig,
                                    pipelineConnections=ConsolidateVisitSummaryConnections):
    """Config for ConsolidateVisitSummaryTask"""
    pass
class ConsolidateVisitSummaryTask(pipeBase.PipelineTask, pipeBase.CmdLineTask):
    """Task to consolidate per-detector visit metadata.

    This task aggregates the following metadata from all the detectors in a
    single visit into an exposure catalog:
    - The physical_filter and band (if available).
    - The psf size, shape, and effective area at the center of the detector.
    - The corners of the bounding box in right ascension/declination.

    Other quantities such as Psf, ApCorrMap, and TransmissionCurve are not
    persisted here because of storage concerns, and because of their limited
    utility as summary statistics.

    Tests for this task are performed in ci_hsc_gen3.
    """
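    # Hypothetical read-back sketch (Gen3 names assumed, data ID values invented):
    #   summary = butler.get('visitSummary', instrument='HSC', visit=12345)
    #   row = summary.find(detectorId)   # catalog id is the detector id
    #   psfSigma = row['psfSigma']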
    _DefaultName = "consolidateVisitSummary"
    ConfigClass = ConsolidateVisitSummaryConfig
    @classmethod
    def _makeArgumentParser(cls):
        parser = ArgumentParser(name=cls._DefaultName)

        parser.add_id_argument("--id", "calexp",
                               help="data ID, e.g. --id visit=12345",
                               ContainerClass=VisitDataIdContainer)
        return parser
    def writeMetadata(self, dataRef):
        """No metadata to persist, so override to remove metadata persistence.
        """
        pass

    def writeConfig(self, butler, clobber=False, doBackup=True):
        """No config to persist, so override to remove config persistence.
        """
        pass
    def runDataRef(self, dataRefList):
        visit = dataRefList[0].dataId['visit']

        self.log.debug("Concatenating metadata from %d per-detector calexps (visit %d)" %
                       (len(dataRefList), visit))

        expCatalog = self._combineExposureMetadata(visit, dataRefList, isGen3=False)
        dataRefList[0].put(expCatalog, 'visitSummary', visit=visit)
    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        dataRefs = butlerQC.get(inputRefs.calexp)
        visit = dataRefs[0].dataId.byName()['visit']

        self.log.debug("Concatenating metadata from %d per-detector calexps (visit %d)" %
                       (len(dataRefs), visit))

        expCatalog = self._combineExposureMetadata(visit, dataRefs)
        butlerQC.put(expCatalog, outputRefs.visitSummary)
    def _combineExposureMetadata(self, visit, dataRefs, isGen3=True):
        """Make a combined exposure catalog from a list of dataRefs.

        Parameters
        ----------
        visit : `int`
            Visit identification number
        dataRefs : `list`
            List of calexp dataRefs in visit. May be list of
            `lsst.daf.persistence.ButlerDataRef` (Gen2) or
            `lsst.daf.butler.DeferredDatasetHandle` (Gen3).
        isGen3 : `bool`, optional
            Specifies if this is a Gen3 list of datarefs.

        Returns
        -------
        visitSummary : `lsst.afw.table.ExposureCatalog`
            Exposure catalog with per-detector summary information.
        """
        schema = afwTable.ExposureTable.makeMinimalSchema()
        schema.addField('visit', type='I', doc='Visit number')
        schema.addField('physical_filter', type='String', size=32, doc='Physical filter')
        schema.addField('band', type='String', size=32, doc='Name of band')
        schema.addField('psfSigma', type='F',
                        doc='PSF model second-moments determinant radius (center of chip) (pixel)')
        schema.addField('psfArea', type='F',
                        doc='PSF model effective area (center of chip) (pixel**2)')
        schema.addField('psfIxx', type='F',
                        doc='PSF model Ixx (center of chip) (pixel**2)')
        schema.addField('psfIyy', type='F',
                        doc='PSF model Iyy (center of chip) (pixel**2)')
        schema.addField('psfIxy', type='F',
                        doc='PSF model Ixy (center of chip) (pixel**2)')
        schema.addField('raCorners', type='ArrayD', size=4,
                        doc='Right Ascension of bounding box corners (degrees)')
        schema.addField('decCorners', type='ArrayD', size=4,
                        doc='Declination of bounding box corners (degrees)')

        cat = afwTable.ExposureCatalog(schema)
        cat.resize(len(dataRefs))

        cat['visit'] = visit
        for i, dataRef in enumerate(dataRefs):
            if isGen3:
                visitInfo = dataRef.get(component='visitInfo')
                filterLabel = dataRef.get(component='filterLabel')
                psf = dataRef.get(component='psf')
                wcs = dataRef.get(component='wcs')
                photoCalib = dataRef.get(component='photoCalib')
                detector = dataRef.get(component='detector')
                bbox = dataRef.get(component='bbox')
                validPolygon = dataRef.get(component='validPolygon')
            else:
                # Gen2: read a sub-region of the calexp to get at the attached metadata and calibs
                exp = dataRef.get(datasetType='calexp_sub', bbox=gen2_read_bbox)
                visitInfo = exp.getInfo().getVisitInfo()
                filterLabel = dataRef.get("calexp_filterLabel")
                psf = exp.getPsf()
                wcs = exp.getWcs()
                photoCalib = exp.getPhotoCalib()
                detector = exp.getDetector()
                bbox = dataRef.get(datasetType='calexp_bbox')
                validPolygon = exp.getInfo().getValidPolygon()

            rec = cat[i]
            rec.setVisitInfo(visitInfo)
            rec.setWcs(wcs)
            rec.setPhotoCalib(photoCalib)
            rec.setDetector(detector)
            rec.setValidPolygon(validPolygon)

            rec['physical_filter'] = filterLabel.physicalLabel if filterLabel.hasPhysicalLabel() else ""
            rec['band'] = filterLabel.bandLabel if filterLabel.hasBandLabel() else ""
            rec.setId(detector.getId())
            shape = psf.computeShape(bbox.getCenter())
            rec['psfSigma'] = shape.getDeterminantRadius()
            rec['psfIxx'] = shape.getIxx()
            rec['psfIyy'] = shape.getIyy()
            rec['psfIxy'] = shape.getIxy()
            im = psf.computeKernelImage(bbox.getCenter())
            rec['psfArea'] = np.sum(im.array)/np.sum(im.array**2.)
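            # computeKernelImage returns a unit-sum kernel, so the expression above
            # evaluates to n_eff = 1 / sum(f_i**2), the PSF effective area at the chip center.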
            sph_pts = [wcs.pixelToSky(pt) for pt in lsst.geom.Box2D(bbox).getCorners()]
            rec['raCorners'][:] = [sph.getRa().asDegrees() for sph in sph_pts]
            rec['decCorners'][:] = [sph.getDec().asDegrees() for sph in sph_pts]
        metadata = dafBase.PropertyList()
        metadata.add("COMMENT", "Catalog id is detector id, sorted.")
        metadata.add("COMMENT", "Only detectors with data have entries.")
        cat.setMetadata(metadata)

        return cat
class VisitDataIdContainer(DataIdContainer):
    """DataIdContainer that groups sensor-level ids by visit.
    """

    def makeDataRefList(self, namespace):
        """Make self.refList from self.idList

        Generate a list of data references grouped by visit.

        Parameters
        ----------
        namespace : `argparse.Namespace`
            Namespace used by `lsst.pipe.base.CmdLineTask` to parse command line arguments
        """
        # Group sensor-level data references by visit
        visitRefs = defaultdict(list)
        for dataId in self.idList:
            if "visit" in dataId:
                visitId = dataId["visit"]
                subset = namespace.butler.subset(self.datasetType, dataId=dataId)
                visitRefs[visitId].extend([dataRef for dataRef in subset])

        outputRefList = []
        for refList in visitRefs.values():
            existingRefs = [ref for ref in refList if ref.datasetExists()]
            if existingRefs:
                outputRefList.append(existingRefs)

        self.refList = outputRefList
class ConsolidateSourceTableConnections(pipeBase.PipelineTaskConnections,
                                        dimensions=("instrument", "visit")):
    inputCatalogs = connectionTypes.Input(
        doc="Input per-detector Source Tables",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector"),
    )
    outputCatalog = connectionTypes.Output(
        doc="Per-visit concatenation of Source Table",
        name="sourceTable_visit",
        storageClass="DataFrame",
        dimensions=("instrument", "visit")
    )
class ConsolidateSourceTableConfig(pipeBase.PipelineTaskConfig,
                                   pipelineConnections=ConsolidateSourceTableConnections):
    pass


class ConsolidateSourceTableTask(CmdLineTask, pipeBase.PipelineTask):
    """Concatenate `sourceTable` list into a per-visit `sourceTable_visit`
    """
    _DefaultName = 'consolidateSourceTable'
    ConfigClass = ConsolidateSourceTableConfig

    inputDataset = 'sourceTable'
    outputDataset = 'sourceTable_visit'

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)
        self.log.info("Concatenating %s per-detector Source Tables",
                      len(inputs['inputCatalogs']))
        df = pd.concat(inputs['inputCatalogs'])
        butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs)

    def runDataRef(self, dataRefList):
        self.log.info("Concatenating %s per-detector Source Tables", len(dataRefList))
        df = pd.concat([dataRef.get().toDataFrame() for dataRef in dataRefList])
        dataRefList[0].put(ParquetTable(dataFrame=df), self.outputDataset)

    @classmethod
    def _makeArgumentParser(cls):
        parser = ArgumentParser(name=cls._DefaultName)

        parser.add_id_argument("--id", cls.inputDataset,
                               help="data ID, e.g. --id visit=12345",
                               ContainerClass=VisitDataIdContainer)
        return parser
    def writeMetadata(self, dataRef):
        """No metadata to write.
        """
        pass

    def writeConfig(self, butler, clobber=False, doBackup=True):
        """No config to write.
        """
        pass