import functools
import logging
import numbers

import numpy as np
import pandas as pd

from collections import defaultdict

import lsst.pex.config as pexConfig
import lsst.pipe.base as pipeBase
import lsst.daf.base as dafBase
import lsst.afw.table as afwTable
from lsst.pipe.base import connectionTypes
from lsst.meas.base import SingleFrameMeasurementTask
from lsst.skymap import BaseSkyMap
from lsst.obs.base import ExposureIdInfo
from lsst.pipe.base import CmdLineTask, ArgumentParser, DataIdContainer
from lsst.daf.butler import DeferredDatasetHandle, DataCoordinate

from .parquetTable import ParquetTable
from .multiBandUtils import makeMergeArgumentParser, MergeSourcesRunner
from .functors import CompositeFunctor, Column

log = logging.getLogger(__name__)
def flattenFilters(df, noDupCols=['coord_ra', 'coord_dec'], camelCase=False, inputBands=None):
    """Flattens a dataframe with multilevel column index.
    """
    newDf = pd.DataFrame()
    # band is the level-0 index of the multilevel columns
    dfBands = df.columns.unique(level=0).values
    for band in dfBands:
        subdf = df[band]
        columnFormat = '{0}{1}' if camelCase else '{0}_{1}'
        newColumns = {c: columnFormat.format(band, c)
                      for c in subdf.columns if c not in noDupCols}
        cols = list(newColumns.keys())
        newDf = pd.concat([newDf, subdf[cols].rename(columns=newColumns)], axis=1)

    # If inputBands is given, restrict to the bands actually present in df.
    presentBands = dfBands if inputBands is None else list(set(inputBands).intersection(dfBands))

    noDupDf = df[presentBands[0]][noDupCols]
    newDf = pd.concat([noDupDf, newDf], axis=1)
    return newDf
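
# Illustrative sketch (standalone, not used by the pipeline): what
# flattenFilters does to a small DataFrame with a (band, column) MultiIndex.
# Band and column names below are made up.
def _exampleFlattenFilters():
    import pandas as pd

    columns = pd.MultiIndex.from_product([['g', 'r'], ['coord_ra', 'coord_dec', 'psfFlux']],
                                         names=('band', 'column'))
    df = pd.DataFrame([[1.0, 2.0, 10.0, 1.0, 2.0, 20.0]], columns=columns)
    # Per-band columns get a band prefix ('g_psfFlux', 'r_psfFlux'); the
    # noDupCols ('coord_ra', 'coord_dec') are kept once, unprefixed.
    return flattenFilters(df)
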
class WriteObjectTableConnections(pipeBase.PipelineTaskConnections,
                                  defaultTemplates={"coaddName": "deep"},
                                  dimensions=("tract", "patch", "skymap")):
    inputCatalogMeas = connectionTypes.Input(
        doc="Catalog of source measurements on the deepCoadd.",
        dimensions=("tract", "patch", "band", "skymap"),
        storageClass="SourceCatalog",
        name="{coaddName}Coadd_meas",
    )
    inputCatalogForcedSrc = connectionTypes.Input(
        doc="Catalog of forced measurements (shape and position parameters held fixed) on the deepCoadd.",
        dimensions=("tract", "patch", "band", "skymap"),
        storageClass="SourceCatalog",
        name="{coaddName}Coadd_forced_src",
    )
    inputCatalogRef = connectionTypes.Input(
        doc="Catalog marking the primary detection (which band provides a good shape and position) "
            "for each detection in deepCoadd_mergeDet.",
        dimensions=("tract", "patch", "skymap"),
        storageClass="SourceCatalog",
        name="{coaddName}Coadd_ref",
    )
    outputCatalog = connectionTypes.Output(
        doc="A vertical concatenation of the deepCoadd_{ref|meas|forced_src} catalogs, "
            "stored as a DataFrame with a multi-level column index per-patch.",
        dimensions=("tract", "patch", "skymap"),
        storageClass="DataFrame",
        name="{coaddName}Coadd_obj",
    )
class WriteObjectTableConfig(pipeBase.PipelineTaskConfig,
                             pipelineConnections=WriteObjectTableConnections):
    engine = pexConfig.Field(
        dtype=str,
        doc="Parquet engine for writing (pyarrow or fastparquet)",
    )
    coaddName = pexConfig.Field(
        dtype=str,
        doc="Name of coadd",
    )
class WriteObjectTableTask(CmdLineTask, pipeBase.PipelineTask):
    """Write filter-merged source tables to parquet.
    """
    _DefaultName = "writeObjectTable"
    ConfigClass = WriteObjectTableConfig
    RunnerClass = MergeSourcesRunner

    # Names of table datasets to be merged
    inputDatasets = ('forced_src', 'meas', 'ref')

    # Name of the output dataset type
    outputDataset = 'obj'

    def __init__(self, butler=None, schema=None, **kwargs):
        super().__init__(**kwargs)

    def runDataRef(self, patchRefList):
        """
        @brief Merge coadd sources from multiple bands. Calls @ref `run` which
        must be defined in subclasses that inherit from MergeSourcesTask.

        @param[in] patchRefList list of data references for each filter
        """
        catalogs = dict(self.readCatalog(patchRef) for patchRef in patchRefList)
        dataId = patchRefList[0].dataId
        mergedCatalog = self.run(catalogs, tract=dataId['tract'], patch=dataId['patch'])
        self.write(patchRefList[0], ParquetTable(dataFrame=mergedCatalog))
    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)

        measDict = {ref.dataId['band']: {'meas': cat} for ref, cat in
                    zip(inputRefs.inputCatalogMeas, inputs['inputCatalogMeas'])}
        forcedSourceDict = {ref.dataId['band']: {'forced_src': cat} for ref, cat in
                            zip(inputRefs.inputCatalogForcedSrc, inputs['inputCatalogForcedSrc'])}

        catalogs = {}
        for band in measDict.keys():
            catalogs[band] = {'meas': measDict[band]['meas'],
                              'forced_src': forcedSourceDict[band]['forced_src'],
                              'ref': inputs['inputCatalogRef']}
        dataId = butlerQC.quantum.dataId
        df = self.run(catalogs=catalogs, tract=dataId['tract'], patch=dataId['patch'])
        outputs = pipeBase.Struct(outputCatalog=df)
        butlerQC.put(outputs, outputRefs)
    @classmethod
    def _makeArgumentParser(cls):
        """Create a suitable ArgumentParser.

        We will use the ArgumentParser to get a list of data
        references for patches; the RunnerClass will sort them into lists
        of data references for the same patch.

        References the first of self.inputDatasets, rather than
        self.inputDataset.
        """
        return makeMergeArgumentParser(cls._DefaultName, cls.inputDatasets[0])
    def readCatalog(self, patchRef):
        """Read input catalogs.

        Read all the input datasets given by the 'inputDatasets' attribute.

        Parameters
        ----------
        patchRef : `lsst.daf.persistence.ButlerDataRef`
            Data reference for patch.

        Returns
        -------
        Tuple consisting of band name and a dict of catalogs, keyed by
        dataset name.
        """
        band = patchRef.get(self.config.coaddName + "Coadd_filterLabel", immediate=True).bandLabel
        catalogDict = {}
        for dataset in self.inputDatasets:
            catalog = patchRef.get(self.config.coaddName + "Coadd_" + dataset, immediate=True)
            self.log.info("Read %d sources from %s for band %s: %s",
                          len(catalog), dataset, band, patchRef.dataId)
            catalogDict[dataset] = catalog
        return band, catalogDict
    def run(self, catalogs, tract, patch):
        """Merge multiple catalogs.

        Parameters
        ----------
        catalogs : `dict`
            Mapping from filter names to dict of catalogs.
        tract : `int`
            tractId to use for the tractId column.
        patch : `str`
            patchId to use for the patchId column.

        Returns
        -------
        catalog : `pandas.DataFrame`
            Merged dataframe.
        """
        dfs = []
        for filt, tableDict in catalogs.items():
            for dataset, table in tableDict.items():
                # Convert afwTable to pandas DataFrame, indexed by source id.
                df = table.asAstropy().to_pandas().set_index('id', drop=True)

                # Sort columns by name, to ensure matching schema among patches.
                df = df.reindex(sorted(df.columns), axis=1)
                df['tractId'] = tract
                df['patchId'] = patch

                # Make columns a 3-level MultiIndex.
                df.columns = pd.MultiIndex.from_tuples([(dataset, filt, c) for c in df.columns],
                                                       names=('dataset', 'band', 'column'))
                dfs.append(df)

        catalog = functools.reduce(lambda d1, d2: d1.join(d2), dfs)
        return catalog
    def write(self, patchRef, catalog):
        """Write the output.

        Parameters
        ----------
        catalog : `ParquetTable`
            Catalog to write.
        patchRef : `lsst.daf.persistence.ButlerDataRef`
            Data reference for patch.
        """
        patchRef.put(catalog, self.config.coaddName + "Coadd_" + self.outputDataset)
        # Strip the filter from the dataId before logging the merged output.
        mergeDataId = patchRef.dataId.copy()
        del mergeDataId["filter"]
        self.log.info("Wrote merged catalog: %s", mergeDataId)

    def writeMetadata(self, dataRefList):
        """No metadata to write, and not sure how to write it for a list of
        dataRefs.
        """
        pass
class WriteSourceTableConnections(pipeBase.PipelineTaskConnections,
                                  defaultTemplates={"catalogType": ""},
                                  dimensions=("instrument", "visit", "detector")):

    catalog = connectionTypes.Input(
        doc="Input full-depth catalog of sources produced by CalibrateTask",
        name="{catalogType}src",
        storageClass="SourceCatalog",
        dimensions=("instrument", "visit", "detector")
    )
    outputCatalog = connectionTypes.Output(
        doc="Catalog of sources, `src` in Parquet format. The 'id' column is "
            "replaced with an index; all other columns are unchanged.",
        name="{catalogType}source",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector")
    )
class WriteSourceTableConfig(pipeBase.PipelineTaskConfig,
                             pipelineConnections=WriteSourceTableConnections):
    doApplyExternalPhotoCalib = pexConfig.Field(
        dtype=bool,
        doc=("Add local photoCalib columns from the calexp.photoCalib? Should only set True if "
             "generating Source Tables from older src tables which do not already have local calib columns"),
        deprecated="This field is no longer used. Use WriteRecalibratedSourceTableTask instead."
    )
    doApplyExternalSkyWcs = pexConfig.Field(
        dtype=bool,
        doc=("Add local WCS columns from the calexp.wcs? Should only set True if "
             "generating Source Tables from older src tables which do not already have local calib columns"),
        deprecated="This field is no longer used. Use WriteRecalibratedSourceTableTask instead."
    )

    def validate(self):
        super().validate()
        if self.doApplyExternalSkyWcs or self.doApplyExternalPhotoCalib:
            raise ValueError(f"doApplyExternalSkyWcs={self.doApplyExternalSkyWcs} and "
                             f"doApplyExternalPhotoCalib={self.doApplyExternalPhotoCalib}. "
                             "These config parameters are no-ops for WriteSourceTableTask. "
                             "Set to False or use WriteRecalibratedSourceTableTask instead.")
class WriteSourceTableTask(CmdLineTask, pipeBase.PipelineTask):
    """Write source table to parquet.
    """
    _DefaultName = "writeSourceTable"
    ConfigClass = WriteSourceTableConfig

    def runDataRef(self, dataRef):
        src = dataRef.get('src')
        ccdVisitId = dataRef.get('ccdExposureId')
        result = self.run(src, ccdVisitId=ccdVisitId)
        dataRef.put(result.table, 'source')

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)
        inputs['ccdVisitId'] = butlerQC.quantum.dataId.pack("visit_detector")
        result = self.run(**inputs).table
        outputs = pipeBase.Struct(outputCatalog=result.toDataFrame())
        butlerQC.put(outputs, outputRefs)

    def run(self, catalog, ccdVisitId=None, **kwargs):
        """Convert `src` catalog to parquet

        Parameters
        ----------
        catalog : `afwTable.SourceCatalog`
            catalog to be converted
        ccdVisitId : `int`
            ccdVisitId to be added as a column

        Returns
        -------
        result : `lsst.pipe.base.Struct`
            ``table``
                `ParquetTable` version of the input catalog
        """
        self.log.info("Generating parquet table from src catalog ccdVisitId=%s", ccdVisitId)
        df = catalog.asAstropy().to_pandas().set_index('id', drop=True)
        df['ccdVisitId'] = ccdVisitId
        return pipeBase.Struct(table=ParquetTable(dataFrame=df))

    def writeMetadata(self, dataRef):
        """No metadata to write.
        """
        pass

    @classmethod
    def _makeArgumentParser(cls):
        parser = ArgumentParser(name=cls._DefaultName)
        parser.add_id_argument("--id", 'src',
                               help="data ID, e.g. --id visit=12345 ccd=0")
        return parser
class WriteRecalibratedSourceTableConnections(WriteSourceTableConnections,
                                              defaultTemplates={"catalogType": "",
                                                                "skyWcsName": "jointcal",
                                                                "photoCalibName": "fgcm"},
                                              dimensions=("instrument", "visit", "detector", "skymap")):
    skyMap = connectionTypes.Input(
        doc="skyMap needed to choose which tract-level calibrations to use when multiple available",
        name=BaseSkyMap.SKYMAP_DATASET_TYPE_NAME,
        storageClass="SkyMap",
        dimensions=("skymap",),
    )
    exposure = connectionTypes.Input(
        doc="Input exposure to perform photometry on.",
        storageClass="ExposureF",
        dimensions=["instrument", "visit", "detector"],
    )
    externalSkyWcsTractCatalog = connectionTypes.Input(
        doc=("Per-tract, per-visit wcs calibrations. These catalogs use the detector "
             "id for the catalog id, sorted on id for fast lookup."),
        name="{skyWcsName}SkyWcsCatalog",
        storageClass="ExposureCatalog",
        dimensions=["instrument", "visit", "tract"],
    )
    externalSkyWcsGlobalCatalog = connectionTypes.Input(
        doc=("Per-visit wcs calibrations computed globally (with no tract information). "
             "These catalogs use the detector id for the catalog id, sorted on id for "
             "fast lookup."),
        name="{skyWcsName}SkyWcsCatalog",
        storageClass="ExposureCatalog",
        dimensions=["instrument", "visit"],
    )
    externalPhotoCalibTractCatalog = connectionTypes.Input(
        doc=("Per-tract, per-visit photometric calibrations. These catalogs use the "
             "detector id for the catalog id, sorted on id for fast lookup."),
        name="{photoCalibName}PhotoCalibCatalog",
        storageClass="ExposureCatalog",
        dimensions=["instrument", "visit", "tract"],
    )
    externalPhotoCalibGlobalCatalog = connectionTypes.Input(
        doc=("Per-visit photometric calibrations computed globally (with no tract "
             "information). These catalogs use the detector id for the catalog id, "
             "sorted on id for fast lookup."),
        name="{photoCalibName}PhotoCalibCatalog",
        storageClass="ExposureCatalog",
        dimensions=["instrument", "visit"],
    )

    def __init__(self, *, config=None):
        super().__init__(config=config)
        # Keep only the external-calibration inputs that the config actually
        # asks for; remove the rest so they are not required by the quantum.
        if config.doApplyExternalSkyWcs and config.doReevaluateSkyWcs:
            if config.useGlobalExternalSkyWcs:
                self.inputs.remove("externalSkyWcsTractCatalog")
            else:
                self.inputs.remove("externalSkyWcsGlobalCatalog")
        else:
            self.inputs.remove("externalSkyWcsTractCatalog")
            self.inputs.remove("externalSkyWcsGlobalCatalog")
        if config.doApplyExternalPhotoCalib and config.doReevaluatePhotoCalib:
            if config.useGlobalExternalPhotoCalib:
                self.inputs.remove("externalPhotoCalibTractCatalog")
            else:
                self.inputs.remove("externalPhotoCalibGlobalCatalog")
        else:
            self.inputs.remove("externalPhotoCalibTractCatalog")
            self.inputs.remove("externalPhotoCalibGlobalCatalog")
class WriteRecalibratedSourceTableConfig(WriteSourceTableConfig,
                                         pipelineConnections=WriteRecalibratedSourceTableConnections):

    doReevaluatePhotoCalib = pexConfig.Field(
        dtype=bool,
        doc=("Add or replace local photoCalib columns from either the calexp.photoCalib or jointcal/FGCM"),
    )
    doReevaluateSkyWcs = pexConfig.Field(
        dtype=bool,
        doc=("Add or replace local WCS columns from either the calexp.wcs or jointcal"),
    )
    doReevaluateLocalBackground = pexConfig.Field(
        dtype=bool,
        doc=("Add or replace local Background columns"),
    )
    doApplyExternalPhotoCalib = pexConfig.Field(
        dtype=bool,
        doc=("Whether to apply external photometric calibration via an "
             "`lsst.afw.image.PhotoCalib` object. Uses the "
             "``externalPhotoCalibName`` field to determine which calibration "
             "to load."),
    )
    doApplyExternalSkyWcs = pexConfig.Field(
        dtype=bool,
        doc=("Whether to apply external astrometric calibration via an "
             "`lsst.afw.geom.SkyWcs` object. Uses ``externalSkyWcsName`` "
             "field to determine which calibration to load."),
    )
    useGlobalExternalPhotoCalib = pexConfig.Field(
        dtype=bool,
        doc=("When using doApplyExternalPhotoCalib, use 'global' calibrations "
             "that are not run per-tract. When False, use per-tract photometric "
             "calibration files."),
    )
    useGlobalExternalSkyWcs = pexConfig.Field(
        dtype=bool,
        doc=("When using doApplyExternalSkyWcs, use 'global' calibrations "
             "that are not run per-tract. When False, use per-tract wcs "
             "files."),
    )

    def validate(self):
        super().validate()
        if self.doApplyExternalSkyWcs and not self.doReevaluateSkyWcs:
            log.warning("doApplyExternalSkyWcs=True but doReevaluateSkyWcs=False. "
                        "External SkyWcs will not be read or evaluated.")
        if self.doApplyExternalPhotoCalib and not self.doReevaluatePhotoCalib:
            log.warning("doApplyExternalPhotoCalib=True but doReevaluatePhotoCalib=False. "
                        "External PhotoCalib will not be read or evaluated.")
class WriteRecalibratedSourceTableTask(WriteSourceTableTask):
    """Write source table to parquet.
    """
    _DefaultName = "writeRecalibratedSourceTable"
    ConfigClass = WriteRecalibratedSourceTableConfig

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)
        inputs['ccdVisitId'] = butlerQC.quantum.dataId.pack("visit_detector")
        inputs['exposureIdInfo'] = ExposureIdInfo.fromDataId(butlerQC.quantum.dataId, "visit_detector")

        if self.config.doReevaluatePhotoCalib or self.config.doReevaluateSkyWcs:
            if self.config.doApplyExternalPhotoCalib or self.config.doApplyExternalSkyWcs:
                inputs['exposure'] = self.attachCalibs(inputRefs, **inputs)

            inputs['catalog'] = self.addCalibColumns(**inputs)

        result = self.run(**inputs).table
        outputs = pipeBase.Struct(outputCatalog=result.toDataFrame())
        butlerQC.put(outputs, outputRefs)
    def attachCalibs(self, inputRefs, skyMap, exposure, externalSkyWcsGlobalCatalog=None,
                     externalSkyWcsTractCatalog=None, externalPhotoCalibGlobalCatalog=None,
                     externalPhotoCalibTractCatalog=None, **kwargs):
        """Apply external calibrations to exposure per configuration.

        When multiple tract-level calibrations overlap, select the one with the
        center closest to detector.

        Parameters
        ----------
        inputRefs : `lsst.pipe.base.InputQuantizedConnection`, for dataIds of
            tract-level calibs.
        skyMap : `lsst.skymap.SkyMap`
        exposure : `lsst.afw.image.exposure.Exposure`
            Input exposure to adjust calibrations.
        externalSkyWcsGlobalCatalog : `lsst.afw.table.ExposureCatalog`, optional
            Exposure catalog with external skyWcs to be applied per config.
        externalSkyWcsTractCatalog : `lsst.afw.table.ExposureCatalog`, optional
            Exposure catalog with external skyWcs to be applied per config.
        externalPhotoCalibGlobalCatalog : `lsst.afw.table.ExposureCatalog`, optional
            Exposure catalog with external photoCalib to be applied per config.
        externalPhotoCalibTractCatalog : `lsst.afw.table.ExposureCatalog`, optional
            Exposure catalog with external photoCalib to be applied per config.

        Returns
        -------
        exposure : `lsst.afw.image.exposure.Exposure`
            Exposure with adjusted calibrations.
        """
        if not self.config.doApplyExternalSkyWcs:
            # Do not modify the exposure's SkyWcs.
            externalSkyWcsCatalog = None
        elif self.config.useGlobalExternalSkyWcs:
            # Use the global external SkyWcs.
            externalSkyWcsCatalog = externalSkyWcsGlobalCatalog
            self.log.info('Applying global SkyWcs')
        else:
            # Use a tract-level external SkyWcs from the closest overlapping tract.
            inputRef = getattr(inputRefs, 'externalSkyWcsTractCatalog')
            tracts = [ref.dataId['tract'] for ref in inputRef]
            if len(tracts) == 1:
                ind = 0
                self.log.info('Applying tract-level SkyWcs from tract %s', tracts[ind])
            else:
                ind = self.getClosestTract(tracts, skyMap,
                                           exposure.getBBox(), exposure.getWcs())
                self.log.info('Multiple overlapping externalSkyWcsTractCatalogs found (%s). '
                              'Applying closest to detector center: tract=%s', str(tracts), tracts[ind])

            externalSkyWcsCatalog = externalSkyWcsTractCatalog[ind]

        if not self.config.doApplyExternalPhotoCalib:
            # Do not modify the exposure's PhotoCalib.
            externalPhotoCalibCatalog = None
        elif self.config.useGlobalExternalPhotoCalib:
            # Use the global external PhotoCalib.
            externalPhotoCalibCatalog = externalPhotoCalibGlobalCatalog
            self.log.info('Applying global PhotoCalib')
        else:
            # Use a tract-level external PhotoCalib from the closest overlapping tract.
            inputRef = getattr(inputRefs, 'externalPhotoCalibTractCatalog')
            tracts = [ref.dataId['tract'] for ref in inputRef]
            if len(tracts) == 1:
                ind = 0
                self.log.info('Applying tract-level PhotoCalib from tract %s', tracts[ind])
            else:
                ind = self.getClosestTract(tracts, skyMap,
                                           exposure.getBBox(), exposure.getWcs())
                self.log.info('Multiple overlapping externalPhotoCalibTractCatalogs found (%s). '
                              'Applying closest to detector center: tract=%s', str(tracts), tracts[ind])

            externalPhotoCalibCatalog = externalPhotoCalibTractCatalog[ind]

        return self.prepareCalibratedExposure(exposure, externalSkyWcsCatalog, externalPhotoCalibCatalog)
    def getClosestTract(self, tracts, skyMap, bbox, wcs):
        """Find the index of the tract closest to detector from list of tractIds.

        Parameters
        ----------
        tracts : `list` [`int`]
            Iterable of integer tractIds
        skyMap : `lsst.skymap.SkyMap`
            skyMap to lookup tract geometry and wcs
        bbox : `lsst.geom.Box2I`
            Detector bbox, center of which will be compared to tract centers
        wcs : `lsst.afw.geom.SkyWcs`
            Detector Wcs object to map the detector center to SkyCoord

        Returns
        -------
        index : `int`
            Index of the closest tract in ``tracts``.
        """
        center = wcs.pixelToSky(bbox.getCenter())
        sep = []
        for tractId in tracts:
            tract = skyMap[tractId]
            tractCenter = tract.getWcs().pixelToSky(tract.getBBox().getCenter())
            sep.append(center.separation(tractCenter))

        return np.argmin(sep)
    def prepareCalibratedExposure(self, exposure, externalSkyWcsCatalog=None, externalPhotoCalibCatalog=None):
        """Prepare a calibrated exposure and apply external calibrations.

        Parameters
        ----------
        exposure : `lsst.afw.image.exposure.Exposure`
            Input exposure to adjust calibrations.
        externalSkyWcsCatalog : `lsst.afw.table.ExposureCatalog`, optional
            Exposure catalog with external skyWcs to be applied
            if config.doApplyExternalSkyWcs=True. Catalog uses the detector id
            for the catalog id, sorted on id for fast lookup.
        externalPhotoCalibCatalog : `lsst.afw.table.ExposureCatalog`, optional
            Exposure catalog with external photoCalib to be applied
            if config.doApplyExternalPhotoCalib=True. Catalog uses the detector
            id for the catalog id, sorted on id for fast lookup.

        Returns
        -------
        exposure : `lsst.afw.image.exposure.Exposure`
            Exposure with adjusted calibrations.
        """
        detectorId = exposure.getInfo().getDetector().getId()

        if externalPhotoCalibCatalog is not None:
            row = externalPhotoCalibCatalog.find(detectorId)
            if row is None:
                self.log.warning("Detector id %s not found in externalPhotoCalibCatalog; "
                                 "Using original photoCalib.", detectorId)
            else:
                photoCalib = row.getPhotoCalib()
                if photoCalib is None:
                    self.log.warning("Detector id %s has None for photoCalib in externalPhotoCalibCatalog; "
                                     "Using original photoCalib.", detectorId)
                else:
                    exposure.setPhotoCalib(photoCalib)

        if externalSkyWcsCatalog is not None:
            row = externalSkyWcsCatalog.find(detectorId)
            if row is None:
                self.log.warning("Detector id %s not found in externalSkyWcsCatalog; "
                                 "Using original skyWcs.", detectorId)
            else:
                skyWcs = row.getWcs()
                if skyWcs is None:
                    self.log.warning("Detector id %s has None for skyWcs in externalSkyWcsCatalog; "
                                     "Using original skyWcs.", detectorId)
                else:
                    exposure.setWcs(skyWcs)

        return exposure
    def addCalibColumns(self, catalog, exposure, exposureIdInfo, **kwargs):
        """Add or replace columns with calibs evaluated at each centroid.

        Add or replace 'base_LocalWcs' and 'base_LocalPhotoCalib' columns in a
        source catalog, by rerunning the plugins.

        Parameters
        ----------
        catalog : `lsst.afw.table.SourceCatalog`
            catalog to which calib columns will be added
        exposure : `lsst.afw.image.exposure.Exposure`
            Exposure with attached PhotoCalibs and SkyWcs attributes to be
            reevaluated at local centroids. Pixels are not required.
        exposureIdInfo : `lsst.obs.base.ExposureIdInfo`

        Returns
        -------
        newCat : `lsst.afw.table.SourceCatalog`
            Source Catalog with requested local calib columns
        """
        measureConfig = SingleFrameMeasurementTask.ConfigClass()
        measureConfig.doReplaceWithNoise = False

        # Run only the requested plugins.
        measureConfig.plugins.names = []
        if self.config.doReevaluateSkyWcs:
            measureConfig.plugins.names.add('base_LocalWcs')
            self.log.info("Re-evaluating base_LocalWcs plugin")
        if self.config.doReevaluatePhotoCalib:
            measureConfig.plugins.names.add('base_LocalPhotoCalib')
            self.log.info("Re-evaluating base_LocalPhotoCalib plugin")
        if self.config.doReevaluateLocalBackground:
            measureConfig.plugins.names.add('base_LocalBackground')
            self.log.info("Re-evaluating base_LocalBackground plugin")
        pluginsNotToCopy = tuple(measureConfig.plugins.names)

        # Copy the input schema, omitting the fields produced by the plugins
        # that are about to be re-run.
        aliasMap = catalog.schema.getAliasMap()
        mapper = afwTable.SchemaMapper(catalog.schema)
        for item in catalog.schema:
            if not item.field.getName().startswith(pluginsNotToCopy):
                mapper.addMapping(item.key)
        schema = mapper.getOutputSchema()
        measurement = SingleFrameMeasurementTask(config=measureConfig, schema=schema)
        schema.setAliasMap(aliasMap)
        newCat = afwTable.SourceCatalog(schema)
        newCat.extend(catalog, mapper=mapper)

        measurement.run(measCat=newCat, exposure=exposure, exposureId=exposureIdInfo.expId)
        return newCat
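
# Illustrative sketch (standalone): the closest-center selection that
# getClosestTract performs above, reduced to plain 2-D points. The real method
# uses on-sky angular separations via SkyWcs/SkyMap; all values here are made up.
def _exampleClosestCenter():
    import numpy as np

    detectorCenter = np.array([10.2, -3.1])      # e.g. (ra, dec) in degrees
    tractCenters = np.array([[9.0, -3.0],
                             [10.0, -3.0],
                             [11.5, -4.0]])
    sep = np.linalg.norm(tractCenters - detectorCenter, axis=1)
    return int(np.argmin(sep))                   # -> 1, the second tract is closest
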
class PostprocessAnalysis(object):
    """Calculate columns from ParquetTable.

    This object manages and organizes an arbitrary set of computations
    on a catalog. The catalog is defined by a
    `deepCoadd_obj` dataset, and the computations are defined by a collection
    of `lsst.pipe.tasks.functors.Functor` objects (or, equivalently,
    a `CompositeFunctor`).

    After the object is initialized, accessing the `.df` attribute (which
    holds the `pandas.DataFrame` containing the results of the calculations)
    triggers computation of said dataframe.

    One of the conveniences of using this object is the ability to define a
    desired common filter for all functors. This enables the same functor
    collection to be passed to several different `PostprocessAnalysis` objects
    without having to change the original functor collection, since the `filt`
    keyword argument of this object triggers an overwrite of the `filt`
    property for all functors in the collection.

    This object also allows a list of refFlags to be passed, and defines a set
    of default refFlags that are always included even if not requested.

    If a list of `ParquetTable` objects is passed, rather than a single one,
    then the calculations will be mapped over all the input catalogs. In
    principle, it should be straightforward to parallelize this activity, but
    initial tests have failed (see TODO in code comments).

    Parameters
    ----------
    parq : `lsst.pipe.tasks.ParquetTable` (or list of such)
        Source catalog(s) for computation.

    functors : `list`, `dict`, or `CompositeFunctor`
        Computations to do (functors that act on `parq`).
        If a dict, the output
        DataFrame will have columns keyed accordingly.
        If a list, the column keys will come from the
        `.shortname` attribute of each functor.

    filt : `str` (optional)
        Filter in which to calculate. If provided,
        this will overwrite any existing `.filt` attribute
        of the provided functors.

    flags : `list` (optional)
        List of flags (per-band) to include in output table.
        Taken from the `meas` dataset if applied to a multilevel Object Table.

    refFlags : `list` (optional)
        List of refFlags (only reference band) to include in output table.

    forcedFlags : `list` (optional)
        List of flags (per-band) to include in output table.
        Taken from the ``forced_src`` dataset if applied to a
        multilevel Object Table. Intended for flags from measurement plugins
        only run during multi-band forced-photometry.
    """
    _defaultRefFlags = []
    _defaultFuncs = ()

    def __init__(self, parq, functors, filt=None, flags=None, refFlags=None, forcedFlags=None):
        self.parq = parq
        self.functors = functors

        self.filt = filt
        self.flags = list(flags) if flags is not None else []
        self.forcedFlags = list(forcedFlags) if forcedFlags is not None else []
        self.refFlags = list(self._defaultRefFlags)
        if refFlags is not None:
            self.refFlags += list(refFlags)

        self._df = None

    @property
    def defaultFuncs(self):
        funcs = dict(self._defaultFuncs)
        return funcs

    @property
    def func(self):
        additionalFuncs = self.defaultFuncs
        additionalFuncs.update({flag: Column(flag, dataset='forced_src') for flag in self.forcedFlags})
        additionalFuncs.update({flag: Column(flag, dataset='ref') for flag in self.refFlags})
        additionalFuncs.update({flag: Column(flag, dataset='meas') for flag in self.flags})

        if isinstance(self.functors, CompositeFunctor):
            func = self.functors
        else:
            func = CompositeFunctor(self.functors)

        func.funcDict.update(additionalFuncs)
        func.filt = self.filt

        return func

    @property
    def noDupCols(self):
        return [name for name, func in self.func.funcDict.items() if func.noDup or func.dataset == 'ref']

    @property
    def df(self):
        if self._df is None:
            self.compute()
        return self._df

    def compute(self, dropna=False, pool=None):
        # Map over multiple parquet tables if a list was provided.
        if type(self.parq) in (list, tuple):
            if pool is None:
                dflist = [self.func(parq, dropna=dropna) for parq in self.parq]
            else:
                # TODO: parallelization of this step has not yet worked.
                dflist = pool.map(functools.partial(self.func, dropna=dropna), self.parq)
            self._df = pd.concat(dflist)
        else:
            self._df = self.func(self.parq, dropna=dropna)
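
# Illustrative sketch (assumptions flagged inline): typical driving of
# PostprocessAnalysis with a couple of Column functors. The specific column
# and flag names, and the idea of passing an already-loaded deepCoadd_obj
# ParquetTable, are placeholders rather than pipeline defaults.
def _examplePostprocessAnalysis(parq):
    funcs = {'ra': Column('coord_ra', dataset='ref'),    # copy reference-band RA
             'dec': Column('coord_dec', dataset='ref')}  # copy reference-band Dec
    analysis = PostprocessAnalysis(parq, funcs,
                                   flags=['base_PixelFlags_flag_inexact_psfCenter'])
    # Accessing .df triggers the computation and returns a pandas DataFrame.
    return analysis.df
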
847 """Expected Connections for subclasses of TransformCatalogBaseTask.
851 inputCatalog = connectionTypes.Input(
853 storageClass=
"DataFrame",
855 outputCatalog = connectionTypes.Output(
857 storageClass=
"DataFrame",
class TransformCatalogBaseConfig(pipeBase.PipelineTaskConfig,
                                 pipelineConnections=TransformCatalogBaseConnections):
    functorFile = pexConfig.Field(
        dtype=str,
        doc="Path to YAML file specifying Science Data Model functors to use "
            "when copying columns and computing calibrated values.",
        default=None,
        optional=True,
    )
    primaryKey = pexConfig.Field(
        dtype=str,
        doc="Name of column to be set as the DataFrame index. If None, the index "
            "will be named `id`",
        default=None,
        optional=True,
    )
880 """Base class for transforming/standardizing a catalog
882 by applying functors that convert units and apply calibrations.
883 The purpose of this task
is to perform a set of computations on
884 an input `ParquetTable` dataset (such
as `deepCoadd_obj`)
and write the
885 results to a new dataset (which needs to be declared
in an `outputDataset`
888 The calculations to be performed are defined
in a YAML file that specifies
889 a set of functors to be computed, provided
as
890 a `--functorFile` config parameter. An example of such a YAML file
915 - base_InputCount_value
918 functor: DeconvolvedMoments
923 - merge_measurement_i
924 - merge_measurement_r
925 - merge_measurement_z
926 - merge_measurement_y
927 - merge_measurement_g
928 - base_PixelFlags_flag_inexact_psfCenter
931 The names
for each entry under
"func" will become the names of columns
in the
933 Positional arguments to be passed to each functor are
in the `args` list,
934 and any additional entries
for each column other than
"functor" or "args" (e.g., `
'filt'`,
935 `
'dataset'`) are treated
as keyword arguments to be passed to the functor initialization.
937 The
"flags" entry
is the default shortcut
for `Column` functors.
938 All columns listed under
"flags" will be copied to the output table
939 untransformed. They can be of any datatype.
940 In the special case of transforming a multi-level oject table
with
941 band
and dataset indices (deepCoadd_obj), these will be taked
from the
942 `meas` dataset
and exploded out per band.
944 There are two special shortcuts that only apply when transforming
945 multi-level Object (deepCoadd_obj) tables:
946 - The
"refFlags" entry
is shortcut
for `Column` functor
947 taken
from the `
'ref'` dataset
if transforming an ObjectTable.
948 - The
"forcedFlags" entry
is shortcut
for `Column` functors.
949 taken
from the ``forced_src`` dataset
if transforming an ObjectTable.
950 These are expanded out per band.
953 This task uses the `lsst.pipe.tasks.postprocess.PostprocessAnalysis` object
954 to organize
and excecute the calculations.
    @property
    def _DefaultName(self):
        raise NotImplementedError('Subclass must define "_DefaultName" attribute')

    @property
    def outputDataset(self):
        raise NotImplementedError('Subclass must define "outputDataset" attribute')

    @property
    def inputDataset(self):
        raise NotImplementedError('Subclass must define "inputDataset" attribute')

    @property
    def ConfigClass(self):
        raise NotImplementedError('Subclass must define "ConfigClass" attribute')

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        if self.config.functorFile:
            self.log.info('Loading transform functor definitions from %s',
                          self.config.functorFile)
            self.funcs = CompositeFunctor.from_file(self.config.functorFile)
            self.funcs.update(dict(PostprocessAnalysis._defaultFuncs))
        else:
            self.funcs = None
    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)
        if self.funcs is None:
            raise ValueError("config.functorFile is None. "
                             "Must be a valid path to yaml in order to run Task as a PipelineTask.")
        result = self.run(parq=inputs['inputCatalog'], funcs=self.funcs,
                          dataId=outputRefs.outputCatalog.dataId.full)
        outputs = pipeBase.Struct(outputCatalog=result)
        butlerQC.put(outputs, outputRefs)

    def runDataRef(self, dataRef):
        parq = dataRef.get()
        if self.funcs is None:
            raise ValueError("config.functorFile is None. "
                             "Must be a valid path to yaml in order to run as a CommandlineTask.")
        df = self.run(parq, funcs=self.funcs, dataId=dataRef.dataId)
        self.write(df, dataRef)
    def run(self, parq, funcs=None, dataId=None, band=None):
        """Do postprocessing calculations.

        Takes a `ParquetTable` object and dataId,
        returns a dataframe with results of postprocessing calculations.

        Parameters
        ----------
        parq : `lsst.pipe.tasks.parquetTable.ParquetTable`
            ParquetTable from which calculations are done.
        funcs : `lsst.pipe.tasks.functors.Functors`
            Functors to apply to the table's columns
        dataId : dict, optional
            Used to add a `patchId` column to the output dataframe.
        band : `str`, optional
            Filter band that is being processed.

        Returns
        -------
        df : `pandas.DataFrame`
        """
        self.log.info("Transforming/standardizing the source table dataId: %s", dataId)

        df = self.transform(band, parq, funcs, dataId).df
        self.log.info("Made a table of %d columns and %d rows", len(df.columns), len(df))
        return df

    def getFunctors(self):
        return self.funcs

    def getAnalysis(self, parq, funcs=None, band=None):
        if funcs is None:
            funcs = self.funcs
        analysis = PostprocessAnalysis(parq, funcs, filt=band)
        return analysis
    def transform(self, band, parq, funcs, dataId):
        analysis = self.getAnalysis(parq, funcs=funcs, band=band)
        df = analysis.df
        if dataId is not None:
            for key, value in dataId.items():
                df[str(key)] = value

        if self.config.primaryKey:
            if df.index.name != self.config.primaryKey and self.config.primaryKey in df:
                df.reset_index(inplace=True, drop=True)
                df.set_index(self.config.primaryKey, inplace=True)

        return pipeBase.Struct(
            df=df,
            analysis=analysis
        )

    def write(self, df, parqRef):
        parqRef.put(ParquetTable(dataFrame=df), self.outputDataset)

    def writeMetadata(self, dataRef):
        """No metadata to write.
        """
        pass
class TransformObjectCatalogConnections(pipeBase.PipelineTaskConnections,
                                        defaultTemplates={"coaddName": "deep"},
                                        dimensions=("tract", "patch", "skymap")):
    inputCatalog = connectionTypes.Input(
        doc="The vertical concatenation of the deepCoadd_{ref|meas|forced_src} catalogs, "
            "stored as a DataFrame with a multi-level column index per-patch.",
        dimensions=("tract", "patch", "skymap"),
        storageClass="DataFrame",
        name="{coaddName}Coadd_obj",
        deferLoad=True,
    )
    outputCatalog = connectionTypes.Output(
        doc="Per-Patch Object Table of columns transformed from the deepCoadd_obj table per the standard "
            "data model.",
        dimensions=("tract", "patch", "skymap"),
        storageClass="DataFrame",
        name="objectTable",
    )
class TransformObjectCatalogConfig(TransformCatalogBaseConfig,
                                   pipelineConnections=TransformObjectCatalogConnections):
    coaddName = pexConfig.Field(
        dtype=str,
        doc="Name of coadd",
    )
    filterMap = pexConfig.DictField(
        keytype=str,
        itemtype=str,
        doc=("Dictionary mapping full filter name to short one for column name munging. "
             "These filters determine the output columns no matter what filters the "
             "input data actually contain."),
        deprecated=("Coadds are now identified by the band, so this transform is unused. "
                    "Will be removed after v22.")
    )
    outputBands = pexConfig.ListField(
        dtype=str,
        optional=True,
        doc=("These bands and only these bands will appear in the output,"
             " NaN-filled if the input does not include them."
             " If None, then use all bands found in the input.")
    )
    camelCase = pexConfig.Field(
        dtype=bool,
        doc=("Write per-band column names with camelCase, else underscore. "
             "For example: gPsFlux instead of g_PsFlux.")
    )
    multilevelOutput = pexConfig.Field(
        dtype=bool,
        doc=("Whether results dataframe should have a multilevel column index (True) or be flat "
             "and name-munged (False).")
    )
    goodFlags = pexConfig.ListField(
        dtype=str,
        doc=("List of 'good' flags that should be set False when populating empty tables. "
             "All other flags are considered to be 'bad' flags and will be set to True.")
    )
    floatFillValue = pexConfig.Field(
        dtype=float,
        doc="Fill value for float fields when populating empty tables."
    )
    integerFillValue = pexConfig.Field(
        dtype=int,
        doc="Fill value for integer fields when populating empty tables."
    )

    def setDefaults(self):
        super().setDefaults()
        self.primaryKey = 'objectId'
        self.goodFlags = ['calib_astrometry_used',
                          'calib_photometry_reserved',
                          'calib_photometry_used',
                          'calib_psf_candidate',
                          'calib_psf_reserved',
                          ]
1152 """Produce a flattened Object Table to match the format specified in
1155 Do the same set of postprocessing calculations on all bands
1157 This is identical to `TransformCatalogBaseTask`,
except for that it does the
1158 specified functor calculations
for all filters present
in the
1159 input `deepCoadd_obj` table. Any specific `
"filt"` keywords specified
1160 by the YAML file will be superceded.
1162 _DefaultName = "transformObjectCatalog"
1163 ConfigClass = TransformObjectCatalogConfig
1166 inputDataset =
'deepCoadd_obj'
1167 outputDataset =
'objectTable'
1170 def _makeArgumentParser(cls):
1171 parser = ArgumentParser(name=cls._DefaultName)
1172 parser.add_id_argument(
"--id", cls.inputDataset,
1173 ContainerClass=CoaddDataIdContainer,
1174 help=
"data ID, e.g. --id tract=12345 patch=1,2")
    def run(self, parq, funcs=None, dataId=None, band=None):
        dfDict = {}
        analysisDict = {}
        templateDf = pd.DataFrame()

        if isinstance(parq, DeferredDatasetHandle):
            columns = parq.get(component='columns')
            inputBands = columns.unique(level=1).values
        else:
            inputBands = parq.columnLevelNames['band']

        outputBands = self.config.outputBands if self.config.outputBands else inputBands

        # Perform transform for data of filters that exist in parq.
        for inputBand in inputBands:
            if inputBand not in outputBands:
                self.log.info("Ignoring %s band data in the input", inputBand)
                continue
            self.log.info("Transforming the catalog of band %s", inputBand)
            result = self.transform(inputBand, parq, funcs, dataId)
            dfDict[inputBand] = result.df
            analysisDict[inputBand] = result.analysis
            if templateDf.empty:
                templateDf = result.df

        # Put filler values in columns of any requested bands that are missing.
        for filt in outputBands:
            if filt not in dfDict:
                self.log.info("Adding empty columns for band %s", filt)
                dfTemp = templateDf.copy()
                for col in dfTemp.columns:
                    testValue = dfTemp[col].values[0]
                    if isinstance(testValue, (np.bool_, pd.BooleanDtype)):
                        # Boolean flag type: good flags default to False,
                        # everything else defaults to True ("bad").
                        if col in self.config.goodFlags:
                            fillValue = False
                        else:
                            fillValue = True
                    elif isinstance(testValue, numbers.Integral):
                        # numbers.Integral catches python, numpy, and pandas
                        # integer types alike.
                        if isinstance(testValue, np.unsignedinteger):
                            raise ValueError("Parquet tables may not have unsigned integer columns.")
                        else:
                            fillValue = self.config.integerFillValue
                    else:
                        fillValue = self.config.floatFillValue
                    dfTemp[col].values[:] = fillValue
                dfDict[filt] = dfTemp

        # This makes a multilevel column index, with band as first level.
        df = pd.concat(dfDict, axis=1, names=['band', 'column'])

        if not self.config.multilevelOutput:
            noDupCols = list(set.union(*[set(v.noDupCols) for v in analysisDict.values()]))
            if self.config.primaryKey in noDupCols:
                noDupCols.remove(self.config.primaryKey)
            if dataId is not None:
                noDupCols += list(dataId.keys())
            df = flattenFilters(df, noDupCols=noDupCols, camelCase=self.config.camelCase,
                                inputBands=inputBands)

        self.log.info("Made a table of %d columns and %d rows", len(df.columns), len(df))
        return df
class TractObjectDataIdContainer(CoaddDataIdContainer):

    def makeDataRefList(self, namespace):
        """Make self.refList from self.idList.

        Generate a list of data references given tract and/or patch.
        This was adapted from `TractQADataIdContainer`, which was
        `TractDataIdContainer` modified to not require "filter".
        Only existing dataRefs are returned.
        """
        def getPatchRefList(tract):
            return [namespace.butler.dataRef(datasetType=self.datasetType,
                                             tract=tract.getId(),
                                             patch="%d,%d" % patch.getIndex())
                    for patch in tract]

        tractRefs = defaultdict(list)  # Data references for each tract
        for dataId in self.idList:
            skymap = self.getSkymap(namespace)

            if "tract" in dataId:
                tractId = dataId["tract"]
                if "patch" in dataId:
                    tractRefs[tractId].append(namespace.butler.dataRef(datasetType=self.datasetType,
                                                                       tract=tractId,
                                                                       patch=dataId['patch']))
                else:
                    tractRefs[tractId] += getPatchRefList(skymap[tractId])
            else:
                tractRefs = dict((tract.getId(), tractRefs.get(tract.getId(), []) + getPatchRefList(tract))
                                 for tract in skymap)

        outputRefList = []
        for tractRefList in tractRefs.values():
            existingRefs = [ref for ref in tractRefList if ref.datasetExists()]
            outputRefList.append(existingRefs)

        self.refList = outputRefList
class ConsolidateObjectTableConnections(pipeBase.PipelineTaskConnections,
                                        dimensions=("tract", "skymap")):
    inputCatalogs = connectionTypes.Input(
        doc="Per-Patch objectTables conforming to the standard data model.",
        name="objectTable",
        storageClass="DataFrame",
        dimensions=("tract", "patch", "skymap"),
        multiple=True,
    )
    outputCatalog = connectionTypes.Output(
        doc="Per-tract horizontal concatenation of the input objectTables",
        name="objectTable_tract",
        storageClass="DataFrame",
        dimensions=("tract", "skymap"),
    )


class ConsolidateObjectTableConfig(pipeBase.PipelineTaskConfig,
                                   pipelineConnections=ConsolidateObjectTableConnections):
    coaddName = pexConfig.Field(
        dtype=str,
        doc="Name of coadd",
    )
class ConsolidateObjectTableTask(CmdLineTask, pipeBase.PipelineTask):
    """Write patch-merged source tables to a tract-level parquet file.

    Concatenates the `objectTable` list into a per-tract `objectTable_tract`.
    """
    _DefaultName = "consolidateObjectTable"
    ConfigClass = ConsolidateObjectTableConfig

    inputDataset = 'objectTable'
    outputDataset = 'objectTable_tract'

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)
        self.log.info("Concatenating %s per-patch Object Tables",
                      len(inputs['inputCatalogs']))
        df = pd.concat(inputs['inputCatalogs'])
        butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs)

    @classmethod
    def _makeArgumentParser(cls):
        parser = ArgumentParser(name=cls._DefaultName)

        parser.add_id_argument("--id", cls.inputDataset,
                               help="data ID, e.g. --id tract=12345",
                               ContainerClass=TractObjectDataIdContainer)
        return parser

    def runDataRef(self, patchRefList):
        df = pd.concat([patchRef.get().toDataFrame() for patchRef in patchRefList])
        patchRefList[0].put(ParquetTable(dataFrame=df), self.outputDataset)

    def writeMetadata(self, dataRef):
        """No metadata to write.
        """
        pass
class TransformSourceTableConnections(pipeBase.PipelineTaskConnections,
                                      defaultTemplates={"catalogType": ""},
                                      dimensions=("instrument", "visit", "detector")):

    inputCatalog = connectionTypes.Input(
        doc="Wide input catalog of sources produced by WriteSourceTableTask",
        name="{catalogType}source",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector"),
    )
    outputCatalog = connectionTypes.Output(
        doc="Narrower, per-detector Source Table transformed and converted per a "
            "specified set of functors",
        name="{catalogType}sourceTable",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector")
    )


class TransformSourceTableConfig(TransformCatalogBaseConfig,
                                 pipelineConnections=TransformSourceTableConnections):

    def setDefaults(self):
        super().setDefaults()
        self.primaryKey = 'sourceId'
1376 """Transform/standardize a source catalog
1378 _DefaultName = "transformSourceTable"
1379 ConfigClass = TransformSourceTableConfig
1381 inputDataset =
'source'
1382 outputDataset =
'sourceTable'
1385 def _makeArgumentParser(cls):
1386 parser = ArgumentParser(name=cls._DefaultName)
1387 parser.add_id_argument(
"--id", datasetType=cls.inputDataset,
1389 help=
"data ID, e.g. --id visit=12345 ccd=0")
1392 def runDataRef(self, dataRef):
1393 """Override to specify band label to run()."""
1394 parq = dataRef.get()
1395 funcs = self.getFunctors()
1396 band = dataRef.get(
"calexp_filterLabel", immediate=
True).bandLabel
1397 df = self.run(parq, funcs=funcs, dataId=dataRef.dataId, band=band)
1398 self.write(df, dataRef)
class ConsolidateVisitSummaryConnections(pipeBase.PipelineTaskConnections,
                                         dimensions=("instrument", "visit",),
                                         defaultTemplates={"calexpType": ""}):
    calexp = connectionTypes.Input(
        doc="Processed exposures used for metadata",
        name="{calexpType}calexp",
        storageClass="ExposureF",
        dimensions=("instrument", "visit", "detector"),
        deferLoad=True,
        multiple=True,
    )
    visitSummary = connectionTypes.Output(
        doc=("Per-visit consolidated exposure metadata. These catalogs use "
             "detector id for the id and are sorted for fast lookups of a "
             "detector."),
        name="{calexpType}visitSummary",
        storageClass="ExposureCatalog",
        dimensions=("instrument", "visit"),
    )


class ConsolidateVisitSummaryConfig(pipeBase.PipelineTaskConfig,
                                    pipelineConnections=ConsolidateVisitSummaryConnections):
    """Config for ConsolidateVisitSummaryTask"""
    pass
class ConsolidateVisitSummaryTask(pipeBase.PipelineTask, pipeBase.CmdLineTask):
    """Task to consolidate per-detector visit metadata.

    This task aggregates the following metadata from all the detectors in a
    single visit into an exposure catalog:
    - The physical_filter and band (if available).
    - The psf size, shape, and effective area at the center of the detector.
    - The corners of the bounding box in right ascension/declination.

    Other quantities such as Detector, Psf, ApCorrMap, and TransmissionCurve
    are not persisted here because of storage concerns, and because of their
    limited utility as summary statistics.

    Tests for this task are performed in ci_hsc_gen3.
    """
    _DefaultName = "consolidateVisitSummary"
    ConfigClass = ConsolidateVisitSummaryConfig

    @classmethod
    def _makeArgumentParser(cls):
        parser = ArgumentParser(name=cls._DefaultName)

        parser.add_id_argument("--id", "calexp",
                               help="data ID, e.g. --id visit=12345",
                               ContainerClass=VisitDataIdContainer)
        return parser

    def writeMetadata(self, dataRef):
        """No metadata to persist, so override to remove metadata persistence.
        """
        pass

    def writeConfig(self, butler, clobber=False, doBackup=True):
        """No config to persist, so override to remove config persistence.
        """
        pass

    def runDataRef(self, dataRefList):
        visit = dataRefList[0].dataId['visit']

        self.log.debug("Concatenating metadata from %d per-detector calexps (visit %d)",
                       len(dataRefList), visit)

        expCatalog = self._combineExposureMetadata(visit, dataRefList, isGen3=False)

        dataRefList[0].put(expCatalog, 'visitSummary', visit=visit)

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        dataRefs = butlerQC.get(inputRefs.calexp)
        visit = dataRefs[0].dataId.byName()['visit']

        self.log.debug("Concatenating metadata from %d per-detector calexps (visit %d)",
                       len(dataRefs), visit)

        expCatalog = self._combineExposureMetadata(visit, dataRefs)

        butlerQC.put(expCatalog, outputRefs.visitSummary)
    def _combineExposureMetadata(self, visit, dataRefs, isGen3=True):
        """Make a combined exposure catalog from a list of dataRefs.
        These dataRefs must point to exposures with wcs, summaryStats,
        and other visit metadata.

        Parameters
        ----------
        visit : `int`
            Visit identification number.
        dataRefs : `list`
            List of dataRefs in visit. May be list of
            `lsst.daf.persistence.ButlerDataRef` (Gen2) or
            `lsst.daf.butler.DeferredDatasetHandle` (Gen3).
        isGen3 : `bool`, optional
            Specifies if this is a Gen3 list of datarefs.

        Returns
        -------
        visitSummary : `lsst.afw.table.ExposureCatalog`
            Exposure catalog with per-detector summary information.
        """
        schema = self._makeVisitSummarySchema()
        cat = afwTable.ExposureCatalog(schema)
        cat.resize(len(dataRefs))

        cat['visit'] = visit

        for i, dataRef in enumerate(dataRefs):
            if isGen3:
                visitInfo = dataRef.get(component='visitInfo')
                filterLabel = dataRef.get(component='filterLabel')
                summaryStats = dataRef.get(component='summaryStats')
                detector = dataRef.get(component='detector')
                wcs = dataRef.get(component='wcs')
                photoCalib = dataRef.get(component='photoCalib')
                bbox = dataRef.get(component='bbox')
                validPolygon = dataRef.get(component='validPolygon')
            else:
                # Gen2: read a minimal sub-region of the calexp to get its metadata.
                exp = dataRef.get(datasetType='calexp_sub', bbox=gen2_read_bbox)
                visitInfo = exp.getInfo().getVisitInfo()
                filterLabel = dataRef.get("calexp_filterLabel")
                summaryStats = exp.getInfo().getSummaryStats()
                wcs = exp.getWcs()
                photoCalib = exp.getPhotoCalib()
                detector = exp.getDetector()
                bbox = dataRef.get(datasetType='calexp_bbox')
                validPolygon = exp.getInfo().getValidPolygon()

            rec = cat[i]
            rec.setBBox(bbox)
            rec.setVisitInfo(visitInfo)
            rec.setWcs(wcs)
            rec.setPhotoCalib(photoCalib)
            rec.setValidPolygon(validPolygon)

            rec['physical_filter'] = filterLabel.physicalLabel if filterLabel.hasPhysicalLabel() else ""
            rec['band'] = filterLabel.bandLabel if filterLabel.hasBandLabel() else ""
            rec.setId(detector.getId())
            rec['psfSigma'] = summaryStats.psfSigma
            rec['psfIxx'] = summaryStats.psfIxx
            rec['psfIyy'] = summaryStats.psfIyy
            rec['psfIxy'] = summaryStats.psfIxy
            rec['psfArea'] = summaryStats.psfArea
            rec['raCorners'][:] = summaryStats.raCorners
            rec['decCorners'][:] = summaryStats.decCorners
            rec['ra'] = summaryStats.ra
            rec['decl'] = summaryStats.decl
            rec['zenithDistance'] = summaryStats.zenithDistance
            rec['zeroPoint'] = summaryStats.zeroPoint
            rec['skyBg'] = summaryStats.skyBg
            rec['skyNoise'] = summaryStats.skyNoise
            rec['meanVar'] = summaryStats.meanVar
            rec['astromOffsetMean'] = summaryStats.astromOffsetMean
            rec['astromOffsetStd'] = summaryStats.astromOffsetStd
            rec['nPsfStar'] = summaryStats.nPsfStar
            rec['psfStarDeltaE1Median'] = summaryStats.psfStarDeltaE1Median
            rec['psfStarDeltaE2Median'] = summaryStats.psfStarDeltaE2Median
            rec['psfStarDeltaE1Scatter'] = summaryStats.psfStarDeltaE1Scatter
            rec['psfStarDeltaE2Scatter'] = summaryStats.psfStarDeltaE2Scatter
            rec['psfStarDeltaSizeMedian'] = summaryStats.psfStarDeltaSizeMedian
            rec['psfStarDeltaSizeScatter'] = summaryStats.psfStarDeltaSizeScatter
            rec['psfStarScaledDeltaSizeScatter'] = summaryStats.psfStarScaledDeltaSizeScatter

        metadata = dafBase.PropertyList()
        metadata.add("COMMENT", "Catalog id is detector id, sorted.")
        metadata.add("COMMENT", "Only detectors with data have entries.")
        cat.setMetadata(metadata)

        cat.sort()
        return cat
    def _makeVisitSummarySchema(self):
        """Make the schema for the visitSummary catalog."""
        schema = afwTable.ExposureTable.makeMinimalSchema()
        schema.addField('visit', type='I', doc='Visit number')
        schema.addField('physical_filter', type='String', size=32, doc='Physical filter')
        schema.addField('band', type='String', size=32, doc='Name of band')
        schema.addField('psfSigma', type='F',
                        doc='PSF model second-moments determinant radius (center of chip) (pixel)')
        schema.addField('psfArea', type='F',
                        doc='PSF model effective area (center of chip) (pixel**2)')
        schema.addField('psfIxx', type='F',
                        doc='PSF model Ixx (center of chip) (pixel**2)')
        schema.addField('psfIyy', type='F',
                        doc='PSF model Iyy (center of chip) (pixel**2)')
        schema.addField('psfIxy', type='F',
                        doc='PSF model Ixy (center of chip) (pixel**2)')
        schema.addField('raCorners', type='ArrayD', size=4,
                        doc='Right Ascension of bounding box corners (degrees)')
        schema.addField('decCorners', type='ArrayD', size=4,
                        doc='Declination of bounding box corners (degrees)')
        schema.addField('ra', type='D',
                        doc='Right Ascension of bounding box center (degrees)')
        schema.addField('decl', type='D',
                        doc='Declination of bounding box center (degrees)')
        schema.addField('zenithDistance', type='F',
                        doc='Zenith distance of bounding box center (degrees)')
        schema.addField('zeroPoint', type='F',
                        doc='Mean zeropoint in detector (mag)')
        schema.addField('skyBg', type='F',
                        doc='Average sky background (ADU)')
        schema.addField('skyNoise', type='F',
                        doc='Average sky noise (ADU)')
        schema.addField('meanVar', type='F',
                        doc='Mean variance of the weight plane (ADU**2)')
        schema.addField('astromOffsetMean', type='F',
                        doc='Mean offset of astrometric calibration matches (arcsec)')
        schema.addField('astromOffsetStd', type='F',
                        doc='Standard deviation of offsets of astrometric calibration matches (arcsec)')
        schema.addField('nPsfStar', type='I', doc='Number of stars used for PSF model')
        schema.addField('psfStarDeltaE1Median', type='F',
                        doc='Median E1 residual (starE1 - psfE1) for psf stars')
        schema.addField('psfStarDeltaE2Median', type='F',
                        doc='Median E2 residual (starE2 - psfE2) for psf stars')
        schema.addField('psfStarDeltaE1Scatter', type='F',
                        doc='Scatter (via MAD) of E1 residual (starE1 - psfE1) for psf stars')
        schema.addField('psfStarDeltaE2Scatter', type='F',
                        doc='Scatter (via MAD) of E2 residual (starE2 - psfE2) for psf stars')
        schema.addField('psfStarDeltaSizeMedian', type='F',
                        doc='Median size residual (starSize - psfSize) for psf stars (pixel)')
        schema.addField('psfStarDeltaSizeScatter', type='F',
                        doc='Scatter (via MAD) of size residual (starSize - psfSize) for psf stars (pixel)')
        schema.addField('psfStarScaledDeltaSizeScatter', type='F',
                        doc='Scatter (via MAD) of size residual scaled by median size squared')

        return schema
class VisitDataIdContainer(DataIdContainer):
    """DataIdContainer that groups sensor-level ids by visit.
    """
    def makeDataRefList(self, namespace):
        """Make self.refList from self.idList.

        Generate a list of data references grouped by visit.

        Parameters
        ----------
        namespace : `argparse.Namespace`
            Namespace used by `lsst.pipe.base.CmdLineTask` to parse command
            line arguments.
        """
        # Group data references by visit.
        visitRefs = defaultdict(list)
        for dataId in self.idList:
            if "visit" in dataId:
                visitId = dataId["visit"]
                subset = namespace.butler.subset(self.datasetType, dataId=dataId)
                visitRefs[visitId].extend([dataRef for dataRef in subset])

        outputRefList = []
        for refList in visitRefs.values():
            existingRefs = [ref for ref in refList if ref.datasetExists()]
            if existingRefs:
                outputRefList.append(existingRefs)

        self.refList = outputRefList
class ConsolidateSourceTableConnections(pipeBase.PipelineTaskConnections,
                                        defaultTemplates={"catalogType": ""},
                                        dimensions=("instrument", "visit")):
    inputCatalogs = connectionTypes.Input(
        doc="Input per-detector Source Tables",
        name="{catalogType}sourceTable",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector"),
        multiple=True,
    )
    outputCatalog = connectionTypes.Output(
        doc="Per-visit concatenation of Source Table",
        name="{catalogType}sourceTable_visit",
        storageClass="DataFrame",
        dimensions=("instrument", "visit")
    )


class ConsolidateSourceTableConfig(pipeBase.PipelineTaskConfig,
                                   pipelineConnections=ConsolidateSourceTableConnections):
    pass
class ConsolidateSourceTableTask(CmdLineTask, pipeBase.PipelineTask):
    """Concatenate `sourceTable` list into a per-visit `sourceTable_visit`.
    """
    _DefaultName = 'consolidateSourceTable'
    ConfigClass = ConsolidateSourceTableConfig

    inputDataset = 'sourceTable'
    outputDataset = 'sourceTable_visit'

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)
        self.log.info("Concatenating %s per-detector Source Tables",
                      len(inputs['inputCatalogs']))
        df = pd.concat(inputs['inputCatalogs'])
        butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs)

    def runDataRef(self, dataRefList):
        self.log.info("Concatenating %s per-detector Source Tables", len(dataRefList))
        df = pd.concat([dataRef.get().toDataFrame() for dataRef in dataRefList])
        dataRefList[0].put(ParquetTable(dataFrame=df), self.outputDataset)

    @classmethod
    def _makeArgumentParser(cls):
        parser = ArgumentParser(name=cls._DefaultName)

        parser.add_id_argument("--id", cls.inputDataset,
                               help="data ID, e.g. --id visit=12345",
                               ContainerClass=VisitDataIdContainer)
        return parser

    def writeMetadata(self, dataRef):
        """No metadata to write.
        """
        pass

    def writeConfig(self, butler, clobber=False, doBackup=True):
        """No config to write.
        """
        pass
class MakeCcdVisitTableConnections(pipeBase.PipelineTaskConnections,
                                   dimensions=("instrument",),
                                   defaultTemplates={}):
    visitSummaryRefs = connectionTypes.Input(
        doc="Data references for per-visit consolidated exposure metadata from ConsolidateVisitSummaryTask",
        name="visitSummary",
        storageClass="ExposureCatalog",
        dimensions=("instrument", "visit"),
        multiple=True,
        deferLoad=True,
    )
    outputCatalog = connectionTypes.Output(
        doc="CCD and Visit metadata table",
        name="ccdVisitTable",
        storageClass="DataFrame",
        dimensions=("instrument",)
    )


class MakeCcdVisitTableConfig(pipeBase.PipelineTaskConfig,
                              pipelineConnections=MakeCcdVisitTableConnections):
    pass
class MakeCcdVisitTableTask(CmdLineTask, pipeBase.PipelineTask):
    """Produce a `ccdVisitTable` from the `visitSummary` exposure catalogs.
    """
    _DefaultName = 'makeCcdVisitTable'
    ConfigClass = MakeCcdVisitTableConfig

    def run(self, visitSummaryRefs):
        """Make a table of ccd information from the `visitSummary` catalogs.

        Parameters
        ----------
        visitSummaryRefs : `list` of `lsst.daf.butler.DeferredDatasetHandle`
            List of DeferredDatasetHandles pointing to exposure catalogs with
            per-detector summary information.

        Returns
        -------
        result : `lsst.pipe.base.Struct`
            Results struct with attribute:

            ``outputCatalog``
                Catalog of ccd and visit information.
        """
        ccdEntries = []
        for visitSummaryRef in visitSummaryRefs:
            visitSummary = visitSummaryRef.get()
            visitInfo = visitSummary[0].getVisitInfo()

            summaryTable = visitSummary.asAstropy()
            selectColumns = ['id', 'visit', 'physical_filter', 'band', 'ra', 'decl', 'zenithDistance',
                             'zeroPoint', 'psfSigma', 'skyBg', 'skyNoise']
            ccdEntry = summaryTable[selectColumns].to_pandas().set_index('id')
            # The visitSummary 'visit' column becomes 'visitId' in the output.
            ccdEntry = ccdEntry.rename(columns={"visit": "visitId"})
            dataIds = [DataCoordinate.standardize(visitSummaryRef.dataId, detector=id) for id in
                       summaryTable['id']]
            packer = visitSummaryRef.dataId.universe.makePacker('visit_detector', visitSummaryRef.dataId)
            ccdVisitIds = [packer.pack(dataId) for dataId in dataIds]
            ccdEntry['ccdVisitId'] = ccdVisitIds
            ccdEntry['detector'] = summaryTable['id']
            pixToArcseconds = np.array([vR.getWcs().getPixelScale().asArcseconds() for vR in visitSummary])
            ccdEntry["seeing"] = visitSummary['psfSigma'] * np.sqrt(8 * np.log(2)) * pixToArcseconds

            ccdEntry["skyRotation"] = visitInfo.getBoresightRotAngle().asDegrees()
            ccdEntry["expMidpt"] = visitInfo.getDate().toPython()
            ccdEntry["expMidptMJD"] = visitInfo.getDate().get(dafBase.DateTime.MJD)
            expTime = visitInfo.getExposureTime()
            ccdEntry['expTime'] = expTime
            ccdEntry["obsStart"] = ccdEntry["expMidpt"] - 0.5 * pd.Timedelta(seconds=expTime)
            expTime_days = expTime / (60*60*24)
            ccdEntry["obsStartMJD"] = ccdEntry["expMidptMJD"] - 0.5 * expTime_days
            ccdEntry['darkTime'] = visitInfo.getDarkTime()
            ccdEntry['xSize'] = summaryTable['bbox_max_x'] - summaryTable['bbox_min_x']
            ccdEntry['ySize'] = summaryTable['bbox_max_y'] - summaryTable['bbox_min_y']
            ccdEntry['llcra'] = summaryTable['raCorners'][:, 0]
            ccdEntry['llcdec'] = summaryTable['decCorners'][:, 0]
            ccdEntry['ulcra'] = summaryTable['raCorners'][:, 1]
            ccdEntry['ulcdec'] = summaryTable['decCorners'][:, 1]
            ccdEntry['urcra'] = summaryTable['raCorners'][:, 2]
            ccdEntry['urcdec'] = summaryTable['decCorners'][:, 2]
            ccdEntry['lrcra'] = summaryTable['raCorners'][:, 3]
            ccdEntry['lrcdec'] = summaryTable['decCorners'][:, 3]

            ccdEntries.append(ccdEntry)

        outputCatalog = pd.concat(ccdEntries)
        outputCatalog.set_index('ccdVisitId', inplace=True, verify_integrity=True)
        return pipeBase.Struct(outputCatalog=outputCatalog)
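
# Illustrative sketch (standalone): the seeing conversion used in
# MakeCcdVisitTableTask.run above. A Gaussian PSF with second-moments width
# sigma (in pixels) has FWHM = sigma * sqrt(8 ln 2); multiplying by the pixel
# scale gives arcseconds. The numbers below are made up.
def _exampleSeeingArcsec(psfSigmaPixels=2.0, pixelScaleArcsec=0.2):
    import numpy as np
    return psfSigmaPixels * np.sqrt(8 * np.log(2)) * pixelScaleArcsec  # ~0.94 arcsec
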


class MakeVisitTableConnections(pipeBase.PipelineTaskConnections,
                                dimensions=("instrument",),
                                defaultTemplates={}):
    visitSummaries = connectionTypes.Input(
        doc="Per-visit consolidated exposure metadata from ConsolidateVisitSummaryTask",
        name="visitSummary",
        storageClass="ExposureCatalog",
        dimensions=("instrument", "visit",),
    )
    outputCatalog = connectionTypes.Output(
        doc="Visit metadata table",
        name="visitTable",
        storageClass="DataFrame",
        dimensions=("instrument",)
    )


class MakeVisitTableConfig(pipeBase.PipelineTaskConfig,
                           pipelineConnections=MakeVisitTableConnections):
    pass


class MakeVisitTableTask(CmdLineTask, pipeBase.PipelineTask):
    """Produce a `visitTable` from the `visitSummary` exposure catalogs.
    """
    _DefaultName = 'makeVisitTable'
    ConfigClass = MakeVisitTableConfig

    def run(self, visitSummaries):
        """Make a table of visit information from the `visitSummary` catalogs.

        Parameters
        ----------
        visitSummaries : `list` of `lsst.daf.butler.DeferredDatasetHandle`
            List of exposure catalogs with per-detector summary information.

        Returns
        -------
        result : `lsst.pipe.base.Struct`
            Results struct with attribute:

            ``outputCatalog``
                Catalog of visit information.
        """
        visitEntries = []
        for visitSummary in visitSummaries:
            visitSummary = visitSummary.get()
            visitRow = visitSummary[0]
            visitInfo = visitRow.getVisitInfo()

            visitEntry = {}
            visitEntry["visitId"] = visitRow['visit']
            visitEntry["visit"] = visitRow['visit']
            visitEntry["physical_filter"] = visitRow['physical_filter']
            visitEntry["band"] = visitRow['band']
            raDec = visitInfo.getBoresightRaDec()
            visitEntry["ra"] = raDec.getRa().asDegrees()
            visitEntry["decl"] = raDec.getDec().asDegrees()
            visitEntry["skyRotation"] = visitInfo.getBoresightRotAngle().asDegrees()
            azAlt = visitInfo.getBoresightAzAlt()
            visitEntry["azimuth"] = azAlt.getLongitude().asDegrees()
            visitEntry["altitude"] = azAlt.getLatitude().asDegrees()
            visitEntry["zenithDistance"] = 90 - azAlt.getLatitude().asDegrees()
            visitEntry["airmass"] = visitInfo.getBoresightAirmass()
            expTime = visitInfo.getExposureTime()
            visitEntry["expTime"] = expTime
            visitEntry["expMidpt"] = visitInfo.getDate().toPython()
            visitEntry["expMidptMJD"] = visitInfo.getDate().get(dafBase.DateTime.MJD)
            visitEntry["obsStart"] = visitEntry["expMidpt"] - 0.5 * pd.Timedelta(seconds=expTime)
            expTime_days = expTime / (60*60*24)
            visitEntry["obsStartMJD"] = visitEntry["expMidptMJD"] - 0.5 * expTime_days
            visitEntries.append(visitEntry)

        outputCatalog = pd.DataFrame(data=visitEntries)
        outputCatalog.set_index('visitId', inplace=True, verify_integrity=True)
        return pipeBase.Struct(outputCatalog=outputCatalog)
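
# The visitTable produced above has one row per visit, indexed by visitId; it can be
# joined to the ccdVisitTable on visitId to attach per-visit metadata to each
# per-detector row.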


class WriteForcedSourceTableConnections(pipeBase.PipelineTaskConnections,
                                        dimensions=("instrument", "visit", "detector", "skymap", "tract")):
    inputCatalog = connectionTypes.Input(
        doc="Primary per-detector, single-epoch forced-photometry catalog. "
            "By default, it is the output of ForcedPhotCcdTask on calexps",
        storageClass="SourceCatalog",
        dimensions=("instrument", "visit", "detector", "skymap", "tract")
    )
    inputCatalogDiff = connectionTypes.Input(
        doc="Secondary multi-epoch, per-detector, forced photometry catalog. "
            "By default, it is the output of ForcedPhotCcdTask run on image differences.",
        storageClass="SourceCatalog",
        dimensions=("instrument", "visit", "detector", "skymap", "tract")
    )
    outputCatalog = connectionTypes.Output(
        doc="InputCatalogs horizontally joined on `objectId` in Parquet format",
        name="mergedForcedSource",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector", "skymap", "tract")
    )


class WriteForcedSourceTableConfig(WriteSourceTableConfig,
                                   pipelineConnections=WriteForcedSourceTableConnections):
    key = lsst.pex.config.Field(
        doc="Column on which to join the two input tables and use as the primary key of the output",
        dtype=str,
        default="objectId",
    )


class WriteForcedSourceTableTask(pipeBase.PipelineTask):
    """Merge and convert per-detector forced source catalogs to parquet.

    Because the predecessor ForcedPhotCcdTask operates per-detector,
    per-tract (i.e., it has tract in its dimensions), detectors
    on the tract boundary may have multiple forced source catalogs.

    The successor task TransformForcedSourceTable runs per-patch
    and temporally aggregates overlapping mergedForcedSource catalogs from
    all available epochs.
    """
    _DefaultName = "writeForcedSourceTable"
    ConfigClass = WriteForcedSourceTableConfig

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)
        inputs['ccdVisitId'] = butlerQC.quantum.dataId.pack("visit_detector")
        inputs['band'] = butlerQC.quantum.dataId.full['band']
        outputs = self.run(**inputs)
        butlerQC.put(outputs, outputRefs)

    def run(self, inputCatalog, inputCatalogDiff, ccdVisitId=None, band=None):
        dfs = []
        for table, dataset in zip((inputCatalog, inputCatalogDiff), ('calexp', 'diff')):
            df = table.asAstropy().to_pandas().set_index(self.config.key, drop=False)
            df = df.reindex(sorted(df.columns), axis=1)
            df['ccdVisitId'] = ccdVisitId if ccdVisitId else pd.NA
            df['band'] = band if band else pd.NA
            df.columns = pd.MultiIndex.from_tuples([(dataset, c) for c in df.columns],
                                                   names=('dataset', 'column'))
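            # The two-level column index (dataset, column) keeps the 'calexp' and
            # 'diff' measurements distinguishable after the join below.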
            dfs.append(df)

        outputCatalog = functools.reduce(lambda d1, d2: d1.join(d2), dfs)
        return pipeBase.Struct(outputCatalog=outputCatalog)


class RewriteForcedSourceOnDiaObjectConnections(WriteForcedSourceTableConnections):
    exposure = connectionTypes.Input(
        doc="Input exposure to perform photometry on.",
        storageClass="ExposureF",
        dimensions=["instrument", "visit", "detector"],
    )


class RewriteForcedSourceOnDiaObjectConfig(WriteForcedSourceTableConfig,
                                           pipelineConnections=RewriteForcedSourceOnDiaObjectConnections):
    reevaluate = pexConfig.ConfigurableField(
        target=WriteRecalibratedSourceTableTask,
        doc="Subtask with addCalibColumns method",
    )

    def setDefaults(self):
        super().setDefaults()
        self.reevaluate.doReevaluatePhotoCalib = True
        self.reevaluate.doReevaluateSkyWcs = True
        self.reevaluate.doReevaluateLocalBackground = True
        self.connections.inputCatalogDiff = "forced_diff_diaObject"
        self.connections.inputCatalog = "forced_src_diaObject"
        self.connections.outputCatalog = "mergedForcedSourceOnDiaObject"
        self.key = "diaObjectId"


class RewriteForcedSourceOnDiaObjectTask(WriteForcedSourceTableTask):
    """Specialized afterburner to recalibrate DP0.2 ForcedSourceOnDiaObject"""
    _DefaultName = "RewriteForcedSourceOnDiaObject"
    ConfigClass = RewriteForcedSourceOnDiaObjectConfig

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.makeSubtask("reevaluate")

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)
        inputs['ccdVisitId'] = butlerQC.quantum.dataId.pack("visit_detector")
        inputs['band'] = butlerQC.quantum.dataId.full['band']
        exposureIdInfo = ExposureIdInfo.fromDataId(butlerQC.quantum.dataId, "visit_detector")
        exposure = inputs.pop('exposure')
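        # The popped calexp carries the updated calibrations; addCalibColumns uses it to
        # recompute the local photometric calibration, WCS, and background columns that
        # the config switches above enable.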
        inputs['inputCatalog'] = self.reevaluate.addCalibColumns(catalog=inputs['inputCatalog'],
                                                                 exposure=exposure,
                                                                 exposureIdInfo=exposureIdInfo)
        inputs['inputCatalogDiff'] = self.reevaluate.addCalibColumns(catalog=inputs['inputCatalogDiff'],
                                                                     exposure=exposure,
                                                                     exposureIdInfo=exposureIdInfo)
        outputs = self.run(**inputs)
        butlerQC.put(outputs, outputRefs)


class TransformForcedSourceTableConnections(pipeBase.PipelineTaskConnections,
                                            dimensions=("instrument", "skymap", "patch", "tract")):
    inputCatalogs = connectionTypes.Input(
        doc="Parquet table of merged ForcedSources produced by WriteForcedSourceTableTask",
        name="mergedForcedSource",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector", "skymap", "tract"),
    )
    referenceCatalog = connectionTypes.Input(
        doc="Reference catalog which was used to seed the forcedPhot. Columns "
            "objectId, detect_isPrimary, detect_isTractInner, detect_isPatchInner "
            "are expected.",
        storageClass="DataFrame",
        dimensions=("tract", "patch", "skymap"),
    )
    outputCatalog = connectionTypes.Output(
        doc="Narrower, temporally-aggregated, per-patch ForcedSource Table transformed and "
            "converted per a specified set of functors",
        name="forcedSourceTable",
        storageClass="DataFrame",
        dimensions=("tract", "patch", "skymap")
    )


class TransformForcedSourceTableConfig(TransformCatalogBaseConfig,
                                       pipelineConnections=TransformForcedSourceTableConnections):
    referenceColumns = pexConfig.ListField(
        dtype=str,
        default=["detect_isPrimary", "detect_isTractInner", "detect_isPatchInner"],
        doc="Columns to pull from reference catalog",
    )
    keyRef = lsst.pex.config.Field(
        doc="Column on which to join the two input tables and use as the primary key of the output",
        dtype=str,
        default="objectId",
    )
    key = lsst.pex.config.Field(
        doc="Rename the output DataFrame index to this name",
        dtype=str,
        default="forcedSourceId",
    )
2092 """Transform/standardize a ForcedSource catalog
2094 Transforms each wide, per-detector forcedSource parquet table per the
2095 specification file (per-camera defaults found in ForcedSource.yaml).
2096 All epochs that overlap the patch are aggregated into one per-patch
2097 narrow-parquet file.
2099 No de-duplication of rows
is performed. Duplicate resolutions flags are
2100 pulled
in from the referenceCatalog: `detect_isPrimary`,
2101 `detect_isTractInner`,`detect_isPatchInner`, so that user may de-duplicate
2102 for analysis
or compare duplicates
for QA.
2104 The resulting table includes multiple bands. Epochs (MJDs)
and other useful
2105 per-visit rows can be retreived by joining
with the CcdVisitTable on
2108 _DefaultName = "transformForcedSourceTable"
2109 ConfigClass = TransformForcedSourceTableConfig

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)
        if self.funcs is None:
            raise ValueError("config.functorFile is None. "
                             "Must be a valid path to yaml in order to run Task as a PipelineTask.")
        outputs = self.run(inputs['inputCatalogs'], inputs['referenceCatalog'], funcs=self.funcs,
                           dataId=outputRefs.outputCatalog.dataId.full)
        butlerQC.put(outputs, outputRefs)

    def run(self, inputCatalogs, referenceCatalog, funcs=None, dataId=None, band=None):
        dfs = []
        ref = referenceCatalog.get(parameters={"columns": self.config.referenceColumns})
        self.log.info("Aggregating %s input catalogs", len(inputCatalogs))
        for handle in inputCatalogs:
            result = self.transform(None, handle, funcs, dataId)
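            # The inner join keeps only rows whose object key appears in the reference
            # catalog and attaches its de-duplication flags to each forced source.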
            dfs.append(result.df.join(ref, how='inner'))

        outputCatalog = pd.concat(dfs)
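        # The concatenated table is indexed by config.keyRef (the object key).  Move
        # that key into a regular column, re-index on the per-row forcedSourceId, and
        # rename the index to config.key.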
        outputCatalog.index.rename(self.config.keyRef, inplace=True)
        outputCatalog.reset_index(inplace=True)
        outputCatalog.set_index("forcedSourceId", inplace=True, verify_integrity=True)
        outputCatalog.index.rename(self.config.key, inplace=True)

        self.log.info("Made a table of %d columns and %d rows",
                      len(outputCatalog.columns), len(outputCatalog))
        return pipeBase.Struct(outputCatalog=outputCatalog)
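
# De-duplication sketch (illustrative only): the task deliberately keeps duplicates and
# exposes the reference flags, so a user holding the output as a pandas DataFrame `fst`
# might select primary rows with
#
#     primary = fst[fst["detect_isPrimary"]]
#
# and keep the remainder for QA comparisons.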


class ConsolidateTractConnections(pipeBase.PipelineTaskConnections,
                                  defaultTemplates={"catalogType": ""},
                                  dimensions=("instrument", "tract")):
    inputCatalogs = connectionTypes.Input(
        doc="Input per-patch DataFrame Tables to be concatenated",
        name="{catalogType}ForcedSourceTable",
        storageClass="DataFrame",
        dimensions=("tract", "patch", "skymap"),
    )
    outputCatalog = connectionTypes.Output(
        doc="Output per-tract concatenation of DataFrame Tables",
        name="{catalogType}ForcedSourceTable_tract",
        storageClass="DataFrame",
        dimensions=("tract", "skymap"),
    )


class ConsolidateTractConfig(pipeBase.PipelineTaskConfig,
                             pipelineConnections=ConsolidateTractConnections):
    pass


class ConsolidateTractTask(CmdLineTask, pipeBase.PipelineTask):
    """Concatenate any per-patch, dataframe list into a single per-tract DataFrame.
    """
    _DefaultName = 'ConsolidateTract'
    ConfigClass = ConsolidateTractConfig

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)
        self.log.info("Concatenating %s per-patch %s Tables",
                      len(inputs['inputCatalogs']),
                      inputRefs.inputCatalogs[0].datasetType.name)
        df = pd.concat(inputs['inputCatalogs'])
        butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs)