__all__ = ["WriteObjectTableConfig", "WriteObjectTableTask",
           "WriteSourceTableConfig", "WriteSourceTableTask",
           "WriteRecalibratedSourceTableConfig", "WriteRecalibratedSourceTableTask",
           "PostprocessAnalysis",
           "TransformCatalogBaseConfig", "TransformCatalogBaseTask",
           "TransformObjectCatalogConfig", "TransformObjectCatalogTask",
           "ConsolidateObjectTableConfig", "ConsolidateObjectTableTask",
           "TransformSourceTableConfig", "TransformSourceTableTask",
           "ConsolidateVisitSummaryConfig", "ConsolidateVisitSummaryTask",
           "ConsolidateSourceTableConfig", "ConsolidateSourceTableTask",
           "MakeCcdVisitTableConfig", "MakeCcdVisitTableTask",
           "MakeVisitTableConfig", "MakeVisitTableTask",
           "WriteForcedSourceTableConfig", "WriteForcedSourceTableTask",
           "TransformForcedSourceTableConfig", "TransformForcedSourceTableTask",
           "ConsolidateTractConfig", "ConsolidateTractTask"]
import functools
import logging
import numbers
import os

import numpy as np
import pandas as pd

import lsst.pex.config as pexConfig
import lsst.pipe.base as pipeBase
import lsst.daf.base as dafBase
import lsst.afw.table as afwTable
from lsst.pipe.base import connectionTypes
from lsst.afw.image import ExposureSummaryStats
from lsst.meas.base import SingleFrameMeasurementTask, DetectorVisitIdGeneratorConfig
from lsst.skymap import BaseSkyMap

from .functors import CompositeFunctor, Column

log = logging.getLogger(__name__)

def flattenFilters(df, noDupCols=['coord_ra', 'coord_dec'], camelCase=False, inputBands=None):
    """Flattens a dataframe with multilevel column index.
    """
    newDf = pd.DataFrame()
    # band is the level 0 index
    dfBands = df.columns.unique(level=0).values
    for band in dfBands:
        subdf = df[band]
        columnFormat = '{0}{1}' if camelCase else '{0}_{1}'
        newColumns = {c: columnFormat.format(band, c)
                      for c in subdf.columns if c not in noDupCols}
        cols = list(newColumns.keys())
        newDf = pd.concat([newDf, subdf[cols].rename(columns=newColumns)], axis=1)

    # Band must be present in the input and output or else column is all NaN:
    presentBands = dfBands if inputBands is None else list(set(inputBands).intersection(dfBands))
    # Get the unexploded, non-duplicated columns from any present band's partition
    noDupDf = df[presentBands[0]][noDupCols]
    newDf = pd.concat([noDupDf, newDf], axis=1)
    return newDf
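
# A minimal sketch (not part of the pipeline) of what ``flattenFilters`` does
# with a two-band, (band, column) multilevel table; column names and values
# are illustrative only:
#
#     df = pd.DataFrame({
#         ("g", "coord_ra"): [10.0], ("g", "coord_dec"): [-5.0], ("g", "PsFlux"): [1.0],
#         ("r", "coord_ra"): [10.0], ("r", "coord_dec"): [-5.0], ("r", "PsFlux"): [2.0],
#     })
#     flat = flattenFilters(df)
#     # flat.columns: coord_ra, coord_dec, g_PsFlux, r_PsFlux
#     # (gPsFlux, rPsFlux instead when camelCase=True)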


class WriteObjectTableConnections(pipeBase.PipelineTaskConnections,
                                  defaultTemplates={"coaddName": "deep"},
                                  dimensions=("tract", "patch", "skymap")):
    inputCatalogMeas = connectionTypes.Input(
        doc="Catalog of source measurements on the deepCoadd.",
        dimensions=("tract", "patch", "band", "skymap"),
        storageClass="SourceCatalog",
        name="{coaddName}Coadd_meas",
        multiple=True
    )
    inputCatalogForcedSrc = connectionTypes.Input(
        doc="Catalog of forced measurements (shape and position parameters held fixed) on the deepCoadd.",
        dimensions=("tract", "patch", "band", "skymap"),
        storageClass="SourceCatalog",
        name="{coaddName}Coadd_forced_src",
        multiple=True
    )
    inputCatalogRef = connectionTypes.Input(
        doc="Catalog marking the primary detection (which band provides a good shape and position) "
            "for each detection in deepCoadd_mergeDet.",
        dimensions=("tract", "patch", "skymap"),
        storageClass="SourceCatalog",
        name="{coaddName}Coadd_ref"
    )
    outputCatalog = connectionTypes.Output(
        doc="A vertical concatenation of the deepCoadd_{ref|meas|forced_src} catalogs, "
            "stored as a DataFrame with a multi-level column index per-patch.",
        dimensions=("tract", "patch", "skymap"),
        storageClass="DataFrame",
        name="{coaddName}Coadd_obj"
    )


class WriteObjectTableConfig(pipeBase.PipelineTaskConfig,
                             pipelineConnections=WriteObjectTableConnections):
    engine = pexConfig.Field(
        dtype=str,
        default="pyarrow",
        doc="Parquet engine for writing (pyarrow or fastparquet)",
        deprecated="This config is no longer used, and will be removed after v26."
    )
    coaddName = pexConfig.Field(
        dtype=str,
        default="deep",
        doc="Name of coadd"
    )


class WriteObjectTableTask(pipeBase.PipelineTask):
    """Write filter-merged source tables as a DataFrame in parquet format.
    """
    _DefaultName = "writeObjectTable"
    ConfigClass = WriteObjectTableConfig

    # Names of table datasets to be merged
    inputDatasets = ('forced_src', 'meas', 'ref')

    # Tag of output dataset
    outputDataset = 'obj'
    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)

        measDict = {ref.dataId['band']: {'meas': cat} for ref, cat in
                    zip(inputRefs.inputCatalogMeas, inputs['inputCatalogMeas'])}
        forcedSourceDict = {ref.dataId['band']: {'forced_src': cat} for ref, cat in
                            zip(inputRefs.inputCatalogForcedSrc, inputs['inputCatalogForcedSrc'])}

        catalogs = {}
        for band in measDict.keys():
            catalogs[band] = {'meas': measDict[band]['meas'],
                              'forced_src': forcedSourceDict[band]['forced_src'],
                              'ref': inputs['inputCatalogRef']}
        dataId = butlerQC.quantum.dataId
        df = self.run(catalogs=catalogs, tract=dataId['tract'], patch=dataId['patch'])
        outputs = pipeBase.Struct(outputCatalog=df)
        butlerQC.put(outputs, outputRefs)
    def run(self, catalogs, tract, patch):
        """Merge multiple catalogs.

        Parameters
        ----------
        catalogs : `dict`
            Mapping from filter names to dict of catalogs.
        tract : `int`
            tractId to use for the tractId column.
        patch : `str`
            patchId to use for the patchId column.

        Returns
        -------
        catalog : `pandas.DataFrame`
            Merged dataframe.
        """
        dfs = []
        for filt, tableDict in catalogs.items():
            for dataset, table in tableDict.items():
                # Convert afwTable to pandas DataFrame
                df = table.asAstropy().to_pandas().set_index('id', drop=True)

                # Sort columns by name, to ensure matching schema among patches
                df = df.reindex(sorted(df.columns), axis=1)
                df = df.assign(tractId=tract, patchId=patch)

                # Make columns a 3-level MultiIndex
                df.columns = pd.MultiIndex.from_tuples([(dataset, filt, c) for c in df.columns],
                                                       names=('dataset', 'band', 'column'))
                dfs.append(df)

        catalog = functools.reduce(lambda d1, d2: d1.join(d2), dfs)
        return catalog
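
# Sketch of how the merged per-patch product is addressed (column name is
# illustrative): the result carries a three-level (dataset, band, column)
# index, e.g.
#
#     objDf[("meas", "g", "base_PsfFlux_instFlux")]
#
# TransformObjectCatalogTask later selects per-band slices of this table and
# flattenFilters collapses them into flat, per-band column names.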


class WriteSourceTableConnections(pipeBase.PipelineTaskConnections,
                                  defaultTemplates={"catalogType": ""},
                                  dimensions=("instrument", "visit", "detector")):
    catalog = connectionTypes.Input(
        doc="Input full-depth catalog of sources produced by CalibrateTask",
        name="{catalogType}src",
        storageClass="SourceCatalog",
        dimensions=("instrument", "visit", "detector")
    )
    outputCatalog = connectionTypes.Output(
        doc="Catalog of sources, `src` in DataFrame/Parquet format. The 'id' column is "
            "replaced with an index; all other columns are unchanged.",
        name="{catalogType}source",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector")
    )


class WriteSourceTableConfig(pipeBase.PipelineTaskConfig,
                             pipelineConnections=WriteSourceTableConnections):
    idGenerator = DetectorVisitIdGeneratorConfig.make_field()


class WriteSourceTableTask(pipeBase.PipelineTask):
    """Write source table to DataFrame Parquet format.
    """
    _DefaultName = "writeSourceTable"
    ConfigClass = WriteSourceTableConfig

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)
        inputs['ccdVisitId'] = self.config.idGenerator.apply(butlerQC.quantum.dataId).catalog_id
        result = self.run(**inputs)
        outputs = pipeBase.Struct(outputCatalog=result.table)
        butlerQC.put(outputs, outputRefs)
    def run(self, catalog, ccdVisitId=None, **kwargs):
        """Convert `src` catalog to DataFrame

        Parameters
        ----------
        catalog : `afwTable.SourceCatalog`
            catalog to be converted
        ccdVisitId : `int`
            ccdVisitId to be added as a column
        **kwargs
            Additional keyword arguments are ignored as a convenience for
            subclasses that pass the same arguments to several different
            methods.

        Returns
        -------
        result : `lsst.pipe.base.Struct`
            ``table``
                `DataFrame` version of the input catalog
        """
        self.log.info("Generating DataFrame from src catalog ccdVisitId=%s", ccdVisitId)
        df = catalog.asAstropy().to_pandas().set_index('id', drop=True)
        df['ccdVisitId'] = ccdVisitId

        return pipeBase.Struct(table=df)


class WriteRecalibratedSourceTableConnections(WriteSourceTableConnections,
                                              defaultTemplates={"catalogType": "",
                                                                "skyWcsName": "gbdesAstrometricFit",
                                                                "photoCalibName": "fgcm"},
                                              dimensions=("instrument", "visit", "detector", "skymap")):
    skyMap = connectionTypes.Input(
        doc="skyMap needed to choose which tract-level calibrations to use when multiple available",
        name=BaseSkyMap.SKYMAP_DATASET_TYPE_NAME,
        storageClass="SkyMap",
        dimensions=("skymap",),
    )
    exposure = connectionTypes.Input(
        doc="Input exposure to perform photometry on.",
        name="calexp",
        storageClass="ExposureF",
        dimensions=["instrument", "visit", "detector"],
    )
    externalSkyWcsTractCatalog = connectionTypes.Input(
        doc=("Per-tract, per-visit wcs calibrations. These catalogs use the detector "
             "id for the catalog id, sorted on id for fast lookup."),
        name="{skyWcsName}SkyWcsCatalog",
        storageClass="ExposureCatalog",
        dimensions=["instrument", "visit", "tract"],
        multiple=True
    )
    externalSkyWcsGlobalCatalog = connectionTypes.Input(
        doc=("Per-visit wcs calibrations computed globally (with no tract information). "
             "These catalogs use the detector id for the catalog id, sorted on id for "
             "fast lookup."),
        name="finalVisitSummary",
        storageClass="ExposureCatalog",
        dimensions=["instrument", "visit"],
    )
    externalPhotoCalibTractCatalog = connectionTypes.Input(
        doc=("Per-tract, per-visit photometric calibrations. These catalogs use the "
             "detector id for the catalog id, sorted on id for fast lookup."),
        name="{photoCalibName}PhotoCalibCatalog",
        storageClass="ExposureCatalog",
        dimensions=["instrument", "visit", "tract"],
        multiple=True
    )
    externalPhotoCalibGlobalCatalog = connectionTypes.Input(
        doc=("Per-visit photometric calibrations computed globally (with no tract "
             "information). These catalogs use the detector id for the catalog id, "
             "sorted on id for fast lookup."),
        name="finalVisitSummary",
        storageClass="ExposureCatalog",
        dimensions=["instrument", "visit"],
    )
    def __init__(self, *, config=None):
        super().__init__(config=config)
        # Keep only the sky-WCS and photoCalib inputs that the configuration
        # actually requires.
        if config.doApplyExternalSkyWcs and config.doReevaluateSkyWcs:
            if config.useGlobalExternalSkyWcs:
                self.inputs.remove("externalSkyWcsTractCatalog")
            else:
                self.inputs.remove("externalSkyWcsGlobalCatalog")
        else:
            self.inputs.remove("externalSkyWcsTractCatalog")
            self.inputs.remove("externalSkyWcsGlobalCatalog")
        if config.doApplyExternalPhotoCalib and config.doReevaluatePhotoCalib:
            if config.useGlobalExternalPhotoCalib:
                self.inputs.remove("externalPhotoCalibTractCatalog")
            else:
                self.inputs.remove("externalPhotoCalibGlobalCatalog")
        else:
            self.inputs.remove("externalPhotoCalibTractCatalog")
            self.inputs.remove("externalPhotoCalibGlobalCatalog")


class WriteRecalibratedSourceTableConfig(WriteSourceTableConfig,
                                         pipelineConnections=WriteRecalibratedSourceTableConnections):

    doReevaluatePhotoCalib = pexConfig.Field(
        dtype=bool,
        default=True,
        doc=("Add or replace local photoCalib columns"),
    )
    doReevaluateSkyWcs = pexConfig.Field(
        dtype=bool,
        default=True,
        doc=("Add or replace local WCS columns and update the coord columns, coord_ra and coord_dec"),
    )
    doApplyExternalPhotoCalib = pexConfig.Field(
        dtype=bool,
        default=False,
        doc=("If and only if doReevaluatePhotoCalib, apply the photometric calibrations from an external "
             "algorithm such as FGCM or jointcal, else use the photoCalib already attached to the exposure."),
    )
    doApplyExternalSkyWcs = pexConfig.Field(
        dtype=bool,
        default=False,
        doc=("If and only if doReevaluateSkyWcs, apply the WCS from an external algorithm such as jointcal, "
             "else use the wcs already attached to the exposure."),
    )
    useGlobalExternalPhotoCalib = pexConfig.Field(
        dtype=bool,
        default=False,
        doc=("When using doApplyExternalPhotoCalib, use 'global' calibrations "
             "that are not run per-tract. When False, use per-tract photometric "
             "calibration files.")
    )
    useGlobalExternalSkyWcs = pexConfig.Field(
        dtype=bool,
        default=False,
        doc=("When using doApplyExternalSkyWcs, use 'global' calibrations "
             "that are not run per-tract. When False, use per-tract wcs "
             "files.")
    )
    idGenerator = DetectorVisitIdGeneratorConfig.make_field()

    def validate(self):
        super().validate()
        if self.doApplyExternalSkyWcs and not self.doReevaluateSkyWcs:
            log.warning("doApplyExternalSkyWcs=True but doReevaluateSkyWcs=False. "
                        "External SkyWcs will not be read or evaluated.")
        if self.doApplyExternalPhotoCalib and not self.doReevaluatePhotoCalib:
            log.warning("doApplyExternalPhotoCalib=True but doReevaluatePhotoCalib=False. "
                        "External PhotoCalib will not be read or evaluated.")


class WriteRecalibratedSourceTableTask(WriteSourceTableTask):
    """Write source table to DataFrame Parquet format.
    """
    _DefaultName = "writeRecalibratedSourceTable"
    ConfigClass = WriteRecalibratedSourceTableConfig

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)

        idGenerator = self.config.idGenerator.apply(butlerQC.quantum.dataId)
        inputs['idGenerator'] = idGenerator
        inputs['ccdVisitId'] = idGenerator.catalog_id

        if self.config.doReevaluatePhotoCalib or self.config.doReevaluateSkyWcs:
            if self.config.doApplyExternalPhotoCalib or self.config.doApplyExternalSkyWcs:
                inputs['exposure'] = self.attachCalibs(inputRefs, **inputs)

            inputs['catalog'] = self.addCalibColumns(**inputs)

        result = self.run(**inputs)
        outputs = pipeBase.Struct(outputCatalog=result.table)
        butlerQC.put(outputs, outputRefs)
    def attachCalibs(self, inputRefs, skyMap, exposure, externalSkyWcsGlobalCatalog=None,
                     externalSkyWcsTractCatalog=None, externalPhotoCalibGlobalCatalog=None,
                     externalPhotoCalibTractCatalog=None, **kwargs):
        """Apply external calibrations to exposure per configuration

        When multiple tract-level calibrations overlap, select the one with the
        center closest to detector.

        Parameters
        ----------
        inputRefs : `lsst.pipe.base.InputQuantizedConnection`
            Input references, used for the dataIds of the tract-level calibs.
        skyMap : `lsst.skymap.SkyMap`
            skyMap to lookup tract geometry and WCS.
        exposure : `lsst.afw.image.exposure.Exposure`
            Input exposure to adjust calibrations.
        externalSkyWcsGlobalCatalog : `lsst.afw.table.ExposureCatalog`, optional
            Exposure catalog with external skyWcs to be applied per config
        externalSkyWcsTractCatalog : `lsst.afw.table.ExposureCatalog`, optional
            Exposure catalog with external skyWcs to be applied per config
        externalPhotoCalibGlobalCatalog : `lsst.afw.table.ExposureCatalog`, optional
            Exposure catalog with external photoCalib to be applied per config
        externalPhotoCalibTractCatalog : `lsst.afw.table.ExposureCatalog`, optional
            Exposure catalog with external photoCalib to be applied per config
        **kwargs
            Additional keyword arguments are ignored to facilitate passing the
            same arguments to several methods.

        Returns
        -------
        exposure : `lsst.afw.image.exposure.Exposure`
            Exposure with adjusted calibrations.
        """
        if not self.config.doApplyExternalSkyWcs:
            # Do not modify the exposure's SkyWcs
            externalSkyWcsCatalog = None
        elif self.config.useGlobalExternalSkyWcs:
            # Use the global external SkyWcs
            externalSkyWcsCatalog = externalSkyWcsGlobalCatalog
            self.log.info('Applying global SkyWcs')
        else:
            # Use the tract-level external SkyWcs from the closest overlapping tract
            inputRef = getattr(inputRefs, 'externalSkyWcsTractCatalog')
            tracts = [ref.dataId['tract'] for ref in inputRef]
            if len(tracts) == 1:
                ind = 0
                self.log.info('Applying tract-level SkyWcs from tract %s', tracts[ind])
            else:
                if exposure.getWcs() is None:
                    raise ValueError("Trying to locate nearest tract, but exposure.wcs is None.")
                ind = self.getClosestTract(tracts, skyMap,
                                           exposure.getBBox(), exposure.getWcs())
                self.log.info('Multiple overlapping externalSkyWcsTractCatalogs found (%s). '
                              'Applying closest to detector center: tract=%s', str(tracts), tracts[ind])
            externalSkyWcsCatalog = externalSkyWcsTractCatalog[ind]

        if not self.config.doApplyExternalPhotoCalib:
            # Do not modify the exposure's PhotoCalib
            externalPhotoCalibCatalog = None
        elif self.config.useGlobalExternalPhotoCalib:
            # Use the global external PhotoCalib
            externalPhotoCalibCatalog = externalPhotoCalibGlobalCatalog
            self.log.info('Applying global PhotoCalib')
        else:
            # Use the tract-level external PhotoCalib from the closest overlapping tract
            inputRef = getattr(inputRefs, 'externalPhotoCalibTractCatalog')
            tracts = [ref.dataId['tract'] for ref in inputRef]
            if len(tracts) == 1:
                ind = 0
                self.log.info('Applying tract-level PhotoCalib from tract %s', tracts[ind])
            else:
                ind = self.getClosestTract(tracts, skyMap,
                                           exposure.getBBox(), exposure.getWcs())
                self.log.info('Multiple overlapping externalPhotoCalibTractCatalogs found (%s). '
                              'Applying closest to detector center: tract=%s', str(tracts), tracts[ind])
            externalPhotoCalibCatalog = externalPhotoCalibTractCatalog[ind]

        return self.prepareCalibratedExposure(exposure, externalSkyWcsCatalog, externalPhotoCalibCatalog)
    def getClosestTract(self, tracts, skyMap, bbox, wcs):
        """Find the index of the tract closest to detector from list of tractIds

        Parameters
        ----------
        tracts : `list` [`int`]
            Iterable of integer tractIds
        skyMap : `lsst.skymap.SkyMap`
            skyMap to lookup tract geometry and wcs
        bbox : `lsst.geom.Box2I`
            Detector bbox, center of which will compared to tract centers
        wcs : `lsst.afw.geom.SkyWcs`
            Detector Wcs object to map the detector center to SkyCoord

        Returns
        -------
        index : `int`
            Index of the closest tract in ``tracts``.
        """
        center = wcs.pixelToSky(bbox.getCenter())
        sep = []
        for tractId in tracts:
            tract = skyMap[tractId]
            tractCenter = tract.getWcs().pixelToSky(tract.getBBox().getCenter())
            sep.append(center.separation(tractCenter))

        return np.argmin(sep)
    def prepareCalibratedExposure(self, exposure, externalSkyWcsCatalog=None, externalPhotoCalibCatalog=None):
        """Prepare a calibrated exposure and apply external calibrations
        if so configured.

        Parameters
        ----------
        exposure : `lsst.afw.image.exposure.Exposure`
            Input exposure to adjust calibrations.
        externalSkyWcsCatalog : `lsst.afw.table.ExposureCatalog`, optional
            Exposure catalog with external skyWcs to be applied
            if config.doApplyExternalSkyWcs=True. Catalog uses the detector id
            for the catalog id, sorted on id for fast lookup.
        externalPhotoCalibCatalog : `lsst.afw.table.ExposureCatalog`, optional
            Exposure catalog with external photoCalib to be applied
            if config.doApplyExternalPhotoCalib=True. Catalog uses the detector
            id for the catalog id, sorted on id for fast lookup.

        Returns
        -------
        exposure : `lsst.afw.image.exposure.Exposure`
            Exposure with adjusted calibrations.
        """
        detectorId = exposure.getInfo().getDetector().getId()

        if externalPhotoCalibCatalog is not None:
            row = externalPhotoCalibCatalog.find(detectorId)
            if row is None:
                self.log.warning("Detector id %s not found in externalPhotoCalibCatalog; "
                                 "Using original photoCalib.", detectorId)
            else:
                photoCalib = row.getPhotoCalib()
                if photoCalib is None:
                    self.log.warning("Detector id %s has None for photoCalib in externalPhotoCalibCatalog; "
                                     "Using original photoCalib.", detectorId)
                else:
                    exposure.setPhotoCalib(photoCalib)

        if externalSkyWcsCatalog is not None:
            row = externalSkyWcsCatalog.find(detectorId)
            if row is None:
                self.log.warning("Detector id %s not found in externalSkyWcsCatalog; "
                                 "Using original skyWcs.", detectorId)
            else:
                skyWcs = row.getWcs()
                if skyWcs is None:
                    self.log.warning("Detector id %s has None for skyWcs in externalSkyWcsCatalog; "
                                     "Using original skyWcs.", detectorId)
                else:
                    exposure.setWcs(skyWcs)

        return exposure
    def addCalibColumns(self, catalog, exposure, idGenerator, **kwargs):
        """Add or replace columns with calibs evaluated at each centroid

        Add or replace 'base_LocalWcs' and 'base_LocalPhotoCalib' columns in
        a source catalog, by rerunning the plugins.

        Parameters
        ----------
        catalog : `lsst.afw.table.SourceCatalog`
            catalog to which calib columns will be added
        exposure : `lsst.afw.image.exposure.Exposure`
            Exposure with attached PhotoCalibs and SkyWcs attributes to be
            reevaluated at local centroids. Pixels are not required.
        idGenerator : `lsst.meas.base.IdGenerator`
            Object that generates Source IDs and random seeds.
        **kwargs
            Additional keyword arguments are ignored to facilitate passing the
            same arguments to several methods.

        Returns
        -------
        newCat : `lsst.afw.table.SourceCatalog`
            Source Catalog with requested local calib columns
        """
        measureConfig = SingleFrameMeasurementTask.ConfigClass()
        measureConfig.doReplaceWithNoise = False

        # Clear all slots, because we aren't running the relevant plugins.
        for slot in measureConfig.slots:
            setattr(measureConfig.slots, slot, None)

        measureConfig.plugins.names = []
        if self.config.doReevaluateSkyWcs:
            measureConfig.plugins.names.add('base_LocalWcs')
            self.log.info("Re-evaluating base_LocalWcs plugin")
        if self.config.doReevaluatePhotoCalib:
            measureConfig.plugins.names.add('base_LocalPhotoCalib')
            self.log.info("Re-evaluating base_LocalPhotoCalib plugin")
        pluginsNotToCopy = tuple(measureConfig.plugins.names)

        # Create a new schema and catalog.  Copy all columns from the original
        # except those that will be recomputed by the selected plugins.
        aliasMap = catalog.schema.getAliasMap()
        mapper = afwTable.SchemaMapper(catalog.schema)
        for item in catalog.schema:
            if not item.field.getName().startswith(pluginsNotToCopy):
                mapper.addMapping(item.key)

        schema = mapper.getOutputSchema()
        measurement = SingleFrameMeasurementTask(config=measureConfig, schema=schema)
        schema.setAliasMap(aliasMap)
        newCat = afwTable.SourceCatalog(schema)
        newCat.extend(catalog, mapper=mapper)

        # Update coord_ra/coord_dec, which are expected to be positions on the
        # sky and are used as such in sdm tables without transform.
        if self.config.doReevaluateSkyWcs and exposure.wcs is not None:
            afwTable.updateSourceCoords(exposure.wcs, newCat)

        measurement.run(measCat=newCat, exposure=exposure, exposureId=idGenerator.catalog_id)

        return newCat
637 """Calculate columns from DataFrames or handles storing DataFrames.
639 This object manages and organizes an arbitrary set of computations
640 on a catalog. The catalog
is defined by a
641 `DeferredDatasetHandle`
or `InMemoryDatasetHandle` object
642 (
or list thereof), such
as a ``deepCoadd_obj`` dataset,
and the
643 computations are defined by a collection of `lsst.pipe.tasks.functor.Functor`
644 objects (
or, equivalently, a ``CompositeFunctor``).
646 After the object
is initialized, accessing the ``.df`` attribute (which
647 holds the `pandas.DataFrame` containing the results of the calculations)
648 triggers computation of said dataframe.
650 One of the conveniences of using this object
is the ability to define a
651 desired common filter
for all functors. This enables the same functor
652 collection to be passed to several different `PostprocessAnalysis` objects
653 without having to change the original functor collection, since the ``filt``
654 keyword argument of this object triggers an overwrite of the ``filt``
655 property
for all functors
in the collection.
657 This object also allows a list of refFlags to be passed,
and defines a set
658 of default refFlags that are always included even
if not requested.
660 If a list of DataFrames
or Handles
is passed, rather than a single one,
661 then the calculations will be mapped over all the input catalogs. In
662 principle, it should be straightforward to parallelize this activity, but
663 initial tests have failed (see TODO
in code comments).
667 handles : `lsst.daf.butler.DeferredDatasetHandle`
or
668 `lsst.pipe.base.InMemoryDatasetHandle`
or
670 Source
catalog(s)
for computation.
672 Computations to do (functors that act on ``handles``).
673 If a dict, the output
674 DataFrame will have columns keyed accordingly.
675 If a list, the column keys will come
from the
676 ``.shortname`` attribute of each functor.
678 filt : `str`, optional
679 Filter
in which to calculate. If provided,
680 this will overwrite any existing ``.filt`` attribute
681 of the provided functors.
683 flags : `list`, optional
684 List of flags (per-band) to include
in output table.
685 Taken
from the ``meas`` dataset
if applied to a multilevel Object Table.
687 refFlags : `list`, optional
688 List of refFlags (only reference band) to include
in output table.
690 forcedFlags : `list`, optional
691 List of flags (per-band) to include
in output table.
692 Taken
from the ``forced_src`` dataset
if applied to a
693 multilevel Object Table. Intended
for flags
from measurement plugins
694 only run during multi-band forced-photometry.
696 _defaultRefFlags = []
    def __init__(self, handles, functors, filt=None, flags=None, refFlags=None, forcedFlags=None):
        self.handles = handles
        self.functors = functors

        self.filt = filt
        self.flags = list(flags) if flags is not None else []
        self.forcedFlags = list(forcedFlags) if forcedFlags is not None else []
        self.refFlags = list(self._defaultRefFlags)
        if refFlags is not None:
            self.refFlags += list(refFlags)

        self._df = None

    @property
    def defaultFuncs(self):
        funcs = dict(self._defaultFuncs)
        return funcs

    @property
    def func(self):
        additionalFuncs = self.defaultFuncs
        additionalFuncs.update({flag: Column(flag, dataset='forced_src') for flag in self.forcedFlags})
        additionalFuncs.update({flag: Column(flag, dataset='ref') for flag in self.refFlags})
        additionalFuncs.update({flag: Column(flag, dataset='meas') for flag in self.flags})

        if isinstance(self.functors, CompositeFunctor):
            func = self.functors
        else:
            func = CompositeFunctor(self.functors)

        func.funcDict.update(additionalFuncs)
        func.filt = self.filt

        return func

    @property
    def noDupCols(self):
        return [name for name, func in self.func.funcDict.items() if func.noDup or func.dataset == 'ref']

    @property
    def df(self):
        if self._df is None:
            self.compute()
        return self._df

    def compute(self, dropna=False, pool=None):
        # Map over multiple handles if a list or tuple was passed.
        if type(self.handles) in (list, tuple):
            if pool is None:
                dflist = [self.func(handle, dropna=dropna) for handle in self.handles]
            else:
                # TODO: parallelization with a pool has not worked in initial
                # tests; see the class docstring.
                dflist = pool.map(functools.partial(self.func, dropna=dropna), self.handles)
            self._df = pd.concat(dflist)
        else:
            self._df = self.func(self.handles, dropna=dropna)

        return self._df
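
# Minimal usage sketch for PostprocessAnalysis (names illustrative; ``handle``
# is a DeferredDatasetHandle or InMemoryDatasetHandle for a source/object
# DataFrame):
#
#     funcs = {"ra": Column("coord_ra"), "dec": Column("coord_dec")}
#     analysis = PostprocessAnalysis(handle, funcs, filt="g")
#     df = analysis.df   # first access triggers the computation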
762 """Expected Connections for subclasses of TransformCatalogBaseTask.
766 inputCatalog = connectionTypes.Input(
768 storageClass=
"DataFrame",
770 outputCatalog = connectionTypes.Output(
772 storageClass=
"DataFrame",


class TransformCatalogBaseConfig(pipeBase.PipelineTaskConfig,
                                 pipelineConnections=TransformCatalogBaseConnections):
    functorFile = pexConfig.Field(
        dtype=str,
        doc="Path to YAML file specifying Science Data Model functors to use "
            "when copying columns and computing calibrated values.",
        default=None,
        optional=True
    )
    primaryKey = pexConfig.Field(
        dtype=str,
        doc="Name of column to be set as the DataFrame index. If None, the index "
            "will be named `id`",
        default=None,
        optional=True
    )
    columnsFromDataId = pexConfig.ListField(
        dtype=str,
        default=None,
        optional=True,
        doc="Columns to extract from the dataId",
    )
801 """Base class for transforming/standardizing a catalog
803 by applying functors that convert units and apply calibrations.
804 The purpose of this task
is to perform a set of computations on
805 an input ``DeferredDatasetHandle``
or ``InMemoryDatasetHandle`` that holds
806 a ``DataFrame`` dataset (such
as ``deepCoadd_obj``),
and write the
807 results to a new dataset (which needs to be declared
in an ``outputDataset``
810 The calculations to be performed are defined
in a YAML file that specifies
811 a set of functors to be computed, provided
as
812 a ``--functorFile`` config parameter. An example of such a YAML file
837 - base_InputCount_value
840 functor: DeconvolvedMoments
845 - merge_measurement_i
846 - merge_measurement_r
847 - merge_measurement_z
848 - merge_measurement_y
849 - merge_measurement_g
850 - base_PixelFlags_flag_inexact_psfCenter
853 The names
for each entry under
"func" will become the names of columns
in
854 the output dataset. All the functors referenced are defined
in
856 functor are
in the `args` list,
and any additional entries
for each column
857 other than
"functor" or "args" (e.g., ``
'filt'``, ``
'dataset'``) are treated
as
858 keyword arguments to be passed to the functor initialization.
860 The
"flags" entry
is the default shortcut
for `Column` functors.
861 All columns listed under
"flags" will be copied to the output table
862 untransformed. They can be of any datatype.
863 In the special case of transforming a multi-level oject table
with
864 band
and dataset indices (deepCoadd_obj), these will be taked
from the
865 `meas` dataset
and exploded out per band.
867 There are two special shortcuts that only apply when transforming
868 multi-level Object (deepCoadd_obj) tables:
869 - The
"refFlags" entry
is shortcut
for `Column` functor
870 taken
from the `
'ref'` dataset
if transforming an ObjectTable.
871 - The
"forcedFlags" entry
is shortcut
for `Column` functors.
872 taken
from the ``forced_src`` dataset
if transforming an ObjectTable.
873 These are expanded out per band.
877 to organize
and excecute the calculations.
    @property
    def _DefaultName(self):
        raise NotImplementedError('Subclass must define "_DefaultName" attribute')

    @property
    def outputDataset(self):
        raise NotImplementedError('Subclass must define "outputDataset" attribute')

    @property
    def inputDataset(self):
        raise NotImplementedError('Subclass must define "inputDataset" attribute')

    @property
    def ConfigClass(self):
        raise NotImplementedError('Subclass must define "ConfigClass" attribute')
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        if self.config.functorFile:
            self.log.info('Loading transform functor definitions from %s',
                          self.config.functorFile)
            self.funcs = CompositeFunctor.from_file(self.config.functorFile)
            self.funcs.update(dict(PostprocessAnalysis._defaultFuncs))
        else:
            self.funcs = None

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)
        if self.funcs is None:
            raise ValueError("config.functorFile is None. "
                             "Must be a valid path to yaml in order to run Task as a PipelineTask.")
        result = self.run(handle=inputs['inputCatalog'], funcs=self.funcs,
                          dataId=outputRefs.outputCatalog.dataId.full)
        outputs = pipeBase.Struct(outputCatalog=result)
        butlerQC.put(outputs, outputRefs)
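
    # Sketch of a small functor specification file of the kind loaded above by
    # ``CompositeFunctor.from_file`` (entries are illustrative, not a schema
    # shipped with the pipeline):
    #
    #     funcs:
    #         ra:
    #             functor: Column
    #             args:
    #                 - coord_ra
    #     flags:
    #         - base_PixelFlags_flag_saturated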
    def run(self, handle, funcs=None, dataId=None, band=None):
        """Do postprocessing calculations

        Takes a ``DeferredDatasetHandle`` or ``InMemoryDatasetHandle`` or
        ``DataFrame`` object and dataId,
        returns a dataframe with results of postprocessing calculations.

        Parameters
        ----------
        handle : `lsst.daf.butler.DeferredDatasetHandle` or
                 `lsst.pipe.base.InMemoryDatasetHandle` or
                 `pandas.DataFrame`, or list of these.
            DataFrames from which calculations are done.
        funcs : `lsst.pipe.tasks.functors.Functor`
            Functors to apply to the table's columns
        dataId : `dict`, optional
            Used to add a `patchId` column to the output dataframe.
        band : `str`, optional
            Filter band that is being processed.

        Returns
        -------
        df : `pandas.DataFrame`
        """
        self.log.info("Transforming/standardizing the source table dataId: %s", dataId)

        df = self.transform(band, handle, funcs, dataId).df
        self.log.info("Made a table of %d columns and %d rows", len(df.columns), len(df))
        return df

    def getAnalysis(self, handles, funcs=None, band=None):
        if funcs is None:
            funcs = self.funcs
        analysis = PostprocessAnalysis(handles, funcs, filt=band)
        return analysis
    def transform(self, band, handles, funcs, dataId):
        analysis = self.getAnalysis(handles, funcs=funcs, band=band)
        df = analysis.df
        if dataId and self.config.columnsFromDataId:
            for key in self.config.columnsFromDataId:
                if key in dataId:
                    df[str(key)] = dataId[key]
                else:
                    raise ValueError(f"'{key}' in config.columnsFromDataId not found in dataId: {dataId}")

        if self.config.primaryKey:
            if df.index.name != self.config.primaryKey and self.config.primaryKey in df:
                df.reset_index(inplace=True, drop=True)
                df.set_index(self.config.primaryKey, inplace=True)

        return pipeBase.Struct(
            df=df,
            analysis=analysis
        )


class TransformObjectCatalogConnections(pipeBase.PipelineTaskConnections,
                                        defaultTemplates={"coaddName": "deep"},
                                        dimensions=("tract", "patch", "skymap")):
    inputCatalog = connectionTypes.Input(
        doc="The vertical concatenation of the deepCoadd_{ref|meas|forced_src} catalogs, "
            "stored as a DataFrame with a multi-level column index per-patch.",
        dimensions=("tract", "patch", "skymap"),
        storageClass="DataFrame",
        name="{coaddName}Coadd_obj",
        deferLoad=True,
    )
    outputCatalog = connectionTypes.Output(
        doc="Per-Patch Object Table of columns transformed from the deepCoadd_obj table per the standard "
            "data model.",
        dimensions=("tract", "patch", "skymap"),
        storageClass="DataFrame",
        name="objectTable"
    )


class TransformObjectCatalogConfig(TransformCatalogBaseConfig,
                                   pipelineConnections=TransformObjectCatalogConnections):
    coaddName = pexConfig.Field(
        dtype=str,
        default="deep",
        doc="Name of coadd"
    )
    filterMap = pexConfig.DictField(
        keytype=str,
        itemtype=str,
        default={},
        doc=("Dictionary mapping full filter name to short one for column name munging. "
             "These filters determine the output columns no matter what filters the "
             "input data actually contain."),
        deprecated=("Coadds are now identified by the band, so this transform is unused. "
                    "Will be removed after v22.")
    )
    outputBands = pexConfig.ListField(
        dtype=str,
        default=None,
        optional=True,
        doc=("These bands and only these bands will appear in the output,"
             " NaN-filled if the input does not include them."
             " If None, then use all bands found in the input.")
    )
    camelCase = pexConfig.Field(
        dtype=bool,
        default=False,
        doc=("Write per-band column names with camelCase, else underscore. "
             "For example: gPsFlux instead of g_PsFlux.")
    )
    multilevelOutput = pexConfig.Field(
        dtype=bool,
        default=False,
        doc=("Whether results dataframe should have a multilevel column index (True) or be flat "
             "and name-munged (False).")
    )
    goodFlags = pexConfig.ListField(
        dtype=str,
        default=[],
        doc=("List of 'good' flags that should be set False when populating empty tables. "
             "All other flags are considered to be 'bad' flags and will be set to True.")
    )
    floatFillValue = pexConfig.Field(
        dtype=float,
        default=np.nan,
        doc="Fill value for float fields when populating empty tables."
    )
    integerFillValue = pexConfig.Field(
        dtype=int,
        default=-1,
        doc="Fill value for integer fields when populating empty tables."
    )

    def setDefaults(self):
        super().setDefaults()
        self.functorFile = os.path.join('$PIPE_TASKS_DIR', 'schemas', 'Object.yaml')
        self.primaryKey = 'objectId'
        self.columnsFromDataId = ['tract', 'patch']
        self.goodFlags = ['calib_astrometry_used',
                          'calib_photometry_reserved',
                          'calib_photometry_used',
                          'calib_psf_candidate',
                          'calib_psf_reserved',
                          'calib_psf_used']
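
# Example config-override sketch for this task (hypothetical values), e.g. in
# a pipeline definition or on the command line:
#
#     config.outputBands = ["g", "r", "i"]   # pad missing bands with fill values
#     config.camelCase = True                # gPsFlux rather than g_PsFlux
#     config.multilevelOutput = False        # flat, name-munged columns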
1064 """Produce a flattened Object Table to match the format specified in
1067 Do the same set of postprocessing calculations on all bands.
1069 This is identical to `TransformCatalogBaseTask`,
except for that it does
1070 the specified functor calculations
for all filters present
in the
1071 input `deepCoadd_obj` table. Any specific ``
"filt"`` keywords specified
1072 by the YAML file will be superceded.
1074 _DefaultName = "transformObjectCatalog"
1075 ConfigClass = TransformObjectCatalogConfig
    def run(self, handle, funcs=None, dataId=None, band=None):
        # NOTE: the ``band`` kwarg is ignored here.
        dfDict = {}
        analysisDict = {}
        templateDf = pd.DataFrame()

        columns = handle.get(component='columns')
        inputBands = columns.unique(level=1).values

        outputBands = self.config.outputBands if self.config.outputBands else inputBands

        # Perform transform for data of filters that exist in the handle dataframe.
        for inputBand in inputBands:
            if inputBand not in outputBands:
                self.log.info("Ignoring %s band data in the input", inputBand)
                continue
            self.log.info("Transforming the catalog of band %s", inputBand)
            result = self.transform(inputBand, handle, funcs, dataId)
            dfDict[inputBand] = result.df
            analysisDict[inputBand] = result.analysis
            if templateDf.empty:
                templateDf = result.df

        # Put filler values in columns of other wanted bands.
        for filt in outputBands:
            if filt not in dfDict:
                self.log.info("Adding empty columns for band %s", filt)
                dfTemp = templateDf.copy()
                for col in dfTemp.columns:
                    testValue = dfTemp[col].values[0]
                    if isinstance(testValue, (np.bool_, pd.BooleanDtype)):
                        # Boolean flag type: check whether it is a "good" flag.
                        if col in self.config.goodFlags:
                            fillValue = False
                        else:
                            fillValue = True
                    elif isinstance(testValue, numbers.Integral):
                        # Checking numbers.Integral catches all flavors of
                        # python, numpy, pandas, etc. integers.
                        # We must ensure this is not an unsigned integer.
                        if isinstance(testValue, np.unsignedinteger):
                            raise ValueError("Parquet tables may not have unsigned integer columns.")
                        else:
                            fillValue = self.config.integerFillValue
                    else:
                        fillValue = self.config.floatFillValue
                    dfTemp[col].values[:] = fillValue
                dfDict[filt] = dfTemp

        # This makes a multilevel column index, with band as first level.
        df = pd.concat(dfDict, axis=1, names=['band', 'column'])

        if not self.config.multilevelOutput:
            noDupCols = list(set.union(*[set(v.noDupCols) for v in analysisDict.values()]))
            if self.config.primaryKey in noDupCols:
                noDupCols.remove(self.config.primaryKey)
            if dataId and self.config.columnsFromDataId:
                noDupCols += self.config.columnsFromDataId
            df = flattenFilters(df, noDupCols=noDupCols, camelCase=self.config.camelCase,
                                inputBands=inputBands)

        self.log.info("Made a table of %d columns and %d rows", len(df.columns), len(df))

        return df


class ConsolidateObjectTableConnections(pipeBase.PipelineTaskConnections,
                                        dimensions=("tract", "skymap")):
    inputCatalogs = connectionTypes.Input(
        doc="Per-Patch objectTables conforming to the standard data model.",
        name="objectTable",
        storageClass="DataFrame",
        dimensions=("tract", "patch", "skymap"),
        multiple=True,
    )
    outputCatalog = connectionTypes.Output(
        doc="Per-tract concatenation of the input objectTables",
        name="objectTable_tract",
        storageClass="DataFrame",
        dimensions=("tract", "skymap"),
    )


class ConsolidateObjectTableConfig(pipeBase.PipelineTaskConfig,
                                   pipelineConnections=ConsolidateObjectTableConnections):
    coaddName = pexConfig.Field(
        dtype=str,
        default="deep",
        doc="Name of coadd"
    )


class ConsolidateObjectTableTask(pipeBase.PipelineTask):
    """Write patch-merged source tables to a tract-level DataFrame Parquet file.

    Concatenates the `objectTable` list into a per-tract `objectTable_tract`.
    """
    _DefaultName = "consolidateObjectTable"
    ConfigClass = ConsolidateObjectTableConfig

    inputDataset = 'objectTable'
    outputDataset = 'objectTable_tract'

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)
        self.log.info("Concatenating %s per-patch Object Tables",
                      len(inputs['inputCatalogs']))
        df = pd.concat(inputs['inputCatalogs'])
        butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs)


class TransformSourceTableConnections(pipeBase.PipelineTaskConnections,
                                      defaultTemplates={"catalogType": ""},
                                      dimensions=("instrument", "visit", "detector")):
    inputCatalog = connectionTypes.Input(
        doc="Wide input catalog of sources produced by WriteSourceTableTask",
        name="{catalogType}source",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector"),
        deferLoad=True
    )
    outputCatalog = connectionTypes.Output(
        doc="Narrower, per-detector Source Table transformed and converted per a "
            "specified set of functors",
        name="{catalogType}sourceTable",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector")
    )


class TransformSourceTableConfig(TransformCatalogBaseConfig,
                                 pipelineConnections=TransformSourceTableConnections):

    def setDefaults(self):
        super().setDefaults()
        self.functorFile = os.path.join('$PIPE_TASKS_DIR', 'schemas', 'Source.yaml')
        self.primaryKey = 'sourceId'
        self.columnsFromDataId = ['visit', 'detector', 'band', 'physical_filter']
1219 """Transform/standardize a source catalog
1221 _DefaultName = "transformSourceTable"
1222 ConfigClass = TransformSourceTableConfig


class ConsolidateVisitSummaryConnections(pipeBase.PipelineTaskConnections,
                                         dimensions=("instrument", "visit",),
                                         defaultTemplates={"calexpType": ""}):
    calexp = connectionTypes.Input(
        doc="Processed exposures used for metadata",
        name="calexp",
        storageClass="ExposureF",
        dimensions=("instrument", "visit", "detector"),
        deferLoad=True,
        multiple=True,
    )
    visitSummary = connectionTypes.Output(
        doc=("Per-visit consolidated exposure metadata. These catalogs use "
             "detector id for the id and are sorted for fast lookups of a "
             "detector."),
        name="visitSummary",
        storageClass="ExposureCatalog",
        dimensions=("instrument", "visit"),
    )
    visitSummarySchema = connectionTypes.InitOutput(
        doc="Schema of the visitSummary catalog",
        name="visitSummary_schema",
        storageClass="ExposureCatalog",
    )


class ConsolidateVisitSummaryConfig(pipeBase.PipelineTaskConfig,
                                    pipelineConnections=ConsolidateVisitSummaryConnections):
    """Config for ConsolidateVisitSummaryTask"""


class ConsolidateVisitSummaryTask(pipeBase.PipelineTask):
    """Task to consolidate per-detector visit metadata.

    This task aggregates the following metadata from all the detectors in a
    single visit into an exposure catalog:
    - The visitInfo.
    - The wcs.
    - The photoCalib.
    - The physical_filter and band (if available).
    - The psf size, shape, and effective area at the center of the detector.
    - The corners of the bounding box in right ascension/declination.

    Other quantities such as Detector, Psf, ApCorrMap, and TransmissionCurve
    are not persisted here because of storage concerns, and because of their
    limited utility as summary statistics.

    Tests for this task are performed in ci_hsc_gen3.
    """
    _DefaultName = "consolidateVisitSummary"
    ConfigClass = ConsolidateVisitSummaryConfig

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.schema = afwTable.ExposureTable.makeMinimalSchema()
        self.schema.addField('visit', type='L', doc='Visit number')
        self.schema.addField('physical_filter', type='String', size=32, doc='Physical filter')
        self.schema.addField('band', type='String', size=32, doc='Name of band')
        ExposureSummaryStats.update_schema(self.schema)
        self.visitSummarySchema = afwTable.ExposureCatalog(self.schema)
    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        dataRefs = butlerQC.get(inputRefs.calexp)
        visit = dataRefs[0].dataId.byName()['visit']

        self.log.debug("Concatenating metadata from %d per-detector calexps (visit %d)",
                       len(dataRefs), visit)

        expCatalog = self._combineExposureMetadata(visit, dataRefs)

        butlerQC.put(expCatalog, outputRefs.visitSummary)
    def _combineExposureMetadata(self, visit, dataRefs):
        """Make a combined exposure catalog from a list of dataRefs.
        These dataRefs must point to exposures with wcs, summaryStats,
        and other visit metadata.

        Parameters
        ----------
        visit : `int`
            Visit identification number.
        dataRefs : `list` of `lsst.daf.butler.DeferredDatasetHandle`
            List of dataRefs in visit.

        Returns
        -------
        visitSummary : `lsst.afw.table.ExposureCatalog`
            Exposure catalog with per-detector summary information.
        """
        cat = afwTable.ExposureCatalog(self.schema)
        cat.resize(len(dataRefs))

        cat['visit'] = visit

        for i, dataRef in enumerate(dataRefs):
            visitInfo = dataRef.get(component='visitInfo')
            filterLabel = dataRef.get(component='filter')
            summaryStats = dataRef.get(component='summaryStats')
            detector = dataRef.get(component='detector')
            wcs = dataRef.get(component='wcs')
            photoCalib = dataRef.get(component='photoCalib')
            bbox = dataRef.get(component='bbox')
            validPolygon = dataRef.get(component='validPolygon')

            rec = cat[i]
            rec.setBBox(bbox)
            rec.setVisitInfo(visitInfo)
            rec.setWcs(wcs)
            rec.setPhotoCalib(photoCalib)
            rec.setValidPolygon(validPolygon)

            rec['physical_filter'] = filterLabel.physicalLabel if filterLabel.hasPhysicalLabel() else ""
            rec['band'] = filterLabel.bandLabel if filterLabel.hasBandLabel() else ""
            rec.setId(detector.getId())
            summaryStats.update_record(rec)

        metadata = dafBase.PropertyList()
        metadata.add("COMMENT", "Catalog id is detector id, sorted.")
        # We are looping over existing dataRefs, so the following is true.
        metadata.add("COMMENT", "Only detectors with data have entries.")
        cat.setMetadata(metadata)

        cat.sort()
        return cat


class ConsolidateSourceTableConnections(pipeBase.PipelineTaskConnections,
                                        defaultTemplates={"catalogType": ""},
                                        dimensions=("instrument", "visit")):
    inputCatalogs = connectionTypes.Input(
        doc="Input per-detector Source Tables",
        name="{catalogType}sourceTable",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector"),
        multiple=True
    )
    outputCatalog = connectionTypes.Output(
        doc="Per-visit concatenation of Source Table",
        name="{catalogType}sourceTable_visit",
        storageClass="DataFrame",
        dimensions=("instrument", "visit")
    )


class ConsolidateSourceTableConfig(pipeBase.PipelineTaskConfig,
                                   pipelineConnections=ConsolidateSourceTableConnections):
    pass


class ConsolidateSourceTableTask(pipeBase.PipelineTask):
    """Concatenate `sourceTable` list into a per-visit `sourceTable_visit`
    """
    _DefaultName = 'consolidateSourceTable'
    ConfigClass = ConsolidateSourceTableConfig

    inputDataset = 'sourceTable'
    outputDataset = 'sourceTable_visit'

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        from .makeWarp import reorderRefs

        detectorOrder = [ref.dataId['detector'] for ref in inputRefs.inputCatalogs]
        detectorOrder.sort()
        inputRefs = reorderRefs(inputRefs, detectorOrder, dataIdKey='detector')
        inputs = butlerQC.get(inputRefs)
        self.log.info("Concatenating %s per-detector Source Tables",
                      len(inputs['inputCatalogs']))
        df = pd.concat(inputs['inputCatalogs'])
        butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs)


class MakeCcdVisitTableConnections(pipeBase.PipelineTaskConnections,
                                   dimensions=("instrument",),
                                   defaultTemplates={"calexpType": ""}):
    visitSummaryRefs = connectionTypes.Input(
        doc="Data references for per-visit consolidated exposure metadata",
        name="finalVisitSummary",
        storageClass="ExposureCatalog",
        dimensions=("instrument", "visit"),
        multiple=True,
        deferLoad=True,
    )
    outputCatalog = connectionTypes.Output(
        doc="CCD and Visit metadata table",
        name="ccdVisitTable",
        storageClass="DataFrame",
        dimensions=("instrument",)
    )


class MakeCcdVisitTableConfig(pipeBase.PipelineTaskConfig,
                              pipelineConnections=MakeCcdVisitTableConnections):
    idGenerator = DetectorVisitIdGeneratorConfig.make_field()


class MakeCcdVisitTableTask(pipeBase.PipelineTask):
    """Produce a `ccdVisitTable` from the visit summary exposure catalogs.
    """
    _DefaultName = 'makeCcdVisitTable'
    ConfigClass = MakeCcdVisitTableConfig

    def run(self, visitSummaryRefs):
        """Make a table of ccd information from the visit summary catalogs.

        Parameters
        ----------
        visitSummaryRefs : `list` of `lsst.daf.butler.DeferredDatasetHandle`
            List of DeferredDatasetHandles pointing to exposure catalogs with
            per-detector summary information.

        Returns
        -------
        result : `lsst.pipe.Base.Struct`
            Results struct with attribute:

            ``outputCatalog``
                Catalog of ccd and visit information.
        """
        ccdEntries = []
        for visitSummaryRef in visitSummaryRefs:
            visitSummary = visitSummaryRef.get()
            visitInfo = visitSummary[0].getVisitInfo()

            summaryTable = visitSummary.asAstropy()
            selectColumns = ['id', 'visit', 'physical_filter', 'band', 'ra', 'dec', 'zenithDistance',
                             'zeroPoint', 'psfSigma', 'skyBg', 'skyNoise',
                             'astromOffsetMean', 'astromOffsetStd', 'nPsfStar',
                             'psfStarDeltaE1Median', 'psfStarDeltaE2Median',
                             'psfStarDeltaE1Scatter', 'psfStarDeltaE2Scatter',
                             'psfStarDeltaSizeMedian', 'psfStarDeltaSizeScatter',
                             'psfStarScaledDeltaSizeScatter',
                             'psfTraceRadiusDelta', 'maxDistToNearestPsf']
            ccdEntry = summaryTable[selectColumns].to_pandas().set_index('id')
            # 'visit' is the human-readable visit number; rename to match the
            # data model.
            ccdEntry = ccdEntry.rename(columns={"visit": "visitId"})

            # Temporarily keep a duplicate "decl" column for backwards
            # compatibility.
            ccdEntry["decl"] = ccdEntry.loc[:, "dec"]

            ccdEntry['ccdVisitId'] = [
                self.config.idGenerator.apply(
                    visitSummaryRef.dataId,
                    detector=detector_id,
                ).catalog_id
                for detector_id in summaryTable['id']
            ]
            ccdEntry['detector'] = summaryTable['id']
            pixToArcseconds = np.array([vR.getWcs().getPixelScale().asArcseconds() if vR.getWcs()
                                        else np.nan for vR in visitSummary])
            # Convert the Gaussian psfSigma (pixels) to a FWHM in arcseconds.
            ccdEntry["seeing"] = visitSummary['psfSigma'] * np.sqrt(8 * np.log(2)) * pixToArcseconds

            ccdEntry["skyRotation"] = visitInfo.getBoresightRotAngle().asDegrees()
            ccdEntry["expMidpt"] = visitInfo.getDate().toPython()
            ccdEntry["expMidptMJD"] = visitInfo.getDate().get(dafBase.DateTime.MJD)
            expTime = visitInfo.getExposureTime()
            ccdEntry['expTime'] = expTime
            ccdEntry["obsStart"] = ccdEntry["expMidpt"] - 0.5 * pd.Timedelta(seconds=expTime)
            expTime_days = expTime / (60*60*24)
            ccdEntry["obsStartMJD"] = ccdEntry["expMidptMJD"] - 0.5 * expTime_days
            ccdEntry['darkTime'] = visitInfo.getDarkTime()
            ccdEntry['xSize'] = summaryTable['bbox_max_x'] - summaryTable['bbox_min_x']
            ccdEntry['ySize'] = summaryTable['bbox_max_y'] - summaryTable['bbox_min_y']
            ccdEntry['llcra'] = summaryTable['raCorners'][:, 0]
            ccdEntry['llcdec'] = summaryTable['decCorners'][:, 0]
            ccdEntry['ulcra'] = summaryTable['raCorners'][:, 1]
            ccdEntry['ulcdec'] = summaryTable['decCorners'][:, 1]
            ccdEntry['urcra'] = summaryTable['raCorners'][:, 2]
            ccdEntry['urcdec'] = summaryTable['decCorners'][:, 2]
            ccdEntry['lrcra'] = summaryTable['raCorners'][:, 3]
            ccdEntry['lrcdec'] = summaryTable['decCorners'][:, 3]

            ccdEntries.append(ccdEntry)

        outputCatalog = pd.concat(ccdEntries)
        outputCatalog.set_index('ccdVisitId', inplace=True, verify_integrity=True)
        return pipeBase.Struct(outputCatalog=outputCatalog)
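
# Worked example of the "seeing" conversion used above (numbers illustrative):
# FWHM = psfSigma * 2 * sqrt(2 * ln 2) ~= 2.355 * psfSigma, so a psfSigma of
# 2.0 pixels on a detector with a 0.2 arcsec/pixel plate scale gives
# seeing ~= 2.355 * 2.0 * 0.2 ~= 0.94 arcsec.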


class MakeVisitTableConnections(pipeBase.PipelineTaskConnections,
                                dimensions=("instrument",),
                                defaultTemplates={"calexpType": ""}):
    visitSummaries = connectionTypes.Input(
        doc="Per-visit consolidated exposure metadata",
        name="finalVisitSummary",
        storageClass="ExposureCatalog",
        dimensions=("instrument", "visit",),
        multiple=True,
        deferLoad=True,
    )
    outputCatalog = connectionTypes.Output(
        doc="Visit metadata table",
        name="visitTable",
        storageClass="DataFrame",
        dimensions=("instrument",)
    )


class MakeVisitTableConfig(pipeBase.PipelineTaskConfig,
                           pipelineConnections=MakeVisitTableConnections):
    pass


class MakeVisitTableTask(pipeBase.PipelineTask):
    """Produce a `visitTable` from the visit summary exposure catalogs.
    """
    _DefaultName = 'makeVisitTable'
    ConfigClass = MakeVisitTableConfig
    def run(self, visitSummaries):
        """Make a table of visit information from the visit summary catalogs.

        Parameters
        ----------
        visitSummaries : `list` of `lsst.daf.butler.DeferredDatasetHandle`
            List of exposure catalogs with per-detector summary information.

        Returns
        -------
        result : `lsst.pipe.Base.Struct`
            Results struct with attribute:

            ``outputCatalog``
                Catalog of visit information.
        """
        visitEntries = []
        for visitSummary in visitSummaries:
            visitSummary = visitSummary.get()
            visitRow = visitSummary[0]
            visitInfo = visitRow.getVisitInfo()

            visitEntry = {}
            visitEntry["visitId"] = visitRow['visit']
            visitEntry["visit"] = visitRow['visit']
            visitEntry["physical_filter"] = visitRow['physical_filter']
            visitEntry["band"] = visitRow['band']
            raDec = visitInfo.getBoresightRaDec()
            visitEntry["ra"] = raDec.getRa().asDegrees()
            visitEntry["dec"] = raDec.getDec().asDegrees()

            # Temporarily keep a duplicate "decl" entry for backwards
            # compatibility.
            visitEntry["decl"] = visitEntry["dec"]

            visitEntry["skyRotation"] = visitInfo.getBoresightRotAngle().asDegrees()
            azAlt = visitInfo.getBoresightAzAlt()
            visitEntry["azimuth"] = azAlt.getLongitude().asDegrees()
            visitEntry["altitude"] = azAlt.getLatitude().asDegrees()
            visitEntry["zenithDistance"] = 90 - azAlt.getLatitude().asDegrees()
            visitEntry["airmass"] = visitInfo.getBoresightAirmass()
            expTime = visitInfo.getExposureTime()
            visitEntry["expTime"] = expTime
            visitEntry["expMidpt"] = visitInfo.getDate().toPython()
            visitEntry["expMidptMJD"] = visitInfo.getDate().get(dafBase.DateTime.MJD)
            visitEntry["obsStart"] = visitEntry["expMidpt"] - 0.5 * pd.Timedelta(seconds=expTime)
            expTime_days = expTime / (60*60*24)
            visitEntry["obsStartMJD"] = visitEntry["expMidptMJD"] - 0.5 * expTime_days
            visitEntries.append(visitEntry)

        outputCatalog = pd.DataFrame(data=visitEntries)
        outputCatalog.set_index('visitId', inplace=True, verify_integrity=True)
        return pipeBase.Struct(outputCatalog=outputCatalog)


class WriteForcedSourceTableConnections(pipeBase.PipelineTaskConnections,
                                        dimensions=("instrument", "visit", "detector", "skymap", "tract")):

    inputCatalog = connectionTypes.Input(
        doc="Primary per-detector, single-epoch forced-photometry catalog. "
            "By default, it is the output of ForcedPhotCcdTask on calexps",
        name="forced_src",
        storageClass="SourceCatalog",
        dimensions=("instrument", "visit", "detector", "skymap", "tract")
    )
    inputCatalogDiff = connectionTypes.Input(
        doc="Secondary multi-epoch, per-detector, forced photometry catalog. "
            "By default, it is the output of ForcedPhotCcdTask run on image differences.",
        name="forced_diff",
        storageClass="SourceCatalog",
        dimensions=("instrument", "visit", "detector", "skymap", "tract")
    )
    outputCatalog = connectionTypes.Output(
        doc="InputCatalogs horizontally joined on `objectId` in DataFrame parquet format",
        name="mergedForcedSource",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector", "skymap", "tract")
    )


class WriteForcedSourceTableConfig(pipeBase.PipelineTaskConfig,
                                   pipelineConnections=WriteForcedSourceTableConnections):
    key = pexConfig.Field(
        doc="Column on which to join the two input tables and make the primary key of the output",
        dtype=str,
        default="objectId",
    )
    idGenerator = DetectorVisitIdGeneratorConfig.make_field()


class WriteForcedSourceTableTask(pipeBase.PipelineTask):
    """Merge and convert per-detector forced source catalogs to DataFrame Parquet format.

    Because the predecessor ForcedPhotCcdTask operates per-detector,
    per-tract (i.e., it has tract in its dimensions), detectors
    on the tract boundary may have multiple forced source catalogs.

    The successor task TransformForcedSourceTable runs per-patch
    and temporally aggregates overlapping mergedForcedSource catalogs from all
    available epochs.
    """
    _DefaultName = "writeForcedSourceTable"
    ConfigClass = WriteForcedSourceTableConfig

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)

        idGenerator = self.config.idGenerator.apply(butlerQC.quantum.dataId)
        inputs['ccdVisitId'] = idGenerator.catalog_id
        inputs['band'] = butlerQC.quantum.dataId.full['band']
        outputs = self.run(**inputs)
        butlerQC.put(outputs, outputRefs)

    def run(self, inputCatalog, inputCatalogDiff, ccdVisitId=None, band=None):
        dfs = []
        for table, dataset in zip((inputCatalog, inputCatalogDiff), ('calexp', 'diff')):
            df = table.asAstropy().to_pandas().set_index(self.config.key, drop=False)
            df = df.reindex(sorted(df.columns), axis=1)
            df['ccdVisitId'] = ccdVisitId if ccdVisitId else pd.NA
            df['band'] = band if band else pd.NA
            df.columns = pd.MultiIndex.from_tuples([(dataset, c) for c in df.columns],
                                                   names=('dataset', 'column'))

            dfs.append(df)

        outputCatalog = functools.reduce(lambda d1, d2: d1.join(d2), dfs)
        return pipeBase.Struct(outputCatalog=outputCatalog)
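
# The merged catalog is a two-level (dataset, column) DataFrame: the 'calexp'
# block holds forced photometry on the direct image and the 'diff' block the
# forced photometry on the image difference, row-matched by the join on
# config.key.  Column selection sketch (column name illustrative):
#
#     merged[("diff", "base_PsfFlux_instFlux")]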


class TransformForcedSourceTableConnections(pipeBase.PipelineTaskConnections,
                                            dimensions=("instrument", "skymap", "patch", "tract")):

    inputCatalogs = connectionTypes.Input(
        doc="DataFrames of merged ForcedSources produced by WriteForcedSourceTableTask",
        name="mergedForcedSource",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector", "skymap", "tract"),
        multiple=True,
        deferLoad=True
    )
    referenceCatalog = connectionTypes.Input(
        doc="Reference catalog which was used to seed the forcedPhot. Columns "
            "objectId, detect_isPrimary, detect_isTractInner, detect_isPatchInner "
            "are expected.",
        name="objectTable",
        storageClass="DataFrame",
        dimensions=("tract", "patch", "skymap"),
        deferLoad=True
    )
    outputCatalog = connectionTypes.Output(
        doc="Narrower, temporally-aggregated, per-patch ForcedSource Table transformed and converted per a "
            "specified set of functors",
        name="forcedSourceTable",
        storageClass="DataFrame",
        dimensions=("tract", "patch", "skymap")
    )


class TransformForcedSourceTableConfig(TransformCatalogBaseConfig,
                                       pipelineConnections=TransformForcedSourceTableConnections):
    referenceColumns = pexConfig.ListField(
        dtype=str,
        default=["detect_isPrimary", "detect_isTractInner", "detect_isPatchInner"],
        optional=True,
        doc="Columns to pull from reference catalog",
    )
    keyRef = pexConfig.Field(
        doc="Column on which to join the two input tables and make the primary key of the output",
        dtype=str,
        default="objectId",
    )
    key = pexConfig.Field(
        doc="Rename the output DataFrame index to this name",
        dtype=str,
        default="forcedSourceId",
    )

    def setDefaults(self):
        super().setDefaults()
        self.functorFile = os.path.join('$PIPE_TASKS_DIR', 'schemas', 'ForcedSource.yaml')
        self.columnsFromDataId = ['tract', 'patch']
1734 """Transform/standardize a ForcedSource catalog
1736 Transforms each wide, per-detector forcedSource DataFrame per the
1737 specification file (per-camera defaults found in ForcedSource.yaml).
1738 All epochs that overlap the patch are aggregated into one per-patch
1739 narrow-DataFrame file.
1741 No de-duplication of rows
is performed. Duplicate resolutions flags are
1742 pulled
in from the referenceCatalog: `detect_isPrimary`,
1743 `detect_isTractInner`,`detect_isPatchInner`, so that user may de-duplicate
1744 for analysis
or compare duplicates
for QA.
1746 The resulting table includes multiple bands. Epochs (MJDs)
and other useful
1747 per-visit rows can be retreived by joining
with the CcdVisitTable on
1750 _DefaultName = "transformForcedSourceTable"
1751 ConfigClass = TransformForcedSourceTableConfig
1753 def runQuantum(self, butlerQC, inputRefs, outputRefs):
1754 inputs = butlerQC.get(inputRefs)
1755 if self.funcs
is None:
1756 raise ValueError(
"config.functorFile is None. "
1757 "Must be a valid path to yaml in order to run Task as a PipelineTask.")
1758 outputs = self.run(inputs[
'inputCatalogs'], inputs[
'referenceCatalog'], funcs=self.funcs,
1759 dataId=outputRefs.outputCatalog.dataId.full)
1761 butlerQC.put(outputs, outputRefs)
    def run(self, inputCatalogs, referenceCatalog, funcs=None, dataId=None, band=None):
        dfs = []
        ref = referenceCatalog.get(parameters={"columns": self.config.referenceColumns})
        self.log.info("Aggregating %s input catalogs", len(inputCatalogs))
        for handle in inputCatalogs:
            result = self.transform(None, handle, funcs, dataId)
            # Keep only rows whose objects were detected on (overlap) the patch.
            dfs.append(result.df.join(ref, how='inner'))

        outputCatalog = pd.concat(dfs)

        # Now that the join on config.keyRef is done, restore that column and
        # make config.key the index.
        outputCatalog.index.rename(self.config.keyRef, inplace=True)
        # Add config.keyRef to the column list.
        outputCatalog.reset_index(inplace=True)
        # Set the forcedSourceId to the index (specified in ForcedSource.yaml).
        outputCatalog.set_index("forcedSourceId", inplace=True, verify_integrity=True)
        # Rename it to config.key.
        outputCatalog.index.rename(self.config.key, inplace=True)

        self.log.info("Made a table of %d columns and %d rows",
                      len(outputCatalog.columns), len(outputCatalog))
        return pipeBase.Struct(outputCatalog=outputCatalog)
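
# The reference flags joined in by ``run`` let downstream users de-duplicate
# overlapping patches/tracts themselves, e.g. (a sketch, assuming the default
# config.referenceColumns):
#
#     primary = forcedSourceTable[forcedSourceTable["detect_isPrimary"]]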


class ConsolidateTractConnections(pipeBase.PipelineTaskConnections,
                                  defaultTemplates={"catalogType": ""},
                                  dimensions=("instrument", "tract")):
    inputCatalogs = connectionTypes.Input(
        doc="Input per-patch DataFrame Tables to be concatenated",
        name="{catalogType}ForcedSourceTable",
        storageClass="DataFrame",
        dimensions=("tract", "patch", "skymap"),
        multiple=True,
    )
    outputCatalog = connectionTypes.Output(
        doc="Output per-tract concatenation of DataFrame Tables",
        name="{catalogType}ForcedSourceTable_tract",
        storageClass="DataFrame",
        dimensions=("tract", "skymap"),
    )


class ConsolidateTractConfig(pipeBase.PipelineTaskConfig,
                             pipelineConnections=ConsolidateTractConnections):
    pass


class ConsolidateTractTask(pipeBase.PipelineTask):
    """Concatenate any per-patch DataFrame list into a single
    per-tract DataFrame.
    """
    _DefaultName = 'ConsolidateTract'
    ConfigClass = ConsolidateTractConfig

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)
        # Not checking that at least one inputCatalog exists, because that
        # would be an empty quantum graph.
        self.log.info("Concatenating %s per-patch %s Tables",
                      len(inputs['inputCatalogs']),
                      inputRefs.inputCatalogs[0].datasetType.name)
        df = pd.concat(inputs['inputCatalogs'])
        butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs)