__all__ = ["WriteObjectTableConfig", "WriteObjectTableTask",
           "WriteSourceTableConfig", "WriteSourceTableTask",
           "WriteRecalibratedSourceTableConfig", "WriteRecalibratedSourceTableTask",
           "PostprocessAnalysis",
           "TransformCatalogBaseConfig", "TransformCatalogBaseTask",
           "TransformObjectCatalogConfig", "TransformObjectCatalogTask",
           "ConsolidateObjectTableConfig", "ConsolidateObjectTableTask",
           "TransformSourceTableConfig", "TransformSourceTableTask",
           "ConsolidateVisitSummaryConfig", "ConsolidateVisitSummaryTask",
           "ConsolidateSourceTableConfig", "ConsolidateSourceTableTask",
           "MakeCcdVisitTableConfig", "MakeCcdVisitTableTask",
           "MakeVisitTableConfig", "MakeVisitTableTask",
           "WriteForcedSourceTableConfig", "WriteForcedSourceTableTask",
           "TransformForcedSourceTableConfig", "TransformForcedSourceTableTask",
           "ConsolidateTractConfig", "ConsolidateTractTask"]

import functools
import logging
import numbers
import os

import numpy as np
import pandas as pd

import lsst.afw.table as afwTable
import lsst.daf.base as dafBase
import lsst.pex.config as pexConfig
import lsst.pipe.base as pipeBase
from lsst.pipe.base import connectionTypes
from lsst.afw.image import ExposureSummaryStats
from lsst.meas.base import SingleFrameMeasurementTask, DetectorVisitIdGeneratorConfig
from lsst.skymap import BaseSkyMap

from .functors import CompositeFunctor, Column

log = logging.getLogger(__name__)


def flattenFilters(df, noDupCols=['coord_ra', 'coord_dec'], camelCase=False, inputBands=None):
    """Flatten a DataFrame with a multilevel column index."""
    newDf = pd.DataFrame()
    # band is the level-0 index of the column MultiIndex
    dfBands = df.columns.unique(level=0).values
    for band in dfBands:
        subdf = df[band]
        columnFormat = '{0}{1}' if camelCase else '{0}_{1}'
        newColumns = {c: columnFormat.format(band, c)
                      for c in subdf.columns if c not in noDupCols}
        cols = list(newColumns.keys())
        newDf = pd.concat([newDf, subdf[cols].rename(columns=newColumns)], axis=1)

    # Only keep bands that are present both in the input data and in inputBands.
    presentBands = dfBands if inputBands is None else list(set(inputBands).intersection(dfBands))
    # Copy the non-duplicated columns (e.g. coordinates) from one of the present bands.
    noDupDf = df[presentBands[0]][noDupCols]
    newDf = pd.concat([noDupDf, newDf], axis=1)
    return newDf
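

# Illustrative sketch (not part of the pipeline): flattenFilters turns a
# band-keyed column MultiIndex into flat, band-prefixed column names.  The toy
# DataFrame below stands in for a deepCoadd_obj-style table.
def _flattenFiltersExample():  # pragma: no cover - illustrative only
    columns = pd.MultiIndex.from_tuples(
        [("g", "coord_ra"), ("g", "coord_dec"), ("g", "psfFlux"),
         ("r", "coord_ra"), ("r", "coord_dec"), ("r", "psfFlux")])
    df = pd.DataFrame(np.ones((2, 6)), columns=columns)
    flat = flattenFilters(df)
    # -> columns: coord_ra, coord_dec, g_psfFlux, r_psfFlux
    return flat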


class WriteObjectTableConnections(pipeBase.PipelineTaskConnections,
                                  defaultTemplates={"coaddName": "deep"},
                                  dimensions=("tract", "patch", "skymap")):
    inputCatalogMeas = connectionTypes.Input(
        doc="Catalog of source measurements on the deepCoadd.",
        dimensions=("tract", "patch", "band", "skymap"),
        storageClass="SourceCatalog",
        name="{coaddName}Coadd_meas",
    )
    inputCatalogForcedSrc = connectionTypes.Input(
        doc="Catalog of forced measurements (shape and position parameters held fixed) on the deepCoadd.",
        dimensions=("tract", "patch", "band", "skymap"),
        storageClass="SourceCatalog",
        name="{coaddName}Coadd_forced_src",
    )
    inputCatalogRef = connectionTypes.Input(
        doc="Catalog marking the primary detection (which band provides a good shape and position) "
            "for each detection in deepCoadd_mergeDet.",
        dimensions=("tract", "patch", "skymap"),
        storageClass="SourceCatalog",
        name="{coaddName}Coadd_ref",
    )
    outputCatalog = connectionTypes.Output(
        doc="A vertical concatenation of the deepCoadd_{ref|meas|forced_src} catalogs, "
            "stored as a DataFrame with a multi-level column index per-patch.",
        dimensions=("tract", "patch", "skymap"),
        storageClass="DataFrame",
        name="{coaddName}Coadd_obj",
    )


class WriteObjectTableConfig(pipeBase.PipelineTaskConfig,
                             pipelineConnections=WriteObjectTableConnections):
    engine = pexConfig.Field(
        dtype=str,
        default="pyarrow",
        doc="Parquet engine for writing (pyarrow or fastparquet)",
        deprecated="This config is no longer used, and will be removed after v26.",
    )
    coaddName = pexConfig.Field(
        dtype=str,
        default="deep",
        doc="Name of coadd",
    )


class WriteObjectTableTask(pipeBase.PipelineTask):
    """Write filter-merged source tables as a DataFrame in parquet format."""
    _DefaultName = "writeObjectTable"
    ConfigClass = WriteObjectTableConfig

    # Names of table datasets to be merged
    inputDatasets = ('forced_src', 'meas', 'ref')

    # Name of table dataset to be created
    outputDataset = 'obj'

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)

        measDict = {ref.dataId['band']: {'meas': cat} for ref, cat in
                    zip(inputRefs.inputCatalogMeas, inputs['inputCatalogMeas'])}
        forcedSourceDict = {ref.dataId['band']: {'forced_src': cat} for ref, cat in
                            zip(inputRefs.inputCatalogForcedSrc, inputs['inputCatalogForcedSrc'])}

        catalogs = {}
        for band in measDict.keys():
            catalogs[band] = {'meas': measDict[band]['meas'],
                              'forced_src': forcedSourceDict[band]['forced_src'],
                              'ref': inputs['inputCatalogRef']}
        dataId = butlerQC.quantum.dataId
        df = self.run(catalogs=catalogs, tract=dataId['tract'], patch=dataId['patch'])
        outputs = pipeBase.Struct(outputCatalog=df)
        butlerQC.put(outputs, outputRefs)

    def run(self, catalogs, tract, patch):
        """Merge multiple catalogs.

        Parameters
        ----------
        catalogs : `dict`
            Mapping from filter names to dict of catalogs.
        tract : `int`
            tractId to use for the tractId column.
        patch : `str`
            patchId to use for the patchId column.

        Returns
        -------
        catalog : `pandas.DataFrame`
            Merged dataframe.
        """
        dfs = []
        for filt, tableDict in catalogs.items():
            for dataset, table in tableDict.items():
                # Convert afw table to pandas DataFrame, indexed on object id
                df = table.asAstropy().to_pandas().set_index('id', drop=True)

                # Sort columns by name, to ensure matching schema among patches
                df = df.reindex(sorted(df.columns), axis=1)
                df = df.assign(tractId=tract, patchId=patch)

                # Make columns a 3-level MultiIndex
                df.columns = pd.MultiIndex.from_tuples([(dataset, filt, c) for c in df.columns],
                                                       names=('dataset', 'band', 'column'))
                dfs.append(df)

        catalog = functools.reduce(lambda d1, d2: d1.join(d2), dfs)
        return catalog
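

# Illustrative sketch (not part of the pipeline): the merge above builds one
# 3-level-column DataFrame per (dataset, band) pair and then joins them on the
# shared object-id index.  Plain pandas reproduces the pattern:
def _multilevelJoinExample():  # pragma: no cover - illustrative only
    dfs = []
    for dataset in ("meas", "forced_src"):
        for band in ("g", "r"):
            df = pd.DataFrame({"flux": [1.0, 2.0]}, index=[10, 11])
            df.columns = pd.MultiIndex.from_tuples(
                [(dataset, band, c) for c in df.columns],
                names=("dataset", "band", "column"))
            dfs.append(df)
    # Horizontal join on the common index gives one wide, per-patch table.
    return functools.reduce(lambda d1, d2: d1.join(d2), dfs)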


class WriteSourceTableConnections(pipeBase.PipelineTaskConnections,
                                  defaultTemplates={"catalogType": ""},
                                  dimensions=("instrument", "visit", "detector")):
    catalog = connectionTypes.Input(
        doc="Input full-depth catalog of sources produced by CalibrateTask",
        name="{catalogType}src",
        storageClass="SourceCatalog",
        dimensions=("instrument", "visit", "detector"),
    )
    outputCatalog = connectionTypes.Output(
        doc="Catalog of sources, `src` in DataFrame/Parquet format. The 'id' column is "
            "replaced with an index; all other columns are unchanged.",
        name="{catalogType}source",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector"),
    )


class WriteSourceTableConfig(pipeBase.PipelineTaskConfig,
                             pipelineConnections=WriteSourceTableConnections):
    idGenerator = DetectorVisitIdGeneratorConfig.make_field()


class WriteSourceTableTask(pipeBase.PipelineTask):
    """Write source table to DataFrame Parquet format."""
    _DefaultName = "writeSourceTable"
    ConfigClass = WriteSourceTableConfig

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)
        inputs['ccdVisitId'] = self.config.idGenerator.apply(butlerQC.quantum.dataId).catalog_id
        result = self.run(**inputs)
        outputs = pipeBase.Struct(outputCatalog=result.table)
        butlerQC.put(outputs, outputRefs)

    def run(self, catalog, ccdVisitId=None, **kwargs):
        """Convert `src` catalog to DataFrame

        Parameters
        ----------
        catalog : `afwTable.SourceCatalog`
            catalog to be converted
        ccdVisitId : `int`
            ccdVisitId to be added as a column
        **kwargs
            Additional keyword arguments are ignored as a convenience for
            subclasses that pass the same arguments to several different
            methods.

        Returns
        -------
        result : `~lsst.pipe.base.Struct`
            ``table``
                `DataFrame` version of the input catalog
        """
        self.log.info("Generating DataFrame from src catalog ccdVisitId=%s", ccdVisitId)
        df = catalog.asAstropy().to_pandas().set_index('id', drop=True)
        df['ccdVisitId'] = ccdVisitId

        return pipeBase.Struct(table=df)
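

# Illustrative sketch (not part of the pipeline): the conversion above relies
# on afw's ``asAstropy()``; with a plain ``astropy.table.Table`` the same
# reshaping looks like this (``astropy`` assumed to be installed).
def _srcToDataFrameExample():  # pragma: no cover - illustrative only
    from astropy.table import Table
    src = Table({"id": [1, 2, 3], "base_PsfFlux_instFlux": [10.0, 11.0, 12.0]})
    df = src.to_pandas().set_index("id", drop=True)
    df["ccdVisitId"] = 20220101  # hypothetical integer id
    return df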


class WriteRecalibratedSourceTableConnections(WriteSourceTableConnections,
                                              defaultTemplates={"catalogType": "",
                                                                "skyWcsName": "gbdesAstrometricFit",
                                                                "photoCalibName": "fgcm"},
                                              dimensions=("instrument", "visit", "detector", "skymap")):
    skyMap = connectionTypes.Input(
        doc="skyMap needed to choose which tract-level calibrations to use when multiple available",
        name=BaseSkyMap.SKYMAP_DATASET_TYPE_NAME,
        storageClass="SkyMap",
        dimensions=("skymap",),
    )
    exposure = connectionTypes.Input(
        doc="Input exposure to perform photometry on.",
        name="calexp",
        storageClass="ExposureF",
        dimensions=["instrument", "visit", "detector"],
    )
    externalSkyWcsTractCatalog = connectionTypes.Input(
        doc=("Per-tract, per-visit wcs calibrations. These catalogs use the detector "
             "id for the catalog id, sorted on id for fast lookup."),
        name="{skyWcsName}SkyWcsCatalog",
        storageClass="ExposureCatalog",
        dimensions=["instrument", "visit", "tract"],
    )
    externalSkyWcsGlobalCatalog = connectionTypes.Input(
        doc=("Per-visit wcs calibrations computed globally (with no tract information). "
             "These catalogs use the detector id for the catalog id, sorted on id for "
             "fast lookup."),
        name="finalVisitSummary",
        storageClass="ExposureCatalog",
        dimensions=["instrument", "visit"],
    )
    externalPhotoCalibTractCatalog = connectionTypes.Input(
        doc=("Per-tract, per-visit photometric calibrations. These catalogs use the "
             "detector id for the catalog id, sorted on id for fast lookup."),
        name="{photoCalibName}PhotoCalibCatalog",
        storageClass="ExposureCatalog",
        dimensions=["instrument", "visit", "tract"],
    )
    externalPhotoCalibGlobalCatalog = connectionTypes.Input(
        doc=("Per-visit photometric calibrations computed globally (with no tract "
             "information). These catalogs use the detector id for the catalog id, "
             "sorted on id for fast lookup."),
        name="finalVisitSummary",
        storageClass="ExposureCatalog",
        dimensions=["instrument", "visit"],
    )

    def __init__(self, *, config=None):
        super().__init__(config=config)
        # Remove connections for the calibration catalogs that the
        # configuration says will not be used.
        if config.doApplyExternalSkyWcs and config.doReevaluateSkyWcs:
            if config.useGlobalExternalSkyWcs:
                self.inputs.remove("externalSkyWcsTractCatalog")
            else:
                self.inputs.remove("externalSkyWcsGlobalCatalog")
        else:
            self.inputs.remove("externalSkyWcsTractCatalog")
            self.inputs.remove("externalSkyWcsGlobalCatalog")
        if config.doApplyExternalPhotoCalib and config.doReevaluatePhotoCalib:
            if config.useGlobalExternalPhotoCalib:
                self.inputs.remove("externalPhotoCalibTractCatalog")
            else:
                self.inputs.remove("externalPhotoCalibGlobalCatalog")
        else:
            self.inputs.remove("externalPhotoCalibTractCatalog")
            self.inputs.remove("externalPhotoCalibGlobalCatalog")


class WriteRecalibratedSourceTableConfig(WriteSourceTableConfig,
                                         pipelineConnections=WriteRecalibratedSourceTableConnections):

    doReevaluatePhotoCalib = pexConfig.Field(
        dtype=bool,
        doc=("Add or replace local photoCalib columns"),
    )
    doReevaluateSkyWcs = pexConfig.Field(
        dtype=bool,
        doc=("Add or replace local WCS columns and update the coord columns, coord_ra and coord_dec"),
    )
    doApplyExternalPhotoCalib = pexConfig.Field(
        dtype=bool,
        doc=("If and only if doReevaluatePhotoCalib, apply the photometric calibrations from an external "
             "algorithm such as FGCM or jointcal, else use the photoCalib already attached to the exposure."),
    )
    doApplyExternalSkyWcs = pexConfig.Field(
        dtype=bool,
        doc=("If and only if doReevaluateSkyWcs, apply the WCS from an external algorithm such as jointcal, "
             "else use the wcs already attached to the exposure."),
    )
    useGlobalExternalPhotoCalib = pexConfig.Field(
        dtype=bool,
        doc=("When using doApplyExternalPhotoCalib, use 'global' calibrations "
             "that are not run per-tract. When False, use per-tract photometric "
             "calibration files.")
    )
    useGlobalExternalSkyWcs = pexConfig.Field(
        dtype=bool,
        doc=("When using doApplyExternalSkyWcs, use 'global' calibrations "
             "that are not run per-tract. When False, use per-tract wcs "
             "files.")
    )
    idGenerator = DetectorVisitIdGeneratorConfig.make_field()

    def validate(self):
        super().validate()
        if self.doApplyExternalSkyWcs and not self.doReevaluateSkyWcs:
            log.warning("doApplyExternalSkyWcs=True but doReevaluateSkyWcs=False. "
                        "External SkyWcs will not be read or evaluated.")
        if self.doApplyExternalPhotoCalib and not self.doReevaluatePhotoCalib:
            log.warning("doApplyExternalPhotoCalib=True but doReevaluatePhotoCalib=False. "
                        "External PhotoCalib will not be read or evaluated.")


class WriteRecalibratedSourceTableTask(WriteSourceTableTask):
    """Write source table to DataFrame Parquet format."""
    _DefaultName = "writeRecalibratedSourceTable"
    ConfigClass = WriteRecalibratedSourceTableConfig

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)

        idGenerator = self.config.idGenerator.apply(butlerQC.quantum.dataId)
        inputs['idGenerator'] = idGenerator
        inputs['ccdVisitId'] = idGenerator.catalog_id

        if self.config.doReevaluatePhotoCalib or self.config.doReevaluateSkyWcs:
            if self.config.doApplyExternalPhotoCalib or self.config.doApplyExternalSkyWcs:
                inputs['exposure'] = self.attachCalibs(inputRefs, **inputs)

            inputs['catalog'] = self.addCalibColumns(**inputs)

        result = self.run(**inputs)
        outputs = pipeBase.Struct(outputCatalog=result.table)
        butlerQC.put(outputs, outputRefs)

    def attachCalibs(self, inputRefs, skyMap, exposure, externalSkyWcsGlobalCatalog=None,
                     externalSkyWcsTractCatalog=None, externalPhotoCalibGlobalCatalog=None,
                     externalPhotoCalibTractCatalog=None, **kwargs):
        """Apply external calibrations to exposure per configuration

        When multiple tract-level calibrations overlap, select the one with the
        center closest to detector.

        Parameters
        ----------
        inputRefs : `~lsst.pipe.base.InputQuantizedConnection`
            Input references, used for the dataIds of the tract-level calibrations.
        skyMap : `~lsst.skymap.BaseSkyMap`
            skyMap to lookup tract geometry and WCS.
        exposure : `lsst.afw.image.exposure.Exposure`
            Input exposure to adjust calibrations.
        externalSkyWcsGlobalCatalog : `~lsst.afw.table.ExposureCatalog`, optional
            Exposure catalog with external skyWcs to be applied per config.
        externalSkyWcsTractCatalog : `~lsst.afw.table.ExposureCatalog`, optional
            Exposure catalog with external skyWcs to be applied per config.
        externalPhotoCalibGlobalCatalog : `~lsst.afw.table.ExposureCatalog`, optional
            Exposure catalog with external photoCalib to be applied per config.
        externalPhotoCalibTractCatalog : `~lsst.afw.table.ExposureCatalog`, optional
            Exposure catalog with external photoCalib to be applied per config.
        **kwargs
            Additional keyword arguments are ignored to facilitate passing the
            same arguments to several methods.

        Returns
        -------
        exposure : `lsst.afw.image.exposure.Exposure`
            Exposure with adjusted calibrations.
        """
        if not self.config.doApplyExternalSkyWcs:
            # Do not modify the exposure's SkyWcs
            externalSkyWcsCatalog = None
        elif self.config.useGlobalExternalSkyWcs:
            # Use the global external SkyWcs
            externalSkyWcsCatalog = externalSkyWcsGlobalCatalog
            self.log.info('Applying global SkyWcs')
        else:
            # Use a tract-level external SkyWcs from the closest overlapping tract
            inputRef = getattr(inputRefs, 'externalSkyWcsTractCatalog')
            tracts = [ref.dataId['tract'] for ref in inputRef]
            if len(tracts) == 1:
                ind = 0
                self.log.info('Applying tract-level SkyWcs from tract %s', tracts[ind])
            else:
                if exposure.getWcs() is None:
                    raise ValueError("Trying to locate nearest tract, but exposure.wcs is None.")
                ind = self.getClosestTract(tracts, skyMap,
                                           exposure.getBBox(), exposure.getWcs())
                self.log.info('Multiple overlapping externalSkyWcsTractCatalogs found (%s). '
                              'Applying closest to detector center: tract=%s', str(tracts), tracts[ind])
            externalSkyWcsCatalog = externalSkyWcsTractCatalog[ind]

        if not self.config.doApplyExternalPhotoCalib:
            # Do not modify the exposure's PhotoCalib
            externalPhotoCalibCatalog = None
        elif self.config.useGlobalExternalPhotoCalib:
            # Use the global external PhotoCalib
            externalPhotoCalibCatalog = externalPhotoCalibGlobalCatalog
            self.log.info('Applying global PhotoCalib')
        else:
            # Use a tract-level external PhotoCalib from the closest overlapping tract
            inputRef = getattr(inputRefs, 'externalPhotoCalibTractCatalog')
            tracts = [ref.dataId['tract'] for ref in inputRef]
            if len(tracts) == 1:
                ind = 0
                self.log.info('Applying tract-level PhotoCalib from tract %s', tracts[ind])
            else:
                ind = self.getClosestTract(tracts, skyMap,
                                           exposure.getBBox(), exposure.getWcs())
                self.log.info('Multiple overlapping externalPhotoCalibTractCatalogs found (%s). '
                              'Applying closest to detector center: tract=%s', str(tracts), tracts[ind])
            externalPhotoCalibCatalog = externalPhotoCalibTractCatalog[ind]

        return self.prepareCalibratedExposure(exposure, externalSkyWcsCatalog, externalPhotoCalibCatalog)

    def getClosestTract(self, tracts, skyMap, bbox, wcs):
        """Find the index of the tract closest to detector from list of tractIds

        Parameters
        ----------
        tracts : `list` [`int`]
            Iterable of integer tractIds
        skyMap : `~lsst.skymap.BaseSkyMap`
            skyMap to lookup tract geometry and wcs
        bbox : `lsst.geom.Box2I`
            Detector bbox, center of which will compared to tract centers
        wcs : `lsst.afw.geom.SkyWcs`
            Detector Wcs object to map the detector center to SkyCoord

        Returns
        -------
        index : `int`
            Index of the closest tract.
        """
        center = wcs.pixelToSky(bbox.getCenter())
        sep = []
        for tractId in tracts:
            tract = skyMap[tractId]
            tractCenter = tract.getWcs().pixelToSky(tract.getBBox().getCenter())
            sep.append(center.separation(tractCenter))

        return np.argmin(sep)

    def prepareCalibratedExposure(self, exposure, externalSkyWcsCatalog=None, externalPhotoCalibCatalog=None):
        """Prepare a calibrated exposure and apply external calibrations
        if so configured.

        Parameters
        ----------
        exposure : `lsst.afw.image.exposure.Exposure`
            Input exposure to adjust calibrations.
        externalSkyWcsCatalog : `lsst.afw.table.ExposureCatalog`, optional
            Exposure catalog with external skyWcs to be applied
            if config.doApplyExternalSkyWcs=True. Catalog uses the detector id
            for the catalog id, sorted on id for fast lookup.
        externalPhotoCalibCatalog : `lsst.afw.table.ExposureCatalog`, optional
            Exposure catalog with external photoCalib to be applied
            if config.doApplyExternalPhotoCalib=True. Catalog uses the detector
            id for the catalog id, sorted on id for fast lookup.

        Returns
        -------
        exposure : `lsst.afw.image.exposure.Exposure`
            Exposure with adjusted calibrations.
        """
        detectorId = exposure.getInfo().getDetector().getId()

        if externalPhotoCalibCatalog is not None:
            row = externalPhotoCalibCatalog.find(detectorId)
            if row is None:
                self.log.warning("Detector id %s not found in externalPhotoCalibCatalog; "
                                 "Using original photoCalib.", detectorId)
            else:
                photoCalib = row.getPhotoCalib()
                if photoCalib is None:
                    self.log.warning("Detector id %s has None for photoCalib in externalPhotoCalibCatalog; "
                                     "Using original photoCalib.", detectorId)
                else:
                    exposure.setPhotoCalib(photoCalib)

        if externalSkyWcsCatalog is not None:
            row = externalSkyWcsCatalog.find(detectorId)
            if row is None:
                self.log.warning("Detector id %s not found in externalSkyWcsCatalog; "
                                 "Using original skyWcs.", detectorId)
            else:
                skyWcs = row.getWcs()
                if skyWcs is None:
                    self.log.warning("Detector id %s has None for skyWcs in externalSkyWcsCatalog; "
                                     "Using original skyWcs.", detectorId)
                else:
                    exposure.setWcs(skyWcs)

        return exposure

    def addCalibColumns(self, catalog, exposure, idGenerator, **kwargs):
        """Add or replace columns with calibs evaluated at each centroid

        Add or replace 'base_LocalWcs' and 'base_LocalPhotoCalib' columns in
        a source catalog, by rerunning the plugins.

        Parameters
        ----------
        catalog : `lsst.afw.table.SourceCatalog`
            catalog to which calib columns will be added
        exposure : `lsst.afw.image.exposure.Exposure`
            Exposure with attached PhotoCalibs and SkyWcs attributes to be
            reevaluated at local centroids. Pixels are not required.
        idGenerator : `lsst.meas.base.IdGenerator`
            Object that generates Source IDs and random seeds.
        **kwargs
            Additional keyword arguments are ignored to facilitate passing the
            same arguments to several methods.

        Returns
        -------
        newCat : `lsst.afw.table.SourceCatalog`
            Source Catalog with requested local calib columns
        """
        measureConfig = SingleFrameMeasurementTask.ConfigClass()
        measureConfig.doReplaceWithNoise = False

        # Clear all slots; only the requested local-calib plugins are run.
        for slot in measureConfig.slots:
            setattr(measureConfig.slots, slot, None)

        measureConfig.plugins.names = []
        if self.config.doReevaluateSkyWcs:
            measureConfig.plugins.names.add('base_LocalWcs')
            self.log.info("Re-evaluating base_LocalWcs plugin")
        if self.config.doReevaluatePhotoCalib:
            measureConfig.plugins.names.add('base_LocalPhotoCalib')
            self.log.info("Re-evaluating base_LocalPhotoCalib plugin")
        pluginsNotToCopy = tuple(measureConfig.plugins.names)

        # Create a new schema, copying all columns from the original catalog
        # except for the ones that will be re-evaluated.
        aliasMap = catalog.schema.getAliasMap()
        mapper = afwTable.SchemaMapper(catalog.schema)
        for item in catalog.schema:
            if not item.field.getName().startswith(pluginsNotToCopy):
                mapper.addMapping(item.key)

        schema = mapper.getOutputSchema()
        measurement = SingleFrameMeasurementTask(config=measureConfig, schema=schema)
        schema.setAliasMap(aliasMap)
        newCat = afwTable.SourceCatalog(schema)
        newCat.extend(catalog, mapper=mapper)

        # Update coord_ra/coord_dec, which are expected to be positions on the
        # sky and are used as such in sdm tables without transform.
        if self.config.doReevaluateSkyWcs and exposure.wcs is not None:
            afwTable.updateSourceCoords(exposure.wcs, newCat)

        measurement.run(measCat=newCat, exposure=exposure, exposureId=idGenerator.catalog_id)

        return newCat
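

# Illustrative sketch (not part of the pipeline): getClosestTract above picks
# the tract whose center is nearest the detector center.  The idea reduces to
# an argmin over angular separations; here the sky-coordinate machinery is
# replaced by a plain great-circle formula on toy centers.
def _closestCenterExample():  # pragma: no cover - illustrative only
    def separation(ra1, dec1, ra2, dec2):
        # all angles in radians; spherical law of cosines
        return np.arccos(np.sin(dec1) * np.sin(dec2)
                         + np.cos(dec1) * np.cos(dec2) * np.cos(ra1 - ra2))

    detCenter = (np.radians(150.0), np.radians(2.0))
    tractCenters = [(np.radians(149.0), np.radians(2.5)),
                    (np.radians(150.1), np.radians(1.9))]
    seps = [separation(*detCenter, ra, dec) for ra, dec in tractCenters]
    return int(np.argmin(seps))  # -> 1, the second center is closest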
638 """Calculate columns from DataFrames or handles storing DataFrames.
640 This object manages and organizes an arbitrary set of computations
641 on a catalog. The catalog
is defined by a
642 `DeferredDatasetHandle`
or `InMemoryDatasetHandle` object
643 (
or list thereof), such
as a ``deepCoadd_obj`` dataset,
and the
644 computations are defined by a collection of
646 ``CompositeFunctor``).
648 After the object
is initialized, accessing the ``.df`` attribute (which
649 holds the `pandas.DataFrame` containing the results of the calculations)
650 triggers computation of said dataframe.
652 One of the conveniences of using this object
is the ability to define a
653 desired common filter
for all functors. This enables the same functor
654 collection to be passed to several different `PostprocessAnalysis` objects
655 without having to change the original functor collection, since the ``filt``
656 keyword argument of this object triggers an overwrite of the ``filt``
657 property
for all functors
in the collection.
659 This object also allows a list of refFlags to be passed,
and defines a set
660 of default refFlags that are always included even
if not requested.
662 If a list of DataFrames
or Handles
is passed, rather than a single one,
663 then the calculations will be mapped over all the input catalogs. In
664 principle, it should be straightforward to parallelize this activity, but
665 initial tests have failed (see TODO
in code comments).
669 handles : `~lsst.daf.butler.DeferredDatasetHandle`
or
670 `~lsst.pipe.base.InMemoryDatasetHandle`
or
672 Source
catalog(s)
for computation.
674 Computations to do (functors that act on ``handles``).
675 If a dict, the output
676 DataFrame will have columns keyed accordingly.
677 If a list, the column keys will come
from the
678 ``.shortname`` attribute of each functor.
680 filt : `str`, optional
681 Filter
in which to calculate. If provided,
682 this will overwrite any existing ``.filt`` attribute
683 of the provided functors.
685 flags : `list`, optional
686 List of flags (per-band) to include
in output table.
687 Taken
from the ``meas`` dataset
if applied to a multilevel Object Table.
689 refFlags : `list`, optional
690 List of refFlags (only reference band) to include
in output table.
692 forcedFlags : `list`, optional
693 List of flags (per-band) to include
in output table.
694 Taken
from the ``forced_src`` dataset
if applied to a
695 multilevel Object Table. Intended
for flags
from measurement plugins
696 only run during multi-band forced-photometry.
698 _defaultRefFlags = []

    def __init__(self, handles, functors, filt=None, flags=None, refFlags=None, forcedFlags=None):
        self.handles = handles
        self.functors = functors

        self.filt = filt
        self.flags = list(flags) if flags is not None else []
        self.forcedFlags = list(forcedFlags) if forcedFlags is not None else []
        self.refFlags = list(self._defaultRefFlags)
        if refFlags is not None:
            self.refFlags += list(refFlags)

        self._df = None

    @property
    def defaultFuncs(self):
        funcs = dict(self._defaultFuncs)
        return funcs

    @property
    def func(self):
        additionalFuncs = self.defaultFuncs
        additionalFuncs.update({flag: Column(flag, dataset='forced_src') for flag in self.forcedFlags})
        additionalFuncs.update({flag: Column(flag, dataset='ref') for flag in self.refFlags})
        additionalFuncs.update({flag: Column(flag, dataset='meas') for flag in self.flags})

        if isinstance(self.functors, CompositeFunctor):
            func = self.functors
        else:
            func = CompositeFunctor(self.functors)

        func.funcDict.update(additionalFuncs)
        func.filt = self.filt

        return func

    @property
    def noDupCols(self):
        return [name for name, func in self.func.funcDict.items() if func.noDup or func.dataset == 'ref']

    @property
    def df(self):
        if self._df is None:
            self.compute()
        return self._df

    def compute(self, dropna=False, pool=None):
        # map over multiple handles
        if type(self.handles) in (list, tuple):
            if pool is None:
                dflist = [self.func(handle, dropna=dropna) for handle in self.handles]
            else:
                # TODO: parallelization of this step has failed in initial
                # tests; revisit before enabling pool-based computation.
                dflist = pool.map(functools.partial(self.func, dropna=dropna), self.handles)
            self._df = pd.concat(dflist)
        else:
            self._df = self.func(self.handles, dropna=dropna)

        return self._df
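

# Illustrative sketch (not part of the pipeline): ``compute`` maps one callable
# over a list of inputs (optionally via a multiprocessing pool) and
# concatenates the per-input DataFrames, exactly as plain pandas would:
def _mapAndConcatExample():  # pragma: no cover - illustrative only
    def perHandle(n):
        # stand-in for ``self.func(handle, dropna=dropna)``
        return pd.DataFrame({"value": np.arange(n)})

    handles = [2, 3]
    dflist = [perHandle(handle) for handle in handles]
    return pd.concat(dflist)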
764 """Expected Connections for subclasses of TransformCatalogBaseTask.
768 inputCatalog = connectionTypes.Input(
770 storageClass=
"DataFrame",
772 outputCatalog = connectionTypes.Output(
774 storageClass=
"DataFrame",
779 pipelineConnections=TransformCatalogBaseConnections):
780 functorFile = pexConfig.Field(
782 doc=
"Path to YAML file specifying Science Data Model functors to use "
783 "when copying columns and computing calibrated values.",
787 primaryKey = pexConfig.Field(
789 doc=
"Name of column to be set as the DataFrame index. If None, the index"
790 "will be named `id`",
794 columnsFromDataId = pexConfig.ListField(
798 doc=
"Columns to extract from the dataId",
803 """Base class for transforming/standardizing a catalog by applying functors
804 that convert units and apply calibrations.
806 The purpose of this task
is to perform a set of computations on an input
807 ``DeferredDatasetHandle``
or ``InMemoryDatasetHandle`` that holds a
808 ``DataFrame`` dataset (such
as ``deepCoadd_obj``),
and write the results to
809 a new dataset (which needs to be declared
in an ``outputDataset``
812 The calculations to be performed are defined
in a YAML file that specifies
813 a set of functors to be computed, provided
as a ``--functorFile`` config
814 parameter. An example of such a YAML file
is the following:
821 args: slot_Centroid_x
824 args: slot_Centroid_y
826 functor: LocalNanojansky
828 - slot_PsfFlux_instFlux
829 - slot_PsfFlux_instFluxErr
830 - base_LocalPhotoCalib
831 - base_LocalPhotoCalibErr
833 functor: LocalNanojanskyErr
835 - slot_PsfFlux_instFlux
836 - slot_PsfFlux_instFluxErr
837 - base_LocalPhotoCalib
838 - base_LocalPhotoCalibErr
842 The names
for each entry under
"func" will become the names of columns
in
843 the output dataset. All the functors referenced are defined
in
845 functor are
in the `args` list,
and any additional entries
for each column
846 other than
"functor" or "args" (e.g., ``
'filt'``, ``
'dataset'``) are
847 treated
as keyword arguments to be passed to the functor initialization.
849 The
"flags" entry
is the default shortcut
for `Column` functors.
850 All columns listed under
"flags" will be copied to the output table
851 untransformed. They can be of any datatype.
852 In the special case of transforming a multi-level oject table
with
853 band
and dataset indices (deepCoadd_obj), these will be taked
from the
854 `meas` dataset
and exploded out per band.
856 There are two special shortcuts that only apply when transforming
857 multi-level Object (deepCoadd_obj) tables:
858 - The
"refFlags" entry
is shortcut
for `Column` functor
859 taken
from the `
'ref'` dataset
if transforming an ObjectTable.
860 - The
"forcedFlags" entry
is shortcut
for `Column` functors.
861 taken
from the ``forced_src`` dataset
if transforming an ObjectTable.
862 These are expanded out per band.
866 to organize
and excecute the calculations.
    @property
    def _DefaultName(self):
        raise NotImplementedError('Subclass must define "_DefaultName" attribute')

    @property
    def outputDataset(self):
        raise NotImplementedError('Subclass must define "outputDataset" attribute')

    @property
    def inputDataset(self):
        raise NotImplementedError('Subclass must define "inputDataset" attribute')

    @property
    def ConfigClass(self):
        raise NotImplementedError('Subclass must define "ConfigClass" attribute')

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        if self.config.functorFile:
            self.log.info('Loading transform functor definitions from %s',
                          self.config.functorFile)
            self.funcs = CompositeFunctor.from_file(self.config.functorFile)
            self.funcs.update(dict(PostprocessAnalysis._defaultFuncs))
        else:
            self.funcs = None

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)
        if self.funcs is None:
            raise ValueError("config.functorFile is None. "
                             "Must be a valid path to yaml in order to run Task as a PipelineTask.")
        result = self.run(handle=inputs['inputCatalog'], funcs=self.funcs,
                          dataId=outputRefs.outputCatalog.dataId.full)
        outputs = pipeBase.Struct(outputCatalog=result)
        butlerQC.put(outputs, outputRefs)

    def run(self, handle, funcs=None, dataId=None, band=None):
        """Do postprocessing calculations

        Takes a ``DeferredDatasetHandle`` or ``InMemoryDatasetHandle`` or
        ``DataFrame`` object and dataId,
        returns a dataframe with results of postprocessing calculations.

        Parameters
        ----------
        handle : `~lsst.daf.butler.DeferredDatasetHandle` or
                 `~lsst.pipe.base.InMemoryDatasetHandle` or
                 `~pandas.DataFrame`, or list of these.
            DataFrames from which calculations are done.
        funcs : `~lsst.pipe.tasks.functors.Functor`
            Functors to apply to the table's columns
        dataId : dict, optional
            Used to add a `patchId` column to the output dataframe.
        band : `str`, optional
            Filter band that is being processed.

        Returns
        -------
        df : `pandas.DataFrame`
        """
        self.log.info("Transforming/standardizing the source table dataId: %s", dataId)

        df = self.transform(band, handle, funcs, dataId).df
        self.log.info("Made a table of %d columns and %d rows", len(df.columns), len(df))
        return df

    def getAnalysis(self, handles, funcs=None, band=None):
        if funcs is None:
            funcs = self.funcs
        analysis = PostprocessAnalysis(handles, funcs, filt=band)
        return analysis

    def transform(self, band, handles, funcs, dataId):
        analysis = self.getAnalysis(handles, funcs=funcs, band=band)
        df = analysis.df
        if dataId and self.config.columnsFromDataId:
            for key in self.config.columnsFromDataId:
                if key in dataId:
                    df[str(key)] = dataId[key]
                else:
                    raise ValueError(f"'{key}' in config.columnsFromDataId not found in dataId: {dataId}")

        if self.config.primaryKey:
            if df.index.name != self.config.primaryKey and self.config.primaryKey in df:
                df.reset_index(inplace=True, drop=True)
                df.set_index(self.config.primaryKey, inplace=True)

        return pipeBase.Struct(
            df=df,
            analysis=analysis,
        )


class TransformObjectCatalogConnections(pipeBase.PipelineTaskConnections,
                                        defaultTemplates={"coaddName": "deep"},
                                        dimensions=("tract", "patch", "skymap")):
    inputCatalog = connectionTypes.Input(
        doc="The vertical concatenation of the deepCoadd_{ref|meas|forced_src} catalogs, "
            "stored as a DataFrame with a multi-level column index per-patch.",
        dimensions=("tract", "patch", "skymap"),
        storageClass="DataFrame",
        name="{coaddName}Coadd_obj",
        deferLoad=True,
    )
    outputCatalog = connectionTypes.Output(
        doc="Per-Patch Object Table of columns transformed from the deepCoadd_obj table per the standard "
            "Data Model.",
        dimensions=("tract", "patch", "skymap"),
        storageClass="DataFrame",
        name="objectTable",
    )


class TransformObjectCatalogConfig(TransformCatalogBaseConfig,
                                   pipelineConnections=TransformObjectCatalogConnections):
    coaddName = pexConfig.Field(
        dtype=str,
        default="deep",
        doc="Name of coadd",
    )
    filterMap = pexConfig.DictField(
        keytype=str,
        itemtype=str,
        doc=("Dictionary mapping full filter name to short one for column name munging. "
             "These filters determine the output columns no matter what filters the "
             "input data actually contain."),
        deprecated=("Coadds are now identified by the band, so this transform is unused. "
                    "Will be removed after v22."),
    )
    outputBands = pexConfig.ListField(
        dtype=str,
        default=None,
        optional=True,
        doc=("These bands and only these bands will appear in the output,"
             " NaN-filled if the input does not include them."
             " If None, then use all bands found in the input."),
    )
    camelCase = pexConfig.Field(
        dtype=bool,
        doc=("Write per-band columns names with camelCase, else underscore "
             "For example: gPsFlux instead of g_PsFlux."),
    )
    multilevelOutput = pexConfig.Field(
        dtype=bool,
        doc=("Whether results dataframe should have a multilevel column index (True) or be flat "
             "and name-munged (False)."),
    )
    goodFlags = pexConfig.ListField(
        dtype=str,
        doc=("List of 'good' flags that should be set False when populating empty tables. "
             "All other flags are considered to be 'bad' flags and will be set to True."),
    )
    floatFillValue = pexConfig.Field(
        dtype=float,
        doc="Fill value for float fields when populating empty tables.",
    )
    integerFillValue = pexConfig.Field(
        dtype=int,
        doc="Fill value for integer fields when populating empty tables.",
    )

    def setDefaults(self):
        super().setDefaults()
        self.functorFile = os.path.join('$PIPE_TASKS_DIR', 'schemas', 'Object.yaml')
        self.primaryKey = 'objectId'
        self.columnsFromDataId = ['tract', 'patch']
        self.goodFlags = ['calib_astrometry_used',
                          'calib_photometry_reserved',
                          'calib_photometry_used',
                          'calib_psf_candidate',
                          'calib_psf_reserved',
                          'calib_psf_used']
1053 """Produce a flattened Object Table to match the format specified in
1056 Do the same set of postprocessing calculations on all bands.
1058 This is identical to `TransformCatalogBaseTask`,
except for that it does
1059 the specified functor calculations
for all filters present
in the
1060 input `deepCoadd_obj` table. Any specific ``
"filt"`` keywords specified
1061 by the YAML file will be superceded.
1063 _DefaultName = "transformObjectCatalog"
1064 ConfigClass = TransformObjectCatalogConfig

    def run(self, handle, funcs=None, dataId=None, band=None):
        # NOTE: the ``band`` kwarg is ignored here.
        dfDict = {}
        analysisDict = {}
        templateDf = pd.DataFrame()

        columns = handle.get(component='columns')
        inputBands = columns.unique(level=1).values

        outputBands = self.config.outputBands if self.config.outputBands else inputBands

        # Perform transform for data of filters that exist in the handle dataframe.
        for inputBand in inputBands:
            if inputBand not in outputBands:
                self.log.info("Ignoring %s band data in the input", inputBand)
                continue
            self.log.info("Transforming the catalog of band %s", inputBand)
            result = self.transform(inputBand, handle, funcs, dataId)
            dfDict[inputBand] = result.df
            analysisDict[inputBand] = result.analysis
            if templateDf.empty:
                templateDf = result.df

        # Put filler values in columns of other wanted bands.
        for filt in outputBands:
            if filt not in dfDict:
                self.log.info("Adding empty columns for band %s", filt)
                dfTemp = templateDf.copy()
                for col in dfTemp.columns:
                    testValue = dfTemp[col].values[0]
                    if isinstance(testValue, (np.bool_, pd.BooleanDtype)):
                        # Boolean flag type: fill based on whether it is a "good" flag.
                        if col in self.config.goodFlags:
                            fillValue = False
                        else:
                            fillValue = True
                    elif isinstance(testValue, numbers.Integral):
                        # numbers.Integral catches python, numpy, and pandas integers.
                        if isinstance(testValue, np.unsignedinteger):
                            raise ValueError("Parquet tables may not have unsigned integer columns.")
                        fillValue = self.config.integerFillValue
                    else:
                        fillValue = self.config.floatFillValue
                    dfTemp[col].values[:] = fillValue
                dfDict[filt] = dfTemp

        # This makes a multilevel column index, with band as first level.
        df = pd.concat(dfDict, axis=1, names=['band', 'column'])

        if not self.config.multilevelOutput:
            noDupCols = list(set.union(*[set(v.noDupCols) for v in analysisDict.values()]))
            if self.config.primaryKey in noDupCols:
                noDupCols.remove(self.config.primaryKey)
            if dataId and self.config.columnsFromDataId:
                noDupCols += self.config.columnsFromDataId
            df = flattenFilters(df, noDupCols=noDupCols, camelCase=self.config.camelCase,
                                inputBands=inputBands)

        self.log.info("Made a table of %d columns and %d rows", len(df.columns), len(df))
        return df


class ConsolidateObjectTableConnections(pipeBase.PipelineTaskConnections,
                                        dimensions=("tract", "skymap")):
    inputCatalogs = connectionTypes.Input(
        doc="Per-Patch objectTables conforming to the standard data model.",
        name="objectTable",
        storageClass="DataFrame",
        dimensions=("tract", "patch", "skymap"),
        multiple=True,
    )
    outputCatalog = connectionTypes.Output(
        doc="Per-tract horizontal concatenation of the input objectTables",
        name="objectTable_tract",
        storageClass="DataFrame",
        dimensions=("tract", "skymap"),
    )


class ConsolidateObjectTableConfig(pipeBase.PipelineTaskConfig,
                                   pipelineConnections=ConsolidateObjectTableConnections):
    coaddName = pexConfig.Field(
        dtype=str,
        default="deep",
        doc="Name of coadd",
    )


class ConsolidateObjectTableTask(pipeBase.PipelineTask):
    """Write patch-merged source tables to a tract-level DataFrame Parquet file.

    Concatenates the `objectTable` list into a per-tract `objectTable_tract`.
    """
    _DefaultName = "consolidateObjectTable"
    ConfigClass = ConsolidateObjectTableConfig

    inputDataset = 'objectTable'
    outputDataset = 'objectTable_tract'

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)
        self.log.info("Concatenating %s per-patch Object Tables",
                      len(inputs['inputCatalogs']))
        df = pd.concat(inputs['inputCatalogs'])
        butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs)


class TransformSourceTableConnections(pipeBase.PipelineTaskConnections,
                                      defaultTemplates={"catalogType": ""},
                                      dimensions=("instrument", "visit", "detector")):
    inputCatalog = connectionTypes.Input(
        doc="Wide input catalog of sources produced by WriteSourceTableTask",
        name="{catalogType}source",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector"),
    )
    outputCatalog = connectionTypes.Output(
        doc="Narrower, per-detector Source Table transformed and converted per a "
            "specified set of functors",
        name="{catalogType}sourceTable",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector"),
    )


class TransformSourceTableConfig(TransformCatalogBaseConfig,
                                 pipelineConnections=TransformSourceTableConnections):

    def setDefaults(self):
        super().setDefaults()
        self.functorFile = os.path.join('$PIPE_TASKS_DIR', 'schemas', 'Source.yaml')
        self.primaryKey = 'sourceId'
        self.columnsFromDataId = ['visit', 'detector', 'band', 'physical_filter']


class TransformSourceTableTask(TransformCatalogBaseTask):
    """Transform/standardize a source catalog"""
    _DefaultName = "transformSourceTable"
    ConfigClass = TransformSourceTableConfig


class ConsolidateVisitSummaryConnections(pipeBase.PipelineTaskConnections,
                                         dimensions=("instrument", "visit",),
                                         defaultTemplates={"calexpType": ""}):
    calexp = connectionTypes.Input(
        doc="Processed exposures used for metadata",
        name="calexp",
        storageClass="ExposureF",
        dimensions=("instrument", "visit", "detector"),
        deferLoad=True,
        multiple=True,
    )
    visitSummary = connectionTypes.Output(
        doc=("Per-visit consolidated exposure metadata. These catalogs use "
             "detector id for the id and are sorted for fast lookups of a "
             "detector."),
        name="visitSummary",
        storageClass="ExposureCatalog",
        dimensions=("instrument", "visit"),
    )
    visitSummarySchema = connectionTypes.InitOutput(
        doc="Schema of the visitSummary catalog",
        name="visitSummary_schema",
        storageClass="ExposureCatalog",
    )


class ConsolidateVisitSummaryConfig(pipeBase.PipelineTaskConfig,
                                    pipelineConnections=ConsolidateVisitSummaryConnections):
    """Config for ConsolidateVisitSummaryTask"""


class ConsolidateVisitSummaryTask(pipeBase.PipelineTask):
    """Task to consolidate per-detector visit metadata.

    This task aggregates the following metadata from all the detectors in a
    single visit into an exposure catalog:
    - The visitInfo.
    - The wcs.
    - The photoCalib.
    - The physical_filter and band (if available).
    - The psf size, shape, and effective area at the center of the detector.
    - The corners of the bounding box in right ascension/declination.

    Other quantities such as Detector, Psf, ApCorrMap, and TransmissionCurve
    are not persisted here because of storage concerns, and because of their
    limited utility as summary statistics.

    Tests for this task are performed in ci_hsc_gen3.
    """
    _DefaultName = "consolidateVisitSummary"
    ConfigClass = ConsolidateVisitSummaryConfig

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.schema = afwTable.ExposureTable.makeMinimalSchema()
        self.schema.addField('visit', type='L', doc='Visit number')
        self.schema.addField('physical_filter', type='String', size=32, doc='Physical filter')
        self.schema.addField('band', type='String', size=32, doc='Name of band')
        ExposureSummaryStats.update_schema(self.schema)
        self.visitSummarySchema = afwTable.ExposureCatalog(self.schema)

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        dataRefs = butlerQC.get(inputRefs.calexp)
        visit = dataRefs[0].dataId.byName()['visit']

        self.log.debug("Concatenating metadata from %d per-detector calexps (visit %d)",
                       len(dataRefs), visit)

        expCatalog = self._combineExposureMetadata(visit, dataRefs)

        butlerQC.put(expCatalog, outputRefs.visitSummary)

    def _combineExposureMetadata(self, visit, dataRefs):
        """Make a combined exposure catalog from a list of dataRefs.
        These dataRefs must point to exposures with wcs, summaryStats,
        and other visit metadata.

        Parameters
        ----------
        visit : `int`
            Visit identification number.
        dataRefs : `list` of `lsst.daf.butler.DeferredDatasetHandle`
            List of dataRefs in visit.

        Returns
        -------
        visitSummary : `lsst.afw.table.ExposureCatalog`
            Exposure catalog with per-detector summary information.
        """
        cat = afwTable.ExposureCatalog(self.schema)
        cat.resize(len(dataRefs))

        cat['visit'] = visit

        for i, dataRef in enumerate(dataRefs):
            visitInfo = dataRef.get(component='visitInfo')
            filterLabel = dataRef.get(component='filter')
            summaryStats = dataRef.get(component='summaryStats')
            detector = dataRef.get(component='detector')
            wcs = dataRef.get(component='wcs')
            photoCalib = dataRef.get(component='photoCalib')
            bbox = dataRef.get(component='bbox')
            validPolygon = dataRef.get(component='validPolygon')

            rec = cat[i]
            rec.setBBox(bbox)
            rec.setVisitInfo(visitInfo)
            rec.setWcs(wcs)
            rec.setPhotoCalib(photoCalib)
            rec.setValidPolygon(validPolygon)

            rec['physical_filter'] = filterLabel.physicalLabel if filterLabel.hasPhysicalLabel() else ""
            rec['band'] = filterLabel.bandLabel if filterLabel.hasBandLabel() else ""
            rec.setId(detector.getId())
            summaryStats.update_record(rec)

        metadata = dafBase.PropertyList()
        metadata.add("COMMENT", "Catalog id is detector id, sorted.")
        # We are looping over existing dataRefs, so the following is true.
        metadata.add("COMMENT", "Only detectors with data have entries.")
        cat.setMetadata(metadata)

        cat.sort()
        return cat


class ConsolidateSourceTableConnections(pipeBase.PipelineTaskConnections,
                                        defaultTemplates={"catalogType": ""},
                                        dimensions=("instrument", "visit")):
    inputCatalogs = connectionTypes.Input(
        doc="Input per-detector Source Tables",
        name="{catalogType}sourceTable",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector"),
        multiple=True,
    )
    outputCatalog = connectionTypes.Output(
        doc="Per-visit concatenation of Source Table",
        name="{catalogType}sourceTable_visit",
        storageClass="DataFrame",
        dimensions=("instrument", "visit"),
    )


class ConsolidateSourceTableConfig(pipeBase.PipelineTaskConfig,
                                   pipelineConnections=ConsolidateSourceTableConnections):
    pass


class ConsolidateSourceTableTask(pipeBase.PipelineTask):
    """Concatenate `sourceTable` list into a per-visit `sourceTable_visit`"""
    _DefaultName = 'consolidateSourceTable'
    ConfigClass = ConsolidateSourceTableConfig

    inputDataset = 'sourceTable'
    outputDataset = 'sourceTable_visit'

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        from .makeWarp import reorderRefs

        detectorOrder = [ref.dataId['detector'] for ref in inputRefs.inputCatalogs]
        detectorOrder.sort()
        inputRefs = reorderRefs(inputRefs, detectorOrder, dataIdKey='detector')
        inputs = butlerQC.get(inputRefs)
        self.log.info("Concatenating %s per-detector Source Tables",
                      len(inputs['inputCatalogs']))
        df = pd.concat(inputs['inputCatalogs'])
        butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs)
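

# Illustrative sketch (not part of the pipeline): sorting the per-detector
# inputs before concatenation keeps the consolidated table ordered by
# detector, which is what ``reorderRefs`` achieves for the quantum inputs.
def _orderedConcatExample():  # pragma: no cover - illustrative only
    perDetector = {2: pd.DataFrame({"flux": [1.0]}),
                   0: pd.DataFrame({"flux": [2.0]}),
                   1: pd.DataFrame({"flux": [3.0]})}
    ordered = [perDetector[det] for det in sorted(perDetector)]
    return pd.concat(ordered)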


class MakeCcdVisitTableConnections(pipeBase.PipelineTaskConnections,
                                   dimensions=("instrument",),
                                   defaultTemplates={"calexpType": ""}):
    visitSummaryRefs = connectionTypes.Input(
        doc="Data references for per-visit consolidated exposure metadata",
        name="finalVisitSummary",
        storageClass="ExposureCatalog",
        dimensions=("instrument", "visit"),
        multiple=True,
        deferLoad=True,
    )
    outputCatalog = connectionTypes.Output(
        doc="CCD and Visit metadata table",
        name="ccdVisitTable",
        storageClass="DataFrame",
        dimensions=("instrument",),
    )


class MakeCcdVisitTableConfig(pipeBase.PipelineTaskConfig,
                              pipelineConnections=MakeCcdVisitTableConnections):
    idGenerator = DetectorVisitIdGeneratorConfig.make_field()


class MakeCcdVisitTableTask(pipeBase.PipelineTask):
    """Produce a `ccdVisitTable` from the visit summary exposure catalogs."""
    _DefaultName = 'makeCcdVisitTable'
    ConfigClass = MakeCcdVisitTableConfig

    def run(self, visitSummaryRefs):
        """Make a table of ccd information from the visit summary catalogs.

        Parameters
        ----------
        visitSummaryRefs : `list` of `lsst.daf.butler.DeferredDatasetHandle`
            List of DeferredDatasetHandles pointing to exposure catalogs with
            per-detector summary information.

        Returns
        -------
        result : `~lsst.pipe.base.Struct`
            Results struct with attribute:

            ``outputCatalog``
                Catalog of ccd and visit information.
        """
        ccdEntries = []
        for visitSummaryRef in visitSummaryRefs:
            visitSummary = visitSummaryRef.get()
            visitInfo = visitSummary[0].getVisitInfo()

            summaryTable = visitSummary.asAstropy()
            selectColumns = ['id', 'visit', 'physical_filter', 'band', 'ra', 'dec', 'zenithDistance',
                             'zeroPoint', 'psfSigma', 'skyBg', 'skyNoise',
                             'astromOffsetMean', 'astromOffsetStd', 'nPsfStar',
                             'psfStarDeltaE1Median', 'psfStarDeltaE2Median',
                             'psfStarDeltaE1Scatter', 'psfStarDeltaE2Scatter',
                             'psfStarDeltaSizeMedian', 'psfStarDeltaSizeScatter',
                             'psfStarScaledDeltaSizeScatter',
                             'psfTraceRadiusDelta', 'maxDistToNearestPsf']
            ccdEntry = summaryTable[selectColumns].to_pandas().set_index('id')

            # 'visit' is the human-readable visit number; 'visitId' is the key
            # used to join with the visit table.
            ccdEntry = ccdEntry.rename(columns={"visit": "visitId"})

            # Keep a duplicate 'decl' column for backwards compatibility.
            ccdEntry["decl"] = ccdEntry.loc[:, "dec"]

            ccdEntry['ccdVisitId'] = [
                self.config.idGenerator.apply(
                    visitSummaryRef.dataId,
                    detector=detector_id,
                ).catalog_id
                for detector_id in summaryTable['id']
            ]
            ccdEntry['detector'] = summaryTable['id']
            pixToArcseconds = np.array([vR.getWcs().getPixelScale().asArcseconds() if vR.getWcs()
                                        else np.nan for vR in visitSummary])
            ccdEntry["seeing"] = visitSummary['psfSigma'] * np.sqrt(8 * np.log(2)) * pixToArcseconds

            ccdEntry["skyRotation"] = visitInfo.getBoresightRotAngle().asDegrees()
            ccdEntry["expMidpt"] = visitInfo.getDate().toPython()
            ccdEntry["expMidptMJD"] = visitInfo.getDate().get(dafBase.DateTime.MJD)
            expTime = visitInfo.getExposureTime()
            ccdEntry['expTime'] = expTime
            ccdEntry["obsStart"] = ccdEntry["expMidpt"] - 0.5 * pd.Timedelta(seconds=expTime)
            expTime_days = expTime / (60*60*24)
            ccdEntry["obsStartMJD"] = ccdEntry["expMidptMJD"] - 0.5 * expTime_days
            ccdEntry['darkTime'] = visitInfo.getDarkTime()
            ccdEntry['xSize'] = summaryTable['bbox_max_x'] - summaryTable['bbox_min_x']
            ccdEntry['ySize'] = summaryTable['bbox_max_y'] - summaryTable['bbox_min_y']
            ccdEntry['llcra'] = summaryTable['raCorners'][:, 0]
            ccdEntry['llcdec'] = summaryTable['decCorners'][:, 0]
            ccdEntry['ulcra'] = summaryTable['raCorners'][:, 1]
            ccdEntry['ulcdec'] = summaryTable['decCorners'][:, 1]
            ccdEntry['urcra'] = summaryTable['raCorners'][:, 2]
            ccdEntry['urcdec'] = summaryTable['decCorners'][:, 2]
            ccdEntry['lrcra'] = summaryTable['raCorners'][:, 3]
            ccdEntry['lrcdec'] = summaryTable['decCorners'][:, 3]

            ccdEntries.append(ccdEntry)

        outputCatalog = pd.concat(ccdEntries)
        outputCatalog.set_index('ccdVisitId', inplace=True, verify_integrity=True)
        return pipeBase.Struct(outputCatalog=outputCatalog)
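

# Illustrative sketch (not part of the pipeline): two of the derived ccdVisit
# quantities are simple conversions -- PSF sigma (pixels) to FWHM "seeing"
# (arcsec), and exposure midpoint (MJD) to exposure start (MJD).
def _ccdVisitDerivedQuantitiesExample():  # pragma: no cover - illustrative only
    psfSigma = 1.7        # pixels
    pixelScale = 0.2      # arcsec / pixel (hypothetical)
    seeing = psfSigma * np.sqrt(8 * np.log(2)) * pixelScale  # FWHM in arcsec

    expMidptMJD = 60000.5  # hypothetical exposure midpoint
    expTime = 30.0         # seconds
    obsStartMJD = expMidptMJD - 0.5 * expTime / 86400.0
    return seeing, obsStartMJD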


class MakeVisitTableConnections(pipeBase.PipelineTaskConnections,
                                dimensions=("instrument",),
                                defaultTemplates={"calexpType": ""}):
    visitSummaries = connectionTypes.Input(
        doc="Per-visit consolidated exposure metadata",
        name="finalVisitSummary",
        storageClass="ExposureCatalog",
        dimensions=("instrument", "visit",),
        multiple=True,
        deferLoad=True,
    )
    outputCatalog = connectionTypes.Output(
        doc="Visit metadata table",
        name="visitTable",
        storageClass="DataFrame",
        dimensions=("instrument",),
    )


class MakeVisitTableConfig(pipeBase.PipelineTaskConfig,
                           pipelineConnections=MakeVisitTableConnections):
    pass


class MakeVisitTableTask(pipeBase.PipelineTask):
    """Produce a `visitTable` from the visit summary exposure catalogs."""
    _DefaultName = 'makeVisitTable'
    ConfigClass = MakeVisitTableConfig

    def run(self, visitSummaries):
        """Make a table of visit information from the visit summary catalogs.

        Parameters
        ----------
        visitSummaries : `list` of `lsst.afw.table.ExposureCatalog`
            List of exposure catalogs with per-detector summary information.

        Returns
        -------
        result : `~lsst.pipe.base.Struct`
            Results struct with attribute:

            ``outputCatalog``
                Catalog of visit information.
        """
        visitEntries = []
        for visitSummary in visitSummaries:
            visitSummary = visitSummary.get()
            visitRow = visitSummary[0]
            visitInfo = visitRow.getVisitInfo()

            visitEntry = {}
            visitEntry["visitId"] = visitRow['visit']
            visitEntry["visit"] = visitRow['visit']
            visitEntry["physical_filter"] = visitRow['physical_filter']
            visitEntry["band"] = visitRow['band']
            raDec = visitInfo.getBoresightRaDec()
            visitEntry["ra"] = raDec.getRa().asDegrees()
            visitEntry["dec"] = raDec.getDec().asDegrees()

            # Keep a duplicate 'decl' entry for backwards compatibility.
            visitEntry["decl"] = visitEntry["dec"]

            visitEntry["skyRotation"] = visitInfo.getBoresightRotAngle().asDegrees()
            azAlt = visitInfo.getBoresightAzAlt()
            visitEntry["azimuth"] = azAlt.getLongitude().asDegrees()
            visitEntry["altitude"] = azAlt.getLatitude().asDegrees()
            visitEntry["zenithDistance"] = 90 - azAlt.getLatitude().asDegrees()
            visitEntry["airmass"] = visitInfo.getBoresightAirmass()
            expTime = visitInfo.getExposureTime()
            visitEntry["expTime"] = expTime
            visitEntry["expMidpt"] = visitInfo.getDate().toPython()
            visitEntry["expMidptMJD"] = visitInfo.getDate().get(dafBase.DateTime.MJD)
            visitEntry["obsStart"] = visitEntry["expMidpt"] - 0.5 * pd.Timedelta(seconds=expTime)
            expTime_days = expTime / (60*60*24)
            visitEntry["obsStartMJD"] = visitEntry["expMidptMJD"] - 0.5 * expTime_days
            visitEntries.append(visitEntry)

        outputCatalog = pd.DataFrame(data=visitEntries)
        outputCatalog.set_index('visitId', inplace=True, verify_integrity=True)
        return pipeBase.Struct(outputCatalog=outputCatalog)


class WriteForcedSourceTableConnections(pipeBase.PipelineTaskConnections,
                                        dimensions=("instrument", "visit", "detector", "skymap", "tract")):
    inputCatalog = connectionTypes.Input(
        doc="Primary per-detector, single-epoch forced-photometry catalog. "
            "By default, it is the output of ForcedPhotCcdTask on calexps",
        name="forced_src",
        storageClass="SourceCatalog",
        dimensions=("instrument", "visit", "detector", "skymap", "tract"),
    )
    inputCatalogDiff = connectionTypes.Input(
        doc="Secondary multi-epoch, per-detector, forced photometry catalog. "
            "By default, it is the output of ForcedPhotCcdTask run on image differences.",
        name="forced_diff",
        storageClass="SourceCatalog",
        dimensions=("instrument", "visit", "detector", "skymap", "tract"),
    )
    outputCatalog = connectionTypes.Output(
        doc="InputCatalogs horizontally joined on `objectId` in DataFrame parquet format",
        name="mergedForcedSource",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector", "skymap", "tract"),
    )


class WriteForcedSourceTableConfig(pipeBase.PipelineTaskConfig,
                                   pipelineConnections=WriteForcedSourceTableConnections):
    key = lsst.pex.config.Field(
        doc="Column on which to join the two input tables on and make the primary key of the output",
        dtype=str,
        default="objectId",
    )
    idGenerator = DetectorVisitIdGeneratorConfig.make_field()


class WriteForcedSourceTableTask(pipeBase.PipelineTask):
    """Merge and convert per-detector forced source catalogs to DataFrame Parquet format.

    Because the predecessor ForcedPhotCcdTask operates per-detector,
    per-tract, (i.e., it has tract in its dimensions), detectors
    on the tract boundary may have multiple forced source catalogs.

    The successor task TransformForcedSourceTable runs per-patch
    and temporally-aggregates overlapping mergedForcedSource catalogs from all
    available multiple epochs.
    """
    _DefaultName = "writeForcedSourceTable"
    ConfigClass = WriteForcedSourceTableConfig

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)
        # Add ccdVisitId to allow joining with the CcdVisitTable.
        idGenerator = self.config.idGenerator.apply(butlerQC.quantum.dataId)
        inputs['ccdVisitId'] = idGenerator.catalog_id
        inputs['band'] = butlerQC.quantum.dataId.full['band']
        outputs = self.run(**inputs)
        butlerQC.put(outputs, outputRefs)

    def run(self, inputCatalog, inputCatalogDiff, ccdVisitId=None, band=None):
        dfs = []
        for table, dataset, in zip((inputCatalog, inputCatalogDiff), ('calexp', 'diff')):
            df = table.asAstropy().to_pandas().set_index(self.config.key, drop=False)
            df = df.reindex(sorted(df.columns), axis=1)
            df['ccdVisitId'] = ccdVisitId if ccdVisitId else pd.NA
            df['band'] = band if band else pd.NA
            df.columns = pd.MultiIndex.from_tuples([(dataset, c) for c in df.columns],
                                                   names=('dataset', 'column'))

            dfs.append(df)

        outputCatalog = functools.reduce(lambda d1, d2: d1.join(d2), dfs)
        return pipeBase.Struct(outputCatalog=outputCatalog)


class TransformForcedSourceTableConnections(pipeBase.PipelineTaskConnections,
                                            dimensions=("instrument", "skymap", "patch", "tract")):
    inputCatalogs = connectionTypes.Input(
        doc="DataFrames of merged ForcedSources produced by WriteForcedSourceTableTask",
        name="mergedForcedSource",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector", "skymap", "tract"),
        multiple=True,
        deferLoad=True,
    )
    referenceCatalog = connectionTypes.Input(
        doc="Reference catalog which was used to seed the forcedPhot. Columns "
            "objectId, detect_isPrimary, detect_isTractInner, detect_isPatchInner "
            "are expected.",
        name="objectTable",
        storageClass="DataFrame",
        dimensions=("tract", "patch", "skymap"),
        deferLoad=True,
    )
    outputCatalog = connectionTypes.Output(
        doc="Narrower, temporally-aggregated, per-patch ForcedSource Table transformed and converted per a "
            "specified set of functors",
        name="forcedSourceTable",
        storageClass="DataFrame",
        dimensions=("tract", "patch", "skymap"),
    )


class TransformForcedSourceTableConfig(TransformCatalogBaseConfig,
                                       pipelineConnections=TransformForcedSourceTableConnections):
    referenceColumns = pexConfig.ListField(
        dtype=str,
        default=["detect_isPrimary", "detect_isTractInner", "detect_isPatchInner"],
        optional=True,
        doc="Columns to pull from reference catalog",
    )
    keyRef = lsst.pex.config.Field(
        doc="Column on which to join the two input tables on and make the primary key of the output",
        dtype=str,
        default="objectId",
    )
    key = lsst.pex.config.Field(
        doc="Rename the output DataFrame index to this name",
        dtype=str,
        default="forcedSourceId",
    )

    def setDefaults(self):
        super().setDefaults()
        self.functorFile = os.path.join('$PIPE_TASKS_DIR', 'schemas', 'ForcedSource.yaml')
        self.columnsFromDataId = ['tract', 'patch']
1723 """Transform/standardize a ForcedSource catalog
1725 Transforms each wide, per-detector forcedSource DataFrame per the
1726 specification file (per-camera defaults found in ForcedSource.yaml).
1727 All epochs that overlap the patch are aggregated into one per-patch
1728 narrow-DataFrame file.
1730 No de-duplication of rows
is performed. Duplicate resolutions flags are
1731 pulled
in from the referenceCatalog: `detect_isPrimary`,
1732 `detect_isTractInner`,`detect_isPatchInner`, so that user may de-duplicate
1733 for analysis
or compare duplicates
for QA.
1735 The resulting table includes multiple bands. Epochs (MJDs)
and other useful
1736 per-visit rows can be retreived by joining
with the CcdVisitTable on
1739 _DefaultName = "transformForcedSourceTable"
1740 ConfigClass = TransformForcedSourceTableConfig
1742 def runQuantum(self, butlerQC, inputRefs, outputRefs):
1743 inputs = butlerQC.get(inputRefs)
1744 if self.funcs
is None:
1745 raise ValueError(
"config.functorFile is None. "
1746 "Must be a valid path to yaml in order to run Task as a PipelineTask.")
1747 outputs = self.run(inputs[
'inputCatalogs'], inputs[
'referenceCatalog'], funcs=self.funcs,
1748 dataId=outputRefs.outputCatalog.dataId.full)
1750 butlerQC.put(outputs, outputRefs)
1752 def run(self, inputCatalogs, referenceCatalog, funcs=None, dataId=None, band=None):
1754 ref = referenceCatalog.get(parameters={
"columns": self.config.referenceColumns})
1755 self.log.info(
"Aggregating %s input catalogs" % (len(inputCatalogs)))
1756 for handle
in inputCatalogs:
1757 result = self.transform(
None, handle, funcs, dataId)
1759 dfs.append(result.df.join(ref, how=
'inner'))
1761 outputCatalog = pd.concat(dfs)
1765 outputCatalog.index.rename(self.config.keyRef, inplace=
True)
1767 outputCatalog.reset_index(inplace=
True)
1770 outputCatalog.set_index(
"forcedSourceId", inplace=
True, verify_integrity=
True)
1772 outputCatalog.index.rename(self.config.key, inplace=
True)
1774 self.log.info(
"Made a table of %d columns and %d rows",
1775 len(outputCatalog.columns), len(outputCatalog))
1776 return pipeBase.Struct(outputCatalog=outputCatalog)
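

# Illustrative sketch (not part of the pipeline): joining per-epoch rows to the
# reference flags lets downstream users de-duplicate with detect_isPrimary.
def _referenceJoinExample():  # pragma: no cover - illustrative only
    forced = pd.DataFrame({"psfFlux": [1.0, 2.0, 3.0]},
                          index=pd.Index([100, 100, 101], name="objectId"))
    ref = pd.DataFrame({"detect_isPrimary": [True, False]},
                       index=pd.Index([100, 101], name="objectId"))
    joined = forced.join(ref, how="inner")
    return joined[joined["detect_isPrimary"]]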


class ConsolidateTractConnections(pipeBase.PipelineTaskConnections,
                                  defaultTemplates={"catalogType": ""},
                                  dimensions=("instrument", "tract")):
    inputCatalogs = connectionTypes.Input(
        doc="Input per-patch DataFrame Tables to be concatenated",
        name="{catalogType}ForcedSourceTable",
        storageClass="DataFrame",
        dimensions=("tract", "patch", "skymap"),
        multiple=True,
    )
    outputCatalog = connectionTypes.Output(
        doc="Output per-tract concatenation of DataFrame Tables",
        name="{catalogType}ForcedSourceTable_tract",
        storageClass="DataFrame",
        dimensions=("tract", "skymap"),
    )


class ConsolidateTractConfig(pipeBase.PipelineTaskConfig,
                             pipelineConnections=ConsolidateTractConnections):
    pass


class ConsolidateTractTask(pipeBase.PipelineTask):
    """Concatenate any per-patch, dataframe list into a single
    per-tract DataFrame.
    """
    _DefaultName = 'ConsolidateTract'
    ConfigClass = ConsolidateTractConfig

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)
        self.log.info("Concatenating %s per-patch %s Tables",
                      len(inputs['inputCatalogs']),
                      inputRefs.inputCatalogs[0].datasetType.name)
        df = pd.concat(inputs['inputCatalogs'])
        butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs)