import functools
import logging
import numbers
import os

import numpy as np
import pandas as pd

import lsst.geom
import lsst.pex.config as pexConfig
import lsst.pipe.base as pipeBase
import lsst.daf.base as dafBase
import lsst.afw.table as afwTable
from lsst.pipe.base import connectionTypes
from lsst.meas.base import SingleFrameMeasurementTask
from lsst.obs.base import ExposureIdInfo
from lsst.daf.butler import DeferredDatasetHandle, DataCoordinate
from lsst.skymap import BaseSkyMap

from .parquetTable import ParquetTable
from .functors import CompositeFunctor, Column

log = logging.getLogger(__name__)


def flattenFilters(df, noDupCols=['coord_ra', 'coord_dec'], camelCase=False, inputBands=None):
    """Flattens a dataframe with multilevel column index.
    """
    newDf = pd.DataFrame()
    # band is the level 0 index
    dfBands = df.columns.unique(level=0).values
    for band in dfBands:
        subdf = df[band]
        columnFormat = '{0}{1}' if camelCase else '{0}_{1}'
        newColumns = {c: columnFormat.format(band, c)
                      for c in subdf.columns if c not in noDupCols}
        cols = list(newColumns.keys())
        newDf = pd.concat([newDf, subdf[cols].rename(columns=newColumns)], axis=1)

    # Band must be present in the input and output or else column is all NaN:
    presentBands = dfBands if inputBands is None else list(set(inputBands).intersection(dfBands))
    # Get the unexploded columns from any present band's partition
    noDupDf = df[presentBands[0]][noDupCols]
    newDf = pd.concat([noDupDf, newDf], axis=1)
    return newDf
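
# A minimal usage sketch for flattenFilters. The toy DataFrame below is
# illustrative only, not pipeline data:
#
#     columns = pd.MultiIndex.from_tuples(
#         [('g', 'PsfFlux'), ('g', 'coord_ra'), ('r', 'PsfFlux'), ('r', 'coord_ra')],
#         names=('band', 'column'))
#     df = pd.DataFrame([[1.0, 10.0, 2.0, 10.0]], columns=columns)
#     flat = flattenFilters(df, noDupCols=['coord_ra'])
#     # flat.columns -> ['coord_ra', 'g_PsfFlux', 'r_PsfFlux']
#     # With camelCase=True the per-band names become gPsfFlux, rPsfFlux.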


class WriteObjectTableConnections(pipeBase.PipelineTaskConnections,
                                  defaultTemplates={"coaddName": "deep"},
                                  dimensions=("tract", "patch", "skymap")):
    inputCatalogMeas = connectionTypes.Input(
        doc="Catalog of source measurements on the deepCoadd.",
        dimensions=("tract", "patch", "band", "skymap"),
        storageClass="SourceCatalog",
        name="{coaddName}Coadd_meas",
        multiple=True
    )
    inputCatalogForcedSrc = connectionTypes.Input(
        doc="Catalog of forced measurements (shape and position parameters held fixed) on the deepCoadd.",
        dimensions=("tract", "patch", "band", "skymap"),
        storageClass="SourceCatalog",
        name="{coaddName}Coadd_forced_src",
        multiple=True
    )
    inputCatalogRef = connectionTypes.Input(
        doc="Catalog marking the primary detection (which band provides a good shape and position) "
            "for each detection in deepCoadd_mergeDet.",
        dimensions=("tract", "patch", "skymap"),
        storageClass="SourceCatalog",
        name="{coaddName}Coadd_ref"
    )
    outputCatalog = connectionTypes.Output(
        doc="A vertical concatenation of the deepCoadd_{ref|meas|forced_src} catalogs, "
            "stored as a DataFrame with a multi-level column index per-patch.",
        dimensions=("tract", "patch", "skymap"),
        storageClass="DataFrame",
        name="{coaddName}Coadd_obj"
    )


class WriteObjectTableConfig(pipeBase.PipelineTaskConfig,
                             pipelineConnections=WriteObjectTableConnections):
    engine = pexConfig.Field(
        dtype=str,
        default="pyarrow",
        doc="Parquet engine for writing (pyarrow or fastparquet)"
    )
    coaddName = pexConfig.Field(
        dtype=str,
        default="deep",
        doc="Name of coadd"
    )


class WriteObjectTableTask(pipeBase.PipelineTask):
    """Write filter-merged source tables to parquet
    """
    _DefaultName = "writeObjectTable"
    ConfigClass = WriteObjectTableConfig

    # Names of table datasets to be merged
    inputDatasets = ('forced_src', 'meas', 'ref')

    # Tag of output dataset written by this task
    outputDataset = 'obj'

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)

        measDict = {ref.dataId['band']: {'meas': cat} for ref, cat in
                    zip(inputRefs.inputCatalogMeas, inputs['inputCatalogMeas'])}
        forcedSourceDict = {ref.dataId['band']: {'forced_src': cat} for ref, cat in
                            zip(inputRefs.inputCatalogForcedSrc, inputs['inputCatalogForcedSrc'])}

        catalogs = {}
        for band in measDict.keys():
            catalogs[band] = {'meas': measDict[band]['meas'],
                              'forced_src': forcedSourceDict[band]['forced_src'],
                              'ref': inputs['inputCatalogRef']}
        dataId = butlerQC.quantum.dataId
        df = self.run(catalogs=catalogs, tract=dataId['tract'], patch=dataId['patch'])
        outputs = pipeBase.Struct(outputCatalog=df)
        butlerQC.put(outputs, outputRefs)

    def run(self, catalogs, tract, patch):
        """Merge multiple catalogs.

        Parameters
        ----------
        catalogs : `dict`
            Mapping from filter names to dict of catalogs.
        tract : `int`
            tractId to use for the tractId column.
        patch : `str`
            patchId to use for the patchId column.

        Returns
        -------
        catalog : `pandas.DataFrame`
            Merged dataframe.
        """
        dfs = []
        for filt, tableDict in catalogs.items():
            for dataset, table in tableDict.items():
                # Convert afwTable to pandas DataFrame
                df = table.asAstropy().to_pandas().set_index('id', drop=True)

                # Sort columns by name, to ensure matching schema among patches
                df = df.reindex(sorted(df.columns), axis=1)
                df['tractId'] = tract
                df['patchId'] = patch

                # Make columns a 3-level MultiIndex
                df.columns = pd.MultiIndex.from_tuples([(dataset, filt, c) for c in df.columns],
                                                       names=('dataset', 'band', 'column'))
                dfs.append(df)

        catalog = functools.reduce(lambda d1, d2: d1.join(d2), dfs)
        return catalog
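
# The merged per-patch frame carries a three-level column index; a hedged
# sketch of addressing one cell (the field name is illustrative):
#
#     catalog[('meas', 'g', 'base_PsfFlux_instFlux')]
#     # level 0: dataset ('meas', 'forced_src', or 'ref')
#     # level 1: band    (e.g. 'g', 'r')
#     # level 2: column  (the original afw schema field name)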


class WriteSourceTableConnections(pipeBase.PipelineTaskConnections,
                                  defaultTemplates={"catalogType": ""},
                                  dimensions=("instrument", "visit", "detector")):

    catalog = connectionTypes.Input(
        doc="Input full-depth catalog of sources produced by CalibrateTask",
        name="{catalogType}src",
        storageClass="SourceCatalog",
        dimensions=("instrument", "visit", "detector")
    )
    outputCatalog = connectionTypes.Output(
        doc="Catalog of sources, `src` in Parquet format. The 'id' column is "
            "replaced with an index; all other columns are unchanged.",
        name="{catalogType}source",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector")
    )


class WriteSourceTableConfig(pipeBase.PipelineTaskConfig,
                             pipelineConnections=WriteSourceTableConnections):
    pass


class WriteSourceTableTask(pipeBase.PipelineTask):
    """Write source table to parquet.
    """
    _DefaultName = "writeSourceTable"
    ConfigClass = WriteSourceTableConfig

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)
        inputs['ccdVisitId'] = butlerQC.quantum.dataId.pack("visit_detector")
        result = self.run(**inputs).table
        outputs = pipeBase.Struct(outputCatalog=result.toDataFrame())
        butlerQC.put(outputs, outputRefs)

    def run(self, catalog, ccdVisitId=None, **kwargs):
        """Convert `src` catalog to parquet

        Parameters
        ----------
        catalog : `afwTable.SourceCatalog`
            catalog to be converted
        ccdVisitId : `int`
            ccdVisitId to be added as a column

        Returns
        -------
        result : `lsst.pipe.base.Struct`
            ``table``
                `ParquetTable` version of the input catalog
        """
        self.log.info("Generating parquet table from src catalog ccdVisitId=%s", ccdVisitId)
        df = catalog.asAstropy().to_pandas().set_index('id', drop=True)
        df['ccdVisitId'] = ccdVisitId
        return pipeBase.Struct(table=ParquetTable(dataFrame=df))
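
# Hedged usage sketch: the Struct returned by run() carries a ParquetTable
# that converts back to pandas (the task instance and ccdVisitId value are
# assumptions for illustration):
#
#     result = task.run(catalog, ccdVisitId=2021110800500)
#     df = result.table.toDataFrame()
#     assert 'ccdVisitId' in df.columns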


class WriteRecalibratedSourceTableConnections(WriteSourceTableConnections,
                                              defaultTemplates={"catalogType": "",
                                                                "skyWcsName": "jointcal",
                                                                "photoCalibName": "fgcm"},
                                              dimensions=("instrument", "visit", "detector", "skymap")):
    skyMap = connectionTypes.Input(
        doc="skyMap needed to choose which tract-level calibrations to use when multiple available",
        name=BaseSkyMap.SKYMAP_DATASET_TYPE_NAME,
        storageClass="SkyMap",
        dimensions=("skymap",),
    )
    exposure = connectionTypes.Input(
        doc="Input exposure to perform photometry on.",
        name="calexp",
        storageClass="ExposureF",
        dimensions=["instrument", "visit", "detector"],
    )
    externalSkyWcsTractCatalog = connectionTypes.Input(
        doc=("Per-tract, per-visit wcs calibrations. These catalogs use the detector "
             "id for the catalog id, sorted on id for fast lookup."),
        name="{skyWcsName}SkyWcsCatalog",
        storageClass="ExposureCatalog",
        dimensions=["instrument", "visit", "tract"],
        multiple=True
    )
    externalSkyWcsGlobalCatalog = connectionTypes.Input(
        doc=("Per-visit wcs calibrations computed globally (with no tract information). "
             "These catalogs use the detector id for the catalog id, sorted on id for "
             "fast lookup."),
        name="{skyWcsName}SkyWcsCatalog",
        storageClass="ExposureCatalog",
        dimensions=["instrument", "visit"],
    )
    externalPhotoCalibTractCatalog = connectionTypes.Input(
        doc=("Per-tract, per-visit photometric calibrations. These catalogs use the "
             "detector id for the catalog id, sorted on id for fast lookup."),
        name="{photoCalibName}PhotoCalibCatalog",
        storageClass="ExposureCatalog",
        dimensions=["instrument", "visit", "tract"],
        multiple=True
    )
    externalPhotoCalibGlobalCatalog = connectionTypes.Input(
        doc=("Per-visit photometric calibrations computed globally (with no tract "
             "information). These catalogs use the detector id for the catalog id, "
             "sorted on id for fast lookup."),
        name="{photoCalibName}PhotoCalibCatalog",
        storageClass="ExposureCatalog",
        dimensions=["instrument", "visit"],
    )

    def __init__(self, *, config=None):
        super().__init__(config=config)
        # Keep only the sky-WCS input selected by the configuration
        if config.doApplyExternalSkyWcs and config.doReevaluateSkyWcs:
            if config.useGlobalExternalSkyWcs:
                self.inputs.remove("externalSkyWcsTractCatalog")
            else:
                self.inputs.remove("externalSkyWcsGlobalCatalog")
        else:
            self.inputs.remove("externalSkyWcsTractCatalog")
            self.inputs.remove("externalSkyWcsGlobalCatalog")
        # Keep only the photoCalib input selected by the configuration
        if config.doApplyExternalPhotoCalib and config.doReevaluatePhotoCalib:
            if config.useGlobalExternalPhotoCalib:
                self.inputs.remove("externalPhotoCalibTractCatalog")
            else:
                self.inputs.remove("externalPhotoCalibGlobalCatalog")
        else:
            self.inputs.remove("externalPhotoCalibTractCatalog")
            self.inputs.remove("externalPhotoCalibGlobalCatalog")


class WriteRecalibratedSourceTableConfig(WriteSourceTableConfig,
                                         pipelineConnections=WriteRecalibratedSourceTableConnections):

    doReevaluatePhotoCalib = pexConfig.Field(
        dtype=bool,
        default=False,
        doc=("Add or replace local photoCalib columns")
    )
    doReevaluateSkyWcs = pexConfig.Field(
        dtype=bool,
        default=False,
        doc=("Add or replace local WCS columns and update the coord columns, coord_ra and coord_dec")
    )
    doApplyExternalPhotoCalib = pexConfig.Field(
        dtype=bool,
        default=False,
        doc=("If and only if doReevaluatePhotoCalib, apply the photometric calibrations from an external "
             "algorithm such as FGCM or jointcal, else use the photoCalib already attached to the exposure."),
    )
    doApplyExternalSkyWcs = pexConfig.Field(
        dtype=bool,
        default=False,
        doc=("If and only if doReevaluateSkyWcs, apply the WCS from an external algorithm such as jointcal, "
             "else use the wcs already attached to the exposure."),
    )
    useGlobalExternalPhotoCalib = pexConfig.Field(
        dtype=bool,
        default=True,
        doc=("When using doApplyExternalPhotoCalib, use 'global' calibrations "
             "that are not run per-tract. When False, use per-tract photometric "
             "calibration files.")
    )
    useGlobalExternalSkyWcs = pexConfig.Field(
        dtype=bool,
        default=False,
        doc=("When using doApplyExternalSkyWcs, use 'global' calibrations "
             "that are not run per-tract. When False, use per-tract wcs "
             "files.")
    )

    def validate(self):
        super().validate()
        if self.doApplyExternalSkyWcs and not self.doReevaluateSkyWcs:
            log.warning("doApplyExternalSkyWcs=True but doReevaluateSkyWcs=False. "
                        "External SkyWcs will not be read or evaluated.")
        if self.doApplyExternalPhotoCalib and not self.doReevaluatePhotoCalib:
            log.warning("doApplyExternalPhotoCalib=True but doReevaluatePhotoCalib=False. "
                        "External PhotoCalib will not be read or evaluated.")


class WriteRecalibratedSourceTableTask(WriteSourceTableTask):
    """Write source table to parquet
    """
    _DefaultName = "writeRecalibratedSourceTable"
    ConfigClass = WriteRecalibratedSourceTableConfig

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)
        inputs['ccdVisitId'] = butlerQC.quantum.dataId.pack("visit_detector")
        inputs['exposureIdInfo'] = ExposureIdInfo.fromDataId(butlerQC.quantum.dataId, "visit_detector")

        if self.config.doReevaluatePhotoCalib or self.config.doReevaluateSkyWcs:
            if self.config.doApplyExternalPhotoCalib or self.config.doApplyExternalSkyWcs:
                inputs['exposure'] = self.attachCalibs(inputRefs, **inputs)

            inputs['catalog'] = self.addCalibColumns(**inputs)

        result = self.run(**inputs).table
        outputs = pipeBase.Struct(outputCatalog=result.toDataFrame())
        butlerQC.put(outputs, outputRefs)

    def attachCalibs(self, inputRefs, skyMap, exposure, externalSkyWcsGlobalCatalog=None,
                     externalSkyWcsTractCatalog=None, externalPhotoCalibGlobalCatalog=None,
                     externalPhotoCalibTractCatalog=None, **kwargs):
        """Apply external calibrations to exposure per configuration

        When multiple tract-level calibrations overlap, select the one with the
        center closest to detector.

        Parameters
        ----------
        inputRefs : `lsst.pipe.base.InputQuantizedConnection`, for dataIds of
            tract-level calibs.
        skyMap : `lsst.skymap.SkyMap`
            skyMap to lookup tract geometry and wcs.
        exposure : `lsst.afw.image.exposure.Exposure`
            Input exposure to adjust calibrations.
        externalSkyWcsGlobalCatalog : `lsst.afw.table.ExposureCatalog`, optional
            Exposure catalog with external skyWcs to be applied per config
        externalSkyWcsTractCatalog : `lsst.afw.table.ExposureCatalog`, optional
            Exposure catalog with external skyWcs to be applied per config
        externalPhotoCalibGlobalCatalog : `lsst.afw.table.ExposureCatalog`, optional
            Exposure catalog with external photoCalib to be applied per config
        externalPhotoCalibTractCatalog : `lsst.afw.table.ExposureCatalog`, optional
            Exposure catalog with external photoCalib to be applied per config

        Returns
        -------
        exposure : `lsst.afw.image.exposure.Exposure`
            Exposure with adjusted calibrations.
        """
        if not self.config.doApplyExternalSkyWcs:
            # Do not modify the exposure's SkyWcs
            externalSkyWcsCatalog = None
        elif self.config.useGlobalExternalSkyWcs:
            # Use the global external SkyWcs
            externalSkyWcsCatalog = externalSkyWcsGlobalCatalog
            self.log.info('Applying global SkyWcs')
        else:
            # Use a tract-level external SkyWcs from the closest overlapping tract
            inputRef = getattr(inputRefs, 'externalSkyWcsTractCatalog')
            tracts = [ref.dataId['tract'] for ref in inputRef]
            if len(tracts) == 1:
                ind = 0
                self.log.info('Applying tract-level SkyWcs from tract %s', tracts[ind])
            else:
                ind = self.getClosestTract(tracts, skyMap,
                                           exposure.getBBox(), exposure.getWcs())
                self.log.info('Multiple overlapping externalSkyWcsTractCatalogs found (%s). '
                              'Applying closest to detector center: tract=%s', str(tracts), tracts[ind])
            externalSkyWcsCatalog = externalSkyWcsTractCatalog[ind]

        if not self.config.doApplyExternalPhotoCalib:
            # Do not modify the exposure's PhotoCalib
            externalPhotoCalibCatalog = None
        elif self.config.useGlobalExternalPhotoCalib:
            # Use the global external PhotoCalib
            externalPhotoCalibCatalog = externalPhotoCalibGlobalCatalog
            self.log.info('Applying global PhotoCalib')
        else:
            # Use a tract-level external PhotoCalib from the closest overlapping tract
            inputRef = getattr(inputRefs, 'externalPhotoCalibTractCatalog')
            tracts = [ref.dataId['tract'] for ref in inputRef]
            if len(tracts) == 1:
                ind = 0
                self.log.info('Applying tract-level PhotoCalib from tract %s', tracts[ind])
            else:
                ind = self.getClosestTract(tracts, skyMap,
                                           exposure.getBBox(), exposure.getWcs())
                self.log.info('Multiple overlapping externalPhotoCalibTractCatalogs found (%s). '
                              'Applying closest to detector center: tract=%s', str(tracts), tracts[ind])
            externalPhotoCalibCatalog = externalPhotoCalibTractCatalog[ind]

        return self.prepareCalibratedExposure(exposure, externalSkyWcsCatalog, externalPhotoCalibCatalog)

    def getClosestTract(self, tracts, skyMap, bbox, wcs):
        """Find the index of the tract closest to detector from list of tractIds

        Parameters
        ----------
        tracts : `list` [`int`]
            Iterable of integer tractIds
        skyMap : `lsst.skymap.SkyMap`
            skyMap to lookup tract geometry and wcs
        bbox : `lsst.geom.Box2I`
            Detector bbox, center of which will be compared to tract centers
        wcs : `lsst.afw.geom.SkyWcs`
            Detector Wcs object to map the detector center to SkyCoord

        Returns
        -------
        index : `int`
        """
        if len(tracts) == 1:
            return 0

        center = wcs.pixelToSky(bbox.getCenter())
        sep = []
        for tractId in tracts:
            tract = skyMap[tractId]
            tractCenter = tract.getWcs().pixelToSky(tract.getBBox().getCenter())
            sep.append(center.separation(tractCenter))

        return np.argmin(sep)

    def prepareCalibratedExposure(self, exposure, externalSkyWcsCatalog=None, externalPhotoCalibCatalog=None):
        """Prepare a calibrated exposure and apply external calibrations
        if so configured.

        Parameters
        ----------
        exposure : `lsst.afw.image.exposure.Exposure`
            Input exposure to adjust calibrations.
        externalSkyWcsCatalog : `lsst.afw.table.ExposureCatalog`, optional
            Exposure catalog with external skyWcs to be applied
            if config.doApplyExternalSkyWcs=True. Catalog uses the detector id
            for the catalog id, sorted on id for fast lookup.
        externalPhotoCalibCatalog : `lsst.afw.table.ExposureCatalog`, optional
            Exposure catalog with external photoCalib to be applied
            if config.doApplyExternalPhotoCalib=True. Catalog uses the detector
            id for the catalog id, sorted on id for fast lookup.

        Returns
        -------
        exposure : `lsst.afw.image.exposure.Exposure`
            Exposure with adjusted calibrations.
        """
        detectorId = exposure.getInfo().getDetector().getId()

        if externalPhotoCalibCatalog is not None:
            row = externalPhotoCalibCatalog.find(detectorId)
            if row is None:
                self.log.warning("Detector id %s not found in externalPhotoCalibCatalog; "
                                 "Using original photoCalib.", detectorId)
            else:
                photoCalib = row.getPhotoCalib()
                if photoCalib is None:
                    self.log.warning("Detector id %s has None for photoCalib in externalPhotoCalibCatalog; "
                                     "Using original photoCalib.", detectorId)
                else:
                    exposure.setPhotoCalib(photoCalib)

        if externalSkyWcsCatalog is not None:
            row = externalSkyWcsCatalog.find(detectorId)
            if row is None:
                self.log.warning("Detector id %s not found in externalSkyWcsCatalog; "
                                 "Using original skyWcs.", detectorId)
            else:
                skyWcs = row.getWcs()
                if skyWcs is None:
                    self.log.warning("Detector id %s has None for skyWcs in externalSkyWcsCatalog; "
                                     "Using original skyWcs.", detectorId)
                else:
                    exposure.setWcs(skyWcs)

        return exposure

    def addCalibColumns(self, catalog, exposure, exposureIdInfo, **kwargs):
        """Add or replace columns with calibs evaluated at each centroid

        Add or replace 'base_LocalWcs' and 'base_LocalPhotoCalib' columns in a
        source catalog, by rerunning the plugins.

        Parameters
        ----------
        catalog : `lsst.afw.table.SourceCatalog`
            catalog to which calib columns will be added
        exposure : `lsst.afw.image.exposure.Exposure`
            Exposure with attached PhotoCalibs and SkyWcs attributes to be
            reevaluated at local centroids. Pixels are not required.
        exposureIdInfo : `lsst.obs.base.ExposureIdInfo`

        Returns
        -------
        newCat : `lsst.afw.table.SourceCatalog`
            Source Catalog with requested local calib columns
        """
        measureConfig = SingleFrameMeasurementTask.ConfigClass()
        measureConfig.doReplaceWithNoise = False

        # Run only the requested plugins
        measureConfig.plugins.names = []
        if self.config.doReevaluateSkyWcs:
            measureConfig.plugins.names.add('base_LocalWcs')
            self.log.info("Re-evaluating base_LocalWcs plugin")
        if self.config.doReevaluatePhotoCalib:
            measureConfig.plugins.names.add('base_LocalPhotoCalib')
            self.log.info("Re-evaluating base_LocalPhotoCalib plugin")
        pluginsNotToCopy = tuple(measureConfig.plugins.names)

        # Create a new schema and catalog; copy all columns from the original
        # except for the ones to reevaluate.
        aliasMap = catalog.schema.getAliasMap()
        mapper = afwTable.SchemaMapper(catalog.schema)
        for item in catalog.schema:
            if not item.field.getName().startswith(pluginsNotToCopy):
                mapper.addMapping(item.key)

        schema = mapper.getOutputSchema()
        measurement = SingleFrameMeasurementTask(config=measureConfig, schema=schema)
        schema.setAliasMap(aliasMap)
        newCat = afwTable.SourceCatalog(schema)
        newCat.extend(catalog, mapper=mapper)

        if self.config.doReevaluateSkyWcs:
            afwTable.updateSourceCoords(exposure.wcs, newCat)

        measurement.run(measCat=newCat, exposure=exposure, exposureId=exposureIdInfo.expId)
        return newCat
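
# The SchemaMapper idiom above copies every field except those the measurement
# plugins will regenerate; a standalone sketch of the same pattern (the prefix
# tuple is illustrative):
#
#     mapper = afwTable.SchemaMapper(catalog.schema)
#     for item in catalog.schema:
#         if not item.field.getName().startswith(('base_LocalWcs',)):
#             mapper.addMapping(item.key)
#     newCat = afwTable.SourceCatalog(mapper.getOutputSchema())
#     newCat.extend(catalog, mapper=mapper)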


class PostprocessAnalysis(object):
    """Calculate columns from ParquetTable.

    This object manages and organizes an arbitrary set of computations
    on a catalog. The catalog is defined by a
    `lsst.pipe.tasks.parquetTable.ParquetTable` object (or list thereof), such
    as a `deepCoadd_obj` dataset, and the computations are defined by a
    collection of `lsst.pipe.tasks.functor.Functor` objects (or, equivalently,
    a `CompositeFunctor`).

    After the object is initialized, accessing the `.df` attribute (which
    holds the `pandas.DataFrame` containing the results of the calculations)
    triggers computation of said dataframe.

    One of the conveniences of using this object is the ability to define a
    desired common filter for all functors. This enables the same functor
    collection to be passed to several different `PostprocessAnalysis` objects
    without having to change the original functor collection, since the `filt`
    keyword argument of this object triggers an overwrite of the `filt`
    property for all functors in the collection.

    This object also allows a list of refFlags to be passed, and defines a set
    of default refFlags that are always included even if not requested.

    If a list of `ParquetTable` objects is passed, rather than a single one,
    then the calculations will be mapped over all the input catalogs. In
    principle, it should be straightforward to parallelize this activity, but
    initial tests have failed (see TODO in code comments).

    Parameters
    ----------
    parq : `lsst.pipe.tasks.ParquetTable` (or list of such)
        Source catalog(s) for computation.
    functors : `list`, `dict`, or `lsst.pipe.tasks.functors.CompositeFunctor`
        Computations to do (functors that act on `parq`).
        If a dict, the output DataFrame will have columns keyed accordingly.
        If a list, the column keys will come from the
        `.shortname` attribute of each functor.
    filt : `str`, optional
        Filter in which to calculate. If provided,
        this will overwrite any existing `.filt` attribute
        of the provided functors.
    flags : `list`, optional
        List of flags (per-band) to include in output table.
        Taken from the `meas` dataset if applied to a multilevel Object Table.
    refFlags : `list`, optional
        List of refFlags (only reference band) to include in output table.
    forcedFlags : `list`, optional
        List of flags (per-band) to include in output table.
        Taken from the ``forced_src`` dataset if applied to a
        multilevel Object Table. Intended for flags from measurement plugins
        only run during multi-band forced-photometry.
    """
    _defaultRefFlags = []
    _defaultFuncs = ()

    def __init__(self, parq, functors, filt=None, flags=None, refFlags=None, forcedFlags=None):
        self.parq = parq
        self.functors = functors

        self.filt = filt
        self.flags = list(flags) if flags is not None else []
        self.forcedFlags = list(forcedFlags) if forcedFlags is not None else []
        self.refFlags = list(self._defaultRefFlags)
        if refFlags is not None:
            self.refFlags += list(refFlags)

        self._df = None

    @property
    def defaultFuncs(self):
        funcs = dict(self._defaultFuncs)
        return funcs

    @property
    def func(self):
        additionalFuncs = self.defaultFuncs
        additionalFuncs.update({flag: Column(flag, dataset='forced_src') for flag in self.forcedFlags})
        additionalFuncs.update({flag: Column(flag, dataset='ref') for flag in self.refFlags})
        additionalFuncs.update({flag: Column(flag, dataset='meas') for flag in self.flags})

        if isinstance(self.functors, CompositeFunctor):
            func = self.functors
        else:
            func = CompositeFunctor(self.functors)

        func.funcDict.update(additionalFuncs)
        func.filt = self.filt
        return func

    @property
    def noDupCols(self):
        return [name for name, func in self.func.funcDict.items() if func.noDup or func.dataset == 'ref']

    @property
    def df(self):
        if self._df is None:
            self.compute()
        return self._df

    def compute(self, dropna=False, pool=None):
        # map over multiple parquet tables
        if type(self.parq) in (list, tuple):
            if pool is None:
                dflist = [self.func(parq, dropna=dropna) for parq in self.parq]
            else:
                # TODO: Figure out why this fails when parallelized
                dflist = pool.map(functools.partial(self.func, dropna=dropna), self.parq)
            self._df = pd.concat(dflist)
        else:
            self._df = self.func(self.parq, dropna=dropna)

        return self._df
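
# Hedged usage sketch for PostprocessAnalysis (functor keys and the flag name
# are illustrative):
#
#     funcs = {'ra': Column('coord_ra', dataset='ref'),
#              'dec': Column('coord_dec', dataset='ref')}
#     analysis = PostprocessAnalysis(parq, funcs, filt='i',
#                                    flags=['base_PixelFlags_flag_saturated'])
#     df = analysis.df  # first access triggers compute()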
725 """Expected Connections for subclasses of TransformCatalogBaseTask.
729 inputCatalog = connectionTypes.Input(
731 storageClass=
"DataFrame",
733 outputCatalog = connectionTypes.Output(
735 storageClass=
"DataFrame",


class TransformCatalogBaseConfig(pipeBase.PipelineTaskConfig,
                                 pipelineConnections=TransformCatalogBaseConnections):
    functorFile = pexConfig.Field(
        dtype=str,
        doc="Path to YAML file specifying Science Data Model functors to use "
            "when copying columns and computing calibrated values.",
        default=None,
        optional=True
    )
    primaryKey = pexConfig.Field(
        dtype=str,
        doc="Name of column to be set as the DataFrame index. If None, the index "
            "will be named `id`",
        default=None,
        optional=True
    )
    columnsFromDataId = pexConfig.ListField(
        dtype=str,
        default=None,
        optional=True,
        doc="Columns to extract from the dataId",
    )
764 """Base class for transforming/standardizing a catalog
766 by applying functors that convert units and apply calibrations.
767 The purpose of this task
is to perform a set of computations on
768 an input `ParquetTable` dataset (such
as `deepCoadd_obj`)
and write the
769 results to a new dataset (which needs to be declared
in an `outputDataset`
772 The calculations to be performed are defined
in a YAML file that specifies
773 a set of functors to be computed, provided
as
774 a `--functorFile` config parameter. An example of such a YAML file
799 - base_InputCount_value
802 functor: DeconvolvedMoments
807 - merge_measurement_i
808 - merge_measurement_r
809 - merge_measurement_z
810 - merge_measurement_y
811 - merge_measurement_g
812 - base_PixelFlags_flag_inexact_psfCenter
815 The names
for each entry under
"func" will become the names of columns
in
816 the output dataset. All the functors referenced are defined
in
818 functor are
in the `args` list,
and any additional entries
for each column
819 other than
"functor" or "args" (e.g., `
'filt'`, `
'dataset'`) are treated
as
820 keyword arguments to be passed to the functor initialization.
822 The
"flags" entry
is the default shortcut
for `Column` functors.
823 All columns listed under
"flags" will be copied to the output table
824 untransformed. They can be of any datatype.
825 In the special case of transforming a multi-level oject table
with
826 band
and dataset indices (deepCoadd_obj), these will be taked
from the
827 `meas` dataset
and exploded out per band.
829 There are two special shortcuts that only apply when transforming
830 multi-level Object (deepCoadd_obj) tables:
831 - The
"refFlags" entry
is shortcut
for `Column` functor
832 taken
from the `
'ref'` dataset
if transforming an ObjectTable.
833 - The
"forcedFlags" entry
is shortcut
for `Column` functors.
834 taken
from the ``forced_src`` dataset
if transforming an ObjectTable.
835 These are expanded out per band.
838 This task uses the `lsst.pipe.tasks.postprocess.PostprocessAnalysis` object
839 to organize
and excecute the calculations.
842 def _DefaultName(self):
843 raise NotImplementedError(
'Subclass must define "_DefaultName" attribute')
847 raise NotImplementedError(
'Subclass must define "outputDataset" attribute')
851 raise NotImplementedError(
'Subclass must define "inputDataset" attribute')
855 raise NotImplementedError(
'Subclass must define "ConfigClass" attribute')
859 if self.config.functorFile:
860 self.log.info(
'Loading tranform functor definitions from %s',
861 self.config.functorFile)
862 self.
funcs = CompositeFunctor.from_file(self.config.functorFile)
863 self.
funcs.update(dict(PostprocessAnalysis._defaultFuncs))
868 inputs = butlerQC.get(inputRefs)
869 if self.
funcs is None:
870 raise ValueError(
"config.functorFile is None. "
871 "Must be a valid path to yaml in order to run Task as a PipelineTask.")
872 result = self.
run(parq=inputs[
'inputCatalog'], funcs=self.
funcs,
873 dataId=outputRefs.outputCatalog.dataId.full)
874 outputs = pipeBase.Struct(outputCatalog=result)
875 butlerQC.put(outputs, outputRefs)

    def run(self, parq, funcs=None, dataId=None, band=None):
        """Do postprocessing calculations

        Takes a `ParquetTable` object and dataId,
        returns a dataframe with results of postprocessing calculations.

        Parameters
        ----------
        parq : `lsst.pipe.tasks.parquetTable.ParquetTable`
            ParquetTable from which calculations are done.
        funcs : `lsst.pipe.tasks.functors.Functors`
            Functors to apply to the table's columns
        dataId : dict, optional
            Used to add a `patchId` column to the output dataframe.
        band : `str`, optional
            Filter band that is being processed.

        Returns
        -------
        df : `pandas.DataFrame`
        """
        self.log.info("Transforming/standardizing the source table dataId: %s", dataId)

        df = self.transform(band, parq, funcs, dataId).df
        self.log.info("Made a table of %d columns and %d rows", len(df.columns), len(df))
        return df

    def getFunctors(self):
        return self.funcs

    def getAnalysis(self, parq, funcs=None, band=None):
        if funcs is None:
            funcs = self.funcs
        analysis = PostprocessAnalysis(parq, funcs, filt=band)
        return analysis

    def transform(self, band, parq, funcs, dataId):
        analysis = self.getAnalysis(parq, funcs=funcs, band=band)
        df = analysis.df
        if dataId and self.config.columnsFromDataId:
            for key in self.config.columnsFromDataId:
                if key in dataId:
                    df[str(key)] = dataId[key]
                else:
                    raise ValueError(f"'{key}' in config.columnsFromDataId not found in dataId: {dataId}")

        if self.config.primaryKey:
            if df.index.name != self.config.primaryKey and self.config.primaryKey in df:
                df.reset_index(inplace=True, drop=True)
                df.set_index(self.config.primaryKey, inplace=True)

        return pipeBase.Struct(
            df=df,
            analysis=analysis
        )
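
# Hedged usage sketch: a concrete subclass with a functor file can be driven
# directly (the dataId values are illustrative):
#
#     task = TransformSourceTableTask()
#     df = task.run(parq, funcs=task.getFunctors(),
#                   dataId={'visit': 1228, 'detector': 42,
#                           'band': 'i', 'physical_filter': 'HSC-I'})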


class TransformObjectCatalogConnections(pipeBase.PipelineTaskConnections,
                                        defaultTemplates={"coaddName": "deep"},
                                        dimensions=("tract", "patch", "skymap")):
    inputCatalog = connectionTypes.Input(
        doc="The vertical concatenation of the deepCoadd_{ref|meas|forced_src} catalogs, "
            "stored as a DataFrame with a multi-level column index per-patch.",
        dimensions=("tract", "patch", "skymap"),
        storageClass="DataFrame",
        name="{coaddName}Coadd_obj",
        deferLoad=True,
    )
    outputCatalog = connectionTypes.Output(
        doc="Per-Patch Object Table of columns transformed from the deepCoadd_obj table per the standard "
            "data model.",
        dimensions=("tract", "patch", "skymap"),
        storageClass="DataFrame",
        name="objectTable",
    )


class TransformObjectCatalogConfig(TransformCatalogBaseConfig,
                                   pipelineConnections=TransformObjectCatalogConnections):
    coaddName = pexConfig.Field(
        dtype=str,
        default="deep",
        doc="Name of coadd"
    )
    filterMap = pexConfig.DictField(
        keytype=str,
        itemtype=str,
        default={},
        doc=("Dictionary mapping full filter name to short one for column name munging. "
             "These filters determine the output columns no matter what filters the "
             "input data actually contain."),
        deprecated=("Coadds are now identified by the band, so this transform is unused. "
                    "Will be removed after v22.")
    )
    outputBands = pexConfig.ListField(
        dtype=str,
        default=None,
        optional=True,
        doc=("These bands and only these bands will appear in the output,"
             " NaN-filled if the input does not include them."
             " If None, then use all bands found in the input.")
    )
    camelCase = pexConfig.Field(
        dtype=bool,
        default=False,
        doc=("Write per-band column names with camelCase, else underscore. "
             "For example: gPsFlux instead of g_PsFlux.")
    )
    multilevelOutput = pexConfig.Field(
        dtype=bool,
        default=False,
        doc=("Whether results dataframe should have a multilevel column index (True) or be flat "
             "and name-munged (False).")
    )
    goodFlags = pexConfig.ListField(
        dtype=str,
        default=[],
        doc=("List of 'good' flags that should be set False when populating empty tables. "
             "All other flags are considered to be 'bad' flags and will be set to True.")
    )
    floatFillValue = pexConfig.Field(
        dtype=float,
        default=np.nan,
        doc="Fill value for float fields when populating empty tables."
    )
    integerFillValue = pexConfig.Field(
        dtype=int,
        default=-1,
        doc="Fill value for integer fields when populating empty tables."
    )

    def setDefaults(self):
        super().setDefaults()
        self.functorFile = os.path.join('$PIPE_TASKS_DIR', 'schemas', 'Object.yaml')
        self.primaryKey = 'objectId'
        self.columnsFromDataId = ['tract', 'patch']
        self.goodFlags = ['calib_astrometry_used',
                          'calib_photometry_reserved',
                          'calib_photometry_used',
                          'calib_psf_candidate',
                          'calib_psf_reserved']
1023 """Produce a flattened Object Table to match the format specified in
1026 Do the same set of postprocessing calculations on all bands.
1028 This is identical to `TransformCatalogBaseTask`,
except for that it does
1029 the specified functor calculations
for all filters present
in the
1030 input `deepCoadd_obj` table. Any specific `
"filt"` keywords specified
1031 by the YAML file will be superceded.
1033 _DefaultName = "transformObjectCatalog"
1034 ConfigClass = TransformObjectCatalogConfig

    def run(self, parq, funcs=None, dataId=None, band=None):
        # NOTE: the `band` kwarg is ignored here.
        dfDict = {}
        analysisDict = {}
        templateDf = pd.DataFrame()

        if isinstance(parq, DeferredDatasetHandle):
            columns = parq.get(component='columns')
            inputBands = columns.unique(level=1).values
        else:
            inputBands = parq.columnLevelNames['band']

        outputBands = self.config.outputBands if self.config.outputBands else inputBands

        # Perform transform for data of filters that exist in parq.
        for inputBand in inputBands:
            if inputBand not in outputBands:
                self.log.info("Ignoring %s band data in the input", inputBand)
                continue
            self.log.info("Transforming the catalog of band %s", inputBand)
            result = self.transform(inputBand, parq, funcs, dataId)
            dfDict[inputBand] = result.df
            analysisDict[inputBand] = result.analysis
            if templateDf.empty:
                templateDf = result.df

        # Put filler values in columns of other wanted bands
        for filt in outputBands:
            if filt not in dfDict:
                self.log.info("Adding empty columns for band %s", filt)
                dfTemp = templateDf.copy()
                for col in dfTemp.columns:
                    testValue = dfTemp[col].values[0]
                    if isinstance(testValue, (np.bool_, pd.BooleanDtype)):
                        # Boolean flag type; check if it is a "good" flag
                        if col in self.config.goodFlags:
                            fillValue = False
                        else:
                            fillValue = True
                    elif isinstance(testValue, numbers.Integral):
                        # Checking numbers.Integral catches all flavors of
                        # python, numpy, pandas, etc. integers. We must ensure
                        # this is not an unsigned integer.
                        if isinstance(testValue, np.unsignedinteger):
                            raise ValueError("Parquet tables may not have unsigned integer columns.")
                        else:
                            fillValue = self.config.integerFillValue
                    else:
                        fillValue = self.config.floatFillValue
                    dfTemp[col].values[:] = fillValue
                dfDict[filt] = dfTemp

        # This makes a multilevel column index, with band as first level
        df = pd.concat(dfDict, axis=1, names=['band', 'column'])

        if not self.config.multilevelOutput:
            noDupCols = list(set.union(*[set(v.noDupCols) for v in analysisDict.values()]))
            if self.config.primaryKey in noDupCols:
                noDupCols.remove(self.config.primaryKey)
            if dataId and self.config.columnsFromDataId:
                noDupCols += self.config.columnsFromDataId
            df = flattenFilters(df, noDupCols=noDupCols, camelCase=self.config.camelCase,
                                inputBands=inputBands)

        self.log.info("Made a table of %d columns and %d rows", len(df.columns), len(df))
        return df


class ConsolidateObjectTableConnections(pipeBase.PipelineTaskConnections,
                                        dimensions=("tract", "skymap")):
    inputCatalogs = connectionTypes.Input(
        doc="Per-Patch objectTables conforming to the standard data model.",
        name="objectTable",
        storageClass="DataFrame",
        dimensions=("tract", "patch", "skymap"),
        multiple=True,
    )
    outputCatalog = connectionTypes.Output(
        doc="Per-tract horizontal concatenation of the input objectTables",
        name="objectTable_tract",
        storageClass="DataFrame",
        dimensions=("tract", "skymap"),
    )


class ConsolidateObjectTableConfig(pipeBase.PipelineTaskConfig,
                                   pipelineConnections=ConsolidateObjectTableConnections):
    coaddName = pexConfig.Field(
        dtype=str,
        default="deep",
        doc="Name of coadd"
    )


class ConsolidateObjectTableTask(pipeBase.PipelineTask):
    """Write patch-merged source tables to a tract-level parquet file.

    Concatenates the `objectTable` list into a per-tract `objectTable_tract`.
    """
    _DefaultName = "consolidateObjectTable"
    ConfigClass = ConsolidateObjectTableConfig

    inputDataset = 'objectTable'
    outputDataset = 'objectTable_tract'

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)
        self.log.info("Concatenating %s per-patch Object Tables",
                      len(inputs['inputCatalogs']))
        df = pd.concat(inputs['inputCatalogs'])
        butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs)


class TransformSourceTableConnections(pipeBase.PipelineTaskConnections,
                                      defaultTemplates={"catalogType": ""},
                                      dimensions=("instrument", "visit", "detector")):

    inputCatalog = connectionTypes.Input(
        doc="Wide input catalog of sources produced by WriteSourceTableTask",
        name="{catalogType}source",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector"),
        deferLoad=True
    )
    outputCatalog = connectionTypes.Output(
        doc="Narrower, per-detector Source Table transformed and converted per a "
            "specified set of functors",
        name="{catalogType}sourceTable",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector")
    )


class TransformSourceTableConfig(TransformCatalogBaseConfig,
                                 pipelineConnections=TransformSourceTableConnections):

    def setDefaults(self):
        super().setDefaults()
        self.functorFile = os.path.join('$PIPE_TASKS_DIR', 'schemas', 'Source.yaml')
        self.primaryKey = 'sourceId'
        self.columnsFromDataId = ['visit', 'detector', 'band', 'physical_filter']
1181 """Transform/standardize a source catalog
1183 _DefaultName = "transformSourceTable"
1184 ConfigClass = TransformSourceTableConfig


class ConsolidateVisitSummaryConnections(pipeBase.PipelineTaskConnections,
                                         dimensions=("instrument", "visit",),
                                         defaultTemplates={"calexpType": ""}):
    calexp = connectionTypes.Input(
        doc="Processed exposures used for metadata",
        name="{calexpType}calexp",
        storageClass="ExposureF",
        dimensions=("instrument", "visit", "detector"),
        deferLoad=True,
        multiple=True,
    )
    visitSummary = connectionTypes.Output(
        doc=("Per-visit consolidated exposure metadata. These catalogs use "
             "detector id for the id and are sorted for fast lookups of a "
             "detector."),
        name="{calexpType}visitSummary",
        storageClass="ExposureCatalog",
        dimensions=("instrument", "visit"),
    )


class ConsolidateVisitSummaryConfig(pipeBase.PipelineTaskConfig,
                                    pipelineConnections=ConsolidateVisitSummaryConnections):
    """Config for ConsolidateVisitSummaryTask"""
    pass


class ConsolidateVisitSummaryTask(pipeBase.PipelineTask):
    """Task to consolidate per-detector visit metadata.

    This task aggregates the following metadata from all the detectors in a
    single visit into an exposure catalog:
    - The visitInfo.
    - The wcs.
    - The photoCalib.
    - The physical_filter and band (if available).
    - The psf size, shape, and effective area at the center of the detector.
    - The corners of the bounding box in right ascension/declination.

    Other quantities such as Detector, Psf, ApCorrMap, and TransmissionCurve
    are not persisted here because of storage concerns, and because of their
    limited utility as summary statistics.

    Tests for this task are performed in ci_hsc_gen3.
    """
    _DefaultName = "consolidateVisitSummary"
    ConfigClass = ConsolidateVisitSummaryConfig

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        dataRefs = butlerQC.get(inputRefs.calexp)
        visit = dataRefs[0].dataId.byName()['visit']

        self.log.debug("Concatenating metadata from %d per-detector calexps (visit %d)",
                       len(dataRefs), visit)

        expCatalog = self._combineExposureMetadata(visit, dataRefs)

        butlerQC.put(expCatalog, outputRefs.visitSummary)

    def _combineExposureMetadata(self, visit, dataRefs):
        """Make a combined exposure catalog from a list of dataRefs.
        These dataRefs must point to exposures with wcs, summaryStats,
        and other visit metadata.

        Parameters
        ----------
        visit : `int`
            Visit identification number.
        dataRefs : `list` of `lsst.daf.butler.DeferredDatasetHandle`
            List of dataRefs in visit.

        Returns
        -------
        visitSummary : `lsst.afw.table.ExposureCatalog`
            Exposure catalog with per-detector summary information.
        """
        schema = self._makeVisitSummarySchema()
        cat = afwTable.ExposureCatalog(schema)
        cat.resize(len(dataRefs))

        cat['visit'] = visit

        for i, dataRef in enumerate(dataRefs):
            visitInfo = dataRef.get(component='visitInfo')
            filterLabel = dataRef.get(component='filter')
            summaryStats = dataRef.get(component='summaryStats')
            detector = dataRef.get(component='detector')
            wcs = dataRef.get(component='wcs')
            photoCalib = dataRef.get(component='photoCalib')
            bbox = dataRef.get(component='bbox')
            validPolygon = dataRef.get(component='validPolygon')

            rec = cat[i]
            rec.setBBox(bbox)
            rec.setVisitInfo(visitInfo)
            rec.setWcs(wcs)
            rec.setPhotoCalib(photoCalib)
            rec.setValidPolygon(validPolygon)

            rec['physical_filter'] = filterLabel.physicalLabel if filterLabel.hasPhysicalLabel() else ""
            rec['band'] = filterLabel.bandLabel if filterLabel.hasBandLabel() else ""
            rec.setId(detector.getId())
            rec['psfSigma'] = summaryStats.psfSigma
            rec['psfIxx'] = summaryStats.psfIxx
            rec['psfIyy'] = summaryStats.psfIyy
            rec['psfIxy'] = summaryStats.psfIxy
            rec['psfArea'] = summaryStats.psfArea
            rec['raCorners'][:] = summaryStats.raCorners
            rec['decCorners'][:] = summaryStats.decCorners
            rec['ra'] = summaryStats.ra
            rec['decl'] = summaryStats.decl
            rec['zenithDistance'] = summaryStats.zenithDistance
            rec['zeroPoint'] = summaryStats.zeroPoint
            rec['skyBg'] = summaryStats.skyBg
            rec['skyNoise'] = summaryStats.skyNoise
            rec['meanVar'] = summaryStats.meanVar
            rec['astromOffsetMean'] = summaryStats.astromOffsetMean
            rec['astromOffsetStd'] = summaryStats.astromOffsetStd
            rec['nPsfStar'] = summaryStats.nPsfStar
            rec['psfStarDeltaE1Median'] = summaryStats.psfStarDeltaE1Median
            rec['psfStarDeltaE2Median'] = summaryStats.psfStarDeltaE2Median
            rec['psfStarDeltaE1Scatter'] = summaryStats.psfStarDeltaE1Scatter
            rec['psfStarDeltaE2Scatter'] = summaryStats.psfStarDeltaE2Scatter
            rec['psfStarDeltaSizeMedian'] = summaryStats.psfStarDeltaSizeMedian
            rec['psfStarDeltaSizeScatter'] = summaryStats.psfStarDeltaSizeScatter
            rec['psfStarScaledDeltaSizeScatter'] = summaryStats.psfStarScaledDeltaSizeScatter

        metadata = dafBase.PropertyList()
        metadata.add("COMMENT", "Catalog id is detector id, sorted.")
        # We are looping over existing datarefs, so the following is true
        metadata.add("COMMENT", "Only detectors with data have entries.")
        cat.setMetadata(metadata)

        cat.sort()
        return cat

    def _makeVisitSummarySchema(self):
        """Make the schema for the visitSummary catalog."""
        schema = afwTable.ExposureTable.makeMinimalSchema()
        schema.addField('visit', type='L', doc='Visit number')
        schema.addField('physical_filter', type='String', size=32, doc='Physical filter')
        schema.addField('band', type='String', size=32, doc='Name of band')
        schema.addField('psfSigma', type='F',
                        doc='PSF model second-moments determinant radius (center of chip) (pixel)')
        schema.addField('psfArea', type='F',
                        doc='PSF model effective area (center of chip) (pixel**2)')
        schema.addField('psfIxx', type='F',
                        doc='PSF model Ixx (center of chip) (pixel**2)')
        schema.addField('psfIyy', type='F',
                        doc='PSF model Iyy (center of chip) (pixel**2)')
        schema.addField('psfIxy', type='F',
                        doc='PSF model Ixy (center of chip) (pixel**2)')
        schema.addField('raCorners', type='ArrayD', size=4,
                        doc='Right Ascension of bounding box corners (degrees)')
        schema.addField('decCorners', type='ArrayD', size=4,
                        doc='Declination of bounding box corners (degrees)')
        schema.addField('ra', type='D',
                        doc='Right Ascension of bounding box center (degrees)')
        schema.addField('decl', type='D',
                        doc='Declination of bounding box center (degrees)')
        schema.addField('zenithDistance', type='F',
                        doc='Zenith distance of bounding box center (degrees)')
        schema.addField('zeroPoint', type='F',
                        doc='Mean zeropoint in detector (mag)')
        schema.addField('skyBg', type='F',
                        doc='Average sky background (ADU)')
        schema.addField('skyNoise', type='F',
                        doc='Average sky noise (ADU)')
        schema.addField('meanVar', type='F',
                        doc='Mean variance of the weight plane (ADU**2)')
        schema.addField('astromOffsetMean', type='F',
                        doc='Mean offset of astrometric calibration matches (arcsec)')
        schema.addField('astromOffsetStd', type='F',
                        doc='Standard deviation of offsets of astrometric calibration matches (arcsec)')
        schema.addField('nPsfStar', type='I', doc='Number of stars used for PSF model')
        schema.addField('psfStarDeltaE1Median', type='F',
                        doc='Median E1 residual (starE1 - psfE1) for psf stars')
        schema.addField('psfStarDeltaE2Median', type='F',
                        doc='Median E2 residual (starE2 - psfE2) for psf stars')
        schema.addField('psfStarDeltaE1Scatter', type='F',
                        doc='Scatter (via MAD) of E1 residual (starE1 - psfE1) for psf stars')
        schema.addField('psfStarDeltaE2Scatter', type='F',
                        doc='Scatter (via MAD) of E2 residual (starE2 - psfE2) for psf stars')
        schema.addField('psfStarDeltaSizeMedian', type='F',
                        doc='Median size residual (starSize - psfSize) for psf stars (pixel)')
        schema.addField('psfStarDeltaSizeScatter', type='F',
                        doc='Scatter (via MAD) of size residual (starSize - psfSize) for psf stars (pixel)')
        schema.addField('psfStarScaledDeltaSizeScatter', type='F',
                        doc='Scatter (via MAD) of size residual scaled by median size squared')

        return schema


class ConsolidateSourceTableConnections(pipeBase.PipelineTaskConnections,
                                        defaultTemplates={"catalogType": ""},
                                        dimensions=("instrument", "visit")):
    inputCatalogs = connectionTypes.Input(
        doc="Input per-detector Source Tables",
        name="{catalogType}sourceTable",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector"),
        multiple=True
    )
    outputCatalog = connectionTypes.Output(
        doc="Per-visit concatenation of Source Table",
        name="{catalogType}sourceTable_visit",
        storageClass="DataFrame",
        dimensions=("instrument", "visit")
    )


class ConsolidateSourceTableConfig(pipeBase.PipelineTaskConfig,
                                   pipelineConnections=ConsolidateSourceTableConnections):
    pass


class ConsolidateSourceTableTask(pipeBase.PipelineTask):
    """Concatenate `sourceTable` list into a per-visit `sourceTable_visit`
    """
    _DefaultName = 'consolidateSourceTable'
    ConfigClass = ConsolidateSourceTableConfig

    inputDataset = 'sourceTable'
    outputDataset = 'sourceTable_visit'

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        from .makeWarp import reorderRefs

        # Put the detectors in a deterministic order before concatenating
        detectorOrder = [ref.dataId['detector'] for ref in inputRefs.inputCatalogs]
        detectorOrder.sort()
        inputRefs = reorderRefs(inputRefs, detectorOrder, dataIdKey='detector')
        inputs = butlerQC.get(inputRefs)
        self.log.info("Concatenating %s per-detector Source Tables",
                      len(inputs['inputCatalogs']))
        df = pd.concat(inputs['inputCatalogs'])
        butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs)


class MakeCcdVisitTableConnections(pipeBase.PipelineTaskConnections,
                                   dimensions=("instrument",),
                                   defaultTemplates={"calexpType": ""}):
    visitSummaryRefs = connectionTypes.Input(
        doc="Data references for per-visit consolidated exposure metadata from ConsolidateVisitSummaryTask",
        name="{calexpType}visitSummary",
        storageClass="ExposureCatalog",
        dimensions=("instrument", "visit"),
        multiple=True,
        deferLoad=True,
    )
    outputCatalog = connectionTypes.Output(
        doc="CCD and Visit metadata table",
        name="ccdVisitTable",
        storageClass="DataFrame",
        dimensions=("instrument",)
    )


class MakeCcdVisitTableConfig(pipeBase.PipelineTaskConfig,
                              pipelineConnections=MakeCcdVisitTableConnections):
    pass


class MakeCcdVisitTableTask(pipeBase.PipelineTask):
    """Produce a `ccdVisitTable` from the `visitSummary` exposure catalogs.
    """
    _DefaultName = 'makeCcdVisitTable'
    ConfigClass = MakeCcdVisitTableConfig

    def run(self, visitSummaryRefs):
        """Make a table of ccd information from the `visitSummary` catalogs.

        Parameters
        ----------
        visitSummaryRefs : `list` of `lsst.daf.butler.DeferredDatasetHandle`
            List of DeferredDatasetHandles pointing to exposure catalogs with
            per-detector summary information.

        Returns
        -------
        result : `lsst.pipe.base.Struct`
            Results struct with attribute:

            ``outputCatalog``
                Catalog of ccd and visit information.
        """
        ccdEntries = []
        for visitSummaryRef in visitSummaryRefs:
            visitSummary = visitSummaryRef.get()
            visitInfo = visitSummary[0].getVisitInfo()

            summaryTable = visitSummary.asAstropy()
            selectColumns = ['id', 'visit', 'physical_filter', 'band', 'ra', 'decl', 'zenithDistance',
                             'zeroPoint', 'psfSigma', 'skyBg', 'skyNoise',
                             'astromOffsetMean', 'astromOffsetStd', 'nPsfStar',
                             'psfStarDeltaE1Median', 'psfStarDeltaE2Median',
                             'psfStarDeltaE1Scatter', 'psfStarDeltaE2Scatter',
                             'psfStarDeltaSizeMedian', 'psfStarDeltaSizeScatter',
                             'psfStarScaledDeltaSizeScatter']
            ccdEntry = summaryTable[selectColumns].to_pandas().set_index('id')
            # 'visit' is the human-readable visit number; 'visitId' is the key
            # to the visit table. They are the same.
            ccdEntry = ccdEntry.rename(columns={"visit": "visitId"})
            dataIds = [DataCoordinate.standardize(visitSummaryRef.dataId, detector=id) for id in
                       summaryTable['id']]
            packer = visitSummaryRef.dataId.universe.makePacker('visit_detector', visitSummaryRef.dataId)
            ccdVisitIds = [packer.pack(dataId) for dataId in dataIds]
            ccdEntry['ccdVisitId'] = ccdVisitIds
            ccdEntry['detector'] = summaryTable['id']
            pixToArcseconds = np.array([vR.getWcs().getPixelScale().asArcseconds() for vR in visitSummary])
            ccdEntry["seeing"] = visitSummary['psfSigma'] * np.sqrt(8 * np.log(2)) * pixToArcseconds

            ccdEntry["skyRotation"] = visitInfo.getBoresightRotAngle().asDegrees()
            ccdEntry["expMidpt"] = visitInfo.getDate().toPython()
            ccdEntry["expMidptMJD"] = visitInfo.getDate().get(dafBase.DateTime.MJD)
            expTime = visitInfo.getExposureTime()
            ccdEntry['expTime'] = expTime
            ccdEntry["obsStart"] = ccdEntry["expMidpt"] - 0.5 * pd.Timedelta(seconds=expTime)
            expTime_days = expTime / (60*60*24)
            ccdEntry["obsStartMJD"] = ccdEntry["expMidptMJD"] - 0.5 * expTime_days
            ccdEntry['darkTime'] = visitInfo.getDarkTime()
            ccdEntry['xSize'] = summaryTable['bbox_max_x'] - summaryTable['bbox_min_x']
            ccdEntry['ySize'] = summaryTable['bbox_max_y'] - summaryTable['bbox_min_y']
            ccdEntry['llcra'] = summaryTable['raCorners'][:, 0]
            ccdEntry['llcdec'] = summaryTable['decCorners'][:, 0]
            ccdEntry['ulcra'] = summaryTable['raCorners'][:, 1]
            ccdEntry['ulcdec'] = summaryTable['decCorners'][:, 1]
            ccdEntry['urcra'] = summaryTable['raCorners'][:, 2]
            ccdEntry['urcdec'] = summaryTable['decCorners'][:, 2]
            ccdEntry['lrcra'] = summaryTable['raCorners'][:, 3]
            ccdEntry['lrcdec'] = summaryTable['decCorners'][:, 3]
            ccdEntries.append(ccdEntry)

        outputCatalog = pd.concat(ccdEntries)
        outputCatalog.set_index('ccdVisitId', inplace=True, verify_integrity=True)
        return pipeBase.Struct(outputCatalog=outputCatalog)
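
# The "seeing" column converts the Gaussian psfSigma to a FWHM in arcseconds;
# a worked check with illustrative values:
#
#     sigma = 2.0       # pixels
#     pixelScale = 0.2  # arcsec / pixel
#     fwhm = sigma * np.sqrt(8 * np.log(2)) * pixelScale  # ~0.94 arcsec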


class MakeVisitTableConnections(pipeBase.PipelineTaskConnections,
                                dimensions=("instrument",),
                                defaultTemplates={"calexpType": ""}):
    visitSummaries = connectionTypes.Input(
        doc="Per-visit consolidated exposure metadata from ConsolidateVisitSummaryTask",
        name="{calexpType}visitSummary",
        storageClass="ExposureCatalog",
        dimensions=("instrument", "visit",),
        multiple=True,
        deferLoad=True,
    )
    outputCatalog = connectionTypes.Output(
        doc="Visit metadata table",
        name="visitTable",
        storageClass="DataFrame",
        dimensions=("instrument",)
    )


class MakeVisitTableConfig(pipeBase.PipelineTaskConfig,
                           pipelineConnections=MakeVisitTableConnections):
    pass
1556 """Produce a `visitTable` from the `visitSummary` exposure catalogs.
1558 _DefaultName = 'makeVisitTable'
1559 ConfigClass = MakeVisitTableConfig
1561 def run(self, visitSummaries):
1562 """Make a table of visit information from the `visitSummary` catalogs.
1567 List of exposure catalogs with per-detector summary information.
1570 result : `lsst.pipe.Base.Struct`
1571 Results struct
with attribute:
1574 Catalog of visit information.
1577 for visitSummary
in visitSummaries:
1578 visitSummary = visitSummary.get()
1579 visitRow = visitSummary[0]
1580 visitInfo = visitRow.getVisitInfo()
1583 visitEntry[
"visitId"] = visitRow[
'visit']
1584 visitEntry[
"visit"] = visitRow[
'visit']
1585 visitEntry[
"physical_filter"] = visitRow[
'physical_filter']
1586 visitEntry[
"band"] = visitRow[
'band']
1587 raDec = visitInfo.getBoresightRaDec()
1588 visitEntry[
"ra"] = raDec.getRa().asDegrees()
1589 visitEntry[
"decl"] = raDec.getDec().asDegrees()
1590 visitEntry[
"skyRotation"] = visitInfo.getBoresightRotAngle().asDegrees()
1591 azAlt = visitInfo.getBoresightAzAlt()
1592 visitEntry[
"azimuth"] = azAlt.getLongitude().asDegrees()
1593 visitEntry[
"altitude"] = azAlt.getLatitude().asDegrees()
1594 visitEntry[
"zenithDistance"] = 90 - azAlt.getLatitude().asDegrees()
1595 visitEntry[
"airmass"] = visitInfo.getBoresightAirmass()
1596 expTime = visitInfo.getExposureTime()
1597 visitEntry[
"expTime"] = expTime
1598 visitEntry[
"expMidpt"] = visitInfo.getDate().toPython()
1599 visitEntry[
"expMidptMJD"] = visitInfo.getDate().get(dafBase.DateTime.MJD)
1600 visitEntry[
"obsStart"] = visitEntry[
"expMidpt"] - 0.5 * pd.Timedelta(seconds=expTime)
1601 expTime_days = expTime / (60*60*24)
1602 visitEntry[
"obsStartMJD"] = visitEntry[
"expMidptMJD"] - 0.5 * expTime_days
1603 visitEntries.append(visitEntry)
1609 outputCatalog = pd.DataFrame(data=visitEntries)
1610 outputCatalog.set_index(
'visitId', inplace=
True, verify_integrity=
True)
1611 return pipeBase.Struct(outputCatalog=outputCatalog)
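
# The obsStart columns shift the exposure midpoint back by half the exposure
# time; a worked check with an illustrative 30 s exposure:
#
#     expTime = 30.0                       # seconds
#     expTime_days = expTime / (60*60*24)  # ~3.47e-4 days
#     obsStartMJD = expMidptMJD - 0.5 * expTime_days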


class WriteForcedSourceTableConnections(pipeBase.PipelineTaskConnections,
                                        dimensions=("instrument", "visit", "detector", "skymap", "tract")):

    inputCatalog = connectionTypes.Input(
        doc="Primary per-detector, single-epoch forced-photometry catalog. "
            "By default, it is the output of ForcedPhotCcdTask on calexps",
        name="forced_src",
        storageClass="SourceCatalog",
        dimensions=("instrument", "visit", "detector", "skymap", "tract")
    )
    inputCatalogDiff = connectionTypes.Input(
        doc="Secondary multi-epoch, per-detector, forced photometry catalog. "
            "By default, it is the output of ForcedPhotCcdTask run on image differences.",
        name="forced_diff",
        storageClass="SourceCatalog",
        dimensions=("instrument", "visit", "detector", "skymap", "tract")
    )
    outputCatalog = connectionTypes.Output(
        doc="InputCatalogs horizontally joined on `objectId` in Parquet format",
        name="mergedForcedSource",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector", "skymap", "tract")
    )


class WriteForcedSourceTableConfig(pipeBase.PipelineTaskConfig,
                                   pipelineConnections=WriteForcedSourceTableConnections):
    key = lsst.pex.config.Field(
        doc="Column on which to join the two input tables and make the primary key of the output",
        dtype=str,
        default="objectId",
    )


class WriteForcedSourceTableTask(pipeBase.PipelineTask):
    """Merge and convert per-detector forced source catalogs to parquet.

    Because the predecessor ForcedPhotCcdTask operates per-detector,
    per-tract (i.e., it has tract in its dimensions), detectors
    on the tract boundary may have multiple forced source catalogs.

    The successor task TransformForcedSourceTable runs per-patch
    and temporally aggregates overlapping mergedForcedSource catalogs from all
    available epochs.
    """
    _DefaultName = "writeForcedSourceTable"
    ConfigClass = WriteForcedSourceTableConfig

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)
        # Add ccdVisitId to allow joining with CcdVisitTable
        inputs['ccdVisitId'] = butlerQC.quantum.dataId.pack("visit_detector")
        inputs['band'] = butlerQC.quantum.dataId.full['band']
        outputs = self.run(**inputs)
        butlerQC.put(outputs, outputRefs)

    def run(self, inputCatalog, inputCatalogDiff, ccdVisitId=None, band=None):
        dfs = []
        for table, dataset in zip((inputCatalog, inputCatalogDiff), ('calexp', 'diff')):
            df = table.asAstropy().to_pandas().set_index(self.config.key, drop=False)
            df = df.reindex(sorted(df.columns), axis=1)
            df['ccdVisitId'] = ccdVisitId if ccdVisitId else pd.NA
            df['band'] = band if band else pd.NA
            df.columns = pd.MultiIndex.from_tuples([(dataset, c) for c in df.columns],
                                                   names=('dataset', 'column'))
            dfs.append(df)

        outputCatalog = functools.reduce(lambda d1, d2: d1.join(d2), dfs)
        return pipeBase.Struct(outputCatalog=outputCatalog)
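
# Hedged sketch of the two-level column index produced above: after the join
# on objectId, the calexp and diff measurements for the same object sit side
# by side (the field name is illustrative):
#
#     outputCatalog[('calexp', 'base_PsfFlux_instFlux')]
#     outputCatalog[('diff', 'base_PsfFlux_instFlux')]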


class TransformForcedSourceTableConnections(pipeBase.PipelineTaskConnections,
                                            dimensions=("instrument", "skymap", "patch", "tract")):

    inputCatalogs = connectionTypes.Input(
        doc="Parquet table of merged ForcedSources produced by WriteForcedSourceTableTask",
        name="mergedForcedSource",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector", "skymap", "tract"),
        multiple=True,
        deferLoad=True
    )
    referenceCatalog = connectionTypes.Input(
        doc="Reference catalog which was used to seed the forcedPhot. Columns "
            "objectId, detect_isPrimary, detect_isTractInner, detect_isPatchInner "
            "are expected.",
        name="objectTable",
        storageClass="DataFrame",
        dimensions=("tract", "patch", "skymap"),
        deferLoad=True
    )
    outputCatalog = connectionTypes.Output(
        doc="Narrower, temporally-aggregated, per-patch ForcedSource Table transformed and converted per a "
            "specified set of functors",
        name="forcedSourceTable",
        storageClass="DataFrame",
        dimensions=("tract", "patch", "skymap")
    )


class TransformForcedSourceTableConfig(TransformCatalogBaseConfig,
                                       pipelineConnections=TransformForcedSourceTableConnections):
    referenceColumns = pexConfig.ListField(
        dtype=str,
        default=["detect_isPrimary", "detect_isTractInner", "detect_isPatchInner"],
        optional=True,
        doc="Columns to pull from reference catalog",
    )
    keyRef = lsst.pex.config.Field(
        doc="Column on which to join the two input tables and make the primary key of the output",
        dtype=str,
        default="objectId",
    )
    key = lsst.pex.config.Field(
        doc="Rename the output DataFrame index to this name",
        dtype=str,
        default="forcedSourceId",
    )

    def setDefaults(self):
        super().setDefaults()
        self.functorFile = os.path.join('$PIPE_TASKS_DIR', 'schemas', 'ForcedSource.yaml')
        self.columnsFromDataId = ['tract', 'patch']
1741 """Transform/standardize a ForcedSource catalog
1743 Transforms each wide, per-detector forcedSource parquet table per the
1744 specification file (per-camera defaults found in ForcedSource.yaml).
1745 All epochs that overlap the patch are aggregated into one per-patch
1746 narrow-parquet file.
1748 No de-duplication of rows
is performed. Duplicate resolutions flags are
1749 pulled
in from the referenceCatalog: `detect_isPrimary`,
1750 `detect_isTractInner`,`detect_isPatchInner`, so that user may de-duplicate
1751 for analysis
or compare duplicates
for QA.
1753 The resulting table includes multiple bands. Epochs (MJDs)
and other useful
1754 per-visit rows can be retreived by joining
with the CcdVisitTable on
1757 _DefaultName = "transformForcedSourceTable"
1758 ConfigClass = TransformForcedSourceTableConfig
1760 def runQuantum(self, butlerQC, inputRefs, outputRefs):
1761 inputs = butlerQC.get(inputRefs)
1762 if self.funcs
is None:
1763 raise ValueError(
"config.functorFile is None. "
1764 "Must be a valid path to yaml in order to run Task as a PipelineTask.")
1765 outputs = self.run(inputs[
'inputCatalogs'], inputs[
'referenceCatalog'], funcs=self.funcs,
1766 dataId=outputRefs.outputCatalog.dataId.full)
1768 butlerQC.put(outputs, outputRefs)
1770 def run(self, inputCatalogs, referenceCatalog, funcs=None, dataId=None, band=None):
1772 ref = referenceCatalog.get(parameters={
"columns": self.config.referenceColumns})
1773 self.log.info(
"Aggregating %s input catalogs" % (len(inputCatalogs)))
1774 for handle
in inputCatalogs:
1775 result = self.transform(
None, handle, funcs, dataId)
1777 dfs.append(result.df.join(ref, how=
'inner'))
1779 outputCatalog = pd.concat(dfs)
1783 outputCatalog.index.rename(self.config.keyRef, inplace=
True)
1785 outputCatalog.reset_index(inplace=
True)
1788 outputCatalog.set_index(
"forcedSourceId", inplace=
True, verify_integrity=
True)
1790 outputCatalog.index.rename(self.config.key, inplace=
True)
1792 self.log.info(
"Made a table of %d columns and %d rows",
1793 len(outputCatalog.columns), len(outputCatalog))
1794 return pipeBase.Struct(outputCatalog=outputCatalog)


class ConsolidateTractConnections(pipeBase.PipelineTaskConnections,
                                  defaultTemplates={"catalogType": ""},
                                  dimensions=("instrument", "tract")):
    inputCatalogs = connectionTypes.Input(
        doc="Input per-patch DataFrame Tables to be concatenated",
        name="{catalogType}ForcedSourceTable",
        storageClass="DataFrame",
        dimensions=("tract", "patch", "skymap"),
        multiple=True,
    )
    outputCatalog = connectionTypes.Output(
        doc="Output per-tract concatenation of DataFrame Tables",
        name="{catalogType}ForcedSourceTable_tract",
        storageClass="DataFrame",
        dimensions=("tract", "skymap"),
    )


class ConsolidateTractConfig(pipeBase.PipelineTaskConfig,
                             pipelineConnections=ConsolidateTractConnections):
    pass


class ConsolidateTractTask(pipeBase.PipelineTask):
    """Concatenate any per-patch, dataframe list into a single
    per-tract DataFrame.
    """
    _DefaultName = 'ConsolidateTract'
    ConfigClass = ConsolidateTractConfig

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)
        self.log.info("Concatenating %s per-patch %s Tables",
                      len(inputs['inputCatalogs']),
                      inputRefs.inputCatalogs[0].datasetType.name)
        df = pd.concat(inputs['inputCatalogs'])
        butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs)