__all__ = ["WriteObjectTableConfig", "WriteObjectTableTask",
           "WriteSourceTableConfig", "WriteSourceTableTask",
           "WriteRecalibratedSourceTableConfig", "WriteRecalibratedSourceTableTask",
           "PostprocessAnalysis",
           "TransformCatalogBaseConfig", "TransformCatalogBaseTask",
           "TransformObjectCatalogConfig", "TransformObjectCatalogTask",
           "ConsolidateObjectTableConfig", "ConsolidateObjectTableTask",
           "TransformSourceTableConfig", "TransformSourceTableTask",
           "ConsolidateVisitSummaryConfig", "ConsolidateVisitSummaryTask",
           "ConsolidateSourceTableConfig", "ConsolidateSourceTableTask",
           "MakeCcdVisitTableConfig", "MakeCcdVisitTableTask",
           "MakeVisitTableConfig", "MakeVisitTableTask",
           "WriteForcedSourceTableConfig", "WriteForcedSourceTableTask",
           "TransformForcedSourceTableConfig", "TransformForcedSourceTableTask",
           "ConsolidateTractConfig", "ConsolidateTractTask"]
import functools
import logging
import numbers
import os

import numpy as np
import pandas as pd

import lsst.pex.config as pexConfig
import lsst.pipe.base as pipeBase
import lsst.daf.base as dafBase
import lsst.afw.table as afwTable
from lsst.pipe.base import connectionTypes
from lsst.meas.base import SingleFrameMeasurementTask
from lsst.skymap import BaseSkyMap
from lsst.obs.base import ExposureIdInfo
from lsst.daf.butler import DeferredDatasetHandle, DataCoordinate

from .parquetTable import ParquetTable
from .functors import CompositeFunctor, Column

log = logging.getLogger(__name__)


def flattenFilters(df, noDupCols=['coord_ra', 'coord_dec'], camelCase=False, inputBands=None):
    """Flatten a dataframe with a multilevel column index into flat,
    band-prefixed column names.
    """
    newDf = pd.DataFrame()
    # band is the level-0 index of the column MultiIndex
    dfBands = df.columns.unique(level=0).values
    for band in dfBands:
        subdf = df[band]
        columnFormat = '{0}{1}' if camelCase else '{0}_{1}'
        newColumns = {c: columnFormat.format(band, c)
                      for c in subdf.columns if c not in noDupCols}
        cols = list(newColumns.keys())
        newDf = pd.concat([newDf, subdf[cols].rename(columns=newColumns)], axis=1)

    # A band must be present in both the input and the requested output bands,
    # otherwise its columns would be all NaN.
    presentBands = dfBands if inputBands is None else list(set(inputBands).intersection(dfBands))
    # Get the unexploded (noDup) columns from any present band's partition
    noDupDf = df[presentBands[0]][noDupCols]
    newDf = pd.concat([noDupDf, newDf], axis=1)
    return newDf
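

# Illustrative sketch (not part of the pipeline API): flattenFilters collapses
# the band-keyed column MultiIndex into flat, band-prefixed names, while the
# noDupCols are kept once. With a hypothetical two-band frame:
#
#     >>> import pandas as pd
#     >>> columns = pd.MultiIndex.from_tuples(
#     ...     [("g", "PsFlux"), ("g", "coord_ra"), ("r", "PsFlux"), ("r", "coord_ra")])
#     >>> df = pd.DataFrame([[1.0, 10.0, 2.0, 10.0]], columns=columns)
#     >>> flattenFilters(df, noDupCols=["coord_ra"]).columns.tolist()
#     ['coord_ra', 'g_PsFlux', 'r_PsFlux']
#     >>> flattenFilters(df, noDupCols=["coord_ra"], camelCase=True).columns.tolist()
#     ['coord_ra', 'gPsFlux', 'rPsFlux']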


class WriteObjectTableConnections(pipeBase.PipelineTaskConnections,
                                  defaultTemplates={"coaddName": "deep"},
                                  dimensions=("tract", "patch", "skymap")):
    inputCatalogMeas = connectionTypes.Input(
        doc="Catalog of source measurements on the deepCoadd.",
        dimensions=("tract", "patch", "band", "skymap"),
        storageClass="SourceCatalog",
        name="{coaddName}Coadd_meas",
        multiple=True
    )
    inputCatalogForcedSrc = connectionTypes.Input(
        doc="Catalog of forced measurements (shape and position parameters held fixed) on the deepCoadd.",
        dimensions=("tract", "patch", "band", "skymap"),
        storageClass="SourceCatalog",
        name="{coaddName}Coadd_forced_src",
        multiple=True
    )
    inputCatalogRef = connectionTypes.Input(
        doc="Catalog marking the primary detection (which band provides a good shape and position) "
            "for each detection in deepCoadd_mergeDet.",
        dimensions=("tract", "patch", "skymap"),
        storageClass="SourceCatalog",
        name="{coaddName}Coadd_ref"
    )
    outputCatalog = connectionTypes.Output(
        doc="A vertical concatenation of the deepCoadd_{ref|meas|forced_src} catalogs, "
            "stored as a DataFrame with a multi-level column index per-patch.",
        dimensions=("tract", "patch", "skymap"),
        storageClass="DataFrame",
        name="{coaddName}Coadd_obj"
    )


class WriteObjectTableConfig(pipeBase.PipelineTaskConfig,
                             pipelineConnections=WriteObjectTableConnections):
    engine = pexConfig.Field(
        dtype=str,
        default="pyarrow",
        doc="Parquet engine for writing (pyarrow or fastparquet)"
    )
    coaddName = pexConfig.Field(
        dtype=str,
        default="deep",
        doc="Name of coadd"
    )


class WriteObjectTableTask(pipeBase.PipelineTask):
    """Write filter-merged source tables to parquet.
    """
    _DefaultName = "writeObjectTable"
    ConfigClass = WriteObjectTableConfig

    # Names of table datasets to be merged
    inputDatasets = ('forced_src', 'meas', 'ref')

    # Name of the output dataset
    outputDataset = 'obj'

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)

        measDict = {ref.dataId['band']: {'meas': cat} for ref, cat in
                    zip(inputRefs.inputCatalogMeas, inputs['inputCatalogMeas'])}
        forcedSourceDict = {ref.dataId['band']: {'forced_src': cat} for ref, cat in
                            zip(inputRefs.inputCatalogForcedSrc, inputs['inputCatalogForcedSrc'])}

        catalogs = {}
        for band in measDict.keys():
            catalogs[band] = {'meas': measDict[band]['meas'],
                              'forced_src': forcedSourceDict[band]['forced_src'],
                              'ref': inputs['inputCatalogRef']}
        dataId = butlerQC.quantum.dataId
        df = self.run(catalogs=catalogs, tract=dataId['tract'], patch=dataId['patch'])
        outputs = pipeBase.Struct(outputCatalog=df)
        butlerQC.put(outputs, outputRefs)

    def run(self, catalogs, tract, patch):
        """Merge multiple catalogs.

        Parameters
        ----------
        catalogs : `dict`
            Mapping from filter names to dict of catalogs.
        tract : `int`
            tractId to use for the tractId column.
        patch : `str`
            patchId to use for the patchId column.

        Returns
        -------
        catalog : `pandas.DataFrame`
            Merged dataframe.
        """
        dfs = []
        for filt, tableDict in catalogs.items():
            for dataset, table in tableDict.items():
                # Convert afwTable to pandas DataFrame
                df = table.asAstropy().to_pandas().set_index('id', drop=True)

                # Sort columns by name, to ensure matching schema among patches
                df = df.reindex(sorted(df.columns), axis=1)
                df['tractId'] = tract
                df['patchId'] = patch

                # Make columns a 3-level MultiIndex
                df.columns = pd.MultiIndex.from_tuples([(dataset, filt, c) for c in df.columns],
                                                       names=('dataset', 'band', 'column'))
                dfs.append(df)

        catalog = functools.reduce(lambda d1, d2: d1.join(d2), dfs)
        return catalog
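
    # Illustrative sketch (not part of the task): the merged frame carries a
    # three-level column MultiIndex keyed by (dataset, band, column), so the
    # per-patch ``deepCoadd_obj`` DataFrame can be sliced by dataset and band:
    #
    #     >>> import pandas as pd
    #     >>> columns = pd.MultiIndex.from_tuples(
    #     ...     [("meas", "g", "base_PsfFlux_instFlux"),
    #     ...      ("forced_src", "g", "base_PsfFlux_instFlux"),
    #     ...      ("ref", "g", "detect_isPrimary")],
    #     ...     names=("dataset", "band", "column"))
    #     >>> obj = pd.DataFrame([[1.0, 0.9, True]], columns=columns)
    #     >>> obj["meas"]["g"].columns.tolist()
    #     ['base_PsfFlux_instFlux']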


class WriteSourceTableConnections(pipeBase.PipelineTaskConnections,
                                  defaultTemplates={"catalogType": ""},
                                  dimensions=("instrument", "visit", "detector")):
    catalog = connectionTypes.Input(
        doc="Input full-depth catalog of sources produced by CalibrateTask",
        name="{catalogType}src",
        storageClass="SourceCatalog",
        dimensions=("instrument", "visit", "detector")
    )
    outputCatalog = connectionTypes.Output(
        doc="Catalog of sources, `src` in Parquet format. The 'id' column is "
            "replaced with an index; all other columns are unchanged.",
        name="{catalogType}source",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector")
    )


class WriteSourceTableConfig(pipeBase.PipelineTaskConfig,
                             pipelineConnections=WriteSourceTableConnections):
    pass


class WriteSourceTableTask(pipeBase.PipelineTask):
    """Write source table to parquet.
    """
    _DefaultName = "writeSourceTable"
    ConfigClass = WriteSourceTableConfig

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)
        inputs['ccdVisitId'] = butlerQC.quantum.dataId.pack("visit_detector")
        result = self.run(**inputs).table
        outputs = pipeBase.Struct(outputCatalog=result.toDataFrame())
        butlerQC.put(outputs, outputRefs)

    def run(self, catalog, ccdVisitId=None, **kwargs):
        """Convert `src` catalog to parquet.

        Parameters
        ----------
        catalog : `afwTable.SourceCatalog`
            Catalog to be converted.
        ccdVisitId : `int`
            ccdVisitId to be added as a column.

        Returns
        -------
        result : `lsst.pipe.base.Struct`
            ``table``
                `ParquetTable` version of the input catalog.
        """
        self.log.info("Generating parquet table from src catalog ccdVisitId=%s", ccdVisitId)
        df = catalog.asAstropy().to_pandas().set_index('id', drop=True)
        df['ccdVisitId'] = ccdVisitId
        return pipeBase.Struct(table=ParquetTable(dataFrame=df))


class WriteRecalibratedSourceTableConnections(WriteSourceTableConnections,
                                              defaultTemplates={"catalogType": "",
                                                                "skyWcsName": "jointcal",
                                                                "photoCalibName": "fgcm"},
                                              dimensions=("instrument", "visit", "detector", "skymap")):
    skyMap = connectionTypes.Input(
        doc="skyMap needed to choose which tract-level calibrations to use when multiple available",
        name=BaseSkyMap.SKYMAP_DATASET_TYPE_NAME,
        storageClass="SkyMap",
        dimensions=("skymap",),
    )
    exposure = connectionTypes.Input(
        doc="Input exposure to perform photometry on.",
        name="calexp",
        storageClass="ExposureF",
        dimensions=["instrument", "visit", "detector"],
    )
    externalSkyWcsTractCatalog = connectionTypes.Input(
        doc=("Per-tract, per-visit wcs calibrations. These catalogs use the detector "
             "id for the catalog id, sorted on id for fast lookup."),
        name="{skyWcsName}SkyWcsCatalog",
        storageClass="ExposureCatalog",
        dimensions=["instrument", "visit", "tract"],
        multiple=True
    )
    externalSkyWcsGlobalCatalog = connectionTypes.Input(
        doc=("Per-visit wcs calibrations computed globally (with no tract information). "
             "These catalogs use the detector id for the catalog id, sorted on id for "
             "fast lookup."),
        name="{skyWcsName}SkyWcsCatalog",
        storageClass="ExposureCatalog",
        dimensions=["instrument", "visit"],
    )
    externalPhotoCalibTractCatalog = connectionTypes.Input(
        doc=("Per-tract, per-visit photometric calibrations. These catalogs use the "
             "detector id for the catalog id, sorted on id for fast lookup."),
        name="{photoCalibName}PhotoCalibCatalog",
        storageClass="ExposureCatalog",
        dimensions=["instrument", "visit", "tract"],
        multiple=True
    )
    externalPhotoCalibGlobalCatalog = connectionTypes.Input(
        doc=("Per-visit photometric calibrations computed globally (with no tract "
             "information). These catalogs use the detector id for the catalog id, "
             "sorted on id for fast lookup."),
        name="{photoCalibName}PhotoCalibCatalog",
        storageClass="ExposureCatalog",
        dimensions=["instrument", "visit"],
    )

    def __init__(self, *, config=None):
        super().__init__(config=config)
        # Keep only the external-calibration inputs that the configuration
        # actually requires.
        if config.doApplyExternalSkyWcs and config.doReevaluateSkyWcs:
            if config.useGlobalExternalSkyWcs:
                self.inputs.remove("externalSkyWcsTractCatalog")
            else:
                self.inputs.remove("externalSkyWcsGlobalCatalog")
        else:
            self.inputs.remove("externalSkyWcsTractCatalog")
            self.inputs.remove("externalSkyWcsGlobalCatalog")
        if config.doApplyExternalPhotoCalib and config.doReevaluatePhotoCalib:
            if config.useGlobalExternalPhotoCalib:
                self.inputs.remove("externalPhotoCalibTractCatalog")
            else:
                self.inputs.remove("externalPhotoCalibGlobalCatalog")
        else:
            self.inputs.remove("externalPhotoCalibTractCatalog")
            self.inputs.remove("externalPhotoCalibGlobalCatalog")


class WriteRecalibratedSourceTableConfig(WriteSourceTableConfig,
                                         pipelineConnections=WriteRecalibratedSourceTableConnections):

    doReevaluatePhotoCalib = pexConfig.Field(
        dtype=bool,
        default=False,
        doc="Add or replace local photoCalib columns",
    )
    doReevaluateSkyWcs = pexConfig.Field(
        dtype=bool,
        default=False,
        doc="Add or replace local WCS columns and update the coord columns, coord_ra and coord_dec",
    )
    doApplyExternalPhotoCalib = pexConfig.Field(
        dtype=bool,
        default=False,
        doc=("If and only if doReevaluatePhotoCalib, apply the photometric calibrations from an external "
             "algorithm such as FGCM or jointcal, else use the photoCalib already attached to the exposure."),
    )
    doApplyExternalSkyWcs = pexConfig.Field(
        dtype=bool,
        default=False,
        doc=("If and only if doReevaluateSkyWcs, apply the WCS from an external algorithm such as jointcal, "
             "else use the wcs already attached to the exposure."),
    )
    useGlobalExternalPhotoCalib = pexConfig.Field(
        dtype=bool,
        default=True,
        doc=("When using doApplyExternalPhotoCalib, use 'global' calibrations "
             "that are not run per-tract. When False, use per-tract photometric "
             "calibration files.")
    )
    useGlobalExternalSkyWcs = pexConfig.Field(
        dtype=bool,
        default=True,
        doc=("When using doApplyExternalSkyWcs, use 'global' calibrations "
             "that are not run per-tract. When False, use per-tract wcs "
             "files.")
    )

    def validate(self):
        super().validate()
        if self.doApplyExternalSkyWcs and not self.doReevaluateSkyWcs:
            log.warning("doApplyExternalSkyWcs=True but doReevaluateSkyWcs=False. "
                        "External SkyWcs will not be read or evaluated.")
        if self.doApplyExternalPhotoCalib and not self.doReevaluatePhotoCalib:
            log.warning("doApplyExternalPhotoCalib=True but doReevaluatePhotoCalib=False. "
                        "External PhotoCalib will not be read or evaluated.")


class WriteRecalibratedSourceTableTask(WriteSourceTableTask):
    """Write source table to parquet, after recalibrating per configuration.
    """
    _DefaultName = "writeRecalibratedSourceTable"
    ConfigClass = WriteRecalibratedSourceTableConfig

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)
        inputs['ccdVisitId'] = butlerQC.quantum.dataId.pack("visit_detector")
        inputs['exposureIdInfo'] = ExposureIdInfo.fromDataId(butlerQC.quantum.dataId, "visit_detector")

        if self.config.doReevaluatePhotoCalib or self.config.doReevaluateSkyWcs:
            if self.config.doApplyExternalPhotoCalib or self.config.doApplyExternalSkyWcs:
                inputs['exposure'] = self.attachCalibs(inputRefs, **inputs)

            inputs['catalog'] = self.addCalibColumns(**inputs)

        result = self.run(**inputs).table
        outputs = pipeBase.Struct(outputCatalog=result.toDataFrame())
        butlerQC.put(outputs, outputRefs)

    def attachCalibs(self, inputRefs, skyMap, exposure, externalSkyWcsGlobalCatalog=None,
                     externalSkyWcsTractCatalog=None, externalPhotoCalibGlobalCatalog=None,
                     externalPhotoCalibTractCatalog=None, **kwargs):
        """Apply external calibrations to the exposure per the configuration.

        When multiple tract-level calibrations overlap, select the one with the
        center closest to the detector.

        Parameters
        ----------
        inputRefs : `lsst.pipe.base.InputQuantizedConnection`
            Input references, used for the dataIds of the tract-level calibs.
        skyMap : `lsst.skymap.SkyMap`
            skyMap to look up tract geometry and WCS.
        exposure : `lsst.afw.image.exposure.Exposure`
            Input exposure to adjust calibrations.
        externalSkyWcsGlobalCatalog : `lsst.afw.table.ExposureCatalog`, optional
            Exposure catalog with external skyWcs to be applied per config.
        externalSkyWcsTractCatalog : `lsst.afw.table.ExposureCatalog`, optional
            Exposure catalog with external skyWcs to be applied per config.
        externalPhotoCalibGlobalCatalog : `lsst.afw.table.ExposureCatalog`, optional
            Exposure catalog with external photoCalib to be applied per config.
        externalPhotoCalibTractCatalog : `lsst.afw.table.ExposureCatalog`, optional
            Exposure catalog with external photoCalib to be applied per config.

        Returns
        -------
        exposure : `lsst.afw.image.exposure.Exposure`
            Exposure with adjusted calibrations.
        """
        if not self.config.doApplyExternalSkyWcs:
            # Do not modify the exposure's SkyWcs
            externalSkyWcsCatalog = None
        elif self.config.useGlobalExternalSkyWcs:
            # Use the global external SkyWcs
            externalSkyWcsCatalog = externalSkyWcsGlobalCatalog
            self.log.info('Applying global SkyWcs')
        else:
            # Use a tract-level external SkyWcs from the closest overlapping tract
            inputRef = getattr(inputRefs, 'externalSkyWcsTractCatalog')
            tracts = [ref.dataId['tract'] for ref in inputRef]
            if len(tracts) == 1:
                ind = 0
                self.log.info('Applying tract-level SkyWcs from tract %s', tracts[ind])
            else:
                ind = self.getClosestTract(tracts, skyMap,
                                           exposure.getBBox(), exposure.getWcs())
                self.log.info('Multiple overlapping externalSkyWcsTractCatalogs found (%s). '
                              'Applying closest to detector center: tract=%s', str(tracts), tracts[ind])
            externalSkyWcsCatalog = externalSkyWcsTractCatalog[ind]

        if not self.config.doApplyExternalPhotoCalib:
            # Do not modify the exposure's PhotoCalib
            externalPhotoCalibCatalog = None
        elif self.config.useGlobalExternalPhotoCalib:
            # Use the global external PhotoCalib
            externalPhotoCalibCatalog = externalPhotoCalibGlobalCatalog
            self.log.info('Applying global PhotoCalib')
        else:
            # Use a tract-level external PhotoCalib from the closest overlapping tract
            inputRef = getattr(inputRefs, 'externalPhotoCalibTractCatalog')
            tracts = [ref.dataId['tract'] for ref in inputRef]
            if len(tracts) == 1:
                ind = 0
                self.log.info('Applying tract-level PhotoCalib from tract %s', tracts[ind])
            else:
                ind = self.getClosestTract(tracts, skyMap,
                                           exposure.getBBox(), exposure.getWcs())
                self.log.info('Multiple overlapping externalPhotoCalibTractCatalogs found (%s). '
                              'Applying closest to detector center: tract=%s', str(tracts), tracts[ind])
            externalPhotoCalibCatalog = externalPhotoCalibTractCatalog[ind]

        return self.prepareCalibratedExposure(exposure, externalSkyWcsCatalog, externalPhotoCalibCatalog)

    def getClosestTract(self, tracts, skyMap, bbox, wcs):
        """Find the index of the tract closest to the detector from a list of tractIds.

        Parameters
        ----------
        tracts : `list` [`int`]
            Iterable of integer tractIds.
        skyMap : `lsst.skymap.SkyMap`
            skyMap to look up tract geometry and wcs.
        bbox : `lsst.geom.Box2I`
            Detector bbox, the center of which is compared to tract centers.
        wcs : `lsst.afw.geom.SkyWcs`
            Detector Wcs object to map the detector center to a SkyCoord.

        Returns
        -------
        index : `int`
            Index of the closest tract in ``tracts``.
        """
        center = wcs.pixelToSky(bbox.getCenter())
        sep = []
        for tractId in tracts:
            tract = skyMap[tractId]
            tractCenter = tract.getWcs().pixelToSky(tract.getBBox().getCenter())
            sep.append(center.separation(tractCenter))

        return np.argmin(sep)
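
    # Illustrative sketch (not part of the task): the selection above reduces
    # to an argmin over angular separations between the detector center and
    # each candidate tract center, e.g. with hypothetical separations:
    #
    #     >>> import numpy as np
    #     >>> separations = [1.2, 0.3, 0.8]   # one entry per candidate tract
    #     >>> int(np.argmin(separations))
    #     1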

    def prepareCalibratedExposure(self, exposure, externalSkyWcsCatalog=None, externalPhotoCalibCatalog=None):
        """Prepare a calibrated exposure and apply external calibrations
        if so configured.

        Parameters
        ----------
        exposure : `lsst.afw.image.exposure.Exposure`
            Input exposure to adjust calibrations.
        externalSkyWcsCatalog : `lsst.afw.table.ExposureCatalog`, optional
            Exposure catalog with external skyWcs to be applied
            if config.doApplyExternalSkyWcs=True. Catalog uses the detector id
            for the catalog id, sorted on id for fast lookup.
        externalPhotoCalibCatalog : `lsst.afw.table.ExposureCatalog`, optional
            Exposure catalog with external photoCalib to be applied
            if config.doApplyExternalPhotoCalib=True. Catalog uses the detector
            id for the catalog id, sorted on id for fast lookup.

        Returns
        -------
        exposure : `lsst.afw.image.exposure.Exposure`
            Exposure with adjusted calibrations.
        """
        detectorId = exposure.getInfo().getDetector().getId()

        if externalPhotoCalibCatalog is not None:
            row = externalPhotoCalibCatalog.find(detectorId)
            if row is None:
                self.log.warning("Detector id %s not found in externalPhotoCalibCatalog; "
                                 "using original photoCalib.", detectorId)
            else:
                photoCalib = row.getPhotoCalib()
                if photoCalib is None:
                    self.log.warning("Detector id %s has None for photoCalib in externalPhotoCalibCatalog; "
                                     "using original photoCalib.", detectorId)
                else:
                    exposure.setPhotoCalib(photoCalib)

        if externalSkyWcsCatalog is not None:
            row = externalSkyWcsCatalog.find(detectorId)
            if row is None:
                self.log.warning("Detector id %s not found in externalSkyWcsCatalog; "
                                 "using original skyWcs.", detectorId)
            else:
                skyWcs = row.getWcs()
                if skyWcs is None:
                    self.log.warning("Detector id %s has None for skyWcs in externalSkyWcsCatalog; "
                                     "using original skyWcs.", detectorId)
                else:
                    exposure.setWcs(skyWcs)

        return exposure

    def addCalibColumns(self, catalog, exposure, exposureIdInfo, **kwargs):
        """Add or replace columns with calibs evaluated at each centroid.

        Add or replace 'base_LocalWcs' and 'base_LocalPhotoCalib' columns in
        a source catalog, by rerunning the plugins.

        Parameters
        ----------
        catalog : `lsst.afw.table.SourceCatalog`
            Catalog to which calib columns will be added.
        exposure : `lsst.afw.image.exposure.Exposure`
            Exposure with attached PhotoCalib and SkyWcs attributes to be
            reevaluated at local centroids. Pixels are not required.
        exposureIdInfo : `lsst.obs.base.ExposureIdInfo`

        Returns
        -------
        newCat : `lsst.afw.table.SourceCatalog`
            Source Catalog with requested local calib columns.
        """
        measureConfig = SingleFrameMeasurementTask.ConfigClass()
        measureConfig.doReplaceWithNoise = False

        # Clear all slots; only the local-calib plugins are run here.
        for slot in measureConfig.slots:
            setattr(measureConfig.slots, slot, None)

        measureConfig.plugins.names = []
        if self.config.doReevaluateSkyWcs:
            measureConfig.plugins.names.add('base_LocalWcs')
            self.log.info("Re-evaluating base_LocalWcs plugin")
        if self.config.doReevaluatePhotoCalib:
            measureConfig.plugins.names.add('base_LocalPhotoCalib')
            self.log.info("Re-evaluating base_LocalPhotoCalib plugin")
        pluginsNotToCopy = tuple(measureConfig.plugins.names)

        # Create a new schema and catalog, copying all columns from the
        # original except the ones to be reevaluated.
        aliasMap = catalog.schema.getAliasMap()
        mapper = afwTable.SchemaMapper(catalog.schema)
        for item in catalog.schema:
            if not item.field.getName().startswith(pluginsNotToCopy):
                mapper.addMapping(item.key)

        schema = mapper.getOutputSchema()
        measurement = SingleFrameMeasurementTask(config=measureConfig, schema=schema)
        schema.setAliasMap(aliasMap)
        newCat = afwTable.SourceCatalog(schema)
        newCat.extend(catalog, mapper=mapper)

        # Update coord_ra/coord_dec, which are expected to be sky positions
        # and are used as such downstream without further transformation.
        if self.config.doReevaluateSkyWcs:
            afwTable.updateSourceCoords(exposure.wcs, newCat)

        measurement.run(measCat=newCat, exposure=exposure, exposureId=exposureIdInfo.expId)
        return newCat
621 """Calculate columns from ParquetTable.
623 This object manages and organizes an arbitrary set of computations
624 on a catalog. The catalog
is defined by a
626 as a ``deepCoadd_obj`` dataset,
and the computations are defined by a
627 collection of `lsst.pipe.tasks.functor.Functor` objects (
or, equivalently,
628 a ``CompositeFunctor``).
630 After the object
is initialized, accessing the ``.df`` attribute (which
631 holds the `pandas.DataFrame` containing the results of the calculations)
632 triggers computation of said dataframe.
634 One of the conveniences of using this object
is the ability to define a
635 desired common filter
for all functors. This enables the same functor
636 collection to be passed to several different `PostprocessAnalysis` objects
637 without having to change the original functor collection, since the ``filt``
638 keyword argument of this object triggers an overwrite of the ``filt``
639 property
for all functors
in the collection.
641 This object also allows a list of refFlags to be passed,
and defines a set
642 of default refFlags that are always included even
if not requested.
644 If a list of `~lsst.pipe.tasks.ParquetTable` object
is passed, rather than a single one,
645 then the calculations will be mapped over all the input catalogs. In
646 principle, it should be straightforward to parallelize this activity, but
647 initial tests have failed (see TODO
in code comments).
651 parq : `~lsst.pipe.tasks.ParquetTable` (
or list of such)
652 Source
catalog(s)
for computation.
655 Computations to do (functors that act on ``parq``).
656 If a dict, the output
657 DataFrame will have columns keyed accordingly.
658 If a list, the column keys will come
from the
659 ``.shortname`` attribute of each functor.
661 filt : `str`, optional
662 Filter
in which to calculate. If provided,
663 this will overwrite any existing ``.filt`` attribute
664 of the provided functors.
666 flags : `list`, optional
667 List of flags (per-band) to include
in output table.
668 Taken
from the ``meas`` dataset
if applied to a multilevel Object Table.
670 refFlags : `list`, optional
671 List of refFlags (only reference band) to include
in output table.
673 forcedFlags : `list`, optional
674 List of flags (per-band) to include
in output table.
675 Taken
from the ``forced_src`` dataset
if applied to a
676 multilevel Object Table. Intended
for flags
from measurement plugins
677 only run during multi-band forced-photometry.
679 _defaultRefFlags = []

    def __init__(self, parq, functors, filt=None, flags=None, refFlags=None, forcedFlags=None):
        self.parq = parq
        self.functors = functors

        self.filt = filt
        self.flags = list(flags) if flags is not None else []
        self.forcedFlags = list(forcedFlags) if forcedFlags is not None else []
        self.refFlags = list(self._defaultRefFlags)
        if refFlags is not None:
            self.refFlags += list(refFlags)

        self._df = None

    @property
    def defaultFuncs(self):
        funcs = dict(self._defaultFuncs)
        return funcs

    @property
    def func(self):
        additionalFuncs = self.defaultFuncs
        additionalFuncs.update({flag: Column(flag, dataset='forced_src') for flag in self.forcedFlags})
        additionalFuncs.update({flag: Column(flag, dataset='ref') for flag in self.refFlags})
        additionalFuncs.update({flag: Column(flag, dataset='meas') for flag in self.flags})

        if isinstance(self.functors, CompositeFunctor):
            func = self.functors
        else:
            func = CompositeFunctor(self.functors)

        func.funcDict.update(additionalFuncs)
        func.filt = self.filt

        return func

    @property
    def noDupCols(self):
        return [name for name, func in self.func.funcDict.items() if func.noDup or func.dataset == 'ref']

    @property
    def df(self):
        if self._df is None:
            self.compute()
        return self._df

    def compute(self, dropna=False, pool=None):
        # map over multiple ParquetTables
        if type(self.parq) in (list, tuple):
            if pool is None:
                dflist = [self.func(parq, dropna=dropna) for parq in self.parq]
            else:
                # TODO: Figure out why this doesn't work (pyarrow pickling issue?)
                dflist = pool.map(functools.partial(self.func, dropna=dropna), self.parq)
            self._df = pd.concat(dflist)
        else:
            self._df = self.func(self.parq, dropna=dropna)

        return self._df
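

# Illustrative sketch (hypothetical usage, not executed by the pipeline): a
# PostprocessAnalysis pairs a ParquetTable with a functor collection and
# lazily evaluates the result the first time ``.df`` is accessed. Here
# ``parq`` is assumed to be a ParquetTable of a ``deepCoadd_obj`` dataset and
# ``Column`` is the functor imported above from .functors:
#
#     >>> funcs = {'ra': Column('coord_ra', dataset='ref'),
#     ...          'dec': Column('coord_dec', dataset='ref')}
#     >>> analysis = PostprocessAnalysis(parq, funcs, filt='g',
#     ...                                flags=['base_PixelFlags_flag_saturated'])
#     >>> df = analysis.df  # triggers compute(); one column per functor/flag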
745 """Expected Connections for subclasses of TransformCatalogBaseTask.
749 inputCatalog = connectionTypes.Input(
751 storageClass=
"DataFrame",
753 outputCatalog = connectionTypes.Output(
755 storageClass=
"DataFrame",


class TransformCatalogBaseConfig(pipeBase.PipelineTaskConfig,
                                 pipelineConnections=TransformCatalogBaseConnections):
    functorFile = pexConfig.Field(
        dtype=str,
        doc="Path to YAML file specifying Science Data Model functors to use "
            "when copying columns and computing calibrated values.",
        default=None,
        optional=True
    )
    primaryKey = pexConfig.Field(
        dtype=str,
        doc="Name of column to be set as the DataFrame index. If None, the index "
            "will be named `id`.",
        default=None,
        optional=True
    )
    columnsFromDataId = pexConfig.ListField(
        dtype=str,
        default=None,
        optional=True,
        doc="Columns to extract from the dataId",
    )
784 """Base class for transforming/standardizing a catalog
786 by applying functors that convert units and apply calibrations.
787 The purpose of this task
is to perform a set of computations on
788 an input `ParquetTable` dataset (such
as ``deepCoadd_obj``)
and write the
789 results to a new dataset (which needs to be declared
in an ``outputDataset``
792 The calculations to be performed are defined
in a YAML file that specifies
793 a set of functors to be computed, provided
as
794 a ``--functorFile`` config parameter. An example of such a YAML file
819 - base_InputCount_value
822 functor: DeconvolvedMoments
827 - merge_measurement_i
828 - merge_measurement_r
829 - merge_measurement_z
830 - merge_measurement_y
831 - merge_measurement_g
832 - base_PixelFlags_flag_inexact_psfCenter
835 The names
for each entry under
"func" will become the names of columns
in
836 the output dataset. All the functors referenced are defined
in
838 functor are
in the `args` list,
and any additional entries
for each column
839 other than
"functor" or "args" (e.g., ``
'filt'``, ``
'dataset'``) are treated
as
840 keyword arguments to be passed to the functor initialization.
842 The
"flags" entry
is the default shortcut
for `Column` functors.
843 All columns listed under
"flags" will be copied to the output table
844 untransformed. They can be of any datatype.
845 In the special case of transforming a multi-level oject table
with
846 band
and dataset indices (deepCoadd_obj), these will be taked
from the
847 `meas` dataset
and exploded out per band.
849 There are two special shortcuts that only apply when transforming
850 multi-level Object (deepCoadd_obj) tables:
851 - The
"refFlags" entry
is shortcut
for `Column` functor
852 taken
from the `
'ref'` dataset
if transforming an ObjectTable.
853 - The
"forcedFlags" entry
is shortcut
for `Column` functors.
854 taken
from the ``forced_src`` dataset
if transforming an ObjectTable.
855 These are expanded out per band.
859 to organize
and excecute the calculations.
862 def _DefaultName(self):
863 raise NotImplementedError(
'Subclass must define "_DefaultName" attribute')
867 raise NotImplementedError(
'Subclass must define "outputDataset" attribute')
871 raise NotImplementedError(
'Subclass must define "inputDataset" attribute')
874 def ConfigClass(self):
875 raise NotImplementedError(
'Subclass must define "ConfigClass" attribute')

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        if self.config.functorFile:
            self.log.info('Loading transform functor definitions from %s',
                          self.config.functorFile)
            self.funcs = CompositeFunctor.from_file(self.config.functorFile)
            self.funcs.update(dict(PostprocessAnalysis._defaultFuncs))
        else:
            self.funcs = None

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)
        if self.funcs is None:
            raise ValueError("config.functorFile is None. "
                             "Must be a valid path to yaml in order to run Task as a PipelineTask.")
        result = self.run(parq=inputs['inputCatalog'], funcs=self.funcs,
                          dataId=outputRefs.outputCatalog.dataId.full)
        outputs = pipeBase.Struct(outputCatalog=result)
        butlerQC.put(outputs, outputRefs)

    def run(self, parq, funcs=None, dataId=None, band=None):
        """Do postprocessing calculations.

        Takes a `ParquetTable` object and dataId,
        returns a dataframe with results of postprocessing calculations.

        Parameters
        ----------
        parq : `lsst.pipe.tasks.parquetTable.ParquetTable`
            ParquetTable from which calculations are done.
        funcs : `lsst.pipe.tasks.functors.Functor`
            Functors to apply to the table's columns.
        dataId : `dict`, optional
            Used to add dataId columns to the output dataframe.
        band : `str`, optional
            Filter band that is being processed.

        Returns
        -------
        df : `pandas.DataFrame`
        """
        self.log.info("Transforming/standardizing the source table dataId: %s", dataId)

        df = self.transform(band, parq, funcs, dataId).df
        self.log.info("Made a table of %d columns and %d rows", len(df.columns), len(df))
        return df

    def getAnalysis(self, parq, funcs=None, band=None):
        if funcs is None:
            funcs = self.funcs
        analysis = PostprocessAnalysis(parq, funcs, filt=band)
        return analysis

    def transform(self, band, parq, funcs, dataId):
        analysis = self.getAnalysis(parq, funcs=funcs, band=band)
        df = analysis.df
        if dataId and self.config.columnsFromDataId:
            for key in self.config.columnsFromDataId:
                if key in dataId:
                    df[str(key)] = dataId[key]
                else:
                    raise ValueError(f"'{key}' in config.columnsFromDataId not found in dataId: {dataId}")

        if self.config.primaryKey:
            if df.index.name != self.config.primaryKey and self.config.primaryKey in df:
                df.reset_index(inplace=True, drop=True)
                df.set_index(self.config.primaryKey, inplace=True)

        return pipeBase.Struct(
            df=df,
            analysis=analysis
        )


class TransformObjectCatalogConnections(pipeBase.PipelineTaskConnections,
                                        defaultTemplates={"coaddName": "deep"},
                                        dimensions=("tract", "patch", "skymap")):
    inputCatalog = connectionTypes.Input(
        doc="The vertical concatenation of the deepCoadd_{ref|meas|forced_src} catalogs, "
            "stored as a DataFrame with a multi-level column index per-patch.",
        dimensions=("tract", "patch", "skymap"),
        storageClass="DataFrame",
        name="{coaddName}Coadd_obj",
        deferLoad=True,
    )
    outputCatalog = connectionTypes.Output(
        doc="Per-Patch Object Table of columns transformed from the deepCoadd_obj table per the standard "
            "data model.",
        dimensions=("tract", "patch", "skymap"),
        storageClass="DataFrame",
        name="objectTable",
    )


class TransformObjectCatalogConfig(TransformCatalogBaseConfig,
                                   pipelineConnections=TransformObjectCatalogConnections):
    coaddName = pexConfig.Field(
        dtype=str,
        default="deep",
        doc="Name of coadd"
    )
    filterMap = pexConfig.DictField(
        keytype=str,
        itemtype=str,
        default={},
        doc=("Dictionary mapping full filter name to short one for column name munging. "
             "These filters determine the output columns no matter what filters the "
             "input data actually contain."),
        deprecated=("Coadds are now identified by the band, so this transform is unused. "
                    "Will be removed after v22.")
    )
    outputBands = pexConfig.ListField(
        dtype=str,
        default=None,
        optional=True,
        doc=("These bands and only these bands will appear in the output,"
             " NaN-filled if the input does not include them."
             " If None, then use all bands found in the input.")
    )
    camelCase = pexConfig.Field(
        dtype=bool,
        default=False,
        doc=("Write per-band column names with camelCase, else underscore. "
             "For example: gPsFlux instead of g_PsFlux.")
    )
    multilevelOutput = pexConfig.Field(
        dtype=bool,
        default=False,
        doc=("Whether the resulting dataframe should have a multilevel column index (True) or be flat "
             "and name-munged (False).")
    )
    goodFlags = pexConfig.ListField(
        dtype=str,
        default=[],
        doc=("List of 'good' flags that should be set False when populating empty tables. "
             "All other flags are considered to be 'bad' flags and will be set to True.")
    )
    floatFillValue = pexConfig.Field(
        dtype=float,
        default=np.nan,
        doc="Fill value for float fields when populating empty tables."
    )
    integerFillValue = pexConfig.Field(
        dtype=int,
        default=-1,
        doc="Fill value for integer fields when populating empty tables."
    )

    def setDefaults(self):
        super().setDefaults()
        self.functorFile = os.path.join('$PIPE_TASKS_DIR', 'schemas', 'Object.yaml')
        self.primaryKey = 'objectId'
        self.columnsFromDataId = ['tract', 'patch']
        self.goodFlags = ['calib_astrometry_used',
                          'calib_photometry_reserved',
                          'calib_photometry_used',
                          'calib_psf_candidate',
                          'calib_psf_reserved',
                          'calib_psf_used']
1043 """Produce a flattened Object Table to match the format specified in
1046 Do the same set of postprocessing calculations on all bands.
1048 This is identical to `TransformCatalogBaseTask`,
except for that it does
1049 the specified functor calculations
for all filters present
in the
1050 input `deepCoadd_obj` table. Any specific ``
"filt"`` keywords specified
1051 by the YAML file will be superceded.
1053 _DefaultName = "transformObjectCatalog"
1054 ConfigClass = TransformObjectCatalogConfig

    def run(self, parq, funcs=None, dataId=None, band=None):
        # NOTE: band kwarg is ignored here.
        dfDict = {}
        analysisDict = {}
        templateDf = pd.DataFrame()

        if isinstance(parq, DeferredDatasetHandle):
            columns = parq.get(component='columns')
            inputBands = columns.unique(level=1).values
        else:
            inputBands = parq.columnLevelNames['band']

        outputBands = self.config.outputBands if self.config.outputBands else inputBands

        # Perform transform for data of filters that exist in the input parq.
        for inputBand in inputBands:
            if inputBand not in outputBands:
                self.log.info("Ignoring %s band data in the input", inputBand)
                continue
            self.log.info("Transforming the catalog of band %s", inputBand)
            result = self.transform(inputBand, parq, funcs, dataId)
            dfDict[inputBand] = result.df
            analysisDict[inputBand] = result.analysis
            if templateDf.empty:
                templateDf = result.df

        # Put filler values in columns of other wanted bands.
        for filt in outputBands:
            if filt not in dfDict:
                self.log.info("Adding empty columns for band %s", filt)
                dfTemp = templateDf.copy()
                for col in dfTemp.columns:
                    testValue = dfTemp[col].values[0]
                    if isinstance(testValue, (np.bool_, pd.BooleanDtype)):
                        # Boolean flag type: check if it is a "good" flag.
                        if col in self.config.goodFlags:
                            fillValue = False
                        else:
                            fillValue = True
                    elif isinstance(testValue, numbers.Integral):
                        # Checking numbers.Integral catches all flavors of
                        # python, numpy, and pandas integers; unsigned
                        # integers are not supported by Parquet tables.
                        if isinstance(testValue, np.unsignedinteger):
                            raise ValueError("Parquet tables may not have unsigned integer columns.")
                        else:
                            fillValue = self.config.integerFillValue
                    else:
                        fillValue = self.config.floatFillValue
                    dfTemp[col].values[:] = fillValue
                dfDict[filt] = dfTemp

        # This makes a multilevel column index, with band as the first level.
        df = pd.concat(dfDict, axis=1, names=['band', 'column'])

        if not self.config.multilevelOutput:
            noDupCols = list(set.union(*[set(v.noDupCols) for v in analysisDict.values()]))
            if self.config.primaryKey in noDupCols:
                noDupCols.remove(self.config.primaryKey)
            if dataId and self.config.columnsFromDataId:
                noDupCols += self.config.columnsFromDataId
            df = flattenFilters(df, noDupCols=noDupCols, camelCase=self.config.camelCase,
                                inputBands=inputBands)

        self.log.info("Made a table of %d columns and %d rows", len(df.columns), len(df))

        return df


class ConsolidateObjectTableConnections(pipeBase.PipelineTaskConnections,
                                        dimensions=("tract", "skymap")):
    inputCatalogs = connectionTypes.Input(
        doc="Per-Patch objectTables conforming to the standard data model.",
        name="objectTable",
        storageClass="DataFrame",
        dimensions=("tract", "patch", "skymap"),
        multiple=True,
    )
    outputCatalog = connectionTypes.Output(
        doc="Per-tract concatenation of the input objectTables",
        name="objectTable_tract",
        storageClass="DataFrame",
        dimensions=("tract", "skymap"),
    )


class ConsolidateObjectTableConfig(pipeBase.PipelineTaskConfig,
                                   pipelineConnections=ConsolidateObjectTableConnections):
    coaddName = pexConfig.Field(
        dtype=str,
        default="deep",
        doc="Name of coadd"
    )


class ConsolidateObjectTableTask(pipeBase.PipelineTask):
    """Write patch-merged source tables to a tract-level parquet file.

    Concatenates the `objectTable` list into a per-tract `objectTable_tract`.
    """
    _DefaultName = "consolidateObjectTable"
    ConfigClass = ConsolidateObjectTableConfig

    inputDataset = 'objectTable'
    outputDataset = 'objectTable_tract'

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)
        self.log.info("Concatenating %s per-patch Object Tables",
                      len(inputs['inputCatalogs']))
        df = pd.concat(inputs['inputCatalogs'])
        butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs)


class TransformSourceTableConnections(pipeBase.PipelineTaskConnections,
                                      defaultTemplates={"catalogType": ""},
                                      dimensions=("instrument", "visit", "detector")):
    inputCatalog = connectionTypes.Input(
        doc="Wide input catalog of sources produced by WriteSourceTableTask",
        name="{catalogType}source",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector"),
        deferLoad=True
    )
    outputCatalog = connectionTypes.Output(
        doc="Narrower, per-detector Source Table transformed and converted per a "
            "specified set of functors",
        name="{catalogType}sourceTable",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector")
    )


class TransformSourceTableConfig(TransformCatalogBaseConfig,
                                 pipelineConnections=TransformSourceTableConnections):

    def setDefaults(self):
        super().setDefaults()
        self.functorFile = os.path.join('$PIPE_TASKS_DIR', 'schemas', 'Source.yaml')
        self.primaryKey = 'sourceId'
        self.columnsFromDataId = ['visit', 'detector', 'band', 'physical_filter']


class TransformSourceTableTask(TransformCatalogBaseTask):
    """Transform/standardize a source catalog.
    """
    _DefaultName = "transformSourceTable"
    ConfigClass = TransformSourceTableConfig


class ConsolidateVisitSummaryConnections(pipeBase.PipelineTaskConnections,
                                         dimensions=("instrument", "visit",),
                                         defaultTemplates={"calexpType": ""}):
    calexp = connectionTypes.Input(
        doc="Processed exposures used for metadata",
        name="{calexpType}calexp",
        storageClass="ExposureF",
        dimensions=("instrument", "visit", "detector"),
        deferLoad=True,
        multiple=True,
    )
    visitSummary = connectionTypes.Output(
        doc=("Per-visit consolidated exposure metadata. These catalogs use "
             "detector id for the id and are sorted for fast lookups of a "
             "detector."),
        name="{calexpType}visitSummary",
        storageClass="ExposureCatalog",
        dimensions=("instrument", "visit"),
    )


class ConsolidateVisitSummaryConfig(pipeBase.PipelineTaskConfig,
                                    pipelineConnections=ConsolidateVisitSummaryConnections):
    """Config for ConsolidateVisitSummaryTask"""
    pass


class ConsolidateVisitSummaryTask(pipeBase.PipelineTask):
    """Task to consolidate per-detector visit metadata.

    This task aggregates the following metadata from all the detectors in a
    single visit into an exposure catalog:

    - The visitInfo.
    - The wcs.
    - The photoCalib.
    - The physical_filter and band (if available).
    - The psf size, shape, and effective area at the center of the detector.
    - The corners of the bounding box in right ascension/declination.

    Other quantities such as Detector, Psf, ApCorrMap, and TransmissionCurve
    are not persisted here because of storage concerns, and because of their
    limited utility as summary statistics.

    Tests for this task are performed in ci_hsc_gen3.
    """
    _DefaultName = "consolidateVisitSummary"
    ConfigClass = ConsolidateVisitSummaryConfig

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        dataRefs = butlerQC.get(inputRefs.calexp)
        visit = dataRefs[0].dataId.byName()['visit']

        self.log.debug("Concatenating metadata from %d per-detector calexps (visit %d)",
                       len(dataRefs), visit)

        expCatalog = self._combineExposureMetadata(visit, dataRefs)

        butlerQC.put(expCatalog, outputRefs.visitSummary)

    def _combineExposureMetadata(self, visit, dataRefs):
        """Make a combined exposure catalog from a list of dataRefs.

        These dataRefs must point to exposures with wcs, summaryStats,
        and other visit metadata.

        Parameters
        ----------
        visit : `int`
            Visit identification number.
        dataRefs : `list` of `lsst.daf.butler.DeferredDatasetHandle`
            List of dataRefs in visit.

        Returns
        -------
        visitSummary : `lsst.afw.table.ExposureCatalog`
            Exposure catalog with per-detector summary information.
        """
        schema = self._makeVisitSummarySchema()
        cat = afwTable.ExposureCatalog(schema)
        cat.resize(len(dataRefs))

        cat['visit'] = visit

        for i, dataRef in enumerate(dataRefs):
            visitInfo = dataRef.get(component='visitInfo')
            filterLabel = dataRef.get(component='filter')
            summaryStats = dataRef.get(component='summaryStats')
            detector = dataRef.get(component='detector')
            wcs = dataRef.get(component='wcs')
            photoCalib = dataRef.get(component='photoCalib')
            bbox = dataRef.get(component='bbox')
            validPolygon = dataRef.get(component='validPolygon')

            rec = cat[i]
            rec.setBBox(bbox)
            rec.setVisitInfo(visitInfo)
            rec.setWcs(wcs)
            rec.setPhotoCalib(photoCalib)
            rec.setValidPolygon(validPolygon)

            rec['physical_filter'] = filterLabel.physicalLabel if filterLabel.hasPhysicalLabel() else ""
            rec['band'] = filterLabel.bandLabel if filterLabel.hasBandLabel() else ""
            rec.setId(detector.getId())
            rec['psfSigma'] = summaryStats.psfSigma
            rec['psfIxx'] = summaryStats.psfIxx
            rec['psfIyy'] = summaryStats.psfIyy
            rec['psfIxy'] = summaryStats.psfIxy
            rec['psfArea'] = summaryStats.psfArea
            rec['raCorners'][:] = summaryStats.raCorners
            rec['decCorners'][:] = summaryStats.decCorners
            rec['ra'] = summaryStats.ra
            rec['decl'] = summaryStats.decl
            rec['zenithDistance'] = summaryStats.zenithDistance
            rec['zeroPoint'] = summaryStats.zeroPoint
            rec['skyBg'] = summaryStats.skyBg
            rec['skyNoise'] = summaryStats.skyNoise
            rec['meanVar'] = summaryStats.meanVar
            rec['astromOffsetMean'] = summaryStats.astromOffsetMean
            rec['astromOffsetStd'] = summaryStats.astromOffsetStd
            rec['nPsfStar'] = summaryStats.nPsfStar
            rec['psfStarDeltaE1Median'] = summaryStats.psfStarDeltaE1Median
            rec['psfStarDeltaE2Median'] = summaryStats.psfStarDeltaE2Median
            rec['psfStarDeltaE1Scatter'] = summaryStats.psfStarDeltaE1Scatter
            rec['psfStarDeltaE2Scatter'] = summaryStats.psfStarDeltaE2Scatter
            rec['psfStarDeltaSizeMedian'] = summaryStats.psfStarDeltaSizeMedian
            rec['psfStarDeltaSizeScatter'] = summaryStats.psfStarDeltaSizeScatter
            rec['psfStarScaledDeltaSizeScatter'] = summaryStats.psfStarScaledDeltaSizeScatter

        metadata = dafBase.PropertyList()
        metadata.add("COMMENT", "Catalog id is detector id, sorted.")
        # We are looping over existing dataRefs, so the following is true.
        metadata.add("COMMENT", "Only detectors with data have entries.")
        cat.setMetadata(metadata)

        cat.sort()
        return cat

    def _makeVisitSummarySchema(self):
        """Make the schema for the visitSummary catalog."""
        schema = afwTable.ExposureTable.makeMinimalSchema()
        schema.addField('visit', type='L', doc='Visit number')
        schema.addField('physical_filter', type='String', size=32, doc='Physical filter')
        schema.addField('band', type='String', size=32, doc='Name of band')
        schema.addField('psfSigma', type='F',
                        doc='PSF model second-moments determinant radius (center of chip) (pixel)')
        schema.addField('psfArea', type='F',
                        doc='PSF model effective area (center of chip) (pixel**2)')
        schema.addField('psfIxx', type='F',
                        doc='PSF model Ixx (center of chip) (pixel**2)')
        schema.addField('psfIyy', type='F',
                        doc='PSF model Iyy (center of chip) (pixel**2)')
        schema.addField('psfIxy', type='F',
                        doc='PSF model Ixy (center of chip) (pixel**2)')
        schema.addField('raCorners', type='ArrayD', size=4,
                        doc='Right Ascension of bounding box corners (degrees)')
        schema.addField('decCorners', type='ArrayD', size=4,
                        doc='Declination of bounding box corners (degrees)')
        schema.addField('ra', type='D',
                        doc='Right Ascension of bounding box center (degrees)')
        schema.addField('decl', type='D',
                        doc='Declination of bounding box center (degrees)')
        schema.addField('zenithDistance', type='F',
                        doc='Zenith distance of bounding box center (degrees)')
        schema.addField('zeroPoint', type='F',
                        doc='Mean zeropoint in detector (mag)')
        schema.addField('skyBg', type='F',
                        doc='Average sky background (ADU)')
        schema.addField('skyNoise', type='F',
                        doc='Average sky noise (ADU)')
        schema.addField('meanVar', type='F',
                        doc='Mean variance of the weight plane (ADU**2)')
        schema.addField('astromOffsetMean', type='F',
                        doc='Mean offset of astrometric calibration matches (arcsec)')
        schema.addField('astromOffsetStd', type='F',
                        doc='Standard deviation of offsets of astrometric calibration matches (arcsec)')
        schema.addField('nPsfStar', type='I', doc='Number of stars used for PSF model')
        schema.addField('psfStarDeltaE1Median', type='F',
                        doc='Median E1 residual (starE1 - psfE1) for psf stars')
        schema.addField('psfStarDeltaE2Median', type='F',
                        doc='Median E2 residual (starE2 - psfE2) for psf stars')
        schema.addField('psfStarDeltaE1Scatter', type='F',
                        doc='Scatter (via MAD) of E1 residual (starE1 - psfE1) for psf stars')
        schema.addField('psfStarDeltaE2Scatter', type='F',
                        doc='Scatter (via MAD) of E2 residual (starE2 - psfE2) for psf stars')
        schema.addField('psfStarDeltaSizeMedian', type='F',
                        doc='Median size residual (starSize - psfSize) for psf stars (pixel)')
        schema.addField('psfStarDeltaSizeScatter', type='F',
                        doc='Scatter (via MAD) of size residual (starSize - psfSize) for psf stars (pixel)')
        schema.addField('psfStarScaledDeltaSizeScatter', type='F',
                        doc='Scatter (via MAD) of size residual scaled by median size squared')

        return schema


class ConsolidateSourceTableConnections(pipeBase.PipelineTaskConnections,
                                        defaultTemplates={"catalogType": ""},
                                        dimensions=("instrument", "visit")):
    inputCatalogs = connectionTypes.Input(
        doc="Input per-detector Source Tables",
        name="{catalogType}sourceTable",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector"),
        multiple=True
    )
    outputCatalog = connectionTypes.Output(
        doc="Per-visit concatenation of Source Table",
        name="{catalogType}sourceTable_visit",
        storageClass="DataFrame",
        dimensions=("instrument", "visit")
    )


class ConsolidateSourceTableConfig(pipeBase.PipelineTaskConfig,
                                   pipelineConnections=ConsolidateSourceTableConnections):
    pass


class ConsolidateSourceTableTask(pipeBase.PipelineTask):
    """Concatenate the `sourceTable` list into a per-visit `sourceTable_visit`.
    """
    _DefaultName = 'consolidateSourceTable'
    ConfigClass = ConsolidateSourceTableConfig

    inputDataset = 'sourceTable'
    outputDataset = 'sourceTable_visit'

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        from .makeWarp import reorderRefs

        detectorOrder = [ref.dataId['detector'] for ref in inputRefs.inputCatalogs]
        detectorOrder.sort()
        inputRefs = reorderRefs(inputRefs, detectorOrder, dataIdKey='detector')
        inputs = butlerQC.get(inputRefs)
        self.log.info("Concatenating %s per-detector Source Tables",
                      len(inputs['inputCatalogs']))
        df = pd.concat(inputs['inputCatalogs'])
        butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs)


class MakeCcdVisitTableConnections(pipeBase.PipelineTaskConnections,
                                   dimensions=("instrument",),
                                   defaultTemplates={"calexpType": ""}):
    visitSummaryRefs = connectionTypes.Input(
        doc="Data references for per-visit consolidated exposure metadata from ConsolidateVisitSummaryTask",
        name="{calexpType}visitSummary",
        storageClass="ExposureCatalog",
        dimensions=("instrument", "visit"),
        multiple=True,
        deferLoad=True,
    )
    outputCatalog = connectionTypes.Output(
        doc="CCD and Visit metadata table",
        name="{calexpType}ccdVisitTable",
        storageClass="DataFrame",
        dimensions=("instrument",)
    )


class MakeCcdVisitTableConfig(pipeBase.PipelineTaskConfig,
                              pipelineConnections=MakeCcdVisitTableConnections):
    pass


class MakeCcdVisitTableTask(pipeBase.PipelineTask):
    """Produce a `ccdVisitTable` from the `visitSummary` exposure catalogs.
    """
    _DefaultName = 'makeCcdVisitTable'
    ConfigClass = MakeCcdVisitTableConfig

    def run(self, visitSummaryRefs):
        """Make a table of ccd information from the `visitSummary` catalogs.

        Parameters
        ----------
        visitSummaryRefs : `list` of `lsst.daf.butler.DeferredDatasetHandle`
            List of DeferredDatasetHandles pointing to exposure catalogs with
            per-detector summary information.

        Returns
        -------
        result : `lsst.pipe.base.Struct`
            Results struct with attribute:

            ``outputCatalog``
                Catalog of ccd and visit information.
        """
        ccdEntries = []
        for visitSummaryRef in visitSummaryRefs:
            visitSummary = visitSummaryRef.get()
            visitInfo = visitSummary[0].getVisitInfo()

            summaryTable = visitSummary.asAstropy()
            selectColumns = ['id', 'visit', 'physical_filter', 'band', 'ra', 'decl', 'zenithDistance',
                             'zeroPoint', 'psfSigma', 'skyBg', 'skyNoise',
                             'astromOffsetMean', 'astromOffsetStd', 'nPsfStar',
                             'psfStarDeltaE1Median', 'psfStarDeltaE2Median',
                             'psfStarDeltaE1Scatter', 'psfStarDeltaE2Scatter',
                             'psfStarDeltaSizeMedian', 'psfStarDeltaSizeScatter',
                             'psfStarScaledDeltaSizeScatter']
            ccdEntry = summaryTable[selectColumns].to_pandas().set_index('id')
            # 'visit' is the human-readable visit number, and 'visitId' is the
            # key to the visitTable; here they are the same.
            ccdEntry = ccdEntry.rename(columns={"visit": "visitId"})
            dataIds = [DataCoordinate.standardize(visitSummaryRef.dataId, detector=id) for id in
                       summaryTable['id']]
            packer = visitSummaryRef.dataId.universe.makePacker('visit_detector', visitSummaryRef.dataId)
            ccdVisitIds = [packer.pack(dataId) for dataId in dataIds]
            ccdEntry['ccdVisitId'] = ccdVisitIds
            ccdEntry['detector'] = summaryTable['id']
            pixToArcseconds = np.array([vR.getWcs().getPixelScale().asArcseconds() for vR in visitSummary])
            ccdEntry["seeing"] = visitSummary['psfSigma'] * np.sqrt(8 * np.log(2)) * pixToArcseconds

            ccdEntry["skyRotation"] = visitInfo.getBoresightRotAngle().asDegrees()
            ccdEntry["expMidpt"] = visitInfo.getDate().toPython()
            ccdEntry["expMidptMJD"] = visitInfo.getDate().get(dafBase.DateTime.MJD)
            expTime = visitInfo.getExposureTime()
            ccdEntry['expTime'] = expTime
            ccdEntry["obsStart"] = ccdEntry["expMidpt"] - 0.5 * pd.Timedelta(seconds=expTime)
            expTime_days = expTime / (60*60*24)
            ccdEntry["obsStartMJD"] = ccdEntry["expMidptMJD"] - 0.5 * expTime_days
            ccdEntry['darkTime'] = visitInfo.getDarkTime()
            ccdEntry['xSize'] = summaryTable['bbox_max_x'] - summaryTable['bbox_min_x']
            ccdEntry['ySize'] = summaryTable['bbox_max_y'] - summaryTable['bbox_min_y']
            ccdEntry['llcra'] = summaryTable['raCorners'][:, 0]
            ccdEntry['llcdec'] = summaryTable['decCorners'][:, 0]
            ccdEntry['ulcra'] = summaryTable['raCorners'][:, 1]
            ccdEntry['ulcdec'] = summaryTable['decCorners'][:, 1]
            ccdEntry['urcra'] = summaryTable['raCorners'][:, 2]
            ccdEntry['urcdec'] = summaryTable['decCorners'][:, 2]
            ccdEntry['lrcra'] = summaryTable['raCorners'][:, 3]
            ccdEntry['lrcdec'] = summaryTable['decCorners'][:, 3]
            ccdEntries.append(ccdEntry)

        outputCatalog = pd.concat(ccdEntries)
        outputCatalog.set_index('ccdVisitId', inplace=True, verify_integrity=True)
        return pipeBase.Struct(outputCatalog=outputCatalog)
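
    # Illustrative sketch (not part of the task): the ``seeing`` column above
    # is the PSF FWHM in arcseconds, i.e. the per-detector psfSigma summary
    # statistic converted with FWHM = sigma * sqrt(8 ln 2) and the pixel
    # scale. With a hypothetical psfSigma of 1.7 pixels on a 0.2 arcsec/pixel
    # detector:
    #
    #     >>> import numpy as np
    #     >>> float(round(1.7 * np.sqrt(8 * np.log(2)) * 0.2, 3))
    #     0.801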


class MakeVisitTableConnections(pipeBase.PipelineTaskConnections,
                                dimensions=("instrument",),
                                defaultTemplates={"calexpType": ""}):
    visitSummaries = connectionTypes.Input(
        doc="Per-visit consolidated exposure metadata from ConsolidateVisitSummaryTask",
        name="{calexpType}visitSummary",
        storageClass="ExposureCatalog",
        dimensions=("instrument", "visit",),
        multiple=True,
        deferLoad=True,
    )
    outputCatalog = connectionTypes.Output(
        doc="Visit metadata table",
        name="{calexpType}visitTable",
        storageClass="DataFrame",
        dimensions=("instrument",)
    )


class MakeVisitTableConfig(pipeBase.PipelineTaskConfig,
                           pipelineConnections=MakeVisitTableConnections):
    pass


class MakeVisitTableTask(pipeBase.PipelineTask):
    """Produce a `visitTable` from the `visitSummary` exposure catalogs.
    """
    _DefaultName = 'makeVisitTable'
    ConfigClass = MakeVisitTableConfig

    def run(self, visitSummaries):
        """Make a table of visit information from the `visitSummary` catalogs.

        Parameters
        ----------
        visitSummaries : `list` of `lsst.afw.table.ExposureCatalog`
            List of exposure catalogs with per-detector summary information.

        Returns
        -------
        result : `lsst.pipe.base.Struct`
            Results struct with attribute:

            ``outputCatalog``
                Catalog of visit information.
        """
        visitEntries = []
        for visitSummary in visitSummaries:
            visitSummary = visitSummary.get()
            visitRow = visitSummary[0]
            visitInfo = visitRow.getVisitInfo()

            visitEntry = {}
            visitEntry["visitId"] = visitRow['visit']
            visitEntry["visit"] = visitRow['visit']
            visitEntry["physical_filter"] = visitRow['physical_filter']
            visitEntry["band"] = visitRow['band']
            raDec = visitInfo.getBoresightRaDec()
            visitEntry["ra"] = raDec.getRa().asDegrees()
            visitEntry["decl"] = raDec.getDec().asDegrees()
            visitEntry["skyRotation"] = visitInfo.getBoresightRotAngle().asDegrees()
            azAlt = visitInfo.getBoresightAzAlt()
            visitEntry["azimuth"] = azAlt.getLongitude().asDegrees()
            visitEntry["altitude"] = azAlt.getLatitude().asDegrees()
            visitEntry["zenithDistance"] = 90 - azAlt.getLatitude().asDegrees()
            visitEntry["airmass"] = visitInfo.getBoresightAirmass()
            expTime = visitInfo.getExposureTime()
            visitEntry["expTime"] = expTime
            visitEntry["expMidpt"] = visitInfo.getDate().toPython()
            visitEntry["expMidptMJD"] = visitInfo.getDate().get(dafBase.DateTime.MJD)
            visitEntry["obsStart"] = visitEntry["expMidpt"] - 0.5 * pd.Timedelta(seconds=expTime)
            expTime_days = expTime / (60*60*24)
            visitEntry["obsStartMJD"] = visitEntry["expMidptMJD"] - 0.5 * expTime_days
            visitEntries.append(visitEntry)

        outputCatalog = pd.DataFrame(data=visitEntries)
        outputCatalog.set_index('visitId', inplace=True, verify_integrity=True)
        return pipeBase.Struct(outputCatalog=outputCatalog)


class WriteForcedSourceTableConnections(pipeBase.PipelineTaskConnections,
                                        dimensions=("instrument", "visit", "detector", "skymap", "tract")):

    inputCatalog = connectionTypes.Input(
        doc="Primary per-detector, single-epoch forced-photometry catalog. "
            "By default, it is the output of ForcedPhotCcdTask on calexps",
        name="forced_src",
        storageClass="SourceCatalog",
        dimensions=("instrument", "visit", "detector", "skymap", "tract")
    )
    inputCatalogDiff = connectionTypes.Input(
        doc="Secondary multi-epoch, per-detector, forced-photometry catalog. "
            "By default, it is the output of ForcedPhotCcdTask run on image differences.",
        name="forced_diff",
        storageClass="SourceCatalog",
        dimensions=("instrument", "visit", "detector", "skymap", "tract")
    )
    outputCatalog = connectionTypes.Output(
        doc="InputCatalogs horizontally joined on `objectId` in Parquet format",
        name="mergedForcedSource",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector", "skymap", "tract")
    )


class WriteForcedSourceTableConfig(pipeBase.PipelineTaskConfig,
                                   pipelineConnections=WriteForcedSourceTableConnections):
    key = pexConfig.Field(
        doc="Column on which to join the two input tables and make the primary key of the output",
        dtype=str,
        default="objectId",
    )


class WriteForcedSourceTableTask(pipeBase.PipelineTask):
    """Merge and convert per-detector forced source catalogs to parquet.

    Because the predecessor ForcedPhotCcdTask operates per-detector,
    per-tract (i.e., it has tract in its dimensions), detectors
    on the tract boundary may have multiple forced source catalogs.

    The successor task TransformForcedSourceTable runs per-patch
    and temporally aggregates overlapping mergedForcedSource catalogs from all
    available epochs.
    """
    _DefaultName = "writeForcedSourceTable"
    ConfigClass = WriteForcedSourceTableConfig

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)
        # Add ccdVisitId to allow joining with the CcdVisitTable
        inputs['ccdVisitId'] = butlerQC.quantum.dataId.pack("visit_detector")
        inputs['band'] = butlerQC.quantum.dataId.full['band']
        outputs = self.run(**inputs)
        butlerQC.put(outputs, outputRefs)

    def run(self, inputCatalog, inputCatalogDiff, ccdVisitId=None, band=None):
        dfs = []
        for table, dataset in zip((inputCatalog, inputCatalogDiff), ('calexp', 'diff')):
            df = table.asAstropy().to_pandas().set_index(self.config.key, drop=False)
            df = df.reindex(sorted(df.columns), axis=1)
            df['ccdVisitId'] = ccdVisitId if ccdVisitId else pd.NA
            df['band'] = band if band else pd.NA
            df.columns = pd.MultiIndex.from_tuples([(dataset, c) for c in df.columns],
                                                   names=('dataset', 'column'))

            dfs.append(df)

        outputCatalog = functools.reduce(lambda d1, d2: d1.join(d2), dfs)
        return pipeBase.Struct(outputCatalog=outputCatalog)
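
    # Illustrative sketch (not part of the task): the run method above labels
    # the two input catalogs with a (dataset, column) MultiIndex before
    # joining them on objectId, so the merged frame can be sliced per dataset:
    #
    #     >>> import pandas as pd
    #     >>> calexp = pd.DataFrame({'psfFlux': [1.0]},
    #     ...                       index=pd.Index([42], name='objectId'))
    #     >>> diff = pd.DataFrame({'psfFlux': [0.1]},
    #     ...                     index=pd.Index([42], name='objectId'))
    #     >>> calexp.columns = pd.MultiIndex.from_tuples(
    #     ...     [('calexp', c) for c in calexp.columns], names=('dataset', 'column'))
    #     >>> diff.columns = pd.MultiIndex.from_tuples(
    #     ...     [('diff', c) for c in diff.columns], names=('dataset', 'column'))
    #     >>> merged = calexp.join(diff)
    #     >>> merged['diff']['psfFlux'].tolist()
    #     [0.1]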


class TransformForcedSourceTableConnections(pipeBase.PipelineTaskConnections,
                                            dimensions=("instrument", "skymap", "patch", "tract")):

    inputCatalogs = connectionTypes.Input(
        doc="Parquet table of merged ForcedSources produced by WriteForcedSourceTableTask",
        name="mergedForcedSource",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector", "skymap", "tract"),
        multiple=True,
        deferLoad=True
    )
    referenceCatalog = connectionTypes.Input(
        doc="Reference catalog which was used to seed the forcedPhot. Columns "
            "objectId, detect_isPrimary, detect_isTractInner, detect_isPatchInner "
            "are expected.",
        name="objectTable",
        storageClass="DataFrame",
        dimensions=("tract", "patch", "skymap"),
        deferLoad=True
    )
    outputCatalog = connectionTypes.Output(
        doc="Narrower, temporally-aggregated, per-patch ForcedSource Table transformed and converted per a "
            "specified set of functors",
        name="forcedSourceTable",
        storageClass="DataFrame",
        dimensions=("tract", "patch", "skymap")
    )


class TransformForcedSourceTableConfig(TransformCatalogBaseConfig,
                                       pipelineConnections=TransformForcedSourceTableConnections):
    referenceColumns = pexConfig.ListField(
        dtype=str,
        default=["detect_isPrimary", "detect_isTractInner", "detect_isPatchInner"],
        optional=True,
        doc="Columns to pull from reference catalog",
    )
    keyRef = pexConfig.Field(
        doc="Column on which to join the two input tables and make the primary key of the output",
        dtype=str,
        default="objectId",
    )
    key = pexConfig.Field(
        doc="Rename the output DataFrame index to this name",
        dtype=str,
        default="forcedSourceId",
    )

    def setDefaults(self):
        super().setDefaults()
        self.functorFile = os.path.join('$PIPE_TASKS_DIR', 'schemas', 'ForcedSource.yaml')
        self.columnsFromDataId = ['tract', 'patch']
1761 """Transform/standardize a ForcedSource catalog
1763 Transforms each wide, per-detector forcedSource parquet table per the
1764 specification file (per-camera defaults found in ForcedSource.yaml).
1765 All epochs that overlap the patch are aggregated into one per-patch
1766 narrow-parquet file.
1768 No de-duplication of rows
is performed. Duplicate resolutions flags are
1769 pulled
in from the referenceCatalog: `detect_isPrimary`,
1770 `detect_isTractInner`,`detect_isPatchInner`, so that user may de-duplicate
1771 for analysis
or compare duplicates
for QA.
1773 The resulting table includes multiple bands. Epochs (MJDs)
and other useful
1774 per-visit rows can be retreived by joining
with the CcdVisitTable on
1777 _DefaultName = "transformForcedSourceTable"
1778 ConfigClass = TransformForcedSourceTableConfig
1780 def runQuantum(self, butlerQC, inputRefs, outputRefs):
1781 inputs = butlerQC.get(inputRefs)
1782 if self.funcs
is None:
1783 raise ValueError(
"config.functorFile is None. "
1784 "Must be a valid path to yaml in order to run Task as a PipelineTask.")
1785 outputs = self.run(inputs[
'inputCatalogs'], inputs[
'referenceCatalog'], funcs=self.funcs,
1786 dataId=outputRefs.outputCatalog.dataId.full)
1788 butlerQC.put(outputs, outputRefs)
1790 def run(self, inputCatalogs, referenceCatalog, funcs=None, dataId=None, band=None):
1792 ref = referenceCatalog.get(parameters={
"columns": self.config.referenceColumns})
1793 self.log.info(
"Aggregating %s input catalogs" % (len(inputCatalogs)))
1794 for handle
in inputCatalogs:
1795 result = self.transform(
None, handle, funcs, dataId)
1797 dfs.append(result.df.join(ref, how=
'inner'))
1799 outputCatalog = pd.concat(dfs)
1803 outputCatalog.index.rename(self.config.keyRef, inplace=
True)
1805 outputCatalog.reset_index(inplace=
True)
1808 outputCatalog.set_index(
"forcedSourceId", inplace=
True, verify_integrity=
True)
1810 outputCatalog.index.rename(self.config.key, inplace=
True)
1812 self.log.info(
"Made a table of %d columns and %d rows",
1813 len(outputCatalog.columns), len(outputCatalog))
1814 return pipeBase.Struct(outputCatalog=outputCatalog)


class ConsolidateTractConnections(pipeBase.PipelineTaskConnections,
                                  defaultTemplates={"catalogType": ""},
                                  dimensions=("instrument", "tract")):
    inputCatalogs = connectionTypes.Input(
        doc="Input per-patch DataFrame Tables to be concatenated",
        name="{catalogType}ForcedSourceTable",
        storageClass="DataFrame",
        dimensions=("tract", "patch", "skymap"),
        multiple=True,
    )
    outputCatalog = connectionTypes.Output(
        doc="Output per-tract concatenation of DataFrame Tables",
        name="{catalogType}ForcedSourceTable_tract",
        storageClass="DataFrame",
        dimensions=("tract", "skymap"),
    )


class ConsolidateTractConfig(pipeBase.PipelineTaskConfig,
                             pipelineConnections=ConsolidateTractConnections):
    pass


class ConsolidateTractTask(pipeBase.PipelineTask):
    """Concatenate any per-patch, dataframe list into a single
    per-tract DataFrame.
    """
    _DefaultName = 'ConsolidateTract'
    ConfigClass = ConsolidateTractConfig

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)
        self.log.info("Concatenating %s per-patch %s Tables",
                      len(inputs['inputCatalogs']),
                      inputRefs.inputCatalogs[0].datasetType.name)
        df = pd.concat(inputs['inputCatalogs'])
        butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs)