import functools
import logging
import numbers
import os

import numpy as np
import pandas as pd

import lsst.geom
import lsst.pex.config as pexConfig
import lsst.pipe.base as pipeBase
import lsst.daf.base as dafBase
import lsst.afw.table as afwTable
from lsst.pipe.base import connectionTypes
from lsst.meas.base import SingleFrameMeasurementTask
from lsst.obs.base import ExposureIdInfo
from lsst.skymap import BaseSkyMap
from lsst.daf.butler import DeferredDatasetHandle, DataCoordinate
from .parquetTable import ParquetTable
from .functors import CompositeFunctor, Column

log = logging.getLogger(__name__)
def flattenFilters(df, noDupCols=['coord_ra', 'coord_dec'], camelCase=False, inputBands=None):
    """Flattens a dataframe with multilevel column index.
    """
    newDf = pd.DataFrame()
    # band is the level 0 index
    dfBands = df.columns.unique(level=0).values
    for band in dfBands:
        subdf = df[band]
        columnFormat = '{0}{1}' if camelCase else '{0}_{1}'
        newColumns = {c: columnFormat.format(band, c)
                      for c in subdf.columns if c not in noDupCols}
        cols = list(newColumns.keys())
        newDf = pd.concat([newDf, subdf[cols].rename(columns=newColumns)], axis=1)

    # A band must be present in both the input and output; otherwise the
    # noDupCols columns would be all NaN.
    presentBands = dfBands if inputBands is None else list(set(inputBands).intersection(dfBands))

    noDupDf = df[presentBands[0]][noDupCols]
    newDf = pd.concat([noDupDf, newDf], axis=1)
    return newDf
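# Illustrative sketch (kept in a comment so nothing runs at import time):
# flattenFilters turns a two-level (band, column) index into flat,
# band-prefixed names. The toy frame below is hypothetical.
#
#     cols = pd.MultiIndex.from_tuples(
#         [('g', 'psfFlux'), ('g', 'coord_ra'), ('r', 'psfFlux'), ('r', 'coord_ra')])
#     toyDf = pd.DataFrame([[1.0, 10.0, 2.0, 10.0]], columns=cols)
#     flat = flattenFilters(toyDf, noDupCols=['coord_ra'])
#     # flat.columns: ['coord_ra', 'g_psfFlux', 'r_psfFlux']
#     # With camelCase=True the names would be 'gPsfFlux' and 'rPsfFlux'.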
class WriteObjectTableConnections(pipeBase.PipelineTaskConnections,
                                  defaultTemplates={"coaddName": "deep"},
                                  dimensions=("tract", "patch", "skymap")):
    inputCatalogMeas = connectionTypes.Input(
        doc="Catalog of source measurements on the deepCoadd.",
        dimensions=("tract", "patch", "band", "skymap"),
        storageClass="SourceCatalog",
        name="{coaddName}Coadd_meas",
        multiple=True
    )
    inputCatalogForcedSrc = connectionTypes.Input(
        doc="Catalog of forced measurements (shape and position parameters held fixed) on the deepCoadd.",
        dimensions=("tract", "patch", "band", "skymap"),
        storageClass="SourceCatalog",
        name="{coaddName}Coadd_forced_src",
        multiple=True
    )
    inputCatalogRef = connectionTypes.Input(
        doc="Catalog marking the primary detection (which band provides a good shape and position) "
            "for each detection in deepCoadd_mergeDet.",
        dimensions=("tract", "patch", "skymap"),
        storageClass="SourceCatalog",
        name="{coaddName}Coadd_ref"
    )
    outputCatalog = connectionTypes.Output(
        doc="A vertical concatenation of the deepCoadd_{ref|meas|forced_src} catalogs, "
            "stored as a DataFrame with a multi-level column index per-patch.",
        dimensions=("tract", "patch", "skymap"),
        storageClass="DataFrame",
        name="{coaddName}Coadd_obj"
    )
class WriteObjectTableConfig(pipeBase.PipelineTaskConfig,
                             pipelineConnections=WriteObjectTableConnections):
    engine = pexConfig.Field(
        dtype=str,
        default="pyarrow",
        doc="Parquet engine for writing (pyarrow or fastparquet)"
    )
    coaddName = pexConfig.Field(
        dtype=str,
        default="deep",
        doc="Name of coadd"
    )
class WriteObjectTableTask(pipeBase.PipelineTask):
    """Write filter-merged source tables to parquet.
    """
    _DefaultName = "writeObjectTable"
    ConfigClass = WriteObjectTableConfig

    # Names of table datasets to be merged
    inputDatasets = ('forced_src', 'meas', 'ref')

    # Tag of output dataset
    outputDataset = 'obj'

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)

        measDict = {ref.dataId['band']: {'meas': cat} for ref, cat in
                    zip(inputRefs.inputCatalogMeas, inputs['inputCatalogMeas'])}
        forcedSourceDict = {ref.dataId['band']: {'forced_src': cat} for ref, cat in
                            zip(inputRefs.inputCatalogForcedSrc, inputs['inputCatalogForcedSrc'])}

        catalogs = {}
        for band in measDict.keys():
            catalogs[band] = {'meas': measDict[band]['meas'],
                              'forced_src': forcedSourceDict[band]['forced_src'],
                              'ref': inputs['inputCatalogRef']}
        dataId = butlerQC.quantum.dataId
        df = self.run(catalogs=catalogs, tract=dataId['tract'], patch=dataId['patch'])
        outputs = pipeBase.Struct(outputCatalog=df)
        butlerQC.put(outputs, outputRefs)
    def run(self, catalogs, tract, patch):
        """Merge multiple catalogs.

        Parameters
        ----------
        catalogs : `dict`
            Mapping from filter names to dict of catalogs.
        tract : `int`
            tractId to use for the tractId column.
        patch : `str`
            patchId to use for the patchId column.

        Returns
        -------
        catalog : `pandas.DataFrame`
            Merged dataframe.
        """
        dfs = []
        for filt, tableDict in catalogs.items():
            for dataset, table in tableDict.items():
                # Convert afwTable to pandas DataFrame
                df = table.asAstropy().to_pandas().set_index('id', drop=True)

                # Sort columns by name, to ensure matching schema among patches
                df = df.reindex(sorted(df.columns), axis=1)
                df['tractId'] = tract
                df['patchId'] = patch

                # Make columns a 3-level MultiIndex
                df.columns = pd.MultiIndex.from_tuples([(dataset, filt, c) for c in df.columns],
                                                       names=('dataset', 'band', 'column'))
                dfs.append(df)

        catalog = functools.reduce(lambda d1, d2: d1.join(d2), dfs)
        return catalog
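# Illustrative sketch (comment only): the per-band join above yields a frame
# whose columns form a three-level (dataset, band, column) MultiIndex, so a
# single patch stores ref, meas, and forced_src side by side. Column names
# here are hypothetical.
#
#     cols = pd.MultiIndex.from_tuples(
#         [('meas', 'g', 'psfFlux'), ('meas', 'r', 'psfFlux'),
#          ('forced_src', 'g', 'psfFlux'), ('ref', 'g', 'tractId')],
#         names=('dataset', 'band', 'column'))
#     # df['meas'] recovers the per-dataset view; successive d1.join(d2)
#     # calls align the per-dataset frames on the shared 'id' index.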
class WriteSourceTableConnections(pipeBase.PipelineTaskConnections,
                                  defaultTemplates={"catalogType": ""},
                                  dimensions=("instrument", "visit", "detector")):

    catalog = connectionTypes.Input(
        doc="Input full-depth catalog of sources produced by CalibrateTask",
        name="{catalogType}src",
        storageClass="SourceCatalog",
        dimensions=("instrument", "visit", "detector")
    )
    outputCatalog = connectionTypes.Output(
        doc="Catalog of sources, `src` in Parquet format. The 'id' column is "
            "replaced with an index; all other columns are unchanged.",
        name="{catalogType}source",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector")
    )
class WriteSourceTableConfig(pipeBase.PipelineTaskConfig,
                             pipelineConnections=WriteSourceTableConnections):
    pass


class WriteSourceTableTask(pipeBase.PipelineTask):
    """Write source table to parquet.
    """
    _DefaultName = "writeSourceTable"
    ConfigClass = WriteSourceTableConfig

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)
        inputs['ccdVisitId'] = butlerQC.quantum.dataId.pack("visit_detector")
        result = self.run(**inputs).table
        outputs = pipeBase.Struct(outputCatalog=result.toDataFrame())
        butlerQC.put(outputs, outputRefs)
    def run(self, catalog, ccdVisitId=None, **kwargs):
        """Convert `src` catalog to parquet.

        Parameters
        ----------
        catalog : `afwTable.SourceCatalog`
            catalog to be converted
        ccdVisitId : `int`, optional
            ccdVisitId to be added as a column

        Returns
        -------
        result : `lsst.pipe.base.Struct`
            ``table``
                `ParquetTable` version of the input catalog
        """
        self.log.info("Generating parquet table from src catalog ccdVisitId=%s", ccdVisitId)
        df = catalog.asAstropy().to_pandas().set_index('id', drop=True)
        df['ccdVisitId'] = ccdVisitId
        return pipeBase.Struct(table=ParquetTable(dataFrame=df))
class WriteRecalibratedSourceTableConnections(WriteSourceTableConnections,
                                              defaultTemplates={"catalogType": "",
                                                                "skyWcsName": "jointcal",
                                                                "photoCalibName": "fgcm"},
                                              dimensions=("instrument", "visit", "detector", "skymap")):
    skyMap = connectionTypes.Input(
        doc="skyMap needed to choose which tract-level calibrations to use when multiple available",
        name=BaseSkyMap.SKYMAP_DATASET_TYPE_NAME,
        storageClass="SkyMap",
        dimensions=("skymap",),
    )
    exposure = connectionTypes.Input(
        doc="Input exposure to perform photometry on.",
        name="calexp",
        storageClass="ExposureF",
        dimensions=["instrument", "visit", "detector"],
    )
    externalSkyWcsTractCatalog = connectionTypes.Input(
        doc=("Per-tract, per-visit wcs calibrations. These catalogs use the detector "
             "id for the catalog id, sorted on id for fast lookup."),
        name="{skyWcsName}SkyWcsCatalog",
        storageClass="ExposureCatalog",
        dimensions=["instrument", "visit", "tract"],
        multiple=True,
    )
    externalSkyWcsGlobalCatalog = connectionTypes.Input(
        doc=("Per-visit wcs calibrations computed globally (with no tract information). "
             "These catalogs use the detector id for the catalog id, sorted on id for "
             "fast lookup."),
        name="{skyWcsName}SkyWcsCatalog",
        storageClass="ExposureCatalog",
        dimensions=["instrument", "visit"],
    )
    externalPhotoCalibTractCatalog = connectionTypes.Input(
        doc=("Per-tract, per-visit photometric calibrations. These catalogs use the "
             "detector id for the catalog id, sorted on id for fast lookup."),
        name="{photoCalibName}PhotoCalibCatalog",
        storageClass="ExposureCatalog",
        dimensions=["instrument", "visit", "tract"],
        multiple=True,
    )
    externalPhotoCalibGlobalCatalog = connectionTypes.Input(
        doc=("Per-visit photometric calibrations computed globally (with no tract "
             "information). These catalogs use the detector id for the catalog id, "
             "sorted on id for fast lookup."),
        name="{photoCalibName}PhotoCalibCatalog",
        storageClass="ExposureCatalog",
        dimensions=["instrument", "visit"],
    )
    def __init__(self, *, config=None):
        super().__init__(config=config)
        # Remove the global or tract-level connections that the configuration
        # says will not be used.
        if config.doApplyExternalSkyWcs and config.doReevaluateSkyWcs:
            if config.useGlobalExternalSkyWcs:
                self.inputs.remove("externalSkyWcsTractCatalog")
            else:
                self.inputs.remove("externalSkyWcsGlobalCatalog")
        else:
            self.inputs.remove("externalSkyWcsTractCatalog")
            self.inputs.remove("externalSkyWcsGlobalCatalog")
        if config.doApplyExternalPhotoCalib and config.doReevaluatePhotoCalib:
            if config.useGlobalExternalPhotoCalib:
                self.inputs.remove("externalPhotoCalibTractCatalog")
            else:
                self.inputs.remove("externalPhotoCalibGlobalCatalog")
        else:
            self.inputs.remove("externalPhotoCalibTractCatalog")
            self.inputs.remove("externalPhotoCalibGlobalCatalog")
class WriteRecalibratedSourceTableConfig(WriteSourceTableConfig,
                                         pipelineConnections=WriteRecalibratedSourceTableConnections):

    doReevaluatePhotoCalib = pexConfig.Field(
        dtype=bool,
        default=False,
        doc="Add or replace local photoCalib columns",
    )
    doReevaluateSkyWcs = pexConfig.Field(
        dtype=bool,
        default=False,
        doc="Add or replace local WCS columns and update the coord columns, coord_ra and coord_dec",
    )
    doApplyExternalPhotoCalib = pexConfig.Field(
        dtype=bool,
        default=False,
        doc=("If and only if doReevaluatePhotoCalib, apply the photometric calibrations from an external "
             "algorithm such as FGCM or jointcal, else use the photoCalib already attached to the exposure."),
    )
    doApplyExternalSkyWcs = pexConfig.Field(
        dtype=bool,
        default=False,
        doc=("If and only if doReevaluateSkyWcs, apply the WCS from an external algorithm such as jointcal, "
             "else use the wcs already attached to the exposure."),
    )
    useGlobalExternalPhotoCalib = pexConfig.Field(
        dtype=bool,
        default=True,
        doc=("When using doApplyExternalPhotoCalib, use 'global' calibrations "
             "that are not run per-tract. When False, use per-tract photometric "
             "calibration files.")
    )
    useGlobalExternalSkyWcs = pexConfig.Field(
        dtype=bool,
        default=False,
        doc=("When using doApplyExternalSkyWcs, use 'global' calibrations "
             "that are not run per-tract. When False, use per-tract wcs "
             "files.")
    )

    def validate(self):
        super().validate()
        if self.doApplyExternalSkyWcs and not self.doReevaluateSkyWcs:
            log.warning("doApplyExternalSkyWcs=True but doReevaluateSkyWcs=False. "
                        "External SkyWcs will not be read or evaluated.")
        if self.doApplyExternalPhotoCalib and not self.doReevaluatePhotoCalib:
            log.warning("doApplyExternalPhotoCalib=True but doReevaluatePhotoCalib=False. "
                        "External PhotoCalib will not be read or evaluated.")
class WriteRecalibratedSourceTableTask(WriteSourceTableTask):
    """Write source table to parquet.
    """
    _DefaultName = "writeRecalibratedSourceTable"
    ConfigClass = WriteRecalibratedSourceTableConfig

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)
        inputs['ccdVisitId'] = butlerQC.quantum.dataId.pack("visit_detector")
        inputs['exposureIdInfo'] = ExposureIdInfo.fromDataId(butlerQC.quantum.dataId,
                                                             "visit_detector")

        if self.config.doReevaluatePhotoCalib or self.config.doReevaluateSkyWcs:
            if self.config.doApplyExternalPhotoCalib or self.config.doApplyExternalSkyWcs:
                inputs['exposure'] = self.attachCalibs(inputRefs, **inputs)

            inputs['catalog'] = self.addCalibColumns(**inputs)

        result = self.run(**inputs).table
        outputs = pipeBase.Struct(outputCatalog=result.toDataFrame())
        butlerQC.put(outputs, outputRefs)
    def attachCalibs(self, inputRefs, skyMap, exposure, externalSkyWcsGlobalCatalog=None,
                     externalSkyWcsTractCatalog=None, externalPhotoCalibGlobalCatalog=None,
                     externalPhotoCalibTractCatalog=None, **kwargs):
        """Apply external calibrations to exposure per configuration.

        When multiple tract-level calibrations overlap, select the one with the
        center closest to detector.

        Parameters
        ----------
        inputRefs : `lsst.pipe.base.InputQuantizedConnection`
            Input references, for dataIds of tract-level calibs.
        skyMap : `lsst.skymap.SkyMap`
            skyMap to lookup tract geometry and wcs.
        exposure : `lsst.afw.image.exposure.Exposure`
            Input exposure to adjust calibrations.
        externalSkyWcsGlobalCatalog : `lsst.afw.table.ExposureCatalog`, optional
            Exposure catalog with external skyWcs to be applied per config.
        externalSkyWcsTractCatalog : `lsst.afw.table.ExposureCatalog`, optional
            Exposure catalog with external skyWcs to be applied per config.
        externalPhotoCalibGlobalCatalog : `lsst.afw.table.ExposureCatalog`, optional
            Exposure catalog with external photoCalib to be applied per config.
        externalPhotoCalibTractCatalog : `lsst.afw.table.ExposureCatalog`, optional
            Exposure catalog with external photoCalib to be applied per config.
        **kwargs
            Additional keyword arguments are ignored.

        Returns
        -------
        exposure : `lsst.afw.image.exposure.Exposure`
            Exposure with adjusted calibrations.
        """
        if not self.config.doApplyExternalSkyWcs:
            # Do not modify the exposure's SkyWcs
            externalSkyWcsCatalog = None
        elif self.config.useGlobalExternalSkyWcs:
            externalSkyWcsCatalog = externalSkyWcsGlobalCatalog
            self.log.info('Applying global SkyWcs')
        else:
            # Use the tract-level external SkyWcs closest to the detector
            inputRef = getattr(inputRefs, 'externalSkyWcsTractCatalog')
            tracts = [ref.dataId['tract'] for ref in inputRef]
            if len(tracts) == 1:
                ind = 0
                self.log.info('Applying tract-level SkyWcs from tract %s', tracts[ind])
            else:
                ind = self.getClosestTract(tracts, skyMap,
                                           exposure.getBBox(), exposure.getWcs())
                self.log.info('Multiple overlapping externalSkyWcsTractCatalogs found (%s). '
                              'Applying closest to detector center: tract=%s', str(tracts), tracts[ind])
            externalSkyWcsCatalog = externalSkyWcsTractCatalog[ind]

        if not self.config.doApplyExternalPhotoCalib:
            # Do not modify the exposure's PhotoCalib
            externalPhotoCalibCatalog = None
        elif self.config.useGlobalExternalPhotoCalib:
            externalPhotoCalibCatalog = externalPhotoCalibGlobalCatalog
            self.log.info('Applying global PhotoCalib')
        else:
            # Use the tract-level external PhotoCalib closest to the detector
            inputRef = getattr(inputRefs, 'externalPhotoCalibTractCatalog')
            tracts = [ref.dataId['tract'] for ref in inputRef]
            if len(tracts) == 1:
                ind = 0
                self.log.info('Applying tract-level PhotoCalib from tract %s', tracts[ind])
            else:
                ind = self.getClosestTract(tracts, skyMap,
                                           exposure.getBBox(), exposure.getWcs())
                self.log.info('Multiple overlapping externalPhotoCalibTractCatalogs found (%s). '
                              'Applying closest to detector center: tract=%s', str(tracts), tracts[ind])
            externalPhotoCalibCatalog = externalPhotoCalibTractCatalog[ind]

        return self.prepareCalibratedExposure(exposure, externalSkyWcsCatalog, externalPhotoCalibCatalog)
    def getClosestTract(self, tracts, skyMap, bbox, wcs):
        """Find the index of the tract closest to detector from list of tractIds.

        Parameters
        ----------
        tracts : `list` [`int`]
            Iterable of integer tractIds
        skyMap : `lsst.skymap.SkyMap`
            skyMap to lookup tract geometry and wcs
        bbox : `lsst.geom.Box2I`
            Detector bbox, center of which will be compared to tract centers
        wcs : `lsst.afw.geom.SkyWcs`
            Detector Wcs object to map the detector center to SkyCoord

        Returns
        -------
        index : `int`
        """
        if len(tracts) == 1:
            return 0

        center = wcs.pixelToSky(bbox.getCenter())
        sep = []
        for tractId in tracts:
            tract = skyMap[tractId]
            tractCenter = tract.getWcs().pixelToSky(tract.getBBox().getCenter())
            sep.append(center.separation(tractCenter))

        return np.argmin(sep)
    def prepareCalibratedExposure(self, exposure, externalSkyWcsCatalog=None, externalPhotoCalibCatalog=None):
        """Prepare a calibrated exposure and apply external calibrations
        if so configured.

        Parameters
        ----------
        exposure : `lsst.afw.image.exposure.Exposure`
            Input exposure to adjust calibrations.
        externalSkyWcsCatalog : `lsst.afw.table.ExposureCatalog`, optional
            Exposure catalog with external skyWcs to be applied
            if config.doApplyExternalSkyWcs=True. Catalog uses the detector id
            for the catalog id, sorted on id for fast lookup.
        externalPhotoCalibCatalog : `lsst.afw.table.ExposureCatalog`, optional
            Exposure catalog with external photoCalib to be applied
            if config.doApplyExternalPhotoCalib=True. Catalog uses the detector
            id for the catalog id, sorted on id for fast lookup.

        Returns
        -------
        exposure : `lsst.afw.image.exposure.Exposure`
            Exposure with adjusted calibrations.
        """
        detectorId = exposure.getInfo().getDetector().getId()

        if externalPhotoCalibCatalog is not None:
            row = externalPhotoCalibCatalog.find(detectorId)
            if row is None:
                self.log.warning("Detector id %s not found in externalPhotoCalibCatalog; "
                                 "Using original photoCalib.", detectorId)
            else:
                photoCalib = row.getPhotoCalib()
                if photoCalib is None:
                    self.log.warning("Detector id %s has None for photoCalib in externalPhotoCalibCatalog; "
                                     "Using original photoCalib.", detectorId)
                else:
                    exposure.setPhotoCalib(photoCalib)

        if externalSkyWcsCatalog is not None:
            row = externalSkyWcsCatalog.find(detectorId)
            if row is None:
                self.log.warning("Detector id %s not found in externalSkyWcsCatalog; "
                                 "Using original skyWcs.", detectorId)
            else:
                skyWcs = row.getWcs()
                if skyWcs is None:
                    self.log.warning("Detector id %s has None for skyWcs in externalSkyWcsCatalog; "
                                     "Using original skyWcs.", detectorId)
                else:
                    exposure.setWcs(skyWcs)

        return exposure
    def addCalibColumns(self, catalog, exposure, exposureIdInfo, **kwargs):
        """Add or replace columns with calibs evaluated at each centroid.

        Add or replace 'base_LocalWcs' and 'base_LocalPhotoCalib' columns in
        a source catalog, by rerunning the plugins.

        Parameters
        ----------
        catalog : `lsst.afw.table.SourceCatalog`
            catalog to which calib columns will be added
        exposure : `lsst.afw.image.exposure.Exposure`
            Exposure with attached PhotoCalibs and SkyWcs attributes to be
            reevaluated at local centroids. Pixels are not required.
        exposureIdInfo : `lsst.obs.base.ExposureIdInfo`
        **kwargs
            Additional keyword arguments are ignored.

        Returns
        -------
        newCat : `lsst.afw.table.SourceCatalog`
            Source Catalog with requested local calib columns
        """
        measureConfig = SingleFrameMeasurementTask.ConfigClass()
        measureConfig.doReplaceWithNoise = False

        # Clear all slots, because we aren't running the relevant plugins.
        for slot in measureConfig.slots:
            setattr(measureConfig.slots, slot, None)

        measureConfig.plugins.names = []
        if self.config.doReevaluateSkyWcs:
            measureConfig.plugins.names.add('base_LocalWcs')
            self.log.info("Re-evaluating base_LocalWcs plugin")
        if self.config.doReevaluatePhotoCalib:
            measureConfig.plugins.names.add('base_LocalPhotoCalib')
            self.log.info("Re-evaluating base_LocalPhotoCalib plugin")
        pluginsNotToCopy = tuple(measureConfig.plugins.names)

        # Create a new schema and catalog; copy all columns from the original
        # except for the ones to reevaluate.
        aliasMap = catalog.schema.getAliasMap()
        mapper = afwTable.SchemaMapper(catalog.schema)
        for item in catalog.schema:
            if not item.field.getName().startswith(pluginsNotToCopy):
                mapper.addMapping(item.key)

        schema = mapper.getOutputSchema()
        measurement = SingleFrameMeasurementTask(config=measureConfig, schema=schema)
        schema.setAliasMap(aliasMap)
        newCat = afwTable.SourceCatalog(schema)
        newCat.extend(catalog, mapper=mapper)

        # Update coord_ra/coord_dec, which are expected to be positions on the
        # sky and are used as such downstream without transform.
        if self.config.doReevaluateSkyWcs:
            afwTable.updateSourceCoords(exposure.wcs, newCat)

        measurement.run(measCat=newCat, exposure=exposure, exposureId=exposureIdInfo.expId)

        return newCat
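# Sketch of the SchemaMapper copy-all-but-prefix pattern used above; it
# assumes only an existing `lsst.afw.table.SourceCatalog` named inputCatalog
# and uses the same calls as addCalibColumns.
#
#     mapper = afwTable.SchemaMapper(inputCatalog.schema)
#     for item in inputCatalog.schema:
#         if not item.field.getName().startswith(('base_LocalWcs',)):
#             mapper.addMapping(item.key)
#     outCat = afwTable.SourceCatalog(mapper.getOutputSchema())
#     outCat.extend(inputCatalog, mapper=mapper)  # copies all mapped columns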
605 """Calculate columns from ParquetTable.
607 This object manages and organizes an arbitrary set of computations
608 on a catalog. The catalog
is defined by a
610 as a `deepCoadd_obj` dataset,
and the computations are defined by a
611 collection of `lsst.pipe.tasks.functor.Functor` objects (
or, equivalently,
612 a `CompositeFunctor`).
614 After the object
is initialized, accessing the `.df` attribute (which
615 holds the `pandas.DataFrame` containing the results of the calculations)
616 triggers computation of said dataframe.
618 One of the conveniences of using this object
is the ability to define a
619 desired common filter
for all functors. This enables the same functor
620 collection to be passed to several different `PostprocessAnalysis` objects
621 without having to change the original functor collection, since the `filt`
622 keyword argument of this object triggers an overwrite of the `filt`
623 property
for all functors
in the collection.
625 This object also allows a list of refFlags to be passed,
and defines a set
626 of default refFlags that are always included even
if not requested.
628 If a list of `ParquetTable` object
is passed, rather than a single one,
629 then the calculations will be mapped over all the input catalogs. In
630 principle, it should be straightforward to parallelize this activity, but
631 initial tests have failed (see TODO
in code comments).
635 parq : `lsst.pipe.tasks.ParquetTable` (
or list of such)
636 Source
catalog(s)
for computation.
639 Computations to do (functors that act on `parq`).
640 If a dict, the output
641 DataFrame will have columns keyed accordingly.
642 If a list, the column keys will come
from the
643 `.shortname` attribute of each functor.
645 filt : `str`, optional
646 Filter
in which to calculate. If provided,
647 this will overwrite any existing `.filt` attribute
648 of the provided functors.
650 flags : `list`, optional
651 List of flags (per-band) to include
in output table.
652 Taken
from the `meas` dataset
if applied to a multilevel Object Table.
654 refFlags : `list`, optional
655 List of refFlags (only reference band) to include
in output table.
657 forcedFlags : `list`, optional
658 List of flags (per-band) to include
in output table.
659 Taken
from the ``forced_src`` dataset
if applied to a
660 multilevel Object Table. Intended
for flags
from measurement plugins
661 only run during multi-band forced-photometry.
663 _defaultRefFlags = []
666 def __init__(self, parq, functors, filt=None, flags=None, refFlags=None, forcedFlags=None):
671 self.
flags = list(flags)
if flags
is not None else []
672 self.
forcedFlags = list(forcedFlags)
if forcedFlags
is not None else []
674 if refFlags
is not None:
687 additionalFuncs.update({flag:
Column(flag, dataset=
'forced_src')
for flag
in self.
forcedFlags})
688 additionalFuncs.update({flag:
Column(flag, dataset=
'ref')
for flag
in self.
refFlags})
689 additionalFuncs.update({flag:
Column(flag, dataset=
'meas')
for flag
in self.
flags})
691 if isinstance(self.
functors, CompositeFunctor):
696 func.funcDict.update(additionalFuncs)
697 func.filt = self.
filt
703 return [name
for name, func
in self.
func.funcDict.items()
if func.noDup
or func.dataset ==
'ref']
713 if type(self.
parq)
in (list, tuple):
715 dflist = [self.
func(parq, dropna=dropna)
for parq
in self.
parq]
719 dflist = pool.map(functools.partial(self.
func, dropna=dropna), self.
parq)
720 self.
_df = pd.concat(dflist)
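# Illustrative usage sketch (hypothetical functor and column names): build an
# analysis over a ParquetTable, forcing a common band for every functor; the
# first access of `.df` triggers compute().
#
#     funcs = {'ra': Column('coord_ra', dataset='ref'),
#              'dec': Column('coord_dec', dataset='ref')}
#     analysis = PostprocessAnalysis(parq, funcs, filt='g',
#                                    flags=['base_PixelFlags_flag'])
#     df = analysis.df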
729 """Expected Connections for subclasses of TransformCatalogBaseTask.
733 inputCatalog = connectionTypes.Input(
735 storageClass=
"DataFrame",
737 outputCatalog = connectionTypes.Output(
739 storageClass=
"DataFrame",
744 pipelineConnections=TransformCatalogBaseConnections):
745 functorFile = pexConfig.Field(
747 doc=
"Path to YAML file specifying Science Data Model functors to use "
748 "when copying columns and computing calibrated values.",
752 primaryKey = pexConfig.Field(
754 doc=
"Name of column to be set as the DataFrame index. If None, the index"
755 "will be named `id`",
759 columnsFromDataId = pexConfig.ListField(
763 doc=
"Columns to extract from the dataId",
768 """Base class for transforming/standardizing a catalog
770 by applying functors that convert units and apply calibrations.
771 The purpose of this task
is to perform a set of computations on
772 an input `ParquetTable` dataset (such
as `deepCoadd_obj`)
and write the
773 results to a new dataset (which needs to be declared
in an `outputDataset`
776 The calculations to be performed are defined
in a YAML file that specifies
777 a set of functors to be computed, provided
as
778 a `--functorFile` config parameter. An example of such a YAML file
803 - base_InputCount_value
806 functor: DeconvolvedMoments
811 - merge_measurement_i
812 - merge_measurement_r
813 - merge_measurement_z
814 - merge_measurement_y
815 - merge_measurement_g
816 - base_PixelFlags_flag_inexact_psfCenter
819 The names
for each entry under
"func" will become the names of columns
in
820 the output dataset. All the functors referenced are defined
in
822 functor are
in the `args` list,
and any additional entries
for each column
823 other than
"functor" or "args" (e.g., `
'filt'`, `
'dataset'`) are treated
as
824 keyword arguments to be passed to the functor initialization.
826 The
"flags" entry
is the default shortcut
for `Column` functors.
827 All columns listed under
"flags" will be copied to the output table
828 untransformed. They can be of any datatype.
829 In the special case of transforming a multi-level oject table
with
830 band
and dataset indices (deepCoadd_obj), these will be taked
from the
831 `meas` dataset
and exploded out per band.
833 There are two special shortcuts that only apply when transforming
834 multi-level Object (deepCoadd_obj) tables:
835 - The
"refFlags" entry
is shortcut
for `Column` functor
836 taken
from the `
'ref'` dataset
if transforming an ObjectTable.
837 - The
"forcedFlags" entry
is shortcut
for `Column` functors.
838 taken
from the ``forced_src`` dataset
if transforming an ObjectTable.
839 These are expanded out per band.
843 to organize
and excecute the calculations.
    @property
    def _DefaultName(self):
        raise NotImplementedError('Subclass must define "_DefaultName" attribute')

    @property
    def outputDataset(self):
        raise NotImplementedError('Subclass must define "outputDataset" attribute')

    @property
    def inputDataset(self):
        raise NotImplementedError('Subclass must define "inputDataset" attribute')

    @property
    def ConfigClass(self):
        raise NotImplementedError('Subclass must define "ConfigClass" attribute')

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        if self.config.functorFile:
            self.log.info('Loading transform functor definitions from %s',
                          self.config.functorFile)
            self.funcs = CompositeFunctor.from_file(self.config.functorFile)
            self.funcs.update(dict(PostprocessAnalysis._defaultFuncs))
        else:
            self.funcs = None

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)
        if self.funcs is None:
            raise ValueError("config.functorFile is None. "
                             "Must be a valid path to yaml in order to run Task as a PipelineTask.")
        result = self.run(parq=inputs['inputCatalog'], funcs=self.funcs,
                          dataId=outputRefs.outputCatalog.dataId.full)
        outputs = pipeBase.Struct(outputCatalog=result)
        butlerQC.put(outputs, outputRefs)
    def run(self, parq, funcs=None, dataId=None, band=None):
        """Do postprocessing calculations.

        Takes a `ParquetTable` object and dataId,
        returns a dataframe with results of postprocessing calculations.

        Parameters
        ----------
        parq : `lsst.pipe.tasks.parquetTable.ParquetTable`
            ParquetTable from which calculations are done.
        funcs : `lsst.pipe.tasks.functors.Functors`
            Functors to apply to the table's columns.
        dataId : `dict`, optional
            Used to add columns (e.g., `patchId`) to the output dataframe.
        band : `str`, optional
            Filter band that is being processed.

        Returns
        -------
        df : `pandas.DataFrame`
        """
        self.log.info("Transforming/standardizing the source table dataId: %s", dataId)

        df = self.transform(band, parq, funcs, dataId).df
        self.log.info("Made a table of %d columns and %d rows", len(df.columns), len(df))
        return df

    def getFunctors(self):
        return self.funcs

    def getAnalysis(self, parq, funcs=None, band=None):
        if funcs is None:
            funcs = self.funcs
        analysis = PostprocessAnalysis(parq, funcs, filt=band)
        return analysis

    def transform(self, band, parq, funcs, dataId):
        analysis = self.getAnalysis(parq, funcs=funcs, band=band)
        df = analysis.df
        if dataId and self.config.columnsFromDataId:
            for key in self.config.columnsFromDataId:
                if key in dataId:
                    df[str(key)] = dataId[key]
                else:
                    raise ValueError(f"'{key}' in config.columnsFromDataId not found in dataId: {dataId}")

        if self.config.primaryKey:
            if df.index.name != self.config.primaryKey and self.config.primaryKey in df:
                df.reset_index(inplace=True, drop=True)
                df.set_index(self.config.primaryKey, inplace=True)

        return pipeBase.Struct(
            df=df,
            analysis=analysis
        )
class TransformObjectCatalogConnections(pipeBase.PipelineTaskConnections,
                                        defaultTemplates={"coaddName": "deep"},
                                        dimensions=("tract", "patch", "skymap")):
    inputCatalog = connectionTypes.Input(
        doc="The vertical concatenation of the deepCoadd_{ref|meas|forced_src} catalogs, "
            "stored as a DataFrame with a multi-level column index per-patch.",
        dimensions=("tract", "patch", "skymap"),
        storageClass="DataFrame",
        name="{coaddName}Coadd_obj",
        deferLoad=True,
    )
    outputCatalog = connectionTypes.Output(
        doc="Per-Patch Object Table of columns transformed from the deepCoadd_obj table per the standard "
            "data model.",
        dimensions=("tract", "patch", "skymap"),
        storageClass="DataFrame",
        name="objectTable"
    )
class TransformObjectCatalogConfig(TransformCatalogBaseConfig,
                                   pipelineConnections=TransformObjectCatalogConnections):
    coaddName = pexConfig.Field(
        dtype=str,
        default="deep",
        doc="Name of coadd"
    )
    filterMap = pexConfig.DictField(
        keytype=str,
        itemtype=str,
        default={},
        doc=("Dictionary mapping full filter name to short one for column name munging. "
             "These filters determine the output columns no matter what filters the "
             "input data actually contain."),
        deprecated=("Coadds are now identified by the band, so this transform is unused. "
                    "Will be removed after v22.")
    )
    outputBands = pexConfig.ListField(
        dtype=str,
        default=None,
        optional=True,
        doc=("These bands and only these bands will appear in the output,"
             " NaN-filled if the input does not include them."
             " If None, then use all bands found in the input.")
    )
    camelCase = pexConfig.Field(
        dtype=bool,
        default=False,
        doc=("Write per-band column names with camelCase, else underscore. "
             "For example: gPsFlux instead of g_PsFlux.")
    )
    multilevelOutput = pexConfig.Field(
        dtype=bool,
        default=False,
        doc=("Whether results dataframe should have a multilevel column index (True) or be flat "
             "and name-munged (False).")
    )
    goodFlags = pexConfig.ListField(
        dtype=str,
        default=[],
        doc=("List of 'good' flags that should be set False when populating empty tables. "
             "All other flags are considered to be 'bad' flags and will be set to True.")
    )
    floatFillValue = pexConfig.Field(
        dtype=float,
        default=np.nan,
        doc="Fill value for float fields when populating empty tables."
    )
    integerFillValue = pexConfig.Field(
        dtype=int,
        default=-1,
        doc="Fill value for integer fields when populating empty tables."
    )

    def setDefaults(self):
        super().setDefaults()
        self.functorFile = os.path.join('$PIPE_TASKS_DIR', 'schemas', 'Object.yaml')
        self.primaryKey = 'objectId'
        self.columnsFromDataId = ['tract', 'patch']
        self.goodFlags = ['calib_astrometry_used',
                          'calib_photometry_reserved',
                          'calib_photometry_used',
                          'calib_psf_candidate',
                          'calib_psf_reserved',
                          'calib_psf_used']
1027 """Produce a flattened Object Table to match the format specified in
1030 Do the same set of postprocessing calculations on all bands.
1032 This is identical to `TransformCatalogBaseTask`,
except for that it does
1033 the specified functor calculations
for all filters present
in the
1034 input `deepCoadd_obj` table. Any specific `
"filt"` keywords specified
1035 by the YAML file will be superceded.
1037 _DefaultName = "transformObjectCatalog"
1038 ConfigClass = TransformObjectCatalogConfig
1040 def run(self, parq, funcs=None, dataId=None, band=None):
1044 templateDf = pd.DataFrame()
1046 if isinstance(parq, DeferredDatasetHandle):
1047 columns = parq.get(component=
'columns')
1048 inputBands = columns.unique(level=1).values
1050 inputBands = parq.columnLevelNames[
'band']
1052 outputBands = self.config.outputBands
if self.config.outputBands
else inputBands
1055 for inputBand
in inputBands:
1056 if inputBand
not in outputBands:
1057 self.log.info(
"Ignoring %s band data in the input", inputBand)
1059 self.log.info(
"Transforming the catalog of band %s", inputBand)
1060 result = self.transform(inputBand, parq, funcs, dataId)
1061 dfDict[inputBand] = result.df
1062 analysisDict[inputBand] = result.analysis
1063 if templateDf.empty:
1064 templateDf = result.df
1067 for filt
in outputBands:
1068 if filt
not in dfDict:
1069 self.log.info(
"Adding empty columns for band %s", filt)
1070 dfTemp = templateDf.copy()
1071 for col
in dfTemp.columns:
1072 testValue = dfTemp[col].values[0]
1073 if isinstance(testValue, (np.bool_, pd.BooleanDtype)):
1075 if col
in self.config.goodFlags:
1079 elif isinstance(testValue, numbers.Integral):
1083 if isinstance(testValue, np.unsignedinteger):
1084 raise ValueError(
"Parquet tables may not have unsigned integer columns.")
1086 fillValue = self.config.integerFillValue
1088 fillValue = self.config.floatFillValue
1089 dfTemp[col].values[:] = fillValue
1090 dfDict[filt] = dfTemp
1093 df = pd.concat(dfDict, axis=1, names=[
'band',
'column'])
1095 if not self.config.multilevelOutput:
1096 noDupCols = list(set.union(*[set(v.noDupCols)
for v
in analysisDict.values()]))
1097 if self.config.primaryKey
in noDupCols:
1098 noDupCols.remove(self.config.primaryKey)
1099 if dataId
and self.config.columnsFromDataId:
1100 noDupCols += self.config.columnsFromDataId
1101 df =
flattenFilters(df, noDupCols=noDupCols, camelCase=self.config.camelCase,
1102 inputBands=inputBands)
1104 self.log.info(
"Made a table of %d columns and %d rows", len(df.columns), len(df))
class ConsolidateObjectTableConnections(pipeBase.PipelineTaskConnections,
                                        dimensions=("tract", "skymap")):
    inputCatalogs = connectionTypes.Input(
        doc="Per-Patch objectTables conforming to the standard data model.",
        name="objectTable",
        storageClass="DataFrame",
        dimensions=("tract", "patch", "skymap"),
        multiple=True,
    )
    outputCatalog = connectionTypes.Output(
        doc="Per-tract vertical concatenation of the input objectTables",
        name="objectTable_tract",
        storageClass="DataFrame",
        dimensions=("tract", "skymap"),
    )


class ConsolidateObjectTableConfig(pipeBase.PipelineTaskConfig,
                                   pipelineConnections=ConsolidateObjectTableConnections):
    coaddName = pexConfig.Field(
        dtype=str,
        default="deep",
        doc="Name of coadd"
    )


class ConsolidateObjectTableTask(pipeBase.PipelineTask):
    """Write patch-merged source tables to a tract-level parquet file.

    Concatenates the `objectTable` list into a per-tract `objectTable_tract`.
    """
    _DefaultName = "consolidateObjectTable"
    ConfigClass = ConsolidateObjectTableConfig

    inputDataset = 'objectTable'
    outputDataset = 'objectTable_tract'

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)
        self.log.info("Concatenating %s per-patch Object Tables",
                      len(inputs['inputCatalogs']))
        df = pd.concat(inputs['inputCatalogs'])
        butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs)
class TransformSourceTableConnections(pipeBase.PipelineTaskConnections,
                                      defaultTemplates={"catalogType": ""},
                                      dimensions=("instrument", "visit", "detector")):

    inputCatalog = connectionTypes.Input(
        doc="Wide input catalog of sources produced by WriteSourceTableTask",
        name="{catalogType}source",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector"),
        deferLoad=True
    )
    outputCatalog = connectionTypes.Output(
        doc="Narrower, per-detector Source Table transformed and converted per a "
            "specified set of functors",
        name="{catalogType}sourceTable",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector")
    )


class TransformSourceTableConfig(TransformCatalogBaseConfig,
                                 pipelineConnections=TransformSourceTableConnections):

    def setDefaults(self):
        super().setDefaults()
        self.functorFile = os.path.join('$PIPE_TASKS_DIR', 'schemas', 'Source.yaml')
        self.primaryKey = 'sourceId'
        self.columnsFromDataId = ['visit', 'detector', 'band', 'physical_filter']


class TransformSourceTableTask(TransformCatalogBaseTask):
    """Transform/standardize a source catalog.
    """
    _DefaultName = "transformSourceTable"
    ConfigClass = TransformSourceTableConfig
class ConsolidateVisitSummaryConnections(pipeBase.PipelineTaskConnections,
                                         dimensions=("instrument", "visit",),
                                         defaultTemplates={"calexpType": ""}):
    calexp = connectionTypes.Input(
        doc="Processed exposures used for metadata",
        name="{calexpType}calexp",
        storageClass="ExposureF",
        dimensions=("instrument", "visit", "detector"),
        deferLoad=True,
        multiple=True,
    )
    visitSummary = connectionTypes.Output(
        doc=("Per-visit consolidated exposure metadata. These catalogs use "
             "detector id for the id and are sorted for fast lookups of a "
             "detector."),
        name="{calexpType}visitSummary",
        storageClass="ExposureCatalog",
        dimensions=("instrument", "visit"),
    )


class ConsolidateVisitSummaryConfig(pipeBase.PipelineTaskConfig,
                                    pipelineConnections=ConsolidateVisitSummaryConnections):
    """Config for ConsolidateVisitSummaryTask"""
    pass
class ConsolidateVisitSummaryTask(pipeBase.PipelineTask):
    """Task to consolidate per-detector visit metadata.

    This task aggregates the following metadata from all the detectors in a
    single visit into an exposure catalog:
    - The visitInfo.
    - The wcs.
    - The photoCalib.
    - The physical_filter and band (if available).
    - The psf size, shape, and effective area at the center of the detector.
    - The corners of the bounding box in right ascension/declination.

    Other quantities such as Detector, Psf, ApCorrMap, and TransmissionCurve
    are not persisted here because of storage concerns, and because of their
    limited utility as summary statistics.

    Tests for this task are performed in ci_hsc_gen3.
    """
    _DefaultName = "consolidateVisitSummary"
    ConfigClass = ConsolidateVisitSummaryConfig

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        dataRefs = butlerQC.get(inputRefs.calexp)
        visit = dataRefs[0].dataId.byName()['visit']

        self.log.debug("Concatenating metadata from %d per-detector calexps (visit %d)",
                       len(dataRefs), visit)

        expCatalog = self._combineExposureMetadata(visit, dataRefs)

        butlerQC.put(expCatalog, outputRefs.visitSummary)
    def _combineExposureMetadata(self, visit, dataRefs):
        """Make a combined exposure catalog from a list of dataRefs.
        These dataRefs must point to exposures with wcs, summaryStats,
        and other visit metadata.

        Parameters
        ----------
        visit : `int`
            Visit identification number.
        dataRefs : `list` of `lsst.daf.butler.DeferredDatasetHandle`
            List of dataRefs in visit.

        Returns
        -------
        visitSummary : `lsst.afw.table.ExposureCatalog`
            Exposure catalog with per-detector summary information.
        """
        schema = self._makeVisitSummarySchema()
        cat = afwTable.ExposureCatalog(schema)
        cat.resize(len(dataRefs))

        cat['visit'] = visit

        for i, dataRef in enumerate(dataRefs):
            visitInfo = dataRef.get(component='visitInfo')
            filterLabel = dataRef.get(component='filter')
            summaryStats = dataRef.get(component='summaryStats')
            detector = dataRef.get(component='detector')
            wcs = dataRef.get(component='wcs')
            photoCalib = dataRef.get(component='photoCalib')
            bbox = dataRef.get(component='bbox')
            validPolygon = dataRef.get(component='validPolygon')

            rec = cat[i]
            rec.setBBox(bbox)
            rec.setVisitInfo(visitInfo)
            rec.setWcs(wcs)
            rec.setPhotoCalib(photoCalib)
            rec.setValidPolygon(validPolygon)

            rec['physical_filter'] = filterLabel.physicalLabel if filterLabel.hasPhysicalLabel() else ""
            rec['band'] = filterLabel.bandLabel if filterLabel.hasBandLabel() else ""
            rec.setId(detector.getId())
            rec['psfSigma'] = summaryStats.psfSigma
            rec['psfIxx'] = summaryStats.psfIxx
            rec['psfIyy'] = summaryStats.psfIyy
            rec['psfIxy'] = summaryStats.psfIxy
            rec['psfArea'] = summaryStats.psfArea
            rec['raCorners'][:] = summaryStats.raCorners
            rec['decCorners'][:] = summaryStats.decCorners
            rec['ra'] = summaryStats.ra
            rec['decl'] = summaryStats.decl
            rec['zenithDistance'] = summaryStats.zenithDistance
            rec['zeroPoint'] = summaryStats.zeroPoint
            rec['skyBg'] = summaryStats.skyBg
            rec['skyNoise'] = summaryStats.skyNoise
            rec['meanVar'] = summaryStats.meanVar
            rec['astromOffsetMean'] = summaryStats.astromOffsetMean
            rec['astromOffsetStd'] = summaryStats.astromOffsetStd
            rec['nPsfStar'] = summaryStats.nPsfStar
            rec['psfStarDeltaE1Median'] = summaryStats.psfStarDeltaE1Median
            rec['psfStarDeltaE2Median'] = summaryStats.psfStarDeltaE2Median
            rec['psfStarDeltaE1Scatter'] = summaryStats.psfStarDeltaE1Scatter
            rec['psfStarDeltaE2Scatter'] = summaryStats.psfStarDeltaE2Scatter
            rec['psfStarDeltaSizeMedian'] = summaryStats.psfStarDeltaSizeMedian
            rec['psfStarDeltaSizeScatter'] = summaryStats.psfStarDeltaSizeScatter
            rec['psfStarScaledDeltaSizeScatter'] = summaryStats.psfStarScaledDeltaSizeScatter

        metadata = dafBase.PropertyList()
        metadata.add("COMMENT", "Catalog id is detector id, sorted.")
        # We are looping over existing datarefs, so the following is true.
        metadata.add("COMMENT", "Only detectors with data have entries.")
        cat.setMetadata(metadata)

        cat.sort()
        return cat
    def _makeVisitSummarySchema(self):
        """Make the schema for the visitSummary catalog."""
        schema = afwTable.ExposureTable.makeMinimalSchema()
        schema.addField('visit', type='L', doc='Visit number')
        schema.addField('physical_filter', type='String', size=32, doc='Physical filter')
        schema.addField('band', type='String', size=32, doc='Name of band')
        schema.addField('psfSigma', type='F',
                        doc='PSF model second-moments determinant radius (center of chip) (pixel)')
        schema.addField('psfArea', type='F',
                        doc='PSF model effective area (center of chip) (pixel**2)')
        schema.addField('psfIxx', type='F',
                        doc='PSF model Ixx (center of chip) (pixel**2)')
        schema.addField('psfIyy', type='F',
                        doc='PSF model Iyy (center of chip) (pixel**2)')
        schema.addField('psfIxy', type='F',
                        doc='PSF model Ixy (center of chip) (pixel**2)')
        schema.addField('raCorners', type='ArrayD', size=4,
                        doc='Right Ascension of bounding box corners (degrees)')
        schema.addField('decCorners', type='ArrayD', size=4,
                        doc='Declination of bounding box corners (degrees)')
        schema.addField('ra', type='D',
                        doc='Right Ascension of bounding box center (degrees)')
        schema.addField('decl', type='D',
                        doc='Declination of bounding box center (degrees)')
        schema.addField('zenithDistance', type='F',
                        doc='Zenith distance of bounding box center (degrees)')
        schema.addField('zeroPoint', type='F',
                        doc='Mean zeropoint in detector (mag)')
        schema.addField('skyBg', type='F',
                        doc='Average sky background (ADU)')
        schema.addField('skyNoise', type='F',
                        doc='Average sky noise (ADU)')
        schema.addField('meanVar', type='F',
                        doc='Mean variance of the weight plane (ADU**2)')
        schema.addField('astromOffsetMean', type='F',
                        doc='Mean offset of astrometric calibration matches (arcsec)')
        schema.addField('astromOffsetStd', type='F',
                        doc='Standard deviation of offsets of astrometric calibration matches (arcsec)')
        schema.addField('nPsfStar', type='I', doc='Number of stars used for PSF model')
        schema.addField('psfStarDeltaE1Median', type='F',
                        doc='Median E1 residual (starE1 - psfE1) for psf stars')
        schema.addField('psfStarDeltaE2Median', type='F',
                        doc='Median E2 residual (starE2 - psfE2) for psf stars')
        schema.addField('psfStarDeltaE1Scatter', type='F',
                        doc='Scatter (via MAD) of E1 residual (starE1 - psfE1) for psf stars')
        schema.addField('psfStarDeltaE2Scatter', type='F',
                        doc='Scatter (via MAD) of E2 residual (starE2 - psfE2) for psf stars')
        schema.addField('psfStarDeltaSizeMedian', type='F',
                        doc='Median size residual (starSize - psfSize) for psf stars (pixel)')
        schema.addField('psfStarDeltaSizeScatter', type='F',
                        doc='Scatter (via MAD) of size residual (starSize - psfSize) for psf stars (pixel)')
        schema.addField('psfStarScaledDeltaSizeScatter', type='F',
                        doc='Scatter (via MAD) of size residual scaled by median size squared')

        return schema
class ConsolidateSourceTableConnections(pipeBase.PipelineTaskConnections,
                                        defaultTemplates={"catalogType": ""},
                                        dimensions=("instrument", "visit")):
    inputCatalogs = connectionTypes.Input(
        doc="Input per-detector Source Tables",
        name="{catalogType}sourceTable",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector"),
        multiple=True
    )
    outputCatalog = connectionTypes.Output(
        doc="Per-visit concatenation of Source Table",
        name="{catalogType}sourceTable_visit",
        storageClass="DataFrame",
        dimensions=("instrument", "visit")
    )


class ConsolidateSourceTableConfig(pipeBase.PipelineTaskConfig,
                                   pipelineConnections=ConsolidateSourceTableConnections):
    pass
class ConsolidateSourceTableTask(pipeBase.PipelineTask):
    """Concatenate `sourceTable` list into a per-visit `sourceTable_visit`.
    """
    _DefaultName = 'consolidateSourceTable'
    ConfigClass = ConsolidateSourceTableConfig

    inputDataset = 'sourceTable'
    outputDataset = 'sourceTable_visit'

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        from .makeWarp import reorderRefs

        detectorOrder = [ref.dataId['detector'] for ref in inputRefs.inputCatalogs]
        detectorOrder.sort()
        inputRefs = reorderRefs(inputRefs, detectorOrder, dataIdKey='detector')
        inputs = butlerQC.get(inputRefs)
        self.log.info("Concatenating %s per-detector Source Tables",
                      len(inputs['inputCatalogs']))
        df = pd.concat(inputs['inputCatalogs'])
        butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs)
class MakeCcdVisitTableConnections(pipeBase.PipelineTaskConnections,
                                   dimensions=("instrument",),
                                   defaultTemplates={"calexpType": ""}):
    visitSummaryRefs = connectionTypes.Input(
        doc="Data references for per-visit consolidated exposure metadata from ConsolidateVisitSummaryTask",
        name="{calexpType}visitSummary",
        storageClass="ExposureCatalog",
        dimensions=("instrument", "visit"),
        multiple=True,
        deferLoad=True,
    )
    outputCatalog = connectionTypes.Output(
        doc="CCD and Visit metadata table",
        name="ccdVisitTable",
        storageClass="DataFrame",
        dimensions=("instrument",)
    )


class MakeCcdVisitTableConfig(pipeBase.PipelineTaskConfig,
                              pipelineConnections=MakeCcdVisitTableConnections):
    pass


class MakeCcdVisitTableTask(pipeBase.PipelineTask):
    """Produce a `ccdVisitTable` from the `visitSummary` exposure catalogs.
    """
    _DefaultName = 'makeCcdVisitTable'
    ConfigClass = MakeCcdVisitTableConfig
    def run(self, visitSummaryRefs):
        """Make a table of ccd information from the `visitSummary` catalogs.

        Parameters
        ----------
        visitSummaryRefs : `list` of `lsst.daf.butler.DeferredDatasetHandle`
            List of DeferredDatasetHandles pointing to exposure catalogs with
            per-detector summary information.

        Returns
        -------
        result : `lsst.pipe.base.Struct`
            Results struct with attribute:

            ``outputCatalog``
                Catalog of ccd and visit information.
        """
        ccdEntries = []
        for visitSummaryRef in visitSummaryRefs:
            visitSummary = visitSummaryRef.get()
            visitInfo = visitSummary[0].getVisitInfo()

            summaryTable = visitSummary.asAstropy()
            selectColumns = ['id', 'visit', 'physical_filter', 'band', 'ra', 'decl', 'zenithDistance',
                             'zeroPoint', 'psfSigma', 'skyBg', 'skyNoise',
                             'astromOffsetMean', 'astromOffsetStd', 'nPsfStar',
                             'psfStarDeltaE1Median', 'psfStarDeltaE2Median',
                             'psfStarDeltaE1Scatter', 'psfStarDeltaE2Scatter',
                             'psfStarDeltaSizeMedian', 'psfStarDeltaSizeScatter',
                             'psfStarScaledDeltaSizeScatter']
            ccdEntry = summaryTable[selectColumns].to_pandas().set_index('id')
            # 'visit' is the human-readable visit number; 'visitId' is the key
            # to the visit table. They are the same.
            ccdEntry = ccdEntry.rename(columns={"visit": "visitId"})
            dataIds = [DataCoordinate.standardize(visitSummaryRef.dataId, detector=id) for id in
                       summaryTable['id']]
            packer = visitSummaryRef.dataId.universe.makePacker('visit_detector', visitSummaryRef.dataId)
            ccdVisitIds = [packer.pack(dataId) for dataId in dataIds]
            ccdEntry['ccdVisitId'] = ccdVisitIds
            ccdEntry['detector'] = summaryTable['id']
            pixToArcseconds = np.array([vR.getWcs().getPixelScale().asArcseconds() for vR in visitSummary])
            ccdEntry["seeing"] = visitSummary['psfSigma'] * np.sqrt(8 * np.log(2)) * pixToArcseconds

            ccdEntry["skyRotation"] = visitInfo.getBoresightRotAngle().asDegrees()
            ccdEntry["expMidpt"] = visitInfo.getDate().toPython()
            ccdEntry["expMidptMJD"] = visitInfo.getDate().get(dafBase.DateTime.MJD)
            expTime = visitInfo.getExposureTime()
            ccdEntry['expTime'] = expTime
            ccdEntry["obsStart"] = ccdEntry["expMidpt"] - 0.5 * pd.Timedelta(seconds=expTime)
            expTime_days = expTime / (60*60*24)
            ccdEntry["obsStartMJD"] = ccdEntry["expMidptMJD"] - 0.5 * expTime_days
            ccdEntry['darkTime'] = visitInfo.getDarkTime()
            ccdEntry['xSize'] = summaryTable['bbox_max_x'] - summaryTable['bbox_min_x']
            ccdEntry['ySize'] = summaryTable['bbox_max_y'] - summaryTable['bbox_min_y']
            ccdEntry['llcra'] = summaryTable['raCorners'][:, 0]
            ccdEntry['llcdec'] = summaryTable['decCorners'][:, 0]
            ccdEntry['ulcra'] = summaryTable['raCorners'][:, 1]
            ccdEntry['ulcdec'] = summaryTable['decCorners'][:, 1]
            ccdEntry['urcra'] = summaryTable['raCorners'][:, 2]
            ccdEntry['urcdec'] = summaryTable['decCorners'][:, 2]
            ccdEntry['lrcra'] = summaryTable['raCorners'][:, 3]
            ccdEntry['lrcdec'] = summaryTable['decCorners'][:, 3]
            ccdEntries.append(ccdEntry)

        outputCatalog = pd.concat(ccdEntries)
        outputCatalog.set_index('ccdVisitId', inplace=True, verify_integrity=True)
        return pipeBase.Struct(outputCatalog=outputCatalog)
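# Worked example (toy numbers) of the seeing and timing arithmetic above: a
# Gaussian sigma converts to FWHM via sqrt(8 ln 2) ~ 2.355, and obsStartMJD
# is the midpoint minus half the exposure time in days.
#
#     psfSigma, pixScale = 2.0, 0.2                           # pix, arcsec/pix
#     seeing = psfSigma * np.sqrt(8 * np.log(2)) * pixScale   # ~0.94 arcsec
#     expTime = 30.0                                          # seconds
#     obsStartMJD = 59000.0 - 0.5 * expTime / 86400.0         # ~58999.99983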
class MakeVisitTableConnections(pipeBase.PipelineTaskConnections,
                                dimensions=("instrument",),
                                defaultTemplates={"calexpType": ""}):
    visitSummaries = connectionTypes.Input(
        doc="Per-visit consolidated exposure metadata from ConsolidateVisitSummaryTask",
        name="{calexpType}visitSummary",
        storageClass="ExposureCatalog",
        dimensions=("instrument", "visit",),
        multiple=True,
        deferLoad=True,
    )
    outputCatalog = connectionTypes.Output(
        doc="Visit metadata table",
        name="visitTable",
        storageClass="DataFrame",
        dimensions=("instrument",)
    )


class MakeVisitTableConfig(pipeBase.PipelineTaskConfig,
                           pipelineConnections=MakeVisitTableConnections):
    pass
class MakeVisitTableTask(pipeBase.PipelineTask):
    """Produce a `visitTable` from the `visitSummary` exposure catalogs.
    """
    _DefaultName = 'makeVisitTable'
    ConfigClass = MakeVisitTableConfig

    def run(self, visitSummaries):
        """Make a table of visit information from the `visitSummary` catalogs.

        Parameters
        ----------
        visitSummaries : `list` of `lsst.afw.table.ExposureCatalog`
            List of exposure catalogs with per-detector summary information.

        Returns
        -------
        result : `lsst.pipe.base.Struct`
            Results struct with attribute:

            ``outputCatalog``
                Catalog of visit information.
        """
        visitEntries = []
        for visitSummary in visitSummaries:
            visitSummary = visitSummary.get()
            visitRow = visitSummary[0]
            visitInfo = visitRow.getVisitInfo()

            visitEntry = {}
            visitEntry["visitId"] = visitRow['visit']
            visitEntry["visit"] = visitRow['visit']
            visitEntry["physical_filter"] = visitRow['physical_filter']
            visitEntry["band"] = visitRow['band']
            raDec = visitInfo.getBoresightRaDec()
            visitEntry["ra"] = raDec.getRa().asDegrees()
            visitEntry["decl"] = raDec.getDec().asDegrees()
            visitEntry["skyRotation"] = visitInfo.getBoresightRotAngle().asDegrees()
            azAlt = visitInfo.getBoresightAzAlt()
            visitEntry["azimuth"] = azAlt.getLongitude().asDegrees()
            visitEntry["altitude"] = azAlt.getLatitude().asDegrees()
            visitEntry["zenithDistance"] = 90 - azAlt.getLatitude().asDegrees()
            visitEntry["airmass"] = visitInfo.getBoresightAirmass()
            expTime = visitInfo.getExposureTime()
            visitEntry["expTime"] = expTime
            visitEntry["expMidpt"] = visitInfo.getDate().toPython()
            visitEntry["expMidptMJD"] = visitInfo.getDate().get(dafBase.DateTime.MJD)
            visitEntry["obsStart"] = visitEntry["expMidpt"] - 0.5 * pd.Timedelta(seconds=expTime)
            expTime_days = expTime / (60*60*24)
            visitEntry["obsStartMJD"] = visitEntry["expMidptMJD"] - 0.5 * expTime_days
            visitEntries.append(visitEntry)

        outputCatalog = pd.DataFrame(data=visitEntries)
        outputCatalog.set_index('visitId', inplace=True, verify_integrity=True)
        return pipeBase.Struct(outputCatalog=outputCatalog)
class WriteForcedSourceTableConnections(pipeBase.PipelineTaskConnections,
                                        dimensions=("instrument", "visit", "detector", "skymap", "tract")):

    inputCatalog = connectionTypes.Input(
        doc="Primary per-detector, single-epoch forced-photometry catalog. "
            "By default, it is the output of ForcedPhotCcdTask on calexps",
        name="forced_src",
        storageClass="SourceCatalog",
        dimensions=("instrument", "visit", "detector", "skymap", "tract")
    )
    inputCatalogDiff = connectionTypes.Input(
        doc="Secondary multi-epoch, per-detector, forced photometry catalog. "
            "By default, it is the output of ForcedPhotCcdTask run on image differences.",
        name="forced_diff",
        storageClass="SourceCatalog",
        dimensions=("instrument", "visit", "detector", "skymap", "tract")
    )
    outputCatalog = connectionTypes.Output(
        doc="InputCatalogs horizontally joined on `objectId` in Parquet format",
        name="mergedForcedSource",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector", "skymap", "tract")
    )


class WriteForcedSourceTableConfig(pipeBase.PipelineTaskConfig,
                                   pipelineConnections=WriteForcedSourceTableConnections):
    key = lsst.pex.config.Field(
        doc="Column on which to join the two input tables on and make the primary key of the output",
        dtype=str,
        default="objectId",
    )
class WriteForcedSourceTableTask(pipeBase.PipelineTask):
    """Merge and convert per-detector forced source catalogs to parquet.

    Because the predecessor ForcedPhotCcdTask operates per-detector,
    per-tract (i.e., it has tract in its dimensions), detectors
    on the tract boundary may have multiple forced source catalogs.

    The successor task TransformForcedSourceTable runs per-patch
    and temporally aggregates overlapping mergedForcedSource catalogs from
    all available epochs.
    """
    _DefaultName = "writeForcedSourceTable"
    ConfigClass = WriteForcedSourceTableConfig

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)
        inputs['ccdVisitId'] = butlerQC.quantum.dataId.pack("visit_detector")
        inputs['band'] = butlerQC.quantum.dataId.full['band']
        outputs = self.run(**inputs)
        butlerQC.put(outputs, outputRefs)

    def run(self, inputCatalog, inputCatalogDiff, ccdVisitId=None, band=None):
        dfs = []
        for table, dataset in zip((inputCatalog, inputCatalogDiff), ('calexp', 'diff')):
            df = table.asAstropy().to_pandas().set_index(self.config.key, drop=False)
            df = df.reindex(sorted(df.columns), axis=1)
            df['ccdVisitId'] = ccdVisitId if ccdVisitId else pd.NA
            df['band'] = band if band else pd.NA
            df.columns = pd.MultiIndex.from_tuples([(dataset, c) for c in df.columns],
                                                   names=('dataset', 'column'))
            dfs.append(df)

        outputCatalog = functools.reduce(lambda d1, d2: d1.join(d2), dfs)
        return pipeBase.Struct(outputCatalog=outputCatalog)
class TransformForcedSourceTableConnections(pipeBase.PipelineTaskConnections,
                                            dimensions=("instrument", "skymap", "patch", "tract")):

    inputCatalogs = connectionTypes.Input(
        doc="Parquet table of merged ForcedSources produced by WriteForcedSourceTableTask",
        name="mergedForcedSource",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector", "skymap", "tract"),
        multiple=True,
        deferLoad=True
    )
    referenceCatalog = connectionTypes.Input(
        doc="Reference catalog which was used to seed the forcedPhot. Columns "
            "objectId, detect_isPrimary, detect_isTractInner, detect_isPatchInner "
            "are expected.",
        name="objectTable",
        storageClass="DataFrame",
        dimensions=("tract", "patch", "skymap"),
        deferLoad=True
    )
    outputCatalog = connectionTypes.Output(
        doc="Narrower, temporally-aggregated, per-patch ForcedSource Table transformed and converted per a "
            "specified set of functors",
        name="forcedSourceTable",
        storageClass="DataFrame",
        dimensions=("tract", "patch", "skymap")
    )


class TransformForcedSourceTableConfig(TransformCatalogBaseConfig,
                                       pipelineConnections=TransformForcedSourceTableConnections):
    referenceColumns = pexConfig.ListField(
        dtype=str,
        default=["detect_isPrimary", "detect_isTractInner", "detect_isPatchInner"],
        optional=True,
        doc="Columns to pull from reference catalog",
    )
    keyRef = lsst.pex.config.Field(
        doc="Column on which to join the two input tables on and make the primary key of the output",
        dtype=str,
        default="objectId",
    )
    key = lsst.pex.config.Field(
        doc="Rename the output DataFrame index to this name",
        dtype=str,
        default="forcedSourceId",
    )

    def setDefaults(self):
        super().setDefaults()
        self.functorFile = os.path.join('$PIPE_TASKS_DIR', 'schemas', 'ForcedSource.yaml')
        self.columnsFromDataId = ['tract', 'patch']
1745 """Transform/standardize a ForcedSource catalog
1747 Transforms each wide, per-detector forcedSource parquet table per the
1748 specification file (per-camera defaults found in ForcedSource.yaml).
1749 All epochs that overlap the patch are aggregated into one per-patch
1750 narrow-parquet file.
1752 No de-duplication of rows
is performed. Duplicate resolutions flags are
1753 pulled
in from the referenceCatalog: `detect_isPrimary`,
1754 `detect_isTractInner`,`detect_isPatchInner`, so that user may de-duplicate
1755 for analysis
or compare duplicates
for QA.
1757 The resulting table includes multiple bands. Epochs (MJDs)
and other useful
1758 per-visit rows can be retreived by joining
with the CcdVisitTable on
1761 _DefaultName = "transformForcedSourceTable"
1762 ConfigClass = TransformForcedSourceTableConfig
1764 def runQuantum(self, butlerQC, inputRefs, outputRefs):
1765 inputs = butlerQC.get(inputRefs)
1766 if self.funcs
is None:
1767 raise ValueError(
"config.functorFile is None. "
1768 "Must be a valid path to yaml in order to run Task as a PipelineTask.")
1769 outputs = self.run(inputs[
'inputCatalogs'], inputs[
'referenceCatalog'], funcs=self.funcs,
1770 dataId=outputRefs.outputCatalog.dataId.full)
1772 butlerQC.put(outputs, outputRefs)
1774 def run(self, inputCatalogs, referenceCatalog, funcs=None, dataId=None, band=None):
1776 ref = referenceCatalog.get(parameters={
"columns": self.config.referenceColumns})
1777 self.log.info(
"Aggregating %s input catalogs" % (len(inputCatalogs)))
1778 for handle
in inputCatalogs:
1779 result = self.transform(
None, handle, funcs, dataId)
1781 dfs.append(result.df.join(ref, how=
'inner'))
1783 outputCatalog = pd.concat(dfs)
1787 outputCatalog.index.rename(self.config.keyRef, inplace=
True)
1789 outputCatalog.reset_index(inplace=
True)
1792 outputCatalog.set_index(
"forcedSourceId", inplace=
True, verify_integrity=
True)
1794 outputCatalog.index.rename(self.config.key, inplace=
True)
1796 self.log.info(
"Made a table of %d columns and %d rows",
1797 len(outputCatalog.columns), len(outputCatalog))
1798 return pipeBase.Struct(outputCatalog=outputCatalog)
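# Illustrative pandas sketch (toy data) of the re-indexing dance above: rows
# are joined on the reference key (the index), which is then demoted to a
# column so the per-row forcedSourceId can become the unique index.
#
#     df = pd.DataFrame({'flux': [1.0, 2.0], 'forcedSourceId': [11, 12]},
#                       index=pd.Index([5, 5], name='objectId'))
#     df.reset_index(inplace=True)       # 'objectId' becomes a column
#     df.set_index('forcedSourceId', inplace=True, verify_integrity=True)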
class ConsolidateTractConnections(pipeBase.PipelineTaskConnections,
                                  defaultTemplates={"catalogType": ""},
                                  dimensions=("instrument", "tract")):
    inputCatalogs = connectionTypes.Input(
        doc="Input per-patch DataFrame Tables to be concatenated",
        name="{catalogType}ForcedSourceTable",
        storageClass="DataFrame",
        dimensions=("tract", "patch", "skymap"),
        multiple=True,
    )
    outputCatalog = connectionTypes.Output(
        doc="Output per-tract concatenation of DataFrame Tables",
        name="{catalogType}ForcedSourceTable_tract",
        storageClass="DataFrame",
        dimensions=("tract", "skymap"),
    )


class ConsolidateTractConfig(pipeBase.PipelineTaskConfig,
                             pipelineConnections=ConsolidateTractConnections):
    pass


class ConsolidateTractTask(pipeBase.PipelineTask):
    """Concatenate any per-patch, dataframe list into a single
    per-tract DataFrame.
    """
    _DefaultName = 'ConsolidateTract'
    ConfigClass = ConsolidateTractConfig

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)
        # Not checking that at least one inputCatalog exists, because that
        # would be an empty quantum graph.
        self.log.info("Concatenating %s per-patch %s Tables",
                      len(inputs['inputCatalogs']),
                      inputRefs.inputCatalogs[0].datasetType.name)
        df = pd.concat(inputs['inputCatalogs'])
        butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs)