__all__ = ["WriteObjectTableConfig", "WriteObjectTableTask",
           "WriteSourceTableConfig", "WriteSourceTableTask",
           "WriteRecalibratedSourceTableConfig", "WriteRecalibratedSourceTableTask",
           "PostprocessAnalysis",
           "TransformCatalogBaseConfig", "TransformCatalogBaseTask",
           "TransformObjectCatalogConfig", "TransformObjectCatalogTask",
           "ConsolidateObjectTableConfig", "ConsolidateObjectTableTask",
           "TransformSourceTableConfig", "TransformSourceTableTask",
           "ConsolidateVisitSummaryConfig", "ConsolidateVisitSummaryTask",
           "ConsolidateSourceTableConfig", "ConsolidateSourceTableTask",
           "MakeCcdVisitTableConfig", "MakeCcdVisitTableTask",
           "MakeVisitTableConfig", "MakeVisitTableTask",
           "WriteForcedSourceTableConfig", "WriteForcedSourceTableTask",
           "TransformForcedSourceTableConfig", "TransformForcedSourceTableTask",
           "ConsolidateTractConfig", "ConsolidateTractTask"]
import functools
import logging
import numbers
import os

import numpy as np
import pandas as pd

import lsst.geom
import lsst.pex.config as pexConfig
import lsst.pipe.base as pipeBase
import lsst.daf.base as dafBase
import lsst.afw.table as afwTable
from lsst.afw.image import ExposureSummaryStats
from lsst.pipe.base import connectionTypes
from lsst.meas.base import SingleFrameMeasurementTask, DetectorVisitIdGeneratorConfig
from lsst.skymap import BaseSkyMap

from .functors import CompositeFunctor, Column

log = logging.getLogger(__name__)
def flattenFilters(df, noDupCols=['coord_ra', 'coord_dec'], camelCase=False, inputBands=None):
    """Flattens a dataframe with multilevel column index.
    """
    newDf = pd.DataFrame()
    # band is the level-0 index of the input columns
    dfBands = df.columns.unique(level=0).values
    for band in dfBands:
        subdf = df[band]
        columnFormat = '{0}{1}' if camelCase else '{0}_{1}'
        newColumns = {c: columnFormat.format(band, c)
                      for c in subdf.columns if c not in noDupCols}
        cols = list(newColumns.keys())
        newDf = pd.concat([newDf, subdf[cols].rename(columns=newColumns)], axis=1)

    # Band must be present in the input and output or else column is all NaN:
    presentBands = dfBands if inputBands is None else list(set(inputBands).intersection(dfBands))
    # Get the unexploded columns from any present band's partition
    noDupDf = df[presentBands[0]][noDupCols]
    newDf = pd.concat([noDupDf, newDf], axis=1)
    return newDf
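# Illustrative sketch (not derived from pipeline data): given a two-level
# ('band', 'column') frame with columns such as ('g', 'PsFlux'), ('r', 'PsFlux'),
# ('g', 'coord_ra'), flattenFilters returns a flat frame whose columns look like
#   ['coord_ra', 'coord_dec', 'g_PsFlux', 'r_PsFlux']      (default)
#   ['coord_ra', 'coord_dec', 'gPsFlux', 'rPsFlux']        (camelCase=True)
# with the noDupCols taken once from the first band present.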
class WriteObjectTableConnections(pipeBase.PipelineTaskConnections,
                                  defaultTemplates={"coaddName": "deep"},
                                  dimensions=("tract", "patch", "skymap")):
    inputCatalogMeas = connectionTypes.Input(
        doc="Catalog of source measurements on the deepCoadd.",
        dimensions=("tract", "patch", "band", "skymap"),
        storageClass="SourceCatalog",
        name="{coaddName}Coadd_meas",
        multiple=True
    )
    inputCatalogForcedSrc = connectionTypes.Input(
        doc="Catalog of forced measurements (shape and position parameters held fixed) on the deepCoadd.",
        dimensions=("tract", "patch", "band", "skymap"),
        storageClass="SourceCatalog",
        name="{coaddName}Coadd_forced_src",
        multiple=True
    )
    inputCatalogRef = connectionTypes.Input(
        doc="Catalog marking the primary detection (which band provides a good shape and position) "
            "for each detection in deepCoadd_mergeDet.",
        dimensions=("tract", "patch", "skymap"),
        storageClass="SourceCatalog",
        name="{coaddName}Coadd_ref"
    )
    outputCatalog = connectionTypes.Output(
        doc="A vertical concatenation of the deepCoadd_{ref|meas|forced_src} catalogs, "
            "stored as a DataFrame with a multi-level column index per-patch.",
        dimensions=("tract", "patch", "skymap"),
        storageClass="DataFrame",
        name="{coaddName}Coadd_obj"
    )
class WriteObjectTableConfig(pipeBase.PipelineTaskConfig,
                             pipelineConnections=WriteObjectTableConnections):
    engine = pexConfig.Field(
        dtype=str,
        default="pyarrow",
        doc="Parquet engine for writing (pyarrow or fastparquet)",
        deprecated="This config is no longer used, and will be removed after v26."
    )
    coaddName = pexConfig.Field(
        dtype=str,
        default="deep",
        doc="Name of coadd"
    )
class WriteObjectTableTask(pipeBase.PipelineTask):
    """Write filter-merged source tables as a DataFrame in parquet format.
    """
    _DefaultName = "writeObjectTable"
    ConfigClass = WriteObjectTableConfig

    # Names of the per-band table datasets to be merged.
    inputDatasets = ('forced_src', 'meas', 'ref')

    # Tag of the merged output dataset.
    outputDataset = 'obj'
    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)

        measDict = {ref.dataId['band']: {'meas': cat} for ref, cat in
                    zip(inputRefs.inputCatalogMeas, inputs['inputCatalogMeas'])}
        forcedSourceDict = {ref.dataId['band']: {'forced_src': cat} for ref, cat in
                            zip(inputRefs.inputCatalogForcedSrc, inputs['inputCatalogForcedSrc'])}

        catalogs = {}
        for band in measDict.keys():
            catalogs[band] = {'meas': measDict[band]['meas'],
                              'forced_src': forcedSourceDict[band]['forced_src'],
                              'ref': inputs['inputCatalogRef']}
        dataId = butlerQC.quantum.dataId
        df = self.run(catalogs=catalogs, tract=dataId['tract'], patch=dataId['patch'])
        outputs = pipeBase.Struct(outputCatalog=df)
        butlerQC.put(outputs, outputRefs)
    def run(self, catalogs, tract, patch):
        """Merge multiple catalogs.

        Parameters
        ----------
        catalogs : `dict`
            Mapping from filter names to dict of catalogs.
        tract : `int`
            tractId to use for the tractId column.
        patch : `str`
            patchId to use for the patchId column.

        Returns
        -------
        catalog : `pandas.DataFrame`
            Merged dataframe.
        """
        dfs = []
        for filt, tableDict in catalogs.items():
            for dataset, table in tableDict.items():
                # Convert afwTable to pandas DataFrame, indexed by source id.
                df = table.asAstropy().to_pandas().set_index('id', drop=True)

                # Sort columns by name, to ensure matching schema among patches.
                df = df.reindex(sorted(df.columns), axis=1)
                df = df.assign(tractId=tract, patchId=patch)

                # Make columns a 3-level MultiIndex.
                df.columns = pd.MultiIndex.from_tuples([(dataset, filt, c) for c in df.columns],
                                                        names=('dataset', 'band', 'column'))
                dfs.append(df)

        # Horizontally join the per-dataset, per-band frames on the shared index.
        catalog = functools.reduce(lambda d1, d2: d1.join(d2), dfs)
        return catalog
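# Illustrative sketch (not part of the task): the merged {coaddName}Coadd_obj frame
# is indexed by object id and carries a three-level column index, for example
#   ('meas', 'g', 'base_PsfFlux_instFlux')
#   ('forced_src', 'r', 'base_PsfFlux_instFlux')
#   ('ref', 'i', 'detect_isPrimary')
# so downstream functors can select columns by dataset, band, and column name.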
class WriteSourceTableConnections(pipeBase.PipelineTaskConnections,
                                  defaultTemplates={"catalogType": ""},
                                  dimensions=("instrument", "visit", "detector")):
    catalog = connectionTypes.Input(
        doc="Input full-depth catalog of sources produced by CalibrateTask",
        name="{catalogType}src",
        storageClass="SourceCatalog",
        dimensions=("instrument", "visit", "detector")
    )
    outputCatalog = connectionTypes.Output(
        doc="Catalog of sources, `src` in DataFrame/Parquet format. The 'id' column is "
            "replaced with an index; all other columns are unchanged.",
        name="{catalogType}source",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector")
    )
class WriteSourceTableConfig(pipeBase.PipelineTaskConfig,
                             pipelineConnections=WriteSourceTableConnections):
    idGenerator = DetectorVisitIdGeneratorConfig.make_field()


class WriteSourceTableTask(pipeBase.PipelineTask):
    """Write source table to DataFrame Parquet format.
    """
    _DefaultName = "writeSourceTable"
    ConfigClass = WriteSourceTableConfig

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)
        inputs['ccdVisitId'] = self.config.idGenerator.apply(butlerQC.quantum.dataId).catalog_id
        result = self.run(**inputs)
        outputs = pipeBase.Struct(outputCatalog=result.table)
        butlerQC.put(outputs, outputRefs)

    def run(self, catalog, ccdVisitId=None, **kwargs):
        """Convert `src` catalog to DataFrame.

        Parameters
        ----------
        catalog : `afwTable.SourceCatalog`
            catalog to be converted
        ccdVisitId : `int`
            ccdVisitId to be added as a column
        **kwargs
            Additional keyword arguments are ignored as a convenience for
            subclasses that pass the same arguments to several different
            methods.

        Returns
        -------
        result : `lsst.pipe.base.Struct`
            ``table``
                `DataFrame` version of the input catalog
        """
        self.log.info("Generating DataFrame from src catalog ccdVisitId=%s", ccdVisitId)
        df = catalog.asAstropy().to_pandas().set_index('id', drop=True)
        df['ccdVisitId'] = ccdVisitId

        return pipeBase.Struct(table=df)
class WriteRecalibratedSourceTableConnections(WriteSourceTableConnections,
                                              defaultTemplates={"catalogType": "",
                                                                "skyWcsName": "gbdesAstrometricFit",
                                                                "photoCalibName": "fgcm"},
                                              dimensions=("instrument", "visit", "detector", "skymap")):
    skyMap = connectionTypes.Input(
        doc="skyMap needed to choose which tract-level calibrations to use when multiple available",
        name=BaseSkyMap.SKYMAP_DATASET_TYPE_NAME,
        storageClass="SkyMap",
        dimensions=("skymap",),
    )
    exposure = connectionTypes.Input(
        doc="Input exposure to perform photometry on.",
        name="calexp",
        storageClass="ExposureF",
        dimensions=["instrument", "visit", "detector"],
    )
    externalSkyWcsTractCatalog = connectionTypes.Input(
        doc=("Per-tract, per-visit wcs calibrations. These catalogs use the detector "
             "id for the catalog id, sorted on id for fast lookup."),
        name="{skyWcsName}SkyWcsCatalog",
        storageClass="ExposureCatalog",
        dimensions=["instrument", "visit", "tract"],
    )
    externalSkyWcsGlobalCatalog = connectionTypes.Input(
        doc=("Per-visit wcs calibrations computed globally (with no tract information). "
             "These catalogs use the detector id for the catalog id, sorted on id for "
             "fast lookup."),
        name="finalVisitSummary",
        storageClass="ExposureCatalog",
        dimensions=["instrument", "visit"],
    )
    externalPhotoCalibTractCatalog = connectionTypes.Input(
        doc=("Per-tract, per-visit photometric calibrations. These catalogs use the "
             "detector id for the catalog id, sorted on id for fast lookup."),
        name="{photoCalibName}PhotoCalibCatalog",
        storageClass="ExposureCatalog",
        dimensions=["instrument", "visit", "tract"],
    )
    externalPhotoCalibGlobalCatalog = connectionTypes.Input(
        doc=("Per-visit photometric calibrations computed globally (with no tract "
             "information). These catalogs use the detector id for the catalog id, "
             "sorted on id for fast lookup."),
        name="finalVisitSummary",
        storageClass="ExposureCatalog",
        dimensions=["instrument", "visit"],
    )

    def __init__(self, *, config=None):
        super().__init__(config=config)
        # Drop the connections for whichever external calibrations are not requested.
        if config.doApplyExternalSkyWcs and config.doReevaluateSkyWcs:
            if config.useGlobalExternalSkyWcs:
                self.inputs.remove("externalSkyWcsTractCatalog")
            else:
                self.inputs.remove("externalSkyWcsGlobalCatalog")
        else:
            self.inputs.remove("externalSkyWcsTractCatalog")
            self.inputs.remove("externalSkyWcsGlobalCatalog")
        if config.doApplyExternalPhotoCalib and config.doReevaluatePhotoCalib:
            if config.useGlobalExternalPhotoCalib:
                self.inputs.remove("externalPhotoCalibTractCatalog")
            else:
                self.inputs.remove("externalPhotoCalibGlobalCatalog")
        else:
            self.inputs.remove("externalPhotoCalibTractCatalog")
            self.inputs.remove("externalPhotoCalibGlobalCatalog")
class WriteRecalibratedSourceTableConfig(WriteSourceTableConfig,
                                         pipelineConnections=WriteRecalibratedSourceTableConnections):

    doReevaluatePhotoCalib = pexConfig.Field(
        dtype=bool,
        doc="Add or replace local photoCalib columns",
    )
    doReevaluateSkyWcs = pexConfig.Field(
        dtype=bool,
        doc="Add or replace local WCS columns and update the coord columns, coord_ra and coord_dec",
    )
    doApplyExternalPhotoCalib = pexConfig.Field(
        dtype=bool,
        doc=("If and only if doReevaluatePhotoCalib, apply the photometric calibrations from an external "
             "algorithm such as FGCM or jointcal, else use the photoCalib already attached to the exposure."),
    )
    doApplyExternalSkyWcs = pexConfig.Field(
        dtype=bool,
        doc=("If and only if doReevaluateSkyWcs, apply the WCS from an external algorithm such as jointcal, "
             "else use the wcs already attached to the exposure."),
    )
    useGlobalExternalPhotoCalib = pexConfig.Field(
        dtype=bool,
        doc=("When using doApplyExternalPhotoCalib, use 'global' calibrations "
             "that are not run per-tract. When False, use per-tract photometric "
             "calibration files."),
    )
    useGlobalExternalSkyWcs = pexConfig.Field(
        dtype=bool,
        doc=("When using doApplyExternalSkyWcs, use 'global' calibrations "
             "that are not run per-tract. When False, use per-tract wcs "
             "files."),
    )
    idGenerator = DetectorVisitIdGeneratorConfig.make_field()

    def validate(self):
        super().validate()
        if self.doApplyExternalSkyWcs and not self.doReevaluateSkyWcs:
            log.warning("doApplyExternalSkyWcs=True but doReevaluateSkyWcs=False. "
                        "External SkyWcs will not be read or evaluated.")
        if self.doApplyExternalPhotoCalib and not self.doReevaluatePhotoCalib:
            log.warning("doApplyExternalPhotoCalib=True but doReevaluatePhotoCalib=False. "
                        "External PhotoCalib will not be read or evaluated.")
class WriteRecalibratedSourceTableTask(WriteSourceTableTask):
    """Write source table to DataFrame Parquet format.
    """
    _DefaultName = "writeRecalibratedSourceTable"
    ConfigClass = WriteRecalibratedSourceTableConfig

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)

        idGenerator = self.config.idGenerator.apply(butlerQC.quantum.dataId)
        inputs['idGenerator'] = idGenerator
        inputs['ccdVisitId'] = idGenerator.catalog_id

        if self.config.doReevaluatePhotoCalib or self.config.doReevaluateSkyWcs:
            if self.config.doApplyExternalPhotoCalib or self.config.doApplyExternalSkyWcs:
                inputs['exposure'] = self.attachCalibs(inputRefs, **inputs)

            inputs['catalog'] = self.addCalibColumns(**inputs)

        result = self.run(**inputs)
        outputs = pipeBase.Struct(outputCatalog=result.table)
        butlerQC.put(outputs, outputRefs)
    def attachCalibs(self, inputRefs, skyMap, exposure, externalSkyWcsGlobalCatalog=None,
                     externalSkyWcsTractCatalog=None, externalPhotoCalibGlobalCatalog=None,
                     externalPhotoCalibTractCatalog=None, **kwargs):
        """Apply external calibrations to exposure per configuration.

        When multiple tract-level calibrations overlap, select the one with the
        center closest to detector.

        Parameters
        ----------
        inputRefs : `lsst.pipe.base.InputQuantizedConnection`
            Input references, for dataIds of tract-level calibs.
        skyMap : `lsst.skymap.SkyMap`
        exposure : `lsst.afw.image.exposure.Exposure`
            Input exposure to adjust calibrations.
        externalSkyWcsGlobalCatalog : `lsst.afw.table.ExposureCatalog`, optional
            Exposure catalog with external skyWcs to be applied per config.
        externalSkyWcsTractCatalog : `lsst.afw.table.ExposureCatalog`, optional
            Exposure catalog with external skyWcs to be applied per config.
        externalPhotoCalibGlobalCatalog : `lsst.afw.table.ExposureCatalog`, optional
            Exposure catalog with external photoCalib to be applied per config.
        externalPhotoCalibTractCatalog : `lsst.afw.table.ExposureCatalog`, optional
            Exposure catalog with external photoCalib to be applied per config.
        **kwargs
            Additional keyword arguments are ignored to facilitate passing the
            same arguments to several methods.

        Returns
        -------
        exposure : `lsst.afw.image.exposure.Exposure`
            Exposure with adjusted calibrations.
        """
        if not self.config.doApplyExternalSkyWcs:
            # Keep the SkyWcs already attached to the exposure.
            externalSkyWcsCatalog = None
        elif self.config.useGlobalExternalSkyWcs:
            externalSkyWcsCatalog = externalSkyWcsGlobalCatalog
            self.log.info('Applying global SkyWcs')
        else:
            inputRef = getattr(inputRefs, 'externalSkyWcsTractCatalog')
            tracts = [ref.dataId['tract'] for ref in inputRef]
            if len(tracts) == 1:
                ind = 0
                self.log.info('Applying tract-level SkyWcs from tract %s', tracts[ind])
            else:
                if exposure.getWcs() is None:
                    raise ValueError("Trying to locate nearest tract, but exposure.wcs is None.")
                ind = self.getClosestTract(tracts, skyMap,
                                           exposure.getBBox(), exposure.getWcs())
                self.log.info('Multiple overlapping externalSkyWcsTractCatalogs found (%s). '
                              'Applying closest to detector center: tract=%s', str(tracts), tracts[ind])
            externalSkyWcsCatalog = externalSkyWcsTractCatalog[ind]

        if not self.config.doApplyExternalPhotoCalib:
            # Keep the PhotoCalib already attached to the exposure.
            externalPhotoCalibCatalog = None
        elif self.config.useGlobalExternalPhotoCalib:
            externalPhotoCalibCatalog = externalPhotoCalibGlobalCatalog
            self.log.info('Applying global PhotoCalib')
        else:
            inputRef = getattr(inputRefs, 'externalPhotoCalibTractCatalog')
            tracts = [ref.dataId['tract'] for ref in inputRef]
            if len(tracts) == 1:
                ind = 0
                self.log.info('Applying tract-level PhotoCalib from tract %s', tracts[ind])
            else:
                ind = self.getClosestTract(tracts, skyMap,
                                           exposure.getBBox(), exposure.getWcs())
                self.log.info('Multiple overlapping externalPhotoCalibTractCatalogs found (%s). '
                              'Applying closest to detector center: tract=%s', str(tracts), tracts[ind])
            externalPhotoCalibCatalog = externalPhotoCalibTractCatalog[ind]

        return self.prepareCalibratedExposure(exposure, externalSkyWcsCatalog, externalPhotoCalibCatalog)
    def getClosestTract(self, tracts, skyMap, bbox, wcs):
        """Find the index of the tract closest to detector from list of tractIds.

        Parameters
        ----------
        tracts : `list` [`int`]
            Iterable of integer tractIds.
        skyMap : `lsst.skymap.SkyMap`
            skyMap to lookup tract geometry and wcs.
        bbox : `lsst.geom.Box2I`
            Detector bbox, center of which will be compared to tract centers.
        wcs : `lsst.afw.geom.SkyWcs`
            Detector Wcs object to map the detector center to SkyCoord.

        Returns
        -------
        index : `int`
            Index of the closest tract.
        """
        center = wcs.pixelToSky(bbox.getCenter())
        sep = []
        for tractId in tracts:
            tract = skyMap[tractId]
            tractCenter = tract.getWcs().pixelToSky(tract.getBBox().getCenter())
            sep.append(center.separation(tractCenter))

        return np.argmin(sep)
    def prepareCalibratedExposure(self, exposure, externalSkyWcsCatalog=None, externalPhotoCalibCatalog=None):
        """Prepare a calibrated exposure and apply external calibrations.

        Parameters
        ----------
        exposure : `lsst.afw.image.exposure.Exposure`
            Input exposure to adjust calibrations.
        externalSkyWcsCatalog : `lsst.afw.table.ExposureCatalog`, optional
            Exposure catalog with external skyWcs to be applied
            if config.doApplyExternalSkyWcs=True. Catalog uses the detector id
            for the catalog id, sorted on id for fast lookup.
        externalPhotoCalibCatalog : `lsst.afw.table.ExposureCatalog`, optional
            Exposure catalog with external photoCalib to be applied
            if config.doApplyExternalPhotoCalib=True. Catalog uses the detector
            id for the catalog id, sorted on id for fast lookup.

        Returns
        -------
        exposure : `lsst.afw.image.exposure.Exposure`
            Exposure with adjusted calibrations.
        """
        detectorId = exposure.getInfo().getDetector().getId()

        if externalPhotoCalibCatalog is not None:
            row = externalPhotoCalibCatalog.find(detectorId)
            if row is None:
                self.log.warning("Detector id %s not found in externalPhotoCalibCatalog; "
                                 "Using original photoCalib.", detectorId)
            else:
                photoCalib = row.getPhotoCalib()
                if photoCalib is None:
                    self.log.warning("Detector id %s has None for photoCalib in externalPhotoCalibCatalog; "
                                     "Using original photoCalib.", detectorId)
                else:
                    exposure.setPhotoCalib(photoCalib)

        if externalSkyWcsCatalog is not None:
            row = externalSkyWcsCatalog.find(detectorId)
            if row is None:
                self.log.warning("Detector id %s not found in externalSkyWcsCatalog; "
                                 "Using original skyWcs.", detectorId)
            else:
                skyWcs = row.getWcs()
                if skyWcs is None:
                    self.log.warning("Detector id %s has None for skyWcs in externalSkyWcsCatalog; "
                                     "Using original skyWcs.", detectorId)
                else:
                    exposure.setWcs(skyWcs)

        return exposure
    def addCalibColumns(self, catalog, exposure, idGenerator, **kwargs):
        """Add or replace columns with calibs evaluated at each centroid.

        Add or replace 'base_LocalWcs' and 'base_LocalPhotoCalib' columns in
        a source catalog, by rerunning the plugins.

        Parameters
        ----------
        catalog : `lsst.afw.table.SourceCatalog`
            catalog to which calib columns will be added
        exposure : `lsst.afw.image.exposure.Exposure`
            Exposure with attached PhotoCalibs and SkyWcs attributes to be
            reevaluated at local centroids. Pixels are not required.
        idGenerator : `lsst.meas.base.IdGenerator`
            Object that generates Source IDs and random seeds.
        **kwargs
            Additional keyword arguments are ignored to facilitate passing the
            same arguments to several methods.

        Returns
        -------
        newCat : `lsst.afw.table.SourceCatalog`
            Source Catalog with requested local calib columns.
        """
        measureConfig = SingleFrameMeasurementTask.ConfigClass()
        measureConfig.doReplaceWithNoise = False

        # Clear all slots and plugins; only the local-calib plugins are rerun.
        for slot in measureConfig.slots:
            setattr(measureConfig.slots, slot, None)

        measureConfig.plugins.names = []
        if self.config.doReevaluateSkyWcs:
            measureConfig.plugins.names.add('base_LocalWcs')
            self.log.info("Re-evaluating base_LocalWcs plugin")
        if self.config.doReevaluatePhotoCalib:
            measureConfig.plugins.names.add('base_LocalPhotoCalib')
            self.log.info("Re-evaluating base_LocalPhotoCalib plugin")
        pluginsNotToCopy = tuple(measureConfig.plugins.names)

        # Copy every field from the input schema except the columns that the
        # selected plugins will recompute.
        aliasMap = catalog.schema.getAliasMap()
        mapper = afwTable.SchemaMapper(catalog.schema)
        for item in catalog.schema:
            if not item.field.getName().startswith(pluginsNotToCopy):
                mapper.addMapping(item.key)

        schema = mapper.getOutputSchema()
        measurement = SingleFrameMeasurementTask(config=measureConfig, schema=schema)
        schema.setAliasMap(aliasMap)
        newCat = afwTable.SourceCatalog(schema)
        newCat.extend(catalog, mapper=mapper)

        # Refresh the coord columns from the (possibly updated) WCS before measuring.
        if self.config.doReevaluateSkyWcs and exposure.wcs is not None:
            afwTable.updateSourceCoords(exposure.wcs, newCat)

        measurement.run(measCat=newCat, exposure=exposure, exposureId=idGenerator.catalog_id)

        return newCat
636 """Calculate columns from DataFrames or handles storing DataFrames.
638 This object manages and organizes an arbitrary set of computations
639 on a catalog. The catalog
is defined by a
640 `DeferredDatasetHandle`
or `InMemoryDatasetHandle` object
641 (
or list thereof), such
as a ``deepCoadd_obj`` dataset,
and the
642 computations are defined by a collection of `lsst.pipe.tasks.functor.Functor`
643 objects (
or, equivalently, a ``CompositeFunctor``).
645 After the object
is initialized, accessing the ``.df`` attribute (which
646 holds the `pandas.DataFrame` containing the results of the calculations)
647 triggers computation of said dataframe.
649 One of the conveniences of using this object
is the ability to define a
650 desired common filter
for all functors. This enables the same functor
651 collection to be passed to several different `PostprocessAnalysis` objects
652 without having to change the original functor collection, since the ``filt``
653 keyword argument of this object triggers an overwrite of the ``filt``
654 property
for all functors
in the collection.
656 This object also allows a list of refFlags to be passed,
and defines a set
657 of default refFlags that are always included even
if not requested.
659 If a list of DataFrames
or Handles
is passed, rather than a single one,
660 then the calculations will be mapped over all the input catalogs. In
661 principle, it should be straightforward to parallelize this activity, but
662 initial tests have failed (see TODO
in code comments).
666 handles : `lsst.daf.butler.DeferredDatasetHandle`
or
667 `lsst.pipe.base.InMemoryDatasetHandle`
or
669 Source
catalog(s)
for computation.
671 Computations to do (functors that act on ``handles``).
672 If a dict, the output
673 DataFrame will have columns keyed accordingly.
674 If a list, the column keys will come
from the
675 ``.shortname`` attribute of each functor.
677 filt : `str`, optional
678 Filter
in which to calculate. If provided,
679 this will overwrite any existing ``.filt`` attribute
680 of the provided functors.
682 flags : `list`, optional
683 List of flags (per-band) to include
in output table.
684 Taken
from the ``meas`` dataset
if applied to a multilevel Object Table.
686 refFlags : `list`, optional
687 List of refFlags (only reference band) to include
in output table.
689 forcedFlags : `list`, optional
690 List of flags (per-band) to include
in output table.
691 Taken
from the ``forced_src`` dataset
if applied to a
692 multilevel Object Table. Intended
for flags
from measurement plugins
693 only run during multi-band forced-photometry.
695 _defaultRefFlags = []
698 def __init__(self, handles, functors, filt=None, flags=None, refFlags=None, forcedFlags=None):
703 self.
flags = list(flags)
if flags
is not None else []
704 self.
forcedFlags = list(forcedFlags)
if forcedFlags
is not None else []
706 if refFlags
is not None:
719 additionalFuncs.update({flag:
Column(flag, dataset=
'forced_src')
for flag
in self.
forcedFlags})
720 additionalFuncs.update({flag:
Column(flag, dataset=
'ref')
for flag
in self.
refFlags})
721 additionalFuncs.update({flag:
Column(flag, dataset=
'meas')
for flag
in self.
flags})
723 if isinstance(self.
functors, CompositeFunctor):
728 func.funcDict.update(additionalFuncs)
729 func.filt = self.
filt
735 return [name
for name, func
in self.
func.funcDict.items()
if func.noDup
or func.dataset ==
'ref']
745 if type(self.
handles)
in (list, tuple):
747 dflist = [self.
func(handle, dropna=dropna)
for handle
in self.
handles]
751 dflist = pool.map(functools.partial(self.
func, dropna=dropna), self.
handles)
752 self.
_df = pd.concat(dflist)
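# Illustrative usage sketch (assumes `handle` is a deferred-load handle for a
# deepCoadd_obj DataFrame and `funcs` is a functor dict, e.g. one loaded with
# CompositeFunctor.from_file('Object.yaml')):
#
#     analysis = PostprocessAnalysis(handle, funcs, filt='i',
#                                    flags=['base_PixelFlags_flag'],
#                                    refFlags=['detect_isPrimary'])
#     df = analysis.df   # accessing .df triggers the computation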
761 """Expected Connections for subclasses of TransformCatalogBaseTask.
765 inputCatalog = connectionTypes.Input(
767 storageClass=
"DataFrame",
769 outputCatalog = connectionTypes.Output(
771 storageClass=
"DataFrame",
776 pipelineConnections=TransformCatalogBaseConnections):
777 functorFile = pexConfig.Field(
779 doc=
"Path to YAML file specifying Science Data Model functors to use "
780 "when copying columns and computing calibrated values.",
784 primaryKey = pexConfig.Field(
786 doc=
"Name of column to be set as the DataFrame index. If None, the index"
787 "will be named `id`",
791 columnsFromDataId = pexConfig.ListField(
795 doc=
"Columns to extract from the dataId",
800 """Base class for transforming/standardizing a catalog
802 by applying functors that convert units and apply calibrations.
803 The purpose of this task
is to perform a set of computations on
804 an input ``DeferredDatasetHandle``
or ``InMemoryDatasetHandle`` that holds
805 a ``DataFrame`` dataset (such
as ``deepCoadd_obj``),
and write the
806 results to a new dataset (which needs to be declared
in an ``outputDataset``
809 The calculations to be performed are defined
in a YAML file that specifies
810 a set of functors to be computed, provided
as
811 a ``--functorFile`` config parameter. An example of such a YAML file
836 - base_InputCount_value
839 functor: DeconvolvedMoments
844 - merge_measurement_i
845 - merge_measurement_r
846 - merge_measurement_z
847 - merge_measurement_y
848 - merge_measurement_g
849 - base_PixelFlags_flag_inexact_psfCenter
852 The names
for each entry under
"func" will become the names of columns
in
853 the output dataset. All the functors referenced are defined
in
855 functor are
in the `args` list,
and any additional entries
for each column
856 other than
"functor" or "args" (e.g., ``
'filt'``, ``
'dataset'``) are treated
as
857 keyword arguments to be passed to the functor initialization.
859 The
"flags" entry
is the default shortcut
for `Column` functors.
860 All columns listed under
"flags" will be copied to the output table
861 untransformed. They can be of any datatype.
862 In the special case of transforming a multi-level oject table
with
863 band
and dataset indices (deepCoadd_obj), these will be taked
from the
864 `meas` dataset
and exploded out per band.
866 There are two special shortcuts that only apply when transforming
867 multi-level Object (deepCoadd_obj) tables:
868 - The
"refFlags" entry
is shortcut
for `Column` functor
869 taken
from the `
'ref'` dataset
if transforming an ObjectTable.
870 - The
"forcedFlags" entry
is shortcut
for `Column` functors.
871 taken
from the ``forced_src`` dataset
if transforming an ObjectTable.
872 These are expanded out per band.
876 to organize
and excecute the calculations.
    @property
    def _DefaultName(self):
        raise NotImplementedError('Subclass must define "_DefaultName" attribute')

    @property
    def outputDataset(self):
        raise NotImplementedError('Subclass must define "outputDataset" attribute')

    @property
    def inputDataset(self):
        raise NotImplementedError('Subclass must define "inputDataset" attribute')

    @property
    def ConfigClass(self):
        raise NotImplementedError('Subclass must define "ConfigClass" attribute')

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        if self.config.functorFile:
            self.log.info('Loading transform functor definitions from %s',
                          self.config.functorFile)
            self.funcs = CompositeFunctor.from_file(self.config.functorFile)
            self.funcs.update(dict(PostprocessAnalysis._defaultFuncs))
        else:
            self.funcs = None

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)
        if self.funcs is None:
            raise ValueError("config.functorFile is None. "
                             "Must be a valid path to yaml in order to run Task as a PipelineTask.")
        result = self.run(handle=inputs['inputCatalog'], funcs=self.funcs,
                          dataId=outputRefs.outputCatalog.dataId.full)
        outputs = pipeBase.Struct(outputCatalog=result)
        butlerQC.put(outputs, outputRefs)
    def run(self, handle, funcs=None, dataId=None, band=None):
        """Do postprocessing calculations.

        Takes a ``DeferredDatasetHandle`` or ``InMemoryDatasetHandle`` or
        ``DataFrame`` object and dataId,
        returns a dataframe with results of postprocessing calculations.

        Parameters
        ----------
        handle : `lsst.daf.butler.DeferredDatasetHandle` or
                 `lsst.pipe.base.InMemoryDatasetHandle` or
                 `pandas.DataFrame`, or list of these.
            DataFrames from which calculations are done.
        funcs : `lsst.pipe.tasks.functors.Functors`
            Functors to apply to the table's columns.
        dataId : `dict`, optional
            Used to add a `patchId` column to the output dataframe.
        band : `str`, optional
            Filter band that is being processed.

        Returns
        -------
        df : `pandas.DataFrame`
        """
        self.log.info("Transforming/standardizing the source table dataId: %s", dataId)

        df = self.transform(band, handle, funcs, dataId).df
        self.log.info("Made a table of %d columns and %d rows", len(df.columns), len(df))
        return df

    def getAnalysis(self, handles, funcs=None, band=None):
        if funcs is None:
            funcs = self.funcs
        analysis = PostprocessAnalysis(handles, funcs, filt=band)
        return analysis

    def transform(self, band, handles, funcs, dataId):
        analysis = self.getAnalysis(handles, funcs=funcs, band=band)
        df = analysis.df
        if dataId and self.config.columnsFromDataId:
            for key in self.config.columnsFromDataId:
                if key in dataId:
                    df[str(key)] = dataId[key]
                else:
                    raise ValueError(f"'{key}' in config.columnsFromDataId not found in dataId: {dataId}")

        if self.config.primaryKey:
            if df.index.name != self.config.primaryKey and self.config.primaryKey in df:
                df.reset_index(inplace=True, drop=True)
                df.set_index(self.config.primaryKey, inplace=True)

        return pipeBase.Struct(
            df=df,
            analysis=analysis
        )
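# Illustrative subclass sketch (hypothetical names, not part of the pipeline):
# a concrete transform supplies its own connections and config, and points
# `functorFile` at a Science Data Model YAML file, e.g.
#
#     class TransformMyCatalogConfig(TransformCatalogBaseConfig,
#                                    pipelineConnections=MyCatalogConnections):
#         def setDefaults(self):
#             super().setDefaults()
#             self.functorFile = os.path.join('$PIPE_TASKS_DIR', 'schemas', 'MyCatalog.yaml')
#             self.primaryKey = 'myObjectId'
#
#     class TransformMyCatalogTask(TransformCatalogBaseTask):
#         _DefaultName = "transformMyCatalog"
#         ConfigClass = TransformMyCatalogConfig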
class TransformObjectCatalogConnections(pipeBase.PipelineTaskConnections,
                                        defaultTemplates={"coaddName": "deep"},
                                        dimensions=("tract", "patch", "skymap")):
    inputCatalog = connectionTypes.Input(
        doc="The vertical concatenation of the deepCoadd_{ref|meas|forced_src} catalogs, "
            "stored as a DataFrame with a multi-level column index per-patch.",
        dimensions=("tract", "patch", "skymap"),
        storageClass="DataFrame",
        name="{coaddName}Coadd_obj",
        deferLoad=True,
    )
    outputCatalog = connectionTypes.Output(
        doc="Per-Patch Object Table of columns transformed from the deepCoadd_obj table per the standard "
            "data model.",
        dimensions=("tract", "patch", "skymap"),
        storageClass="DataFrame",
        name="objectTable",
    )
class TransformObjectCatalogConfig(TransformCatalogBaseConfig,
                                   pipelineConnections=TransformObjectCatalogConnections):
    coaddName = pexConfig.Field(
        dtype=str,
        default="deep",
        doc="Name of coadd"
    )
    filterMap = pexConfig.DictField(
        keytype=str,
        itemtype=str,
        default={},
        doc=("Dictionary mapping full filter name to short one for column name munging. "
             "These filters determine the output columns no matter what filters the "
             "input data actually contain."),
        deprecated=("Coadds are now identified by the band, so this transform is unused. "
                    "Will be removed after v22.")
    )
    outputBands = pexConfig.ListField(
        dtype=str,
        default=None,
        optional=True,
        doc=("These bands and only these bands will appear in the output,"
             " NaN-filled if the input does not include them."
             " If None, then use all bands found in the input.")
    )
    camelCase = pexConfig.Field(
        dtype=bool,
        default=False,
        doc=("Write per-band columns names with camelCase, else underscore. "
             "For example: gPsFlux instead of g_PsFlux.")
    )
    multilevelOutput = pexConfig.Field(
        dtype=bool,
        default=False,
        doc=("Whether results dataframe should have a multilevel column index (True) or be flat "
             "and name-munged (False).")
    )
    goodFlags = pexConfig.ListField(
        dtype=str,
        default=[],
        doc=("List of 'good' flags that should be set False when populating empty tables. "
             "All other flags are considered to be 'bad' flags and will be set to True.")
    )
    floatFillValue = pexConfig.Field(
        dtype=float,
        default=np.nan,
        doc="Fill value for float fields when populating empty tables."
    )
    integerFillValue = pexConfig.Field(
        dtype=int,
        default=-1,
        doc="Fill value for integer fields when populating empty tables."
    )

    def setDefaults(self):
        super().setDefaults()
        self.functorFile = os.path.join('$PIPE_TASKS_DIR', 'schemas', 'Object.yaml')
        self.primaryKey = 'objectId'
        self.columnsFromDataId = ['tract', 'patch']
        self.goodFlags = ['calib_astrometry_used',
                          'calib_photometry_reserved',
                          'calib_photometry_used',
                          'calib_psf_candidate',
                          'calib_psf_reserved',
                          'calib_psf_used']
1063 """Produce a flattened Object Table to match the format specified in
1066 Do the same set of postprocessing calculations on all bands.
1068 This is identical to `TransformCatalogBaseTask`,
except for that it does
1069 the specified functor calculations
for all filters present
in the
1070 input `deepCoadd_obj` table. Any specific ``
"filt"`` keywords specified
1071 by the YAML file will be superceded.
1073 _DefaultName = "transformObjectCatalog"
1074 ConfigClass = TransformObjectCatalogConfig
1076 def run(self, handle, funcs=None, dataId=None, band=None):
1080 templateDf = pd.DataFrame()
1082 columns = handle.get(component=
'columns')
1083 inputBands = columns.unique(level=1).values
1085 outputBands = self.config.outputBands
if self.config.outputBands
else inputBands
1088 for inputBand
in inputBands:
1089 if inputBand
not in outputBands:
1090 self.log.info(
"Ignoring %s band data in the input", inputBand)
1092 self.log.info(
"Transforming the catalog of band %s", inputBand)
1093 result = self.transform(inputBand, handle, funcs, dataId)
1094 dfDict[inputBand] = result.df
1095 analysisDict[inputBand] = result.analysis
1096 if templateDf.empty:
1097 templateDf = result.df
1100 for filt
in outputBands:
1101 if filt
not in dfDict:
1102 self.log.info(
"Adding empty columns for band %s", filt)
1103 dfTemp = templateDf.copy()
1104 for col
in dfTemp.columns:
1105 testValue = dfTemp[col].values[0]
1106 if isinstance(testValue, (np.bool_, pd.BooleanDtype)):
1108 if col
in self.config.goodFlags:
1112 elif isinstance(testValue, numbers.Integral):
1116 if isinstance(testValue, np.unsignedinteger):
1117 raise ValueError(
"Parquet tables may not have unsigned integer columns.")
1119 fillValue = self.config.integerFillValue
1121 fillValue = self.config.floatFillValue
1122 dfTemp[col].values[:] = fillValue
1123 dfDict[filt] = dfTemp
1126 df = pd.concat(dfDict, axis=1, names=[
'band',
'column'])
1128 if not self.config.multilevelOutput:
1129 noDupCols = list(set.union(*[set(v.noDupCols)
for v
in analysisDict.values()]))
1130 if self.config.primaryKey
in noDupCols:
1131 noDupCols.remove(self.config.primaryKey)
1132 if dataId
and self.config.columnsFromDataId:
1133 noDupCols += self.config.columnsFromDataId
1134 df =
flattenFilters(df, noDupCols=noDupCols, camelCase=self.config.camelCase,
1135 inputBands=inputBands)
1137 self.log.info(
"Made a table of %d columns and %d rows", len(df.columns), len(df))
class ConsolidateObjectTableConnections(pipeBase.PipelineTaskConnections,
                                        dimensions=("tract", "skymap")):
    inputCatalogs = connectionTypes.Input(
        doc="Per-Patch objectTables conforming to the standard data model.",
        name="objectTable",
        storageClass="DataFrame",
        dimensions=("tract", "patch", "skymap"),
        multiple=True,
    )
    outputCatalog = connectionTypes.Output(
        doc="Per-tract concatenation of the input objectTables",
        name="objectTable_tract",
        storageClass="DataFrame",
        dimensions=("tract", "skymap"),
    )


class ConsolidateObjectTableConfig(pipeBase.PipelineTaskConfig,
                                   pipelineConnections=ConsolidateObjectTableConnections):
    coaddName = pexConfig.Field(
        dtype=str,
        default="deep",
        doc="Name of coadd"
    )


class ConsolidateObjectTableTask(pipeBase.PipelineTask):
    """Write patch-merged source tables to a tract-level DataFrame Parquet file.

    Concatenates the per-patch `objectTable` list into a per-tract `objectTable_tract`.
    """
    _DefaultName = "consolidateObjectTable"
    ConfigClass = ConsolidateObjectTableConfig

    inputDataset = 'objectTable'
    outputDataset = 'objectTable_tract'

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)
        self.log.info("Concatenating %s per-patch Object Tables",
                      len(inputs['inputCatalogs']))
        df = pd.concat(inputs['inputCatalogs'])
        butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs)
class TransformSourceTableConnections(pipeBase.PipelineTaskConnections,
                                      defaultTemplates={"catalogType": ""},
                                      dimensions=("instrument", "visit", "detector")):
    inputCatalog = connectionTypes.Input(
        doc="Wide input catalog of sources produced by WriteSourceTableTask",
        name="{catalogType}source",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector"),
        deferLoad=True,
    )
    outputCatalog = connectionTypes.Output(
        doc="Narrower, per-detector Source Table transformed and converted per a "
            "specified set of functors",
        name="{catalogType}sourceTable",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector")
    )


class TransformSourceTableConfig(TransformCatalogBaseConfig,
                                 pipelineConnections=TransformSourceTableConnections):

    def setDefaults(self):
        super().setDefaults()
        self.functorFile = os.path.join('$PIPE_TASKS_DIR', 'schemas', 'Source.yaml')
        self.primaryKey = 'sourceId'
        self.columnsFromDataId = ['visit', 'detector', 'band', 'physical_filter']


class TransformSourceTableTask(TransformCatalogBaseTask):
    """Transform/standardize a source catalog.
    """
    _DefaultName = "transformSourceTable"
    ConfigClass = TransformSourceTableConfig
class ConsolidateVisitSummaryConnections(pipeBase.PipelineTaskConnections,
                                         dimensions=("instrument", "visit",),
                                         defaultTemplates={"calexpType": ""}):
    calexp = connectionTypes.Input(
        doc="Processed exposures used for metadata",
        name="{calexpType}calexp",
        storageClass="ExposureF",
        dimensions=("instrument", "visit", "detector"),
        deferLoad=True,
        multiple=True,
    )
    visitSummary = connectionTypes.Output(
        doc=("Per-visit consolidated exposure metadata. These catalogs use "
             "detector id for the id and are sorted for fast lookups of a "
             "detector."),
        name="visitSummary",
        storageClass="ExposureCatalog",
        dimensions=("instrument", "visit"),
    )
    visitSummarySchema = connectionTypes.InitOutput(
        doc="Schema of the visitSummary catalog",
        name="visitSummary_schema",
        storageClass="ExposureCatalog",
    )


class ConsolidateVisitSummaryConfig(pipeBase.PipelineTaskConfig,
                                    pipelineConnections=ConsolidateVisitSummaryConnections):
    """Config for ConsolidateVisitSummaryTask"""
class ConsolidateVisitSummaryTask(pipeBase.PipelineTask):
    """Task to consolidate per-detector visit metadata.

    This task aggregates the following metadata from all the detectors in a
    single visit into an exposure catalog:

    - The visitInfo.
    - The wcs.
    - The photoCalib.
    - The physical_filter and band (if available).
    - The psf size, shape, and effective area at the center of the detector.
    - The corners of the bounding box in right ascension/declination.

    Other quantities such as Detector, Psf, ApCorrMap, and TransmissionCurve
    are not persisted here because of storage concerns, and because of their
    limited utility as summary statistics.

    Tests for this task are performed in ci_hsc_gen3.
    """
    _DefaultName = "consolidateVisitSummary"
    ConfigClass = ConsolidateVisitSummaryConfig

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.schema = afwTable.ExposureTable.makeMinimalSchema()
        self.schema.addField('visit', type='L', doc='Visit number')
        self.schema.addField('physical_filter', type='String', size=32, doc='Physical filter')
        self.schema.addField('band', type='String', size=32, doc='Name of band')
        ExposureSummaryStats.update_schema(self.schema)
        self.visitSummarySchema = afwTable.ExposureCatalog(self.schema)
    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        dataRefs = butlerQC.get(inputRefs.calexp)
        visit = dataRefs[0].dataId.byName()['visit']

        self.log.debug("Concatenating metadata from %d per-detector calexps (visit %d)",
                       len(dataRefs), visit)

        expCatalog = self._combineExposureMetadata(visit, dataRefs)

        butlerQC.put(expCatalog, outputRefs.visitSummary)

    def _combineExposureMetadata(self, visit, dataRefs):
        """Make a combined exposure catalog from a list of dataRefs.

        These dataRefs must point to exposures with wcs, summaryStats,
        and other visit metadata.

        Parameters
        ----------
        visit : `int`
            Visit identification number.
        dataRefs : `list` of `lsst.daf.butler.DeferredDatasetHandle`
            List of dataRefs in visit.

        Returns
        -------
        visitSummary : `lsst.afw.table.ExposureCatalog`
            Exposure catalog with per-detector summary information.
        """
        cat = afwTable.ExposureCatalog(self.schema)
        cat.resize(len(dataRefs))

        cat['visit'] = visit

        for i, dataRef in enumerate(dataRefs):
            visitInfo = dataRef.get(component='visitInfo')
            filterLabel = dataRef.get(component='filter')
            summaryStats = dataRef.get(component='summaryStats')
            detector = dataRef.get(component='detector')
            wcs = dataRef.get(component='wcs')
            photoCalib = dataRef.get(component='photoCalib')
            bbox = dataRef.get(component='bbox')
            validPolygon = dataRef.get(component='validPolygon')

            rec = cat[i]
            rec.setBBox(bbox)
            rec.setVisitInfo(visitInfo)
            rec.setWcs(wcs)
            rec.setPhotoCalib(photoCalib)
            rec.setValidPolygon(validPolygon)

            rec['physical_filter'] = filterLabel.physicalLabel if filterLabel.hasPhysicalLabel() else ""
            rec['band'] = filterLabel.bandLabel if filterLabel.hasBandLabel() else ""
            rec.setId(detector.getId())
            summaryStats.update_record(rec)

        metadata = dafBase.PropertyList()
        metadata.add("COMMENT", "Catalog id is detector id, sorted.")
        metadata.add("COMMENT", "Only detectors with data have entries.")
        cat.setMetadata(metadata)

        cat.sort()
        return cat
class ConsolidateSourceTableConnections(pipeBase.PipelineTaskConnections,
                                        defaultTemplates={"catalogType": ""},
                                        dimensions=("instrument", "visit")):
    inputCatalogs = connectionTypes.Input(
        doc="Input per-detector Source Tables",
        name="{catalogType}sourceTable",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector"),
        multiple=True,
    )
    outputCatalog = connectionTypes.Output(
        doc="Per-visit concatenation of Source Table",
        name="{catalogType}sourceTable_visit",
        storageClass="DataFrame",
        dimensions=("instrument", "visit")
    )


class ConsolidateSourceTableConfig(pipeBase.PipelineTaskConfig,
                                   pipelineConnections=ConsolidateSourceTableConnections):
    pass


class ConsolidateSourceTableTask(pipeBase.PipelineTask):
    """Concatenate `sourceTable` list into a per-visit `sourceTable_visit`.
    """
    _DefaultName = 'consolidateSourceTable'
    ConfigClass = ConsolidateSourceTableConfig

    inputDataset = 'sourceTable'
    outputDataset = 'sourceTable_visit'

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        from .makeWarp import reorderRefs

        # Process the inputs in detector order.
        detectorOrder = [ref.dataId['detector'] for ref in inputRefs.inputCatalogs]
        detectorOrder.sort()
        inputRefs = reorderRefs(inputRefs, detectorOrder, dataIdKey='detector')
        inputs = butlerQC.get(inputRefs)
        self.log.info("Concatenating %s per-detector Source Tables",
                      len(inputs['inputCatalogs']))
        df = pd.concat(inputs['inputCatalogs'])
        butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs)
class MakeCcdVisitTableConnections(pipeBase.PipelineTaskConnections,
                                   dimensions=("instrument",),
                                   defaultTemplates={"calexpType": ""}):
    visitSummaryRefs = connectionTypes.Input(
        doc="Data references for per-visit consolidated exposure metadata",
        name="finalVisitSummary",
        storageClass="ExposureCatalog",
        dimensions=("instrument", "visit"),
        multiple=True,
        deferLoad=True,
    )
    outputCatalog = connectionTypes.Output(
        doc="CCD and Visit metadata table",
        name="ccdVisitTable",
        storageClass="DataFrame",
        dimensions=("instrument",)
    )


class MakeCcdVisitTableConfig(pipeBase.PipelineTaskConfig,
                              pipelineConnections=MakeCcdVisitTableConnections):
    idGenerator = DetectorVisitIdGeneratorConfig.make_field()
class MakeCcdVisitTableTask(pipeBase.PipelineTask):
    """Produce a `ccdVisitTable` from the visit summary exposure catalogs.
    """
    _DefaultName = 'makeCcdVisitTable'
    ConfigClass = MakeCcdVisitTableConfig

    def run(self, visitSummaryRefs):
        """Make a table of ccd information from the visit summary catalogs.

        Parameters
        ----------
        visitSummaryRefs : `list` of `lsst.daf.butler.DeferredDatasetHandle`
            List of DeferredDatasetHandles pointing to exposure catalogs with
            per-detector summary information.

        Returns
        -------
        result : `lsst.pipe.base.Struct`
            Results struct with attribute:

            ``outputCatalog``
                Catalog of ccd and visit information.
        """
        ccdEntries = []
        for visitSummaryRef in visitSummaryRefs:
            visitSummary = visitSummaryRef.get()
            visitInfo = visitSummary[0].getVisitInfo()

            summaryTable = visitSummary.asAstropy()
            selectColumns = ['id', 'visit', 'physical_filter', 'band', 'ra', 'decl', 'zenithDistance',
                             'zeroPoint', 'psfSigma', 'skyBg', 'skyNoise',
                             'astromOffsetMean', 'astromOffsetStd', 'nPsfStar',
                             'psfStarDeltaE1Median', 'psfStarDeltaE2Median',
                             'psfStarDeltaE1Scatter', 'psfStarDeltaE2Scatter',
                             'psfStarDeltaSizeMedian', 'psfStarDeltaSizeScatter',
                             'psfStarScaledDeltaSizeScatter',
                             'psfTraceRadiusDelta', 'maxDistToNearestPsf']
            ccdEntry = summaryTable[selectColumns].to_pandas().set_index('id')

            ccdEntry = ccdEntry.rename(columns={"visit": "visitId"})
            ccdEntry['ccdVisitId'] = [
                self.config.idGenerator.apply(
                    visitSummaryRef.dataId,
                    detector=detector_id,
                ).catalog_id
                for detector_id in summaryTable['id']
            ]
            ccdEntry['detector'] = summaryTable['id']
            pixToArcseconds = np.array([vR.getWcs().getPixelScale().asArcseconds() if vR.getWcs()
                                        else np.nan for vR in visitSummary])
            ccdEntry["seeing"] = visitSummary['psfSigma'] * np.sqrt(8 * np.log(2)) * pixToArcseconds

            ccdEntry["skyRotation"] = visitInfo.getBoresightRotAngle().asDegrees()
            ccdEntry["expMidpt"] = visitInfo.getDate().toPython()
            ccdEntry["expMidptMJD"] = visitInfo.getDate().get(dafBase.DateTime.MJD)
            expTime = visitInfo.getExposureTime()
            ccdEntry['expTime'] = expTime
            ccdEntry["obsStart"] = ccdEntry["expMidpt"] - 0.5 * pd.Timedelta(seconds=expTime)
            expTime_days = expTime / (60*60*24)
            ccdEntry["obsStartMJD"] = ccdEntry["expMidptMJD"] - 0.5 * expTime_days
            ccdEntry['darkTime'] = visitInfo.getDarkTime()
            ccdEntry['xSize'] = summaryTable['bbox_max_x'] - summaryTable['bbox_min_x']
            ccdEntry['ySize'] = summaryTable['bbox_max_y'] - summaryTable['bbox_min_y']
            ccdEntry['llcra'] = summaryTable['raCorners'][:, 0]
            ccdEntry['llcdec'] = summaryTable['decCorners'][:, 0]
            ccdEntry['ulcra'] = summaryTable['raCorners'][:, 1]
            ccdEntry['ulcdec'] = summaryTable['decCorners'][:, 1]
            ccdEntry['urcra'] = summaryTable['raCorners'][:, 2]
            ccdEntry['urcdec'] = summaryTable['decCorners'][:, 2]
            ccdEntry['lrcra'] = summaryTable['raCorners'][:, 3]
            ccdEntry['lrcdec'] = summaryTable['decCorners'][:, 3]

            ccdEntries.append(ccdEntry)

        outputCatalog = pd.concat(ccdEntries)
        outputCatalog.set_index('ccdVisitId', inplace=True, verify_integrity=True)
        return pipeBase.Struct(outputCatalog=outputCatalog)
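# Worked example of the conversions above (a sketch, not pipeline output):
# "seeing" is the PSF FWHM in arcsec: psfSigma [pix] * sqrt(8 * ln 2) (~2.355)
# * pixel scale [arcsec/pix]; e.g. psfSigma=1.7 pix at 0.2 arcsec/pix gives ~0.80 arcsec.
# "obsStart" backs the exposure start off the midpoint, expMidpt - expTime/2, and
# "obsStartMJD" does the same in days (expTime / 86400).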
class MakeVisitTableConnections(pipeBase.PipelineTaskConnections,
                                dimensions=("instrument",),
                                defaultTemplates={"calexpType": ""}):
    visitSummaries = connectionTypes.Input(
        doc="Per-visit consolidated exposure metadata",
        name="finalVisitSummary",
        storageClass="ExposureCatalog",
        dimensions=("instrument", "visit",),
        multiple=True,
        deferLoad=True,
    )
    outputCatalog = connectionTypes.Output(
        doc="Visit metadata table",
        name="visitTable",
        storageClass="DataFrame",
        dimensions=("instrument",)
    )


class MakeVisitTableConfig(pipeBase.PipelineTaskConfig,
                           pipelineConnections=MakeVisitTableConnections):
    pass


class MakeVisitTableTask(pipeBase.PipelineTask):
    """Produce a `visitTable` from the visit summary exposure catalogs.
    """
    _DefaultName = 'makeVisitTable'
    ConfigClass = MakeVisitTableConfig
    def run(self, visitSummaries):
        """Make a table of visit information from the visit summary catalogs.

        Parameters
        ----------
        visitSummaries : `list` of `lsst.daf.butler.DeferredDatasetHandle`
            List of exposure catalogs with per-detector summary information.

        Returns
        -------
        result : `lsst.pipe.base.Struct`
            Results struct with attribute:

            ``outputCatalog``
                Catalog of visit information.
        """
        visitEntries = []
        for visitSummary in visitSummaries:
            visitSummary = visitSummary.get()
            visitRow = visitSummary[0]
            visitInfo = visitRow.getVisitInfo()

            visitEntry = {}
            visitEntry["visitId"] = visitRow['visit']
            visitEntry["visit"] = visitRow['visit']
            visitEntry["physical_filter"] = visitRow['physical_filter']
            visitEntry["band"] = visitRow['band']
            raDec = visitInfo.getBoresightRaDec()
            visitEntry["ra"] = raDec.getRa().asDegrees()
            visitEntry["decl"] = raDec.getDec().asDegrees()
            visitEntry["skyRotation"] = visitInfo.getBoresightRotAngle().asDegrees()
            azAlt = visitInfo.getBoresightAzAlt()
            visitEntry["azimuth"] = azAlt.getLongitude().asDegrees()
            visitEntry["altitude"] = azAlt.getLatitude().asDegrees()
            visitEntry["zenithDistance"] = 90 - azAlt.getLatitude().asDegrees()
            visitEntry["airmass"] = visitInfo.getBoresightAirmass()
            expTime = visitInfo.getExposureTime()
            visitEntry["expTime"] = expTime
            visitEntry["expMidpt"] = visitInfo.getDate().toPython()
            visitEntry["expMidptMJD"] = visitInfo.getDate().get(dafBase.DateTime.MJD)
            visitEntry["obsStart"] = visitEntry["expMidpt"] - 0.5 * pd.Timedelta(seconds=expTime)
            expTime_days = expTime / (60*60*24)
            visitEntry["obsStartMJD"] = visitEntry["expMidptMJD"] - 0.5 * expTime_days
            visitEntries.append(visitEntry)

        outputCatalog = pd.DataFrame(data=visitEntries)
        outputCatalog.set_index('visitId', inplace=True, verify_integrity=True)

        return pipeBase.Struct(outputCatalog=outputCatalog)
class WriteForcedSourceTableConnections(pipeBase.PipelineTaskConnections,
                                        dimensions=("instrument", "visit", "detector", "skymap", "tract")):
    inputCatalog = connectionTypes.Input(
        doc="Primary per-detector, single-epoch forced-photometry catalog. "
            "By default, it is the output of ForcedPhotCcdTask on calexps",
        name="forced_src",
        storageClass="SourceCatalog",
        dimensions=("instrument", "visit", "detector", "skymap", "tract")
    )
    inputCatalogDiff = connectionTypes.Input(
        doc="Secondary multi-epoch, per-detector, forced photometry catalog. "
            "By default, it is the output of ForcedPhotCcdTask run on image differences.",
        name="forced_diff",
        storageClass="SourceCatalog",
        dimensions=("instrument", "visit", "detector", "skymap", "tract")
    )
    outputCatalog = connectionTypes.Output(
        doc="InputCatalogs horizontally joined on `objectId` in DataFrame parquet format",
        name="mergedForcedSource",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector", "skymap", "tract")
    )


class WriteForcedSourceTableConfig(pipeBase.PipelineTaskConfig,
                                   pipelineConnections=WriteForcedSourceTableConnections):
    key = lsst.pex.config.Field(
        doc="Column on which to join the two input tables and make the primary key of the output",
        dtype=str,
        default="objectId",
    )
    idGenerator = DetectorVisitIdGeneratorConfig.make_field()
class WriteForcedSourceTableTask(pipeBase.PipelineTask):
    """Merge and convert per-detector forced source catalogs to DataFrame Parquet format.

    Because the predecessor ForcedPhotCcdTask operates per-detector,
    per-tract (i.e., it has tract in its dimensions), detectors
    on the tract boundary may have multiple forced source catalogs.

    The successor task TransformForcedSourceTable runs per-patch
    and temporally aggregates overlapping mergedForcedSource catalogs from all
    available epochs.
    """
    _DefaultName = "writeForcedSourceTable"
    ConfigClass = WriteForcedSourceTableConfig

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)

        idGenerator = self.config.idGenerator.apply(butlerQC.quantum.dataId)
        inputs['ccdVisitId'] = idGenerator.catalog_id
        inputs['band'] = butlerQC.quantum.dataId.full['band']
        outputs = self.run(**inputs)
        butlerQC.put(outputs, outputRefs)

    def run(self, inputCatalog, inputCatalogDiff, ccdVisitId=None, band=None):
        dfs = []
        for table, dataset in zip((inputCatalog, inputCatalogDiff), ('calexp', 'diff')):
            df = table.asAstropy().to_pandas().set_index(self.config.key, drop=False)
            df = df.reindex(sorted(df.columns), axis=1)
            df['ccdVisitId'] = ccdVisitId if ccdVisitId else pd.NA
            df['band'] = band if band else pd.NA
            df.columns = pd.MultiIndex.from_tuples([(dataset, c) for c in df.columns],
                                                   names=('dataset', 'column'))
            dfs.append(df)

        outputCatalog = functools.reduce(lambda d1, d2: d1.join(d2), dfs)
        return pipeBase.Struct(outputCatalog=outputCatalog)
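# Illustrative sketch (hypothetical column names): the merged per-detector frame is
# indexed by objectId and carries a two-level column index, e.g.
#   ('calexp', 'base_PsfFlux_instFlux'), ('calexp', 'ccdVisitId'),
#   ('diff', 'base_PsfFlux_instFlux'), ('diff', 'band')
# keeping the direct and difference-image forced measurements side by side.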
class TransformForcedSourceTableConnections(pipeBase.PipelineTaskConnections,
                                            dimensions=("instrument", "skymap", "patch", "tract")):
    inputCatalogs = connectionTypes.Input(
        doc="DataFrames of merged ForcedSources produced by WriteForcedSourceTableTask",
        name="mergedForcedSource",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector", "skymap", "tract"),
        multiple=True,
        deferLoad=True,
    )
    referenceCatalog = connectionTypes.Input(
        doc="Reference catalog which was used to seed the forcedPhot. Columns "
            "objectId, detect_isPrimary, detect_isTractInner, detect_isPatchInner "
            "are expected.",
        name="objectTable",
        storageClass="DataFrame",
        dimensions=("tract", "patch", "skymap"),
        deferLoad=True,
    )
    outputCatalog = connectionTypes.Output(
        doc="Narrower, temporally-aggregated, per-patch ForcedSource Table transformed and converted per a "
            "specified set of functors",
        name="forcedSourceTable",
        storageClass="DataFrame",
        dimensions=("tract", "patch", "skymap")
    )


class TransformForcedSourceTableConfig(TransformCatalogBaseConfig,
                                       pipelineConnections=TransformForcedSourceTableConnections):
    referenceColumns = pexConfig.ListField(
        dtype=str,
        default=["detect_isPrimary", "detect_isTractInner", "detect_isPatchInner"],
        doc="Columns to pull from reference catalog",
    )
    keyRef = lsst.pex.config.Field(
        doc="Column on which to join the two input tables and make the primary key of the output",
        dtype=str,
        default="objectId",
    )
    key = lsst.pex.config.Field(
        doc="Rename the output DataFrame index to this name",
        dtype=str,
        default="forcedSourceId",
    )
    def setDefaults(self):
        super().setDefaults()
        self.functorFile = os.path.join('$PIPE_TASKS_DIR', 'schemas', 'ForcedSource.yaml')
        self.columnsFromDataId = ['tract', 'patch']


class TransformForcedSourceTableTask(TransformCatalogBaseTask):
    """Transform/standardize a ForcedSource catalog.

    Transforms each wide, per-detector forcedSource DataFrame per the
    specification file (per-camera defaults found in ForcedSource.yaml).
    All epochs that overlap the patch are aggregated into one per-patch
    narrow-DataFrame file.

    No de-duplication of rows is performed. Duplicate resolution flags are
    pulled in from the referenceCatalog: `detect_isPrimary`,
    `detect_isTractInner`, `detect_isPatchInner`, so that the user may
    de-duplicate for analysis or compare duplicates for QA.

    The resulting table includes multiple bands. Epochs (MJDs) and other useful
    per-visit information can be retrieved by joining with the CcdVisitTable on
    ccdVisitId.
    """
    _DefaultName = "transformForcedSourceTable"
    ConfigClass = TransformForcedSourceTableConfig

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)
        if self.funcs is None:
            raise ValueError("config.functorFile is None. "
                             "Must be a valid path to yaml in order to run Task as a PipelineTask.")
        outputs = self.run(inputs['inputCatalogs'], inputs['referenceCatalog'], funcs=self.funcs,
                           dataId=outputRefs.outputCatalog.dataId.full)

        butlerQC.put(outputs, outputRefs)

    def run(self, inputCatalogs, referenceCatalog, funcs=None, dataId=None, band=None):
        dfs = []
        ref = referenceCatalog.get(parameters={"columns": self.config.referenceColumns})
        self.log.info("Aggregating %s input catalogs", len(inputCatalogs))
        for handle in inputCatalogs:
            result = self.transform(None, handle, funcs, dataId)
            # Keep only rows that match the reference (patch) catalog.
            dfs.append(result.df.join(ref, how='inner'))

        outputCatalog = pd.concat(dfs)

        # The index is currently the objectId used for the join; expose it as a
        # column named per config.keyRef, then index on forcedSourceId and
        # rename that index per config.key.
        outputCatalog.index.rename(self.config.keyRef, inplace=True)
        outputCatalog.reset_index(inplace=True)
        outputCatalog.set_index("forcedSourceId", inplace=True, verify_integrity=True)
        outputCatalog.index.rename(self.config.key, inplace=True)

        self.log.info("Made a table of %d columns and %d rows",
                      len(outputCatalog.columns), len(outputCatalog))
        return pipeBase.Struct(outputCatalog=outputCatalog)
class ConsolidateTractConnections(pipeBase.PipelineTaskConnections,
                                  defaultTemplates={"catalogType": ""},
                                  dimensions=("instrument", "tract")):
    inputCatalogs = connectionTypes.Input(
        doc="Input per-patch DataFrame Tables to be concatenated",
        name="{catalogType}ForcedSourceTable",
        storageClass="DataFrame",
        dimensions=("tract", "patch", "skymap"),
        multiple=True,
    )
    outputCatalog = connectionTypes.Output(
        doc="Output per-tract concatenation of DataFrame Tables",
        name="{catalogType}ForcedSourceTable_tract",
        storageClass="DataFrame",
        dimensions=("tract", "skymap"),
    )


class ConsolidateTractConfig(pipeBase.PipelineTaskConfig,
                             pipelineConnections=ConsolidateTractConnections):
    pass


class ConsolidateTractTask(pipeBase.PipelineTask):
    """Concatenate any per-patch DataFrame list into a single
    per-tract DataFrame.
    """
    _DefaultName = 'ConsolidateTract'
    ConfigClass = ConsolidateTractConfig

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)
        self.log.info("Concatenating %s per-patch %s Tables",
                      len(inputs['inputCatalogs']),
                      inputRefs.inputCatalogs[0].datasetType.name)
        df = pd.concat(inputs['inputCatalogs'])
        butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs)