__all__ = ["WriteObjectTableConfig", "WriteObjectTableTask",
           "WriteSourceTableConfig", "WriteSourceTableTask",
           "WriteRecalibratedSourceTableConfig", "WriteRecalibratedSourceTableTask",
           "PostprocessAnalysis",
           "TransformCatalogBaseConfig", "TransformCatalogBaseTask",
           "TransformObjectCatalogConfig", "TransformObjectCatalogTask",
           "ConsolidateObjectTableConfig", "ConsolidateObjectTableTask",
           "TransformSourceTableConfig", "TransformSourceTableTask",
           "ConsolidateVisitSummaryConfig", "ConsolidateVisitSummaryTask",
           "ConsolidateSourceTableConfig", "ConsolidateSourceTableTask",
           "MakeCcdVisitTableConfig", "MakeCcdVisitTableTask",
           "MakeVisitTableConfig", "MakeVisitTableTask",
           "WriteForcedSourceTableConfig", "WriteForcedSourceTableTask",
           "TransformForcedSourceTableConfig", "TransformForcedSourceTableTask",
           "ConsolidateTractConfig", "ConsolidateTractTask"]
import functools
import logging
import numbers
import os

import numpy as np
import pandas as pd

import lsst.afw.table as afwTable
import lsst.daf.base as dafBase
import lsst.pex.config as pexConfig
import lsst.pipe.base as pipeBase
from lsst.pipe.base import connectionTypes
from lsst.afw.image import ExposureSummaryStats
from lsst.meas.base import SingleFrameMeasurementTask
from lsst.obs.base import ExposureIdInfo
from lsst.daf.butler import DeferredDatasetHandle, DataCoordinate
from lsst.skymap import BaseSkyMap

from .parquetTable import ParquetTable
from .functors import CompositeFunctor, Column

log = logging.getLogger(__name__)


def flattenFilters(df, noDupCols=['coord_ra', 'coord_dec'], camelCase=False, inputBands=None):
    """Flattens a dataframe with a multilevel column index.
    """
    newDf = pd.DataFrame()
    # band is the level-0 index
    dfBands = df.columns.unique(level=0).values
    for band in dfBands:
        subdf = df[band]
        columnFormat = '{0}{1}' if camelCase else '{0}_{1}'
        newColumns = {c: columnFormat.format(band, c)
                      for c in subdf.columns if c not in noDupCols}
        cols = list(newColumns.keys())
        newDf = pd.concat([newDf, subdf[cols].rename(columns=newColumns)], axis=1)

    # Band must be present in the input and requested output bands
    presentBands = dfBands if inputBands is None else list(set(inputBands).intersection(dfBands))
    # Take the un-exploded columns from any present band's partition
    noDupDf = df[presentBands[0]][noDupCols]
    newDf = pd.concat([noDupDf, newDf], axis=1)
    return newDf
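

# The helper below is an illustrative usage sketch only (not part of the original
# module): it builds a tiny two-band DataFrame with a (band, column) MultiIndex
# and flattens it with flattenFilters. The band and column names are hypothetical.
def _exampleFlattenFilters():
    columns = pd.MultiIndex.from_tuples(
        [("g", "coord_ra"), ("g", "PsfFlux"), ("r", "coord_ra"), ("r", "PsfFlux")])
    df = pd.DataFrame([[10.0, 1.0, 10.0, 2.0]], columns=columns)
    # camelCase=True yields per-band names like 'gPsfFlux'; coord_ra is kept once.
    return flattenFilters(df, noDupCols=["coord_ra"], camelCase=True)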


class WriteObjectTableConnections(pipeBase.PipelineTaskConnections,
                                  defaultTemplates={"coaddName": "deep"},
                                  dimensions=("tract", "patch", "skymap")):
    inputCatalogMeas = connectionTypes.Input(
        doc="Catalog of source measurements on the deepCoadd.",
        dimensions=("tract", "patch", "band", "skymap"),
        storageClass="SourceCatalog",
        name="{coaddName}Coadd_meas",
        multiple=True
    )
    inputCatalogForcedSrc = connectionTypes.Input(
        doc="Catalog of forced measurements (shape and position parameters held fixed) on the deepCoadd.",
        dimensions=("tract", "patch", "band", "skymap"),
        storageClass="SourceCatalog",
        name="{coaddName}Coadd_forced_src",
        multiple=True
    )
    inputCatalogRef = connectionTypes.Input(
        doc="Catalog marking the primary detection (which band provides a good shape and position) "
            "for each detection in deepCoadd_mergeDet.",
        dimensions=("tract", "patch", "skymap"),
        storageClass="SourceCatalog",
        name="{coaddName}Coadd_ref"
    )
    outputCatalog = connectionTypes.Output(
        doc="A vertical concatenation of the deepCoadd_{ref|meas|forced_src} catalogs, "
            "stored as a DataFrame with a multi-level column index per-patch.",
        dimensions=("tract", "patch", "skymap"),
        storageClass="DataFrame",
        name="{coaddName}Coadd_obj"
    )


class WriteObjectTableConfig(pipeBase.PipelineTaskConfig,
                             pipelineConnections=WriteObjectTableConnections):
    engine = pexConfig.Field(
        dtype=str,
        doc="Parquet engine for writing (pyarrow or fastparquet)"
    )
    coaddName = pexConfig.Field(
        dtype=str,
        default="deep",
        doc="Name of coadd"
    )


class WriteObjectTableTask(pipeBase.PipelineTask):
    """Write filter-merged source tables to parquet.
    """
    _DefaultName = "writeObjectTable"
    ConfigClass = WriteObjectTableConfig

    # Names of table datasets to be merged
    inputDatasets = ('forced_src', 'meas', 'ref')

    # Name of table dataset written out (with coadd prefix)
    outputDataset = 'obj'

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)

        measDict = {ref.dataId['band']: {'meas': cat} for ref, cat in
                    zip(inputRefs.inputCatalogMeas, inputs['inputCatalogMeas'])}
        forcedSourceDict = {ref.dataId['band']: {'forced_src': cat} for ref, cat in
                            zip(inputRefs.inputCatalogForcedSrc, inputs['inputCatalogForcedSrc'])}

        catalogs = {}
        for band in measDict.keys():
            catalogs[band] = {'meas': measDict[band]['meas'],
                              'forced_src': forcedSourceDict[band]['forced_src'],
                              'ref': inputs['inputCatalogRef']}
        dataId = butlerQC.quantum.dataId
        df = self.run(catalogs=catalogs, tract=dataId['tract'], patch=dataId['patch'])
        outputs = pipeBase.Struct(outputCatalog=df)
        butlerQC.put(outputs, outputRefs)

    def run(self, catalogs, tract, patch):
        """Merge multiple catalogs.

        Parameters
        ----------
        catalogs : `dict`
            Mapping from filter names to dict of catalogs.
        tract : `int`
            tractId to use for the tractId column.
        patch : `str`
            patchId to use for the patchId column.

        Returns
        -------
        catalog : `pandas.DataFrame`
            Merged dataframe.
        """
        dfs = []
        for filt, tableDict in catalogs.items():
            for dataset, table in tableDict.items():
                # Convert afwTable to pandas DataFrame, indexed by source id
                df = table.asAstropy().to_pandas().set_index('id', drop=True)

                # Sort columns by name, to ensure matching schema among patches
                df = df.reindex(sorted(df.columns), axis=1)
                df['tractId'] = tract
                df['patchId'] = patch

                # Make columns a 3-level MultiIndex
                df.columns = pd.MultiIndex.from_tuples([(dataset, filt, c) for c in df.columns],
                                                       names=('dataset', 'band', 'column'))
                dfs.append(df)

        catalog = functools.reduce(lambda d1, d2: d1.join(d2), dfs)
        return catalog
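

# Illustrative sketch only (plain pandas, hypothetical values; not part of the
# original module): two single-band tables indexed by 'id' acquire a
# (dataset, band, column) MultiIndex and are joined on the index, which is the
# same horizontal merge run() performs above.
def _exampleMergeMultilevel():
    meas = pd.DataFrame({'flux': [1.0, 2.0]}, index=pd.Index([10, 11], name='id'))
    ref = pd.DataFrame({'primary': [True, False]}, index=pd.Index([10, 11], name='id'))
    meas.columns = pd.MultiIndex.from_tuples(
        [('meas', 'g', c) for c in meas.columns], names=('dataset', 'band', 'column'))
    ref.columns = pd.MultiIndex.from_tuples(
        [('ref', 'g', c) for c in ref.columns], names=('dataset', 'band', 'column'))
    return functools.reduce(lambda d1, d2: d1.join(d2), [meas, ref])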


class WriteSourceTableConnections(pipeBase.PipelineTaskConnections,
                                  defaultTemplates={"catalogType": ""},
                                  dimensions=("instrument", "visit", "detector")):
    catalog = connectionTypes.Input(
        doc="Input full-depth catalog of sources produced by CalibrateTask",
        name="{catalogType}src",
        storageClass="SourceCatalog",
        dimensions=("instrument", "visit", "detector")
    )
    outputCatalog = connectionTypes.Output(
        doc="Catalog of sources, `src` in Parquet format. The 'id' column is "
            "replaced with an index; all other columns are unchanged.",
        name="{catalogType}source",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector")
    )


class WriteSourceTableConfig(pipeBase.PipelineTaskConfig,
                             pipelineConnections=WriteSourceTableConnections):
    pass


class WriteSourceTableTask(pipeBase.PipelineTask):
    """Write source table to parquet.
    """
    _DefaultName = "writeSourceTable"
    ConfigClass = WriteSourceTableConfig

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)
        inputs['ccdVisitId'] = butlerQC.quantum.dataId.pack("visit_detector")
        result = self.run(**inputs).table
        outputs = pipeBase.Struct(outputCatalog=result.toDataFrame())
        butlerQC.put(outputs, outputRefs)

    def run(self, catalog, ccdVisitId=None, **kwargs):
        """Convert `src` catalog to parquet.

        Parameters
        ----------
        catalog : `afwTable.SourceCatalog`
            catalog to be converted
        ccdVisitId : `int`
            ccdVisitId to be added as a column

        Returns
        -------
        result : `lsst.pipe.base.Struct`
            ``table``
                `ParquetTable` version of the input catalog
        """
        self.log.info("Generating parquet table from src catalog ccdVisitId=%s", ccdVisitId)
        df = catalog.asAstropy().to_pandas().set_index('id', drop=True)
        df['ccdVisitId'] = ccdVisitId
        return pipeBase.Struct(table=ParquetTable(dataFrame=df))


class WriteRecalibratedSourceTableConnections(WriteSourceTableConnections,
                                              defaultTemplates={"catalogType": "",
                                                                "skyWcsName": "jointcal",
                                                                "photoCalibName": "fgcm"},
                                              dimensions=("instrument", "visit", "detector", "skymap")):
    skyMap = connectionTypes.Input(
        doc="skyMap needed to choose which tract-level calibrations to use when multiple available",
        name=BaseSkyMap.SKYMAP_DATASET_TYPE_NAME,
        storageClass="SkyMap",
        dimensions=("skymap",),
    )
    exposure = connectionTypes.Input(
        doc="Input exposure to perform photometry on.",
        storageClass="ExposureF",
        dimensions=["instrument", "visit", "detector"],
    )
    externalSkyWcsTractCatalog = connectionTypes.Input(
        doc=("Per-tract, per-visit wcs calibrations. These catalogs use the detector "
             "id for the catalog id, sorted on id for fast lookup."),
        name="{skyWcsName}SkyWcsCatalog",
        storageClass="ExposureCatalog",
        dimensions=["instrument", "visit", "tract"],
        multiple=True
    )
    externalSkyWcsGlobalCatalog = connectionTypes.Input(
        doc=("Per-visit wcs calibrations computed globally (with no tract information). "
             "These catalogs use the detector id for the catalog id, sorted on id for "
             "fast lookup."),
        name="{skyWcsName}SkyWcsCatalog",
        storageClass="ExposureCatalog",
        dimensions=["instrument", "visit"],
    )
    externalPhotoCalibTractCatalog = connectionTypes.Input(
        doc=("Per-tract, per-visit photometric calibrations. These catalogs use the "
             "detector id for the catalog id, sorted on id for fast lookup."),
        name="{photoCalibName}PhotoCalibCatalog",
        storageClass="ExposureCatalog",
        dimensions=["instrument", "visit", "tract"],
        multiple=True
    )
    externalPhotoCalibGlobalCatalog = connectionTypes.Input(
        doc=("Per-visit photometric calibrations computed globally (with no tract "
             "information). These catalogs use the detector id for the catalog id, "
             "sorted on id for fast lookup."),
        name="{photoCalibName}PhotoCalibCatalog",
        storageClass="ExposureCatalog",
        dimensions=["instrument", "visit"],
    )

    def __init__(self, *, config=None):
        super().__init__(config=config)
        if config.doApplyExternalSkyWcs and config.doReevaluateSkyWcs:
            if config.useGlobalExternalSkyWcs:
                self.inputs.remove("externalSkyWcsTractCatalog")
            else:
                self.inputs.remove("externalSkyWcsGlobalCatalog")
        else:
            self.inputs.remove("externalSkyWcsTractCatalog")
            self.inputs.remove("externalSkyWcsGlobalCatalog")
        if config.doApplyExternalPhotoCalib and config.doReevaluatePhotoCalib:
            if config.useGlobalExternalPhotoCalib:
                self.inputs.remove("externalPhotoCalibTractCatalog")
            else:
                self.inputs.remove("externalPhotoCalibGlobalCatalog")
        else:
            self.inputs.remove("externalPhotoCalibTractCatalog")
            self.inputs.remove("externalPhotoCalibGlobalCatalog")


class WriteRecalibratedSourceTableConfig(WriteSourceTableConfig,
                                         pipelineConnections=WriteRecalibratedSourceTableConnections):

    doReevaluatePhotoCalib = pexConfig.Field(
        dtype=bool,
        doc=("Add or replace local photoCalib columns"),
    )
    doReevaluateSkyWcs = pexConfig.Field(
        dtype=bool,
        doc=("Add or replace local WCS columns and update the coord columns, coord_ra and coord_dec"),
    )
    doApplyExternalPhotoCalib = pexConfig.Field(
        dtype=bool,
        doc=("If and only if doReevaluatePhotoCalib, apply the photometric calibrations from an external "
             "algorithm such as FGCM or jointcal, else use the photoCalib already attached to the exposure."),
    )
    doApplyExternalSkyWcs = pexConfig.Field(
        dtype=bool,
        doc=("If and only if doReevaluateSkyWcs, apply the WCS from an external algorithm such as jointcal, "
             "else use the wcs already attached to the exposure."),
    )
    useGlobalExternalPhotoCalib = pexConfig.Field(
        dtype=bool,
        doc=("When using doApplyExternalPhotoCalib, use 'global' calibrations "
             "that are not run per-tract. When False, use per-tract photometric "
             "calibration files.")
    )
    useGlobalExternalSkyWcs = pexConfig.Field(
        dtype=bool,
        doc=("When using doApplyExternalSkyWcs, use 'global' calibrations "
             "that are not run per-tract. When False, use per-tract wcs "
             "files.")
    )

    def validate(self):
        super().validate()
        if self.doApplyExternalSkyWcs and not self.doReevaluateSkyWcs:
            log.warning("doApplyExternalSkyWcs=True but doReevaluateSkyWcs=False. "
                        "External SkyWcs will not be read or evaluated.")
        if self.doApplyExternalPhotoCalib and not self.doReevaluatePhotoCalib:
            log.warning("doApplyExternalPhotoCalib=True but doReevaluatePhotoCalib=False. "
                        "External PhotoCalib will not be read or evaluated.")


class WriteRecalibratedSourceTableTask(WriteSourceTableTask):
    """Write recalibrated source table to parquet.
    """
    _DefaultName = "writeRecalibratedSourceTable"
    ConfigClass = WriteRecalibratedSourceTableConfig

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)
        inputs['ccdVisitId'] = butlerQC.quantum.dataId.pack("visit_detector")
        inputs['exposureIdInfo'] = ExposureIdInfo.fromDataId(butlerQC.quantum.dataId, "visit_detector")

        if self.config.doReevaluatePhotoCalib or self.config.doReevaluateSkyWcs:
            if self.config.doApplyExternalPhotoCalib or self.config.doApplyExternalSkyWcs:
                inputs['exposure'] = self.attachCalibs(inputRefs, **inputs)

            inputs['catalog'] = self.addCalibColumns(**inputs)

        result = self.run(**inputs).table
        outputs = pipeBase.Struct(outputCatalog=result.toDataFrame())
        butlerQC.put(outputs, outputRefs)

    def attachCalibs(self, inputRefs, skyMap, exposure, externalSkyWcsGlobalCatalog=None,
                     externalSkyWcsTractCatalog=None, externalPhotoCalibGlobalCatalog=None,
                     externalPhotoCalibTractCatalog=None, **kwargs):
        """Apply external calibrations to exposure per configuration.

        When multiple tract-level calibrations overlap, select the one with the
        center closest to the detector.

        Parameters
        ----------
        inputRefs : `lsst.pipe.base.InputQuantizedConnection`
            Input references, used for the dataIds of the tract-level calibrations.
        skyMap : `lsst.skymap.SkyMap`
        exposure : `lsst.afw.image.exposure.Exposure`
            Input exposure to adjust calibrations.
        externalSkyWcsGlobalCatalog : `lsst.afw.table.ExposureCatalog`, optional
            Exposure catalog with external skyWcs to be applied per config
        externalSkyWcsTractCatalog : `lsst.afw.table.ExposureCatalog`, optional
            Exposure catalog with external skyWcs to be applied per config
        externalPhotoCalibGlobalCatalog : `lsst.afw.table.ExposureCatalog`, optional
            Exposure catalog with external photoCalib to be applied per config
        externalPhotoCalibTractCatalog : `lsst.afw.table.ExposureCatalog`, optional
            Exposure catalog with external photoCalib to be applied per config

        Returns
        -------
        exposure : `lsst.afw.image.exposure.Exposure`
            Exposure with adjusted calibrations.
        """
        if not self.config.doApplyExternalSkyWcs:
            # Do not modify the exposure's SkyWcs
            externalSkyWcsCatalog = None
        elif self.config.useGlobalExternalSkyWcs:
            # Use the global external SkyWcs
            externalSkyWcsCatalog = externalSkyWcsGlobalCatalog
            self.log.info('Applying global SkyWcs')
        else:
            # Use the tract-level external SkyWcs from the closest overlapping tract
            inputRef = getattr(inputRefs, 'externalSkyWcsTractCatalog')
            tracts = [ref.dataId['tract'] for ref in inputRef]
            if len(tracts) == 1:
                ind = 0
                self.log.info('Applying tract-level SkyWcs from tract %s', tracts[ind])
            else:
                if exposure.getWcs() is None:
                    raise ValueError("Trying to locate nearest tract, but exposure.wcs is None.")
                ind = self.getClosestTract(tracts, skyMap,
                                           exposure.getBBox(), exposure.getWcs())
                self.log.info('Multiple overlapping externalSkyWcsTractCatalogs found (%s). '
                              'Applying closest to detector center: tract=%s', str(tracts), tracts[ind])
            externalSkyWcsCatalog = externalSkyWcsTractCatalog[ind]

        if not self.config.doApplyExternalPhotoCalib:
            # Do not modify the exposure's PhotoCalib
            externalPhotoCalibCatalog = None
        elif self.config.useGlobalExternalPhotoCalib:
            # Use the global external PhotoCalib
            externalPhotoCalibCatalog = externalPhotoCalibGlobalCatalog
            self.log.info('Applying global PhotoCalib')
        else:
            # Use the tract-level external PhotoCalib from the closest overlapping tract
            inputRef = getattr(inputRefs, 'externalPhotoCalibTractCatalog')
            tracts = [ref.dataId['tract'] for ref in inputRef]
            if len(tracts) == 1:
                ind = 0
                self.log.info('Applying tract-level PhotoCalib from tract %s', tracts[ind])
            else:
                ind = self.getClosestTract(tracts, skyMap,
                                           exposure.getBBox(), exposure.getWcs())
                self.log.info('Multiple overlapping externalPhotoCalibTractCatalogs found (%s). '
                              'Applying closest to detector center: tract=%s', str(tracts), tracts[ind])
            externalPhotoCalibCatalog = externalPhotoCalibTractCatalog[ind]

        return self.prepareCalibratedExposure(exposure, externalSkyWcsCatalog, externalPhotoCalibCatalog)

    def getClosestTract(self, tracts, skyMap, bbox, wcs):
        """Find the index of the tract closest to detector from a list of tractIds.

        Parameters
        ----------
        tracts : `list` [`int`]
            Iterable of integer tractIds
        skyMap : `lsst.skymap.SkyMap`
            skyMap to lookup tract geometry and wcs
        bbox : `lsst.geom.Box2I`
            Detector bbox, center of which will be compared to tract centers
        wcs : `lsst.afw.geom.SkyWcs`
            Detector Wcs object to map the detector center to SkyCoord

        Returns
        -------
        index : `int`
            Index of the closest tract in ``tracts``.
        """
        center = wcs.pixelToSky(bbox.getCenter())
        sep = []
        for tractId in tracts:
            tract = skyMap[tractId]
            tractCenter = tract.getWcs().pixelToSky(tract.getBBox().getCenter())
            sep.append(center.separation(tractCenter))

        return np.argmin(sep)

    def prepareCalibratedExposure(self, exposure, externalSkyWcsCatalog=None, externalPhotoCalibCatalog=None):
        """Prepare a calibrated exposure and apply external calibrations if so configured.

        Parameters
        ----------
        exposure : `lsst.afw.image.exposure.Exposure`
            Input exposure to adjust calibrations.
        externalSkyWcsCatalog : `lsst.afw.table.ExposureCatalog`, optional
            Exposure catalog with external skyWcs to be applied
            if config.doApplyExternalSkyWcs=True. Catalog uses the detector id
            for the catalog id, sorted on id for fast lookup.
        externalPhotoCalibCatalog : `lsst.afw.table.ExposureCatalog`, optional
            Exposure catalog with external photoCalib to be applied
            if config.doApplyExternalPhotoCalib=True. Catalog uses the detector
            id for the catalog id, sorted on id for fast lookup.

        Returns
        -------
        exposure : `lsst.afw.image.exposure.Exposure`
            Exposure with adjusted calibrations.
        """
        detectorId = exposure.getInfo().getDetector().getId()

        if externalPhotoCalibCatalog is not None:
            row = externalPhotoCalibCatalog.find(detectorId)
            if row is None:
                self.log.warning("Detector id %s not found in externalPhotoCalibCatalog; "
                                 "Using original photoCalib.", detectorId)
            else:
                photoCalib = row.getPhotoCalib()
                if photoCalib is None:
                    self.log.warning("Detector id %s has None for photoCalib in externalPhotoCalibCatalog; "
                                     "Using original photoCalib.", detectorId)
                else:
                    exposure.setPhotoCalib(photoCalib)

        if externalSkyWcsCatalog is not None:
            row = externalSkyWcsCatalog.find(detectorId)
            if row is None:
                self.log.warning("Detector id %s not found in externalSkyWcsCatalog; "
                                 "Using original skyWcs.", detectorId)
            else:
                skyWcs = row.getWcs()
                if skyWcs is None:
                    self.log.warning("Detector id %s has None for skyWcs in externalSkyWcsCatalog; "
                                     "Using original skyWcs.", detectorId)
                else:
                    exposure.setWcs(skyWcs)

        return exposure

    def addCalibColumns(self, catalog, exposure, exposureIdInfo, **kwargs):
        """Add or replace columns with calibs evaluated at each centroid.

        Add or replace 'base_LocalWcs' and 'base_LocalPhotoCalib' columns in
        a source catalog, by rerunning the plugins.

        Parameters
        ----------
        catalog : `lsst.afw.table.SourceCatalog`
            catalog to which calib columns will be added
        exposure : `lsst.afw.image.exposure.Exposure`
            Exposure with attached PhotoCalibs and SkyWcs attributes to be
            reevaluated at local centroids. Pixels are not required.
        exposureIdInfo : `lsst.obs.base.ExposureIdInfo`

        Returns
        -------
        newCat : `lsst.afw.table.SourceCatalog`
            Source Catalog with requested local calib columns
        """
        measureConfig = SingleFrameMeasurementTask.ConfigClass()
        measureConfig.doReplaceWithNoise = False

        # Clear all slots and plugins, then add back only the ones to re-evaluate
        for slot in measureConfig.slots:
            setattr(measureConfig.slots, slot, None)

        measureConfig.plugins.names = []
        if self.config.doReevaluateSkyWcs:
            measureConfig.plugins.names.add('base_LocalWcs')
            self.log.info("Re-evaluating base_LocalWcs plugin")
        if self.config.doReevaluatePhotoCalib:
            measureConfig.plugins.names.add('base_LocalPhotoCalib')
            self.log.info("Re-evaluating base_LocalPhotoCalib plugin")
        pluginsNotToCopy = tuple(measureConfig.plugins.names)

        # Create a new schema and catalog, copying all columns from the original
        # except for the ones to re-evaluate
        aliasMap = catalog.schema.getAliasMap()
        mapper = afwTable.SchemaMapper(catalog.schema)
        for item in catalog.schema:
            if not item.field.getName().startswith(pluginsNotToCopy):
                mapper.addMapping(item.key)

        schema = mapper.getOutputSchema()
        measurement = SingleFrameMeasurementTask(config=measureConfig, schema=schema)
        schema.setAliasMap(aliasMap)
        newCat = afwTable.SourceCatalog(schema)
        newCat.extend(catalog, mapper=mapper)

        # Update the coord columns if the SkyWcs is being re-evaluated
        if self.config.doReevaluateSkyWcs and exposure.wcs is not None:
            afwTable.updateSourceCoords(exposure.wcs, newCat)

        measurement.run(measCat=newCat, exposure=exposure, exposureId=exposureIdInfo.expId)
        return newCat
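

# Illustrative sketch only (plain numpy, hypothetical coordinates; not part of
# the original module): picking the index of the smallest angular separation,
# which is what getClosestTract above does with the detector center and the
# overlapping tract centers.
def _exampleClosestIndex():
    detRa, detDec = np.radians(150.0), np.radians(2.0)
    tractRa = np.radians(np.array([149.0, 150.1, 152.0]))
    tractDec = np.radians(np.array([2.5, 1.9, 2.0]))
    # Angular separation on the sphere (haversine formula)
    dRa, dDec = tractRa - detRa, tractDec - detDec
    a = np.sin(dDec / 2)**2 + np.cos(detDec) * np.cos(tractDec) * np.sin(dRa / 2)**2
    sep = 2 * np.arcsin(np.sqrt(a))
    return int(np.argmin(sep))  # index of the closest tract center (here 1)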
624 """Calculate columns from ParquetTable.
626 This object manages and organizes an arbitrary set of computations
627 on a catalog. The catalog
is defined by a
629 as a ``deepCoadd_obj`` dataset,
and the computations are defined by a
630 collection of `lsst.pipe.tasks.functor.Functor` objects (
or, equivalently,
631 a ``CompositeFunctor``).
633 After the object
is initialized, accessing the ``.df`` attribute (which
634 holds the `pandas.DataFrame` containing the results of the calculations)
635 triggers computation of said dataframe.
637 One of the conveniences of using this object
is the ability to define a
638 desired common filter
for all functors. This enables the same functor
639 collection to be passed to several different `PostprocessAnalysis` objects
640 without having to change the original functor collection, since the ``filt``
641 keyword argument of this object triggers an overwrite of the ``filt``
642 property
for all functors
in the collection.
644 This object also allows a list of refFlags to be passed,
and defines a set
645 of default refFlags that are always included even
if not requested.
647 If a list of `~lsst.pipe.tasks.ParquetTable` object
is passed, rather than a single one,
648 then the calculations will be mapped over all the input catalogs. In
649 principle, it should be straightforward to parallelize this activity, but
650 initial tests have failed (see TODO
in code comments).
654 parq : `~lsst.pipe.tasks.ParquetTable` (
or list of such)
655 Source
catalog(s)
for computation.
658 Computations to do (functors that act on ``parq``).
659 If a dict, the output
660 DataFrame will have columns keyed accordingly.
661 If a list, the column keys will come
from the
662 ``.shortname`` attribute of each functor.
664 filt : `str`, optional
665 Filter
in which to calculate. If provided,
666 this will overwrite any existing ``.filt`` attribute
667 of the provided functors.
669 flags : `list`, optional
670 List of flags (per-band) to include
in output table.
671 Taken
from the ``meas`` dataset
if applied to a multilevel Object Table.
673 refFlags : `list`, optional
674 List of refFlags (only reference band) to include
in output table.
676 forcedFlags : `list`, optional
677 List of flags (per-band) to include
in output table.
678 Taken
from the ``forced_src`` dataset
if applied to a
679 multilevel Object Table. Intended
for flags
from measurement plugins
680 only run during multi-band forced-photometry.
682 _defaultRefFlags = []
685 def __init__(self, parq, functors, filt=None, flags=None, refFlags=None, forcedFlags=None):
690 self.
flags = list(flags)
if flags
is not None else []
691 self.
forcedFlags = list(forcedFlags)
if forcedFlags
is not None else []
693 if refFlags
is not None:
706 additionalFuncs.update({flag:
Column(flag, dataset=
'forced_src')
for flag
in self.
forcedFlags})
707 additionalFuncs.update({flag:
Column(flag, dataset=
'ref')
for flag
in self.
refFlags})
708 additionalFuncs.update({flag:
Column(flag, dataset=
'meas')
for flag
in self.
flags})
710 if isinstance(self.
functors, CompositeFunctor):
715 func.funcDict.update(additionalFuncs)
716 func.filt = self.
filt
722 return [name
for name, func
in self.
func.funcDict.items()
if func.noDup
or func.dataset ==
'ref']
732 if type(self.
parq)
in (list, tuple):
734 dflist = [self.
func(parq, dropna=dropna)
for parq
in self.
parq]
738 dflist = pool.map(functools.partial(self.
func, dropna=dropna), self.
parq)
739 self.
_df = pd.concat(dflist)
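

# Illustrative sketch only (plain pandas/numpy, hypothetical column names; not
# part of the original module): the core pattern PostprocessAnalysis implements
# is mapping a dict of named column computations over a catalog and collecting
# the results into a single DataFrame.
def _exampleFunctorDict():
    catalog = pd.DataFrame({"psfFlux": [100.0, 200.0], "psfFluxErr": [10.0, 10.0]})
    functors = {
        "psfMag": lambda df: -2.5 * np.log10(df["psfFlux"]) + 31.4,  # hypothetical zeropoint
        "psfFluxSnr": lambda df: df["psfFlux"] / df["psfFluxErr"],
    }
    return pd.DataFrame({name: f(catalog) for name, f in functors.items()})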
748 """Expected Connections for subclasses of TransformCatalogBaseTask.
752 inputCatalog = connectionTypes.Input(
754 storageClass=
"DataFrame",
756 outputCatalog = connectionTypes.Output(
758 storageClass=
"DataFrame",


class TransformCatalogBaseConfig(pipeBase.PipelineTaskConfig,
                                 pipelineConnections=TransformCatalogBaseConnections):
    functorFile = pexConfig.Field(
        dtype=str,
        doc="Path to YAML file specifying Science Data Model functors to use "
            "when copying columns and computing calibrated values.",
        default=None,
        optional=True
    )
    primaryKey = pexConfig.Field(
        dtype=str,
        doc="Name of column to be set as the DataFrame index. If None, the index "
            "will be named `id`",
        default=None,
        optional=True
    )
    columnsFromDataId = pexConfig.ListField(
        dtype=str,
        default=None,
        optional=True,
        doc="Columns to extract from the dataId",
    )
787 """Base class for transforming/standardizing a catalog
789 by applying functors that convert units and apply calibrations.
790 The purpose of this task
is to perform a set of computations on
791 an input `ParquetTable` dataset (such
as ``deepCoadd_obj``)
and write the
792 results to a new dataset (which needs to be declared
in an ``outputDataset``
795 The calculations to be performed are defined
in a YAML file that specifies
796 a set of functors to be computed, provided
as
797 a ``--functorFile`` config parameter. An example of such a YAML file
822 - base_InputCount_value
825 functor: DeconvolvedMoments
830 - merge_measurement_i
831 - merge_measurement_r
832 - merge_measurement_z
833 - merge_measurement_y
834 - merge_measurement_g
835 - base_PixelFlags_flag_inexact_psfCenter
838 The names
for each entry under
"func" will become the names of columns
in
839 the output dataset. All the functors referenced are defined
in
841 functor are
in the `args` list,
and any additional entries
for each column
842 other than
"functor" or "args" (e.g., ``
'filt'``, ``
'dataset'``) are treated
as
843 keyword arguments to be passed to the functor initialization.
845 The
"flags" entry
is the default shortcut
for `Column` functors.
846 All columns listed under
"flags" will be copied to the output table
847 untransformed. They can be of any datatype.
848 In the special case of transforming a multi-level oject table
with
849 band
and dataset indices (deepCoadd_obj), these will be taked
from the
850 `meas` dataset
and exploded out per band.
852 There are two special shortcuts that only apply when transforming
853 multi-level Object (deepCoadd_obj) tables:
854 - The
"refFlags" entry
is shortcut
for `Column` functor
855 taken
from the `
'ref'` dataset
if transforming an ObjectTable.
856 - The
"forcedFlags" entry
is shortcut
for `Column` functors.
857 taken
from the ``forced_src`` dataset
if transforming an ObjectTable.
858 These are expanded out per band.
862 to organize
and excecute the calculations.
865 def _DefaultName(self):
866 raise NotImplementedError(
'Subclass must define "_DefaultName" attribute')
870 raise NotImplementedError(
'Subclass must define "outputDataset" attribute')
874 raise NotImplementedError(
'Subclass must define "inputDataset" attribute')
877 def ConfigClass(self):
878 raise NotImplementedError(
'Subclass must define "ConfigClass" attribute')
882 if self.config.functorFile:
883 self.log.info(
'Loading tranform functor definitions from %s',
884 self.config.functorFile)
885 self.
funcs = CompositeFunctor.from_file(self.config.functorFile)
886 self.
funcs.update(dict(PostprocessAnalysis._defaultFuncs))
891 inputs = butlerQC.get(inputRefs)
892 if self.
funcs is None:
893 raise ValueError(
"config.functorFile is None. "
894 "Must be a valid path to yaml in order to run Task as a PipelineTask.")
895 result = self.
run(parq=inputs[
'inputCatalog'], funcs=self.
funcs,
896 dataId=outputRefs.outputCatalog.dataId.full)
897 outputs = pipeBase.Struct(outputCatalog=result)
898 butlerQC.put(outputs, outputRefs)

    def run(self, parq, funcs=None, dataId=None, band=None):
        """Do postprocessing calculations.

        Takes a `ParquetTable` object and dataId,
        returns a dataframe with results of postprocessing calculations.

        Parameters
        ----------
        parq : `lsst.pipe.tasks.parquetTable.ParquetTable`
            ParquetTable from which calculations are done.
        funcs : `lsst.pipe.tasks.functors.Functors`
            Functors to apply to the table's columns
        dataId : `dict`, optional
            Used to add a `patchId` column to the output dataframe.
        band : `str`, optional
            Filter band that is being processed.

        Returns
        -------
        df : `pandas.DataFrame`
        """
        self.log.info("Transforming/standardizing the source table dataId: %s", dataId)

        df = self.transform(band, parq, funcs, dataId).df
        self.log.info("Made a table of %d columns and %d rows", len(df.columns), len(df))
        return df

    def getAnalysis(self, parq, funcs=None, band=None):
        if funcs is None:
            funcs = self.funcs
        analysis = PostprocessAnalysis(parq, funcs, filt=band)
        return analysis

    def transform(self, band, parq, funcs, dataId):
        analysis = self.getAnalysis(parq, funcs=funcs, band=band)
        df = analysis.df
        if dataId and self.config.columnsFromDataId:
            for key in self.config.columnsFromDataId:
                if key in dataId:
                    df[str(key)] = dataId[key]
                else:
                    raise ValueError(f"'{key}' in config.columnsFromDataId not found in dataId: {dataId}")

        if self.config.primaryKey:
            if df.index.name != self.config.primaryKey and self.config.primaryKey in df:
                df.reset_index(inplace=True, drop=True)
                df.set_index(self.config.primaryKey, inplace=True)

        return pipeBase.Struct(
            df=df,
            analysis=analysis
        )


class TransformObjectCatalogConnections(pipeBase.PipelineTaskConnections,
                                        defaultTemplates={"coaddName": "deep"},
                                        dimensions=("tract", "patch", "skymap")):
    inputCatalog = connectionTypes.Input(
        doc="The vertical concatenation of the deepCoadd_{ref|meas|forced_src} catalogs, "
            "stored as a DataFrame with a multi-level column index per-patch.",
        dimensions=("tract", "patch", "skymap"),
        storageClass="DataFrame",
        name="{coaddName}Coadd_obj",
        deferLoad=True,
    )
    outputCatalog = connectionTypes.Output(
        doc="Per-Patch Object Table of columns transformed from the deepCoadd_obj table per the standard "
            "data model.",
        dimensions=("tract", "patch", "skymap"),
        storageClass="DataFrame",
        name="objectTable",
    )


class TransformObjectCatalogConfig(TransformCatalogBaseConfig,
                                   pipelineConnections=TransformObjectCatalogConnections):
    coaddName = pexConfig.Field(
        dtype=str,
        default="deep",
        doc="Name of coadd"
    )
    filterMap = pexConfig.DictField(
        keytype=str,
        itemtype=str,
        doc=("Dictionary mapping full filter name to short one for column name munging. "
             "These filters determine the output columns no matter what filters the "
             "input data actually contain."),
        deprecated=("Coadds are now identified by the band, so this transform is unused. "
                    "Will be removed after v22.")
    )
    outputBands = pexConfig.ListField(
        dtype=str,
        default=None,
        optional=True,
        doc=("These bands and only these bands will appear in the output,"
             " NaN-filled if the input does not include them."
             " If None, then use all bands found in the input.")
    )
    camelCase = pexConfig.Field(
        dtype=bool,
        doc=("Write per-band columns names with camelCase, else underscore. "
             "For example: gPsFlux instead of g_PsFlux.")
    )
    multilevelOutput = pexConfig.Field(
        dtype=bool,
        doc=("Whether results dataframe should have a multilevel column index (True) or be flat "
             "and name-munged (False).")
    )
    goodFlags = pexConfig.ListField(
        dtype=str,
        doc=("List of 'good' flags that should be set False when populating empty tables. "
             "All other flags are considered to be 'bad' flags and will be set to True.")
    )
    floatFillValue = pexConfig.Field(
        dtype=float,
        doc="Fill value for float fields when populating empty tables."
    )
    integerFillValue = pexConfig.Field(
        dtype=int,
        doc="Fill value for integer fields when populating empty tables."
    )

    def setDefaults(self):
        super().setDefaults()
        self.functorFile = os.path.join('$PIPE_TASKS_DIR', 'schemas', 'Object.yaml')
        self.primaryKey = 'objectId'
        self.columnsFromDataId = ['tract', 'patch']
        self.goodFlags = ['calib_astrometry_used',
                          'calib_photometry_reserved',
                          'calib_photometry_used',
                          'calib_psf_candidate',
                          'calib_psf_reserved']
1046 """Produce a flattened Object Table to match the format specified in
1049 Do the same set of postprocessing calculations on all bands.
1051 This is identical to `TransformCatalogBaseTask`,
except for that it does
1052 the specified functor calculations
for all filters present
in the
1053 input `deepCoadd_obj` table. Any specific ``
"filt"`` keywords specified
1054 by the YAML file will be superceded.
1056 _DefaultName = "transformObjectCatalog"
1057 ConfigClass = TransformObjectCatalogConfig
1059 def run(self, parq, funcs=None, dataId=None, band=None):
1063 templateDf = pd.DataFrame()
1065 if isinstance(parq, DeferredDatasetHandle):
1066 columns = parq.get(component=
'columns')
1067 inputBands = columns.unique(level=1).values
1069 inputBands = parq.columnLevelNames[
'band']
1071 outputBands = self.config.outputBands
if self.config.outputBands
else inputBands
1074 for inputBand
in inputBands:
1075 if inputBand
not in outputBands:
1076 self.log.info(
"Ignoring %s band data in the input", inputBand)
1078 self.log.info(
"Transforming the catalog of band %s", inputBand)
1079 result = self.transform(inputBand, parq, funcs, dataId)
1080 dfDict[inputBand] = result.df
1081 analysisDict[inputBand] = result.analysis
1082 if templateDf.empty:
1083 templateDf = result.df
1086 for filt
in outputBands:
1087 if filt
not in dfDict:
1088 self.log.info(
"Adding empty columns for band %s", filt)
1089 dfTemp = templateDf.copy()
1090 for col
in dfTemp.columns:
1091 testValue = dfTemp[col].values[0]
1092 if isinstance(testValue, (np.bool_, pd.BooleanDtype)):
1094 if col
in self.config.goodFlags:
1098 elif isinstance(testValue, numbers.Integral):
1102 if isinstance(testValue, np.unsignedinteger):
1103 raise ValueError(
"Parquet tables may not have unsigned integer columns.")
1105 fillValue = self.config.integerFillValue
1107 fillValue = self.config.floatFillValue
1108 dfTemp[col].values[:] = fillValue
1109 dfDict[filt] = dfTemp
1112 df = pd.concat(dfDict, axis=1, names=[
'band',
'column'])
1114 if not self.config.multilevelOutput:
1115 noDupCols = list(set.union(*[set(v.noDupCols)
for v
in analysisDict.values()]))
1116 if self.config.primaryKey
in noDupCols:
1117 noDupCols.remove(self.config.primaryKey)
1118 if dataId
and self.config.columnsFromDataId:
1119 noDupCols += self.config.columnsFromDataId
1120 df =
flattenFilters(df, noDupCols=noDupCols, camelCase=self.config.camelCase,
1121 inputBands=inputBands)
1123 self.log.info(
"Made a table of %d columns and %d rows", len(df.columns), len(df))


class ConsolidateObjectTableConnections(pipeBase.PipelineTaskConnections,
                                        dimensions=("tract", "skymap")):
    inputCatalogs = connectionTypes.Input(
        doc="Per-Patch objectTables conforming to the standard data model.",
        name="objectTable",
        storageClass="DataFrame",
        dimensions=("tract", "patch", "skymap"),
        multiple=True,
    )
    outputCatalog = connectionTypes.Output(
        doc="Per-tract horizontal concatenation of the input objectTables",
        name="objectTable_tract",
        storageClass="DataFrame",
        dimensions=("tract", "skymap"),
    )


class ConsolidateObjectTableConfig(pipeBase.PipelineTaskConfig,
                                   pipelineConnections=ConsolidateObjectTableConnections):
    coaddName = pexConfig.Field(
        dtype=str,
        default="deep",
        doc="Name of coadd"
    )


class ConsolidateObjectTableTask(pipeBase.PipelineTask):
    """Write patch-merged source tables to a tract-level parquet file.

    Concatenates the `objectTable` list into a per-tract `objectTable_tract`.
    """
    _DefaultName = "consolidateObjectTable"
    ConfigClass = ConsolidateObjectTableConfig

    inputDataset = 'objectTable'
    outputDataset = 'objectTable_tract'

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)
        self.log.info("Concatenating %s per-patch Object Tables",
                      len(inputs['inputCatalogs']))
        df = pd.concat(inputs['inputCatalogs'])
        butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs)


class TransformSourceTableConnections(pipeBase.PipelineTaskConnections,
                                      defaultTemplates={"catalogType": ""},
                                      dimensions=("instrument", "visit", "detector")):
    inputCatalog = connectionTypes.Input(
        doc="Wide input catalog of sources produced by WriteSourceTableTask",
        name="{catalogType}source",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector"),
    )
    outputCatalog = connectionTypes.Output(
        doc="Narrower, per-detector Source Table transformed and converted per a "
            "specified set of functors",
        name="{catalogType}sourceTable",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector")
    )


class TransformSourceTableConfig(TransformCatalogBaseConfig,
                                 pipelineConnections=TransformSourceTableConnections):

    def setDefaults(self):
        super().setDefaults()
        self.functorFile = os.path.join('$PIPE_TASKS_DIR', 'schemas', 'Source.yaml')
        self.primaryKey = 'sourceId'
        self.columnsFromDataId = ['visit', 'detector', 'band', 'physical_filter']


class TransformSourceTableTask(TransformCatalogBaseTask):
    """Transform/standardize a source catalog.
    """
    _DefaultName = "transformSourceTable"
    ConfigClass = TransformSourceTableConfig


class ConsolidateVisitSummaryConnections(pipeBase.PipelineTaskConnections,
                                         dimensions=("instrument", "visit",),
                                         defaultTemplates={"calexpType": ""}):
    calexp = connectionTypes.Input(
        doc="Processed exposures used for metadata",
        name="{calexpType}calexp",
        storageClass="ExposureF",
        dimensions=("instrument", "visit", "detector"),
        deferLoad=True,
        multiple=True,
    )
    visitSummary = connectionTypes.Output(
        doc=("Per-visit consolidated exposure metadata. These catalogs use "
             "detector id for the id and are sorted for fast lookups of a "
             "detector."),
        name="{calexpType}visitSummary",
        storageClass="ExposureCatalog",
        dimensions=("instrument", "visit"),
    )


class ConsolidateVisitSummaryConfig(pipeBase.PipelineTaskConfig,
                                    pipelineConnections=ConsolidateVisitSummaryConnections):
    """Config for ConsolidateVisitSummaryTask"""
    pass


class ConsolidateVisitSummaryTask(pipeBase.PipelineTask):
    """Task to consolidate per-detector visit metadata.

    This task aggregates the following metadata from all the detectors in a
    single visit into an exposure catalog:
    - The physical_filter and band (if available).
    - The psf size, shape, and effective area at the center of the detector.
    - The corners of the bounding box in right ascension/declination.

    Other quantities such as Detector, Psf, ApCorrMap, and TransmissionCurve
    are not persisted here because of storage concerns, and because of their
    limited utility as summary statistics.

    Tests for this task are performed in ci_hsc_gen3.
    """
    _DefaultName = "consolidateVisitSummary"
    ConfigClass = ConsolidateVisitSummaryConfig

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        dataRefs = butlerQC.get(inputRefs.calexp)
        visit = dataRefs[0].dataId.byName()['visit']

        self.log.debug("Concatenating metadata from %d per-detector calexps (visit %d)",
                       len(dataRefs), visit)

        expCatalog = self._combineExposureMetadata(visit, dataRefs)

        butlerQC.put(expCatalog, outputRefs.visitSummary)

    def _combineExposureMetadata(self, visit, dataRefs):
        """Make a combined exposure catalog from a list of dataRefs.

        These dataRefs must point to exposures with wcs, summaryStats,
        and other visit metadata.

        Parameters
        ----------
        visit : `int`
            Visit identification number.
        dataRefs : `list` of `lsst.daf.butler.DeferredDatasetHandle`
            List of dataRefs in visit.

        Returns
        -------
        visitSummary : `lsst.afw.table.ExposureCatalog`
            Exposure catalog with per-detector summary information.
        """
        schema = self._makeVisitSummarySchema()
        cat = afwTable.ExposureCatalog(schema)
        cat.resize(len(dataRefs))

        cat['visit'] = visit

        for i, dataRef in enumerate(dataRefs):
            visitInfo = dataRef.get(component='visitInfo')
            filterLabel = dataRef.get(component='filter')
            summaryStats = dataRef.get(component='summaryStats')
            detector = dataRef.get(component='detector')
            wcs = dataRef.get(component='wcs')
            photoCalib = dataRef.get(component='photoCalib')
            bbox = dataRef.get(component='bbox')
            validPolygon = dataRef.get(component='validPolygon')

            rec = cat[i]
            rec.setBBox(bbox)
            rec.setVisitInfo(visitInfo)
            rec.setWcs(wcs)
            rec.setPhotoCalib(photoCalib)
            rec.setValidPolygon(validPolygon)

            rec['physical_filter'] = filterLabel.physicalLabel if filterLabel.hasPhysicalLabel() else ""
            rec['band'] = filterLabel.bandLabel if filterLabel.hasBandLabel() else ""
            rec.setId(detector.getId())
            summaryStats.update_record(rec)

        metadata = dafBase.PropertyList()
        metadata.add("COMMENT", "Catalog id is detector id, sorted.")
        metadata.add("COMMENT", "Only detectors with data have entries.")
        cat.setMetadata(metadata)

        cat.sort()
        return cat

    def _makeVisitSummarySchema(self):
        """Make the schema for the visitSummary catalog."""
        schema = afwTable.ExposureTable.makeMinimalSchema()
        schema.addField('visit', type='L', doc='Visit number')
        schema.addField('physical_filter', type='String', size=32, doc='Physical filter')
        schema.addField('band', type='String', size=32, doc='Name of band')
        ExposureSummaryStats.update_schema(schema)
        return schema


class ConsolidateSourceTableConnections(pipeBase.PipelineTaskConnections,
                                        defaultTemplates={"catalogType": ""},
                                        dimensions=("instrument", "visit")):
    inputCatalogs = connectionTypes.Input(
        doc="Input per-detector Source Tables",
        name="{catalogType}sourceTable",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector"),
        multiple=True
    )
    outputCatalog = connectionTypes.Output(
        doc="Per-visit concatenation of Source Table",
        name="{catalogType}sourceTable_visit",
        storageClass="DataFrame",
        dimensions=("instrument", "visit")
    )


class ConsolidateSourceTableConfig(pipeBase.PipelineTaskConfig,
                                   pipelineConnections=ConsolidateSourceTableConnections):
    pass


class ConsolidateSourceTableTask(pipeBase.PipelineTask):
    """Concatenate the `sourceTable` list into a per-visit `sourceTable_visit`.
    """
    _DefaultName = 'consolidateSourceTable'
    ConfigClass = ConsolidateSourceTableConfig

    inputDataset = 'sourceTable'
    outputDataset = 'sourceTable_visit'

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        from .makeWarp import reorderRefs

        detectorOrder = [ref.dataId['detector'] for ref in inputRefs.inputCatalogs]
        detectorOrder.sort()
        inputRefs = reorderRefs(inputRefs, detectorOrder, dataIdKey='detector')
        inputs = butlerQC.get(inputRefs)
        self.log.info("Concatenating %s per-detector Source Tables",
                      len(inputs['inputCatalogs']))
        df = pd.concat(inputs['inputCatalogs'])
        butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs)


class MakeCcdVisitTableConnections(pipeBase.PipelineTaskConnections,
                                   dimensions=("instrument",),
                                   defaultTemplates={"calexpType": ""}):
    visitSummaryRefs = connectionTypes.Input(
        doc="Data references for per-visit consolidated exposure metadata from ConsolidateVisitSummaryTask",
        name="{calexpType}visitSummary",
        storageClass="ExposureCatalog",
        dimensions=("instrument", "visit"),
        multiple=True,
        deferLoad=True,
    )
    outputCatalog = connectionTypes.Output(
        doc="CCD and Visit metadata table",
        name="{calexpType}ccdVisitTable",
        storageClass="DataFrame",
        dimensions=("instrument",)
    )


class MakeCcdVisitTableConfig(pipeBase.PipelineTaskConfig,
                              pipelineConnections=MakeCcdVisitTableConnections):
    pass


class MakeCcdVisitTableTask(pipeBase.PipelineTask):
    """Produce a `ccdVisitTable` from the `visitSummary` exposure catalogs.
    """
    _DefaultName = 'makeCcdVisitTable'
    ConfigClass = MakeCcdVisitTableConfig

    def run(self, visitSummaryRefs):
        """Make a table of ccd information from the `visitSummary` catalogs.

        Parameters
        ----------
        visitSummaryRefs : `list` of `lsst.daf.butler.DeferredDatasetHandle`
            List of DeferredDatasetHandles pointing to exposure catalogs with
            per-detector summary information.

        Returns
        -------
        result : `lsst.pipe.base.Struct`
            Results struct with attribute:

            ``outputCatalog``
                Catalog of ccd and visit information.
        """
        ccdEntries = []
        for visitSummaryRef in visitSummaryRefs:
            visitSummary = visitSummaryRef.get()
            visitInfo = visitSummary[0].getVisitInfo()

            summaryTable = visitSummary.asAstropy()
            selectColumns = ['id', 'visit', 'physical_filter', 'band', 'ra', 'decl', 'zenithDistance',
                             'zeroPoint', 'psfSigma', 'skyBg', 'skyNoise',
                             'astromOffsetMean', 'astromOffsetStd', 'nPsfStar',
                             'psfStarDeltaE1Median', 'psfStarDeltaE2Median',
                             'psfStarDeltaE1Scatter', 'psfStarDeltaE2Scatter',
                             'psfStarDeltaSizeMedian', 'psfStarDeltaSizeScatter',
                             'psfStarScaledDeltaSizeScatter',
                             'psfTraceRadiusDelta', 'maxDistToNearestPsf']
            ccdEntry = summaryTable[selectColumns].to_pandas().set_index('id')
            ccdEntry = ccdEntry.rename(columns={"visit": "visitId"})
            dataIds = [DataCoordinate.standardize(visitSummaryRef.dataId, detector=id) for id in
                       summaryTable['id']]
            packer = visitSummaryRef.dataId.universe.makePacker('visit_detector', visitSummaryRef.dataId)
            ccdVisitIds = [packer.pack(dataId) for dataId in dataIds]
            ccdEntry['ccdVisitId'] = ccdVisitIds
            ccdEntry['detector'] = summaryTable['id']
            pixToArcseconds = np.array([vR.getWcs().getPixelScale().asArcseconds() if vR.getWcs()
                                        else np.nan for vR in visitSummary])
            ccdEntry["seeing"] = visitSummary['psfSigma'] * np.sqrt(8 * np.log(2)) * pixToArcseconds

            ccdEntry["skyRotation"] = visitInfo.getBoresightRotAngle().asDegrees()
            ccdEntry["expMidpt"] = visitInfo.getDate().toPython()
            ccdEntry["expMidptMJD"] = visitInfo.getDate().get(dafBase.DateTime.MJD)
            expTime = visitInfo.getExposureTime()
            ccdEntry['expTime'] = expTime
            ccdEntry["obsStart"] = ccdEntry["expMidpt"] - 0.5 * pd.Timedelta(seconds=expTime)
            expTime_days = expTime / (60*60*24)
            ccdEntry["obsStartMJD"] = ccdEntry["expMidptMJD"] - 0.5 * expTime_days
            ccdEntry['darkTime'] = visitInfo.getDarkTime()
            ccdEntry['xSize'] = summaryTable['bbox_max_x'] - summaryTable['bbox_min_x']
            ccdEntry['ySize'] = summaryTable['bbox_max_y'] - summaryTable['bbox_min_y']
            ccdEntry['llcra'] = summaryTable['raCorners'][:, 0]
            ccdEntry['llcdec'] = summaryTable['decCorners'][:, 0]
            ccdEntry['ulcra'] = summaryTable['raCorners'][:, 1]
            ccdEntry['ulcdec'] = summaryTable['decCorners'][:, 1]
            ccdEntry['urcra'] = summaryTable['raCorners'][:, 2]
            ccdEntry['urcdec'] = summaryTable['decCorners'][:, 2]
            ccdEntry['lrcra'] = summaryTable['raCorners'][:, 3]
            ccdEntry['lrcdec'] = summaryTable['decCorners'][:, 3]
            ccdEntries.append(ccdEntry)

        outputCatalog = pd.concat(ccdEntries)
        outputCatalog.set_index('ccdVisitId', inplace=True, verify_integrity=True)
        return pipeBase.Struct(outputCatalog=outputCatalog)
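

# Illustrative sketch only (plain numpy, hypothetical numbers; not part of the
# original module): converting a Gaussian psfSigma in pixels to a FWHM "seeing"
# in arcseconds, the same conversion used for the ccdVisitTable 'seeing' column above.
def _exampleSeeingFwhm(psfSigmaPixels=2.0, pixelScaleArcsec=0.2):
    fwhmFactor = np.sqrt(8 * np.log(2))                    # ~2.355, Gaussian sigma -> FWHM
    return psfSigmaPixels * fwhmFactor * pixelScaleArcsec  # ~0.94 arcsec for these inputs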


class MakeVisitTableConnections(pipeBase.PipelineTaskConnections,
                                dimensions=("instrument",),
                                defaultTemplates={"calexpType": ""}):
    visitSummaries = connectionTypes.Input(
        doc="Per-visit consolidated exposure metadata from ConsolidateVisitSummaryTask",
        name="{calexpType}visitSummary",
        storageClass="ExposureCatalog",
        dimensions=("instrument", "visit",),
        multiple=True,
        deferLoad=True,
    )
    outputCatalog = connectionTypes.Output(
        doc="Visit metadata table",
        name="{calexpType}visitTable",
        storageClass="DataFrame",
        dimensions=("instrument",)
    )


class MakeVisitTableConfig(pipeBase.PipelineTaskConfig,
                           pipelineConnections=MakeVisitTableConnections):
    pass


class MakeVisitTableTask(pipeBase.PipelineTask):
    """Produce a `visitTable` from the `visitSummary` exposure catalogs.
    """
    _DefaultName = 'makeVisitTable'
    ConfigClass = MakeVisitTableConfig

    def run(self, visitSummaries):
        """Make a table of visit information from the `visitSummary` catalogs.

        Parameters
        ----------
        visitSummaries : `list` of `lsst.afw.table.ExposureCatalog`
            List of exposure catalogs with per-detector summary information.

        Returns
        -------
        result : `lsst.pipe.base.Struct`
            Results struct with attribute:

            ``outputCatalog``
                Catalog of visit information.
        """
        visitEntries = []
        for visitSummary in visitSummaries:
            visitSummary = visitSummary.get()
            visitRow = visitSummary[0]
            visitInfo = visitRow.getVisitInfo()

            visitEntry = {}
            visitEntry["visitId"] = visitRow['visit']
            visitEntry["visit"] = visitRow['visit']
            visitEntry["physical_filter"] = visitRow['physical_filter']
            visitEntry["band"] = visitRow['band']
            raDec = visitInfo.getBoresightRaDec()
            visitEntry["ra"] = raDec.getRa().asDegrees()
            visitEntry["decl"] = raDec.getDec().asDegrees()
            visitEntry["skyRotation"] = visitInfo.getBoresightRotAngle().asDegrees()
            azAlt = visitInfo.getBoresightAzAlt()
            visitEntry["azimuth"] = azAlt.getLongitude().asDegrees()
            visitEntry["altitude"] = azAlt.getLatitude().asDegrees()
            visitEntry["zenithDistance"] = 90 - azAlt.getLatitude().asDegrees()
            visitEntry["airmass"] = visitInfo.getBoresightAirmass()
            expTime = visitInfo.getExposureTime()
            visitEntry["expTime"] = expTime
            visitEntry["expMidpt"] = visitInfo.getDate().toPython()
            visitEntry["expMidptMJD"] = visitInfo.getDate().get(dafBase.DateTime.MJD)
            visitEntry["obsStart"] = visitEntry["expMidpt"] - 0.5 * pd.Timedelta(seconds=expTime)
            expTime_days = expTime / (60*60*24)
            visitEntry["obsStartMJD"] = visitEntry["expMidptMJD"] - 0.5 * expTime_days
            visitEntries.append(visitEntry)

        outputCatalog = pd.DataFrame(data=visitEntries)
        outputCatalog.set_index('visitId', inplace=True, verify_integrity=True)
        return pipeBase.Struct(outputCatalog=outputCatalog)
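

# Illustrative sketch only (plain pandas, hypothetical numbers; not part of the
# original module): deriving the observation start from the exposure midpoint,
# both as a timestamp and as an MJD, mirroring the obsStart/obsStartMJD columns
# computed above.
def _exampleObsStart(expTimeSeconds=30.0):
    expMidpt = pd.Timestamp("2024-01-01T00:00:15")  # hypothetical midpoint
    expMidptMJD = 60310.0001736111                  # hypothetical matching MJD
    obsStart = expMidpt - 0.5 * pd.Timedelta(seconds=expTimeSeconds)
    obsStartMJD = expMidptMJD - 0.5 * expTimeSeconds / 86400.0
    return obsStart, obsStartMJD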


class WriteForcedSourceTableConnections(pipeBase.PipelineTaskConnections,
                                        dimensions=("instrument", "visit", "detector", "skymap", "tract")):
    inputCatalog = connectionTypes.Input(
        doc="Primary per-detector, single-epoch forced-photometry catalog. "
            "By default, it is the output of ForcedPhotCcdTask on calexps",
        storageClass="SourceCatalog",
        dimensions=("instrument", "visit", "detector", "skymap", "tract")
    )
    inputCatalogDiff = connectionTypes.Input(
        doc="Secondary multi-epoch, per-detector, forced photometry catalog. "
            "By default, it is the output of ForcedPhotCcdTask run on image differences.",
        storageClass="SourceCatalog",
        dimensions=("instrument", "visit", "detector", "skymap", "tract")
    )
    outputCatalog = connectionTypes.Output(
        doc="InputCatalogs horizontally joined on `objectId` in Parquet format",
        name="mergedForcedSource",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector", "skymap", "tract")
    )


class WriteForcedSourceTableConfig(pipeBase.PipelineTaskConfig,
                                   pipelineConnections=WriteForcedSourceTableConnections):
    key = pexConfig.Field(
        doc="Column on which to join the two input tables on and make the primary key of the output",
        dtype=str,
        default="objectId",
    )


class WriteForcedSourceTableTask(pipeBase.PipelineTask):
    """Merge and convert per-detector forced source catalogs to parquet.

    Because the predecessor ForcedPhotCcdTask operates per-detector,
    per-tract (i.e., it has tract in its dimensions), detectors
    on the tract boundary may have multiple forced source catalogs.

    The successor task TransformForcedSourceTable runs per-patch
    and temporally aggregates overlapping mergedForcedSource catalogs from all
    available epochs.
    """
    _DefaultName = "writeForcedSourceTable"
    ConfigClass = WriteForcedSourceTableConfig

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)
        inputs['ccdVisitId'] = butlerQC.quantum.dataId.pack("visit_detector")
        inputs['band'] = butlerQC.quantum.dataId.full['band']
        outputs = self.run(**inputs)
        butlerQC.put(outputs, outputRefs)

    def run(self, inputCatalog, inputCatalogDiff, ccdVisitId=None, band=None):
        dfs = []
        for table, dataset in zip((inputCatalog, inputCatalogDiff), ('calexp', 'diff')):
            df = table.asAstropy().to_pandas().set_index(self.config.key, drop=False)
            df = df.reindex(sorted(df.columns), axis=1)
            df['ccdVisitId'] = ccdVisitId if ccdVisitId else pd.NA
            df['band'] = band if band else pd.NA
            df.columns = pd.MultiIndex.from_tuples([(dataset, c) for c in df.columns],
                                                   names=('dataset', 'column'))
            dfs.append(df)

        outputCatalog = functools.reduce(lambda d1, d2: d1.join(d2), dfs)
        return pipeBase.Struct(outputCatalog=outputCatalog)
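

# Illustrative sketch only (plain pandas/functools, hypothetical values; not
# part of the original module): two catalogs sharing an objectId index get a
# (dataset, column) MultiIndex and are joined horizontally, mirroring the
# calexp/diff merge in run() above.
def _exampleMergeForcedSources():
    calexp = pd.DataFrame({"psfFlux": [1.0, 2.0]}, index=pd.Index([1, 2], name="objectId"))
    diff = pd.DataFrame({"psfFlux": [0.1, -0.2]}, index=pd.Index([1, 2], name="objectId"))
    dfs = []
    for table, dataset in zip((calexp, diff), ("calexp", "diff")):
        df = table.copy()
        df.columns = pd.MultiIndex.from_tuples(
            [(dataset, c) for c in df.columns], names=("dataset", "column"))
        dfs.append(df)
    return functools.reduce(lambda d1, d2: d1.join(d2), dfs)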


class TransformForcedSourceTableConnections(pipeBase.PipelineTaskConnections,
                                            dimensions=("instrument", "skymap", "patch", "tract")):
    inputCatalogs = connectionTypes.Input(
        doc="Parquet table of merged ForcedSources produced by WriteForcedSourceTableTask",
        name="mergedForcedSource",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector", "skymap", "tract"),
        multiple=True,
        deferLoad=True,
    )
    referenceCatalog = connectionTypes.Input(
        doc="Reference catalog which was used to seed the forcedPhot. Columns "
            "objectId, detect_isPrimary, detect_isTractInner, detect_isPatchInner "
            "are expected.",
        storageClass="DataFrame",
        dimensions=("tract", "patch", "skymap"),
        deferLoad=True,
    )
    outputCatalog = connectionTypes.Output(
        doc="Narrower, temporally-aggregated, per-patch ForcedSource Table transformed and converted per a "
            "specified set of functors",
        name="forcedSourceTable",
        storageClass="DataFrame",
        dimensions=("tract", "patch", "skymap")
    )


class TransformForcedSourceTableConfig(TransformCatalogBaseConfig,
                                       pipelineConnections=TransformForcedSourceTableConnections):
    referenceColumns = pexConfig.ListField(
        dtype=str,
        default=["detect_isPrimary", "detect_isTractInner", "detect_isPatchInner"],
        doc="Columns to pull from reference catalog",
    )
    keyRef = pexConfig.Field(
        doc="Column on which to join the two input tables on and make the primary key of the output",
        dtype=str,
        default="objectId",
    )
    key = pexConfig.Field(
        doc="Rename the output DataFrame index to this name",
        dtype=str,
        default="forcedSourceId",
    )

    def setDefaults(self):
        super().setDefaults()
        self.functorFile = os.path.join('$PIPE_TASKS_DIR', 'schemas', 'ForcedSource.yaml')
        self.columnsFromDataId = ['tract', 'patch']


class TransformForcedSourceTableTask(TransformCatalogBaseTask):
    """Transform/standardize a ForcedSource catalog.

    Transforms each wide, per-detector forcedSource parquet table per the
    specification file (per-camera defaults found in ForcedSource.yaml).
    All epochs that overlap the patch are aggregated into one per-patch
    narrow-parquet file.

    No de-duplication of rows is performed. Duplicate resolution flags are
    pulled in from the referenceCatalog: `detect_isPrimary`,
    `detect_isTractInner`, `detect_isPatchInner`, so that the user may
    de-duplicate for analysis or compare duplicates for QA.

    The resulting table includes multiple bands. Epochs (MJDs) and other useful
    per-visit rows can be retrieved by joining with the CcdVisitTable on
    ccdVisitId.
    """
    _DefaultName = "transformForcedSourceTable"
    ConfigClass = TransformForcedSourceTableConfig

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)
        if self.funcs is None:
            raise ValueError("config.functorFile is None. "
                             "Must be a valid path to yaml in order to run Task as a PipelineTask.")
        outputs = self.run(inputs['inputCatalogs'], inputs['referenceCatalog'], funcs=self.funcs,
                           dataId=outputRefs.outputCatalog.dataId.full)

        butlerQC.put(outputs, outputRefs)

    def run(self, inputCatalogs, referenceCatalog, funcs=None, dataId=None, band=None):
        dfs = []
        ref = referenceCatalog.get(parameters={"columns": self.config.referenceColumns})
        self.log.info("Aggregating %s input catalogs", len(inputCatalogs))
        for handle in inputCatalogs:
            result = self.transform(None, handle, funcs, dataId)
            # Keep only rows that join to the reference catalog on the patch
            dfs.append(result.df.join(ref, how='inner'))

        outputCatalog = pd.concat(dfs)

        # The joining key was config.keyRef; rename the index accordingly,
        # move it back into the columns, then index on forcedSourceId
        # (as specified in ForcedSource.yaml) and rename it to config.key.
        outputCatalog.index.rename(self.config.keyRef, inplace=True)
        outputCatalog.reset_index(inplace=True)
        outputCatalog.set_index("forcedSourceId", inplace=True, verify_integrity=True)
        outputCatalog.index.rename(self.config.key, inplace=True)

        self.log.info("Made a table of %d columns and %d rows",
                      len(outputCatalog.columns), len(outputCatalog))
        return pipeBase.Struct(outputCatalog=outputCatalog)


class ConsolidateTractConnections(pipeBase.PipelineTaskConnections,
                                  defaultTemplates={"catalogType": ""},
                                  dimensions=("instrument", "tract")):
    inputCatalogs = connectionTypes.Input(
        doc="Input per-patch DataFrame Tables to be concatenated",
        name="{catalogType}ForcedSourceTable",
        storageClass="DataFrame",
        dimensions=("tract", "patch", "skymap"),
        multiple=True,
    )
    outputCatalog = connectionTypes.Output(
        doc="Output per-tract concatenation of DataFrame Tables",
        name="{catalogType}ForcedSourceTable_tract",
        storageClass="DataFrame",
        dimensions=("tract", "skymap"),
    )


class ConsolidateTractConfig(pipeBase.PipelineTaskConfig,
                             pipelineConnections=ConsolidateTractConnections):
    pass


class ConsolidateTractTask(pipeBase.PipelineTask):
    """Concatenate any per-patch, dataframe list into a single
    per-tract DataFrame.
    """
    _DefaultName = 'ConsolidateTract'
    ConfigClass = ConsolidateTractConfig

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)
        self.log.info("Concatenating %s per-patch %s Tables",
                      len(inputs['inputCatalogs']),
                      inputRefs.inputCatalogs[0].datasetType.name)
        df = pd.concat(inputs['inputCatalogs'])
        butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs)