Coverage for python/lsst/pipe/tasks/postprocess.py: 32%
693 statements
« prev ^ index » next coverage.py v6.4.4, created at 2022-08-18 12:37 -0700
« prev ^ index » next coverage.py v6.4.4, created at 2022-08-18 12:37 -0700
1# This file is part of pipe_tasks
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <https://www.gnu.org/licenses/>.
22import functools
23import pandas as pd
24import logging
25import numpy as np
26import numbers
27import os
29import lsst.geom
30import lsst.pex.config as pexConfig
31import lsst.pipe.base as pipeBase
32import lsst.daf.base as dafBase
33from lsst.obs.base import ExposureIdInfo
34from lsst.pipe.base import connectionTypes
35import lsst.afw.table as afwTable
36from lsst.meas.base import SingleFrameMeasurementTask
37from lsst.daf.butler import DeferredDatasetHandle, DataCoordinate
38from lsst.skymap import BaseSkyMap
40from .parquetTable import ParquetTable
41from .functors import CompositeFunctor, Column
43log = logging.getLogger(__name__)
46def flattenFilters(df, noDupCols=['coord_ra', 'coord_dec'], camelCase=False, inputBands=None):
47 """Flattens a dataframe with multilevel column index.
48 """
49 newDf = pd.DataFrame()
50 # band is the level 0 index
51 dfBands = df.columns.unique(level=0).values
52 for band in dfBands:
53 subdf = df[band]
54 columnFormat = '{0}{1}' if camelCase else '{0}_{1}'
55 newColumns = {c: columnFormat.format(band, c)
56 for c in subdf.columns if c not in noDupCols}
57 cols = list(newColumns.keys())
58 newDf = pd.concat([newDf, subdf[cols].rename(columns=newColumns)], axis=1)
60 # Band must be present in the input and output or else column is all NaN:
61 presentBands = dfBands if inputBands is None else list(set(inputBands).intersection(dfBands))
62 # Get the unexploded columns from any present band's partition
63 noDupDf = df[presentBands[0]][noDupCols]
64 newDf = pd.concat([noDupDf, newDf], axis=1)
65 return newDf
68class WriteObjectTableConnections(pipeBase.PipelineTaskConnections,
69 defaultTemplates={"coaddName": "deep"},
70 dimensions=("tract", "patch", "skymap")):
71 inputCatalogMeas = connectionTypes.Input(
72 doc="Catalog of source measurements on the deepCoadd.",
73 dimensions=("tract", "patch", "band", "skymap"),
74 storageClass="SourceCatalog",
75 name="{coaddName}Coadd_meas",
76 multiple=True
77 )
78 inputCatalogForcedSrc = connectionTypes.Input(
79 doc="Catalog of forced measurements (shape and position parameters held fixed) on the deepCoadd.",
80 dimensions=("tract", "patch", "band", "skymap"),
81 storageClass="SourceCatalog",
82 name="{coaddName}Coadd_forced_src",
83 multiple=True
84 )
85 inputCatalogRef = connectionTypes.Input(
86 doc="Catalog marking the primary detection (which band provides a good shape and position)"
87 "for each detection in deepCoadd_mergeDet.",
88 dimensions=("tract", "patch", "skymap"),
89 storageClass="SourceCatalog",
90 name="{coaddName}Coadd_ref"
91 )
92 outputCatalog = connectionTypes.Output(
93 doc="A vertical concatenation of the deepCoadd_{ref|meas|forced_src} catalogs, "
94 "stored as a DataFrame with a multi-level column index per-patch.",
95 dimensions=("tract", "patch", "skymap"),
96 storageClass="DataFrame",
97 name="{coaddName}Coadd_obj"
98 )
101class WriteObjectTableConfig(pipeBase.PipelineTaskConfig,
102 pipelineConnections=WriteObjectTableConnections):
103 engine = pexConfig.Field(
104 dtype=str,
105 default="pyarrow",
106 doc="Parquet engine for writing (pyarrow or fastparquet)"
107 )
108 coaddName = pexConfig.Field(
109 dtype=str,
110 default="deep",
111 doc="Name of coadd"
112 )
115class WriteObjectTableTask(pipeBase.PipelineTask):
116 """Write filter-merged source tables to parquet
117 """
118 _DefaultName = "writeObjectTable"
119 ConfigClass = WriteObjectTableConfig
121 # Names of table datasets to be merged
122 inputDatasets = ('forced_src', 'meas', 'ref')
124 # Tag of output dataset written by `MergeSourcesTask.write`
125 outputDataset = 'obj'
127 def runQuantum(self, butlerQC, inputRefs, outputRefs):
128 inputs = butlerQC.get(inputRefs)
130 measDict = {ref.dataId['band']: {'meas': cat} for ref, cat in
131 zip(inputRefs.inputCatalogMeas, inputs['inputCatalogMeas'])}
132 forcedSourceDict = {ref.dataId['band']: {'forced_src': cat} for ref, cat in
133 zip(inputRefs.inputCatalogForcedSrc, inputs['inputCatalogForcedSrc'])}
135 catalogs = {}
136 for band in measDict.keys():
137 catalogs[band] = {'meas': measDict[band]['meas'],
138 'forced_src': forcedSourceDict[band]['forced_src'],
139 'ref': inputs['inputCatalogRef']}
140 dataId = butlerQC.quantum.dataId
141 df = self.run(catalogs=catalogs, tract=dataId['tract'], patch=dataId['patch'])
142 outputs = pipeBase.Struct(outputCatalog=df)
143 butlerQC.put(outputs, outputRefs)
145 def run(self, catalogs, tract, patch):
146 """Merge multiple catalogs.
148 Parameters
149 ----------
150 catalogs : `dict`
151 Mapping from filter names to dict of catalogs.
152 tract : int
153 tractId to use for the tractId column.
154 patch : str
155 patchId to use for the patchId column.
157 Returns
158 -------
159 catalog : `pandas.DataFrame`
160 Merged dataframe.
161 """
163 dfs = []
164 for filt, tableDict in catalogs.items():
165 for dataset, table in tableDict.items():
166 # Convert afwTable to pandas DataFrame
167 df = table.asAstropy().to_pandas().set_index('id', drop=True)
169 # Sort columns by name, to ensure matching schema among patches
170 df = df.reindex(sorted(df.columns), axis=1)
171 df['tractId'] = tract
172 df['patchId'] = patch
174 # Make columns a 3-level MultiIndex
175 df.columns = pd.MultiIndex.from_tuples([(dataset, filt, c) for c in df.columns],
176 names=('dataset', 'band', 'column'))
177 dfs.append(df)
179 catalog = functools.reduce(lambda d1, d2: d1.join(d2), dfs)
180 return catalog
183class WriteSourceTableConnections(pipeBase.PipelineTaskConnections,
184 defaultTemplates={"catalogType": ""},
185 dimensions=("instrument", "visit", "detector")):
187 catalog = connectionTypes.Input(
188 doc="Input full-depth catalog of sources produced by CalibrateTask",
189 name="{catalogType}src",
190 storageClass="SourceCatalog",
191 dimensions=("instrument", "visit", "detector")
192 )
193 outputCatalog = connectionTypes.Output(
194 doc="Catalog of sources, `src` in Parquet format. The 'id' column is "
195 "replaced with an index; all other columns are unchanged.",
196 name="{catalogType}source",
197 storageClass="DataFrame",
198 dimensions=("instrument", "visit", "detector")
199 )
202class WriteSourceTableConfig(pipeBase.PipelineTaskConfig,
203 pipelineConnections=WriteSourceTableConnections):
204 pass
207class WriteSourceTableTask(pipeBase.PipelineTask):
208 """Write source table to parquet.
209 """
210 _DefaultName = "writeSourceTable"
211 ConfigClass = WriteSourceTableConfig
213 def runQuantum(self, butlerQC, inputRefs, outputRefs):
214 inputs = butlerQC.get(inputRefs)
215 inputs['ccdVisitId'] = butlerQC.quantum.dataId.pack("visit_detector")
216 result = self.run(**inputs).table
217 outputs = pipeBase.Struct(outputCatalog=result.toDataFrame())
218 butlerQC.put(outputs, outputRefs)
220 def run(self, catalog, ccdVisitId=None, **kwargs):
221 """Convert `src` catalog to parquet
223 Parameters
224 ----------
225 catalog: `afwTable.SourceCatalog`
226 catalog to be converted
227 ccdVisitId: `int`
228 ccdVisitId to be added as a column
230 Returns
231 -------
232 result : `lsst.pipe.base.Struct`
233 ``table``
234 `ParquetTable` version of the input catalog
235 """
236 self.log.info("Generating parquet table from src catalog ccdVisitId=%s", ccdVisitId)
237 df = catalog.asAstropy().to_pandas().set_index('id', drop=True)
238 df['ccdVisitId'] = ccdVisitId
239 return pipeBase.Struct(table=ParquetTable(dataFrame=df))
242class WriteRecalibratedSourceTableConnections(WriteSourceTableConnections,
243 defaultTemplates={"catalogType": "",
244 "skyWcsName": "jointcal",
245 "photoCalibName": "fgcm"},
246 dimensions=("instrument", "visit", "detector", "skymap")):
247 skyMap = connectionTypes.Input(
248 doc="skyMap needed to choose which tract-level calibrations to use when multiple available",
249 name=BaseSkyMap.SKYMAP_DATASET_TYPE_NAME,
250 storageClass="SkyMap",
251 dimensions=("skymap",),
252 )
253 exposure = connectionTypes.Input(
254 doc="Input exposure to perform photometry on.",
255 name="calexp",
256 storageClass="ExposureF",
257 dimensions=["instrument", "visit", "detector"],
258 )
259 externalSkyWcsTractCatalog = connectionTypes.Input(
260 doc=("Per-tract, per-visit wcs calibrations. These catalogs use the detector "
261 "id for the catalog id, sorted on id for fast lookup."),
262 name="{skyWcsName}SkyWcsCatalog",
263 storageClass="ExposureCatalog",
264 dimensions=["instrument", "visit", "tract"],
265 multiple=True
266 )
267 externalSkyWcsGlobalCatalog = connectionTypes.Input(
268 doc=("Per-visit wcs calibrations computed globally (with no tract information). "
269 "These catalogs use the detector id for the catalog id, sorted on id for "
270 "fast lookup."),
271 name="{skyWcsName}SkyWcsCatalog",
272 storageClass="ExposureCatalog",
273 dimensions=["instrument", "visit"],
274 )
275 externalPhotoCalibTractCatalog = connectionTypes.Input(
276 doc=("Per-tract, per-visit photometric calibrations. These catalogs use the "
277 "detector id for the catalog id, sorted on id for fast lookup."),
278 name="{photoCalibName}PhotoCalibCatalog",
279 storageClass="ExposureCatalog",
280 dimensions=["instrument", "visit", "tract"],
281 multiple=True
282 )
283 externalPhotoCalibGlobalCatalog = connectionTypes.Input(
284 doc=("Per-visit photometric calibrations computed globally (with no tract "
285 "information). These catalogs use the detector id for the catalog id, "
286 "sorted on id for fast lookup."),
287 name="{photoCalibName}PhotoCalibCatalog",
288 storageClass="ExposureCatalog",
289 dimensions=["instrument", "visit"],
290 )
292 def __init__(self, *, config=None):
293 super().__init__(config=config)
294 # Same connection boilerplate as all other applications of
295 # Global/Tract calibrations
296 if config.doApplyExternalSkyWcs and config.doReevaluateSkyWcs:
297 if config.useGlobalExternalSkyWcs:
298 self.inputs.remove("externalSkyWcsTractCatalog")
299 else:
300 self.inputs.remove("externalSkyWcsGlobalCatalog")
301 else:
302 self.inputs.remove("externalSkyWcsTractCatalog")
303 self.inputs.remove("externalSkyWcsGlobalCatalog")
304 if config.doApplyExternalPhotoCalib and config.doReevaluatePhotoCalib:
305 if config.useGlobalExternalPhotoCalib:
306 self.inputs.remove("externalPhotoCalibTractCatalog")
307 else:
308 self.inputs.remove("externalPhotoCalibGlobalCatalog")
309 else:
310 self.inputs.remove("externalPhotoCalibTractCatalog")
311 self.inputs.remove("externalPhotoCalibGlobalCatalog")
314class WriteRecalibratedSourceTableConfig(WriteSourceTableConfig,
315 pipelineConnections=WriteRecalibratedSourceTableConnections):
317 doReevaluatePhotoCalib = pexConfig.Field(
318 dtype=bool,
319 default=True,
320 doc=("Add or replace local photoCalib columns")
321 )
322 doReevaluateSkyWcs = pexConfig.Field(
323 dtype=bool,
324 default=True,
325 doc=("Add or replace local WCS columns and update the coord columns, coord_ra and coord_dec")
326 )
327 doApplyExternalPhotoCalib = pexConfig.Field(
328 dtype=bool,
329 default=True,
330 doc=("If and only if doReevaluatePhotoCalib, apply the photometric calibrations from an external ",
331 "algorithm such as FGCM or jointcal, else use the photoCalib already attached to the exposure."),
332 )
333 doApplyExternalSkyWcs = pexConfig.Field(
334 dtype=bool,
335 default=True,
336 doc=("if and only if doReevaluateSkyWcs, apply the WCS from an external algorithm such as jointcal, ",
337 "else use the wcs already attached to the exposure."),
338 )
339 useGlobalExternalPhotoCalib = pexConfig.Field(
340 dtype=bool,
341 default=True,
342 doc=("When using doApplyExternalPhotoCalib, use 'global' calibrations "
343 "that are not run per-tract. When False, use per-tract photometric "
344 "calibration files.")
345 )
346 useGlobalExternalSkyWcs = pexConfig.Field(
347 dtype=bool,
348 default=False,
349 doc=("When using doApplyExternalSkyWcs, use 'global' calibrations "
350 "that are not run per-tract. When False, use per-tract wcs "
351 "files.")
352 )
354 def validate(self):
355 super().validate()
356 if self.doApplyExternalSkyWcs and not self.doReevaluateSkyWcs:
357 log.warning("doApplyExternalSkyWcs=True but doReevaluateSkyWcs=False"
358 "External SkyWcs will not be read or evaluated.")
359 if self.doApplyExternalPhotoCalib and not self.doReevaluatePhotoCalib:
360 log.warning("doApplyExternalPhotoCalib=True but doReevaluatePhotoCalib=False."
361 "External PhotoCalib will not be read or evaluated.")
364class WriteRecalibratedSourceTableTask(WriteSourceTableTask):
365 """Write source table to parquet
366 """
367 _DefaultName = "writeRecalibratedSourceTable"
368 ConfigClass = WriteRecalibratedSourceTableConfig
370 def runQuantum(self, butlerQC, inputRefs, outputRefs):
371 inputs = butlerQC.get(inputRefs)
372 inputs['ccdVisitId'] = butlerQC.quantum.dataId.pack("visit_detector")
373 inputs['exposureIdInfo'] = ExposureIdInfo.fromDataId(butlerQC.quantum.dataId, "visit_detector")
375 if self.config.doReevaluatePhotoCalib or self.config.doReevaluateSkyWcs:
376 if self.config.doApplyExternalPhotoCalib or self.config.doApplyExternalSkyWcs:
377 inputs['exposure'] = self.attachCalibs(inputRefs, **inputs)
379 inputs['catalog'] = self.addCalibColumns(**inputs)
381 result = self.run(**inputs).table
382 outputs = pipeBase.Struct(outputCatalog=result.toDataFrame())
383 butlerQC.put(outputs, outputRefs)
385 def attachCalibs(self, inputRefs, skyMap, exposure, externalSkyWcsGlobalCatalog=None,
386 externalSkyWcsTractCatalog=None, externalPhotoCalibGlobalCatalog=None,
387 externalPhotoCalibTractCatalog=None, **kwargs):
388 """Apply external calibrations to exposure per configuration
390 When multiple tract-level calibrations overlap, select the one with the
391 center closest to detector.
393 Parameters
394 ----------
395 inputRefs : `lsst.pipe.base.InputQuantizedConnection`, for dataIds of
396 tract-level calibs.
397 skyMap : `lsst.skymap.SkyMap`
398 exposure : `lsst.afw.image.exposure.Exposure`
399 Input exposure to adjust calibrations.
400 externalSkyWcsGlobalCatalog : `lsst.afw.table.ExposureCatalog`, optional
401 Exposure catalog with external skyWcs to be applied per config
402 externalSkyWcsTractCatalog : `lsst.afw.table.ExposureCatalog`, optional
403 Exposure catalog with external skyWcs to be applied per config
404 externalPhotoCalibGlobalCatalog : `lsst.afw.table.ExposureCatalog`, optional
405 Exposure catalog with external photoCalib to be applied per config
406 externalPhotoCalibTractCatalog : `lsst.afw.table.ExposureCatalog`, optional
409 Returns
410 -------
411 exposure : `lsst.afw.image.exposure.Exposure`
412 Exposure with adjusted calibrations.
413 """
414 if not self.config.doApplyExternalSkyWcs:
415 # Do not modify the exposure's SkyWcs
416 externalSkyWcsCatalog = None
417 elif self.config.useGlobalExternalSkyWcs:
418 # Use the global external SkyWcs
419 externalSkyWcsCatalog = externalSkyWcsGlobalCatalog
420 self.log.info('Applying global SkyWcs')
421 else:
422 # use tract-level external SkyWcs from the closest overlapping tract
423 inputRef = getattr(inputRefs, 'externalSkyWcsTractCatalog')
424 tracts = [ref.dataId['tract'] for ref in inputRef]
425 if len(tracts) == 1:
426 ind = 0
427 self.log.info('Applying tract-level SkyWcs from tract %s', tracts[ind])
428 else:
429 ind = self.getClosestTract(tracts, skyMap,
430 exposure.getBBox(), exposure.getWcs())
431 self.log.info('Multiple overlapping externalSkyWcsTractCatalogs found (%s). '
432 'Applying closest to detector center: tract=%s', str(tracts), tracts[ind])
434 externalSkyWcsCatalog = externalSkyWcsTractCatalog[ind]
436 if not self.config.doApplyExternalPhotoCalib:
437 # Do not modify the exposure's PhotoCalib
438 externalPhotoCalibCatalog = None
439 elif self.config.useGlobalExternalPhotoCalib:
440 # Use the global external PhotoCalib
441 externalPhotoCalibCatalog = externalPhotoCalibGlobalCatalog
442 self.log.info('Applying global PhotoCalib')
443 else:
444 # use tract-level external PhotoCalib from the closest overlapping tract
445 inputRef = getattr(inputRefs, 'externalPhotoCalibTractCatalog')
446 tracts = [ref.dataId['tract'] for ref in inputRef]
447 if len(tracts) == 1:
448 ind = 0
449 self.log.info('Applying tract-level PhotoCalib from tract %s', tracts[ind])
450 else:
451 ind = self.getClosestTract(tracts, skyMap,
452 exposure.getBBox(), exposure.getWcs())
453 self.log.info('Multiple overlapping externalPhotoCalibTractCatalogs found (%s). '
454 'Applying closest to detector center: tract=%s', str(tracts), tracts[ind])
456 externalPhotoCalibCatalog = externalPhotoCalibTractCatalog[ind]
458 return self.prepareCalibratedExposure(exposure, externalSkyWcsCatalog, externalPhotoCalibCatalog)
460 def getClosestTract(self, tracts, skyMap, bbox, wcs):
461 """Find the index of the tract closest to detector from list of tractIds
463 Parameters
464 ----------
465 tracts: `list` [`int`]
466 Iterable of integer tractIds
467 skyMap : `lsst.skymap.SkyMap`
468 skyMap to lookup tract geometry and wcs
469 bbox : `lsst.geom.Box2I`
470 Detector bbox, center of which will compared to tract centers
471 wcs : `lsst.afw.geom.SkyWcs`
472 Detector Wcs object to map the detector center to SkyCoord
474 Returns
475 -------
476 index : `int`
477 """
478 if len(tracts) == 1:
479 return 0
481 center = wcs.pixelToSky(bbox.getCenter())
482 sep = []
483 for tractId in tracts:
484 tract = skyMap[tractId]
485 tractCenter = tract.getWcs().pixelToSky(tract.getBBox().getCenter())
486 sep.append(center.separation(tractCenter))
488 return np.argmin(sep)
490 def prepareCalibratedExposure(self, exposure, externalSkyWcsCatalog=None, externalPhotoCalibCatalog=None):
491 """Prepare a calibrated exposure and apply external calibrations
492 if so configured.
494 Parameters
495 ----------
496 exposure : `lsst.afw.image.exposure.Exposure`
497 Input exposure to adjust calibrations.
498 externalSkyWcsCatalog : `lsst.afw.table.ExposureCatalog`, optional
499 Exposure catalog with external skyWcs to be applied
500 if config.doApplyExternalSkyWcs=True. Catalog uses the detector id
501 for the catalog id, sorted on id for fast lookup.
502 externalPhotoCalibCatalog : `lsst.afw.table.ExposureCatalog`, optional
503 Exposure catalog with external photoCalib to be applied
504 if config.doApplyExternalPhotoCalib=True. Catalog uses the detector
505 id for the catalog id, sorted on id for fast lookup.
507 Returns
508 -------
509 exposure : `lsst.afw.image.exposure.Exposure`
510 Exposure with adjusted calibrations.
511 """
512 detectorId = exposure.getInfo().getDetector().getId()
514 if externalPhotoCalibCatalog is not None:
515 row = externalPhotoCalibCatalog.find(detectorId)
516 if row is None:
517 self.log.warning("Detector id %s not found in externalPhotoCalibCatalog; "
518 "Using original photoCalib.", detectorId)
519 else:
520 photoCalib = row.getPhotoCalib()
521 if photoCalib is None:
522 self.log.warning("Detector id %s has None for photoCalib in externalPhotoCalibCatalog; "
523 "Using original photoCalib.", detectorId)
524 else:
525 exposure.setPhotoCalib(photoCalib)
527 if externalSkyWcsCatalog is not None:
528 row = externalSkyWcsCatalog.find(detectorId)
529 if row is None:
530 self.log.warning("Detector id %s not found in externalSkyWcsCatalog; "
531 "Using original skyWcs.", detectorId)
532 else:
533 skyWcs = row.getWcs()
534 if skyWcs is None:
535 self.log.warning("Detector id %s has None for skyWcs in externalSkyWcsCatalog; "
536 "Using original skyWcs.", detectorId)
537 else:
538 exposure.setWcs(skyWcs)
540 return exposure
542 def addCalibColumns(self, catalog, exposure, exposureIdInfo, **kwargs):
543 """Add replace columns with calibs evaluated at each centroid
545 Add or replace 'base_LocalWcs' `base_LocalPhotoCalib' columns in a
546 a source catalog, by rerunning the plugins.
548 Parameters
549 ----------
550 catalog : `lsst.afw.table.SourceCatalog`
551 catalog to which calib columns will be added
552 exposure : `lsst.afw.image.exposure.Exposure`
553 Exposure with attached PhotoCalibs and SkyWcs attributes to be
554 reevaluated at local centroids. Pixels are not required.
555 exposureIdInfo : `lsst.obs.base.ExposureIdInfo`
557 Returns
558 -------
559 newCat: `lsst.afw.table.SourceCatalog`
560 Source Catalog with requested local calib columns
561 """
562 measureConfig = SingleFrameMeasurementTask.ConfigClass()
563 measureConfig.doReplaceWithNoise = False
565 measureConfig.plugins.names = []
566 if self.config.doReevaluateSkyWcs:
567 measureConfig.plugins.names.add('base_LocalWcs')
568 self.log.info("Re-evaluating base_LocalWcs plugin")
569 if self.config.doReevaluatePhotoCalib:
570 measureConfig.plugins.names.add('base_LocalPhotoCalib')
571 self.log.info("Re-evaluating base_LocalPhotoCalib plugin")
572 pluginsNotToCopy = tuple(measureConfig.plugins.names)
574 # Create a new schema and catalog
575 # Copy all columns from original except for the ones to reevaluate
576 aliasMap = catalog.schema.getAliasMap()
577 mapper = afwTable.SchemaMapper(catalog.schema)
578 for item in catalog.schema:
579 if not item.field.getName().startswith(pluginsNotToCopy):
580 mapper.addMapping(item.key)
582 schema = mapper.getOutputSchema()
583 measurement = SingleFrameMeasurementTask(config=measureConfig, schema=schema)
584 schema.setAliasMap(aliasMap)
585 newCat = afwTable.SourceCatalog(schema)
586 newCat.extend(catalog, mapper=mapper)
588 # Fluxes in sourceCatalogs are in counts, so there are no fluxes to
589 # update here. LocalPhotoCalibs are applied during transform tasks.
590 # Update coord_ra/coord_dec, which are expected to be positions on the
591 # sky and are used as such in sdm tables without transform
592 if self.config.doReevaluateSkyWcs:
593 afwTable.updateSourceCoords(exposure.wcs, newCat)
595 measurement.run(measCat=newCat, exposure=exposure, exposureId=exposureIdInfo.expId)
597 return newCat
600class PostprocessAnalysis(object):
601 """Calculate columns from ParquetTable.
603 This object manages and organizes an arbitrary set of computations
604 on a catalog. The catalog is defined by a
605 `lsst.pipe.tasks.parquetTable.ParquetTable` object (or list thereof), such
606 as a `deepCoadd_obj` dataset, and the computations are defined by a
607 collection of `lsst.pipe.tasks.functor.Functor` objects (or, equivalently,
608 a `CompositeFunctor`).
610 After the object is initialized, accessing the `.df` attribute (which
611 holds the `pandas.DataFrame` containing the results of the calculations)
612 triggers computation of said dataframe.
614 One of the conveniences of using this object is the ability to define a
615 desired common filter for all functors. This enables the same functor
616 collection to be passed to several different `PostprocessAnalysis` objects
617 without having to change the original functor collection, since the `filt`
618 keyword argument of this object triggers an overwrite of the `filt`
619 property for all functors in the collection.
621 This object also allows a list of refFlags to be passed, and defines a set
622 of default refFlags that are always included even if not requested.
624 If a list of `ParquetTable` object is passed, rather than a single one,
625 then the calculations will be mapped over all the input catalogs. In
626 principle, it should be straightforward to parallelize this activity, but
627 initial tests have failed (see TODO in code comments).
629 Parameters
630 ----------
631 parq : `lsst.pipe.tasks.ParquetTable` (or list of such)
632 Source catalog(s) for computation.
634 functors : `list`, `dict`, or `lsst.pipe.tasks.functors.CompositeFunctor`
635 Computations to do (functors that act on `parq`).
636 If a dict, the output
637 DataFrame will have columns keyed accordingly.
638 If a list, the column keys will come from the
639 `.shortname` attribute of each functor.
641 filt : `str`, optional
642 Filter in which to calculate. If provided,
643 this will overwrite any existing `.filt` attribute
644 of the provided functors.
646 flags : `list`, optional
647 List of flags (per-band) to include in output table.
648 Taken from the `meas` dataset if applied to a multilevel Object Table.
650 refFlags : `list`, optional
651 List of refFlags (only reference band) to include in output table.
653 forcedFlags : `list`, optional
654 List of flags (per-band) to include in output table.
655 Taken from the ``forced_src`` dataset if applied to a
656 multilevel Object Table. Intended for flags from measurement plugins
657 only run during multi-band forced-photometry.
658 """
659 _defaultRefFlags = []
660 _defaultFuncs = ()
662 def __init__(self, parq, functors, filt=None, flags=None, refFlags=None, forcedFlags=None):
663 self.parq = parq
664 self.functors = functors
666 self.filt = filt
667 self.flags = list(flags) if flags is not None else []
668 self.forcedFlags = list(forcedFlags) if forcedFlags is not None else []
669 self.refFlags = list(self._defaultRefFlags)
670 if refFlags is not None:
671 self.refFlags += list(refFlags)
673 self._df = None
675 @property
676 def defaultFuncs(self):
677 funcs = dict(self._defaultFuncs)
678 return funcs
680 @property
681 def func(self):
682 additionalFuncs = self.defaultFuncs
683 additionalFuncs.update({flag: Column(flag, dataset='forced_src') for flag in self.forcedFlags})
684 additionalFuncs.update({flag: Column(flag, dataset='ref') for flag in self.refFlags})
685 additionalFuncs.update({flag: Column(flag, dataset='meas') for flag in self.flags})
687 if isinstance(self.functors, CompositeFunctor):
688 func = self.functors
689 else:
690 func = CompositeFunctor(self.functors)
692 func.funcDict.update(additionalFuncs)
693 func.filt = self.filt
695 return func
697 @property
698 def noDupCols(self):
699 return [name for name, func in self.func.funcDict.items() if func.noDup or func.dataset == 'ref']
701 @property
702 def df(self):
703 if self._df is None:
704 self.compute()
705 return self._df
707 def compute(self, dropna=False, pool=None):
708 # map over multiple parquet tables
709 if type(self.parq) in (list, tuple):
710 if pool is None:
711 dflist = [self.func(parq, dropna=dropna) for parq in self.parq]
712 else:
713 # TODO: Figure out why this doesn't work (pyarrow pickling
714 # issues?)
715 dflist = pool.map(functools.partial(self.func, dropna=dropna), self.parq)
716 self._df = pd.concat(dflist)
717 else:
718 self._df = self.func(self.parq, dropna=dropna)
720 return self._df
723class TransformCatalogBaseConnections(pipeBase.PipelineTaskConnections,
724 dimensions=()):
725 """Expected Connections for subclasses of TransformCatalogBaseTask.
727 Must be subclassed.
728 """
729 inputCatalog = connectionTypes.Input(
730 name="",
731 storageClass="DataFrame",
732 )
733 outputCatalog = connectionTypes.Output(
734 name="",
735 storageClass="DataFrame",
736 )
739class TransformCatalogBaseConfig(pipeBase.PipelineTaskConfig,
740 pipelineConnections=TransformCatalogBaseConnections):
741 functorFile = pexConfig.Field(
742 dtype=str,
743 doc="Path to YAML file specifying Science Data Model functors to use "
744 "when copying columns and computing calibrated values.",
745 default=None,
746 optional=True
747 )
748 primaryKey = pexConfig.Field(
749 dtype=str,
750 doc="Name of column to be set as the DataFrame index. If None, the index"
751 "will be named `id`",
752 default=None,
753 optional=True
754 )
755 columnsFromDataId = pexConfig.ListField(
756 dtype=str,
757 default=None,
758 optional=True,
759 doc="Columns to extract from the dataId",
760 )
763class TransformCatalogBaseTask(pipeBase.PipelineTask):
764 """Base class for transforming/standardizing a catalog
766 by applying functors that convert units and apply calibrations.
767 The purpose of this task is to perform a set of computations on
768 an input `ParquetTable` dataset (such as `deepCoadd_obj`) and write the
769 results to a new dataset (which needs to be declared in an `outputDataset`
770 attribute).
772 The calculations to be performed are defined in a YAML file that specifies
773 a set of functors to be computed, provided as
774 a `--functorFile` config parameter. An example of such a YAML file
775 is the following:
777 funcs:
778 psfMag:
779 functor: Mag
780 args:
781 - base_PsfFlux
782 filt: HSC-G
783 dataset: meas
784 cmodel_magDiff:
785 functor: MagDiff
786 args:
787 - modelfit_CModel
788 - base_PsfFlux
789 filt: HSC-G
790 gauss_magDiff:
791 functor: MagDiff
792 args:
793 - base_GaussianFlux
794 - base_PsfFlux
795 filt: HSC-G
796 count:
797 functor: Column
798 args:
799 - base_InputCount_value
800 filt: HSC-G
801 deconvolved_moments:
802 functor: DeconvolvedMoments
803 filt: HSC-G
804 dataset: forced_src
805 refFlags:
806 - calib_psfUsed
807 - merge_measurement_i
808 - merge_measurement_r
809 - merge_measurement_z
810 - merge_measurement_y
811 - merge_measurement_g
812 - base_PixelFlags_flag_inexact_psfCenter
813 - detect_isPrimary
815 The names for each entry under "func" will become the names of columns in
816 the output dataset. All the functors referenced are defined in
817 `lsst.pipe.tasks.functors`. Positional arguments to be passed to each
818 functor are in the `args` list, and any additional entries for each column
819 other than "functor" or "args" (e.g., `'filt'`, `'dataset'`) are treated as
820 keyword arguments to be passed to the functor initialization.
822 The "flags" entry is the default shortcut for `Column` functors.
823 All columns listed under "flags" will be copied to the output table
824 untransformed. They can be of any datatype.
825 In the special case of transforming a multi-level oject table with
826 band and dataset indices (deepCoadd_obj), these will be taked from the
827 `meas` dataset and exploded out per band.
829 There are two special shortcuts that only apply when transforming
830 multi-level Object (deepCoadd_obj) tables:
831 - The "refFlags" entry is shortcut for `Column` functor
832 taken from the `'ref'` dataset if transforming an ObjectTable.
833 - The "forcedFlags" entry is shortcut for `Column` functors.
834 taken from the ``forced_src`` dataset if transforming an ObjectTable.
835 These are expanded out per band.
838 This task uses the `lsst.pipe.tasks.postprocess.PostprocessAnalysis` object
839 to organize and excecute the calculations.
840 """
841 @property
842 def _DefaultName(self):
843 raise NotImplementedError('Subclass must define "_DefaultName" attribute')
845 @property
846 def outputDataset(self):
847 raise NotImplementedError('Subclass must define "outputDataset" attribute')
849 @property
850 def inputDataset(self):
851 raise NotImplementedError('Subclass must define "inputDataset" attribute')
853 @property
854 def ConfigClass(self):
855 raise NotImplementedError('Subclass must define "ConfigClass" attribute')
857 def __init__(self, *args, **kwargs):
858 super().__init__(*args, **kwargs)
859 if self.config.functorFile:
860 self.log.info('Loading tranform functor definitions from %s',
861 self.config.functorFile)
862 self.funcs = CompositeFunctor.from_file(self.config.functorFile)
863 self.funcs.update(dict(PostprocessAnalysis._defaultFuncs))
864 else:
865 self.funcs = None
867 def runQuantum(self, butlerQC, inputRefs, outputRefs):
868 inputs = butlerQC.get(inputRefs)
869 if self.funcs is None:
870 raise ValueError("config.functorFile is None. "
871 "Must be a valid path to yaml in order to run Task as a PipelineTask.")
872 result = self.run(parq=inputs['inputCatalog'], funcs=self.funcs,
873 dataId=outputRefs.outputCatalog.dataId.full)
874 outputs = pipeBase.Struct(outputCatalog=result)
875 butlerQC.put(outputs, outputRefs)
877 def run(self, parq, funcs=None, dataId=None, band=None):
878 """Do postprocessing calculations
880 Takes a `ParquetTable` object and dataId,
881 returns a dataframe with results of postprocessing calculations.
883 Parameters
884 ----------
885 parq : `lsst.pipe.tasks.parquetTable.ParquetTable`
886 ParquetTable from which calculations are done.
887 funcs : `lsst.pipe.tasks.functors.Functors`
888 Functors to apply to the table's columns
889 dataId : dict, optional
890 Used to add a `patchId` column to the output dataframe.
891 band : `str`, optional
892 Filter band that is being processed.
894 Returns
895 ------
896 df : `pandas.DataFrame`
897 """
898 self.log.info("Transforming/standardizing the source table dataId: %s", dataId)
900 df = self.transform(band, parq, funcs, dataId).df
901 self.log.info("Made a table of %d columns and %d rows", len(df.columns), len(df))
902 return df
904 def getFunctors(self):
905 return self.funcs
907 def getAnalysis(self, parq, funcs=None, band=None):
908 if funcs is None:
909 funcs = self.funcs
910 analysis = PostprocessAnalysis(parq, funcs, filt=band)
911 return analysis
913 def transform(self, band, parq, funcs, dataId):
914 analysis = self.getAnalysis(parq, funcs=funcs, band=band)
915 df = analysis.df
916 if dataId and self.config.columnsFromDataId:
917 for key in self.config.columnsFromDataId:
918 if key in dataId:
919 df[str(key)] = dataId[key]
920 else:
921 raise ValueError(f"'{key}' in config.columnsFromDataId not found in dataId: {dataId}")
923 if self.config.primaryKey:
924 if df.index.name != self.config.primaryKey and self.config.primaryKey in df:
925 df.reset_index(inplace=True, drop=True)
926 df.set_index(self.config.primaryKey, inplace=True)
928 return pipeBase.Struct(
929 df=df,
930 analysis=analysis
931 )
934class TransformObjectCatalogConnections(pipeBase.PipelineTaskConnections,
935 defaultTemplates={"coaddName": "deep"},
936 dimensions=("tract", "patch", "skymap")):
937 inputCatalog = connectionTypes.Input(
938 doc="The vertical concatenation of the deepCoadd_{ref|meas|forced_src} catalogs, "
939 "stored as a DataFrame with a multi-level column index per-patch.",
940 dimensions=("tract", "patch", "skymap"),
941 storageClass="DataFrame",
942 name="{coaddName}Coadd_obj",
943 deferLoad=True,
944 )
945 outputCatalog = connectionTypes.Output(
946 doc="Per-Patch Object Table of columns transformed from the deepCoadd_obj table per the standard "
947 "data model.",
948 dimensions=("tract", "patch", "skymap"),
949 storageClass="DataFrame",
950 name="objectTable"
951 )
954class TransformObjectCatalogConfig(TransformCatalogBaseConfig,
955 pipelineConnections=TransformObjectCatalogConnections):
956 coaddName = pexConfig.Field(
957 dtype=str,
958 default="deep",
959 doc="Name of coadd"
960 )
961 # TODO: remove in DM-27177
962 filterMap = pexConfig.DictField(
963 keytype=str,
964 itemtype=str,
965 default={},
966 doc=("Dictionary mapping full filter name to short one for column name munging."
967 "These filters determine the output columns no matter what filters the "
968 "input data actually contain."),
969 deprecated=("Coadds are now identified by the band, so this transform is unused."
970 "Will be removed after v22.")
971 )
972 outputBands = pexConfig.ListField(
973 dtype=str,
974 default=None,
975 optional=True,
976 doc=("These bands and only these bands will appear in the output,"
977 " NaN-filled if the input does not include them."
978 " If None, then use all bands found in the input.")
979 )
980 camelCase = pexConfig.Field(
981 dtype=bool,
982 default=False,
983 doc=("Write per-band columns names with camelCase, else underscore "
984 "For example: gPsFlux instead of g_PsFlux.")
985 )
986 multilevelOutput = pexConfig.Field(
987 dtype=bool,
988 default=False,
989 doc=("Whether results dataframe should have a multilevel column index (True) or be flat "
990 "and name-munged (False).")
991 )
992 goodFlags = pexConfig.ListField(
993 dtype=str,
994 default=[],
995 doc=("List of 'good' flags that should be set False when populating empty tables. "
996 "All other flags are considered to be 'bad' flags and will be set to True.")
997 )
998 floatFillValue = pexConfig.Field(
999 dtype=float,
1000 default=np.nan,
1001 doc="Fill value for float fields when populating empty tables."
1002 )
1003 integerFillValue = pexConfig.Field(
1004 dtype=int,
1005 default=-1,
1006 doc="Fill value for integer fields when populating empty tables."
1007 )
1009 def setDefaults(self):
1010 super().setDefaults()
1011 self.functorFile = os.path.join('$PIPE_TASKS_DIR', 'schemas', 'Object.yaml')
1012 self.primaryKey = 'objectId'
1013 self.columnsFromDataId = ['tract', 'patch']
1014 self.goodFlags = ['calib_astrometry_used',
1015 'calib_photometry_reserved',
1016 'calib_photometry_used',
1017 'calib_psf_candidate',
1018 'calib_psf_reserved',
1019 'calib_psf_used']
1022class TransformObjectCatalogTask(TransformCatalogBaseTask):
1023 """Produce a flattened Object Table to match the format specified in
1024 sdm_schemas.
1026 Do the same set of postprocessing calculations on all bands.
1028 This is identical to `TransformCatalogBaseTask`, except for that it does
1029 the specified functor calculations for all filters present in the
1030 input `deepCoadd_obj` table. Any specific `"filt"` keywords specified
1031 by the YAML file will be superceded.
1032 """
1033 _DefaultName = "transformObjectCatalog"
1034 ConfigClass = TransformObjectCatalogConfig
1036 def run(self, parq, funcs=None, dataId=None, band=None):
1037 # NOTE: band kwarg is ignored here.
1038 dfDict = {}
1039 analysisDict = {}
1040 templateDf = pd.DataFrame()
1042 if isinstance(parq, DeferredDatasetHandle):
1043 columns = parq.get(component='columns')
1044 inputBands = columns.unique(level=1).values
1045 else:
1046 inputBands = parq.columnLevelNames['band']
1048 outputBands = self.config.outputBands if self.config.outputBands else inputBands
1050 # Perform transform for data of filters that exist in parq.
1051 for inputBand in inputBands:
1052 if inputBand not in outputBands:
1053 self.log.info("Ignoring %s band data in the input", inputBand)
1054 continue
1055 self.log.info("Transforming the catalog of band %s", inputBand)
1056 result = self.transform(inputBand, parq, funcs, dataId)
1057 dfDict[inputBand] = result.df
1058 analysisDict[inputBand] = result.analysis
1059 if templateDf.empty:
1060 templateDf = result.df
1062 # Put filler values in columns of other wanted bands
1063 for filt in outputBands:
1064 if filt not in dfDict:
1065 self.log.info("Adding empty columns for band %s", filt)
1066 dfTemp = templateDf.copy()
1067 for col in dfTemp.columns:
1068 testValue = dfTemp[col].values[0]
1069 if isinstance(testValue, (np.bool_, pd.BooleanDtype)):
1070 # Boolean flag type, check if it is a "good" flag
1071 if col in self.config.goodFlags:
1072 fillValue = False
1073 else:
1074 fillValue = True
1075 elif isinstance(testValue, numbers.Integral):
1076 # Checking numbers.Integral catches all flavors
1077 # of python, numpy, pandas, etc. integers.
1078 # We must ensure this is not an unsigned integer.
1079 if isinstance(testValue, np.unsignedinteger):
1080 raise ValueError("Parquet tables may not have unsigned integer columns.")
1081 else:
1082 fillValue = self.config.integerFillValue
1083 else:
1084 fillValue = self.config.floatFillValue
1085 dfTemp[col].values[:] = fillValue
1086 dfDict[filt] = dfTemp
1088 # This makes a multilevel column index, with band as first level
1089 df = pd.concat(dfDict, axis=1, names=['band', 'column'])
1091 if not self.config.multilevelOutput:
1092 noDupCols = list(set.union(*[set(v.noDupCols) for v in analysisDict.values()]))
1093 if self.config.primaryKey in noDupCols:
1094 noDupCols.remove(self.config.primaryKey)
1095 if dataId and self.config.columnsFromDataId:
1096 noDupCols += self.config.columnsFromDataId
1097 df = flattenFilters(df, noDupCols=noDupCols, camelCase=self.config.camelCase,
1098 inputBands=inputBands)
1100 self.log.info("Made a table of %d columns and %d rows", len(df.columns), len(df))
1102 return df
1105class ConsolidateObjectTableConnections(pipeBase.PipelineTaskConnections,
1106 dimensions=("tract", "skymap")):
1107 inputCatalogs = connectionTypes.Input(
1108 doc="Per-Patch objectTables conforming to the standard data model.",
1109 name="objectTable",
1110 storageClass="DataFrame",
1111 dimensions=("tract", "patch", "skymap"),
1112 multiple=True,
1113 )
1114 outputCatalog = connectionTypes.Output(
1115 doc="Pre-tract horizontal concatenation of the input objectTables",
1116 name="objectTable_tract",
1117 storageClass="DataFrame",
1118 dimensions=("tract", "skymap"),
1119 )
1122class ConsolidateObjectTableConfig(pipeBase.PipelineTaskConfig,
1123 pipelineConnections=ConsolidateObjectTableConnections):
1124 coaddName = pexConfig.Field(
1125 dtype=str,
1126 default="deep",
1127 doc="Name of coadd"
1128 )
1131class ConsolidateObjectTableTask(pipeBase.PipelineTask):
1132 """Write patch-merged source tables to a tract-level parquet file.
1134 Concatenates `objectTable` list into a per-visit `objectTable_tract`.
1135 """
1136 _DefaultName = "consolidateObjectTable"
1137 ConfigClass = ConsolidateObjectTableConfig
1139 inputDataset = 'objectTable'
1140 outputDataset = 'objectTable_tract'
1142 def runQuantum(self, butlerQC, inputRefs, outputRefs):
1143 inputs = butlerQC.get(inputRefs)
1144 self.log.info("Concatenating %s per-patch Object Tables",
1145 len(inputs['inputCatalogs']))
1146 df = pd.concat(inputs['inputCatalogs'])
1147 butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs)
1150class TransformSourceTableConnections(pipeBase.PipelineTaskConnections,
1151 defaultTemplates={"catalogType": ""},
1152 dimensions=("instrument", "visit", "detector")):
1154 inputCatalog = connectionTypes.Input(
1155 doc="Wide input catalog of sources produced by WriteSourceTableTask",
1156 name="{catalogType}source",
1157 storageClass="DataFrame",
1158 dimensions=("instrument", "visit", "detector"),
1159 deferLoad=True
1160 )
1161 outputCatalog = connectionTypes.Output(
1162 doc="Narrower, per-detector Source Table transformed and converted per a "
1163 "specified set of functors",
1164 name="{catalogType}sourceTable",
1165 storageClass="DataFrame",
1166 dimensions=("instrument", "visit", "detector")
1167 )
1170class TransformSourceTableConfig(TransformCatalogBaseConfig,
1171 pipelineConnections=TransformSourceTableConnections):
1173 def setDefaults(self):
1174 super().setDefaults()
1175 self.functorFile = os.path.join('$PIPE_TASKS_DIR', 'schemas', 'Source.yaml')
1176 self.primaryKey = 'sourceId'
1177 self.columnsFromDataId = ['visit', 'detector', 'band', 'physical_filter']
1180class TransformSourceTableTask(TransformCatalogBaseTask):
1181 """Transform/standardize a source catalog
1182 """
1183 _DefaultName = "transformSourceTable"
1184 ConfigClass = TransformSourceTableConfig
1187class ConsolidateVisitSummaryConnections(pipeBase.PipelineTaskConnections,
1188 dimensions=("instrument", "visit",),
1189 defaultTemplates={"calexpType": ""}):
1190 calexp = connectionTypes.Input(
1191 doc="Processed exposures used for metadata",
1192 name="{calexpType}calexp",
1193 storageClass="ExposureF",
1194 dimensions=("instrument", "visit", "detector"),
1195 deferLoad=True,
1196 multiple=True,
1197 )
1198 visitSummary = connectionTypes.Output(
1199 doc=("Per-visit consolidated exposure metadata. These catalogs use "
1200 "detector id for the id and are sorted for fast lookups of a "
1201 "detector."),
1202 name="{calexpType}visitSummary",
1203 storageClass="ExposureCatalog",
1204 dimensions=("instrument", "visit"),
1205 )
1208class ConsolidateVisitSummaryConfig(pipeBase.PipelineTaskConfig,
1209 pipelineConnections=ConsolidateVisitSummaryConnections):
1210 """Config for ConsolidateVisitSummaryTask"""
1211 pass
1214class ConsolidateVisitSummaryTask(pipeBase.PipelineTask):
1215 """Task to consolidate per-detector visit metadata.
1217 This task aggregates the following metadata from all the detectors in a
1218 single visit into an exposure catalog:
1219 - The visitInfo.
1220 - The wcs.
1221 - The photoCalib.
1222 - The physical_filter and band (if available).
1223 - The psf size, shape, and effective area at the center of the detector.
1224 - The corners of the bounding box in right ascension/declination.
1226 Other quantities such as Detector, Psf, ApCorrMap, and TransmissionCurve
1227 are not persisted here because of storage concerns, and because of their
1228 limited utility as summary statistics.
1230 Tests for this task are performed in ci_hsc_gen3.
1231 """
1232 _DefaultName = "consolidateVisitSummary"
1233 ConfigClass = ConsolidateVisitSummaryConfig
1235 def runQuantum(self, butlerQC, inputRefs, outputRefs):
1236 dataRefs = butlerQC.get(inputRefs.calexp)
1237 visit = dataRefs[0].dataId.byName()['visit']
1239 self.log.debug("Concatenating metadata from %d per-detector calexps (visit %d)",
1240 len(dataRefs), visit)
1242 expCatalog = self._combineExposureMetadata(visit, dataRefs)
1244 butlerQC.put(expCatalog, outputRefs.visitSummary)
1246 def _combineExposureMetadata(self, visit, dataRefs):
1247 """Make a combined exposure catalog from a list of dataRefs.
1248 These dataRefs must point to exposures with wcs, summaryStats,
1249 and other visit metadata.
1251 Parameters
1252 ----------
1253 visit : `int`
1254 Visit identification number.
1255 dataRefs : `list` of `lsst.daf.butler.DeferredDatasetHandle`
1256 List of dataRefs in visit.
1258 Returns
1259 -------
1260 visitSummary : `lsst.afw.table.ExposureCatalog`
1261 Exposure catalog with per-detector summary information.
1262 """
1263 schema = self._makeVisitSummarySchema()
1264 cat = afwTable.ExposureCatalog(schema)
1265 cat.resize(len(dataRefs))
1267 cat['visit'] = visit
1269 for i, dataRef in enumerate(dataRefs):
1270 visitInfo = dataRef.get(component='visitInfo')
1271 filterLabel = dataRef.get(component='filter')
1272 summaryStats = dataRef.get(component='summaryStats')
1273 detector = dataRef.get(component='detector')
1274 wcs = dataRef.get(component='wcs')
1275 photoCalib = dataRef.get(component='photoCalib')
1276 detector = dataRef.get(component='detector')
1277 bbox = dataRef.get(component='bbox')
1278 validPolygon = dataRef.get(component='validPolygon')
1280 rec = cat[i]
1281 rec.setBBox(bbox)
1282 rec.setVisitInfo(visitInfo)
1283 rec.setWcs(wcs)
1284 rec.setPhotoCalib(photoCalib)
1285 rec.setValidPolygon(validPolygon)
1287 rec['physical_filter'] = filterLabel.physicalLabel if filterLabel.hasPhysicalLabel() else ""
1288 rec['band'] = filterLabel.bandLabel if filterLabel.hasBandLabel() else ""
1289 rec.setId(detector.getId())
1290 rec['psfSigma'] = summaryStats.psfSigma
1291 rec['psfIxx'] = summaryStats.psfIxx
1292 rec['psfIyy'] = summaryStats.psfIyy
1293 rec['psfIxy'] = summaryStats.psfIxy
1294 rec['psfArea'] = summaryStats.psfArea
1295 rec['raCorners'][:] = summaryStats.raCorners
1296 rec['decCorners'][:] = summaryStats.decCorners
1297 rec['ra'] = summaryStats.ra
1298 rec['decl'] = summaryStats.decl
1299 rec['zenithDistance'] = summaryStats.zenithDistance
1300 rec['zeroPoint'] = summaryStats.zeroPoint
1301 rec['skyBg'] = summaryStats.skyBg
1302 rec['skyNoise'] = summaryStats.skyNoise
1303 rec['meanVar'] = summaryStats.meanVar
1304 rec['astromOffsetMean'] = summaryStats.astromOffsetMean
1305 rec['astromOffsetStd'] = summaryStats.astromOffsetStd
1306 rec['nPsfStar'] = summaryStats.nPsfStar
1307 rec['psfStarDeltaE1Median'] = summaryStats.psfStarDeltaE1Median
1308 rec['psfStarDeltaE2Median'] = summaryStats.psfStarDeltaE2Median
1309 rec['psfStarDeltaE1Scatter'] = summaryStats.psfStarDeltaE1Scatter
1310 rec['psfStarDeltaE2Scatter'] = summaryStats.psfStarDeltaE2Scatter
1311 rec['psfStarDeltaSizeMedian'] = summaryStats.psfStarDeltaSizeMedian
1312 rec['psfStarDeltaSizeScatter'] = summaryStats.psfStarDeltaSizeScatter
1313 rec['psfStarScaledDeltaSizeScatter'] = summaryStats.psfStarScaledDeltaSizeScatter
1315 metadata = dafBase.PropertyList()
1316 metadata.add("COMMENT", "Catalog id is detector id, sorted.")
1317 # We are looping over existing datarefs, so the following is true
1318 metadata.add("COMMENT", "Only detectors with data have entries.")
1319 cat.setMetadata(metadata)
1321 cat.sort()
1322 return cat
1324 def _makeVisitSummarySchema(self):
1325 """Make the schema for the visitSummary catalog."""
1326 schema = afwTable.ExposureTable.makeMinimalSchema()
1327 schema.addField('visit', type='L', doc='Visit number')
1328 schema.addField('physical_filter', type='String', size=32, doc='Physical filter')
1329 schema.addField('band', type='String', size=32, doc='Name of band')
1330 schema.addField('psfSigma', type='F',
1331 doc='PSF model second-moments determinant radius (center of chip) (pixel)')
1332 schema.addField('psfArea', type='F',
1333 doc='PSF model effective area (center of chip) (pixel**2)')
1334 schema.addField('psfIxx', type='F',
1335 doc='PSF model Ixx (center of chip) (pixel**2)')
1336 schema.addField('psfIyy', type='F',
1337 doc='PSF model Iyy (center of chip) (pixel**2)')
1338 schema.addField('psfIxy', type='F',
1339 doc='PSF model Ixy (center of chip) (pixel**2)')
1340 schema.addField('raCorners', type='ArrayD', size=4,
1341 doc='Right Ascension of bounding box corners (degrees)')
1342 schema.addField('decCorners', type='ArrayD', size=4,
1343 doc='Declination of bounding box corners (degrees)')
1344 schema.addField('ra', type='D',
1345 doc='Right Ascension of bounding box center (degrees)')
1346 schema.addField('decl', type='D',
1347 doc='Declination of bounding box center (degrees)')
1348 schema.addField('zenithDistance', type='F',
1349 doc='Zenith distance of bounding box center (degrees)')
1350 schema.addField('zeroPoint', type='F',
1351 doc='Mean zeropoint in detector (mag)')
1352 schema.addField('skyBg', type='F',
1353 doc='Average sky background (ADU)')
1354 schema.addField('skyNoise', type='F',
1355 doc='Average sky noise (ADU)')
1356 schema.addField('meanVar', type='F',
1357 doc='Mean variance of the weight plane (ADU**2)')
1358 schema.addField('astromOffsetMean', type='F',
1359 doc='Mean offset of astrometric calibration matches (arcsec)')
1360 schema.addField('astromOffsetStd', type='F',
1361 doc='Standard deviation of offsets of astrometric calibration matches (arcsec)')
1362 schema.addField('nPsfStar', type='I', doc='Number of stars used for PSF model')
1363 schema.addField('psfStarDeltaE1Median', type='F',
1364 doc='Median E1 residual (starE1 - psfE1) for psf stars')
1365 schema.addField('psfStarDeltaE2Median', type='F',
1366 doc='Median E2 residual (starE2 - psfE2) for psf stars')
1367 schema.addField('psfStarDeltaE1Scatter', type='F',
1368 doc='Scatter (via MAD) of E1 residual (starE1 - psfE1) for psf stars')
1369 schema.addField('psfStarDeltaE2Scatter', type='F',
1370 doc='Scatter (via MAD) of E2 residual (starE2 - psfE2) for psf stars')
1371 schema.addField('psfStarDeltaSizeMedian', type='F',
1372 doc='Median size residual (starSize - psfSize) for psf stars (pixel)')
1373 schema.addField('psfStarDeltaSizeScatter', type='F',
1374 doc='Scatter (via MAD) of size residual (starSize - psfSize) for psf stars (pixel)')
1375 schema.addField('psfStarScaledDeltaSizeScatter', type='F',
1376 doc='Scatter (via MAD) of size residual scaled by median size squared')
1378 return schema
1381class ConsolidateSourceTableConnections(pipeBase.PipelineTaskConnections,
1382 defaultTemplates={"catalogType": ""},
1383 dimensions=("instrument", "visit")):
1384 inputCatalogs = connectionTypes.Input(
1385 doc="Input per-detector Source Tables",
1386 name="{catalogType}sourceTable",
1387 storageClass="DataFrame",
1388 dimensions=("instrument", "visit", "detector"),
1389 multiple=True
1390 )
1391 outputCatalog = connectionTypes.Output(
1392 doc="Per-visit concatenation of Source Table",
1393 name="{catalogType}sourceTable_visit",
1394 storageClass="DataFrame",
1395 dimensions=("instrument", "visit")
1396 )
1399class ConsolidateSourceTableConfig(pipeBase.PipelineTaskConfig,
1400 pipelineConnections=ConsolidateSourceTableConnections):
1401 pass
1404class ConsolidateSourceTableTask(pipeBase.PipelineTask):
1405 """Concatenate `sourceTable` list into a per-visit `sourceTable_visit`
1406 """
1407 _DefaultName = 'consolidateSourceTable'
1408 ConfigClass = ConsolidateSourceTableConfig
1410 inputDataset = 'sourceTable'
1411 outputDataset = 'sourceTable_visit'
1413 def runQuantum(self, butlerQC, inputRefs, outputRefs):
1414 from .makeWarp import reorderRefs
1416 detectorOrder = [ref.dataId['detector'] for ref in inputRefs.inputCatalogs]
1417 detectorOrder.sort()
1418 inputRefs = reorderRefs(inputRefs, detectorOrder, dataIdKey='detector')
1419 inputs = butlerQC.get(inputRefs)
1420 self.log.info("Concatenating %s per-detector Source Tables",
1421 len(inputs['inputCatalogs']))
1422 df = pd.concat(inputs['inputCatalogs'])
1423 butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs)
1426class MakeCcdVisitTableConnections(pipeBase.PipelineTaskConnections,
1427 dimensions=("instrument",),
1428 defaultTemplates={"calexpType": ""}):
1429 visitSummaryRefs = connectionTypes.Input(
1430 doc="Data references for per-visit consolidated exposure metadata from ConsolidateVisitSummaryTask",
1431 name="{calexpType}visitSummary",
1432 storageClass="ExposureCatalog",
1433 dimensions=("instrument", "visit"),
1434 multiple=True,
1435 deferLoad=True,
1436 )
1437 outputCatalog = connectionTypes.Output(
1438 doc="CCD and Visit metadata table",
1439 name="ccdVisitTable",
1440 storageClass="DataFrame",
1441 dimensions=("instrument",)
1442 )
1445class MakeCcdVisitTableConfig(pipeBase.PipelineTaskConfig,
1446 pipelineConnections=MakeCcdVisitTableConnections):
1447 pass
1450class MakeCcdVisitTableTask(pipeBase.PipelineTask):
1451 """Produce a `ccdVisitTable` from the `visitSummary` exposure catalogs.
1452 """
1453 _DefaultName = 'makeCcdVisitTable'
1454 ConfigClass = MakeCcdVisitTableConfig
1456 def run(self, visitSummaryRefs):
1457 """Make a table of ccd information from the `visitSummary` catalogs.
1459 Parameters
1460 ----------
1461 visitSummaryRefs : `list` of `lsst.daf.butler.DeferredDatasetHandle`
1462 List of DeferredDatasetHandles pointing to exposure catalogs with
1463 per-detector summary information.
1465 Returns
1466 -------
1467 result : `lsst.pipe.Base.Struct`
1468 Results struct with attribute:
1470 ``outputCatalog``
1471 Catalog of ccd and visit information.
1472 """
1473 ccdEntries = []
1474 for visitSummaryRef in visitSummaryRefs:
1475 visitSummary = visitSummaryRef.get()
1476 visitInfo = visitSummary[0].getVisitInfo()
1478 ccdEntry = {}
1479 summaryTable = visitSummary.asAstropy()
1480 selectColumns = ['id', 'visit', 'physical_filter', 'band', 'ra', 'decl', 'zenithDistance',
1481 'zeroPoint', 'psfSigma', 'skyBg', 'skyNoise',
1482 'astromOffsetMean', 'astromOffsetStd', 'nPsfStar',
1483 'psfStarDeltaE1Median', 'psfStarDeltaE2Median',
1484 'psfStarDeltaE1Scatter', 'psfStarDeltaE2Scatter',
1485 'psfStarDeltaSizeMedian', 'psfStarDeltaSizeScatter',
1486 'psfStarScaledDeltaSizeScatter']
1487 ccdEntry = summaryTable[selectColumns].to_pandas().set_index('id')
1488 # 'visit' is the human readable visit number.
1489 # 'visitId' is the key to the visitId table. They are the same.
1490 # Technically you should join to get the visit from the visit
1491 # table.
1492 ccdEntry = ccdEntry.rename(columns={"visit": "visitId"})
1493 dataIds = [DataCoordinate.standardize(visitSummaryRef.dataId, detector=id) for id in
1494 summaryTable['id']]
1495 packer = visitSummaryRef.dataId.universe.makePacker('visit_detector', visitSummaryRef.dataId)
1496 ccdVisitIds = [packer.pack(dataId) for dataId in dataIds]
1497 ccdEntry['ccdVisitId'] = ccdVisitIds
1498 ccdEntry['detector'] = summaryTable['id']
1499 pixToArcseconds = np.array([vR.getWcs().getPixelScale().asArcseconds() for vR in visitSummary])
1500 ccdEntry["seeing"] = visitSummary['psfSigma'] * np.sqrt(8 * np.log(2)) * pixToArcseconds
1502 ccdEntry["skyRotation"] = visitInfo.getBoresightRotAngle().asDegrees()
1503 ccdEntry["expMidpt"] = visitInfo.getDate().toPython()
1504 ccdEntry["expMidptMJD"] = visitInfo.getDate().get(dafBase.DateTime.MJD)
1505 expTime = visitInfo.getExposureTime()
1506 ccdEntry['expTime'] = expTime
1507 ccdEntry["obsStart"] = ccdEntry["expMidpt"] - 0.5 * pd.Timedelta(seconds=expTime)
1508 expTime_days = expTime / (60*60*24)
1509 ccdEntry["obsStartMJD"] = ccdEntry["expMidptMJD"] - 0.5 * expTime_days
1510 ccdEntry['darkTime'] = visitInfo.getDarkTime()
1511 ccdEntry['xSize'] = summaryTable['bbox_max_x'] - summaryTable['bbox_min_x']
1512 ccdEntry['ySize'] = summaryTable['bbox_max_y'] - summaryTable['bbox_min_y']
1513 ccdEntry['llcra'] = summaryTable['raCorners'][:, 0]
1514 ccdEntry['llcdec'] = summaryTable['decCorners'][:, 0]
1515 ccdEntry['ulcra'] = summaryTable['raCorners'][:, 1]
1516 ccdEntry['ulcdec'] = summaryTable['decCorners'][:, 1]
1517 ccdEntry['urcra'] = summaryTable['raCorners'][:, 2]
1518 ccdEntry['urcdec'] = summaryTable['decCorners'][:, 2]
1519 ccdEntry['lrcra'] = summaryTable['raCorners'][:, 3]
1520 ccdEntry['lrcdec'] = summaryTable['decCorners'][:, 3]
1521 # TODO: DM-30618, Add raftName, nExposures, ccdTemp, binX, binY,
1522 # and flags, and decide if WCS, and llcx, llcy, ulcx, ulcy, etc.
1523 # values are actually wanted.
1524 ccdEntries.append(ccdEntry)
1526 outputCatalog = pd.concat(ccdEntries)
1527 outputCatalog.set_index('ccdVisitId', inplace=True, verify_integrity=True)
1528 return pipeBase.Struct(outputCatalog=outputCatalog)
1531class MakeVisitTableConnections(pipeBase.PipelineTaskConnections,
1532 dimensions=("instrument",),
1533 defaultTemplates={"calexpType": ""}):
1534 visitSummaries = connectionTypes.Input(
1535 doc="Per-visit consolidated exposure metadata from ConsolidateVisitSummaryTask",
1536 name="{calexpType}visitSummary",
1537 storageClass="ExposureCatalog",
1538 dimensions=("instrument", "visit",),
1539 multiple=True,
1540 deferLoad=True,
1541 )
1542 outputCatalog = connectionTypes.Output(
1543 doc="Visit metadata table",
1544 name="visitTable",
1545 storageClass="DataFrame",
1546 dimensions=("instrument",)
1547 )
1550class MakeVisitTableConfig(pipeBase.PipelineTaskConfig,
1551 pipelineConnections=MakeVisitTableConnections):
1552 pass
1555class MakeVisitTableTask(pipeBase.PipelineTask):
1556 """Produce a `visitTable` from the `visitSummary` exposure catalogs.
1557 """
1558 _DefaultName = 'makeVisitTable'
1559 ConfigClass = MakeVisitTableConfig
1561 def run(self, visitSummaries):
1562 """Make a table of visit information from the `visitSummary` catalogs.
1564 Parameters
1565 ----------
1566 visitSummaries : `list` of `lsst.afw.table.ExposureCatalog`
1567 List of exposure catalogs with per-detector summary information.
1568 Returns
1569 -------
1570 result : `lsst.pipe.Base.Struct`
1571 Results struct with attribute:
1573 ``outputCatalog``
1574 Catalog of visit information.
1575 """
1576 visitEntries = []
1577 for visitSummary in visitSummaries:
1578 visitSummary = visitSummary.get()
1579 visitRow = visitSummary[0]
1580 visitInfo = visitRow.getVisitInfo()
1582 visitEntry = {}
1583 visitEntry["visitId"] = visitRow['visit']
1584 visitEntry["visit"] = visitRow['visit']
1585 visitEntry["physical_filter"] = visitRow['physical_filter']
1586 visitEntry["band"] = visitRow['band']
1587 raDec = visitInfo.getBoresightRaDec()
1588 visitEntry["ra"] = raDec.getRa().asDegrees()
1589 visitEntry["decl"] = raDec.getDec().asDegrees()
1590 visitEntry["skyRotation"] = visitInfo.getBoresightRotAngle().asDegrees()
1591 azAlt = visitInfo.getBoresightAzAlt()
1592 visitEntry["azimuth"] = azAlt.getLongitude().asDegrees()
1593 visitEntry["altitude"] = azAlt.getLatitude().asDegrees()
1594 visitEntry["zenithDistance"] = 90 - azAlt.getLatitude().asDegrees()
1595 visitEntry["airmass"] = visitInfo.getBoresightAirmass()
1596 expTime = visitInfo.getExposureTime()
1597 visitEntry["expTime"] = expTime
1598 visitEntry["expMidpt"] = visitInfo.getDate().toPython()
1599 visitEntry["expMidptMJD"] = visitInfo.getDate().get(dafBase.DateTime.MJD)
1600 visitEntry["obsStart"] = visitEntry["expMidpt"] - 0.5 * pd.Timedelta(seconds=expTime)
1601 expTime_days = expTime / (60*60*24)
1602 visitEntry["obsStartMJD"] = visitEntry["expMidptMJD"] - 0.5 * expTime_days
1603 visitEntries.append(visitEntry)
1605 # TODO: DM-30623, Add programId, exposureType, cameraTemp,
1606 # mirror1Temp, mirror2Temp, mirror3Temp, domeTemp, externalTemp,
1607 # dimmSeeing, pwvGPS, pwvMW, flags, nExposures.
1609 outputCatalog = pd.DataFrame(data=visitEntries)
1610 outputCatalog.set_index('visitId', inplace=True, verify_integrity=True)
1611 return pipeBase.Struct(outputCatalog=outputCatalog)
1614class WriteForcedSourceTableConnections(pipeBase.PipelineTaskConnections,
1615 dimensions=("instrument", "visit", "detector", "skymap", "tract")):
1617 inputCatalog = connectionTypes.Input(
1618 doc="Primary per-detector, single-epoch forced-photometry catalog. "
1619 "By default, it is the output of ForcedPhotCcdTask on calexps",
1620 name="forced_src",
1621 storageClass="SourceCatalog",
1622 dimensions=("instrument", "visit", "detector", "skymap", "tract")
1623 )
1624 inputCatalogDiff = connectionTypes.Input(
1625 doc="Secondary multi-epoch, per-detector, forced photometry catalog. "
1626 "By default, it is the output of ForcedPhotCcdTask run on image differences.",
1627 name="forced_diff",
1628 storageClass="SourceCatalog",
1629 dimensions=("instrument", "visit", "detector", "skymap", "tract")
1630 )
1631 outputCatalog = connectionTypes.Output(
1632 doc="InputCatalogs horizonatally joined on `objectId` in Parquet format",
1633 name="mergedForcedSource",
1634 storageClass="DataFrame",
1635 dimensions=("instrument", "visit", "detector", "skymap", "tract")
1636 )
1639class WriteForcedSourceTableConfig(pipeBase.PipelineTaskConfig,
1640 pipelineConnections=WriteForcedSourceTableConnections):
1641 key = lsst.pex.config.Field(
1642 doc="Column on which to join the two input tables on and make the primary key of the output",
1643 dtype=str,
1644 default="objectId",
1645 )
1648class WriteForcedSourceTableTask(pipeBase.PipelineTask):
1649 """Merge and convert per-detector forced source catalogs to parquet.
1651 Because the predecessor ForcedPhotCcdTask operates per-detector,
1652 per-tract, (i.e., it has tract in its dimensions), detectors
1653 on the tract boundary may have multiple forced source catalogs.
1655 The successor task TransformForcedSourceTable runs per-patch
1656 and temporally-aggregates overlapping mergedForcedSource catalogs from all
1657 available multiple epochs.
1658 """
1659 _DefaultName = "writeForcedSourceTable"
1660 ConfigClass = WriteForcedSourceTableConfig
1662 def runQuantum(self, butlerQC, inputRefs, outputRefs):
1663 inputs = butlerQC.get(inputRefs)
1664 # Add ccdVisitId to allow joining with CcdVisitTable
1665 inputs['ccdVisitId'] = butlerQC.quantum.dataId.pack("visit_detector")
1666 inputs['band'] = butlerQC.quantum.dataId.full['band']
1667 outputs = self.run(**inputs)
1668 butlerQC.put(outputs, outputRefs)
1670 def run(self, inputCatalog, inputCatalogDiff, ccdVisitId=None, band=None):
1671 dfs = []
1672 for table, dataset, in zip((inputCatalog, inputCatalogDiff), ('calexp', 'diff')):
1673 df = table.asAstropy().to_pandas().set_index(self.config.key, drop=False)
1674 df = df.reindex(sorted(df.columns), axis=1)
1675 df['ccdVisitId'] = ccdVisitId if ccdVisitId else pd.NA
1676 df['band'] = band if band else pd.NA
1677 df.columns = pd.MultiIndex.from_tuples([(dataset, c) for c in df.columns],
1678 names=('dataset', 'column'))
1680 dfs.append(df)
1682 outputCatalog = functools.reduce(lambda d1, d2: d1.join(d2), dfs)
1683 return pipeBase.Struct(outputCatalog=outputCatalog)
1686class TransformForcedSourceTableConnections(pipeBase.PipelineTaskConnections,
1687 dimensions=("instrument", "skymap", "patch", "tract")):
1689 inputCatalogs = connectionTypes.Input(
1690 doc="Parquet table of merged ForcedSources produced by WriteForcedSourceTableTask",
1691 name="mergedForcedSource",
1692 storageClass="DataFrame",
1693 dimensions=("instrument", "visit", "detector", "skymap", "tract"),
1694 multiple=True,
1695 deferLoad=True
1696 )
1697 referenceCatalog = connectionTypes.Input(
1698 doc="Reference catalog which was used to seed the forcedPhot. Columns "
1699 "objectId, detect_isPrimary, detect_isTractInner, detect_isPatchInner "
1700 "are expected.",
1701 name="objectTable",
1702 storageClass="DataFrame",
1703 dimensions=("tract", "patch", "skymap"),
1704 deferLoad=True
1705 )
1706 outputCatalog = connectionTypes.Output(
1707 doc="Narrower, temporally-aggregated, per-patch ForcedSource Table transformed and converted per a "
1708 "specified set of functors",
1709 name="forcedSourceTable",
1710 storageClass="DataFrame",
1711 dimensions=("tract", "patch", "skymap")
1712 )
1715class TransformForcedSourceTableConfig(TransformCatalogBaseConfig,
1716 pipelineConnections=TransformForcedSourceTableConnections):
1717 referenceColumns = pexConfig.ListField(
1718 dtype=str,
1719 default=["detect_isPrimary", "detect_isTractInner", "detect_isPatchInner"],
1720 optional=True,
1721 doc="Columns to pull from reference catalog",
1722 )
1723 keyRef = lsst.pex.config.Field(
1724 doc="Column on which to join the two input tables on and make the primary key of the output",
1725 dtype=str,
1726 default="objectId",
1727 )
1728 key = lsst.pex.config.Field(
1729 doc="Rename the output DataFrame index to this name",
1730 dtype=str,
1731 default="forcedSourceId",
1732 )
1734 def setDefaults(self):
1735 super().setDefaults()
1736 self.functorFile = os.path.join('$PIPE_TASKS_DIR', 'schemas', 'ForcedSource.yaml')
1737 self.columnsFromDataId = ['tract', 'patch']
1740class TransformForcedSourceTableTask(TransformCatalogBaseTask):
1741 """Transform/standardize a ForcedSource catalog
1743 Transforms each wide, per-detector forcedSource parquet table per the
1744 specification file (per-camera defaults found in ForcedSource.yaml).
1745 All epochs that overlap the patch are aggregated into one per-patch
1746 narrow-parquet file.
1748 No de-duplication of rows is performed. Duplicate resolutions flags are
1749 pulled in from the referenceCatalog: `detect_isPrimary`,
1750 `detect_isTractInner`,`detect_isPatchInner`, so that user may de-duplicate
1751 for analysis or compare duplicates for QA.
1753 The resulting table includes multiple bands. Epochs (MJDs) and other useful
1754 per-visit rows can be retreived by joining with the CcdVisitTable on
1755 ccdVisitId.
1756 """
1757 _DefaultName = "transformForcedSourceTable"
1758 ConfigClass = TransformForcedSourceTableConfig
1760 def runQuantum(self, butlerQC, inputRefs, outputRefs):
1761 inputs = butlerQC.get(inputRefs)
1762 if self.funcs is None:
1763 raise ValueError("config.functorFile is None. "
1764 "Must be a valid path to yaml in order to run Task as a PipelineTask.")
1765 outputs = self.run(inputs['inputCatalogs'], inputs['referenceCatalog'], funcs=self.funcs,
1766 dataId=outputRefs.outputCatalog.dataId.full)
1768 butlerQC.put(outputs, outputRefs)
1770 def run(self, inputCatalogs, referenceCatalog, funcs=None, dataId=None, band=None):
1771 dfs = []
1772 ref = referenceCatalog.get(parameters={"columns": self.config.referenceColumns})
1773 self.log.info("Aggregating %s input catalogs" % (len(inputCatalogs)))
1774 for handle in inputCatalogs:
1775 result = self.transform(None, handle, funcs, dataId)
1776 # Filter for only rows that were detected on (overlap) the patch
1777 dfs.append(result.df.join(ref, how='inner'))
1779 outputCatalog = pd.concat(dfs)
1781 # Now that we are done joining on config.keyRef
1782 # Change index to config.key by
1783 outputCatalog.index.rename(self.config.keyRef, inplace=True)
1784 # Add config.keyRef to the column list
1785 outputCatalog.reset_index(inplace=True)
1786 # Set the forcedSourceId to the index. This is specified in the
1787 # ForcedSource.yaml
1788 outputCatalog.set_index("forcedSourceId", inplace=True, verify_integrity=True)
1789 # Rename it to the config.key
1790 outputCatalog.index.rename(self.config.key, inplace=True)
1792 self.log.info("Made a table of %d columns and %d rows",
1793 len(outputCatalog.columns), len(outputCatalog))
1794 return pipeBase.Struct(outputCatalog=outputCatalog)
1797class ConsolidateTractConnections(pipeBase.PipelineTaskConnections,
1798 defaultTemplates={"catalogType": ""},
1799 dimensions=("instrument", "tract")):
1800 inputCatalogs = connectionTypes.Input(
1801 doc="Input per-patch DataFrame Tables to be concatenated",
1802 name="{catalogType}ForcedSourceTable",
1803 storageClass="DataFrame",
1804 dimensions=("tract", "patch", "skymap"),
1805 multiple=True,
1806 )
1808 outputCatalog = connectionTypes.Output(
1809 doc="Output per-tract concatenation of DataFrame Tables",
1810 name="{catalogType}ForcedSourceTable_tract",
1811 storageClass="DataFrame",
1812 dimensions=("tract", "skymap"),
1813 )
1816class ConsolidateTractConfig(pipeBase.PipelineTaskConfig,
1817 pipelineConnections=ConsolidateTractConnections):
1818 pass
1821class ConsolidateTractTask(pipeBase.PipelineTask):
1822 """Concatenate any per-patch, dataframe list into a single
1823 per-tract DataFrame.
1824 """
1825 _DefaultName = 'ConsolidateTract'
1826 ConfigClass = ConsolidateTractConfig
1828 def runQuantum(self, butlerQC, inputRefs, outputRefs):
1829 inputs = butlerQC.get(inputRefs)
1830 # Not checking at least one inputCatalog exists because that'd be an
1831 # empty QG.
1832 self.log.info("Concatenating %s per-patch %s Tables",
1833 len(inputs['inputCatalogs']),
1834 inputRefs.inputCatalogs[0].datasetType.name)
1835 df = pd.concat(inputs['inputCatalogs'])
1836 butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs)