postprocess.py
1# This file is part of pipe_tasks.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <https://www.gnu.org/licenses/>.
21
22__all__ = ["WriteObjectTableConfig", "WriteObjectTableTask",
23 "WriteSourceTableConfig", "WriteSourceTableTask",
24 "WriteRecalibratedSourceTableConfig", "WriteRecalibratedSourceTableTask",
25 "PostprocessAnalysis",
26 "TransformCatalogBaseConfig", "TransformCatalogBaseTask",
27 "TransformObjectCatalogConfig", "TransformObjectCatalogTask",
28 "ConsolidateObjectTableConfig", "ConsolidateObjectTableTask",
29 "TransformSourceTableConfig", "TransformSourceTableTask",
30 "ConsolidateVisitSummaryConfig", "ConsolidateVisitSummaryTask",
31 "ConsolidateSourceTableConfig", "ConsolidateSourceTableTask",
32 "MakeCcdVisitTableConfig", "MakeCcdVisitTableTask",
33 "MakeVisitTableConfig", "MakeVisitTableTask",
34 "WriteForcedSourceTableConfig", "WriteForcedSourceTableTask",
35 "TransformForcedSourceTableConfig", "TransformForcedSourceTableTask",
36 "ConsolidateTractConfig", "ConsolidateTractTask"]
37
38import functools
39import pandas as pd
40import logging
41import numpy as np
42import numbers
43import os
44
45import lsst.geom
46import lsst.pex.config as pexConfig
47import lsst.pipe.base as pipeBase
48import lsst.daf.base as dafBase
49from lsst.pipe.base import connectionTypes
50import lsst.afw.table as afwTable
51from lsst.afw.image import ExposureSummaryStats
52from lsst.meas.base import SingleFrameMeasurementTask, DetectorVisitIdGeneratorConfig
53from lsst.skymap import BaseSkyMap
54
55from .functors import CompositeFunctor, Column
56
57log = logging.getLogger(__name__)
58
59
60def flattenFilters(df, noDupCols=['coord_ra', 'coord_dec'], camelCase=False, inputBands=None):
61 """Flattens a dataframe with multilevel column index.
62 """
63 newDf = pd.DataFrame()
64 # band is the level 0 index
65 dfBands = df.columns.unique(level=0).values
66 for band in dfBands:
67 subdf = df[band]
68 columnFormat = '{0}{1}' if camelCase else '{0}_{1}'
69 newColumns = {c: columnFormat.format(band, c)
70 for c in subdf.columns if c not in noDupCols}
71 cols = list(newColumns.keys())
72 newDf = pd.concat([newDf, subdf[cols].rename(columns=newColumns)], axis=1)
73
74 # Band must be present in the input and output or else column is all NaN:
75 presentBands = dfBands if inputBands is None else list(set(inputBands).intersection(dfBands))
76 # Get the unexploded columns from any present band's partition
77 noDupDf = df[presentBands[0]][noDupCols]
78 newDf = pd.concat([noDupDf, newDf], axis=1)
79 return newDf
80
81
82class WriteObjectTableConnections(pipeBase.PipelineTaskConnections,
83 defaultTemplates={"coaddName": "deep"},
84 dimensions=("tract", "patch", "skymap")):
85 inputCatalogMeas = connectionTypes.Input(
86 doc="Catalog of source measurements on the deepCoadd.",
87 dimensions=("tract", "patch", "band", "skymap"),
88 storageClass="SourceCatalog",
89 name="{coaddName}Coadd_meas",
90 multiple=True
91 )
92 inputCatalogForcedSrc = connectionTypes.Input(
93 doc="Catalog of forced measurements (shape and position parameters held fixed) on the deepCoadd.",
94 dimensions=("tract", "patch", "band", "skymap"),
95 storageClass="SourceCatalog",
96 name="{coaddName}Coadd_forced_src",
97 multiple=True
98 )
99 inputCatalogRef = connectionTypes.Input(
100 doc="Catalog marking the primary detection (which band provides a good shape and position)"
101 "for each detection in deepCoadd_mergeDet.",
102 dimensions=("tract", "patch", "skymap"),
103 storageClass="SourceCatalog",
104 name="{coaddName}Coadd_ref"
105 )
106 outputCatalog = connectionTypes.Output(
107 doc="A vertical concatenation of the deepCoadd_{ref|meas|forced_src} catalogs, "
108 "stored as a DataFrame with a multi-level column index per-patch.",
109 dimensions=("tract", "patch", "skymap"),
110 storageClass="DataFrame",
111 name="{coaddName}Coadd_obj"
112 )
113
114
115class WriteObjectTableConfig(pipeBase.PipelineTaskConfig,
116 pipelineConnections=WriteObjectTableConnections):
117 engine = pexConfig.Field(
118 dtype=str,
119 default="pyarrow",
120 doc="Parquet engine for writing (pyarrow or fastparquet)",
121 deprecated="This config is no longer used, and will be removed after v26."
122 )
123 coaddName = pexConfig.Field(
124 dtype=str,
125 default="deep",
126 doc="Name of coadd"
127 )
128
129
130class WriteObjectTableTask(pipeBase.PipelineTask):
131 """Write filter-merged source tables as a DataFrame in parquet format.
132 """
133 _DefaultName = "writeObjectTable"
134 ConfigClass = WriteObjectTableConfig
135
136 # Names of table datasets to be merged
137 inputDatasets = ('forced_src', 'meas', 'ref')
138
139 # Tag of output dataset written by `MergeSourcesTask.write`
140 outputDataset = 'obj'
141
142 def runQuantum(self, butlerQC, inputRefs, outputRefs):
143 inputs = butlerQC.get(inputRefs)
144
145 measDict = {ref.dataId['band']: {'meas': cat} for ref, cat in
146 zip(inputRefs.inputCatalogMeas, inputs['inputCatalogMeas'])}
147 forcedSourceDict = {ref.dataId['band']: {'forced_src': cat} for ref, cat in
148 zip(inputRefs.inputCatalogForcedSrc, inputs['inputCatalogForcedSrc'])}
149
150 catalogs = {}
151 for band in measDict.keys():
152 catalogs[band] = {'meas': measDict[band]['meas'],
153 'forced_src': forcedSourceDict[band]['forced_src'],
154 'ref': inputs['inputCatalogRef']}
155 dataId = butlerQC.quantum.dataId
156 df = self.run(catalogs=catalogs, tract=dataId['tract'], patch=dataId['patch'])
157 outputs = pipeBase.Struct(outputCatalog=df)
158 butlerQC.put(outputs, outputRefs)
159
160 def run(self, catalogs, tract, patch):
161 """Merge multiple catalogs.
162
163 Parameters
164 ----------
 165 catalogs : `dict`
 166 Mapping from filter names to dict of catalogs; see the sketch in Notes below.
 167 tract : `int`
 168 tractId to use for the tractId column.
 169 patch : `str`
 170 patchId to use for the patchId column.
171
172 Returns
173 -------
174 catalog : `pandas.DataFrame`
175 Merged dataframe.
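
 Notes
 -----
 A sketch of the expected ``catalogs`` structure, as assembled by
 `runQuantum` (the band keys shown are illustrative)::

     {'r': {'meas': meas_r, 'forced_src': forced_r, 'ref': refCat},
      'i': {'meas': meas_i, 'forced_src': forced_i, 'ref': refCat}}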
176 """
177 dfs = []
178 for filt, tableDict in catalogs.items():
179 for dataset, table in tableDict.items():
180 # Convert afwTable to pandas DataFrame
181 df = table.asAstropy().to_pandas().set_index('id', drop=True)
182
183 # Sort columns by name, to ensure matching schema among patches
184 df = df.reindex(sorted(df.columns), axis=1)
185 df = df.assign(tractId=tract, patchId=patch)
186
187 # Make columns a 3-level MultiIndex
188 df.columns = pd.MultiIndex.from_tuples([(dataset, filt, c) for c in df.columns],
189 names=('dataset', 'band', 'column'))
190 dfs.append(df)
191
192 # We do this dance and not `pd.concat(dfs)` because the pandas
193 # concatenation uses infinite memory.
194 catalog = functools.reduce(lambda d1, d2: d1.join(d2), dfs)
195 return catalog
196
197
198class WriteSourceTableConnections(pipeBase.PipelineTaskConnections,
199 defaultTemplates={"catalogType": ""},
200 dimensions=("instrument", "visit", "detector")):
201
202 catalog = connectionTypes.Input(
203 doc="Input full-depth catalog of sources produced by CalibrateTask",
204 name="{catalogType}src",
205 storageClass="SourceCatalog",
206 dimensions=("instrument", "visit", "detector")
207 )
208 outputCatalog = connectionTypes.Output(
209 doc="Catalog of sources, `src` in DataFrame/Parquet format. The 'id' column is "
210 "replaced with an index; all other columns are unchanged.",
211 name="{catalogType}source",
212 storageClass="DataFrame",
213 dimensions=("instrument", "visit", "detector")
214 )
215
216
217class WriteSourceTableConfig(pipeBase.PipelineTaskConfig,
218 pipelineConnections=WriteSourceTableConnections):
219 idGenerator = DetectorVisitIdGeneratorConfig.make_field()
220
221
222class WriteSourceTableTask(pipeBase.PipelineTask):
223 """Write source table to DataFrame Parquet format.
224 """
225 _DefaultName = "writeSourceTable"
226 ConfigClass = WriteSourceTableConfig
227
228 def runQuantum(self, butlerQC, inputRefs, outputRefs):
229 inputs = butlerQC.get(inputRefs)
230 inputs['ccdVisitId'] = self.config.idGenerator.apply(butlerQC.quantum.dataId).catalog_id
231 result = self.run(**inputs)
232 outputs = pipeBase.Struct(outputCatalog=result.table)
233 butlerQC.put(outputs, outputRefs)
234
235 def run(self, catalog, ccdVisitId=None, **kwargs):
236 """Convert `src` catalog to DataFrame
237
238 Parameters
239 ----------
240 catalog: `afwTable.SourceCatalog`
241 catalog to be converted
242 ccdVisitId: `int`
243 ccdVisitId to be added as a column
244 **kwargs
245 Additional keyword arguments are ignored as a convenience for
246 subclasses that pass the same arguments to several different
247 methods.
248
249 Returns
250 -------
251 result : `lsst.pipe.base.Struct`
252 ``table``
253 `DataFrame` version of the input catalog
254 """
255 self.log.info("Generating DataFrame from src catalog ccdVisitId=%s", ccdVisitId)
256 df = catalog.asAstropy().to_pandas().set_index('id', drop=True)
257 df['ccdVisitId'] = ccdVisitId
258 return pipeBase.Struct(table=df)
259
260
261class WriteRecalibratedSourceTableConnections(WriteSourceTableConnections,
262 defaultTemplates={"catalogType": "",
263 "skyWcsName": "gbdesAstrometricFit",
264 "photoCalibName": "fgcm"},
265 dimensions=("instrument", "visit", "detector", "skymap")):
266 skyMap = connectionTypes.Input(
267 doc="skyMap needed to choose which tract-level calibrations to use when multiple available",
268 name=BaseSkyMap.SKYMAP_DATASET_TYPE_NAME,
269 storageClass="SkyMap",
270 dimensions=("skymap",),
271 )
272 exposure = connectionTypes.Input(
273 doc="Input exposure to perform photometry on.",
274 name="calexp",
275 storageClass="ExposureF",
276 dimensions=["instrument", "visit", "detector"],
277 )
278 externalSkyWcsTractCatalog = connectionTypes.Input(
279 doc=("Per-tract, per-visit wcs calibrations. These catalogs use the detector "
280 "id for the catalog id, sorted on id for fast lookup."),
281 name="{skyWcsName}SkyWcsCatalog",
282 storageClass="ExposureCatalog",
283 dimensions=["instrument", "visit", "tract"],
284 multiple=True
285 )
286 externalSkyWcsGlobalCatalog = connectionTypes.Input(
287 doc=("Per-visit wcs calibrations computed globally (with no tract information). "
288 "These catalogs use the detector id for the catalog id, sorted on id for "
289 "fast lookup."),
290 name="finalVisitSummary",
291 storageClass="ExposureCatalog",
292 dimensions=["instrument", "visit"],
293 )
294 externalPhotoCalibTractCatalog = connectionTypes.Input(
295 doc=("Per-tract, per-visit photometric calibrations. These catalogs use the "
296 "detector id for the catalog id, sorted on id for fast lookup."),
297 name="{photoCalibName}PhotoCalibCatalog",
298 storageClass="ExposureCatalog",
299 dimensions=["instrument", "visit", "tract"],
300 multiple=True
301 )
302 externalPhotoCalibGlobalCatalog = connectionTypes.Input(
303 doc=("Per-visit photometric calibrations computed globally (with no tract "
304 "information). These catalogs use the detector id for the catalog id, "
305 "sorted on id for fast lookup."),
306 name="finalVisitSummary",
307 storageClass="ExposureCatalog",
308 dimensions=["instrument", "visit"],
309 )
310
311 def __init__(self, *, config=None):
312 super().__init__(config=config)
313 # Same connection boilerplate as all other applications of
314 # Global/Tract calibrations
315 if config.doApplyExternalSkyWcs and config.doReevaluateSkyWcs:
316 if config.useGlobalExternalSkyWcs:
317 self.inputs.remove("externalSkyWcsTractCatalog")
318 else:
319 self.inputs.remove("externalSkyWcsGlobalCatalog")
320 else:
321 self.inputs.remove("externalSkyWcsTractCatalog")
322 self.inputs.remove("externalSkyWcsGlobalCatalog")
323 if config.doApplyExternalPhotoCalib and config.doReevaluatePhotoCalib:
324 if config.useGlobalExternalPhotoCalib:
325 self.inputs.remove("externalPhotoCalibTractCatalog")
326 else:
327 self.inputs.remove("externalPhotoCalibGlobalCatalog")
328 else:
329 self.inputs.remove("externalPhotoCalibTractCatalog")
330 self.inputs.remove("externalPhotoCalibGlobalCatalog")
331
332
333class WriteRecalibratedSourceTableConfig(WriteSourceTableConfig,
334 pipelineConnections=WriteRecalibratedSourceTableConnections):
335
336 doReevaluatePhotoCalib = pexConfig.Field(
337 dtype=bool,
338 default=True,
339 doc=("Add or replace local photoCalib columns")
340 )
341 doReevaluateSkyWcs = pexConfig.Field(
342 dtype=bool,
343 default=True,
344 doc=("Add or replace local WCS columns and update the coord columns, coord_ra and coord_dec")
345 )
346 doApplyExternalPhotoCalib = pexConfig.Field(
347 dtype=bool,
348 default=True,
349 doc=("If and only if doReevaluatePhotoCalib, apply the photometric calibrations from an external ",
350 "algorithm such as FGCM or jointcal, else use the photoCalib already attached to the exposure."),
351 )
352 doApplyExternalSkyWcs = pexConfig.Field(
353 dtype=bool,
354 default=True,
355 doc=("if and only if doReevaluateSkyWcs, apply the WCS from an external algorithm such as jointcal, ",
356 "else use the wcs already attached to the exposure."),
357 )
358 useGlobalExternalPhotoCalib = pexConfig.Field(
359 dtype=bool,
360 default=True,
361 doc=("When using doApplyExternalPhotoCalib, use 'global' calibrations "
362 "that are not run per-tract. When False, use per-tract photometric "
363 "calibration files.")
364 )
365 useGlobalExternalSkyWcs = pexConfig.Field(
366 dtype=bool,
367 default=True,
368 doc=("When using doApplyExternalSkyWcs, use 'global' calibrations "
369 "that are not run per-tract. When False, use per-tract wcs "
370 "files.")
371 )
372 idGenerator = DetectorVisitIdGeneratorConfig.make_field()
373
374 def validate(self):
375 super().validate()
376 if self.doApplyExternalSkyWcs and not self.doReevaluateSkyWcs:
377 log.warning("doApplyExternalSkyWcs=True but doReevaluateSkyWcs=False"
378 "External SkyWcs will not be read or evaluated.")
379 if self.doApplyExternalPhotoCalib and not self.doReevaluatePhotoCalib:
380 log.warning("doApplyExternalPhotoCalib=True but doReevaluatePhotoCalib=False."
381 "External PhotoCalib will not be read or evaluated.")
382
383
384class WriteRecalibratedSourceTableTask(WriteSourceTableTask):
385 """Write source table to DataFrame Parquet format.
386 """
387 _DefaultName = "writeRecalibratedSourceTable"
388 ConfigClass = WriteRecalibratedSourceTableConfig
389
390 def runQuantum(self, butlerQC, inputRefs, outputRefs):
391 inputs = butlerQC.get(inputRefs)
392
393 idGenerator = self.config.idGenerator.apply(butlerQC.quantum.dataId)
394 inputs['idGenerator'] = idGenerator
395 inputs['ccdVisitId'] = idGenerator.catalog_id
396
397 if self.config.doReevaluatePhotoCalib or self.config.doReevaluateSkyWcs:
398 if self.config.doApplyExternalPhotoCalib or self.config.doApplyExternalSkyWcs:
399 inputs['exposure'] = self.attachCalibs(inputRefs, **inputs)
400
401 inputs['catalog'] = self.addCalibColumns(**inputs)
402
403 result = self.run(**inputs)
404 outputs = pipeBase.Struct(outputCatalog=result.table)
405 butlerQC.put(outputs, outputRefs)
406
407 def attachCalibs(self, inputRefs, skyMap, exposure, externalSkyWcsGlobalCatalog=None,
408 externalSkyWcsTractCatalog=None, externalPhotoCalibGlobalCatalog=None,
409 externalPhotoCalibTractCatalog=None, **kwargs):
410 """Apply external calibrations to exposure per configuration
411
 412 When multiple tract-level calibrations overlap, select the one whose
 413 center is closest to the detector center.
414
415 Parameters
416 ----------
 417 inputRefs : `lsst.pipe.base.InputQuantizedConnection`
 418 Input references, used for the dataIds of the tract-level calibs.
419 skyMap : `lsst.skymap.SkyMap`
420 exposure : `lsst.afw.image.exposure.Exposure`
421 Input exposure to adjust calibrations.
422 externalSkyWcsGlobalCatalog : `lsst.afw.table.ExposureCatalog`, optional
423 Exposure catalog with external skyWcs to be applied per config
424 externalSkyWcsTractCatalog : `lsst.afw.table.ExposureCatalog`, optional
425 Exposure catalog with external skyWcs to be applied per config
426 externalPhotoCalibGlobalCatalog : `lsst.afw.table.ExposureCatalog`, optional
427 Exposure catalog with external photoCalib to be applied per config
428 externalPhotoCalibTractCatalog : `lsst.afw.table.ExposureCatalog`, optional
429 Exposure catalog with external photoCalib to be applied per config
430 **kwargs
431 Additional keyword arguments are ignored to facilitate passing the
432 same arguments to several methods.
433
434 Returns
435 -------
436 exposure : `lsst.afw.image.exposure.Exposure`
437 Exposure with adjusted calibrations.
438 """
439 if not self.config.doApplyExternalSkyWcs:
440 # Do not modify the exposure's SkyWcs
441 externalSkyWcsCatalog = None
442 elif self.config.useGlobalExternalSkyWcs:
443 # Use the global external SkyWcs
444 externalSkyWcsCatalog = externalSkyWcsGlobalCatalog
445 self.log.info('Applying global SkyWcs')
446 else:
447 # use tract-level external SkyWcs from the closest overlapping tract
448 inputRef = getattr(inputRefs, 'externalSkyWcsTractCatalog')
449 tracts = [ref.dataId['tract'] for ref in inputRef]
450 if len(tracts) == 1:
451 ind = 0
452 self.log.info('Applying tract-level SkyWcs from tract %s', tracts[ind])
453 else:
454 if exposure.getWcs() is None: # TODO: could this look-up use the externalPhotoCalib?
455 raise ValueError("Trying to locate nearest tract, but exposure.wcs is None.")
456 ind = self.getClosestTract(tracts, skyMap,
457 exposure.getBBox(), exposure.getWcs())
458 self.log.info('Multiple overlapping externalSkyWcsTractCatalogs found (%s). '
459 'Applying closest to detector center: tract=%s', str(tracts), tracts[ind])
460
461 externalSkyWcsCatalog = externalSkyWcsTractCatalog[ind]
462
463 if not self.config.doApplyExternalPhotoCalib:
464 # Do not modify the exposure's PhotoCalib
465 externalPhotoCalibCatalog = None
466 elif self.config.useGlobalExternalPhotoCalib:
467 # Use the global external PhotoCalib
468 externalPhotoCalibCatalog = externalPhotoCalibGlobalCatalog
469 self.log.info('Applying global PhotoCalib')
470 else:
471 # use tract-level external PhotoCalib from the closest overlapping tract
472 inputRef = getattr(inputRefs, 'externalPhotoCalibTractCatalog')
473 tracts = [ref.dataId['tract'] for ref in inputRef]
474 if len(tracts) == 1:
475 ind = 0
476 self.log.info('Applying tract-level PhotoCalib from tract %s', tracts[ind])
477 else:
478 ind = self.getClosestTract(tracts, skyMap,
479 exposure.getBBox(), exposure.getWcs())
480 self.log.info('Multiple overlapping externalPhotoCalibTractCatalogs found (%s). '
481 'Applying closest to detector center: tract=%s', str(tracts), tracts[ind])
482
483 externalPhotoCalibCatalog = externalPhotoCalibTractCatalog[ind]
484
485 return self.prepareCalibratedExposure(exposure, externalSkyWcsCatalog, externalPhotoCalibCatalog)
486
487 def getClosestTract(self, tracts, skyMap, bbox, wcs):
488 """Find the index of the tract closest to detector from list of tractIds
489
490 Parameters
491 ----------
492 tracts: `list` [`int`]
493 Iterable of integer tractIds
494 skyMap : `lsst.skymap.SkyMap`
495 skyMap to lookup tract geometry and wcs
496 bbox : `lsst.geom.Box2I`
 497 Detector bbox, the center of which will be compared to tract centers.
 498 wcs : `lsst.afw.geom.SkyWcs`
 499 Detector Wcs object to map the detector center to a SkyCoord.
500
501 Returns
502 -------
503 index : `int`
504 """
505 if len(tracts) == 1:
506 return 0
507
508 center = wcs.pixelToSky(bbox.getCenter())
509 sep = []
510 for tractId in tracts:
511 tract = skyMap[tractId]
512 tractCenter = tract.getWcs().pixelToSky(tract.getBBox().getCenter())
513 sep.append(center.separation(tractCenter))
514
515 return np.argmin(sep)
516
517 def prepareCalibratedExposure(self, exposure, externalSkyWcsCatalog=None, externalPhotoCalibCatalog=None):
518 """Prepare a calibrated exposure and apply external calibrations
519 if so configured.
520
521 Parameters
522 ----------
523 exposure : `lsst.afw.image.exposure.Exposure`
524 Input exposure to adjust calibrations.
525 externalSkyWcsCatalog : `lsst.afw.table.ExposureCatalog`, optional
526 Exposure catalog with external skyWcs to be applied
527 if config.doApplyExternalSkyWcs=True. Catalog uses the detector id
528 for the catalog id, sorted on id for fast lookup.
529 externalPhotoCalibCatalog : `lsst.afw.table.ExposureCatalog`, optional
530 Exposure catalog with external photoCalib to be applied
531 if config.doApplyExternalPhotoCalib=True. Catalog uses the detector
532 id for the catalog id, sorted on id for fast lookup.
533
534 Returns
535 -------
536 exposure : `lsst.afw.image.exposure.Exposure`
537 Exposure with adjusted calibrations.
538 """
539 detectorId = exposure.getInfo().getDetector().getId()
540
541 if externalPhotoCalibCatalog is not None:
542 row = externalPhotoCalibCatalog.find(detectorId)
543 if row is None:
544 self.log.warning("Detector id %s not found in externalPhotoCalibCatalog; "
545 "Using original photoCalib.", detectorId)
546 else:
547 photoCalib = row.getPhotoCalib()
548 if photoCalib is None:
549 self.log.warning("Detector id %s has None for photoCalib in externalPhotoCalibCatalog; "
550 "Using original photoCalib.", detectorId)
551 else:
552 exposure.setPhotoCalib(photoCalib)
553
554 if externalSkyWcsCatalog is not None:
555 row = externalSkyWcsCatalog.find(detectorId)
556 if row is None:
557 self.log.warning("Detector id %s not found in externalSkyWcsCatalog; "
558 "Using original skyWcs.", detectorId)
559 else:
560 skyWcs = row.getWcs()
561 if skyWcs is None:
562 self.log.warning("Detector id %s has None for skyWcs in externalSkyWcsCatalog; "
563 "Using original skyWcs.", detectorId)
564 else:
565 exposure.setWcs(skyWcs)
566
567 return exposure
568
569 def addCalibColumns(self, catalog, exposure, idGenerator, **kwargs):
570 """Add replace columns with calibs evaluated at each centroid
571
572 Add or replace 'base_LocalWcs' `base_LocalPhotoCalib' columns in a
573 a source catalog, by rerunning the plugins.
574
575 Parameters
576 ----------
578 catalog to which calib columns will be added
579 exposure : `lsst.afw.image.exposure.Exposure`
580 Exposure with attached PhotoCalibs and SkyWcs attributes to be
581 reevaluated at local centroids. Pixels are not required.
582 idGenerator : `lsst.meas.base.IdGenerator`
583 Object that generates Source IDs and random seeds.
584 **kwargs
585 Additional keyword arguments are ignored to facilitate passing the
586 same arguments to several methods.
587
588 Returns
 589 -------
 590 newCat : `lsst.afw.table.SourceCatalog`
 591 Source catalog with the requested local calib columns.
592 """
593 measureConfig = SingleFrameMeasurementTask.ConfigClass()
594 measureConfig.doReplaceWithNoise = False
595
596 # Clear all slots, because we aren't running the relevant plugins.
597 for slot in measureConfig.slots:
598 setattr(measureConfig.slots, slot, None)
599
600 measureConfig.plugins.names = []
601 if self.config.doReevaluateSkyWcs:
602 measureConfig.plugins.names.add('base_LocalWcs')
603 self.log.info("Re-evaluating base_LocalWcs plugin")
604 if self.config.doReevaluatePhotoCalib:
605 measureConfig.plugins.names.add('base_LocalPhotoCalib')
606 self.log.info("Re-evaluating base_LocalPhotoCalib plugin")
607 pluginsNotToCopy = tuple(measureConfig.plugins.names)
608
609 # Create a new schema and catalog
610 # Copy all columns from original except for the ones to reevaluate
611 aliasMap = catalog.schema.getAliasMap()
612 mapper = afwTable.SchemaMapper(catalog.schema)
613 for item in catalog.schema:
614 if not item.field.getName().startswith(pluginsNotToCopy):
615 mapper.addMapping(item.key)
616
617 schema = mapper.getOutputSchema()
618 measurement = SingleFrameMeasurementTask(config=measureConfig, schema=schema)
619 schema.setAliasMap(aliasMap)
620 newCat = afwTable.SourceCatalog(schema)
621 newCat.extend(catalog, mapper=mapper)
622
623 # Fluxes in sourceCatalogs are in counts, so there are no fluxes to
624 # update here. LocalPhotoCalibs are applied during transform tasks.
625 # Update coord_ra/coord_dec, which are expected to be positions on the
626 # sky and are used as such in sdm tables without transform
627 if self.config.doReevaluateSkyWcs and exposure.wcs is not None:
628 afwTable.updateSourceCoords(exposure.wcs, newCat)
629
630 measurement.run(measCat=newCat, exposure=exposure, exposureId=idGenerator.catalog_id)
631
632 return newCat
633
634
635class PostprocessAnalysis(object):
636 """Calculate columns from DataFrames or handles storing DataFrames.
637
638 This object manages and organizes an arbitrary set of computations
639 on a catalog. The catalog is defined by a
640 `DeferredDatasetHandle` or `InMemoryDatasetHandle` object
641 (or list thereof), such as a ``deepCoadd_obj`` dataset, and the
642 computations are defined by a collection of `lsst.pipe.tasks.functors.Functor`
643 objects (or, equivalently, a ``CompositeFunctor``).
644
645 After the object is initialized, accessing the ``.df`` attribute (which
646 holds the `pandas.DataFrame` containing the results of the calculations)
647 triggers computation of said dataframe.
648
649 One of the conveniences of using this object is the ability to define a
650 desired common filter for all functors. This enables the same functor
651 collection to be passed to several different `PostprocessAnalysis` objects
652 without having to change the original functor collection, since the ``filt``
653 keyword argument of this object triggers an overwrite of the ``filt``
654 property for all functors in the collection.
655
656 This object also allows a list of refFlags to be passed, and defines a set
657 of default refFlags that are always included even if not requested.
658
659 If a list of DataFrames or Handles is passed, rather than a single one,
660 then the calculations will be mapped over all the input catalogs. In
661 principle, it should be straightforward to parallelize this activity, but
662 initial tests have failed (see TODO in code comments).
663
664 Parameters
665 ----------
666 handles : `lsst.daf.butler.DeferredDatasetHandle` or
667 `lsst.pipe.base.InMemoryDatasetHandle` or
668 list of these.
669 Source catalog(s) for computation.
670 functors : `list`, `dict`, or `~lsst.pipe.tasks.functors.CompositeFunctor`
671 Computations to do (functors that act on ``handles``).
672 If a dict, the output
673 DataFrame will have columns keyed accordingly.
674 If a list, the column keys will come from the
675 ``.shortname`` attribute of each functor.
676
677 filt : `str`, optional
678 Filter in which to calculate. If provided,
679 this will overwrite any existing ``.filt`` attribute
680 of the provided functors.
681
682 flags : `list`, optional
683 List of flags (per-band) to include in output table.
684 Taken from the ``meas`` dataset if applied to a multilevel Object Table.
685
686 refFlags : `list`, optional
687 List of refFlags (only reference band) to include in output table.
688
689 forcedFlags : `list`, optional
690 List of flags (per-band) to include in output table.
691 Taken from the ``forced_src`` dataset if applied to a
692 multilevel Object Table. Intended for flags from measurement plugins
693 only run during multi-band forced-photometry.
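
 Examples
 --------
 A minimal usage sketch; ``handle`` below stands for a ``deepCoadd_obj``
 dataset handle and the functor choices are purely illustrative, not a
 tested recipe:

 >>> from lsst.pipe.tasks.functors import CompositeFunctor, Column
 >>> funcs = CompositeFunctor({'ra': Column('coord_ra'),
 ...                           'dec': Column('coord_dec')})
 >>> analysis = PostprocessAnalysis(handle, funcs, filt='r')
 >>> df = analysis.df  # first access triggers the computation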
694 """
695 _defaultRefFlags = []
696 _defaultFuncs = ()
697
698 def __init__(self, handles, functors, filt=None, flags=None, refFlags=None, forcedFlags=None):
699 self.handles = handles
700 self.functors = functors
701
702 self.filt = filt
703 self.flags = list(flags) if flags is not None else []
704 self.forcedFlags = list(forcedFlags) if forcedFlags is not None else []
705 self.refFlags = list(self._defaultRefFlags)
706 if refFlags is not None:
707 self.refFlags += list(refFlags)
708
709 self._df = None
710
711 @property
712 def defaultFuncs(self):
713 funcs = dict(self._defaultFuncs)
714 return funcs
715
716 @property
717 def func(self):
718 additionalFuncs = self.defaultFuncs
719 additionalFuncs.update({flag: Column(flag, dataset='forced_src') for flag in self.forcedFlags})
720 additionalFuncs.update({flag: Column(flag, dataset='ref') for flag in self.refFlags})
721 additionalFuncs.update({flag: Column(flag, dataset='meas') for flag in self.flags})
722
723 if isinstance(self.functors, CompositeFunctor):
724 func = self.functors
725 else:
726 func = CompositeFunctor(self.functors)
727
728 func.funcDict.update(additionalFuncs)
729 func.filt = self.filt
730
731 return func
732
733 @property
734 def noDupCols(self):
735 return [name for name, func in self.func.funcDict.items() if func.noDup or func.dataset == 'ref']
736
737 @property
738 def df(self):
739 if self._df is None:
740 self.compute()
741 return self._df
742
743 def compute(self, dropna=False, pool=None):
744 # map over multiple handles
745 if type(self.handles) in (list, tuple):
746 if pool is None:
747 dflist = [self.func(handle, dropna=dropna) for handle in self.handles]
748 else:
749 # TODO: Figure out why this doesn't work (pyarrow pickling
750 # issues?)
751 dflist = pool.map(functools.partial(self.func, dropna=dropna), self.handles)
752 self._df = pd.concat(dflist)
753 else:
754 self._df = self.func(self.handles, dropna=dropna)
755
756 return self._df
757
758
759class TransformCatalogBaseConnections(pipeBase.PipelineTaskConnections,
760 dimensions=()):
761 """Expected Connections for subclasses of TransformCatalogBaseTask.
762
763 Must be subclassed.
764 """
765 inputCatalog = connectionTypes.Input(
766 name="",
767 storageClass="DataFrame",
768 )
769 outputCatalog = connectionTypes.Output(
770 name="",
771 storageClass="DataFrame",
772 )
773
774
775class TransformCatalogBaseConfig(pipeBase.PipelineTaskConfig,
776 pipelineConnections=TransformCatalogBaseConnections):
777 functorFile = pexConfig.Field(
778 dtype=str,
779 doc="Path to YAML file specifying Science Data Model functors to use "
780 "when copying columns and computing calibrated values.",
781 default=None,
782 optional=True
783 )
784 primaryKey = pexConfig.Field(
785 dtype=str,
786 doc="Name of column to be set as the DataFrame index. If None, the index"
787 "will be named `id`",
788 default=None,
789 optional=True
790 )
791 columnsFromDataId = pexConfig.ListField(
792 dtype=str,
793 default=None,
794 optional=True,
795 doc="Columns to extract from the dataId",
796 )
797
798
799class TransformCatalogBaseTask(pipeBase.PipelineTask):
800 """Base class for transforming/standardizing a catalog
801
802 by applying functors that convert units and apply calibrations.
803 The purpose of this task is to perform a set of computations on
804 an input ``DeferredDatasetHandle`` or ``InMemoryDatasetHandle`` that holds
805 a ``DataFrame`` dataset (such as ``deepCoadd_obj``), and write the
806 results to a new dataset (which needs to be declared in an ``outputDataset``
807 attribute).
808
809 The calculations to be performed are defined in a YAML file that specifies
810 a set of functors to be computed, provided as
811 the ``functorFile`` config parameter. An example of such a YAML file
812 is the following:
813
814 funcs:
815 psfMag:
816 functor: Mag
817 args:
818 - base_PsfFlux
819 filt: HSC-G
820 dataset: meas
821 cmodel_magDiff:
822 functor: MagDiff
823 args:
824 - modelfit_CModel
825 - base_PsfFlux
826 filt: HSC-G
827 gauss_magDiff:
828 functor: MagDiff
829 args:
830 - base_GaussianFlux
831 - base_PsfFlux
832 filt: HSC-G
833 count:
834 functor: Column
835 args:
836 - base_InputCount_value
837 filt: HSC-G
838 deconvolved_moments:
839 functor: DeconvolvedMoments
840 filt: HSC-G
841 dataset: forced_src
842 refFlags:
843 - calib_psfUsed
844 - merge_measurement_i
845 - merge_measurement_r
846 - merge_measurement_z
847 - merge_measurement_y
848 - merge_measurement_g
849 - base_PixelFlags_flag_inexact_psfCenter
850 - detect_isPrimary
851
852 The names for each entry under "funcs" will become the names of columns in
853 the output dataset. All the functors referenced are defined in
854 `lsst.pipe.tasks.functors`. Positional arguments to be passed to each
855 functor are in the `args` list, and any additional entries for each column
856 other than "functor" or "args" (e.g., ``'filt'``, ``'dataset'``) are treated as
857 keyword arguments to be passed to the functor initialization.
858
859 The "flags" entry is the default shortcut for `Column` functors.
860 All columns listed under "flags" will be copied to the output table
861 untransformed. They can be of any datatype.
862 In the special case of transforming a multi-level object table with
863 band and dataset indices (deepCoadd_obj), these will be taken from the
864 `meas` dataset and exploded out per band.
865
866 There are two special shortcuts that only apply when transforming
867 multi-level Object (deepCoadd_obj) tables:
868 - The "refFlags" entry is shortcut for `Column` functor
869 taken from the `'ref'` dataset if transforming an ObjectTable.
870 - The "forcedFlags" entry is shortcut for `Column` functors.
871 taken from the ``forced_src`` dataset if transforming an ObjectTable.
872 These are expanded out per band.
873
874
875 This task uses the `lsst.pipe.tasks.postprocess.PostprocessAnalysis` object
876 to organize and execute the calculations.
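
 An equivalent interactive sketch of how the functor file is consumed at
 construction time (``Object.yaml`` is an illustrative path, not a fixed
 location):

 >>> from lsst.pipe.tasks.functors import CompositeFunctor
 >>> funcs = CompositeFunctor.from_file('Object.yaml')
 >>> funcs.update(dict(PostprocessAnalysis._defaultFuncs))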
877 """
878 @property
879 def _DefaultName(self):
880 raise NotImplementedError('Subclass must define "_DefaultName" attribute')
881
882 @property
883 def outputDataset(self):
884 raise NotImplementedError('Subclass must define "outputDataset" attribute')
885
886 @property
887 def inputDataset(self):
888 raise NotImplementedError('Subclass must define "inputDataset" attribute')
889
890 @property
891 def ConfigClass(self):
892 raise NotImplementedError('Subclass must define "ConfigClass" attribute')
893
894 def __init__(self, *args, **kwargs):
895 super().__init__(*args, **kwargs)
896 if self.config.functorFile:
 897 self.log.info('Loading transform functor definitions from %s',
898 self.config.functorFile)
899 self.funcs = CompositeFunctor.from_file(self.config.functorFile)
900 self.funcs.update(dict(PostprocessAnalysis._defaultFuncs))
901 else:
902 self.funcs = None
903
904 def runQuantum(self, butlerQC, inputRefs, outputRefs):
905 inputs = butlerQC.get(inputRefs)
906 if self.funcs is None:
907 raise ValueError("config.functorFile is None. "
908 "Must be a valid path to yaml in order to run Task as a PipelineTask.")
909 result = self.run(handle=inputs['inputCatalog'], funcs=self.funcs,
910 dataId=outputRefs.outputCatalog.dataId.full)
911 outputs = pipeBase.Struct(outputCatalog=result)
912 butlerQC.put(outputs, outputRefs)
913
914 def run(self, handle, funcs=None, dataId=None, band=None):
915 """Do postprocessing calculations
916
917 Takes a ``DeferredDatasetHandle`` or ``InMemoryDatasetHandle`` or
918 ``DataFrame`` object and dataId,
919 returns a dataframe with results of postprocessing calculations.
920
921 Parameters
922 ----------
 923 handle : `lsst.daf.butler.DeferredDatasetHandle` or
924 `lsst.pipe.base.InMemoryDatasetHandle` or
925 `pandas.DataFrame`, or list of these.
926 DataFrames from which calculations are done.
927 funcs : `lsst.pipe.tasks.functors.Functors`
928 Functors to apply to the table's columns
929 dataId : dict, optional
930 Used to add a `patchId` column to the output dataframe.
931 band : `str`, optional
932 Filter band that is being processed.
933
934 Returns
 935 -------
936 df : `pandas.DataFrame`
937 """
938 self.log.info("Transforming/standardizing the source table dataId: %s", dataId)
939
940 df = self.transform(band, handle, funcs, dataId).df
941 self.log.info("Made a table of %d columns and %d rows", len(df.columns), len(df))
942 return df
943
944 def getFunctors(self):
945 return self.funcs
946
947 def getAnalysis(self, handles, funcs=None, band=None):
948 if funcs is None:
949 funcs = self.funcs
950 analysis = PostprocessAnalysis(handles, funcs, filt=band)
951 return analysis
952
953 def transform(self, band, handles, funcs, dataId):
954 analysis = self.getAnalysis(handles, funcs=funcs, band=band)
955 df = analysis.df
956 if dataId and self.config.columnsFromDataId:
957 for key in self.config.columnsFromDataId:
958 if key in dataId:
959 df[str(key)] = dataId[key]
960 else:
961 raise ValueError(f"'{key}' in config.columnsFromDataId not found in dataId: {dataId}")
962
963 if self.config.primaryKey:
964 if df.index.name != self.config.primaryKey and self.config.primaryKey in df:
965 df.reset_index(inplace=True, drop=True)
966 df.set_index(self.config.primaryKey, inplace=True)
967
968 return pipeBase.Struct(
969 df=df,
970 analysis=analysis
971 )
972
973
974class TransformObjectCatalogConnections(pipeBase.PipelineTaskConnections,
975 defaultTemplates={"coaddName": "deep"},
976 dimensions=("tract", "patch", "skymap")):
977 inputCatalog = connectionTypes.Input(
978 doc="The vertical concatenation of the deepCoadd_{ref|meas|forced_src} catalogs, "
979 "stored as a DataFrame with a multi-level column index per-patch.",
980 dimensions=("tract", "patch", "skymap"),
981 storageClass="DataFrame",
982 name="{coaddName}Coadd_obj",
983 deferLoad=True,
984 )
985 outputCatalog = connectionTypes.Output(
986 doc="Per-Patch Object Table of columns transformed from the deepCoadd_obj table per the standard "
987 "data model.",
988 dimensions=("tract", "patch", "skymap"),
989 storageClass="DataFrame",
990 name="objectTable"
991 )
992
993
994class TransformObjectCatalogConfig(TransformCatalogBaseConfig,
995 pipelineConnections=TransformObjectCatalogConnections):
996 coaddName = pexConfig.Field(
997 dtype=str,
998 default="deep",
999 doc="Name of coadd"
1000 )
1001 # TODO: remove in DM-27177
1002 filterMap = pexConfig.DictField(
1003 keytype=str,
1004 itemtype=str,
1005 default={},
1006 doc=("Dictionary mapping full filter name to short one for column name munging."
1007 "These filters determine the output columns no matter what filters the "
1008 "input data actually contain."),
1009 deprecated=("Coadds are now identified by the band, so this transform is unused."
1010 "Will be removed after v22.")
1011 )
1012 outputBands = pexConfig.ListField(
1013 dtype=str,
1014 default=None,
1015 optional=True,
1016 doc=("These bands and only these bands will appear in the output,"
1017 " NaN-filled if the input does not include them."
1018 " If None, then use all bands found in the input.")
1019 )
1020 camelCase = pexConfig.Field(
1021 dtype=bool,
1022 default=False,
1023 doc=("Write per-band columns names with camelCase, else underscore "
1024 "For example: gPsFlux instead of g_PsFlux.")
1025 )
1026 multilevelOutput = pexConfig.Field(
1027 dtype=bool,
1028 default=False,
1029 doc=("Whether results dataframe should have a multilevel column index (True) or be flat "
1030 "and name-munged (False).")
1031 )
1032 goodFlags = pexConfig.ListField(
1033 dtype=str,
1034 default=[],
1035 doc=("List of 'good' flags that should be set False when populating empty tables. "
1036 "All other flags are considered to be 'bad' flags and will be set to True.")
1037 )
1038 floatFillValue = pexConfig.Field(
1039 dtype=float,
1040 default=np.nan,
1041 doc="Fill value for float fields when populating empty tables."
1042 )
1043 integerFillValue = pexConfig.Field(
1044 dtype=int,
1045 default=-1,
1046 doc="Fill value for integer fields when populating empty tables."
1047 )
1048
1049 def setDefaults(self):
1050 super().setDefaults()
1051 self.functorFile = os.path.join('$PIPE_TASKS_DIR', 'schemas', 'Object.yaml')
1052 self.primaryKey = 'objectId'
1053 self.columnsFromDataId = ['tract', 'patch']
1054 self.goodFlags = ['calib_astrometry_used',
1055 'calib_photometry_reserved',
1056 'calib_photometry_used',
1057 'calib_psf_candidate',
1058 'calib_psf_reserved',
1059 'calib_psf_used']
1060
1061
1062class TransformObjectCatalogTask(TransformCatalogBaseTask):
1063 """Produce a flattened Object Table to match the format specified in
1064 sdm_schemas.
1065
1066 Do the same set of postprocessing calculations on all bands.
1067
 1068 This is identical to `TransformCatalogBaseTask`, except that it does
1069 the specified functor calculations for all filters present in the
1070 input `deepCoadd_obj` table. Any specific ``"filt"`` keywords specified
 1071 by the YAML file will be superseded.
1072 """
1073 _DefaultName = "transformObjectCatalog"
1074 ConfigClass = TransformObjectCatalogConfig
1075
1076 def run(self, handle, funcs=None, dataId=None, band=None):
1077 # NOTE: band kwarg is ignored here.
1078 dfDict = {}
1079 analysisDict = {}
1080 templateDf = pd.DataFrame()
1081
1082 columns = handle.get(component='columns')
1083 inputBands = columns.unique(level=1).values
1084
1085 outputBands = self.config.outputBands if self.config.outputBands else inputBands
1086
1087 # Perform transform for data of filters that exist in the handle dataframe.
1088 for inputBand in inputBands:
1089 if inputBand not in outputBands:
1090 self.log.info("Ignoring %s band data in the input", inputBand)
1091 continue
1092 self.log.info("Transforming the catalog of band %s", inputBand)
1093 result = self.transform(inputBand, handle, funcs, dataId)
1094 dfDict[inputBand] = result.df
1095 analysisDict[inputBand] = result.analysis
1096 if templateDf.empty:
1097 templateDf = result.df
1098
1099 # Put filler values in columns of other wanted bands
1100 for filt in outputBands:
1101 if filt not in dfDict:
1102 self.log.info("Adding empty columns for band %s", filt)
1103 dfTemp = templateDf.copy()
1104 for col in dfTemp.columns:
1105 testValue = dfTemp[col].values[0]
1106 if isinstance(testValue, (np.bool_, pd.BooleanDtype)):
1107 # Boolean flag type, check if it is a "good" flag
1108 if col in self.config.goodFlags:
1109 fillValue = False
1110 else:
1111 fillValue = True
1112 elif isinstance(testValue, numbers.Integral):
1113 # Checking numbers.Integral catches all flavors
1114 # of python, numpy, pandas, etc. integers.
1115 # We must ensure this is not an unsigned integer.
1116 if isinstance(testValue, np.unsignedinteger):
1117 raise ValueError("Parquet tables may not have unsigned integer columns.")
1118 else:
1119 fillValue = self.config.integerFillValue
1120 else:
1121 fillValue = self.config.floatFillValue
1122 dfTemp[col].values[:] = fillValue
1123 dfDict[filt] = dfTemp
1124
1125 # This makes a multilevel column index, with band as first level
1126 df = pd.concat(dfDict, axis=1, names=['band', 'column'])
1127
1128 if not self.config.multilevelOutput:
1129 noDupCols = list(set.union(*[set(v.noDupCols) for v in analysisDict.values()]))
1130 if self.config.primaryKey in noDupCols:
1131 noDupCols.remove(self.config.primaryKey)
1132 if dataId and self.config.columnsFromDataId:
1133 noDupCols += self.config.columnsFromDataId
1134 df = flattenFilters(df, noDupCols=noDupCols, camelCase=self.config.camelCase,
1135 inputBands=inputBands)
1136
1137 self.log.info("Made a table of %d columns and %d rows", len(df.columns), len(df))
1138
1139 return df
1140
1141
1142class ConsolidateObjectTableConnections(pipeBase.PipelineTaskConnections,
1143 dimensions=("tract", "skymap")):
1144 inputCatalogs = connectionTypes.Input(
1145 doc="Per-Patch objectTables conforming to the standard data model.",
1146 name="objectTable",
1147 storageClass="DataFrame",
1148 dimensions=("tract", "patch", "skymap"),
1149 multiple=True,
1150 )
1151 outputCatalog = connectionTypes.Output(
1152 doc="Pre-tract horizontal concatenation of the input objectTables",
1153 name="objectTable_tract",
1154 storageClass="DataFrame",
1155 dimensions=("tract", "skymap"),
1156 )
1157
1158
1159class ConsolidateObjectTableConfig(pipeBase.PipelineTaskConfig,
1160 pipelineConnections=ConsolidateObjectTableConnections):
1161 coaddName = pexConfig.Field(
1162 dtype=str,
1163 default="deep",
1164 doc="Name of coadd"
1165 )
1166
1167
1168class ConsolidateObjectTableTask(pipeBase.PipelineTask):
1169 """Write patch-merged source tables to a tract-level DataFrame Parquet file.
1170
 1171 Concatenates the per-patch `objectTable` list into a per-tract `objectTable_tract`.
1172 """
1173 _DefaultName = "consolidateObjectTable"
1174 ConfigClass = ConsolidateObjectTableConfig
1175
1176 inputDataset = 'objectTable'
1177 outputDataset = 'objectTable_tract'
1178
1179 def runQuantum(self, butlerQC, inputRefs, outputRefs):
1180 inputs = butlerQC.get(inputRefs)
1181 self.log.info("Concatenating %s per-patch Object Tables",
1182 len(inputs['inputCatalogs']))
1183 df = pd.concat(inputs['inputCatalogs'])
1184 butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs)
1185
1186
1187class TransformSourceTableConnections(pipeBase.PipelineTaskConnections,
1188 defaultTemplates={"catalogType": ""},
1189 dimensions=("instrument", "visit", "detector")):
1190
1191 inputCatalog = connectionTypes.Input(
1192 doc="Wide input catalog of sources produced by WriteSourceTableTask",
1193 name="{catalogType}source",
1194 storageClass="DataFrame",
1195 dimensions=("instrument", "visit", "detector"),
1196 deferLoad=True
1197 )
1198 outputCatalog = connectionTypes.Output(
1199 doc="Narrower, per-detector Source Table transformed and converted per a "
1200 "specified set of functors",
1201 name="{catalogType}sourceTable",
1202 storageClass="DataFrame",
1203 dimensions=("instrument", "visit", "detector")
1204 )
1205
1206
1207class TransformSourceTableConfig(TransformCatalogBaseConfig,
1208 pipelineConnections=TransformSourceTableConnections):
1209
1210 def setDefaults(self):
1211 super().setDefaults()
1212 self.functorFile = os.path.join('$PIPE_TASKS_DIR', 'schemas', 'Source.yaml')
1213 self.primaryKey = 'sourceId'
1214 self.columnsFromDataId = ['visit', 'detector', 'band', 'physical_filter']
1215
1216
1217class TransformSourceTableTask(TransformCatalogBaseTask):
1218 """Transform/standardize a source catalog
1219 """
1220 _DefaultName = "transformSourceTable"
1221 ConfigClass = TransformSourceTableConfig
1222
1223
1224class ConsolidateVisitSummaryConnections(pipeBase.PipelineTaskConnections,
1225 dimensions=("instrument", "visit",),
1226 defaultTemplates={"calexpType": ""}):
1227 calexp = connectionTypes.Input(
1228 doc="Processed exposures used for metadata",
1229 name="calexp",
1230 storageClass="ExposureF",
1231 dimensions=("instrument", "visit", "detector"),
1232 deferLoad=True,
1233 multiple=True,
1234 )
1235 visitSummary = connectionTypes.Output(
1236 doc=("Per-visit consolidated exposure metadata. These catalogs use "
1237 "detector id for the id and are sorted for fast lookups of a "
1238 "detector."),
1239 name="visitSummary",
1240 storageClass="ExposureCatalog",
1241 dimensions=("instrument", "visit"),
1242 )
1243 visitSummarySchema = connectionTypes.InitOutput(
1244 doc="Schema of the visitSummary catalog",
1245 name="visitSummary_schema",
1246 storageClass="ExposureCatalog",
1247 )
1248
1249
1250class ConsolidateVisitSummaryConfig(pipeBase.PipelineTaskConfig,
1251 pipelineConnections=ConsolidateVisitSummaryConnections):
1252 """Config for ConsolidateVisitSummaryTask"""
1253 pass
1254
1255
1256class ConsolidateVisitSummaryTask(pipeBase.PipelineTask):
1257 """Task to consolidate per-detector visit metadata.
1258
1259 This task aggregates the following metadata from all the detectors in a
1260 single visit into an exposure catalog:
1261 - The visitInfo.
1262 - The wcs.
1263 - The photoCalib.
1264 - The physical_filter and band (if available).
1265 - The psf size, shape, and effective area at the center of the detector.
1266 - The corners of the bounding box in right ascension/declination.
1267
1268 Other quantities such as Detector, Psf, ApCorrMap, and TransmissionCurve
1269 are not persisted here because of storage concerns, and because of their
1270 limited utility as summary statistics.
1271
1272 Tests for this task are performed in ci_hsc_gen3.
1273 """
1274 _DefaultName = "consolidateVisitSummary"
1275 ConfigClass = ConsolidateVisitSummaryConfig
1276
1277 def __init__(self, **kwargs):
1278 super().__init__(**kwargs)
1279 self.schema = afwTable.ExposureTable.makeMinimalSchema()
1280 self.schema.addField('visit', type='L', doc='Visit number')
1281 self.schema.addField('physical_filter', type='String', size=32, doc='Physical filter')
1282 self.schema.addField('band', type='String', size=32, doc='Name of band')
1283 ExposureSummaryStats.update_schema(self.schema)
1284 self.visitSummarySchema = afwTable.ExposureCatalog(self.schema)
1285
1286 def runQuantum(self, butlerQC, inputRefs, outputRefs):
1287 dataRefs = butlerQC.get(inputRefs.calexp)
1288 visit = dataRefs[0].dataId.byName()['visit']
1289
1290 self.log.debug("Concatenating metadata from %d per-detector calexps (visit %d)",
1291 len(dataRefs), visit)
1292
1293 expCatalog = self._combineExposureMetadata(visit, dataRefs)
1294
1295 butlerQC.put(expCatalog, outputRefs.visitSummary)
1296
1297 def _combineExposureMetadata(self, visit, dataRefs):
1298 """Make a combined exposure catalog from a list of dataRefs.
1299 These dataRefs must point to exposures with wcs, summaryStats,
1300 and other visit metadata.
1301
1302 Parameters
1303 ----------
1304 visit : `int`
1305 Visit identification number.
1306 dataRefs : `list` of `lsst.daf.butler.DeferredDatasetHandle`
1307 List of dataRefs in visit.
1308
1309 Returns
1310 -------
1311 visitSummary : `lsst.afw.table.ExposureCatalog`
1312 Exposure catalog with per-detector summary information.
1313 """
1314 cat = afwTable.ExposureCatalog(self.schema)
1315 cat.resize(len(dataRefs))
1316
1317 cat['visit'] = visit
1318
1319 for i, dataRef in enumerate(dataRefs):
1320 visitInfo = dataRef.get(component='visitInfo')
1321 filterLabel = dataRef.get(component='filter')
1322 summaryStats = dataRef.get(component='summaryStats')
1323 detector = dataRef.get(component='detector')
1324 wcs = dataRef.get(component='wcs')
1325 photoCalib = dataRef.get(component='photoCalib')
1326 detector = dataRef.get(component='detector')
1327 bbox = dataRef.get(component='bbox')
1328 validPolygon = dataRef.get(component='validPolygon')
1329
1330 rec = cat[i]
1331 rec.setBBox(bbox)
1332 rec.setVisitInfo(visitInfo)
1333 rec.setWcs(wcs)
1334 rec.setPhotoCalib(photoCalib)
1335 rec.setValidPolygon(validPolygon)
1336
1337 rec['physical_filter'] = filterLabel.physicalLabel if filterLabel.hasPhysicalLabel() else ""
1338 rec['band'] = filterLabel.bandLabel if filterLabel.hasBandLabel() else ""
1339 rec.setId(detector.getId())
1340 summaryStats.update_record(rec)
1341
1342 metadata = dafBase.PropertyList()
1343 metadata.add("COMMENT", "Catalog id is detector id, sorted.")
1344 # We are looping over existing datarefs, so the following is true
1345 metadata.add("COMMENT", "Only detectors with data have entries.")
1346 cat.setMetadata(metadata)
1347
1348 cat.sort()
1349 return cat
1350
1351
1352class ConsolidateSourceTableConnections(pipeBase.PipelineTaskConnections,
1353 defaultTemplates={"catalogType": ""},
1354 dimensions=("instrument", "visit")):
1355 inputCatalogs = connectionTypes.Input(
1356 doc="Input per-detector Source Tables",
1357 name="{catalogType}sourceTable",
1358 storageClass="DataFrame",
1359 dimensions=("instrument", "visit", "detector"),
1360 multiple=True
1361 )
1362 outputCatalog = connectionTypes.Output(
1363 doc="Per-visit concatenation of Source Table",
1364 name="{catalogType}sourceTable_visit",
1365 storageClass="DataFrame",
1366 dimensions=("instrument", "visit")
1367 )
1368
1369
1370class ConsolidateSourceTableConfig(pipeBase.PipelineTaskConfig,
1371 pipelineConnections=ConsolidateSourceTableConnections):
1372 pass
1373
1374
1375class ConsolidateSourceTableTask(pipeBase.PipelineTask):
1376 """Concatenate `sourceTable` list into a per-visit `sourceTable_visit`
1377 """
1378 _DefaultName = 'consolidateSourceTable'
1379 ConfigClass = ConsolidateSourceTableConfig
1380
1381 inputDataset = 'sourceTable'
1382 outputDataset = 'sourceTable_visit'
1383
1384 def runQuantum(self, butlerQC, inputRefs, outputRefs):
1385 from .makeWarp import reorderRefs
1386
1387 detectorOrder = [ref.dataId['detector'] for ref in inputRefs.inputCatalogs]
1388 detectorOrder.sort()
1389 inputRefs = reorderRefs(inputRefs, detectorOrder, dataIdKey='detector')
1390 inputs = butlerQC.get(inputRefs)
1391 self.log.info("Concatenating %s per-detector Source Tables",
1392 len(inputs['inputCatalogs']))
1393 df = pd.concat(inputs['inputCatalogs'])
1394 butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs)
1395
1396
1397class MakeCcdVisitTableConnections(pipeBase.PipelineTaskConnections,
1398 dimensions=("instrument",),
1399 defaultTemplates={"calexpType": ""}):
1400 visitSummaryRefs = connectionTypes.Input(
1401 doc="Data references for per-visit consolidated exposure metadata",
1402 name="finalVisitSummary",
1403 storageClass="ExposureCatalog",
1404 dimensions=("instrument", "visit"),
1405 multiple=True,
1406 deferLoad=True,
1407 )
1408 outputCatalog = connectionTypes.Output(
1409 doc="CCD and Visit metadata table",
1410 name="ccdVisitTable",
1411 storageClass="DataFrame",
1412 dimensions=("instrument",)
1413 )
1414
1415
1416class MakeCcdVisitTableConfig(pipeBase.PipelineTaskConfig,
1417 pipelineConnections=MakeCcdVisitTableConnections):
1418 idGenerator = DetectorVisitIdGeneratorConfig.make_field()
1419
1420
1421class MakeCcdVisitTableTask(pipeBase.PipelineTask):
1422 """Produce a `ccdVisitTable` from the visit summary exposure catalogs.
1423 """
1424 _DefaultName = 'makeCcdVisitTable'
1425 ConfigClass = MakeCcdVisitTableConfig
1426
1427 def run(self, visitSummaryRefs):
1428 """Make a table of ccd information from the visit summary catalogs.
1429
1430 Parameters
1431 ----------
1432 visitSummaryRefs : `list` of `lsst.daf.butler.DeferredDatasetHandle`
1433 List of DeferredDatasetHandles pointing to exposure catalogs with
1434 per-detector summary information.
1435
1436 Returns
1437 -------
 1438 result : `lsst.pipe.base.Struct`
1439 Results struct with attribute:
1440
1441 ``outputCatalog``
1442 Catalog of ccd and visit information.
1443 """
1444 ccdEntries = []
1445 for visitSummaryRef in visitSummaryRefs:
1446 visitSummary = visitSummaryRef.get()
1447 visitInfo = visitSummary[0].getVisitInfo()
1448
1449 ccdEntry = {}
1450 summaryTable = visitSummary.asAstropy()
1451 selectColumns = ['id', 'visit', 'physical_filter', 'band', 'ra', 'decl', 'zenithDistance',
1452 'zeroPoint', 'psfSigma', 'skyBg', 'skyNoise',
1453 'astromOffsetMean', 'astromOffsetStd', 'nPsfStar',
1454 'psfStarDeltaE1Median', 'psfStarDeltaE2Median',
1455 'psfStarDeltaE1Scatter', 'psfStarDeltaE2Scatter',
1456 'psfStarDeltaSizeMedian', 'psfStarDeltaSizeScatter',
1457 'psfStarScaledDeltaSizeScatter',
1458 'psfTraceRadiusDelta', 'maxDistToNearestPsf']
1459 ccdEntry = summaryTable[selectColumns].to_pandas().set_index('id')
1460 # 'visit' is the human readable visit number.
1461 # 'visitId' is the key to the visitId table. They are the same.
1462 # Technically you should join to get the visit from the visit
1463 # table.
1464 ccdEntry = ccdEntry.rename(columns={"visit": "visitId"})
1465 ccdEntry['ccdVisitId'] = [
1466 self.config.idGenerator.apply(
1467 visitSummaryRef.dataId,
1468 detector=detector_id,
1469 is_exposure=False,
1470 ).catalog_id # The "catalog ID" here is the ccdVisit ID
1471 # because it's usually the ID for a whole catalog
1472 # with a {visit, detector}, and that's the main
1473 # use case for IdGenerator. This usage for a
1474 # summary table is rare.
1475 for detector_id in summaryTable['id']
1476 ]
1477 ccdEntry['detector'] = summaryTable['id']
1478 pixToArcseconds = np.array([vR.getWcs().getPixelScale().asArcseconds() if vR.getWcs()
1479 else np.nan for vR in visitSummary])
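 # Convert the PSF model sigma (pixels) to a FWHM in arcseconds:
 # FWHM = sigma * 2*sqrt(2*ln(2)) ~= 2.355 * sigma.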
1480 ccdEntry["seeing"] = visitSummary['psfSigma'] * np.sqrt(8 * np.log(2)) * pixToArcseconds
1481
1482 ccdEntry["skyRotation"] = visitInfo.getBoresightRotAngle().asDegrees()
1483 ccdEntry["expMidpt"] = visitInfo.getDate().toPython()
1484 ccdEntry["expMidptMJD"] = visitInfo.getDate().get(dafBase.DateTime.MJD)
1485 expTime = visitInfo.getExposureTime()
1486 ccdEntry['expTime'] = expTime
1487 ccdEntry["obsStart"] = ccdEntry["expMidpt"] - 0.5 * pd.Timedelta(seconds=expTime)
1488 expTime_days = expTime / (60*60*24)
1489 ccdEntry["obsStartMJD"] = ccdEntry["expMidptMJD"] - 0.5 * expTime_days
1490 ccdEntry['darkTime'] = visitInfo.getDarkTime()
1491 ccdEntry['xSize'] = summaryTable['bbox_max_x'] - summaryTable['bbox_min_x']
1492 ccdEntry['ySize'] = summaryTable['bbox_max_y'] - summaryTable['bbox_min_y']
1493 ccdEntry['llcra'] = summaryTable['raCorners'][:, 0]
1494 ccdEntry['llcdec'] = summaryTable['decCorners'][:, 0]
1495 ccdEntry['ulcra'] = summaryTable['raCorners'][:, 1]
1496 ccdEntry['ulcdec'] = summaryTable['decCorners'][:, 1]
1497 ccdEntry['urcra'] = summaryTable['raCorners'][:, 2]
1498 ccdEntry['urcdec'] = summaryTable['decCorners'][:, 2]
1499 ccdEntry['lrcra'] = summaryTable['raCorners'][:, 3]
1500 ccdEntry['lrcdec'] = summaryTable['decCorners'][:, 3]
1501 # TODO: DM-30618, Add raftName, nExposures, ccdTemp, binX, binY,
1502 # and flags, and decide if WCS, and llcx, llcy, ulcx, ulcy, etc.
1503 # values are actually wanted.
1504 ccdEntries.append(ccdEntry)
1505
1506 outputCatalog = pd.concat(ccdEntries)
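# verify_integrity=True makes pandas raise if two rows share a ccdVisitId,
# guarding against duplicate visit-summary inputs.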
1507 outputCatalog.set_index('ccdVisitId', inplace=True, verify_integrity=True)
1508 return pipeBase.Struct(outputCatalog=outputCatalog)
1509
1510
1511class MakeVisitTableConnections(pipeBase.PipelineTaskConnections,
1512 dimensions=("instrument",),
1513 defaultTemplates={"calexpType": ""}):
1514 visitSummaries = connectionTypes.Input(
1515 doc="Per-visit consolidated exposure metadata",
1516 name="finalVisitSummary",
1517 storageClass="ExposureCatalog",
1518 dimensions=("instrument", "visit",),
1519 multiple=True,
1520 deferLoad=True,
1521 )
1522 outputCatalog = connectionTypes.Output(
1523 doc="Visit metadata table",
1524 name="visitTable",
1525 storageClass="DataFrame",
1526 dimensions=("instrument",)
1527 )
1528
1529
1530class MakeVisitTableConfig(pipeBase.PipelineTaskConfig,
1531 pipelineConnections=MakeVisitTableConnections):
1532 pass
1533
1534
1535class MakeVisitTableTask(pipeBase.PipelineTask):
1536 """Produce a `visitTable` from the visit summary exposure catalogs.
1537 """
1538 _DefaultName = 'makeVisitTable'
1539 ConfigClass = MakeVisitTableConfig
1540
1541 def run(self, visitSummaries):
1542 """Make a table of visit information from the visit summary catalogs.
1543
1544 Parameters
1545 ----------
1546 visitSummaries : `list` of `lsst.daf.butler.DeferredDatasetHandle`
1547 List of DeferredDatasetHandles pointing to exposure catalogs with per-detector summary information.

1548 Returns
1549 -------
1550 result : `lsst.pipe.base.Struct`
1551 Results struct with attribute:
1552
1553 ``outputCatalog``
1554 Catalog of visit information.
1555 """
1556 visitEntries = []
1557 for visitSummary in visitSummaries:
1558 visitSummary = visitSummary.get()
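# Visit-level quantities (visit, filter, band, and the VisitInfo used below)
# are assumed to be common to every detector row, so only the first record
# of the per-detector summary is needed.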
1559 visitRow = visitSummary[0]
1560 visitInfo = visitRow.getVisitInfo()
1561
1562 visitEntry = {}
1563 visitEntry["visitId"] = visitRow['visit']
1564 visitEntry["visit"] = visitRow['visit']
1565 visitEntry["physical_filter"] = visitRow['physical_filter']
1566 visitEntry["band"] = visitRow['band']
1567 raDec = visitInfo.getBoresightRaDec()
1568 visitEntry["ra"] = raDec.getRa().asDegrees()
1569 visitEntry["decl"] = raDec.getDec().asDegrees()
1570 visitEntry["skyRotation"] = visitInfo.getBoresightRotAngle().asDegrees()
1571 azAlt = visitInfo.getBoresightAzAlt()
1572 visitEntry["azimuth"] = azAlt.getLongitude().asDegrees()
1573 visitEntry["altitude"] = azAlt.getLatitude().asDegrees()
1574 visitEntry["zenithDistance"] = 90 - azAlt.getLatitude().asDegrees()
1575 visitEntry["airmass"] = visitInfo.getBoresightAirmass()
1576 expTime = visitInfo.getExposureTime()
1577 visitEntry["expTime"] = expTime
1578 visitEntry["expMidpt"] = visitInfo.getDate().toPython()
1579 visitEntry["expMidptMJD"] = visitInfo.getDate().get(dafBase.DateTime.MJD)
1580 visitEntry["obsStart"] = visitEntry["expMidpt"] - 0.5 * pd.Timedelta(seconds=expTime)
1581 expTime_days = expTime / (60*60*24)
1582 visitEntry["obsStartMJD"] = visitEntry["expMidptMJD"] - 0.5 * expTime_days
1583 visitEntries.append(visitEntry)
1584
1585 # TODO: DM-30623, Add programId, exposureType, cameraTemp,
1586 # mirror1Temp, mirror2Temp, mirror3Temp, domeTemp, externalTemp,
1587 # dimmSeeing, pwvGPS, pwvMW, flags, nExposures.
1588
1589 outputCatalog = pd.DataFrame(data=visitEntries)
1590 outputCatalog.set_index('visitId', inplace=True, verify_integrity=True)
1591 return pipeBase.Struct(outputCatalog=outputCatalog)
1592
1593
1594class WriteForcedSourceTableConnections(pipeBase.PipelineTaskConnections,
1595 dimensions=("instrument", "visit", "detector", "skymap", "tract")):
1596
1597 inputCatalog = connectionTypes.Input(
1598 doc="Primary per-detector, single-epoch forced-photometry catalog. "
1599 "By default, it is the output of ForcedPhotCcdTask on calexps",
1600 name="forced_src",
1601 storageClass="SourceCatalog",
1602 dimensions=("instrument", "visit", "detector", "skymap", "tract")
1603 )
1604 inputCatalogDiff = connectionTypes.Input(
1605 doc="Secondary multi-epoch, per-detector, forced photometry catalog. "
1606 "By default, it is the output of ForcedPhotCcdTask run on image differences.",
1607 name="forced_diff",
1608 storageClass="SourceCatalog",
1609 dimensions=("instrument", "visit", "detector", "skymap", "tract")
1610 )
1611 outputCatalog = connectionTypes.Output(
1612 doc="InputCatalogs horizonatally joined on `objectId` in DataFrame parquet format",
1613 name="mergedForcedSource",
1614 storageClass="DataFrame",
1615 dimensions=("instrument", "visit", "detector", "skymap", "tract")
1616 )
1617
1618
1619class WriteForcedSourceTableConfig(pipeBase.PipelineTaskConfig,
1620 pipelineConnections=WriteForcedSourceTableConnections):
1621 key = lsst.pex.config.Field(
1622 doc="Column on which to join the two input tables on and make the primary key of the output",
1623 dtype=str,
1624 default="objectId",
1625 )
1626 idGenerator = DetectorVisitIdGeneratorConfig.make_field()
1627
1628
1629class WriteForcedSourceTableTask(pipeBase.PipelineTask):
1630 """Merge and convert per-detector forced source catalogs to DataFrame Parquet format.
1631
1632 Because the predecessor ForcedPhotCcdTask operates per-detector and
1633 per-tract (i.e., it has tract in its dimensions), detectors
1634 on the tract boundary may have multiple forced source catalogs.
1635
1636 The successor task TransformForcedSourceTable runs per-patch
1637 and temporally aggregates the overlapping mergedForcedSource catalogs
1638 from all available epochs.
1639 """
1640 _DefaultName = "writeForcedSourceTable"
1641 ConfigClass = WriteForcedSourceTableConfig
1642
1643 def runQuantum(self, butlerQC, inputRefs, outputRefs):
1644 inputs = butlerQC.get(inputRefs)
1645 # Add ccdVisitId to allow joining with CcdVisitTable
1646 idGenerator = self.config.idGenerator.apply(butlerQC.quantum.dataId)
1647 inputs['ccdVisitId'] = idGenerator.catalog_id
1648 inputs['band'] = butlerQC.quantum.dataId.full['band']
1649 outputs = self.run(**inputs)
1650 butlerQC.put(outputs, outputRefs)
1651
1652 def run(self, inputCatalog, inputCatalogDiff, ccdVisitId=None, band=None):
1653 dfs = []
1654 for table, dataset in zip((inputCatalog, inputCatalogDiff), ('calexp', 'diff')):
1655 df = table.asAstropy().to_pandas().set_index(self.config.key, drop=False)
1656 df = df.reindex(sorted(df.columns), axis=1)
1657 df['ccdVisitId'] = ccdVisitId if ccdVisitId else pd.NA
1658 df['band'] = band if band else pd.NA
1659 df.columns = pd.MultiIndex.from_tuples([(dataset, c) for c in df.columns],
1660 names=('dataset', 'column'))
1661
1662 dfs.append(df)
1663
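# Horizontally join the 'calexp' and 'diff' frames on their shared index
# (config.key, objectId by default); the dataset level of the column
# MultiIndex keeps the two sets of columns distinct.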
1664 outputCatalog = functools.reduce(lambda d1, d2: d1.join(d2), dfs)
1665 return pipeBase.Struct(outputCatalog=outputCatalog)
1666
1667
1668class TransformForcedSourceTableConnections(pipeBase.PipelineTaskConnections,
1669 dimensions=("instrument", "skymap", "patch", "tract")):
1670
1671 inputCatalogs = connectionTypes.Input(
1672 doc="DataFrames of merged ForcedSources produced by WriteForcedSourceTableTask",
1673 name="mergedForcedSource",
1674 storageClass="DataFrame",
1675 dimensions=("instrument", "visit", "detector", "skymap", "tract"),
1676 multiple=True,
1677 deferLoad=True
1678 )
1679 referenceCatalog = connectionTypes.Input(
1680 doc="Reference catalog which was used to seed the forcedPhot. Columns "
1681 "objectId, detect_isPrimary, detect_isTractInner, detect_isPatchInner "
1682 "are expected.",
1683 name="objectTable",
1684 storageClass="DataFrame",
1685 dimensions=("tract", "patch", "skymap"),
1686 deferLoad=True
1687 )
1688 outputCatalog = connectionTypes.Output(
1689 doc="Narrower, temporally-aggregated, per-patch ForcedSource Table transformed and converted per a "
1690 "specified set of functors",
1691 name="forcedSourceTable",
1692 storageClass="DataFrame",
1693 dimensions=("tract", "patch", "skymap")
1694 )
1695
1696
1697class TransformForcedSourceTableConfig(TransformCatalogBaseConfig,
1698 pipelineConnections=TransformForcedSourceTableConnections):
1699 referenceColumns = pexConfig.ListField(
1700 dtype=str,
1701 default=["detect_isPrimary", "detect_isTractInner", "detect_isPatchInner"],
1702 optional=True,
1703 doc="Columns to pull from reference catalog",
1704 )
1705 keyRef = lsst.pex.config.Field(
1706 doc="Column on which to join the two input tables on and make the primary key of the output",
1707 dtype=str,
1708 default="objectId",
1709 )
1710 key = lsst.pex.config.Field(
1711 doc="Rename the output DataFrame index to this name",
1712 dtype=str,
1713 default="forcedSourceId",
1714 )
1715
1716 def setDefaults(self):
1717 super().setDefaults()
1718 self.functorFile = os.path.join('$PIPE_TASKS_DIR', 'schemas', 'ForcedSource.yaml')
1719 self.columnsFromDataId = ['tract', 'patch']
1720
1721
1722class TransformForcedSourceTableTask(TransformCatalogBaseTask):
1723 """Transform/standardize a ForcedSource catalog
1724
1725 Transforms each wide, per-detector forcedSource DataFrame per the
1726 specification file (per-camera defaults found in ForcedSource.yaml).
1727 All epochs that overlap the patch are aggregated into one per-patch
1728 narrow-DataFrame file.
1729
1730 No de-duplication of rows is performed. Duplicate-resolution flags are
1731 pulled in from the referenceCatalog: `detect_isPrimary`,
1732 `detect_isTractInner`, `detect_isPatchInner`, so that the user may
1733 de-duplicate for analysis or compare duplicates for QA.
1734
1735 The resulting table includes multiple bands. Epochs (MJDs) and other useful
1736 per-visit metadata can be retrieved by joining with the CcdVisitTable on
1737 ccdVisitId.
1738 """
1739 _DefaultName = "transformForcedSourceTable"
1740 ConfigClass = TransformForcedSourceTableConfig
1741
1742 def runQuantum(self, butlerQC, inputRefs, outputRefs):
1743 inputs = butlerQC.get(inputRefs)
1744 if self.funcs is None:
1745 raise ValueError("config.functorFile is None. "
1746 "Must be a valid path to yaml in order to run Task as a PipelineTask.")
1747 outputs = self.run(inputs['inputCatalogs'], inputs['referenceCatalog'], funcs=self.funcs,
1748 dataId=outputRefs.outputCatalog.dataId.full)
1749
1750 butlerQC.put(outputs, outputRefs)
1751
1752 def run(self, inputCatalogs, referenceCatalog, funcs=None, dataId=None, band=None):
1753 dfs = []
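# Load only the duplicate-resolution columns (config.referenceColumns) from
# the reference object table, rather than the full wide table.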
1754 ref = referenceCatalog.get(parameters={"columns": self.config.referenceColumns})
1755 self.log.info("Aggregating %d input catalogs", len(inputCatalogs))
1756 for handle in inputCatalogs:
1757 result = self.transform(None, handle, funcs, dataId)
1758 # Filter for only rows that were detected on (overlap) the patch
1759 dfs.append(result.df.join(ref, how='inner'))
1760
1761 outputCatalog = pd.concat(dfs)
1762
1763 # The join above was done on config.keyRef, so the index currently
1764 # holds those values; name it accordingly.
1765 outputCatalog.index.rename(self.config.keyRef, inplace=True)
1766 # Move config.keyRef out of the index and back into a regular column.
1767 outputCatalog.reset_index(inplace=True)
1768 # Make forcedSourceId (computed per the ForcedSource.yaml
1769 # specification) the index, verifying that it is unique.
1770 outputCatalog.set_index("forcedSourceId", inplace=True, verify_integrity=True)
1771 # Finally, rename the index to config.key.
1772 outputCatalog.index.rename(self.config.key, inplace=True)
1773
1774 self.log.info("Made a table of %d columns and %d rows",
1775 len(outputCatalog.columns), len(outputCatalog))
1776 return pipeBase.Struct(outputCatalog=outputCatalog)
1777
1778
1779class ConsolidateTractConnections(pipeBase.PipelineTaskConnections,
1780 defaultTemplates={"catalogType": ""},
1781 dimensions=("instrument", "tract")):
1782 inputCatalogs = connectionTypes.Input(
1783 doc="Input per-patch DataFrame Tables to be concatenated",
1784 name="{catalogType}ForcedSourceTable",
1785 storageClass="DataFrame",
1786 dimensions=("tract", "patch", "skymap"),
1787 multiple=True,
1788 )
1789
1790 outputCatalog = connectionTypes.Output(
1791 doc="Output per-tract concatenation of DataFrame Tables",
1792 name="{catalogType}ForcedSourceTable_tract",
1793 storageClass="DataFrame",
1794 dimensions=("tract", "skymap"),
1795 )
1796
1797
1798class ConsolidateTractConfig(pipeBase.PipelineTaskConfig,
1799 pipelineConnections=ConsolidateTractConnections):
1800 pass
1801
1802
1803class ConsolidateTractTask(pipeBase.PipelineTask):
1804 """Concatenate any per-patch, dataframe list into a single
1805 per-tract DataFrame.
1806 """
1807 _DefaultName = 'ConsolidateTract'
1808 ConfigClass = ConsolidateTractConfig
1809
1810 def runQuantum(self, butlerQC, inputRefs, outputRefs):
1811 inputs = butlerQC.get(inputRefs)
1812 # Not checking that at least one inputCatalog exists: if there were
1813 # none, the quantum graph (QG) would be empty and this would not run.
1814 self.log.info("Concatenating %s per-patch %s Tables",
1815 len(inputs['inputCatalogs']),
1816 inputRefs.inputCatalogs[0].datasetType.name)
1817 df = pd.concat(inputs['inputCatalogs'])
1818 butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs)