lsst.pipe.tasks g4a6547c0d5+de68eba77a
postprocess.py
1# This file is part of pipe_tasks
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <https://www.gnu.org/licenses/>.
21
22import functools
23import pandas as pd
24from collections import defaultdict
25import logging
26import numpy as np
27import numbers
28
29import lsst.geom
30import lsst.pex.config as pexConfig
31import lsst.pipe.base as pipeBase
32import lsst.daf.base as dafBase
33from lsst.obs.base import ExposureIdInfo
34from lsst.pipe.base import connectionTypes
35import lsst.afw.table as afwTable
36from lsst.meas.base import SingleFrameMeasurementTask
37from lsst.pipe.base import CmdLineTask, ArgumentParser, DataIdContainer
38from lsst.coadd.utils.coaddDataIdContainer import CoaddDataIdContainer
39from lsst.daf.butler import DeferredDatasetHandle, DataCoordinate
40from lsst.skymap import BaseSkyMap
41
42from .parquetTable import ParquetTable
43from .multiBandUtils import makeMergeArgumentParser, MergeSourcesRunner
44from .functors import CompositeFunctor, Column
45
46log = logging.getLogger(__name__)
47
48
49def flattenFilters(df, noDupCols=['coord_ra', 'coord_dec'], camelCase=False, inputBands=None):
50 """Flatten a DataFrame with a multilevel column index into flat, band-prefixed columns.
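
    Examples
    --------
    A minimal illustrative sketch (the column names here are invented and
    are not actual pipeline columns)::

        >>> import pandas as pd
        >>> df = pd.DataFrame({('g', 'coord_ra'): [1.0], ('g', 'psfFlux'): [2.0],
        ...                    ('r', 'coord_ra'): [1.0], ('r', 'psfFlux'): [3.0]})
        >>> flattenFilters(df, noDupCols=['coord_ra']).columns.tolist()
        ['coord_ra', 'g_psfFlux', 'r_psfFlux']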
51 """
52 newDf = pd.DataFrame()
53 # band is the level 0 index
54 dfBands = df.columns.unique(level=0).values
55 for band in dfBands:
56 subdf = df[band]
57 columnFormat = '{0}{1}' if camelCase else '{0}_{1}'
58 newColumns = {c: columnFormat.format(band, c)
59 for c in subdf.columns if c not in noDupCols}
60 cols = list(newColumns.keys())
61 newDf = pd.concat([newDf, subdf[cols].rename(columns=newColumns)], axis=1)
62
63 # Band must be present in the input and output or else column is all NaN:
64 presentBands = dfBands if inputBands is None else list(set(inputBands).intersection(dfBands))
65 # Get the unexploded columns from any present band's partition
66 noDupDf = df[presentBands[0]][noDupCols]
67 newDf = pd.concat([noDupDf, newDf], axis=1)
68 return newDf
69
70
71class WriteObjectTableConnections(pipeBase.PipelineTaskConnections,
72 defaultTemplates={"coaddName": "deep"},
73 dimensions=("tract", "patch", "skymap")):
74 inputCatalogMeas = connectionTypes.Input(
75 doc="Catalog of source measurements on the deepCoadd.",
76 dimensions=("tract", "patch", "band", "skymap"),
77 storageClass="SourceCatalog",
78 name="{coaddName}Coadd_meas",
79 multiple=True
80 )
81 inputCatalogForcedSrc = connectionTypes.Input(
82 doc="Catalog of forced measurements (shape and position parameters held fixed) on the deepCoadd.",
83 dimensions=("tract", "patch", "band", "skymap"),
84 storageClass="SourceCatalog",
85 name="{coaddName}Coadd_forced_src",
86 multiple=True
87 )
88 inputCatalogRef = connectionTypes.Input(
89 doc="Catalog marking the primary detection (which band provides a good shape and position) "
90 "for each detection in deepCoadd_mergeDet.",
91 dimensions=("tract", "patch", "skymap"),
92 storageClass="SourceCatalog",
93 name="{coaddName}Coadd_ref"
94 )
95 outputCatalog = connectionTypes.Output(
96 doc="A vertical concatenation of the deepCoadd_{ref|meas|forced_src} catalogs, "
97 "stored as a DataFrame with a multi-level column index per-patch.",
98 dimensions=("tract", "patch", "skymap"),
99 storageClass="DataFrame",
100 name="{coaddName}Coadd_obj"
101 )
102
103
104class WriteObjectTableConfig(pipeBase.PipelineTaskConfig,
105 pipelineConnections=WriteObjectTableConnections):
106 engine = pexConfig.Field(
107 dtype=str,
108 default="pyarrow",
109 doc="Parquet engine for writing (pyarrow or fastparquet)"
110 )
111 coaddName = pexConfig.Field(
112 dtype=str,
113 default="deep",
114 doc="Name of coadd"
115 )
116
117
118class WriteObjectTableTask(CmdLineTask, pipeBase.PipelineTask):
119 """Write filter-merged source tables to parquet
120 """
121 _DefaultName = "writeObjectTable"
122 ConfigClass = WriteObjectTableConfig
123 RunnerClass = MergeSourcesRunner
124
125 # Names of table datasets to be merged
126 inputDatasets = ('forced_src', 'meas', 'ref')
127
128 # Tag of output dataset written by `MergeSourcesTask.write`
129 outputDataset = 'obj'
130
131 def __init__(self, butler=None, schema=None, **kwargs):
132 # It would be nice to use the default CmdLineTask init here, but doing so
133 # would require a dedicated task runner, which is many more lines of
134 # specialization, so this is how it is for now.
135 super().__init__(**kwargs)
136
137 def runDataRef(self, patchRefList):
138 """!
139 @brief Merge coadd sources from multiple bands by calling `run` on the
140 per-band input catalogs.
141 @param[in] patchRefList list of data references for each filter
142 """
143 catalogs = dict(self.readCatalog(patchRef) for patchRef in patchRefList)
144 dataId = patchRefList[0].dataId
145 mergedCatalog = self.run(catalogs, tract=dataId['tract'], patch=dataId['patch'])
146 self.write(patchRefList[0], ParquetTable(dataFrame=mergedCatalog))
147
148 def runQuantum(self, butlerQC, inputRefs, outputRefs):
149 inputs = butlerQC.get(inputRefs)
150
151 measDict = {ref.dataId['band']: {'meas': cat} for ref, cat in
152 zip(inputRefs.inputCatalogMeas, inputs['inputCatalogMeas'])}
153 forcedSourceDict = {ref.dataId['band']: {'forced_src': cat} for ref, cat in
154 zip(inputRefs.inputCatalogForcedSrc, inputs['inputCatalogForcedSrc'])}
155
156 catalogs = {}
157 for band in measDict.keys():
158 catalogs[band] = {'meas': measDict[band]['meas'],
159 'forced_src': forcedSourceDict[band]['forced_src'],
160 'ref': inputs['inputCatalogRef']}
161 dataId = butlerQC.quantum.dataId
162 df = self.run(catalogs=catalogs, tract=dataId['tract'], patch=dataId['patch'])
163 outputs = pipeBase.Struct(outputCatalog=df)
164 butlerQC.put(outputs, outputRefs)
165
166 @classmethod
167 def _makeArgumentParser(cls):
168 """Create a suitable ArgumentParser.
169
170 We will use the ArgumentParser to get a list of data
171 references for patches; the RunnerClass will sort them into lists
172 of data references for the same patch.
173
174 References the first element of self.inputDatasets, rather than
175 self.inputDataset.
176 """
177 return makeMergeArgumentParser(cls._DefaultName, cls.inputDatasets[0])
178
179 def readCatalog(self, patchRef):
180 """Read input catalogs
181
182 Read all the input datasets given by the 'inputDatasets'
183 attribute.
184
185 Parameters
186 ----------
187 patchRef : `lsst.daf.persistence.ButlerDataRef`
188 Data reference for patch
189
190 Returns
191 -------
192 Tuple consisting of band name and a dict of catalogs, keyed by
193 dataset name
194 """
195 band = patchRef.get(self.config.coaddName + "Coadd_filterLabel", immediate=True).bandLabel
196 catalogDict = {}
197 for dataset in self.inputDatasets:
198 catalog = patchRef.get(self.config.coaddName + "Coadd_" + dataset, immediate=True)
199 self.log.info("Read %d sources from %s for band %s: %s",
200 len(catalog), dataset, band, patchRef.dataId)
201 catalogDict[dataset] = catalog
202 return band, catalogDict
203
204 def run(self, catalogs, tract, patch):
205 """Merge multiple catalogs.
206
207 Parameters
208 ----------
209 catalogs : `dict`
210 Mapping from filter names to dict of catalogs.
211 tract : `int`
212 tractId to use for the tractId column
213 patch : `str`
214 patchId to use for the patchId column
215
216 Returns
217 -------
218 catalog : `pandas.DataFrame`
219 Merged dataframe
220 """
221
222 dfs = []
223 for filt, tableDict in catalogs.items():
224 for dataset, table in tableDict.items():
225 # Convert afwTable to pandas DataFrame
226 df = table.asAstropy().to_pandas().set_index('id', drop=True)
227
228 # Sort columns by name, to ensure matching schema among patches
229 df = df.reindex(sorted(df.columns), axis=1)
230 df['tractId'] = tract
231 df['patchId'] = patch
232
233 # Make columns a 3-level MultiIndex
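# (e.g. a column from the 'meas' catalog in band 'g' becomes ('meas', 'g', '<column name>'))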
234 df.columns = pd.MultiIndex.from_tuples([(dataset, filt, c) for c in df.columns],
235 names=('dataset', 'band', 'column'))
236 dfs.append(df)
237
238 catalog = functools.reduce(lambda d1, d2: d1.join(d2), dfs)
239 return catalog
240
241 def write(self, patchRef, catalog):
242 """Write the output.
243
244 Parameters
245 ----------
246 catalog : `ParquetTable`
247 Catalog to write
248 patchRef : `lsst.daf.persistence.ButlerDataRef`
249 Data reference for patch
250 """
251 patchRef.put(catalog, self.config.coaddName + "Coadd_" + self.outputDataset)
252 # since the filter isn't actually part of the data ID for the dataset we're saving,
253 # it's confusing to see it in the log message, even if the butler simply ignores it.
254 mergeDataId = patchRef.dataId.copy()
255 del mergeDataId["filter"]
256 self.log.info("Wrote merged catalog: %s", mergeDataId)
257
258 def writeMetadata(self, dataRefList):
259 """No metadata to write, and not sure how to write it for a list of dataRefs.
260 """
261 pass
262
263
264class WriteSourceTableConnections(pipeBase.PipelineTaskConnections,
265 defaultTemplates={"catalogType": ""},
266 dimensions=("instrument", "visit", "detector")):
267
268 catalog = connectionTypes.Input(
269 doc="Input full-depth catalog of sources produced by CalibrateTask",
270 name="{catalogType}src",
271 storageClass="SourceCatalog",
272 dimensions=("instrument", "visit", "detector")
273 )
274 outputCatalog = connectionTypes.Output(
275 doc="Catalog of sources, `src` in Parquet format. The 'id' column is "
276 "replaced with an index; all other columns are unchanged.",
277 name="{catalogType}source",
278 storageClass="DataFrame",
279 dimensions=("instrument", "visit", "detector")
280 )
281
282
283class WriteSourceTableConfig(pipeBase.PipelineTaskConfig,
284 pipelineConnections=WriteSourceTableConnections):
285 doApplyExternalPhotoCalib = pexConfig.Field(
286 dtype=bool,
287 default=False,
288 doc=("Add local photoCalib columns from the calexp.photoCalib? Should only be set True if "
289 "generating Source Tables from older src tables which do not already have local calib columns"),
290 deprecated="This field is no longer used. Use WriteRecalibratedSourceTableTask instead."
291 )
292 doApplyExternalSkyWcs = pexConfig.Field(
293 dtype=bool,
294 default=False,
295 doc=("Add local WCS columns from the calexp.wcs? Should only be set True if "
296 "generating Source Tables from older src tables which do not already have local calib columns"),
297 deprecated="This field is no longer used. Use WriteRecalibratedSourceTableTask instead."
298 )
299
300 def validate(self):
301 super().validate()
302 if self.doApplyExternalSkyWcs or self.doApplyExternalPhotoCalib:
303 raise ValueError(f"doApplyExternalSkyWcs={self.doApplyExternalSkyWcs} and "
304 f"doApplyExternalPhotoCalib={self.doApplyExternalPhotoCalib}. "
305 "These config parameters are no-ops for WriteSourceTableTask. "
306 "Set to False or use WriteRecalibratedSourceTableTask instead. ")
307
308
309class WriteSourceTableTask(CmdLineTask, pipeBase.PipelineTask):
310 """Write source table to parquet
311 """
312 _DefaultName = "writeSourceTable"
313 ConfigClass = WriteSourceTableConfig
314
315 def runDataRef(self, dataRef):
316 src = dataRef.get('src')
317 ccdVisitId = dataRef.get('ccdExposureId')
318 result = self.run(src, ccdVisitId=ccdVisitId)
319 dataRef.put(result.table, 'source')
320
321 def runQuantum(self, butlerQC, inputRefs, outputRefs):
322 inputs = butlerQC.get(inputRefs)
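# "visit_detector" packs the visit and detector values of the data ID into a single integer ID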
323 inputs['ccdVisitId'] = butlerQC.quantum.dataId.pack("visit_detector")
324 result = self.run(**inputs).table
325 outputs = pipeBase.Struct(outputCatalog=result.toDataFrame())
326 butlerQC.put(outputs, outputRefs)
327
328 def run(self, catalog, ccdVisitId=None, **kwargs):
329 """Convert `src` catalog to parquet
330
331 Parameters
332 ----------
333 catalog : `afwTable.SourceCatalog`
334 catalog to be converted
335 ccdVisitId : `int`
336 ccdVisitId to be added as a column
337
338 Returns
339 -------
340 result : `lsst.pipe.base.Struct`
341 ``table``
342 `ParquetTable` version of the input catalog
343 """
344 self.log.info("Generating parquet table from src catalog ccdVisitId=%s", ccdVisitId)
345 df = catalog.asAstropy().to_pandas().set_index('id', drop=True)
346 df['ccdVisitId'] = ccdVisitId
347 return pipeBase.Struct(table=ParquetTable(dataFrame=df))
348
349 def writeMetadata(self, dataRef):
350 """No metadata to write.
351 """
352 pass
353
354 @classmethod
355 def _makeArgumentParser(cls):
356 parser = ArgumentParser(name=cls._DefaultName)
357 parser.add_id_argument("--id", 'src',
358 help="data ID, e.g. --id visit=12345 ccd=0")
359 return parser
360
361
362class WriteRecalibratedSourceTableConnections(WriteSourceTableConnections,
363 defaultTemplates={"catalogType": "",
364 "skyWcsName": "jointcal",
365 "photoCalibName": "fgcm"},
366 dimensions=("instrument", "visit", "detector", "skymap")):
367 skyMap = connectionTypes.Input(
368 doc="skyMap needed to choose which tract-level calibrations to use when multiple are available",
369 name=BaseSkyMap.SKYMAP_DATASET_TYPE_NAME,
370 storageClass="SkyMap",
371 dimensions=("skymap",),
372 )
373 exposure = connectionTypes.Input(
374 doc="Input exposure to perform photometry on.",
375 name="calexp",
376 storageClass="ExposureF",
377 dimensions=["instrument", "visit", "detector"],
378 )
379 externalSkyWcsTractCatalog = connectionTypes.Input(
380 doc=("Per-tract, per-visit wcs calibrations. These catalogs use the detector "
381 "id for the catalog id, sorted on id for fast lookup."),
382 name="{skyWcsName}SkyWcsCatalog",
383 storageClass="ExposureCatalog",
384 dimensions=["instrument", "visit", "tract"],
385 multiple=True
386 )
387 externalSkyWcsGlobalCatalog = connectionTypes.Input(
388 doc=("Per-visit wcs calibrations computed globally (with no tract information). "
389 "These catalogs use the detector id for the catalog id, sorted on id for "
390 "fast lookup."),
391 name="{skyWcsName}SkyWcsCatalog",
392 storageClass="ExposureCatalog",
393 dimensions=["instrument", "visit"],
394 )
395 externalPhotoCalibTractCatalog = connectionTypes.Input(
396 doc=("Per-tract, per-visit photometric calibrations. These catalogs use the "
397 "detector id for the catalog id, sorted on id for fast lookup."),
398 name="{photoCalibName}PhotoCalibCatalog",
399 storageClass="ExposureCatalog",
400 dimensions=["instrument", "visit", "tract"],
401 multiple=True
402 )
403 externalPhotoCalibGlobalCatalog = connectionTypes.Input(
404 doc=("Per-visit photometric calibrations computed globally (with no tract "
405 "information). These catalogs use the detector id for the catalog id, "
406 "sorted on id for fast lookup."),
407 name="{photoCalibName}PhotoCalibCatalog",
408 storageClass="ExposureCatalog",
409 dimensions=["instrument", "visit"],
410 )
411
412 def __init__(self, *, config=None):
413 super().__init__(config=config)
414 # Same connection boilerplate as all other applications of
415 # Global/Tract calibrations
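# Prune the connections this configuration will not use, so their datasets are not required as inputs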
416 if config.doApplyExternalSkyWcs and config.doReevaluateSkyWcs:
417 if config.useGlobalExternalSkyWcs:
418 self.inputs.remove("externalSkyWcsTractCatalog")
419 else:
420 self.inputs.remove("externalSkyWcsGlobalCatalog")
421 else:
422 self.inputs.remove("externalSkyWcsTractCatalog")
423 self.inputs.remove("externalSkyWcsGlobalCatalog")
424 if config.doApplyExternalPhotoCalib and config.doReevaluatePhotoCalib:
425 if config.useGlobalExternalPhotoCalib:
426 self.inputs.remove("externalPhotoCalibTractCatalog")
427 else:
428 self.inputs.remove("externalPhotoCalibGlobalCatalog")
429 else:
430 self.inputs.remove("externalPhotoCalibTractCatalog")
431 self.inputs.remove("externalPhotoCalibGlobalCatalog")
432
433
434class WriteRecalibratedSourceTableConfig(WriteSourceTableConfig,
435 pipelineConnections=WriteRecalibratedSourceTableConnections):
436
437 doReevaluatePhotoCalib = pexConfig.Field(
438 dtype=bool,
439 default=False,
440 doc=("Add or replace local photoCalib columns from either the calexp.photoCalib or jointcal/FGCM")
441 )
442 doReevaluateSkyWcs = pexConfig.Field(
443 dtype=bool,
444 default=False,
445 doc=("Add or replace local WCS columns from either the calexp.wcs or jointcal")
446 )
447 doReevaluateLocalBackground = pexConfig.Field(
448 dtype=bool,
449 default=False,
450 doc=("Add or replace local Background columns")
451 )
452 doApplyExternalPhotoCalib = pexConfig.Field(
453 dtype=bool,
454 default=False,
455 doc=("Whether to apply external photometric calibration via an "
456 "`lsst.afw.image.PhotoCalib` object. Uses the "
457 "``externalPhotoCalibName`` field to determine which calibration "
458 "to load."),
459 )
460 doApplyExternalSkyWcs = pexConfig.Field(
461 dtype=bool,
462 default=False,
463 doc=("Whether to apply external astrometric calibration via an "
464 "`lsst.afw.geom.SkyWcs` object. Uses ``externalSkyWcsName`` "
465 "field to determine which calibration to load."),
466 )
467 useGlobalExternalPhotoCalib = pexConfig.Field(
468 dtype=bool,
469 default=True,
470 doc=("When using doApplyExternalPhotoCalib, use 'global' calibrations "
471 "that are not run per-tract. When False, use per-tract photometric "
472 "calibration files.")
473 )
474 useGlobalExternalSkyWcs = pexConfig.Field(
475 dtype=bool,
476 default=False,
477 doc=("When using doApplyExternalSkyWcs, use 'global' calibrations "
478 "that are not run per-tract. When False, use per-tract wcs "
479 "files.")
480 )
481
482 def validate(self):
483 super().validate()
484 if self.doApplyExternalSkyWcs and not self.doReevaluateSkyWcs:
485 log.warning("doApplyExternalSkyWcs=True but doReevaluateSkyWcs=False. "
486 "External SkyWcs will not be read or evaluated.")
487 if self.doApplyExternalPhotoCalib and not self.doReevaluatePhotoCalib:
488 log.warning("doApplyExternalPhotoCalib=True but doReevaluatePhotoCalib=False. "
489 "External PhotoCalib will not be read or evaluated.")
490
491
492class WriteRecalibratedSourceTableTask(WriteSourceTableTask):
493 """Write source table to parquet, optionally applying external calibrations and re-evaluating the local calibration columns first.
494 """
495 _DefaultName = "writeRecalibratedSourceTable"
496 ConfigClass = WriteRecalibratedSourceTableConfig
497
498 def runQuantum(self, butlerQC, inputRefs, outputRefs):
499 inputs = butlerQC.get(inputRefs)
500 inputs['ccdVisitId'] = butlerQC.quantum.dataId.pack("visit_detector")
501 inputs['exposureIdInfo'] = ExposureIdInfo.fromDataId(butlerQC.quantum.dataId, "visit_detector")
502
503 if self.config.doReevaluatePhotoCalib or self.config.doReevaluateSkyWcs:
504 if self.config.doApplyExternalPhotoCalib or self.config.doApplyExternalSkyWcs:
505 inputs['exposure'] = self.attachCalibs(inputRefs, **inputs)
506
507 inputs['catalog'] = self.addCalibColumns(**inputs)
508
509 result = self.run(**inputs).table
510 outputs = pipeBase.Struct(outputCatalog=result.toDataFrame())
511 butlerQC.put(outputs, outputRefs)
512
513 def attachCalibs(self, inputRefs, skyMap, exposure, externalSkyWcsGlobalCatalog=None,
514 externalSkyWcsTractCatalog=None, externalPhotoCalibGlobalCatalog=None,
515 externalPhotoCalibTractCatalog=None, **kwargs):
516 """Apply external calibrations to exposure per configuration
517
518 When multiple tract-level calibrations overlap, select the one whose
519 tract center is closest to the detector center.
520
521 Parameters
522 ----------
523 inputRefs : `lsst.pipe.base.InputQuantizedConnection`
524 Input connections, used for the dataIds of tract-level calibs.
525 skyMap : `lsst.skymap.SkyMap`
526 exposure : `lsst.afw.image.exposure.Exposure`
527 Input exposure to adjust calibrations.
528 externalSkyWcsGlobalCatalog : `lsst.afw.table.ExposureCatalog`, optional
529 Exposure catalog with external skyWcs to be applied per config
530 externalSkyWcsTractCatalog : `lsst.afw.table.ExposureCatalog`, optional
531 Exposure catalog with external skyWcs to be applied per config
532 externalPhotoCalibGlobalCatalog : `lsst.afw.table.ExposureCatalog`, optional
533 Exposure catalog with external photoCalib to be applied per config
534 externalPhotoCalibTractCatalog : `lsst.afw.table.ExposureCatalog`, optional
535 Exposure catalog with external photoCalib to be applied per config
536
537 Returns
538 -------
539 exposure : `lsst.afw.image.exposure.Exposure`
540 Exposure with adjusted calibrations.
541 """
542 if not self.config.doApplyExternalSkyWcs:
543 # Do not modify the exposure's SkyWcs
544 externalSkyWcsCatalog = None
545 elif self.config.useGlobalExternalSkyWcs:
546 # Use the global external SkyWcs
547 externalSkyWcsCatalog = externalSkyWcsGlobalCatalog
548 self.log.info('Applying global SkyWcs')
549 else:
550 # use tract-level external SkyWcs from the closest overlapping tract
551 inputRef = getattr(inputRefs, 'externalSkyWcsTractCatalog')
552 tracts = [ref.dataId['tract'] for ref in inputRef]
553 if len(tracts) == 1:
554 ind = 0
555 self.log.info('Applying tract-level SkyWcs from tract %s', tracts[ind])
556 else:
557 ind = self.getClosestTract(tracts, skyMap,
558 exposure.getBBox(), exposure.getWcs())
559 self.log.info('Multiple overlapping externalSkyWcsTractCatalogs found (%s). '
560 'Applying closest to detector center: tract=%s', str(tracts), tracts[ind])
561
562 externalSkyWcsCatalog = externalSkyWcsTractCatalog[ind]
563
564 if not self.config.doApplyExternalPhotoCalib:
565 # Do not modify the exposure's PhotoCalib
566 externalPhotoCalibCatalog = None
567 elif self.config.useGlobalExternalPhotoCalib:
568 # Use the global external PhotoCalib
569 externalPhotoCalibCatalog = externalPhotoCalibGlobalCatalog
570 self.log.info('Applying global PhotoCalib')
571 else:
572 # use tract-level external PhotoCalib from the closest overlapping tract
573 inputRef = getattr(inputRefs, 'externalPhotoCalibTractCatalog')
574 tracts = [ref.dataId['tract'] for ref in inputRef]
575 if len(tracts) == 1:
576 ind = 0
577 self.log.info('Applying tract-level PhotoCalib from tract %s', tracts[ind])
578 else:
579 ind = self.getClosestTract(tracts, skyMap,
580 exposure.getBBox(), exposure.getWcs())
581 self.log.info('Multiple overlapping externalPhotoCalibTractCatalogs found (%s). '
582 'Applying closest to detector center: tract=%s', str(tracts), tracts[ind])
583
584 externalPhotoCalibCatalog = externalPhotoCalibTractCatalog[ind]
585
586 return self.prepareCalibratedExposure(exposure, externalSkyWcsCatalog, externalPhotoCalibCatalog)
587
588 def getClosestTract(self, tracts, skyMap, bbox, wcs):
589 """Find the index of the tract closest to the detector from a list of tractIds.
590
591 Parameters
592 ----------
593 tracts : `list` [`int`]
594 Iterable of integer tractIds
595 skyMap : `lsst.skymap.SkyMap`
596 skyMap to lookup tract geometry and wcs
597 bbox : `lsst.geom.Box2I`
598 Detector bbox, the center of which will be compared to tract centers
599 wcs : `lsst.afw.geom.SkyWcs`
600 Detector Wcs object to map the detector center to SkyCoord
601
602 Returns
603 -------
604 index : `int`
605 """
606 if len(tracts) == 1:
607 return 0
608
609 center = wcs.pixelToSky(bbox.getCenter())
610 sep = []
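# Angular separation between the detector center and each candidate tract center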
611 for tractId in tracts:
612 tract = skyMap[tractId]
613 tractCenter = tract.getWcs().pixelToSky(tract.getBBox().getCenter())
614 sep.append(center.separation(tractCenter))
615
616 return np.argmin(sep)
617
618 def prepareCalibratedExposure(self, exposure, externalSkyWcsCatalog=None, externalPhotoCalibCatalog=None):
619 """Prepare a calibrated exposure and apply external calibrations
620 if so configured.
621
622 Parameters
623 ----------
624 exposure : `lsst.afw.image.exposure.Exposure`
625 Input exposure to adjust calibrations.
626 externalSkyWcsCatalog : `lsst.afw.table.ExposureCatalog`, optional
627 Exposure catalog with external skyWcs to be applied
628 if config.doApplyExternalSkyWcs=True. Catalog uses the detector id
629 for the catalog id, sorted on id for fast lookup.
630 externalPhotoCalibCatalog : `lsst.afw.table.ExposureCatalog`, optional
631 Exposure catalog with external photoCalib to be applied
632 if config.doApplyExternalPhotoCalib=True. Catalog uses the detector
633 id for the catalog id, sorted on id for fast lookup.
634
635 Returns
636 -------
637 exposure : `lsst.afw.image.exposure.Exposure`
638 Exposure with adjusted calibrations.
639 """
640 detectorId = exposure.getInfo().getDetector().getId()
641
642 if externalPhotoCalibCatalog is not None:
643 row = externalPhotoCalibCatalog.find(detectorId)
644 if row is None:
645 self.log.warning("Detector id %s not found in externalPhotoCalibCatalog; "
646 "Using original photoCalib.", detectorId)
647 else:
648 photoCalib = row.getPhotoCalib()
649 if photoCalib is None:
650 self.log.warning("Detector id %s has None for photoCalib in externalPhotoCalibCatalog; "
651 "Using original photoCalib.", detectorId)
652 else:
653 exposure.setPhotoCalib(photoCalib)
654
655 if externalSkyWcsCatalog is not None:
656 row = externalSkyWcsCatalog.find(detectorId)
657 if row is None:
658 self.log.warning("Detector id %s not found in externalSkyWcsCatalog; "
659 "Using original skyWcs.", detectorId)
660 else:
661 skyWcs = row.getWcs()
662 if skyWcs is None:
663 self.log.warning("Detector id %s has None for skyWcs in externalSkyWcsCatalog; "
664 "Using original skyWcs.", detectorId)
665 else:
666 exposure.setWcs(skyWcs)
667
668 return exposure
669
670 def addCalibColumns(self, catalog, exposure, exposureIdInfo, **kwargs):
671 """Add or replace columns with calibs evaluated at each centroid
672
673 Add or replace 'base_LocalWcs' and 'base_LocalPhotoCalib' columns in
674 a source catalog, by rerunning the plugins.
675
676 Parameters
677 ----------
678 catalog : `lsst.afw.table.SourceCatalog`
679 Catalog to which calib columns will be added
680 exposure : `lsst.afw.image.exposure.Exposure`
681 Exposure with attached PhotoCalibs and SkyWcs attributes to be
682 reevaluated at local centroids. Pixels are not required.
683 exposureIdInfo : `lsst.obs.base.ExposureIdInfo`
684
685 Returns
686 -------
687 newCat : `lsst.afw.table.SourceCatalog`
688 Source catalog with requested local calib columns
689 """
690 measureConfig = SingleFrameMeasurementTask.ConfigClass()
691 measureConfig.doReplaceWithNoise = False
692
693 measureConfig.plugins.names = []
694 if self.config.doReevaluateSkyWcs:
695 measureConfig.plugins.names.add('base_LocalWcs')
696 self.log.info("Re-evaluating base_LocalWcs plugin")
697 if self.config.doReevaluatePhotoCalib:
698 measureConfig.plugins.names.add('base_LocalPhotoCalib')
699 self.log.info("Re-evaluating base_LocalPhotoCalib plugin")
700 if self.config.doReevaluateLocalBackground:
701 measureConfig.plugins.names.add('base_LocalBackground')
702 self.log.info("Re-evaluating base_LocalBackground plugin")
703 pluginsNotToCopy = tuple(measureConfig.plugins.names)
704
705 # Create a new schema and catalog
706 # Copy all columns from original except for the ones to reevaluate
707 aliasMap = catalog.schema.getAliasMap()
708 mapper = afwTable.SchemaMapper(catalog.schema)
709 for item in catalog.schema:
710 if not item.field.getName().startswith(pluginsNotToCopy):
711 mapper.addMapping(item.key)
712
713 schema = mapper.getOutputSchema()
714 measurement = SingleFrameMeasurementTask(config=measureConfig, schema=schema)
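# Copy the original alias map (e.g. slot_* aliases) onto the output schema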
715 schema.setAliasMap(aliasMap)
716 newCat = afwTable.SourceCatalog(schema)
717 newCat.extend(catalog, mapper=mapper)
718
719 measurement.run(measCat=newCat, exposure=exposure, exposureId=exposureIdInfo.expId)
720
721 return newCat
722
723
724class PostprocessAnalysis(object):
725 """Calculate columns from ParquetTable
726
727 This object manages and organizes an arbitrary set of computations
728 on a catalog. The catalog is defined by a
729 `lsst.pipe.tasks.parquetTable.ParquetTable` object (or list thereof), such as a
730 `deepCoadd_obj` dataset, and the computations are defined by a collection
731 of `lsst.pipe.tasks.functors.Functor` objects (or, equivalently,
732 a `CompositeFunctor`).
733
734 After the object is initialized, accessing the `.df` attribute (which
735 holds the `pandas.DataFrame` containing the results of the calculations) triggers
736 computation of said dataframe.
737
738 One of the conveniences of using this object is the ability to define a desired common
739 filter for all functors. This enables the same functor collection to be passed to
740 several different `PostprocessAnalysis` objects without having to change the original
741 functor collection, since the `filt` keyword argument of this object triggers an
742 overwrite of the `filt` property for all functors in the collection.
743
744 This object also allows a list of refFlags to be passed, and defines a set of default
745 refFlags that are always included even if not requested.
746
747 If a list of `ParquetTable` objects is passed, rather than a single one, then the
748 calculations will be mapped over all the input catalogs. In principle, it should
749 be straightforward to parallelize this activity, but initial tests have failed
750 (see TODO in code comments).
751
752 Parameters
753 ----------
754 parq : `lsst.pipe.tasks.ParquetTable` (or list of such)
755 Source catalog(s) for computation
756
757 functors : `list`, `dict`, or `lsst.pipe.tasks.functors.CompositeFunctor`
758 Computations to do (functors that act on `parq`).
759 If a dict, the output
760 DataFrame will have columns keyed accordingly.
761 If a list, the column keys will come from the
762 `.shortname` attribute of each functor.
763
764 filt : `str` (optional)
765 Filter in which to calculate. If provided,
766 this will overwrite any existing `.filt` attribute
767 of the provided functors.
768
769 flags : `list` (optional)
770 List of flags (per-band) to include in output table.
771 Taken from the `meas` dataset if applied to a multilevel Object Table.
772
773 refFlags : `list` (optional)
774 List of refFlags (only reference band) to include in output table.
775
776 forcedFlags : `list` (optional)
777 List of flags (per-band) to include in output table.
778 Taken from the ``forced_src`` dataset if applied to a
779 multilevel Object Table. Intended for flags from measurement plugins
780 only run during multi-band forced-photometry.
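
    Examples
    --------
    A minimal sketch of typical usage (hypothetical; ``parq`` is assumed to be
    a `ParquetTable` such as a single ``deepCoadd_obj`` patch, and the functor
    and flag choices below are illustrative only)::

        from lsst.pipe.tasks.functors import CompositeFunctor, Column

        funcs = CompositeFunctor({'ra': Column('coord_ra'),
                                  'dec': Column('coord_dec')})
        analysis = PostprocessAnalysis(parq, funcs, filt='g',
                                       refFlags=['detect_isPrimary'])
        df = analysis.df  # accessing .df triggers the computation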
781 """
782 _defaultRefFlags = []
783 _defaultFuncs = ()
784
785 def __init__(self, parq, functors, filt=None, flags=None, refFlags=None, forcedFlags=None):
786 self.parq = parq
787 self.functors = functors
788
789 self.filt = filt
790 self.flags = list(flags) if flags is not None else []
791 self.forcedFlags = list(forcedFlags) if forcedFlags is not None else []
792 self.refFlags = list(self._defaultRefFlags)
793 if refFlags is not None:
794 self.refFlags += list(refFlags)
795
796 self._df = None
797
798 @property
799 def defaultFuncs(self):
800 funcs = dict(self._defaultFuncs)
801 return funcs
802
803 @property
804 def func(self):
805 additionalFuncs = self.defaultFuncs
806 additionalFuncs.update({flag: Column(flag, dataset='forced_src') for flag in self.forcedFlags})
807 additionalFuncs.update({flag: Column(flag, dataset='ref') for flag in self.refFlags})
808 additionalFuncs.update({flag: Column(flag, dataset='meas') for flag in self.flags})
809
810 if isinstance(self.functors, CompositeFunctor):
811 func = self.functors
812 else:
813 func = CompositeFunctor(self.functors)
814
815 func.funcDict.update(additionalFuncs)
816 func.filt = self.filt
817
818 return func
819
820 @property
821 def noDupCols(self):
822 return [name for name, func in self.func.funcDict.items() if func.noDup or func.dataset == 'ref']
823
824 @property
825 def df(self):
826 if self._df is None:
827 self.compute()
828 return self._df
829
830 def compute(self, dropna=False, pool=None):
831 # map over multiple parquet tables
832 if type(self.parq) in (list, tuple):
833 if pool is None:
834 dflist = [self.func(parq, dropna=dropna) for parq in self.parq]
835 else:
836 # TODO: Figure out why this doesn't work (pyarrow pickling issues?)
837 dflist = pool.map(functools.partial(self.func, dropna=dropna), self.parq)
838 self._df = pd.concat(dflist)
839 else:
840 self._df = self.func(self.parq, dropna=dropna)
841
842 return self._df
843
844
845class TransformCatalogBaseConnections(pipeBase.PipelineTaskConnections,
846 dimensions=()):
847 """Expected Connections for subclasses of TransformCatalogBaseTask.
848
849 Must be subclassed.
850 """
851 inputCatalog = connectionTypes.Input(
852 name="",
853 storageClass="DataFrame",
854 )
855 outputCatalog = connectionTypes.Output(
856 name="",
857 storageClass="DataFrame",
858 )
859
860
861class TransformCatalogBaseConfig(pipeBase.PipelineTaskConfig,
862 pipelineConnections=TransformCatalogBaseConnections):
863 functorFile = pexConfig.Field(
864 dtype=str,
865 doc="Path to YAML file specifying Science Data Model functors to use "
866 "when copying columns and computing calibrated values.",
867 default=None,
868 optional=True
869 )
870 primaryKey = pexConfig.Field(
871 dtype=str,
872 doc="Name of column to be set as the DataFrame index. If None, the index "
873 "will be named `id`",
874 default=None,
875 optional=True
876 )
877
878
879class TransformCatalogBaseTask(CmdLineTask, pipeBase.PipelineTask):
880 """Base class for transforming/standardizing a catalog
881 by applying functors that convert units and apply calibrations.
882
883 The purpose of this task is to perform a set of computations on
884 an input `ParquetTable` dataset (such as `deepCoadd_obj`) and write the
885 results to a new dataset (which needs to be declared in an `outputDataset`
886 attribute).
887
888 The calculations to be performed are defined in a YAML file that specifies
889 a set of functors to be computed, provided as
890 the `functorFile` config parameter. An example of such a YAML file
891 is the following:
892
893 funcs:
894 psfMag:
895 functor: Mag
896 args:
897 - base_PsfFlux
898 filt: HSC-G
899 dataset: meas
900 cmodel_magDiff:
901 functor: MagDiff
902 args:
903 - modelfit_CModel
904 - base_PsfFlux
905 filt: HSC-G
906 gauss_magDiff:
907 functor: MagDiff
908 args:
909 - base_GaussianFlux
910 - base_PsfFlux
911 filt: HSC-G
912 count:
913 functor: Column
914 args:
915 - base_InputCount_value
916 filt: HSC-G
917 deconvolved_moments:
918 functor: DeconvolvedMoments
919 filt: HSC-G
920 dataset: forced_src
921 refFlags:
922 - calib_psfUsed
923 - merge_measurement_i
924 - merge_measurement_r
925 - merge_measurement_z
926 - merge_measurement_y
927 - merge_measurement_g
928 - base_PixelFlags_flag_inexact_psfCenter
929 - detect_isPrimary
930
931 The names for each entry under "funcs" will become the names of columns in the
932 output dataset. All the functors referenced are defined in `lsst.pipe.tasks.functors`.
933 Positional arguments to be passed to each functor are in the `args` list,
934 and any additional entries for each column other than "functor" or "args" (e.g., `'filt'`,
935 `'dataset'`) are treated as keyword arguments to be passed to the functor initialization.
936
937 The "flags" entry is the default shortcut for `Column` functors.
938 All columns listed under "flags" will be copied to the output table
939 untransformed. They can be of any datatype.
940 In the special case of transforming a multi-level object table with
941 band and dataset indices (deepCoadd_obj), these will be taken from the
942 `meas` dataset and exploded out per band.
943
944 There are two special shortcuts that only apply when transforming
945 multi-level Object (deepCoadd_obj) tables:
946 - The "refFlags" entry is a shortcut for `Column` functors
947 taken from the `'ref'` dataset if transforming an ObjectTable.
948 - The "forcedFlags" entry is a shortcut for `Column` functors
949 taken from the ``forced_src`` dataset if transforming an ObjectTable.
950 These are expanded out per band.
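
 For the example YAML above, the output table would contain one column per
 entry under "funcs" ('psfMag', 'cmodel_magDiff', 'gauss_magDiff', 'count',
 'deconvolved_moments') plus one column per flag listed under "refFlags",
 copied through unchanged.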
951
952
953 This task uses the `lsst.pipe.tasks.postprocess.PostprocessAnalysis` object
954 to organize and execute the calculations.
955
956 """
957 @property
958 def _DefaultName(self):
959 raise NotImplementedError('Subclass must define "_DefaultName" attribute')
960
961 @property
962 def outputDataset(self):
963 raise NotImplementedError('Subclass must define "outputDataset" attribute')
964
965 @property
966 def inputDataset(self):
967 raise NotImplementedError('Subclass must define "inputDataset" attribute')
968
969 @property
970 def ConfigClass(self):
971 raise NotImplementedError('Subclass must define "ConfigClass" attribute')
972
973 def __init__(self, *args, **kwargs):
974 super().__init__(*args, **kwargs)
975 if self.config.functorFile:
976 self.log.info('Loading transform functor definitions from %s',
977 self.config.functorFile)
978 self.funcs = CompositeFunctor.from_file(self.config.functorFile)
979 self.funcs.update(dict(PostprocessAnalysis._defaultFuncs))
980 else:
981 self.funcs = None
982
983 def runQuantum(self, butlerQC, inputRefs, outputRefs):
984 inputs = butlerQC.get(inputRefs)
985 if self.funcs is None:
986 raise ValueError("config.functorFile is None. "
987 "Must be a valid path to yaml in order to run Task as a PipelineTask.")
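# dataId.full maps all dimensions of the output data ID (required and implied) to their values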
988 result = self.run(parq=inputs['inputCatalog'], funcs=self.funcs,
989 dataId=outputRefs.outputCatalog.dataId.full)
990 outputs = pipeBase.Struct(outputCatalog=result)
991 butlerQC.put(outputs, outputRefs)
992
993 def runDataRef(self, dataRef):
994 parq = dataRef.get()
995 if self.funcs is None:
996 raise ValueError("config.functorFile is None. "
997 "Must be a valid path to yaml in order to run as a CommandlineTask.")
998 df = self.run(parq, funcs=self.funcs, dataId=dataRef.dataId)
999 self.write(df, dataRef)
1000 return df
1001
1002 def run(self, parq, funcs=None, dataId=None, band=None):
1003 """Do postprocessing calculations
1004
1005 Takes a `ParquetTable` object and dataId,
1006 returns a dataframe with results of postprocessing calculations.
1007
1008 Parameters
1009 ----------
1010 parq : `lsst.pipe.tasks.ParquetTable`
1011 ParquetTable from which calculations are done.
1012 funcs : `lsst.pipe.tasks.functors.Functors`
1013 Functors to apply to the table's columns
1014 dataId : dict, optional
1015 Used to add a `patchId` column to the output dataframe.
1016 band : `str`, optional
1017 Filter band that is being processed.
1018
1019 Returns
1020 -------
1021 `pandas.DataFrame`
1022
1023 """
1024 self.log.info("Transforming/standardizing the source table dataId: %s", dataId)
1025
1026 df = self.transform(band, parq, funcs, dataId).df
1027 self.log.info("Made a table of %d columns and %d rows", len(df.columns), len(df))
1028 return df
1029
1030 def getFunctors(self):
1031 return self.funcs
1032
1033 def getAnalysis(self, parq, funcs=None, band=None):
1034 if funcs is None:
1035 funcs = self.funcs
1036 analysis = PostprocessAnalysis(parq, funcs, filt=band)
1037 return analysis
1038
1039 def transform(self, band, parq, funcs, dataId):
1040 analysis = self.getAnalysis(parq, funcs=funcs, band=band)
1041 df = analysis.df
1042 if dataId is not None:
1043 for key, value in dataId.items():
1044 df[str(key)] = value
1045
1046 if self.config.primaryKey:
1047 if df.index.name != self.config.primaryKey and self.config.primaryKey in df:
1048 df.reset_index(inplace=True, drop=True)
1049 df.set_index(self.config.primaryKey, inplace=True)
1050
1051 return pipeBase.Struct(
1052 df=df,
1053 analysis=analysis
1054 )
1055
1056 def write(self, df, parqRef):
1057 parqRef.put(ParquetTable(dataFrame=df), self.outputDataset)
1058
1059 def writeMetadata(self, dataRef):
1060 """No metadata to write.
1061 """
1062 pass
1063
1064
1065class TransformObjectCatalogConnections(pipeBase.PipelineTaskConnections,
1066 defaultTemplates={"coaddName": "deep"},
1067 dimensions=("tract", "patch", "skymap")):
1068 inputCatalog = connectionTypes.Input(
1069 doc="The vertical concatenation of the deepCoadd_{ref|meas|forced_src} catalogs, "
1070 "stored as a DataFrame with a multi-level column index per-patch.",
1071 dimensions=("tract", "patch", "skymap"),
1072 storageClass="DataFrame",
1073 name="{coaddName}Coadd_obj",
1074 deferLoad=True,
1075 )
1076 outputCatalog = connectionTypes.Output(
1077 doc="Per-Patch Object Table of columns transformed from the deepCoadd_obj table per the standard "
1078 "data model.",
1079 dimensions=("tract", "patch", "skymap"),
1080 storageClass="DataFrame",
1081 name="objectTable"
1082 )
1083
1084
1085class TransformObjectCatalogConfig(TransformCatalogBaseConfig,
1086 pipelineConnections=TransformObjectCatalogConnections):
1087 coaddName = pexConfig.Field(
1088 dtype=str,
1089 default="deep",
1090 doc="Name of coadd"
1091 )
1092 # TODO: remove in DM-27177
1093 filterMap = pexConfig.DictField(
1094 keytype=str,
1095 itemtype=str,
1096 default={},
1097 doc=("Dictionary mapping full filter name to short one for column name munging. "
1098 "These filters determine the output columns no matter what filters the "
1099 "input data actually contain."),
1100 deprecated=("Coadds are now identified by the band, so this transform is unused. "
1101 "Will be removed after v22.")
1102 )
1103 outputBands = pexConfig.ListField(
1104 dtype=str,
1105 default=None,
1106 optional=True,
1107 doc=("These bands and only these bands will appear in the output,"
1108 " NaN-filled if the input does not include them."
1109 " If None, then use all bands found in the input.")
1110 )
1111 camelCase = pexConfig.Field(
1112 dtype=bool,
1113 default=False,
1114 doc=("Write per-band column names with camelCase, else underscore. "
1115 "For example: gPsFlux instead of g_PsFlux.")
1116 )
1117 multilevelOutput = pexConfig.Field(
1118 dtype=bool,
1119 default=False,
1120 doc=("Whether results dataframe should have a multilevel column index (True) or be flat "
1121 "and name-munged (False).")
1122 )
1123 goodFlags = pexConfig.ListField(
1124 dtype=str,
1125 default=[],
1126 doc=("List of 'good' flags that should be set False when populating empty tables. "
1127 "All other flags are considered to be 'bad' flags and will be set to True.")
1128 )
1129 floatFillValue = pexConfig.Field(
1130 dtype=float,
1131 default=np.nan,
1132 doc="Fill value for float fields when populating empty tables."
1133 )
1134 integerFillValue = pexConfig.Field(
1135 dtype=int,
1136 default=-1,
1137 doc="Fill value for integer fields when populating empty tables."
1138 )
1139
1140 def setDefaults(self):
1141 super().setDefaults()
1142 self.primaryKey = 'objectId'
1143 self.goodFlags = ['calib_astrometry_used',
1144 'calib_photometry_reserved',
1145 'calib_photometry_used',
1146 'calib_psf_candidate',
1147 'calib_psf_reserved',
1148 'calib_psf_used']
1149
1150
1151class TransformObjectCatalogTask(TransformCatalogBaseTask):
1152 """Produce a flattened Object Table to match the format specified in
1153 sdm_schemas.
1154
1155 Do the same set of postprocessing calculations on all bands
1156
1157 This is identical to `TransformCatalogBaseTask`, except that it does the
1158 specified functor calculations for all filters present in the
1159 input `deepCoadd_obj` table. Any specific `"filt"` keywords specified
1160 by the YAML file will be superseded.
1161 """
1162 _DefaultName = "transformObjectCatalog"
1163 ConfigClass = TransformObjectCatalogConfig
1164
1165 # Used by Gen 2 runDataRef only:
1166 inputDataset = 'deepCoadd_obj'
1167 outputDataset = 'objectTable'
1168
1169 @classmethod
1170 def _makeArgumentParser(cls):
1171 parser = ArgumentParser(name=cls._DefaultName)
1172 parser.add_id_argument("--id", cls.inputDataset,
1173 ContainerClass=CoaddDataIdContainer,
1174 help="data ID, e.g. --id tract=12345 patch=1,2")
1175 return parser
1176
1177 def run(self, parq, funcs=None, dataId=None, band=None):
1178 # NOTE: band kwarg is ignored here.
1179 dfDict = {}
1180 analysisDict = {}
1181 templateDf = pd.DataFrame()
1182
1183 if isinstance(parq, DeferredDatasetHandle):
1184 columns = parq.get(component='columns')
1185 inputBands = columns.unique(level=1).values
1186 else:
1187 inputBands = parq.columnLevelNames['band']
1188
1189 outputBands = self.config.outputBands if self.config.outputBands else inputBands
1190
1191 # Perform transform for data of filters that exist in parq.
1192 for inputBand in inputBands:
1193 if inputBand not in outputBands:
1194 self.log.info("Ignoring %s band data in the input", inputBand)
1195 continue
1196 self.log.info("Transforming the catalog of band %s", inputBand)
1197 result = self.transform(inputBand, parq, funcs, dataId)
1198 dfDict[inputBand] = result.df
1199 analysisDict[inputBand] = result.analysis
1200 if templateDf.empty:
1201 templateDf = result.df
1202
1203 # Put filler values in columns of other wanted bands
1204 for filt in outputBands:
1205 if filt not in dfDict:
1206 self.log.info("Adding empty columns for band %s", filt)
1207 dfTemp = templateDf.copy()
1208 for col in dfTemp.columns:
1209 testValue = dfTemp[col].values[0]
1210 if isinstance(testValue, (np.bool_, pd.BooleanDtype)):
1211 # Boolean flag type, check if it is a "good" flag
1212 if col in self.config.goodFlags:
1213 fillValue = False
1214 else:
1215 fillValue = True
1216 elif isinstance(testValue, numbers.Integral):
1217 # Checking numbers.Integral catches all flavors
1218 # of python, numpy, pandas, etc. integers.
1219 # We must ensure this is not an unsigned integer.
1220 if isinstance(testValue, np.unsignedinteger):
1221 raise ValueError("Parquet tables may not have unsigned integer columns.")
1222 else:
1223 fillValue = self.config.integerFillValue
1224 else:
1225 fillValue = self.config.floatFillValue
1226 dfTemp[col].values[:] = fillValue
1227 dfDict[filt] = dfTemp
1228
1229 # This makes a multilevel column index, with band as first level
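# (e.g. a functor column 'psfMag' computed for band 'g' is keyed ('g', 'psfMag'); the column name is illustrative)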
1230 df = pd.concat(dfDict, axis=1, names=['band', 'column'])
1231
1232 if not self.config.multilevelOutput:
1233 noDupCols = list(set.union(*[set(v.noDupCols) for v in analysisDict.values()]))
1234 if self.config.primaryKey in noDupCols:
1235 noDupCols.remove(self.config.primaryKey)
1236 if dataId is not None:
1237 noDupCols += list(dataId.keys())
1238 df = flattenFilters(df, noDupCols=noDupCols, camelCase=self.config.camelCase,
1239 inputBands=inputBands)
1240
1241 self.log.info("Made a table of %d columns and %d rows", len(df.columns), len(df))
1242
1243 return df
1244
1245
1246class TractObjectDataIdContainer(CoaddDataIdContainer):
1247
1248 def makeDataRefList(self, namespace):
1249 """Make self.refList from self.idList
1250
1251 Generate a list of data references given tract and/or patch.
1252 This was adapted from `TractQADataIdContainer`, which was
1253 `TractDataIdContainer` modified to not require "filter".
1254 Only existing dataRefs are returned.
1255 """
1256 def getPatchRefList(tract):
1257 return [namespace.butler.dataRef(datasetType=self.datasetType,
1258 tract=tract.getId(),
1259 patch="%d,%d" % patch.getIndex()) for patch in tract]
1260
1261 tractRefs = defaultdict(list) # Data references for each tract
1262 for dataId in self.idList:
1263 skymap = self.getSkymap(namespace)
1264
1265 if "tract" in dataId:
1266 tractId = dataId["tract"]
1267 if "patch" in dataId:
1268 tractRefs[tractId].append(namespace.butler.dataRef(datasetType=self.datasetType,
1269 tract=tractId,
1270 patch=dataId['patch']))
1271 else:
1272 tractRefs[tractId] += getPatchRefList(skymap[tractId])
1273 else:
1274 tractRefs = dict((tract.getId(), tractRefs.get(tract.getId(), []) + getPatchRefList(tract))
1275 for tract in skymap)
1276 outputRefList = []
1277 for tractRefList in tractRefs.values():
1278 existingRefs = [ref for ref in tractRefList if ref.datasetExists()]
1279 outputRefList.append(existingRefs)
1280
1281 self.refList = outputRefList
1282
1283
1284class ConsolidateObjectTableConnections(pipeBase.PipelineTaskConnections,
1285 dimensions=("tract", "skymap")):
1286 inputCatalogs = connectionTypes.Input(
1287 doc="Per-Patch objectTables conforming to the standard data model.",
1288 name="objectTable",
1289 storageClass="DataFrame",
1290 dimensions=("tract", "patch", "skymap"),
1291 multiple=True,
1292 )
1293 outputCatalog = connectionTypes.Output(
1294 doc="Per-tract horizontal concatenation of the input objectTables",
1295 name="objectTable_tract",
1296 storageClass="DataFrame",
1297 dimensions=("tract", "skymap"),
1298 )
1299
1300
1301class ConsolidateObjectTableConfig(pipeBase.PipelineTaskConfig,
1302 pipelineConnections=ConsolidateObjectTableConnections):
1303 coaddName = pexConfig.Field(
1304 dtype=str,
1305 default="deep",
1306 doc="Name of coadd"
1307 )
1308
1309
1310class ConsolidateObjectTableTask(CmdLineTask, pipeBase.PipelineTask):
1311 """Write patch-merged source tables to a tract-level parquet file
1312
1313 Concatenates the `objectTable` list into a per-tract `objectTable_tract`
1314 """
1315 _DefaultName = "consolidateObjectTable"
1316 ConfigClass = ConsolidateObjectTableConfig
1317
1318 inputDataset = 'objectTable'
1319 outputDataset = 'objectTable_tract'
1320
1321 def runQuantum(self, butlerQC, inputRefs, outputRefs):
1322 inputs = butlerQC.get(inputRefs)
1323 self.log.info("Concatenating %s per-patch Object Tables",
1324 len(inputs['inputCatalogs']))
1325 df = pd.concat(inputs['inputCatalogs'])
1326 butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs)
1327
1328 @classmethod
1329 def _makeArgumentParser(cls):
1330 parser = ArgumentParser(name=cls._DefaultName)
1331
1332 parser.add_id_argument("--id", cls.inputDataset,
1333 help="data ID, e.g. --id tract=12345",
1334 ContainerClass=TractObjectDataIdContainer)
1335 return parser
1336
1337 def runDataRef(self, patchRefList):
1338 df = pd.concat([patchRef.get().toDataFrame() for patchRef in patchRefList])
1339 patchRefList[0].put(ParquetTable(dataFrame=df), self.outputDataset)
1340
1341 def writeMetadata(self, dataRef):
1342 """No metadata to write.
1343 """
1344 pass
1345
1346
1347class TransformSourceTableConnections(pipeBase.PipelineTaskConnections,
1348 defaultTemplates={"catalogType": ""},
1349 dimensions=("instrument", "visit", "detector")):
1350
1351 inputCatalog = connectionTypes.Input(
1352 doc="Wide input catalog of sources produced by WriteSourceTableTask",
1353 name="{catalogType}source",
1354 storageClass="DataFrame",
1355 dimensions=("instrument", "visit", "detector"),
1356 deferLoad=True
1357 )
1358 outputCatalog = connectionTypes.Output(
1359 doc="Narrower, per-detector Source Table transformed and converted per a "
1360 "specified set of functors",
1361 name="{catalogType}sourceTable",
1362 storageClass="DataFrame",
1363 dimensions=("instrument", "visit", "detector")
1364 )
1365
1366
1367class TransformSourceTableConfig(TransformCatalogBaseConfig,
1368 pipelineConnections=TransformSourceTableConnections):
1369
1370 def setDefaults(self):
1371 super().setDefaults()
1372 self.primaryKey = 'sourceId'
1373
1374
1375class TransformSourceTableTask(TransformCatalogBaseTask):
1376 """Transform/standardize a source catalog
1377 """
1378 _DefaultName = "transformSourceTable"
1379 ConfigClass = TransformSourceTableConfig
1380
1381 inputDataset = 'source'
1382 outputDataset = 'sourceTable'
1383
1384 @classmethod
1385 def _makeArgumentParser(cls):
1386 parser = ArgumentParser(name=cls._DefaultName)
1387 parser.add_id_argument("--id", datasetType=cls.inputDataset,
1388 level="sensor",
1389 help="data ID, e.g. --id visit=12345 ccd=0")
1390 return parser
1391
1392 def runDataRef(self, dataRef):
1393 """Override to specify band label to run()."""
1394 parq = dataRef.get()
1395 funcs = self.getFunctors()
1396 band = dataRef.get("calexp_filterLabel", immediate=True).bandLabel
1397 df = self.run(parq, funcs=funcs, dataId=dataRef.dataId, band=band)
1398 self.write(df, dataRef)
1399 return df
1400
1401
1402class ConsolidateVisitSummaryConnections(pipeBase.PipelineTaskConnections,
1403 dimensions=("instrument", "visit",),
1404 defaultTemplates={"calexpType": ""}):
1405 calexp = connectionTypes.Input(
1406 doc="Processed exposures used for metadata",
1407 name="{calexpType}calexp",
1408 storageClass="ExposureF",
1409 dimensions=("instrument", "visit", "detector"),
1410 deferLoad=True,
1411 multiple=True,
1412 )
1413 visitSummary = connectionTypes.Output(
1414 doc=("Per-visit consolidated exposure metadata. These catalogs use "
1415 "the detector id for the catalog id and are sorted for fast lookups of a "
1416 "detector."),
1417 name="{calexpType}visitSummary",
1418 storageClass="ExposureCatalog",
1419 dimensions=("instrument", "visit"),
1420 )
1421
1422
1423class ConsolidateVisitSummaryConfig(pipeBase.PipelineTaskConfig,
1424 pipelineConnections=ConsolidateVisitSummaryConnections):
1425 """Config for ConsolidateVisitSummaryTask"""
1426 pass
1427
1428
1429class ConsolidateVisitSummaryTask(pipeBase.PipelineTask, pipeBase.CmdLineTask):
1430 """Task to consolidate per-detector visit metadata.
1431
1432 This task aggregates the following metadata from all the detectors in a
1433 single visit into an exposure catalog:
1434 - The visitInfo.
1435 - The wcs.
1436 - The photoCalib.
1437 - The physical_filter and band (if available).
1438 - The psf size, shape, and effective area at the center of the detector.
1439 - The corners of the bounding box in right ascension/declination.
1440
1441 Other quantities such as Detector, Psf, ApCorrMap, and TransmissionCurve
1442 are not persisted here because of storage concerns, and because of their
1443 limited utility as summary statistics.
1444
1445 Tests for this task are performed in ci_hsc_gen3.
1446 """
1447 _DefaultName = "consolidateVisitSummary"
1448 ConfigClass = ConsolidateVisitSummaryConfig
1449
1450 @classmethod
1451 def _makeArgumentParser(cls):
1452 parser = ArgumentParser(name=cls._DefaultName)
1453
1454 parser.add_id_argument("--id", "calexp",
1455 help="data ID, e.g. --id visit=12345",
1456 ContainerClass=VisitDataIdContainer)
1457 return parser
1458
1459 def writeMetadata(self, dataRef):
1460 """No metadata to persist, so override to remove metadata persistence.
1461 """
1462 pass
1463
1464 def writeConfig(self, butler, clobber=False, doBackup=True):
1465 """No config to persist, so override to remove config persistence.
1466 """
1467 pass
1468
1469 def runDataRef(self, dataRefList):
1470 visit = dataRefList[0].dataId['visit']
1471
1472 self.log.debug("Concatenating metadata from %d per-detector calexps (visit %d)",
1473 len(dataRefList), visit)
1474
1475 expCatalog = self._combineExposureMetadata(visit, dataRefList, isGen3=False)
1476
1477 dataRefList[0].put(expCatalog, 'visitSummary', visit=visit)
1478
1479 def runQuantum(self, butlerQC, inputRefs, outputRefs):
1480 dataRefs = butlerQC.get(inputRefs.calexp)
1481 visit = dataRefs[0].dataId.byName()['visit']
1482
1483 self.log.debug("Concatenating metadata from %d per-detector calexps (visit %d)",
1484 len(dataRefs), visit)
1485
1486 expCatalog = self._combineExposureMetadata(visit, dataRefs)
1487
1488 butlerQC.put(expCatalog, outputRefs.visitSummary)
1489
1490 def _combineExposureMetadata(self, visit, dataRefs, isGen3=True):
1491 """Make a combined exposure catalog from a list of dataRefs.
1492 These dataRefs must point to exposures with wcs, summaryStats,
1493 and other visit metadata.
1494
1495 Parameters
1496 ----------
1497 visit : `int`
1498 Visit identification number.
1499 dataRefs : `list`
1500 List of dataRefs in visit. May be list of
1501 `lsst.daf.persistence.ButlerDataRef` (Gen2) or
1502 `lsst.daf.butler.DeferredDatasetHandle` (Gen3).
1503 isGen3 : `bool`, optional
1504 Specifies if this is a Gen3 list of datarefs.
1505
1506 Returns
1507 -------
1508 visitSummary : `lsst.afw.table.ExposureCatalog`
1509 Exposure catalog with per-detector summary information.
1510 """
1511 schema = self._makeVisitSummarySchema()
1512 cat = afwTable.ExposureCatalog(schema)
1513 cat.resize(len(dataRefs))
1514
1515 cat['visit'] = visit
1516
1517 for i, dataRef in enumerate(dataRefs):
1518 if isGen3:
1519 visitInfo = dataRef.get(component='visitInfo')
1520 filterLabel = dataRef.get(component='filterLabel')
1521 summaryStats = dataRef.get(component='summaryStats')
1522 detector = dataRef.get(component='detector')
1523 wcs = dataRef.get(component='wcs')
1524 photoCalib = dataRef.get(component='photoCalib')
1525 detector = dataRef.get(component='detector')
1526 bbox = dataRef.get(component='bbox')
1527 validPolygon = dataRef.get(component='validPolygon')
1528 else:
1529 # Note that we need to read the calexp because there is
1530 # no magic access to the psf except through the exposure.
1531 gen2_read_bbox = lsst.geom.BoxI(lsst.geom.PointI(0, 0), lsst.geom.PointI(1, 1))
1532 exp = dataRef.get(datasetType='calexp_sub', bbox=gen2_read_bbox)
1533 visitInfo = exp.getInfo().getVisitInfo()
1534 filterLabel = dataRef.get("calexp_filterLabel")
1535 summaryStats = exp.getInfo().getSummaryStats()
1536 wcs = exp.getWcs()
1537 photoCalib = exp.getPhotoCalib()
1538 detector = exp.getDetector()
1539 bbox = dataRef.get(datasetType='calexp_bbox')
1540 validPolygon = exp.getInfo().getValidPolygon()
1541
1542 rec = cat[i]
1543 rec.setBBox(bbox)
1544 rec.setVisitInfo(visitInfo)
1545 rec.setWcs(wcs)
1546 rec.setPhotoCalib(photoCalib)
1547 rec.setValidPolygon(validPolygon)
1548
1549 rec['physical_filter'] = filterLabel.physicalLabel if filterLabel.hasPhysicalLabel() else ""
1550 rec['band'] = filterLabel.bandLabel if filterLabel.hasBandLabel() else ""
1551 rec.setId(detector.getId())
1552 rec['psfSigma'] = summaryStats.psfSigma
1553 rec['psfIxx'] = summaryStats.psfIxx
1554 rec['psfIyy'] = summaryStats.psfIyy
1555 rec['psfIxy'] = summaryStats.psfIxy
1556 rec['psfArea'] = summaryStats.psfArea
1557 rec['raCorners'][:] = summaryStats.raCorners
1558 rec['decCorners'][:] = summaryStats.decCorners
1559 rec['ra'] = summaryStats.ra
1560 rec['decl'] = summaryStats.decl
1561 rec['zenithDistance'] = summaryStats.zenithDistance
1562 rec['zeroPoint'] = summaryStats.zeroPoint
1563 rec['skyBg'] = summaryStats.skyBg
1564 rec['skyNoise'] = summaryStats.skyNoise
1565 rec['meanVar'] = summaryStats.meanVar
1566 rec['astromOffsetMean'] = summaryStats.astromOffsetMean
1567 rec['astromOffsetStd'] = summaryStats.astromOffsetStd
1568 rec['nPsfStar'] = summaryStats.nPsfStar
1569 rec['psfStarDeltaE1Median'] = summaryStats.psfStarDeltaE1Median
1570 rec['psfStarDeltaE2Median'] = summaryStats.psfStarDeltaE2Median
1571 rec['psfStarDeltaE1Scatter'] = summaryStats.psfStarDeltaE1Scatter
1572 rec['psfStarDeltaE2Scatter'] = summaryStats.psfStarDeltaE2Scatter
1573 rec['psfStarDeltaSizeMedian'] = summaryStats.psfStarDeltaSizeMedian
1574 rec['psfStarDeltaSizeScatter'] = summaryStats.psfStarDeltaSizeScatter
1575 rec['psfStarScaledDeltaSizeScatter'] = summaryStats.psfStarScaledDeltaSizeScatter
1576
1577 metadata = dafBase.PropertyList()
1578 metadata.add("COMMENT", "Catalog id is detector id, sorted.")
1579 # We are looping over existing datarefs, so the following is true
1580 metadata.add("COMMENT", "Only detectors with data have entries.")
1581 cat.setMetadata(metadata)
1582
1583 cat.sort()
1584 return cat
1585
1586 def _makeVisitSummarySchema(self):
1587 """Make the schema for the visitSummary catalog."""
1588 schema = afwTable.ExposureTable.makeMinimalSchema()
1589 schema.addField('visit', type='I', doc='Visit number')
1590 schema.addField('physical_filter', type='String', size=32, doc='Physical filter')
1591 schema.addField('band', type='String', size=32, doc='Name of band')
1592 schema.addField('psfSigma', type='F',
1593 doc='PSF model second-moments determinant radius (center of chip) (pixel)')
1594 schema.addField('psfArea', type='F',
1595 doc='PSF model effective area (center of chip) (pixel**2)')
1596 schema.addField('psfIxx', type='F',
1597 doc='PSF model Ixx (center of chip) (pixel**2)')
1598 schema.addField('psfIyy', type='F',
1599 doc='PSF model Iyy (center of chip) (pixel**2)')
1600 schema.addField('psfIxy', type='F',
1601 doc='PSF model Ixy (center of chip) (pixel**2)')
1602 schema.addField('raCorners', type='ArrayD', size=4,
1603 doc='Right Ascension of bounding box corners (degrees)')
1604 schema.addField('decCorners', type='ArrayD', size=4,
1605 doc='Declination of bounding box corners (degrees)')
1606 schema.addField('ra', type='D',
1607 doc='Right Ascension of bounding box center (degrees)')
1608 schema.addField('decl', type='D',
1609 doc='Declination of bounding box center (degrees)')
1610 schema.addField('zenithDistance', type='F',
1611 doc='Zenith distance of bounding box center (degrees)')
1612 schema.addField('zeroPoint', type='F',
1613 doc='Mean zeropoint in detector (mag)')
1614 schema.addField('skyBg', type='F',
1615 doc='Average sky background (ADU)')
1616 schema.addField('skyNoise', type='F',
1617 doc='Average sky noise (ADU)')
1618 schema.addField('meanVar', type='F',
1619 doc='Mean variance of the weight plane (ADU**2)')
1620 schema.addField('astromOffsetMean', type='F',
1621 doc='Mean offset of astrometric calibration matches (arcsec)')
1622 schema.addField('astromOffsetStd', type='F',
1623 doc='Standard deviation of offsets of astrometric calibration matches (arcsec)')
1624 schema.addField('nPsfStar', type='I', doc='Number of stars used for PSF model')
1625 schema.addField('psfStarDeltaE1Median', type='F',
1626 doc='Median E1 residual (starE1 - psfE1) for psf stars')
1627 schema.addField('psfStarDeltaE2Median', type='F',
1628 doc='Median E2 residual (starE2 - psfE2) for psf stars')
1629 schema.addField('psfStarDeltaE1Scatter', type='F',
1630 doc='Scatter (via MAD) of E1 residual (starE1 - psfE1) for psf stars')
1631 schema.addField('psfStarDeltaE2Scatter', type='F',
1632 doc='Scatter (via MAD) of E2 residual (starE2 - psfE2) for psf stars')
1633 schema.addField('psfStarDeltaSizeMedian', type='F',
1634 doc='Median size residual (starSize - psfSize) for psf stars (pixel)')
1635 schema.addField('psfStarDeltaSizeScatter', type='F',
1636 doc='Scatter (via MAD) of size residual (starSize - psfSize) for psf stars (pixel)')
1637 schema.addField('psfStarScaledDeltaSizeScatter', type='F',
1638 doc='Scatter (via MAD) of size residual scaled by median size squared')
1639
1640 return schema
1641
1642
1643class VisitDataIdContainer(DataIdContainer):
1644 """DataIdContainer that groups sensor-level id's by visit
1645 """
1646
1647 def makeDataRefList(self, namespace):
1648 """Make self.refList from self.idList
1649
1650 Generate a list of data references grouped by visit.
1651
1652 Parameters
1653 ----------
1654 namespace : `argparse.Namespace`
1655 Namespace used by `lsst.pipe.base.CmdLineTask` to parse command line arguments
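
        Notes
        -----
        As an illustrative sketch of the behaviour: ``--id visit=12345``
        expands, via the Gen2 butler ``subset`` call below, into every
        existing per-detector reference for visit 12345, and those references
        are grouped into a single entry of ``self.refList``.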
1656 """
1657 # Group by visits
1658 visitRefs = defaultdict(list)
1659 for dataId in self.idList:
1660 if "visit" in dataId:
1661 visitId = dataId["visit"]
1662 # Gather all data references in the subset for this data ID and
1662 # append them to this visit's list
1663 subset = namespace.butler.subset(self.datasetType, dataId=dataId)
1664 visitRefs[visitId].extend([dataRef for dataRef in subset])
1665
1666 outputRefList = []
1667 for refList in visitRefs.values():
1668 existingRefs = [ref for ref in refList if ref.datasetExists()]
1669 if existingRefs:
1670 outputRefList.append(existingRefs)
1671
1672 self.refList = outputRefList
1673
1674
1675class ConsolidateSourceTableConnections(pipeBase.PipelineTaskConnections,
1676 defaultTemplates={"catalogType": ""},
1677 dimensions=("instrument", "visit")):
1678 inputCatalogs = connectionTypes.Input(
1679 doc="Input per-detector Source Tables",
1680 name="{catalogType}sourceTable",
1681 storageClass="DataFrame",
1682 dimensions=("instrument", "visit", "detector"),
1683 multiple=True
1684 )
1685 outputCatalog = connectionTypes.Output(
1686 doc="Per-visit concatenation of Source Table",
1687 name="{catalogType}sourceTable_visit",
1688 storageClass="DataFrame",
1689 dimensions=("instrument", "visit")
1690 )
1691
1692
1693class ConsolidateSourceTableConfig(pipeBase.PipelineTaskConfig,
1694 pipelineConnections=ConsolidateSourceTableConnections):
1695 pass
1696
1697
1698class ConsolidateSourceTableTask(CmdLineTask, pipeBase.PipelineTask):
1699 """Concatenate `sourceTable` list into a per-visit `sourceTable_visit`
1700 """
1701 _DefaultName = 'consolidateSourceTable'
1702 ConfigClass = ConsolidateSourceTableConfig
1703
1704 inputDataset = 'sourceTable'
1705 outputDataset = 'sourceTable_visit'
1706
1707 def runQuantum(self, butlerQC, inputRefs, outputRefs):
1708 inputs = butlerQC.get(inputRefs)
1709 self.log.info("Concatenating %s per-detector Source Tables",
1710 len(inputs['inputCatalogs']))
1711 df = pd.concat(inputs['inputCatalogs'])
1712 butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs)
1713
1714 def runDataRef(self, dataRefList):
1715 self.log.info("Concatenating %s per-detector Source Tables", len(dataRefList))
1716 df = pd.concat([dataRef.get().toDataFrame() for dataRef in dataRefList])
1717 dataRefList[0].put(ParquetTable(dataFrame=df), self.outputDataset)
1718
1719 @classmethod
1720 def _makeArgumentParser(cls):
1721 parser = ArgumentParser(name=cls._DefaultName)
1722
1723 parser.add_id_argument("--id", cls.inputDataset,
1724 help="data ID, e.g. --id visit=12345",
1725 ContainerClass=VisitDataIdContainer)
1726 return parser
1727
1728 def writeMetadata(self, dataRef):
1729 """No metadata to write.
1730 """
1731 pass
1732
1733 def writeConfig(self, butler, clobber=False, doBackup=True):
1734 """No config to write.
1735 """
1736 pass
1737
1738
1739class MakeCcdVisitTableConnections(pipeBase.PipelineTaskConnections,
1740 dimensions=("instrument",),
1741 defaultTemplates={}):
1742 visitSummaryRefs = connectionTypes.Input(
1743 doc="Data references for per-visit consolidated exposure metadata from ConsolidateVisitSummaryTask",
1744 name="visitSummary",
1745 storageClass="ExposureCatalog",
1746 dimensions=("instrument", "visit"),
1747 multiple=True,
1748 deferLoad=True,
1749 )
1750 outputCatalog = connectionTypes.Output(
1751 doc="CCD and Visit metadata table",
1752 name="ccdVisitTable",
1753 storageClass="DataFrame",
1754 dimensions=("instrument",)
1755 )
1756
1757
1758class MakeCcdVisitTableConfig(pipeBase.PipelineTaskConfig,
1759 pipelineConnections=MakeCcdVisitTableConnections):
1760 pass
1761
1762
1763class MakeCcdVisitTableTask(CmdLineTask, pipeBase.PipelineTask):
1764 """Produce a `ccdVisitTable` from the `visitSummary` exposure catalogs.
1765 """
1766 _DefaultName = 'makeCcdVisitTable'
1767 ConfigClass = MakeCcdVisitTableConfig
1768
1769 def run(self, visitSummaryRefs):
1770 """ Make a table of ccd information from the `visitSummary` catalogs.
1771 Parameters
1772 ----------
1773 visitSummaryRefs : `list` of `lsst.daf.butler.DeferredDatasetHandle`
1774 List of DeferredDatasetHandles pointing to exposure catalogs with
1775 per-detector summary information.
1776 Returns
1777 -------
1778 result : `lsst.pipe.base.Struct`
1779 Results struct with attribute:
1780 - `outputCatalog`
1781 Catalog of ccd and visit information.
1782 """
1783 ccdEntries = []
1784 for visitSummaryRef in visitSummaryRefs:
1785 visitSummary = visitSummaryRef.get()
1786 visitInfo = visitSummary[0].getVisitInfo()
1787
1788 ccdEntry = {}
1789 summaryTable = visitSummary.asAstropy()
1790 selectColumns = ['id', 'visit', 'physical_filter', 'band', 'ra', 'decl', 'zenithDistance',
1791 'zeroPoint', 'psfSigma', 'skyBg', 'skyNoise']
1792 ccdEntry = summaryTable[selectColumns].to_pandas().set_index('id')
1793 # 'visit' is the human-readable visit number.
1794 # 'visitId' is the key into the visit table; at present they are the same.
1795 # Strictly, one should join with the visit table to obtain 'visit'.
1796 ccdEntry = ccdEntry.rename(columns={"visit": "visitId"})
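            # Pack each (visit, detector) data ID into a single integer
            # ccdVisitId using the instrument's visit_detector packer; this is
            # the key used to join with per-source tables.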
1797 dataIds = [DataCoordinate.standardize(visitSummaryRef.dataId, detector=id) for id in
1798 summaryTable['id']]
1799 packer = visitSummaryRef.dataId.universe.makePacker('visit_detector', visitSummaryRef.dataId)
1800 ccdVisitIds = [packer.pack(dataId) for dataId in dataIds]
1801 ccdEntry['ccdVisitId'] = ccdVisitIds
1802 ccdEntry['detector'] = summaryTable['id']
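            # Convert the PSF model sigma (pixels) to a FWHM "seeing" in
            # arcseconds: FWHM = sigma * 2*sqrt(2*ln(2)) (about 2.355), scaled
            # by each detector's pixel scale.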
1803 pixToArcseconds = np.array([vR.getWcs().getPixelScale().asArcseconds() for vR in visitSummary])
1804 ccdEntry["seeing"] = visitSummary['psfSigma'] * np.sqrt(8 * np.log(2)) * pixToArcseconds
1805
1806 ccdEntry["skyRotation"] = visitInfo.getBoresightRotAngle().asDegrees()
1807 ccdEntry["expMidpt"] = visitInfo.getDate().toPython()
1808 ccdEntry["expMidptMJD"] = visitInfo.getDate().get(dafBase.DateTime.MJD)
1809 expTime = visitInfo.getExposureTime()
1810 ccdEntry['expTime'] = expTime
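            # Approximate the exposure start as the midpoint minus half the
            # exposure time; expTime is in seconds, so it is converted to days
            # for the MJD version below.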
1811 ccdEntry["obsStart"] = ccdEntry["expMidpt"] - 0.5 * pd.Timedelta(seconds=expTime)
1812 expTime_days = expTime / (60*60*24)
1813 ccdEntry["obsStartMJD"] = ccdEntry["expMidptMJD"] - 0.5 * expTime_days
1814 ccdEntry['darkTime'] = visitInfo.getDarkTime()
1815 ccdEntry['xSize'] = summaryTable['bbox_max_x'] - summaryTable['bbox_min_x']
1816 ccdEntry['ySize'] = summaryTable['bbox_max_y'] - summaryTable['bbox_min_y']
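            # Indices 0-3 of raCorners/decCorners are stored as the lower-left
            # (llc), upper-left (ulc), upper-right (urc), and lower-right
            # (lrc) corner columns.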
1817 ccdEntry['llcra'] = summaryTable['raCorners'][:, 0]
1818 ccdEntry['llcdec'] = summaryTable['decCorners'][:, 0]
1819 ccdEntry['ulcra'] = summaryTable['raCorners'][:, 1]
1820 ccdEntry['ulcdec'] = summaryTable['decCorners'][:, 1]
1821 ccdEntry['urcra'] = summaryTable['raCorners'][:, 2]
1822 ccdEntry['urcdec'] = summaryTable['decCorners'][:, 2]
1823 ccdEntry['lrcra'] = summaryTable['raCorners'][:, 3]
1824 ccdEntry['lrcdec'] = summaryTable['decCorners'][:, 3]
1825 # TODO: DM-30618, Add raftName, nExposures, ccdTemp, binX, binY, and flags,
1826 # and decide if WCS, and llcx, llcy, ulcx, ulcy, etc. values are actually wanted.
1827 ccdEntries.append(ccdEntry)
1828
1829 outputCatalog = pd.concat(ccdEntries)
1830 outputCatalog.set_index('ccdVisitId', inplace=True, verify_integrity=True)
1831 return pipeBase.Struct(outputCatalog=outputCatalog)
1832
1833
1834class MakeVisitTableConnections(pipeBase.PipelineTaskConnections,
1835 dimensions=("instrument",),
1836 defaultTemplates={}):
1837 visitSummaries = connectionTypes.Input(
1838 doc="Per-visit consolidated exposure metadata from ConsolidateVisitSummaryTask",
1839 name="visitSummary",
1840 storageClass="ExposureCatalog",
1841 dimensions=("instrument", "visit",),
1842 multiple=True,
1843 deferLoad=True,
1844 )
1845 outputCatalog = connectionTypes.Output(
1846 doc="Visit metadata table",
1847 name="visitTable",
1848 storageClass="DataFrame",
1849 dimensions=("instrument",)
1850 )
1851
1852
1853class MakeVisitTableConfig(pipeBase.PipelineTaskConfig,
1854 pipelineConnections=MakeVisitTableConnections):
1855 pass
1856
1857
1858class MakeVisitTableTask(CmdLineTask, pipeBase.PipelineTask):
1859 """Produce a `visitTable` from the `visitSummary` exposure catalogs.
1860 """
1861 _DefaultName = 'makeVisitTable'
1862 ConfigClass = MakeVisitTableConfig
1863
1864 def run(self, visitSummaries):
1865 """ Make a table of visit information from the `visitSummary` catalogs
1866
1867 Parameters
1868 ----------
1869 visitSummaries : list of `lsst.afw.table.ExposureCatalog`
1870 List of exposure catalogs with per-detector summary information.
1871 Returns
1872 -------
1873 result : `lsst.pipe.base.Struct`
1874 Results struct with attribute:
1875 ``outputCatalog``
1876 Catalog of visit information.
1877 """
1878 visitEntries = []
1879 for visitSummary in visitSummaries:
1880 visitSummary = visitSummary.get()
1881 visitRow = visitSummary[0]
1882 visitInfo = visitRow.getVisitInfo()
1883
1884 visitEntry = {}
1885 visitEntry["visitId"] = visitRow['visit']
1886 visitEntry["visit"] = visitRow['visit']
1887 visitEntry["physical_filter"] = visitRow['physical_filter']
1888 visitEntry["band"] = visitRow['band']
1889 raDec = visitInfo.getBoresightRaDec()
1890 visitEntry["ra"] = raDec.getRa().asDegrees()
1891 visitEntry["decl"] = raDec.getDec().asDegrees()
1892 visitEntry["skyRotation"] = visitInfo.getBoresightRotAngle().asDegrees()
1893 azAlt = visitInfo.getBoresightAzAlt()
1894 visitEntry["azimuth"] = azAlt.getLongitude().asDegrees()
1895 visitEntry["altitude"] = azAlt.getLatitude().asDegrees()
1896 visitEntry["zenithDistance"] = 90 - azAlt.getLatitude().asDegrees()
1897 visitEntry["airmass"] = visitInfo.getBoresightAirmass()
1898 expTime = visitInfo.getExposureTime()
1899 visitEntry["expTime"] = expTime
1900 visitEntry["expMidpt"] = visitInfo.getDate().toPython()
1901 visitEntry["expMidptMJD"] = visitInfo.getDate().get(dafBase.DateTime.MJD)
1902 visitEntry["obsStart"] = visitEntry["expMidpt"] - 0.5 * pd.Timedelta(seconds=expTime)
1903 expTime_days = expTime / (60*60*24)
1904 visitEntry["obsStartMJD"] = visitEntry["expMidptMJD"] - 0.5 * expTime_days
1905 visitEntries.append(visitEntry)
1906
1907 # TODO: DM-30623, Add programId, exposureType, cameraTemp, mirror1Temp, mirror2Temp,
1908 # mirror3Temp, domeTemp, externalTemp, dimmSeeing, pwvGPS, pwvMW, flags, nExposures
1909
1910 outputCatalog = pd.DataFrame(data=visitEntries)
1911 outputCatalog.set_index('visitId', inplace=True, verify_integrity=True)
1912 return pipeBase.Struct(outputCatalog=outputCatalog)
1913
1914
1915class WriteForcedSourceTableConnections(pipeBase.PipelineTaskConnections,
1916 dimensions=("instrument", "visit", "detector", "skymap", "tract")):
1917
1918 inputCatalog = connectionTypes.Input(
1919 doc="Primary per-detector, single-epoch forced-photometry catalog. "
1920 "By default, it is the output of ForcedPhotCcdTask on calexps",
1921 name="forced_src",
1922 storageClass="SourceCatalog",
1923 dimensions=("instrument", "visit", "detector", "skymap", "tract")
1924 )
1925 inputCatalogDiff = connectionTypes.Input(
1926 doc="Secondary multi-epoch, per-detector, forced photometry catalog. "
1927 "By default, it is the output of ForcedPhotCcdTask run on image differences.",
1928 name="forced_diff",
1929 storageClass="SourceCatalog",
1930 dimensions=("instrument", "visit", "detector", "skymap", "tract")
1931 )
1932 outputCatalog = connectionTypes.Output(
1933 doc="InputCatalogs horizonatally joined on `objectId` in Parquet format",
1934 name="mergedForcedSource",
1935 storageClass="DataFrame",
1936 dimensions=("instrument", "visit", "detector", "skymap", "tract")
1937 )
1938
1939
1940class WriteForcedSourceTableConfig(WriteSourceTableConfig,
1941 pipelineConnections=WriteForcedSourceTableConnections):
1942 key = lsst.pex.config.Field(
1943 doc="Column on which to join the two input tables on and make the primary key of the output",
1944 dtype=str,
1945 default="objectId",
1946 )
1947
1948
1949class WriteForcedSourceTableTask(pipeBase.PipelineTask):
1950 """Merge and convert per-detector forced source catalogs to parquet
1951
1952 Because the predecessor ForcedPhotCcdTask operates per-detector and
1953 per-tract (i.e., it has tract in its dimensions), detectors
1954 on the tract boundary may have multiple forced source catalogs.
1955 
1956 The successor task TransformForcedSourceTable runs per-patch
1957 and temporally aggregates the overlapping mergedForcedSource catalogs
1958 from all available epochs.
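
    The output is a single DataFrame with a two-level column index
    ``(dataset, column)``, where ``dataset`` is ``calexp`` (forced_src) or
    ``diff`` (forced_diff); for example, ``outputCatalog['calexp']`` selects
    the forced_src columns.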
1959 """
1960 _DefaultName = "writeForcedSourceTable"
1961 ConfigClass = WriteForcedSourceTableConfig
1962
1963 def runQuantum(self, butlerQC, inputRefs, outputRefs):
1964 inputs = butlerQC.get(inputRefs)
1965 # Add ccdVisitId to allow joining with CcdVisitTable
1966 inputs['ccdVisitId'] = butlerQC.quantum.dataId.pack("visit_detector")
1967 inputs['band'] = butlerQC.quantum.dataId.full['band']
1968 outputs = self.run(**inputs)
1969 butlerQC.put(outputs, outputRefs)
1970
1971 def run(self, inputCatalog, inputCatalogDiff, ccdVisitId=None, band=None):
1972 dfs = []
1973 for table, dataset in zip((inputCatalog, inputCatalogDiff), ('calexp', 'diff')):
1974 df = table.asAstropy().to_pandas().set_index(self.config.key, drop=False)
1975 df = df.reindex(sorted(df.columns), axis=1)
1976 df['ccdVisitId'] = ccdVisitId if ccdVisitId else pd.NA
1977 df['band'] = band if band else pd.NA
1978 df.columns = pd.MultiIndex.from_tuples([(dataset, c) for c in df.columns],
1979 names=('dataset', 'column'))
1980
1981 dfs.append(df)
1982
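        # Horizontally join the per-dataset frames (here 'calexp' and 'diff')
        # on their shared index, i.e. config.key.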
1983 outputCatalog = functools.reduce(lambda d1, d2: d1.join(d2), dfs)
1984 return pipeBase.Struct(outputCatalog=outputCatalog)
1985
1986
1987class RewriteForcedSourceOnDiaObjectConnections(WriteForcedSourceTableConnections):
1988 exposure = connectionTypes.Input(
1989 doc="Input exposure to perform photometry on.",
1990 name="calexp",
1991 storageClass="ExposureF",
1992 dimensions=["instrument", "visit", "detector"],
1993 )
1994
1995
1996class RewriteForcedSourceOnDiaObjectConfig(WriteForcedSourceTableConfig,
1997 pipelineConnections=RewriteForcedSourceOnDiaObjectConnections):
1998 reevaluate = pexConfig.ConfigurableField(
1999 target=WriteRecalibratedSourceTableTask,
2000 doc="Subtask with addCalibColumns method",
2001 )
2002
2003 def setDefaults(self):
2004 super().setDefaults()
2005 self.reevaluate.doReevaluatePhotoCalib = True
2006 self.reevaluate.doReevaluateSkyWcs = True
2007 self.reevaluate.doReevaluateLocalBackground = True
2008 self.connections.inputCatalogDiff = "forced_diff_diaObject"
2009 self.connections.inputCatalog = "forced_src_diaObject"
2010 self.connections.outputCatalog = "mergedForcedSourceOnDiaObject"
2011 self.key = "diaObjectId"
2012
2013
2014class RewriteForcedSourceOnDiaObjectTask(WriteForcedSourceTableTask):
2015 """Specialized afterburner to recalibrate DP0.2 ForcedSourceOnDiaObject"""
2016 _DefaultName = "RewriteForcedSourceOnDiaObject"
2017 ConfigClass = RewriteForcedSourceOnDiaObjectConfig
2018
2019 def __init__(self, **kwargs):
2020 super().__init__(**kwargs)
2021 self.makeSubtask("reevaluate")
2022
2023 def runQuantum(self, butlerQC, inputRefs, outputRefs):
2024 inputs = butlerQC.get(inputRefs)
2025 # Add ccdVisitId to allow joining with CcdVisitTable
2026 inputs['ccdVisitId'] = butlerQC.quantum.dataId.pack("visit_detector")
2027 inputs['band'] = butlerQC.quantum.dataId.full['band']
2028 exposureIdInfo = ExposureIdInfo.fromDataId(butlerQC.quantum.dataId, "visit_detector")
2029 exposure = inputs.pop('exposure')
2030
2031 inputs['inputCatalog'] = self.reevaluate.addCalibColumns(catalog=inputs['inputCatalog'],
2032 exposure=exposure,
2033 exposureIdInfo=exposureIdInfo)
2034 inputs['inputCatalogDiff'] = self.reevaluate.addCalibColumns(catalog=inputs['inputCatalogDiff'],
2035 exposure=exposure,
2036 exposureIdInfo=exposureIdInfo)
2037
2038 outputs = self.run(**inputs)
2039 butlerQC.put(outputs, outputRefs)
2040
2041
2042class TransformForcedSourceTableConnections(pipeBase.PipelineTaskConnections,
2043 dimensions=("instrument", "skymap", "patch", "tract")):
2044
2045 inputCatalogs = connectionTypes.Input(
2046 doc="Parquet table of merged ForcedSources produced by WriteForcedSourceTableTask",
2047 name="mergedForcedSource",
2048 storageClass="DataFrame",
2049 dimensions=("instrument", "visit", "detector", "skymap", "tract"),
2050 multiple=True,
2051 deferLoad=True
2052 )
2053 referenceCatalog = connectionTypes.Input(
2054 doc="Reference catalog which was used to seed the forcedPhot. Columns "
2055 "objectId, detect_isPrimary, detect_isTractInner, detect_isPatchInner "
2056 "are expected.",
2057 name="objectTable",
2058 storageClass="DataFrame",
2059 dimensions=("tract", "patch", "skymap"),
2060 deferLoad=True
2061 )
2062 outputCatalog = connectionTypes.Output(
2063 doc="Narrower, temporally-aggregated, per-patch ForcedSource Table transformed and converted per a "
2064 "specified set of functors",
2065 name="forcedSourceTable",
2066 storageClass="DataFrame",
2067 dimensions=("tract", "patch", "skymap")
2068 )
2069
2070
2071class TransformForcedSourceTableConfig(TransformCatalogBaseConfig,
2072 pipelineConnections=TransformForcedSourceTableConnections):
2073 referenceColumns = pexConfig.ListField(
2074 dtype=str,
2075 default=["detect_isPrimary", "detect_isTractInner", "detect_isPatchInner"],
2076 optional=True,
2077 doc="Columns to pull from reference catalog",
2078 )
2079 keyRef = lsst.pex.config.Field(
2080 doc="Column on which to join the two input tables on and make the primary key of the output",
2081 dtype=str,
2082 default="objectId",
2083 )
2084 key = lsst.pex.config.Field(
2085 doc="Rename the output DataFrame index to this name",
2086 dtype=str,
2087 default="forcedSourceId",
2088 )
2089
2090
2091class TransformForcedSourceTableTask(TransformCatalogBaseTask):
2092 """Transform/standardize a ForcedSource catalog
2093
2094 Transforms each wide, per-detector forcedSource parquet table per the
2095 specification file (per-camera defaults found in ForcedSource.yaml).
2096 All epochs that overlap the patch are aggregated into one per-patch
2097 narrow-parquet file.
2098
2099 No de-duplication of rows is performed. Duplicate-resolution flags are
2100 pulled in from the referenceCatalog: `detect_isPrimary`,
2101 `detect_isTractInner`, `detect_isPatchInner`, so that the user may
2102 de-duplicate for analysis or compare duplicates for QA.
2103 
2104 The resulting table includes multiple bands. Epochs (MJDs) and other useful
2105 per-visit metadata can be retrieved by joining with the CcdVisitTable on
2106 ccdVisitId.
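
    A minimal, illustrative de-duplication sketch (assuming a Gen3 ``butler``
    and a tract/patch ``dataId`` are already in hand)::

        forcedSourceTable = butler.get("forcedSourceTable", dataId)
        primaryOnly = forcedSourceTable[forcedSourceTable["detect_isPrimary"]]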
2107 """
2108 _DefaultName = "transformForcedSourceTable"
2109 ConfigClass = TransformForcedSourceTableConfig
2110
2111 def runQuantum(self, butlerQC, inputRefs, outputRefs):
2112 inputs = butlerQC.get(inputRefs)
2113 if self.funcs is None:
2114 raise ValueError("config.functorFile is None. "
2115 "Must be a valid path to yaml in order to run Task as a PipelineTask.")
2116 outputs = self.run(inputs['inputCatalogs'], inputs['referenceCatalog'], funcs=self.funcs,
2117 dataId=outputRefs.outputCatalog.dataId.full)
2118
2119 butlerQC.put(outputs, outputRefs)
2120
2121 def run(self, inputCatalogs, referenceCatalog, funcs=None, dataId=None, band=None):
2122 dfs = []
2123 ref = referenceCatalog.get(parameters={"columns": self.config.referenceColumns})
2124 self.log.info("Aggregating %s input catalogs" % (len(inputCatalogs)))
2125 for handle in inputCatalogs:
2126 result = self.transform(None, handle, funcs, dataId)
2127 # Filter for only rows that were detected on (overlap) the patch
2128 dfs.append(result.df.join(ref, how='inner'))
2129
2130 outputCatalog = pd.concat(dfs)
2131
2132 # Now that the join on config.keyRef is done, change the index to config.key:
2133 # first give the index its proper name (config.keyRef),
2134 outputCatalog.index.rename(self.config.keyRef, inplace=True)
2135 # then move it back into the columns,
2136 outputCatalog.reset_index(inplace=True)
2137 # set forcedSourceId as the index (this column is specified in ForcedSource.yaml),
2138 outputCatalog.set_index("forcedSourceId", inplace=True, verify_integrity=True)
2139 # and finally rename the index to config.key.
2140 outputCatalog.index.rename(self.config.key, inplace=True)
2141
2142 self.log.info("Made a table of %d columns and %d rows",
2143 len(outputCatalog.columns), len(outputCatalog))
2144 return pipeBase.Struct(outputCatalog=outputCatalog)
2145
2146
2147class ConsolidateTractConnections(pipeBase.PipelineTaskConnections,
2148 defaultTemplates={"catalogType": ""},
2149 dimensions=("instrument", "tract")):
2150 inputCatalogs = connectionTypes.Input(
2151 doc="Input per-patch DataFrame Tables to be concatenated",
2152 name="{catalogType}ForcedSourceTable",
2153 storageClass="DataFrame",
2154 dimensions=("tract", "patch", "skymap"),
2155 multiple=True,
2156 )
2157
2158 outputCatalog = connectionTypes.Output(
2159 doc="Output per-tract concatenation of DataFrame Tables",
2160 name="{catalogType}ForcedSourceTable_tract",
2161 storageClass="DataFrame",
2162 dimensions=("tract", "skymap"),
2163 )
2164
2165
2166class ConsolidateTractConfig(pipeBase.PipelineTaskConfig,
2167 pipelineConnections=ConsolidateTractConnections):
2168 pass
2169
2170
2171class ConsolidateTractTask(CmdLineTask, pipeBase.PipelineTask):
2172 """Concatenate any per-patch, dataframe list into a single
2173 per-tract DataFrame
2174 """
2175 _DefaultName = 'ConsolidateTract'
2176 ConfigClass = ConsolidateTractConfig
2177
2178 def runQuantum(self, butlerQC, inputRefs, outputRefs):
2179 inputs = butlerQC.get(inputRefs)
2180 # No check that at least one inputCatalog exists: an empty input list would mean an empty quantum graph.
2181 self.log.info("Concatenating %s per-patch %s Tables",
2182 len(inputs['inputCatalogs']),
2183 inputRefs.inputCatalogs[0].datasetType.name)
2184 df = pd.concat(inputs['inputCatalogs'])
2185 butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs)