lsst.pipe.tasks gec3662e80e+34949ea1e7
postprocess.py
1# This file is part of pipe_tasks
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <https://www.gnu.org/licenses/>.
21
22import functools
23import pandas as pd
24from collections import defaultdict
25import numpy as np
26import numbers
27import os
28
29import lsst.geom
30import lsst.pex.config as pexConfig
31import lsst.pipe.base as pipeBase
32import lsst.daf.base as dafBase
33from lsst.pipe.base import connectionTypes
34import lsst.afw.table as afwTable
35from lsst.meas.base import SingleFrameMeasurementTask
36from lsst.pipe.base import CmdLineTask, ArgumentParser, DataIdContainer
37from lsst.coadd.utils.coaddDataIdContainer import CoaddDataIdContainer
38from lsst.daf.butler import DeferredDatasetHandle, DataCoordinate
39
40from .parquetTable import ParquetTable
41from .multiBandUtils import makeMergeArgumentParser, MergeSourcesRunner
42from .functors import CompositeFunctor, Column
43
44
45def flattenFilters(df, noDupCols=['coord_ra', 'coord_dec'], camelCase=False, inputBands=None):
46 """Flattens a dataframe with multilevel column index
47 """
48 newDf = pd.DataFrame()
49 # band is the level 0 index
50 dfBands = df.columns.unique(level=0).values
51 for band in dfBands:
52 subdf = df[band]
53 columnFormat = '{0}{1}' if camelCase else '{0}_{1}'
54 newColumns = {c: columnFormat.format(band, c)
55 for c in subdf.columns if c not in noDupCols}
56 cols = list(newColumns.keys())
57 newDf = pd.concat([newDf, subdf[cols].rename(columns=newColumns)], axis=1)
58
59 # Band must be present in the input and output or else column is all NaN:
60 presentBands = dfBands if inputBands is None else list(set(inputBands).intersection(dfBands))
61 # Get the unexploded columns from any present band's partition
62 noDupDf = df[presentBands[0]][noDupCols]
63 newDf = pd.concat([noDupDf, newDf], axis=1)
64 return newDf
65
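# Example (an illustrative sketch, not called by any Task): flatten a toy
# two-band DataFrame whose columns form a (band, column) MultiIndex. The
# "psfFlux" column name below is hypothetical and only demonstrates the
# band-prefix munging performed by flattenFilters.
def _exampleFlattenFilters():
    columns = pd.MultiIndex.from_product(
        [["g", "r"], ["coord_ra", "coord_dec", "psfFlux"]],
        names=("band", "column"))
    toy = pd.DataFrame([[10.0, -5.0, 1.0, 10.0, -5.0, 2.0]], columns=columns)
    # Resulting columns: ['coord_ra', 'coord_dec', 'g_psfFlux', 'r_psfFlux']
    return flattenFilters(toy)
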
66
67class WriteObjectTableConnections(pipeBase.PipelineTaskConnections,
68 defaultTemplates={"coaddName": "deep"},
69 dimensions=("tract", "patch", "skymap")):
70 inputCatalogMeas = connectionTypes.Input(
71 doc="Catalog of source measurements on the deepCoadd.",
72 dimensions=("tract", "patch", "band", "skymap"),
73 storageClass="SourceCatalog",
74 name="{coaddName}Coadd_meas",
75 multiple=True
76 )
77 inputCatalogForcedSrc = connectionTypes.Input(
78 doc="Catalog of forced measurements (shape and position parameters held fixed) on the deepCoadd.",
79 dimensions=("tract", "patch", "band", "skymap"),
80 storageClass="SourceCatalog",
81 name="{coaddName}Coadd_forced_src",
82 multiple=True
83 )
84 inputCatalogRef = connectionTypes.Input(
85 doc="Catalog marking the primary detection (which band provides a good shape and position)"
86 "for each detection in deepCoadd_mergeDet.",
87 dimensions=("tract", "patch", "skymap"),
88 storageClass="SourceCatalog",
89 name="{coaddName}Coadd_ref"
90 )
91 outputCatalog = connectionTypes.Output(
92 doc="A vertical concatenation of the deepCoadd_{ref|meas|forced_src} catalogs, "
93 "stored as a DataFrame with a multi-level column index per-patch.",
94 dimensions=("tract", "patch", "skymap"),
95 storageClass="DataFrame",
96 name="{coaddName}Coadd_obj"
97 )
98
99
100class WriteObjectTableConfig(pipeBase.PipelineTaskConfig,
101 pipelineConnections=WriteObjectTableConnections):
102 engine = pexConfig.Field(
103 dtype=str,
104 default="pyarrow",
105 doc="Parquet engine for writing (pyarrow or fastparquet)"
106 )
107 coaddName = pexConfig.Field(
108 dtype=str,
109 default="deep",
110 doc="Name of coadd"
111 )
112
113
114class WriteObjectTableTask(CmdLineTask, pipeBase.PipelineTask):
115 """Write filter-merged source tables to parquet
116 """
117 _DefaultName = "writeObjectTable"
118 ConfigClass = WriteObjectTableConfig
119 RunnerClass = MergeSourcesRunner
120
121 # Names of table datasets to be merged
122 inputDatasets = ('forced_src', 'meas', 'ref')
123
124 # Tag of output dataset written by `MergeSourcesTask.write`
125 outputDataset = 'obj'
126
127 def __init__(self, butler=None, schema=None, **kwargs):
128 # It is a shame that this class can't use the default init for CmdLineTask
129 # But to do so would require its own special task runner, which is many
130 # more lines of specialization, so this is how it is for now
131 super().__init__(**kwargs)
132
133 def runDataRef(self, patchRefList):
134 """!
135 @brief Merge coadd sources from multiple bands. Calls @ref `run` to do the
136 actual merging, then writes the result as a ParquetTable.
137 @param[in] patchRefList list of data references for each filter
138 """
139 catalogs = dict(self.readCatalog(patchRef) for patchRef in patchRefList)
140 dataId = patchRefList[0].dataId
141 mergedCatalog = self.run(catalogs, tract=dataId['tract'], patch=dataId['patch'])
142 self.write(patchRefList[0], ParquetTable(dataFrame=mergedCatalog))
143
144 def runQuantum(self, butlerQC, inputRefs, outputRefs):
145 inputs = butlerQC.get(inputRefs)
146
147 measDict = {ref.dataId['band']: {'meas': cat} for ref, cat in
148 zip(inputRefs.inputCatalogMeas, inputs['inputCatalogMeas'])}
149 forcedSourceDict = {ref.dataId['band']: {'forced_src': cat} for ref, cat in
150 zip(inputRefs.inputCatalogForcedSrc, inputs['inputCatalogForcedSrc'])}
151
152 catalogs = {}
153 for band in measDict.keys():
154 catalogs[band] = {'meas': measDict[band]['meas'],
155 'forced_src': forcedSourceDict[band]['forced_src'],
156 'ref': inputs['inputCatalogRef']}
157 dataId = butlerQC.quantum.dataId
158 df = self.run(catalogs=catalogs, tract=dataId['tract'], patch=dataId['patch'])
159 outputs = pipeBase.Struct(outputCatalog=df)
160 butlerQC.put(outputs, outputRefs)
161
162 @classmethod
163 def _makeArgumentParser(cls):
164 """Create a suitable ArgumentParser.
165
166 We will use the ArgumentParser to get a list of data
167 references for patches; the RunnerClass will sort them into lists
168 of data references for the same patch.
169
170 References first of self.inputDatasets, rather than
171 self.inputDataset
172 """
173 return makeMergeArgumentParser(cls._DefaultName, cls.inputDatasets[0])
174
175 def readCatalog(self, patchRef):
176 """Read input catalogs
177
178 Read all the input datasets given by the 'inputDatasets'
179 attribute.
180
181 Parameters
182 ----------
183 patchRef : `lsst.daf.persistence.ButlerDataRef`
184 Data reference for patch
185
186 Returns
187 -------
188 Tuple consisting of band name and a dict of catalogs, keyed by
189 dataset name
190 """
191 band = patchRef.get(self.config.coaddName + "Coadd_filterLabel", immediate=True).bandLabel
192 catalogDict = {}
193 for dataset in self.inputDatasets:
194 catalog = patchRef.get(self.config.coaddName + "Coadd_" + dataset, immediate=True)
195 self.log.info("Read %d sources from %s for band %s: %s",
196 len(catalog), dataset, band, patchRef.dataId)
197 catalogDict[dataset] = catalog
198 return band, catalogDict
199
200 def run(self, catalogs, tract, patch):
201 """Merge multiple catalogs.
202
203 Parameters
204 ----------
205 catalogs : `dict`
206 Mapping from filter names to dict of catalogs.
207 tract : int
208 tractId to use for the tractId column
209 patch : str
210 patchId to use for the patchId column
211
212 Returns
213 -------
214 catalog : `pandas.DataFrame`
215 Merged dataframe
216 """
217
218 dfs = []
219 for filt, tableDict in catalogs.items():
220 for dataset, table in tableDict.items():
221 # Convert afwTable to pandas DataFrame
222 df = table.asAstropy().to_pandas().set_index('id', drop=True)
223
224 # Sort columns by name, to ensure matching schema among patches
225 df = df.reindex(sorted(df.columns), axis=1)
226 df['tractId'] = tract
227 df['patchId'] = patch
228
229 # Make columns a 3-level MultiIndex
230 df.columns = pd.MultiIndex.from_tuples([(dataset, filt, c) for c in df.columns],
231 names=('dataset', 'band', 'column'))
232 dfs.append(df)
233
234 catalog = functools.reduce(lambda d1, d2: d1.join(d2), dfs)
235 return catalog
236
237 def write(self, patchRef, catalog):
238 """Write the output.
239
240 Parameters
241 ----------
242 catalog : `ParquetTable`
243 Catalog to write
244 patchRef : `lsst.daf.persistence.ButlerDataRef`
245 Data reference for patch
246 """
247 patchRef.put(catalog, self.config.coaddName + "Coadd_" + self.outputDataset)
248 # since the filter isn't actually part of the data ID for the dataset we're saving,
249 # it's confusing to see it in the log message, even if the butler simply ignores it.
250 mergeDataId = patchRef.dataId.copy()
251 del mergeDataId["filter"]
252 self.log.info("Wrote merged catalog: %s", mergeDataId)
253
254 def writeMetadata(self, dataRefList):
255 """No metadata to write, and not sure how to write it for a list of dataRefs.
256 """
257 pass
258
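# Illustrative sketch (not part of the Task interface): assemble the nested
# {band: {dataset: catalog}} mapping that WriteObjectTableTask.run expects.
# `task`, `measCats`, `forcedCats` and `refCat` are hypothetical stand-ins for
# a configured task instance and already-loaded afw SourceCatalogs.
def _exampleWriteObjectTable(task, measCats, forcedCats, refCat, tract, patch):
    catalogs = {band: {'meas': measCats[band],
                       'forced_src': forcedCats[band],
                       'ref': refCat}
                for band in measCats}
    # Returns the merged DataFrame with a (dataset, band, column) column index.
    return task.run(catalogs, tract=tract, patch=patch)
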
259
260class WriteSourceTableConnections(pipeBase.PipelineTaskConnections,
261 defaultTemplates={"catalogType": ""},
262 dimensions=("instrument", "visit", "detector")):
263
264 catalog = connectionTypes.Input(
265 doc="Input full-depth catalog of sources produced by CalibrateTask",
266 name="{catalogType}src",
267 storageClass="SourceCatalog",
268 dimensions=("instrument", "visit", "detector")
269 )
270 outputCatalog = connectionTypes.Output(
271 doc="Catalog of sources, `src` in Parquet format. The 'id' column is "
272 "replaced with an index; all other columns are unchanged.",
273 name="{catalogType}source",
274 storageClass="DataFrame",
275 dimensions=("instrument", "visit", "detector")
276 )
277
278
279class WriteSourceTableConfig(pipeBase.PipelineTaskConfig,
280 pipelineConnections=WriteSourceTableConnections):
281 doApplyExternalPhotoCalib = pexConfig.Field(
282 dtype=bool,
283 default=False,
284 doc=("Add local photoCalib columns from the calexp.photoCalib? Should only set True if "
285 "generating Source Tables from older src tables which do not already have local calib columns")
286 )
287 doApplyExternalSkyWcs = pexConfig.Field(
288 dtype=bool,
289 default=False,
290 doc=("Add local WCS columns from the calexp.wcs? Should only set True if "
291 "generating Source Tables from older src tables which do not already have local calib columns")
292 )
293
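# Illustrative sketch: when regenerating Source Tables from older src catalogs
# that lack the local calibration columns, both flags can be enabled so that
# addCalibColumns evaluates them at each source centroid.
def _exampleWriteSourceTableConfig():
    config = WriteSourceTableConfig()
    config.doApplyExternalPhotoCalib = True
    config.doApplyExternalSkyWcs = True
    return config
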
294
295class WriteSourceTableTask(CmdLineTask, pipeBase.PipelineTask):
296 """Write source table to parquet
297 """
298 _DefaultName = "writeSourceTable"
299 ConfigClass = WriteSourceTableConfig
300
301 def runDataRef(self, dataRef):
302 src = dataRef.get('src')
303 if self.config.doApplyExternalPhotoCalib or self.config.doApplyExternalSkyWcs:
304 src = self.addCalibColumns(src, dataRef)
305
306 ccdVisitId = dataRef.get('ccdExposureId')
307 result = self.run(src, ccdVisitId=ccdVisitId)
308 dataRef.put(result.table, 'source')
309
310 def runQuantum(self, butlerQC, inputRefs, outputRefs):
311 inputs = butlerQC.get(inputRefs)
312 inputs['ccdVisitId'] = butlerQC.quantum.dataId.pack("visit_detector")
313 result = self.run(**inputs).table
314 outputs = pipeBase.Struct(outputCatalog=result.toDataFrame())
315 butlerQC.put(outputs, outputRefs)
316
317 def run(self, catalog, ccdVisitId=None):
318 """Convert `src` catalog to parquet
319
320 Parameters
321 ----------
322 catalog: `afwTable.SourceCatalog`
323 catalog to be converted
324 ccdVisitId: `int`
325 ccdVisitId to be added as a column
326
327 Returns
328 -------
329 result : `lsst.pipe.base.Struct`
330 ``table``
331 `ParquetTable` version of the input catalog
332 """
333 self.log.info("Generating parquet table from src catalog %s", ccdVisitId)
334 df = catalog.asAstropy().to_pandas().set_index('id', drop=True)
335 df['ccdVisitId'] = ccdVisitId
336 return pipeBase.Struct(table=ParquetTable(dataFrame=df))
337
338 def addCalibColumns(self, catalog, dataRef):
339 """Add columns with local calibration evaluated at each centroid
340
341 for backwards compatibility with old repos.
342 This exists for the purpose of converting old src catalogs
343 (which don't have the expected local calib columns) to Source Tables.
344
345 Parameters
346 ----------
347 catalog: `afwTable.SourceCatalog`
348 catalog to which calib columns will be added
349 dataRef: `lsst.daf.persistence.ButlerDataRef`
350 for fetching the calibs from disk.
351
352 Returns
353 -------
354 newCat: `afwTable.SourceCatalog`
355 Source Catalog with requested local calib columns
356 """
357 mapper = afwTable.SchemaMapper(catalog.schema)
358 measureConfig = SingleFrameMeasurementTask.ConfigClass()
359 measureConfig.doReplaceWithNoise = False
360
361 # Just need the WCS or the PhotoCalib attached to an exposure
362 exposure = dataRef.get('calexp_sub',
363 bbox=lsst.geom.BoxI(lsst.geom.PointI(0, 0), lsst.geom.PointI(1, 1)))
364
365 mapper = afwTable.SchemaMapper(catalog.schema)
366 mapper.addMinimalSchema(catalog.schema, True)
367 schema = mapper.getOutputSchema()
368
369 exposureIdInfo = dataRef.get("expIdInfo")
370 measureConfig.plugins.names = []
371 if self.config.doApplyExternalSkyWcs:
372 plugin = 'base_LocalWcs'
373 if plugin in schema:
374 raise RuntimeError(f"{plugin} already in src catalog. Set doApplyExternalSkyWcs=False")
375 else:
376 measureConfig.plugins.names.add(plugin)
377
378 if self.config.doApplyExternalPhotoCalib:
379 plugin = 'base_LocalPhotoCalib'
380 if plugin in schema:
381 raise RuntimeError(f"{plugin} already in src catalog. Set doApplyExternalPhotoCalib=False")
382 else:
383 measureConfig.plugins.names.add(plugin)
384
385 measurement = SingleFrameMeasurementTask(config=measureConfig, schema=schema)
386 newCat = afwTable.SourceCatalog(schema)
387 newCat.extend(catalog, mapper=mapper)
388 measurement.run(measCat=newCat, exposure=exposure, exposureId=exposureIdInfo.expId)
389 return newCat
390
391 def writeMetadata(self, dataRef):
392 """No metadata to write.
393 """
394 pass
395
396 @classmethod
397 def _makeArgumentParser(cls):
398 parser = ArgumentParser(name=cls._DefaultName)
399 parser.add_id_argument("--id", 'src',
400 help="data ID, e.g. --id visit=12345 ccd=0")
401 return parser
402
403
404class PostprocessAnalysis(object):
405 """Calculate columns from ParquetTable
406
407 This object manages and organizes an arbitrary set of computations
408 on a catalog. The catalog is defined by a
409 `lsst.pipe.tasks.parquetTable.ParquetTable` object (or list thereof), such as a
410 `deepCoadd_obj` dataset, and the computations are defined by a collection
411 of `lsst.pipe.tasks.functors.Functor` objects (or, equivalently,
412 a `CompositeFunctor`).
413
414 After the object is initialized, accessing the `.df` attribute (which
415 holds the `pandas.DataFrame` containing the results of the calculations) triggers
416 computation of said dataframe.
417
418 One of the conveniences of using this object is the ability to define a desired common
419 filter for all functors. This enables the same functor collection to be passed to
420 several different `PostprocessAnalysis` objects without having to change the original
421 functor collection, since the `filt` keyword argument of this object triggers an
422 overwrite of the `filt` property for all functors in the collection.
423
424 This object also allows a list of refFlags to be passed, and defines a set of default
425 refFlags that are always included even if not requested.
426
427 If a list of `ParquetTable` objects is passed, rather than a single one, then the
428 calculations will be mapped over all the input catalogs. In principle, it should
429 be straightforward to parallelize this activity, but initial tests have failed
430 (see TODO in code comments).
431
432 Parameters
433 ----------
434 parq : `lsst.pipe.tasks.ParquetTable` (or list of such)
435 Source catalog(s) for computation
436
437 functors : `list`, `dict`, or `lsst.pipe.tasks.functors.CompositeFunctor`
438 Computations to do (functors that act on `parq`).
439 If a dict, the output
440 DataFrame will have columns keyed accordingly.
441 If a list, the column keys will come from the
442 `.shortname` attribute of each functor.
443
444 filt : `str` (optional)
445 Filter in which to calculate. If provided,
446 this will overwrite any existing `.filt` attribute
447 of the provided functors.
448
449 flags : `list` (optional)
450 List of flags (per-band) to include in output table.
451 Taken from the `meas` dataset if applied to a multilevel Object Table.
452
453 refFlags : `list` (optional)
454 List of refFlags (only reference band) to include in output table.
455
456 forcedFlags : `list` (optional)
457 List of flags (per-band) to include in output table.
458 Taken from the ``forced_src`` dataset if applied to a
459 multilevel Object Table. Intended for flags from measurement plugins
460 only run during multi-band forced-photometry.
461 """
462 _defaultRefFlags = []
463 _defaultFuncs = ()
464
465 def __init__(self, parq, functors, filt=None, flags=None, refFlags=None, forcedFlags=None):
466 self.parq = parq
467 self.functors = functors
468
469 self.filt = filt
470 self.flags = list(flags) if flags is not None else []
471 self.forcedFlags = list(forcedFlags) if forcedFlags is not None else []
472 self.refFlags = list(self._defaultRefFlags)
473 if refFlags is not None:
474 self.refFlags += list(refFlags)
475
476 self._df = None
477
478 @property
479 def defaultFuncs(self):
480 funcs = dict(self._defaultFuncs)
481 return funcs
482
483 @property
484 def func(self):
485 additionalFuncs = self.defaultFuncs
486 additionalFuncs.update({flag: Column(flag, dataset='forced_src') for flag in self.forcedFlags})
487 additionalFuncs.update({flag: Column(flag, dataset='ref') for flag in self.refFlags})
488 additionalFuncs.update({flag: Column(flag, dataset='meas') for flag in self.flags})
489
490 if isinstance(self.functors, CompositeFunctor):
491 func = self.functors
492 else:
493 func = CompositeFunctor(self.functors)
494
495 func.funcDict.update(additionalFuncs)
496 func.filt = self.filt
497
498 return func
499
500 @property
501 def noDupCols(self):
502 return [name for name, func in self.func.funcDict.items() if func.noDup or func.dataset == 'ref']
503
504 @property
505 def df(self):
506 if self._df is None:
507 self.compute()
508 return self._df
509
510 def compute(self, dropna=False, pool=None):
511 # map over multiple parquet tables
512 if type(self.parq) in (list, tuple):
513 if pool is None:
514 dflist = [self.func(parq, dropna=dropna) for parq in self.parq]
515 else:
516 # TODO: Figure out why this doesn't work (pyarrow pickling issues?)
517 dflist = pool.map(functools.partial(self.func, dropna=dropna), self.parq)
518 self._df = pd.concat(dflist)
519 else:
520 self._df = self.func(self.parq, dropna=dropna)
521
522 return self._df
523
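# Illustrative sketch: compute a couple of reference-band columns from a
# deepCoadd_obj ParquetTable. The functor labels ('ra', 'dec') and the flag
# name are examples only; accessing `.df` triggers the computation.
def _examplePostprocessAnalysis(parq):
    functors = {'ra': Column('coord_ra', dataset='ref'),
                'dec': Column('coord_dec', dataset='ref')}
    analysis = PostprocessAnalysis(parq, functors, filt='i',
                                   refFlags=['detect_isPrimary'])
    return analysis.df
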
524
525class TransformCatalogBaseConnections(pipeBase.PipelineTaskConnections,
526 dimensions=()):
527 """Expected Connections for subclasses of TransformCatalogBaseTask.
528
529 Must be subclassed.
530 """
531 inputCatalog = connectionTypes.Input(
532 name="",
533 storageClass="DataFrame",
534 )
535 outputCatalog = connectionTypes.Output(
536 name="",
537 storageClass="DataFrame",
538 )
539
540
541class TransformCatalogBaseConfig(pipeBase.PipelineTaskConfig,
542 pipelineConnections=TransformCatalogBaseConnections):
543 functorFile = pexConfig.Field(
544 dtype=str,
545 doc="Path to YAML file specifying Science Data Model functors to use "
546 "when copying columns and computing calibrated values.",
547 default=None,
548 optional=True
549 )
550 primaryKey = pexConfig.Field(
551 dtype=str,
552 doc="Name of column to be set as the DataFrame index. If None, the index"
553 "will be named `id`",
554 default=None,
555 optional=True
556 )
557
558
559class TransformCatalogBaseTask(CmdLineTask, pipeBase.PipelineTask):
560 """Base class for transforming/standardizing a catalog
561
562 by applying functors that convert units and apply calibrations.
563 The purpose of this task is to perform a set of computations on
564 an input `ParquetTable` dataset (such as `deepCoadd_obj`) and write the
565 results to a new dataset (which needs to be declared in an `outputDataset`
566 attribute).
567
568 The calculations to be performed are defined in a YAML file that specifies
569 a set of functors to be computed, provided as
570 a `--functorFile` config parameter. An example of such a YAML file
571 is the following:
572
573 funcs:
574     psfMag:
575         functor: Mag
576         args:
577             - base_PsfFlux
578         filt: HSC-G
579         dataset: meas
580     cmodel_magDiff:
581         functor: MagDiff
582         args:
583             - modelfit_CModel
584             - base_PsfFlux
585         filt: HSC-G
586     gauss_magDiff:
587         functor: MagDiff
588         args:
589             - base_GaussianFlux
590             - base_PsfFlux
591         filt: HSC-G
592     count:
593         functor: Column
594         args:
595             - base_InputCount_value
596         filt: HSC-G
597     deconvolved_moments:
598         functor: DeconvolvedMoments
599         filt: HSC-G
600         dataset: forced_src
601 refFlags:
602     - calib_psfUsed
603     - merge_measurement_i
604     - merge_measurement_r
605     - merge_measurement_z
606     - merge_measurement_y
607     - merge_measurement_g
608     - base_PixelFlags_flag_inexact_psfCenter
609     - detect_isPrimary
610
611 The names for each entry under "funcs" will become the names of columns in the
612 output dataset. All the functors referenced are defined in `lsst.pipe.tasks.functors`.
613 Positional arguments to be passed to each functor are in the `args` list,
614 and any additional entries for each column other than "functor" or "args" (e.g., `'filt'`,
615 `'dataset'`) are treated as keyword arguments to be passed to the functor initialization.
616
617 The "flags" entry is the default shortcut for `Column` functors.
618 All columns listed under "flags" will be copied to the output table
619 untransformed. They can be of any datatype.
620 In the special case of transforming a multi-level object table with
621 band and dataset indices (deepCoadd_obj), these will be taken from the
622 `meas` dataset and exploded out per band.
623
624 There are two special shortcuts that only apply when transforming
625 multi-level Object (deepCoadd_obj) tables:
626 - The "refFlags" entry is shortcut for `Column` functor
627 taken from the `'ref'` dataset if transforming an ObjectTable.
628 - The "forcedFlags" entry is shortcut for `Column` functors.
629 taken from the ``forced_src`` dataset if transforming an ObjectTable.
630 These are expanded out per band.
631
632
633 This task uses the `lsst.pipe.tasks.postprocess.PostprocessAnalysis` object
634 to organize and execute the calculations.
635
636 """
637 @property
638 def _DefaultName(self):
639 raise NotImplementedError('Subclass must define "_DefaultName" attribute')
640
641 @property
642 def outputDataset(self):
643 raise NotImplementedError('Subclass must define "outputDataset" attribute')
644
645 @property
646 def inputDataset(self):
647 raise NotImplementedError('Subclass must define "inputDataset" attribute')
648
649 @property
650 def ConfigClass(self):
651 raise NotImplementedError('Subclass must define "ConfigClass" attribute')
652
653 def __init__(self, *args, **kwargs):
654 super().__init__(*args, **kwargs)
655 if self.config.functorFile:
656 self.log.info('Loading transform functor definitions from %s',
657 self.config.functorFile)
658 self.funcs = CompositeFunctor.from_file(self.config.functorFile)
659 self.funcs.update(dict(PostprocessAnalysis._defaultFuncs))
660 else:
661 self.funcs = None
662
663 def runQuantum(self, butlerQC, inputRefs, outputRefs):
664 inputs = butlerQC.get(inputRefs)
665 if self.funcs is None:
666 raise ValueError("config.functorFile is None. "
667 "Must be a valid path to yaml in order to run Task as a PipelineTask.")
668 result = self.run(parq=inputs['inputCatalog'], funcs=self.funcs,
669 dataId=outputRefs.outputCatalog.dataId.full)
670 outputs = pipeBase.Struct(outputCatalog=result)
671 butlerQC.put(outputs, outputRefs)
672
673 def runDataRef(self, dataRef):
674 parq = dataRef.get()
675 if self.funcs is None:
676 raise ValueError("config.functorFile is None. "
677 "Must be a valid path to yaml in order to run as a CommandlineTask.")
678 df = self.run(parq, funcs=self.funcs, dataId=dataRef.dataId)
679 self.write(df, dataRef)
680 return df
681
682 def run(self, parq, funcs=None, dataId=None, band=None):
683 """Do postprocessing calculations
684
685 Takes a `ParquetTable` object and dataId,
686 returns a dataframe with results of postprocessing calculations.
687
688 Parameters
689 ----------
690 parq : `lsst.pipe.tasks.ParquetTable`
691 ParquetTable from which calculations are done.
692 funcs : `lsst.pipe.tasks.functors.Functors`
693 Functors to apply to the table's columns
694 dataId : dict, optional
695 Used to add a `patchId` column to the output dataframe.
696 band : `str`, optional
697 Filter band that is being processed.
698
699 Returns
700 -------
701 `pandas.DataFrame`
702
703 """
704 self.log.info("Transforming/standardizing the source table dataId: %s", dataId)
705
706 df = self.transform(band, parq, funcs, dataId).df
707 self.log.info("Made a table of %d columns and %d rows", len(df.columns), len(df))
708 return df
709
710 def getFunctors(self):
711 return self.funcs
712
713 def getAnalysis(self, parq, funcs=None, band=None):
714 if funcs is None:
715 funcs = self.funcs
716 analysis = PostprocessAnalysis(parq, funcs, filt=band)
717 return analysis
718
719 def transform(self, band, parq, funcs, dataId):
720 analysis = self.getAnalysis(parq, funcs=funcs, band=band)
721 df = analysis.df
722 if dataId is not None:
723 for key, value in dataId.items():
724 df[str(key)] = value
725
726 if self.config.primaryKey:
727 if df.index.name != self.config.primaryKey and self.config.primaryKey in df:
728 df.reset_index(inplace=True, drop=True)
729 df.set_index(self.config.primaryKey, inplace=True)
730
731 return pipeBase.Struct(
732 df=df,
733 analysis=analysis
734 )
735
736 def write(self, df, parqRef):
737 parqRef.put(ParquetTable(dataFrame=df), self.outputDataset)
738
739 def writeMetadata(self, dataRef):
740 """No metadata to write.
741 """
742 pass
743
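# Illustrative sketch: apply a configured TransformCatalogBaseTask subclass to
# a single ParquetTable outside of the butler machinery. `task`, `parq` and
# `dataId` are assumed to be supplied by the caller; config.functorFile must
# point at a functor YAML file like the one shown in the class docstring.
def _exampleTransformCatalog(task, parq, dataId):
    funcs = CompositeFunctor.from_file(task.config.functorFile)
    return task.run(parq, funcs=funcs, dataId=dataId)
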
744
745class TransformObjectCatalogConnections(pipeBase.PipelineTaskConnections,
746 defaultTemplates={"coaddName": "deep"},
747 dimensions=("tract", "patch", "skymap")):
748 inputCatalog = connectionTypes.Input(
749 doc="The vertical concatenation of the deepCoadd_{ref|meas|forced_src} catalogs, "
750 "stored as a DataFrame with a multi-level column index per-patch.",
751 dimensions=("tract", "patch", "skymap"),
752 storageClass="DataFrame",
753 name="{coaddName}Coadd_obj",
754 deferLoad=True,
755 )
756 outputCatalog = connectionTypes.Output(
757 doc="Per-Patch Object Table of columns transformed from the deepCoadd_obj table per the standard "
758 "data model.",
759 dimensions=("tract", "patch", "skymap"),
760 storageClass="DataFrame",
761 name="objectTable"
762 )
763
764
765class TransformObjectCatalogConfig(TransformCatalogBaseConfig,
766 pipelineConnections=TransformObjectCatalogConnections):
767 coaddName = pexConfig.Field(
768 dtype=str,
769 default="deep",
770 doc="Name of coadd"
771 )
772 # TODO: remove in DM-27177
773 filterMap = pexConfig.DictField(
774 keytype=str,
775 itemtype=str,
776 default={},
777 doc=("Dictionary mapping full filter name to short one for column name munging."
778 "These filters determine the output columns no matter what filters the "
779 "input data actually contain."),
780 deprecated=("Coadds are now identified by the band, so this transform is unused."
781 "Will be removed after v22.")
782 )
783 outputBands = pexConfig.ListField(
784 dtype=str,
785 default=None,
786 optional=True,
787 doc=("These bands and only these bands will appear in the output,"
788 " NaN-filled if the input does not include them."
789 " If None, then use all bands found in the input.")
790 )
791 camelCase = pexConfig.Field(
792 dtype=bool,
793 default=False,
794 doc=("Write per-band columns names with camelCase, else underscore "
795 "For example: gPsFlux instead of g_PsFlux.")
796 )
797 multilevelOutput = pexConfig.Field(
798 dtype=bool,
799 default=False,
800 doc=("Whether results dataframe should have a multilevel column index (True) or be flat "
801 "and name-munged (False).")
802 )
803 goodFlags = pexConfig.ListField(
804 dtype=str,
805 default=[],
806 doc=("List of 'good' flags that should be set False when populating empty tables. "
807 "All other flags are considered to be 'bad' flags and will be set to True.")
808 )
809 floatFillValue = pexConfig.Field(
810 dtype=float,
811 default=np.nan,
812 doc="Fill value for float fields when populating empty tables."
813 )
814 integerFillValue = pexConfig.Field(
815 dtype=int,
816 default=-1,
817 doc="Fill value for integer fields when populating empty tables."
818 )
819
820 def setDefaults(self):
821 super().setDefaults()
822 self.functorFile = os.path.join('$PIPE_TASKS_DIR', 'schemas', 'Object.yaml')
823 self.primaryKey = 'objectId'
824 self.goodFlags = ['calib_astrometry_used',
825 'calib_photometry_reserved',
826 'calib_photometry_used',
827 'calib_psf_candidate',
828 'calib_psf_reserved',
829 'calib_psf_used']
830
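# Illustrative sketch: request a flat (non-multilevel), camelCase Object Table
# restricted to three bands; columns for bands absent from the input are
# filled per floatFillValue/integerFillValue and the goodFlags rules above.
def _exampleTransformObjectCatalogConfig():
    config = TransformObjectCatalogConfig()
    config.outputBands = ['g', 'r', 'i']
    config.camelCase = True
    config.multilevelOutput = False
    return config
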
831
832class TransformObjectCatalogTask(TransformCatalogBaseTask):
833 """Produce a flattened Object Table to match the format specified in
834 sdm_schemas.
835
836 Do the same set of postprocessing calculations on all bands
837
838 This is identical to `TransformCatalogBaseTask`, except for that it does the
839 specified functor calculations for all filters present in the
840 input `deepCoadd_obj` table. Any specific `"filt"` keywords specified
841 by the YAML file will be superceded.
842 """
843 _DefaultName = "transformObjectCatalog"
844 ConfigClass = TransformObjectCatalogConfig
845
846 # Used by Gen 2 runDataRef only:
847 inputDataset = 'deepCoadd_obj'
848 outputDataset = 'objectTable'
849
850 @classmethod
851 def _makeArgumentParser(cls):
852 parser = ArgumentParser(name=cls._DefaultName)
853 parser.add_id_argument("--id", cls.inputDataset,
854 ContainerClass=CoaddDataIdContainer,
855 help="data ID, e.g. --id tract=12345 patch=1,2")
856 return parser
857
858 def run(self, parq, funcs=None, dataId=None, band=None):
859 # NOTE: band kwarg is ignored here.
860 dfDict = {}
861 analysisDict = {}
862 templateDf = pd.DataFrame()
863
864 if isinstance(parq, DeferredDatasetHandle):
865 columns = parq.get(component='columns')
866 inputBands = columns.unique(level=1).values
867 else:
868 inputBands = parq.columnLevelNames['band']
869
870 outputBands = self.config.outputBands if self.config.outputBands else inputBands
871
872 # Perform transform for data of filters that exist in parq.
873 for inputBand in inputBands:
874 if inputBand not in outputBands:
875 self.log.info("Ignoring %s band data in the input", inputBand)
876 continue
877 self.log.info("Transforming the catalog of band %s", inputBand)
878 result = self.transform(inputBand, parq, funcs, dataId)
879 dfDict[inputBand] = result.df
880 analysisDict[inputBand] = result.analysis
881 if templateDf.empty:
882 templateDf = result.df
883
884 # Put filler values in columns of other wanted bands
885 for filt in outputBands:
886 if filt not in dfDict:
887 self.log.info("Adding empty columns for band %s", filt)
888 dfTemp = templateDf.copy()
889 for col in dfTemp.columns:
890 testValue = dfTemp[col].values[0]
891 if isinstance(testValue, (np.bool_, pd.BooleanDtype)):
892 # Boolean flag type, check if it is a "good" flag
893 if col in self.config.goodFlags:
894 fillValue = False
895 else:
896 fillValue = True
897 elif isinstance(testValue, numbers.Integral):
898 # Checking numbers.Integral catches all flavors
899 # of python, numpy, pandas, etc. integers.
900 # We must ensure this is not an unsigned integer.
901 if isinstance(testValue, np.unsignedinteger):
902 raise ValueError("Parquet tables may not have unsigned integer columns.")
903 else:
904 fillValue = self.config.integerFillValue
905 else:
906 fillValue = self.config.floatFillValue
907 dfTemp[col].values[:] = fillValue
908 dfDict[filt] = dfTemp
909
910 # This makes a multilevel column index, with band as first level
911 df = pd.concat(dfDict, axis=1, names=['band', 'column'])
912
913 if not self.config.multilevelOutput:
914 noDupCols = list(set.union(*[set(v.noDupCols) for v in analysisDict.values()]))
915 if self.config.primaryKey in noDupCols:
916 noDupCols.remove(self.config.primaryKey)
917 if dataId is not None:
918 noDupCols += list(dataId.keys())
919 df = flattenFilters(df, noDupCols=noDupCols, camelCase=self.config.camelCase,
920 inputBands=inputBands)
921
922 self.log.info("Made a table of %d columns and %d rows", len(df.columns), len(df))
923
924 return df
925
926
927class TractObjectDataIdContainer(CoaddDataIdContainer):
928
929 def makeDataRefList(self, namespace):
930 """Make self.refList from self.idList
931
932 Generate a list of data references given tract and/or patch.
933 This was adapted from `TractQADataIdContainer`, which was
934 `TractDataIdContainer` modified to not require "filter".
935 Only existing dataRefs are returned.
936 """
937 def getPatchRefList(tract):
938 return [namespace.butler.dataRef(datasetType=self.datasetType,
939 tract=tract.getId(),
940 patch="%d,%d" % patch.getIndex()) for patch in tract]
941
942 tractRefs = defaultdict(list) # Data references for each tract
943 for dataId in self.idList:
944 skymap = self.getSkymap(namespace)
945
946 if "tract" in dataId:
947 tractId = dataId["tract"]
948 if "patch" in dataId:
949 tractRefs[tractId].append(namespace.butler.dataRef(datasetType=self.datasetType,
950 tract=tractId,
951 patch=dataId['patch']))
952 else:
953 tractRefs[tractId] += getPatchRefList(skymap[tractId])
954 else:
955 tractRefs = dict((tract.getId(), tractRefs.get(tract.getId(), []) + getPatchRefList(tract))
956 for tract in skymap)
957 outputRefList = []
958 for tractRefList in tractRefs.values():
959 existingRefs = [ref for ref in tractRefList if ref.datasetExists()]
960 outputRefList.append(existingRefs)
961
962 self.refList = outputRefList
963
964
965class ConsolidateObjectTableConnections(pipeBase.PipelineTaskConnections,
966 dimensions=("tract", "skymap")):
967 inputCatalogs = connectionTypes.Input(
968 doc="Per-Patch objectTables conforming to the standard data model.",
969 name="objectTable",
970 storageClass="DataFrame",
971 dimensions=("tract", "patch", "skymap"),
972 multiple=True,
973 )
974 outputCatalog = connectionTypes.Output(
975 doc="Pre-tract horizontal concatenation of the input objectTables",
976 name="objectTable_tract",
977 storageClass="DataFrame",
978 dimensions=("tract", "skymap"),
979 )
980
981
982class ConsolidateObjectTableConfig(pipeBase.PipelineTaskConfig,
983 pipelineConnections=ConsolidateObjectTableConnections):
984 coaddName = pexConfig.Field(
985 dtype=str,
986 default="deep",
987 doc="Name of coadd"
988 )
989
990
991class ConsolidateObjectTableTask(CmdLineTask, pipeBase.PipelineTask):
992 """Write patch-merged source tables to a tract-level parquet file
993
994 Concatenates the per-patch `objectTable` list into a per-tract `objectTable_tract`.
995 """
996 _DefaultName = "consolidateObjectTable"
997 ConfigClass = ConsolidateObjectTableConfig
998
999 inputDataset = 'objectTable'
1000 outputDataset = 'objectTable_tract'
1001
1002 def runQuantum(self, butlerQC, inputRefs, outputRefs):
1003 inputs = butlerQC.get(inputRefs)
1004 self.log.info("Concatenating %s per-patch Object Tables",
1005 len(inputs['inputCatalogs']))
1006 df = pd.concat(inputs['inputCatalogs'])
1007 butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs)
1008
1009 @classmethod
1010 def _makeArgumentParser(cls):
1011 parser = ArgumentParser(name=cls._DefaultName)
1012
1013 parser.add_id_argument("--id", cls.inputDataset,
1014 help="data ID, e.g. --id tract=12345",
1015 ContainerClass=TractObjectDataIdContainer)
1016 return parser
1017
1018 def runDataRef(self, patchRefList):
1019 df = pd.concat([patchRef.get().toDataFrame() for patchRef in patchRefList])
1020 patchRefList[0].put(ParquetTable(dataFrame=df), self.outputDataset)
1021
1022 def writeMetadata(self, dataRef):
1023 """No metadata to write.
1024 """
1025 pass
1026
1027
1028class TransformSourceTableConnections(pipeBase.PipelineTaskConnections,
1029 defaultTemplates={"catalogType": ""},
1030 dimensions=("instrument", "visit", "detector")):
1031
1032 inputCatalog = connectionTypes.Input(
1033 doc="Wide input catalog of sources produced by WriteSourceTableTask",
1034 name="{catalogType}source",
1035 storageClass="DataFrame",
1036 dimensions=("instrument", "visit", "detector"),
1037 deferLoad=True
1038 )
1039 outputCatalog = connectionTypes.Output(
1040 doc="Narrower, per-detector Source Table transformed and converted per a "
1041 "specified set of functors",
1042 name="{catalogType}sourceTable",
1043 storageClass="DataFrame",
1044 dimensions=("instrument", "visit", "detector")
1045 )
1046
1047
1048class TransformSourceTableConfig(TransformCatalogBaseConfig,
1049 pipelineConnections=TransformSourceTableConnections):
1050
1051 def setDefaults(self):
1052 super().setDefaults()
1053 self.functorFile = os.path.join('$PIPE_TASKS_DIR', 'schemas', 'Source.yaml')
1054 self.primaryKey = 'sourceId'
1055
1056
1057class TransformSourceTableTask(TransformCatalogBaseTask):
1058 """Transform/standardize a source catalog
1059 """
1060 _DefaultName = "transformSourceTable"
1061 ConfigClass = TransformSourceTableConfig
1062
1063 inputDataset = 'source'
1064 outputDataset = 'sourceTable'
1065
1066 @classmethod
1067 def _makeArgumentParser(cls):
1068 parser = ArgumentParser(name=cls._DefaultName)
1069 parser.add_id_argument("--id", datasetType=cls.inputDataset,
1070 level="sensor",
1071 help="data ID, e.g. --id visit=12345 ccd=0")
1072 return parser
1073
1074 def runDataRef(self, dataRef):
1075 """Override to specify band label to run()."""
1076 parq = dataRef.get()
1077 funcs = self.getFunctors()
1078 band = dataRef.get("calexp_filterLabel", immediate=True).bandLabel
1079 df = self.run(parq, funcs=funcs, dataId=dataRef.dataId, band=band)
1080 self.write(df, dataRef)
1081 return df
1082
1083
1084class ConsolidateVisitSummaryConnections(pipeBase.PipelineTaskConnections,
1085 dimensions=("instrument", "visit",),
1086 defaultTemplates={"calexpType": ""}):
1087 calexp = connectionTypes.Input(
1088 doc="Processed exposures used for metadata",
1089 name="{calexpType}calexp",
1090 storageClass="ExposureF",
1091 dimensions=("instrument", "visit", "detector"),
1092 deferLoad=True,
1093 multiple=True,
1094 )
1095 visitSummary = connectionTypes.Output(
1096 doc=("Per-visit consolidated exposure metadata. These catalogs use "
1097 "detector id for the id and are sorted for fast lookups of a "
1098 "detector."),
1099 name="{calexpType}visitSummary",
1100 storageClass="ExposureCatalog",
1101 dimensions=("instrument", "visit"),
1102 )
1103
1104
1105class ConsolidateVisitSummaryConfig(pipeBase.PipelineTaskConfig,
1106 pipelineConnections=ConsolidateVisitSummaryConnections):
1107 """Config for ConsolidateVisitSummaryTask"""
1108 pass
1109
1110
1111class ConsolidateVisitSummaryTask(pipeBase.PipelineTask, pipeBase.CmdLineTask):
1112 """Task to consolidate per-detector visit metadata.
1113
1114 This task aggregates the following metadata from all the detectors in a
1115 single visit into an exposure catalog:
1116 - The visitInfo.
1117 - The wcs.
1118 - The photoCalib.
1119 - The physical_filter and band (if available).
1120 - The psf size, shape, and effective area at the center of the detector.
1121 - The corners of the bounding box in right ascension/declination.
1122
1123 Other quantities such as Detector, Psf, ApCorrMap, and TransmissionCurve
1124 are not persisted here because of storage concerns, and because of their
1125 limited utility as summary statistics.
1126
1127 Tests for this task are performed in ci_hsc_gen3.
1128 """
1129 _DefaultName = "consolidateVisitSummary"
1130 ConfigClass = ConsolidateVisitSummaryConfig
1131
1132 @classmethod
1133 def _makeArgumentParser(cls):
1134 parser = ArgumentParser(name=cls._DefaultName)
1135
1136 parser.add_id_argument("--id", "calexp",
1137 help="data ID, e.g. --id visit=12345",
1138 ContainerClass=VisitDataIdContainer)
1139 return parser
1140
1141 def writeMetadata(self, dataRef):
1142 """No metadata to persist, so override to remove metadata persistance.
1143 """
1144 pass
1145
1146 def writeConfig(self, butler, clobber=False, doBackup=True):
1147 """No config to persist, so override to remove config persistance.
1148 """
1149 pass
1150
1151 def runDataRef(self, dataRefList):
1152 visit = dataRefList[0].dataId['visit']
1153
1154 self.log.debug("Concatenating metadata from %d per-detector calexps (visit %d)",
1155 len(dataRefList), visit)
1156
1157 expCatalog = self._combineExposureMetadata(visit, dataRefList, isGen3=False)
1158
1159 dataRefList[0].put(expCatalog, 'visitSummary', visit=visit)
1160
1161 def runQuantum(self, butlerQC, inputRefs, outputRefs):
1162 dataRefs = butlerQC.get(inputRefs.calexp)
1163 visit = dataRefs[0].dataId.byName()['visit']
1164
1165 self.log.debug("Concatenating metadata from %d per-detector calexps (visit %d)",
1166 len(dataRefs), visit)
1167
1168 expCatalog = self._combineExposureMetadata(visit, dataRefs)
1169
1170 butlerQC.put(expCatalog, outputRefs.visitSummary)
1171
1172 def _combineExposureMetadata(self, visit, dataRefs, isGen3=True):
1173 """Make a combined exposure catalog from a list of dataRefs.
1174 These dataRefs must point to exposures with wcs, summaryStats,
1175 and other visit metadata.
1176
1177 Parameters
1178 ----------
1179 visit : `int`
1180 Visit identification number.
1181 dataRefs : `list`
1182 List of dataRefs in visit. May be list of
1183 `lsst.daf.persistence.ButlerDataRef` (Gen2) or
1184 `lsst.daf.butler.DeferredDatasetHandle` (Gen3).
1185 isGen3 : `bool`, optional
1186 Specifies if this is a Gen3 list of datarefs.
1187
1188 Returns
1189 -------
1190 visitSummary : `lsst.afw.table.ExposureCatalog`
1191 Exposure catalog with per-detector summary information.
1192 """
1193 schema = self._makeVisitSummarySchema()
1194 cat = afwTable.ExposureCatalog(schema)
1195 cat.resize(len(dataRefs))
1196
1197 cat['visit'] = visit
1198
1199 for i, dataRef in enumerate(dataRefs):
1200 if isGen3:
1201 visitInfo = dataRef.get(component='visitInfo')
1202 filterLabel = dataRef.get(component='filterLabel')
1203 summaryStats = dataRef.get(component='summaryStats')
1204 detector = dataRef.get(component='detector')
1205 wcs = dataRef.get(component='wcs')
1206 photoCalib = dataRef.get(component='photoCalib')
1207 detector = dataRef.get(component='detector')
1208 bbox = dataRef.get(component='bbox')
1209 validPolygon = dataRef.get(component='validPolygon')
1210 else:
1211 # Note that we need to read the calexp because there is
1212 # no magic access to the psf except through the exposure.
1213 gen2_read_bbox = lsst.geom.BoxI(lsst.geom.PointI(0, 0), lsst.geom.PointI(1, 1))
1214 exp = dataRef.get(datasetType='calexp_sub', bbox=gen2_read_bbox)
1215 visitInfo = exp.getInfo().getVisitInfo()
1216 filterLabel = dataRef.get("calexp_filterLabel")
1217 summaryStats = exp.getInfo().getSummaryStats()
1218 wcs = exp.getWcs()
1219 photoCalib = exp.getPhotoCalib()
1220 detector = exp.getDetector()
1221 bbox = dataRef.get(datasetType='calexp_bbox')
1222 validPolygon = exp.getInfo().getValidPolygon()
1223
1224 rec = cat[i]
1225 rec.setBBox(bbox)
1226 rec.setVisitInfo(visitInfo)
1227 rec.setWcs(wcs)
1228 rec.setPhotoCalib(photoCalib)
1229 rec.setValidPolygon(validPolygon)
1230
1231 rec['physical_filter'] = filterLabel.physicalLabel if filterLabel.hasPhysicalLabel() else ""
1232 rec['band'] = filterLabel.bandLabel if filterLabel.hasBandLabel() else ""
1233 rec.setId(detector.getId())
1234 rec['psfSigma'] = summaryStats.psfSigma
1235 rec['psfIxx'] = summaryStats.psfIxx
1236 rec['psfIyy'] = summaryStats.psfIyy
1237 rec['psfIxy'] = summaryStats.psfIxy
1238 rec['psfArea'] = summaryStats.psfArea
1239 rec['raCorners'][:] = summaryStats.raCorners
1240 rec['decCorners'][:] = summaryStats.decCorners
1241 rec['ra'] = summaryStats.ra
1242 rec['decl'] = summaryStats.decl
1243 rec['zenithDistance'] = summaryStats.zenithDistance
1244 rec['zeroPoint'] = summaryStats.zeroPoint
1245 rec['skyBg'] = summaryStats.skyBg
1246 rec['skyNoise'] = summaryStats.skyNoise
1247 rec['meanVar'] = summaryStats.meanVar
1248 rec['astromOffsetMean'] = summaryStats.astromOffsetMean
1249 rec['astromOffsetStd'] = summaryStats.astromOffsetStd
1250 rec['nPsfStar'] = summaryStats.nPsfStar
1251 rec['psfStarDeltaE1Median'] = summaryStats.psfStarDeltaE1Median
1252 rec['psfStarDeltaE2Median'] = summaryStats.psfStarDeltaE2Median
1253 rec['psfStarDeltaE1Scatter'] = summaryStats.psfStarDeltaE1Scatter
1254 rec['psfStarDeltaE2Scatter'] = summaryStats.psfStarDeltaE2Scatter
1255 rec['psfStarDeltaSizeMedian'] = summaryStats.psfStarDeltaSizeMedian
1256 rec['psfStarDeltaSizeScatter'] = summaryStats.psfStarDeltaSizeScatter
1257 rec['psfStarScaledDeltaSizeScatter'] = summaryStats.psfStarScaledDeltaSizeScatter
1258
1259 metadata = dafBase.PropertyList()
1260 metadata.add("COMMENT", "Catalog id is detector id, sorted.")
1261 # We are looping over existing datarefs, so the following is true
1262 metadata.add("COMMENT", "Only detectors with data have entries.")
1263 cat.setMetadata(metadata)
1264
1265 cat.sort()
1266 return cat
1267
1268 def _makeVisitSummarySchema(self):
1269 """Make the schema for the visitSummary catalog."""
1270 schema = afwTable.ExposureTable.makeMinimalSchema()
1271 schema.addField('visit', type='I', doc='Visit number')
1272 schema.addField('physical_filter', type='String', size=32, doc='Physical filter')
1273 schema.addField('band', type='String', size=32, doc='Name of band')
1274 schema.addField('psfSigma', type='F',
1275 doc='PSF model second-moments determinant radius (center of chip) (pixel)')
1276 schema.addField('psfArea', type='F',
1277 doc='PSF model effective area (center of chip) (pixel**2)')
1278 schema.addField('psfIxx', type='F',
1279 doc='PSF model Ixx (center of chip) (pixel**2)')
1280 schema.addField('psfIyy', type='F',
1281 doc='PSF model Iyy (center of chip) (pixel**2)')
1282 schema.addField('psfIxy', type='F',
1283 doc='PSF model Ixy (center of chip) (pixel**2)')
1284 schema.addField('raCorners', type='ArrayD', size=4,
1285 doc='Right Ascension of bounding box corners (degrees)')
1286 schema.addField('decCorners', type='ArrayD', size=4,
1287 doc='Declination of bounding box corners (degrees)')
1288 schema.addField('ra', type='D',
1289 doc='Right Ascension of bounding box center (degrees)')
1290 schema.addField('decl', type='D',
1291 doc='Declination of bounding box center (degrees)')
1292 schema.addField('zenithDistance', type='F',
1293 doc='Zenith distance of bounding box center (degrees)')
1294 schema.addField('zeroPoint', type='F',
1295 doc='Mean zeropoint in detector (mag)')
1296 schema.addField('skyBg', type='F',
1297 doc='Average sky background (ADU)')
1298 schema.addField('skyNoise', type='F',
1299 doc='Average sky noise (ADU)')
1300 schema.addField('meanVar', type='F',
1301 doc='Mean variance of the weight plane (ADU**2)')
1302 schema.addField('astromOffsetMean', type='F',
1303 doc='Mean offset of astrometric calibration matches (arcsec)')
1304 schema.addField('astromOffsetStd', type='F',
1305 doc='Standard deviation of offsets of astrometric calibration matches (arcsec)')
1306 schema.addField('nPsfStar', type='I', doc='Number of stars used for PSF model')
1307 schema.addField('psfStarDeltaE1Median', type='F',
1308 doc='Median E1 residual (starE1 - psfE1) for psf stars')
1309 schema.addField('psfStarDeltaE2Median', type='F',
1310 doc='Median E2 residual (starE2 - psfE2) for psf stars')
1311 schema.addField('psfStarDeltaE1Scatter', type='F',
1312 doc='Scatter (via MAD) of E1 residual (starE1 - psfE1) for psf stars')
1313 schema.addField('psfStarDeltaE2Scatter', type='F',
1314 doc='Scatter (via MAD) of E2 residual (starE2 - psfE2) for psf stars')
1315 schema.addField('psfStarDeltaSizeMedian', type='F',
1316 doc='Median size residual (starSize - psfSize) for psf stars (pixel)')
1317 schema.addField('psfStarDeltaSizeScatter', type='F',
1318 doc='Scatter (via MAD) of size residual (starSize - psfSize) for psf stars (pixel)')
1319 schema.addField('psfStarScaledDeltaSizeScatter', type='F',
1320 doc='Scatter (via MAD) of size residual scaled by median size squared')
1321
1322 return schema
1323
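# Illustrative sketch: read back a visitSummary written by
# ConsolidateVisitSummaryTask and collect one summary quantity per detector.
# The Gen3 butler call and the dataId keywords below are assumptions about how
# the dataset would typically be fetched.
def _exampleReadVisitSummary(butler, instrument, visit):
    visitSummary = butler.get('visitSummary', instrument=instrument, visit=visit)
    # The catalog id is the detector id and the catalog is sorted by it.
    return {rec['id']: rec['psfSigma'] for rec in visitSummary}
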
1324
1325class VisitDataIdContainer(DataIdContainer):
1326 """DataIdContainer that groups sensor-level id's by visit
1327 """
1328
1329 def makeDataRefList(self, namespace):
1330 """Make self.refList from self.idList
1331
1332 Generate a list of data references grouped by visit.
1333
1334 Parameters
1335 ----------
1336 namespace : `argparse.Namespace`
1337 Namespace used by `lsst.pipe.base.CmdLineTask` to parse command line arguments
1338 """
1339 # Group by visits
1340 visitRefs = defaultdict(list)
1341 for dataId in self.idList:
1342 if "visit" in dataId:
1343 visitId = dataId["visit"]
1344 # append all data references for this visit
1345 subset = namespace.butler.subset(self.datasetType, dataId=dataId)
1346 visitRefs[visitId].extend([dataRef for dataRef in subset])
1347
1348 outputRefList = []
1349 for refList in visitRefs.values():
1350 existingRefs = [ref for ref in refList if ref.datasetExists()]
1351 if existingRefs:
1352 outputRefList.append(existingRefs)
1353
1354 self.refList = outputRefList
1355
1356
1357class ConsolidateSourceTableConnections(pipeBase.PipelineTaskConnections,
1358 defaultTemplates={"catalogType": ""},
1359 dimensions=("instrument", "visit")):
1360 inputCatalogs = connectionTypes.Input(
1361 doc="Input per-detector Source Tables",
1362 name="{catalogType}sourceTable",
1363 storageClass="DataFrame",
1364 dimensions=("instrument", "visit", "detector"),
1365 multiple=True
1366 )
1367 outputCatalog = connectionTypes.Output(
1368 doc="Per-visit concatenation of Source Table",
1369 name="{catalogType}sourceTable_visit",
1370 storageClass="DataFrame",
1371 dimensions=("instrument", "visit")
1372 )
1373
1374
1375class ConsolidateSourceTableConfig(pipeBase.PipelineTaskConfig,
1376 pipelineConnections=ConsolidateSourceTableConnections):
1377 pass
1378
1379
1380class ConsolidateSourceTableTask(CmdLineTask, pipeBase.PipelineTask):
1381 """Concatenate `sourceTable` list into a per-visit `sourceTable_visit`
1382 """
1383 _DefaultName = 'consolidateSourceTable'
1384 ConfigClass = ConsolidateSourceTableConfig
1385
1386 inputDataset = 'sourceTable'
1387 outputDataset = 'sourceTable_visit'
1388
1389 def runQuantum(self, butlerQC, inputRefs, outputRefs):
1390 inputs = butlerQC.get(inputRefs)
1391 self.log.info("Concatenating %s per-detector Source Tables",
1392 len(inputs['inputCatalogs']))
1393 df = pd.concat(inputs['inputCatalogs'])
1394 butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs)
1395
1396 def runDataRef(self, dataRefList):
1397 self.log.info("Concatenating %s per-detector Source Tables", len(dataRefList))
1398 df = pd.concat([dataRef.get().toDataFrame() for dataRef in dataRefList])
1399 dataRefList[0].put(ParquetTable(dataFrame=df), self.outputDataset)
1400
1401 @classmethod
1402 def _makeArgumentParser(cls):
1403 parser = ArgumentParser(name=cls._DefaultName)
1404
1405 parser.add_id_argument("--id", cls.inputDataset,
1406 help="data ID, e.g. --id visit=12345",
1407 ContainerClass=VisitDataIdContainer)
1408 return parser
1409
1410 def writeMetadata(self, dataRef):
1411 """No metadata to write.
1412 """
1413 pass
1414
1415 def writeConfig(self, butler, clobber=False, doBackup=True):
1416 """No config to write.
1417 """
1418 pass
1419
1420
1421class MakeCcdVisitTableConnections(pipeBase.PipelineTaskConnections,
1422 dimensions=("instrument",),
1423 defaultTemplates={"calexpType": ""}):
1424 visitSummaryRefs = connectionTypes.Input(
1425 doc="Data references for per-visit consolidated exposure metadata from ConsolidateVisitSummaryTask",
1426 name="{calexpType}visitSummary",
1427 storageClass="ExposureCatalog",
1428 dimensions=("instrument", "visit"),
1429 multiple=True,
1430 deferLoad=True,
1431 )
1432 outputCatalog = connectionTypes.Output(
1433 doc="CCD and Visit metadata table",
1434 name="ccdVisitTable",
1435 storageClass="DataFrame",
1436 dimensions=("instrument",)
1437 )
1438
1439
1440class MakeCcdVisitTableConfig(pipeBase.PipelineTaskConfig,
1441 pipelineConnections=MakeCcdVisitTableConnections):
1442 pass
1443
1444
1445class MakeCcdVisitTableTask(CmdLineTask, pipeBase.PipelineTask):
1446 """Produce a `ccdVisitTable` from the `visitSummary` exposure catalogs.
1447 """
1448 _DefaultName = 'makeCcdVisitTable'
1449 ConfigClass = MakeCcdVisitTableConfig
1450
1451 def run(self, visitSummaryRefs):
1452 """ Make a table of ccd information from the `visitSummary` catalogs.
1453 Parameters
1454 ----------
1455 visitSummaryRefs : `list` of `lsst.daf.butler.DeferredDatasetHandle`
1456 List of DeferredDatasetHandles pointing to exposure catalogs with
1457 per-detector summary information.
1458 Returns
1459 -------
1460 result : `lsst.pipe.base.Struct`
1461 Results struct with attribute:
1462 ``outputCatalog``
1463 Catalog of ccd and visit information.
1464 """
1465 ccdEntries = []
1466 for visitSummaryRef in visitSummaryRefs:
1467 visitSummary = visitSummaryRef.get()
1468 visitInfo = visitSummary[0].getVisitInfo()
1469
1470 ccdEntry = {}
1471 summaryTable = visitSummary.asAstropy()
1472 selectColumns = ['id', 'visit', 'physical_filter', 'band', 'ra', 'decl', 'zenithDistance',
1473 'zeroPoint', 'psfSigma', 'skyBg', 'skyNoise']
1474 ccdEntry = summaryTable[selectColumns].to_pandas().set_index('id')
1475 # 'visit' is the human-readable visit number.
1476 # 'visitId' is the key to the visit table; here they are the same.
1477 # Technically you should join to get the visit from the visit table.
1478 ccdEntry = ccdEntry.rename(columns={"visit": "visitId"})
1479 dataIds = [DataCoordinate.standardize(visitSummaryRef.dataId, detector=id) for id in
1480 summaryTable['id']]
1481 packer = visitSummaryRef.dataId.universe.makePacker('visit_detector', visitSummaryRef.dataId)
1482 ccdVisitIds = [packer.pack(dataId) for dataId in dataIds]
1483 ccdEntry['ccdVisitId'] = ccdVisitIds
1484 ccdEntry['detector'] = summaryTable['id']
1485 pixToArcseconds = np.array([vR.getWcs().getPixelScale().asArcseconds() for vR in visitSummary])
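            # Convert the PSF model sigma (pixels) to a FWHM in arcseconds:
            # FWHM = sigma * sqrt(8 ln 2), scaled by the per-detector pixel scale.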
1486 ccdEntry["seeing"] = visitSummary['psfSigma'] * np.sqrt(8 * np.log(2)) * pixToArcseconds
1487
1488 ccdEntry["skyRotation"] = visitInfo.getBoresightRotAngle().asDegrees()
1489 ccdEntry["expMidpt"] = visitInfo.getDate().toPython()
1490 expTime = visitInfo.getExposureTime()
1491 ccdEntry['expTime'] = expTime
1492 ccdEntry["obsStart"] = ccdEntry["expMidpt"] - 0.5 * pd.Timedelta(seconds=expTime)
1493 ccdEntry['darkTime'] = visitInfo.getDarkTime()
1494 ccdEntry['xSize'] = summaryTable['bbox_max_x'] - summaryTable['bbox_min_x']
1495 ccdEntry['ySize'] = summaryTable['bbox_max_y'] - summaryTable['bbox_min_y']
1496 ccdEntry['llcra'] = summaryTable['raCorners'][:, 0]
1497 ccdEntry['llcdec'] = summaryTable['decCorners'][:, 0]
1498 ccdEntry['ulcra'] = summaryTable['raCorners'][:, 1]
1499 ccdEntry['ulcdec'] = summaryTable['decCorners'][:, 1]
1500 ccdEntry['urcra'] = summaryTable['raCorners'][:, 2]
1501 ccdEntry['urcdec'] = summaryTable['decCorners'][:, 2]
1502 ccdEntry['lrcra'] = summaryTable['raCorners'][:, 3]
1503 ccdEntry['lrcdec'] = summaryTable['decCorners'][:, 3]
1504 # TODO: DM-30618, Add raftName, nExposures, ccdTemp, binX, binY, and flags,
1505 # and decide if WCS, and llcx, llcy, ulcx, ulcy, etc. values are actually wanted.
1506 ccdEntries.append(ccdEntry)
1507
1508 outputCatalog = pd.concat(ccdEntries)
1509 outputCatalog.set_index('ccdVisitId', inplace=True, verify_integrity=True)
1510 return pipeBase.Struct(outputCatalog=outputCatalog)
1511
1512
1513class MakeVisitTableConnections(pipeBase.PipelineTaskConnections,
1514 dimensions=("instrument",),
1515 defaultTemplates={"calexpType": ""}):
1516 visitSummaries = connectionTypes.Input(
1517 doc="Per-visit consolidated exposure metadata from ConsolidateVisitSummaryTask",
1518 name="{calexpType}visitSummary",
1519 storageClass="ExposureCatalog",
1520 dimensions=("instrument", "visit",),
1521 multiple=True,
1522 deferLoad=True,
1523 )
1524 outputCatalog = connectionTypes.Output(
1525 doc="Visit metadata table",
1526 name="visitTable",
1527 storageClass="DataFrame",
1528 dimensions=("instrument",)
1529 )
1530
1531
1532class MakeVisitTableConfig(pipeBase.PipelineTaskConfig,
1533 pipelineConnections=MakeVisitTableConnections):
1534 pass
1535
1536
1537class MakeVisitTableTask(CmdLineTask, pipeBase.PipelineTask):
1538 """Produce a `visitTable` from the `visitSummary` exposure catalogs.
1539 """
1540 _DefaultName = 'makeVisitTable'
1541 ConfigClass = MakeVisitTableConfig
1542
1543 def run(self, visitSummaries):
1544 """ Make a table of visit information from the `visitSummary` catalogs
1545
1546 Parameters
1547 ----------
1548 visitSummaries : `list` of `lsst.daf.butler.DeferredDatasetHandle`
1549 List of DeferredDatasetHandles pointing to exposure catalogs with per-detector summary information.
1550 Returns
1551 -------
1552 result : `lsst.pipe.base.Struct`
1553 Results struct with attribute:
1554 ``outputCatalog``
1555 Catalog of visit information.
1556 """
1557 visitEntries = []
1558 for visitSummary in visitSummaries:
1559 visitSummary = visitSummary.get()
1560 visitRow = visitSummary[0]
1561 visitInfo = visitRow.getVisitInfo()
1562
1563 visitEntry = {}
1564 visitEntry["visitId"] = visitRow['visit']
1565 visitEntry["visit"] = visitRow['visit']
1566 visitEntry["physical_filter"] = visitRow['physical_filter']
1567 visitEntry["band"] = visitRow['band']
1568 raDec = visitInfo.getBoresightRaDec()
1569 visitEntry["ra"] = raDec.getRa().asDegrees()
1570 visitEntry["decl"] = raDec.getDec().asDegrees()
1571 visitEntry["skyRotation"] = visitInfo.getBoresightRotAngle().asDegrees()
1572 azAlt = visitInfo.getBoresightAzAlt()
1573 visitEntry["azimuth"] = azAlt.getLongitude().asDegrees()
1574 visitEntry["altitude"] = azAlt.getLatitude().asDegrees()
1575 visitEntry["zenithDistance"] = 90 - azAlt.getLatitude().asDegrees()
1576 visitEntry["airmass"] = visitInfo.getBoresightAirmass()
1577 visitEntry["obsStart"] = visitInfo.getDate().toPython()
1578 visitEntry["expTime"] = visitInfo.getExposureTime()
1579 visitEntries.append(visitEntry)
1580 # TODO: DM-30623, Add programId, exposureType, expMidpt, cameraTemp, mirror1Temp, mirror2Temp,
1581 # mirror3Temp, domeTemp, externalTemp, dimmSeeing, pwvGPS, pwvMW, flags, nExposures
1582
1583 outputCatalog = pd.DataFrame(data=visitEntries)
1584 outputCatalog.set_index('visitId', inplace=True, verify_integrity=True)
1585 return pipeBase.Struct(outputCatalog=outputCatalog)
1586
1587
1588class WriteForcedSourceTableConnections(pipeBase.PipelineTaskConnections,
1589 dimensions=("instrument", "visit", "detector", "skymap", "tract")):
1590
1591 inputCatalog = connectionTypes.Input(
1592 doc="Primary per-detector, single-epoch forced-photometry catalog. "
1593 "By default, it is the output of ForcedPhotCcdTask on calexps",
1594 name="forced_src",
1595 storageClass="SourceCatalog",
1596 dimensions=("instrument", "visit", "detector", "skymap", "tract")
1597 )
1598 inputCatalogDiff = connectionTypes.Input(
1599 doc="Secondary multi-epoch, per-detector, forced photometry catalog. "
1600 "By default, it is the output of ForcedPhotCcdTask run on image differences.",
1601 name="forced_diff",
1602 storageClass="SourceCatalog",
1603 dimensions=("instrument", "visit", "detector", "skymap", "tract")
1604 )
1605 outputCatalog = connectionTypes.Output(
1606 doc="InputCatalogs horizonatally joined on `objectId` in Parquet format",
1607 name="mergedForcedSource",
1608 storageClass="DataFrame",
1609 dimensions=("instrument", "visit", "detector", "skymap", "tract")
1610 )
1611
1612
1613class WriteForcedSourceTableConfig(WriteSourceTableConfig,
1614 pipelineConnections=WriteForcedSourceTableConnections):
1615 key = lsst.pex.config.Field(
1616 doc="Column on which to join the two input tables on and make the primary key of the output",
1617 dtype=str,
1618 default="objectId",
1619 )
1620
1621
1622class WriteForcedSourceTableTask(pipeBase.PipelineTask):
1623 """Merge and convert per-detector forced source catalogs to parquet
1624
1625 Because the predecessor ForcedPhotCcdTask operates per-detector,
1626 per-tract, (i.e., it has tract in its dimensions), detectors
1627 on the tract boundary may have multiple forced source catalogs.
1628
1629 The successor task TransformForcedSourceTable runs per-patch
1630 and temporally-aggregates overlapping mergedForcedSource catalogs from all
1631 available multiple epochs.
1632 """
1633 _DefaultName = "writeForcedSourceTable"
1634 ConfigClass = WriteForcedSourceTableConfig
1635
1636 def runQuantum(self, butlerQC, inputRefs, outputRefs):
1637 inputs = butlerQC.get(inputRefs)
1638 # Add ccdVisitId to allow joining with CcdVisitTable
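# dataId.pack("visit_detector") produces the same packed integer used for
# ccdVisitId when building the CcdVisit table, so the two tables share a
# join key.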
1639 inputs['ccdVisitId'] = butlerQC.quantum.dataId.pack("visit_detector")
1640 inputs['band'] = butlerQC.quantum.dataId.full['band']
1641 outputs = self.run(**inputs)
1642 butlerQC.put(outputs, outputRefs)
1643
1644 def run(self, inputCatalog, inputCatalogDiff, ccdVisitId=None, band=None):
1645 dfs = []
1646 for table, dataset, in zip((inputCatalog, inputCatalogDiff), ('calexp', 'diff')):
1647 df = table.asAstropy().to_pandas().set_index(self.config.key, drop=False)
1648 df = df.reindex(sorted(df.columns), axis=1)
1649 df['ccdVisitId'] = ccdVisitId if ccdVisitId else pd.NA
1650 df['band'] = band if band else pd.NA
1651 df.columns = pd.MultiIndex.from_tuples([(dataset, c) for c in df.columns],
1652 names=('dataset', 'column'))
1653
1654 dfs.append(df)
1655
1656 outputCatalog = functools.reduce(lambda d1, d2: d1.join(d2), dfs)
1657 return pipeBase.Struct(outputCatalog=outputCatalog)
1658
1659
1660class TransformForcedSourceTableConnections(pipeBase.PipelineTaskConnections,
1661 dimensions=("instrument", "skymap", "patch", "tract")):
1662
1663 inputCatalogs = connectionTypes.Input(
1664 doc="Parquet table of merged ForcedSources produced by WriteForcedSourceTableTask",
1665 name="mergedForcedSource",
1666 storageClass="DataFrame",
1667 dimensions=("instrument", "visit", "detector", "skymap", "tract"),
1668 multiple=True,
1669 deferLoad=True
1670 )
1671 referenceCatalog = connectionTypes.Input(
1672 doc="Reference catalog which was used to seed the forcedPhot. Columns "
1673 "objectId, detect_isPrimary, detect_isTractInner, detect_isPatchInner "
1674 "are expected.",
1675 name="objectTable",
1676 storageClass="DataFrame",
1677 dimensions=("tract", "patch", "skymap"),
1678 deferLoad=True
1679 )
1680 outputCatalog = connectionTypes.Output(
1681 doc="Narrower, temporally-aggregated, per-patch ForcedSource Table transformed and converted per a "
1682 "specified set of functors",
1683 name="forcedSourceTable",
1684 storageClass="DataFrame",
1685 dimensions=("tract", "patch", "skymap")
1686 )
1687
1688
1689class TransformForcedSourceTableConfig(TransformCatalogBaseConfig,
1690 pipelineConnections=TransformForcedSourceTableConnections):
1691 referenceColumns = pexConfig.ListField(
1692 dtype=str,
1693 default=["detect_isPrimary", "detect_isTractInner", "detect_isPatchInner"],
1694 optional=True,
1695 doc="Columns to pull from reference catalog",
1696 )
1697 keyRef = lsst.pex.config.Field(
1698 doc="Column on which to join the two input tables on and make the primary key of the output",
1699 dtype=str,
1700 default="objectId",
1701 )
1702 key = lsst.pex.config.Field(
1703 doc="Rename the output DataFrame index to this name",
1704 dtype=str,
1705 default="forcedSourceId",
1706 )
1707
1708 def setDefaults(self):
1709 super().setDefaults()
1710 self.functorFile = os.path.join('$PIPE_TASKS_DIR', 'schemas', 'ForcedSource.yaml')
1711
1712
1713class TransformForcedSourceTableTask(TransformCatalogBaseTask):
1714 """Transform/standardize a ForcedSource catalog
1715
1716 Transforms each wide, per-detector forcedSource parquet table per the
1717 specification file (per-camera defaults found in ForcedSource.yaml).
1718 All epochs that overlap the patch are aggregated into one per-patch
1719 narrow-parquet file.
1720
1721 No de-duplication of rows is performed. Duplicate-resolution flags are
1722 pulled in from the referenceCatalog: `detect_isPrimary`,
1723 `detect_isTractInner`, `detect_isPatchInner`, so that the user may
1724 de-duplicate for analysis or compare duplicates for QA.
1725
1726 The resulting table includes multiple bands. Epochs (MJDs) and other
1727 useful per-visit quantities can be retrieved by joining with the CcdVisit
1728 table on ccdVisitId.
1729 """
1730 _DefaultName = "transformForcedSourceTable"
1731 ConfigClass = TransformForcedSourceTableConfig
1732
1733 def runQuantum(self, butlerQC, inputRefs, outputRefs):
1734 inputs = butlerQC.get(inputRefs)
1735 if self.funcs is None:
1736 raise ValueError("config.functorFile is None. "
1737 "Must be a valid path to yaml in order to run Task as a PipelineTask.")
1738 outputs = self.run(inputs['inputCatalogs'], inputs['referenceCatalog'], funcs=self.funcs,
1739 dataId=outputRefs.outputCatalog.dataId.full)
1740
1741 butlerQC.put(outputs, outputRefs)
1742
1743 def run(self, inputCatalogs, referenceCatalog, funcs=None, dataId=None, band=None):
1744 dfs = []
1745 ref = referenceCatalog.get(parameters={"columns": self.config.referenceColumns})
1746 self.log.info("Aggregating %s input catalogs" % (len(inputCatalogs)))
1747 for handle in inputCatalogs:
1748 result = self.transform(None, handle, funcs, dataId)
1749 # Filter for only rows that were detected on (overlap) the patch
1750 dfs.append(result.df.join(ref, how='inner'))
1751
1752 outputCatalog = pd.concat(dfs)
1753
1754 # Now that we are done joining on config.keyRef, switch the
1755 # index over to config.key:
1756 outputCatalog.index.rename(self.config.keyRef, inplace=True)
1757 # Add config.keyRef to the column list
1758 outputCatalog.reset_index(inplace=True)
1759 # Set forcedSourceId, which is specified in ForcedSource.yaml, as the index
1760 outputCatalog.set_index("forcedSourceId", inplace=True, verify_integrity=True)
1761 # Rename the index to config.key
1762 outputCatalog.index.rename(self.config.key, inplace=True)
1763
1764 self.log.info("Made a table of %d columns and %d rows",
1765 len(outputCatalog.columns), len(outputCatalog))
1766 return pipeBase.Struct(outputCatalog=outputCatalog)
1767
1768
1769class ConsolidateTractConnections(pipeBase.PipelineTaskConnections,
1770 defaultTemplates={"catalogType": ""},
1771 dimensions=("instrument", "tract")):
1772 inputCatalogs = connectionTypes.Input(
1773 doc="Input per-patch DataFrame Tables to be concatenated",
1774 name="{catalogType}ForcedSourceTable",
1775 storageClass="DataFrame",
1776 dimensions=("tract", "patch", "skymap"),
1777 multiple=True,
1778 )
1779
1780 outputCatalog = connectionTypes.Output(
1781 doc="Output per-tract concatenation of DataFrame Tables",
1782 name="{catalogType}ForcedSourceTable_tract",
1783 storageClass="DataFrame",
1784 dimensions=("tract", "skymap"),
1785 )
1786
1787
1788class ConsolidateTractConfig(pipeBase.PipelineTaskConfig,
1789 pipelineConnections=ConsolidateTractConnections):
1790 pass
1791
1792
1793class ConsolidateTractTask(CmdLineTask, pipeBase.PipelineTask):
1794 """Concatenate any per-patch, dataframe list into a single
1795 per-tract DataFrame
1796 """
1797 _DefaultName = 'ConsolidateTract'
1798 ConfigClass = ConsolidateTractConfig
1799
1800 def runQuantum(self, butlerQC, inputRefs, outputRefs):
1801 inputs = butlerQC.get(inputRefs)
1802 # No check that at least one inputCatalog exists: an empty input list would mean an empty quantum graph
1803 self.log.info("Concatenating %s per-patch %s Tables",
1804 len(inputs['inputCatalogs']),
1805 inputRefs.inputCatalogs[0].datasetType.name)
1806 df = pd.concat(inputs['inputCatalogs'])
1807 butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs)