lsst.pipe.tasks  21.0.0-65-g9ea87ca1+ca40f17e88
postprocess.py
1 # This file is part of pipe_tasks
2 #
3 # Developed for the LSST Data Management System.
4 # This product includes software developed by the LSST Project
5 # (https://www.lsst.org).
6 # See the COPYRIGHT file at the top-level directory of this distribution
7 # for details of code ownership.
8 #
9 # This program is free software: you can redistribute it and/or modify
10 # it under the terms of the GNU General Public License as published by
11 # the Free Software Foundation, either version 3 of the License, or
12 # (at your option) any later version.
13 #
14 # This program is distributed in the hope that it will be useful,
15 # but WITHOUT ANY WARRANTY; without even the implied warranty of
16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 # GNU General Public License for more details.
18 #
19 # You should have received a copy of the GNU General Public License
20 # along with this program. If not, see <https://www.gnu.org/licenses/>.
21 
22 import functools
23 import pandas as pd
24 from collections import defaultdict
25 
26 import lsst.geom
27 import lsst.pex.config as pexConfig
28 import lsst.pipe.base as pipeBase
29 import lsst.daf.base as dafBase
30 from lsst.pipe.base import connectionTypes
31 import lsst.afw.table as afwTable
32 from lsst.meas.base import SingleFrameMeasurementTask
33 from lsst.pipe.base import CmdLineTask, ArgumentParser, DataIdContainer
34 from lsst.coadd.utils.coaddDataIdContainer import CoaddDataIdContainer
35 from lsst.daf.butler import DeferredDatasetHandle
36 
37 from .parquetTable import ParquetTable
38 from .multiBandUtils import makeMergeArgumentParser, MergeSourcesRunner
39 from .functors import CompositeFunctor, RAColumn, DecColumn, Column
40 
41 
42 def flattenFilters(df, noDupCols=['coord_ra', 'coord_dec'], camelCase=False, inputBands=None):
43  """Flattens a dataframe with multilevel column index
44  """
45  newDf = pd.DataFrame()
46  # band is the level 0 index
47  dfBands = df.columns.unique(level=0).values
48  for band in dfBands:
49  subdf = df[band]
50  columnFormat = '{0}{1}' if camelCase else '{0}_{1}'
51  newColumns = {c: columnFormat.format(band, c)
52  for c in subdf.columns if c not in noDupCols}
53  cols = list(newColumns.keys())
54  newDf = pd.concat([newDf, subdf[cols].rename(columns=newColumns)], axis=1)
55 
56  # Band must be present in the input and output or else column is all NaN:
57  presentBands = dfBands if inputBands is None else list(set(inputBands).intersection(dfBands))
58  # Get the unexploded columns from any present band's partition
59  noDupDf = df[presentBands[0]][noDupCols]
60  newDf = pd.concat([noDupDf, newDf], axis=1)
61  return newDf
62 
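# A minimal usage sketch for flattenFilters (the toy DataFrame below is illustrative,
# not pipeline data): a two-level (band, column) MultiIndex is flattened into
# band-prefixed column names, with the noDupCols kept once.
#
#     >>> import pandas as pd
#     >>> cols = pd.MultiIndex.from_product(
#     ...     [['g', 'r'], ['coord_ra', 'coord_dec', 'PsFlux']], names=('band', 'column'))
#     >>> df = pd.DataFrame(1.0, index=range(3), columns=cols)
#     >>> list(flattenFilters(df, camelCase=True).columns)
#     ['coord_ra', 'coord_dec', 'gPsFlux', 'rPsFlux']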
63 
64 class WriteObjectTableConnections(pipeBase.PipelineTaskConnections,
65  defaultTemplates={"coaddName": "deep"},
66  dimensions=("tract", "patch", "skymap")):
67  inputCatalogMeas = connectionTypes.Input(
68  doc="Catalog of source measurements on the deepCoadd.",
69  dimensions=("tract", "patch", "band", "skymap"),
70  storageClass="SourceCatalog",
71  name="{coaddName}Coadd_meas",
72  multiple=True
73  )
74  inputCatalogForcedSrc = connectionTypes.Input(
75  doc="Catalog of forced measurements (shape and position parameters held fixed) on the deepCoadd.",
76  dimensions=("tract", "patch", "band", "skymap"),
77  storageClass="SourceCatalog",
78  name="{coaddName}Coadd_forced_src",
79  multiple=True
80  )
81  inputCatalogRef = connectionTypes.Input(
82  doc="Catalog marking the primary detection (which band provides a good shape and position)"
 83  " for each detection in deepCoadd_mergeDet.",
84  dimensions=("tract", "patch", "skymap"),
85  storageClass="SourceCatalog",
86  name="{coaddName}Coadd_ref"
87  )
88  outputCatalog = connectionTypes.Output(
89  doc="A vertical concatenation of the deepCoadd_{ref|meas|forced_src} catalogs, "
90  "stored as a DataFrame with a multi-level column index per-patch.",
91  dimensions=("tract", "patch", "skymap"),
92  storageClass="DataFrame",
93  name="{coaddName}Coadd_obj"
94  )
95 
96 
97 class WriteObjectTableConfig(pipeBase.PipelineTaskConfig,
98  pipelineConnections=WriteObjectTableConnections):
99  engine = pexConfig.Field(
100  dtype=str,
101  default="pyarrow",
102  doc="Parquet engine for writing (pyarrow or fastparquet)"
103  )
104  coaddName = pexConfig.Field(
105  dtype=str,
106  default="deep",
107  doc="Name of coadd"
108  )
109 
110 
111 class WriteObjectTableTask(CmdLineTask, pipeBase.PipelineTask):
112  """Write filter-merged source tables to parquet
113  """
114  _DefaultName = "writeObjectTable"
115  ConfigClass = WriteObjectTableConfig
116  RunnerClass = MergeSourcesRunner
117 
118  # Names of table datasets to be merged
119  inputDatasets = ('forced_src', 'meas', 'ref')
120 
121  # Tag of output dataset written by `MergeSourcesTask.write`
122  outputDataset = 'obj'
123 
124  def __init__(self, butler=None, schema=None, **kwargs):
 125  # It is a shame that this class can't use the default init for CmdLineTask,
 126  # but to do so would require its own special task runner, which is many
 127  # more lines of specialization, so this is how it is for now.
128  super().__init__(**kwargs)
129 
130  def runDataRef(self, patchRefList):
131  """!
132  @brief Merge coadd sources from multiple bands. Calls @ref `run` which must be defined in
133  subclasses that inherit from MergeSourcesTask.
134  @param[in] patchRefList list of data references for each filter
135  """
136  catalogs = dict(self.readCatalog(patchRef) for patchRef in patchRefList)
137  dataId = patchRefList[0].dataId
138  mergedCatalog = self.run(catalogs, tract=dataId['tract'], patch=dataId['patch'])
139  self.write(patchRefList[0], ParquetTable(dataFrame=mergedCatalog))
140 
141  def runQuantum(self, butlerQC, inputRefs, outputRefs):
142  inputs = butlerQC.get(inputRefs)
143 
144  measDict = {ref.dataId['band']: {'meas': cat} for ref, cat in
145  zip(inputRefs.inputCatalogMeas, inputs['inputCatalogMeas'])}
146  forcedSourceDict = {ref.dataId['band']: {'forced_src': cat} for ref, cat in
147  zip(inputRefs.inputCatalogForcedSrc, inputs['inputCatalogForcedSrc'])}
148 
149  catalogs = {}
150  for band in measDict.keys():
151  catalogs[band] = {'meas': measDict[band]['meas'],
152  'forced_src': forcedSourceDict[band]['forced_src'],
153  'ref': inputs['inputCatalogRef']}
154  dataId = butlerQC.quantum.dataId
155  df = self.run(catalogs=catalogs, tract=dataId['tract'], patch=dataId['patch'])
156  outputs = pipeBase.Struct(outputCatalog=df)
157  butlerQC.put(outputs, outputRefs)
158 
159  @classmethod
160  def _makeArgumentParser(cls):
161  """Create a suitable ArgumentParser.
162 
163  We will use the ArgumentParser to get a list of data
164  references for patches; the RunnerClass will sort them into lists
165  of data references for the same patch.
166 
 167  References the first of self.inputDatasets, rather than
 168  self.inputDataset.
169  """
170  return makeMergeArgumentParser(cls._DefaultName, cls.inputDatasets[0])
171 
172  def readCatalog(self, patchRef):
173  """Read input catalogs
174 
175  Read all the input datasets given by the 'inputDatasets'
176  attribute.
177 
178  Parameters
179  ----------
180  patchRef : `lsst.daf.persistence.ButlerDataRef`
181  Data reference for patch
182 
183  Returns
184  -------
185  Tuple consisting of band name and a dict of catalogs, keyed by
186  dataset name
187  """
188  band = patchRef.get(self.config.coaddName + "Coadd_filterLabel", immediate=True).bandLabel
189  catalogDict = {}
190  for dataset in self.inputDatasets:
191  catalog = patchRef.get(self.config.coaddName + "Coadd_" + dataset, immediate=True)
192  self.log.info("Read %d sources from %s for band %s: %s" %
193  (len(catalog), dataset, band, patchRef.dataId))
194  catalogDict[dataset] = catalog
195  return band, catalogDict
196 
197  def run(self, catalogs, tract, patch):
198  """Merge multiple catalogs.
199 
200  Parameters
201  ----------
202  catalogs : `dict`
203  Mapping from filter names to dict of catalogs.
 204  tract : `int`
 205  tractId to use for the tractId column.
 206  patch : `str`
 207  patchId to use for the patchId column.
208 
209  Returns
210  -------
211  catalog : `pandas.DataFrame`
212  Merged dataframe
213  """
214 
215  dfs = []
216  for filt, tableDict in catalogs.items():
217  for dataset, table in tableDict.items():
218  # Convert afwTable to pandas DataFrame
219  df = table.asAstropy().to_pandas().set_index('id', drop=True)
220 
221  # Sort columns by name, to ensure matching schema among patches
222  df = df.reindex(sorted(df.columns), axis=1)
223  df['tractId'] = tract
224  df['patchId'] = patch
225 
226  # Make columns a 3-level MultiIndex
227  df.columns = pd.MultiIndex.from_tuples([(dataset, filt, c) for c in df.columns],
228  names=('dataset', 'band', 'column'))
229  dfs.append(df)
230 
231  catalog = functools.reduce(lambda d1, d2: d1.join(d2), dfs)
232  return catalog
233 
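# The merged catalog returned by run() carries a three-level (dataset, band, column)
# column MultiIndex, so individual columns are addressed with a tuple key. A sketch,
# assuming `task` is a configured WriteObjectTableTask, `catalogs` is the band-keyed
# dict described above, and the column name is illustrative:
#
#     >>> merged = task.run(catalogs, tract=9813, patch='4,4')
#     >>> fluxes = merged['meas', 'g', 'base_PsfFlux_instFlux']
#     >>> perBand = merged.xs('g', axis=1, level='band')  # all datasets, one band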
234  def write(self, patchRef, catalog):
235  """Write the output.
236 
237  Parameters
238  ----------
239  catalog : `ParquetTable`
240  Catalog to write
241  patchRef : `lsst.daf.persistence.ButlerDataRef`
242  Data reference for patch
243  """
244  patchRef.put(catalog, self.config.coaddName + "Coadd_" + self.outputDataset)
245  # since the filter isn't actually part of the data ID for the dataset we're saving,
246  # it's confusing to see it in the log message, even if the butler simply ignores it.
247  mergeDataId = patchRef.dataId.copy()
248  del mergeDataId["filter"]
249  self.log.info("Wrote merged catalog: %s" % (mergeDataId,))
250 
251  def writeMetadata(self, dataRefList):
252  """No metadata to write, and not sure how to write it for a list of dataRefs.
253  """
254  pass
255 
256 
257 class WriteSourceTableConnections(pipeBase.PipelineTaskConnections,
258  dimensions=("instrument", "visit", "detector")):
259 
260  catalog = connectionTypes.Input(
261  doc="Input full-depth catalog of sources produced by CalibrateTask",
262  name="src",
263  storageClass="SourceCatalog",
264  dimensions=("instrument", "visit", "detector")
265  )
266  outputCatalog = connectionTypes.Output(
267  doc="Catalog of sources, `src` in Parquet format",
268  name="source",
269  storageClass="DataFrame",
270  dimensions=("instrument", "visit", "detector")
271  )
272 
273 
274 class WriteSourceTableConfig(pipeBase.PipelineTaskConfig,
275  pipelineConnections=WriteSourceTableConnections):
276  doApplyExternalPhotoCalib = pexConfig.Field(
277  dtype=bool,
278  default=False,
 279  doc=("Add local photoCalib columns from the calexp.photoCalib? Should only be set True if "
280  "generating Source Tables from older src tables which do not already have local calib columns")
281  )
282  doApplyExternalSkyWcs = pexConfig.Field(
283  dtype=bool,
284  default=False,
 285  doc=("Add local WCS columns from the calexp.wcs? Should only be set True if "
286  "generating Source Tables from older src tables which do not already have local calib columns")
287  )
288 
289 
290 class WriteSourceTableTask(CmdLineTask, pipeBase.PipelineTask):
291  """Write source table to parquet
292  """
293  _DefaultName = "writeSourceTable"
294  ConfigClass = WriteSourceTableConfig
295 
296  def runDataRef(self, dataRef):
297  src = dataRef.get('src')
298  if self.config.doApplyExternalPhotoCalib or self.config.doApplyExternalSkyWcs:
299  src = self.addCalibColumns(src, dataRef)
300 
301  ccdVisitId = dataRef.get('ccdExposureId')
302  result = self.run(src, ccdVisitId=ccdVisitId)
303  dataRef.put(result.table, 'source')
304 
305  def runQuantum(self, butlerQC, inputRefs, outputRefs):
306  inputs = butlerQC.get(inputRefs)
307  inputs['ccdVisitId'] = butlerQC.quantum.dataId.pack("visit_detector")
308  result = self.run(**inputs).table
309  outputs = pipeBase.Struct(outputCatalog=result.toDataFrame())
310  butlerQC.put(outputs, outputRefs)
311 
312  def run(self, catalog, ccdVisitId=None):
313  """Convert `src` catalog to parquet
314 
315  Parameters
316  ----------
 317  catalog : `afwTable.SourceCatalog`
 318  catalog to be converted
 319  ccdVisitId : `int`
 320  ccdVisitId to be added as a column
321 
322  Returns
323  -------
324  result : `lsst.pipe.base.Struct`
325  ``table``
326  `ParquetTable` version of the input catalog
327  """
328  self.log.info("Generating parquet table from src catalog %s", ccdVisitId)
329  df = catalog.asAstropy().to_pandas().set_index('id', drop=True)
330  df['ccdVisitId'] = ccdVisitId
331  return pipeBase.Struct(table=ParquetTable(dataFrame=df))
332 
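# A minimal usage sketch, assuming `task` is a configured WriteSourceTableTask and
# `src` is an `afwTable.SourceCatalog`; the returned Struct holds a ParquetTable whose
# DataFrame is indexed by source id, with the ccdVisitId appended as a column:
#
#     >>> result = task.run(src, ccdVisitId=12345)
#     >>> df = result.table.toDataFrame()
#     >>> 'ccdVisitId' in df.columns
#     True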
333  def addCalibColumns(self, catalog, dataRef):
 334  """Add columns with local calibration evaluated at each centroid,
 335  for backwards compatibility with old repos.
 336 
 337  This exists for the purpose of converting old src catalogs
 338  (which don't have the expected local calib columns) to Source Tables.
339 
340  Parameters
341  ----------
 342  catalog : `afwTable.SourceCatalog`
 343  catalog to which calib columns will be added
 344  dataRef : `lsst.daf.persistence.ButlerDataRef`
 345  Data reference for fetching the calibs from disk.
346 
347  Returns
348  -------
349  newCat: `afwTable.SourceCatalog`
350  Source Catalog with requested local calib columns
351  """
352  mapper = afwTable.SchemaMapper(catalog.schema)
353  measureConfig = SingleFrameMeasurementTask.ConfigClass()
354  measureConfig.doReplaceWithNoise = False
355 
 356  # Just need the WCS or the PhotoCalib attached to an exposure
 357  exposure = dataRef.get('calexp_sub',
 358  bbox=lsst.geom.BoxI(lsst.geom.PointI(0, 0), lsst.geom.PointI(0, 0)))
 359 
360  mapper = afwTable.SchemaMapper(catalog.schema)
361  mapper.addMinimalSchema(catalog.schema, True)
362  schema = mapper.getOutputSchema()
363 
364  exposureIdInfo = dataRef.get("expIdInfo")
365  measureConfig.plugins.names = []
366  if self.config.doApplyExternalSkyWcs:
367  plugin = 'base_LocalWcs'
368  if plugin in schema:
369  raise RuntimeError(f"{plugin} already in src catalog. Set doApplyExternalSkyWcs=False")
370  else:
371  measureConfig.plugins.names.add(plugin)
372 
373  if self.config.doApplyExternalPhotoCalib:
374  plugin = 'base_LocalPhotoCalib'
375  if plugin in schema:
376  raise RuntimeError(f"{plugin} already in src catalog. Set doApplyExternalPhotoCalib=False")
377  else:
378  measureConfig.plugins.names.add(plugin)
379 
380  measurement = SingleFrameMeasurementTask(config=measureConfig, schema=schema)
381  newCat = afwTable.SourceCatalog(schema)
382  newCat.extend(catalog, mapper=mapper)
383  measurement.run(measCat=newCat, exposure=exposure, exposureId=exposureIdInfo.expId)
384  return newCat
385 
386  def writeMetadata(self, dataRef):
387  """No metadata to write.
388  """
389  pass
390 
391  @classmethod
392  def _makeArgumentParser(cls):
393  parser = ArgumentParser(name=cls._DefaultName)
394  parser.add_id_argument("--id", 'src',
395  help="data ID, e.g. --id visit=12345 ccd=0")
396  return parser
397 
398 
399 class PostprocessAnalysis(object):
400  """Calculate columns from ParquetTable
401 
402  This object manages and organizes an arbitrary set of computations
403  on a catalog. The catalog is defined by a
404  `lsst.pipe.tasks.parquetTable.ParquetTable` object (or list thereof), such as a
405  `deepCoadd_obj` dataset, and the computations are defined by a collection
 406  of `lsst.pipe.tasks.functors.Functor` objects (or, equivalently,
407  a `CompositeFunctor`).
408 
409  After the object is initialized, accessing the `.df` attribute (which
410  holds the `pandas.DataFrame` containing the results of the calculations) triggers
411  computation of said dataframe.
412 
413  One of the conveniences of using this object is the ability to define a desired common
414  filter for all functors. This enables the same functor collection to be passed to
415  several different `PostprocessAnalysis` objects without having to change the original
416  functor collection, since the `filt` keyword argument of this object triggers an
417  overwrite of the `filt` property for all functors in the collection.
418 
419  This object also allows a list of refFlags to be passed, and defines a set of default
420  refFlags that are always included even if not requested.
421 
 422  If a list of `ParquetTable` objects is passed, rather than a single one, then the
423  calculations will be mapped over all the input catalogs. In principle, it should
424  be straightforward to parallelize this activity, but initial tests have failed
425  (see TODO in code comments).
426 
427  Parameters
428  ----------
429  parq : `lsst.pipe.tasks.ParquetTable` (or list of such)
430  Source catalog(s) for computation
431 
432  functors : `list`, `dict`, or `lsst.pipe.tasks.functors.CompositeFunctor`
433  Computations to do (functors that act on `parq`).
434  If a dict, the output
435  DataFrame will have columns keyed accordingly.
436  If a list, the column keys will come from the
437  `.shortname` attribute of each functor.
438 
439  filt : `str` (optional)
440  Filter in which to calculate. If provided,
441  this will overwrite any existing `.filt` attribute
442  of the provided functors.
443 
444  flags : `list` (optional)
445  List of flags (per-band) to include in output table.
446 
447  refFlags : `list` (optional)
448  List of refFlags (only reference band) to include in output table.
449 
450 
451  """
452  _defaultRefFlags = []
453  _defaultFuncs = (('coord_ra', RAColumn()),
454  ('coord_dec', DecColumn()))
455 
456  def __init__(self, parq, functors, filt=None, flags=None, refFlags=None):
457  self.parq = parq
458  self.functors = functors
459 
460  self.filt = filt
461  self.flags = list(flags) if flags is not None else []
462  self.refFlags = list(self._defaultRefFlags)
463  if refFlags is not None:
464  self.refFlags += list(refFlags)
465 
466  self._df = None
467 
468  @property
469  def defaultFuncs(self):
470  funcs = dict(self._defaultFuncs)
471  return funcs
472 
473  @property
474  def func(self):
475  additionalFuncs = self.defaultFuncs
476  additionalFuncs.update({flag: Column(flag, dataset='ref') for flag in self.refFlags})
477  additionalFuncs.update({flag: Column(flag, dataset='meas') for flag in self.flags})
478 
479  if isinstance(self.functors, CompositeFunctor):
480  func = self.functors
481  else:
482  func = CompositeFunctor(self.functors)
483 
484  func.funcDict.update(additionalFuncs)
485  func.filt = self.filt
486 
487  return func
488 
489  @property
490  def noDupCols(self):
491  return [name for name, func in self.func.funcDict.items() if func.noDup or func.dataset == 'ref']
492 
493  @property
494  def df(self):
495  if self._df is None:
496  self.compute()
497  return self._df
498 
499  def compute(self, dropna=False, pool=None):
500  # map over multiple parquet tables
501  if type(self.parq) in (list, tuple):
502  if pool is None:
503  dflist = [self.func(parq, dropna=dropna) for parq in self.parq]
504  else:
505  # TODO: Figure out why this doesn't work (pyarrow pickling issues?)
506  dflist = pool.map(functools.partial(self.func, dropna=dropna), self.parq)
507  self._df = pd.concat(dflist)
508  else:
509  self._df = self.func(self.parq, dropna=dropna)
510 
511  return self._df
512 
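# A minimal usage sketch, assuming `parq` is a deepCoadd_obj ParquetTable; the functor
# dict and flag names below are illustrative. Accessing `.df` (or calling compute())
# triggers the calculation:
#
#     >>> from lsst.pipe.tasks.functors import Mag
#     >>> funcs = {'psfMag': Mag('base_PsfFlux', dataset='meas')}
#     >>> analysis = PostprocessAnalysis(parq, funcs, filt='g',
#     ...                                refFlags=['detect_isPrimary'])
#     >>> df = analysis.df  # columns: coord_ra, coord_dec, detect_isPrimary, psfMag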
513 
514 class TransformCatalogBaseConnections(pipeBase.PipelineTaskConnections,
515  dimensions=()):
516  """Expected Connections for subclasses of TransformCatalogBaseTask.
517 
518  Must be subclassed.
519  """
520  inputCatalog = connectionTypes.Input(
521  name="",
522  storageClass="DataFrame",
523  )
524  outputCatalog = connectionTypes.Output(
525  name="",
526  storageClass="DataFrame",
527  )
528 
529 
530 class TransformCatalogBaseConfig(pipeBase.PipelineTaskConfig,
531  pipelineConnections=TransformCatalogBaseConnections):
532  functorFile = pexConfig.Field(
533  dtype=str,
534  doc='Path to YAML file specifying functors to be computed',
535  default=None,
536  optional=True
537  )
538 
539 
540 class TransformCatalogBaseTask(CmdLineTask, pipeBase.PipelineTask):
 541  """Base class for transforming/standardizing a catalog by applying functors
 542  that convert units and apply calibrations.
 543 
544  The purpose of this task is to perform a set of computations on
545  an input `ParquetTable` dataset (such as `deepCoadd_obj`) and write the
546  results to a new dataset (which needs to be declared in an `outputDataset`
547  attribute).
548 
549  The calculations to be performed are defined in a YAML file that specifies
550  a set of functors to be computed, provided as
551  a `--functorFile` config parameter. An example of such a YAML file
552  is the following:
553 
554  funcs:
555  psfMag:
556  functor: Mag
557  args:
558  - base_PsfFlux
559  filt: HSC-G
560  dataset: meas
561  cmodel_magDiff:
562  functor: MagDiff
563  args:
564  - modelfit_CModel
565  - base_PsfFlux
566  filt: HSC-G
567  gauss_magDiff:
568  functor: MagDiff
569  args:
570  - base_GaussianFlux
571  - base_PsfFlux
572  filt: HSC-G
573  count:
574  functor: Column
575  args:
576  - base_InputCount_value
577  filt: HSC-G
578  deconvolved_moments:
579  functor: DeconvolvedMoments
580  filt: HSC-G
581  dataset: forced_src
582  refFlags:
583  - calib_psfUsed
584  - merge_measurement_i
585  - merge_measurement_r
586  - merge_measurement_z
587  - merge_measurement_y
588  - merge_measurement_g
589  - base_PixelFlags_flag_inexact_psfCenter
590  - detect_isPrimary
591 
 592  The names for each entry under "funcs" will become the names of columns in the
593  output dataset. All the functors referenced are defined in `lsst.pipe.tasks.functors`.
594  Positional arguments to be passed to each functor are in the `args` list,
595  and any additional entries for each column other than "functor" or "args" (e.g., `'filt'`,
596  `'dataset'`) are treated as keyword arguments to be passed to the functor initialization.
597 
 598  The "refFlags" entry is a shortcut for a set of `Column` functors that take the
 599  named columns unchanged from the `'ref'` dataset.
600 
601  The "flags" entry will be expanded out per band.
602 
603  This task uses the `lsst.pipe.tasks.postprocess.PostprocessAnalysis` object
 604  to organize and execute the calculations.
605 
606  """
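# A YAML file like the example in the docstring above can also be loaded and applied
# directly; a sketch, assuming `functors.yaml` holds that content and `parq` is a
# deepCoadd_obj ParquetTable:
#
#     >>> from lsst.pipe.tasks.functors import CompositeFunctor
#     >>> funcs = CompositeFunctor.from_file('functors.yaml')
#     >>> df = funcs(parq)  # one output column per entry under "funcs"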
607  @property
608  def _DefaultName(self):
609  raise NotImplementedError('Subclass must define "_DefaultName" attribute')
610 
611  @property
612  def outputDataset(self):
613  raise NotImplementedError('Subclass must define "outputDataset" attribute')
614 
615  @property
616  def inputDataset(self):
617  raise NotImplementedError('Subclass must define "inputDataset" attribute')
618 
619  @property
620  def ConfigClass(self):
621  raise NotImplementedError('Subclass must define "ConfigClass" attribute')
622 
623  def __init__(self, *args, **kwargs):
624  super().__init__(*args, **kwargs)
625  if self.config.functorFile:
 626  self.log.info('Loading transform functor definitions from %s',
 627  self.config.functorFile)
 628  self.funcs = CompositeFunctor.from_file(self.config.functorFile)
 629  self.funcs.update(dict(PostprocessAnalysis._defaultFuncs))
 630  else:
 631  self.funcs = None
632 
633  def runQuantum(self, butlerQC, inputRefs, outputRefs):
634  inputs = butlerQC.get(inputRefs)
 635  if self.funcs is None:
636  raise ValueError("config.functorFile is None. "
637  "Must be a valid path to yaml in order to run Task as a PipelineTask.")
 638  result = self.run(parq=inputs['inputCatalog'], funcs=self.funcs,
639  dataId=outputRefs.outputCatalog.dataId.full)
640  outputs = pipeBase.Struct(outputCatalog=result)
641  butlerQC.put(outputs, outputRefs)
642 
643  def runDataRef(self, dataRef):
644  parq = dataRef.get()
 645  if self.funcs is None:
646  raise ValueError("config.functorFile is None. "
647  "Must be a valid path to yaml in order to run as a CommandlineTask.")
 648  df = self.run(parq, funcs=self.funcs, dataId=dataRef.dataId)
 649  self.write(df, dataRef)
650  return df
651 
652  def run(self, parq, funcs=None, dataId=None, band=None):
653  """Do postprocessing calculations
654 
655  Takes a `ParquetTable` object and dataId,
656  returns a dataframe with results of postprocessing calculations.
657 
658  Parameters
659  ----------
660  parq : `lsst.pipe.tasks.parquetTable.ParquetTable`
661  ParquetTable from which calculations are done.
662  funcs : `lsst.pipe.tasks.functors.Functors`
663  Functors to apply to the table's columns
664  dataId : dict, optional
665  Used to add a `patchId` column to the output dataframe.
666  band : `str`, optional
667  Filter band that is being processed.
668 
669  Returns
 670  -------
671  `pandas.DataFrame`
672 
673  """
674  self.log.info("Transforming/standardizing the source table dataId: %s", dataId)
675 
 676  df = self.transform(band, parq, funcs, dataId).df
677  self.log.info("Made a table of %d columns and %d rows", len(df.columns), len(df))
678  return df
679 
680  def getFunctors(self):
 681  return self.funcs
682 
683  def getAnalysis(self, parq, funcs=None, band=None):
684  if funcs is None:
 685  funcs = self.funcs
686  analysis = PostprocessAnalysis(parq, funcs, filt=band)
687  return analysis
688 
689  def transform(self, band, parq, funcs, dataId):
 690  analysis = self.getAnalysis(parq, funcs=funcs, band=band)
691  df = analysis.df
692  if dataId is not None:
693  for key, value in dataId.items():
694  df[str(key)] = value
695 
696  return pipeBase.Struct(
697  df=df,
698  analysis=analysis
699  )
700 
701  def write(self, df, parqRef):
 702  parqRef.put(ParquetTable(dataFrame=df), self.outputDataset)
703 
704  def writeMetadata(self, dataRef):
705  """No metadata to write.
706  """
707  pass
708 
709 
710 class TransformObjectCatalogConnections(pipeBase.PipelineTaskConnections,
711  defaultTemplates={"coaddName": "deep"},
712  dimensions=("tract", "patch", "skymap")):
713  inputCatalog = connectionTypes.Input(
714  doc="The vertical concatenation of the deepCoadd_{ref|meas|forced_src} catalogs, "
715  "stored as a DataFrame with a multi-level column index per-patch.",
716  dimensions=("tract", "patch", "skymap"),
717  storageClass="DataFrame",
718  name="{coaddName}Coadd_obj",
719  deferLoad=True,
720  )
721  outputCatalog = connectionTypes.Output(
722  doc="Per-Patch Object Table of columns transformed from the deepCoadd_obj table per the standard "
723  "data model.",
724  dimensions=("tract", "patch", "skymap"),
725  storageClass="DataFrame",
726  name="objectTable"
727  )
728 
729 
730 class TransformObjectCatalogConfig(TransformCatalogBaseConfig,
731  pipelineConnections=TransformObjectCatalogConnections):
732  coaddName = pexConfig.Field(
733  dtype=str,
734  default="deep",
735  doc="Name of coadd"
736  )
737  # TODO: remove in DM-27177
738  filterMap = pexConfig.DictField(
739  keytype=str,
740  itemtype=str,
741  default={},
742  doc=("Dictionary mapping full filter name to short one for column name munging."
 743  " These filters determine the output columns no matter what filters the "
744  "input data actually contain."),
745  deprecated=("Coadds are now identified by the band, so this transform is unused."
 746  " Will be removed after v22.")
747  )
748  outputBands = pexConfig.ListField(
749  dtype=str,
750  default=None,
751  optional=True,
752  doc=("These bands and only these bands will appear in the output,"
753  " NaN-filled if the input does not include them."
754  " If None, then use all bands found in the input.")
755  )
756  camelCase = pexConfig.Field(
757  dtype=bool,
758  default=True,
 759  doc=("Write per-band column names with camelCase, else underscore. "
760  "For example: gPsFlux instead of g_PsFlux.")
761  )
762  multilevelOutput = pexConfig.Field(
763  dtype=bool,
764  default=False,
765  doc=("Whether results dataframe should have a multilevel column index (True) or be flat "
766  "and name-munged (False).")
767  )
768 
769 
770 class TransformObjectCatalogTask(TransformCatalogBaseTask):
771  """Produce a flattened Object Table to match the format specified in
772  sdm_schemas.
773 
774  Do the same set of postprocessing calculations on all bands
775 
 776  This is identical to `TransformCatalogBaseTask`, except that it does the
777  specified functor calculations for all filters present in the
778  input `deepCoadd_obj` table. Any specific `"filt"` keywords specified
 779  by the YAML file will be superseded.
780  """
781  _DefaultName = "transformObjectCatalog"
782  ConfigClass = TransformObjectCatalogConfig
783 
784  # Used by Gen 2 runDataRef only:
785  inputDataset = 'deepCoadd_obj'
786  outputDataset = 'objectTable'
787 
788  @classmethod
789  def _makeArgumentParser(cls):
790  parser = ArgumentParser(name=cls._DefaultName)
791  parser.add_id_argument("--id", cls.inputDataset,
792  ContainerClass=CoaddDataIdContainer,
793  help="data ID, e.g. --id tract=12345 patch=1,2")
794  return parser
795 
796  def run(self, parq, funcs=None, dataId=None, band=None):
797  # NOTE: band kwarg is ignored here.
798  dfDict = {}
799  analysisDict = {}
800  templateDf = pd.DataFrame()
801 
802  if isinstance(parq, DeferredDatasetHandle):
803  columns = parq.get(component='columns')
804  inputBands = columns.unique(level=1).values
805  else:
806  inputBands = parq.columnLevelNames['band']
807 
808  outputBands = self.config.outputBands if self.config.outputBands else inputBands
809 
810  # Perform transform for data of filters that exist in parq.
811  for inputBand in inputBands:
812  if inputBand not in outputBands:
813  self.log.info("Ignoring %s band data in the input", inputBand)
814  continue
815  self.log.info("Transforming the catalog of band %s", inputBand)
816  result = self.transform(inputBand, parq, funcs, dataId)
817  dfDict[inputBand] = result.df
818  analysisDict[inputBand] = result.analysis
819  if templateDf.empty:
820  templateDf = result.df
821 
822  # Fill NaNs in columns of other wanted bands
823  for filt in outputBands:
824  if filt not in dfDict:
825  self.log.info("Adding empty columns for band %s", filt)
826  dfDict[filt] = pd.DataFrame().reindex_like(templateDf)
827 
828  # This makes a multilevel column index, with band as first level
829  df = pd.concat(dfDict, axis=1, names=['band', 'column'])
830 
831  if not self.config.multilevelOutput:
832  noDupCols = list(set.union(*[set(v.noDupCols) for v in analysisDict.values()]))
833  if dataId is not None:
834  noDupCols += list(dataId.keys())
835  df = flattenFilters(df, noDupCols=noDupCols, camelCase=self.config.camelCase,
836  inputBands=inputBands)
837 
838  self.log.info("Made a table of %d columns and %d rows", len(df.columns), len(df))
839  return df
840 
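# With the default multilevelOutput=False and camelCase=True, the per-band functor
# columns come out of run() flattened and band-prefixed. A sketch, assuming a functor
# named 'PsFlux' in the YAML and grizy bands in the input:
#
#     >>> df = task.run(parq, funcs=funcs, dataId={'tract': 9813, 'patch': '4,4'})
#     >>> sorted(c for c in df.columns if c.endswith('PsFlux'))
#     ['gPsFlux', 'iPsFlux', 'rPsFlux', 'yPsFlux', 'zPsFlux']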
841 
842 class TractObjectDataIdContainer(CoaddDataIdContainer):
843 
844  def makeDataRefList(self, namespace):
845  """Make self.refList from self.idList
846 
847  Generate a list of data references given tract and/or patch.
848  This was adapted from `TractQADataIdContainer`, which was
 849  `TractDataIdContainer` modified to not require "filter".
850  Only existing dataRefs are returned.
851  """
852  def getPatchRefList(tract):
853  return [namespace.butler.dataRef(datasetType=self.datasetType,
854  tract=tract.getId(),
855  patch="%d,%d" % patch.getIndex()) for patch in tract]
856 
857  tractRefs = defaultdict(list) # Data references for each tract
858  for dataId in self.idList:
859  skymap = self.getSkymap(namespace)
860 
861  if "tract" in dataId:
862  tractId = dataId["tract"]
863  if "patch" in dataId:
864  tractRefs[tractId].append(namespace.butler.dataRef(datasetType=self.datasetType,
865  tract=tractId,
866  patch=dataId['patch']))
867  else:
868  tractRefs[tractId] += getPatchRefList(skymap[tractId])
869  else:
870  tractRefs = dict((tract.getId(), tractRefs.get(tract.getId(), []) + getPatchRefList(tract))
871  for tract in skymap)
872  outputRefList = []
873  for tractRefList in tractRefs.values():
874  existingRefs = [ref for ref in tractRefList if ref.datasetExists()]
875  outputRefList.append(existingRefs)
876 
877  self.refList = outputRefList
878 
879 
880 class ConsolidateObjectTableConnections(pipeBase.PipelineTaskConnections,
881  dimensions=("tract", "skymap")):
882  inputCatalogs = connectionTypes.Input(
883  doc="Per-Patch objectTables conforming to the standard data model.",
884  name="objectTable",
885  storageClass="DataFrame",
886  dimensions=("tract", "patch", "skymap"),
887  multiple=True,
888  )
889  outputCatalog = connectionTypes.Output(
 890  doc="Per-tract concatenation of the per-patch objectTables",
891  name="objectTable_tract",
892  storageClass="DataFrame",
893  dimensions=("tract", "skymap"),
894  )
895 
896 
897 class ConsolidateObjectTableConfig(pipeBase.PipelineTaskConfig,
898  pipelineConnections=ConsolidateObjectTableConnections):
899  coaddName = pexConfig.Field(
900  dtype=str,
901  default="deep",
902  doc="Name of coadd"
903  )
904 
905 
906 class ConsolidateObjectTableTask(CmdLineTask, pipeBase.PipelineTask):
907  """Write patch-merged source tables to a tract-level parquet file
908 
 909  Concatenates `objectTable` list into a per-tract `objectTable_tract`
910  """
911  _DefaultName = "consolidateObjectTable"
912  ConfigClass = ConsolidateObjectTableConfig
913 
914  inputDataset = 'objectTable'
915  outputDataset = 'objectTable_tract'
916 
917  def runQuantum(self, butlerQC, inputRefs, outputRefs):
918  inputs = butlerQC.get(inputRefs)
919  self.log.info("Concatenating %s per-patch Object Tables",
920  len(inputs['inputCatalogs']))
921  df = pd.concat(inputs['inputCatalogs'])
922  butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs)
923 
924  @classmethod
925  def _makeArgumentParser(cls):
926  parser = ArgumentParser(name=cls._DefaultName)
927 
928  parser.add_id_argument("--id", cls.inputDataset,
929  help="data ID, e.g. --id tract=12345",
930  ContainerClass=TractObjectDataIdContainer)
931  return parser
932 
933  def runDataRef(self, patchRefList):
934  df = pd.concat([patchRef.get().toDataFrame() for patchRef in patchRefList])
935  patchRefList[0].put(ParquetTable(dataFrame=df), self.outputDataset)
936 
937  def writeMetadata(self, dataRef):
938  """No metadata to write.
939  """
940  pass
941 
942 
943 class TransformSourceTableConnections(pipeBase.PipelineTaskConnections,
944  dimensions=("instrument", "visit", "detector")):
945 
946  inputCatalog = connectionTypes.Input(
947  doc="Wide input catalog of sources produced by WriteSourceTableTask",
948  name="source",
949  storageClass="DataFrame",
950  dimensions=("instrument", "visit", "detector"),
951  deferLoad=True
952  )
953  outputCatalog = connectionTypes.Output(
954  doc="Narrower, per-detector Source Table transformed and converted per a "
955  "specified set of functors",
956  name="sourceTable",
957  storageClass="DataFrame",
958  dimensions=("instrument", "visit", "detector")
959  )
960 
961 
962 class TransformSourceTableConfig(TransformCatalogBaseConfig,
963  pipelineConnections=TransformSourceTableConnections):
964  pass
965 
966 
967 class TransformSourceTableTask(TransformCatalogBaseTask):
968  """Transform/standardize a source catalog
969  """
970  _DefaultName = "transformSourceTable"
971  ConfigClass = TransformSourceTableConfig
972 
973  inputDataset = 'source'
974  outputDataset = 'sourceTable'
975 
976  @classmethod
977  def _makeArgumentParser(cls):
978  parser = ArgumentParser(name=cls._DefaultName)
979  parser.add_id_argument("--id", datasetType=cls.inputDataset,
980  level="sensor",
981  help="data ID, e.g. --id visit=12345 ccd=0")
982  return parser
983 
984  def runDataRef(self, dataRef):
985  """Override to specify band label to run()."""
986  parq = dataRef.get()
987  funcs = self.getFunctors()
988  band = dataRef.get("calexp_filterLabel", immediate=True).bandLabel
989  df = self.run(parq, funcs=funcs, dataId=dataRef.dataId, band=band)
990  self.write(df, dataRef)
991  return df
992 
993 
994 class ConsolidateVisitSummaryConnections(pipeBase.PipelineTaskConnections,
995  dimensions=("instrument", "visit",),
996  defaultTemplates={}):
997  calexp = connectionTypes.Input(
998  doc="Processed exposures used for metadata",
999  name="calexp",
1000  storageClass="ExposureF",
1001  dimensions=("instrument", "visit", "detector"),
1002  deferLoad=True,
1003  multiple=True,
1004  )
1005  visitSummary = connectionTypes.Output(
1006  doc=("Per-visit consolidated exposure metadata. These catalogs use "
1007  "detector id for the id and are sorted for fast lookups of a "
1008  "detector."),
1009  name="visitSummary",
1010  storageClass="ExposureCatalog",
1011  dimensions=("instrument", "visit"),
1012  )
1013 
1014 
1015 class ConsolidateVisitSummaryConfig(pipeBase.PipelineTaskConfig,
1016  pipelineConnections=ConsolidateVisitSummaryConnections):
1017  """Config for ConsolidateVisitSummaryTask"""
1018  pass
1019 
1020 
1021 class ConsolidateVisitSummaryTask(pipeBase.PipelineTask, pipeBase.CmdLineTask):
1022  """Task to consolidate per-detector visit metadata.
1023 
1024  This task aggregates the following metadata from all the detectors in a
1025  single visit into an exposure catalog:
1026  - The visitInfo.
1027  - The wcs.
1028  - The photoCalib.
1029  - The physical_filter and band (if available).
1030  - The psf size, shape, and effective area at the center of the detector.
1031  - The corners of the bounding box in right ascension/declination.
1032 
1033  Other quantities such as Detector, Psf, ApCorrMap, and TransmissionCurve
1034  are not persisted here because of storage concerns, and because of their
1035  limited utility as summary statistics.
1036 
1037  Tests for this task are performed in ci_hsc_gen3.
1038  """
1039  _DefaultName = "consolidateVisitSummary"
1040  ConfigClass = ConsolidateVisitSummaryConfig
1041 
1042  @classmethod
1043  def _makeArgumentParser(cls):
1044  parser = ArgumentParser(name=cls._DefaultName)
1045 
1046  parser.add_id_argument("--id", "calexp",
1047  help="data ID, e.g. --id visit=12345",
1048  ContainerClass=VisitDataIdContainer)
1049  return parser
1050 
1051  def writeMetadata(self, dataRef):
 1052  """No metadata to persist, so override to remove metadata persistence.
1053  """
1054  pass
1055 
1056  def writeConfig(self, butler, clobber=False, doBackup=True):
 1057  """No config to persist, so override to remove config persistence.
1058  """
1059  pass
1060 
1061  def runDataRef(self, dataRefList):
1062  visit = dataRefList[0].dataId['visit']
1063 
1064  self.log.debug("Concatenating metadata from %d per-detector calexps (visit %d)" %
1065  (len(dataRefList), visit))
1066 
1067  expCatalog = self._combineExposureMetadata(visit, dataRefList, isGen3=False)
1068 
1069  dataRefList[0].put(expCatalog, 'visitSummary', visit=visit)
1070 
1071  def runQuantum(self, butlerQC, inputRefs, outputRefs):
1072  dataRefs = butlerQC.get(inputRefs.calexp)
1073  visit = dataRefs[0].dataId.byName()['visit']
1074 
1075  self.log.debug("Concatenating metadata from %d per-detector calexps (visit %d)" %
1076  (len(dataRefs), visit))
1077 
1078  expCatalog = self._combineExposureMetadata(visit, dataRefs)
1079 
1080  butlerQC.put(expCatalog, outputRefs.visitSummary)
1081 
1082  def _combineExposureMetadata(self, visit, dataRefs, isGen3=True):
1083  """Make a combined exposure catalog from a list of dataRefs.
1084  These dataRefs must point to exposures with wcs, summaryStats,
1085  and other visit metadata.
1086 
1087  Parameters
1088  ----------
1089  visit : `int`
1090  Visit identification number.
1091  dataRefs : `list`
1092  List of dataRefs in visit. May be list of
1093  `lsst.daf.persistence.ButlerDataRef` (Gen2) or
1094  `lsst.daf.butler.DeferredDatasetHandle` (Gen3).
1095  isGen3 : `bool`, optional
1096  Specifies if this is a Gen3 list of datarefs.
1097 
1098  Returns
1099  -------
1100  visitSummary : `lsst.afw.table.ExposureCatalog`
1101  Exposure catalog with per-detector summary information.
1102  """
1103  schema = self._makeVisitSummarySchema()
1104  cat = afwTable.ExposureCatalog(schema)
1105  cat.resize(len(dataRefs))
1106 
1107  cat['visit'] = visit
1108 
1109  for i, dataRef in enumerate(dataRefs):
1110  if isGen3:
1111  visitInfo = dataRef.get(component='visitInfo')
1112  filterLabel = dataRef.get(component='filterLabel')
1113  summaryStats = dataRef.get(component='summaryStats')
1114  detector = dataRef.get(component='detector')
1115  wcs = dataRef.get(component='wcs')
1116  photoCalib = dataRef.get(component='photoCalib')
1117  detector = dataRef.get(component='detector')
1118  bbox = dataRef.get(component='bbox')
1119  validPolygon = dataRef.get(component='validPolygon')
1120  else:
1121  # Note that we need to read the calexp because there is
1122  # no magic access to the psf except through the exposure.
1123  gen2_read_bbox = lsst.geom.BoxI(lsst.geom.PointI(0, 0), lsst.geom.PointI(1, 1))
1124  exp = dataRef.get(datasetType='calexp_sub', bbox=gen2_read_bbox)
1125  visitInfo = exp.getInfo().getVisitInfo()
1126  filterLabel = dataRef.get("calexp_filterLabel")
1127  summaryStats = exp.getInfo().getSummaryStats()
1128  wcs = exp.getWcs()
1129  photoCalib = exp.getPhotoCalib()
1130  detector = exp.getDetector()
1131  bbox = dataRef.get(datasetType='calexp_bbox')
1132  validPolygon = exp.getInfo().getValidPolygon()
1133 
1134  rec = cat[i]
1135  rec.setBBox(bbox)
1136  rec.setVisitInfo(visitInfo)
1137  rec.setWcs(wcs)
1138  rec.setPhotoCalib(photoCalib)
1139  rec.setValidPolygon(validPolygon)
1140 
1141  rec['physical_filter'] = filterLabel.physicalLabel if filterLabel.hasPhysicalLabel() else ""
1142  rec['band'] = filterLabel.bandLabel if filterLabel.hasBandLabel() else ""
1143  rec.setId(detector.getId())
1144  rec['psfSigma'] = summaryStats.psfSigma
1145  rec['psfIxx'] = summaryStats.psfIxx
1146  rec['psfIyy'] = summaryStats.psfIyy
1147  rec['psfIxy'] = summaryStats.psfIxy
1148  rec['psfArea'] = summaryStats.psfArea
1149  rec['raCorners'][:] = summaryStats.raCorners
1150  rec['decCorners'][:] = summaryStats.decCorners
1151  rec['ra'] = summaryStats.ra
1152  rec['decl'] = summaryStats.decl
1153  rec['zenithDistance'] = summaryStats.zenithDistance
1154  rec['zeroPoint'] = summaryStats.zeroPoint
1155  rec['skyBg'] = summaryStats.skyBg
1156  rec['skyNoise'] = summaryStats.skyNoise
1157  rec['meanVar'] = summaryStats.meanVar
1158 
1159  metadata = dafBase.PropertyList()
1160  metadata.add("COMMENT", "Catalog id is detector id, sorted.")
1161  # We are looping over existing datarefs, so the following is true
1162  metadata.add("COMMENT", "Only detectors with data have entries.")
1163  cat.setMetadata(metadata)
1164 
1165  cat.sort()
1166  return cat
1167 
1168  def _makeVisitSummarySchema(self):
1169  """Make the schema for the visitSummary catalog."""
1170  schema = afwTable.ExposureTable.makeMinimalSchema()
1171  schema.addField('visit', type='I', doc='Visit number')
1172  schema.addField('physical_filter', type='String', size=32, doc='Physical filter')
1173  schema.addField('band', type='String', size=32, doc='Name of band')
1174  schema.addField('psfSigma', type='F',
1175  doc='PSF model second-moments determinant radius (center of chip) (pixel)')
1176  schema.addField('psfArea', type='F',
1177  doc='PSF model effective area (center of chip) (pixel**2)')
1178  schema.addField('psfIxx', type='F',
1179  doc='PSF model Ixx (center of chip) (pixel**2)')
1180  schema.addField('psfIyy', type='F',
1181  doc='PSF model Iyy (center of chip) (pixel**2)')
1182  schema.addField('psfIxy', type='F',
1183  doc='PSF model Ixy (center of chip) (pixel**2)')
1184  schema.addField('raCorners', type='ArrayD', size=4,
1185  doc='Right Ascension of bounding box corners (degrees)')
1186  schema.addField('decCorners', type='ArrayD', size=4,
1187  doc='Declination of bounding box corners (degrees)')
1188  schema.addField('ra', type='D',
1189  doc='Right Ascension of bounding box center (degrees)')
1190  schema.addField('decl', type='D',
1191  doc='Declination of bounding box center (degrees)')
1192  schema.addField('zenithDistance', type='F',
1193  doc='Zenith distance of bounding box center (degrees)')
1194  schema.addField('zeroPoint', type='F',
1195  doc='Mean zeropoint in detector (mag)')
1196  schema.addField('skyBg', type='F',
1197  doc='Average sky background (ADU)')
1198  schema.addField('skyNoise', type='F',
1199  doc='Average sky noise (ADU)')
1200  schema.addField('meanVar', type='F',
1201  doc='Mean variance of the weight plane (ADU**2)')
1202 
1203  return schema
1204 
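# The visitSummary is an ExposureCatalog keyed and sorted by detector id, so summary
# columns and per-detector components can be read directly once retrieved. A sketch,
# assuming a Gen3 butler and an illustrative data ID:
#
#     >>> cat = butler.get('visitSummary', visit=12345, instrument='HSC')
#     >>> cat['psfSigma']    # array of per-detector PSF sigmas (pixels)
#     >>> cat[0].getWcs()    # WCS of the first (lowest detector id) record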
1205 
1206 class VisitDataIdContainer(DataIdContainer):
 1207  """DataIdContainer that groups sensor-level data IDs by visit
1208  """
1209 
1210  def makeDataRefList(self, namespace):
1211  """Make self.refList from self.idList
1212 
1213  Generate a list of data references grouped by visit.
1214 
1215  Parameters
1216  ----------
1217  namespace : `argparse.Namespace`
1218  Namespace used by `lsst.pipe.base.CmdLineTask` to parse command line arguments
1219  """
1220  # Group by visits
1221  visitRefs = defaultdict(list)
1222  for dataId in self.idList:
1223  if "visit" in dataId:
1224  visitId = dataId["visit"]
 1225  # Collect all data references matching this data ID under its visit
1226  subset = namespace.butler.subset(self.datasetType, dataId=dataId)
1227  visitRefs[visitId].extend([dataRef for dataRef in subset])
1228 
1229  outputRefList = []
1230  for refList in visitRefs.values():
1231  existingRefs = [ref for ref in refList if ref.datasetExists()]
1232  if existingRefs:
1233  outputRefList.append(existingRefs)
1234 
1235  self.refList = outputRefList
1236 
1237 
1238 class ConsolidateSourceTableConnections(pipeBase.PipelineTaskConnections,
1239  dimensions=("instrument", "visit")):
1240  inputCatalogs = connectionTypes.Input(
1241  doc="Input per-detector Source Tables",
1242  name="sourceTable",
1243  storageClass="DataFrame",
1244  dimensions=("instrument", "visit", "detector"),
1245  multiple=True
1246  )
1247  outputCatalog = connectionTypes.Output(
1248  doc="Per-visit concatenation of Source Table",
1249  name="sourceTable_visit",
1250  storageClass="DataFrame",
1251  dimensions=("instrument", "visit")
1252  )
1253 
1254 
1255 class ConsolidateSourceTableConfig(pipeBase.PipelineTaskConfig,
1256  pipelineConnections=ConsolidateSourceTableConnections):
1257  pass
1258 
1259 
1260 class ConsolidateSourceTableTask(CmdLineTask, pipeBase.PipelineTask):
1261  """Concatenate `sourceTable` list into a per-visit `sourceTable_visit`
1262  """
1263  _DefaultName = 'consolidateSourceTable'
1264  ConfigClass = ConsolidateSourceTableConfig
1265 
1266  inputDataset = 'sourceTable'
1267  outputDataset = 'sourceTable_visit'
1268 
1269  def runQuantum(self, butlerQC, inputRefs, outputRefs):
1270  inputs = butlerQC.get(inputRefs)
1271  self.log.info("Concatenating %s per-detector Source Tables",
1272  len(inputs['inputCatalogs']))
1273  df = pd.concat(inputs['inputCatalogs'])
1274  butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs)
1275 
1276  def runDataRef(self, dataRefList):
1277  self.log.info("Concatenating %s per-detector Source Tables", len(dataRefList))
1278  df = pd.concat([dataRef.get().toDataFrame() for dataRef in dataRefList])
1279  dataRefList[0].put(ParquetTable(dataFrame=df), self.outputDataset)
1280 
1281  @classmethod
1282  def _makeArgumentParser(cls):
1283  parser = ArgumentParser(name=cls._DefaultName)
1284 
1285  parser.add_id_argument("--id", cls.inputDataset,
1286  help="data ID, e.g. --id visit=12345",
1287  ContainerClass=VisitDataIdContainer)
1288  return parser
1289 
1290  def writeMetadata(self, dataRef):
1291  """No metadata to write.
1292  """
1293  pass
1294 
1295  def writeConfig(self, butler, clobber=False, doBackup=True):
1296  """No config to write.
1297  """
1298  pass