lsst.pipe.tasks  21.0.0-95-g1e789892+c91f73bc7b
postprocess.py
1 # This file is part of pipe_tasks
2 #
3 # Developed for the LSST Data Management System.
4 # This product includes software developed by the LSST Project
5 # (https://www.lsst.org).
6 # See the COPYRIGHT file at the top-level directory of this distribution
7 # for details of code ownership.
8 #
9 # This program is free software: you can redistribute it and/or modify
10 # it under the terms of the GNU General Public License as published by
11 # the Free Software Foundation, either version 3 of the License, or
12 # (at your option) any later version.
13 #
14 # This program is distributed in the hope that it will be useful,
15 # but WITHOUT ANY WARRANTY; without even the implied warranty of
16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 # GNU General Public License for more details.
18 #
19 # You should have received a copy of the GNU General Public License
20 # along with this program. If not, see <https://www.gnu.org/licenses/>.
21 
22 import functools
23 import pandas as pd
24 from collections import defaultdict
25 
26 import lsst.geom
27 import lsst.pex.config as pexConfig
28 import lsst.pipe.base as pipeBase
29 import lsst.daf.base as dafBase
30 from lsst.pipe.base import connectionTypes
31 import lsst.afw.table as afwTable
32 from lsst.meas.base import SingleFrameMeasurementTask
33 from lsst.pipe.base import CmdLineTask, ArgumentParser, DataIdContainer
34 from lsst.coadd.utils.coaddDataIdContainer import CoaddDataIdContainer
35 from lsst.daf.butler import DeferredDatasetHandle
36 
37 from .parquetTable import ParquetTable
38 from .multiBandUtils import makeMergeArgumentParser, MergeSourcesRunner
39 from .functors import CompositeFunctor, RAColumn, DecColumn, Column
40 
41 
42 def flattenFilters(df, noDupCols=['coord_ra', 'coord_dec'], camelCase=False, inputBands=None):
43  """Flattens a dataframe with multilevel column index
44  """
45  newDf = pd.DataFrame()
46  # band is the level 0 index
47  dfBands = df.columns.unique(level=0).values
48  for band in dfBands:
49  subdf = df[band]
50  columnFormat = '{0}{1}' if camelCase else '{0}_{1}'
51  newColumns = {c: columnFormat.format(band, c)
52  for c in subdf.columns if c not in noDupCols}
53  cols = list(newColumns.keys())
54  newDf = pd.concat([newDf, subdf[cols].rename(columns=newColumns)], axis=1)
55 
56  # Band must be present in the input and output or else column is all NaN:
57  presentBands = dfBands if inputBands is None else list(set(inputBands).intersection(dfBands))
58  # Get the unexploded columns from any present band's partition
59  noDupDf = df[presentBands[0]][noDupCols]
60  newDf = pd.concat([noDupDf, newDf], axis=1)
61  return newDf
62 
63 
64 class WriteObjectTableConnections(pipeBase.PipelineTaskConnections,
65  defaultTemplates={"coaddName": "deep"},
66  dimensions=("tract", "patch", "skymap")):
67  inputCatalogMeas = connectionTypes.Input(
68  doc="Catalog of source measurements on the deepCoadd.",
69  dimensions=("tract", "patch", "band", "skymap"),
70  storageClass="SourceCatalog",
71  name="{coaddName}Coadd_meas",
72  multiple=True
73  )
74  inputCatalogForcedSrc = connectionTypes.Input(
75  doc="Catalog of forced measurements (shape and position parameters held fixed) on the deepCoadd.",
76  dimensions=("tract", "patch", "band", "skymap"),
77  storageClass="SourceCatalog",
78  name="{coaddName}Coadd_forced_src",
79  multiple=True
80  )
81  inputCatalogRef = connectionTypes.Input(
82  doc="Catalog marking the primary detection (which band provides a good shape and position)"
83  "for each detection in deepCoadd_mergeDet.",
84  dimensions=("tract", "patch", "skymap"),
85  storageClass="SourceCatalog",
86  name="{coaddName}Coadd_ref"
87  )
88  outputCatalog = connectionTypes.Output(
89  doc="A vertical concatenation of the deepCoadd_{ref|meas|forced_src} catalogs, "
90  "stored as a DataFrame with a multi-level column index per-patch.",
91  dimensions=("tract", "patch", "skymap"),
92  storageClass="DataFrame",
93  name="{coaddName}Coadd_obj"
94  )
95 
96 
97 class WriteObjectTableConfig(pipeBase.PipelineTaskConfig,
98  pipelineConnections=WriteObjectTableConnections):
99  engine = pexConfig.Field(
100  dtype=str,
101  default="pyarrow",
102  doc="Parquet engine for writing (pyarrow or fastparquet)"
103  )
104  coaddName = pexConfig.Field(
105  dtype=str,
106  default="deep",
107  doc="Name of coadd"
108  )
109 
110 
111 class WriteObjectTableTask(CmdLineTask, pipeBase.PipelineTask):
112  """Write filter-merged source tables to parquet
113  """
114  _DefaultName = "writeObjectTable"
115  ConfigClass = WriteObjectTableConfig
116  RunnerClass = MergeSourcesRunner
117 
118  # Names of table datasets to be merged
119  inputDatasets = ('forced_src', 'meas', 'ref')
120 
121  # Tag of output dataset written by `MergeSourcesTask.write`
122  outputDataset = 'obj'
123 
124  def __init__(self, butler=None, schema=None, **kwargs):
125  # It is a shame that this class can't use the default init for CmdLineTask
126  # But to do so would require its own special task runner, which is many
127  # more lines of specialization, so this is how it is for now
128  super().__init__(**kwargs)
129 
130  def runDataRef(self, patchRefList):
131  """!
132  @brief Merge coadd sources from multiple bands by calling @ref `run` on the catalogs
133  read for each band from the given patch data references.
134  @param[in] patchRefList list of data references for each filter
135  """
136  catalogs = dict(self.readCatalog(patchRef) for patchRef in patchRefList)
137  dataId = patchRefList[0].dataId
138  mergedCatalog = self.run(catalogs, tract=dataId['tract'], patch=dataId['patch'])
139  self.write(patchRefList[0], ParquetTable(dataFrame=mergedCatalog))
140 
141  def runQuantum(self, butlerQC, inputRefs, outputRefs):
142  inputs = butlerQC.get(inputRefs)
143 
144  measDict = {ref.dataId['band']: {'meas': cat} for ref, cat in
145  zip(inputRefs.inputCatalogMeas, inputs['inputCatalogMeas'])}
146  forcedSourceDict = {ref.dataId['band']: {'forced_src': cat} for ref, cat in
147  zip(inputRefs.inputCatalogForcedSrc, inputs['inputCatalogForcedSrc'])}
148 
149  catalogs = {}
150  for band in measDict.keys():
151  catalogs[band] = {'meas': measDict[band]['meas'],
152  'forced_src': forcedSourceDict[band]['forced_src'],
153  'ref': inputs['inputCatalogRef']}
154  dataId = butlerQC.quantum.dataId
155  df = self.run(catalogs=catalogs, tract=dataId['tract'], patch=dataId['patch'])
156  outputs = pipeBase.Struct(outputCatalog=df)
157  butlerQC.put(outputs, outputRefs)
158 
159  @classmethod
160  def _makeArgumentParser(cls):
161  """Create a suitable ArgumentParser.
162 
163  We will use the ArgumentParser to get a list of data
164  references for patches; the RunnerClass will sort them into lists
165  of data references for the same patch.
166 
167  References the first of self.inputDatasets, rather than
168  self.inputDataset.
169  """
170  return makeMergeArgumentParser(cls._DefaultName, cls.inputDatasets[0])
171 
172  def readCatalog(self, patchRef):
173  """Read input catalogs
174 
175  Read all the input datasets given by the 'inputDatasets'
176  attribute.
177 
178  Parameters
179  ----------
180  patchRef : `lsst.daf.persistence.ButlerDataRef`
181  Data reference for patch
182 
183  Returns
184  -------
185  Tuple consisting of band name and a dict of catalogs, keyed by
186  dataset name
187  """
188  band = patchRef.get(self.config.coaddName + "Coadd_filterLabel", immediate=True).bandLabel
189  catalogDict = {}
190  for dataset in self.inputDatasets:
191  catalog = patchRef.get(self.config.coaddName + "Coadd_" + dataset, immediate=True)
192  self.log.info("Read %d sources from %s for band %s: %s" %
193  (len(catalog), dataset, band, patchRef.dataId))
194  catalogDict[dataset] = catalog
195  return band, catalogDict
196 
197  def run(self, catalogs, tract, patch):
198  """Merge multiple catalogs.
199 
200  Parameters
201  ----------
202  catalogs : `dict`
203  Mapping from filter names to dict of catalogs.
204  tract : `int`
205  tractId to use for the tractId column
206  patch : `str`
207  patchId to use for the patchId column
208 
209  Returns
210  -------
211  catalog : `pandas.DataFrame`
212  Merged dataframe
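
 Notes
 -----
 A minimal sketch of the resulting column structure, assuming ``task`` is a
 configured `WriteObjectTableTask` and ``catalogs`` is built as in `runQuantum`;
 the tract and patch values are illustrative:

 merged = task.run(catalogs, tract=9813, patch='4,4')
 list(merged.columns.names)  # -> ['dataset', 'band', 'column']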
213  """
214 
215  dfs = []
216  for filt, tableDict in catalogs.items():
217  for dataset, table in tableDict.items():
218  # Convert afwTable to pandas DataFrame
219  df = table.asAstropy().to_pandas().set_index('id', drop=True)
220 
221  # Sort columns by name, to ensure matching schema among patches
222  df = df.reindex(sorted(df.columns), axis=1)
223  df['tractId'] = tract
224  df['patchId'] = patch
225 
226  # Make columns a 3-level MultiIndex
227  df.columns = pd.MultiIndex.from_tuples([(dataset, filt, c) for c in df.columns],
228  names=('dataset', 'band', 'column'))
229  dfs.append(df)
230 
231  catalog = functools.reduce(lambda d1, d2: d1.join(d2), dfs)
232  return catalog
233 
234  def write(self, patchRef, catalog):
235  """Write the output.
236 
237  Parameters
238  ----------
239  catalog : `ParquetTable`
240  Catalog to write
241  patchRef : `lsst.daf.persistence.ButlerDataRef`
242  Data reference for patch
243  """
244  patchRef.put(catalog, self.config.coaddName + "Coadd_" + self.outputDataset)
245  # since the filter isn't actually part of the data ID for the dataset we're saving,
246  # it's confusing to see it in the log message, even if the butler simply ignores it.
247  mergeDataId = patchRef.dataId.copy()
248  del mergeDataId["filter"]
249  self.log.info("Wrote merged catalog: %s" % (mergeDataId,))
250 
251  def writeMetadata(self, dataRefList):
252  """No metadata to write, and not sure how to write it for a list of dataRefs.
253  """
254  pass
255 
256 
257 class WriteSourceTableConnections(pipeBase.PipelineTaskConnections,
258  defaultTemplates={"catalogType": ""},
259  dimensions=("instrument", "visit", "detector")):
260 
261  catalog = connectionTypes.Input(
262  doc="Input full-depth catalog of sources produced by CalibrateTask",
263  name="{catalogType}src",
264  storageClass="SourceCatalog",
265  dimensions=("instrument", "visit", "detector")
266  )
267  outputCatalog = connectionTypes.Output(
268  doc="Catalog of sources, `src` in Parquet format",
269  name="{catalogType}source",
270  storageClass="DataFrame",
271  dimensions=("instrument", "visit", "detector")
272  )
273 
274 
275 class WriteSourceTableConfig(pipeBase.PipelineTaskConfig,
276  pipelineConnections=WriteSourceTableConnections):
277  doApplyExternalPhotoCalib = pexConfig.Field(
278  dtype=bool,
279  default=False,
280  doc=("Add local photoCalib columns from the calexp.photoCalib? Should only set True if "
281  "generating Source Tables from older src tables which do not already have local calib columns")
282  )
283  doApplyExternalSkyWcs = pexConfig.Field(
284  dtype=bool,
285  default=False,
286  doc=("Add local WCS columns from the calexp.wcs? Should only set True if "
287  "generating Source Tables from older src tables which do not already have local calib columns")
288  )
289 
290 
291 class WriteSourceTableTask(CmdLineTask, pipeBase.PipelineTask):
292  """Write source table to parquet
293  """
294  _DefaultName = "writeSourceTable"
295  ConfigClass = WriteSourceTableConfig
296 
297  def runDataRef(self, dataRef):
298  src = dataRef.get('src')
299  if self.config.doApplyExternalPhotoCalib or self.config.doApplyExternalSkyWcs:
300  src = self.addCalibColumns(src, dataRef)
301 
302  ccdVisitId = dataRef.get('ccdExposureId')
303  result = self.run(src, ccdVisitId=ccdVisitId)
304  dataRef.put(result.table, 'source')
305 
306  def runQuantum(self, butlerQC, inputRefs, outputRefs):
307  inputs = butlerQC.get(inputRefs)
308  inputs['ccdVisitId'] = butlerQC.quantum.dataId.pack("visit_detector")
309  result = self.run(**inputs).table
310  outputs = pipeBase.Struct(outputCatalog=result.toDataFrame())
311  butlerQC.put(outputs, outputRefs)
312 
313  def run(self, catalog, ccdVisitId=None):
314  """Convert `src` catalog to parquet
315 
316  Parameters
317  ----------
318  catalog : `afwTable.SourceCatalog`
319  Catalog to be converted.
320  ccdVisitId : `int`
321  ccdVisitId to be added as a column.
322 
323  Returns
324  -------
325  result : `lsst.pipe.base.Struct`
326  ``table``
327  `ParquetTable` version of the input catalog
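
 Notes
 -----
 A minimal usage sketch; ``srcCatalog`` and the ``ccdVisitId`` value stand in
 for a real source catalog and a packed visit+detector identifier:

 result = task.run(srcCatalog, ccdVisitId=1234567)
 df = result.table.toDataFrame()  # includes a 'ccdVisitId' column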
328  """
329  self.log.info("Generating parquet table from src catalog %s", ccdVisitId)
330  df = catalog.asAstropy().to_pandas().set_index('id', drop=True)
331  df['ccdVisitId'] = ccdVisitId
332  return pipeBase.Struct(table=ParquetTable(dataFrame=df))
333 
334  def addCalibColumns(self, catalog, dataRef):
335  """Add columns with local calibration evaluated at each centroid
336 
337  This is for backwards compatibility with old repos: it exists to convert
338  old src catalogs (which don't have the expected local calib columns)
339  into Source Tables.
340 
341  Parameters
342  ----------
343  catalog : `afwTable.SourceCatalog`
344  Catalog to which calib columns will be added.
345  dataRef : `lsst.daf.persistence.ButlerDataRef`
346  Data reference used for fetching the calibs from disk.
347 
348  Returns
349  -------
350  newCat : `afwTable.SourceCatalog`
351  Source Catalog with requested local calib columns
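
 Notes
 -----
 A minimal usage sketch; ``srcCatalog`` and ``dataRef`` below are illustrative
 placeholders for a real source catalog and Gen2 data reference:

 config = WriteSourceTableConfig(doApplyExternalSkyWcs=True)
 task = WriteSourceTableTask(config=config)
 newCat = task.addCalibColumns(srcCatalog, dataRef)
 # newCat now carries the base_LocalWcs columns evaluated at each centroid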
352  """
353  mapper = afwTable.SchemaMapper(catalog.schema)
354  measureConfig = SingleFrameMeasurementTask.ConfigClass()
355  measureConfig.doReplaceWithNoise = False
356 
357  # Just need the WCS or the PhotoCalib attached to an exposure
358  exposure = dataRef.get('calexp_sub',
359  bbox=lsst.geom.BoxI(lsst.geom.PointI(0, 0), lsst.geom.PointI(1, 1)))
360
361  mapper = afwTable.SchemaMapper(catalog.schema)
362  mapper.addMinimalSchema(catalog.schema, True)
363  schema = mapper.getOutputSchema()
364 
365  exposureIdInfo = dataRef.get("expIdInfo")
366  measureConfig.plugins.names = []
367  if self.config.doApplyExternalSkyWcs:
368  plugin = 'base_LocalWcs'
369  if plugin in schema:
370  raise RuntimeError(f"{plugin} already in src catalog. Set doApplyExternalSkyWcs=False")
371  else:
372  measureConfig.plugins.names.add(plugin)
373 
374  if self.config.doApplyExternalPhotoCalib:
375  plugin = 'base_LocalPhotoCalib'
376  if plugin in schema:
377  raise RuntimeError(f"{plugin} already in src catalog. Set doApplyExternalPhotoCalib=False")
378  else:
379  measureConfig.plugins.names.add(plugin)
380 
381  measurement = SingleFrameMeasurementTask(config=measureConfig, schema=schema)
382  newCat = afwTable.SourceCatalog(schema)
383  newCat.extend(catalog, mapper=mapper)
384  measurement.run(measCat=newCat, exposure=exposure, exposureId=exposureIdInfo.expId)
385  return newCat
386 
387  def writeMetadata(self, dataRef):
388  """No metadata to write.
389  """
390  pass
391 
392  @classmethod
393  def _makeArgumentParser(cls):
394  parser = ArgumentParser(name=cls._DefaultName)
395  parser.add_id_argument("--id", 'src',
396  help="data ID, e.g. --id visit=12345 ccd=0")
397  return parser
398 
399 
400 class PostprocessAnalysis(object):
401  """Calculate columns from ParquetTable
402 
403  This object manages and organizes an arbitrary set of computations
404  on a catalog. The catalog is defined by a
405  `lsst.pipe.tasks.parquetTable.ParquetTable` object (or list thereof), such as a
406  `deepCoadd_obj` dataset, and the computations are defined by a collection
407  of `lsst.pipe.tasks.functor.Functor` objects (or, equivalently,
408  a `CompositeFunctor`).
409 
410  After the object is initialized, accessing the `.df` attribute (which
411  holds the `pandas.DataFrame` containing the results of the calculations) triggers
412  computation of said dataframe.
413 
414  One of the conveniences of using this object is the ability to define a desired common
415  filter for all functors. This enables the same functor collection to be passed to
416  several different `PostprocessAnalysis` objects without having to change the original
417  functor collection, since the `filt` keyword argument of this object triggers an
418  overwrite of the `filt` property for all functors in the collection.
419 
420  This object also allows a list of refFlags to be passed, and defines a set of default
421  refFlags that are always included even if not requested.
422 
423  If a list of `ParquetTable` objects is passed, rather than a single one, then the
424  calculations will be mapped over all the input catalogs. In principle, it should
425  be straightforward to parallelize this activity, but initial tests have failed
426  (see TODO in code comments).
427 
428  Parameters
429  ----------
430  parq : `lsst.pipe.tasks.parquetTable.ParquetTable` (or list of such)
431  Source catalog(s) for computation
432 
433  functors : `list`, `dict`, or `lsst.pipe.tasks.functors.CompositeFunctor`
434  Computations to do (functors that act on `parq`).
435  If a dict, the output
436  DataFrame will have columns keyed accordingly.
437  If a list, the column keys will come from the
438  `.shortname` attribute of each functor.
439 
440  filt : `str` (optional)
441  Filter in which to calculate. If provided,
442  this will overwrite any existing `.filt` attribute
443  of the provided functors.
444 
445  flags : `list` (optional)
446  List of flags (per-band) to include in output table.
447 
448  refFlags : `list` (optional)
449  List of refFlags (only reference band) to include in output table.
450 
451 
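 Examples
 --------
 A minimal sketch of typical use, assuming ``parq`` is a `ParquetTable` (or a
 deferred handle) for a ``deepCoadd_obj`` patch; the functor arguments mirror
 the YAML example elsewhere in this module:

 from lsst.pipe.tasks.functors import Mag
 funcs = {'psfMag': Mag('base_PsfFlux', dataset='meas')}
 analysis = PostprocessAnalysis(parq, funcs, filt='HSC-G')
 df = analysis.df  # accessing .df triggers the computation
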
452  """
453  _defaultRefFlags = []
454  _defaultFuncs = (('coord_ra', RAColumn()),
455  ('coord_dec', DecColumn()))
456 
457  def __init__(self, parq, functors, filt=None, flags=None, refFlags=None):
458  self.parq = parq
459  self.functors = functors
460 
461  self.filt = filt
462  self.flags = list(flags) if flags is not None else []
463  self.refFlags = list(self._defaultRefFlags)
464  if refFlags is not None:
465  self.refFlags += list(refFlags)
466 
467  self._df = None
468 
469  @property
470  def defaultFuncs(self):
471  funcs = dict(self._defaultFuncs)
472  return funcs
473 
474  @property
475  def func(self):
476  additionalFuncs = self.defaultFuncs
477  additionalFuncs.update({flag: Column(flag, dataset='ref') for flag in self.refFlags})
478  additionalFuncs.update({flag: Column(flag, dataset='meas') for flag in self.flags})
479 
480  if isinstance(self.functors, CompositeFunctor):
481  func = self.functors
482  else:
483  func = CompositeFunctor(self.functors)
484 
485  func.funcDict.update(additionalFuncs)
486  func.filt = self.filt
487 
488  return func
489 
490  @property
491  def noDupCols(self):
492  return [name for name, func in self.func.funcDict.items() if func.noDup or func.dataset == 'ref']
493 
494  @property
495  def df(self):
496  if self._df is None:
497  self.compute()
498  return self._df
499 
500  def compute(self, dropna=False, pool=None):
501  # map over multiple parquet tables
502  if type(self.parq) in (list, tuple):
503  if pool is None:
504  dflist = [self.func(parq, dropna=dropna) for parq in self.parq]
505  else:
506  # TODO: Figure out why this doesn't work (pyarrow pickling issues?)
507  dflist = pool.map(functools.partial(self.func, dropna=dropna), self.parq)
508  self._df = pd.concat(dflist)
509  else:
510  self._df = self.func(self.parq, dropna=dropna)
511 
512  return self._df
513 
514 
515 class TransformCatalogBaseConnections(pipeBase.PipelineTaskConnections,
516  dimensions=()):
517  """Expected Connections for subclasses of TransformCatalogBaseTask.
518 
519  Must be subclassed.
520  """
521  inputCatalog = connectionTypes.Input(
522  name="",
523  storageClass="DataFrame",
524  )
525  outputCatalog = connectionTypes.Output(
526  name="",
527  storageClass="DataFrame",
528  )
529 
530 
531 class TransformCatalogBaseConfig(pipeBase.PipelineTaskConfig,
532  pipelineConnections=TransformCatalogBaseConnections):
533  functorFile = pexConfig.Field(
534  dtype=str,
535  doc='Path to YAML file specifying functors to be computed',
536  default=None,
537  optional=True
538  )
539 
540 
541 class TransformCatalogBaseTask(CmdLineTask, pipeBase.PipelineTask):
542  """Base class for transforming/standardizing a catalog
543 
544  by applying functors that convert units and apply calibrations.
545  The purpose of this task is to perform a set of computations on
546  an input `ParquetTable` dataset (such as `deepCoadd_obj`) and write the
547  results to a new dataset (which needs to be declared in an `outputDataset`
548  attribute).
549 
550  The calculations to be performed are defined in a YAML file that specifies
551  a set of functors to be computed, provided as
552  a `--functorFile` config parameter. An example of such a YAML file
553  is the following:
554 
555  funcs:
556      psfMag:
557          functor: Mag
558          args:
559              - base_PsfFlux
560          filt: HSC-G
561          dataset: meas
562      cmodel_magDiff:
563          functor: MagDiff
564          args:
565              - modelfit_CModel
566              - base_PsfFlux
567          filt: HSC-G
568      gauss_magDiff:
569          functor: MagDiff
570          args:
571              - base_GaussianFlux
572              - base_PsfFlux
573          filt: HSC-G
574      count:
575          functor: Column
576          args:
577              - base_InputCount_value
578          filt: HSC-G
579      deconvolved_moments:
580          functor: DeconvolvedMoments
581          filt: HSC-G
582          dataset: forced_src
583  refFlags:
584      - calib_psfUsed
585      - merge_measurement_i
586      - merge_measurement_r
587      - merge_measurement_z
588      - merge_measurement_y
589      - merge_measurement_g
590      - base_PixelFlags_flag_inexact_psfCenter
591      - detect_isPrimary
592 
593  The names for each entry under "funcs" will become the names of columns in the
594  output dataset. All the functors referenced are defined in `lsst.pipe.tasks.functors`.
595  Positional arguments to be passed to each functor are in the `args` list,
596  and any additional entries for each column other than "functor" or "args" (e.g., `'filt'`,
597  `'dataset'`) are treated as keyword arguments to be passed to the functor initialization.
598 
599  The "refFlags" entry is shortcut for a bunch of `Column` functors with the original column and
600  taken from the `'ref'` dataset.
601 
602  The "flags" entry will be expanded out per band.
603 
604  This task uses the `lsst.pipe.tasks.postprocess.PostprocessAnalysis` object
605  to organize and execute the calculations.
606 
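 A minimal sketch of how this base class is typically driven, assuming a concrete
 subclass instance ``task`` whose ``functorFile`` config points at such a YAML
 file, plus a ``parq`` catalog and ``dataId`` obtained from the butler:

 funcs = task.getFunctors()
 df = task.run(parq, funcs=funcs, dataId=dataId)
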
607  """
608  @property
609  def _DefaultName(self):
610  raise NotImplementedError('Subclass must define "_DefaultName" attribute')
611 
612  @property
613  def outputDataset(self):
614  raise NotImplementedError('Subclass must define "outputDataset" attribute')
615 
616  @property
617  def inputDataset(self):
618  raise NotImplementedError('Subclass must define "inputDataset" attribute')
619 
620  @property
621  def ConfigClass(self):
622  raise NotImplementedError('Subclass must define "ConfigClass" attribute')
623 
624  def __init__(self, *args, **kwargs):
625  super().__init__(*args, **kwargs)
626  if self.config.functorFile:
627  self.log.info('Loading transform functor definitions from %s',
628  self.config.functorFile)
629  self.funcs = CompositeFunctor.from_file(self.config.functorFile)
630  self.funcs.update(dict(PostprocessAnalysis._defaultFuncs))
631  else:
632  self.funcs = None
633 
634  def runQuantum(self, butlerQC, inputRefs, outputRefs):
635  inputs = butlerQC.get(inputRefs)
636  if self.funcs is None:
637  raise ValueError("config.functorFile is None. "
638  "Must be a valid path to yaml in order to run Task as a PipelineTask.")
639  result = self.run(parq=inputs['inputCatalog'], funcs=self.funcs,
640  dataId=outputRefs.outputCatalog.dataId.full)
641  outputs = pipeBase.Struct(outputCatalog=result)
642  butlerQC.put(outputs, outputRefs)
643 
644  def runDataRef(self, dataRef):
645  parq = dataRef.get()
646  if self.funcs is None:
647  raise ValueError("config.functorFile is None. "
648  "Must be a valid path to yaml in order to run as a CommandlineTask.")
649  df = self.run(parq, funcs=self.funcs, dataId=dataRef.dataId)
650  self.write(df, dataRef)
651  return df
652 
653  def run(self, parq, funcs=None, dataId=None, band=None):
654  """Do postprocessing calculations
655 
656  Takes a `ParquetTable` object and dataId,
657  returns a dataframe with results of postprocessing calculations.
658 
659  Parameters
660  ----------
661  parq : `lsst.pipe.tasks.parquetTable.ParquetTable`
662  ParquetTable from which calculations are done.
663  funcs : `lsst.pipe.tasks.functors.Functors`
664  Functors to apply to the table's columns
665  dataId : dict, optional
666  Used to add a `patchId` column to the output dataframe.
667  band : `str`, optional
668  Filter band that is being processed.
669 
670  Returns
671  -------
672  `pandas.DataFrame`
673 
674  """
675  self.log.info("Transforming/standardizing the source table dataId: %s", dataId)
676 
677  df = self.transform(band, parq, funcs, dataId).df
678  self.log.info("Made a table of %d columns and %d rows", len(df.columns), len(df))
679  return df
680 
681  def getFunctors(self):
682  return self.funcs
683 
684  def getAnalysis(self, parq, funcs=None, band=None):
685  if funcs is None:
686  funcs = self.funcs
687  analysis = PostprocessAnalysis(parq, funcs, filt=band)
688  return analysis
689 
690  def transform(self, band, parq, funcs, dataId):
691  analysis = self.getAnalysis(parq, funcs=funcs, band=band)
692  df = analysis.df
693  if dataId is not None:
694  for key, value in dataId.items():
695  df[str(key)] = value
696 
697  return pipeBase.Struct(
698  df=df,
699  analysis=analysis
700  )
701 
702  def write(self, df, parqRef):
703  parqRef.put(ParquetTable(dataFrame=df), self.outputDataset)
704 
705  def writeMetadata(self, dataRef):
706  """No metadata to write.
707  """
708  pass
709 
710 
711 class TransformObjectCatalogConnections(pipeBase.PipelineTaskConnections,
712  defaultTemplates={"coaddName": "deep"},
713  dimensions=("tract", "patch", "skymap")):
714  inputCatalog = connectionTypes.Input(
715  doc="The vertical concatenation of the deepCoadd_{ref|meas|forced_src} catalogs, "
716  "stored as a DataFrame with a multi-level column index per-patch.",
717  dimensions=("tract", "patch", "skymap"),
718  storageClass="DataFrame",
719  name="{coaddName}Coadd_obj",
720  deferLoad=True,
721  )
722  outputCatalog = connectionTypes.Output(
723  doc="Per-Patch Object Table of columns transformed from the deepCoadd_obj table per the standard "
724  "data model.",
725  dimensions=("tract", "patch", "skymap"),
726  storageClass="DataFrame",
727  name="objectTable"
728  )
729 
730 
731 class TransformObjectCatalogConfig(TransformCatalogBaseConfig,
732  pipelineConnections=TransformObjectCatalogConnections):
733  coaddName = pexConfig.Field(
734  dtype=str,
735  default="deep",
736  doc="Name of coadd"
737  )
738  # TODO: remove in DM-27177
739  filterMap = pexConfig.DictField(
740  keytype=str,
741  itemtype=str,
742  default={},
743  doc=("Dictionary mapping full filter name to short one for column name munging."
744  "These filters determine the output columns no matter what filters the "
745  "input data actually contain."),
746  deprecated=("Coadds are now identified by the band, so this transform is unused."
747  "Will be removed after v22.")
748  )
749  outputBands = pexConfig.ListField(
750  dtype=str,
751  default=None,
752  optional=True,
753  doc=("These bands and only these bands will appear in the output,"
754  " NaN-filled if the input does not include them."
755  " If None, then use all bands found in the input.")
756  )
757  camelCase = pexConfig.Field(
758  dtype=bool,
759  default=True,
760  doc=("Write per-band columns names with camelCase, else underscore "
761  "For example: gPsFlux instead of g_PsFlux.")
762  )
763  multilevelOutput = pexConfig.Field(
764  dtype=bool,
765  default=False,
766  doc=("Whether results dataframe should have a multilevel column index (True) or be flat "
767  "and name-munged (False).")
768  )
769 
770 
771 class TransformObjectCatalogTask(TransformCatalogBaseTask):
772  """Produce a flattened Object Table to match the format specified in
773  sdm_schemas.
774 
775  Do the same set of postprocessing calculations on all bands
776 
777  This is identical to `TransformCatalogBaseTask`, except that it performs the
778  specified functor calculations for all filters present in the
779  input `deepCoadd_obj` table. Any specific `"filt"` keywords specified
780  by the YAML file will be superseded.
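
 A minimal configuration sketch (the band list is illustrative):

 config = TransformObjectCatalogConfig()
 config.outputBands = ['g', 'r', 'i']   # bands absent from the input are NaN-filled
 config.camelCase = True                # e.g. gPsFlux rather than g_PsFlux
 config.multilevelOutput = False        # flat, name-munged column index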
781  """
782  _DefaultName = "transformObjectCatalog"
783  ConfigClass = TransformObjectCatalogConfig
784 
785  # Used by Gen 2 runDataRef only:
786  inputDataset = 'deepCoadd_obj'
787  outputDataset = 'objectTable'
788 
789  @classmethod
790  def _makeArgumentParser(cls):
791  parser = ArgumentParser(name=cls._DefaultName)
792  parser.add_id_argument("--id", cls.inputDataset,
793  ContainerClass=CoaddDataIdContainer,
794  help="data ID, e.g. --id tract=12345 patch=1,2")
795  return parser
796 
797  def run(self, parq, funcs=None, dataId=None, band=None):
798  # NOTE: band kwarg is ignored here.
799  dfDict = {}
800  analysisDict = {}
801  templateDf = pd.DataFrame()
802 
803  if isinstance(parq, DeferredDatasetHandle):
804  columns = parq.get(component='columns')
805  inputBands = columns.unique(level=1).values
806  else:
807  inputBands = parq.columnLevelNames['band']
808 
809  outputBands = self.config.outputBands if self.config.outputBands else inputBands
810 
811  # Perform transform for data of filters that exist in parq.
812  for inputBand in inputBands:
813  if inputBand not in outputBands:
814  self.log.info("Ignoring %s band data in the input", inputBand)
815  continue
816  self.log.info("Transforming the catalog of band %s", inputBand)
817  result = self.transform(inputBand, parq, funcs, dataId)
818  dfDict[inputBand] = result.df
819  analysisDict[inputBand] = result.analysis
820  if templateDf.empty:
821  templateDf = result.df
822 
823  # Fill NaNs in columns of other wanted bands
824  for filt in outputBands:
825  if filt not in dfDict:
826  self.log.info("Adding empty columns for band %s", filt)
827  dfDict[filt] = pd.DataFrame().reindex_like(templateDf)
828 
829  # This makes a multilevel column index, with band as first level
830  df = pd.concat(dfDict, axis=1, names=['band', 'column'])
831 
832  if not self.config.multilevelOutput:
833  noDupCols = list(set.union(*[set(v.noDupCols) for v in analysisDict.values()]))
834  if dataId is not None:
835  noDupCols += list(dataId.keys())
836  df = flattenFilters(df, noDupCols=noDupCols, camelCase=self.config.camelCase,
837  inputBands=inputBands)
838 
839  self.log.info("Made a table of %d columns and %d rows", len(df.columns), len(df))
840  return df
841 
842 
843 class TractObjectDataIdContainer(CoaddDataIdContainer):
844 
845  def makeDataRefList(self, namespace):
846  """Make self.refList from self.idList
847 
848  Generate a list of data references given tract and/or patch.
849  This was adapted from `TractQADataIdContainer`, which was
850  `TractDataIdContainer` modified to not require "filter".
851  Only existing dataRefs are returned.
852  """
853  def getPatchRefList(tract):
854  return [namespace.butler.dataRef(datasetType=self.datasetType,
855  tract=tract.getId(),
856  patch="%d,%d" % patch.getIndex()) for patch in tract]
857 
858  tractRefs = defaultdict(list) # Data references for each tract
859  for dataId in self.idList:
860  skymap = self.getSkymap(namespace)
861 
862  if "tract" in dataId:
863  tractId = dataId["tract"]
864  if "patch" in dataId:
865  tractRefs[tractId].append(namespace.butler.dataRef(datasetType=self.datasetType,
866  tract=tractId,
867  patch=dataId['patch']))
868  else:
869  tractRefs[tractId] += getPatchRefList(skymap[tractId])
870  else:
871  tractRefs = dict((tract.getId(), tractRefs.get(tract.getId(), []) + getPatchRefList(tract))
872  for tract in skymap)
873  outputRefList = []
874  for tractRefList in tractRefs.values():
875  existingRefs = [ref for ref in tractRefList if ref.datasetExists()]
876  outputRefList.append(existingRefs)
877 
878  self.refList = outputRefList
879 
880 
881 class ConsolidateObjectTableConnections(pipeBase.PipelineTaskConnections,
882  dimensions=("tract", "skymap")):
883  inputCatalogs = connectionTypes.Input(
884  doc="Per-Patch objectTables conforming to the standard data model.",
885  name="objectTable",
886  storageClass="DataFrame",
887  dimensions=("tract", "patch", "skymap"),
888  multiple=True,
889  )
890  outputCatalog = connectionTypes.Output(
891  doc="Pre-tract horizontal concatenation of the input objectTables",
892  name="objectTable_tract",
893  storageClass="DataFrame",
894  dimensions=("tract", "skymap"),
895  )
896 
897 
898 class ConsolidateObjectTableConfig(pipeBase.PipelineTaskConfig,
899  pipelineConnections=ConsolidateObjectTableConnections):
900  coaddName = pexConfig.Field(
901  dtype=str,
902  default="deep",
903  doc="Name of coadd"
904  )
905 
906 
907 class ConsolidateObjectTableTask(CmdLineTask, pipeBase.PipelineTask):
908  """Write patch-merged source tables to a tract-level parquet file
909 
910  Concatenates the per-patch `objectTable` list into a per-tract `objectTable_tract`.
911  """
912  _DefaultName = "consolidateObjectTable"
913  ConfigClass = ConsolidateObjectTableConfig
914 
915  inputDataset = 'objectTable'
916  outputDataset = 'objectTable_tract'
917 
918  def runQuantum(self, butlerQC, inputRefs, outputRefs):
919  inputs = butlerQC.get(inputRefs)
920  self.log.info("Concatenating %s per-patch Object Tables",
921  len(inputs['inputCatalogs']))
922  df = pd.concat(inputs['inputCatalogs'])
923  butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs)
924 
925  @classmethod
926  def _makeArgumentParser(cls):
927  parser = ArgumentParser(name=cls._DefaultName)
928 
929  parser.add_id_argument("--id", cls.inputDataset,
930  help="data ID, e.g. --id tract=12345",
931  ContainerClass=TractObjectDataIdContainer)
932  return parser
933 
934  def runDataRef(self, patchRefList):
935  df = pd.concat([patchRef.get().toDataFrame() for patchRef in patchRefList])
936  patchRefList[0].put(ParquetTable(dataFrame=df), self.outputDataset)
937 
938  def writeMetadata(self, dataRef):
939  """No metadata to write.
940  """
941  pass
942 
943 
944 class TransformSourceTableConnections(pipeBase.PipelineTaskConnections,
945  defaultTemplates={"catalogType": ""},
946  dimensions=("instrument", "visit", "detector")):
947 
948  inputCatalog = connectionTypes.Input(
949  doc="Wide input catalog of sources produced by WriteSourceTableTask",
950  name="{catalogType}source",
951  storageClass="DataFrame",
952  dimensions=("instrument", "visit", "detector"),
953  deferLoad=True
954  )
955  outputCatalog = connectionTypes.Output(
956  doc="Narrower, per-detector Source Table transformed and converted per a "
957  "specified set of functors",
958  name="{catalogType}sourceTable",
959  storageClass="DataFrame",
960  dimensions=("instrument", "visit", "detector")
961  )
962 
963 
964 class TransformSourceTableConfig(TransformCatalogBaseConfig,
965  pipelineConnections=TransformSourceTableConnections):
966  pass
967 
968 
969 class TransformSourceTableTask(TransformCatalogBaseTask):
970  """Transform/standardize a source catalog
971  """
972  _DefaultName = "transformSourceTable"
973  ConfigClass = TransformSourceTableConfig
974 
975  inputDataset = 'source'
976  outputDataset = 'sourceTable'
977 
978  @classmethod
979  def _makeArgumentParser(cls):
980  parser = ArgumentParser(name=cls._DefaultName)
981  parser.add_id_argument("--id", datasetType=cls.inputDataset,
982  level="sensor",
983  help="data ID, e.g. --id visit=12345 ccd=0")
984  return parser
985 
986  def runDataRef(self, dataRef):
987  """Override to specify band label to run()."""
988  parq = dataRef.get()
989  funcs = self.getFunctors()
990  band = dataRef.get("calexp_filterLabel", immediate=True).bandLabel
991  df = self.run(parq, funcs=funcs, dataId=dataRef.dataId, band=band)
992  self.write(df, dataRef)
993  return df
994 
995 
996 class ConsolidateVisitSummaryConnections(pipeBase.PipelineTaskConnections,
997  dimensions=("instrument", "visit",),
998  defaultTemplates={"calexpType": ""}):
999  calexp = connectionTypes.Input(
1000  doc="Processed exposures used for metadata",
1001  name="{calexpType}calexp",
1002  storageClass="ExposureF",
1003  dimensions=("instrument", "visit", "detector"),
1004  deferLoad=True,
1005  multiple=True,
1006  )
1007  visitSummary = connectionTypes.Output(
1008  doc=("Per-visit consolidated exposure metadata. These catalogs use "
1009  "detector id for the id and are sorted for fast lookups of a "
1010  "detector."),
1011  name="{calexpType}visitSummary",
1012  storageClass="ExposureCatalog",
1013  dimensions=("instrument", "visit"),
1014  )
1015 
1016 
1017 class ConsolidateVisitSummaryConfig(pipeBase.PipelineTaskConfig,
1018  pipelineConnections=ConsolidateVisitSummaryConnections):
1019  """Config for ConsolidateVisitSummaryTask"""
1020  pass
1021 
1022 
1023 class ConsolidateVisitSummaryTask(pipeBase.PipelineTask, pipeBase.CmdLineTask):
1024  """Task to consolidate per-detector visit metadata.
1025 
1026  This task aggregates the following metadata from all the detectors in a
1027  single visit into an exposure catalog:
1028  - The visitInfo.
1029  - The wcs.
1030  - The photoCalib.
1031  - The physical_filter and band (if available).
1032  - The psf size, shape, and effective area at the center of the detector.
1033  - The corners of the bounding box in right ascension/declination.
1034 
1035  Other quantities such as Detector, Psf, ApCorrMap, and TransmissionCurve
1036  are not persisted here because of storage concerns, and because of their
1037  limited utility as summary statistics.
1038 
1039  Tests for this task are performed in ci_hsc_gen3.
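
 A minimal sketch of reading back the output catalog (the butler call and the
 visit number below are illustrative):

 visitSummary = butler.get('visitSummary', visit=12345)
 row = visitSummary[0]              # one record per detector, sorted by detector id
 wcs = row.getWcs()
 psfSigma = row['psfSigma']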
1040  """
1041  _DefaultName = "consolidateVisitSummary"
1042  ConfigClass = ConsolidateVisitSummaryConfig
1043 
1044  @classmethod
1045  def _makeArgumentParser(cls):
1046  parser = ArgumentParser(name=cls._DefaultName)
1047 
1048  parser.add_id_argument("--id", "calexp",
1049  help="data ID, e.g. --id visit=12345",
1050  ContainerClass=VisitDataIdContainer)
1051  return parser
1052 
1053  def writeMetadata(self, dataRef):
1054  """No metadata to persist, so override to remove metadata persistance.
1055  """
1056  pass
1057 
1058  def writeConfig(self, butler, clobber=False, doBackup=True):
1059  """No config to persist, so override to remove config persistance.
1060  """
1061  pass
1062 
1063  def runDataRef(self, dataRefList):
1064  visit = dataRefList[0].dataId['visit']
1065 
1066  self.log.debug("Concatenating metadata from %d per-detector calexps (visit %d)" %
1067  (len(dataRefList), visit))
1068 
1069  expCatalog = self._combineExposureMetadata(visit, dataRefList, isGen3=False)
1070 
1071  dataRefList[0].put(expCatalog, 'visitSummary', visit=visit)
1072 
1073  def runQuantum(self, butlerQC, inputRefs, outputRefs):
1074  dataRefs = butlerQC.get(inputRefs.calexp)
1075  visit = dataRefs[0].dataId.byName()['visit']
1076 
1077  self.log.debug("Concatenating metadata from %d per-detector calexps (visit %d)" %
1078  (len(dataRefs), visit))
1079 
1080  expCatalog = self._combineExposureMetadata(visit, dataRefs)
1081 
1082  butlerQC.put(expCatalog, outputRefs.visitSummary)
1083 
1084  def _combineExposureMetadata(self, visit, dataRefs, isGen3=True):
1085  """Make a combined exposure catalog from a list of dataRefs.
1086  These dataRefs must point to exposures with wcs, summaryStats,
1087  and other visit metadata.
1088 
1089  Parameters
1090  ----------
1091  visit : `int`
1092  Visit identification number.
1093  dataRefs : `list`
1094  List of dataRefs in visit. May be list of
1095  `lsst.daf.persistence.ButlerDataRef` (Gen2) or
1096  `lsst.daf.butler.DeferredDatasetHandle` (Gen3).
1097  isGen3 : `bool`, optional
1098  Specifies if this is a Gen3 list of datarefs.
1099 
1100  Returns
1101  -------
1102  visitSummary : `lsst.afw.table.ExposureCatalog`
1103  Exposure catalog with per-detector summary information.
1104  """
1105  schema = self._makeVisitSummarySchema()
1106  cat = afwTable.ExposureCatalog(schema)
1107  cat.resize(len(dataRefs))
1108 
1109  cat['visit'] = visit
1110 
1111  for i, dataRef in enumerate(dataRefs):
1112  if isGen3:
1113  visitInfo = dataRef.get(component='visitInfo')
1114  filterLabel = dataRef.get(component='filterLabel')
1115  summaryStats = dataRef.get(component='summaryStats')
1116  detector = dataRef.get(component='detector')
1117  wcs = dataRef.get(component='wcs')
1118  photoCalib = dataRef.get(component='photoCalib')
1119  detector = dataRef.get(component='detector')
1120  bbox = dataRef.get(component='bbox')
1121  validPolygon = dataRef.get(component='validPolygon')
1122  else:
1123  # Note that we need to read the calexp because there is
1124  # no magic access to the psf except through the exposure.
1125  gen2_read_bbox = lsst.geom.BoxI(lsst.geom.PointI(0, 0), lsst.geom.PointI(1, 1))
1126  exp = dataRef.get(datasetType='calexp_sub', bbox=gen2_read_bbox)
1127  visitInfo = exp.getInfo().getVisitInfo()
1128  filterLabel = dataRef.get("calexp_filterLabel")
1129  summaryStats = exp.getInfo().getSummaryStats()
1130  wcs = exp.getWcs()
1131  photoCalib = exp.getPhotoCalib()
1132  detector = exp.getDetector()
1133  bbox = dataRef.get(datasetType='calexp_bbox')
1134  validPolygon = exp.getInfo().getValidPolygon()
1135 
1136  rec = cat[i]
1137  rec.setBBox(bbox)
1138  rec.setVisitInfo(visitInfo)
1139  rec.setWcs(wcs)
1140  rec.setPhotoCalib(photoCalib)
1141  rec.setValidPolygon(validPolygon)
1142 
1143  rec['physical_filter'] = filterLabel.physicalLabel if filterLabel.hasPhysicalLabel() else ""
1144  rec['band'] = filterLabel.bandLabel if filterLabel.hasBandLabel() else ""
1145  rec.setId(detector.getId())
1146  rec['psfSigma'] = summaryStats.psfSigma
1147  rec['psfIxx'] = summaryStats.psfIxx
1148  rec['psfIyy'] = summaryStats.psfIyy
1149  rec['psfIxy'] = summaryStats.psfIxy
1150  rec['psfArea'] = summaryStats.psfArea
1151  rec['raCorners'][:] = summaryStats.raCorners
1152  rec['decCorners'][:] = summaryStats.decCorners
1153  rec['ra'] = summaryStats.ra
1154  rec['decl'] = summaryStats.decl
1155  rec['zenithDistance'] = summaryStats.zenithDistance
1156  rec['zeroPoint'] = summaryStats.zeroPoint
1157  rec['skyBg'] = summaryStats.skyBg
1158  rec['skyNoise'] = summaryStats.skyNoise
1159  rec['meanVar'] = summaryStats.meanVar
1160 
1161  metadata = dafBase.PropertyList()
1162  metadata.add("COMMENT", "Catalog id is detector id, sorted.")
1163  # We are looping over existing datarefs, so the following is true
1164  metadata.add("COMMENT", "Only detectors with data have entries.")
1165  cat.setMetadata(metadata)
1166 
1167  cat.sort()
1168  return cat
1169 
1170  def _makeVisitSummarySchema(self):
1171  """Make the schema for the visitSummary catalog."""
1172  schema = afwTable.ExposureTable.makeMinimalSchema()
1173  schema.addField('visit', type='I', doc='Visit number')
1174  schema.addField('physical_filter', type='String', size=32, doc='Physical filter')
1175  schema.addField('band', type='String', size=32, doc='Name of band')
1176  schema.addField('psfSigma', type='F',
1177  doc='PSF model second-moments determinant radius (center of chip) (pixel)')
1178  schema.addField('psfArea', type='F',
1179  doc='PSF model effective area (center of chip) (pixel**2)')
1180  schema.addField('psfIxx', type='F',
1181  doc='PSF model Ixx (center of chip) (pixel**2)')
1182  schema.addField('psfIyy', type='F',
1183  doc='PSF model Iyy (center of chip) (pixel**2)')
1184  schema.addField('psfIxy', type='F',
1185  doc='PSF model Ixy (center of chip) (pixel**2)')
1186  schema.addField('raCorners', type='ArrayD', size=4,
1187  doc='Right Ascension of bounding box corners (degrees)')
1188  schema.addField('decCorners', type='ArrayD', size=4,
1189  doc='Declination of bounding box corners (degrees)')
1190  schema.addField('ra', type='D',
1191  doc='Right Ascension of bounding box center (degrees)')
1192  schema.addField('decl', type='D',
1193  doc='Declination of bounding box center (degrees)')
1194  schema.addField('zenithDistance', type='F',
1195  doc='Zenith distance of bounding box center (degrees)')
1196  schema.addField('zeroPoint', type='F',
1197  doc='Mean zeropoint in detector (mag)')
1198  schema.addField('skyBg', type='F',
1199  doc='Average sky background (ADU)')
1200  schema.addField('skyNoise', type='F',
1201  doc='Average sky noise (ADU)')
1202  schema.addField('meanVar', type='F',
1203  doc='Mean variance of the weight plane (ADU**2)')
1204 
1205  return schema
1206 
1207 
1208 class VisitDataIdContainer(DataIdContainer):
1209  """DataIdContainer that groups sensor-level id's by visit
1210  """
1211 
1212  def makeDataRefList(self, namespace):
1213  """Make self.refList from self.idList
1214 
1215  Generate a list of data references grouped by visit.
1216 
1217  Parameters
1218  ----------
1219  namespace : `argparse.Namespace`
1220  Namespace used by `lsst.pipe.base.CmdLineTask` to parse command line arguments
1221  """
1222  # Group by visits
1223  visitRefs = defaultdict(list)
1224  for dataId in self.idList:
1225  if "visit" in dataId:
1226  visitId = dataId["visit"]
1227  # append all matching data references for this visit
1228  subset = namespace.butler.subset(self.datasetType, dataId=dataId)
1229  visitRefs[visitId].extend([dataRef for dataRef in subset])
1230 
1231  outputRefList = []
1232  for refList in visitRefs.values():
1233  existingRefs = [ref for ref in refList if ref.datasetExists()]
1234  if existingRefs:
1235  outputRefList.append(existingRefs)
1236 
1237  self.refList = outputRefList
1238 
1239 
1240 class ConsolidateSourceTableConnections(pipeBase.PipelineTaskConnections,
1241  defaultTemplates={"catalogType": ""},
1242  dimensions=("instrument", "visit")):
1243  inputCatalogs = connectionTypes.Input(
1244  doc="Input per-detector Source Tables",
1245  name="{catalogType}sourceTable",
1246  storageClass="DataFrame",
1247  dimensions=("instrument", "visit", "detector"),
1248  multiple=True
1249  )
1250  outputCatalog = connectionTypes.Output(
1251  doc="Per-visit concatenation of Source Table",
1252  name="{catalogType}sourceTable_visit",
1253  storageClass="DataFrame",
1254  dimensions=("instrument", "visit")
1255  )
1256 
1257 
1258 class ConsolidateSourceTableConfig(pipeBase.PipelineTaskConfig,
1259  pipelineConnections=ConsolidateSourceTableConnections):
1260  pass
1261 
1262 
1263 class ConsolidateSourceTableTask(CmdLineTask, pipeBase.PipelineTask):
1264  """Concatenate `sourceTable` list into a per-visit `sourceTable_visit`
1265  """
1266  _DefaultName = 'consolidateSourceTable'
1267  ConfigClass = ConsolidateSourceTableConfig
1268 
1269  inputDataset = 'sourceTable'
1270  outputDataset = 'sourceTable_visit'
1271 
1272  def runQuantum(self, butlerQC, inputRefs, outputRefs):
1273  inputs = butlerQC.get(inputRefs)
1274  self.log.info("Concatenating %s per-detector Source Tables",
1275  len(inputs['inputCatalogs']))
1276  df = pd.concat(inputs['inputCatalogs'])
1277  butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs)
1278 
1279  def runDataRef(self, dataRefList):
1280  self.log.info("Concatenating %s per-detector Source Tables", len(dataRefList))
1281  df = pd.concat([dataRef.get().toDataFrame() for dataRef in dataRefList])
1282  dataRefList[0].put(ParquetTable(dataFrame=df), self.outputDataset)
1283 
1284  @classmethod
1285  def _makeArgumentParser(cls):
1286  parser = ArgumentParser(name=cls._DefaultName)
1287 
1288  parser.add_id_argument("--id", cls.inputDataset,
1289  help="data ID, e.g. --id visit=12345",
1290  ContainerClass=VisitDataIdContainer)
1291  return parser
1292 
1293  def writeMetadata(self, dataRef):
1294  """No metadata to write.
1295  """
1296  pass
1297 
1298  def writeConfig(self, butler, clobber=False, doBackup=True):
1299  """No config to write.
1300  """
1301  pass