lsst.pipe.tasks  19.0.0-54-g1bde8684
postprocess.py
# This file is part of pipe_tasks
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

import functools
import pandas as pd
from collections import defaultdict

import lsst.pex.config as pexConfig
import lsst.pipe.base as pipeBase
from lsst.pipe.base import CmdLineTask, ArgumentParser, DataIdContainer
from lsst.coadd.utils.coaddDataIdContainer import CoaddDataIdContainer

from .parquetTable import ParquetTable
from .multiBandUtils import makeMergeArgumentParser, MergeSourcesRunner
from .functors import CompositeFunctor, RAColumn, DecColumn, Column


def flattenFilters(df, filterDict, noDupCols=['coord_ra', 'coord_dec'], camelCase=False):
    """Flatten a dataframe with a multilevel column index into single-level,
    filter-prefixed columns.
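
    Examples
    --------
    A minimal sketch of the intended behavior; the filter and column names
    below are illustrative only::

        df = pd.DataFrame({('HSC-G', 'coord_ra'): [1.0],
                           ('HSC-G', 'psfFlux'): [10.0],
                           ('HSC-R', 'coord_ra'): [1.0],
                           ('HSC-R', 'psfFlux'): [12.0]})
        flat = flattenFilters(df, {'HSC-G': 'g', 'HSC-R': 'r'},
                              noDupCols=['coord_ra'])
        # flat now has single-level columns 'coord_ra', 'g_psfFlux' and
        # 'r_psfFlux' (or 'gpsfFlux'/'rpsfFlux' with camelCase=True).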
    """
    newDf = pd.DataFrame()
    for filt, filtShort in filterDict.items():
        subdf = df[filt]
        columnFormat = '{0}{1}' if camelCase else '{0}_{1}'
        newColumns = {c: columnFormat.format(filtShort, c)
                      for c in subdf.columns if c not in noDupCols}
        cols = list(newColumns.keys())
        newDf = pd.concat([newDf, subdf[cols].rename(columns=newColumns)], axis=1)

    newDf = pd.concat([subdf[noDupCols], newDf], axis=1)
    return newDf


class WriteObjectTableConfig(pexConfig.Config):
    priorityList = pexConfig.ListField(
        dtype=str,
        default=[],
        doc="Priority-ordered list of bands for the merge."
    )
    engine = pexConfig.Field(
        dtype=str,
        default="pyarrow",
        doc="Parquet engine for writing (pyarrow or fastparquet)"
    )
    coaddName = pexConfig.Field(
        dtype=str,
        default="deep",
        doc="Name of coadd"
    )

    def validate(self):
        pexConfig.Config.validate(self)
        if len(self.priorityList) == 0:
            raise RuntimeError("No priority list provided")


class WriteObjectTableTask(CmdLineTask):
    """Write filter-merged source tables to parquet
    """
    _DefaultName = "writeObjectTable"
    ConfigClass = WriteObjectTableConfig
    RunnerClass = MergeSourcesRunner

    # Names of table datasets to be merged
    inputDatasets = ('forced_src', 'meas', 'ref')

    # Tag of output dataset written by `MergeSourcesTask.write`
    outputDataset = 'obj'

    def __init__(self, butler=None, schema=None, **kwargs):
        # It is a shame that this class can't use the default init for CmdLineTask,
        # but to do so would require its own special task runner, which is many
        # more lines of specialization, so this is how it is for now.
        CmdLineTask.__init__(self, **kwargs)

    def runDataRef(self, patchRefList):
        """Merge coadd sources from multiple bands and write the result.

        Calls `run`, which does the actual merging.

        Parameters
        ----------
        patchRefList : `list` of `lsst.daf.persistence.ButlerDataRef`
            Data references for this patch, one per filter.
        """
        catalogs = dict(self.readCatalog(patchRef) for patchRef in patchRefList)
        dataId = patchRefList[0].dataId
        mergedCatalog = self.run(catalogs, tract=dataId['tract'], patch=dataId['patch'])
        self.write(patchRefList[0], mergedCatalog)

    @classmethod
    def _makeArgumentParser(cls):
        """Create a suitable ArgumentParser.

        We will use the ArgumentParser to get a list of data
        references for patches; the RunnerClass will sort them into lists
        of data references for the same patch.

        References the first entry of `self.inputDatasets`, rather than
        `self.inputDataset`, when building the parser.
        """
        return makeMergeArgumentParser(cls._DefaultName, cls.inputDatasets[0])

    def readCatalog(self, patchRef):
        """Read input catalogs

        Read all the input datasets given by the 'inputDatasets'
        attribute.

        Parameters
        ----------
        patchRef : `lsst.daf.persistence.ButlerDataRef`
            Data reference for patch

        Returns
        -------
        Tuple consisting of filter name and a dict of catalogs, keyed by
        dataset name
        """
        filterName = patchRef.dataId["filter"]
        catalogDict = {}
        for dataset in self.inputDatasets:
            catalog = patchRef.get(self.config.coaddName + "Coadd_" + dataset, immediate=True)
            self.log.info("Read %d sources from %s for filter %s: %s" %
                          (len(catalog), dataset, filterName, patchRef.dataId))
            catalogDict[dataset] = catalog
        return filterName, catalogDict

    def run(self, catalogs, tract, patch):
        """Merge multiple catalogs.

        Parameters
        ----------
        catalogs : `dict`
            Mapping from filter names to dict of catalogs.
        tract : `int`
            tractId to use for the tractId column
        patch : `str`
            patchId to use for the patchId column

        Returns
        -------
        catalog : `lsst.pipe.tasks.parquetTable.ParquetTable`
            Merged dataframe, with each column prefixed by
            `filter_tag(filt)`, wrapped in the parquet writer shim class.
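
        Notes
        -----
        The merged dataframe carries a three-level column `MultiIndex` of
        ``(dataset, filter, column)``. As an illustration (the filter and
        column names here are examples, not a guaranteed schema), a single
        column of the underlying dataframe would be addressed as::

            df[('meas', 'HSC-G', 'base_PsfFlux_instFlux')]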
        """

        dfs = []
        for filt, tableDict in catalogs.items():
            for dataset, table in tableDict.items():
                # Convert afwTable to pandas DataFrame
                df = table.asAstropy().to_pandas().set_index('id', drop=True)

                # Sort columns by name, to ensure matching schema among patches
                df = df.reindex(sorted(df.columns), axis=1)
                df['tractId'] = tract
                df['patchId'] = patch

                # Make columns a 3-level MultiIndex
                df.columns = pd.MultiIndex.from_tuples([(dataset, filt, c) for c in df.columns],
                                                       names=('dataset', 'filter', 'column'))
                dfs.append(df)

        catalog = functools.reduce(lambda d1, d2: d1.join(d2), dfs)
        return ParquetTable(dataFrame=catalog)

    def write(self, patchRef, catalog):
        """Write the output.

        Parameters
        ----------
        catalog : `ParquetTable`
            Catalog to write
        patchRef : `lsst.daf.persistence.ButlerDataRef`
            Data reference for patch
        """
        patchRef.put(catalog, self.config.coaddName + "Coadd_" + self.outputDataset)
        # Since the filter isn't actually part of the data ID for the dataset we're saving,
        # it's confusing to see it in the log message, even if the butler simply ignores it.
        mergeDataId = patchRef.dataId.copy()
        del mergeDataId["filter"]
        self.log.info("Wrote merged catalog: %s" % (mergeDataId,))

    def writeMetadata(self, dataRefList):
        """No metadata to write, and not sure how to write it for a list of dataRefs.
        """
        pass


class WriteSourceTableConfig(pexConfig.Config):
    pass


class WriteSourceTableTask(CmdLineTask):
    """Write source table to parquet
    """
    _DefaultName = "writeSourceTable"
    ConfigClass = WriteSourceTableConfig

    def runDataRef(self, dataRef):
        src = dataRef.get('src')
        ccdVisitId = dataRef.get('ccdExposureId')
        result = self.run(src, ccdVisitId=ccdVisitId)
        dataRef.put(result.table, 'source')

    def run(self, catalog, ccdVisitId=None):
        """Convert `src` catalog to parquet

        Parameters
        ----------
        catalog : `lsst.afw.table.SourceCatalog`
            catalog to be converted
        ccdVisitId : `int`
            ccdVisitId to be added as a column

        Returns
        -------
        result : `lsst.pipe.base.Struct`
            ``table``
                `ParquetTable` version of the input catalog
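
        Notes
        -----
        A minimal sketch of calling this method directly, outside the
        command-line driver (the source catalog and visit id are
        placeholders)::

            result = WriteSourceTableTask().run(srcCatalog, ccdVisitId=12345)
            df = result.table.toDataFrame()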
        """
        self.log.info("Generating parquet table from src catalog")
        df = catalog.asAstropy().to_pandas().set_index('id', drop=True)
        df['ccdVisitId'] = ccdVisitId
        return pipeBase.Struct(table=ParquetTable(dataFrame=df))

    def writeMetadata(self, dataRef):
        """No metadata to write.
        """
        pass

    def writeConfig(self, butler, clobber=False, doBackup=True):
        """No config to write.
        """
        pass

    @classmethod
    def _makeArgumentParser(cls):
        parser = ArgumentParser(name=cls._DefaultName)
        parser.add_id_argument("--id", 'src',
                               help="data ID, e.g. --id visit=12345 ccd=0")
        return parser


class PostprocessAnalysis(object):
    """Calculate columns from ParquetTable

    This object manages and organizes an arbitrary set of computations
    on a catalog. The catalog is defined by a
    `lsst.pipe.tasks.parquetTable.ParquetTable` object (or list thereof), such as a
    `deepCoadd_obj` dataset, and the computations are defined by a collection
    of `lsst.pipe.tasks.functors.Functor` objects (or, equivalently,
    a `CompositeFunctor`).

    After the object is initialized, accessing the `.df` attribute (which
    holds the `pandas.DataFrame` containing the results of the calculations) triggers
    computation of said dataframe.

    One of the conveniences of using this object is the ability to define a desired common
    filter for all functors. This enables the same functor collection to be passed to
    several different `PostprocessAnalysis` objects without having to change the original
    functor collection, since the `filt` keyword argument of this object triggers an
    overwrite of the `filt` property for all functors in the collection.

    This object also allows a list of refFlags to be passed, and defines a set of default
    refFlags that are always included even if not requested.

    If a list of `ParquetTable` objects is passed, rather than a single one, then the
    calculations will be mapped over all the input catalogs. In principle, it should
    be straightforward to parallelize this activity, but initial tests have failed
    (see TODO in code comments).

    Parameters
    ----------
    parq : `lsst.pipe.tasks.parquetTable.ParquetTable` (or list of such)
        Source catalog(s) for computation

    functors : `list`, `dict`, or `lsst.pipe.tasks.functors.CompositeFunctor`
        Computations to do (functors that act on `parq`).
        If a dict, the output
        DataFrame will have columns keyed accordingly.
        If a list, the column keys will come from the
        `.shortname` attribute of each functor.

    filt : `str` (optional)
        Filter in which to calculate. If provided,
        this will overwrite any existing `.filt` attribute
        of the provided functors.

    flags : `list` (optional)
        List of flags (per-band) to include in output table.

    refFlags : `list` (optional)
        List of refFlags (only reference band) to include in output table.
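
    Examples
    --------
    A minimal sketch of typical usage; the functor and column names below are
    illustrative, and ``parq`` stands for a previously loaded `ParquetTable`::

        from lsst.pipe.tasks.functors import CompositeFunctor, Column

        funcs = CompositeFunctor({'psfFlux': Column('base_PsfFlux_instFlux',
                                                    dataset='meas')})
        analysis = PostprocessAnalysis(parq, funcs, filt='HSC-G',
                                       refFlags=['detect_isPrimary'])
        df = analysis.df   # accessing .df triggers the computation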
    """
    _defaultRefFlags = []
    _defaultFuncs = (('coord_ra', RAColumn()),
                     ('coord_dec', DecColumn()))

    def __init__(self, parq, functors, filt=None, flags=None, refFlags=None):
        self.parq = parq
        self.functors = functors

        self.filt = filt
        self.flags = list(flags) if flags is not None else []
        self.refFlags = list(self._defaultRefFlags)
        if refFlags is not None:
            self.refFlags += list(refFlags)

        self._df = None

    @property
    def defaultFuncs(self):
        funcs = dict(self._defaultFuncs)
        return funcs

    @property
    def func(self):
        additionalFuncs = self.defaultFuncs
        additionalFuncs.update({flag: Column(flag, dataset='ref') for flag in self.refFlags})
        additionalFuncs.update({flag: Column(flag, dataset='meas') for flag in self.flags})

        if isinstance(self.functors, CompositeFunctor):
            func = self.functors
        else:
            func = CompositeFunctor(self.functors)

        func.funcDict.update(additionalFuncs)
        func.filt = self.filt

        return func

    @property
    def noDupCols(self):
        return [name for name, func in self.func.funcDict.items() if func.noDup or func.dataset == 'ref']

    @property
    def df(self):
        if self._df is None:
            self.compute()
        return self._df

    def compute(self, dropna=False, pool=None):
        # map over multiple parquet tables
        if type(self.parq) in (list, tuple):
            if pool is None:
                dflist = [self.func(parq, dropna=dropna) for parq in self.parq]
            else:
                # TODO: Figure out why this doesn't work (pyarrow pickling issues?)
                dflist = pool.map(functools.partial(self.func, dropna=dropna), self.parq)
            self._df = pd.concat(dflist)
        else:
            self._df = self.func(self.parq, dropna=dropna)

        return self._df


class TransformCatalogBaseConfig(pexConfig.Config):
    functorFile = pexConfig.Field(
        dtype=str,
        doc='Path to YAML file specifying functors to be computed',
        default=None,
        optional=True
    )


class TransformCatalogBaseTask(CmdLineTask):
    """Base class for transforming/standardizing a catalog by applying functors
    that convert units and apply calibrations.

    The purpose of this task is to perform a set of computations on
    an input `ParquetTable` dataset (such as `deepCoadd_obj`) and write the
    results to a new dataset (which needs to be declared in an `outputDataset`
    attribute).

    The calculations to be performed are defined in a YAML file that specifies
    a set of functors to be computed, provided via
    the `functorFile` config parameter. An example of such a YAML file
    is the following:

    funcs:
        psfMag:
            functor: Mag
            args:
                - base_PsfFlux
            filt: HSC-G
            dataset: meas
        cmodel_magDiff:
            functor: MagDiff
            args:
                - modelfit_CModel
                - base_PsfFlux
            filt: HSC-G
        gauss_magDiff:
            functor: MagDiff
            args:
                - base_GaussianFlux
                - base_PsfFlux
            filt: HSC-G
        count:
            functor: Column
            args:
                - base_InputCount_value
            filt: HSC-G
        deconvolved_moments:
            functor: DeconvolvedMoments
            filt: HSC-G
            dataset: forced_src
    refFlags:
        - calib_psfUsed
        - merge_measurement_i
        - merge_measurement_r
        - merge_measurement_z
        - merge_measurement_y
        - merge_measurement_g
        - base_PixelFlags_flag_inexact_psfCenter
        - detect_isPrimary

    The name of each entry under "funcs" becomes the name of a column in the
    output dataset. All the functors referenced are defined in `lsst.pipe.tasks.functors`.
    Positional arguments to be passed to each functor are in the `args` list,
    and any additional entries for each column other than "functor" or "args" (e.g., `'filt'`,
    `'dataset'`) are treated as keyword arguments to be passed to the functor initialization.

    The "refFlags" entry is a shortcut for a set of `Column` functors that copy the
    named columns unchanged from the `'ref'` dataset.

    The "flags" entry is expanded out per band.

    Note that if `'filter'` is provided as part of the `dataId` when running this task (even though
    `deepCoadd_obj` does not use `'filter'`), it will override any `filt` keywords
    provided in the YAML file, and the calculations will be done in that filter.

    This task uses the `lsst.pipe.tasks.postprocess.PostprocessAnalysis` object
    to organize and execute the calculations.
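
    A minimal sketch of driving a concrete subclass programmatically, outside
    the command-line driver (the data reference, config values, and file path
    are illustrative placeholders)::

        config = TransformObjectCatalogConfig()
        config.functorFile = '/path/to/myFunctors.yaml'
        task = TransformObjectCatalogTask(config=config)
        df = task.runDataRef(dataRef)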
    """
    @property
    def _DefaultName(self):
        raise NotImplementedError('Subclass must define "_DefaultName" attribute')

    @property
    def outputDataset(self):
        raise NotImplementedError('Subclass must define "outputDataset" attribute')

    @property
    def inputDataset(self):
        raise NotImplementedError('Subclass must define "inputDataset" attribute')

    @property
    def ConfigClass(self):
        raise NotImplementedError('Subclass must define "ConfigClass" attribute')

    def runDataRef(self, dataRef):
        parq = dataRef.get()
        funcs = self.getFunctors()
        df = self.run(parq, funcs=funcs, dataId=dataRef.dataId)
        self.write(df, dataRef)
        return df

    def run(self, parq, funcs=None, dataId=None):
        """Do postprocessing calculations

        Takes a `ParquetTable` object and dataId,
        returns a dataframe with results of postprocessing calculations.

        Parameters
        ----------
        parq : `lsst.pipe.tasks.parquetTable.ParquetTable`
            ParquetTable from which calculations are done.
        funcs : `lsst.pipe.tasks.functors.Functors`
            Functors to apply to the table's columns
        dataId : dict, optional
            Used to add a `patchId` column to the output dataframe.

        Returns
        -------
        `pandas.DataFrame`

        """
        self.log.info("Transforming/standardizing the source table dataId: %s", dataId)

        filt = dataId.get('filter', None)
        df = self.transform(filt, parq, funcs, dataId).df
        self.log.info("Made a table of %d columns and %d rows", len(df.columns), len(df))
        return df

    def getFunctors(self):
        funcs = CompositeFunctor.from_file(self.config.functorFile)
        funcs.update(dict(PostprocessAnalysis._defaultFuncs))
        return funcs

    def getAnalysis(self, parq, funcs=None, filt=None):
        # Avoids disk access if funcs is passed
        if funcs is None:
            funcs = self.getFunctors()
        analysis = PostprocessAnalysis(parq, funcs, filt=filt)
        return analysis

    def transform(self, filt, parq, funcs, dataId):
        analysis = self.getAnalysis(parq, funcs=funcs, filt=filt)
        df = analysis.df
        if dataId is not None:
            for key, value in dataId.items():
                df[key] = value

        return pipeBase.Struct(
            df=df,
            analysis=analysis
        )

    def write(self, df, parqRef):
        parqRef.put(ParquetTable(dataFrame=df), self.outputDataset)

    def writeMetadata(self, dataRef):
        """No metadata to write.
        """
        pass


class TransformObjectCatalogConfig(TransformCatalogBaseConfig):
    coaddName = pexConfig.Field(
        dtype=str,
        default="deep",
        doc="Name of coadd"
    )
    filterMap = pexConfig.DictField(
        keytype=str,
        itemtype=str,
        default={},
        doc=("Dictionary mapping full filter name to short one for column name munging. "
             "These filters determine the output columns no matter what filters the "
             "input data actually contain.")
    )
    camelCase = pexConfig.Field(
        dtype=bool,
        default=True,
        doc=("Write per-filter column names with camelCase, else underscore. "
             "For example: gPsfFlux instead of g_PsfFlux.")
    )
    multilevelOutput = pexConfig.Field(
        dtype=bool,
        default=False,
        doc=("Whether results dataframe should have a multilevel column index (True) or be flat "
             "and name-munged (False).")
    )


class TransformObjectCatalogTask(TransformCatalogBaseTask):
    """Compute Flattened Object Table as defined in the DPDD

    Do the same set of postprocessing calculations on all bands.

    This is identical to `TransformCatalogBaseTask`, except that it does the
    specified functor calculations for all filters present in the
    input `deepCoadd_obj` table. Any specific `"filt"` keywords specified
    by the YAML file will be superseded.
    """
    _DefaultName = "transformObjectCatalog"
    ConfigClass = TransformObjectCatalogConfig

    inputDataset = 'deepCoadd_obj'
    outputDataset = 'objectTable'

    @classmethod
    def _makeArgumentParser(cls):
        parser = ArgumentParser(name=cls._DefaultName)
        parser.add_id_argument("--id", cls.inputDataset,
                               ContainerClass=CoaddDataIdContainer,
                               help="data ID, e.g. --id tract=12345 patch=1,2")
        return parser

    def run(self, parq, funcs=None, dataId=None):
        dfDict = {}
        analysisDict = {}
        templateDf = pd.DataFrame()
        # Perform transform for data of filters that exist in parq and are
        # specified in config.filterMap
        for filt in parq.columnLevelNames['filter']:
            if filt not in self.config.filterMap:
                self.log.info("Ignoring %s data in the input", filt)
                continue
            self.log.info("Transforming the catalog of filter %s", filt)
            result = self.transform(filt, parq, funcs, dataId)
            dfDict[filt] = result.df
            analysisDict[filt] = result.analysis
            if templateDf.empty:
                templateDf = result.df

        # Fill NaNs in columns of other wanted filters
        for filt in self.config.filterMap:
            if filt not in dfDict:
                self.log.info("Adding empty columns for filter %s", filt)
                dfDict[filt] = pd.DataFrame().reindex_like(templateDf)

        # This makes a multilevel column index, with filter as first level
        df = pd.concat(dfDict, axis=1, names=['filter', 'column'])

        if not self.config.multilevelOutput:
            noDupCols = list(set.union(*[set(v.noDupCols) for v in analysisDict.values()]))
            if dataId is not None:
                noDupCols += list(dataId.keys())
            df = flattenFilters(df, self.config.filterMap, noDupCols=noDupCols,
                                camelCase=self.config.camelCase)

        self.log.info("Made a table of %d columns and %d rows", len(df.columns), len(df))
        return df


class TractObjectDataIdContainer(CoaddDataIdContainer):

    def makeDataRefList(self, namespace):
        """Make self.refList from self.idList

        Generate a list of data references given tract and/or patch.
        This was adapted from `TractQADataIdContainer`, which was
        `TractDataIdContainer` modified to not require "filter".
        Only existing dataRefs are returned.
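
        For example, both of the following command-line data IDs are handled
        (the tract and patch values are illustrative)::

            --id tract=12345
            --id tract=12345 patch=1,2

        where a tract-only ID expands to every patch in that tract.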
        """
        def getPatchRefList(tract):
            return [namespace.butler.dataRef(datasetType=self.datasetType,
                                             tract=tract.getId(),
                                             patch="%d,%d" % patch.getIndex()) for patch in tract]

        tractRefs = defaultdict(list)  # Data references for each tract
        for dataId in self.idList:
            skymap = self.getSkymap(namespace)

            if "tract" in dataId:
                tractId = dataId["tract"]
                if "patch" in dataId:
                    tractRefs[tractId].append(namespace.butler.dataRef(datasetType=self.datasetType,
                                                                       tract=tractId,
                                                                       patch=dataId['patch']))
                else:
                    tractRefs[tractId] += getPatchRefList(skymap[tractId])
            else:
                tractRefs = dict((tract.getId(), tractRefs.get(tract.getId(), []) + getPatchRefList(tract))
                                 for tract in skymap)
        outputRefList = []
        for tractRefList in tractRefs.values():
            existingRefs = [ref for ref in tractRefList if ref.datasetExists()]
            outputRefList.append(existingRefs)

        self.refList = outputRefList


class ConsolidateObjectTableConfig(pexConfig.Config):
    coaddName = pexConfig.Field(
        dtype=str,
        default="deep",
        doc="Name of coadd"
    )


class ConsolidateObjectTableTask(CmdLineTask):
    """Write patch-merged source tables to a tract-level parquet file
    """
    _DefaultName = "consolidateObjectTable"
    ConfigClass = ConsolidateObjectTableConfig

    inputDataset = 'objectTable'
    outputDataset = 'objectTable_tract'

    @classmethod
    def _makeArgumentParser(cls):
        parser = ArgumentParser(name=cls._DefaultName)

        parser.add_id_argument("--id", cls.inputDataset,
                               help="data ID, e.g. --id tract=12345",
                               ContainerClass=TractObjectDataIdContainer)
        return parser

    def runDataRef(self, patchRefList):
        df = pd.concat([patchRef.get().toDataFrame() for patchRef in patchRefList])
        patchRefList[0].put(ParquetTable(dataFrame=df), self.outputDataset)

    def writeMetadata(self, dataRef):
        """No metadata to write.
        """
        pass


class TransformSourceTableConfig(TransformCatalogBaseConfig):
    pass


class TransformSourceTableTask(TransformCatalogBaseTask):
    """Transform/standardize a source catalog
    """
    _DefaultName = "transformSourceTable"
    ConfigClass = TransformSourceTableConfig

    inputDataset = 'source'
    outputDataset = 'sourceTable'

    def writeMetadata(self, dataRef):
        """No metadata to write.
        """
        pass

    @classmethod
    def _makeArgumentParser(cls):
        parser = ArgumentParser(name=cls._DefaultName)
        parser.add_id_argument("--id", datasetType=cls.inputDataset,
                               level="sensor",
                               help="data ID, e.g. --id visit=12345 ccd=0")
        return parser


class VisitDataIdContainer(DataIdContainer):
    """DataIdContainer that groups sensor-level IDs by visit
    """

    def makeDataRefList(self, namespace):
        """Make self.refList from self.idList

        Generate a list of data references grouped by visit.

        Parameters
        ----------
        namespace : `argparse.Namespace`
            Namespace used by `lsst.pipe.base.CmdLineTask` to parse command line arguments
        """
        def ccdDataRefList(visitId):
            """Get all possible ccds for a given visit"""
            ccds = namespace.butler.queryMetadata('src', ['ccd'], dataId={'visit': visitId})
            return [namespace.butler.dataRef(datasetType=self.datasetType,
                                             visit=visitId,
                                             ccd=ccd) for ccd in ccds]
        # Group by visits
        visitRefs = defaultdict(list)
        for dataId in self.idList:
            if "visit" in dataId:
                visitId = dataId["visit"]
                if "ccd" in dataId:
                    visitRefs[visitId].append(namespace.butler.dataRef(datasetType=self.datasetType,
                                                                       visit=visitId, ccd=dataId['ccd']))
                else:
                    visitRefs[visitId] += ccdDataRefList(visitId)
        outputRefList = []
        for refList in visitRefs.values():
            existingRefs = [ref for ref in refList if ref.datasetExists()]
            outputRefList.append(existingRefs)

        self.refList = outputRefList


class ConsolidateSourceTableConfig(pexConfig.Config):
    pass


class ConsolidateSourceTableTask(CmdLineTask):
    """Concatenate `sourceTable` list into a per-visit `sourceTable_visit`
    """
    _DefaultName = 'consolidateSourceTable'
    ConfigClass = ConsolidateSourceTableConfig

    inputDataset = 'sourceTable'
    outputDataset = 'sourceTable_visit'

    def runDataRef(self, dataRefList):
        self.log.info("Concatenating %s per-detector Source Tables", len(dataRefList))
        df = pd.concat([dataRef.get().toDataFrame() for dataRef in dataRefList])
        dataRefList[0].put(ParquetTable(dataFrame=df), self.outputDataset)

    @classmethod
    def _makeArgumentParser(cls):
        parser = ArgumentParser(name=cls._DefaultName)

        parser.add_id_argument("--id", cls.inputDataset,
                               help="data ID, e.g. --id visit=12345",
                               ContainerClass=VisitDataIdContainer)
        return parser

    def writeMetadata(self, dataRef):
        """No metadata to write.
        """
        pass

    def writeConfig(self, butler, clobber=False, doBackup=True):
        """No config to write.
        """
        pass