lsst.pipe.tasks  21.0.0-131-g8cabc107+01e30aaa97
postprocess.py
1 # This file is part of pipe_tasks
2 #
3 # Developed for the LSST Data Management System.
4 # This product includes software developed by the LSST Project
5 # (https://www.lsst.org).
6 # See the COPYRIGHT file at the top-level directory of this distribution
7 # for details of code ownership.
8 #
9 # This program is free software: you can redistribute it and/or modify
10 # it under the terms of the GNU General Public License as published by
11 # the Free Software Foundation, either version 3 of the License, or
12 # (at your option) any later version.
13 #
14 # This program is distributed in the hope that it will be useful,
15 # but WITHOUT ANY WARRANTY; without even the implied warranty of
16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 # GNU General Public License for more details.
18 #
19 # You should have received a copy of the GNU General Public License
20 # along with this program. If not, see <https://www.gnu.org/licenses/>.
21 
22 import functools
23 import pandas as pd
24 from collections import defaultdict
25 import numpy as np
26 
27 import lsst.geom
28 import lsst.pex.config as pexConfig
29 import lsst.pipe.base as pipeBase
30 import lsst.daf.base as dafBase
31 from lsst.pipe.base import connectionTypes
32 import lsst.afw.table as afwTable
33 from lsst.meas.base import SingleFrameMeasurementTask
34 from lsst.pipe.base import CmdLineTask, ArgumentParser, DataIdContainer
35 from lsst.coadd.utils.coaddDataIdContainer import CoaddDataIdContainer
36 from lsst.daf.butler import DeferredDatasetHandle, DataCoordinate
37 
38 from .parquetTable import ParquetTable
39 from .multiBandUtils import makeMergeArgumentParser, MergeSourcesRunner
40 from .functors import CompositeFunctor, RAColumn, DecColumn, Column
41 
42 
43 def flattenFilters(df, noDupCols=['coord_ra', 'coord_dec'], camelCase=False, inputBands=None):
44  """Flattens a dataframe with multilevel column index
45  """
46  newDf = pd.DataFrame()
47  # band is the level 0 index
48  dfBands = df.columns.unique(level=0).values
49  for band in dfBands:
50  subdf = df[band]
51  columnFormat = '{0}{1}' if camelCase else '{0}_{1}'
52  newColumns = {c: columnFormat.format(band, c)
53  for c in subdf.columns if c not in noDupCols}
54  cols = list(newColumns.keys())
55  newDf = pd.concat([newDf, subdf[cols].rename(columns=newColumns)], axis=1)
56 
57  # Band must be present in the input and output or else column is all NaN:
58  presentBands = dfBands if inputBands is None else list(set(inputBands).intersection(dfBands))
59  # Get the unexploded columns from any present band's partition
60  noDupDf = df[presentBands[0]][noDupCols]
61  newDf = pd.concat([noDupDf, newDf], axis=1)
62  return newDf
63 
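# Illustrative sketch (not part of the pipeline code): flattenFilters turns a
# (band, column) MultiIndex DataFrame into flat, band-prefixed columns. The toy
# frame and values below are hypothetical.
#
#     cols = pd.MultiIndex.from_tuples(
#         [('g', 'PsFlux'), ('r', 'PsFlux'), ('g', 'coord_ra'), ('r', 'coord_ra')])
#     toy = pd.DataFrame([[1.0, 2.0, 0.5, 0.5]], columns=cols)
#     flattenFilters(toy, noDupCols=['coord_ra'], camelCase=True)
#     # -> columns: ['coord_ra', 'gPsFlux', 'rPsFlux']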
64 
65 class WriteObjectTableConnections(pipeBase.PipelineTaskConnections,
66  defaultTemplates={"coaddName": "deep"},
67  dimensions=("tract", "patch", "skymap")):
68  inputCatalogMeas = connectionTypes.Input(
69  doc="Catalog of source measurements on the deepCoadd.",
70  dimensions=("tract", "patch", "band", "skymap"),
71  storageClass="SourceCatalog",
72  name="{coaddName}Coadd_meas",
73  multiple=True
74  )
75  inputCatalogForcedSrc = connectionTypes.Input(
76  doc="Catalog of forced measurements (shape and position parameters held fixed) on the deepCoadd.",
77  dimensions=("tract", "patch", "band", "skymap"),
78  storageClass="SourceCatalog",
79  name="{coaddName}Coadd_forced_src",
80  multiple=True
81  )
82  inputCatalogRef = connectionTypes.Input(
83  doc="Catalog marking the primary detection (which band provides a good shape and position) "
84  "for each detection in deepCoadd_mergeDet.",
85  dimensions=("tract", "patch", "skymap"),
86  storageClass="SourceCatalog",
87  name="{coaddName}Coadd_ref"
88  )
89  outputCatalog = connectionTypes.Output(
90  doc="A vertical concatenation of the deepCoadd_{ref|meas|forced_src} catalogs, "
91  "stored as a DataFrame with a multi-level column index per-patch.",
92  dimensions=("tract", "patch", "skymap"),
93  storageClass="DataFrame",
94  name="{coaddName}Coadd_obj"
95  )
96 
97 
98 class WriteObjectTableConfig(pipeBase.PipelineTaskConfig,
99  pipelineConnections=WriteObjectTableConnections):
100  engine = pexConfig.Field(
101  dtype=str,
102  default="pyarrow",
103  doc="Parquet engine for writing (pyarrow or fastparquet)"
104  )
105  coaddName = pexConfig.Field(
106  dtype=str,
107  default="deep",
108  doc="Name of coadd"
109  )
110 
111 
112 class WriteObjectTableTask(CmdLineTask, pipeBase.PipelineTask):
113  """Write filter-merged source tables to parquet
114  """
115  _DefaultName = "writeObjectTable"
116  ConfigClass = WriteObjectTableConfig
117  RunnerClass = MergeSourcesRunner
118 
119  # Names of table datasets to be merged
120  inputDatasets = ('forced_src', 'meas', 'ref')
121 
122  # Tag of output dataset written by `MergeSourcesTask.write`
123  outputDataset = 'obj'
124 
125  def __init__(self, butler=None, schema=None, **kwargs):
126  # It is a shame that this class can't use the default init for CmdLineTask
127  # But to do so would require its own special task runner, which is many
128  # more lines of specialization, so this is how it is for now
129  super().__init__(**kwargs)
130 
131  def runDataRef(self, patchRefList):
132  """!
133  @brief Merge coadd sources from multiple bands. Calls @ref `run` which must be defined in
134  subclasses that inherit from MergeSourcesTask.
135  @param[in] patchRefList list of data references for each filter
136  """
137  catalogs = dict(self.readCatalog(patchRef) for patchRef in patchRefList)
138  dataId = patchRefList[0].dataId
139  mergedCatalog = self.run(catalogs, tract=dataId['tract'], patch=dataId['patch'])
140  self.write(patchRefList[0], ParquetTable(dataFrame=mergedCatalog))
141 
142  def runQuantum(self, butlerQC, inputRefs, outputRefs):
143  inputs = butlerQC.get(inputRefs)
144 
145  measDict = {ref.dataId['band']: {'meas': cat} for ref, cat in
146  zip(inputRefs.inputCatalogMeas, inputs['inputCatalogMeas'])}
147  forcedSourceDict = {ref.dataId['band']: {'forced_src': cat} for ref, cat in
148  zip(inputRefs.inputCatalogForcedSrc, inputs['inputCatalogForcedSrc'])}
149 
150  catalogs = {}
151  for band in measDict.keys():
152  catalogs[band] = {'meas': measDict[band]['meas'],
153  'forced_src': forcedSourceDict[band]['forced_src'],
154  'ref': inputs['inputCatalogRef']}
155  dataId = butlerQC.quantum.dataId
156  df = self.run(catalogs=catalogs, tract=dataId['tract'], patch=dataId['patch'])
157  outputs = pipeBase.Struct(outputCatalog=df)
158  butlerQC.put(outputs, outputRefs)
159 
160  @classmethod
161  def _makeArgumentParser(cls):
162  """Create a suitable ArgumentParser.
163 
164  We will use the ArgumentParser to get a list of data
165  references for patches; the RunnerClass will sort them into lists
166  of data references for the same patch.
167 
168  References the first of self.inputDatasets, rather than
169  self.inputDataset.
170  """
171  return makeMergeArgumentParser(cls._DefaultName, cls.inputDatasets[0])
172 
173  def readCatalog(self, patchRef):
174  """Read input catalogs
175 
176  Read all the input datasets given by the 'inputDatasets'
177  attribute.
178 
179  Parameters
180  ----------
181  patchRef : `lsst.daf.persistence.ButlerDataRef`
182  Data reference for patch
183 
184  Returns
185  -------
186  Tuple consisting of band name and a dict of catalogs, keyed by
187  dataset name
188  """
189  band = patchRef.get(self.config.coaddName + "Coadd_filterLabel", immediate=True).bandLabel
190  catalogDict = {}
191  for dataset in self.inputDatasets:
192  catalog = patchRef.get(self.config.coaddName + "Coadd_" + dataset, immediate=True)
193  self.log.info("Read %d sources from %s for band %s: %s",
194  len(catalog), dataset, band, patchRef.dataId)
195  catalogDict[dataset] = catalog
196  return band, catalogDict
197 
198  def run(self, catalogs, tract, patch):
199  """Merge multiple catalogs.
200 
201  Parameters
202  ----------
203  catalogs : `dict`
204  Mapping from filter names to dict of catalogs.
205  tract : int
206  tractId to use for the tractId column
207  patch : str
208  patchId to use for the patchId column
209 
210  Returns
211  -------
212  catalog : `pandas.DataFrame`
213  Merged dataframe
214  """
215 
216  dfs = []
217  for filt, tableDict in catalogs.items():
218  for dataset, table in tableDict.items():
219  # Convert afwTable to pandas DataFrame
220  df = table.asAstropy().to_pandas().set_index('id', drop=True)
221 
222  # Sort columns by name, to ensure matching schema among patches
223  df = df.reindex(sorted(df.columns), axis=1)
224  df['tractId'] = tract
225  df['patchId'] = patch
226 
227  # Make columns a 3-level MultiIndex
228  df.columns = pd.MultiIndex.from_tuples([(dataset, filt, c) for c in df.columns],
229  names=('dataset', 'band', 'column'))
230  dfs.append(df)
231 
232  catalog = functools.reduce(lambda d1, d2: d1.join(d2), dfs)
233  return catalog
234 
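  # Illustrative sketch (not part of the task code): the merged catalog returned
  # by run() carries a three-level (dataset, band, column) column index, so a
  # single per-patch deepCoadd_obj DataFrame can be addressed like
  #
  #     catalog[('meas', 'g', 'base_PsfFlux_instFlux')]
  #     catalog[('ref', 'i', 'detect_isPrimary')]
  #
  # with the band and column names here being hypothetical examples.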
235  def write(self, patchRef, catalog):
236  """Write the output.
237 
238  Parameters
239  ----------
240  catalog : `ParquetTable`
241  Catalog to write
242  patchRef : `lsst.daf.persistence.ButlerDataRef`
243  Data reference for patch
244  """
245  patchRef.put(catalog, self.config.coaddName + "Coadd_" + self.outputDataset)
246  # since the filter isn't actually part of the data ID for the dataset we're saving,
247  # it's confusing to see it in the log message, even if the butler simply ignores it.
248  mergeDataId = patchRef.dataId.copy()
249  del mergeDataId["filter"]
250  self.log.info("Wrote merged catalog: %s", mergeDataId)
251 
252  def writeMetadata(self, dataRefList):
253  """No metadata to write, and not sure how to write it for a list of dataRefs.
254  """
255  pass
256 
257 
258 class WriteSourceTableConnections(pipeBase.PipelineTaskConnections,
259  defaultTemplates={"catalogType": ""},
260  dimensions=("instrument", "visit", "detector")):
261 
262  catalog = connectionTypes.Input(
263  doc="Input full-depth catalog of sources produced by CalibrateTask",
264  name="{catalogType}src",
265  storageClass="SourceCatalog",
266  dimensions=("instrument", "visit", "detector")
267  )
268  outputCatalog = connectionTypes.Output(
269  doc="Catalog of sources, `src` in Parquet format. The 'id' column is "
270  "replaced with an index; all other columns are unchanged.",
271  name="{catalogType}source",
272  storageClass="DataFrame",
273  dimensions=("instrument", "visit", "detector")
274  )
275 
276 
277 class WriteSourceTableConfig(pipeBase.PipelineTaskConfig,
278  pipelineConnections=WriteSourceTableConnections):
279  doApplyExternalPhotoCalib = pexConfig.Field(
280  dtype=bool,
281  default=False,
282  doc=("Add local photoCalib columns from the calexp.photoCalib? Should only be set True if "
283  "generating Source Tables from older src tables which do not already have local calib columns")
284  )
285  doApplyExternalSkyWcs = pexConfig.Field(
286  dtype=bool,
287  default=False,
288  doc=("Add local WCS columns from the calexp.wcs? Should only be set True if "
289  "generating Source Tables from older src tables which do not already have local calib columns")
290  )
291 
292 
293 class WriteSourceTableTask(CmdLineTask, pipeBase.PipelineTask):
294  """Write source table to parquet
295  """
296  _DefaultName = "writeSourceTable"
297  ConfigClass = WriteSourceTableConfig
298 
299  def runDataRef(self, dataRef):
300  src = dataRef.get('src')
301  if self.config.doApplyExternalPhotoCalib or self.config.doApplyExternalSkyWcs:
302  src = self.addCalibColumns(src, dataRef)
303 
304  ccdVisitId = dataRef.get('ccdExposureId')
305  result = self.run(src, ccdVisitId=ccdVisitId)
306  dataRef.put(result.table, 'source')
307 
308  def runQuantum(self, butlerQC, inputRefs, outputRefs):
309  inputs = butlerQC.get(inputRefs)
310  inputs['ccdVisitId'] = butlerQC.quantum.dataId.pack("visit_detector")
311  result = self.run(**inputs).table
312  outputs = pipeBase.Struct(outputCatalog=result.toDataFrame())
313  butlerQC.put(outputs, outputRefs)
314 
315  def run(self, catalog, ccdVisitId=None):
316  """Convert `src` catalog to parquet
317 
318  Parameters
319  ----------
320  catalog: `afwTable.SourceCatalog`
321  catalog to be converted
322  ccdVisitId: `int`
323  ccdVisitId to be added as a column
324 
325  Returns
326  -------
327  result : `lsst.pipe.base.Struct`
328  ``table``
329  `ParquetTable` version of the input catalog
330  """
331  self.log.info("Generating parquet table from src catalog %s", ccdVisitId)
332  df = catalog.asAstropy().to_pandas().set_index('id', drop=True)
333  df['ccdVisitId'] = ccdVisitId
334  return pipeBase.Struct(table=ParquetTable(dataFrame=df))
335 
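  # Illustrative usage sketch (task, catalog and id value below are
  # hypothetical): run() re-indexes the afw SourceCatalog by 'id' and tags
  # every row with the packed ccdVisitId.
  #
  #     result = task.run(srcCatalog, ccdVisitId=12345042)
  #     df = result.table.toDataFrame()   # all src columns plus 'ccdVisitId'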
336  def addCalibColumns(self, catalog, dataRef):
337  """Add columns with local calibration evaluated at each centroid
338 
339  for backwards compatibility with old repos.
340  This exists for the purpose of converting old src catalogs
341  (which don't have the expected local calib columns) to Source Tables.
342 
343  Parameters
344  ----------
345  catalog: `afwTable.SourceCatalog`
346  catalog to which calib columns will be added
347  dataRef : `lsst.daf.persistence.ButlerDataRef`
348  for fetching the calibs from disk.
349 
350  Returns
351  -------
352  newCat: `afwTable.SourceCatalog`
353  Source Catalog with requested local calib columns
354  """
355  mapper = afwTable.SchemaMapper(catalog.schema)
356  measureConfig = SingleFrameMeasurementTask.ConfigClass()
357  measureConfig.doReplaceWithNoise = False
358 
359  # Just need the WCS or the PhotoCalib attached to an exposure
360  exposure = dataRef.get('calexp_sub',
361  bbox=lsst.geom.Box2I(lsst.geom.Point2I(0, 0), lsst.geom.Point2I(0, 0)))
362 
363  mapper = afwTable.SchemaMapper(catalog.schema)
364  mapper.addMinimalSchema(catalog.schema, True)
365  schema = mapper.getOutputSchema()
366 
367  exposureIdInfo = dataRef.get("expIdInfo")
368  measureConfig.plugins.names = []
369  if self.config.doApplyExternalSkyWcs:
370  plugin = 'base_LocalWcs'
371  if plugin in schema:
372  raise RuntimeError(f"{plugin} already in src catalog. Set doApplyExternalSkyWcs=False")
373  else:
374  measureConfig.plugins.names.add(plugin)
375 
376  if self.config.doApplyExternalPhotoCalib:
377  plugin = 'base_LocalPhotoCalib'
378  if plugin in schema:
379  raise RuntimeError(f"{plugin} already in src catalog. Set doApplyExternalPhotoCalib=False")
380  else:
381  measureConfig.plugins.names.add(plugin)
382 
383  measurement = SingleFrameMeasurementTask(config=measureConfig, schema=schema)
384  newCat = afwTable.SourceCatalog(schema)
385  newCat.extend(catalog, mapper=mapper)
386  measurement.run(measCat=newCat, exposure=exposure, exposureId=exposureIdInfo.expId)
387  return newCat
388 
389  def writeMetadata(self, dataRef):
390  """No metadata to write.
391  """
392  pass
393 
394  @classmethod
395  def _makeArgumentParser(cls):
396  parser = ArgumentParser(name=cls._DefaultName)
397  parser.add_id_argument("--id", 'src',
398  help="data ID, e.g. --id visit=12345 ccd=0")
399  return parser
400 
401 
402 class PostprocessAnalysis(object):
403  """Calculate columns from ParquetTable
404 
405  This object manages and organizes an arbitrary set of computations
406  on a catalog. The catalog is defined by a
407  `lsst.pipe.tasks.parquetTable.ParquetTable` object (or list thereof), such as a
408  `deepCoadd_obj` dataset, and the computations are defined by a collection
409  of `lsst.pipe.tasks.functors.Functor` objects (or, equivalently,
410  a `CompositeFunctor`).
411 
412  After the object is initialized, accessing the `.df` attribute (which
413  holds the `pandas.DataFrame` containing the results of the calculations) triggers
414  computation of said dataframe.
415 
416  One of the conveniences of using this object is the ability to define a desired common
417  filter for all functors. This enables the same functor collection to be passed to
418  several different `PostprocessAnalysis` objects without having to change the original
419  functor collection, since the `filt` keyword argument of this object triggers an
420  overwrite of the `filt` property for all functors in the collection.
421 
422  This object also allows a list of refFlags to be passed, and defines a set of default
423  refFlags that are always included even if not requested.
424 
425  If a list of `ParquetTable` objects is passed, rather than a single one, then the
426  calculations will be mapped over all the input catalogs. In principle, it should
427  be straightforward to parallelize this activity, but initial tests have failed
428  (see TODO in code comments).
429 
430  Parameters
431  ----------
432  parq : `lsst.pipe.tasks.ParquetTable` (or list of such)
433  Source catalog(s) for computation
434 
435  functors : `list`, `dict`, or `lsst.pipe.tasks.functors.CompositeFunctor`
436  Computations to do (functors that act on `parq`).
437  If a dict, the output
438  DataFrame will have columns keyed accordingly.
439  If a list, the column keys will come from the
440  `.shortname` attribute of each functor.
441 
442  filt : `str` (optional)
443  Filter in which to calculate. If provided,
444  this will overwrite any existing `.filt` attribute
445  of the provided functors.
446 
447  flags : `list` (optional)
448  List of flags (per-band) to include in output table.
449  Taken from the `meas` dataset if applied to a multilevel Object Table.
450 
451  refFlags : `list` (optional)
452  List of refFlags (only reference band) to include in output table.
453 
454  forcedFlags : `list` (optional)
455  List of flags (per-band) to include in output table.
456  Taken from the ``forced_src`` dataset if applied to a
457  multilevel Object Table. Intended for flags from measurement plugins
458  only run during multi-band forced-photometry.
459  """
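  # Illustrative usage sketch (assumes a deepCoadd_obj ParquetTable `parq` is in
  # hand; the functor and flag choices are hypothetical):
  #
  #     from lsst.pipe.tasks.functors import Mag
  #     analysis = PostprocessAnalysis(parq, {'gPsfMag': Mag('base_PsfFlux')},
  #                                    filt='g', refFlags=['detect_isPrimary'])
  #     df = analysis.df   # first access triggers the computation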
460  _defaultRefFlags = []
461  _defaultFuncs = (('coord_ra', RAColumn()),
462  ('coord_dec', DecColumn()))
463 
464  def __init__(self, parq, functors, filt=None, flags=None, refFlags=None, forcedFlags=None):
465  self.parq = parq
466  self.functors = functors
467 
468  self.filt = filt
469  self.flags = list(flags) if flags is not None else []
470  self.forcedFlags = list(forcedFlags) if forcedFlags is not None else []
471  self.refFlags = list(self._defaultRefFlags)
472  if refFlags is not None:
473  self.refFlags += list(refFlags)
474 
475  self._df = None
476 
477  @property
478  def defaultFuncs(self):
479  funcs = dict(self._defaultFuncs)
480  return funcs
481 
482  @property
483  def func(self):
484  additionalFuncs = self.defaultFuncs
485  additionalFuncs.update({flag: Column(flag, dataset='forced_src') for flag in self.forcedFlags})
486  additionalFuncs.update({flag: Column(flag, dataset='ref') for flag in self.refFlags})
487  additionalFuncs.update({flag: Column(flag, dataset='meas') for flag in self.flags})
488 
489  if isinstance(self.functors, CompositeFunctor):
490  func = self.functors
491  else:
492  func = CompositeFunctor(self.functors)
493 
494  func.funcDict.update(additionalFuncs)
495  func.filt = self.filt
496 
497  return func
498 
499  @property
500  def noDupCols(self):
501  return [name for name, func in self.func.funcDict.items() if func.noDup or func.dataset == 'ref']
502 
503  @property
504  def df(self):
505  if self._df is None:
506  self.compute()
507  return self._df
508 
509  def compute(self, dropna=False, pool=None):
510  # map over multiple parquet tables
511  if type(self.parq) in (list, tuple):
512  if pool is None:
513  dflist = [self.func(parq, dropna=dropna) for parq in self.parq]
514  else:
515  # TODO: Figure out why this doesn't work (pyarrow pickling issues?)
516  dflist = pool.map(functools.partial(self.func, dropna=dropna), self.parq)
517  self._df = pd.concat(dflist)
518  else:
519  self._df = self.func(self.parq, dropna=dropna)
520 
521  return self._df
522 
523 
524 class TransformCatalogBaseConnections(pipeBase.PipelineTaskConnections,
525  dimensions=()):
526  """Expected Connections for subclasses of TransformCatalogBaseTask.
527 
528  Must be subclassed.
529  """
530  inputCatalog = connectionTypes.Input(
531  name="",
532  storageClass="DataFrame",
533  )
534  outputCatalog = connectionTypes.Output(
535  name="",
536  storageClass="DataFrame",
537  )
538 
539 
540 class TransformCatalogBaseConfig(pipeBase.PipelineTaskConfig,
541  pipelineConnections=TransformCatalogBaseConnections):
542  functorFile = pexConfig.Field(
543  dtype=str,
544  doc='Path to YAML file specifying functors to be computed',
545  default=None,
546  optional=True
547  )
548 
549 
550 class TransformCatalogBaseTask(CmdLineTask, pipeBase.PipelineTask):
551  """Base class for transforming/standardizing a catalog by applying
552  functors that convert units and apply calibrations.
553 
554  The purpose of this task is to perform a set of computations on
555  an input `ParquetTable` dataset (such as `deepCoadd_obj`) and write the
556  results to a new dataset (which needs to be declared in an `outputDataset`
557  attribute).
558 
559  The calculations to be performed are defined in a YAML file that specifies
560  a set of functors to be computed, provided as
561  the `functorFile` config parameter. An example of such a YAML file
562  is the following:
563 
564  funcs:
565      psfMag:
566          functor: Mag
567          args:
568              - base_PsfFlux
569          filt: HSC-G
570          dataset: meas
571      cmodel_magDiff:
572          functor: MagDiff
573          args:
574              - modelfit_CModel
575              - base_PsfFlux
576          filt: HSC-G
577      gauss_magDiff:
578          functor: MagDiff
579          args:
580              - base_GaussianFlux
581              - base_PsfFlux
582          filt: HSC-G
583      count:
584          functor: Column
585          args:
586              - base_InputCount_value
587          filt: HSC-G
588      deconvolved_moments:
589          functor: DeconvolvedMoments
590          filt: HSC-G
591          dataset: forced_src
592  refFlags:
593      - calib_psfUsed
594      - merge_measurement_i
595      - merge_measurement_r
596      - merge_measurement_z
597      - merge_measurement_y
598      - merge_measurement_g
599      - base_PixelFlags_flag_inexact_psfCenter
600      - detect_isPrimary
601 
602  The names for each entry under "funcs" will become the names of columns in the
603  output dataset. All the functors referenced are defined in `lsst.pipe.tasks.functors`.
604  Positional arguments to be passed to each functor are in the `args` list,
605  and any additional entries for each column other than "functor" or "args" (e.g., `'filt'`,
606  `'dataset'`) are treated as keyword arguments to be passed to the functor initialization.
607 
608  The "flags" entry is the default shortcut for `Column` functors.
609  All columns listed under "flags" will be copied to the output table
610  untransformed. They can be of any datatype.
611  In the special case of transforming a multi-level object table with
612  band and dataset indices (deepCoadd_obj), these will be taken from the
613  `meas` dataset and exploded out per band.
614 
615  There are two special shortcuts that only apply when transforming
616  multi-level Object (deepCoadd_obj) tables:
617  - The "refFlags" entry is a shortcut for `Column` functors
618  taken from the `'ref'` dataset if transforming an ObjectTable.
619  - The "forcedFlags" entry is a shortcut for `Column` functors
620  taken from the ``forced_src`` dataset if transforming an ObjectTable.
621  These are expanded out per band.
622 
623 
624  This task uses the `lsst.pipe.tasks.postprocess.PostprocessAnalysis` object
625  to organize and execute the calculations.
626 
627  """
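  # Illustrative sketch (the file path is hypothetical): subclasses are pointed
  # at a functor YAML like the one above through the `functorFile` config field,
  # e.g. in a config override file:
  #
  #     config.functorFile = '/path/to/myFunctors.yaml'
  #
  # The file is loaded with CompositeFunctor.from_file() in __init__ below.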
628  @property
629  def _DefaultName(self):
630  raise NotImplementedError('Subclass must define "_DefaultName" attribute')
631 
632  @property
633  def outputDataset(self):
634  raise NotImplementedError('Subclass must define "outputDataset" attribute')
635 
636  @property
637  def inputDataset(self):
638  raise NotImplementedError('Subclass must define "inputDataset" attribute')
639 
640  @property
641  def ConfigClass(self):
642  raise NotImplementedError('Subclass must define "ConfigClass" attribute')
643 
644  def __init__(self, *args, **kwargs):
645  super().__init__(*args, **kwargs)
646  if self.config.functorFile:
647  self.log.info('Loading transform functor definitions from %s',
648  self.config.functorFile)
649  self.funcs = CompositeFunctor.from_file(self.config.functorFile)
650  self.funcs.update(dict(PostprocessAnalysis._defaultFuncs))
651  else:
652  self.funcs = None
653 
654  def runQuantum(self, butlerQC, inputRefs, outputRefs):
655  inputs = butlerQC.get(inputRefs)
656  if self.funcs is None:
657  raise ValueError("config.functorFile is None. "
658  "Must be a valid path to a YAML file in order to run this Task as a PipelineTask.")
659  result = self.run(parq=inputs['inputCatalog'], funcs=self.funcs,
660  dataId=outputRefs.outputCatalog.dataId.full)
661  outputs = pipeBase.Struct(outputCatalog=result)
662  butlerQC.put(outputs, outputRefs)
663 
664  def runDataRef(self, dataRef):
665  parq = dataRef.get()
666  if self.funcs is None:
667  raise ValueError("config.functorFile is None. "
668  "Must be a valid path to a YAML file in order to run as a CommandlineTask.")
669  df = self.run(parq, funcs=self.funcs, dataId=dataRef.dataId)
670  self.write(df, dataRef)
671  return df
672 
673  def run(self, parq, funcs=None, dataId=None, band=None):
674  """Do postprocessing calculations
675 
676  Takes a `ParquetTable` object and dataId,
677  returns a dataframe with results of postprocessing calculations.
678 
679  Parameters
680  ----------
681  parq : `lsst.pipe.tasks.parquetTable.ParquetTable`
682  ParquetTable from which calculations are done.
683  funcs : `lsst.pipe.tasks.functors.Functor` or `CompositeFunctor`
684  Functors to apply to the table's columns
685  dataId : dict, optional
686  Used to add a `patchId` column to the output dataframe.
687  band : `str`, optional
688  Filter band that is being processed.
689 
690  Returns
691  -------
692  `pandas.DataFrame`
693 
694  """
695  self.log.info("Transforming/standardizing the source table dataId: %s", dataId)
696 
697  df = self.transform(band, parq, funcs, dataId).df
698  self.log.info("Made a table of %d columns and %d rows", len(df.columns), len(df))
699  return df
700 
701  def getFunctors(self):
702  return self.funcs
703 
704  def getAnalysis(self, parq, funcs=None, band=None):
705  if funcs is None:
706  funcs = self.funcs
707  analysis = PostprocessAnalysis(parq, funcs, filt=band)
708  return analysis
709 
710  def transform(self, band, parq, funcs, dataId):
711  analysis = self.getAnalysis(parq, funcs=funcs, band=band)
712  df = analysis.df
713  if dataId is not None:
714  for key, value in dataId.items():
715  df[str(key)] = value
716 
717  return pipeBase.Struct(
718  df=df,
719  analysis=analysis
720  )
721 
722  def write(self, df, parqRef):
723  parqRef.put(ParquetTable(dataFrame=df), self.outputDataset)
724 
725  def writeMetadata(self, dataRef):
726  """No metadata to write.
727  """
728  pass
729 
730 
731 class TransformObjectCatalogConnections(pipeBase.PipelineTaskConnections,
732  defaultTemplates={"coaddName": "deep"},
733  dimensions=("tract", "patch", "skymap")):
734  inputCatalog = connectionTypes.Input(
735  doc="The vertical concatenation of the deepCoadd_{ref|meas|forced_src} catalogs, "
736  "stored as a DataFrame with a multi-level column index per-patch.",
737  dimensions=("tract", "patch", "skymap"),
738  storageClass="DataFrame",
739  name="{coaddName}Coadd_obj",
740  deferLoad=True,
741  )
742  outputCatalog = connectionTypes.Output(
743  doc="Per-Patch Object Table of columns transformed from the deepCoadd_obj table per the standard "
744  "data model.",
745  dimensions=("tract", "patch", "skymap"),
746  storageClass="DataFrame",
747  name="objectTable"
748  )
749 
750 
751 class TransformObjectCatalogConfig(TransformCatalogBaseConfig,
752  pipelineConnections=TransformObjectCatalogConnections):
753  coaddName = pexConfig.Field(
754  dtype=str,
755  default="deep",
756  doc="Name of coadd"
757  )
758  # TODO: remove in DM-27177
759  filterMap = pexConfig.DictField(
760  keytype=str,
761  itemtype=str,
762  default={},
763  doc=("Dictionary mapping full filter name to short one for column name munging."
764  "These filters determine the output columns no matter what filters the "
765  "input data actually contain."),
766  deprecated=("Coadds are now identified by the band, so this transform is unused."
767  "Will be removed after v22.")
768  )
769  outputBands = pexConfig.ListField(
770  dtype=str,
771  default=None,
772  optional=True,
773  doc=("These bands and only these bands will appear in the output,"
774  " NaN-filled if the input does not include them."
775  " If None, then use all bands found in the input.")
776  )
777  camelCase = pexConfig.Field(
778  dtype=bool,
779  default=True,
780  doc=("Write per-band columns names with camelCase, else underscore "
781  "For example: gPsFlux instead of g_PsFlux.")
782  )
783  multilevelOutput = pexConfig.Field(
784  dtype=bool,
785  default=False,
786  doc=("Whether results dataframe should have a multilevel column index (True) or be flat "
787  "and name-munged (False).")
788  )
789 
790 
791 class TransformObjectCatalogTask(TransformCatalogBaseTask):
792  """Produce a flattened Object Table to match the format specified in
793  sdm_schemas.
794 
795  Do the same set of postprocessing calculations on all bands
796 
797  This is identical to `TransformCatalogBaseTask`, except that it does the
798  specified functor calculations for all filters present in the
799  input `deepCoadd_obj` table. Any specific `"filt"` keywords specified
800  by the YAML file will be superseded.
801  """
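  # Illustrative note: with the default camelCase=True and multilevelOutput=False,
  # a per-band quantity such as PsFlux appears in the flat output as gPsFlux,
  # rPsFlux, ..., while reference-band columns such as coord_ra appear once,
  # unprefixed (see flattenFilters above).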
802  _DefaultName = "transformObjectCatalog"
803  ConfigClass = TransformObjectCatalogConfig
804 
805  # Used by Gen 2 runDataRef only:
806  inputDataset = 'deepCoadd_obj'
807  outputDataset = 'objectTable'
808 
809  @classmethod
810  def _makeArgumentParser(cls):
811  parser = ArgumentParser(name=cls._DefaultName)
812  parser.add_id_argument("--id", cls.inputDataset,
813  ContainerClass=CoaddDataIdContainer,
814  help="data ID, e.g. --id tract=12345 patch=1,2")
815  return parser
816 
817  def run(self, parq, funcs=None, dataId=None, band=None):
818  # NOTE: band kwarg is ignored here.
819  dfDict = {}
820  analysisDict = {}
821  templateDf = pd.DataFrame()
822 
823  if isinstance(parq, DeferredDatasetHandle):
824  columns = parq.get(component='columns')
825  inputBands = columns.unique(level=1).values
826  else:
827  inputBands = parq.columnLevelNames['band']
828 
829  outputBands = self.config.outputBands if self.config.outputBands else inputBands
830 
831  # Perform transform for data of filters that exist in parq.
832  for inputBand in inputBands:
833  if inputBand not in outputBands:
834  self.log.info("Ignoring %s band data in the input", inputBand)
835  continue
836  self.log.info("Transforming the catalog of band %s", inputBand)
837  result = self.transform(inputBand, parq, funcs, dataId)
838  dfDict[inputBand] = result.df
839  analysisDict[inputBand] = result.analysis
840  if templateDf.empty:
841  templateDf = result.df
842 
843  # Fill NaNs in columns of other wanted bands
844  for filt in outputBands:
845  if filt not in dfDict:
846  self.log.info("Adding empty columns for band %s", filt)
847  dfDict[filt] = pd.DataFrame().reindex_like(templateDf)
848 
849  # This makes a multilevel column index, with band as first level
850  df = pd.concat(dfDict, axis=1, names=['band', 'column'])
851 
852  if not self.config.multilevelOutput:
853  noDupCols = list(set.union(*[set(v.noDupCols) for v in analysisDict.values()]))
854  if dataId is not None:
855  noDupCols += list(dataId.keys())
856  df = flattenFilters(df, noDupCols=noDupCols, camelCase=self.config.camelCase,
857  inputBands=inputBands)
858 
859  self.log.info("Made a table of %d columns and %d rows", len(df.columns), len(df))
860  return df
861 
862 
863 class TractObjectDataIdContainer(CoaddDataIdContainer):
864 
865  def makeDataRefList(self, namespace):
866  """Make self.refList from self.idList
867 
868  Generate a list of data references given tract and/or patch.
869  This was adapted from `TractQADataIdContainer`, which was
870  `TractDataIdContainer` modified to not require "filter".
871  Only existing dataRefs are returned.
872  """
873  def getPatchRefList(tract):
874  return [namespace.butler.dataRef(datasetType=self.datasetType,
875  tract=tract.getId(),
876  patch="%d,%d" % patch.getIndex()) for patch in tract]
877 
878  tractRefs = defaultdict(list) # Data references for each tract
879  for dataId in self.idList:
880  skymap = self.getSkymap(namespace)
881 
882  if "tract" in dataId:
883  tractId = dataId["tract"]
884  if "patch" in dataId:
885  tractRefs[tractId].append(namespace.butler.dataRef(datasetType=self.datasetType,
886  tract=tractId,
887  patch=dataId['patch']))
888  else:
889  tractRefs[tractId] += getPatchRefList(skymap[tractId])
890  else:
891  tractRefs = dict((tract.getId(), tractRefs.get(tract.getId(), []) + getPatchRefList(tract))
892  for tract in skymap)
893  outputRefList = []
894  for tractRefList in tractRefs.values():
895  existingRefs = [ref for ref in tractRefList if ref.datasetExists()]
896  outputRefList.append(existingRefs)
897 
898  self.refList = outputRefList
899 
900 
901 class ConsolidateObjectTableConnections(pipeBase.PipelineTaskConnections,
902  dimensions=("tract", "skymap")):
903  inputCatalogs = connectionTypes.Input(
904  doc="Per-Patch objectTables conforming to the standard data model.",
905  name="objectTable",
906  storageClass="DataFrame",
907  dimensions=("tract", "patch", "skymap"),
908  multiple=True,
909  )
910  outputCatalog = connectionTypes.Output(
911  doc="Per-tract concatenation of the input objectTables",
912  name="objectTable_tract",
913  storageClass="DataFrame",
914  dimensions=("tract", "skymap"),
915  )
916 
917 
918 class ConsolidateObjectTableConfig(pipeBase.PipelineTaskConfig,
919  pipelineConnections=ConsolidateObjectTableConnections):
920  coaddName = pexConfig.Field(
921  dtype=str,
922  default="deep",
923  doc="Name of coadd"
924  )
925 
926 
927 class ConsolidateObjectTableTask(CmdLineTask, pipeBase.PipelineTask):
928  """Write patch-merged source tables to a tract-level parquet file
929 
930  Concatenates the per-patch `objectTable` list into a per-tract `objectTable_tract`.
931  """
932  _DefaultName = "consolidateObjectTable"
933  ConfigClass = ConsolidateObjectTableConfig
934 
935  inputDataset = 'objectTable'
936  outputDataset = 'objectTable_tract'
937 
938  def runQuantum(self, butlerQC, inputRefs, outputRefs):
939  inputs = butlerQC.get(inputRefs)
940  self.log.info("Concatenating %s per-patch Object Tables",
941  len(inputs['inputCatalogs']))
942  df = pd.concat(inputs['inputCatalogs'])
943  butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs)
944 
945  @classmethod
946  def _makeArgumentParser(cls):
947  parser = ArgumentParser(name=cls._DefaultName)
948 
949  parser.add_id_argument("--id", cls.inputDataset,
950  help="data ID, e.g. --id tract=12345",
951  ContainerClass=TractObjectDataIdContainer)
952  return parser
953 
954  def runDataRef(self, patchRefList):
955  df = pd.concat([patchRef.get().toDataFrame() for patchRef in patchRefList])
956  patchRefList[0].put(ParquetTable(dataFrame=df), self.outputDataset)
957 
958  def writeMetadata(self, dataRef):
959  """No metadata to write.
960  """
961  pass
962 
963 
964 class TransformSourceTableConnections(pipeBase.PipelineTaskConnections,
965  defaultTemplates={"catalogType": ""},
966  dimensions=("instrument", "visit", "detector")):
967 
968  inputCatalog = connectionTypes.Input(
969  doc="Wide input catalog of sources produced by WriteSourceTableTask",
970  name="{catalogType}source",
971  storageClass="DataFrame",
972  dimensions=("instrument", "visit", "detector"),
973  deferLoad=True
974  )
975  outputCatalog = connectionTypes.Output(
976  doc="Narrower, per-detector Source Table transformed and converted per a "
977  "specified set of functors",
978  name="{catalogType}sourceTable",
979  storageClass="DataFrame",
980  dimensions=("instrument", "visit", "detector")
981  )
982 
983 
984 class TransformSourceTableConfig(TransformCatalogBaseConfig,
985  pipelineConnections=TransformSourceTableConnections):
986  pass
987 
988 
989 class TransformSourceTableTask(TransformCatalogBaseTask):
990  """Transform/standardize a source catalog
991  """
992  _DefaultName = "transformSourceTable"
993  ConfigClass = TransformSourceTableConfig
994 
995  inputDataset = 'source'
996  outputDataset = 'sourceTable'
997 
998  @classmethod
999  def _makeArgumentParser(cls):
1000  parser = ArgumentParser(name=cls._DefaultName)
1001  parser.add_id_argument("--id", datasetType=cls.inputDataset,
1002  level="sensor",
1003  help="data ID, e.g. --id visit=12345 ccd=0")
1004  return parser
1005 
1006  def runDataRef(self, dataRef):
1007  """Override to specify band label to run()."""
1008  parq = dataRef.get()
1009  funcs = self.getFunctors()
1010  band = dataRef.get("calexp_filterLabel", immediate=True).bandLabel
1011  df = self.run(parq, funcs=funcs, dataId=dataRef.dataId, band=band)
1012  self.write(df, dataRef)
1013  return df
1014 
1015 
1016 class ConsolidateVisitSummaryConnections(pipeBase.PipelineTaskConnections,
1017  dimensions=("instrument", "visit",),
1018  defaultTemplates={"calexpType": ""}):
1019  calexp = connectionTypes.Input(
1020  doc="Processed exposures used for metadata",
1021  name="{calexpType}calexp",
1022  storageClass="ExposureF",
1023  dimensions=("instrument", "visit", "detector"),
1024  deferLoad=True,
1025  multiple=True,
1026  )
1027  visitSummary = connectionTypes.Output(
1028  doc=("Per-visit consolidated exposure metadata. These catalogs use "
1029  "detector id for the id and are sorted for fast lookups of a "
1030  "detector."),
1031  name="{calexpType}visitSummary",
1032  storageClass="ExposureCatalog",
1033  dimensions=("instrument", "visit"),
1034  )
1035 
1036 
1037 class ConsolidateVisitSummaryConfig(pipeBase.PipelineTaskConfig,
1038  pipelineConnections=ConsolidateVisitSummaryConnections):
1039  """Config for ConsolidateVisitSummaryTask"""
1040  pass
1041 
1042 
1043 class ConsolidateVisitSummaryTask(pipeBase.PipelineTask, pipeBase.CmdLineTask):
1044  """Task to consolidate per-detector visit metadata.
1045 
1046  This task aggregates the following metadata from all the detectors in a
1047  single visit into an exposure catalog:
1048  - The visitInfo.
1049  - The wcs.
1050  - The photoCalib.
1051  - The physical_filter and band (if available).
1052  - The psf size, shape, and effective area at the center of the detector.
1053  - The corners of the bounding box in right ascension/declination.
1054 
1055  Other quantities such as Detector, Psf, ApCorrMap, and TransmissionCurve
1056  are not persisted here because of storage concerns, and because of their
1057  limited utility as summary statistics.
1058 
1059  Tests for this task are performed in ci_hsc_gen3.
1060  """
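  # Illustrative sketch (butler, visit and detectorId are hypothetical; assumes
  # the usual lsst.afw.table sorted-catalog API): because records use the
  # detector id and the catalog is sorted, per-detector lookups are direct:
  #
  #     summary = butler.get('visitSummary', visit=12345, instrument='HSC')
  #     row = summary.find(detectorId)
  #     wcs, photoCalib = row.getWcs(), row.getPhotoCalib()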
1061  _DefaultName = "consolidateVisitSummary"
1062  ConfigClass = ConsolidateVisitSummaryConfig
1063 
1064  @classmethod
1065  def _makeArgumentParser(cls):
1066  parser = ArgumentParser(name=cls._DefaultName)
1067 
1068  parser.add_id_argument("--id", "calexp",
1069  help="data ID, e.g. --id visit=12345",
1070  ContainerClass=VisitDataIdContainer)
1071  return parser
1072 
1073  def writeMetadata(self, dataRef):
1074  """No metadata to persist, so override to remove metadata persistence.
1075  """
1076  pass
1077 
1078  def writeConfig(self, butler, clobber=False, doBackup=True):
1079  """No config to persist, so override to remove config persistence.
1080  """
1081  pass
1082 
1083  def runDataRef(self, dataRefList):
1084  visit = dataRefList[0].dataId['visit']
1085 
1086  self.log.debug("Concatenating metadata from %d per-detector calexps (visit %d)",
1087  len(dataRefList), visit)
1088 
1089  expCatalog = self._combineExposureMetadata(visit, dataRefList, isGen3=False)
1090 
1091  dataRefList[0].put(expCatalog, 'visitSummary', visit=visit)
1092 
1093  def runQuantum(self, butlerQC, inputRefs, outputRefs):
1094  dataRefs = butlerQC.get(inputRefs.calexp)
1095  visit = dataRefs[0].dataId.byName()['visit']
1096 
1097  self.log.debug("Concatenating metadata from %d per-detector calexps (visit %d)",
1098  len(dataRefs), visit)
1099 
1100  expCatalog = self._combineExposureMetadata(visit, dataRefs)
1101 
1102  butlerQC.put(expCatalog, outputRefs.visitSummary)
1103 
1104  def _combineExposureMetadata(self, visit, dataRefs, isGen3=True):
1105  """Make a combined exposure catalog from a list of dataRefs.
1106  These dataRefs must point to exposures with wcs, summaryStats,
1107  and other visit metadata.
1108 
1109  Parameters
1110  ----------
1111  visit : `int`
1112  Visit identification number.
1113  dataRefs : `list`
1114  List of dataRefs in visit. May be list of
1115  `lsst.daf.persistence.ButlerDataRef` (Gen2) or
1116  `lsst.daf.butler.DeferredDatasetHandle` (Gen3).
1117  isGen3 : `bool`, optional
1118  Specifies if this is a Gen3 list of datarefs.
1119 
1120  Returns
1121  -------
1122  visitSummary : `lsst.afw.table.ExposureCatalog`
1123  Exposure catalog with per-detector summary information.
1124  """
1125  schema = self._makeVisitSummarySchema()
1126  cat = afwTable.ExposureCatalog(schema)
1127  cat.resize(len(dataRefs))
1128 
1129  cat['visit'] = visit
1130 
1131  for i, dataRef in enumerate(dataRefs):
1132  if isGen3:
1133  visitInfo = dataRef.get(component='visitInfo')
1134  filterLabel = dataRef.get(component='filterLabel')
1135  summaryStats = dataRef.get(component='summaryStats')
1136  detector = dataRef.get(component='detector')
1137  wcs = dataRef.get(component='wcs')
1138  photoCalib = dataRef.get(component='photoCalib')
1139  detector = dataRef.get(component='detector')
1140  bbox = dataRef.get(component='bbox')
1141  validPolygon = dataRef.get(component='validPolygon')
1142  else:
1143  # Note that we need to read the calexp because there is
1144  # no magic access to the psf except through the exposure.
1145  gen2_read_bbox = lsst.geom.BoxI(lsst.geom.PointI(0, 0), lsst.geom.PointI(1, 1))
1146  exp = dataRef.get(datasetType='calexp_sub', bbox=gen2_read_bbox)
1147  visitInfo = exp.getInfo().getVisitInfo()
1148  filterLabel = dataRef.get("calexp_filterLabel")
1149  summaryStats = exp.getInfo().getSummaryStats()
1150  wcs = exp.getWcs()
1151  photoCalib = exp.getPhotoCalib()
1152  detector = exp.getDetector()
1153  bbox = dataRef.get(datasetType='calexp_bbox')
1154  validPolygon = exp.getInfo().getValidPolygon()
1155 
1156  rec = cat[i]
1157  rec.setBBox(bbox)
1158  rec.setVisitInfo(visitInfo)
1159  rec.setWcs(wcs)
1160  rec.setPhotoCalib(photoCalib)
1161  rec.setValidPolygon(validPolygon)
1162 
1163  rec['physical_filter'] = filterLabel.physicalLabel if filterLabel.hasPhysicalLabel() else ""
1164  rec['band'] = filterLabel.bandLabel if filterLabel.hasBandLabel() else ""
1165  rec.setId(detector.getId())
1166  rec['psfSigma'] = summaryStats.psfSigma
1167  rec['psfIxx'] = summaryStats.psfIxx
1168  rec['psfIyy'] = summaryStats.psfIyy
1169  rec['psfIxy'] = summaryStats.psfIxy
1170  rec['psfArea'] = summaryStats.psfArea
1171  rec['raCorners'][:] = summaryStats.raCorners
1172  rec['decCorners'][:] = summaryStats.decCorners
1173  rec['ra'] = summaryStats.ra
1174  rec['decl'] = summaryStats.decl
1175  rec['zenithDistance'] = summaryStats.zenithDistance
1176  rec['zeroPoint'] = summaryStats.zeroPoint
1177  rec['skyBg'] = summaryStats.skyBg
1178  rec['skyNoise'] = summaryStats.skyNoise
1179  rec['meanVar'] = summaryStats.meanVar
1180  rec['astromOffsetMean'] = summaryStats.astromOffsetMean
1181  rec['astromOffsetStd'] = summaryStats.astromOffsetStd
1182 
1183  metadata = dafBase.PropertyList()
1184  metadata.add("COMMENT", "Catalog id is detector id, sorted.")
1185  # We are looping over existing datarefs, so the following is true
1186  metadata.add("COMMENT", "Only detectors with data have entries.")
1187  cat.setMetadata(metadata)
1188 
1189  cat.sort()
1190  return cat
1191 
1192  def _makeVisitSummarySchema(self):
1193  """Make the schema for the visitSummary catalog."""
1194  schema = afwTable.ExposureTable.makeMinimalSchema()
1195  schema.addField('visit', type='I', doc='Visit number')
1196  schema.addField('physical_filter', type='String', size=32, doc='Physical filter')
1197  schema.addField('band', type='String', size=32, doc='Name of band')
1198  schema.addField('psfSigma', type='F',
1199  doc='PSF model second-moments determinant radius (center of chip) (pixel)')
1200  schema.addField('psfArea', type='F',
1201  doc='PSF model effective area (center of chip) (pixel**2)')
1202  schema.addField('psfIxx', type='F',
1203  doc='PSF model Ixx (center of chip) (pixel**2)')
1204  schema.addField('psfIyy', type='F',
1205  doc='PSF model Iyy (center of chip) (pixel**2)')
1206  schema.addField('psfIxy', type='F',
1207  doc='PSF model Ixy (center of chip) (pixel**2)')
1208  schema.addField('raCorners', type='ArrayD', size=4,
1209  doc='Right Ascension of bounding box corners (degrees)')
1210  schema.addField('decCorners', type='ArrayD', size=4,
1211  doc='Declination of bounding box corners (degrees)')
1212  schema.addField('ra', type='D',
1213  doc='Right Ascension of bounding box center (degrees)')
1214  schema.addField('decl', type='D',
1215  doc='Declination of bounding box center (degrees)')
1216  schema.addField('zenithDistance', type='F',
1217  doc='Zenith distance of bounding box center (degrees)')
1218  schema.addField('zeroPoint', type='F',
1219  doc='Mean zeropoint in detector (mag)')
1220  schema.addField('skyBg', type='F',
1221  doc='Average sky background (ADU)')
1222  schema.addField('skyNoise', type='F',
1223  doc='Average sky noise (ADU)')
1224  schema.addField('meanVar', type='F',
1225  doc='Mean variance of the weight plane (ADU**2)')
1226  schema.addField('astromOffsetMean', type='F',
1227  doc='Mean offset of astrometric calibration matches (arcsec)')
1228  schema.addField('astromOffsetStd', type='F',
1229  doc='Standard deviation of offsets of astrometric calibration matches (arcsec)')
1230 
1231  return schema
1232 
1233 
1234 class VisitDataIdContainer(DataIdContainer):
1235  """DataIdContainer that groups sensor-level id's by visit
1236  """
1237 
1238  def makeDataRefList(self, namespace):
1239  """Make self.refList from self.idList
1240 
1241  Generate a list of data references grouped by visit.
1242 
1243  Parameters
1244  ----------
1245  namespace : `argparse.Namespace`
1246  Namespace used by `lsst.pipe.base.CmdLineTask` to parse command line arguments
1247  """
1248  # Group by visits
1249  visitRefs = defaultdict(list)
1250  for dataId in self.idList:
1251  if "visit" in dataId:
1252  visitId = dataId["visit"]
1253  # append all data references in this subset to the visit's list
1254  subset = namespace.butler.subset(self.datasetType, dataId=dataId)
1255  visitRefs[visitId].extend([dataRef for dataRef in subset])
1256 
1257  outputRefList = []
1258  for refList in visitRefs.values():
1259  existingRefs = [ref for ref in refList if ref.datasetExists()]
1260  if existingRefs:
1261  outputRefList.append(existingRefs)
1262 
1263  self.refList = outputRefList
1264 
1265 
1266 class ConsolidateSourceTableConnections(pipeBase.PipelineTaskConnections,
1267  defaultTemplates={"catalogType": ""},
1268  dimensions=("instrument", "visit")):
1269  inputCatalogs = connectionTypes.Input(
1270  doc="Input per-detector Source Tables",
1271  name="{catalogType}sourceTable",
1272  storageClass="DataFrame",
1273  dimensions=("instrument", "visit", "detector"),
1274  multiple=True
1275  )
1276  outputCatalog = connectionTypes.Output(
1277  doc="Per-visit concatenation of Source Table",
1278  name="{catalogType}sourceTable_visit",
1279  storageClass="DataFrame",
1280  dimensions=("instrument", "visit")
1281  )
1282 
1283 
1284 class ConsolidateSourceTableConfig(pipeBase.PipelineTaskConfig,
1285  pipelineConnections=ConsolidateSourceTableConnections):
1286  pass
1287 
1288 
1289 class ConsolidateSourceTableTask(CmdLineTask, pipeBase.PipelineTask):
1290  """Concatenate `sourceTable` list into a per-visit `sourceTable_visit`
1291  """
1292  _DefaultName = 'consolidateSourceTable'
1293  ConfigClass = ConsolidateSourceTableConfig
1294 
1295  inputDataset = 'sourceTable'
1296  outputDataset = 'sourceTable_visit'
1297 
1298  def runQuantum(self, butlerQC, inputRefs, outputRefs):
1299  inputs = butlerQC.get(inputRefs)
1300  self.log.info("Concatenating %s per-detector Source Tables",
1301  len(inputs['inputCatalogs']))
1302  df = pd.concat(inputs['inputCatalogs'])
1303  butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs)
1304 
1305  def runDataRef(self, dataRefList):
1306  self.log.info("Concatenating %s per-detector Source Tables", len(dataRefList))
1307  df = pd.concat([dataRef.get().toDataFrame() for dataRef in dataRefList])
1308  dataRefList[0].put(ParquetTable(dataFrame=df), self.outputDataset)
1309 
1310  @classmethod
1311  def _makeArgumentParser(cls):
1312  parser = ArgumentParser(name=cls._DefaultName)
1313 
1314  parser.add_id_argument("--id", cls.inputDataset,
1315  help="data ID, e.g. --id visit=12345",
1316  ContainerClass=VisitDataIdContainer)
1317  return parser
1318 
1319  def writeMetadata(self, dataRef):
1320  """No metadata to write.
1321  """
1322  pass
1323 
1324  def writeConfig(self, butler, clobber=False, doBackup=True):
1325  """No config to write.
1326  """
1327  pass
1328 
1329 
1330 class MakeCcdVisitTableConnections(pipeBase.PipelineTaskConnections,
1331  dimensions=("instrument",),
1332  defaultTemplates={}):
1333  visitSummaryRefs = connectionTypes.Input(
1334  doc="Data references for per-visit consolidated exposure metadata from ConsolidateVisitSummaryTask",
1335  name="visitSummary",
1336  storageClass="ExposureCatalog",
1337  dimensions=("instrument", "visit"),
1338  multiple=True,
1339  deferLoad=True,
1340  )
1341  outputCatalog = connectionTypes.Output(
1342  doc="CCD and Visit metadata table",
1343  name="CcdVisitTable",
1344  storageClass="DataFrame",
1345  dimensions=("instrument",)
1346  )
1347 
1348 
1349 class MakeCcdVisitTableConfig(pipeBase.PipelineTaskConfig,
1350  pipelineConnections=MakeCcdVisitTableConnections):
1351  pass
1352 
1353 
1354 class MakeCcdVisitTableTask(CmdLineTask, pipeBase.PipelineTask):
1355  """Produce a `ccdVisitTable` from the `visitSummary` exposure catalogs.
1356  """
1357  _DefaultName = 'makeCcdVisitTable'
1358  ConfigClass = MakeCcdVisitTableConfig
1359 
1360  def run(self, visitSummaryRefs):
1361  """Make a table of ccd information from the `visitSummary` catalogs.
1362  Parameters
1363  ----------
1364  visitSummaryRefs : `list` of `lsst.daf.butler.DeferredDatasetHandle`
1365  List of DeferredDatasetHandles pointing to exposure catalogs with
1366  per-detector summary information.
1367  Returns
1368  -------
1369  result : `lsst.pipe.base.Struct`
1370  Results struct with attribute:
1371  - `outputCatalog`
1372  Catalog of ccd and visit information.
1373  """
1374  ccdEntries = []
1375  for visitSummaryRef in visitSummaryRefs:
1376  visitSummary = visitSummaryRef.get()
1377  visitInfo = visitSummary[0].getVisitInfo()
1378 
1379  ccdEntry = {}
1380  summaryTable = visitSummary.asAstropy()
1381  selectColumns = ['id', 'visit', 'physical_filter', 'ra', 'decl', 'zenithDistance', 'zeroPoint',
1382  'psfSigma', 'skyBg', 'skyNoise']
1383  ccdEntry = summaryTable[selectColumns].to_pandas().set_index('id')
1384  ccdEntry = ccdEntry.rename(columns={"physical_filter": "filterName", "visit": "visitId"})
1385 
1386  dataIds = [DataCoordinate.standardize(visitSummaryRef.dataId, detector=id) for id in
1387  summaryTable['id']]
1388  packer = visitSummaryRef.dataId.universe.makePacker('visit_detector', visitSummaryRef.dataId)
1389  ccdVisitIds = [packer.pack(dataId) for dataId in dataIds]
1390  ccdEntry['ccdVisitId'] = ccdVisitIds
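  # These packed (visit, detector) integers match the ccdVisitId column written
  # by WriteSourceTableTask, so ccdVisitTable rows can be joined to per-source
  # rows on ccdVisitId.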
1391 
1392  pixToArcseconds = np.array([vR.getWcs().getPixelScale().asArcseconds() for vR in visitSummary])
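  # sqrt(8 ln 2) ~= 2.355 converts the PSF model Gaussian sigma to a FWHM;
  # multiplying by the per-detector pixel scale gives the seeing in arcseconds.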
1393  ccdEntry["seeing"] = visitSummary['psfSigma'] * np.sqrt(8 * np.log(2)) * pixToArcseconds
1394 
1395  ccdEntry["skyRotation"] = visitInfo.getBoresightRotAngle().asDegrees()
1396  ccdEntry["expMidpt"] = visitInfo.getDate().toPython()
1397  expTime = visitInfo.getExposureTime()
1398  ccdEntry['expTime'] = expTime
1399  ccdEntry["obsStart"] = ccdEntry["expMidpt"] - 0.5 * pd.Timedelta(seconds=expTime)
1400  ccdEntry['darkTime'] = visitInfo.getDarkTime()
1401  ccdEntry['xSize'] = summaryTable['bbox_max_x'] - summaryTable['bbox_min_x']
1402  ccdEntry['ySize'] = summaryTable['bbox_max_y'] - summaryTable['bbox_min_y']
1403  ccdEntry['llcra'] = summaryTable['raCorners'][:, 0]
1404  ccdEntry['llcdec'] = summaryTable['decCorners'][:, 0]
1405  ccdEntry['ulcra'] = summaryTable['raCorners'][:, 1]
1406  ccdEntry['ulcdec'] = summaryTable['decCorners'][:, 1]
1407  ccdEntry['urcra'] = summaryTable['raCorners'][:, 2]
1408  ccdEntry['urcdec'] = summaryTable['decCorners'][:, 2]
1409  ccdEntry['lrcra'] = summaryTable['raCorners'][:, 3]
1410  ccdEntry['lrcdec'] = summaryTable['decCorners'][:, 3]
1411  # TODO: DM-30618, Add raftName, nExposures, ccdTemp, binX, binY, and flags,
1412  # and decide if WCS, and llcx, llcy, ulcx, ulcy, etc. values are actually wanted.
1413  ccdEntries.append(ccdEntry)
1414 
1415  outputCatalog = pd.concat(ccdEntries)
1416  return pipeBase.Struct(outputCatalog=outputCatalog)
1417 
1418 
1419 class MakeVisitTableConnections(pipeBase.PipelineTaskConnections,
1420  dimensions=("instrument",),
1421  defaultTemplates={}):
1422  visitSummaries = connectionTypes.Input(
1423  doc="Per-visit consolidated exposure metadata from ConsolidateVisitSummaryTask",
1424  name="visitSummary",
1425  storageClass="ExposureCatalog",
1426  dimensions=("instrument", "visit",),
1427  multiple=True,
1428  deferLoad=True,
1429  )
1430  outputCatalog = connectionTypes.Output(
1431  doc="Visit metadata table",
1432  name="visitTable",
1433  storageClass="DataFrame",
1434  dimensions=("instrument",)
1435  )
1436 
1437 
1438 class MakeVisitTableConfig(pipeBase.PipelineTaskConfig,
1439  pipelineConnections=MakeVisitTableConnections):
1440  pass
1441 
1442 
1443 class MakeVisitTableTask(CmdLineTask, pipeBase.PipelineTask):
1444  """Produce a `visitTable` from the `visitSummary` exposure catalogs.
1445  """
1446  _DefaultName = 'makeVisitTable'
1447  ConfigClass = MakeVisitTableConfig
1448 
1449  def run(self, visitSummaries):
1450  """Make a table of visit information from the `visitSummary` catalogs.
1451 
1452  Parameters
1453  ----------
1454  visitSummaries : list of `lsst.afw.table.ExposureCatalog`
1455  List of exposure catalogs with per-detector summary information.
1456  Returns
1457  -------
1458  result : `lsst.pipe.base.Struct`
1459  Results struct with attribute:
1460  ``outputCatalog``
1461  Catalog of visit information.
1462  """
1463  visitEntries = []
1464  for visitSummary in visitSummaries:
1465  visitSummary = visitSummary.get()
1466  visitRow = visitSummary[0]
1467  visitInfo = visitRow.getVisitInfo()
1468 
1469  visitEntry = {}
1470  visitEntry["visitId"] = visitRow['visit']
1471  visitEntry["filterName"] = visitRow['physical_filter']
1472  raDec = visitInfo.getBoresightRaDec()
1473  visitEntry["ra"] = raDec.getRa().asDegrees()
1474  visitEntry["decl"] = raDec.getDec().asDegrees()
1475  visitEntry["skyRotation"] = visitInfo.getBoresightRotAngle().asDegrees()
1476  azAlt = visitInfo.getBoresightAzAlt()
1477  visitEntry["azimuth"] = azAlt.getLongitude().asDegrees()
1478  visitEntry["altitude"] = azAlt.getLatitude().asDegrees()
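  # Zenith distance is the complement of the boresight altitude.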
1479  visitEntry["zenithDistance"] = 90 - azAlt.getLatitude().asDegrees()
1480  visitEntry["airmass"] = visitInfo.getBoresightAirmass()
1481  visitEntry["obsStart"] = visitInfo.getDate().toPython()
1482  visitEntry["expTime"] = visitInfo.getExposureTime()
1483  visitEntries.append(visitEntry)
1484  # TODO: DM-30623, Add programId, exposureType, expMidpt, cameraTemp, mirror1Temp, mirror2Temp,
1485  # mirror3Temp, domeTemp, externalTemp, dimmSeeing, pwvGPS, pwvMW, flags, nExposures
1486 
1487  outputCatalog = pd.DataFrame(data=visitEntries)
1488  return pipeBase.Struct(outputCatalog=outputCatalog)