lsst.pipe.tasks  21.0.0-119-g1a9a5132+a02c4f7b43
postprocess.py
1 # This file is part of pipe_tasks
2 #
3 # Developed for the LSST Data Management System.
4 # This product includes software developed by the LSST Project
5 # (https://www.lsst.org).
6 # See the COPYRIGHT file at the top-level directory of this distribution
7 # for details of code ownership.
8 #
9 # This program is free software: you can redistribute it and/or modify
10 # it under the terms of the GNU General Public License as published by
11 # the Free Software Foundation, either version 3 of the License, or
12 # (at your option) any later version.
13 #
14 # This program is distributed in the hope that it will be useful,
15 # but WITHOUT ANY WARRANTY; without even the implied warranty of
16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 # GNU General Public License for more details.
18 #
19 # You should have received a copy of the GNU General Public License
20 # along with this program. If not, see <https://www.gnu.org/licenses/>.
21 
22 import functools
23 import pandas as pd
24 from collections import defaultdict
25 import numpy as np
26 
27 import lsst.geom
28 import lsst.pex.config as pexConfig
29 import lsst.pipe.base as pipeBase
30 import lsst.daf.base as dafBase
31 from lsst.pipe.base import connectionTypes
32 import lsst.afw.table as afwTable
33 from lsst.meas.base import SingleFrameMeasurementTask
34 from lsst.pipe.base import CmdLineTask, ArgumentParser, DataIdContainer
35 from lsst.coadd.utils.coaddDataIdContainer import CoaddDataIdContainer
36 from lsst.daf.butler import DeferredDatasetHandle, DataCoordinate
37 
38 from .parquetTable import ParquetTable
39 from .multiBandUtils import makeMergeArgumentParser, MergeSourcesRunner
40 from .functors import CompositeFunctor, RAColumn, DecColumn, Column
41 
42 
43 def flattenFilters(df, noDupCols=['coord_ra', 'coord_dec'], camelCase=False, inputBands=None):
44  """Flattens a dataframe with a multilevel column index into per-band named columns.
45  """
46  newDf = pd.DataFrame()
47  # band is the level 0 index
48  dfBands = df.columns.unique(level=0).values
49  for band in dfBands:
50  subdf = df[band]
51  columnFormat = '{0}{1}' if camelCase else '{0}_{1}'
52  newColumns = {c: columnFormat.format(band, c)
53  for c in subdf.columns if c not in noDupCols}
54  cols = list(newColumns.keys())
55  newDf = pd.concat([newDf, subdf[cols].rename(columns=newColumns)], axis=1)
56 
57  # Band must be present in the input and output or else column is all NaN:
58  presentBands = dfBands if inputBands is None else list(set(inputBands).intersection(dfBands))
59  # Get the unexploded columns from any present band's partition
60  noDupDf = df[presentBands[0]][noDupCols]
61  newDf = pd.concat([noDupDf, newDf], axis=1)
62  return newDf
63 
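# --- Usage sketch (illustrative only; 'PsFlux' is a hypothetical column name). Given a
# two-level ('band', 'column') MultiIndex such as the one built by
# TransformObjectCatalogTask.run below:
#
#     cols = pd.MultiIndex.from_tuples(
#         [('g', 'coord_ra'), ('g', 'coord_dec'), ('g', 'PsFlux'),
#          ('r', 'coord_ra'), ('r', 'coord_dec'), ('r', 'PsFlux')],
#         names=('band', 'column'))
#     df = pd.DataFrame([[10.0, -20.0, 1.5, 10.0, -20.0, 2.5]], columns=cols)
#     flat = flattenFilters(df, camelCase=True)
#     list(flat.columns)   # -> ['coord_ra', 'coord_dec', 'gPsFlux', 'rPsFlux']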
64 
65 class WriteObjectTableConnections(pipeBase.PipelineTaskConnections,
66  defaultTemplates={"coaddName": "deep"},
67  dimensions=("tract", "patch", "skymap")):
68  inputCatalogMeas = connectionTypes.Input(
69  doc="Catalog of source measurements on the deepCoadd.",
70  dimensions=("tract", "patch", "band", "skymap"),
71  storageClass="SourceCatalog",
72  name="{coaddName}Coadd_meas",
73  multiple=True
74  )
75  inputCatalogForcedSrc = connectionTypes.Input(
76  doc="Catalog of forced measurements (shape and position parameters held fixed) on the deepCoadd.",
77  dimensions=("tract", "patch", "band", "skymap"),
78  storageClass="SourceCatalog",
79  name="{coaddName}Coadd_forced_src",
80  multiple=True
81  )
82  inputCatalogRef = connectionTypes.Input(
83  doc="Catalog marking the primary detection (which band provides a good shape and position)"
84  " for each detection in deepCoadd_mergeDet.",
85  dimensions=("tract", "patch", "skymap"),
86  storageClass="SourceCatalog",
87  name="{coaddName}Coadd_ref"
88  )
89  outputCatalog = connectionTypes.Output(
90  doc="A vertical concatenation of the deepCoadd_{ref|meas|forced_src} catalogs, "
91  "stored as a DataFrame with a multi-level column index per-patch.",
92  dimensions=("tract", "patch", "skymap"),
93  storageClass="DataFrame",
94  name="{coaddName}Coadd_obj"
95  )
96 
97 
98 class WriteObjectTableConfig(pipeBase.PipelineTaskConfig,
99  pipelineConnections=WriteObjectTableConnections):
100  engine = pexConfig.Field(
101  dtype=str,
102  default="pyarrow",
103  doc="Parquet engine for writing (pyarrow or fastparquet)"
104  )
105  coaddName = pexConfig.Field(
106  dtype=str,
107  default="deep",
108  doc="Name of coadd"
109  )
110 
111 
112 class WriteObjectTableTask(CmdLineTask, pipeBase.PipelineTask):
113  """Write filter-merged source tables to parquet
114  """
115  _DefaultName = "writeObjectTable"
116  ConfigClass = WriteObjectTableConfig
117  RunnerClass = MergeSourcesRunner
118 
119  # Names of table datasets to be merged
120  inputDatasets = ('forced_src', 'meas', 'ref')
121 
122  # Tag of output dataset written by `MergeSourcesTask.write`
123  outputDataset = 'obj'
124 
125  def __init__(self, butler=None, schema=None, **kwargs):
126  # It is a shame that this class can't use the default init for CmdLineTask
127  # But to do so would require its own special task runner, which is many
128  # more lines of specialization, so this is how it is for now
129  super().__init__(**kwargs)
130 
131  def runDataRef(self, patchRefList):
132  """!
133  @brief Merge coadd sources from multiple bands. Calls @ref `run` which must be defined in
134  subclasses that inherit from MergeSourcesTask.
135  @param[in] patchRefList list of data references for each filter
136  """
137  catalogs = dict(self.readCatalog(patchRef) for patchRef in patchRefList)
138  dataId = patchRefList[0].dataId
139  mergedCatalog = self.run(catalogs, tract=dataId['tract'], patch=dataId['patch'])
140  self.write(patchRefList[0], ParquetTable(dataFrame=mergedCatalog))
141 
142  def runQuantum(self, butlerQC, inputRefs, outputRefs):
143  inputs = butlerQC.get(inputRefs)
144 
145  measDict = {ref.dataId['band']: {'meas': cat} for ref, cat in
146  zip(inputRefs.inputCatalogMeas, inputs['inputCatalogMeas'])}
147  forcedSourceDict = {ref.dataId['band']: {'forced_src': cat} for ref, cat in
148  zip(inputRefs.inputCatalogForcedSrc, inputs['inputCatalogForcedSrc'])}
149 
150  catalogs = {}
151  for band in measDict.keys():
152  catalogs[band] = {'meas': measDict[band]['meas'],
153  'forced_src': forcedSourceDict[band]['forced_src'],
154  'ref': inputs['inputCatalogRef']}
155  dataId = butlerQC.quantum.dataId
156  df = self.run(catalogs=catalogs, tract=dataId['tract'], patch=dataId['patch'])
157  outputs = pipeBase.Struct(outputCatalog=df)
158  butlerQC.put(outputs, outputRefs)
159 
160  @classmethod
161  def _makeArgumentParser(cls):
162  """Create a suitable ArgumentParser.
163 
164  We will use the ArgumentParser to get a list of data
165  references for patches; the RunnerClass will sort them into lists
166  of data references for the same patch.
167 
168  References the first of self.inputDatasets, rather than
169  self.inputDataset.
170  """
171  return makeMergeArgumentParser(cls._DefaultName, cls.inputDatasets[0])
172 
173  def readCatalog(self, patchRef):
174  """Read input catalogs
175 
176  Read all the input datasets given by the 'inputDatasets'
177  attribute.
178 
179  Parameters
180  ----------
181  patchRef : `lsst.daf.persistence.ButlerDataRef`
182  Data reference for patch
183 
184  Returns
185  -------
186  Tuple consisting of band name and a dict of catalogs, keyed by
187  dataset name
188  """
189  band = patchRef.get(self.config.coaddName + "Coadd_filterLabel", immediate=True).bandLabel
190  catalogDict = {}
191  for dataset in self.inputDatasets:
192  catalog = patchRef.get(self.config.coaddName + "Coadd_" + dataset, immediate=True)
193  self.log.info("Read %d sources from %s for band %s: %s" %
194  (len(catalog), dataset, band, patchRef.dataId))
195  catalogDict[dataset] = catalog
196  return band, catalogDict
197 
198  def run(self, catalogs, tract, patch):
199  """Merge multiple catalogs.
200 
201  Parameters
202  ----------
203  catalogs : `dict`
204  Mapping from filter names to dict of catalogs.
205  tract : int
206  tractId to use for the tractId column
207  patch : str
208  patchId to use for the patchId column
209 
210  Returns
211  -------
212  catalog : `pandas.DataFrame`
213  Merged dataframe
214  """
215 
216  dfs = []
217  for filt, tableDict in catalogs.items():
218  for dataset, table in tableDict.items():
219  # Convert afwTable to pandas DataFrame
220  df = table.asAstropy().to_pandas().set_index('id', drop=True)
221 
222  # Sort columns by name, to ensure matching schema among patches
223  df = df.reindex(sorted(df.columns), axis=1)
224  df['tractId'] = tract
225  df['patchId'] = patch
226 
227  # Make columns a 3-level MultiIndex
228  df.columns = pd.MultiIndex.from_tuples([(dataset, filt, c) for c in df.columns],
229  names=('dataset', 'band', 'column'))
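 # Each per-dataset frame now has 3-level columns like ('meas', 'g', 'base_PsfFlux_instFlux')
 # or ('ref', 'g', 'detect_isPrimary') (illustrative names); the join below lines these
 # frames up on the shared object id to build the merged catalog.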
230  dfs.append(df)
231 
232  catalog = functools.reduce(lambda d1, d2: d1.join(d2), dfs)
233  return catalog
234 
235  def write(self, patchRef, catalog):
236  """Write the output.
237 
238  Parameters
239  ----------
240  catalog : `ParquetTable`
241  Catalog to write
242  patchRef : `lsst.daf.persistence.ButlerDataRef`
243  Data reference for patch
244  """
245  patchRef.put(catalog, self.config.coaddName + "Coadd_" + self.outputDataset)
246  # since the filter isn't actually part of the data ID for the dataset we're saving,
247  # it's confusing to see it in the log message, even if the butler simply ignores it.
248  mergeDataId = patchRef.dataId.copy()
249  del mergeDataId["filter"]
250  self.log.info("Wrote merged catalog: %s" % (mergeDataId,))
251 
252  def writeMetadata(self, dataRefList):
253  """No metadata to write, and not sure how to write it for a list of dataRefs.
254  """
255  pass
256 
257 
258 class WriteSourceTableConnections(pipeBase.PipelineTaskConnections,
259  defaultTemplates={"catalogType": ""},
260  dimensions=("instrument", "visit", "detector")):
261 
262  catalog = connectionTypes.Input(
263  doc="Input full-depth catalog of sources produced by CalibrateTask",
264  name="{catalogType}src",
265  storageClass="SourceCatalog",
266  dimensions=("instrument", "visit", "detector")
267  )
268  outputCatalog = connectionTypes.Output(
269  doc="Catalog of sources, `src` in Parquet format",
270  name="{catalogType}source",
271  storageClass="DataFrame",
272  dimensions=("instrument", "visit", "detector")
273  )
274 
275 
276 class WriteSourceTableConfig(pipeBase.PipelineTaskConfig,
277  pipelineConnections=WriteSourceTableConnections):
278  doApplyExternalPhotoCalib = pexConfig.Field(
279  dtype=bool,
280  default=False,
281  doc=("Add local photoCalib columns from the calexp.photoCalib? Should only be set True when "
282  "generating Source Tables from older src tables that do not already have local calib columns")
283  )
284  doApplyExternalSkyWcs = pexConfig.Field(
285  dtype=bool,
286  default=False,
287  doc=("Add local WCS columns from the calexp.wcs? Should only be set True when "
288  "generating Source Tables from older src tables that do not already have local calib columns")
289  )
290 
291 
292 class WriteSourceTableTask(CmdLineTask, pipeBase.PipelineTask):
293  """Write source table to parquet
294  """
295  _DefaultName = "writeSourceTable"
296  ConfigClass = WriteSourceTableConfig
297 
298  def runDataRef(self, dataRef):
299  src = dataRef.get('src')
300  if self.config.doApplyExternalPhotoCalib or self.config.doApplyExternalSkyWcs:
301  src = self.addCalibColumns(src, dataRef)
302 
303  ccdVisitId = dataRef.get('ccdExposureId')
304  result = self.run(src, ccdVisitId=ccdVisitId)
305  dataRef.put(result.table, 'source')
306 
307  def runQuantum(self, butlerQC, inputRefs, outputRefs):
308  inputs = butlerQC.get(inputRefs)
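 # The next line packs the (visit, detector) data ID into a single integer via the
 # butler's "visit_detector" dimension packer; MakeCcdVisitTableTask below uses the same
 # packer, so Source rows can be joined to the ccdVisitTable on this ccdVisitId column.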
309  inputs['ccdVisitId'] = butlerQC.quantum.dataId.pack("visit_detector")
310  result = self.run(**inputs).table
311  outputs = pipeBase.Struct(outputCatalog=result.toDataFrame())
312  butlerQC.put(outputs, outputRefs)
313 
314  def run(self, catalog, ccdVisitId=None):
315  """Convert `src` catalog to parquet
316 
317  Parameters
318  ----------
319  catalog: `afwTable.SourceCatalog`
320  catalog to be converted
321  ccdVisitId: `int`
322  ccdVisitId to be added as a column
323 
324  Returns
325  -------
326  result : `lsst.pipe.base.Struct`
327  ``table``
328  `ParquetTable` version of the input catalog
329  """
330  self.log.info("Generating parquet table from src catalog %s", ccdVisitId)
331  df = catalog.asAstropy().to_pandas().set_index('id', drop=True)
332  df['ccdVisitId'] = ccdVisitId
333  return pipeBase.Struct(table=ParquetTable(dataFrame=df))
334 
335  def addCalibColumns(self, catalog, dataRef):
336  """Add columns with local calibration evaluated at each centroid
337 
338  for backwards compatibility with old repos.
339  This exists for the purpose of converting old src catalogs
340  (which don't have the expected local calib columns) to Source Tables.
341 
342  Parameters
343  ----------
344  catalog: `afwTable.SourceCatalog`
345  catalog to which calib columns will be added
346  dataRef: `lsst.daf.persistence.ButlerDataRef`
347  for fetching the calibs from disk.
348 
349  Returns
350  -------
351  newCat: `afwTable.SourceCatalog`
352  Source Catalog with requested local calib columns
353  """
354  mapper = afwTable.SchemaMapper(catalog.schema)
355  measureConfig = SingleFrameMeasurementTask.ConfigClass()
356  measureConfig.doReplaceWithNoise = False
357 
358  # Just need the WCS or the PhotoCalib attached to an exposure
359  exposure = dataRef.get('calexp_sub',
360  bbox=lsst.geom.Box2I(lsst.geom.Point2I(0, 0), lsst.geom.Point2I(0, 0)))
361 
362  mapper = afwTable.SchemaMapper(catalog.schema)
363  mapper.addMinimalSchema(catalog.schema, True)
364  schema = mapper.getOutputSchema()
365 
366  exposureIdInfo = dataRef.get("expIdInfo")
367  measureConfig.plugins.names = []
368  if self.config.doApplyExternalSkyWcs:
369  plugin = 'base_LocalWcs'
370  if plugin in schema:
371  raise RuntimeError(f"{plugin} already in src catalog. Set doApplyExternalSkyWcs=False")
372  else:
373  measureConfig.plugins.names.add(plugin)
374 
375  if self.config.doApplyExternalPhotoCalib:
376  plugin = 'base_LocalPhotoCalib'
377  if plugin in schema:
378  raise RuntimeError(f"{plugin} already in src catalog. Set doApplyExternalPhotoCalib=False")
379  else:
380  measureConfig.plugins.names.add(plugin)
381 
382  measurement = SingleFrameMeasurementTask(config=measureConfig, schema=schema)
383  newCat = afwTable.SourceCatalog(schema)
384  newCat.extend(catalog, mapper=mapper)
385  measurement.run(measCat=newCat, exposure=exposure, exposureId=exposureIdInfo.expId)
386  return newCat
387 
388  def writeMetadata(self, dataRef):
389  """No metadata to write.
390  """
391  pass
392 
393  @classmethod
394  def _makeArgumentParser(cls):
395  parser = ArgumentParser(name=cls._DefaultName)
396  parser.add_id_argument("--id", 'src',
397  help="data ID, e.g. --id visit=12345 ccd=0")
398  return parser
399 
400 
401 class PostprocessAnalysis(object):
402  """Calculate columns from ParquetTable
403 
404  This object manages and organizes an arbitrary set of computations
405  on a catalog. The catalog is defined by a
406  `lsst.pipe.tasks.parquetTable.ParquetTable` object (or list thereof), such as a
407  `deepCoadd_obj` dataset, and the computations are defined by a collection
408  of `lsst.pipe.tasks.functor.Functor` objects (or, equivalently,
409  a `CompositeFunctor`).
410 
411  After the object is initialized, accessing the `.df` attribute (which
412  holds the `pandas.DataFrame` containing the results of the calculations) triggers
413  computation of said dataframe.
414 
415  One of the conveniences of using this object is the ability to define a desired common
416  filter for all functors. This enables the same functor collection to be passed to
417  several different `PostprocessAnalysis` objects without having to change the original
418  functor collection, since the `filt` keyword argument of this object triggers an
419  overwrite of the `filt` property for all functors in the collection.
420 
421  This object also allows a list of refFlags to be passed, and defines a set of default
422  refFlags that are always included even if not requested.
423 
424  If a list of `ParquetTable` objects is passed, rather than a single one, then the
425  calculations will be mapped over all the input catalogs. In principle, it should
426  be straightforward to parallelize this activity, but initial tests have failed
427  (see TODO in code comments).
428 
429  Parameters
430  ----------
431  parq : `lsst.pipe.tasks.ParquetTable` (or list of such)
432  Source catalog(s) for computation
433 
434  functors : `list`, `dict`, or `lsst.pipe.tasks.functors.CompositeFunctor`
435  Computations to do (functors that act on `parq`).
436  If a dict, the output
437  DataFrame will have columns keyed accordingly.
438  If a list, the column keys will come from the
439  `.shortname` attribute of each functor.
440 
441  filt : `str` (optional)
442  Filter in which to calculate. If provided,
443  this will overwrite any existing `.filt` attribute
444  of the provided functors.
445 
446  flags : `list` (optional)
447  List of flags (per-band) to include in output table.
448 
449  refFlags : `list` (optional)
450  List of refFlags (only reference band) to include in output table.
451 
452 
453  """
454  _defaultRefFlags = []
455  _defaultFuncs = (('coord_ra', RAColumn()),
456  ('coord_dec', DecColumn()))
457 
458  def __init__(self, parq, functors, filt=None, flags=None, refFlags=None):
459  self.parq = parq
460  self.functors = functors
461 
462  self.filt = filt
463  self.flags = list(flags) if flags is not None else []
464  self.refFlags = list(self._defaultRefFlags)
465  if refFlags is not None:
466  self.refFlags += list(refFlags)
467 
468  self._df = None
469 
470  @property
471  def defaultFuncs(self):
472  funcs = dict(self._defaultFuncs)
473  return funcs
474 
475  @property
476  def func(self):
477  additionalFuncs = self.defaultFuncs
478  additionalFuncs.update({flag: Column(flag, dataset='ref') for flag in self.refFlags})
479  additionalFuncs.update({flag: Column(flag, dataset='meas') for flag in self.flags})
480 
481  if isinstance(self.functors, CompositeFunctor):
482  func = self.functors
483  else:
484  func = CompositeFunctor(self.functors)
485 
486  func.funcDict.update(additionalFuncs)
487  func.filt = self.filt
488 
489  return func
490 
491  @property
492  def noDupCols(self):
493  return [name for name, func in self.func.funcDict.items() if func.noDup or func.dataset == 'ref']
494 
495  @property
496  def df(self):
497  if self._df is None:
498  self.compute()
499  return self._df
500 
501  def compute(self, dropna=False, pool=None):
502  # map over multiple parquet tables
503  if type(self.parq) in (list, tuple):
504  if pool is None:
505  dflist = [self.func(parq, dropna=dropna) for parq in self.parq]
506  else:
507  # TODO: Figure out why this doesn't work (pyarrow pickling issues?)
508  dflist = pool.map(functools.partial(self.func, dropna=dropna), self.parq)
509  self._df = pd.concat(dflist)
510  else:
511  self._df = self.func(self.parq, dropna=dropna)
512 
513  return self._df
514 
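# --- Usage sketch (illustrative only; `parq` stands for a deepCoadd_obj ParquetTable
# already read from the butler, and the column/flag names are just examples):
#
#     funcs = {'psFlux': Column('base_PsfFlux_instFlux', dataset='meas')}
#     analysis = PostprocessAnalysis(parq, funcs, filt='i', refFlags=['detect_isPrimary'])
#     df = analysis.df   # first access triggers compute(); coord_ra/coord_dec are always included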
515 
516 class TransformCatalogBaseConnections(pipeBase.PipelineTaskConnections,
517  dimensions=()):
518  """Expected Connections for subclasses of TransformCatalogBaseTask.
519 
520  Must be subclassed.
521  """
522  inputCatalog = connectionTypes.Input(
523  name="",
524  storageClass="DataFrame",
525  )
526  outputCatalog = connectionTypes.Output(
527  name="",
528  storageClass="DataFrame",
529  )
530 
531 
532 class TransformCatalogBaseConfig(pipeBase.PipelineTaskConfig,
533  pipelineConnections=TransformCatalogBaseConnections):
534  functorFile = pexConfig.Field(
535  dtype=str,
536  doc='Path to YAML file specifying functors to be computed',
537  default=None,
538  optional=True
539  )
540 
541 
542 class TransformCatalogBaseTask(CmdLineTask, pipeBase.PipelineTask):
543  """Base class for transforming/standardizing a catalog
544 
545  by applying functors that convert units and apply calibrations.
546  The purpose of this task is to perform a set of computations on
547  an input `ParquetTable` dataset (such as `deepCoadd_obj`) and write the
548  results to a new dataset (which needs to be declared in an `outputDataset`
549  attribute).
550 
551  The calculations to be performed are defined in a YAML file that specifies
552  a set of functors to be computed, provided as
553  a `--functorFile` config parameter. An example of such a YAML file
554  is the following:
555 
556  funcs:
557  psfMag:
558  functor: Mag
559  args:
560  - base_PsfFlux
561  filt: HSC-G
562  dataset: meas
563  cmodel_magDiff:
564  functor: MagDiff
565  args:
566  - modelfit_CModel
567  - base_PsfFlux
568  filt: HSC-G
569  gauss_magDiff:
570  functor: MagDiff
571  args:
572  - base_GaussianFlux
573  - base_PsfFlux
574  filt: HSC-G
575  count:
576  functor: Column
577  args:
578  - base_InputCount_value
579  filt: HSC-G
580  deconvolved_moments:
581  functor: DeconvolvedMoments
582  filt: HSC-G
583  dataset: forced_src
584  refFlags:
585  - calib_psfUsed
586  - merge_measurement_i
587  - merge_measurement_r
588  - merge_measurement_z
589  - merge_measurement_y
590  - merge_measurement_g
591  - base_PixelFlags_flag_inexact_psfCenter
592  - detect_isPrimary
593 
594  The names for each entry under "funcs" will become the names of columns in the
595  output dataset. All the functors referenced are defined in `lsst.pipe.tasks.functors`.
596  Positional arguments to be passed to each functor are in the `args` list,
597  and any additional entries for each column other than "functor" or "args" (e.g., `'filt'`,
598  `'dataset'`) are treated as keyword arguments to be passed to the functor initialization.
599 
600  The "refFlags" entry is a shortcut for a set of `Column` functors that keep the original
601  column names and are taken from the `'ref'` dataset.
602 
603  The "flags" entry will be expanded out per band.
604 
605  This task uses the `lsst.pipe.tasks.postprocess.PostprocessAnalysis` object
606  to organize and execute the calculations.
607 
608  """
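 # --- Illustrative sketch of how a functor file such as the one in the docstring above is
 # loaded (this mirrors __init__ below; 'transforms.yaml' is a placeholder path):
 #
 #     funcs = CompositeFunctor.from_file('transforms.yaml')
 #     funcs.update(dict(PostprocessAnalysis._defaultFuncs))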
609  @property
610  def _DefaultName(self):
611  raise NotImplementedError('Subclass must define "_DefaultName" attribute')
612 
613  @property
614  def outputDataset(self):
615  raise NotImplementedError('Subclass must define "outputDataset" attribute')
616 
617  @property
618  def inputDataset(self):
619  raise NotImplementedError('Subclass must define "inputDataset" attribute')
620 
621  @property
622  def ConfigClass(self):
623  raise NotImplementedError('Subclass must define "ConfigClass" attribute')
624 
625  def __init__(self, *args, **kwargs):
626  super().__init__(*args, **kwargs)
627  if self.config.functorFile:
628  self.log.info('Loading transform functor definitions from %s',
629  self.config.functorFile)
630  self.funcs = CompositeFunctor.from_file(self.config.functorFile)
631  self.funcs.update(dict(PostprocessAnalysis._defaultFuncs))
632  else:
633  self.funcs = None
634 
635  def runQuantum(self, butlerQC, inputRefs, outputRefs):
636  inputs = butlerQC.get(inputRefs)
637  if self.funcs is None:
638  raise ValueError("config.functorFile is None. "
639  "Must be a valid path to yaml in order to run Task as a PipelineTask.")
640  result = self.run(parq=inputs['inputCatalog'], funcs=self.funcs,
641  dataId=outputRefs.outputCatalog.dataId.full)
642  outputs = pipeBase.Struct(outputCatalog=result)
643  butlerQC.put(outputs, outputRefs)
644 
645  def runDataRef(self, dataRef):
646  parq = dataRef.get()
647  if self.funcs is None:
648  raise ValueError("config.functorFile is None. "
649  "Must be a valid path to yaml in order to run as a CommandlineTask.")
650  df = self.run(parq, funcs=self.funcs, dataId=dataRef.dataId)
651  self.write(df, dataRef)
652  return df
653 
654  def run(self, parq, funcs=None, dataId=None, band=None):
655  """Do postprocessing calculations
656 
657  Takes a `ParquetTable` object and dataId,
658  returns a dataframe with results of postprocessing calculations.
659 
660  Parameters
661  ----------
662  parq : `lsst.pipe.tasks.parquetTable.ParquetTable`
663  ParquetTable from which calculations are done.
664  funcs : `lsst.pipe.tasks.functors.Functors`
665  Functors to apply to the table's columns
666  dataId : dict, optional
667  Used to add a `patchId` column to the output dataframe.
668  band : `str`, optional
669  Filter band that is being processed.
670 
671  Returns
672  -------
673  `pandas.DataFrame`
674 
675  """
676  self.log.info("Transforming/standardizing the source table dataId: %s", dataId)
677 
678  df = self.transform(band, parq, funcs, dataId).df
679  self.log.info("Made a table of %d columns and %d rows", len(df.columns), len(df))
680  return df
681 
682  def getFunctors(self):
683  return self.funcs
684 
685  def getAnalysis(self, parq, funcs=None, band=None):
686  if funcs is None:
687  funcs = self.funcs
688  analysis = PostprocessAnalysis(parq, funcs, filt=band)
689  return analysis
690 
691  def transform(self, band, parq, funcs, dataId):
692  analysis = self.getAnalysis(parq, funcs=funcs, band=band)
693  df = analysis.df
694  if dataId is not None:
695  for key, value in dataId.items():
696  df[str(key)] = value
697 
698  return pipeBase.Struct(
699  df=df,
700  analysis=analysis
701  )
702 
703  def write(self, df, parqRef):
704  parqRef.put(ParquetTable(dataFrame=df), self.outputDataset)
705 
706  def writeMetadata(self, dataRef):
707  """No metadata to write.
708  """
709  pass
710 
711 
712 class TransformObjectCatalogConnections(pipeBase.PipelineTaskConnections,
713  defaultTemplates={"coaddName": "deep"},
714  dimensions=("tract", "patch", "skymap")):
715  inputCatalog = connectionTypes.Input(
716  doc="The vertical concatenation of the deepCoadd_{ref|meas|forced_src} catalogs, "
717  "stored as a DataFrame with a multi-level column index per-patch.",
718  dimensions=("tract", "patch", "skymap"),
719  storageClass="DataFrame",
720  name="{coaddName}Coadd_obj",
721  deferLoad=True,
722  )
723  outputCatalog = connectionTypes.Output(
724  doc="Per-Patch Object Table of columns transformed from the deepCoadd_obj table per the standard "
725  "data model.",
726  dimensions=("tract", "patch", "skymap"),
727  storageClass="DataFrame",
728  name="objectTable"
729  )
730 
731 
732 class TransformObjectCatalogConfig(TransformCatalogBaseConfig,
733  pipelineConnections=TransformObjectCatalogConnections):
734  coaddName = pexConfig.Field(
735  dtype=str,
736  default="deep",
737  doc="Name of coadd"
738  )
739  # TODO: remove in DM-27177
740  filterMap = pexConfig.DictField(
741  keytype=str,
742  itemtype=str,
743  default={},
744  doc=("Dictionary mapping full filter name to short one for column name munging. "
745  "These filters determine the output columns no matter what filters the "
746  "input data actually contain."),
747  deprecated=("Coadds are now identified by the band, so this transform is unused. "
748  "Will be removed after v22.")
749  )
750  outputBands = pexConfig.ListField(
751  dtype=str,
752  default=None,
753  optional=True,
754  doc=("These bands and only these bands will appear in the output,"
755  " NaN-filled if the input does not include them."
756  " If None, then use all bands found in the input.")
757  )
758  camelCase = pexConfig.Field(
759  dtype=bool,
760  default=True,
761  doc=("Write per-band column names with camelCase, else underscore. "
762  "For example: gPsFlux instead of g_PsFlux.")
763  )
764  multilevelOutput = pexConfig.Field(
765  dtype=bool,
766  default=False,
767  doc=("Whether the results dataframe should have a multilevel column index (True) or be flat "
768  "and name-munged (False).")
769  )
770 
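# --- Example config overrides for TransformObjectCatalogTask (illustrative), e.g. in a
# pipeline configuration file:
#
#     config.outputBands = ["g", "r", "i"]   # bands missing from the input are NaN-filled
#     config.camelCase = True                # per-band columns named like gPsFlux, not g_PsFlux
#     config.multilevelOutput = False        # write flat, name-munged columns (the default)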
771 
772 class TransformObjectCatalogTask(TransformCatalogBaseTask):
773  """Produce a flattened Object Table to match the format specified in
774  sdm_schemas.
775 
776  Do the same set of postprocessing calculations on all bands
777 
778  This is identical to `TransformCatalogBaseTask`, except that it does the
779  specified functor calculations for all filters present in the
780  input `deepCoadd_obj` table. Any specific `"filt"` keywords specified
781  by the YAML file will be superseded.
782  """
783  _DefaultName = "transformObjectCatalog"
784  ConfigClass = TransformObjectCatalogConfig
785 
786  # Used by Gen 2 runDataRef only:
787  inputDataset = 'deepCoadd_obj'
788  outputDataset = 'objectTable'
789 
790  @classmethod
791  def _makeArgumentParser(cls):
792  parser = ArgumentParser(name=cls._DefaultName)
793  parser.add_id_argument("--id", cls.inputDataset,
794  ContainerClass=CoaddDataIdContainer,
795  help="data ID, e.g. --id tract=12345 patch=1,2")
796  return parser
797 
798  def run(self, parq, funcs=None, dataId=None, band=None):
799  # NOTE: band kwarg is ignored here.
800  dfDict = {}
801  analysisDict = {}
802  templateDf = pd.DataFrame()
803 
804  if isinstance(parq, DeferredDatasetHandle):
805  columns = parq.get(component='columns')
806  inputBands = columns.unique(level=1).values
807  else:
808  inputBands = parq.columnLevelNames['band']
809 
810  outputBands = self.config.outputBands if self.config.outputBands else inputBands
811 
812  # Perform transform for data of filters that exist in parq.
813  for inputBand in inputBands:
814  if inputBand not in outputBands:
815  self.log.info("Ignoring %s band data in the input", inputBand)
816  continue
817  self.log.info("Transforming the catalog of band %s", inputBand)
818  result = self.transform(inputBand, parq, funcs, dataId)
819  dfDict[inputBand] = result.df
820  analysisDict[inputBand] = result.analysis
821  if templateDf.empty:
822  templateDf = result.df
823 
824  # Fill NaNs in columns of other wanted bands
825  for filt in outputBands:
826  if filt not in dfDict:
827  self.log.info("Adding empty columns for band %s", filt)
828  dfDict[filt] = pd.DataFrame().reindex_like(templateDf)
829 
830  # This makes a multilevel column index, with band as first level
831  df = pd.concat(dfDict, axis=1, names=['band', 'column'])
832 
833  if not self.config.multilevelOutput:
834  noDupCols = list(set.union(*[set(v.noDupCols) for v in analysisDict.values()]))
835  if dataId is not None:
836  noDupCols += list(dataId.keys())
837  df = flattenFilters(df, noDupCols=noDupCols, camelCase=self.config.camelCase,
838  inputBands=inputBands)
839 
840  self.log.info("Made a table of %d columns and %d rows", len(df.columns), len(df))
841  return df
842 
843 
844 class TractObjectDataIdContainer(CoaddDataIdContainer):
845 
846  def makeDataRefList(self, namespace):
847  """Make self.refList from self.idList
848 
849  Generate a list of data references given tract and/or patch.
850  This was adapted from `TractQADataIdContainer`, which was
851  `TractDataIdContainer` modified to not require "filter".
852  Only existing dataRefs are returned.
853  """
854  def getPatchRefList(tract):
855  return [namespace.butler.dataRef(datasetType=self.datasetType,
856  tract=tract.getId(),
857  patch="%d,%d" % patch.getIndex()) for patch in tract]
858 
859  tractRefs = defaultdict(list) # Data references for each tract
860  for dataId in self.idList:
861  skymap = self.getSkymap(namespace)
862 
863  if "tract" in dataId:
864  tractId = dataId["tract"]
865  if "patch" in dataId:
866  tractRefs[tractId].append(namespace.butler.dataRef(datasetType=self.datasetType,
867  tract=tractId,
868  patch=dataId['patch']))
869  else:
870  tractRefs[tractId] += getPatchRefList(skymap[tractId])
871  else:
872  tractRefs = dict((tract.getId(), tractRefs.get(tract.getId(), []) + getPatchRefList(tract))
873  for tract in skymap)
874  outputRefList = []
875  for tractRefList in tractRefs.values():
876  existingRefs = [ref for ref in tractRefList if ref.datasetExists()]
877  outputRefList.append(existingRefs)
878 
879  self.refList = outputRefList
880 
881 
882 class ConsolidateObjectTableConnections(pipeBase.PipelineTaskConnections,
883  dimensions=("tract", "skymap")):
884  inputCatalogs = connectionTypes.Input(
885  doc="Per-Patch objectTables conforming to the standard data model.",
886  name="objectTable",
887  storageClass="DataFrame",
888  dimensions=("tract", "patch", "skymap"),
889  multiple=True,
890  )
891  outputCatalog = connectionTypes.Output(
892  doc="Per-tract horizontal concatenation of the input objectTables",
893  name="objectTable_tract",
894  storageClass="DataFrame",
895  dimensions=("tract", "skymap"),
896  )
897 
898 
899 class ConsolidateObjectTableConfig(pipeBase.PipelineTaskConfig,
900  pipelineConnections=ConsolidateObjectTableConnections):
901  coaddName = pexConfig.Field(
902  dtype=str,
903  default="deep",
904  doc="Name of coadd"
905  )
906 
907 
908 class ConsolidateObjectTableTask(CmdLineTask, pipeBase.PipelineTask):
909  """Write patch-merged source tables to a tract-level parquet file
910 
911  Concatenates the `objectTable` list into a per-tract `objectTable_tract`
912  """
913  _DefaultName = "consolidateObjectTable"
914  ConfigClass = ConsolidateObjectTableConfig
915 
916  inputDataset = 'objectTable'
917  outputDataset = 'objectTable_tract'
918 
919  def runQuantum(self, butlerQC, inputRefs, outputRefs):
920  inputs = butlerQC.get(inputRefs)
921  self.log.info("Concatenating %s per-patch Object Tables",
922  len(inputs['inputCatalogs']))
923  df = pd.concat(inputs['inputCatalogs'])
924  butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs)
925 
926  @classmethod
927  def _makeArgumentParser(cls):
928  parser = ArgumentParser(name=cls._DefaultName)
929 
930  parser.add_id_argument("--id", cls.inputDataset,
931  help="data ID, e.g. --id tract=12345",
932  ContainerClass=TractObjectDataIdContainer)
933  return parser
934 
935  def runDataRef(self, patchRefList):
936  df = pd.concat([patchRef.get().toDataFrame() for patchRef in patchRefList])
937  patchRefList[0].put(ParquetTable(dataFrame=df), self.outputDataset)
938 
939  def writeMetadata(self, dataRef):
940  """No metadata to write.
941  """
942  pass
943 
944 
945 class TransformSourceTableConnections(pipeBase.PipelineTaskConnections,
946  defaultTemplates={"catalogType": ""},
947  dimensions=("instrument", "visit", "detector")):
948 
949  inputCatalog = connectionTypes.Input(
950  doc="Wide input catalog of sources produced by WriteSourceTableTask",
951  name="{catalogType}source",
952  storageClass="DataFrame",
953  dimensions=("instrument", "visit", "detector"),
954  deferLoad=True
955  )
956  outputCatalog = connectionTypes.Output(
957  doc="Narrower, per-detector Source Table transformed and converted per a "
958  "specified set of functors",
959  name="{catalogType}sourceTable",
960  storageClass="DataFrame",
961  dimensions=("instrument", "visit", "detector")
962  )
963 
964 
965 class TransformSourceTableConfig(TransformCatalogBaseConfig,
966  pipelineConnections=TransformSourceTableConnections):
967  pass
968 
969 
970 class TransformSourceTableTask(TransformCatalogBaseTask):
971  """Transform/standardize a source catalog
972  """
973  _DefaultName = "transformSourceTable"
974  ConfigClass = TransformSourceTableConfig
975 
976  inputDataset = 'source'
977  outputDataset = 'sourceTable'
978 
979  @classmethod
980  def _makeArgumentParser(cls):
981  parser = ArgumentParser(name=cls._DefaultName)
982  parser.add_id_argument("--id", datasetType=cls.inputDataset,
983  level="sensor",
984  help="data ID, e.g. --id visit=12345 ccd=0")
985  return parser
986 
987  def runDataRef(self, dataRef):
988  """Override to specify band label to run()."""
989  parq = dataRef.get()
990  funcs = self.getFunctors()
991  band = dataRef.get("calexp_filterLabel", immediate=True).bandLabel
992  df = self.run(parq, funcs=funcs, dataId=dataRef.dataId, band=band)
993  self.write(df, dataRef)
994  return df
995 
996 
997 class ConsolidateVisitSummaryConnections(pipeBase.PipelineTaskConnections,
998  dimensions=("instrument", "visit",),
999  defaultTemplates={"calexpType": ""}):
1000  calexp = connectionTypes.Input(
1001  doc="Processed exposures used for metadata",
1002  name="{calexpType}calexp",
1003  storageClass="ExposureF",
1004  dimensions=("instrument", "visit", "detector"),
1005  deferLoad=True,
1006  multiple=True,
1007  )
1008  visitSummary = connectionTypes.Output(
1009  doc=("Per-visit consolidated exposure metadata. These catalogs use "
1010  "detector id for the id and are sorted for fast lookups of a "
1011  "detector."),
1012  name="{calexpType}visitSummary",
1013  storageClass="ExposureCatalog",
1014  dimensions=("instrument", "visit"),
1015  )
1016 
1017 
1018 class ConsolidateVisitSummaryConfig(pipeBase.PipelineTaskConfig,
1019  pipelineConnections=ConsolidateVisitSummaryConnections):
1020  """Config for ConsolidateVisitSummaryTask"""
1021  pass
1022 
1023 
1024 class ConsolidateVisitSummaryTask(pipeBase.PipelineTask, pipeBase.CmdLineTask):
1025  """Task to consolidate per-detector visit metadata.
1026 
1027  This task aggregates the following metadata from all the detectors in a
1028  single visit into an exposure catalog:
1029  - The visitInfo.
1030  - The wcs.
1031  - The photoCalib.
1032  - The physical_filter and band (if available).
1033  - The psf size, shape, and effective area at the center of the detector.
1034  - The corners of the bounding box in right ascension/declination.
1035 
1036  Other quantities such as Detector, Psf, ApCorrMap, and TransmissionCurve
1037  are not persisted here because of storage concerns, and because of their
1038  limited utility as summary statistics.
1039 
1040  Tests for this task are performed in ci_hsc_gen3.
1041  """
1042  _DefaultName = "consolidateVisitSummary"
1043  ConfigClass = ConsolidateVisitSummaryConfig
1044 
1045  @classmethod
1046  def _makeArgumentParser(cls):
1047  parser = ArgumentParser(name=cls._DefaultName)
1048 
1049  parser.add_id_argument("--id", "calexp",
1050  help="data ID, e.g. --id visit=12345",
1051  ContainerClass=VisitDataIdContainer)
1052  return parser
1053 
1054  def writeMetadata(self, dataRef):
1055  """No metadata to persist, so override to remove metadata persistence.
1056  """
1057  pass
1058 
1059  def writeConfig(self, butler, clobber=False, doBackup=True):
1060  """No config to persist, so override to remove config persistence.
1061  """
1062  pass
1063 
1064  def runDataRef(self, dataRefList):
1065  visit = dataRefList[0].dataId['visit']
1066 
1067  self.log.debug("Concatenating metadata from %d per-detector calexps (visit %d)" %
1068  (len(dataRefList), visit))
1069 
1070  expCatalog = self._combineExposureMetadata(visit, dataRefList, isGen3=False)
1071 
1072  dataRefList[0].put(expCatalog, 'visitSummary', visit=visit)
1073 
1074  def runQuantum(self, butlerQC, inputRefs, outputRefs):
1075  dataRefs = butlerQC.get(inputRefs.calexp)
1076  visit = dataRefs[0].dataId.byName()['visit']
1077 
1078  self.log.debug("Concatenating metadata from %d per-detector calexps (visit %d)" %
1079  (len(dataRefs), visit))
1080 
1081  expCatalog = self._combineExposureMetadata(visit, dataRefs)
1082 
1083  butlerQC.put(expCatalog, outputRefs.visitSummary)
1084 
1085  def _combineExposureMetadata(self, visit, dataRefs, isGen3=True):
1086  """Make a combined exposure catalog from a list of dataRefs.
1087  These dataRefs must point to exposures with wcs, summaryStats,
1088  and other visit metadata.
1089 
1090  Parameters
1091  ----------
1092  visit : `int`
1093  Visit identification number.
1094  dataRefs : `list`
1095  List of dataRefs in visit. May be list of
1096  `lsst.daf.persistence.ButlerDataRef` (Gen2) or
1097  `lsst.daf.butler.DeferredDatasetHandle` (Gen3).
1098  isGen3 : `bool`, optional
1099  Specifies if this is a Gen3 list of datarefs.
1100 
1101  Returns
1102  -------
1103  visitSummary : `lsst.afw.table.ExposureCatalog`
1104  Exposure catalog with per-detector summary information.
1105  """
1106  schema = self._makeVisitSummarySchema()
1107  cat = afwTable.ExposureCatalog(schema)
1108  cat.resize(len(dataRefs))
1109 
1110  cat['visit'] = visit
1111 
1112  for i, dataRef in enumerate(dataRefs):
1113  if isGen3:
1114  visitInfo = dataRef.get(component='visitInfo')
1115  filterLabel = dataRef.get(component='filterLabel')
1116  summaryStats = dataRef.get(component='summaryStats')
1117  detector = dataRef.get(component='detector')
1118  wcs = dataRef.get(component='wcs')
1119  photoCalib = dataRef.get(component='photoCalib')
1120  detector = dataRef.get(component='detector')
1121  bbox = dataRef.get(component='bbox')
1122  validPolygon = dataRef.get(component='validPolygon')
1123  else:
1124  # Note that we need to read the calexp because there is
1125  # no magic access to the psf except through the exposure.
1126  gen2_read_bbox = lsst.geom.BoxI(lsst.geom.PointI(0, 0), lsst.geom.PointI(1, 1))
1127  exp = dataRef.get(datasetType='calexp_sub', bbox=gen2_read_bbox)
1128  visitInfo = exp.getInfo().getVisitInfo()
1129  filterLabel = dataRef.get("calexp_filterLabel")
1130  summaryStats = exp.getInfo().getSummaryStats()
1131  wcs = exp.getWcs()
1132  photoCalib = exp.getPhotoCalib()
1133  detector = exp.getDetector()
1134  bbox = dataRef.get(datasetType='calexp_bbox')
1135  validPolygon = exp.getInfo().getValidPolygon()
1136 
1137  rec = cat[i]
1138  rec.setBBox(bbox)
1139  rec.setVisitInfo(visitInfo)
1140  rec.setWcs(wcs)
1141  rec.setPhotoCalib(photoCalib)
1142  rec.setValidPolygon(validPolygon)
1143 
1144  rec['physical_filter'] = filterLabel.physicalLabel if filterLabel.hasPhysicalLabel() else ""
1145  rec['band'] = filterLabel.bandLabel if filterLabel.hasBandLabel() else ""
1146  rec.setId(detector.getId())
1147  rec['psfSigma'] = summaryStats.psfSigma
1148  rec['psfIxx'] = summaryStats.psfIxx
1149  rec['psfIyy'] = summaryStats.psfIyy
1150  rec['psfIxy'] = summaryStats.psfIxy
1151  rec['psfArea'] = summaryStats.psfArea
1152  rec['raCorners'][:] = summaryStats.raCorners
1153  rec['decCorners'][:] = summaryStats.decCorners
1154  rec['ra'] = summaryStats.ra
1155  rec['decl'] = summaryStats.decl
1156  rec['zenithDistance'] = summaryStats.zenithDistance
1157  rec['zeroPoint'] = summaryStats.zeroPoint
1158  rec['skyBg'] = summaryStats.skyBg
1159  rec['skyNoise'] = summaryStats.skyNoise
1160  rec['meanVar'] = summaryStats.meanVar
1161  rec['astromOffsetMean'] = summaryStats.astromOffsetMean
1162  rec['astromOffsetStd'] = summaryStats.astromOffsetStd
1163 
1164  metadata = dafBase.PropertyList()
1165  metadata.add("COMMENT", "Catalog id is detector id, sorted.")
1166  # We are looping over existing datarefs, so the following is true
1167  metadata.add("COMMENT", "Only detectors with data have entries.")
1168  cat.setMetadata(metadata)
1169 
1170  cat.sort()
1171  return cat
1172 
1173  def _makeVisitSummarySchema(self):
1174  """Make the schema for the visitSummary catalog."""
1175  schema = afwTable.ExposureTable.makeMinimalSchema()
1176  schema.addField('visit', type='I', doc='Visit number')
1177  schema.addField('physical_filter', type='String', size=32, doc='Physical filter')
1178  schema.addField('band', type='String', size=32, doc='Name of band')
1179  schema.addField('psfSigma', type='F',
1180  doc='PSF model second-moments determinant radius (center of chip) (pixel)')
1181  schema.addField('psfArea', type='F',
1182  doc='PSF model effective area (center of chip) (pixel**2)')
1183  schema.addField('psfIxx', type='F',
1184  doc='PSF model Ixx (center of chip) (pixel**2)')
1185  schema.addField('psfIyy', type='F',
1186  doc='PSF model Iyy (center of chip) (pixel**2)')
1187  schema.addField('psfIxy', type='F',
1188  doc='PSF model Ixy (center of chip) (pixel**2)')
1189  schema.addField('raCorners', type='ArrayD', size=4,
1190  doc='Right Ascension of bounding box corners (degrees)')
1191  schema.addField('decCorners', type='ArrayD', size=4,
1192  doc='Declination of bounding box corners (degrees)')
1193  schema.addField('ra', type='D',
1194  doc='Right Ascension of bounding box center (degrees)')
1195  schema.addField('decl', type='D',
1196  doc='Declination of bounding box center (degrees)')
1197  schema.addField('zenithDistance', type='F',
1198  doc='Zenith distance of bounding box center (degrees)')
1199  schema.addField('zeroPoint', type='F',
1200  doc='Mean zeropoint in detector (mag)')
1201  schema.addField('skyBg', type='F',
1202  doc='Average sky background (ADU)')
1203  schema.addField('skyNoise', type='F',
1204  doc='Average sky noise (ADU)')
1205  schema.addField('meanVar', type='F',
1206  doc='Mean variance of the weight plane (ADU**2)')
1207  schema.addField('astromOffsetMean', type='F',
1208  doc='Mean offset of astrometric calibration matches (arcsec)')
1209  schema.addField('astromOffsetStd', type='F',
1210  doc='Standard deviation of offsets of astrometric calibration matches (arcsec)')
1211 
1212  return schema
1213 
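# --- Usage sketch (illustrative; `butler`, the data ID values, and `detectorId` are
# placeholders):
#
#     visitSummary = butler.get('visitSummary', instrument='HSC', visit=12345)
#     row = visitSummary.find(detectorId)   # catalog is sorted by detector id
#     psfFwhmPix = row['psfSigma'] * np.sqrt(8 * np.log(2))
#     wcs, photoCalib = row.getWcs(), row.getPhotoCalib()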
1214 
1215 class VisitDataIdContainer(DataIdContainer):
1216  """DataIdContainer that groups sensor-level id's by visit
1217  """
1218 
1219  def makeDataRefList(self, namespace):
1220  """Make self.refList from self.idList
1221 
1222  Generate a list of data references grouped by visit.
1223 
1224  Parameters
1225  ----------
1226  namespace : `argparse.Namespace`
1227  Namespace used by `lsst.pipe.base.CmdLineTask` to parse command line arguments
1228  """
1229  # Group by visits
1230  visitRefs = defaultdict(list)
1231  for dataId in self.idList:
1232  if "visit" in dataId:
1233  visitId = dataId["visit"]
1234  # Append all dataRefs in this subset to the visit's reference list
1235  subset = namespace.butler.subset(self.datasetType, dataId=dataId)
1236  visitRefs[visitId].extend([dataRef for dataRef in subset])
1237 
1238  outputRefList = []
1239  for refList in visitRefs.values():
1240  existingRefs = [ref for ref in refList if ref.datasetExists()]
1241  if existingRefs:
1242  outputRefList.append(existingRefs)
1243 
1244  self.refList = outputRefList
1245 
1246 
1247 class ConsolidateSourceTableConnections(pipeBase.PipelineTaskConnections,
1248  defaultTemplates={"catalogType": ""},
1249  dimensions=("instrument", "visit")):
1250  inputCatalogs = connectionTypes.Input(
1251  doc="Input per-detector Source Tables",
1252  name="{catalogType}sourceTable",
1253  storageClass="DataFrame",
1254  dimensions=("instrument", "visit", "detector"),
1255  multiple=True
1256  )
1257  outputCatalog = connectionTypes.Output(
1258  doc="Per-visit concatenation of Source Table",
1259  name="{catalogType}sourceTable_visit",
1260  storageClass="DataFrame",
1261  dimensions=("instrument", "visit")
1262  )
1263 
1264 
1265 class ConsolidateSourceTableConfig(pipeBase.PipelineTaskConfig,
1266  pipelineConnections=ConsolidateSourceTableConnections):
1267  pass
1268 
1269 
1270 class ConsolidateSourceTableTask(CmdLineTask, pipeBase.PipelineTask):
1271  """Concatenate `sourceTable` list into a per-visit `sourceTable_visit`
1272  """
1273  _DefaultName = 'consolidateSourceTable'
1274  ConfigClass = ConsolidateSourceTableConfig
1275 
1276  inputDataset = 'sourceTable'
1277  outputDataset = 'sourceTable_visit'
1278 
1279  def runQuantum(self, butlerQC, inputRefs, outputRefs):
1280  inputs = butlerQC.get(inputRefs)
1281  self.log.info("Concatenating %s per-detector Source Tables",
1282  len(inputs['inputCatalogs']))
1283  df = pd.concat(inputs['inputCatalogs'])
1284  butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs)
1285 
1286  def runDataRef(self, dataRefList):
1287  self.log.info("Concatenating %s per-detector Source Tables", len(dataRefList))
1288  df = pd.concat([dataRef.get().toDataFrame() for dataRef in dataRefList])
1289  dataRefList[0].put(ParquetTable(dataFrame=df), self.outputDataset)
1290 
1291  @classmethod
1292  def _makeArgumentParser(cls):
1293  parser = ArgumentParser(name=cls._DefaultName)
1294 
1295  parser.add_id_argument("--id", cls.inputDataset,
1296  help="data ID, e.g. --id visit=12345",
1297  ContainerClass=VisitDataIdContainer)
1298  return parser
1299 
1300  def writeMetadata(self, dataRef):
1301  """No metadata to write.
1302  """
1303  pass
1304 
1305  def writeConfig(self, butler, clobber=False, doBackup=True):
1306  """No config to write.
1307  """
1308  pass
1309 
1310 
1311 class MakeCcdVisitTableConnections(pipeBase.PipelineTaskConnections,
1312  dimensions=("instrument",),
1313  defaultTemplates={}):
1314  visitSummaryRefs = connectionTypes.Input(
1315  doc="Data references for per-visit consolidated exposure metadata from ConsolidateVisitSummaryTask",
1316  name="visitSummary",
1317  storageClass="ExposureCatalog",
1318  dimensions=("instrument", "visit"),
1319  multiple=True,
1320  deferLoad=True,
1321  )
1322  outputCatalog = connectionTypes.Output(
1323  doc="CCD and Visit metadata table",
1324  name="CcdVisitTable",
1325  storageClass="DataFrame",
1326  dimensions=("instrument",)
1327  )
1328 
1329 
1330 class MakeCcdVisitTableConfig(pipeBase.PipelineTaskConfig,
1331  pipelineConnections=MakeCcdVisitTableConnections):
1332  pass
1333 
1334 
1335 class MakeCcdVisitTableTask(CmdLineTask, pipeBase.PipelineTask):
1336  """Produce a `ccdVisitTable` from the `visitSummary` exposure catalogs.
1337  """
1338  _DefaultName = 'makeCcdVisitTable'
1339  ConfigClass = MakeCcdVisitTableConfig
1340 
1341  def run(self, visitSummaryRefs):
1342  """ Make a table of ccd information from the `visitSummary` catalogs.
1343  Parameters
1344  ----------
1345  visitSummaryRefs : `list` of `lsst.daf.butler.DeferredDatasetHandle`
1346  List of DeferredDatasetHandles pointing to exposure catalogs with
1347  per-detector summary information.
1348  Returns
1349  -------
1350  result : `lsst.pipe.base.Struct`
1351  Results struct with attribute:
1352  - `outputCatalog`
1353  Catalog of ccd and visit information.
1354  """
1355  ccdEntries = []
1356  for visitSummaryRef in visitSummaryRefs:
1357  visitSummary = visitSummaryRef.get()
1358  visitInfo = visitSummary[0].getVisitInfo()
1359 
1360  ccdEntry = {}
1361  summaryTable = visitSummary.asAstropy()
1362  selectColumns = ['id', 'visit', 'physical_filter', 'ra', 'decl', 'zenithDistance', 'zeroPoint',
1363  'psfSigma', 'skyBg', 'skyNoise']
1364  ccdEntry = summaryTable[selectColumns].to_pandas().set_index('id')
1365  ccdEntry = ccdEntry.rename(columns={"physical_filter": "filterName", "visit": "visitId"})
1366 
1367  dataIds = [DataCoordinate.standardize(visitSummaryRef.dataId, detector=id) for id in
1368  summaryTable['id']]
1369  packer = visitSummaryRef.dataId.universe.makePacker('visit_detector', visitSummaryRef.dataId)
1370  ccdVisitIds = [packer.pack(dataId) for dataId in dataIds]
1371  ccdEntry['ccdVisitId'] = ccdVisitIds
1372 
1373  pixToArcseconds = np.array([vR.getWcs().getPixelScale().asArcseconds() for vR in visitSummary])
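 # psfSigma is the PSF model's Gaussian sigma in pixels; multiplying by
 # sqrt(8 * ln 2) (~2.355) converts it to a FWHM, and the pixel scale converts to arcseconds.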
1374  ccdEntry["seeing"] = visitSummary['psfSigma'] * np.sqrt(8 * np.log(2)) * pixToArcseconds
1375 
1376  ccdEntry["skyRotation"] = visitInfo.getBoresightRotAngle().asDegrees()
1377  ccdEntry["expMidpt"] = visitInfo.getDate().toPython()
1378  expTime = visitInfo.getExposureTime()
1379  ccdEntry['expTime'] = expTime
1380  ccdEntry["obsStart"] = ccdEntry["expMidpt"] - 0.5 * pd.Timedelta(seconds=expTime)
1381  ccdEntry['darkTime'] = visitInfo.getDarkTime()
1382  ccdEntry['xSize'] = summaryTable['bbox_max_x'] - summaryTable['bbox_min_x']
1383  ccdEntry['ySize'] = summaryTable['bbox_max_y'] - summaryTable['bbox_min_y']
1384  ccdEntry['llcra'] = summaryTable['raCorners'][:, 0]
1385  ccdEntry['llcdec'] = summaryTable['decCorners'][:, 0]
1386  ccdEntry['ulcra'] = summaryTable['raCorners'][:, 1]
1387  ccdEntry['ulcdec'] = summaryTable['decCorners'][:, 1]
1388  ccdEntry['urcra'] = summaryTable['raCorners'][:, 2]
1389  ccdEntry['urcdec'] = summaryTable['decCorners'][:, 2]
1390  ccdEntry['lrcra'] = summaryTable['raCorners'][:, 3]
1391  ccdEntry['lrcdec'] = summaryTable['decCorners'][:, 3]
1392  # TODO: DM-30618, Add raftName, nExposures, ccdTemp, binX, binY, and flags,
1393  # and decide if WCS, and llcx, llcy, ulcx, ulcy, etc. values are actually wanted.
1394  ccdEntries.append(ccdEntry)
1395 
1396  outputCatalog = pd.concat(ccdEntries)
1397  return pipeBase.Struct(outputCatalog=outputCatalog)
1398 
1399 
1400 class MakeVisitTableConnections(pipeBase.PipelineTaskConnections,
1401  dimensions=("instrument",),
1402  defaultTemplates={}):
1403  visitSummaries = connectionTypes.Input(
1404  doc="Per-visit consolidated exposure metadata from ConsolidateVisitSummaryTask",
1405  name="visitSummary",
1406  storageClass="ExposureCatalog",
1407  dimensions=("instrument", "visit",),
1408  multiple=True,
1409  deferLoad=True,
1410  )
1411  outputCatalog = connectionTypes.Output(
1412  doc="Visit metadata table",
1413  name="visitTable",
1414  storageClass="DataFrame",
1415  dimensions=("instrument",)
1416  )
1417 
1418 
1419 class MakeVisitTableConfig(pipeBase.PipelineTaskConfig,
1420  pipelineConnections=MakeVisitTableConnections):
1421  pass
1422 
1423 
1424 class MakeVisitTableTask(CmdLineTask, pipeBase.PipelineTask):
1425  """Produce a `visitTable` from the `visitSummary` exposure catalogs.
1426  """
1427  _DefaultName = 'makeVisitTable'
1428  ConfigClass = MakeVisitTableConfig
1429 
1430  def run(self, visitSummaries):
1431  """ Make a table of visit information from the `visitSummary` catalogs
1432 
1433  Parameters
1434  ----------
1435  visitSummaries : list of `lsst.afw.table.ExposureCatalog`
1436  List of exposure catalogs with per-detector summary information.
1437  Returns
1438  -------
1439  result : `lsst.pipe.base.Struct`
1440  Results struct with attribute:
1441  ``outputCatalog``
1442  Catalog of visit information.
1443  """
1444  visitEntries = []
1445  for visitSummary in visitSummaries:
1446  visitSummary = visitSummary.get()
1447  visitRow = visitSummary[0]
1448  visitInfo = visitRow.getVisitInfo()
1449 
1450  visitEntry = {}
1451  visitEntry["visitId"] = visitRow['visit']
1452  visitEntry["filterName"] = visitRow['physical_filter']
1453  raDec = visitInfo.getBoresightRaDec()
1454  visitEntry["ra"] = raDec.getRa().asDegrees()
1455  visitEntry["decl"] = raDec.getDec().asDegrees()
1456  visitEntry["skyRotation"] = visitInfo.getBoresightRotAngle().asDegrees()
1457  azAlt = visitInfo.getBoresightAzAlt()
1458  visitEntry["azimuth"] = azAlt.getLongitude().asDegrees()
1459  visitEntry["altitude"] = azAlt.getLatitude().asDegrees()
1460  visitEntry["zenithDistance"] = 90 - azAlt.getLatitude().asDegrees()
1461  visitEntry["airmass"] = visitInfo.getBoresightAirmass()
1462  visitEntry["obsStart"] = visitInfo.getDate().toPython()
1463  visitEntry["expTime"] = visitInfo.getExposureTime()
1464  visitEntries.append(visitEntry)
1465  # TODO: DM-30623, Add programId, exposureType, expMidpt, cameraTemp, mirror1Temp, mirror2Temp,
1466  # mirror3Temp, domeTemp, externalTemp, dimmSeeing, pwvGPS, pwvMW, flags, nExposures
1467 
1468  outputCatalog = pd.DataFrame(data=visitEntries)
1469  return pipeBase.Struct(outputCatalog=outputCatalog)