lsst.pipe.tasks  21.0.0-142-gef555c1e+5fb67ffcc0
postprocess.py
1 # This file is part of pipe_tasks
2 #
3 # Developed for the LSST Data Management System.
4 # This product includes software developed by the LSST Project
5 # (https://www.lsst.org).
6 # See the COPYRIGHT file at the top-level directory of this distribution
7 # for details of code ownership.
8 #
9 # This program is free software: you can redistribute it and/or modify
10 # it under the terms of the GNU General Public License as published by
11 # the Free Software Foundation, either version 3 of the License, or
12 # (at your option) any later version.
13 #
14 # This program is distributed in the hope that it will be useful,
15 # but WITHOUT ANY WARRANTY; without even the implied warranty of
16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 # GNU General Public License for more details.
18 #
19 # You should have received a copy of the GNU General Public License
20 # along with this program. If not, see <https://www.gnu.org/licenses/>.
21 
22 import functools
23 import pandas as pd
24 from collections import defaultdict
25 import numpy as np
26 
27 import lsst.geom
28 import lsst.pex.config as pexConfig
29 import lsst.pipe.base as pipeBase
30 import lsst.daf.base as dafBase
31 from lsst.pipe.base import connectionTypes
32 import lsst.afw.table as afwTable
33 from lsst.meas.base import SingleFrameMeasurementTask
34 from lsst.pipe.base import CmdLineTask, ArgumentParser, DataIdContainer
35 from lsst.coadd.utils.coaddDataIdContainer import CoaddDataIdContainer
36 from lsst.daf.butler import DeferredDatasetHandle, DataCoordinate
37 
38 from .parquetTable import ParquetTable
39 from .multiBandUtils import makeMergeArgumentParser, MergeSourcesRunner
40 from .functors import CompositeFunctor, Column
41 
42 
43 def flattenFilters(df, noDupCols=['coord_ra', 'coord_dec'], camelCase=False, inputBands=None):
44  """Flattens a dataframe with a multilevel (band, column) index into flat, band-prefixed columns.
45  """
46  newDf = pd.DataFrame()
47  # band is the level 0 index
48  dfBands = df.columns.unique(level=0).values
49  for band in dfBands:
50  subdf = df[band]
51  columnFormat = '{0}{1}' if camelCase else '{0}_{1}'
52  newColumns = {c: columnFormat.format(band, c)
53  for c in subdf.columns if c not in noDupCols}
54  cols = list(newColumns.keys())
55  newDf = pd.concat([newDf, subdf[cols].rename(columns=newColumns)], axis=1)
56 
57  # Band must be present in the input and output or else column is all NaN:
58  presentBands = dfBands if inputBands is None else list(set(inputBands).intersection(dfBands))
59  # Get the unexploded columns from any present band's partition
60  noDupDf = df[presentBands[0]][noDupCols]
61  newDf = pd.concat([noDupDf, newDf], axis=1)
62  return newDf
63 
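# Editor's sketch (not part of the pipeline): a minimal illustration of what
# flattenFilters does, assuming a toy (band, column) MultiIndex DataFrame.
#
#     cols = pd.MultiIndex.from_tuples(
#         [('g', 'PsFlux'), ('g', 'coord_ra'), ('g', 'coord_dec'),
#          ('r', 'PsFlux'), ('r', 'coord_ra'), ('r', 'coord_dec')],
#         names=('band', 'column'))
#     df = pd.DataFrame(np.zeros((2, 6)), columns=cols)
#     flattenFilters(df, camelCase=True).columns.tolist()
#     # -> ['coord_ra', 'coord_dec', 'gPsFlux', 'rPsFlux']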
64 
65 class WriteObjectTableConnections(pipeBase.PipelineTaskConnections,
66  defaultTemplates={"coaddName": "deep"},
67  dimensions=("tract", "patch", "skymap")):
68  inputCatalogMeas = connectionTypes.Input(
69  doc="Catalog of source measurements on the deepCoadd.",
70  dimensions=("tract", "patch", "band", "skymap"),
71  storageClass="SourceCatalog",
72  name="{coaddName}Coadd_meas",
73  multiple=True
74  )
75  inputCatalogForcedSrc = connectionTypes.Input(
76  doc="Catalog of forced measurements (shape and position parameters held fixed) on the deepCoadd.",
77  dimensions=("tract", "patch", "band", "skymap"),
78  storageClass="SourceCatalog",
79  name="{coaddName}Coadd_forced_src",
80  multiple=True
81  )
82  inputCatalogRef = connectionTypes.Input(
83  doc="Catalog marking the primary detection (which band provides a good shape and position) "
84  "for each detection in deepCoadd_mergeDet.",
85  dimensions=("tract", "patch", "skymap"),
86  storageClass="SourceCatalog",
87  name="{coaddName}Coadd_ref"
88  )
89  outputCatalog = connectionTypes.Output(
90  doc="A horizontal join (on object id) of the deepCoadd_{ref|meas|forced_src} catalogs, "
91  "stored as a DataFrame with a multi-level column index per-patch.",
92  dimensions=("tract", "patch", "skymap"),
93  storageClass="DataFrame",
94  name="{coaddName}Coadd_obj"
95  )
96 
97 
98 class WriteObjectTableConfig(pipeBase.PipelineTaskConfig,
99  pipelineConnections=WriteObjectTableConnections):
100  engine = pexConfig.Field(
101  dtype=str,
102  default="pyarrow",
103  doc="Parquet engine for writing (pyarrow or fastparquet)"
104  )
105  coaddName = pexConfig.Field(
106  dtype=str,
107  default="deep",
108  doc="Name of coadd"
109  )
110 
111 
112 class WriteObjectTableTask(CmdLineTask, pipeBase.PipelineTask):
113  """Write filter-merged source tables to parquet
114  """
115  _DefaultName = "writeObjectTable"
116  ConfigClass = WriteObjectTableConfig
117  RunnerClass = MergeSourcesRunner
118 
119  # Names of table datasets to be merged
120  inputDatasets = ('forced_src', 'meas', 'ref')
121 
122  # Tag of output dataset written by `MergeSourcesTask.write`
123  outputDataset = 'obj'
124 
125  def __init__(self, butler=None, schema=None, **kwargs):
126  # It is a shame that this class can't use the default init for CmdLineTask
127  # But to do so would require its own special task runner, which is many
128  # more lines of specialization, so this is how it is for now
129  super().__init__(**kwargs)
130 
131  def runDataRef(self, patchRefList):
132  """!
133  @brief Merge coadd sources from multiple bands. Calls @ref `run` which must be defined in
134  subclasses that inherit from MergeSourcesTask.
135  @param[in] patchRefList list of data references for each filter
136  """
137  catalogs = dict(self.readCatalog(patchRef) for patchRef in patchRefList)
138  dataId = patchRefList[0].dataId
139  mergedCatalog = self.run(catalogs, tract=dataId['tract'], patch=dataId['patch'])
140  self.write(patchRefList[0], ParquetTable(dataFrame=mergedCatalog))
141 
142  def runQuantum(self, butlerQC, inputRefs, outputRefs):
143  inputs = butlerQC.get(inputRefs)
144 
145  measDict = {ref.dataId['band']: {'meas': cat} for ref, cat in
146  zip(inputRefs.inputCatalogMeas, inputs['inputCatalogMeas'])}
147  forcedSourceDict = {ref.dataId['band']: {'forced_src': cat} for ref, cat in
148  zip(inputRefs.inputCatalogForcedSrc, inputs['inputCatalogForcedSrc'])}
149 
150  catalogs = {}
151  for band in measDict.keys():
152  catalogs[band] = {'meas': measDict[band]['meas'],
153  'forced_src': forcedSourceDict[band]['forced_src'],
154  'ref': inputs['inputCatalogRef']}
155  dataId = butlerQC.quantum.dataId
156  df = self.run(catalogs=catalogs, tract=dataId['tract'], patch=dataId['patch'])
157  outputs = pipeBase.Struct(outputCatalog=df)
158  butlerQC.put(outputs, outputRefs)
159 
160  @classmethod
161  def _makeArgumentParser(cls):
162  """Create a suitable ArgumentParser.
163 
164  We will use the ArgumentParser to get a list of data
165  references for patches; the RunnerClass will sort them into lists
166  of data references for the same patch.
167 
168  The parser references the first of self.inputDatasets, rather than
169  a single self.inputDataset.
170  """
171  return makeMergeArgumentParser(cls._DefaultName, cls.inputDatasets[0])
172 
173  def readCatalog(self, patchRef):
174  """Read input catalogs
175 
176  Read all the input datasets given by the 'inputDatasets'
177  attribute.
178 
179  Parameters
180  ----------
181  patchRef : `lsst.daf.persistence.ButlerDataRef`
182  Data reference for patch
183 
184  Returns
185  -------
186  Tuple consisting of band name and a dict of catalogs, keyed by
187  dataset name
188  """
189  band = patchRef.get(self.config.coaddName + "Coadd_filterLabel", immediate=True).bandLabel
190  catalogDict = {}
191  for dataset in self.inputDatasets:
192  catalog = patchRef.get(self.config.coaddName + "Coadd_" + dataset, immediate=True)
193  self.log.info("Read %d sources from %s for band %s: %s",
194  len(catalog), dataset, band, patchRef.dataId)
195  catalogDict[dataset] = catalog
196  return band, catalogDict
197 
198  def run(self, catalogs, tract, patch):
199  """Merge multiple catalogs.
200 
201  Parameters
202  ----------
203  catalogs : `dict`
204  Mapping from filter names to dict of catalogs.
205  tract : `int`
206  Tract id used to fill the tractId column.
207  patch : `str`
208  Patch id used to fill the patchId column.
209 
210  Returns
211  -------
212  catalog : `pandas.DataFrame`
213  Merged dataframe
214  """
215 
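        # Editor's note (illustrative structure, not upstream documentation):
        # the expected input looks like
        #     catalogs = {'g': {'meas': ..., 'forced_src': ..., 'ref': ...},
        #                 'r': {...}, ...}
        # and the output columns form a 3-level MultiIndex of
        # (dataset, band, column), with the per-dataset frames joined on the
        # 'id' index.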
216  dfs = []
217  for filt, tableDict in catalogs.items():
218  for dataset, table in tableDict.items():
219  # Convert afwTable to pandas DataFrame
220  df = table.asAstropy().to_pandas().set_index('id', drop=True)
221 
222  # Sort columns by name, to ensure matching schema among patches
223  df = df.reindex(sorted(df.columns), axis=1)
224  df['tractId'] = tract
225  df['patchId'] = patch
226 
227  # Make columns a 3-level MultiIndex
228  df.columns = pd.MultiIndex.from_tuples([(dataset, filt, c) for c in df.columns],
229  names=('dataset', 'band', 'column'))
230  dfs.append(df)
231 
232  catalog = functools.reduce(lambda d1, d2: d1.join(d2), dfs)
233  return catalog
234 
235  def write(self, patchRef, catalog):
236  """Write the output.
237 
238  Parameters
239  ----------
240  catalog : `ParquetTable`
241  Catalog to write
242  patchRef : `lsst.daf.persistence.ButlerDataRef`
243  Data reference for patch
244  """
245  patchRef.put(catalog, self.config.coaddName + "Coadd_" + self.outputDataset)
246  # since the filter isn't actually part of the data ID for the dataset we're saving,
247  # it's confusing to see it in the log message, even if the butler simply ignores it.
248  mergeDataId = patchRef.dataId.copy()
249  del mergeDataId["filter"]
250  self.log.info("Wrote merged catalog: %s", mergeDataId)
251 
252  def writeMetadata(self, dataRefList):
253  """No metadata to write, and not sure how to write it for a list of dataRefs.
254  """
255  pass
256 
257 
258 class WriteSourceTableConnections(pipeBase.PipelineTaskConnections,
259  defaultTemplates={"catalogType": ""},
260  dimensions=("instrument", "visit", "detector")):
261 
262  catalog = connectionTypes.Input(
263  doc="Input full-depth catalog of sources produced by CalibrateTask",
264  name="{catalogType}src",
265  storageClass="SourceCatalog",
266  dimensions=("instrument", "visit", "detector")
267  )
268  outputCatalog = connectionTypes.Output(
269  doc="Catalog of sources, `src` in Parquet format. The 'id' column is "
270  "replaced with an index; all other columns are unchanged.",
271  name="{catalogType}source",
272  storageClass="DataFrame",
273  dimensions=("instrument", "visit", "detector")
274  )
275 
276 
277 class WriteSourceTableConfig(pipeBase.PipelineTaskConfig,
278  pipelineConnections=WriteSourceTableConnections):
279  doApplyExternalPhotoCalib = pexConfig.Field(
280  dtype=bool,
281  default=False,
282  doc=("Add local photoCalib columns from the calexp.photoCalib? Should only be set True if "
283  "generating Source Tables from older src tables which do not already have local calib columns")
284  )
285  doApplyExternalSkyWcs = pexConfig.Field(
286  dtype=bool,
287  default=False,
288  doc=("Add local WCS columns from the calexp.wcs? Should only be set True if "
289  "generating Source Tables from older src tables which do not already have local calib columns")
290  )
291 
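# Editor's sketch: the two flags above are only needed when generating Source
# Tables from older `src` catalogs that lack the local calibration columns;
# a typical override (illustrative only) would be
#
#     config.doApplyExternalPhotoCalib = True
#     config.doApplyExternalSkyWcs = True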
292 
293 class WriteSourceTableTask(CmdLineTask, pipeBase.PipelineTask):
294  """Write source table to parquet
295  """
296  _DefaultName = "writeSourceTable"
297  ConfigClass = WriteSourceTableConfig
298 
299  def runDataRef(self, dataRef):
300  src = dataRef.get('src')
301  if self.config.doApplyExternalPhotoCalib or self.config.doApplyExternalSkyWcs:
302  src = self.addCalibColumns(src, dataRef)
303 
304  ccdVisitId = dataRef.get('ccdExposureId')
305  result = self.run(src, ccdVisitId=ccdVisitId)
306  dataRef.put(result.table, 'source')
307 
308  def runQuantum(self, butlerQC, inputRefs, outputRefs):
309  inputs = butlerQC.get(inputRefs)
310  inputs['ccdVisitId'] = butlerQC.quantum.dataId.pack("visit_detector")
311  result = self.run(**inputs).table
312  outputs = pipeBase.Struct(outputCatalog=result.toDataFrame())
313  butlerQC.put(outputs, outputRefs)
314 
315  def run(self, catalog, ccdVisitId=None):
316  """Convert `src` catalog to parquet
317 
318  Parameters
319  ----------
320  catalog: `afwTable.SourceCatalog`
321  catalog to be converted
322  ccdVisitId: `int`
323  ccdVisitId to be added as a column
324 
325  Returns
326  -------
327  result : `lsst.pipe.base.Struct`
328  ``table``
329  `ParquetTable` version of the input catalog
330  """
331  self.log.info("Generating parquet table from src catalog %s", ccdVisitId)
332  df = catalog.asAstropy().to_pandas().set_index('id', drop=True)
333  df['ccdVisitId'] = ccdVisitId
334  return pipeBase.Struct(table=ParquetTable(dataFrame=df))
335 
336  def addCalibColumns(self, catalog, dataRef):
337  """Add columns with local calibration evaluated at each centroid
338 
339  for backwards compatibility with old repos.
340  This exists for the purpose of converting old src catalogs
341  (which don't have the expected local calib columns) to Source Tables.
342 
343  Parameters
344  ----------
345  catalog: `afwTable.SourceCatalog`
346  catalog to which calib columns will be added
347  dataRef: `lsst.daf.persistence.ButlerDataRef`
348  for fetching the calibs from disk.
349 
350  Returns
351  -------
352  newCat: `afwTable.SourceCatalog`
353  Source Catalog with requested local calib columns
354  """
355  mapper = afwTable.SchemaMapper(catalog.schema)
356  measureConfig = SingleFrameMeasurementTask.ConfigClass()
357  measureConfig.doReplaceWithNoise = False
358 
359  # Just need the WCS or the PhotoCalib attached to an exposure
360  exposure = dataRef.get('calexp_sub',
361                         bbox=lsst.geom.BoxI(lsst.geom.PointI(0, 0), lsst.geom.PointI(1, 1)))  # restored continuation line; bbox argument is assumed
362 
363  mapper = afwTable.SchemaMapper(catalog.schema)
364  mapper.addMinimalSchema(catalog.schema, True)
365  schema = mapper.getOutputSchema()
366 
367  exposureIdInfo = dataRef.get("expIdInfo")
368  measureConfig.plugins.names = []
369  if self.config.doApplyExternalSkyWcs:
370  plugin = 'base_LocalWcs'
371  if plugin in schema:
372  raise RuntimeError(f"{plugin} already in src catalog. Set doApplyExternalSkyWcs=False")
373  else:
374  measureConfig.plugins.names.add(plugin)
375 
376  if self.config.doApplyExternalPhotoCalib:
377  plugin = 'base_LocalPhotoCalib'
378  if plugin in schema:
379  raise RuntimeError(f"{plugin} already in src catalog. Set doApplyExternalPhotoCalib=False")
380  else:
381  measureConfig.plugins.names.add(plugin)
382 
383  measurement = SingleFrameMeasurementTask(config=measureConfig, schema=schema)
384  newCat = afwTable.SourceCatalog(schema)
385  newCat.extend(catalog, mapper=mapper)
386  measurement.run(measCat=newCat, exposure=exposure, exposureId=exposureIdInfo.expId)
387  return newCat
388 
389  def writeMetadata(self, dataRef):
390  """No metadata to write.
391  """
392  pass
393 
394  @classmethod
395  def _makeArgumentParser(cls):
396  parser = ArgumentParser(name=cls._DefaultName)
397  parser.add_id_argument("--id", 'src',
398  help="data ID, e.g. --id visit=12345 ccd=0")
399  return parser
400 
401 
402 class PostprocessAnalysis(object):
403  """Calculate columns from ParquetTable
404 
405  This object manages and organizes an arbitrary set of computations
406  on a catalog. The catalog is defined by a
407  `lsst.pipe.tasks.parquetTable.ParquetTable` object (or list thereof), such as a
408  `deepCoadd_obj` dataset, and the computations are defined by a collection
409  of `lsst.pipe.tasks.functors.Functor` objects (or, equivalently,
410  a `CompositeFunctor`).
411 
412  After the object is initialized, accessing the `.df` attribute (which
413  holds the `pandas.DataFrame` containing the results of the calculations) triggers
414  computation of said dataframe.
415 
416  One of the conveniences of using this object is the ability to define a desired common
417  filter for all functors. This enables the same functor collection to be passed to
418  several different `PostprocessAnalysis` objects without having to change the original
419  functor collection, since the `filt` keyword argument of this object triggers an
420  overwrite of the `filt` property for all functors in the collection.
421 
422  This object also allows a list of refFlags to be passed, and defines a set of default
423  refFlags that are always included even if not requested.
424 
425  If a list of `ParquetTable` objects is passed, rather than a single one, then the
426  calculations will be mapped over all the input catalogs. In principle, it should
427  be straightforward to parallelize this activity, but initial tests have failed
428  (see TODO in code comments).
429 
430  Parameters
431  ----------
432  parq : `lsst.pipe.tasks.parquetTable.ParquetTable` (or list of such)
433  Source catalog(s) for computation
434 
435  functors : `list`, `dict`, or `lsst.pipe.tasks.functors.CompositeFunctor`
436  Computations to do (functors that act on `parq`).
437  If a dict, the output
438  DataFrame will have columns keyed accordingly.
439  If a list, the column keys will come from the
440  `.shortname` attribute of each functor.
441 
442  filt : `str` (optional)
443  Filter in which to calculate. If provided,
444  this will overwrite any existing `.filt` attribute
445  of the provided functors.
446 
447  flags : `list` (optional)
448  List of flags (per-band) to include in output table.
449  Taken from the `meas` dataset if applied to a multilevel Object Table.
450 
451  refFlags : `list` (optional)
452  List of refFlags (only reference band) to include in output table.
453 
454  forcedFlags : `list` (optional)
455  List of flags (per-band) to include in output table.
456  Taken from the ``forced_src`` dataset if applied to a
457  multilevel Object Table. Intended for flags from measurement plugins
458  only run during multi-band forced-photometry.
459  """
460  _defaultRefFlags = []
461  _defaultFuncs = ()
462 
463  def __init__(self, parq, functors, filt=None, flags=None, refFlags=None, forcedFlags=None):
464  self.parq = parq
465  self.functors = functors
466 
467  self.filt = filt
468  self.flags = list(flags) if flags is not None else []
469  self.forcedFlags = list(forcedFlags) if forcedFlags is not None else []
470  self.refFlags = list(self._defaultRefFlags)
471  if refFlags is not None:
472  self.refFlags += list(refFlags)
473 
474  self._df = None
475 
476  @property
477  def defaultFuncs(self):
478  funcs = dict(self._defaultFuncs)
479  return funcs
480 
481  @property
482  def func(self):
483  additionalFuncs = self.defaultFuncs
484  additionalFuncs.update({flag: Column(flag, dataset='forced_src') for flag in self.forcedFlags})
485  additionalFuncs.update({flag: Column(flag, dataset='ref') for flag in self.refFlags})
486  additionalFuncs.update({flag: Column(flag, dataset='meas') for flag in self.flags})
487 
488  if isinstance(self.functors, CompositeFunctor):
489  func = self.functors
490  else:
491  func = CompositeFunctor(self.functors)
492 
493  func.funcDict.update(additionalFuncs)
494  func.filt = self.filt
495 
496  return func
497 
498  @property
499  def noDupCols(self):
500  return [name for name, func in self.func.funcDict.items() if func.noDup or func.dataset == 'ref']
501 
502  @property
503  def df(self):
504  if self._df is None:
505  self.compute()
506  return self._df
507 
508  def compute(self, dropna=False, pool=None):
509  # map over multiple parquet tables
510  if type(self.parq) in (list, tuple):
511  if pool is None:
512  dflist = [self.func(parq, dropna=dropna) for parq in self.parq]
513  else:
514  # TODO: Figure out why this doesn't work (pyarrow pickling issues?)
515  dflist = pool.map(functools.partial(self.func, dropna=dropna), self.parq)
516  self._df = pd.concat(dflist)
517  else:
518  self._df = self.func(self.parq, dropna=dropna)
519 
520  return self._df
521 
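# Editor's sketch of typical use (hypothetical functor dict; `Mag` is assumed to
# be available from lsst.pipe.tasks.functors):
#
#     funcs = {'psfMag': Mag('base_PsfFlux', dataset='meas')}
#     analysis = PostprocessAnalysis(parq, funcs, filt='g',
#                                    refFlags=['detect_isPrimary'])
#     df = analysis.df   # first access triggers compute()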
522 
523 class TransformCatalogBaseConnections(pipeBase.PipelineTaskConnections,
524  dimensions=()):
525  """Expected Connections for subclasses of TransformCatalogBaseTask.
526 
527  Must be subclassed.
528  """
529  inputCatalog = connectionTypes.Input(
530  name="",
531  storageClass="DataFrame",
532  )
533  outputCatalog = connectionTypes.Output(
534  name="",
535  storageClass="DataFrame",
536  )
537 
538 
539 class TransformCatalogBaseConfig(pipeBase.PipelineTaskConfig,
540  pipelineConnections=TransformCatalogBaseConnections):
541  functorFile = pexConfig.Field(
542  dtype=str,
543  doc='Path to YAML file specifying functors to be computed',
544  default=None,
545  optional=True
546  )
547 
548 
549 class TransformCatalogBaseTask(CmdLineTask, pipeBase.PipelineTask):
550  """Base class for transforming/standardizing a catalog
551 
552  by applying functors that convert units and apply calibrations.
553  The purpose of this task is to perform a set of computations on
554  an input `ParquetTable` dataset (such as `deepCoadd_obj`) and write the
555  results to a new dataset (which needs to be declared in an `outputDataset`
556  attribute).
557 
558  The calculations to be performed are defined in a YAML file that specifies
559  a set of functors to be computed, provided as
560  a `--functorFile` config parameter. An example of such a YAML file
561  is the following:
562 
563  funcs:
564  psfMag:
565  functor: Mag
566  args:
567  - base_PsfFlux
568  filt: HSC-G
569  dataset: meas
570  cmodel_magDiff:
571  functor: MagDiff
572  args:
573  - modelfit_CModel
574  - base_PsfFlux
575  filt: HSC-G
576  gauss_magDiff:
577  functor: MagDiff
578  args:
579  - base_GaussianFlux
580  - base_PsfFlux
581  filt: HSC-G
582  count:
583  functor: Column
584  args:
585  - base_InputCount_value
586  filt: HSC-G
587  deconvolved_moments:
588  functor: DeconvolvedMoments
589  filt: HSC-G
590  dataset: forced_src
591  refFlags:
592  - calib_psfUsed
593  - merge_measurement_i
594  - merge_measurement_r
595  - merge_measurement_z
596  - merge_measurement_y
597  - merge_measurement_g
598  - base_PixelFlags_flag_inexact_psfCenter
599  - detect_isPrimary
600 
601  The names for each entry under "funcs" will become the names of columns in the
602  output dataset. All the functors referenced are defined in `lsst.pipe.tasks.functors`.
603  Positional arguments to be passed to each functor are in the `args` list,
604  and any additional entries for each column other than "functor" or "args" (e.g., `'filt'`,
605  `'dataset'`) are treated as keyword arguments to be passed to the functor initialization.
606 
607  The "flags" entry is the default shortcut for `Column` functors.
608  All columns listed under "flags" will be copied to the output table
609  untransformed. They can be of any datatype.
610  In the special case of transforming a multi-level object table with
611  band and dataset indices (deepCoadd_obj), these will be taken from the
612  `meas` dataset and exploded out per band.
613 
614  There are two special shortcuts that only apply when transforming
615  multi-level Object (deepCoadd_obj) tables:
616  - The "refFlags" entry is a shortcut for `Column` functors
617  taken from the `'ref'` dataset if transforming an ObjectTable.
618  - The "forcedFlags" entry is a shortcut for `Column` functors
619  taken from the ``forced_src`` dataset if transforming an ObjectTable.
620  These are expanded out per band.
621 
622 
623  This task uses the `lsst.pipe.tasks.postprocess.PostprocessAnalysis` object
624  to organize and execute the calculations.
625 
626  """
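    # Editor's note (sketch, not executed here): at runtime the YAML above is
    # loaded in __init__ via
    #     funcs = CompositeFunctor.from_file(self.config.functorFile)
    # and run() evaluates those functors through PostprocessAnalysis, returning
    # a pandas.DataFrame with one column per named functor (plus any dataId
    # columns added by transform()).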
627  @property
628  def _DefaultName(self):
629  raise NotImplementedError('Subclass must define "_DefaultName" attribute')
630 
631  @property
632  def outputDataset(self):
633  raise NotImplementedError('Subclass must define "outputDataset" attribute')
634 
635  @property
636  def inputDataset(self):
637  raise NotImplementedError('Subclass must define "inputDataset" attribute')
638 
639  @property
640  def ConfigClass(self):
641  raise NotImplementedError('Subclass must define "ConfigClass" attribute')
642 
643  def __init__(self, *args, **kwargs):
644  super().__init__(*args, **kwargs)
645  if self.config.functorFile:
646  self.log.info('Loading transform functor definitions from %s',
647  self.config.functorFile)
648  self.funcs = CompositeFunctor.from_file(self.config.functorFile)
649  self.funcs.update(dict(PostprocessAnalysis._defaultFuncs))
650  else:
651  self.funcs = None
652 
653  def runQuantum(self, butlerQC, inputRefs, outputRefs):
654  inputs = butlerQC.get(inputRefs)
655  if self.funcs is None:
656  raise ValueError("config.functorFile is None. "
657  "Must be a valid path to yaml in order to run Task as a PipelineTask.")
658  result = self.run(parq=inputs['inputCatalog'], funcs=self.funcs,
659  dataId=outputRefs.outputCatalog.dataId.full)
660  outputs = pipeBase.Struct(outputCatalog=result)
661  butlerQC.put(outputs, outputRefs)
662 
663  def runDataRef(self, dataRef):
664  parq = dataRef.get()
665  if self.funcs is None:
666  raise ValueError("config.functorFile is None. "
667  "Must be a valid path to yaml in order to run as a CommandlineTask.")
668  df = self.run(parq, funcs=self.funcs, dataId=dataRef.dataId)
669  self.write(df, dataRef)
670  return df
671 
672  def run(self, parq, funcs=None, dataId=None, band=None):
673  """Do postprocessing calculations
674 
675  Takes a `ParquetTable` object and dataId,
676  returns a dataframe with results of postprocessing calculations.
677 
678  Parameters
679  ----------
680  parq : `lsst.pipe.tasks.parquetTable.ParquetTable`
681  ParquetTable from which calculations are done.
682  funcs : `lsst.pipe.tasks.functors.Functors`
683  Functors to apply to the table's columns
684  dataId : dict, optional
685  Used to add a `patchId` column to the output dataframe.
686  band : `str`, optional
687  Filter band that is being processed.
688 
689  Returns
690  -------
691  `pandas.DataFrame`
692 
693  """
694  self.log.info("Transforming/standardizing the source table dataId: %s", dataId)
695 
696  df = self.transform(band, parq, funcs, dataId).df
697  self.log.info("Made a table of %d columns and %d rows", len(df.columns), len(df))
698  return df
699 
700  def getFunctors(self):
701  return self.funcs
702 
703  def getAnalysis(self, parq, funcs=None, band=None):
704  if funcs is None:
705  funcs = self.funcs
706  analysis = PostprocessAnalysis(parq, funcs, filt=band)
707  return analysis
708 
709  def transform(self, band, parq, funcs, dataId):
710  analysis = self.getAnalysis(parq, funcs=funcs, band=band)
711  df = analysis.df
712  if dataId is not None:
713  for key, value in dataId.items():
714  df[str(key)] = value
715 
716  return pipeBase.Struct(
717  df=df,
718  analysis=analysis
719  )
720 
721  def write(self, df, parqRef):
722  parqRef.put(ParquetTable(dataFrame=df), self.outputDataset)
723 
724  def writeMetadata(self, dataRef):
725  """No metadata to write.
726  """
727  pass
728 
729 
730 class TransformObjectCatalogConnections(pipeBase.PipelineTaskConnections,
731  defaultTemplates={"coaddName": "deep"},
732  dimensions=("tract", "patch", "skymap")):
733  inputCatalog = connectionTypes.Input(
734  doc="The horizontal join (on object id) of the deepCoadd_{ref|meas|forced_src} catalogs, "
735  "stored as a DataFrame with a multi-level column index per-patch.",
736  dimensions=("tract", "patch", "skymap"),
737  storageClass="DataFrame",
738  name="{coaddName}Coadd_obj",
739  deferLoad=True,
740  )
741  outputCatalog = connectionTypes.Output(
742  doc="Per-Patch Object Table of columns transformed from the deepCoadd_obj table per the standard "
743  "data model.",
744  dimensions=("tract", "patch", "skymap"),
745  storageClass="DataFrame",
746  name="objectTable"
747  )
748 
749 
750 class TransformObjectCatalogConfig(TransformCatalogBaseConfig,
751  pipelineConnections=TransformObjectCatalogConnections):
752  coaddName = pexConfig.Field(
753  dtype=str,
754  default="deep",
755  doc="Name of coadd"
756  )
757  # TODO: remove in DM-27177
758  filterMap = pexConfig.DictField(
759  keytype=str,
760  itemtype=str,
761  default={},
762  doc=("Dictionary mapping full filter name to short one for column name munging. "
763  "These filters determine the output columns no matter what filters the "
764  "input data actually contain."),
765  deprecated=("Coadds are now identified by the band, so this transform is unused. "
766  "Will be removed after v22.")
767  )
768  outputBands = pexConfig.ListField(
769  dtype=str,
770  default=None,
771  optional=True,
772  doc=("These bands and only these bands will appear in the output,"
773  " NaN-filled if the input does not include them."
774  " If None, then use all bands found in the input.")
775  )
776  camelCase = pexConfig.Field(
777  dtype=bool,
778  default=True,
779  doc=("Write per-band column names in camelCase, else with underscores. "
780  "For example: gPsFlux instead of g_PsFlux.")
781  )
782  multilevelOutput = pexConfig.Field(
783  dtype=bool,
784  default=False,
785  doc=("Whether results dataframe should have a multilevel column index (True) or be flat "
786  "and name-munged (False).")
787  )
788 
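# Editor's sketch of a typical config override for TransformObjectCatalogTask
# (the band list and file path are illustrative only):
#
#     config.outputBands = ['g', 'r', 'i', 'z', 'y']   # NaN-fill any missing band
#     config.camelCase = True                          # e.g. gPsFlux rather than g_PsFlux
#     config.multilevelOutput = False                  # flat, band-prefixed columns
#     config.functorFile = '/path/to/Object.yaml'      # hypothetical functor file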
789 
790 class TransformObjectCatalogTask(TransformCatalogBaseTask):
791  """Produce a flattened Object Table to match the format specified in
792  sdm_schemas.
793 
794  Do the same set of postprocessing calculations on all bands
795 
796  This is identical to `TransformCatalogBaseTask`, except that it does the
797  specified functor calculations for all filters present in the
798  input `deepCoadd_obj` table. Any specific `"filt"` keywords specified
799  by the YAML file will be superseded.
800  """
801  _DefaultName = "transformObjectCatalog"
802  ConfigClass = TransformObjectCatalogConfig
803 
804  # Used by Gen 2 runDataRef only:
805  inputDataset = 'deepCoadd_obj'
806  outputDataset = 'objectTable'
807 
808  @classmethod
809  def _makeArgumentParser(cls):
810  parser = ArgumentParser(name=cls._DefaultName)
811  parser.add_id_argument("--id", cls.inputDataset,
812  ContainerClass=CoaddDataIdContainer,
813  help="data ID, e.g. --id tract=12345 patch=1,2")
814  return parser
815 
816  def run(self, parq, funcs=None, dataId=None, band=None):
817  # NOTE: band kwarg is ignored here.
818  dfDict = {}
819  analysisDict = {}
820  templateDf = pd.DataFrame()
821 
822  if isinstance(parq, DeferredDatasetHandle):
823  columns = parq.get(component='columns')
824  inputBands = columns.unique(level=1).values
825  else:
826  inputBands = parq.columnLevelNames['band']
827 
828  outputBands = self.config.outputBands if self.config.outputBands else inputBands
829 
830  # Perform transform for data of filters that exist in parq.
831  for inputBand in inputBands:
832  if inputBand not in outputBands:
833  self.log.info("Ignoring %s band data in the input", inputBand)
834  continue
835  self.log.info("Transforming the catalog of band %s", inputBand)
836  result = self.transform(inputBand, parq, funcs, dataId)
837  dfDict[inputBand] = result.df
838  analysisDict[inputBand] = result.analysis
839  if templateDf.empty:
840  templateDf = result.df
841 
842  # Fill NaNs in columns of other wanted bands
843  for filt in outputBands:
844  if filt not in dfDict:
845  self.log.info("Adding empty columns for band %s", filt)
846  dfDict[filt] = pd.DataFrame().reindex_like(templateDf)
847 
848  # This makes a multilevel column index, with band as first level
849  df = pd.concat(dfDict, axis=1, names=['band', 'column'])
850 
851  if not self.config.multilevelOutput:
852  noDupCols = list(set.union(*[set(v.noDupCols) for v in analysisDict.values()]))
853  if dataId is not None:
854  noDupCols += list(dataId.keys())
855  df = flattenFilters(df, noDupCols=noDupCols, camelCase=self.config.camelCase,
856  inputBands=inputBands)
857 
858  self.log.info("Made a table of %d columns and %d rows", len(df.columns), len(df))
859  return df
860 
861 
862 class TractObjectDataIdContainer(CoaddDataIdContainer):
863 
864  def makeDataRefList(self, namespace):
865  """Make self.refList from self.idList
866 
867  Generate a list of data references given tract and/or patch.
868  This was adapted from `TractQADataIdContainer`, which was
869  `TractDataIdContainer` modifie to not require "filter".
870  Only existing dataRefs are returned.
871  """
872  def getPatchRefList(tract):
873  return [namespace.butler.dataRef(datasetType=self.datasetType,
874  tract=tract.getId(),
875  patch="%d,%d" % patch.getIndex()) for patch in tract]
876 
877  tractRefs = defaultdict(list) # Data references for each tract
878  for dataId in self.idList:
879  skymap = self.getSkymap(namespace)
880 
881  if "tract" in dataId:
882  tractId = dataId["tract"]
883  if "patch" in dataId:
884  tractRefs[tractId].append(namespace.butler.dataRef(datasetType=self.datasetType,
885  tract=tractId,
886  patch=dataId['patch']))
887  else:
888  tractRefs[tractId] += getPatchRefList(skymap[tractId])
889  else:
890  tractRefs = dict((tract.getId(), tractRefs.get(tract.getId(), []) + getPatchRefList(tract))
891  for tract in skymap)
892  outputRefList = []
893  for tractRefList in tractRefs.values():
894  existingRefs = [ref for ref in tractRefList if ref.datasetExists()]
895  outputRefList.append(existingRefs)
896 
897  self.refList = outputRefList
898 
899 
900 class ConsolidateObjectTableConnections(pipeBase.PipelineTaskConnections,
901  dimensions=("tract", "skymap")):
902  inputCatalogs = connectionTypes.Input(
903  doc="Per-Patch objectTables conforming to the standard data model.",
904  name="objectTable",
905  storageClass="DataFrame",
906  dimensions=("tract", "patch", "skymap"),
907  multiple=True,
908  )
909  outputCatalog = connectionTypes.Output(
910  doc="Per-tract vertical concatenation of the input objectTables",
911  name="objectTable_tract",
912  storageClass="DataFrame",
913  dimensions=("tract", "skymap"),
914  )
915 
916 
917 class ConsolidateObjectTableConfig(pipeBase.PipelineTaskConfig,
918  pipelineConnections=ConsolidateObjectTableConnections):
919  coaddName = pexConfig.Field(
920  dtype=str,
921  default="deep",
922  doc="Name of coadd"
923  )
924 
925 
926 class ConsolidateObjectTableTask(CmdLineTask, pipeBase.PipelineTask):
927  """Write patch-merged source tables to a tract-level parquet file
928 
929  Concatenates the per-patch `objectTable` list into a per-tract `objectTable_tract`.
930  """
931  _DefaultName = "consolidateObjectTable"
932  ConfigClass = ConsolidateObjectTableConfig
933 
934  inputDataset = 'objectTable'
935  outputDataset = 'objectTable_tract'
936 
937  def runQuantum(self, butlerQC, inputRefs, outputRefs):
938  inputs = butlerQC.get(inputRefs)
939  self.log.info("Concatenating %s per-patch Object Tables",
940  len(inputs['inputCatalogs']))
941  df = pd.concat(inputs['inputCatalogs'])
942  butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs)
943 
944  @classmethod
945  def _makeArgumentParser(cls):
946  parser = ArgumentParser(name=cls._DefaultName)
947 
948  parser.add_id_argument("--id", cls.inputDataset,
949  help="data ID, e.g. --id tract=12345",
950  ContainerClass=TractObjectDataIdContainer)
951  return parser
952 
953  def runDataRef(self, patchRefList):
954  df = pd.concat([patchRef.get().toDataFrame() for patchRef in patchRefList])
955  patchRefList[0].put(ParquetTable(dataFrame=df), self.outputDataset)
956 
957  def writeMetadata(self, dataRef):
958  """No metadata to write.
959  """
960  pass
961 
962 
963 class TransformSourceTableConnections(pipeBase.PipelineTaskConnections,
964  defaultTemplates={"catalogType": ""},
965  dimensions=("instrument", "visit", "detector")):
966 
967  inputCatalog = connectionTypes.Input(
968  doc="Wide input catalog of sources produced by WriteSourceTableTask",
969  name="{catalogType}source",
970  storageClass="DataFrame",
971  dimensions=("instrument", "visit", "detector"),
972  deferLoad=True
973  )
974  outputCatalog = connectionTypes.Output(
975  doc="Narrower, per-detector Source Table transformed and converted per a "
976  "specified set of functors",
977  name="{catalogType}sourceTable",
978  storageClass="DataFrame",
979  dimensions=("instrument", "visit", "detector")
980  )
981 
982 
983 class TransformSourceTableConfig(TransformCatalogBaseConfig,
984  pipelineConnections=TransformSourceTableConnections):
985  pass
986 
987 
988 class TransformSourceTableTask(TransformCatalogBaseTask):
989  """Transform/standardize a source catalog
990  """
991  _DefaultName = "transformSourceTable"
992  ConfigClass = TransformSourceTableConfig
993 
994  inputDataset = 'source'
995  outputDataset = 'sourceTable'
996 
997  @classmethod
998  def _makeArgumentParser(cls):
999  parser = ArgumentParser(name=cls._DefaultName)
1000  parser.add_id_argument("--id", datasetType=cls.inputDataset,
1001  level="sensor",
1002  help="data ID, e.g. --id visit=12345 ccd=0")
1003  return parser
1004 
1005  def runDataRef(self, dataRef):
1006  """Override to specify band label to run()."""
1007  parq = dataRef.get()
1008  funcs = self.getFunctors()
1009  band = dataRef.get("calexp_filterLabel", immediate=True).bandLabel
1010  df = self.run(parq, funcs=funcs, dataId=dataRef.dataId, band=band)
1011  self.write(df, dataRef)
1012  return df
1013 
1014 
1015 class ConsolidateVisitSummaryConnections(pipeBase.PipelineTaskConnections,
1016  dimensions=("instrument", "visit",),
1017  defaultTemplates={"calexpType": ""}):
1018  calexp = connectionTypes.Input(
1019  doc="Processed exposures used for metadata",
1020  name="{calexpType}calexp",
1021  storageClass="ExposureF",
1022  dimensions=("instrument", "visit", "detector"),
1023  deferLoad=True,
1024  multiple=True,
1025  )
1026  visitSummary = connectionTypes.Output(
1027  doc=("Per-visit consolidated exposure metadata. These catalogs use "
1028  "detector id for the id and are sorted for fast lookups of a "
1029  "detector."),
1030  name="{calexpType}visitSummary",
1031  storageClass="ExposureCatalog",
1032  dimensions=("instrument", "visit"),
1033  )
1034 
1035 
1036 class ConsolidateVisitSummaryConfig(pipeBase.PipelineTaskConfig,
1037  pipelineConnections=ConsolidateVisitSummaryConnections):
1038  """Config for ConsolidateVisitSummaryTask"""
1039  pass
1040 
1041 
1042 class ConsolidateVisitSummaryTask(pipeBase.PipelineTask, pipeBase.CmdLineTask):
1043  """Task to consolidate per-detector visit metadata.
1044 
1045  This task aggregates the following metadata from all the detectors in a
1046  single visit into an exposure catalog:
1047  - The visitInfo.
1048  - The wcs.
1049  - The photoCalib.
1050  - The physical_filter and band (if available).
1051  - The psf size, shape, and effective area at the center of the detector.
1052  - The corners of the bounding box in right ascension/declination.
1053 
1054  Other quantities such as Detector, Psf, ApCorrMap, and TransmissionCurve
1055  are not persisted here because of storage concerns, and because of their
1056  limited utility as summary statistics.
1057 
1058  Tests for this task are performed in ci_hsc_gen3.
1059  """
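    # Editor's sketch (assumed API use, not part of this task): because the
    # output catalog is sorted by detector id, a single detector's row can be
    # looked up directly, e.g.
    #     visitSummary = butler.get('visitSummary', visit=12345)  # hypothetical dataId
    #     row = visitSummary.find(detectorId)
    #     wcs, photoCalib = row.getWcs(), row.getPhotoCalib()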
1060  _DefaultName = "consolidateVisitSummary"
1061  ConfigClass = ConsolidateVisitSummaryConfig
1062 
1063  @classmethod
1064  def _makeArgumentParser(cls):
1065  parser = ArgumentParser(name=cls._DefaultName)
1066 
1067  parser.add_id_argument("--id", "calexp",
1068  help="data ID, e.g. --id visit=12345",
1069  ContainerClass=VisitDataIdContainer)
1070  return parser
1071 
1072  def writeMetadata(self, dataRef):
1073  """No metadata to persist, so override to remove metadata persistence.
1074  """
1075  pass
1076 
1077  def writeConfig(self, butler, clobber=False, doBackup=True):
1078  """No config to persist, so override to remove config persistence.
1079  """
1080  pass
1081 
1082  def runDataRef(self, dataRefList):
1083  visit = dataRefList[0].dataId['visit']
1084 
1085  self.log.debug("Concatenating metadata from %d per-detector calexps (visit %d)",
1086  len(dataRefList), visit)
1087 
1088  expCatalog = self._combineExposureMetadata(visit, dataRefList, isGen3=False)
1089 
1090  dataRefList[0].put(expCatalog, 'visitSummary', visit=visit)
1091 
1092  def runQuantum(self, butlerQC, inputRefs, outputRefs):
1093  dataRefs = butlerQC.get(inputRefs.calexp)
1094  visit = dataRefs[0].dataId.byName()['visit']
1095 
1096  self.log.debug("Concatenating metadata from %d per-detector calexps (visit %d)",
1097  len(dataRefs), visit)
1098 
1099  expCatalog = self._combineExposureMetadata(visit, dataRefs)
1100 
1101  butlerQC.put(expCatalog, outputRefs.visitSummary)
1102 
1103  def _combineExposureMetadata(self, visit, dataRefs, isGen3=True):
1104  """Make a combined exposure catalog from a list of dataRefs.
1105  These dataRefs must point to exposures with wcs, summaryStats,
1106  and other visit metadata.
1107 
1108  Parameters
1109  ----------
1110  visit : `int`
1111  Visit identification number.
1112  dataRefs : `list`
1113  List of dataRefs in visit. May be list of
1114  `lsst.daf.persistence.ButlerDataRef` (Gen2) or
1115  `lsst.daf.butler.DeferredDatasetHandle` (Gen3).
1116  isGen3 : `bool`, optional
1117  Specifies if this is a Gen3 list of datarefs.
1118 
1119  Returns
1120  -------
1121  visitSummary : `lsst.afw.table.ExposureCatalog`
1122  Exposure catalog with per-detector summary information.
1123  """
1124  schema = self._makeVisitSummarySchema()
1125  cat = afwTable.ExposureCatalog(schema)
1126  cat.resize(len(dataRefs))
1127 
1128  cat['visit'] = visit
1129 
1130  for i, dataRef in enumerate(dataRefs):
1131  if isGen3:
1132  visitInfo = dataRef.get(component='visitInfo')
1133  filterLabel = dataRef.get(component='filterLabel')
1134  summaryStats = dataRef.get(component='summaryStats')
1135  detector = dataRef.get(component='detector')
1136  wcs = dataRef.get(component='wcs')
1137  photoCalib = dataRef.get(component='photoCalib')
1139  bbox = dataRef.get(component='bbox')
1140  validPolygon = dataRef.get(component='validPolygon')
1141  else:
1142  # Note that we need to read the calexp because there is
1143  # no magic access to the psf except through the exposure.
1144  gen2_read_bbox = lsst.geom.BoxI(lsst.geom.PointI(0, 0), lsst.geom.PointI(1, 1))
1145  exp = dataRef.get(datasetType='calexp_sub', bbox=gen2_read_bbox)
1146  visitInfo = exp.getInfo().getVisitInfo()
1147  filterLabel = dataRef.get("calexp_filterLabel")
1148  summaryStats = exp.getInfo().getSummaryStats()
1149  wcs = exp.getWcs()
1150  photoCalib = exp.getPhotoCalib()
1151  detector = exp.getDetector()
1152  bbox = dataRef.get(datasetType='calexp_bbox')
1153  validPolygon = exp.getInfo().getValidPolygon()
1154 
1155  rec = cat[i]
1156  rec.setBBox(bbox)
1157  rec.setVisitInfo(visitInfo)
1158  rec.setWcs(wcs)
1159  rec.setPhotoCalib(photoCalib)
1160  rec.setValidPolygon(validPolygon)
1161 
1162  rec['physical_filter'] = filterLabel.physicalLabel if filterLabel.hasPhysicalLabel() else ""
1163  rec['band'] = filterLabel.bandLabel if filterLabel.hasBandLabel() else ""
1164  rec.setId(detector.getId())
1165  rec['psfSigma'] = summaryStats.psfSigma
1166  rec['psfIxx'] = summaryStats.psfIxx
1167  rec['psfIyy'] = summaryStats.psfIyy
1168  rec['psfIxy'] = summaryStats.psfIxy
1169  rec['psfArea'] = summaryStats.psfArea
1170  rec['raCorners'][:] = summaryStats.raCorners
1171  rec['decCorners'][:] = summaryStats.decCorners
1172  rec['ra'] = summaryStats.ra
1173  rec['decl'] = summaryStats.decl
1174  rec['zenithDistance'] = summaryStats.zenithDistance
1175  rec['zeroPoint'] = summaryStats.zeroPoint
1176  rec['skyBg'] = summaryStats.skyBg
1177  rec['skyNoise'] = summaryStats.skyNoise
1178  rec['meanVar'] = summaryStats.meanVar
1179  rec['astromOffsetMean'] = summaryStats.astromOffsetMean
1180  rec['astromOffsetStd'] = summaryStats.astromOffsetStd
1181 
1182  metadata = dafBase.PropertyList()
1183  metadata.add("COMMENT", "Catalog id is detector id, sorted.")
1184  # We are looping over existing datarefs, so the following is true
1185  metadata.add("COMMENT", "Only detectors with data have entries.")
1186  cat.setMetadata(metadata)
1187 
1188  cat.sort()
1189  return cat
1190 
1191  def _makeVisitSummarySchema(self):
1192  """Make the schema for the visitSummary catalog."""
1193  schema = afwTable.ExposureTable.makeMinimalSchema()
1194  schema.addField('visit', type='I', doc='Visit number')
1195  schema.addField('physical_filter', type='String', size=32, doc='Physical filter')
1196  schema.addField('band', type='String', size=32, doc='Name of band')
1197  schema.addField('psfSigma', type='F',
1198  doc='PSF model second-moments determinant radius (center of chip) (pixel)')
1199  schema.addField('psfArea', type='F',
1200  doc='PSF model effective area (center of chip) (pixel**2)')
1201  schema.addField('psfIxx', type='F',
1202  doc='PSF model Ixx (center of chip) (pixel**2)')
1203  schema.addField('psfIyy', type='F',
1204  doc='PSF model Iyy (center of chip) (pixel**2)')
1205  schema.addField('psfIxy', type='F',
1206  doc='PSF model Ixy (center of chip) (pixel**2)')
1207  schema.addField('raCorners', type='ArrayD', size=4,
1208  doc='Right Ascension of bounding box corners (degrees)')
1209  schema.addField('decCorners', type='ArrayD', size=4,
1210  doc='Declination of bounding box corners (degrees)')
1211  schema.addField('ra', type='D',
1212  doc='Right Ascension of bounding box center (degrees)')
1213  schema.addField('decl', type='D',
1214  doc='Declination of bounding box center (degrees)')
1215  schema.addField('zenithDistance', type='F',
1216  doc='Zenith distance of bounding box center (degrees)')
1217  schema.addField('zeroPoint', type='F',
1218  doc='Mean zeropoint in detector (mag)')
1219  schema.addField('skyBg', type='F',
1220  doc='Average sky background (ADU)')
1221  schema.addField('skyNoise', type='F',
1222  doc='Average sky noise (ADU)')
1223  schema.addField('meanVar', type='F',
1224  doc='Mean variance of the weight plane (ADU**2)')
1225  schema.addField('astromOffsetMean', type='F',
1226  doc='Mean offset of astrometric calibration matches (arcsec)')
1227  schema.addField('astromOffsetStd', type='F',
1228  doc='Standard deviation of offsets of astrometric calibration matches (arcsec)')
1229 
1230  return schema
1231 
1232 
1233 class VisitDataIdContainer(DataIdContainer):
1234  """DataIdContainer that groups sensor-level ids by visit.
1235  """
1236 
1237  def makeDataRefList(self, namespace):
1238  """Make self.refList from self.idList
1239 
1240  Generate a list of data references grouped by visit.
1241 
1242  Parameters
1243  ----------
1244  namespace : `argparse.Namespace`
1245  Namespace used by `lsst.pipe.base.CmdLineTask` to parse command line arguments
1246  """
1247  # Group by visits
1248  visitRefs = defaultdict(list)
1249  for dataId in self.idList:
1250  if "visit" in dataId:
1251  visitId = dataId["visit"]
1252  # append all data references in this subset to the visit's list
1253  subset = namespace.butler.subset(self.datasetType, dataId=dataId)
1254  visitRefs[visitId].extend([dataRef for dataRef in subset])
1255 
1256  outputRefList = []
1257  for refList in visitRefs.values():
1258  existingRefs = [ref for ref in refList if ref.datasetExists()]
1259  if existingRefs:
1260  outputRefList.append(existingRefs)
1261 
1262  self.refList = outputRefList
1263 
1264 
1265 class ConsolidateSourceTableConnections(pipeBase.PipelineTaskConnections,
1266  defaultTemplates={"catalogType": ""},
1267  dimensions=("instrument", "visit")):
1268  inputCatalogs = connectionTypes.Input(
1269  doc="Input per-detector Source Tables",
1270  name="{catalogType}sourceTable",
1271  storageClass="DataFrame",
1272  dimensions=("instrument", "visit", "detector"),
1273  multiple=True
1274  )
1275  outputCatalog = connectionTypes.Output(
1276  doc="Per-visit concatenation of Source Table",
1277  name="{catalogType}sourceTable_visit",
1278  storageClass="DataFrame",
1279  dimensions=("instrument", "visit")
1280  )
1281 
1282 
1283 class ConsolidateSourceTableConfig(pipeBase.PipelineTaskConfig,
1284  pipelineConnections=ConsolidateSourceTableConnections):
1285  pass
1286 
1287 
1288 class ConsolidateSourceTableTask(CmdLineTask, pipeBase.PipelineTask):
1289  """Concatenate `sourceTable` list into a per-visit `sourceTable_visit`
1290  """
1291  _DefaultName = 'consolidateSourceTable'
1292  ConfigClass = ConsolidateSourceTableConfig
1293 
1294  inputDataset = 'sourceTable'
1295  outputDataset = 'sourceTable_visit'
1296 
1297  def runQuantum(self, butlerQC, inputRefs, outputRefs):
1298  inputs = butlerQC.get(inputRefs)
1299  self.log.info("Concatenating %s per-detector Source Tables",
1300  len(inputs['inputCatalogs']))
1301  df = pd.concat(inputs['inputCatalogs'])
1302  butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs)
1303 
1304  def runDataRef(self, dataRefList):
1305  self.log.info("Concatenating %s per-detector Source Tables", len(dataRefList))
1306  df = pd.concat([dataRef.get().toDataFrame() for dataRef in dataRefList])
1307  dataRefList[0].put(ParquetTable(dataFrame=df), self.outputDataset)
1308 
1309  @classmethod
1310  def _makeArgumentParser(cls):
1311  parser = ArgumentParser(name=cls._DefaultName)
1312 
1313  parser.add_id_argument("--id", cls.inputDataset,
1314  help="data ID, e.g. --id visit=12345",
1315  ContainerClass=VisitDataIdContainer)
1316  return parser
1317 
1318  def writeMetadata(self, dataRef):
1319  """No metadata to write.
1320  """
1321  pass
1322 
1323  def writeConfig(self, butler, clobber=False, doBackup=True):
1324  """No config to write.
1325  """
1326  pass
1327 
1328 
1329 class MakeCcdVisitTableConnections(pipeBase.PipelineTaskConnections,
1330  dimensions=("instrument",),
1331  defaultTemplates={}):
1332  visitSummaryRefs = connectionTypes.Input(
1333  doc="Data references for per-visit consolidated exposure metadata from ConsolidateVisitSummaryTask",
1334  name="visitSummary",
1335  storageClass="ExposureCatalog",
1336  dimensions=("instrument", "visit"),
1337  multiple=True,
1338  deferLoad=True,
1339  )
1340  outputCatalog = connectionTypes.Output(
1341  doc="CCD and Visit metadata table",
1342  name="CcdVisitTable",
1343  storageClass="DataFrame",
1344  dimensions=("instrument",)
1345  )
1346 
1347 
1348 class MakeCcdVisitTableConfig(pipeBase.PipelineTaskConfig,
1349  pipelineConnections=MakeCcdVisitTableConnections):
1350  pass
1351 
1352 
1353 class MakeCcdVisitTableTask(CmdLineTask, pipeBase.PipelineTask):
1354  """Produce a `ccdVisitTable` from the `visitSummary` exposure catalogs.
1355  """
1356  _DefaultName = 'makeCcdVisitTable'
1357  ConfigClass = MakeCcdVisitTableConfig
1358 
1359  def run(self, visitSummaryRefs):
1360  """ Make a table of ccd information from the `visitSummary` catalogs.
1361  Parameters
1362  ----------
1363  visitSummaryRefs : `list` of `lsst.daf.butler.DeferredDatasetHandle`
1364  List of DeferredDatasetHandles pointing to exposure catalogs with
1365  per-detector summary information.
1366  Returns
1367  -------
1368  result : `lsst.pipe.base.Struct`
1369  Results struct with attribute:
1370  - `outputCatalog`
1371  Catalog of ccd and visit information.
1372  """
1373  ccdEntries = []
1374  for visitSummaryRef in visitSummaryRefs:
1375  visitSummary = visitSummaryRef.get()
1376  visitInfo = visitSummary[0].getVisitInfo()
1377 
1378  ccdEntry = {}
1379  summaryTable = visitSummary.asAstropy()
1380  selectColumns = ['id', 'visit', 'physical_filter', 'ra', 'decl', 'zenithDistance', 'zeroPoint',
1381  'psfSigma', 'skyBg', 'skyNoise']
1382  ccdEntry = summaryTable[selectColumns].to_pandas().set_index('id')
1383  ccdEntry = ccdEntry.rename(columns={"physical_filter": "filterName", "visit": "visitId"})
1384 
1385  dataIds = [DataCoordinate.standardize(visitSummaryRef.dataId, detector=id) for id in
1386  summaryTable['id']]
1387  packer = visitSummaryRef.dataId.universe.makePacker('visit_detector', visitSummaryRef.dataId)
1388  ccdVisitIds = [packer.pack(dataId) for dataId in dataIds]
1389  ccdEntry['ccdVisitId'] = ccdVisitIds
1390 
1391  pixToArcseconds = np.array([vR.getWcs().getPixelScale().asArcseconds() for vR in visitSummary])
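            # Editor's note: convert the PSF Gaussian sigma (pixels) to a FWHM
            # in arcseconds; FWHM = sigma * 2*sqrt(2 ln 2) = sigma * sqrt(8 ln 2).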
1392  ccdEntry["seeing"] = visitSummary['psfSigma'] * np.sqrt(8 * np.log(2)) * pixToArcseconds
1393 
1394  ccdEntry["skyRotation"] = visitInfo.getBoresightRotAngle().asDegrees()
1395  ccdEntry["expMidpt"] = visitInfo.getDate().toPython()
1396  expTime = visitInfo.getExposureTime()
1397  ccdEntry['expTime'] = expTime
1398  ccdEntry["obsStart"] = ccdEntry["expMidpt"] - 0.5 * pd.Timedelta(seconds=expTime)
1399  ccdEntry['darkTime'] = visitInfo.getDarkTime()
1400  ccdEntry['xSize'] = summaryTable['bbox_max_x'] - summaryTable['bbox_min_x']
1401  ccdEntry['ySize'] = summaryTable['bbox_max_y'] - summaryTable['bbox_min_y']
1402  ccdEntry['llcra'] = summaryTable['raCorners'][:, 0]
1403  ccdEntry['llcdec'] = summaryTable['decCorners'][:, 0]
1404  ccdEntry['ulcra'] = summaryTable['raCorners'][:, 1]
1405  ccdEntry['ulcdec'] = summaryTable['decCorners'][:, 1]
1406  ccdEntry['urcra'] = summaryTable['raCorners'][:, 2]
1407  ccdEntry['urcdec'] = summaryTable['decCorners'][:, 2]
1408  ccdEntry['lrcra'] = summaryTable['raCorners'][:, 3]
1409  ccdEntry['lrcdec'] = summaryTable['decCorners'][:, 3]
1410  # TODO: DM-30618, Add raftName, nExposures, ccdTemp, binX, binY, and flags,
1411  # and decide if WCS, and llcx, llcy, ulcx, ulcy, etc. values are actually wanted.
1412  ccdEntries.append(ccdEntry)
1413 
1414  outputCatalog = pd.concat(ccdEntries)
1415  return pipeBase.Struct(outputCatalog=outputCatalog)
1416 
1417 
1418 class MakeVisitTableConnections(pipeBase.PipelineTaskConnections,
1419  dimensions=("instrument",),
1420  defaultTemplates={}):
1421  visitSummaries = connectionTypes.Input(
1422  doc="Per-visit consolidated exposure metadata from ConsolidateVisitSummaryTask",
1423  name="visitSummary",
1424  storageClass="ExposureCatalog",
1425  dimensions=("instrument", "visit",),
1426  multiple=True,
1427  deferLoad=True,
1428  )
1429  outputCatalog = connectionTypes.Output(
1430  doc="Visit metadata table",
1431  name="visitTable",
1432  storageClass="DataFrame",
1433  dimensions=("instrument",)
1434  )
1435 
1436 
1437 class MakeVisitTableConfig(pipeBase.PipelineTaskConfig,
1438  pipelineConnections=MakeVisitTableConnections):
1439  pass
1440 
1441 
1442 class MakeVisitTableTask(CmdLineTask, pipeBase.PipelineTask):
1443  """Produce a `visitTable` from the `visitSummary` exposure catalogs.
1444  """
1445  _DefaultName = 'makeVisitTable'
1446  ConfigClass = MakeVisitTableConfig
1447 
1448  def run(self, visitSummaries):
1449  """ Make a table of visit information from the `visitSummary` catalogs
1450 
1451  Parameters
1452  ----------
1453  visitSummaries : list of `lsst.afw.table.ExposureCatalog`
1454  List of exposure catalogs with per-detector summary information.
1455  Returns
1456  -------
1457  result : `lsst.pipe.base.Struct`
1458  Results struct with attribute:
1459  ``outputCatalog``
1460  Catalog of visit information.
1461  """
1462  visitEntries = []
1463  for visitSummary in visitSummaries:
1464  visitSummary = visitSummary.get()
1465  visitRow = visitSummary[0]
1466  visitInfo = visitRow.getVisitInfo()
1467 
1468  visitEntry = {}
1469  visitEntry["visitId"] = visitRow['visit']
1470  visitEntry["filterName"] = visitRow['physical_filter']
1471  raDec = visitInfo.getBoresightRaDec()
1472  visitEntry["ra"] = raDec.getRa().asDegrees()
1473  visitEntry["decl"] = raDec.getDec().asDegrees()
1474  visitEntry["skyRotation"] = visitInfo.getBoresightRotAngle().asDegrees()
1475  azAlt = visitInfo.getBoresightAzAlt()
1476  visitEntry["azimuth"] = azAlt.getLongitude().asDegrees()
1477  visitEntry["altitude"] = azAlt.getLatitude().asDegrees()
1478  visitEntry["zenithDistance"] = 90 - azAlt.getLatitude().asDegrees()
1479  visitEntry["airmass"] = visitInfo.getBoresightAirmass()
1480  visitEntry["obsStart"] = visitInfo.getDate().toPython()
1481  visitEntry["expTime"] = visitInfo.getExposureTime()
1482  visitEntries.append(visitEntry)
1483  # TODO: DM-30623, Add programId, exposureType, expMidpt, cameraTemp, mirror1Temp, mirror2Temp,
1484  # mirror3Temp, domeTemp, externalTemp, dimmSeeing, pwvGPS, pwvMW, flags, nExposures
1485 
1486  outputCatalog = pd.DataFrame(data=visitEntries)
1487  return pipeBase.Struct(outputCatalog=outputCatalog)
1488 
1489 
1490 class WriteForcedSourceTableConnections(pipeBase.PipelineTaskConnections,
1491  dimensions=("instrument", "visit", "detector", "skymap", "tract")):
1492 
1493  inputCatalog = connectionTypes.Input(
1494  doc="Primary per-detector, single-epoch forced-photometry catalog. "
1495  "By default, it is the output of ForcedPhotCcdTask on calexps",
1496  name="forced_src",
1497  storageClass="SourceCatalog",
1498  dimensions=("instrument", "visit", "detector", "skymap", "tract")
1499  )
1500  inputCatalogDiff = connectionTypes.Input(
1501  doc="Secondary multi-epoch, per-detector, forced photometry catalog. "
1502  "By default, it is the output of ForcedPhotCcdTask run on image differences.",
1503  name="forced_diff",
1504  storageClass="SourceCatalog",
1505  dimensions=("instrument", "visit", "detector", "skymap", "tract")
1506  )
1507  outputCatalog = connectionTypes.Output(
1508  doc="InputCatalogs horizonatally joined on `objectId` in Parquet format",
1509  name="forcedSource",
1510  storageClass="DataFrame",
1511  dimensions=("instrument", "visit", "detector")
1512  )
1513 
1514 
1515 class WriteForcedSourceTableConfig(WriteSourceTableConfig,
1516  pipelineConnections=WriteForcedSourceTableConnections):
1517  pass
1518 
1519 
1520 class WriteForcedSourceTableTask(pipeBase.PipelineTask):
1521  """Merge and convert per-detector forced source catalogs to parquet
1522  """
1523  _DefaultName = "writeForcedSourceTable"
1524  ConfigClass = WriteForcedSourceTableConfig
1525 
1526  def runQuantum(self, butlerQC, inputRefs, outputRefs):
1527  inputs = butlerQC.get(inputRefs)
1528  # Add ccdVisitId to allow joining with CcdVisitTable
1529  inputs['ccdVisitId'] = butlerQC.quantum.dataId.pack("visit_detector")
1530  inputs['band'] = butlerQC.quantum.dataId.full['band']
1531 
1532  outputs = self.run(**inputs)
1533  butlerQC.put(outputs, outputRefs)
1534 
1535  def run(self, inputCatalog, inputCatalogDiff, ccdVisitId=None, band=None):
1536  dfs = []
1537  for table, dataset in zip((inputCatalog, inputCatalogDiff), ('calexp', 'diff')):
1538  df = table.asAstropy().to_pandas().set_index('objectId', drop=False)
1539  df = df.reindex(sorted(df.columns), axis=1)
1540  df['ccdVisitId'] = ccdVisitId if ccdVisitId else pd.NA
1541  df['band'] = band if band else pd.NA
1542  df.columns = pd.MultiIndex.from_tuples([(dataset, c) for c in df.columns],
1543  names=('dataset', 'column'))
1544 
1545  dfs.append(df)
1546 
1547  outputCatalog = functools.reduce(lambda d1, d2: d1.join(d2), dfs)
1548  return pipeBase.Struct(outputCatalog=outputCatalog)
1549 
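# --- Illustrative sketch (not part of postprocess.py) -------------------------
# A stack-free, runnable sketch of the horizontal join performed in run() above:
# two toy catalogs indexed by objectId stand in for the 'calexp' (forced_src)
# and 'diff' (forced_diff) measurements; all column values are invented.
import functools
import pandas as pd

calexp = pd.DataFrame({"objectId": [1, 2], "psfFlux": [10.0, 12.0]}).set_index("objectId", drop=False)
diff = pd.DataFrame({"objectId": [1, 2], "psfFlux": [0.1, -0.2]}).set_index("objectId", drop=False)

dfs = []
for df, dataset in ((calexp, "calexp"), (diff, "diff")):
    df = df.copy()
    # Same two-level ('dataset', 'column') index that run() builds.
    df.columns = pd.MultiIndex.from_tuples([(dataset, c) for c in df.columns],
                                           names=("dataset", "column"))
    dfs.append(df)

merged = functools.reduce(lambda d1, d2: d1.join(d2), dfs)
# merged has one row per objectId and columns such as ('calexp', 'psfFlux') and
# ('diff', 'psfFlux'), mirroring the outputCatalog returned by this task.
# ------------------------------------------------------------------------------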
1550 
1551 class TransformForcedSourceTableConnections(pipeBase.PipelineTaskConnections,
1552  dimensions=("instrument", "skymap", "patch", "tract")):
1553 
1554  inputCatalogs = connectionTypes.Input(
1555  doc="Parquet table of merged ForcedSources produced by WriteForcedSourceTableTask",
1556  name="forcedSource",
1557  storageClass="DataFrame",
1558  dimensions=("instrument", "visit", "detector"),
1559  multiple=True,
1560  deferLoad=True
1561  )
1562  referenceCatalog = connectionTypes.Input(
1563  doc="Reference catalog which was used to seed the forcedPhot. Columns "
1564  "objectId, detect_isPrimary, detect_isTractInner, detect_isPatchInner "
1565  "are expected.",
1566  name="objectTable",
1567  storageClass="DataFrame",
1568  dimensions=("tract", "patch", "skymap"),
1569  deferLoad=True
1570  )
1571  outputCatalog = connectionTypes.Output(
1572  doc="Narrower, temporally-aggregated, per-patch ForcedSource Table transformed and converted per a "
1573  "specified set of functors",
1574  name="ForcedSourceTable",
1575  storageClass="DataFrame",
1576  dimensions=("tract", "patch", "skymap")
1577  )
1578 
1579 
1580 class TransformForcedSourceTableConfig(TransformCatalogBaseConfig,
1581  pipelineConnections=TransformForcedSourceTableConnections):
1582  pass
1583 
1584 
1585 class TransformForcedSourceTableTask(TransformCatalogBaseTask):
1586  """Transform/standardize a ForcedSource catalog
1587 
1588  Transforms each wide, per-detector forcedSource parquet table per the
1589  specification file (per-camera defaults found in ForcedSource.yaml).
1590  All epochs that overlap the patch are aggregated into one per-patch
1591  narrow parquet file.
1592 
1593  No de-duplication of rows is performed. Duplicate-resolution flags are
1594  pulled in from the referenceCatalog: `detect_isPrimary`,
1595  `detect_isTractInner`, `detect_isPatchInner`, so that the user may
1596  de-duplicate for analysis or compare duplicates for QA.
1597 
1598  The resulting table includes multiple bands. Epochs (MJDs) and other useful
1599  per-visit quantities can be retrieved by joining with the CcdVisitTable on
1600  ccdVisitId.
1601  """
1602  _DefaultName = "transformForcedSourceTable"
1603  ConfigClass = TransformForcedSourceTableConfig
1604 
1605  def runQuantum(self, butlerQC, inputRefs, outputRefs):
1606  inputs = butlerQC.get(inputRefs)
1607  if self.funcs is None:
1608  raise ValueError("config.functorFile is None. "
1609  "Must be a valid path to yaml in order to run Task as a PipelineTask.")
1610  outputs = self.run(inputs['inputCatalogs'], inputs['referenceCatalog'], funcs=self.funcs,
1611  dataId=outputRefs.outputCatalog.dataId.full)
1612 
1613  butlerQC.put(outputs, outputRefs)
1614 
1615  def run(self, inputCatalogs, referenceCatalog, funcs=None, dataId=None, band=None):
1616  dfs = []
1617  ref = referenceCatalog.get(parameters={"columns": ['detect_isPrimary', 'detect_isTractInner',
1618  'detect_isPatchInner']})
1619  self.log.info("Aggregating %s input catalogs" % (len(inputCatalogs)))
1620  for handle in inputCatalogs:
1621  result = self.transform(None, handle, funcs, dataId)
1622  # Filter for only rows that were detected on (overlap) the patch
1623  dfs.append(ref.join(result.df, how='inner'))
1624 
1625  outputCatalog = pd.concat(dfs)
1626  self.log.info("Made a table of %d columns and %d rows",
1627  len(outputCatalog.columns), len(outputCatalog))
1628  return pipeBase.Struct(outputCatalog=outputCatalog)
1629 
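# --- Illustrative sketch (not part of postprocess.py) -------------------------
# A runnable toy example of the downstream use suggested in the docstring above:
# de-duplicate on detect_isPrimary, then join with the ccdVisitTable on
# ccdVisitId to attach per-visit epochs. All values, and the MJD column name
# 'expMidptMJD', are invented for illustration.
import pandas as pd

forcedSources = pd.DataFrame({
    "ccdVisitId": [100, 100, 101],
    "detect_isPrimary": [True, False, True],
    "psfFlux": [1.0, 1.1, 0.9],
})
ccdVisits = pd.DataFrame({"ccdVisitId": [100, 101],
                          "expMidptMJD": [59000.1, 59003.4]})  # hypothetical column name

primary = forcedSources[forcedSources["detect_isPrimary"]]
withEpochs = primary.merge(ccdVisits, on="ccdVisitId", how="left")
# ------------------------------------------------------------------------------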
1630 
1631 class ConsolidateForcedSourceTableConnections(pipeBase.PipelineTaskConnections,
1632  defaultTemplates={"catalogType": ""},
1633  dimensions=("instrument", "tract")):
1634  inputCatalogs = connectionTypes.Input(
1635  doc="Input per-patch ForcedSource Tables",
1636  name="{catalogType}ForcedSourceTable",
1637  storageClass="DataFrame",
1638  dimensions=("tract", "patch", "skymap"),
1639  multiple=True,
1640  )
1641 
1642  outputCatalog = connectionTypes.Output(
1643  doc="Output per-tract concatenation of ForcedSource Tables",
1644  name="{catalogType}ForcedSourceTable_tract",
1645  storageClass="DataFrame",
1646  dimensions=("tract", "skymap"),
1647  )
1648 
1649 
1650 class ConsolidateForcedSourceTableConfig(pipeBase.PipelineTaskConfig,
1651  pipelineConnections=ConsolidateForcedSourceTableConnections):
1652  pass
1653 
1654 
1655 class ConsolidateForcedSourceTableTask(CmdLineTask, pipeBase.PipelineTask):
1656  """Concatenate a per-patch `ForcedSourceTable` list into a single
1657  per-tract `forcedSourceTable_tract`
1658  """
1659  _DefaultName = 'consolidateForcedSourceTable'
1660  ConfigClass = ConsolidateForcedSourceTableConfig
1661 
1662  def runQuantum(self, butlerQC, inputRefs, outputRefs):
1663  inputs = butlerQC.get(inputRefs)
1664  self.log.info("Concatenating %s per-patch ForcedSource Tables",
1665  len(inputs['inputCatalogs']))
1666  df = pd.concat(inputs['inputCatalogs'])
1667  butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs)
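
# --- Illustrative sketch (not part of postprocess.py) -------------------------
# The per-tract product is a plain row-wise concatenation of the per-patch
# tables, as in runQuantum above; the toy frames and values here are invented.
import pandas as pd

patchTables = [
    pd.DataFrame({"objectId": [1, 2], "psfFlux": [1.0, 2.0]}),
    pd.DataFrame({"objectId": [3], "psfFlux": [0.5]}),
]
tractTable = pd.concat(patchTables)  # one row per input forced source, all patches
# ------------------------------------------------------------------------------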