# This file is part of pipe_tasks
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

import functools
import pandas as pd
import numpy as np
from collections import defaultdict

import lsst.geom
import lsst.pex.config as pexConfig
import lsst.pipe.base as pipeBase
from lsst.pipe.base import connectionTypes
import lsst.afw.table as afwTable
from lsst.meas.base import SingleFrameMeasurementTask
from lsst.pipe.base import CmdLineTask, ArgumentParser, DataIdContainer
from lsst.coadd.utils.coaddDataIdContainer import CoaddDataIdContainer

from .parquetTable import ParquetTable
from .multiBandUtils import makeMergeArgumentParser, MergeSourcesRunner
from .functors import CompositeFunctor, RAColumn, DecColumn, Column


def flattenFilters(df, noDupCols=['coord_ra', 'coord_dec'], camelCase=False):
    """Flattens a dataframe with multilevel column index
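
    Examples
    --------
    A toy illustration (the column names below are made up, not taken from a
    real Object table) of how per-band columns are renamed while the shared
    ``noDupCols`` columns are kept only once:

    >>> import pandas as pd
    >>> df = pd.DataFrame({('g', 'coord_ra'): [10.0], ('g', 'PsFlux'): [1.0],
    ...                    ('r', 'coord_ra'): [10.0], ('r', 'PsFlux'): [2.0]})
    >>> df.columns = pd.MultiIndex.from_tuples(df.columns,
    ...                                        names=('band', 'column'))
    >>> flat = flattenFilters(df, noDupCols=['coord_ra'])
    >>> sorted(flat.columns.tolist())
    ['coord_ra', 'g_PsFlux', 'r_PsFlux']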
    """
    newDf = pd.DataFrame()
    for band in set(df.columns.to_frame()['band']):
        subdf = df[band]
        columnFormat = '{0}{1}' if camelCase else '{0}_{1}'
        newColumns = {c: columnFormat.format(band, c)
                      for c in subdf.columns if c not in noDupCols}
        cols = list(newColumns.keys())
        newDf = pd.concat([newDf, subdf[cols].rename(columns=newColumns)], axis=1)

    newDf = pd.concat([subdf[noDupCols], newDf], axis=1)
    return newDf


class WriteObjectTableConfig(pexConfig.Config):
    priorityList = pexConfig.ListField(
        dtype=str,
        default=[],
        doc="Priority-ordered list of bands for the merge."
    )
    engine = pexConfig.Field(
        dtype=str,
        default="pyarrow",
        doc="Parquet engine for writing (pyarrow or fastparquet)"
    )
    coaddName = pexConfig.Field(
        dtype=str,
        default="deep",
        doc="Name of coadd"
    )

    def validate(self):
        pexConfig.Config.validate(self)
        if len(self.priorityList) == 0:
            raise RuntimeError("No priority list provided")


class WriteObjectTableTask(CmdLineTask):
    """Write filter-merged source tables to parquet
    """
    _DefaultName = "writeObjectTable"
    ConfigClass = WriteObjectTableConfig
    RunnerClass = MergeSourcesRunner

    # Names of table datasets to be merged
    inputDatasets = ('forced_src', 'meas', 'ref')

    # Tag of output dataset written by `MergeSourcesTask.write`
    outputDataset = 'obj'

    def __init__(self, butler=None, schema=None, **kwargs):
        # It is a shame that this class can't use the default init for CmdLineTask
        # But to do so would require its own special task runner, which is many
        # more lines of specialization, so this is how it is for now
        CmdLineTask.__init__(self, **kwargs)

    def runDataRef(self, patchRefList):
        """!
        @brief Merge coadd sources from multiple bands. Calls @ref `run` which must be defined in
        subclasses that inherit from MergeSourcesTask.
        @param[in] patchRefList list of data references for each filter
        """
        catalogs = dict(self.readCatalog(patchRef) for patchRef in patchRefList)
        dataId = patchRefList[0].dataId
        mergedCatalog = self.run(catalogs, tract=dataId['tract'], patch=dataId['patch'])
        self.write(patchRefList[0], mergedCatalog)

    @classmethod
    def _makeArgumentParser(cls):
        """Create a suitable ArgumentParser.

        We will use the ArgumentParser to get a list of data
        references for patches; the RunnerClass will sort them into lists
        of data references for the same patch.

        This references the first entry of ``self.inputDatasets``, rather
        than ``self.inputDataset``, as the dataset type for the id argument.
        """
        return makeMergeArgumentParser(cls._DefaultName, cls.inputDatasets[0])

    def readCatalog(self, patchRef):
        """Read input catalogs

        Read all the input datasets given by the 'inputDatasets'
        attribute.

        Parameters
        ----------
        patchRef : `lsst.daf.persistence.ButlerDataRef`
            Data reference for patch

        Returns
        -------
        Tuple consisting of band name and a dict of catalogs, keyed by
        dataset name
        """
        band = patchRef.get(self.config.coaddName + "Coadd_filterLabel", immediate=True).bandLabel
        catalogDict = {}
        for dataset in self.inputDatasets:
            catalog = patchRef.get(self.config.coaddName + "Coadd_" + dataset, immediate=True)
            self.log.info("Read %d sources from %s for band %s: %s" %
                          (len(catalog), dataset, band, patchRef.dataId))
            catalogDict[dataset] = catalog
        return band, catalogDict

    def run(self, catalogs, tract, patch):
        """Merge multiple catalogs.

        Parameters
        ----------
        catalogs : `dict`
            Mapping from filter names to dict of catalogs.
        tract : `int`
            tractId to use for the tractId column.
        patch : `str`
            patchId to use for the patchId column.

        Returns
        -------
        catalog : `lsst.pipe.tasks.parquetTable.ParquetTable`
            Merged dataframe, with the columns arranged in a three-level
            MultiIndex keyed by (dataset, band, column), wrapped in the
            parquet writer shim class.
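
        Notes
        -----
        A minimal sketch of the column index this method builds; the column
        name used here is hypothetical and only illustrates the
        (dataset, band, column) layout:

        >>> import pandas as pd
        >>> idx = pd.MultiIndex.from_tuples(
        ...     [('meas', 'g', 'base_PsfFlux_instFlux')],
        ...     names=('dataset', 'band', 'column'))
        >>> idx.names
        FrozenList(['dataset', 'band', 'column'])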
        """
        dfs = []
        for filt, tableDict in catalogs.items():
            for dataset, table in tableDict.items():
                # Convert afwTable to pandas DataFrame
                df = table.asAstropy().to_pandas().set_index('id', drop=True)

                # Sort columns by name, to ensure matching schema among patches
                df = df.reindex(sorted(df.columns), axis=1)
                df['tractId'] = tract
                df['patchId'] = patch

                # Make columns a 3-level MultiIndex
                df.columns = pd.MultiIndex.from_tuples([(dataset, filt, c) for c in df.columns],
                                                       names=('dataset', 'band', 'column'))
                dfs.append(df)

        catalog = functools.reduce(lambda d1, d2: d1.join(d2), dfs)
        return ParquetTable(dataFrame=catalog)

    def write(self, patchRef, catalog):
        """Write the output.

        Parameters
        ----------
        catalog : `ParquetTable`
            Catalog to write
        patchRef : `lsst.daf.persistence.ButlerDataRef`
            Data reference for patch
        """
        patchRef.put(catalog, self.config.coaddName + "Coadd_" + self.outputDataset)
        # since the filter isn't actually part of the data ID for the dataset we're saving,
        # it's confusing to see it in the log message, even if the butler simply ignores it.
        mergeDataId = patchRef.dataId.copy()
        del mergeDataId["filter"]
        self.log.info("Wrote merged catalog: %s" % (mergeDataId,))

    def writeMetadata(self, dataRefList):
        """No metadata to write, and not sure how to write it for a list of dataRefs.
        """
        pass


class WriteSourceTableConfig(pexConfig.Config):
    doApplyExternalPhotoCalib = pexConfig.Field(
        dtype=bool,
        default=False,
        doc=("Add local photoCalib columns from the calexp.photoCalib? Should only be set True if "
             "generating Source Tables from older src tables which do not already have local calib "
             "columns.")
    )
    doApplyExternalSkyWcs = pexConfig.Field(
        dtype=bool,
        default=False,
        doc=("Add local WCS columns from the calexp.wcs? Should only be set True if "
             "generating Source Tables from older src tables which do not already have local calib "
             "columns.")
    )


class WriteSourceTableTask(CmdLineTask):
    """Write source table to parquet
    """
    _DefaultName = "writeSourceTable"
    ConfigClass = WriteSourceTableConfig

    def runDataRef(self, dataRef):
        src = dataRef.get('src')
        if self.config.doApplyExternalPhotoCalib or self.config.doApplyExternalSkyWcs:
            src = self.addCalibColumns(src, dataRef)

        ccdVisitId = dataRef.get('ccdExposureId')
        result = self.run(src, ccdVisitId=ccdVisitId)
        dataRef.put(result.table, 'source')

    def run(self, catalog, ccdVisitId=None):
        """Convert `src` catalog to parquet

        Parameters
        ----------
        catalog : `afwTable.SourceCatalog`
            catalog to be converted
        ccdVisitId : `int`
            ccdVisitId to be added as a column

        Returns
        -------
        result : `lsst.pipe.base.Struct`
            ``table``
                `ParquetTable` version of the input catalog
        """
        self.log.info("Generating parquet table from src catalog")
        df = catalog.asAstropy().to_pandas().set_index('id', drop=True)
        df['ccdVisitId'] = ccdVisitId
        return pipeBase.Struct(table=ParquetTable(dataFrame=df))

    def addCalibColumns(self, catalog, dataRef):
        """Add columns with local calibration evaluated at each centroid

        for backwards compatibility with old repos.
        This exists for the purpose of converting old src catalogs
        (which don't have the expected local calib columns) to Source Tables.

        Parameters
        ----------
        catalog : `afwTable.SourceCatalog`
            catalog to which calib columns will be added
        dataRef : `lsst.daf.persistence.ButlerDataRef`
            for fetching the calibs from disk.

        Returns
        -------
        newCat : `afwTable.SourceCatalog`
            Source Catalog with requested local calib columns
        """
        mapper = afwTable.SchemaMapper(catalog.schema)
        measureConfig = SingleFrameMeasurementTask.ConfigClass()
        measureConfig.doReplaceWithNoise = False

        # Just need the WCS or the PhotoCalib attached to an exposure
        exposure = dataRef.get('calexp_sub',
                               bbox=lsst.geom.Box2I(lsst.geom.Point2I(0, 0), lsst.geom.Point2I(0, 0)))

        mapper = afwTable.SchemaMapper(catalog.schema)
        mapper.addMinimalSchema(catalog.schema, True)
        schema = mapper.getOutputSchema()

        exposureIdInfo = dataRef.get("expIdInfo")
        measureConfig.plugins.names = []
        if self.config.doApplyExternalSkyWcs:
            plugin = 'base_LocalWcs'
            if plugin in schema:
                raise RuntimeError(f"{plugin} already in src catalog. Set doApplyExternalSkyWcs=False")
            else:
                measureConfig.plugins.names.add(plugin)

        if self.config.doApplyExternalPhotoCalib:
            plugin = 'base_LocalPhotoCalib'
            if plugin in schema:
                raise RuntimeError(f"{plugin} already in src catalog. Set doApplyExternalPhotoCalib=False")
            else:
                measureConfig.plugins.names.add(plugin)

        measurement = SingleFrameMeasurementTask(config=measureConfig, schema=schema)
        newCat = afwTable.SourceCatalog(schema)
        newCat.extend(catalog, mapper=mapper)
        measurement.run(measCat=newCat, exposure=exposure, exposureId=exposureIdInfo.expId)
        return newCat

    def writeMetadata(self, dataRef):
        """No metadata to write.
        """
        pass

    @classmethod
    def _makeArgumentParser(cls):
        parser = ArgumentParser(name=cls._DefaultName)
        parser.add_id_argument("--id", 'src',
                               help="data ID, e.g. --id visit=12345 ccd=0")
        return parser


class PostprocessAnalysis(object):
    """Calculate columns from ParquetTable

    This object manages and organizes an arbitrary set of computations
    on a catalog. The catalog is defined by a
    `lsst.pipe.tasks.parquetTable.ParquetTable` object (or list thereof), such as a
    `deepCoadd_obj` dataset, and the computations are defined by a collection
    of `lsst.pipe.tasks.functors.Functor` objects (or, equivalently,
    a `CompositeFunctor`).

    After the object is initialized, accessing the `.df` attribute (which
    holds the `pandas.DataFrame` containing the results of the calculations) triggers
    computation of said dataframe.

    One of the conveniences of using this object is the ability to define a desired common
    filter for all functors. This enables the same functor collection to be passed to
    several different `PostprocessAnalysis` objects without having to change the original
    functor collection, since the `filt` keyword argument of this object triggers an
    overwrite of the `filt` property for all functors in the collection.

    This object also allows a list of refFlags to be passed, and defines a set of default
    refFlags that are always included even if not requested.

    If a list of `ParquetTable` objects is passed, rather than a single one, then the
    calculations will be mapped over all the input catalogs. In principle, it should
    be straightforward to parallelize this activity, but initial tests have failed
    (see TODO in code comments).

    Parameters
    ----------
    parq : `lsst.pipe.tasks.parquetTable.ParquetTable` (or list of such)
        Source catalog(s) for computation

    functors : `list`, `dict`, or `lsst.pipe.tasks.functors.CompositeFunctor`
        Computations to do (functors that act on `parq`).
        If a dict, the output
        DataFrame will have columns keyed accordingly.
        If a list, the column keys will come from the
        `.shortname` attribute of each functor.

    filt : `str` (optional)
        Filter in which to calculate. If provided,
        this will overwrite any existing `.filt` attribute
        of the provided functors.

    flags : `list` (optional)
        List of flags (per-band) to include in output table.

    refFlags : `list` (optional)
        List of refFlags (only reference band) to include in output table.
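
    Examples
    --------
    A minimal sketch of typical use, assuming a ``deepCoadd_obj`` ParquetTable
    has already been loaded into ``parq``; the column name passed to `Column`
    is illustrative only. (Not executed here because it requires data on
    disk.)

    >>> from lsst.pipe.tasks.functors import Column  # doctest: +SKIP
    >>> funcs = {'psfFlux': Column('base_PsfFlux_instFlux',
    ...                            dataset='meas')}  # doctest: +SKIP
    >>> analysis = PostprocessAnalysis(parq, funcs, filt='g')  # doctest: +SKIP
    >>> df = analysis.df  # doctest: +SKIP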
    """
    _defaultRefFlags = []
    _defaultFuncs = (('coord_ra', RAColumn()),
                     ('coord_dec', DecColumn()))

    def __init__(self, parq, functors, filt=None, flags=None, refFlags=None):
        self.parq = parq
        self.functors = functors

        self.filt = filt
        self.flags = list(flags) if flags is not None else []
        self.refFlags = list(self._defaultRefFlags)
        if refFlags is not None:
            self.refFlags += list(refFlags)

        self._df = None

    @property
    def defaultFuncs(self):
        funcs = dict(self._defaultFuncs)
        return funcs

    @property
    def func(self):
        additionalFuncs = self.defaultFuncs
        additionalFuncs.update({flag: Column(flag, dataset='ref') for flag in self.refFlags})
        additionalFuncs.update({flag: Column(flag, dataset='meas') for flag in self.flags})

        if isinstance(self.functors, CompositeFunctor):
            func = self.functors
        else:
            func = CompositeFunctor(self.functors)

        func.funcDict.update(additionalFuncs)
        func.filt = self.filt

        return func

    @property
    def noDupCols(self):
        return [name for name, func in self.func.funcDict.items() if func.noDup or func.dataset == 'ref']

    @property
    def df(self):
        if self._df is None:
            self.compute()
        return self._df

    def compute(self, dropna=False, pool=None):
        # map over multiple parquet tables
        if type(self.parq) in (list, tuple):
            if pool is None:
                dflist = [self.func(parq, dropna=dropna) for parq in self.parq]
            else:
                # TODO: Figure out why this doesn't work (pyarrow pickling issues?)
                dflist = pool.map(functools.partial(self.func, dropna=dropna), self.parq)
            self._df = pd.concat(dflist)
        else:
            self._df = self.func(self.parq, dropna=dropna)

        return self._df


class TransformCatalogBaseConfig(pexConfig.Config):
    functorFile = pexConfig.Field(
        dtype=str,
        doc='Path to YAML file specifying functors to be computed',
        default=None,
        optional=True
    )


class TransformCatalogBaseTask(CmdLineTask):
    """Base class for transforming/standardizing a catalog

    by applying functors that convert units and apply calibrations.
    The purpose of this task is to perform a set of computations on
    an input `ParquetTable` dataset (such as `deepCoadd_obj`) and write the
    results to a new dataset (which needs to be declared in an `outputDataset`
    attribute).

    The calculations to be performed are defined in a YAML file that specifies
    a set of functors to be computed, provided as
    a `--functorFile` config parameter. An example of such a YAML file
    is the following:

        funcs:
            psfMag:
                functor: Mag
                args:
                    - base_PsfFlux
                filt: HSC-G
                dataset: meas
            cmodel_magDiff:
                functor: MagDiff
                args:
                    - modelfit_CModel
                    - base_PsfFlux
                filt: HSC-G
            gauss_magDiff:
                functor: MagDiff
                args:
                    - base_GaussianFlux
                    - base_PsfFlux
                filt: HSC-G
            count:
                functor: Column
                args:
                    - base_InputCount_value
                filt: HSC-G
            deconvolved_moments:
                functor: DeconvolvedMoments
                filt: HSC-G
                dataset: forced_src
        refFlags:
            - calib_psfUsed
            - merge_measurement_i
            - merge_measurement_r
            - merge_measurement_z
            - merge_measurement_y
            - merge_measurement_g
            - base_PixelFlags_flag_inexact_psfCenter
            - detect_isPrimary

    The names for each entry under "funcs" will become the names of columns in the
    output dataset. All the functors referenced are defined in `lsst.pipe.tasks.functors`.
    Positional arguments to be passed to each functor are in the `args` list,
    and any additional entries for each column other than "functor" or "args" (e.g., `'filt'`,
    `'dataset'`) are treated as keyword arguments to be passed to the functor initialization.

    The "refFlags" entry is a shortcut for a set of `Column` functors that take the named
    columns from the `'ref'` dataset.

    The "flags" entry will be expanded out per band.

    This task uses the `lsst.pipe.tasks.postprocess.PostprocessAnalysis` object
    to organize and execute the calculations.
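
    A minimal sketch of how this machinery can be driven programmatically,
    assuming a YAML file like the one above has been saved as ``functors.yaml``
    and ``parq`` holds an input ParquetTable; the dataId values are made up
    and the snippet is not executed here:

    >>> from lsst.pipe.tasks.functors import CompositeFunctor  # doctest: +SKIP
    >>> funcs = CompositeFunctor.from_file('functors.yaml')  # doctest: +SKIP
    >>> task = TransformObjectCatalogTask()  # doctest: +SKIP
    >>> df = task.run(parq, funcs=funcs,
    ...               dataId={'tract': 9813, 'patch': '3,4'})  # doctest: +SKIP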
    """
    @property
    def _DefaultName(self):
        raise NotImplementedError('Subclass must define "_DefaultName" attribute')

    @property
    def outputDataset(self):
        raise NotImplementedError('Subclass must define "outputDataset" attribute')

    @property
    def inputDataset(self):
        raise NotImplementedError('Subclass must define "inputDataset" attribute')

    @property
    def ConfigClass(self):
        raise NotImplementedError('Subclass must define "ConfigClass" attribute')

    def runDataRef(self, dataRef):
        parq = dataRef.get()
        funcs = self.getFunctors()
        df = self.run(parq, funcs=funcs, dataId=dataRef.dataId)
        self.write(df, dataRef)
        return df

    def run(self, parq, funcs=None, dataId=None, band=None):
        """Do postprocessing calculations

        Takes a `ParquetTable` object and dataId,
        returns a dataframe with results of postprocessing calculations.

        Parameters
        ----------
        parq : `lsst.pipe.tasks.parquetTable.ParquetTable`
            ParquetTable from which calculations are done.
        funcs : `lsst.pipe.tasks.functors.Functors`
            Functors to apply to the table's columns
        dataId : dict, optional
            Used to add a `patchId` column to the output dataframe.
        band : `str`, optional
            Filter band that is being processed.

        Returns
        -------
        `pandas.DataFrame`

        """
        self.log.info("Transforming/standardizing the source table dataId: %s", dataId)

        df = self.transform(band, parq, funcs, dataId).df
        self.log.info("Made a table of %d columns and %d rows", len(df.columns), len(df))
        return df

    def getFunctors(self):
        funcs = CompositeFunctor.from_file(self.config.functorFile)
        funcs.update(dict(PostprocessAnalysis._defaultFuncs))
        return funcs

    def getAnalysis(self, parq, funcs=None, band=None):
        # Avoids disk access if funcs is passed
        if funcs is None:
            funcs = self.getFunctors()
        analysis = PostprocessAnalysis(parq, funcs, filt=band)
        return analysis

    def transform(self, band, parq, funcs, dataId):
        analysis = self.getAnalysis(parq, funcs=funcs, band=band)
        df = analysis.df
        if dataId is not None:
            for key, value in dataId.items():
                df[key] = value

        return pipeBase.Struct(
            df=df,
            analysis=analysis
        )

    def write(self, df, parqRef):
        parqRef.put(ParquetTable(dataFrame=df), self.outputDataset)

    def writeMetadata(self, dataRef):
        """No metadata to write.
        """
        pass


class TransformObjectCatalogConfig(TransformCatalogBaseConfig):
    coaddName = pexConfig.Field(
        dtype=str,
        default="deep",
        doc="Name of coadd"
    )
    # TODO: remove in DM-27177
    filterMap = pexConfig.DictField(
        keytype=str,
        itemtype=str,
        default={},
        doc=("Dictionary mapping full filter name to short one for column name munging. "
             "These filters determine the output columns no matter what filters the "
             "input data actually contain."),
        deprecated=("Coadds are now identified by the band, so this transform is unused. "
                    "Will be removed after v22.")
    )
    outputBands = pexConfig.ListField(
        dtype=str,
        default=None,
        optional=True,
        doc=("These bands and only these bands will appear in the output,"
             " NaN-filled if the input does not include them."
             " If None, then use all bands found in the input.")
    )
    camelCase = pexConfig.Field(
        dtype=bool,
        default=True,
        doc=("Write per-band column names with camelCase, else underscore. "
             "For example: gPsFlux instead of g_PsFlux.")
    )
    multilevelOutput = pexConfig.Field(
        dtype=bool,
        default=False,
        doc=("Whether results dataframe should have a multilevel column index (True) or be flat "
             "and name-munged (False).")
    )


class TransformObjectCatalogTask(TransformCatalogBaseTask):
    """Produce a flattened Object Table to match the format specified in
    sdm_schemas.

    Do the same set of postprocessing calculations on all bands.

    This is identical to `TransformCatalogBaseTask`, except that it does the
    specified functor calculations for all filters present in the
    input `deepCoadd_obj` table. Any specific `"filt"` keywords specified
    by the YAML file will be superseded.
    """
    _DefaultName = "transformObjectCatalog"
    ConfigClass = TransformObjectCatalogConfig

    inputDataset = 'deepCoadd_obj'
    outputDataset = 'objectTable'

    @classmethod
    def _makeArgumentParser(cls):
        parser = ArgumentParser(name=cls._DefaultName)
        parser.add_id_argument("--id", cls.inputDataset,
                               ContainerClass=CoaddDataIdContainer,
                               help="data ID, e.g. --id tract=12345 patch=1,2")
        return parser

    def run(self, parq, funcs=None, dataId=None, band=None):
        # NOTE: band kwarg is ignored here.
        dfDict = {}
        analysisDict = {}
        templateDf = pd.DataFrame()
        outputBands = parq.columnLevelNames['band'] if self.config.outputBands is None else \
            self.config.outputBands

        # Perform transform for data of filters that exist in parq.
        for inputBand in parq.columnLevelNames['band']:
            if inputBand not in outputBands:
                self.log.info("Ignoring %s band data in the input", inputBand)
                continue
            self.log.info("Transforming the catalog of band %s", inputBand)
            result = self.transform(inputBand, parq, funcs, dataId)
            dfDict[inputBand] = result.df
            analysisDict[inputBand] = result.analysis
            if templateDf.empty:
                templateDf = result.df

        # Fill NaNs in columns of other wanted bands
        for filt in outputBands:
            if filt not in dfDict:
                self.log.info("Adding empty columns for band %s", filt)
                dfDict[filt] = pd.DataFrame().reindex_like(templateDf)

        # This makes a multilevel column index, with band as first level
        df = pd.concat(dfDict, axis=1, names=['band', 'column'])

        if not self.config.multilevelOutput:
            noDupCols = list(set.union(*[set(v.noDupCols) for v in analysisDict.values()]))
            if dataId is not None:
                noDupCols += list(dataId.keys())
            df = flattenFilters(df, noDupCols=noDupCols, camelCase=self.config.camelCase)

        self.log.info("Made a table of %d columns and %d rows", len(df.columns), len(df))
        return df


class TractObjectDataIdContainer(CoaddDataIdContainer):

    def makeDataRefList(self, namespace):
        """Make self.refList from self.idList

        Generate a list of data references given tract and/or patch.
        This was adapted from `TractQADataIdContainer`, which was
        `TractDataIdContainer` modified to not require "filter".
        Only existing dataRefs are returned.
        """
        def getPatchRefList(tract):
            return [namespace.butler.dataRef(datasetType=self.datasetType,
                                             tract=tract.getId(),
                                             patch="%d,%d" % patch.getIndex()) for patch in tract]

        tractRefs = defaultdict(list)  # Data references for each tract
        for dataId in self.idList:
            skymap = self.getSkymap(namespace)

            if "tract" in dataId:
                tractId = dataId["tract"]
                if "patch" in dataId:
                    tractRefs[tractId].append(namespace.butler.dataRef(datasetType=self.datasetType,
                                                                       tract=tractId,
                                                                       patch=dataId['patch']))
                else:
                    tractRefs[tractId] += getPatchRefList(skymap[tractId])
            else:
                tractRefs = dict((tract.getId(), tractRefs.get(tract.getId(), []) + getPatchRefList(tract))
                                 for tract in skymap)
        outputRefList = []
        for tractRefList in tractRefs.values():
            existingRefs = [ref for ref in tractRefList if ref.datasetExists()]
            outputRefList.append(existingRefs)

        self.refList = outputRefList


class ConsolidateObjectTableConfig(pexConfig.Config):
    coaddName = pexConfig.Field(
        dtype=str,
        default="deep",
        doc="Name of coadd"
    )


class ConsolidateObjectTableTask(CmdLineTask):
    """Write patch-merged source tables to a tract-level parquet file
    """
    _DefaultName = "consolidateObjectTable"
    ConfigClass = ConsolidateObjectTableConfig

    inputDataset = 'objectTable'
    outputDataset = 'objectTable_tract'

    @classmethod
    def _makeArgumentParser(cls):
        parser = ArgumentParser(name=cls._DefaultName)

        parser.add_id_argument("--id", cls.inputDataset,
                               help="data ID, e.g. --id tract=12345",
                               ContainerClass=TractObjectDataIdContainer)
        return parser

    def runDataRef(self, patchRefList):
        df = pd.concat([patchRef.get().toDataFrame() for patchRef in patchRefList])
        patchRefList[0].put(ParquetTable(dataFrame=df), self.outputDataset)

    def writeMetadata(self, dataRef):
        """No metadata to write.
        """
        pass


class TransformSourceTableConfig(TransformCatalogBaseConfig):
    pass


class TransformSourceTableTask(TransformCatalogBaseTask):
    """Transform/standardize a source catalog
    """
    _DefaultName = "transformSourceTable"
    ConfigClass = TransformSourceTableConfig

    inputDataset = 'source'
    outputDataset = 'sourceTable'

    def writeMetadata(self, dataRef):
        """No metadata to write.
        """
        pass

    @classmethod
    def _makeArgumentParser(cls):
        parser = ArgumentParser(name=cls._DefaultName)
        parser.add_id_argument("--id", datasetType=cls.inputDataset,
                               level="sensor",
                               help="data ID, e.g. --id visit=12345 ccd=0")
        return parser

    def runDataRef(self, dataRef):
        """Override to specify band label to run()."""
        parq = dataRef.get()
        funcs = self.getFunctors()
        band = dataRef.get("calexp_filterLabel", immediate=True).bandLabel
        df = self.run(parq, funcs=funcs, dataId=dataRef.dataId, band=band)
        self.write(df, dataRef)
        return df


class ConsolidateVisitSummaryConnections(pipeBase.PipelineTaskConnections,
                                         dimensions=("instrument", "visit",),
                                         defaultTemplates={}):
    calexp = connectionTypes.Input(
        doc="Processed exposures used for metadata",
        name="calexp",
        storageClass="ExposureF",
        dimensions=("instrument", "visit", "detector"),
        deferLoad=True,
        multiple=True,
    )
    visitSummary = connectionTypes.Output(
        doc="Consolidated visit-level exposure metadata",
        name="visitSummary",
        storageClass="ExposureCatalog",
        dimensions=("instrument", "visit"),
    )


class ConsolidateVisitSummaryConfig(pipeBase.PipelineTaskConfig,
                                    pipelineConnections=ConsolidateVisitSummaryConnections):
    """Config for ConsolidateVisitSummaryTask"""
    pass


class ConsolidateVisitSummaryTask(pipeBase.PipelineTask, pipeBase.CmdLineTask):
    """Task to consolidate per-detector visit metadata.

    This task aggregates the following metadata from all the detectors in a
    single visit into an exposure catalog:
    - The visitInfo.
    - The wcs.
    - The photoCalib.
    - The physical_filter and band (if available).
    - The psf size, shape, and effective area at the center of the detector.
    - The corners of the bounding box in right ascension/declination.

    Other quantities such as Psf, ApCorrMap, and TransmissionCurve are not
    persisted here because of storage concerns, and because of their limited
    utility as summary statistics.

    Tests for this task are performed in ci_hsc_gen3.
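
    A minimal sketch of how the output catalog might be inspected afterwards
    (Gen3); the repository path and data ID values are placeholders and the
    snippet is not executed here:

    >>> from lsst.daf.butler import Butler  # doctest: +SKIP
    >>> butler = Butler('/path/to/repo', collections=['my_run'])  # doctest: +SKIP
    >>> summary = butler.get('visitSummary', instrument='HSC',
    ...                      visit=12345)  # doctest: +SKIP
    >>> summary['psfSigma']  # doctest: +SKIP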
    """
    _DefaultName = "consolidateVisitSummary"
    ConfigClass = ConsolidateVisitSummaryConfig

    @classmethod
    def _makeArgumentParser(cls):
        parser = ArgumentParser(name=cls._DefaultName)

        parser.add_id_argument("--id", "calexp",
                               help="data ID, e.g. --id visit=12345",
                               ContainerClass=VisitDataIdContainer)
        return parser

    def writeMetadata(self, dataRef):
        """No metadata to persist, so override to remove metadata persistence.
        """
        pass

    def writeConfig(self, butler, clobber=False, doBackup=True):
        """No config to persist, so override to remove config persistence.
        """
        pass

    def runDataRef(self, dataRefList):
        visit = dataRefList[0].dataId['visit']

        self.log.debug("Concatenating metadata from %d per-detector calexps (visit %d)" %
                       (len(dataRefList), visit))

        expCatalog = self._combineExposureMetadata(visit, dataRefList, isGen3=False)

        dataRefList[0].put(expCatalog, 'visitSummary', visit=visit)

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        dataRefs = butlerQC.get(inputRefs.calexp)
        visit = dataRefs[0].dataId.byName()['visit']

        self.log.debug("Concatenating metadata from %d per-detector calexps (visit %d)" %
                       (len(dataRefs), visit))

        expCatalog = self._combineExposureMetadata(visit, dataRefs)

        butlerQC.put(expCatalog, outputRefs.visitSummary)

    def _combineExposureMetadata(self, visit, dataRefs, isGen3=True):
        """Make a combined exposure catalog from a list of dataRefs.

        Parameters
        ----------
        visit : `int`
            Visit identification number
        dataRefs : `list`
            List of calexp dataRefs in visit. May be list of
            `lsst.daf.persistence.ButlerDataRef` (Gen2) or
            `lsst.daf.butler.DeferredDatasetHandle` (Gen3).
        isGen3 : `bool`, optional
            Specifies if this is a Gen3 list of datarefs.

        Returns
        -------
        visitSummary : `lsst.afw.table.ExposureCatalog`
            Exposure catalog with per-detector summary information.
        """
        schema = afwTable.ExposureTable.makeMinimalSchema()
        schema.addField('visit', type='I', doc='Visit number')
        schema.addField('detector_id', type='I', doc='Detector number')
        schema.addField('physical_filter', type='String', size=32, doc='Physical filter')
        schema.addField('band', type='String', size=32, doc='Name of band')
        schema.addField('psfSigma', type='F',
                        doc='PSF model second-moments determinant radius (center of chip) (pixel)')
        schema.addField('psfArea', type='F',
                        doc='PSF model effective area (center of chip) (pixel**2)')
        schema.addField('psfIxx', type='F',
                        doc='PSF model Ixx (center of chip) (pixel**2)')
        schema.addField('psfIyy', type='F',
                        doc='PSF model Iyy (center of chip) (pixel**2)')
        schema.addField('psfIxy', type='F',
                        doc='PSF model Ixy (center of chip) (pixel**2)')
        schema.addField('raCorners', type='ArrayD', size=4,
                        doc='Right Ascension of bounding box corners (degrees)')
        schema.addField('decCorners', type='ArrayD', size=4,
                        doc='Declination of bounding box corners (degrees)')

        cat = afwTable.ExposureCatalog(schema)
        cat.resize(len(dataRefs))

        cat['visit'] = visit

        for i, dataRef in enumerate(dataRefs):
            if isGen3:
                visitInfo = dataRef.get(component='visitInfo')
                filterLabel = dataRef.get(component='filterLabel')
                psf = dataRef.get(component='psf')
                wcs = dataRef.get(component='wcs')
                photoCalib = dataRef.get(component='photoCalib')
                detector = dataRef.get(component='detector')
                bbox = dataRef.get(component='bbox')
                validPolygon = dataRef.get(component='validPolygon')
            else:
                # Note that we need to read the calexp because there is
                # no magic access to the psf except through the exposure.
                gen2_read_bbox = lsst.geom.BoxI(lsst.geom.PointI(0, 0), lsst.geom.PointI(1, 1))
                exp = dataRef.get(datasetType='calexp_sub', bbox=gen2_read_bbox)
                visitInfo = exp.getInfo().getVisitInfo()
                filterLabel = exp.getFilterLabel()
                psf = exp.getPsf()
                wcs = exp.getWcs()
                photoCalib = exp.getPhotoCalib()
                detector = exp.getDetector()
                bbox = dataRef.get(datasetType='calexp_bbox')
                validPolygon = exp.getInfo().getValidPolygon()

            rec = cat[i]
            rec.setBBox(bbox)
            rec.setVisitInfo(visitInfo)
            rec.setWcs(wcs)
            rec.setPhotoCalib(photoCalib)
            rec.setDetector(detector)
            rec.setValidPolygon(validPolygon)

            rec['physical_filter'] = filterLabel.physicalLabel if filterLabel.hasPhysicalLabel() else ""
            rec['band'] = filterLabel.bandLabel if filterLabel.hasBandLabel() else ""
            rec['detector_id'] = detector.getId()
            shape = psf.computeShape(bbox.getCenter())
            rec['psfSigma'] = shape.getDeterminantRadius()
            rec['psfIxx'] = shape.getIxx()
            rec['psfIyy'] = shape.getIyy()
            rec['psfIxy'] = shape.getIxy()
            im = psf.computeKernelImage(bbox.getCenter())
            # The calculation of effective psf area is taken from
            # meas_base/src/PsfFlux.cc#L112. See
            # https://github.com/lsst/meas_base/blob/
            # 750bffe6620e565bda731add1509507f5c40c8bb/src/PsfFlux.cc#L112
            rec['psfArea'] = np.sum(im.array)/np.sum(im.array**2.)

            sph_pts = wcs.pixelToSky(lsst.geom.Box2D(bbox).getCorners())
            rec['raCorners'][:] = [sph.getRa().asDegrees() for sph in sph_pts]
            rec['decCorners'][:] = [sph.getDec().asDegrees() for sph in sph_pts]

        return cat


class VisitDataIdContainer(DataIdContainer):
    """DataIdContainer that groups sensor-level id's by visit
    """

    def makeDataRefList(self, namespace):
        """Make self.refList from self.idList

        Generate a list of data references grouped by visit.

        Parameters
        ----------
        namespace : `argparse.Namespace`
            Namespace used by `lsst.pipe.base.CmdLineTask` to parse command line arguments
        """
        # Group by visits
        visitRefs = defaultdict(list)
        for dataId in self.idList:
            if "visit" in dataId:
                visitId = dataId["visit"]
                # append all subsets to the list for this visit
                subset = namespace.butler.subset(self.datasetType, dataId=dataId)
                visitRefs[visitId].extend([dataRef for dataRef in subset])

        outputRefList = []
        for refList in visitRefs.values():
            existingRefs = [ref for ref in refList if ref.datasetExists()]
            if existingRefs:
                outputRefList.append(existingRefs)

        self.refList = outputRefList


class ConsolidateSourceTableConfig(pexConfig.Config):
    pass


class ConsolidateSourceTableTask(CmdLineTask):
    """Concatenate `sourceTable` list into a per-visit `sourceTable_visit`
    """
    _DefaultName = 'consolidateSourceTable'
    ConfigClass = ConsolidateSourceTableConfig

    inputDataset = 'sourceTable'
    outputDataset = 'sourceTable_visit'

    def runDataRef(self, dataRefList):
        self.log.info("Concatenating %s per-detector Source Tables", len(dataRefList))
        df = pd.concat([dataRef.get().toDataFrame() for dataRef in dataRefList])
        dataRefList[0].put(ParquetTable(dataFrame=df), self.outputDataset)

    @classmethod
    def _makeArgumentParser(cls):
        parser = ArgumentParser(name=cls._DefaultName)

        parser.add_id_argument("--id", cls.inputDataset,
                               help="data ID, e.g. --id visit=12345",
                               ContainerClass=VisitDataIdContainer)
        return parser

    def writeMetadata(self, dataRef):
        """No metadata to write.
        """
        pass

    def writeConfig(self, butler, clobber=False, doBackup=True):
        """No config to write.
        """
        pass