Coverage for python/lsst/pipe/tasks/postprocess.py : 27%

# This file is part of pipe_tasks
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
import functools
import pandas as pd
import numpy as np
from collections import defaultdict

import lsst.geom
import lsst.pex.config as pexConfig
import lsst.pipe.base as pipeBase
from lsst.pipe.base import connectionTypes
import lsst.afw.table as afwTable
from lsst.meas.base import SingleFrameMeasurementTask
from lsst.pipe.base import CmdLineTask, ArgumentParser, DataIdContainer
from lsst.coadd.utils.coaddDataIdContainer import CoaddDataIdContainer

from .parquetTable import ParquetTable
from .multiBandUtils import makeMergeArgumentParser, MergeSourcesRunner
from .functors import CompositeFunctor, RAColumn, DecColumn, Column


def flattenFilters(df, noDupCols=['coord_ra', 'coord_dec'], camelCase=False):
    """Flatten a dataframe with a multilevel column index.
43 """
44 newDf = pd.DataFrame()
45 for band in set(df.columns.to_frame()['band']):
46 subdf = df[band]
47 columnFormat = '{0}{1}' if camelCase else '{0}_{1}'
48 newColumns = {c: columnFormat.format(band, c)
49 for c in subdf.columns if c not in noDupCols}
50 cols = list(newColumns.keys())
51 newDf = pd.concat([newDf, subdf[cols].rename(columns=newColumns)], axis=1)
53 newDf = pd.concat([subdf[noDupCols], newDf], axis=1)
54 return newDf


class WriteObjectTableConfig(pexConfig.Config):
    engine = pexConfig.Field(
        dtype=str,
        default="pyarrow",
        doc="Parquet engine for writing (pyarrow or fastparquet)"
    )
    coaddName = pexConfig.Field(
        dtype=str,
        default="deep",
        doc="Name of coadd"
    )


class WriteObjectTableTask(CmdLineTask):
    """Write filter-merged source tables to parquet
    """
    _DefaultName = "writeObjectTable"
    ConfigClass = WriteObjectTableConfig
    RunnerClass = MergeSourcesRunner

    # Names of table datasets to be merged
    inputDatasets = ('forced_src', 'meas', 'ref')

    # Tag of output dataset written by `MergeSourcesTask.write`
    outputDataset = 'obj'

    def __init__(self, butler=None, schema=None, **kwargs):
        # It is a shame that this class can't use the default init for CmdLineTask
        # But to do so would require its own special task runner, which is many
        # more lines of specialization, so this is how it is for now
        CmdLineTask.__init__(self, **kwargs)

    def runDataRef(self, patchRefList):
        """!
        @brief Merge coadd sources from multiple bands. Calls @ref `run` which must be defined in
        subclasses that inherit from MergeSourcesTask.
        @param[in] patchRefList list of data references for each filter
        """
        catalogs = dict(self.readCatalog(patchRef) for patchRef in patchRefList)
        dataId = patchRefList[0].dataId
        mergedCatalog = self.run(catalogs, tract=dataId['tract'], patch=dataId['patch'])
        self.write(patchRefList[0], mergedCatalog)

    @classmethod
    def _makeArgumentParser(cls):
        """Create a suitable ArgumentParser.

        We will use the ArgumentParser to get a list of data
        references for patches; the RunnerClass will sort them into lists
        of data references for the same patch.

        References the first of self.inputDatasets, rather than
        self.inputDataset.
        """
        return makeMergeArgumentParser(cls._DefaultName, cls.inputDatasets[0])

    def readCatalog(self, patchRef):
        """Read input catalogs

        Read all the input datasets given by the 'inputDatasets'
        attribute.

        Parameters
        ----------
        patchRef : `lsst.daf.persistence.ButlerDataRef`
            Data reference for patch

        Returns
        -------
        Tuple consisting of band name and a dict of catalogs, keyed by
        dataset name
        """
        band = patchRef.get(self.config.coaddName + "Coadd_filterLabel", immediate=True).bandLabel
        catalogDict = {}
        for dataset in self.inputDatasets:
            catalog = patchRef.get(self.config.coaddName + "Coadd_" + dataset, immediate=True)
            self.log.info("Read %d sources from %s for band %s: %s" %
                          (len(catalog), dataset, band, patchRef.dataId))
            catalogDict[dataset] = catalog
        return band, catalogDict

    def run(self, catalogs, tract, patch):
        """Merge multiple catalogs.

        Parameters
        ----------
        catalogs : `dict`
            Mapping from filter names to dict of catalogs.
        tract : `int`
            tractId to use for the tractId column.
        patch : `str`
            patchId to use for the patchId column.

        Returns
        -------
        catalog : `lsst.pipe.tasks.parquetTable.ParquetTable`
            Merged dataframe, with a 3-level column index of
            ``(dataset, band, column)``, wrapped in the parquet writer shim class.
        """
        dfs = []
        for filt, tableDict in catalogs.items():
            for dataset, table in tableDict.items():
                # Convert afwTable to pandas DataFrame
                df = table.asAstropy().to_pandas().set_index('id', drop=True)

                # Sort columns by name, to ensure matching schema among patches
                df = df.reindex(sorted(df.columns), axis=1)
                df['tractId'] = tract
                df['patchId'] = patch

                # Make columns a 3-level MultiIndex
                df.columns = pd.MultiIndex.from_tuples([(dataset, filt, c) for c in df.columns],
                                                       names=('dataset', 'band', 'column'))
                dfs.append(df)

        catalog = functools.reduce(lambda d1, d2: d1.join(d2), dfs)
        return ParquetTable(dataFrame=catalog)

    def write(self, patchRef, catalog):
        """Write the output.

        Parameters
        ----------
        catalog : `ParquetTable`
            Catalog to write
        patchRef : `lsst.daf.persistence.ButlerDataRef`
            Data reference for patch
        """
        patchRef.put(catalog, self.config.coaddName + "Coadd_" + self.outputDataset)
        # since the filter isn't actually part of the data ID for the dataset we're saving,
        # it's confusing to see it in the log message, even if the butler simply ignores it.
        mergeDataId = patchRef.dataId.copy()
        del mergeDataId["filter"]
        self.log.info("Wrote merged catalog: %s" % (mergeDataId,))

    def writeMetadata(self, dataRefList):
        """No metadata to write, and not sure how to write it for a list of dataRefs.
        """
        pass


class WriteSourceTableConfig(pexConfig.Config):
    doApplyExternalPhotoCalib = pexConfig.Field(
        dtype=bool,
        default=False,
        doc=("Add local photoCalib columns from the calexp.photoCalib? Should only set True if "
             "generating Source Tables from older src tables which do not already have local calib columns")
    )
    doApplyExternalSkyWcs = pexConfig.Field(
        dtype=bool,
        default=False,
        doc=("Add local WCS columns from the calexp.wcs? Should only set True if "
             "generating Source Tables from older src tables which do not already have local calib columns")
    )


class WriteSourceTableTask(CmdLineTask):
    """Write source table to parquet
216 """
217 _DefaultName = "writeSourceTable"
218 ConfigClass = WriteSourceTableConfig
220 def runDataRef(self, dataRef):
221 src = dataRef.get('src')
222 if self.config.doApplyExternalPhotoCalib or self.config.doApplyExternalSkyWcs:
223 src = self.addCalibColumns(src, dataRef)
225 ccdVisitId = dataRef.get('ccdExposureId')
226 result = self.run(src, ccdVisitId=ccdVisitId)
227 dataRef.put(result.table, 'source')

    def run(self, catalog, ccdVisitId=None):
        """Convert `src` catalog to parquet

        Parameters
        ----------
        catalog : `afwTable.SourceCatalog`
            catalog to be converted
        ccdVisitId : `int`
            ccdVisitId to be added as a column

        Returns
        -------
        result : `lsst.pipe.base.Struct`
            ``table``
                `ParquetTable` version of the input catalog
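
        Examples
        --------
        A minimal sketch of standalone use; ``srcCatalog`` is assumed to be an
        `afwTable.SourceCatalog` read elsewhere, and the ccdVisitId value is
        hypothetical::

            task = WriteSourceTableTask()
            result = task.run(srcCatalog, ccdVisitId=182013)  # hypothetical id
            df = result.table.toDataFrame()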
244 """
245 self.log.info("Generating parquet table from src catalog")
246 df = catalog.asAstropy().to_pandas().set_index('id', drop=True)
247 df['ccdVisitId'] = ccdVisitId
248 return pipeBase.Struct(table=ParquetTable(dataFrame=df))

    def addCalibColumns(self, catalog, dataRef):
        """Add columns with local calibration evaluated at each centroid
        for backwards compatibility with old repos.

        This exists for the purpose of converting old src catalogs
        (which don't have the expected local calib columns) to Source Tables.

        Parameters
        ----------
        catalog : `afwTable.SourceCatalog`
            catalog to which calib columns will be added
        dataRef : `lsst.daf.persistence.ButlerDataRef`
            Data reference for fetching the calibs from disk.

        Returns
        -------
        newCat : `afwTable.SourceCatalog`
            Source Catalog with requested local calib columns
        """
        mapper = afwTable.SchemaMapper(catalog.schema)
        measureConfig = SingleFrameMeasurementTask.ConfigClass()
        measureConfig.doReplaceWithNoise = False

        # Just need the WCS or the PhotoCalib attached to an exposure
        exposure = dataRef.get('calexp_sub',
                               bbox=lsst.geom.Box2I(lsst.geom.Point2I(0, 0), lsst.geom.Point2I(0, 0)))

        mapper = afwTable.SchemaMapper(catalog.schema)
        mapper.addMinimalSchema(catalog.schema, True)
        schema = mapper.getOutputSchema()

        exposureIdInfo = dataRef.get("expIdInfo")
        measureConfig.plugins.names = []
        if self.config.doApplyExternalSkyWcs:
            plugin = 'base_LocalWcs'
            if plugin in schema:
                raise RuntimeError(f"{plugin} already in src catalog. Set doApplyExternalSkyWcs=False")
            else:
                measureConfig.plugins.names.add(plugin)

        if self.config.doApplyExternalPhotoCalib:
            plugin = 'base_LocalPhotoCalib'
            if plugin in schema:
                raise RuntimeError(f"{plugin} already in src catalog. Set doApplyExternalPhotoCalib=False")
            else:
                measureConfig.plugins.names.add(plugin)

        measurement = SingleFrameMeasurementTask(config=measureConfig, schema=schema)
        newCat = afwTable.SourceCatalog(schema)
        newCat.extend(catalog, mapper=mapper)
        measurement.run(measCat=newCat, exposure=exposure, exposureId=exposureIdInfo.expId)
        return newCat

    def writeMetadata(self, dataRef):
        """No metadata to write.
        """
        pass

    @classmethod
    def _makeArgumentParser(cls):
        parser = ArgumentParser(name=cls._DefaultName)
        parser.add_id_argument("--id", 'src',
                               help="data ID, e.g. --id visit=12345 ccd=0")
        return parser


class PostprocessAnalysis(object):
    """Calculate columns from ParquetTable

    This object manages and organizes an arbitrary set of computations
    on a catalog. The catalog is defined by a
    `lsst.pipe.tasks.parquetTable.ParquetTable` object (or list thereof), such as a
    `deepCoadd_obj` dataset, and the computations are defined by a collection
    of `lsst.pipe.tasks.functors.Functor` objects (or, equivalently,
    a `CompositeFunctor`).

    After the object is initialized, accessing the `.df` attribute (which
    holds the `pandas.DataFrame` containing the results of the calculations) triggers
    computation of said dataframe.

    One of the conveniences of using this object is the ability to define a desired common
    filter for all functors. This enables the same functor collection to be passed to
    several different `PostprocessAnalysis` objects without having to change the original
    functor collection, since the `filt` keyword argument of this object triggers an
    overwrite of the `filt` property for all functors in the collection.

    This object also allows a list of refFlags to be passed, and defines a set of default
    refFlags that are always included even if not requested.

    If a list of `ParquetTable` objects is passed, rather than a single one, then the
    calculations will be mapped over all the input catalogs. In principle, it should
    be straightforward to parallelize this activity, but initial tests have failed
    (see TODO in code comments).

    Parameters
    ----------
    parq : `lsst.pipe.tasks.ParquetTable` (or list of such)
        Source catalog(s) for computation

    functors : `list`, `dict`, or `lsst.pipe.tasks.functors.CompositeFunctor`
        Computations to do (functors that act on `parq`).
        If a dict, the output
        DataFrame will have columns keyed accordingly.
        If a list, the column keys will come from the
        `.shortname` attribute of each functor.

    filt : `str`, optional
        Filter in which to calculate. If provided,
        this will overwrite any existing `.filt` attribute
        of the provided functors.

    flags : `list`, optional
        List of flags (per-band) to include in output table.

    refFlags : `list`, optional
        List of refFlags (only reference band) to include in output table.
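
    Examples
    --------
    A minimal sketch of the intended usage; ``parq`` is assumed to be a
    `ParquetTable` (e.g. a ``deepCoadd_obj`` dataset read from the butler) and
    ``myFunctors`` a `CompositeFunctor` built elsewhere::

        analysis = PostprocessAnalysis(parq, myFunctors, filt='g',
                                       refFlags=['detect_isPrimary'])
        df = analysis.df  # first access triggers the computation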
368 """
369 _defaultRefFlags = []
370 _defaultFuncs = (('coord_ra', RAColumn()),
371 ('coord_dec', DecColumn()))
373 def __init__(self, parq, functors, filt=None, flags=None, refFlags=None):
374 self.parq = parq
375 self.functors = functors
377 self.filt = filt
378 self.flags = list(flags) if flags is not None else []
379 self.refFlags = list(self._defaultRefFlags)
380 if refFlags is not None:
381 self.refFlags += list(refFlags)
383 self._df = None
385 @property
386 def defaultFuncs(self):
387 funcs = dict(self._defaultFuncs)
388 return funcs
390 @property
391 def func(self):
392 additionalFuncs = self.defaultFuncs
393 additionalFuncs.update({flag: Column(flag, dataset='ref') for flag in self.refFlags})
394 additionalFuncs.update({flag: Column(flag, dataset='meas') for flag in self.flags})
396 if isinstance(self.functors, CompositeFunctor):
397 func = self.functors
398 else:
399 func = CompositeFunctor(self.functors)
401 func.funcDict.update(additionalFuncs)
402 func.filt = self.filt
404 return func
406 @property
407 def noDupCols(self):
408 return [name for name, func in self.func.funcDict.items() if func.noDup or func.dataset == 'ref']
410 @property
411 def df(self):
412 if self._df is None:
413 self.compute()
414 return self._df
416 def compute(self, dropna=False, pool=None):
417 # map over multiple parquet tables
418 if type(self.parq) in (list, tuple):
419 if pool is None:
420 dflist = [self.func(parq, dropna=dropna) for parq in self.parq]
421 else:
422 # TODO: Figure out why this doesn't work (pyarrow pickling issues?)
423 dflist = pool.map(functools.partial(self.func, dropna=dropna), self.parq)
424 self._df = pd.concat(dflist)
425 else:
426 self._df = self.func(self.parq, dropna=dropna)
428 return self._df


class TransformCatalogBaseConfig(pexConfig.Config):
    functorFile = pexConfig.Field(
        dtype=str,
        doc='Path to YAML file specifying functors to be computed',
        default=None,
        optional=True
    )


class TransformCatalogBaseTask(CmdLineTask):
    """Base class for transforming/standardizing a catalog

    by applying functors that convert units and apply calibrations.
    The purpose of this task is to perform a set of computations on
    an input `ParquetTable` dataset (such as `deepCoadd_obj`) and write the
    results to a new dataset (which needs to be declared in an `outputDataset`
    attribute).

    The calculations to be performed are defined in a YAML file that specifies
    a set of functors to be computed, provided as
    a `--functorFile` config parameter. An example of such a YAML file
    is the following:

        funcs:
            psfMag:
                functor: Mag
                args:
                    - base_PsfFlux
                filt: HSC-G
                dataset: meas
            cmodel_magDiff:
                functor: MagDiff
                args:
                    - modelfit_CModel
                    - base_PsfFlux
                filt: HSC-G
            gauss_magDiff:
                functor: MagDiff
                args:
                    - base_GaussianFlux
                    - base_PsfFlux
                filt: HSC-G
            count:
                functor: Column
                args:
                    - base_InputCount_value
                filt: HSC-G
            deconvolved_moments:
                functor: DeconvolvedMoments
                filt: HSC-G
                dataset: forced_src
        refFlags:
            - calib_psfUsed
            - merge_measurement_i
            - merge_measurement_r
            - merge_measurement_z
            - merge_measurement_y
            - merge_measurement_g
            - base_PixelFlags_flag_inexact_psfCenter
            - detect_isPrimary

    The names for each entry under "funcs" will become the names of columns in the
    output dataset. All the functors referenced are defined in `lsst.pipe.tasks.functors`.
    Positional arguments to be passed to each functor are in the `args` list,
    and any additional entries for each column other than "functor" or "args" (e.g., `'filt'`,
    `'dataset'`) are treated as keyword arguments to be passed to the functor initialization.

    The "refFlags" entry is a shortcut for a set of `Column` functors with the original
    column names, taken from the `'ref'` dataset.

    The "flags" entry will be expanded out per band.

    This task uses the `lsst.pipe.tasks.postprocess.PostprocessAnalysis` object
    to organize and execute the calculations.
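
    Notes
    -----
    Subclasses must supply the attributes that the properties below check for;
    a minimal (hypothetical) sketch::

        # hypothetical subclass and dataset names
        class TransformFooCatalogTask(TransformCatalogBaseTask):
            _DefaultName = 'transformFooCatalog'
            ConfigClass = TransformFooCatalogConfig  # a TransformCatalogBaseConfig subclass
            inputDataset = 'foo'
            outputDataset = 'fooTable'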
506 """
507 @property
508 def _DefaultName(self):
509 raise NotImplementedError('Subclass must define "_DefaultName" attribute')
511 @property
512 def outputDataset(self):
513 raise NotImplementedError('Subclass must define "outputDataset" attribute')
515 @property
516 def inputDataset(self):
517 raise NotImplementedError('Subclass must define "inputDataset" attribute')
519 @property
520 def ConfigClass(self):
521 raise NotImplementedError('Subclass must define "ConfigClass" attribute')
523 def runDataRef(self, dataRef):
524 parq = dataRef.get()
525 funcs = self.getFunctors()
526 df = self.run(parq, funcs=funcs, dataId=dataRef.dataId)
527 self.write(df, dataRef)
528 return df

    def run(self, parq, funcs=None, dataId=None, band=None):
        """Do postprocessing calculations

        Takes a `ParquetTable` object and dataId,
        returns a dataframe with results of postprocessing calculations.

        Parameters
        ----------
        parq : `lsst.pipe.tasks.parquetTable.ParquetTable`
            ParquetTable from which calculations are done.
        funcs : `lsst.pipe.tasks.functors.Functors`
            Functors to apply to the table's columns
        dataId : `dict`, optional
            Used to add a `patchId` column to the output dataframe.
        band : `str`, optional
            Filter band that is being processed.

        Returns
        -------
        `pandas.DataFrame`
        """
        self.log.info("Transforming/standardizing the source table dataId: %s", dataId)

        df = self.transform(band, parq, funcs, dataId).df
        self.log.info("Made a table of %d columns and %d rows", len(df.columns), len(df))
        return df

    def getFunctors(self):
        funcs = CompositeFunctor.from_file(self.config.functorFile)
        funcs.update(dict(PostprocessAnalysis._defaultFuncs))
        return funcs

    def getAnalysis(self, parq, funcs=None, band=None):
        # Avoids disk access if funcs is passed
        if funcs is None:
            funcs = self.getFunctors()
        analysis = PostprocessAnalysis(parq, funcs, filt=band)
        return analysis

    def transform(self, band, parq, funcs, dataId):
        analysis = self.getAnalysis(parq, funcs=funcs, band=band)
        df = analysis.df
        if dataId is not None:
            for key, value in dataId.items():
                df[key] = value

        return pipeBase.Struct(
            df=df,
            analysis=analysis
        )

    def write(self, df, parqRef):
        parqRef.put(ParquetTable(dataFrame=df), self.outputDataset)

    def writeMetadata(self, dataRef):
        """No metadata to write.
        """
        pass


class TransformObjectCatalogConfig(TransformCatalogBaseConfig):
    coaddName = pexConfig.Field(
        dtype=str,
        default="deep",
        doc="Name of coadd"
    )
    # TODO: remove in DM-27177
    filterMap = pexConfig.DictField(
        keytype=str,
        itemtype=str,
        default={},
        doc=("Dictionary mapping full filter name to short one for column name munging. "
             "These filters determine the output columns no matter what filters the "
             "input data actually contain."),
        deprecated=("Coadds are now identified by the band, so this transform is unused. "
                    "Will be removed after v22.")
    )
    outputBands = pexConfig.ListField(
        dtype=str,
        default=None,
        optional=True,
        doc=("These bands and only these bands will appear in the output,"
             " NaN-filled if the input does not include them."
             " If None, then use all bands found in the input.")
    )
    camelCase = pexConfig.Field(
        dtype=bool,
        default=True,
        doc=("Write per-band column names with camelCase, else underscore. "
             "For example: gPsFlux instead of g_PsFlux.")
    )
    multilevelOutput = pexConfig.Field(
        dtype=bool,
        default=False,
        doc=("Whether results dataframe should have a multilevel column index (True) or be flat "
             "and name-munged (False).")
    )


class TransformObjectCatalogTask(TransformCatalogBaseTask):
    """Produce a flattened Object Table to match the format specified in
    sdm_schemas.

    Do the same set of postprocessing calculations on all bands.

    This is identical to `TransformCatalogBaseTask`, except that it does the
    specified functor calculations for all filters present in the
    input `deepCoadd_obj` table. Any specific `"filt"` keywords specified
    by the YAML file will be superseded.
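
    Notes
    -----
    A flat, name-munged table is produced by default; a minimal (hypothetical)
    config override selecting specific bands might look like::

        config.outputBands = ['g', 'r', 'i']   # hypothetical band selection
        config.camelCase = True                # gPsFlux rather than g_PsFlux
        config.multilevelOutput = False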
640 """
641 _DefaultName = "transformObjectCatalog"
642 ConfigClass = TransformObjectCatalogConfig
644 inputDataset = 'deepCoadd_obj'
645 outputDataset = 'objectTable'
647 @classmethod
648 def _makeArgumentParser(cls):
649 parser = ArgumentParser(name=cls._DefaultName)
650 parser.add_id_argument("--id", cls.inputDataset,
651 ContainerClass=CoaddDataIdContainer,
652 help="data ID, e.g. --id tract=12345 patch=1,2")
653 return parser
655 def run(self, parq, funcs=None, dataId=None, band=None):
656 # NOTE: band kwarg is ignored here.
657 dfDict = {}
658 analysisDict = {}
659 templateDf = pd.DataFrame()
660 outputBands = parq.columnLevelNames['band'] if self.config.outputBands is None else \
661 self.config.outputBands
663 # Perform transform for data of filters that exist in parq.
664 for inputBand in parq.columnLevelNames['band']:
665 if inputBand not in outputBands:
666 self.log.info("Ignoring %s band data in the input", inputBand)
667 continue
668 self.log.info("Transforming the catalog of band %s", inputBand)
669 result = self.transform(inputBand, parq, funcs, dataId)
670 dfDict[inputBand] = result.df
671 analysisDict[inputBand] = result.analysis
672 if templateDf.empty:
673 templateDf = result.df
675 # Fill NaNs in columns of other wanted bands
676 for filt in outputBands:
677 if filt not in dfDict:
678 self.log.info("Adding empty columns for band %s", filt)
679 dfDict[filt] = pd.DataFrame().reindex_like(templateDf)
681 # This makes a multilevel column index, with band as first level
682 df = pd.concat(dfDict, axis=1, names=['band', 'column'])
684 if not self.config.multilevelOutput:
685 noDupCols = list(set.union(*[set(v.noDupCols) for v in analysisDict.values()]))
686 if dataId is not None:
687 noDupCols += list(dataId.keys())
688 df = flattenFilters(df, noDupCols=noDupCols, camelCase=self.config.camelCase)
690 self.log.info("Made a table of %d columns and %d rows", len(df.columns), len(df))
691 return df


class TractObjectDataIdContainer(CoaddDataIdContainer):

    def makeDataRefList(self, namespace):
        """Make self.refList from self.idList

        Generate a list of data references given tract and/or patch.
        This was adapted from `TractQADataIdContainer`, which was
        `TractDataIdContainer` modified to not require "filter".
        Only existing dataRefs are returned.
        """
        def getPatchRefList(tract):
            return [namespace.butler.dataRef(datasetType=self.datasetType,
                                             tract=tract.getId(),
                                             patch="%d,%d" % patch.getIndex()) for patch in tract]

        tractRefs = defaultdict(list)  # Data references for each tract
        for dataId in self.idList:
            skymap = self.getSkymap(namespace)

            if "tract" in dataId:
                tractId = dataId["tract"]
                if "patch" in dataId:
                    tractRefs[tractId].append(namespace.butler.dataRef(datasetType=self.datasetType,
                                                                       tract=tractId,
                                                                       patch=dataId['patch']))
                else:
                    tractRefs[tractId] += getPatchRefList(skymap[tractId])
            else:
                tractRefs = dict((tract.getId(), tractRefs.get(tract.getId(), []) + getPatchRefList(tract))
                                 for tract in skymap)
        outputRefList = []
        for tractRefList in tractRefs.values():
            existingRefs = [ref for ref in tractRefList if ref.datasetExists()]
            outputRefList.append(existingRefs)

        self.refList = outputRefList


class ConsolidateObjectTableConfig(pexConfig.Config):
    coaddName = pexConfig.Field(
        dtype=str,
        default="deep",
        doc="Name of coadd"
    )


class ConsolidateObjectTableTask(CmdLineTask):
    """Write patch-merged source tables to a tract-level parquet file
    """
    _DefaultName = "consolidateObjectTable"
    ConfigClass = ConsolidateObjectTableConfig

    inputDataset = 'objectTable'
    outputDataset = 'objectTable_tract'

    @classmethod
    def _makeArgumentParser(cls):
        parser = ArgumentParser(name=cls._DefaultName)

        parser.add_id_argument("--id", cls.inputDataset,
                               help="data ID, e.g. --id tract=12345",
                               ContainerClass=TractObjectDataIdContainer)
        return parser

    def runDataRef(self, patchRefList):
        df = pd.concat([patchRef.get().toDataFrame() for patchRef in patchRefList])
        patchRefList[0].put(ParquetTable(dataFrame=df), self.outputDataset)

    def writeMetadata(self, dataRef):
        """No metadata to write.
        """
        pass


class TransformSourceTableConfig(TransformCatalogBaseConfig):
    pass


class TransformSourceTableTask(TransformCatalogBaseTask):
    """Transform/standardize a source catalog
    """
    _DefaultName = "transformSourceTable"
    ConfigClass = TransformSourceTableConfig

    inputDataset = 'source'
    outputDataset = 'sourceTable'

    def writeMetadata(self, dataRef):
        """No metadata to write.
        """
        pass

    @classmethod
    def _makeArgumentParser(cls):
        parser = ArgumentParser(name=cls._DefaultName)
        parser.add_id_argument("--id", datasetType=cls.inputDataset,
                               level="sensor",
                               help="data ID, e.g. --id visit=12345 ccd=0")
        return parser

    def runDataRef(self, dataRef):
        """Override to specify band label to run()."""
        parq = dataRef.get()
        funcs = self.getFunctors()
        band = dataRef.get("calexp_filterLabel", immediate=True).bandLabel
        df = self.run(parq, funcs=funcs, dataId=dataRef.dataId, band=band)
        self.write(df, dataRef)
        return df


class ConsolidateVisitSummaryConnections(pipeBase.PipelineTaskConnections,
                                          dimensions=("instrument", "visit",),
                                          defaultTemplates={}):
    calexp = connectionTypes.Input(
        doc="Processed exposures used for metadata",
        name="calexp",
        storageClass="ExposureF",
        dimensions=("instrument", "visit", "detector"),
        deferLoad=True,
        multiple=True,
    )
    visitSummary = connectionTypes.Output(
        doc="Consolidated visit-level exposure metadata",
        name="visitSummary",
        storageClass="ExposureCatalog",
        dimensions=("instrument", "visit"),
    )


class ConsolidateVisitSummaryConfig(pipeBase.PipelineTaskConfig,
                                    pipelineConnections=ConsolidateVisitSummaryConnections):
    """Config for ConsolidateVisitSummaryTask"""
    pass


class ConsolidateVisitSummaryTask(pipeBase.PipelineTask, pipeBase.CmdLineTask):
    """Task to consolidate per-detector visit metadata.

    This task aggregates the following metadata from all the detectors in a
    single visit into an exposure catalog:

    - The visitInfo.
    - The wcs.
    - The photoCalib.
    - The physical_filter and band (if available).
    - The psf size, shape, and effective area at the center of the detector.
    - The corners of the bounding box in right ascension/declination.

    Other quantities such as Psf, ApCorrMap, and TransmissionCurve are not
    persisted here because of storage concerns, and because of their limited
    utility as summary statistics.

    Tests for this task are performed in ci_hsc_gen3.
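
    Examples
    --------
    The output is an `lsst.afw.table.ExposureCatalog` with one row per detector;
    a hypothetical Gen3 read of the result might look like::

        # hypothetical instrument and visit values
        visitSummary = butler.get('visitSummary', instrument='HSC', visit=1228)
        for row in visitSummary:
            print(row['detector_id'], row['psfSigma'], row['raCorners'])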
846 """
847 _DefaultName = "consolidateVisitSummary"
848 ConfigClass = ConsolidateVisitSummaryConfig
850 @classmethod
851 def _makeArgumentParser(cls):
852 parser = ArgumentParser(name=cls._DefaultName)
854 parser.add_id_argument("--id", "calexp",
855 help="data ID, e.g. --id visit=12345",
856 ContainerClass=VisitDataIdContainer)
857 return parser
859 def writeMetadata(self, dataRef):
860 """No metadata to persist, so override to remove metadata persistance.
861 """
862 pass
864 def writeConfig(self, butler, clobber=False, doBackup=True):
865 """No config to persist, so override to remove config persistance.
866 """
867 pass

    def runDataRef(self, dataRefList):
        visit = dataRefList[0].dataId['visit']

        self.log.debug("Concatenating metadata from %d per-detector calexps (visit %d)" %
                       (len(dataRefList), visit))

        expCatalog = self._combineExposureMetadata(visit, dataRefList, isGen3=False)

        dataRefList[0].put(expCatalog, 'visitSummary', visit=visit)

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        dataRefs = butlerQC.get(inputRefs.calexp)
        visit = dataRefs[0].dataId.byName()['visit']

        self.log.debug("Concatenating metadata from %d per-detector calexps (visit %d)" %
                       (len(dataRefs), visit))

        expCatalog = self._combineExposureMetadata(visit, dataRefs)

        butlerQC.put(expCatalog, outputRefs.visitSummary)

    def _combineExposureMetadata(self, visit, dataRefs, isGen3=True):
        """Make a combined exposure catalog from a list of dataRefs.

        Parameters
        ----------
        visit : `int`
            Visit identification number.
        dataRefs : `list`
            List of calexp dataRefs in visit. May be list of
            `lsst.daf.persistence.ButlerDataRef` (Gen2) or
            `lsst.daf.butler.DeferredDatasetHandle` (Gen3).
        isGen3 : `bool`, optional
            Specifies if this is a Gen3 list of datarefs.

        Returns
        -------
        visitSummary : `lsst.afw.table.ExposureCatalog`
            Exposure catalog with per-detector summary information.
        """
        schema = afwTable.ExposureTable.makeMinimalSchema()
        schema.addField('visit', type='I', doc='Visit number')
        schema.addField('detector_id', type='I', doc='Detector number')
        schema.addField('physical_filter', type='String', size=32, doc='Physical filter')
        schema.addField('band', type='String', size=32, doc='Name of band')
        schema.addField('psfSigma', type='F',
                        doc='PSF model second-moments determinant radius (center of chip) (pixel)')
        schema.addField('psfArea', type='F',
                        doc='PSF model effective area (center of chip) (pixel**2)')
        schema.addField('psfIxx', type='F',
                        doc='PSF model Ixx (center of chip) (pixel**2)')
        schema.addField('psfIyy', type='F',
                        doc='PSF model Iyy (center of chip) (pixel**2)')
        schema.addField('psfIxy', type='F',
                        doc='PSF model Ixy (center of chip) (pixel**2)')
        schema.addField('raCorners', type='ArrayD', size=4,
                        doc='Right Ascension of bounding box corners (degrees)')
        schema.addField('decCorners', type='ArrayD', size=4,
                        doc='Declination of bounding box corners (degrees)')

        cat = afwTable.ExposureCatalog(schema)
        cat.resize(len(dataRefs))

        cat['visit'] = visit

        for i, dataRef in enumerate(dataRefs):
            if isGen3:
                visitInfo = dataRef.get(component='visitInfo')
                filterLabel = dataRef.get(component='filterLabel')
                psf = dataRef.get(component='psf')
                wcs = dataRef.get(component='wcs')
                photoCalib = dataRef.get(component='photoCalib')
                detector = dataRef.get(component='detector')
                bbox = dataRef.get(component='bbox')
                validPolygon = dataRef.get(component='validPolygon')
            else:
                # Note that we need to read the calexp because there is
                # no magic access to the psf except through the exposure.
                gen2_read_bbox = lsst.geom.BoxI(lsst.geom.PointI(0, 0), lsst.geom.PointI(1, 1))
                exp = dataRef.get(datasetType='calexp_sub', bbox=gen2_read_bbox)
                visitInfo = exp.getInfo().getVisitInfo()
                filterLabel = dataRef.get("calexp_filterLabel")
                psf = exp.getPsf()
                wcs = exp.getWcs()
                photoCalib = exp.getPhotoCalib()
                detector = exp.getDetector()
                bbox = dataRef.get(datasetType='calexp_bbox')
                validPolygon = exp.getInfo().getValidPolygon()

            rec = cat[i]
            rec.setBBox(bbox)
            rec.setVisitInfo(visitInfo)
            rec.setWcs(wcs)
            rec.setPhotoCalib(photoCalib)
            rec.setDetector(detector)
            rec.setValidPolygon(validPolygon)

            rec['physical_filter'] = filterLabel.physicalLabel if filterLabel.hasPhysicalLabel() else ""
            rec['band'] = filterLabel.bandLabel if filterLabel.hasBandLabel() else ""
            rec['detector_id'] = detector.getId()
            shape = psf.computeShape(bbox.getCenter())
            rec['psfSigma'] = shape.getDeterminantRadius()
            rec['psfIxx'] = shape.getIxx()
            rec['psfIyy'] = shape.getIyy()
            rec['psfIxy'] = shape.getIxy()
            im = psf.computeKernelImage(bbox.getCenter())
            # The calculation of effective psf area is taken from
            # meas_base/src/PsfFlux.cc#L112. See
            # https://github.com/lsst/meas_base/blob/
            # 750bffe6620e565bda731add1509507f5c40c8bb/src/PsfFlux.cc#L112
            rec['psfArea'] = np.sum(im.array)/np.sum(im.array**2.)

            sph_pts = wcs.pixelToSky(lsst.geom.Box2D(bbox).getCorners())
            rec['raCorners'][:] = [sph.getRa().asDegrees() for sph in sph_pts]
            rec['decCorners'][:] = [sph.getDec().asDegrees() for sph in sph_pts]

        return cat


class VisitDataIdContainer(DataIdContainer):
    """DataIdContainer that groups sensor-level ids by visit.
    """

    def makeDataRefList(self, namespace):
        """Make self.refList from self.idList

        Generate a list of data references grouped by visit.

        Parameters
        ----------
        namespace : `argparse.Namespace`
            Namespace used by `lsst.pipe.base.CmdLineTask` to parse command line arguments
        """
        # Group by visits
        visitRefs = defaultdict(list)
        for dataId in self.idList:
            if "visit" in dataId:
                visitId = dataId["visit"]
                # append all dataRefs in the subset to this visit's list
                subset = namespace.butler.subset(self.datasetType, dataId=dataId)
                visitRefs[visitId].extend([dataRef for dataRef in subset])

        outputRefList = []
        for refList in visitRefs.values():
            existingRefs = [ref for ref in refList if ref.datasetExists()]
            if existingRefs:
                outputRefList.append(existingRefs)

        self.refList = outputRefList


class ConsolidateSourceTableConfig(pexConfig.Config):
    pass


class ConsolidateSourceTableTask(CmdLineTask):
    """Concatenate `sourceTable` list into a per-visit `sourceTable_visit`
    """
    _DefaultName = 'consolidateSourceTable'
    ConfigClass = ConsolidateSourceTableConfig

    inputDataset = 'sourceTable'
    outputDataset = 'sourceTable_visit'

    def runDataRef(self, dataRefList):
        self.log.info("Concatenating %s per-detector Source Tables", len(dataRefList))
        df = pd.concat([dataRef.get().toDataFrame() for dataRef in dataRefList])
        dataRefList[0].put(ParquetTable(dataFrame=df), self.outputDataset)

    @classmethod
    def _makeArgumentParser(cls):
        parser = ArgumentParser(name=cls._DefaultName)

        parser.add_id_argument("--id", cls.inputDataset,
                               help="data ID, e.g. --id visit=12345",
                               ContainerClass=VisitDataIdContainer)
        return parser

    def writeMetadata(self, dataRef):
        """No metadata to write.
        """
        pass

    def writeConfig(self, butler, clobber=False, doBackup=True):
        """No config to write.
        """
        pass