Coverage for python/lsst/pipe/tasks/postprocess.py : 27%

# This file is part of pipe_tasks
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.
import functools
import pandas as pd
import numpy as np
from collections import defaultdict

import lsst.geom
import lsst.pex.config as pexConfig
import lsst.pipe.base as pipeBase
from lsst.pipe.base import connectionTypes
import lsst.afw.table as afwTable
from lsst.meas.base import SingleFrameMeasurementTask
from lsst.pipe.base import CmdLineTask, ArgumentParser, DataIdContainer
from lsst.coadd.utils.coaddDataIdContainer import CoaddDataIdContainer

from .parquetTable import ParquetTable
from .multiBandUtils import makeMergeArgumentParser, MergeSourcesRunner
from .functors import CompositeFunctor, RAColumn, DecColumn, Column


def flattenFilters(df, filterDict, noDupCols=['coord_ra', 'coord_dec'], camelCase=False):
    """Flatten a dataframe with a multilevel column index into a single-level
    index, prefixing each per-filter column with the short filter name.
    """
    newDf = pd.DataFrame()
    for filt, filtShort in filterDict.items():
        subdf = df[filt]
        columnFormat = '{0}{1}' if camelCase else '{0}_{1}'
        newColumns = {c: columnFormat.format(filtShort, c)
                      for c in subdf.columns if c not in noDupCols}
        cols = list(newColumns.keys())
        newDf = pd.concat([newDf, subdf[cols].rename(columns=newColumns)], axis=1)

    newDf = pd.concat([subdf[noDupCols], newDf], axis=1)
    return newDf
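
# Minimal usage sketch for `flattenFilters` (hypothetical column and filter
# names, not taken from this module):
#
#     import pandas as pd
#     columns = pd.MultiIndex.from_tuples(
#         [('HSC-G', 'coord_ra'), ('HSC-G', 'PsfFlux'),
#          ('HSC-R', 'coord_ra'), ('HSC-R', 'PsfFlux')],
#         names=('filter', 'column'))
#     df = pd.DataFrame([[1.0, 10.0, 1.0, 12.0]], columns=columns)
#     flat = flattenFilters(df, {'HSC-G': 'g', 'HSC-R': 'r'},
#                           noDupCols=['coord_ra'], camelCase=True)
#
# `flat` then has single-level columns ['coord_ra', 'gPsfFlux', 'rPsfFlux'];
# the `noDupCols` entries are kept once (from the last filter processed)
# instead of being duplicated per band.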


class WriteObjectTableConfig(pexConfig.Config):
    priorityList = pexConfig.ListField(
        dtype=str,
        default=[],
        doc="Priority-ordered list of bands for the merge."
    )
    engine = pexConfig.Field(
        dtype=str,
        default="pyarrow",
        doc="Parquet engine for writing (pyarrow or fastparquet)"
    )
    coaddName = pexConfig.Field(
        dtype=str,
        default="deep",
        doc="Name of coadd"
    )

    def validate(self):
        pexConfig.Config.validate(self)
        if len(self.priorityList) == 0:
            raise RuntimeError("No priority list provided")


class WriteObjectTableTask(CmdLineTask):
    """Write filter-merged source tables to parquet.
    """
    _DefaultName = "writeObjectTable"
    ConfigClass = WriteObjectTableConfig
    RunnerClass = MergeSourcesRunner

    # Names of table datasets to be merged
    inputDatasets = ('forced_src', 'meas', 'ref')

    # Tag of output dataset written by `MergeSourcesTask.write`
    outputDataset = 'obj'

    def __init__(self, butler=None, schema=None, **kwargs):
        # It is a shame that this class can't use the default init for
        # CmdLineTask, but to do so would require its own special task runner,
        # which is many more lines of specialization, so this is how it is
        # for now.
        CmdLineTask.__init__(self, **kwargs)

    def runDataRef(self, patchRefList):
        """!
        @brief Merge coadd sources from multiple bands. Calls @ref `run` which
        must be defined in subclasses that inherit from MergeSourcesTask.
        @param[in] patchRefList list of data references for each filter
        """
        catalogs = dict(self.readCatalog(patchRef) for patchRef in patchRefList)
        dataId = patchRefList[0].dataId
        mergedCatalog = self.run(catalogs, tract=dataId['tract'], patch=dataId['patch'])
        self.write(patchRefList[0], mergedCatalog)

    @classmethod
    def _makeArgumentParser(cls):
        """Create a suitable ArgumentParser.

        We will use the ArgumentParser to get a list of data
        references for patches; the RunnerClass will sort them into lists
        of data references for the same patch.

        References the first element of ``self.inputDatasets``, rather than
        ``self.inputDataset``.
        """
        return makeMergeArgumentParser(cls._DefaultName, cls.inputDatasets[0])

    def readCatalog(self, patchRef):
        """Read input catalogs.

        Read all the input datasets given by the ``inputDatasets``
        attribute.

        Parameters
        ----------
        patchRef : `lsst.daf.persistence.ButlerDataRef`
            Data reference for patch.

        Returns
        -------
        Tuple consisting of filter name and a dict of catalogs, keyed by
        dataset name.
        """
        filterName = patchRef.dataId["filter"]
        catalogDict = {}
        for dataset in self.inputDatasets:
            catalog = patchRef.get(self.config.coaddName + "Coadd_" + dataset, immediate=True)
            self.log.info("Read %d sources from %s for filter %s: %s" %
                          (len(catalog), dataset, filterName, patchRef.dataId))
            catalogDict[dataset] = catalog
        return filterName, catalogDict

    def run(self, catalogs, tract, patch):
        """Merge multiple catalogs.

        Parameters
        ----------
        catalogs : `dict`
            Mapping from filter names to dict of catalogs.
        tract : `int`
            tractId to use for the tractId column.
        patch : `str`
            patchId to use for the patchId column.

        Returns
        -------
        catalog : `lsst.pipe.tasks.parquetTable.ParquetTable`
            Merged dataframe, with each column prefixed by
            `filter_tag(filt)`, wrapped in the parquet writer shim class.
        """
        dfs = []
        for filt, tableDict in catalogs.items():
            for dataset, table in tableDict.items():
                # Convert afwTable to pandas DataFrame
                df = table.asAstropy().to_pandas().set_index('id', drop=True)

                # Sort columns by name, to ensure matching schema among patches
                df = df.reindex(sorted(df.columns), axis=1)
                df['tractId'] = tract
                df['patchId'] = patch

                # Make columns a 3-level MultiIndex
                df.columns = pd.MultiIndex.from_tuples([(dataset, filt, c) for c in df.columns],
                                                       names=('dataset', 'filter', 'column'))
                dfs.append(df)

        catalog = functools.reduce(lambda d1, d2: d1.join(d2), dfs)
        return ParquetTable(dataFrame=catalog)
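
    # Note on `run` above (illustrative values only): the merged table's
    # columns form a (dataset, filter, column) MultiIndex, e.g.
    #
    #     ('meas',       'HSC-G', 'base_PsfFlux_instFlux')
    #     ('forced_src', 'HSC-R', 'base_PsfFlux_instFlux')
    #     ('ref',        'HSC-G', 'detect_isPrimary')
    #
    # so a single dataset/band slice can be recovered from the underlying
    # dataframe with, e.g., `catalog.toDataFrame()['meas']['HSC-G']`.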

    def write(self, patchRef, catalog):
        """Write the output.

        Parameters
        ----------
        catalog : `ParquetTable`
            Catalog to write.
        patchRef : `lsst.daf.persistence.ButlerDataRef`
            Data reference for patch.
        """
        patchRef.put(catalog, self.config.coaddName + "Coadd_" + self.outputDataset)
        # since the filter isn't actually part of the data ID for the dataset we're saving,
        # it's confusing to see it in the log message, even if the butler simply ignores it.
        mergeDataId = patchRef.dataId.copy()
        del mergeDataId["filter"]
        self.log.info("Wrote merged catalog: %s" % (mergeDataId,))

    def writeMetadata(self, dataRefList):
        """No metadata to write, and not sure how to write it for a list of dataRefs.
        """
        pass


class WriteSourceTableConfig(pexConfig.Config):
    doApplyExternalPhotoCalib = pexConfig.Field(
        dtype=bool,
        default=False,
        doc=("Add local photoCalib columns from the calexp.photoCalib? Should only be set True if "
             "generating Source Tables from older src tables which do not already have local calib "
             "columns")
    )
    doApplyExternalSkyWcs = pexConfig.Field(
        dtype=bool,
        default=False,
        doc=("Add local WCS columns from the calexp.wcs? Should only be set True if "
             "generating Source Tables from older src tables which do not already have local calib "
             "columns")
    )


class WriteSourceTableTask(CmdLineTask):
    """Write source table to parquet.
    """
    _DefaultName = "writeSourceTable"
    ConfigClass = WriteSourceTableConfig

    def runDataRef(self, dataRef):
        src = dataRef.get('src')
        if self.config.doApplyExternalPhotoCalib or self.config.doApplyExternalSkyWcs:
            src = self.addCalibColumns(src, dataRef)

        ccdVisitId = dataRef.get('ccdExposureId')
        result = self.run(src, ccdVisitId=ccdVisitId)
        dataRef.put(result.table, 'source')

    def run(self, catalog, ccdVisitId=None):
        """Convert `src` catalog to parquet.

        Parameters
        ----------
        catalog : `afwTable.SourceCatalog`
            Catalog to be converted.
        ccdVisitId : `int`
            ccdVisitId to be added as a column.

        Returns
        -------
        result : `lsst.pipe.base.Struct`
            ``table``
                `ParquetTable` version of the input catalog.
        """
        self.log.info("Generating parquet table from src catalog")
        df = catalog.asAstropy().to_pandas().set_index('id', drop=True)
        df['ccdVisitId'] = ccdVisitId
        return pipeBase.Struct(table=ParquetTable(dataFrame=df))

    def addCalibColumns(self, catalog, dataRef):
        """Add columns with local calibration evaluated at each centroid,
        for backwards compatibility with old repos.

        This exists for the purpose of converting old src catalogs
        (which don't have the expected local calib columns) to Source Tables.

        Parameters
        ----------
        catalog : `afwTable.SourceCatalog`
            Catalog to which calib columns will be added.
        dataRef : `lsst.daf.persistence.ButlerDataRef`
            Data reference for fetching the calibs from disk.

        Returns
        -------
        newCat : `afwTable.SourceCatalog`
            Source Catalog with requested local calib columns.
        """
        mapper = afwTable.SchemaMapper(catalog.schema)
        measureConfig = SingleFrameMeasurementTask.ConfigClass()
        measureConfig.doReplaceWithNoise = False

        # Just need the WCS or the PhotoCalib attached to an exposure
        exposure = dataRef.get('calexp_sub',
                               bbox=lsst.geom.Box2I(lsst.geom.Point2I(0, 0), lsst.geom.Point2I(0, 0)))

        mapper = afwTable.SchemaMapper(catalog.schema)
        mapper.addMinimalSchema(catalog.schema, True)
        schema = mapper.getOutputSchema()

        exposureIdInfo = dataRef.get("expIdInfo")
        measureConfig.plugins.names = []
        if self.config.doApplyExternalSkyWcs:
            plugin = 'base_LocalWcs'
            if plugin in schema:
                raise RuntimeError(f"{plugin} already in src catalog. Set doApplyExternalSkyWcs=False")
            else:
                measureConfig.plugins.names.add(plugin)

        if self.config.doApplyExternalPhotoCalib:
            plugin = 'base_LocalPhotoCalib'
            if plugin in schema:
                raise RuntimeError(f"{plugin} already in src catalog. Set doApplyExternalPhotoCalib=False")
            else:
                measureConfig.plugins.names.add(plugin)

        measurement = SingleFrameMeasurementTask(config=measureConfig, schema=schema)
        newCat = afwTable.SourceCatalog(schema)
        newCat.extend(catalog, mapper=mapper)
        measurement.run(measCat=newCat, exposure=exposure, exposureId=exposureIdInfo.expId)
        return newCat
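
    # Config sketch for converting old repos (both flags default to False;
    # this is an illustrative override, not part of the task itself):
    #
    #     config = WriteSourceTableTask.ConfigClass()
    #     config.doApplyExternalPhotoCalib = True
    #     config.doApplyExternalSkyWcs = True
    #     task = WriteSourceTableTask(config=config)
    #
    # With either flag set, `runDataRef` calls `addCalibColumns` before
    # writing the source table.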

    def writeMetadata(self, dataRef):
        """No metadata to write.
        """
        pass

    @classmethod
    def _makeArgumentParser(cls):
        parser = ArgumentParser(name=cls._DefaultName)
        parser.add_id_argument("--id", 'src',
                               help="data ID, e.g. --id visit=12345 ccd=0")
        return parser


class PostprocessAnalysis(object):
    """Calculate columns from a ParquetTable.

    This object manages and organizes an arbitrary set of computations
    on a catalog. The catalog is defined by a
    `lsst.pipe.tasks.parquetTable.ParquetTable` object (or list thereof), such as a
    `deepCoadd_obj` dataset, and the computations are defined by a collection
    of `lsst.pipe.tasks.functors.Functor` objects (or, equivalently,
    a `CompositeFunctor`).

    After the object is initialized, accessing the `.df` attribute (which
    holds the `pandas.DataFrame` containing the results of the calculations)
    triggers computation of said dataframe.

    One of the conveniences of using this object is the ability to define a
    desired common filter for all functors. This enables the same functor
    collection to be passed to several different `PostprocessAnalysis` objects
    without having to change the original functor collection, since the `filt`
    keyword argument of this object triggers an overwrite of the `filt`
    property for all functors in the collection.

    This object also allows a list of refFlags to be passed, and defines a set
    of default refFlags that are always included even if not requested.

    If a list of `ParquetTable` objects is passed, rather than a single one,
    then the calculations will be mapped over all the input catalogs. In
    principle, it should be straightforward to parallelize this activity, but
    initial tests have failed (see TODO in code comments).

    Parameters
    ----------
    parq : `lsst.pipe.tasks.ParquetTable` (or list of such)
        Source catalog(s) for computation.

    functors : `list`, `dict`, or `lsst.pipe.tasks.functors.CompositeFunctor`
        Computations to do (functors that act on `parq`).
        If a dict, the output DataFrame will have columns keyed accordingly.
        If a list, the column keys will come from the
        `.shortname` attribute of each functor.

    filt : `str` (optional)
        Filter in which to calculate. If provided,
        this will overwrite any existing `.filt` attribute
        of the provided functors.

    flags : `list` (optional)
        List of flags (per-band) to include in output table.

    refFlags : `list` (optional)
        List of refFlags (only reference band) to include in output table.
    """
    _defaultRefFlags = []
    _defaultFuncs = (('coord_ra', RAColumn()),
                     ('coord_dec', DecColumn()))
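
    # Usage sketch (hypothetical functor and column names; `parq` is assumed
    # to be a deepCoadd_obj ParquetTable already read from the butler):
    #
    #     funcs = {'gPsfFlux': Column('base_PsfFlux_instFlux', dataset='meas')}
    #     analysis = PostprocessAnalysis(parq, funcs, filt='HSC-G',
    #                                    refFlags=['detect_isPrimary'])
    #     df = analysis.df   # first access triggers compute()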

    def __init__(self, parq, functors, filt=None, flags=None, refFlags=None):
        self.parq = parq
        self.functors = functors

        self.filt = filt
        self.flags = list(flags) if flags is not None else []
        self.refFlags = list(self._defaultRefFlags)
        if refFlags is not None:
            self.refFlags += list(refFlags)

        self._df = None

    @property
    def defaultFuncs(self):
        funcs = dict(self._defaultFuncs)
        return funcs

    @property
    def func(self):
        additionalFuncs = self.defaultFuncs
        additionalFuncs.update({flag: Column(flag, dataset='ref') for flag in self.refFlags})
        additionalFuncs.update({flag: Column(flag, dataset='meas') for flag in self.flags})

        if isinstance(self.functors, CompositeFunctor):
            func = self.functors
        else:
            func = CompositeFunctor(self.functors)

        func.funcDict.update(additionalFuncs)
        func.filt = self.filt

        return func

    @property
    def noDupCols(self):
        return [name for name, func in self.func.funcDict.items() if func.noDup or func.dataset == 'ref']

    @property
    def df(self):
        if self._df is None:
            self.compute()
        return self._df

    def compute(self, dropna=False, pool=None):
        # map over multiple parquet tables
        if type(self.parq) in (list, tuple):
            if pool is None:
                dflist = [self.func(parq, dropna=dropna) for parq in self.parq]
            else:
                # TODO: Figure out why this doesn't work (pyarrow pickling issues?)
                dflist = pool.map(functools.partial(self.func, dropna=dropna), self.parq)
            self._df = pd.concat(dflist)
        else:
            self._df = self.func(self.parq, dropna=dropna)

        return self._df


class TransformCatalogBaseConfig(pexConfig.Config):
    functorFile = pexConfig.Field(
        dtype=str,
        doc="Path to YAML file specifying functors to be computed",
        default=None,
        optional=True
    )


class TransformCatalogBaseTask(CmdLineTask):
    """Base class for transforming/standardizing a catalog by applying
    functors that convert units and apply calibrations.

    The purpose of this task is to perform a set of computations on
    an input `ParquetTable` dataset (such as `deepCoadd_obj`) and write the
    results to a new dataset (which needs to be declared in an `outputDataset`
    attribute).

    The calculations to be performed are defined in a YAML file that specifies
    a set of functors to be computed, provided as
    a `--functorFile` config parameter. An example of such a YAML file
    is the following:

        funcs:
            psfMag:
                functor: Mag
                args:
                    - base_PsfFlux
                filt: HSC-G
                dataset: meas
            cmodel_magDiff:
                functor: MagDiff
                args:
                    - modelfit_CModel
                    - base_PsfFlux
                filt: HSC-G
            gauss_magDiff:
                functor: MagDiff
                args:
                    - base_GaussianFlux
                    - base_PsfFlux
                filt: HSC-G
            count:
                functor: Column
                args:
                    - base_InputCount_value
                filt: HSC-G
            deconvolved_moments:
                functor: DeconvolvedMoments
                filt: HSC-G
                dataset: forced_src
        refFlags:
            - calib_psfUsed
            - merge_measurement_i
            - merge_measurement_r
            - merge_measurement_z
            - merge_measurement_y
            - merge_measurement_g
            - base_PixelFlags_flag_inexact_psfCenter
            - detect_isPrimary

    The names for each entry under "funcs" will become the names of columns in
    the output dataset. All the functors referenced are defined in
    `lsst.pipe.tasks.functors`. Positional arguments to be passed to each
    functor are in the `args` list, and any additional entries for each column
    other than "functor" or "args" (e.g., `'filt'`, `'dataset'`) are treated as
    keyword arguments to be passed to the functor initialization.

    The "refFlags" entry is a shortcut for a set of `Column` functors that keep
    the original column name and are taken from the `'ref'` dataset.

    The "flags" entry will be expanded out per band.

    Note, if `'filter'` is provided as part of the `dataId` when running this
    task (even though `deepCoadd_obj` does not use `'filter'`), then this will
    override the `filt` kwargs provided in the YAML file, and the calculations
    will be done in that filter.

    This task uses the `lsst.pipe.tasks.postprocess.PostprocessAnalysis` object
    to organize and execute the calculations.
    """

    @property
    def _DefaultName(self):
        raise NotImplementedError('Subclass must define "_DefaultName" attribute')

    @property
    def outputDataset(self):
        raise NotImplementedError('Subclass must define "outputDataset" attribute')

    @property
    def inputDataset(self):
        raise NotImplementedError('Subclass must define "inputDataset" attribute')

    @property
    def ConfigClass(self):
        raise NotImplementedError('Subclass must define "ConfigClass" attribute')

    def runDataRef(self, dataRef):
        parq = dataRef.get()
        funcs = self.getFunctors()
        df = self.run(parq, funcs=funcs, dataId=dataRef.dataId)
        self.write(df, dataRef)
        return df

    def run(self, parq, funcs=None, dataId=None):
        """Do postprocessing calculations.

        Takes a `ParquetTable` object and dataId,
        returns a dataframe with results of postprocessing calculations.

        Parameters
        ----------
        parq : `lsst.pipe.tasks.parquetTable.ParquetTable`
            ParquetTable from which calculations are done.
        funcs : `lsst.pipe.tasks.functors.Functors`
            Functors to apply to the table's columns.
        dataId : `dict`, optional
            Used to add a `patchId` column to the output dataframe.

        Returns
        -------
        `pandas.DataFrame`
        """
        self.log.info("Transforming/standardizing the source table dataId: %s", dataId)

        filt = dataId.get('filter', None)
        df = self.transform(filt, parq, funcs, dataId).df
        self.log.info("Made a table of %d columns and %d rows", len(df.columns), len(df))
        return df

    def getFunctors(self):
        funcs = CompositeFunctor.from_file(self.config.functorFile)
        funcs.update(dict(PostprocessAnalysis._defaultFuncs))
        return funcs

    def getAnalysis(self, parq, funcs=None, filt=None):
        # Avoids disk access if funcs is passed
        if funcs is None:
            funcs = self.getFunctors()
        analysis = PostprocessAnalysis(parq, funcs, filt=filt)
        return analysis

    def transform(self, filt, parq, funcs, dataId):
        analysis = self.getAnalysis(parq, funcs=funcs, filt=filt)
        df = analysis.df
        if dataId is not None:
            for key, value in dataId.items():
                df[key] = value

        return pipeBase.Struct(
            df=df,
            analysis=analysis
        )

    def write(self, df, parqRef):
        parqRef.put(ParquetTable(dataFrame=df), self.outputDataset)

    def writeMetadata(self, dataRef):
        """No metadata to write.
        """
        pass


class TransformObjectCatalogConfig(TransformCatalogBaseConfig):
    coaddName = pexConfig.Field(
        dtype=str,
        default="deep",
        doc="Name of coadd"
    )
    filterMap = pexConfig.DictField(
        keytype=str,
        itemtype=str,
        default={},
        doc=("Dictionary mapping full filter name to short one for column name munging. "
             "These filters determine the output columns no matter what filters the "
             "input data actually contain.")
    )
    camelCase = pexConfig.Field(
        dtype=bool,
        default=True,
        doc=("Write per-filter column names with camelCase, else underscore. "
             "For example: gPsfFlux instead of g_PsfFlux.")
    )
    multilevelOutput = pexConfig.Field(
        dtype=bool,
        default=False,
        doc=("Whether results dataframe should have a multilevel column index (True) or be flat "
             "and name-munged (False).")
    )


class TransformObjectCatalogTask(TransformCatalogBaseTask):
    """Compute the Flattened Object Table as defined in the DPDD.

    Do the same set of postprocessing calculations on all bands.

    This is identical to `TransformCatalogBaseTask`, except that it does the
    specified functor calculations for all filters present in the
    input `deepCoadd_obj` table. Any specific `"filt"` keywords specified
    by the YAML file will be superseded.
    """
    _DefaultName = "transformObjectCatalog"
    ConfigClass = TransformObjectCatalogConfig

    inputDataset = 'deepCoadd_obj'
    outputDataset = 'objectTable'

    @classmethod
    def _makeArgumentParser(cls):
        parser = ArgumentParser(name=cls._DefaultName)
        parser.add_id_argument("--id", cls.inputDataset,
                               ContainerClass=CoaddDataIdContainer,
                               help="data ID, e.g. --id tract=12345 patch=1,2")
        return parser

    def run(self, parq, funcs=None, dataId=None):
        dfDict = {}
        analysisDict = {}
        templateDf = pd.DataFrame()
        # Perform transform for data of filters that exist in parq and are
        # specified in config.filterMap
        for filt in parq.columnLevelNames['filter']:
            if filt not in self.config.filterMap:
                self.log.info("Ignoring %s data in the input", filt)
                continue
            self.log.info("Transforming the catalog of filter %s", filt)
            result = self.transform(filt, parq, funcs, dataId)
            dfDict[filt] = result.df
            analysisDict[filt] = result.analysis
            if templateDf.empty:
                templateDf = result.df

        # Fill NaNs in columns of other wanted filters
        for filt in self.config.filterMap:
            if filt not in dfDict:
                self.log.info("Adding empty columns for filter %s", filt)
                dfDict[filt] = pd.DataFrame().reindex_like(templateDf)

        # This makes a multilevel column index, with filter as first level
        df = pd.concat(dfDict, axis=1, names=['filter', 'column'])

        if not self.config.multilevelOutput:
            noDupCols = list(set.union(*[set(v.noDupCols) for v in analysisDict.values()]))
            if dataId is not None:
                noDupCols += list(dataId.keys())
            df = flattenFilters(df, self.config.filterMap, noDupCols=noDupCols,
                                camelCase=self.config.camelCase)

        self.log.info("Made a table of %d columns and %d rows", len(df.columns), len(df))
        return df
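
    # Configuration sketch (example values; actual filter names depend on the
    # camera and the functor names on the YAML file):
    #
    #     config.filterMap = {'HSC-G': 'g', 'HSC-R': 'r', 'HSC-I': 'i'}
    #     config.camelCase = True          # e.g. gPsfFlux rather than g_PsfFlux
    #     config.multilevelOutput = False  # flatten with flattenFilters()
    #
    # With these settings the output objectTable is flat, with one column per
    # (band, functor) pair.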


class TractObjectDataIdContainer(CoaddDataIdContainer):

    def makeDataRefList(self, namespace):
        """Make self.refList from self.idList.

        Generate a list of data references given tract and/or patch.
        This was adapted from `TractQADataIdContainer`, which was
        `TractDataIdContainer` modified to not require "filter".
        Only existing dataRefs are returned.
        """
        def getPatchRefList(tract):
            return [namespace.butler.dataRef(datasetType=self.datasetType,
                                             tract=tract.getId(),
                                             patch="%d,%d" % patch.getIndex()) for patch in tract]

        tractRefs = defaultdict(list)  # Data references for each tract
        for dataId in self.idList:
            skymap = self.getSkymap(namespace)

            if "tract" in dataId:
                tractId = dataId["tract"]
                if "patch" in dataId:
                    tractRefs[tractId].append(namespace.butler.dataRef(datasetType=self.datasetType,
                                                                       tract=tractId,
                                                                       patch=dataId['patch']))
                else:
                    tractRefs[tractId] += getPatchRefList(skymap[tractId])
            else:
                tractRefs = dict((tract.getId(), tractRefs.get(tract.getId(), []) + getPatchRefList(tract))
                                 for tract in skymap)
        outputRefList = []
        for tractRefList in tractRefs.values():
            existingRefs = [ref for ref in tractRefList if ref.datasetExists()]
            outputRefList.append(existingRefs)

        self.refList = outputRefList


class ConsolidateObjectTableConfig(pexConfig.Config):
    coaddName = pexConfig.Field(
        dtype=str,
        default="deep",
        doc="Name of coadd"
    )


class ConsolidateObjectTableTask(CmdLineTask):
    """Write patch-merged source tables to a tract-level parquet file.
    """
    _DefaultName = "consolidateObjectTable"
    ConfigClass = ConsolidateObjectTableConfig

    inputDataset = 'objectTable'
    outputDataset = 'objectTable_tract'

    @classmethod
    def _makeArgumentParser(cls):
        parser = ArgumentParser(name=cls._DefaultName)

        parser.add_id_argument("--id", cls.inputDataset,
                               help="data ID, e.g. --id tract=12345",
                               ContainerClass=TractObjectDataIdContainer)
        return parser

    def runDataRef(self, patchRefList):
        df = pd.concat([patchRef.get().toDataFrame() for patchRef in patchRefList])
        patchRefList[0].put(ParquetTable(dataFrame=df), self.outputDataset)

    def writeMetadata(self, dataRef):
        """No metadata to write.
        """
        pass


class TransformSourceTableConfig(TransformCatalogBaseConfig):
    pass


class TransformSourceTableTask(TransformCatalogBaseTask):
    """Transform/standardize a source catalog.
    """
    _DefaultName = "transformSourceTable"
    ConfigClass = TransformSourceTableConfig

    inputDataset = 'source'
    outputDataset = 'sourceTable'

    def writeMetadata(self, dataRef):
        """No metadata to write.
        """
        pass

    @classmethod
    def _makeArgumentParser(cls):
        parser = ArgumentParser(name=cls._DefaultName)
        parser.add_id_argument("--id", datasetType=cls.inputDataset,
                               level="sensor",
                               help="data ID, e.g. --id visit=12345 ccd=0")
        return parser


class ConsolidateVisitSummaryConnections(pipeBase.PipelineTaskConnections,
                                         dimensions=("instrument", "visit",),
                                         defaultTemplates={}):
    calexp = connectionTypes.Input(
        doc="Processed exposures used for metadata",
        name="calexp",
        storageClass="ExposureF",
        dimensions=("instrument", "visit", "detector"),
        deferLoad=True,
        multiple=True,
    )
    visitSummary = connectionTypes.Output(
        doc="Consolidated visit-level exposure metadata",
        name="visitSummary",
        storageClass="ExposureCatalog",
        dimensions=("instrument", "visit"),
    )


class ConsolidateVisitSummaryConfig(pipeBase.PipelineTaskConfig,
                                    pipelineConnections=ConsolidateVisitSummaryConnections):
    """Config for ConsolidateVisitSummaryTask"""
    pass


class ConsolidateVisitSummaryTask(pipeBase.PipelineTask, pipeBase.CmdLineTask):
    """Task to consolidate per-detector visit metadata.

    This task aggregates the following metadata from all the detectors in a
    single visit into an exposure catalog:
    - The visitInfo.
    - The wcs.
    - The photoCalib.
    - The physical_filter and band (if available).
    - The psf size, shape, and effective area at the center of the detector.
    - The corners of the bounding box in right ascension/declination.

    Other quantities such as Psf, ApCorrMap, and TransmissionCurve are not
    persisted here because of storage concerns, and because of their limited
    utility as summary statistics.

    Tests for this task are performed in ci_hsc_gen3.
    """
    _DefaultName = "consolidateVisitSummary"
    ConfigClass = ConsolidateVisitSummaryConfig

    @classmethod
    def _makeArgumentParser(cls):
        parser = ArgumentParser(name=cls._DefaultName)

        parser.add_id_argument("--id", "calexp",
                               help="data ID, e.g. --id visit=12345",
                               ContainerClass=VisitDataIdContainer)
        return parser

    def writeMetadata(self, dataRef):
        """No metadata to persist, so override to remove metadata persistence.
        """
        pass

    def writeConfig(self, butler, clobber=False, doBackup=True):
        """No config to persist, so override to remove config persistence.
        """
        pass

    def runDataRef(self, dataRefList):
        visit = dataRefList[0].dataId['visit']

        self.log.debug("Concatenating metadata from %d per-detector calexps (visit %d)" %
                       (len(dataRefList), visit))

        expCatalog = self._combineExposureMetadata(visit, dataRefList, isGen3=False)

        dataRefList[0].put(expCatalog, 'visitSummary', visit=visit)

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        dataRefs = butlerQC.get(inputRefs.calexp)
        visit = dataRefs[0].dataId.byName()['visit']

        self.log.debug("Concatenating metadata from %d per-detector calexps (visit %d)" %
                       (len(dataRefs), visit))

        expCatalog = self._combineExposureMetadata(visit, dataRefs)

        butlerQC.put(expCatalog, outputRefs.visitSummary)

    def _combineExposureMetadata(self, visit, dataRefs, isGen3=True):
        """Make a combined exposure catalog from a list of dataRefs.

        Parameters
        ----------
        visit : `int`
            Visit identification number.
        dataRefs : `list`
            List of calexp dataRefs in visit.  May be list of
            `lsst.daf.persistence.ButlerDataRef` (Gen2) or
            `lsst.daf.butler.DeferredDatasetHandle` (Gen3).
        isGen3 : `bool`, optional
            Specifies if this is a Gen3 list of datarefs.

        Returns
        -------
        visitSummary : `lsst.afw.table.ExposureCatalog`
            Exposure catalog with per-detector summary information.
        """
        schema = afwTable.ExposureTable.makeMinimalSchema()
        schema.addField('visit', type='I', doc='Visit number')
        schema.addField('detector_id', type='I', doc='Detector number')
        schema.addField('physical_filter', type='String', size=32, doc='Physical filter')
        schema.addField('band', type='String', size=32, doc='Name of band')
        schema.addField('psfSigma', type='F',
                        doc='PSF model second-moments determinant radius (center of chip) (pixel)')
        schema.addField('psfArea', type='F',
                        doc='PSF model effective area (center of chip) (pixel**2)')
        schema.addField('psfIxx', type='F',
                        doc='PSF model Ixx (center of chip) (pixel**2)')
        schema.addField('psfIyy', type='F',
                        doc='PSF model Iyy (center of chip) (pixel**2)')
        schema.addField('psfIxy', type='F',
                        doc='PSF model Ixy (center of chip) (pixel**2)')
        schema.addField('raCorners', type='ArrayD', size=4,
                        doc='Right Ascension of bounding box corners (degrees)')
        schema.addField('decCorners', type='ArrayD', size=4,
                        doc='Declination of bounding box corners (degrees)')

        cat = afwTable.ExposureCatalog(schema)
        cat.resize(len(dataRefs))

        cat['visit'] = visit

        for i, dataRef in enumerate(dataRefs):
            if isGen3:
                visitInfo = dataRef.get(component='visitInfo')
                filter_ = dataRef.get(component='filter')
                psf = dataRef.get(component='psf')
                wcs = dataRef.get(component='wcs')
                photoCalib = dataRef.get(component='photoCalib')
                detector = dataRef.get(component='detector')
                bbox = dataRef.get(component='bbox')
                validPolygon = dataRef.get(component='validPolygon')
            else:
                # Note that we need to read the calexp because there is
                # no magic access to the psf except through the exposure.
                gen2_read_bbox = lsst.geom.BoxI(lsst.geom.PointI(0, 0), lsst.geom.PointI(1, 1))
                exp = dataRef.get(datasetType='calexp_sub', bbox=gen2_read_bbox)
                visitInfo = exp.getInfo().getVisitInfo()
                filter_ = exp.getFilter()
                psf = exp.getPsf()
                wcs = exp.getWcs()
                photoCalib = exp.getPhotoCalib()
                detector = exp.getDetector()
                bbox = dataRef.get(datasetType='calexp_bbox')
                validPolygon = exp.getInfo().getValidPolygon()

            rec = cat[i]
            rec.setBBox(bbox)
            rec.setVisitInfo(visitInfo)
            rec.setWcs(wcs)
            rec.setPhotoCalib(photoCalib)
            rec.setDetector(detector)
            rec.setValidPolygon(validPolygon)

            # TODO: When RFC-730 is implemented we can fill both of these.
            rec['physical_filter'] = filter_.getName()
            rec['band'] = ''
            rec['detector_id'] = detector.getId()
            shape = psf.computeShape(bbox.getCenter())
            rec['psfSigma'] = shape.getDeterminantRadius()
            rec['psfIxx'] = shape.getIxx()
            rec['psfIyy'] = shape.getIyy()
            rec['psfIxy'] = shape.getIxy()
            im = psf.computeKernelImage(bbox.getCenter())
            # The calculation of effective psf area is taken from
            # meas_base/src/PsfFlux.cc#L112. See
            # https://github.com/lsst/meas_base/blob/
            # 750bffe6620e565bda731add1509507f5c40c8bb/src/PsfFlux.cc#L112
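            # (For a PSF kernel image normalized to unit sum, this reduces to
            # 1/sum(p**2), the effective area of a PSF-weighted aperture.)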
            rec['psfArea'] = np.sum(im.array)/np.sum(im.array**2.)

            sph_pts = wcs.pixelToSky(lsst.geom.Box2D(bbox).getCorners())
            rec['raCorners'][:] = [sph.getRa().asDegrees() for sph in sph_pts]
            rec['decCorners'][:] = [sph.getDec().asDegrees() for sph in sph_pts]

        return cat


class VisitDataIdContainer(DataIdContainer):
    """DataIdContainer that groups sensor-level ids by visit.
    """

    def makeDataRefList(self, namespace):
        """Make self.refList from self.idList.

        Generate a list of data references grouped by visit.

        Parameters
        ----------
        namespace : `argparse.Namespace`
            Namespace used by `lsst.pipe.base.CmdLineTask` to parse command
            line arguments.
        """
        # Group by visits
        visitRefs = defaultdict(list)
        for dataId in self.idList:
            if "visit" in dataId:
                visitId = dataId["visit"]
                # Append all dataRefs in the subset matching this dataId
                # to the list for this visit
                subset = namespace.butler.subset(self.datasetType, dataId=dataId)
                visitRefs[visitId].extend([dataRef for dataRef in subset])

        outputRefList = []
        for refList in visitRefs.values():
            existingRefs = [ref for ref in refList if ref.datasetExists()]
            if existingRefs:
                outputRefList.append(existingRefs)

        self.refList = outputRefList


class ConsolidateSourceTableConfig(pexConfig.Config):
    pass


class ConsolidateSourceTableTask(CmdLineTask):
    """Concatenate the `sourceTable` list into a per-visit `sourceTable_visit`.
    """
    _DefaultName = 'consolidateSourceTable'
    ConfigClass = ConsolidateSourceTableConfig

    inputDataset = 'sourceTable'
    outputDataset = 'sourceTable_visit'

    def runDataRef(self, dataRefList):
        self.log.info("Concatenating %s per-detector Source Tables", len(dataRefList))
        df = pd.concat([dataRef.get().toDataFrame() for dataRef in dataRefList])
        dataRefList[0].put(ParquetTable(dataFrame=df), self.outputDataset)

    @classmethod
    def _makeArgumentParser(cls):
        parser = ArgumentParser(name=cls._DefaultName)

        parser.add_id_argument("--id", cls.inputDataset,
                               help="data ID, e.g. --id visit=12345",
                               ContainerClass=VisitDataIdContainer)
        return parser

    def writeMetadata(self, dataRef):
        """No metadata to write.
        """
        pass

    def writeConfig(self, butler, clobber=False, doBackup=True):
        """No config to write.
        """
        pass