Coverage for python/lsst/pipe/tasks/postprocess.py : 27%

1# This file is part of pipe_tasks
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <https://www.gnu.org/licenses/>.
22import functools
23import pandas as pd
24from collections import defaultdict
25import numpy as np
26import numbers
28import lsst.geom
29import lsst.pex.config as pexConfig
30import lsst.pipe.base as pipeBase
31import lsst.daf.base as dafBase
32from lsst.pipe.base import connectionTypes
33import lsst.afw.table as afwTable
34from lsst.meas.base import SingleFrameMeasurementTask
35from lsst.pipe.base import CmdLineTask, ArgumentParser, DataIdContainer
36from lsst.coadd.utils.coaddDataIdContainer import CoaddDataIdContainer
37from lsst.daf.butler import DeferredDatasetHandle, DataCoordinate
39from .parquetTable import ParquetTable
40from .multiBandUtils import makeMergeArgumentParser, MergeSourcesRunner
41from .functors import CompositeFunctor, Column
44def flattenFilters(df, noDupCols=['coord_ra', 'coord_dec'], camelCase=False, inputBands=None):
45 """Flattens a dataframe with multilevel column index
46 """
47 newDf = pd.DataFrame()
48 # band is the level 0 index
49 dfBands = df.columns.unique(level=0).values
50 for band in dfBands:
51 subdf = df[band]
52 columnFormat = '{0}{1}' if camelCase else '{0}_{1}'
53 newColumns = {c: columnFormat.format(band, c)
54 for c in subdf.columns if c not in noDupCols}
55 cols = list(newColumns.keys())
56 newDf = pd.concat([newDf, subdf[cols].rename(columns=newColumns)], axis=1)
58 # Band must be present in the input and output or else column is all NaN:
59 presentBands = dfBands if inputBands is None else list(set(inputBands).intersection(dfBands))
60 # Get the unexploded columns from any present band's partition
61 noDupDf = df[presentBands[0]][noDupCols]
62 newDf = pd.concat([noDupDf, newDf], axis=1)
63 return newDf
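# A minimal, self-contained sketch of what `flattenFilters` does, using a small
# hypothetical two-band DataFrame (column names and values are illustrative only,
# not taken from a real deepCoadd_obj table).
def _flattenFiltersSketch():
    import pandas as pd
    columns = pd.MultiIndex.from_tuples(
        [("g", "coord_ra"), ("g", "coord_dec"), ("g", "psfFlux"),
         ("r", "coord_ra"), ("r", "coord_dec"), ("r", "psfFlux")],
        names=("band", "column"))
    df = pd.DataFrame([[10.0, -5.0, 1.2, 10.0, -5.0, 3.4]], columns=columns)
    # Default (underscore) munging yields g_psfFlux and r_psfFlux, plus a single
    # copy of the noDupCols; camelCase=True would give gPsfFlux and rPsfFlux.
    flat = flattenFilters(df)
    return flat.columns.tolist()  # ['coord_ra', 'coord_dec', 'g_psfFlux', 'r_psfFlux']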
66class WriteObjectTableConnections(pipeBase.PipelineTaskConnections,
67 defaultTemplates={"coaddName": "deep"},
68 dimensions=("tract", "patch", "skymap")):
69 inputCatalogMeas = connectionTypes.Input(
70 doc="Catalog of source measurements on the deepCoadd.",
71 dimensions=("tract", "patch", "band", "skymap"),
72 storageClass="SourceCatalog",
73 name="{coaddName}Coadd_meas",
74 multiple=True
75 )
76 inputCatalogForcedSrc = connectionTypes.Input(
77 doc="Catalog of forced measurements (shape and position parameters held fixed) on the deepCoadd.",
78 dimensions=("tract", "patch", "band", "skymap"),
79 storageClass="SourceCatalog",
80 name="{coaddName}Coadd_forced_src",
81 multiple=True
82 )
83 inputCatalogRef = connectionTypes.Input(
84 doc="Catalog marking the primary detection (which band provides a good shape and position)"
85 "for each detection in deepCoadd_mergeDet.",
86 dimensions=("tract", "patch", "skymap"),
87 storageClass="SourceCatalog",
88 name="{coaddName}Coadd_ref"
89 )
90 outputCatalog = connectionTypes.Output(
91 doc="A vertical concatenation of the deepCoadd_{ref|meas|forced_src} catalogs, "
92 "stored as a DataFrame with a multi-level column index per-patch.",
93 dimensions=("tract", "patch", "skymap"),
94 storageClass="DataFrame",
95 name="{coaddName}Coadd_obj"
96 )
99class WriteObjectTableConfig(pipeBase.PipelineTaskConfig,
100 pipelineConnections=WriteObjectTableConnections):
101 engine = pexConfig.Field(
102 dtype=str,
103 default="pyarrow",
104 doc="Parquet engine for writing (pyarrow or fastparquet)"
105 )
106 coaddName = pexConfig.Field(
107 dtype=str,
108 default="deep",
109 doc="Name of coadd"
110 )
113class WriteObjectTableTask(CmdLineTask, pipeBase.PipelineTask):
114 """Write filter-merged source tables to parquet
115 """
116 _DefaultName = "writeObjectTable"
117 ConfigClass = WriteObjectTableConfig
118 RunnerClass = MergeSourcesRunner
120 # Names of table datasets to be merged
121 inputDatasets = ('forced_src', 'meas', 'ref')
123 # Tag of output dataset written by `MergeSourcesTask.write`
124 outputDataset = 'obj'
126 def __init__(self, butler=None, schema=None, **kwargs):
127 # It is a shame that this class can't use the default init for CmdLineTask,
128 # but to do so would require its own special task runner, which is many
129 # more lines of specialization, so this is how it is for now.
130 super().__init__(**kwargs)
132 def runDataRef(self, patchRefList):
133 """!
134 @brief Merge coadd sources from multiple bands. Calls @ref `run` which must be defined in
135 subclasses that inherit from MergeSourcesTask.
136 @param[in] patchRefList list of data references for each filter
137 """
138 catalogs = dict(self.readCatalog(patchRef) for patchRef in patchRefList)
139 dataId = patchRefList[0].dataId
140 mergedCatalog = self.run(catalogs, tract=dataId['tract'], patch=dataId['patch'])
141 self.write(patchRefList[0], ParquetTable(dataFrame=mergedCatalog))
143 def runQuantum(self, butlerQC, inputRefs, outputRefs):
144 inputs = butlerQC.get(inputRefs)
146 measDict = {ref.dataId['band']: {'meas': cat} for ref, cat in
147 zip(inputRefs.inputCatalogMeas, inputs['inputCatalogMeas'])}
148 forcedSourceDict = {ref.dataId['band']: {'forced_src': cat} for ref, cat in
149 zip(inputRefs.inputCatalogForcedSrc, inputs['inputCatalogForcedSrc'])}
151 catalogs = {}
152 for band in measDict.keys():
153 catalogs[band] = {'meas': measDict[band]['meas'],
154 'forced_src': forcedSourceDict[band]['forced_src'],
155 'ref': inputs['inputCatalogRef']}
156 dataId = butlerQC.quantum.dataId
157 df = self.run(catalogs=catalogs, tract=dataId['tract'], patch=dataId['patch'])
158 outputs = pipeBase.Struct(outputCatalog=df)
159 butlerQC.put(outputs, outputRefs)
161 @classmethod
162 def _makeArgumentParser(cls):
163 """Create a suitable ArgumentParser.
165 We will use the ArgumentParser to get a list of data
166 references for patches; the RunnerClass will sort them into lists
167 of data references for the same patch.
169 References the first of self.inputDatasets, rather than
170 self.inputDataset.
171 """
172 return makeMergeArgumentParser(cls._DefaultName, cls.inputDatasets[0])
174 def readCatalog(self, patchRef):
175 """Read input catalogs
177 Read all the input datasets given by the 'inputDatasets'
178 attribute.
180 Parameters
181 ----------
182 patchRef : `lsst.daf.persistence.ButlerDataRef`
183 Data reference for patch
185 Returns
186 -------
187 Tuple consisting of band name and a dict of catalogs, keyed by
188 dataset name
189 """
190 band = patchRef.get(self.config.coaddName + "Coadd_filterLabel", immediate=True).bandLabel
191 catalogDict = {}
192 for dataset in self.inputDatasets:
193 catalog = patchRef.get(self.config.coaddName + "Coadd_" + dataset, immediate=True)
194 self.log.info("Read %d sources from %s for band %s: %s",
195 len(catalog), dataset, band, patchRef.dataId)
196 catalogDict[dataset] = catalog
197 return band, catalogDict
199 def run(self, catalogs, tract, patch):
200 """Merge multiple catalogs.
202 Parameters
203 ----------
204 catalogs : `dict`
205 Mapping from filter names to dict of catalogs.
206 tract : `int`
207 Tract id to use for the tractId column.
208 patch : `str`
209 Patch id to use for the patchId column.
211 Returns
212 -------
213 catalog : `pandas.DataFrame`
214 Merged dataframe
215 """
217 dfs = []
218 for filt, tableDict in catalogs.items():
219 for dataset, table in tableDict.items():
220 # Convert afwTable to pandas DataFrame
221 df = table.asAstropy().to_pandas().set_index('id', drop=True)
223 # Sort columns by name, to ensure matching schema among patches
224 df = df.reindex(sorted(df.columns), axis=1)
225 df['tractId'] = tract
226 df['patchId'] = patch
228 # Make columns a 3-level MultiIndex
229 df.columns = pd.MultiIndex.from_tuples([(dataset, filt, c) for c in df.columns],
230 names=('dataset', 'band', 'column'))
231 dfs.append(df)
233 catalog = functools.reduce(lambda d1, d2: d1.join(d2), dfs)
234 return catalog
236 def write(self, patchRef, catalog):
237 """Write the output.
239 Parameters
240 ----------
241 catalog : `ParquetTable`
242 Catalog to write
243 patchRef : `lsst.daf.persistence.ButlerDataRef`
244 Data reference for patch
245 """
246 patchRef.put(catalog, self.config.coaddName + "Coadd_" + self.outputDataset)
247 # since the filter isn't actually part of the data ID for the dataset we're saving,
248 # it's confusing to see it in the log message, even if the butler simply ignores it.
249 mergeDataId = patchRef.dataId.copy()
250 del mergeDataId["filter"]
251 self.log.info("Wrote merged catalog: %s", mergeDataId)
253 def writeMetadata(self, dataRefList):
254 """No metadata to write, and not sure how to write it for a list of dataRefs.
255 """
256 pass
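# A hedged sketch of the column relabelling done in WriteObjectTableTask.run above:
# each per-(dataset, band) DataFrame gets a three-level column MultiIndex so that a
# join on the shared `id` index produces one wide per-patch table. The catalogs and
# column names below are hypothetical stand-ins for the afw tables converted via
# `asAstropy().to_pandas()`.
def _writeObjectTableMergeSketch():
    import functools
    import pandas as pd
    meas_g = pd.DataFrame({"psfFlux": [1.0]}, index=pd.Index([42], name="id"))
    ref_g = pd.DataFrame({"detect_isPrimary": [True]}, index=pd.Index([42], name="id"))
    dfs = []
    for dataset, df in (("meas", meas_g), ("ref", ref_g)):
        df = df.copy()
        df.columns = pd.MultiIndex.from_tuples(
            [(dataset, "g", c) for c in df.columns],
            names=("dataset", "band", "column"))
        dfs.append(df)
    return functools.reduce(lambda d1, d2: d1.join(d2), dfs)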
259class WriteSourceTableConnections(pipeBase.PipelineTaskConnections,
260 defaultTemplates={"catalogType": ""},
261 dimensions=("instrument", "visit", "detector")):
263 catalog = connectionTypes.Input(
264 doc="Input full-depth catalog of sources produced by CalibrateTask",
265 name="{catalogType}src",
266 storageClass="SourceCatalog",
267 dimensions=("instrument", "visit", "detector")
268 )
269 outputCatalog = connectionTypes.Output(
270 doc="Catalog of sources, `src` in Parquet format. The 'id' column is "
271 "replaced with an index; all other columns are unchanged.",
272 name="{catalogType}source",
273 storageClass="DataFrame",
274 dimensions=("instrument", "visit", "detector")
275 )
278class WriteSourceTableConfig(pipeBase.PipelineTaskConfig,
279 pipelineConnections=WriteSourceTableConnections):
280 doApplyExternalPhotoCalib = pexConfig.Field(
281 dtype=bool,
282 default=False,
283 doc=("Add local photoCalib columns from the calexp.photoCalib? Should only set True if "
284 "generating Source Tables from older src tables which do not already have local calib columns")
285 )
286 doApplyExternalSkyWcs = pexConfig.Field(
287 dtype=bool,
288 default=False,
289 doc=("Add local WCS columns from the calexp.wcs? Should only set True if "
290 "generating Source Tables from older src tables which do not already have local calib columns")
291 )
294class WriteSourceTableTask(CmdLineTask, pipeBase.PipelineTask):
295 """Write source table to parquet
296 """
297 _DefaultName = "writeSourceTable"
298 ConfigClass = WriteSourceTableConfig
300 def runDataRef(self, dataRef):
301 src = dataRef.get('src')
302 if self.config.doApplyExternalPhotoCalib or self.config.doApplyExternalSkyWcs:
303 src = self.addCalibColumns(src, dataRef)
305 ccdVisitId = dataRef.get('ccdExposureId')
306 result = self.run(src, ccdVisitId=ccdVisitId)
307 dataRef.put(result.table, 'source')
309 def runQuantum(self, butlerQC, inputRefs, outputRefs):
310 inputs = butlerQC.get(inputRefs)
311 inputs['ccdVisitId'] = butlerQC.quantum.dataId.pack("visit_detector")
312 result = self.run(**inputs).table
313 outputs = pipeBase.Struct(outputCatalog=result.toDataFrame())
314 butlerQC.put(outputs, outputRefs)
316 def run(self, catalog, ccdVisitId=None):
317 """Convert `src` catalog to parquet
319 Parameters
320 ----------
321 catalog : `afwTable.SourceCatalog`
322 Catalog to be converted.
323 ccdVisitId : `int`
324 ccdVisitId to be added as a column.
326 Returns
327 -------
328 result : `lsst.pipe.base.Struct`
329 ``table``
330 `ParquetTable` version of the input catalog
331 """
332 self.log.info("Generating parquet table from src catalog %s", ccdVisitId)
333 df = catalog.asAstropy().to_pandas().set_index('id', drop=True)
334 df['ccdVisitId'] = ccdVisitId
335 return pipeBase.Struct(table=ParquetTable(dataFrame=df))
337 def addCalibColumns(self, catalog, dataRef):
338 """Add columns with local calibration evaluated at each centroid
340 for backwards compatibility with old repos.
341 This exists for the purpose of converting old src catalogs
342 (which don't have the expected local calib columns) to Source Tables.
344 Parameters
345 ----------
346 catalog : `afwTable.SourceCatalog`
347 Catalog to which calib columns will be added.
348 dataRef : `lsst.daf.persistence.ButlerDataRef`
349 Data reference for fetching the calibs from disk.
351 Returns
352 -------
353 newCat: `afwTable.SourceCatalog`
354 Source Catalog with requested local calib columns
355 """
356 mapper = afwTable.SchemaMapper(catalog.schema)
357 measureConfig = SingleFrameMeasurementTask.ConfigClass()
358 measureConfig.doReplaceWithNoise = False
360 # Just need the WCS or the PhotoCalib attached to an exposure
361 exposure = dataRef.get('calexp_sub',
362 bbox=lsst.geom.Box2I(lsst.geom.Point2I(0, 0), lsst.geom.Point2I(0, 0)))
364 mapper = afwTable.SchemaMapper(catalog.schema)
365 mapper.addMinimalSchema(catalog.schema, True)
366 schema = mapper.getOutputSchema()
368 exposureIdInfo = dataRef.get("expIdInfo")
369 measureConfig.plugins.names = []
370 if self.config.doApplyExternalSkyWcs:
371 plugin = 'base_LocalWcs'
372 if plugin in schema:
373 raise RuntimeError(f"{plugin} already in src catalog. Set doApplyExternalSkyWcs=False")
374 else:
375 measureConfig.plugins.names.add(plugin)
377 if self.config.doApplyExternalPhotoCalib:
378 plugin = 'base_LocalPhotoCalib'
379 if plugin in schema:
380 raise RuntimeError(f"{plugin} already in src catalog. Set doApplyExternalPhotoCalib=False")
381 else:
382 measureConfig.plugins.names.add(plugin)
384 measurement = SingleFrameMeasurementTask(config=measureConfig, schema=schema)
385 newCat = afwTable.SourceCatalog(schema)
386 newCat.extend(catalog, mapper=mapper)
387 measurement.run(measCat=newCat, exposure=exposure, exposureId=exposureIdInfo.expId)
388 return newCat
390 def writeMetadata(self, dataRef):
391 """No metadata to write.
392 """
393 pass
395 @classmethod
396 def _makeArgumentParser(cls):
397 parser = ArgumentParser(name=cls._DefaultName)
398 parser.add_id_argument("--id", 'src',
399 help="data ID, e.g. --id visit=12345 ccd=0")
400 return parser
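# A hedged sketch of the conversion performed by WriteSourceTableTask.run above: the
# afw catalog becomes a pandas DataFrame indexed by `id`, a constant `ccdVisitId`
# column is added, and the result is wrapped in a ParquetTable. A plain DataFrame
# stands in for `catalog.asAstropy().to_pandas()`; the column names and the packed
# ccdVisitId value are hypothetical.
def _writeSourceTableSketch():
    import pandas as pd
    df = pd.DataFrame({"id": [1, 2], "base_PsfFlux_instFlux": [10.0, 20.0]})
    df = df.set_index("id", drop=True)
    df["ccdVisitId"] = 1234567  # packed visit+detector identifier
    return ParquetTable(dataFrame=df)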
403class PostprocessAnalysis(object):
404 """Calculate columns from ParquetTable
406 This object manages and organizes an arbitrary set of computations
407 on a catalog. The catalog is defined by a
408 `lsst.pipe.tasks.parquetTable.ParquetTable` object (or list thereof), such as a
409 `deepCoadd_obj` dataset, and the computations are defined by a collection
410 of `lsst.pipe.tasks.functors.Functor` objects (or, equivalently,
411 a `CompositeFunctor`).
413 After the object is initialized, accessing the `.df` attribute (which
414 holds the `pandas.DataFrame` containing the results of the calculations) triggers
415 computation of said dataframe.
417 One of the conveniences of using this object is the ability to define a desired common
418 filter for all functors. This enables the same functor collection to be passed to
419 several different `PostprocessAnalysis` objects without having to change the original
420 functor collection, since the `filt` keyword argument of this object triggers an
421 overwrite of the `filt` property for all functors in the collection.
423 This object also allows a list of refFlags to be passed, and defines a set of default
424 refFlags that are always included even if not requested.
426 If a list of `ParquetTable` objects is passed, rather than a single one, then the
427 calculations will be mapped over all the input catalogs. In principle, it should
428 be straightforward to parallelize this activity, but initial tests have failed
429 (see TODO in code comments).
431 Parameters
432 ----------
433 parq : `lsst.pipe.tasks.parquetTable.ParquetTable` (or list of such)
434 Source catalog(s) for computation
436 functors : `list`, `dict`, or `lsst.pipe.tasks.functors.CompositeFunctor`
437 Computations to do (functors that act on `parq`).
438 If a dict, the output
439 DataFrame will have columns keyed accordingly.
440 If a list, the column keys will come from the
441 `.shortname` attribute of each functor.
443 filt : `str` (optional)
444 Filter in which to calculate. If provided,
445 this will overwrite any existing `.filt` attribute
446 of the provided functors.
448 flags : `list` (optional)
449 List of flags (per-band) to include in output table.
450 Taken from the `meas` dataset if applied to a multilevel Object Table.
452 refFlags : `list` (optional)
453 List of refFlags (only reference band) to include in output table.
455 forcedFlags : `list` (optional)
456 List of flags (per-band) to include in output table.
457 Taken from the ``forced_src`` dataset if applied to a
458 multilevel Object Table. Intended for flags from measurement plugins
459 only run during multi-band forced-photometry.
460 """
461 _defaultRefFlags = []
462 _defaultFuncs = ()
464 def __init__(self, parq, functors, filt=None, flags=None, refFlags=None, forcedFlags=None):
465 self.parq = parq
466 self.functors = functors
468 self.filt = filt
469 self.flags = list(flags) if flags is not None else []
470 self.forcedFlags = list(forcedFlags) if forcedFlags is not None else []
471 self.refFlags = list(self._defaultRefFlags)
472 if refFlags is not None:
473 self.refFlags += list(refFlags)
475 self._df = None
477 @property
478 def defaultFuncs(self):
479 funcs = dict(self._defaultFuncs)
480 return funcs
482 @property
483 def func(self):
484 additionalFuncs = self.defaultFuncs
485 additionalFuncs.update({flag: Column(flag, dataset='forced_src') for flag in self.forcedFlags})
486 additionalFuncs.update({flag: Column(flag, dataset='ref') for flag in self.refFlags})
487 additionalFuncs.update({flag: Column(flag, dataset='meas') for flag in self.flags})
489 if isinstance(self.functors, CompositeFunctor):
490 func = self.functors
491 else:
492 func = CompositeFunctor(self.functors)
494 func.funcDict.update(additionalFuncs)
495 func.filt = self.filt
497 return func
499 @property
500 def noDupCols(self):
501 return [name for name, func in self.func.funcDict.items() if func.noDup or func.dataset == 'ref']
503 @property
504 def df(self):
505 if self._df is None:
506 self.compute()
507 return self._df
509 def compute(self, dropna=False, pool=None):
510 # map over multiple parquet tables
511 if type(self.parq) in (list, tuple):
512 if pool is None:
513 dflist = [self.func(parq, dropna=dropna) for parq in self.parq]
514 else:
515 # TODO: Figure out why this doesn't work (pyarrow pickling issues?)
516 dflist = pool.map(functools.partial(self.func, dropna=dropna), self.parq)
517 self._df = pd.concat(dflist)
518 else:
519 self._df = self.func(self.parq, dropna=dropna)
521 return self._df
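# A hedged usage sketch for PostprocessAnalysis: given `parq`, a deepCoadd_obj
# ParquetTable loaded elsewhere (e.g. via the butler), it evaluates a functor
# collection in a chosen band. The functor names and source columns here are
# hypothetical examples, not a required set.
def _postprocessAnalysisSketch(parq):
    funcs = {
        "ra": Column("coord_ra", dataset="ref"),
        "psfFlux": Column("base_PsfFlux_instFlux", dataset="meas"),
    }
    analysis = PostprocessAnalysis(parq, funcs, filt="i",
                                   refFlags=["detect_isPrimary"])
    # Accessing .df triggers the computation and returns a pandas DataFrame with
    # one column per functor plus the requested flags.
    return analysis.df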
524class TransformCatalogBaseConnections(pipeBase.PipelineTaskConnections,
525 dimensions=()):
526 """Expected Connections for subclasses of TransformCatalogBaseTask.
528 Must be subclassed.
529 """
530 inputCatalog = connectionTypes.Input(
531 name="",
532 storageClass="DataFrame",
533 )
534 outputCatalog = connectionTypes.Output(
535 name="",
536 storageClass="DataFrame",
537 )
540class TransformCatalogBaseConfig(pipeBase.PipelineTaskConfig,
541 pipelineConnections=TransformCatalogBaseConnections):
542 functorFile = pexConfig.Field(
543 dtype=str,
544 doc="Path to YAML file specifying Science Data Model functors to use "
545 "when copying columns and computing calibrated values.",
546 default=None,
547 optional=True
548 )
549 primaryKey = pexConfig.Field(
550 dtype=str,
551 doc="Name of column to be set as the DataFrame index. If None, the index"
552 "will be named `id`",
553 default=None,
554 optional=True
555 )
558class TransformCatalogBaseTask(CmdLineTask, pipeBase.PipelineTask):
559 """Base class for transforming/standardizing a catalog
561 by applying functors that convert units and apply calibrations.
562 The purpose of this task is to perform a set of computations on
563 an input `ParquetTable` dataset (such as `deepCoadd_obj`) and write the
564 results to a new dataset (which needs to be declared in an `outputDataset`
565 attribute).
567 The calculations to be performed are defined in a YAML file that specifies
568 a set of functors to be computed, provided as
569 a `--functorFile` config parameter. An example of such a YAML file
570 is the following:
572 funcs:
573 psfMag:
574 functor: Mag
575 args:
576 - base_PsfFlux
577 filt: HSC-G
578 dataset: meas
579 cmodel_magDiff:
580 functor: MagDiff
581 args:
582 - modelfit_CModel
583 - base_PsfFlux
584 filt: HSC-G
585 gauss_magDiff:
586 functor: MagDiff
587 args:
588 - base_GaussianFlux
589 - base_PsfFlux
590 filt: HSC-G
591 count:
592 functor: Column
593 args:
594 - base_InputCount_value
595 filt: HSC-G
596 deconvolved_moments:
597 functor: DeconvolvedMoments
598 filt: HSC-G
599 dataset: forced_src
600 refFlags:
601 - calib_psfUsed
602 - merge_measurement_i
603 - merge_measurement_r
604 - merge_measurement_z
605 - merge_measurement_y
606 - merge_measurement_g
607 - base_PixelFlags_flag_inexact_psfCenter
608 - detect_isPrimary
610 The names for each entry under "funcs" will become the names of columns in the
611 output dataset. All the functors referenced are defined in `lsst.pipe.tasks.functors`.
612 Positional arguments to be passed to each functor are in the `args` list,
613 and any additional entries for each column other than "functor" or "args" (e.g., `'filt'`,
614 `'dataset'`) are treated as keyword arguments to be passed to the functor initialization.
616 The "flags" entry is the default shortcut for `Column` functors.
617 All columns listed under "flags" will be copied to the output table
618 untransformed. They can be of any datatype.
619 In the special case of transforming a multi-level object table with
620 band and dataset indices (deepCoadd_obj), these will be taken from the
621 `meas` dataset and exploded out per band.
623 There are two special shortcuts that only apply when transforming
624 multi-level Object (deepCoadd_obj) tables:
625 - The "refFlags" entry is shortcut for `Column` functor
626 taken from the `'ref'` dataset if transforming an ObjectTable.
627 - The "forcedFlags" entry is shortcut for `Column` functors.
628 taken from the ``forced_src`` dataset if transforming an ObjectTable.
629 These are expanded out per band.
632 This task uses the `lsst.pipe.tasks.postprocess.PostprocessAnalysis` object
633 to organize and execute the calculations.
635 """
636 @property
637 def _DefaultName(self):
638 raise NotImplementedError('Subclass must define "_DefaultName" attribute')
640 @property
641 def outputDataset(self):
642 raise NotImplementedError('Subclass must define "outputDataset" attribute')
644 @property
645 def inputDataset(self):
646 raise NotImplementedError('Subclass must define "inputDataset" attribute')
648 @property
649 def ConfigClass(self):
650 raise NotImplementedError('Subclass must define "ConfigClass" attribute')
652 def __init__(self, *args, **kwargs):
653 super().__init__(*args, **kwargs)
654 if self.config.functorFile:
655 self.log.info('Loading transform functor definitions from %s',
656 self.config.functorFile)
657 self.funcs = CompositeFunctor.from_file(self.config.functorFile)
658 self.funcs.update(dict(PostprocessAnalysis._defaultFuncs))
659 else:
660 self.funcs = None
662 def runQuantum(self, butlerQC, inputRefs, outputRefs):
663 inputs = butlerQC.get(inputRefs)
664 if self.funcs is None:
665 raise ValueError("config.functorFile is None. "
666 "Must be a valid path to yaml in order to run Task as a PipelineTask.")
667 result = self.run(parq=inputs['inputCatalog'], funcs=self.funcs,
668 dataId=outputRefs.outputCatalog.dataId.full)
669 outputs = pipeBase.Struct(outputCatalog=result)
670 butlerQC.put(outputs, outputRefs)
672 def runDataRef(self, dataRef):
673 parq = dataRef.get()
674 if self.funcs is None:
675 raise ValueError("config.functorFile is None. "
676 "Must be a valid path to yaml in order to run as a CommandlineTask.")
677 df = self.run(parq, funcs=self.funcs, dataId=dataRef.dataId)
678 self.write(df, dataRef)
679 return df
681 def run(self, parq, funcs=None, dataId=None, band=None):
682 """Do postprocessing calculations
684 Takes a `ParquetTable` object and dataId,
685 returns a dataframe with results of postprocessing calculations.
687 Parameters
688 ----------
689 parq : `lsst.pipe.tasks.parquetTable.ParquetTable`
690 ParquetTable from which calculations are done.
691 funcs : `lsst.pipe.tasks.functors.Functors`
692 Functors to apply to the table's columns
693 dataId : dict, optional
694 Used to add a `patchId` column to the output dataframe.
695 band : `str`, optional
696 Filter band that is being processed.
698 Returns
699 -------
700 `pandas.DataFrame`
702 """
703 self.log.info("Transforming/standardizing the source table dataId: %s", dataId)
705 df = self.transform(band, parq, funcs, dataId).df
706 self.log.info("Made a table of %d columns and %d rows", len(df.columns), len(df))
707 return df
709 def getFunctors(self):
710 return self.funcs
712 def getAnalysis(self, parq, funcs=None, band=None):
713 if funcs is None:
714 funcs = self.funcs
715 analysis = PostprocessAnalysis(parq, funcs, filt=band)
716 return analysis
718 def transform(self, band, parq, funcs, dataId):
719 analysis = self.getAnalysis(parq, funcs=funcs, band=band)
720 df = analysis.df
721 if dataId is not None:
722 for key, value in dataId.items():
723 df[str(key)] = value
725 if self.config.primaryKey:
726 if df.index.name != self.config.primaryKey and self.config.primaryKey in df:
727 df.reset_index(inplace=True, drop=True)
728 df.set_index(self.config.primaryKey, inplace=True)
730 return pipeBase.Struct(
731 df=df,
732 analysis=analysis
733 )
735 def write(self, df, parqRef):
736 parqRef.put(ParquetTable(dataFrame=df), self.outputDataset)
738 def writeMetadata(self, dataRef):
739 """No metadata to write.
740 """
741 pass
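# A hedged sketch of how the functor YAML shown in the TransformCatalogBaseTask
# docstring is consumed: CompositeFunctor.from_file parses it into a functor
# collection, which is then applied through PostprocessAnalysis, mirroring what
# __init__ and run do above. The file path and band are hypothetical.
def _functorFileSketch(parq):
    funcs = CompositeFunctor.from_file("myFunctors.yaml")
    funcs.update(dict(PostprocessAnalysis._defaultFuncs))
    analysis = PostprocessAnalysis(parq, funcs, filt="HSC-G")
    return analysis.df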
744class TransformObjectCatalogConnections(pipeBase.PipelineTaskConnections,
745 defaultTemplates={"coaddName": "deep"},
746 dimensions=("tract", "patch", "skymap")):
747 inputCatalog = connectionTypes.Input(
748 doc="The vertical concatenation of the deepCoadd_{ref|meas|forced_src} catalogs, "
749 "stored as a DataFrame with a multi-level column index per-patch.",
750 dimensions=("tract", "patch", "skymap"),
751 storageClass="DataFrame",
752 name="{coaddName}Coadd_obj",
753 deferLoad=True,
754 )
755 outputCatalog = connectionTypes.Output(
756 doc="Per-Patch Object Table of columns transformed from the deepCoadd_obj table per the standard "
757 "data model.",
758 dimensions=("tract", "patch", "skymap"),
759 storageClass="DataFrame",
760 name="objectTable"
761 )
764class TransformObjectCatalogConfig(TransformCatalogBaseConfig,
765 pipelineConnections=TransformObjectCatalogConnections):
766 coaddName = pexConfig.Field(
767 dtype=str,
768 default="deep",
769 doc="Name of coadd"
770 )
771 # TODO: remove in DM-27177
772 filterMap = pexConfig.DictField(
773 keytype=str,
774 itemtype=str,
775 default={},
776 doc=("Dictionary mapping full filter name to short one for column name munging."
777 "These filters determine the output columns no matter what filters the "
778 "input data actually contain."),
779 deprecated=("Coadds are now identified by the band, so this transform is unused."
780 "Will be removed after v22.")
781 )
782 outputBands = pexConfig.ListField(
783 dtype=str,
784 default=None,
785 optional=True,
786 doc=("These bands and only these bands will appear in the output,"
787 " NaN-filled if the input does not include them."
788 " If None, then use all bands found in the input.")
789 )
790 camelCase = pexConfig.Field(
791 dtype=bool,
792 default=False,
793 doc=("Write per-band columns names with camelCase, else underscore "
794 "For example: gPsFlux instead of g_PsFlux.")
795 )
796 multilevelOutput = pexConfig.Field(
797 dtype=bool,
798 default=False,
799 doc=("Whether results dataframe should have a multilevel column index (True) or be flat "
800 "and name-munged (False).")
801 )
802 goodFlags = pexConfig.ListField(
803 dtype=str,
804 default=[],
805 doc=("List of 'good' flags that should be set False when populating empty tables. "
806 "All other flags are considered to be 'bad' flags and will be set to True.")
807 )
808 floatFillValue = pexConfig.Field(
809 dtype=float,
810 default=np.nan,
811 doc="Fill value for float fields when populating empty tables."
812 )
813 integerFillValue = pexConfig.Field(
814 dtype=int,
815 default=-1,
816 doc="Fill value for integer fields when populating empty tables."
817 )
819 def setDefaults(self):
820 super().setDefaults()
821 self.primaryKey = 'objectId'
822 self.goodFlags = ['calib_astrometry_used',
823 'calib_photometry_reserved',
824 'calib_photometry_used',
825 'calib_psf_candidate',
826 'calib_psf_reserved',
827 'calib_psf_used']
830class TransformObjectCatalogTask(TransformCatalogBaseTask):
831 """Produce a flattened Object Table to match the format specified in
832 sdm_schemas.
834 Do the same set of postprocessing calculations on all bands.
836 This is identical to `TransformCatalogBaseTask`, except that it does the
837 specified functor calculations for all filters present in the
838 input `deepCoadd_obj` table. Any specific `"filt"` keywords specified
839 by the YAML file will be superseded.
840 """
841 _DefaultName = "transformObjectCatalog"
842 ConfigClass = TransformObjectCatalogConfig
844 # Used by Gen 2 runDataRef only:
845 inputDataset = 'deepCoadd_obj'
846 outputDataset = 'objectTable'
848 @classmethod
849 def _makeArgumentParser(cls):
850 parser = ArgumentParser(name=cls._DefaultName)
851 parser.add_id_argument("--id", cls.inputDataset,
852 ContainerClass=CoaddDataIdContainer,
853 help="data ID, e.g. --id tract=12345 patch=1,2")
854 return parser
856 def run(self, parq, funcs=None, dataId=None, band=None):
857 # NOTE: band kwarg is ignored here.
858 dfDict = {}
859 analysisDict = {}
860 templateDf = pd.DataFrame()
862 if isinstance(parq, DeferredDatasetHandle):
863 columns = parq.get(component='columns')
864 inputBands = columns.unique(level=1).values
865 else:
866 inputBands = parq.columnLevelNames['band']
868 outputBands = self.config.outputBands if self.config.outputBands else inputBands
870 # Perform transform for data of filters that exist in parq.
871 for inputBand in inputBands:
872 if inputBand not in outputBands:
873 self.log.info("Ignoring %s band data in the input", inputBand)
874 continue
875 self.log.info("Transforming the catalog of band %s", inputBand)
876 result = self.transform(inputBand, parq, funcs, dataId)
877 dfDict[inputBand] = result.df
878 analysisDict[inputBand] = result.analysis
879 if templateDf.empty:
880 templateDf = result.df
882 # Put filler values in columns of other wanted bands
883 for filt in outputBands:
884 if filt not in dfDict:
885 self.log.info("Adding empty columns for band %s", filt)
886 dfTemp = templateDf.copy()
887 for col in dfTemp.columns:
888 testValue = dfTemp[col].values[0]
889 if isinstance(testValue, (np.bool_, pd.BooleanDtype)):
890 # Boolean flag type, check if it is a "good" flag
891 if col in self.config.goodFlags:
892 fillValue = False
893 else:
894 fillValue = True
895 elif isinstance(testValue, numbers.Integral):
896 # Checking numbers.Integral catches all flavors
897 # of python, numpy, pandas, etc. integers.
898 # We must ensure this is not an unsigned integer.
899 if isinstance(testValue, np.unsignedinteger):
900 raise ValueError("Parquet tables may not have unsigned integer columns.")
901 else:
902 fillValue = self.config.integerFillValue
903 else:
904 fillValue = self.config.floatFillValue
905 dfTemp[col].values[:] = fillValue
906 dfDict[filt] = dfTemp
908 # This makes a multilevel column index, with band as first level
909 df = pd.concat(dfDict, axis=1, names=['band', 'column'])
911 if not self.config.multilevelOutput:
912 noDupCols = list(set.union(*[set(v.noDupCols) for v in analysisDict.values()]))
913 if self.config.primaryKey in noDupCols:
914 noDupCols.remove(self.config.primaryKey)
915 if dataId is not None:
916 noDupCols += list(dataId.keys())
917 df = flattenFilters(df, noDupCols=noDupCols, camelCase=self.config.camelCase,
918 inputBands=inputBands)
920 self.log.info("Made a table of %d columns and %d rows", len(df.columns), len(df))
922 return df
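# A hedged sketch of the per-band assembly done in TransformObjectCatalogTask.run
# above: the per-band result DataFrames become the first level of a column
# MultiIndex, and (unless multilevelOutput is set) flattenFilters then name-munges
# the columns. The band labels and the psfMag column are hypothetical.
def _perBandConcatSketch():
    import pandas as pd
    dfDict = {
        "g": pd.DataFrame({"psfMag": [21.3]}),
        "r": pd.DataFrame({"psfMag": [20.9]}),
    }
    df = pd.concat(dfDict, axis=1, names=["band", "column"])
    return flattenFilters(df, noDupCols=[])  # columns: g_psfMag, r_psfMag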
925class TractObjectDataIdContainer(CoaddDataIdContainer):
927 def makeDataRefList(self, namespace):
928 """Make self.refList from self.idList
930 Generate a list of data references given tract and/or patch.
931 This was adapted from `TractQADataIdContainer`, which was
932 `TractDataIdContainer` modified to not require "filter".
933 Only existing dataRefs are returned.
934 """
935 def getPatchRefList(tract):
936 return [namespace.butler.dataRef(datasetType=self.datasetType,
937 tract=tract.getId(),
938 patch="%d,%d" % patch.getIndex()) for patch in tract]
940 tractRefs = defaultdict(list) # Data references for each tract
941 for dataId in self.idList:
942 skymap = self.getSkymap(namespace)
944 if "tract" in dataId:
945 tractId = dataId["tract"]
946 if "patch" in dataId:
947 tractRefs[tractId].append(namespace.butler.dataRef(datasetType=self.datasetType,
948 tract=tractId,
949 patch=dataId['patch']))
950 else:
951 tractRefs[tractId] += getPatchRefList(skymap[tractId])
952 else:
953 tractRefs = dict((tract.getId(), tractRefs.get(tract.getId(), []) + getPatchRefList(tract))
954 for tract in skymap)
955 outputRefList = []
956 for tractRefList in tractRefs.values():
957 existingRefs = [ref for ref in tractRefList if ref.datasetExists()]
958 outputRefList.append(existingRefs)
960 self.refList = outputRefList
963class ConsolidateObjectTableConnections(pipeBase.PipelineTaskConnections,
964 dimensions=("tract", "skymap")):
965 inputCatalogs = connectionTypes.Input(
966 doc="Per-Patch objectTables conforming to the standard data model.",
967 name="objectTable",
968 storageClass="DataFrame",
969 dimensions=("tract", "patch", "skymap"),
970 multiple=True,
971 )
972 outputCatalog = connectionTypes.Output(
973 doc="Pre-tract horizontal concatenation of the input objectTables",
974 name="objectTable_tract",
975 storageClass="DataFrame",
976 dimensions=("tract", "skymap"),
977 )
980class ConsolidateObjectTableConfig(pipeBase.PipelineTaskConfig,
981 pipelineConnections=ConsolidateObjectTableConnections):
982 coaddName = pexConfig.Field(
983 dtype=str,
984 default="deep",
985 doc="Name of coadd"
986 )
989class ConsolidateObjectTableTask(CmdLineTask, pipeBase.PipelineTask):
990 """Write patch-merged source tables to a tract-level parquet file
992 Concatenates `objectTable` list into a per-visit `objectTable_tract`
993 """
994 _DefaultName = "consolidateObjectTable"
995 ConfigClass = ConsolidateObjectTableConfig
997 inputDataset = 'objectTable'
998 outputDataset = 'objectTable_tract'
1000 def runQuantum(self, butlerQC, inputRefs, outputRefs):
1001 inputs = butlerQC.get(inputRefs)
1002 self.log.info("Concatenating %s per-patch Object Tables",
1003 len(inputs['inputCatalogs']))
1004 df = pd.concat(inputs['inputCatalogs'])
1005 butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs)
1007 @classmethod
1008 def _makeArgumentParser(cls):
1009 parser = ArgumentParser(name=cls._DefaultName)
1011 parser.add_id_argument("--id", cls.inputDataset,
1012 help="data ID, e.g. --id tract=12345",
1013 ContainerClass=TractObjectDataIdContainer)
1014 return parser
1016 def runDataRef(self, patchRefList):
1017 df = pd.concat([patchRef.get().toDataFrame() for patchRef in patchRefList])
1018 patchRefList[0].put(ParquetTable(dataFrame=df), self.outputDataset)
1020 def writeMetadata(self, dataRef):
1021 """No metadata to write.
1022 """
1023 pass
1026class TransformSourceTableConnections(pipeBase.PipelineTaskConnections,
1027 defaultTemplates={"catalogType": ""},
1028 dimensions=("instrument", "visit", "detector")):
1030 inputCatalog = connectionTypes.Input(
1031 doc="Wide input catalog of sources produced by WriteSourceTableTask",
1032 name="{catalogType}source",
1033 storageClass="DataFrame",
1034 dimensions=("instrument", "visit", "detector"),
1035 deferLoad=True
1036 )
1037 outputCatalog = connectionTypes.Output(
1038 doc="Narrower, per-detector Source Table transformed and converted per a "
1039 "specified set of functors",
1040 name="{catalogType}sourceTable",
1041 storageClass="DataFrame",
1042 dimensions=("instrument", "visit", "detector")
1043 )
1046class TransformSourceTableConfig(TransformCatalogBaseConfig,
1047 pipelineConnections=TransformSourceTableConnections):
1049 def setDefaults(self):
1050 super().setDefaults()
1051 self.primaryKey = 'sourceId'
1054class TransformSourceTableTask(TransformCatalogBaseTask):
1055 """Transform/standardize a source catalog
1056 """
1057 _DefaultName = "transformSourceTable"
1058 ConfigClass = TransformSourceTableConfig
1060 inputDataset = 'source'
1061 outputDataset = 'sourceTable'
1063 @classmethod
1064 def _makeArgumentParser(cls):
1065 parser = ArgumentParser(name=cls._DefaultName)
1066 parser.add_id_argument("--id", datasetType=cls.inputDataset,
1067 level="sensor",
1068 help="data ID, e.g. --id visit=12345 ccd=0")
1069 return parser
1071 def runDataRef(self, dataRef):
1072 """Override to specify band label to run()."""
1073 parq = dataRef.get()
1074 funcs = self.getFunctors()
1075 band = dataRef.get("calexp_filterLabel", immediate=True).bandLabel
1076 df = self.run(parq, funcs=funcs, dataId=dataRef.dataId, band=band)
1077 self.write(df, dataRef)
1078 return df
1081class ConsolidateVisitSummaryConnections(pipeBase.PipelineTaskConnections,
1082 dimensions=("instrument", "visit",),
1083 defaultTemplates={"calexpType": ""}):
1084 calexp = connectionTypes.Input(
1085 doc="Processed exposures used for metadata",
1086 name="{calexpType}calexp",
1087 storageClass="ExposureF",
1088 dimensions=("instrument", "visit", "detector"),
1089 deferLoad=True,
1090 multiple=True,
1091 )
1092 visitSummary = connectionTypes.Output(
1093 doc=("Per-visit consolidated exposure metadata. These catalogs use "
1094 "detector id for the id and are sorted for fast lookups of a "
1095 "detector."),
1096 name="{calexpType}visitSummary",
1097 storageClass="ExposureCatalog",
1098 dimensions=("instrument", "visit"),
1099 )
1102class ConsolidateVisitSummaryConfig(pipeBase.PipelineTaskConfig,
1103 pipelineConnections=ConsolidateVisitSummaryConnections):
1104 """Config for ConsolidateVisitSummaryTask"""
1105 pass
1108class ConsolidateVisitSummaryTask(pipeBase.PipelineTask, pipeBase.CmdLineTask):
1109 """Task to consolidate per-detector visit metadata.
1111 This task aggregates the following metadata from all the detectors in a
1112 single visit into an exposure catalog:
1113 - The visitInfo.
1114 - The wcs.
1115 - The photoCalib.
1116 - The physical_filter and band (if available).
1117 - The psf size, shape, and effective area at the center of the detector.
1118 - The corners of the bounding box in right ascension/declination.
1120 Other quantities such as Detector, Psf, ApCorrMap, and TransmissionCurve
1121 are not persisted here because of storage concerns, and because of their
1122 limited utility as summary statistics.
1124 Tests for this task are performed in ci_hsc_gen3.
1125 """
1126 _DefaultName = "consolidateVisitSummary"
1127 ConfigClass = ConsolidateVisitSummaryConfig
1129 @classmethod
1130 def _makeArgumentParser(cls):
1131 parser = ArgumentParser(name=cls._DefaultName)
1133 parser.add_id_argument("--id", "calexp",
1134 help="data ID, e.g. --id visit=12345",
1135 ContainerClass=VisitDataIdContainer)
1136 return parser
1138 def writeMetadata(self, dataRef):
1139 """No metadata to persist, so override to remove metadata persistance.
1140 """
1141 pass
1143 def writeConfig(self, butler, clobber=False, doBackup=True):
1144 """No config to persist, so override to remove config persistance.
1145 """
1146 pass
1148 def runDataRef(self, dataRefList):
1149 visit = dataRefList[0].dataId['visit']
1151 self.log.debug("Concatenating metadata from %d per-detector calexps (visit %d)",
1152 len(dataRefList), visit)
1154 expCatalog = self._combineExposureMetadata(visit, dataRefList, isGen3=False)
1156 dataRefList[0].put(expCatalog, 'visitSummary', visit=visit)
1158 def runQuantum(self, butlerQC, inputRefs, outputRefs):
1159 dataRefs = butlerQC.get(inputRefs.calexp)
1160 visit = dataRefs[0].dataId.byName()['visit']
1162 self.log.debug("Concatenating metadata from %d per-detector calexps (visit %d)",
1163 len(dataRefs), visit)
1165 expCatalog = self._combineExposureMetadata(visit, dataRefs)
1167 butlerQC.put(expCatalog, outputRefs.visitSummary)
1169 def _combineExposureMetadata(self, visit, dataRefs, isGen3=True):
1170 """Make a combined exposure catalog from a list of dataRefs.
1171 These dataRefs must point to exposures with wcs, summaryStats,
1172 and other visit metadata.
1174 Parameters
1175 ----------
1176 visit : `int`
1177 Visit identification number.
1178 dataRefs : `list`
1179 List of dataRefs in visit. May be list of
1180 `lsst.daf.persistence.ButlerDataRef` (Gen2) or
1181 `lsst.daf.butler.DeferredDatasetHandle` (Gen3).
1182 isGen3 : `bool`, optional
1183 Specifies if this is a Gen3 list of datarefs.
1185 Returns
1186 -------
1187 visitSummary : `lsst.afw.table.ExposureCatalog`
1188 Exposure catalog with per-detector summary information.
1189 """
1190 schema = self._makeVisitSummarySchema()
1191 cat = afwTable.ExposureCatalog(schema)
1192 cat.resize(len(dataRefs))
1194 cat['visit'] = visit
1196 for i, dataRef in enumerate(dataRefs):
1197 if isGen3:
1198 visitInfo = dataRef.get(component='visitInfo')
1199 filterLabel = dataRef.get(component='filterLabel')
1200 summaryStats = dataRef.get(component='summaryStats')
1201 detector = dataRef.get(component='detector')
1202 wcs = dataRef.get(component='wcs')
1203 photoCalib = dataRef.get(component='photoCalib')
1204 detector = dataRef.get(component='detector')
1205 bbox = dataRef.get(component='bbox')
1206 validPolygon = dataRef.get(component='validPolygon')
1207 else:
1208 # Note that we need to read the calexp because there is
1209 # no magic access to the psf except through the exposure.
1210 gen2_read_bbox = lsst.geom.BoxI(lsst.geom.PointI(0, 0), lsst.geom.PointI(1, 1))
1211 exp = dataRef.get(datasetType='calexp_sub', bbox=gen2_read_bbox)
1212 visitInfo = exp.getInfo().getVisitInfo()
1213 filterLabel = dataRef.get("calexp_filterLabel")
1214 summaryStats = exp.getInfo().getSummaryStats()
1215 wcs = exp.getWcs()
1216 photoCalib = exp.getPhotoCalib()
1217 detector = exp.getDetector()
1218 bbox = dataRef.get(datasetType='calexp_bbox')
1219 validPolygon = exp.getInfo().getValidPolygon()
1221 rec = cat[i]
1222 rec.setBBox(bbox)
1223 rec.setVisitInfo(visitInfo)
1224 rec.setWcs(wcs)
1225 rec.setPhotoCalib(photoCalib)
1226 rec.setValidPolygon(validPolygon)
1228 rec['physical_filter'] = filterLabel.physicalLabel if filterLabel.hasPhysicalLabel() else ""
1229 rec['band'] = filterLabel.bandLabel if filterLabel.hasBandLabel() else ""
1230 rec.setId(detector.getId())
1231 rec['psfSigma'] = summaryStats.psfSigma
1232 rec['psfIxx'] = summaryStats.psfIxx
1233 rec['psfIyy'] = summaryStats.psfIyy
1234 rec['psfIxy'] = summaryStats.psfIxy
1235 rec['psfArea'] = summaryStats.psfArea
1236 rec['raCorners'][:] = summaryStats.raCorners
1237 rec['decCorners'][:] = summaryStats.decCorners
1238 rec['ra'] = summaryStats.ra
1239 rec['decl'] = summaryStats.decl
1240 rec['zenithDistance'] = summaryStats.zenithDistance
1241 rec['zeroPoint'] = summaryStats.zeroPoint
1242 rec['skyBg'] = summaryStats.skyBg
1243 rec['skyNoise'] = summaryStats.skyNoise
1244 rec['meanVar'] = summaryStats.meanVar
1245 rec['astromOffsetMean'] = summaryStats.astromOffsetMean
1246 rec['astromOffsetStd'] = summaryStats.astromOffsetStd
1248 metadata = dafBase.PropertyList()
1249 metadata.add("COMMENT", "Catalog id is detector id, sorted.")
1250 # We are looping over existing datarefs, so the following is true
1251 metadata.add("COMMENT", "Only detectors with data have entries.")
1252 cat.setMetadata(metadata)
1254 cat.sort()
1255 return cat
1257 def _makeVisitSummarySchema(self):
1258 """Make the schema for the visitSummary catalog."""
1259 schema = afwTable.ExposureTable.makeMinimalSchema()
1260 schema.addField('visit', type='I', doc='Visit number')
1261 schema.addField('physical_filter', type='String', size=32, doc='Physical filter')
1262 schema.addField('band', type='String', size=32, doc='Name of band')
1263 schema.addField('psfSigma', type='F',
1264 doc='PSF model second-moments determinant radius (center of chip) (pixel)')
1265 schema.addField('psfArea', type='F',
1266 doc='PSF model effective area (center of chip) (pixel**2)')
1267 schema.addField('psfIxx', type='F',
1268 doc='PSF model Ixx (center of chip) (pixel**2)')
1269 schema.addField('psfIyy', type='F',
1270 doc='PSF model Iyy (center of chip) (pixel**2)')
1271 schema.addField('psfIxy', type='F',
1272 doc='PSF model Ixy (center of chip) (pixel**2)')
1273 schema.addField('raCorners', type='ArrayD', size=4,
1274 doc='Right Ascension of bounding box corners (degrees)')
1275 schema.addField('decCorners', type='ArrayD', size=4,
1276 doc='Declination of bounding box corners (degrees)')
1277 schema.addField('ra', type='D',
1278 doc='Right Ascension of bounding box center (degrees)')
1279 schema.addField('decl', type='D',
1280 doc='Declination of bounding box center (degrees)')
1281 schema.addField('zenithDistance', type='F',
1282 doc='Zenith distance of bounding box center (degrees)')
1283 schema.addField('zeroPoint', type='F',
1284 doc='Mean zeropoint in detector (mag)')
1285 schema.addField('skyBg', type='F',
1286 doc='Average sky background (ADU)')
1287 schema.addField('skyNoise', type='F',
1288 doc='Average sky noise (ADU)')
1289 schema.addField('meanVar', type='F',
1290 doc='Mean variance of the weight plane (ADU**2)')
1291 schema.addField('astromOffsetMean', type='F',
1292 doc='Mean offset of astrometric calibration matches (arcsec)')
1293 schema.addField('astromOffsetStd', type='F',
1294 doc='Standard deviation of offsets of astrometric calibration matches (arcsec)')
1296 return schema
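# A hedged sketch of consuming a visitSummary produced by ConsolidateVisitSummaryTask:
# the output is an ExposureCatalog keyed (and sorted) by detector id, so scalar summary
# columns can be pulled into pandas after selecting them (array-valued columns such as
# raCorners must be excluded before to_pandas). `visitSummary` is assumed to have been
# loaded elsewhere, e.g. with butler.get.
def _visitSummaryUsageSketch(visitSummary):
    cols = ["id", "visit", "band", "psfSigma", "zeroPoint"]
    return visitSummary.asAstropy()[cols].to_pandas().set_index("id")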
1299class VisitDataIdContainer(DataIdContainer):
1300 """DataIdContainer that groups sensor-level id's by visit
1301 """
1303 def makeDataRefList(self, namespace):
1304 """Make self.refList from self.idList
1306 Generate a list of data references grouped by visit.
1308 Parameters
1309 ----------
1310 namespace : `argparse.Namespace`
1311 Namespace used by `lsst.pipe.base.CmdLineTask` to parse command line arguments
1312 """
1313 # Group by visits
1314 visitRefs = defaultdict(list)
1315 for dataId in self.idList:
1316 if "visit" in dataId:
1317 visitId = dataId["visit"]
1318 # append all dataRefs in the subset to this visit's list
1319 subset = namespace.butler.subset(self.datasetType, dataId=dataId)
1320 visitRefs[visitId].extend([dataRef for dataRef in subset])
1322 outputRefList = []
1323 for refList in visitRefs.values():
1324 existingRefs = [ref for ref in refList if ref.datasetExists()]
1325 if existingRefs:
1326 outputRefList.append(existingRefs)
1328 self.refList = outputRefList
1331class ConsolidateSourceTableConnections(pipeBase.PipelineTaskConnections,
1332 defaultTemplates={"catalogType": ""},
1333 dimensions=("instrument", "visit")):
1334 inputCatalogs = connectionTypes.Input(
1335 doc="Input per-detector Source Tables",
1336 name="{catalogType}sourceTable",
1337 storageClass="DataFrame",
1338 dimensions=("instrument", "visit", "detector"),
1339 multiple=True
1340 )
1341 outputCatalog = connectionTypes.Output(
1342 doc="Per-visit concatenation of Source Table",
1343 name="{catalogType}sourceTable_visit",
1344 storageClass="DataFrame",
1345 dimensions=("instrument", "visit")
1346 )
1349class ConsolidateSourceTableConfig(pipeBase.PipelineTaskConfig,
1350 pipelineConnections=ConsolidateSourceTableConnections):
1351 pass
1354class ConsolidateSourceTableTask(CmdLineTask, pipeBase.PipelineTask):
1355 """Concatenate `sourceTable` list into a per-visit `sourceTable_visit`
1356 """
1357 _DefaultName = 'consolidateSourceTable'
1358 ConfigClass = ConsolidateSourceTableConfig
1360 inputDataset = 'sourceTable'
1361 outputDataset = 'sourceTable_visit'
1363 def runQuantum(self, butlerQC, inputRefs, outputRefs):
1364 inputs = butlerQC.get(inputRefs)
1365 self.log.info("Concatenating %s per-detector Source Tables",
1366 len(inputs['inputCatalogs']))
1367 df = pd.concat(inputs['inputCatalogs'])
1368 butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs)
1370 def runDataRef(self, dataRefList):
1371 self.log.info("Concatenating %s per-detector Source Tables", len(dataRefList))
1372 df = pd.concat([dataRef.get().toDataFrame() for dataRef in dataRefList])
1373 dataRefList[0].put(ParquetTable(dataFrame=df), self.outputDataset)
1375 @classmethod
1376 def _makeArgumentParser(cls):
1377 parser = ArgumentParser(name=cls._DefaultName)
1379 parser.add_id_argument("--id", cls.inputDataset,
1380 help="data ID, e.g. --id visit=12345",
1381 ContainerClass=VisitDataIdContainer)
1382 return parser
1384 def writeMetadata(self, dataRef):
1385 """No metadata to write.
1386 """
1387 pass
1389 def writeConfig(self, butler, clobber=False, doBackup=True):
1390 """No config to write.
1391 """
1392 pass
1395class MakeCcdVisitTableConnections(pipeBase.PipelineTaskConnections,
1396 dimensions=("instrument",),
1397 defaultTemplates={"calexpType": ""}):
1398 visitSummaryRefs = connectionTypes.Input(
1399 doc="Data references for per-visit consolidated exposure metadata from ConsolidateVisitSummaryTask",
1400 name="{calexpType}visitSummary",
1401 storageClass="ExposureCatalog",
1402 dimensions=("instrument", "visit"),
1403 multiple=True,
1404 deferLoad=True,
1405 )
1406 outputCatalog = connectionTypes.Output(
1407 doc="CCD and Visit metadata table",
1408 name="ccdVisitTable",
1409 storageClass="DataFrame",
1410 dimensions=("instrument",)
1411 )
1414class MakeCcdVisitTableConfig(pipeBase.PipelineTaskConfig,
1415 pipelineConnections=MakeCcdVisitTableConnections):
1416 pass
1419class MakeCcdVisitTableTask(CmdLineTask, pipeBase.PipelineTask):
1420 """Produce a `ccdVisitTable` from the `visitSummary` exposure catalogs.
1421 """
1422 _DefaultName = 'makeCcdVisitTable'
1423 ConfigClass = MakeCcdVisitTableConfig
1425 def run(self, visitSummaryRefs):
1426 """ Make a table of ccd information from the `visitSummary` catalogs.
1427 Parameters
1428 ----------
1429 visitSummaryRefs : `list` of `lsst.daf.butler.DeferredDatasetHandle`
1430 List of DeferredDatasetHandles pointing to exposure catalogs with
1431 per-detector summary information.
1432 Returns
1433 -------
1434 result : `lsst.pipe.base.Struct`
1435 Results struct with attribute:
1436 ``outputCatalog``
1437 Catalog of ccd and visit information.
1438 """
1439 ccdEntries = []
1440 for visitSummaryRef in visitSummaryRefs:
1441 visitSummary = visitSummaryRef.get()
1442 visitInfo = visitSummary[0].getVisitInfo()
1444 ccdEntry = {}
1445 summaryTable = visitSummary.asAstropy()
1446 selectColumns = ['id', 'visit', 'physical_filter', 'band', 'ra', 'decl', 'zenithDistance',
1447 'zeroPoint', 'psfSigma', 'skyBg', 'skyNoise']
1448 ccdEntry = summaryTable[selectColumns].to_pandas().set_index('id')
1449 # 'visit' is the human-readable visit number.
1450 # 'visitId' is the key to the visit table. They are the same.
1451 # Technically you should join to get the visit from the visit table.
1452 ccdEntry = ccdEntry.rename(columns={"visit": "visitId"})
1453 dataIds = [DataCoordinate.standardize(visitSummaryRef.dataId, detector=id) for id in
1454 summaryTable['id']]
1455 packer = visitSummaryRef.dataId.universe.makePacker('visit_detector', visitSummaryRef.dataId)
1456 ccdVisitIds = [packer.pack(dataId) for dataId in dataIds]
1457 ccdEntry['ccdVisitId'] = ccdVisitIds
1458 ccdEntry['detector'] = summaryTable['id']
1459 pixToArcseconds = np.array([vR.getWcs().getPixelScale().asArcseconds() for vR in visitSummary])
1460 ccdEntry["seeing"] = visitSummary['psfSigma'] * np.sqrt(8 * np.log(2)) * pixToArcseconds
1462 ccdEntry["skyRotation"] = visitInfo.getBoresightRotAngle().asDegrees()
1463 ccdEntry["expMidpt"] = visitInfo.getDate().toPython()
1464 expTime = visitInfo.getExposureTime()
1465 ccdEntry['expTime'] = expTime
1466 ccdEntry["obsStart"] = ccdEntry["expMidpt"] - 0.5 * pd.Timedelta(seconds=expTime)
1467 ccdEntry['darkTime'] = visitInfo.getDarkTime()
1468 ccdEntry['xSize'] = summaryTable['bbox_max_x'] - summaryTable['bbox_min_x']
1469 ccdEntry['ySize'] = summaryTable['bbox_max_y'] - summaryTable['bbox_min_y']
1470 ccdEntry['llcra'] = summaryTable['raCorners'][:, 0]
1471 ccdEntry['llcdec'] = summaryTable['decCorners'][:, 0]
1472 ccdEntry['ulcra'] = summaryTable['raCorners'][:, 1]
1473 ccdEntry['ulcdec'] = summaryTable['decCorners'][:, 1]
1474 ccdEntry['urcra'] = summaryTable['raCorners'][:, 2]
1475 ccdEntry['urcdec'] = summaryTable['decCorners'][:, 2]
1476 ccdEntry['lrcra'] = summaryTable['raCorners'][:, 3]
1477 ccdEntry['lrcdec'] = summaryTable['decCorners'][:, 3]
1478 # TODO: DM-30618, Add raftName, nExposures, ccdTemp, binX, binY, and flags,
1479 # and decide if WCS, and llcx, llcy, ulcx, ulcy, etc. values are actually wanted.
1480 ccdEntries.append(ccdEntry)
1482 outputCatalog = pd.concat(ccdEntries)
1483 outputCatalog.set_index('ccdVisitId', inplace=True, verify_integrity=True)
1484 return pipeBase.Struct(outputCatalog=outputCatalog)
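# A worked numeric sketch of the "seeing" column computed in MakeCcdVisitTableTask.run
# above: the Gaussian sigma-to-FWHM factor is sqrt(8 ln 2) ~= 2.355, so a hypothetical
# psfSigma of 2.0 pixels at a 0.2 arcsec/pixel scale gives roughly 0.94 arcsec seeing.
def _seeingSketch(psfSigma=2.0, pixelScaleArcsec=0.2):
    return psfSigma * np.sqrt(8 * np.log(2)) * pixelScaleArcsec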
1487class MakeVisitTableConnections(pipeBase.PipelineTaskConnections,
1488 dimensions=("instrument",),
1489 defaultTemplates={"calexpType": ""}):
1490 visitSummaries = connectionTypes.Input(
1491 doc="Per-visit consolidated exposure metadata from ConsolidateVisitSummaryTask",
1492 name="{calexpType}visitSummary",
1493 storageClass="ExposureCatalog",
1494 dimensions=("instrument", "visit",),
1495 multiple=True,
1496 deferLoad=True,
1497 )
1498 outputCatalog = connectionTypes.Output(
1499 doc="Visit metadata table",
1500 name="visitTable",
1501 storageClass="DataFrame",
1502 dimensions=("instrument",)
1503 )
1506class MakeVisitTableConfig(pipeBase.PipelineTaskConfig,
1507 pipelineConnections=MakeVisitTableConnections):
1508 pass
1511class MakeVisitTableTask(CmdLineTask, pipeBase.PipelineTask):
1512 """Produce a `visitTable` from the `visitSummary` exposure catalogs.
1513 """
1514 _DefaultName = 'makeVisitTable'
1515 ConfigClass = MakeVisitTableConfig
1517 def run(self, visitSummaries):
1518 """ Make a table of visit information from the `visitSummary` catalogs
1520 Parameters
1521 ----------
1522 visitSummaries : list of `lsst.afw.table.ExposureCatalog`
1523 List of exposure catalogs with per-detector summary information.
1524 Returns
1525 -------
1526 result : `lsst.pipe.base.Struct`
1527 Results struct with attribute:
1528 ``outputCatalog``
1529 Catalog of visit information.
1530 """
1531 visitEntries = []
1532 for visitSummary in visitSummaries:
1533 visitSummary = visitSummary.get()
1534 visitRow = visitSummary[0]
1535 visitInfo = visitRow.getVisitInfo()
1537 visitEntry = {}
1538 visitEntry["visitId"] = visitRow['visit']
1539 visitEntry["visit"] = visitRow['visit']
1540 visitEntry["physical_filter"] = visitRow['physical_filter']
1541 visitEntry["band"] = visitRow['band']
1542 raDec = visitInfo.getBoresightRaDec()
1543 visitEntry["ra"] = raDec.getRa().asDegrees()
1544 visitEntry["decl"] = raDec.getDec().asDegrees()
1545 visitEntry["skyRotation"] = visitInfo.getBoresightRotAngle().asDegrees()
1546 azAlt = visitInfo.getBoresightAzAlt()
1547 visitEntry["azimuth"] = azAlt.getLongitude().asDegrees()
1548 visitEntry["altitude"] = azAlt.getLatitude().asDegrees()
1549 visitEntry["zenithDistance"] = 90 - azAlt.getLatitude().asDegrees()
1550 visitEntry["airmass"] = visitInfo.getBoresightAirmass()
1551 visitEntry["obsStart"] = visitInfo.getDate().toPython()
1552 visitEntry["expTime"] = visitInfo.getExposureTime()
1553 visitEntries.append(visitEntry)
1554 # TODO: DM-30623, Add programId, exposureType, expMidpt, cameraTemp, mirror1Temp, mirror2Temp,
1555 # mirror3Temp, domeTemp, externalTemp, dimmSeeing, pwvGPS, pwvMW, flags, nExposures
1557 outputCatalog = pd.DataFrame(data=visitEntries)
1558 outputCatalog.set_index('visitId', inplace=True, verify_integrity=True)
1559 return pipeBase.Struct(outputCatalog=outputCatalog)
1562class WriteForcedSourceTableConnections(pipeBase.PipelineTaskConnections,
1563 dimensions=("instrument", "visit", "detector", "skymap", "tract")):
1565 inputCatalog = connectionTypes.Input(
1566 doc="Primary per-detector, single-epoch forced-photometry catalog. "
1567 "By default, it is the output of ForcedPhotCcdTask on calexps",
1568 name="forced_src",
1569 storageClass="SourceCatalog",
1570 dimensions=("instrument", "visit", "detector", "skymap", "tract")
1571 )
1572 inputCatalogDiff = connectionTypes.Input(
1573 doc="Secondary multi-epoch, per-detector, forced photometry catalog. "
1574 "By default, it is the output of ForcedPhotCcdTask run on image differences.",
1575 name="forced_diff",
1576 storageClass="SourceCatalog",
1577 dimensions=("instrument", "visit", "detector", "skymap", "tract")
1578 )
1579 outputCatalog = connectionTypes.Output(
1580 doc="Input catalogs horizontally joined on `objectId` in Parquet format",
1581 name="mergedForcedSource",
1582 storageClass="DataFrame",
1583 dimensions=("instrument", "visit", "detector", "skymap", "tract")
1584 )
1587class WriteForcedSourceTableConfig(WriteSourceTableConfig,
1588 pipelineConnections=WriteForcedSourceTableConnections):
1589 key = lsst.pex.config.Field(
1590 doc="Column on which to join the two input tables and to use as the primary key of the output",
1591 dtype=str,
1592 default="objectId",
1593 )
1596class WriteForcedSourceTableTask(pipeBase.PipelineTask):
1597 """Merge and convert per-detector forced source catalogs to parquet.
1599 Because the predecessor ForcedPhotCcdTask operates per-detector and
1600 per-tract (i.e., it has tract in its dimensions), detectors on a tract
1601 boundary may have multiple forced source catalogs.
1603 The successor task TransformForcedSourceTable runs per-patch and
1604 temporally aggregates the overlapping mergedForcedSource catalogs from
1605 all available epochs.
1606 """
1607 _DefaultName = "writeForcedSourceTable"
1608 ConfigClass = WriteForcedSourceTableConfig
1610 def runQuantum(self, butlerQC, inputRefs, outputRefs):
1611 inputs = butlerQC.get(inputRefs)
1612 # Add ccdVisitId to allow joining with CcdVisitTable
1613 inputs['ccdVisitId'] = butlerQC.quantum.dataId.pack("visit_detector")
1614 inputs['band'] = butlerQC.quantum.dataId.full['band']
1615 outputs = self.run(**inputs)
1616 butlerQC.put(outputs, outputRefs)
1618 def run(self, inputCatalog, inputCatalogDiff, ccdVisitId=None, band=None):
1619 dfs = []
1620 for table, dataset in zip((inputCatalog, inputCatalogDiff), ('calexp', 'diff')):
1621 df = table.asAstropy().to_pandas().set_index(self.config.key, drop=False)
1622 df = df.reindex(sorted(df.columns), axis=1)
1623 df['ccdVisitId'] = ccdVisitId if ccdVisitId else pd.NA
1624 df['band'] = band if band else pd.NA
1625 df.columns = pd.MultiIndex.from_tuples([(dataset, c) for c in df.columns],
1626 names=('dataset', 'column'))
1628 dfs.append(df)
1630 outputCatalog = functools.reduce(lambda d1, d2: d1.join(d2), dfs)
1631 return pipeBase.Struct(outputCatalog=outputCatalog)
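# Illustrative sketch (not part of the pipeline code): WriteForcedSourceTableTask.run
# wraps each input catalog's columns in a ('dataset', 'column') MultiIndex and then
# joins the frames on the shared objectId index.  Column names and values below are
# invented.
def _exampleMergedForcedSource():
    import functools

    import pandas as pd

    calexp = pd.DataFrame({"objectId": [1, 2], "psfFlux": [10.0, 20.0]})
    diff = pd.DataFrame({"objectId": [1, 2], "psfFlux": [0.5, -0.3]})

    dfs = []
    for df, dataset in ((calexp, "calexp"), (diff, "diff")):
        df = df.set_index("objectId", drop=False)
        df.columns = pd.MultiIndex.from_tuples([(dataset, c) for c in df.columns],
                                               names=("dataset", "column"))
        dfs.append(df)
    # For two inputs this reduce is equivalent to dfs[0].join(dfs[1]).
    return functools.reduce(lambda d1, d2: d1.join(d2), dfs)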
1634class TransformForcedSourceTableConnections(pipeBase.PipelineTaskConnections,
1635 dimensions=("instrument", "skymap", "patch", "tract")):
1637 inputCatalogs = connectionTypes.Input(
1638 doc="Parquet table of merged ForcedSources produced by WriteForcedSourceTableTask",
1639 name="mergedForcedSource",
1640 storageClass="DataFrame",
1641 dimensions=("instrument", "visit", "detector", "skymap", "tract"),
1642 multiple=True,
1643 deferLoad=True
1644 )
1645 referenceCatalog = connectionTypes.Input(
1646 doc="Reference catalog that was used to seed the forced photometry. The "
1647 "columns objectId, detect_isPrimary, detect_isTractInner, and "
1648 "detect_isPatchInner are expected.",
1649 name="objectTable",
1650 storageClass="DataFrame",
1651 dimensions=("tract", "patch", "skymap"),
1652 deferLoad=True
1653 )
1654 outputCatalog = connectionTypes.Output(
1655 doc="Narrower, temporally aggregated, per-patch ForcedSource table, transformed and converted "
1656 "according to a specified set of functors.",
1657 name="forcedSourceTable",
1658 storageClass="DataFrame",
1659 dimensions=("tract", "patch", "skymap")
1660 )
1663class TransformForcedSourceTableConfig(TransformCatalogBaseConfig,
1664 pipelineConnections=TransformForcedSourceTableConnections):
1665 referenceColumns = pexConfig.ListField(
1666 dtype=str,
1667 default=["detect_isPrimary", "detect_isTractInner", "detect_isPatchInner"],
1668 optional=True,
1669 doc="Columns to pull from reference catalog",
1670 )
1671 keyRef = lsst.pex.config.Field(
1672 doc="Column on which to join the two input tables and to use as the primary key of the output",
1673 dtype=str,
1674 default="objectId",
1675 )
1676 key = lsst.pex.config.Field(
1677 doc="Rename the output DataFrame index to this name",
1678 dtype=str,
1679 default="forcedSourceId",
1680 )
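# Illustrative sketch (not part of the pipeline code): TransformForcedSourceTableTask.run
# below keeps only forced sources whose objectId appears in the per-patch reference
# catalog by inner-joining on the shared index, which also carries along the
# duplicate-resolution flags.  Values below are invented.
def _exampleReferenceJoin():
    import pandas as pd

    forced = pd.DataFrame({"psfFlux": [1.0, 2.0, 3.0]},
                          index=pd.Index([1, 2, 99], name="objectId"))
    ref = pd.DataFrame({"detect_isPrimary": [True, False]},
                       index=pd.Index([1, 2], name="objectId"))
    # objectId 99 is absent from the reference catalog, so its row is dropped.
    return forced.join(ref, how="inner")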
1683class TransformForcedSourceTableTask(TransformCatalogBaseTask):
1684 """Transform/standardize a ForcedSource catalog
1686 Transforms each wide, per-detector forcedSource parquet table according to
1687 the specification file (per-camera defaults are found in ForcedSource.yaml).
1688 All epochs that overlap the patch are aggregated into one narrow, per-patch
1689 parquet file.
1691 No de-duplication of rows is performed. The duplicate-resolution flags
1692 `detect_isPrimary`, `detect_isTractInner`, and `detect_isPatchInner` are
1693 pulled in from the referenceCatalog, so that the user may de-duplicate
1694 for analysis or compare duplicates for QA.
1696 The resulting table includes multiple bands. Epochs (MJDs) and other useful
1697 per-visit metadata can be retrieved by joining with the CcdVisitTable on
1698 ccdVisitId.
1699 """
1700 _DefaultName = "transformForcedSourceTable"
1701 ConfigClass = TransformForcedSourceTableConfig
1703 def runQuantum(self, butlerQC, inputRefs, outputRefs):
1704 inputs = butlerQC.get(inputRefs)
1705 if self.funcs is None:
1706 raise ValueError("config.functorFile is None. "
1707 "It must be a valid path to a functor yaml file in order to run this task as a PipelineTask.")
1708 outputs = self.run(inputs['inputCatalogs'], inputs['referenceCatalog'], funcs=self.funcs,
1709 dataId=outputRefs.outputCatalog.dataId.full)
1711 butlerQC.put(outputs, outputRefs)
1713 def run(self, inputCatalogs, referenceCatalog, funcs=None, dataId=None, band=None):
1714 dfs = []
1715 ref = referenceCatalog.get(parameters={"columns": self.config.referenceColumns})
1716 self.log.info("Aggregating %s input catalogs", len(inputCatalogs))
1717 for handle in inputCatalogs:
1718 result = self.transform(None, handle, funcs, dataId)
1719 # Keep only rows for objects that were detected on (i.e., overlap) the patch
1720 dfs.append(result.df.join(ref, how='inner'))
1722 outputCatalog = pd.concat(dfs)
1724 # Now that the join on config.keyRef is done, change the index to
1725 # config.key: first make sure the index is labelled config.keyRef,
1726 outputCatalog.index.rename(self.config.keyRef, inplace=True)
1727 # then move config.keyRef from the index into the column list,
1728 outputCatalog.reset_index(inplace=True)
1729 # set forcedSourceId (specified in ForcedSource.yaml) as the index,
1730 outputCatalog.set_index("forcedSourceId", inplace=True, verify_integrity=True)
1731 # and finally rename the index to config.key.
1732 outputCatalog.index.rename(self.config.key, inplace=True)
1734 self.log.info("Made a table of %d columns and %d rows",
1735 len(outputCatalog.columns), len(outputCatalog))
1736 return pipeBase.Struct(outputCatalog=outputCatalog)
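# Illustrative sketch (not part of the pipeline code): the index manipulation at the
# end of TransformForcedSourceTableTask.run demotes the join key (config.keyRef,
# "objectId" by default) to an ordinary column and promotes forcedSourceId, the
# column named in ForcedSource.yaml, to the index, relabelled as config.key.
# Values below are invented.
def _exampleReindexForcedSources():
    import pandas as pd

    df = pd.DataFrame({"forcedSourceId": [11, 12, 13], "psfFlux": [1.0, 2.0, 3.0]},
                      index=pd.Index([1, 1, 2], name="objectId"))
    df.reset_index(inplace=True)  # objectId becomes a regular column
    df.set_index("forcedSourceId", inplace=True, verify_integrity=True)
    return df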
1739class ConsolidateTractConnections(pipeBase.PipelineTaskConnections,
1740 defaultTemplates={"catalogType": ""},
1741 dimensions=("instrument", "tract")):
1742 inputCatalogs = connectionTypes.Input(
1743 doc="Input per-patch DataFrame Tables to be concatenated",
1744 name="{catalogType}ForcedSourceTable",
1745 storageClass="DataFrame",
1746 dimensions=("tract", "patch", "skymap"),
1747 multiple=True,
1748 )
1750 outputCatalog = connectionTypes.Output(
1751 doc="Output per-tract concatenation of DataFrame Tables",
1752 name="{catalogType}ForcedSourceTable_tract",
1753 storageClass="DataFrame",
1754 dimensions=("tract", "skymap"),
1755 )
1758class ConsolidateTractConfig(pipeBase.PipelineTaskConfig,
1759 pipelineConnections=ConsolidateTractConnections):
1760 pass
1763class ConsolidateTractTask(CmdLineTask, pipeBase.PipelineTask):
1764 """Concatenate a list of per-patch DataFrames into a single
1765 per-tract DataFrame.
1766 """
1767 _DefaultName = 'ConsolidateTract'
1768 ConfigClass = ConsolidateTractConfig
1770 def runQuantum(self, butlerQC, inputRefs, outputRefs):
1771 inputs = butlerQC.get(inputRefs)
1772 # No check that at least one inputCatalog exists: if none did, the quantum graph would be empty
1773 self.log.info("Concatenating %s per-patch %s Tables",
1774 len(inputs['inputCatalogs']),
1775 inputRefs.inputCatalogs[0].datasetType.name)
1776 df = pd.concat(inputs['inputCatalogs'])
1777 butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs)
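# Illustrative sketch (not part of the pipeline code): the consolidation above is a
# plain row-wise concatenation of the loaded per-patch DataFrames, i.e. roughly
#     pd.concat([patchDf1, patchDf2, ...])
# where patchDf1, patchDf2 are hypothetical stand-ins for the entries of
# inputs['inputCatalogs'].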