lsst.pipe.tasks gba22ec62b9+b101dbd4e6
getRepositoryData.py
Go to the documentation of this file.
2# LSST Data Management System
3# Copyright 2008, 2009, 2010, 2011, 2012 LSST Corporation.
4#
5# This product includes software developed by the
6# LSST Project (http://www.lsst.org/).
7#
8# This program is free software: you can redistribute it and/or modify
9# it under the terms of the GNU General Public License as published by
10# the Free Software Foundation, either version 3 of the License, or
11# (at your option) any later version.
12#
13# This program is distributed in the hope that it will be useful,
14# but WITHOUT ANY WARRANTY; without even the implied warranty of
15# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16# GNU General Public License for more details.
17#
18# You should have received a copy of the LSST License Statement and
19# the GNU General Public License along with this program. If not,
20# see <http://www.lsstcorp.org/LegalNotices/>.
21#
22"""Retrieve collections of metadata or data based on a set of data references
23
24Use this as a base task for creating graphs and reports for a set of data.
25"""
26import lsst.pex.config as pexConfig
27import lsst.pipe.base as pipeBase
28from lsst.utils.timer import timeMethod
29
30__all__ = ["DataRefListRunner", "GetRepositoryDataTask"]
31
32
class DataRefListRunner(pipeBase.TaskRunner):
    """Task runner that hands the complete list of data references to the task

    The default TaskRunner iterates over the data references and processes
    them one at a time; this runner passes all of them in a single call.
    """

    @staticmethod
    def getTargetList(parsedCmd):
        """Build the list of targets; each entry is the argument of one __call__

        @param parsedCmd: parsed command; only parsedCmd.id.refList is read

        @return a one-element list whose single entry is the full list of dataRefs
        """
        allDataRefs = parsedCmd.id.refList
        # A single target, so the task is invoked exactly once with every dataRef.
        return [allDataRefs]

    def __call__(self, dataRefList):
        """Construct the task and call its runDataRef on a single target

        @param dataRefList: one target as produced by getTargetList, i.e. a list of data references

        @return:
        - None if doReturnResults is false
        - A pipe_base Struct containing these fields if doReturnResults is true:
            - dataRefList: the argument passed to runDataRef
            - metadata: task metadata after execution of runDataRef
            - result: result returned by task runDataRef
        """
        task = self.TaskClass(config=self.config, log=self.log)
        taskResult = task.runDataRef(dataRefList)

        if not self.doReturnResults:
            return None
        return pipeBase.Struct(
            dataRefList=dataRefList,
            metadata=task.metadata,
            result=taskResult,
        )
66
67
class GetRepositoryDataTask(pipeBase.CmdLineTask):
    """Retrieve data from a repository, e.g. for plotting or analysis purposes

    This is a base task: runDataRef must be overridden by a subclass. The
    remaining methods are helpers that operate on a list of data references.
    """
    ConfigClass = pexConfig.Config  # nothing to configure
    RunnerClass = DataRefListRunner
    _DefaultName = "getTaskData"

    def __init__(self, *args, **kwargs):
        """Construct the task; all arguments are forwarded to CmdLineTask
        """
        pipeBase.CmdLineTask.__init__(self, *args, **kwargs)

    @timeMethod
    def runDataRef(self, dataRefList):
        """Get data from a repository for a collection of data references

        @param dataRefList: a list of data references

        @throw NotImplementedError always; subclasses must override this method
        """
        # The message names the method a subclass actually has to provide.
        raise NotImplementedError("subclass must specify a runDataRef method")

    def getIdList(self, dataRefList):
        """Get a list of data IDs in a form that can be used as dictionary keys

        @param dataRefList: a list of data references
        @return a pipe_base Struct with fields:
        - idKeyTuple: a tuple of dataRef data ID keys
        - idValList: a list of data ID value tuples, each tuple contains values in the order in idKeyTuple

        @throw RuntimeError if dataRefList is empty
        """
        if not dataRefList:
            raise RuntimeError("No data refs")
        # The sorted keys of the first dataRef fix the key order for every entry.
        idKeyTuple = tuple(sorted(dataRefList[0].dataId.keys()))

        idValList = [
            tuple(dataRef.dataId[key] for key in idKeyTuple)
            for dataRef in dataRefList
        ]

        return pipeBase.Struct(
            idKeyTuple=idKeyTuple,
            idValList=idValList,
        )

    def getDataList(self, dataRefList, datasetType):
        """Retrieve a list of data

        @param dataRefList: a list of data references
        @param datasetType: datasetType of data to be retrieved
        @return a list of data, one entry per dataRef in dataRefList (in order)
        """
        return [dataRef.get(datasetType=datasetType) for dataRef in dataRefList]

    def getMetadataItems(self, dataRefList, datasetType, nameList):
        """Retrieve a list of dictionaries of metadata

        @param dataRefList: a list of data references
        @param datasetType: datasetType of the metadata; the retrieved object must support getArray(name)
        @param nameList: names of the metadata items to extract from each metadata object
        @return a list of dicts of metadata:
        - each entry in the list corresponds to a dataRef in dataRefList
        - each dict contains name: item of metadata, for each name in nameList;
            every value is whatever getArray(name) returns
        """
        valList = []
        for dataRef in dataRefList:
            metadata = dataRef.get(datasetType=datasetType)
            valList.append({name: metadata.getArray(name) for name in nameList})
        return valList
def getMetadataItems(self, dataRefList, datasetType, nameList)