lsst.pipe.base  16.0-25-g2c6bf4a+2
config.py
Go to the documentation of this file.
1 # This file is part of pipe_base.
2 #
3 # Developed for the LSST Data Management System.
4 # This product includes software developed by the LSST Project
5 # (http://www.lsst.org).
6 # See the COPYRIGHT file at the top-level directory of this distribution
7 # for details of code ownership.
8 #
9 # This program is free software: you can redistribute it and/or modify
10 # it under the terms of the GNU General Public License as published by
11 # the Free Software Foundation, either version 3 of the License, or
12 # (at your option) any later version.
13 #
14 # This program is distributed in the hope that it will be useful,
15 # but WITHOUT ANY WARRANTY; without even the implied warranty of
16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 # GNU General Public License for more details.
18 #
19 # You should have received a copy of the GNU General Public License
20 # along with this program. If not, see <http://www.gnu.org/licenses/>.
21 
22 """Module defining config classes for PipelineTask.
23 """
24 
25 __all__ = ["InputDatasetConfig", "InputDatasetField",
26  "OutputDatasetConfig", "OutputDatasetField",
27  "InitInputDatasetConfig", "InitInputDatasetField",
28  "InitOutputDatasetConfig", "InitOutputDatasetField",
29  "ResourceConfig", "QuantumConfig", "PipelineTaskConfig"]
30 
31 # -------------------------------
32 # Imports of standard modules --
33 # -------------------------------
34 from textwrap import dedent, indent
35 
36 # -----------------------------
37 # Imports for other modules --
38 # -----------------------------
39 import lsst.pex.config as pexConfig
40 
41 # ----------------------------------
42 # Local non-exported definitions --
43 # ----------------------------------
44 
45 # ------------------------
46 # Exported definitions --
47 # ------------------------
48 
# Module-level registry of template parameters. Keys are the ``id()`` of a
# `PipelineTaskConfig` instance and values are the `dict` of parameters
# accumulated by that instance's ``formatTemplateNames`` calls.
PIPELINETASK_CONFIG_TEMPLATE_DICT = {}
50 
51 
def _makeDatasetField(name, dtype):
    """Make a callable which produces `~lsst.pex.config.ConfigField` objects.

    This is a factory function which produces factory functions. The
    factories returned by this function are used to simplify the process of
    creating `lsst.pex.config.ConfigField` which have dtypes derived from
    either `_DatasetTypeConfig`, or `_GlobalDatasetTypeConfig`. These
    functions can then be used in a manner similar to other
    `~lsst.pex.config.ConfigField` constructors.

    Below is a flow diagram to explain the use of this function visually,
    where arrows indicate processing flow.

    Make a ConfigField factory:
    _makeDatasetField() -> return wrappedFunc -> assign to variable
    corresponding to name

    Use a ConfigField factory:
    name() -> factory() -> return pexConfig instance

    Parameters
    ----------
    name : `str`
        The name to use as the final output `~lsst.pex.config.Field`
        constructor
    dtype : Configuration Object
        This is the python type to set as the dtype in the ConfigField
        construction

    Returns
    -------
    func : function
        Python callable function which can be used to produce instances of
        `~lsst.pex.config.ConfigField` of type dtype. All of its arguments
        must be passed by keyword.

    Raises
    ------
    TypeError
        Raised if attempting to create a factory function from an
        incompatible type, i.e. one deriving from neither
        `_GlobalDatasetTypeConfig` nor `_DatasetTypeConfig`.

    Examples
    --------
    FooField = _makeDatasetField("FooField", FooConfig)
    fooFieldInstance = FooField(doc="An example Foo ConfigField",
                                name="fooConfigurable",
                                dimensions=("tract", "patch"),
                                storageClass="Exposure")
    """

    def factory(**kwargs):
        """Innermost function of the closure, which does the work of
        actually producing the ConfigField.
        """
        # kwargs contain all the variables needed to construct the
        # ConfigField. The doc and check variables are used to construct the
        # ConfigField itself, while the rest are used in the construction of
        # the dtype object, which is why doc and check are filtered out
        # before unpacking the dictionary to the dtype constructor.
        dtypeKwargs = {k: v for k, v in kwargs.items() if k not in ('doc', 'check')}
        return pexConfig.ConfigField(doc=kwargs['doc'],
                                     dtype=dtype,
                                     default=dtype(**dtypeKwargs),
                                     check=kwargs['check'])

    # Fragments of parameter documentation spliced into the generated
    # docstring below. Each starts with a newline so that it continues
    # directly after the documentation of the ``name`` parameter.
    dimensionsDoc = """
    dimensions : iterable of `str`
        Iterable of Dimensions for this `~lsst.daf.butler.DatasetType`
    scalar : `bool`, optional
        If set to True then only a single dataset is expected on input or
        produced on output. In that case list of objects/DataIds will be
        unpacked before calling task methods, returned data is expected
        to contain single objects as well."""
    nameTemplateDoc = """
    nameTemplate : `str`, optional
        Template for the `name` field which is specified as a python formattable
        string. The template is formatted during the configuration of a Config
        class with a user defined string. Defaults to empty string, in which
        case no formatting is done."""

    # Here the dtype is checked against its base class type. This is due to
    # the fact the code necessary to make ConfigField factories is shared,
    # but the arguments the factories take differ between base class types.
    if issubclass(dtype, _GlobalDatasetTypeConfig):
        # Handle global dataset types like InitInputDatasetConfig, these
        # types have a function signature with no dimensions variable.
        def wrappedFunc(*, doc, storageClass, check=None, name="", nameTemplate=''):
            return factory(doc=doc, storageClass=storageClass, check=check,
                           name=name, nameTemplate=nameTemplate)
        # This factory takes neither dimensions nor scalar, so only the
        # nameTemplate argument needs extra documentation.
        extraDoc = nameTemplateDoc
        extraFields = ", nameTemplate"
    elif issubclass(dtype, _DatasetTypeConfig):
        # Handle dataset types like InputDatasetConfig, note these take
        # dimensions and scalar arguments.
        def wrappedFunc(*, doc, dimensions, storageClass, name="", scalar=False, check=None,
                        nameTemplate=''):
            return factory(doc=doc, dimensions=dimensions, storageClass=storageClass,
                           name=name, scalar=scalar, check=check, nameTemplate=nameTemplate)
        extraDoc = dimensionsDoc + nameTemplateDoc
        extraFields = ", dimensions, scalar, nameTemplate"
    else:
        # If someone tries to create a config factory for a type that is not
        # handled raise an exception.
        raise TypeError(f"Cannot create a factory for dtype {dtype}")

    # Programmatically create a docstring to use in the factory function.
    # Every line of the template, including the first, carries the same
    # indentation so that ``dedent`` below can strip all of it.
    extraDocBlock = indent(dedent(extraDoc), " " * 4)
    typeName = dtype.__name__
    docstring = f"""\
    Factory function to create `~lsst.pex.config.Config` class instances
    of `{typeName}`

    This function serves as syntactic sugar for creating
    `~lsst.pex.config.ConfigField` which are `{typeName}`. The naming of
    this function violates the normal convention of a lowercase first letter
    in the function name, as this function is intended to sit in the same
    place as `~lsst.pex.config.ConfigField` classes, and consistency in
    declaration syntax is important.

    The input arguments for this class are a combination of the arguments for
    `~lsst.pex.config.ConfigField` and `{typeName}`. The arguments
    doc and check come from `~lsst.pex.config.ConfigField`, while name{extraFields}
    and storageClass come from `{typeName}`.

    Parameters
    ----------
    doc : `str`
        Documentation string for the `{typeName}`
    name : `str`, optional
        Name of the `~lsst.daf.butler.DatasetType` in the returned
        `{typeName}`{extraDocBlock}
    storageClass : `str`
        Name of the `~lsst.daf.butler.StorageClass` in the `{typeName}`
    check : callable, optional
        A callable to be called with the field value that returns
        False if the value is invalid.

    Returns
    -------
    result : `~lsst.pex.config.ConfigField`
        Instance of a `~lsst.pex.config.ConfigField` with `{typeName}` as a dtype
    """
    # Set the name to be used for the returned ConfigField factory function.
    wrappedFunc.__name__ = name
    # Attach the generated docstring, unindented, as it was indented to
    # correspond to the indentation of this factory function.
    wrappedFunc.__doc__ = dedent(docstring)
    return wrappedFunc
198 
199 
class QuantumConfig(pexConfig.Config):
    """Configuration describing the dimensions of a PipelineTask quantum.

    Besides the list of dimension names, an optional list of SQL statements
    to be executed against the Registry database can be given. The exact
    meaning and format of the SQL will be determined at a later point.
    """
    dimensions = pexConfig.ListField(
        doc="list of Dimensions which define quantum",
        dtype=str,
    )
    sql = pexConfig.ListField(
        doc="sequence of SQL statements",
        dtype=str,
        optional=True,
    )
212 
213 
class _BaseDatasetTypeConfig(pexConfig.Config):
    """Common base for the dataset type configurations of PipelineTask.

    Holds the fields every dataset type configuration in this module has:
    the dataset type name, its storage class name, and an optional template
    from which the name can be generated.
    """
    name = pexConfig.Field(
        doc="name of the DatasetType",
        dtype=str,
    )
    storageClass = pexConfig.Field(
        doc="name of the StorageClass",
        dtype=str,
    )
    nameTemplate = pexConfig.Field(
        doc=("Templated name of string, used to set name field according "
             "to a shared substring when "
             "`~PipelineTaskConfig.formatTemplateNames` is called"),
        dtype=str,
        default='',
        optional=True,
    )
228 
229 
class _DatasetTypeConfig(_BaseDatasetTypeConfig):
    """Configuration class which defines dataset type used by PipelineTask.

    Consists of DatasetType name, list of Dimension names and StorageClass
    name. PipelineTasks typically define one or more input and output
    datasets. This class should not be used directly, instead one of
    `InputDatasetConfig` or `OutputDatasetConfig` should be used in
    PipelineTask config.
    """
    dimensions = pexConfig.ListField(dtype=str,
                                     doc="list of Dimensions for this DatasetType")
    scalar = pexConfig.Field(dtype=bool,
                             default=False,
                             optional=True,
                             doc=("If set to True then only a single dataset is expected "
                                  "on input or produced on output. In that case list of "
                                  "objects/DataIds will be unpacked before calling task "
                                  "methods, returned data is expected to contain single "
                                  "objects as well."))
248 
249 
class InputDatasetConfig(_DatasetTypeConfig):
    """Dataset type configuration for an input dataset of a PipelineTask.

    Adds nothing to `_DatasetTypeConfig`; it exists as a distinct type so
    that input and output dataset configurations can be told apart.
    """
    pass
252 
253 
class OutputDatasetConfig(_DatasetTypeConfig):
    """Dataset type configuration for an output dataset of a PipelineTask.

    Adds nothing to `_DatasetTypeConfig`; it exists as a distinct type so
    that input and output dataset configurations can be told apart.
    """
256 
257 
class _GlobalDatasetTypeConfig(_BaseDatasetTypeConfig):
    """Configuration class which defines dataset types used in PipelineTask
    initialization.

    Consists of DatasetType name and StorageClass name, with a read-only
    ``dimensions`` property that returns an empty tuple, enforcing the
    constraint that datasets used in initialization are not associated with
    any Dimensions. This class should not be used directly, instead one of
    `InitInputDatasetConfig` or `InitOutputDatasetConfig` should be used in
    PipelineTask config.
    """
    @property
    def dimensions(self):
        """Dimensions associated with this DatasetType (always empty)."""
        return ()
273 
274 
class InitInputDatasetConfig(_GlobalDatasetTypeConfig):
    """Dataset type configuration for a dataset read during PipelineTask
    initialization.

    Adds nothing to `_GlobalDatasetTypeConfig`; it exists as a distinct type
    so that init-input and init-output configurations can be told apart.
    """
    pass
277 
278 
class InitOutputDatasetConfig(_GlobalDatasetTypeConfig):
    """Dataset type configuration for a dataset produced during PipelineTask
    initialization.

    Adds nothing to `_GlobalDatasetTypeConfig`; it exists as a distinct type
    so that init-input and init-output configurations can be told apart.
    """
281 
282 
class ResourceConfig(pexConfig.Config):
    """Configuration for resource requirements.

    Some activators will use this configuration class to estimate the
    resource use of a pipeline, and some tasks could use it to adjust their
    own resource use (e.g. reduce the number of threads).

    The limit of some resources can be estimated by the corresponding task,
    in which case the task could set the field value itself. For many of the
    fields defined here the resource used by a task depends on the size of
    the data and is not known in advance; those values will be configured
    through overrides based on some external estimates.
    """
    minMemoryMB = pexConfig.Field(
        doc="Minimal memory needed by task, can be None if estimate is unknown.",
        dtype=int,
        default=None,
        optional=True,
    )
    minNumCores = pexConfig.Field(
        doc="Minimal number of cores needed by task.",
        dtype=int,
        default=1,
    )
301 
302 
class PipelineTaskConfig(pexConfig.Config):
    """Base class for all PipelineTask configurations.

    This class defines fields that must be defined for every
    `~lsst.pipe.base.PipelineTask`. It will be used as a base class for all
    `~lsst.pipe.base.PipelineTask` configurations instead of
    `pex.config.Config`.
    """
    quantum = pexConfig.ConfigField(dtype=QuantumConfig,
                                    doc="configuration for PipelineTask quantum")

    def formatTemplateNames(self, templateParamsDict):
        """Set the ``name`` of every dataset type field that has a template.

        The supplied parameters are merged into the parameters remembered
        from earlier calls on this instance, and each dataset type field
        with a non-empty ``nameTemplate`` then has its ``name`` replaced by
        the template formatted with the merged parameters.

        Parameters
        ----------
        templateParamsDict : `dict`
            Mapping from template parameter name to the string substituted
            for it. Parameters given here override stored ones of the same
            name.

        Raises
        ------
        KeyError
            Raised by `str.format` if a template refers to a parameter that
            is neither stored nor present in ``templateParamsDict``.
        """
        # Look up the stored parameters for the specific instance of this
        # config class, creating an empty entry on first use.
        # NOTE(review): entries are keyed by id(self) and never removed, and
        # CPython may reuse an id once an object has been garbage collected;
        # confirm config instances live long enough for this to be safe.
        storedParamsDict = PIPELINETASK_CONFIG_TEMPLATE_DICT.setdefault(id(self), {})
        storedParamsDict.update(templateParamsDict)
        for _, value in self.items():
            # nameTemplate is an optional field, so it may be None as well
            # as the empty string; both mean "no template, leave name alone".
            if isinstance(value, _BaseDatasetTypeConfig) and value.nameTemplate:
                value.name = value.nameTemplate.format(**storedParamsDict)
322 
323 
# Public ConfigField factories, one for each concrete dataset type config.
# Fields for datasets that carry dimensions:
InputDatasetField = _makeDatasetField("InputDatasetField", InputDatasetConfig)
OutputDatasetField = _makeDatasetField("OutputDatasetField", OutputDatasetConfig)
# Fields for datasets used in task initialization (no dimensions):
InitInputDatasetField = _makeDatasetField("InitInputDatasetField", InitInputDatasetConfig)
InitOutputDatasetField = _makeDatasetField("InitOutputDatasetField", InitOutputDatasetConfig)
def formatTemplateNames(self, templateParamsDict)
Definition: config.py:314