lsst.pipe.base  16.0-21-g91a1c50+1
config.py
Go to the documentation of this file.
1 # This file is part of pipe_base.
2 #
3 # Developed for the LSST Data Management System.
4 # This product includes software developed by the LSST Project
5 # (http://www.lsst.org).
6 # See the COPYRIGHT file at the top-level directory of this distribution
7 # for details of code ownership.
8 #
9 # This program is free software: you can redistribute it and/or modify
10 # it under the terms of the GNU General Public License as published by
11 # the Free Software Foundation, either version 3 of the License, or
12 # (at your option) any later version.
13 #
14 # This program is distributed in the hope that it will be useful,
15 # but WITHOUT ANY WARRANTY; without even the implied warranty of
16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 # GNU General Public License for more details.
18 #
19 # You should have received a copy of the GNU General Public License
20 # along with this program. If not, see <http://www.gnu.org/licenses/>.
21 
22 """Module defining config classes for PipelineTask.
23 """
24 
25 __all__ = ["InputDatasetConfig", "InputDatasetField",
26  "OutputDatasetConfig", "OutputDatasetField",
27  "InitInputDatasetConfig", "InitInputDatasetField",
28  "InitOutputDatasetConfig", "InitOutputDatasetField",
29  "ResourceConfig", "QuantumConfig", "PipelineTaskConfig"]
30 
31 # -------------------------------
32 # Imports of standard modules --
33 # -------------------------------
34 from textwrap import dedent, indent
35 
36 # -----------------------------
37 # Imports for other modules --
38 # -----------------------------
39 import lsst.pex.config as pexConfig
40 
41 # ----------------------------------
42 # Local non-exported definitions --
43 # ----------------------------------
44 
45 # ------------------------
46 # Exported definitions --
47 # ------------------------
48 
# Module-level registry of template parameters. It is populated by
# PipelineTaskConfig.formatTemplateNames, which keys each entry by the
# id() of the config instance the parameters were supplied for.
PIPELINETASK_CONFIG_TEMPLATE_DICT = dict()
50 
51 
def _makeDatasetField(name, dtype):
    """Build a keyword-only factory producing `~lsst.pex.config.ConfigField`
    objects for a dataset type config class.

    This is a factory function which produces factory functions. The
    factories returned by this function simplify the process of creating
    pex config ConfigFields which have dtypes derived from either
    `_DatasetTypeConfig` or `_GlobalDatasetTypeConfig`. They can then be
    used in a manner similar to other ConfigField constructors.

    Processing flow, where arrows indicate order::

        Make a ConfigField factory:
            _makeDatasetField() -> return wrappedFunc -> assign to a variable
            corresponding to ``name``

        Use a ConfigField factory:
            name() -> factory() -> return ConfigField instance

    Parameters
    ----------
    name : `str`
        The name to give to the returned Field constructor (it becomes the
        ``__name__`` of the returned function).
    dtype : `type`
        Config class to set as the dtype in the ConfigField construction.
        Must be a subclass of `_DatasetTypeConfig` or
        `_GlobalDatasetTypeConfig`.

    Returns
    -------
    func : function
        Python callable, accepting keyword arguments only, which produces
        ConfigField instances whose dtype is ``dtype``.

    Raises
    ------
    TypeError
        Raised if ``dtype`` is not a subclass of one of the supported base
        classes.

    Examples
    --------
    All arguments of the generated factory are keyword-only::

        FooField = _makeDatasetField("FooField", FooConfig)
        fooFieldInstance = FooField(doc="An example Foo ConfigField",
                                    name="fooConfigurable",
                                    dimensions=("tract", "patch"),
                                    storageClass="Exposure")
    """

    def factory(*, doc, check, **dtypeKwargs):
        """Innermost function of the closure; builds the ConfigField.

        ``doc`` and ``check`` are consumed by the ConfigField itself, while
        every remaining keyword is forwarded to the ``dtype`` constructor to
        build the field's default value.
        """
        return pexConfig.ConfigField(doc=doc,
                                     dtype=dtype,
                                     default=dtype(**dtypeKwargs),
                                     check=check)

    # The dtype is checked against its base class because the code needed to
    # make ConfigField factories is shared, but the arguments the factories
    # take differ between the base class types.
    if issubclass(dtype, _GlobalDatasetTypeConfig):
        # Global dataset types such as InitInputDatasetConfig; their
        # factories have no dimensions (or scalar) argument.
        def wrappedFunc(*, doc, storageClass, check=None, name="", nameTemplate=''):
            return factory(doc=doc, check=check, name=name,
                           storageClass=storageClass,
                           nameTemplate=nameTemplate)
        # Nothing extra to document for these factories.
        extraDoc = ""
        extraFields = ""
    elif issubclass(dtype, _DatasetTypeConfig):
        # Dataset types such as InputDatasetConfig; these additionally take
        # dimensions and scalar arguments.
        def wrappedFunc(*, doc, dimensions, storageClass, name="", scalar=False, check=None, nameTemplate=''):
            return factory(doc=doc, check=check, name=name,
                           dimensions=dimensions, scalar=scalar,
                           storageClass=storageClass,
                           nameTemplate=nameTemplate)
        # Documentation for the extra parameters; it is dedented and then
        # re-indented below so that it lines up inside the final docstring.
        extraDoc = """
            dimensions : iterable of `str`
                Iterable of Dimensions for this `~lsst.daf.butler.DatasetType`
            scalar : `bool`, optional
                If set to True then only a single dataset is expected on input or
                produced on output. In that case list of objects/DataIds will be
                unpacked before calling task methods, returned data is expected
                to contain single objects as well."""
        # Extra argument names for the prose listing in the docstring.
        extraFields = ", dimensions, scalar"
    else:
        # Refuse to create a config factory for a type that is not handled.
        raise TypeError(f"Cannot create a factory for dtype {dtype}")

    # Align the optional parameter documentation with the "Parameters"
    # entries of the docstring template below (one indentation level).
    extraDocBlock = indent(dedent(extraDoc), " " * 4)

    # Programmatically create a docstring to use in the factory function.
    docstring = f"""    Factory function to create `~lsst.pex.config.ConfigField` instances
    with a dtype of `{dtype.__name__}`

    This function serves as syntactic sugar for creating Configurable fields
    which are `{dtype.__name__}`. The naming of this function violates the
    normal convention of a lowercase first letter in the function name, as
    this function is intended to sit in the same place as
    `~lsst.pex.config.ConfigField` classes, and consistency in declaration
    syntax is important.

    The input arguments for this function are a combination of the arguments
    for `~lsst.pex.config.ConfigField` and `{dtype.__name__}`. The arguments
    doc and check come from `~lsst.pex.config.ConfigField`, while
    name{extraFields}, nameTemplate and storageClass come from
    `{dtype.__name__}`. All arguments must be passed by keyword.

    Parameters
    ----------
    doc : `str`
        Documentation string for the `{dtype.__name__}`
    name : `str`, optional
        Name of the `~lsst.daf.butler.DatasetType` in the returned
        `{dtype.__name__}`{extraDocBlock}
    nameTemplate : `str`, optional
        Template for the `name` field which is specified as a python formattable
        string. The template is formatted during the configuration of a Config
        class with a user defined string. Defaults to empty string, in which
        case no formatting is done.
    storageClass : `str`
        Name of the `~lsst.daf.butler.StorageClass` in the `{dtype.__name__}`
    check : callable, optional
        A callable to be called with the field value that returns
        False if the value is invalid.

    Returns
    -------
    result : `~lsst.pex.config.ConfigField`
        Instance of a `~lsst.pex.config.ConfigField` with `{dtype.__name__}` as a dtype
    """
    # Set the name to be used for the returned ConfigField factory function.
    wrappedFunc.__name__ = name
    # Attach the generated documentation, unindented because the template
    # above is indented to match this function's body.
    wrappedFunc.__doc__ = dedent(docstring)
    return wrappedFunc
196 
197 
class QuantumConfig(pexConfig.Config):
    """Configuration describing the dimensions of a PipelineTask quantum.

    Besides the list of Dimension names, an optional list of SQL statements
    to be executed against the Registry database may be given. The exact
    meaning and format of the SQL will be determined at a later point.
    """
    dimensions = pexConfig.ListField(
        doc="list of Dimensions which define quantum",
        dtype=str,
    )
    sql = pexConfig.ListField(
        doc="sequence of SQL statements",
        dtype=str,
        optional=True,
    )
210 
211 
class _BaseDatasetTypeConfig(pexConfig.Config):
    """Common base for the dataset type configurations of a PipelineTask.

    Holds the fields shared by every dataset type config: the DatasetType
    name, the StorageClass name and an optional template for the name.
    """
    name = pexConfig.Field(
        doc="name of the DatasetType",
        dtype=str,
    )
    storageClass = pexConfig.Field(
        doc="name of the StorageClass",
        dtype=str,
    )
    # An empty template (the default) means the name is never templated.
    nameTemplate = pexConfig.Field(
        doc=("Templated name of string, used to set name field "
             "according to a shared substring when formatTemplateNames "
             "is called"),
        dtype=str,
        default='',
        optional=True,
    )
225 
226 
class _DatasetTypeConfig(_BaseDatasetTypeConfig):
    """Configuration class which defines dataset type used by PipelineTask.

    Consists of DatasetType name, list of Dimension names and StorageClass
    name. PipelineTasks typically define one or more input and output
    datasets. This class should not be used directly, instead one of
    `InputDatasetConfig` or `OutputDatasetConfig` should be used in
    PipelineTask config.
    """
    # NOTE(review): the ``class`` statement was absent from the reviewed
    # listing; the base class is restored from the inherited name and
    # storageClass fields this docstring describes -- confirm upstream.
    dimensions = pexConfig.ListField(dtype=str,
                                     doc="list of Dimensions for this DatasetType")
    scalar = pexConfig.Field(dtype=bool,
                             default=False,
                             optional=True,
                             doc=("If set to True then only a single dataset is expected "
                                  "on input or produced on output. In that case list of "
                                  "objects/DataIds will be unpacked before calling task "
                                  "methods, returned data is expected to contain single "
                                  "objects as well."))
245 
246 
class InputDatasetConfig(_DatasetTypeConfig):
    """Dataset type configuration for a PipelineTask input.

    Adds no fields of its own; everything is inherited from
    `_DatasetTypeConfig`.
    """
    # NOTE(review): the ``class`` statement was absent from the reviewed
    # listing and is restored here to mirror `OutputDatasetConfig`.
    pass
249 
250 
class OutputDatasetConfig(_DatasetTypeConfig):
    """Dataset type configuration for a PipelineTask output.

    Adds no fields of its own; everything is inherited from
    `_DatasetTypeConfig`.
    """
    pass
253 
254 
class _GlobalDatasetTypeConfig(_BaseDatasetTypeConfig):
    """Configuration class which defines dataset types used in PipelineTask
    initialization.

    Consists of DatasetType name and StorageClass name, with a read-only
    ``dimensions`` property that returns an empty tuple, enforcing the
    constraint that datasets used in initialization are not associated with
    any Dimensions. This class should not be used directly, instead one of
    `InitInputDatasetConfig` or `InitOutputDatasetConfig` should be used in
    PipelineTask config.
    """
    # NOTE(review): the ``class`` statement was absent from the reviewed
    # listing; the base class is restored from the inherited name and
    # storageClass fields this docstring describes -- confirm upstream.
    @property
    def dimensions(self):
        """Dimensions associated with this DatasetType (always empty)."""
        return ()
270 
271 
class InitInputDatasetConfig(_GlobalDatasetTypeConfig):
    """Dataset type configuration for a PipelineTask initialization input.

    Adds no fields of its own; everything is inherited from
    `_GlobalDatasetTypeConfig`.
    """
    # NOTE(review): the ``class`` statement was absent from the reviewed
    # listing and is restored here to mirror `InitOutputDatasetConfig`.
    pass
274 
275 
class InitOutputDatasetConfig(_GlobalDatasetTypeConfig):
    """Dataset type configuration for a PipelineTask initialization output.

    Adds no fields of its own; everything is inherited from
    `_GlobalDatasetTypeConfig`.
    """
    pass
278 
279 
class ResourceConfig(pexConfig.Config):
    """Resource requirements of a task.

    Some activators will use this configuration class to estimate the
    resource use of a pipeline, and some tasks could use it to adjust their
    own resource use (e.g. reduce the number of threads).

    For some resources the limit can be estimated by the corresponding task,
    in which case the task could set the field value. For many of the fields
    defined here the resource used by a task depends on the size of the data
    and is not known in advance; those values will be configured through
    overrides based on some external estimates.
    """
    # None (the default) means no memory estimate is available.
    minMemoryMB = pexConfig.Field(
        doc="Minimal memory needed by task, can be None if estimate is unknown.",
        dtype=int,
        default=None,
        optional=True,
    )
    minNumCores = pexConfig.Field(
        doc="Minimal number of cores needed by task.",
        dtype=int,
        default=1,
    )
298 
299 
class PipelineTaskConfig(pexConfig.Config):
    """Base class for all PipelineTask configurations.

    This class defines fields that must be defined for every PipelineTask.
    It will be used as a base class for all PipelineTask configurations instead
    of `pex.config.Config`.
    """
    quantum = pexConfig.ConfigField(dtype=QuantumConfig,
                                    doc="configuration for PipelineTask quantum")

    def formatTemplateNames(self, templateParamsDict):
        """Fill in the ``name`` of every templated dataset type field.

        The supplied parameters are merged into the parameters remembered
        for this config instance from earlier calls. Each field whose value
        is a `_BaseDatasetTypeConfig` with a non-empty ``nameTemplate`` then
        has its ``name`` set to the template formatted with the merged
        parameters.

        Parameters
        ----------
        templateParamsDict : `dict`
            Mapping from the replacement field names used in the templates
            to the strings to substitute for them.

        Raises
        ------
        KeyError
            Raised by `str.format` if a template refers to a parameter that
            has not been supplied in this or an earlier call.
        """
        import weakref

        # The stored parameters are looked up by the identity of this
        # specific config instance.
        key = id(self)
        if key not in PIPELINETASK_CONFIG_TEMPLATE_DICT:
            PIPELINETASK_CONFIG_TEMPLATE_DICT[key] = {}
            # id() values may be reused once an object has been destroyed,
            # so drop the entry together with the instance; otherwise the
            # registry grows without bound and an unrelated config could
            # pick up stale parameters.
            weakref.finalize(self, PIPELINETASK_CONFIG_TEMPLATE_DICT.pop, key, None)
        storedParamsDict = PIPELINETASK_CONFIG_TEMPLATE_DICT[key]
        storedParamsDict.update(templateParamsDict)
        for _, value in self.items():
            if isinstance(value, _BaseDatasetTypeConfig) and value.nameTemplate != '':
                value.name = value.nameTemplate.format(**storedParamsDict)
318 
319 
# Public ConfigField factories, one per exported dataset type config class.
# They are created last because _makeDatasetField inspects the config
# classes defined above when it is called.
InputDatasetField = _makeDatasetField(
    "InputDatasetField", InputDatasetConfig)
OutputDatasetField = _makeDatasetField(
    "OutputDatasetField", OutputDatasetConfig)
InitInputDatasetField = _makeDatasetField(
    "InitInputDatasetField", InitInputDatasetConfig)
InitOutputDatasetField = _makeDatasetField(
    "InitOutputDatasetField", InitOutputDatasetConfig)
def formatTemplateNames(self, templateParamsDict)
Definition: config.py:310