lsst.pipe.base  20.0.0-28-g73474ed+3c7e226ea7
pipelineIR.py
1 # This file is part of pipe_base.
2 #
3 # Developed for the LSST Data Management System.
4 # This product includes software developed by the LSST Project
5 # (http://www.lsst.org).
6 # See the COPYRIGHT file at the top-level directory of this distribution
7 # for details of code ownership.
8 #
9 # This program is free software: you can redistribute it and/or modify
10 # it under the terms of the GNU General Public License as published by
11 # the Free Software Foundation, either version 3 of the License, or
12 # (at your option) any later version.
13 #
14 # This program is distributed in the hope that it will be useful,
15 # but WITHOUT ANY WARRANTY; without even the implied warranty of
16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 # GNU General Public License for more details.
18 #
19 # You should have received a copy of the GNU General Public License
20 # along with this program. If not, see <http://www.gnu.org/licenses/>.
21 from __future__ import annotations
22 
23 __all__ = ("ConfigIR", "ContractError", "ContractIR", "InheritIR", "PipelineIR", "TaskIR", "LabeledSubset")
24 
25 from collections import Counter
26 from collections.abc import Iterable as abcIterable
27 from dataclasses import dataclass, field
28 from typing import Any, List, Set, Union, Generator, MutableMapping, Optional, Dict
29 
30 import copy
31 import re
32 import os
33 import yaml
34 import warnings
35 
36 
37 class PipelineYamlLoader(yaml.SafeLoader):
38  """This is a specialized version of yaml's SafeLoader. It raises an
39  exception if it finds multiple instances of the same key at a given
40  scope inside a pipeline file.
41  """
42  def construct_mapping(self, node, deep=False):
43  # Call super first so that it performs all of its other checks on
44  # this node. Checking key uniqueness first would save the work that
45  # super does in the case of a failure, but it might also fail on a
46  # node that is malformed due to a parsing error, and the resulting
47  # exception from the duplicate-key check would then be difficult to
48  # understand.
49  mapping = super().construct_mapping(node, deep)
50  # Check if there are any duplicate keys
51  all_keys = Counter(key_node.value for key_node, _ in node.value)
52  duplicates = {k for k, i in all_keys.items() if i != 1}
53  if duplicates:
54  raise KeyError("Pipeline files must not have duplicated keys, "
55  f"{duplicates} appeared multiple times")
56  return mapping
57 
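As a quick illustration, a minimal sketch of the duplicate-key check in action; the import path is an assumption about how this module is exposed:

```python
import yaml

# Assumed import path for this module (adjust for your environment).
from lsst.pipe.base.pipelineIR import PipelineYamlLoader

document = """
tasks:
  taskA: modA
  taskA: modB
"""

try:
    yaml.load(document, Loader=PipelineYamlLoader)
except KeyError as err:
    print(err)  # reports that 'taskA' appeared multiple times
```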
58 
59 class ContractError(Exception):
60  """An exception that is raised when a pipeline contract is not satisfied.
61  """
62  pass
63 
64 
65 @dataclass
66 class ContractIR:
67  """Intermediate representation of contracts read from a pipeline yaml file.
68  """
69  contract: str
70  """A string of python code representing one or more conditions on configs
71  in a pipeline. This code-as-string should, once evaluated, be True
72  if the configs are fine, and False otherwise.
73  """
74  msg: Union[str, None] = None
75  """An optional message to be shown to the user if a contract fails
76  """
77 
78  def to_primitives(self) -> dict:
79  """Convert to a representation used in yaml serialization
80  """
81  accumulate = {"contract": self.contract}
82  if self.msg is not None:
83  accumulate['msg'] = self.msg
84  return accumulate
85 
86  def __eq__(self, other: "ContractIR"):
87  if not isinstance(other, ContractIR):
88  return False
89  elif self.contract == other.contract and self.msg == other.msg:
90  return True
91  else:
92  return False
93 
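For illustration, a short sketch of `ContractIR` serialization; the contract string is hypothetical, and the import path is the same assumption as above:

```python
from lsst.pipe.base.pipelineIR import ContractIR  # assumed import path

# A hypothetical contract relating two task configs.
contract = ContractIR(contract="taskA.doWrite == taskB.doRead",
                      msg="I/O flags must agree")
assert contract.to_primitives() == {"contract": "taskA.doWrite == taskB.doRead",
                                    "msg": "I/O flags must agree"}

# Without a message, only the contract key is emitted.
assert ContractIR(contract="True").to_primitives() == {"contract": "True"}
```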
94 
95 @dataclass
96 class LabeledSubset:
97  """Intermediate representation of a named subset of task labels read from
98  a pipeline yaml file.
99  """
100  label: str
101  """The label used to identify the subset of task labels.
102  """
103  subset: Set[str]
104  """A set of task labels contained in this subset.
105  """
106  description: Optional[str]
107  """A description of what this subset of tasks is intended to do
108  """
109 
110  @staticmethod
111  def from_primatives(label: str, value: Union[List[str], dict]) -> LabeledSubset:
112  """Generate `LabeledSubset` objects given a properly formatted object
113  that has been created by a yaml loader.
114 
115  Parameters
116  ----------
117  label : `str`
118  The label that will be used to identify this labeled subset.
119  value : `list` of `str` or `dict`
120  Object returned from loading a labeled subset section from a yaml
121  document.
122 
123  Returns
124  -------
125  labeledSubset : `LabeledSubset`
126  A `LabeledSubset` object built from the inputs.
127 
128  Raises
129  ------
130  ValueError
131  Raised if the value input is not properly formatted for parsing
132  """
133  if isinstance(value, MutableMapping):
134  subset = value.pop("subset", None)
135  if subset is None:
136  raise ValueError("If a labeled subset is specified as a mapping, it must contain the key "
137  "'subset'")
138  description = value.pop("description", None)
139  elif isinstance(value, abcIterable):
140  subset = value
141  description = None
142  else:
143  raise ValueError(f"There was a problem parsing the labeled subset {label}, make sure the "
144  "definition is either a valid yaml list, or a mapping with keys "
145  "(subset, description) where subset points to a yaml list, and description is "
146  "associated with a string")
147  return LabeledSubset(label, set(subset), description)
148 
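A sketch of the two accepted input shapes (labels are hypothetical):

```python
from lsst.pipe.base.pipelineIR import LabeledSubset  # assumed import path

# A bare yaml list becomes a subset with no description.
ls1 = LabeledSubset.from_primatives("quick", ["taskA", "taskB"])
assert ls1.subset == {"taskA", "taskB"} and ls1.description is None

# A mapping must carry a 'subset' key and may carry a 'description'.
ls2 = LabeledSubset.from_primatives("quick", {"subset": ["taskA"],
                                              "description": "fast path"})
assert ls2.description == "fast path"
```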
149  def to_primitives(self) -> dict:
150  """Convert to a representation used in yaml serialization
151  """
152  accumulate: Dict[str, Any] = {"subset": list(self.subset)}
153  if self.description is not None:
154  accumulate["description"] = self.description
155  return accumulate
156 
157 
158 @dataclass
159 class ParametersIR:
160  """Intermediate representation of parameters that are global to a pipeline
161 
162  These parameters are specified under a top level key named `parameters`
163  and are declared as a yaml mapping. These entries can then be used inside
164  task configuration blocks to specify configuration values. They may not be
165  used in the special ``file`` or ``python`` blocks.
166 
167  Example:
168  parameters:
169    shared_value: 14
170  tasks:
171    taskA:
172      class: modA
173      config:
174        field1: parameters.shared_value
175    taskB:
176      class: modB
177      config:
178        field2: parameters.shared_value
179  """
180  mapping: MutableMapping[str, str]
181  """A mutable mapping of identifiers as keys, and shared configuration
182  as values.
183  """
184  def update(self, other: Optional[ParametersIR]):
185  if other is not None:
186  self.mapping.update(other.mapping)
187 
188  def to_primitives(self) -> MutableMapping[str, str]:
189  """Convert to a representation used in yaml serialization
190  """
191  return self.mapping
192 
193  def __contains__(self, value: str) -> bool:
194  return value in self.mapping
195 
196  def __getitem__(self, item: str) -> Any:
197  return self.mapping[item]
198 
199  def __bool__(self) -> bool:
200  return bool(self.mapping)
201 
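A sketch of the mapping protocol this class exposes (values hypothetical):

```python
from lsst.pipe.base.pipelineIR import ParametersIR  # assumed import path

params = ParametersIR({"shared_value": 14})
assert "shared_value" in params       # __contains__
assert params["shared_value"] == 14   # __getitem__
assert bool(params)                   # non-empty mapping is truthy
assert not ParametersIR({})           # empty mapping is falsy

# update() merges another ParametersIR, the argument taking precedence.
params.update(ParametersIR({"shared_value": 15}))
assert params["shared_value"] == 15
```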
202 
203 @dataclass
204 class ConfigIR:
205  """Intermediate representation of configurations read from a pipeline yaml
206  file.
207  """
208  python: Union[str, None] = None
209  """A string of python code that is used to modify a configuration. This can
210  also be None if there are no modifications to do.
211  """
212  dataId: Union[dict, None] = None
213  """A dataId that is used to constrain these config overrides to only quanta
214  with matching dataIds. This field can be None if there is no constraint.
215  This is currently an unimplemented feature, and is placed here for future
216  use.
217  """
218  file: List[str] = field(default_factory=list)
219  """A list of paths to files containing config overrides to be
220  applied. This value may be an empty list if there are no overrides to
221  apply.
222  """
223  rest: dict = field(default_factory=dict)
224  """This is a dictionary of key value pairs, where the keys are strings
225  corresponding to qualified fields on a config to override, and the values
226  are strings representing the values to apply.
227  """
228 
229  def to_primitives(self) -> dict:
230  """Convert to a representation used in yaml serialization
231  """
232  accumulate = {}
233  for name in ("python", "dataId", "file"):
234  # if this attribute is truthy add it to the accumulation
235  # dictionary
236  if getattr(self, name):
237  accumulate[name] = getattr(self, name)
238  # Add the dictionary containing the rest of the config keys to the
239  # accumulated dictionary
240  accumulate.update(self.rest)
241  return accumulate
242 
243  def formatted(self, parameters: ParametersIR) -> ConfigIR:
244  """Returns a new ConfigIR object that is formatted according to the
245  specified parameters
246 
247  Parameters
248  ----------
249  parameters : ParametersIR
250  Object that contains variable mappings used in substitution.
251 
252  Returns
253  -------
254  config : ConfigIR
255  A new ConfigIR object formatted with the input parameters
256  """
257  new_config = copy.deepcopy(self)
258  for key, value in new_config.rest.items():
259  if not isinstance(value, str):
260  continue
261  match = re.match("parameters[.](.*)", value)
262  if match and match.group(1) in parameters:
263  new_config.rest[key] = parameters[match.group(1)]
264  if match and match.group(1) not in parameters:
265  warnings.warn(f"config {key} contains value {match.group(0)} which is formatted like a "
266  "Pipeline parameter but was not found within the Pipeline. If this was not "
267  "intentional, check for a typo")
268  return new_config
269 
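A sketch of the substitution performed by `formatted`, mirroring the `parameters` example above:

```python
from lsst.pipe.base.pipelineIR import ConfigIR, ParametersIR  # assumed path

params = ParametersIR({"shared_value": 14})
config = ConfigIR(rest={"field1": "parameters.shared_value",
                        "field2": "a plain string"})

# Values of the form "parameters.<name>" are replaced when <name> is
# defined; unknown names are left in place and trigger the warning above.
assert config.formatted(params).rest == {"field1": 14,
                                         "field2": "a plain string"}
```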
270  def maybe_merge(self, other_config: "ConfigIR") -> Generator["ConfigIR", None, None]:
271  """Merges another instance of a `ConfigIR` into this instance if
272  possible. This function returns a generator that yields only self
273  if the configs were merged, or self and other_config if they could
274  not be merged.
275 
276  Parameters
277  ----------
278  other_config : `ConfigIR`
279  An instance of `ConfigIR` to merge into this instance.
280 
281  Returns
282  -------
283  Generator : `ConfigIR`
284  A generator containing only self if the configs were merged, or
285  self and other_config if they could not be.
286  """
287  # Verify that the config blocks can be merged
288  if self.dataId != other_config.dataId or self.python or other_config.python or\
289  self.file or other_config.file:
290  yield from (self, other_config)
291  return
292 
293  # Compute the keys common to both configs, and verify that no
294  # shared key maps to different values
295  shared_keys = self.rest.keys() & other_config.rest.keys()
296  for key in shared_keys:
297  if self.rest[key] != other_config.rest[key]:
298  yield from (self, other_config)
299  return
300  self.rest.update(other_config.rest)
301 
302  # Combine the lists of override files to load
303  self_file_set = set(self.file)
304  other_file_set = set(other_config.file)
305  self.file = list(self_file_set.union(other_file_set))
306 
307  yield self
308 
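A sketch of the two possible outcomes (field names hypothetical):

```python
from lsst.pipe.base.pipelineIR import ConfigIR  # assumed import path

# Override-only blocks with matching dataIds merge into one.
a = ConfigIR(rest={"field1": 1})
b = ConfigIR(rest={"field2": 2})
assert len(list(a.maybe_merge(b))) == 1
assert a.rest == {"field1": 1, "field2": 2}

# A python block prevents merging, so both configs come back.
c = ConfigIR(python="config.field3 = 3")
d = ConfigIR(rest={"field4": 4})
assert len(list(c.maybe_merge(d))) == 2
```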
309  def __eq__(self, other: "ConfigIR"):
310  if not isinstance(other, ConfigIR):
311  return False
312  elif all(getattr(self, attr) == getattr(other, attr) for attr in
313  ("python", "dataId", "file", "rest")):
314  return True
315  else:
316  return False
317 
318 
319 @dataclass
320 class TaskIR:
321  """Intermediate representation of tasks read from a pipeline yaml file.
322  """
323  label: str
324  """An identifier used to refer to a task.
325  """
326  klass: str
327  """A string containing a fully qualified python class to be run in a
328  pipeline.
329  """
330  config: Union[List[ConfigIR], None] = None
331  """List of all config overrides associated with this task; may be
332  `None` if there are no config overrides.
333  """
334 
335  def to_primitives(self) -> dict:
336  """Convert to a representation used in yaml serialization
337  """
338  accumulate = {'class': self.klass}
339  if self.config:
340  accumulate['config'] = [c.to_primitives() for c in self.config]
341  return accumulate
342 
343  def add_or_update_config(self, other_config: ConfigIR):
344  """Adds a `ConfigIR` to this task if one is not present. Merges configs
345  if there is a `ConfigIR` present and the dataId keys of both configs
346  match, otherwise adds a new entry to the config list. The exception to
347  the above is that if either the last config or other_config has a
348  python block, then other_config is always added, as python blocks can
349  modify configs in ways that cannot be predicted.
350 
351  Parameters
352  ----------
353  other_config : `ConfigIR`
354  A `ConfigIR` instance to add or merge into the config attribute of
355  this task.
356  """
357  if not self.config:
358  self.config = [other_config]
359  return
360  self.config.extend(self.config.pop().maybe_merge(other_config))
361 
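A sketch of the merge-or-append behavior (labels and fields hypothetical):

```python
from lsst.pipe.base.pipelineIR import ConfigIR, TaskIR  # assumed path

task = TaskIR(label="taskA", klass="modA.TaskA")
task.add_or_update_config(ConfigIR(rest={"field1": 1}))
task.add_or_update_config(ConfigIR(rest={"field2": 2}))
assert len(task.config) == 1  # the two override-only blocks merged

# A python block cannot be merged, so it is appended instead.
task.add_or_update_config(ConfigIR(python="config.field3 = 3"))
assert len(task.config) == 2
```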
362  def __eq__(self, other: "TaskIR"):
363  if not isinstance(other, TaskIR):
364  return False
365  elif all(getattr(self, attr) == getattr(other, attr) for attr in
366  ("label", "klass", "config")):
367  return True
368  else:
369  return False
370 
371 
372 @dataclass
373 class InheritIR:
374  """An intermediate representation of inherited pipelines
375  """
376  location: str
377  """This is the location of the pipeline to inherit. The path should be
378  specified as an absolute path. Environment variables may be used in the
379  path and should be specified as a python string template, with the name of
380  the environment variable inside braces.
381  """
382  include: Union[List[str], None] = None
383  """List of tasks that should be included when inheriting this pipeline.
384  Either the include or exclude attributes may be specified, but not both.
385  """
386  exclude: Union[List[str], None] = None
387  """List of tasks that should be excluded when inheriting this pipeline.
388  Either the include or exclude attributes may be specified, but not both.
389  """
390  importContracts: bool = True
391  """Boolean attribute to dictate if contracts should be inherited with the
392  pipeline or not.
393  """
394 
395  def toPipelineIR(self, instrument=None) -> "PipelineIR":
396  """Load in the Pipeline specified by this object, and turn it into a
397  PipelineIR instance.
398 
399  Parameters
400  ----------
401  instrument : Optional `str`
402  A string giving the fully qualified path to an instrument object.
403  If an inherited pipeline defines the same instrument as is given
404  here, the import warning message is skipped.
405 
406  Returns
407  -------
408  pipeline : `PipelineIR`
409  A pipeline generated from the imported pipeline file
410  """
411  if self.include and self.exclude:
412  raise ValueError("An include list and an exclude list cannot both be specified"
413  " when declaring a pipeline import")
414  tmp_pipeline = PipelineIR.from_file(os.path.expandvars(self.location))
415  if tmp_pipeline.instrument is not None and tmp_pipeline.instrument != instrument:
416  warnings.warn("Any instrument definitions in imported pipelines are ignored. "
417  "If an instrument is desired please define it in the top-most pipeline")
418 
419  included_labels = set()
420  for label in tmp_pipeline.tasks:
421  if (self.include and label in self.include) or (self.exclude and label not in self.exclude)\
422  or (self.include is None and self.exclude is None):
423  included_labels.add(label)
424 
425  # Handle labeled subsets being specified in the include or exclude
426  # list, adding or removing labels.
427  if self.include is not None:
428  subsets_in_include = tmp_pipeline.labeled_subsets.keys() & self.include
429  for label in subsets_in_include:
430  included_labels.update(tmp_pipeline.labeled_subsets[label].subset)
431 
432  elif self.exclude is not None:
433  subsets_in_exclude = tmp_pipeline.labeled_subsets.keys() & self.exclude
434  for label in subsets_in_exclude:
435  included_labels.difference_update(tmp_pipeline.labeled_subsets[label].subset)
436 
437  tmp_pipeline = tmp_pipeline.subset_from_labels(included_labels)
438 
439  if not self.importContracts:
440  tmp_pipeline.contracts = []
441 
442  return tmp_pipeline
443 
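A usage sketch; the location below is hypothetical and must resolve to a real pipeline file for this to run:

```python
from lsst.pipe.base.pipelineIR import InheritIR  # assumed import path

# ${PIPELINES_DIR} is expanded via os.path.expandvars at load time.
inherit = InheritIR(location="${PIPELINES_DIR}/base.yaml",
                    exclude=["taskC"],
                    importContracts=False)
pipeline = inherit.toPipelineIR()
# 'pipeline' now holds every task from base.yaml except taskC, with the
# imported contracts dropped.
```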
444  def __eq__(self, other: "InheritIR"):
445  if not isinstance(other, InheritIR):
446  return False
447  elif all(getattr(self, attr) == getattr(other, attr) for attr in
448  ("location", "include", "exclude", "importContracts")):
449  return True
450  else:
451  return False
452 
453 
455  """Intermediate representation of a pipeline definition
456 
457  Parameters
458  ----------
459  loaded_yaml : `dict`
460  A dictionary which matches the structure that would be produced by a
461  yaml reader which parses a pipeline definition document
462 
463  Raises
464  ------
465  ValueError :
466  - If a pipeline is declared without a description
467  - If no tasks are declared in a pipeline, and no pipelines are to be
468  inherited
469  - If more than one instrument is specified
470  - If more than one inherited pipeline share a label
471  """
472  def __init__(self, loaded_yaml):
473  # Check required fields are present
474  if "description" not in loaded_yaml:
475  raise ValueError("A pipeline must be declared with a description")
476  if "tasks" not in loaded_yaml and "inherits" not in loaded_yaml:
477  raise ValueError("A pipeline must be declared with one or more tasks")
478 
479  # These steps below must happen in this call order
480 
481  # Process pipeline description
482  self.description = loaded_yaml.pop("description")
483 
484  # Process tasks
485  self._read_tasks(loaded_yaml)
486 
487  # Process instrument keys
488  inst = loaded_yaml.pop("instrument", None)
489  if isinstance(inst, list):
490  raise ValueError("Only one top level instrument can be defined in a pipeline")
491  self.instrument = inst
492 
493  # Process any contracts
494  self._read_contracts(loaded_yaml)
495 
496  # Process any defined parameters
497  self._read_parameters(loaded_yaml)
498 
499  # Process any named label subsets
500  self._read_labeled_subsets(loaded_yaml)
501 
502  # Process any inherited pipelines
503  self._read_inherits(loaded_yaml)
504 
505  # verify named subsets, must be done after inheriting
506  self._verify_labeled_subsets()
507 
508  def _read_contracts(self, loaded_yaml):
509  """Process the contracts portion of the loaded yaml document
510 
511  Parameters
512  ----------
513  loaded_yaml : `dict`
514  A dictionary which matches the structure that would be produced by
515  a yaml reader which parses a pipeline definition document
516  """
517  loaded_contracts = loaded_yaml.pop("contracts", [])
518  if isinstance(loaded_contracts, str):
519  loaded_contracts = [loaded_contracts]
520  self.contracts = []
521  for contract in loaded_contracts:
522  if isinstance(contract, dict):
523  self.contracts.append(ContractIR(**contract))
524  if isinstance(contract, str):
525  self.contracts.append(ContractIR(contract=contract))
526 
527  def _read_parameters(self, loaded_yaml):
528  """Process the parameters portion of the loaded yaml document
529 
530  Parameters
531  ----------
532  loaded_yaml : `dict`
533  A dictionary which matches the structure that would be produced by
534  a yaml reader which parses a pipeline definition document
535  """
536  loaded_parameters = loaded_yaml.pop("parameters", {})
537  if not isinstance(loaded_parameters, dict):
538  raise ValueError("The parameters section must be a yaml mapping")
539  self.parameters = ParametersIR(loaded_parameters)
540 
541  def _read_labeled_subsets(self, loaded_yaml: dict):
542  """Process the subsets portion of the loaded yaml document
543 
544  Parameters
545  ----------
546  loaded_yaml: `MutableMapping`
547  A dictionary which matches the structure that would be produced
548  by a yaml reader which parses a pipeline definition document
549  """
550  loaded_subsets = loaded_yaml.pop("subsets", {})
551  self.labeled_subsets = {}
552  if not loaded_subsets and "subset" in loaded_yaml:
553  raise ValueError("Top level key should be subsets and not subset, add an s")
554  for key, value in loaded_subsets.items():
555  self.labeled_subsets[key] = LabeledSubset.from_primatives(key, value)
556 
557  def _verify_labeled_subsets(self):
558  """Verifies that all the labels in each named subset exist within the
559  pipeline.
560  """
561  # Verify that all labels defined in a labeled subset are in the
562  # Pipeline
563  for labeled_subset in self.labeled_subsets.values():
564  if not labeled_subset.subset.issubset(self.tasks.keys()):
565  raise ValueError(f"Labels {labeled_subset.subset - self.tasks.keys()} were not found in the "
566  "declared pipeline")
567  # Verify subset labels are not already task labels
568  label_intersection = self.labeled_subsets.keys() & self.tasks.keys()
569  if label_intersection:
570  raise ValueError(f"Labeled subsets can not use the same label as a task: {label_intersection}")
571 
572  def _read_inherits(self, loaded_yaml):
573  """Process the inherits portion of the loaded yaml document
574 
575  Parameters
576  ----------
577  loaded_yaml : `dict`
578  A dictionary which matches the structure that would be produced by
579  a yaml reader which parses a pipeline definition document
580  """
581  def process_args(argument: Union[str, dict]) -> dict:
582  if isinstance(argument, str):
583  return {"location": argument}
584  elif isinstance(argument, dict):
585  if "exclude" in argument and isinstance(argument["exclude"], str):
586  argument["exclude"] = [argument["exclude"]]
587  if "include" in argument and isinstance(argument["include"], str):
588  argument["include"] = [argument["include"]]
589  return argument
590  tmp_inherit = loaded_yaml.pop("inherits", None)
591  if tmp_inherit is None:
592  self.inherits = []
593  elif isinstance(tmp_inherit, list):
594  self.inherits = [InheritIR(**process_args(args)) for args in tmp_inherit]
595  else:
596  self.inherits = [InheritIR(**process_args(tmp_inherit))]
597 
598  # integrate any imported pipelines
599  accumulate_tasks = {}
600  accumulate_labeled_subsets = {}
601  accumulated_parameters = ParametersIR({})
602  for other_pipeline in self.inherits:
603  tmp_IR = other_pipeline.toPipelineIR(instrument=self.instrument)
604  if accumulate_tasks.keys() & tmp_IR.tasks.keys():
605  raise ValueError("Task labels in the imported pipelines must "
606  "be unique")
607  accumulate_tasks.update(tmp_IR.tasks)
608  self.contracts.extend(tmp_IR.contracts)
609  # verify that tmp_IR has unique labels for named subset among
610  # existing labeled subsets, and with existing task labels.
611  overlapping_subsets = accumulate_labeled_subsets.keys() & tmp_IR.labeled_subsets.keys()
612  task_subset_overlap = ((accumulate_labeled_subsets.keys() | tmp_IR.labeled_subsets.keys())
613  & accumulate_tasks.keys())
614  if overlapping_subsets or task_subset_overlap:
615  raise ValueError("Labeled subset names must be unique amongst imports, in both task "
616  f"labels and named subsets. Duplicates: {overlapping_subsets | task_subset_overlap}")
617  accumulate_labeled_subsets.update(tmp_IR.labeled_subsets)
618  accumulated_parameters.update(tmp_IR.parameters)
619 
620  # verify that any accumulated labeled subsets dont clash with a label
621  # from this pipeline
622  if accumulate_labeled_subsets.keys() & self.tasks.keys():
623  raise ValueError("Labeled subset names must be unique amongst imports, in both task "
624  "labels and named subsets")
625  # merge in the named subsets for self so this document can override any
626  # that have been declared
627  accumulate_labeled_subsets.update(self.labeled_subsets)
628  self.labeled_subsets = accumulate_labeled_subsets
629 
630  # merge the dict of label:TaskIR objects, preserving any configs in the
631  # imported pipeline if the labels point to the same class
632  for label, task in self.tasks.items():
633  if label not in accumulate_tasks:
634  accumulate_tasks[label] = task
635  elif accumulate_tasks[label].klass == task.klass:
636  if task.config is not None:
637  for config in task.config:
638  accumulate_tasks[label].add_or_update_config(config)
639  else:
640  accumulate_tasks[label] = task
641  self.tasks = accumulate_tasks
642  self.parameters.update(accumulated_parameters)
643 
644  def _read_tasks(self, loaded_yaml):
645  """Process the tasks portion of the loaded yaml document
646 
647  Parameters
648  ----------
649  loaded_yaml : `dict`
650  A dictionary which matches the structure that would be produced by
651  a yaml reader which parses a pipeline definition document
652  """
653  self.tasks = {}
654  tmp_tasks = loaded_yaml.pop("tasks", None)
655  if tmp_tasks is None:
656  tmp_tasks = {}
657 
658  if "parameters" in tmp_tasks:
659  raise ValueError("parameters is a reserved word and cannot be used as a task label")
660 
661  for label, definition in tmp_tasks.items():
662  if isinstance(definition, str):
663  definition = {"class": definition}
664  config = definition.get('config', None)
665  if config is None:
666  task_config_ir = None
667  else:
668  if isinstance(config, dict):
669  config = [config]
670  task_config_ir = []
671  for c in config:
672  file = c.pop("file", None)
673  if file is None:
674  file = []
675  elif not isinstance(file, list):
676  file = [file]
677  task_config_ir.append(ConfigIR(python=c.pop("python", None),
678  dataId=c.pop("dataId", None),
679  file=file,
680  rest=c))
681  self.tasks[label] = TaskIR(label, definition["class"], task_config_ir)
682 
683  def _remove_contracts(self, label: str):
684  """Remove any contracts that contain the given label
685 
686  String comparison used in this way is not the most elegant and may
687  have issues, but it is the only feasible way when users can specify
688  contracts with generic strings.
689  """
690  new_contracts = []
691  for contract in self.contracts:
692  # match the label when it is not preceded by an ASCII identifier
693  # character (or is at the start of the string) and is followed by a dot
694  if re.match(f".*([^A-Za-z0-9_]|^){label}[.]", contract.contract):
695  continue
696  new_contracts.append(contract)
697  self.contracts = new_contracts
698 
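The matching heuristic, demonstrated directly on the regular expression used above (labels hypothetical):

```python
import re

pattern = r".*([^A-Za-z0-9_]|^)taskA[.]"

# 'taskA' as an attribute access is matched (contract would be pruned)...
assert re.match(pattern, "taskA.doWrite == taskB.doRead")
# ...but 'taskA' as a substring of another label is not.
assert re.match(pattern, "mytaskA.doWrite == taskB.doRead") is None
```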
699  def subset_from_labels(self, labelSpecifier: Set[str]) -> PipelineIR:
700  """Subset a pipelineIR to contain only labels specified in
701  labelSpecifier.
702 
703  Parameters
704  ----------
705  labelSpecifier : `set` of `str`
706  Set containing labels that describes how to subset a pipeline.
707 
708  Returns
709  -------
710  pipeline : `PipelineIR`
711  A new pipelineIR object that is a subset of the old pipelineIR
712 
713  Raises
714  ------
715  ValueError
716  Raised if there is an issue with specified labels
717 
718  Notes
719  -----
720  This method attempts to prune any contracts that contain labels which
721  are not in the declared subset of labels. This pruning is done using a
722  string based matching due to the nature of contracts and may prune more
723  than it should. Any labeled subsets defined that no longer have all
724  members of the subset present in the pipeline will be removed from the
725  resulting pipeline.
726  """
727 
728  pipeline = copy.deepcopy(self)
729 
730  # update the label specifier to expand any named subsets
731  toRemove = set()
732  toAdd = set()
733  for label in labelSpecifier:
734  if label in pipeline.labeled_subsets:
735  toRemove.add(label)
736  toAdd.update(pipeline.labeled_subsets[label].subset)
737  labelSpecifier.difference_update(toRemove)
738  labelSpecifier.update(toAdd)
739  # verify all the labels are in the pipeline
740  if not labelSpecifier.issubset(pipeline.tasks.keys()
741  | pipeline.labeled_subsets):
742  difference = labelSpecifier.difference(pipeline.tasks.keys())
743  raise ValueError("Not all supplied labels (specified or named subsets) are in the pipeline "
744  f"definition, extra labels: {difference}")
745  # copy needed so as to not modify while iterating
746  pipeline_labels = set(pipeline.tasks.keys())
747  # Remove the labels from the pipelineIR, and any contracts that contain
748  # those labels (see docstring on _remove_contracts for why this may
749  # cause issues)
750  for label in pipeline_labels:
751  if label not in labelSpecifier:
752  pipeline.tasks.pop(label)
753  pipeline._remove_contracts(label)
754 
755  # create a copy of the object to iterate over
756  labeled_subsets = copy.copy(pipeline.labeled_subsets)
757  # remove any labeled subsets that no longer have a complete set
758  for label, labeled_subset in labeled_subsets.items():
759  if labeled_subset.subset - pipeline.tasks.keys():
760  pipeline.labeled_subsets.pop(label)
761 
762  return pipeline
763 
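An end-to-end sketch using a minimal, hypothetical pipeline document:

```python
from lsst.pipe.base.pipelineIR import PipelineIR  # assumed import path

pipeline = PipelineIR.from_string("""
description: A hypothetical demo pipeline
tasks:
  taskA: modA
  taskB: modB
subsets:
  quick: [taskA]
""")

# Named subsets in the specifier are expanded to their member labels.
sub = pipeline.subset_from_labels({"quick"})
assert set(sub.tasks) == {"taskA"}
```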
764  @classmethod
765  def from_string(cls, pipeline_string: str):
766  """Create a `PipelineIR` object from a string formatted like a pipeline
767  document
768 
769  Parameters
770  ----------
771  pipeline_string : `str`
772  A string that is formatted like a pipeline document
773  """
774  loaded_yaml = yaml.load(pipeline_string, Loader=PipelineYamlLoader)
775  return cls(loaded_yaml)
776 
777  @classmethod
778  def from_file(cls, filename: str):
779  """Create a `PipelineIR` object from the document specified by the
780  input path.
781 
782  Parameters
783  ----------
784  filename : `str`
785  Location of document to use in creating a `PipelineIR` object.
786  """
787  with open(filename, 'r') as f:
788  loaded_yaml = yaml.load(f, Loader=PipelineYamlLoader)
789  return cls(loaded_yaml)
790 
791  def to_file(self, filename: str):
792  """Serialize this `PipelineIR` object into a yaml formatted string and
793  write the output to a file at the specified path.
794 
795  Parameters
796  ----------
797  filename : `str`
798  Location at which to write the serialized `PipelineIR` object.
799  """
800  with open(filename, 'w') as f:
801  yaml.dump(self.to_primitives(), f, sort_keys=False)
802 
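A round-trip sketch tying the serialization entry points together (the output path is hypothetical):

```python
from lsst.pipe.base.pipelineIR import PipelineIR  # assumed import path

pipeline = PipelineIR.from_string(
    "description: demo\n"
    "tasks:\n"
    "  taskA: modA\n"
)
pipeline.to_file("/tmp/demo_pipeline.yaml")
assert PipelineIR.from_file("/tmp/demo_pipeline.yaml") == pipeline
```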
803  def to_primitives(self):
804  """Convert to a representation used in yaml serialization
805  """
806  accumulate = {"description": self.description}
807  if self.instrument is not None:
808  accumulate['instrument'] = self.instrument
809  if self.parameters:
810  accumulate['parameters'] = self.parameters.to_primitives()
811  accumulate['tasks'] = {m: t.to_primitives() for m, t in self.tasks.items()}
812  if len(self.contracts) > 0:
813  accumulate['contracts'] = [c.to_primitives() for c in self.contracts]
814  if self.labeled_subsets:
815  accumulate['subsets'] = {k: v.to_primitives() for k, v in self.labeled_subsets.items()}
816  return accumulate
817 
818  def __str__(self) -> str:
819  """Format the instance as it would appear in a yaml representation.
820  """
821  return yaml.dump(self.to_primitives(), sort_keys=False)
822 
823  def __repr__(self) -> str:
824  """Format the instance as it would appear in a yaml representation.
825  """
826  return str(self)
827 
828  def __eq__(self, other: "PipelineIR"):
829  if not isinstance(other, PipelineIR):
830  return False
831  elif all(getattr(self, attr) == getattr(other, attr) for attr in
832  ("contracts", "tasks", "instrument")):
833  return True
834  else:
835  return False