lsst.pipe.base  21.0.0-6-gd3283ba+55f5a6a5c9
pipelineIR.py
# This file is part of pipe_base.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = ("ConfigIR", "ContractError", "ContractIR", "ImportIR", "PipelineIR", "TaskIR", "LabeledSubset")

from collections import Counter
from collections.abc import Iterable as abcIterable
from dataclasses import dataclass, field
from typing import Any, List, Set, Union, Generator, MutableMapping, Optional, Dict, Type

import copy
import re
import os
import yaml
import warnings


class KeepInstrument:
    # Sentinel class: ImportIR.instrument defaults to this to indicate that an
    # imported pipeline's declared instrument should be kept unmodified.
    pass
class PipelineYamlLoader(yaml.SafeLoader):
    """This is a specialized version of yaml's SafeLoader. It checks and
    raises an exception if it finds multiple instances of the same key at a
    given scope inside a pipeline file.
    """
    def construct_mapping(self, node, deep=False):
        # do the call to super first so that it can do all the other forms of
        # checking on this node. If you check the uniqueness of keys first
        # it would save the work that super does in the case of a failure, but
        # it might fail in the case that the node was the incorrect node due
        # to a parsing error, and the resulting exception would be difficult to
        # understand.
        mapping = super().construct_mapping(node, deep)
        # Check if there are any duplicate keys
        all_keys = Counter(key_node.value for key_node, _ in node.value)
        duplicates = {k for k, i in all_keys.items() if i != 1}
        if duplicates:
            raise KeyError("Pipeline files must not have duplicated keys, "
                           f"{duplicates} appeared multiple times")
        return mapping
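

# Usage sketch (editor's addition, not part of the original source): the
# loader accepts a well-formed mapping but rejects one that repeats a key at
# the same scope.
#
#     >>> import yaml
#     >>> yaml.load("a: 1\nb: 2", Loader=PipelineYamlLoader)
#     {'a': 1, 'b': 2}
#     >>> yaml.load("a: 1\na: 2", Loader=PipelineYamlLoader)
#     Traceback (most recent call last):
#         ...
#     KeyError: "Pipeline files must not have duplicated keys, {'a'} appeared multiple times"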


class ContractError(Exception):
    """An exception that is raised when a pipeline contract is not satisfied
    """
    pass


@dataclass
class ContractIR:
    """Intermediate representation of contracts read from a pipeline yaml file.
    """
    contract: str
    """A string of python code representing one or more conditions on configs
    in a pipeline. This code-as-string should, once evaluated, be `True` if
    the configs are fine, and `False` otherwise.
    """
    msg: Union[str, None] = None
    """An optional message to be shown to the user if a contract fails
    """

    def to_primitives(self) -> dict:
        """Convert to a representation used in yaml serialization
        """
        accumulate = {"contract": self.contract}
        if self.msg is not None:
            accumulate['msg'] = self.msg
        return accumulate

    def __eq__(self, other: "ContractIR"):
        if not isinstance(other, ContractIR):
            return False
        elif self.contract == other.contract and self.msg == other.msg:
            return True
        else:
            return False
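

# Usage sketch (editor's addition, not part of the original source): a
# contract as it might appear in a pipeline yaml document, and its primitive
# form. The config fields named here are hypothetical.
#
#     >>> c = ContractIR(contract="taskA.field1 == taskB.field2",
#     ...                msg="taskA and taskB must agree")
#     >>> c.to_primitives()
#     {'contract': 'taskA.field1 == taskB.field2', 'msg': 'taskA and taskB must agree'}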


@dataclass
class LabeledSubset:
    """Intermediate representation of named subset of task labels read from
    a pipeline yaml file.
    """
    label: str
    """The label used to identify the subset of task labels.
    """
    subset: Set[str]
    """A set of task labels contained in this subset.
    """
    description: Optional[str]
    """A description of what this subset of tasks is intended to do
    """

    @staticmethod
    def from_primatives(label: str, value: Union[List[str], dict]) -> LabeledSubset:
        """Generate `LabeledSubset` objects given a properly formatted object
        that has been created by a yaml loader.

        Parameters
        ----------
        label : `str`
            The label that will be used to identify this labeled subset.
        value : `list` of `str` or `dict`
            Object returned from loading a labeled subset section from a yaml
            document.

        Returns
        -------
        labeledSubset : `LabeledSubset`
            A `LabeledSubset` object built from the inputs.

        Raises
        ------
        ValueError
            Raised if the value input is not properly formatted for parsing
        """
        if isinstance(value, MutableMapping):
            subset = value.pop("subset", None)
            if subset is None:
                raise ValueError("If a labeled subset is specified as a mapping, it must contain the key "
                                 "'subset'")
            description = value.pop("description", None)
        elif isinstance(value, abcIterable):
            subset = value
            description = None
        else:
            raise ValueError(f"There was a problem parsing the labeled subset {label}, make sure the "
                             "definition is either a valid yaml list, or a mapping with keys "
                             "(subset, description) where subset points to a yaml list, and description is "
                             "associated with a string")
        return LabeledSubset(label, set(subset), description)

    def to_primitives(self) -> dict:
        """Convert to a representation used in yaml serialization
        """
        accumulate: Dict[str, Any] = {"subset": list(self.subset)}
        if self.description is not None:
            accumulate["description"] = self.description
        return accumulate
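

# Usage sketch (editor's addition, not part of the original source): the two
# accepted yaml forms for a labeled subset, as seen by from_primatives. The
# task labels are hypothetical.
#
#     >>> LabeledSubset.from_primatives("calibrate", ["taskA", "taskB"])
#     LabeledSubset(label='calibrate', subset={...}, description=None)
#     >>> LabeledSubset.from_primatives(
#     ...     "calibrate",
#     ...     {"subset": ["taskA", "taskB"], "description": "demo subset"})
#     LabeledSubset(label='calibrate', subset={...}, description='demo subset')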


@dataclass
class ParametersIR:
    """Intermediate representation of parameters that are global to a pipeline

    These parameters are specified under a top level key named `parameters`
    and are declared as a yaml mapping. These entries can then be used inside
    task configuration blocks to specify configuration values. They may not be
    used in the special ``file`` or ``python`` blocks.

    Example:
    parameters:
      shared_value: 14
    tasks:
      taskA:
        class: modA
        config:
          field1: parameters.shared_value
      taskB:
        class: modB
        config:
          field2: parameters.shared_value
    """
    mapping: MutableMapping[str, str]
    """A mutable mapping of identifiers as keys, and shared configuration
    as values.
    """
    def update(self, other: Optional[ParametersIR]):
        if other is not None:
            self.mapping.update(other.mapping)

    def to_primitives(self) -> MutableMapping[str, str]:
        """Convert to a representation used in yaml serialization
        """
        return self.mapping

    def __contains__(self, value: str) -> bool:
        return value in self.mapping

    def __getitem__(self, item: str) -> Any:
        return self.mapping[item]

    def __bool__(self) -> bool:
        return bool(self.mapping)
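

# Usage sketch (editor's addition, not part of the original source):
# membership, lookup, and truthiness of a ParametersIR mapping.
#
#     >>> params = ParametersIR({"shared_value": 14})
#     >>> "shared_value" in params
#     True
#     >>> params["shared_value"]
#     14
#     >>> bool(ParametersIR({}))
#     False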


@dataclass
class ConfigIR:
    """Intermediate representation of configurations read from a pipeline yaml
    file.
    """
    python: Union[str, None] = None
    """A string of python code that is used to modify a configuration. This can
    also be None if there are no modifications to do.
    """
    dataId: Union[dict, None] = None
    """A dataId that is used to constrain these config overrides to only quanta
    with matching dataIds. This field can be None if there is no constraint.
    This is currently an unimplemented feature, and is placed here for future
    use.
    """
    file: List[str] = field(default_factory=list)
    """A list of paths which point to files containing config overrides to be
    applied. This value may be an empty list if there are no overrides to
    apply.
    """
    rest: dict = field(default_factory=dict)
    """This is a dictionary of key value pairs, where the keys are strings
    corresponding to qualified fields on a config to override, and the values
    are strings representing the values to apply.
    """

    def to_primitives(self) -> dict:
        """Convert to a representation used in yaml serialization
        """
        accumulate = {}
        for name in ("python", "dataId", "file"):
            # if this attribute is truthy add it to the accumulation
            # dictionary
            if getattr(self, name):
                accumulate[name] = getattr(self, name)
        # Add the dictionary containing the rest of the config keys to the
        # accumulated dictionary
        accumulate.update(self.rest)
        return accumulate

    def formatted(self, parameters: ParametersIR) -> ConfigIR:
        """Returns a new ConfigIR object that is formatted according to the
        specified parameters

        Parameters
        ----------
        parameters : ParametersIR
            Object that contains variable mappings used in substitution.

        Returns
        -------
        config : ConfigIR
            A new ConfigIR object formatted with the input parameters
        """
        new_config = copy.deepcopy(self)
        for key, value in new_config.rest.items():
            if not isinstance(value, str):
                continue
            match = re.match("parameters[.](.*)", value)
            if match and match.group(1) in parameters:
                new_config.rest[key] = parameters[match.group(1)]
            if match and match.group(1) not in parameters:
                warnings.warn(f"config {key} contains value {match.group(0)} which is formatted like a "
                              "Pipeline parameter but was not found within the Pipeline, if this was not "
                              "intentional, check for a typo")
        return new_config
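
    # Usage sketch (editor's addition, not part of the original source):
    # substituting a pipeline parameter into a config override value. The
    # field name is hypothetical.
    #
    #     >>> params = ParametersIR({"shared_value": 14})
    #     >>> config = ConfigIR(rest={"field1": "parameters.shared_value"})
    #     >>> config.formatted(params).rest
    #     {'field1': 14}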

    def maybe_merge(self, other_config: "ConfigIR") -> Generator["ConfigIR", None, None]:
        """Merges another instance of a `ConfigIR` into this instance if
        possible. This function returns a generator that yields only self if
        the configs were merged, or self and other_config if they could not
        be merged.

        Parameters
        ----------
        other_config : `ConfigIR`
            An instance of `ConfigIR` to merge into this instance.

        Returns
        -------
        Generator : `ConfigIR`
            A generator containing either self, or self and other_config,
            depending on whether the configs could be merged.
        """
        # Verify that the config blocks can be merged
        if self.dataId != other_config.dataId or self.python or other_config.python or\
                self.file or other_config.file:
            yield from (self, other_config)
            return

        # Check the keys common to both configs, and verify that no shared
        # key is mapped to two different values
        key_union = self.rest.keys() & other_config.rest.keys()
        for key in key_union:
            if self.rest[key] != other_config.rest[key]:
                yield from (self, other_config)
                return
        self.rest.update(other_config.rest)

        # Combine the lists of override files to load
        self_file_set = set(self.file)
        other_file_set = set(other_config.file)
        self.file = list(self_file_set.union(other_file_set))

        yield self
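
    # Usage sketch (editor's addition, not part of the original source): two
    # compatible config blocks collapse to one; conflicting values keep both.
    #
    #     >>> a = ConfigIR(rest={"field1": "1"})
    #     >>> merged = list(a.maybe_merge(ConfigIR(rest={"field2": "2"})))
    #     >>> len(merged), merged[0].rest == {"field1": "1", "field2": "2"}
    #     (1, True)
    #     >>> b = ConfigIR(rest={"field1": "1"})
    #     >>> len(list(b.maybe_merge(ConfigIR(rest={"field1": "3"}))))
    #     2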

    def __eq__(self, other: "ConfigIR"):
        if not isinstance(other, ConfigIR):
            return False
        elif all(getattr(self, attr) == getattr(other, attr) for attr in
                 ("python", "dataId", "file", "rest")):
            return True
        else:
            return False


@dataclass
class TaskIR:
    """Intermediate representation of tasks read from a pipeline yaml file.
    """
    label: str
    """An identifier used to refer to a task.
    """
    klass: str
    """A string containing a fully qualified python class to be run in a
    pipeline.
    """
    config: Union[List[ConfigIR], None] = None
    """List of all config overrides associated with this task; may be
    `None` if there are no config overrides.
    """

    def to_primitives(self) -> dict:
        """Convert to a representation used in yaml serialization
        """
        accumulate = {'class': self.klass}
        if self.config:
            accumulate['config'] = [c.to_primitives() for c in self.config]
        return accumulate

    def add_or_update_config(self, other_config: ConfigIR):
        """Adds a `ConfigIR` to this task if one is not present. Merges configs
        if there is a `ConfigIR` present and the dataId keys of both configs
        match, otherwise adds a new entry to the config list. The exception to
        the above is that if either the last config or other_config has a
        python block, then other_config is always added, as python blocks can
        modify configs in ways that cannot be predicted.

        Parameters
        ----------
        other_config : `ConfigIR`
            A `ConfigIR` instance to add or merge into the config attribute of
            this task.
        """
        if not self.config:
            self.config = [other_config]
            return
        self.config.extend(self.config.pop().maybe_merge(other_config))
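
    # Usage sketch (editor's addition, not part of the original source):
    # merging config overrides into a task. The class path "modA.TaskA" is
    # hypothetical.
    #
    #     >>> task = TaskIR("taskA", "modA.TaskA")
    #     >>> task.add_or_update_config(ConfigIR(rest={"field1": "1"}))
    #     >>> task.add_or_update_config(ConfigIR(rest={"field2": "2"}))
    #     >>> len(task.config), task.config[0].rest
    #     (1, {'field1': '1', 'field2': '2'})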

    def __eq__(self, other: "TaskIR"):
        if not isinstance(other, TaskIR):
            return False
        elif all(getattr(self, attr) == getattr(other, attr) for attr in
                 ("label", "klass", "config")):
            return True
        else:
            return False


@dataclass
class ImportIR:
    """An intermediate representation of imported pipelines
    """
    location: str
    """This is the location of the pipeline to inherit. The path should be
    specified as an absolute path. Environment variables may be used in the
    path and should be specified as a python string template, with the name of
    the environment variable inside braces.
    """
    include: Union[List[str], None] = None
    """List of tasks that should be included when inheriting this pipeline.
    Either the include or exclude attributes may be specified, but not both.
    """
    exclude: Union[List[str], None] = None
    """List of tasks that should be excluded when inheriting this pipeline.
    Either the include or exclude attributes may be specified, but not both.
    """
    importContracts: bool = True
    """Boolean attribute to dictate if contracts should be inherited with the
    pipeline or not.
    """
    instrument: Union[Type[KeepInstrument], str, None] = KeepInstrument
    """Instrument to assign to the Pipeline at import. The default value of
    `KeepInstrument` indicates that whatever instrument the pipeline is
    declared with will not be modified. Setting this value to None will drop
    any declared instrument prior to import.
    """

    def toPipelineIR(self) -> "PipelineIR":
        """Load in the Pipeline specified by this object, and turn it into a
        PipelineIR instance.

        Returns
        -------
        pipeline : `PipelineIR`
            A pipeline generated from the imported pipeline file
        """
        if self.include and self.exclude:
            raise ValueError("Both an include and an exclude list can't be specified"
                             " when declaring a pipeline import")
        tmp_pipeline = PipelineIR.from_file(os.path.expandvars(self.location))
        if self.instrument is not KeepInstrument:
            tmp_pipeline.instrument = self.instrument

        included_labels = set()
        for label in tmp_pipeline.tasks:
            if (self.include and label in self.include) or (self.exclude and label not in self.exclude)\
                    or (self.include is None and self.exclude is None):
                included_labels.add(label)

        # Handle labeled subsets being specified in the include or exclude
        # list, adding or removing labels.
        if self.include is not None:
            subsets_in_include = tmp_pipeline.labeled_subsets.keys() & self.include
            for label in subsets_in_include:
                included_labels.update(tmp_pipeline.labeled_subsets[label].subset)

        elif self.exclude is not None:
            subsets_in_exclude = tmp_pipeline.labeled_subsets.keys() & self.exclude
            for label in subsets_in_exclude:
                included_labels.difference_update(tmp_pipeline.labeled_subsets[label].subset)

        tmp_pipeline = tmp_pipeline.subset_from_labels(included_labels)

        if not self.importContracts:
            tmp_pipeline.contracts = []

        return tmp_pipeline
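
    # Usage sketch (editor's addition, not part of the original source):
    # importing a subset of another pipeline. The path below is hypothetical,
    # and ${PIPE_DIR} is expanded by os.path.expandvars before loading.
    #
    #     >>> imp = ImportIR(location="${PIPE_DIR}/pipelines/demo.yaml",
    #     ...                exclude=["taskB"], importContracts=False)
    #     >>> imported = imp.toPipelineIR()  # PipelineIR without taskB or contracts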

    def __eq__(self, other: "ImportIR"):
        if not isinstance(other, ImportIR):
            return False
        elif all(getattr(self, attr) == getattr(other, attr) for attr in
                 ("location", "include", "exclude", "importContracts")):
            return True
        else:
            return False


class PipelineIR:
    """Intermediate representation of a pipeline definition

    Parameters
    ----------
    loaded_yaml : `dict`
        A dictionary which matches the structure that would be produced by a
        yaml reader which parses a pipeline definition document

    Raises
    ------
    ValueError
        - If a pipeline is declared without a description
        - If no tasks are declared in a pipeline, and no pipelines are to be
          inherited
        - If more than one instrument is specified
        - If more than one inherited pipeline share a label
    """
    def __init__(self, loaded_yaml):
        # Check required fields are present
        if "description" not in loaded_yaml:
            raise ValueError("A pipeline must be declared with a description")
        if "tasks" not in loaded_yaml and len({"imports", "inherits"} - loaded_yaml.keys()) == 2:
            raise ValueError("A pipeline must be declared with one or more tasks")

        # These steps below must happen in this call order

        # Process pipeline description
        self.description = loaded_yaml.pop("description")

        # Process tasks
        self._read_tasks(loaded_yaml)

        # Process instrument keys
        inst = loaded_yaml.pop("instrument", None)
        if isinstance(inst, list):
            raise ValueError("Only one top level instrument can be defined in a pipeline")
        self.instrument = inst

        # Process any contracts
        self._read_contracts(loaded_yaml)

        # Process any defined parameters
        self._read_parameters(loaded_yaml)

        # Process any named label subsets
        self._read_labeled_subsets(loaded_yaml)

        # Process any inherited pipelines
        self._read_imports(loaded_yaml)

        # verify named subsets, must be done after inheriting
        self._verify_labeled_subsets()

    def _read_contracts(self, loaded_yaml):
        """Process the contracts portion of the loaded yaml document

        Parameters
        ----------
        loaded_yaml : `dict`
            A dictionary which matches the structure that would be produced by
            a yaml reader which parses a pipeline definition document
        """
        loaded_contracts = loaded_yaml.pop("contracts", [])
        if isinstance(loaded_contracts, str):
            loaded_contracts = [loaded_contracts]
        self.contracts = []
        for contract in loaded_contracts:
            if isinstance(contract, dict):
                self.contracts.append(ContractIR(**contract))
            if isinstance(contract, str):
                self.contracts.append(ContractIR(contract=contract))

    def _read_parameters(self, loaded_yaml):
        """Process the parameters portion of the loaded yaml document

        Parameters
        ----------
        loaded_yaml : `dict`
            A dictionary which matches the structure that would be produced by
            a yaml reader which parses a pipeline definition document
        """
        loaded_parameters = loaded_yaml.pop("parameters", {})
        if not isinstance(loaded_parameters, dict):
            raise ValueError("The parameters section must be a yaml mapping")
        self.parameters = ParametersIR(loaded_parameters)

    def _read_labeled_subsets(self, loaded_yaml: dict):
        """Process the subsets portion of the loaded yaml document

        Parameters
        ----------
        loaded_yaml : `MutableMapping`
            A dictionary which matches the structure that would be produced
            by a yaml reader which parses a pipeline definition document
        """
        loaded_subsets = loaded_yaml.pop("subsets", {})
        self.labeled_subsets = {}
        if not loaded_subsets and "subset" in loaded_yaml:
            raise ValueError("Top level key should be subsets and not subset, add an s")
        for key, value in loaded_subsets.items():
            self.labeled_subsets[key] = LabeledSubset.from_primatives(key, value)

    def _verify_labeled_subsets(self):
        """Verifies that all the labels in each named subset exist within the
        pipeline.
        """
        # Verify that all labels defined in a labeled subset are in the
        # Pipeline
        for labeled_subset in self.labeled_subsets.values():
            if not labeled_subset.subset.issubset(self.tasks.keys()):
                raise ValueError(f"Labels {labeled_subset.subset - self.tasks.keys()} were not found in the "
                                 "declared pipeline")
        # Verify subset labels are not already task labels
        label_intersection = self.labeled_subsets.keys() & self.tasks.keys()
        if label_intersection:
            raise ValueError(f"Labeled subsets can not use the same label as a task: {label_intersection}")

    def _read_imports(self, loaded_yaml):
        """Process the imports (or deprecated inherits) portion of the loaded
        yaml document

        Parameters
        ----------
        loaded_yaml : `dict`
            A dictionary which matches the structure that would be produced by
            a yaml reader which parses a pipeline definition document
        """
        def process_args(argument: Union[str, dict]) -> dict:
            if isinstance(argument, str):
                return {"location": argument}
            elif isinstance(argument, dict):
                if "exclude" in argument and isinstance(argument["exclude"], str):
                    argument["exclude"] = [argument["exclude"]]
                if "include" in argument and isinstance(argument["include"], str):
                    argument["include"] = [argument["include"]]
                if "instrument" in argument and argument["instrument"] == "None":
                    argument["instrument"] = None
                return argument
        if not {"inherits", "imports"} - loaded_yaml.keys():
            raise ValueError("Cannot define both inherits and imports sections, use imports")
        tmp_import = loaded_yaml.pop("inherits", None)
        if tmp_import is None:
            tmp_import = loaded_yaml.pop("imports", None)
        else:
            warnings.warn("The 'inherits' key is deprecated, and will be "
                          "removed around June 2021. Please use the key "
                          "'imports' instead")
        if tmp_import is None:
            self.imports = []
        elif isinstance(tmp_import, list):
            self.imports = [ImportIR(**process_args(args)) for args in tmp_import]
        else:
            self.imports = [ImportIR(**process_args(tmp_import))]

        # integrate any imported pipelines
        accumulate_tasks = {}
        accumulate_labeled_subsets = {}
        accumulated_parameters = ParametersIR({})
        for other_pipeline in self.imports:
            tmp_IR = other_pipeline.toPipelineIR()
            if self.instrument is None:
                self.instrument = tmp_IR.instrument
            elif self.instrument != tmp_IR.instrument and tmp_IR.instrument is not None:
                raise ValueError("Only one instrument can be declared in a pipeline or its imports")
            if accumulate_tasks.keys() & tmp_IR.tasks.keys():
                raise ValueError("Task labels in the imported pipelines must "
                                 "be unique")
            accumulate_tasks.update(tmp_IR.tasks)
            self.contracts.extend(tmp_IR.contracts)
            # verify that tmp_IR has unique labels for named subset among
            # existing labeled subsets, and with existing task labels.
            overlapping_subsets = accumulate_labeled_subsets.keys() & tmp_IR.labeled_subsets.keys()
            task_subset_overlap = ((accumulate_labeled_subsets.keys() | tmp_IR.labeled_subsets.keys())
                                   & accumulate_tasks.keys())
            if overlapping_subsets or task_subset_overlap:
                raise ValueError("Labeled subset names must be unique amongst imports in both labels and "
                                 f"named subsets. Duplicate: {overlapping_subsets | task_subset_overlap}")
            accumulate_labeled_subsets.update(tmp_IR.labeled_subsets)
            accumulated_parameters.update(tmp_IR.parameters)

        # verify that any accumulated labeled subsets don't clash with a label
        # from this pipeline
        if accumulate_labeled_subsets.keys() & self.tasks.keys():
            raise ValueError("Labeled subset names must be unique amongst imports in both labels and "
                             "named subsets")
        # merge in the named subsets for self so this document can override any
        # that have been declared
        accumulate_labeled_subsets.update(self.labeled_subsets)
        self.labeled_subsets = accumulate_labeled_subsets

        # merge the dict of label:TaskIR objects, preserving any configs in the
        # imported pipeline if the labels point to the same class
        for label, task in self.tasks.items():
            if label not in accumulate_tasks:
                accumulate_tasks[label] = task
            elif accumulate_tasks[label].klass == task.klass:
                if task.config is not None:
                    for config in task.config:
                        accumulate_tasks[label].add_or_update_config(config)
            else:
                accumulate_tasks[label] = task
        self.tasks = accumulate_tasks
        self.parameters.update(accumulated_parameters)

    def _read_tasks(self, loaded_yaml):
        """Process the tasks portion of the loaded yaml document

        Parameters
        ----------
        loaded_yaml : `dict`
            A dictionary which matches the structure that would be produced by
            a yaml reader which parses a pipeline definition document
        """
        self.tasks = {}
        tmp_tasks = loaded_yaml.pop("tasks", None)
        if tmp_tasks is None:
            tmp_tasks = {}

        if "parameters" in tmp_tasks:
            raise ValueError("parameters is a reserved word and cannot be used as a task label")

        for label, definition in tmp_tasks.items():
            if isinstance(definition, str):
                definition = {"class": definition}
            config = definition.get('config', None)
            if config is None:
                task_config_ir = None
            else:
                if isinstance(config, dict):
                    config = [config]
                task_config_ir = []
                for c in config:
                    file = c.pop("file", None)
                    if file is None:
                        file = []
                    elif not isinstance(file, list):
                        file = [file]
                    task_config_ir.append(ConfigIR(python=c.pop("python", None),
                                                   dataId=c.pop("dataId", None),
                                                   file=file,
                                                   rest=c))
            self.tasks[label] = TaskIR(label, definition["class"], task_config_ir)

    def _remove_contracts(self, label: str):
        """Remove any contracts that contain the given label

        String comparison used in this way is not the most elegant and may
        have issues, but it is the only feasible way when users can specify
        contracts with generic strings.
        """
        new_contracts = []
        for contract in self.contracts:
            # match a label that is not preceded by an ASCII identifier, or
            # is the start of a line and is followed by a dot
            if re.match(f".*([^A-Za-z0-9_]|^){label}[.]", contract.contract):
                continue
            new_contracts.append(contract)
        self.contracts = new_contracts

    def subset_from_labels(self, labelSpecifier: Set[str]) -> PipelineIR:
        """Subset a pipelineIR to contain only labels specified in
        labelSpecifier.

        Parameters
        ----------
        labelSpecifier : `set` of `str`
            Set containing labels that describe how to subset a pipeline.

        Returns
        -------
        pipeline : `PipelineIR`
            A new pipelineIR object that is a subset of the old pipelineIR

        Raises
        ------
        ValueError
            Raised if there is an issue with specified labels

        Notes
        -----
        This method attempts to prune any contracts that contain labels which
        are not in the declared subset of labels. This pruning is done using a
        string based matching due to the nature of contracts and may prune more
        than it should. Any labeled subsets defined that no longer have all
        members of the subset present in the pipeline will be removed from the
        resulting pipeline.
        """

        pipeline = copy.deepcopy(self)

        # update the label specifier to expand any named subsets
        toRemove = set()
        toAdd = set()
        for label in labelSpecifier:
            if label in pipeline.labeled_subsets:
                toRemove.add(label)
                toAdd.update(pipeline.labeled_subsets[label].subset)
        labelSpecifier.difference_update(toRemove)
        labelSpecifier.update(toAdd)
        # verify all the labels are in the pipeline
        if not labelSpecifier.issubset(pipeline.tasks.keys()
                                       | pipeline.labeled_subsets):
            difference = labelSpecifier.difference(pipeline.tasks.keys())
            raise ValueError("Not all supplied labels (specified or named subsets) are in the pipeline "
                             f"definition, extra labels: {difference}")
        # copy needed so as to not modify while iterating
        pipeline_labels = set(pipeline.tasks.keys())
        # Remove the labels from the pipelineIR, and any contracts that contain
        # those labels (see docstring on _remove_contracts for why this may
        # cause issues)
        for label in pipeline_labels:
            if label not in labelSpecifier:
                pipeline.tasks.pop(label)
                pipeline._remove_contracts(label)

        # create a copy of the object to iterate over
        labeled_subsets = copy.copy(pipeline.labeled_subsets)
        # remove any labeled subsets that no longer have a complete set
        for label, labeled_subset in labeled_subsets.items():
            if labeled_subset.subset - pipeline.tasks.keys():
                pipeline.labeled_subsets.pop(label)

        return pipeline
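
    # Usage sketch (editor's addition, not part of the original source):
    # subsetting a pipeline built from an inline document. The task classes
    # are hypothetical.
    #
    #     >>> ir = PipelineIR.from_string(
    #     ...     "description: demo\n"
    #     ...     "tasks:\n"
    #     ...     "  taskA: modA.TaskA\n"
    #     ...     "  taskB: modB.TaskB\n")
    #     >>> sorted(ir.subset_from_labels({"taskA"}).tasks)
    #     ['taskA']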

    @classmethod
    def from_string(cls, pipeline_string: str):
        """Create a `PipelineIR` object from a string formatted like a pipeline
        document

        Parameters
        ----------
        pipeline_string : `str`
            A string that is formatted like a pipeline document
        """
        loaded_yaml = yaml.load(pipeline_string, Loader=PipelineYamlLoader)
        return cls(loaded_yaml)
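
    # Usage sketch (editor's addition, not part of the original source): a
    # minimal pipeline document round-tripped through PipelineIR. The module
    # path is hypothetical.
    #
    #     >>> ir = PipelineIR.from_string(
    #     ...     "description: demo\n"
    #     ...     "tasks:\n"
    #     ...     "  taskA: modA.TaskA\n")
    #     >>> print(str(ir).strip())
    #     description: demo
    #     tasks:
    #       taskA:
    #         class: modA.TaskA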

    @classmethod
    def from_file(cls, filename: str):
        """Create a `PipelineIR` object from the document specified by the
        input path.

        Parameters
        ----------
        filename : `str`
            Location of document to use in creating a `PipelineIR` object.
        """
        with open(filename, 'r') as f:
            loaded_yaml = yaml.load(f, Loader=PipelineYamlLoader)
        return cls(loaded_yaml)

    def to_file(self, filename: str):
        """Serialize this `PipelineIR` object into a yaml formatted string and
        write the output to a file at the specified path.

        Parameters
        ----------
        filename : `str`
            Location of document to write a `PipelineIR` object.
        """
        with open(filename, 'w') as f:
            yaml.dump(self.to_primitives(), f, sort_keys=False)

    def to_primitives(self):
        """Convert to a representation used in yaml serialization
        """
        accumulate = {"description": self.description}
        if self.instrument is not None:
            accumulate['instrument'] = self.instrument
        if self.parameters:
            accumulate['parameters'] = self.parameters.to_primitives()
        accumulate['tasks'] = {m: t.to_primitives() for m, t in self.tasks.items()}
        if len(self.contracts) > 0:
            accumulate['contracts'] = [c.to_primitives() for c in self.contracts]
        if self.labeled_subsets:
            accumulate['subsets'] = {k: v.to_primitives() for k, v in self.labeled_subsets.items()}
        return accumulate

    def __str__(self) -> str:
        """Instance formatted as it would appear in yaml representation
        """
        return yaml.dump(self.to_primitives(), sort_keys=False)

    def __repr__(self) -> str:
        """Instance formatted as it would appear in yaml representation
        """
        return str(self)

    def __eq__(self, other: "PipelineIR"):
        if not isinstance(other, PipelineIR):
            return False
        elif all(getattr(self, attr) == getattr(other, attr) for attr in
                 ("contracts", "tasks", "instrument")):
            return True
        else:
            return False