# This file is part of pipe_base.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = ("ConfigIR", "ContractError", "ContractIR", "InheritIR", "PipelineIR", "TaskIR",
           "LabeledSubset", "ParametersIR")

from collections import Counter
from collections.abc import Iterable as abcIterable
from dataclasses import dataclass, field
from typing import Any, List, Set, Union, Generator, MutableMapping, Optional, Dict

import copy
import re
import os
import yaml
import warnings


class PipelineYamlLoader(yaml.SafeLoader):
    """A specialized version of yaml's SafeLoader that raises an exception if
    it finds multiple instances of the same key at a given scope inside a
    pipeline file.
    """
    def construct_mapping(self, node, deep=False):
        # Call super first so that it can do all the other forms of checking
        # on this node. Checking key uniqueness first would save the work
        # super does in the failure case, but if the node were the wrong node
        # due to a parsing error, the resulting exception would be difficult
        # to understand.
        mapping = super().construct_mapping(node, deep)
        # Check if there are any duplicate keys
        all_keys = Counter(key_node.value for key_node, _ in node.value)
        duplicates = {k for k, i in all_keys.items() if i != 1}
        if duplicates:
            raise KeyError("Pipeline files must not have duplicated keys, "
                           f"{duplicates} appeared multiple times")
        return mapping
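
# A minimal sketch of the duplicate-key check above, on a hypothetical
# document (not from the original source). The standard SafeLoader would
# silently keep the last 'taskA' value; this loader raises instead:
#
#     yaml.load("taskA: modA\ntaskA: modB", Loader=PipelineYamlLoader)
#     # KeyError: "Pipeline files must not have duplicated keys, {'taskA'}
#     # appeared multiple times"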


class ContractError(Exception):
    """An exception that is raised when a pipeline contract is not satisfied
    """
    pass


@dataclass
class ContractIR:
    """Intermediate representation of contracts read from a pipeline yaml file.
    """
    contract: str
    """A string of python code representing one or more conditions on configs
    in a pipeline. This code-as-string should, once evaluated, be True if the
    configs are fine, and False otherwise.
    """
    msg: Union[str, None] = None
    """An optional message to be shown to the user if a contract fails
    """

    def to_primitives(self) -> dict:
        """Convert to a representation used in yaml serialization
        """
        accumulate = {"contract": self.contract}
        if self.msg is not None:
            accumulate['msg'] = self.msg
        return accumulate

    def __eq__(self, other: "ContractIR"):
        if not isinstance(other, ContractIR):
            return False
        elif self.contract == other.contract and self.msg == other.msg:
            return True
        else:
            return False
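
# A hedged example of how a contract round-trips through this IR; the config
# field names are illustrative, not from the original source:
#
#     c = ContractIR(contract="taskA.doFoo == taskB.doFoo",
#                    msg="taskA and taskB must agree on doFoo")
#     c.to_primitives()
#     # {'contract': 'taskA.doFoo == taskB.doFoo',
#     #  'msg': 'taskA and taskB must agree on doFoo'}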


@dataclass
class LabeledSubset:
    """Intermediate representation of a named subset of task labels read from
    a pipeline yaml file.
    """
    label: str
    """The label used to identify the subset of task labels.
    """
    subset: Set[str]
    """A set of task labels contained in this subset.
    """
    description: Optional[str]
    """A description of what this subset of tasks is intended to do
    """

    @staticmethod
    def from_primatives(label: str, value: Union[List[str], dict]) -> LabeledSubset:
        """Generate `LabeledSubset` objects given a properly formatted object
        that has been created by a yaml loader.

        Parameters
        ----------
        label : `str`
            The label that will be used to identify this labeled subset.
        value : `list` of `str` or `dict`
            Object returned from loading a labeled subset section from a yaml
            document.

        Returns
        -------
        labeledSubset : `LabeledSubset`
            A `LabeledSubset` object built from the inputs.

        Raises
        ------
        ValueError
            Raised if the value input is not properly formatted for parsing.
        """
        if isinstance(value, MutableMapping):
            subset = value.pop("subset", None)
            if subset is None:
                raise ValueError("If a labeled subset is specified as a mapping, it must contain the key "
                                 "'subset'")
            description = value.pop("description", None)
        elif isinstance(value, abcIterable):
            subset = value
            description = None
        else:
            raise ValueError(f"There was a problem parsing the labeled subset {label}; make sure the "
                             "definition is either a valid yaml list, or a mapping with keys "
                             "(subset, description), where subset points to a yaml list and description "
                             "is associated with a string")
        return LabeledSubset(label, set(subset), description)
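
    # A sketch of the two yaml forms this method accepts (labels and values
    # are hypothetical, not from the original source):
    #
    #     LabeledSubset.from_primatives("calib", ["bias", "flat"])
    #     # LabeledSubset(label='calib', subset={'bias', 'flat'},
    #     #               description=None)
    #
    #     LabeledSubset.from_primatives(
    #         "calib", {"subset": ["bias", "flat"], "description": "calibs"})
    #     # LabeledSubset(label='calib', subset={'bias', 'flat'},
    #     #               description='calibs')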

    def to_primitives(self) -> dict:
        """Convert to a representation used in yaml serialization
        """
        accumulate: Dict[str, Any] = {"subset": list(self.subset)}
        if self.description is not None:
            accumulate["description"] = self.description
        return accumulate


@dataclass
class ParametersIR:
    """Intermediate representation of parameters that are global to a pipeline.

    These parameters are specified under a top level key named `parameters`
    and are declared as a yaml mapping. These entries can then be used inside
    task configuration blocks to specify configuration values. They may not be
    used in the special ``file`` or ``python`` blocks.

    Example:
    parameters:
      shared_value: 14
    tasks:
      taskA:
        class: modA
        config:
          field1: parameters.shared_value
      taskB:
        class: modB
        config:
          field2: parameters.shared_value
    """
    mapping: MutableMapping[str, str]
    """A mutable mapping of identifiers as keys, and shared configuration
    as values.
    """

    def update(self, other: Optional[ParametersIR]):
        if other is not None:
            self.mapping.update(other.mapping)

    def to_primitives(self) -> MutableMapping[str, str]:
        """Convert to a representation used in yaml serialization
        """
        return self.mapping

    def __contains__(self, value: str) -> bool:
        return value in self.mapping

    def __getitem__(self, item: str) -> Any:
        return self.mapping[item]

    def __bool__(self) -> bool:
        return bool(self.mapping)
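
# The mapping protocol above is what ConfigIR.formatted relies on; a brief
# sketch with a made-up parameter name:
#
#     p = ParametersIR({"shared_value": 14})
#     "shared_value" in p     # True
#     p["shared_value"]       # 14
#     bool(ParametersIR({}))  # False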


@dataclass
class ConfigIR:
    """Intermediate representation of configurations read from a pipeline yaml
    file.
    """
    python: Union[str, None] = None
    """A string of python code that is used to modify a configuration. This can
    also be None if there are no modifications to do.
    """
    dataId: Union[dict, None] = None
    """A dataId that is used to constrain these config overrides to only quanta
    with matching dataIds. This field can be None if there is no constraint.
    This is currently an unimplemented feature, and is placed here for future
    use.
    """
    file: List[str] = field(default_factory=list)
    """A list of paths which point to files containing config overrides to be
    applied. This value may be an empty list if there are no overrides to
    apply.
    """
    rest: dict = field(default_factory=dict)
    """This is a dictionary of key value pairs, where the keys are strings
    corresponding to qualified fields on a config to override, and the values
    are strings representing the values to apply.
    """

    def to_primitives(self) -> dict:
        """Convert to a representation used in yaml serialization
        """
        accumulate = {}
        for name in ("python", "dataId", "file"):
            # if this attribute is truthy add it to the accumulation
            # dictionary
            if getattr(self, name):
                accumulate[name] = getattr(self, name)
        # Add the dictionary containing the rest of the config keys to the
        # accumulated dictionary
        accumulate.update(self.rest)
        return accumulate

    def formatted(self, parameters: ParametersIR) -> ConfigIR:
        """Returns a new ConfigIR object that is formatted according to the
        specified parameters.

        Parameters
        ----------
        parameters : ParametersIR
            Object that contains variable mappings used in substitution.

        Returns
        -------
        config : ConfigIR
            A new ConfigIR object formatted with the input parameters.
        """
        new_config = copy.deepcopy(self)
        for key, value in new_config.rest.items():
            if not isinstance(value, str):
                continue
            match = re.match("parameters[.](.*)", value)
            if match and match.group(1) in parameters:
                new_config.rest[key] = parameters[match.group(1)]
            if match and match.group(1) not in parameters:
                warnings.warn(f"config {key} contains value {match.group(0)}, which is formatted like a "
                              "Pipeline parameter but was not found within the Pipeline; if this was not "
                              "intentional, check for a typo")
        return new_config
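
    # A brief sketch of the substitution above (parameter and field names are
    # hypothetical):
    #
    #     params = ParametersIR({"shared_value": 14})
    #     cfg = ConfigIR(rest={"field1": "parameters.shared_value"})
    #     cfg.formatted(params).rest
    #     # {'field1': 14}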

    def maybe_merge(self, other_config: "ConfigIR") -> Generator["ConfigIR", None, None]:
        """Merges another instance of a `ConfigIR` into this instance if
        possible. This function returns a generator that yields only self
        if the configs were merged, or self followed by other_config if they
        could not be merged.

        Parameters
        ----------
        other_config : `ConfigIR`
            An instance of `ConfigIR` to merge into this instance.

        Returns
        -------
        Generator : `ConfigIR`
            A generator yielding either self (if the configs were merged), or
            self and other_config (if they were not).
        """
        # Verify that the config blocks can be merged
        if self.dataId != other_config.dataId or self.python or other_config.python or\
                self.file or other_config.file:
            yield from (self, other_config)
            return

        # create a set of all shared keys, and verify no key has two different
        # values
        key_union = self.rest.keys() & other_config.rest.keys()
        for key in key_union:
            if self.rest[key] != other_config.rest[key]:
                yield from (self, other_config)
                return
        self.rest.update(other_config.rest)

        # Combine the lists of override files to load
        self_file_set = set(self.file)
        other_file_set = set(other_config.file)
        self.file = list(self_file_set.union(other_file_set))

        yield self
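
    # Merge behavior in brief, with made-up override fields:
    #
    #     a = ConfigIR(rest={"x": "1"})
    #     b = ConfigIR(rest={"y": "2"})
    #     list(a.maybe_merge(b))   # [a], with a.rest == {'x': '1', 'y': '2'}
    #
    #     c = ConfigIR(python="cfg.x = 3")
    #     list(a.maybe_merge(c))   # [a, c]; python blocks are never merged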

    def __eq__(self, other: "ConfigIR"):
        if not isinstance(other, ConfigIR):
            return False
        elif all(getattr(self, attr) == getattr(other, attr) for attr in
                 ("python", "dataId", "file", "rest")):
            return True
        else:
            return False


@dataclass
class TaskIR:
    """Intermediate representation of tasks read from a pipeline yaml file.
    """
    label: str
    """An identifier used to refer to a task.
    """
    klass: str
    """A string containing a fully qualified python class to be run in a
    pipeline.
    """
    config: Union[List[ConfigIR], None] = None
    """List of all config overrides associated with this task; may be
    `None` if there are no config overrides.
    """

    def to_primitives(self) -> dict:
        """Convert to a representation used in yaml serialization
        """
        accumulate = {'class': self.klass}
        if self.config:
            accumulate['config'] = [c.to_primitives() for c in self.config]
        return accumulate

    def add_or_update_config(self, other_config: ConfigIR):
        """Adds a `ConfigIR` to this task if one is not present. Merges configs
        if there is a `ConfigIR` present and the dataId keys of both configs
        match, otherwise adds a new entry to the config list. The exception to
        the above is that if either the last config or other_config has a
        python block, then other_config is always added, as python blocks can
        modify configs in ways that cannot be predicted.

        Parameters
        ----------
        other_config : `ConfigIR`
            A `ConfigIR` instance to add or merge into the config attribute of
            this task.
        """
        if not self.config:
            self.config = [other_config]
            return
        self.config.extend(self.config.pop().maybe_merge(other_config))
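
    # A short sketch of the merge-or-append behavior (labels and fields are
    # illustrative):
    #
    #     t = TaskIR("taskA", "mod.TaskA", [ConfigIR(rest={"x": "1"})])
    #     t.add_or_update_config(ConfigIR(rest={"y": "2"}))
    #     len(t.config)   # 1; the two override blocks were merged
    #     t.add_or_update_config(ConfigIR(python="cfg.x = 3"))
    #     len(t.config)   # 2; a python block always starts a new entry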

    def __eq__(self, other: "TaskIR"):
        if not isinstance(other, TaskIR):
            return False
        elif all(getattr(self, attr) == getattr(other, attr) for attr in
                 ("label", "klass", "config")):
            return True
        else:
            return False


@dataclass
class InheritIR:
    """An intermediate representation of inherited pipelines
    """
    location: str
    """This is the location of the pipeline to inherit. The path should be
    specified as an absolute path. Environment variables may be used in the
    path and should be specified as a python string template, with the name of
    the environment variable inside braces.
    """
    include: Union[List[str], None] = None
    """List of tasks that should be included when inheriting this pipeline.
    Either the include or exclude attributes may be specified, but not both.
    """
    exclude: Union[List[str], None] = None
    """List of tasks that should be excluded when inheriting this pipeline.
    Either the include or exclude attributes may be specified, but not both.
    """
    importContracts: bool = True
    """Boolean attribute to dictate if contracts should be inherited with the
    pipeline or not.
    """

    def toPipelineIR(self, instrument=None) -> "PipelineIR":
        """Load in the Pipeline specified by this object, and turn it into a
        PipelineIR instance.

        Parameters
        ----------
        instrument : Optional `str`
            A string giving the fully qualified path to an instrument object.
            If an inherited pipeline defines the same instrument as defined in
            this variable, an import warning message is skipped.

        Returns
        -------
        pipeline : `PipelineIR`
            A pipeline generated from the imported pipeline file.
        """
        if self.include and self.exclude:
            raise ValueError("An include list and an exclude list cannot both be specified"
                             " when declaring a pipeline import")
        tmp_pipeline = PipelineIR.from_file(os.path.expandvars(self.location))
        if tmp_pipeline.instrument is not None and tmp_pipeline.instrument != instrument:
            warnings.warn("Any instrument definitions in imported pipelines are ignored. "
                          "If an instrument is desired please define it in the top most pipeline")

        included_labels = set()
        for label in tmp_pipeline.tasks:
            if (self.include and label in self.include) or (self.exclude and label not in self.exclude)\
                    or (self.include is None and self.exclude is None):
                included_labels.add(label)

        # Handle labeled subsets being specified in the include or exclude
        # list, adding or removing labels.
        if self.include is not None:
            subsets_in_include = tmp_pipeline.labeled_subsets.keys() & self.include
            for label in subsets_in_include:
                included_labels.update(tmp_pipeline.labeled_subsets[label].subset)

        elif self.exclude is not None:
            subsets_in_exclude = tmp_pipeline.labeled_subsets.keys() & self.exclude
            for label in subsets_in_exclude:
                included_labels.difference_update(tmp_pipeline.labeled_subsets[label].subset)

        tmp_pipeline = tmp_pipeline.subset_from_labels(included_labels)

        if not self.importContracts:
            tmp_pipeline.contracts = []

        return tmp_pipeline
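
    # How an inherits entry maps onto this dataclass (path and labels are
    # hypothetical):
    #
    #     inherits:
    #       - location: ${SOME_DIR}/base_pipeline.yaml
    #         include: [taskA, taskB]
    #         importContracts: false
    #
    # becomes InheritIR(location="${SOME_DIR}/base_pipeline.yaml",
    # include=["taskA", "taskB"], importContracts=False), and toPipelineIR()
    # then loads and subsets the referenced file.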

    def __eq__(self, other: "InheritIR"):
        if not isinstance(other, InheritIR):
            return False
        elif all(getattr(self, attr) == getattr(other, attr) for attr in
                 ("location", "include", "exclude", "importContracts")):
            return True
        else:
            return False


class PipelineIR:
    """Intermediate representation of a pipeline definition.

    Parameters
    ----------
    loaded_yaml : `dict`
        A dictionary which matches the structure that would be produced by a
        yaml reader which parses a pipeline definition document.

    Raises
    ------
    ValueError :
        - If a pipeline is declared without a description
        - If no tasks are declared in a pipeline, and no pipelines are to be
          inherited
        - If more than one instrument is specified
        - If more than one inherited pipeline shares a label
    """
    def __init__(self, loaded_yaml):
        # Check required fields are present
        if "description" not in loaded_yaml:
            raise ValueError("A pipeline must be declared with a description")
        if "tasks" not in loaded_yaml and "inherits" not in loaded_yaml:
            raise ValueError("A pipeline must be declared with one or more tasks")

        # The steps below must happen in this order

        # Process pipeline description
        self.description = loaded_yaml.pop("description")

        # Process tasks
        self._read_tasks(loaded_yaml)

        # Process instrument keys
        inst = loaded_yaml.pop("instrument", None)
        if isinstance(inst, list):
            raise ValueError("Only one top level instrument can be defined in a pipeline")
        self.instrument = inst

        # Process any contracts
        self._read_contracts(loaded_yaml)

        # Process any defined parameters
        self._read_parameters(loaded_yaml)

        # Process any named label subsets
        self._read_labeled_subsets(loaded_yaml)

        # Process any inherited pipelines
        self._read_inherits(loaded_yaml)

        # verify named subsets, must be done after inheriting
        self._verify_labeled_subsets()

    def _read_contracts(self, loaded_yaml):
        """Process the contracts portion of the loaded yaml document

        Parameters
        ----------
        loaded_yaml : `dict`
            A dictionary which matches the structure that would be produced by
            a yaml reader which parses a pipeline definition document
        """
        loaded_contracts = loaded_yaml.pop("contracts", [])
        if isinstance(loaded_contracts, str):
            loaded_contracts = [loaded_contracts]
        self.contracts = []
        for contract in loaded_contracts:
            if isinstance(contract, dict):
                self.contracts.append(ContractIR(**contract))
            if isinstance(contract, str):
                self.contracts.append(ContractIR(contract=contract))

    def _read_parameters(self, loaded_yaml):
        """Process the parameters portion of the loaded yaml document

        Parameters
        ----------
        loaded_yaml : `dict`
            A dictionary which matches the structure that would be produced by
            a yaml reader which parses a pipeline definition document
        """
        loaded_parameters = loaded_yaml.pop("parameters", {})
        if not isinstance(loaded_parameters, dict):
            raise ValueError("The parameters section must be a yaml mapping")
        self.parameters = ParametersIR(loaded_parameters)

    def _read_labeled_subsets(self, loaded_yaml: dict):
        """Process the subsets portion of the loaded yaml document

        Parameters
        ----------
        loaded_yaml : `MutableMapping`
            A dictionary which matches the structure that would be produced
            by a yaml reader which parses a pipeline definition document
        """
        loaded_subsets = loaded_yaml.pop("subsets", {})
        self.labeled_subsets = {}
        if not loaded_subsets and "subset" in loaded_yaml:
            raise ValueError("Top level key should be subsets and not subset, add an s")
        for key, value in loaded_subsets.items():
            self.labeled_subsets[key] = LabeledSubset.from_primatives(key, value)

    def _verify_labeled_subsets(self):
        """Verifies that all the labels in each named subset exist within the
        pipeline.
        """
        # Verify that all labels defined in a labeled subset are in the
        # Pipeline
        for labeled_subset in self.labeled_subsets.values():
            if not labeled_subset.subset.issubset(self.tasks.keys()):
                raise ValueError(f"Labels {labeled_subset.subset - self.tasks.keys()} were not found in the "
                                 "declared pipeline")
        # Verify subset labels are not already task labels
        label_intersection = self.labeled_subsets.keys() & self.tasks.keys()
        if label_intersection:
            raise ValueError(f"Labeled subsets cannot use the same label as a task: {label_intersection}")

    def _read_inherits(self, loaded_yaml):
        """Process the inherits portion of the loaded yaml document

        Parameters
        ----------
        loaded_yaml : `dict`
            A dictionary which matches the structure that would be produced by
            a yaml reader which parses a pipeline definition document
        """
        def process_args(argument: Union[str, dict]) -> dict:
            if isinstance(argument, str):
                return {"location": argument}
            elif isinstance(argument, dict):
                if "exclude" in argument and isinstance(argument["exclude"], str):
                    argument["exclude"] = [argument["exclude"]]
                if "include" in argument and isinstance(argument["include"], str):
                    argument["include"] = [argument["include"]]
                return argument
        tmp_inherit = loaded_yaml.pop("inherits", None)
        if tmp_inherit is None:
            self.inherits = []
        elif isinstance(tmp_inherit, list):
            self.inherits = [InheritIR(**process_args(args)) for args in tmp_inherit]
        else:
            self.inherits = [InheritIR(**process_args(tmp_inherit))]

        # integrate any imported pipelines
        accumulate_tasks = {}
        accumulate_labeled_subsets = {}
        accumulated_parameters = ParametersIR({})
        for other_pipeline in self.inherits:
            tmp_IR = other_pipeline.toPipelineIR(instrument=self.instrument)
            if accumulate_tasks.keys() & tmp_IR.tasks.keys():
                raise ValueError("Task labels in the imported pipelines must "
                                 "be unique")
            accumulate_tasks.update(tmp_IR.tasks)
            self.contracts.extend(tmp_IR.contracts)
            # verify that tmp_IR has unique labels for named subsets among
            # existing labeled subsets, and with existing task labels.
            overlapping_subsets = accumulate_labeled_subsets.keys() & tmp_IR.labeled_subsets.keys()
            task_subset_overlap = ((accumulate_labeled_subsets.keys() | tmp_IR.labeled_subsets.keys())
                                   & accumulate_tasks.keys())
            if overlapping_subsets or task_subset_overlap:
                raise ValueError("Labeled subset names must be unique amongst imports in both labels and "
                                 f"named subsets. Duplicate: {overlapping_subsets | task_subset_overlap}")
            accumulate_labeled_subsets.update(tmp_IR.labeled_subsets)
            accumulated_parameters.update(tmp_IR.parameters)

        # verify that any accumulated labeled subsets don't clash with a label
        # from this pipeline
        if accumulate_labeled_subsets.keys() & self.tasks.keys():
            raise ValueError("Labeled subset names must be unique amongst imports in both labels and "
                             "named subsets")
        # merge in the named subsets for self so this document can override any
        # that have been declared
        accumulate_labeled_subsets.update(self.labeled_subsets)
        self.labeled_subsets = accumulate_labeled_subsets

        # merge the dict of label:TaskIR objects, preserving any configs in the
        # imported pipeline if the labels point to the same class
        for label, task in self.tasks.items():
            if label not in accumulate_tasks:
                accumulate_tasks[label] = task
            elif accumulate_tasks[label].klass == task.klass:
                if task.config is not None:
                    for config in task.config:
                        accumulate_tasks[label].add_or_update_config(config)
            else:
                accumulate_tasks[label] = task
        self.tasks = accumulate_tasks
        self.parameters.update(accumulated_parameters)

    def _read_tasks(self, loaded_yaml):
        """Process the tasks portion of the loaded yaml document

        Parameters
        ----------
        loaded_yaml : `dict`
            A dictionary which matches the structure that would be produced by
            a yaml reader which parses a pipeline definition document
        """
        self.tasks = {}
        tmp_tasks = loaded_yaml.pop("tasks", None)
        if tmp_tasks is None:
            tmp_tasks = {}

        if "parameters" in tmp_tasks:
            raise ValueError("parameters is a reserved word and cannot be used as a task label")

        for label, definition in tmp_tasks.items():
            if isinstance(definition, str):
                definition = {"class": definition}
            config = definition.get('config', None)
            if config is None:
                task_config_ir = None
            else:
                if isinstance(config, dict):
                    config = [config]
                task_config_ir = []
                for c in config:
                    file = c.pop("file", None)
                    if file is None:
                        file = []
                    elif not isinstance(file, list):
                        file = [file]
                    task_config_ir.append(ConfigIR(python=c.pop("python", None),
                                                   dataId=c.pop("dataId", None),
                                                   file=file,
                                                   rest=c))
            self.tasks[label] = TaskIR(label, definition["class"], task_config_ir)
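
    # The two task declaration forms handled above, with hypothetical labels
    # and classes:
    #
    #     tasks:
    #       taskA: mod.TaskA                # string shorthand, no config
    #       taskB:
    #         class: mod.TaskB
    #         config:
    #           field1: value1              # collected into ConfigIR.rest
    #           file: overrides.py          # normalized to a one-element list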

    def _remove_contracts(self, label: str):
        """Remove any contracts that contain the given label

        String comparison used in this way is not the most elegant and may
        have issues, but it is the only feasible way when users can specify
        contracts with generic strings.
        """
        new_contracts = []
        for contract in self.contracts:
            # match a label that is not preceded by an ASCII identifier
            # character (or that starts the string) and is followed by a dot
            if re.match(f".*([^A-Za-z0-9_]|^){label}[.]", contract.contract):
                continue
            new_contracts.append(contract)
        self.contracts = new_contracts
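
    # Pruning in brief (contract strings are illustrative): with label
    # "taskA", the pattern drops "taskA.field1 == 2" but keeps
    # "mytaskA.field1 == 2", because the match requires that "taskA." not be
    # preceded by an identifier character.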

    def subset_from_labels(self, labelSpecifier: Set[str]) -> PipelineIR:
        """Subset a pipelineIR to contain only labels specified in
        labelSpecifier.

        Parameters
        ----------
        labelSpecifier : `set` of `str`
            Set containing labels that describe how to subset a pipeline.

        Returns
        -------
        pipeline : `PipelineIR`
            A new pipelineIR object that is a subset of the old pipelineIR.

        Raises
        ------
        ValueError
            Raised if there is an issue with specified labels.

        Notes
        -----
        This method attempts to prune any contracts that contain labels which
        are not in the declared subset of labels. This pruning is done using a
        string based matching due to the nature of contracts and may prune more
        than it should. Any labeled subsets defined that no longer have all
        members of the subset present in the pipeline will be removed from the
        resulting pipeline.
        """
        pipeline = copy.deepcopy(self)

        # update the label specifier to expand any named subsets
        toRemove = set()
        toAdd = set()
        for label in labelSpecifier:
            if label in pipeline.labeled_subsets:
                toRemove.add(label)
                toAdd.update(pipeline.labeled_subsets[label].subset)
        labelSpecifier.difference_update(toRemove)
        labelSpecifier.update(toAdd)
        # verify all the labels are in the pipeline
        if not labelSpecifier.issubset(pipeline.tasks.keys()
                                       | pipeline.labeled_subsets):
            difference = labelSpecifier.difference(pipeline.tasks.keys())
            raise ValueError("Not all supplied labels (specified or named subsets) are in the pipeline "
                             f"definition, extra labels: {difference}")
        # copy needed so as to not modify while iterating
        pipeline_labels = set(pipeline.tasks.keys())
        # Remove the labels from the pipelineIR, and any contracts that contain
        # those labels (see docstring on _remove_contracts for why this may
        # cause issues)
        for label in pipeline_labels:
            if label not in labelSpecifier:
                pipeline.tasks.pop(label)
                pipeline._remove_contracts(label)

        # create a copy of the object to iterate over
        labeled_subsets = copy.copy(pipeline.labeled_subsets)
        # remove any labeled subsets that no longer have a complete set
        for label, labeled_subset in labeled_subsets.items():
            if labeled_subset.subset - pipeline.tasks.keys():
                pipeline.labeled_subsets.pop(label)

        return pipeline
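
    # Usage in brief (labels are hypothetical): given a pipeline with tasks
    # {a, b, c} and a labeled subset "ab" -> {a, b},
    #
    #     pipeline.subset_from_labels({"ab"})
    #
    # expands "ab", keeps tasks a and b, drops c, and prunes any contract
    # whose text mentions "c." (string-based, so it may over-prune).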

    @classmethod
    def from_string(cls, pipeline_string: str):
        """Create a `PipelineIR` object from a string formatted like a pipeline
        document.

        Parameters
        ----------
        pipeline_string : `str`
            A string formatted like a pipeline document.
        """
        loaded_yaml = yaml.load(pipeline_string, Loader=PipelineYamlLoader)
        return cls(loaded_yaml)
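
    # A minimal, hypothetical document accepted by from_string:
    #
    #     PipelineIR.from_string(
    #         "description: demo\n"
    #         "tasks:\n"
    #         "  taskA: mod.TaskA\n"
    #     )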

    @classmethod
    def from_file(cls, filename: str):
        """Create a `PipelineIR` object from the document specified by the
        input path.

        Parameters
        ----------
        filename : `str`
            Location of document to use in creating a `PipelineIR` object.
        """
        with open(filename, 'r') as f:
            loaded_yaml = yaml.load(f, Loader=PipelineYamlLoader)
        return cls(loaded_yaml)

    def to_file(self, filename: str):
        """Serialize this `PipelineIR` object into a yaml formatted string and
        write the output to a file at the specified path.

        Parameters
        ----------
        filename : `str`
            Location of document to write a `PipelineIR` object.
        """
        with open(filename, 'w') as f:
            yaml.dump(self.to_primitives(), f, sort_keys=False)

    def to_primitives(self):
        """Convert to a representation used in yaml serialization
        """
        accumulate = {"description": self.description}
        if self.instrument is not None:
            accumulate['instrument'] = self.instrument
        if self.parameters:
            accumulate['parameters'] = self.parameters.to_primitives()
        accumulate['tasks'] = {m: t.to_primitives() for m, t in self.tasks.items()}
        if len(self.contracts) > 0:
            accumulate['contracts'] = [c.to_primitives() for c in self.contracts]
        if self.labeled_subsets:
            accumulate['subsets'] = {k: v.to_primitives() for k, v in self.labeled_subsets.items()}
        return accumulate

    def __str__(self) -> str:
        """Instance formatting as how it would look in yaml representation
        """
        return yaml.dump(self.to_primitives(), sort_keys=False)

    def __repr__(self) -> str:
        """Instance formatting as how it would look in yaml representation
        """
        return str(self)

    def __eq__(self, other: "PipelineIR"):
        if not isinstance(other, PipelineIR):
            return False
        elif all(getattr(self, attr) == getattr(other, attr) for attr in
                 ("contracts", "tasks", "instrument")):
            return True
        else:
            return False