from __future__ import annotations

__all__ = ("ConfigIR", "ContractError", "ContractIR", "ImportIR", "PipelineIR", "TaskIR", "LabeledSubset")

from collections import Counter
from collections.abc import Iterable as abcIterable
from dataclasses import dataclass, field
from typing import Any, List, Set, Union, Generator, MutableMapping, Optional, Dict, Type

import copy
import os
import re
import warnings

import yaml
42 """This is a specialized version of yaml's SafeLoader. It checks and raises
43 an exception if it finds that there are multiple instances of the same key
44 found inside a pipeline file at a given scope.
55 all_keys = Counter(key_node.value
for key_node, _
in node.value)
56 duplicates = {k
for k, i
in all_keys.items()
if i != 1}
58 raise KeyError(
"Pipeline files must not have duplicated keys, "
59 f
"{duplicates} appeared multiple times")
64 """An exception that is raised when a pipeline contract is not satisfied
71 """Intermediate representation of contracts read from a pipeline yaml file.
74 """A string of python code representing one or more conditions on configs
75 in a pipeline. This code-as-string should, once evaluated, should be True
76 if the configs are fine, and False otherwise.
78 msg: Union[str,
None] =
None
79 """An optional message to be shown to the user if a contract fails
83 """Convert to a representation used in yaml serialization
85 accumulate = {
"contract": self.
contract}
86 if self.
msg is not None:
87 accumulate[
'msg'] = self.
msg
90 def __eq__(self, other:
"ContractIR"):
91 if not isinstance(other, ContractIR):
93 elif self.
contract == other.contract
and self.
msg == other.msg:
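
# Illustrative sketch: a contract as it might appear in a pipeline document,
# and its round trip through ContractIR. The task labels are hypothetical.
#
#     contracts:
#       - contract: taskA.outputName == taskB.inputName
#         msg: "taskB must consume taskA's output"
#
#     ir = ContractIR(contract="taskA.x == taskB.y", msg="x must equal y")
#     ir.to_primitives()  # {'contract': 'taskA.x == taskB.y', 'msg': 'x must equal y'}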
101 """Intermediate representation of named subset of task labels read from
102 a pipeline yaml file.
105 """The label used to identify the subset of task labels.
108 """A set of task labels contained in this subset.
110 description: Optional[str]
111 """A description of what this subset of tasks is intended to do
116 """Generate `LabeledSubset` objects given a properly formatted object
117 that as been created by a yaml loader.
122 The label that will be used to identify this labeled subset.
123 value : `list` of `str` or `dict`
124 Object returned from loading a labeled subset section from a yaml
129 labeledSubset : `LabeledSubset`
130 A `LabeledSubset` object build from the inputs.
135 Raised if the value input is not properly formatted for parsing
137 if isinstance(value, MutableMapping):
138 subset = value.pop(
"subset",
None)
140 raise ValueError(
"If a labeled subset is specified as a mapping, it must contain the key "
142 description = value.pop(
"description",
None)
143 elif isinstance(value, abcIterable):
147 raise ValueError(f
"There was a problem parsing the labeled subset {label}, make sure the "
148 "definition is either a valid yaml list, or a mapping with keys "
149 "(subset, description) where subset points to a yaml list, and description is "
150 "associated with a string")
154 """Convert to a representation used in yaml serialization
156 accumulate: Dict[str, Any] = {
"subset": list(self.subset)}
157 if self.description
is not None:
158 accumulate[
"description"] = self.description
164 """Intermediate representation of parameters that are global to a pipeline
166 These parameters are specified under a top level key named `parameters`
167 and are declared as a yaml mapping. These entries can then be used inside
168 task configuration blocks to specify configuration values. They may not be
169 used in the special ``file`` or ``python`` blocks.
178 field1: parameters.shared_value
182 field2: parameters.shared_value
184 mapping: MutableMapping[str, str]
185 """A mutable mapping of identifiers as keys, and shared configuration
188 def update(self, other: Optional[ParametersIR]):
189 if other
is not None:
190 self.mapping.
update(other.mapping)
193 """Convert to a representation used in yaml serialization
198 return value
in self.mapping
201 return self.mapping[item]
204 return bool(self.mapping)
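
# Illustrative sketch: ParametersIR behaves like a small mapping, which is
# how ConfigIR.formatted (below) looks up "parameters.<name>" references.
#
#     params = ParametersIR({"shared_value": "14"})
#     "shared_value" in params   # True
#     params["shared_value"]     # '14'
#     bool(ParametersIR({}))     # False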
209 """Intermediate representation of configurations read from a pipeline yaml
212 python: Union[str,
None] =
None
213 """A string of python code that is used to modify a configuration. This can
214 also be None if there are no modifications to do.
216 dataId: Union[dict,
None] =
None
217 """A dataId that is used to constrain these config overrides to only quanta
218 with matching dataIds. This field can be None if there is no constraint.
219 This is currently an unimplemented feature, and is placed here for future
222 file: List[str] = field(default_factory=list)
223 """A list of paths which points to a file containing config overrides to be
224 applied. This value may be an empty list if there are no overrides to
227 rest: dict = field(default_factory=dict)
228 """This is a dictionary of key value pairs, where the keys are strings
229 corresponding to qualified fields on a config to override, and the values
230 are strings representing the values to apply.
234 """Convert to a representation used in yaml serialization
237 for name
in (
"python",
"dataId",
"file"):
240 if getattr(self, name):
241 accumulate[name] = getattr(self, name)
244 accumulate.update(self.rest)
247 def formatted(self, parameters: ParametersIR) -> ConfigIR:
248 """Returns a new ConfigIR object that is formatted according to the
253 parameters : ParametersIR
254 Object that contains variable mappings used in substitution.
259 A new ConfigIR object formatted with the input parameters
261 new_config = copy.deepcopy(self)
262 for key, value
in new_config.rest.items():
263 if not isinstance(value, str):
265 match = re.match(
"parameters[.](.*)", value)
266 if match
and match.group(1)
in parameters:
267 new_config.rest[key] = parameters[match.group(1)]
268 if match
and match.group(1)
not in parameters:
269 warnings.warn(f
"config {key} contains value {match.group(0)} which is formatted like a "
270 "Pipeline parameter but was not found within the Pipeline, if this was not "
271 "intentional, check for a typo")
274 def maybe_merge(self, other_config:
"ConfigIR") -> Generator[
"ConfigIR",
None,
None]:
275 """Merges another instance of a `ConfigIR` into this instance if
276 possible. This function returns a generator that is either self
277 if the configs were merged, or self, and other_config if that could
282 other_config : `ConfigIR`
283 An instance of `ConfigIR` to merge into this instance.
287 Generator : `ConfigIR`
288 A generator containing either self, or self and other_config if
289 the configs could be merged or not respectively.
292 if self.dataId != other_config.dataId
or self.python
or other_config.python
or\
293 self.
file or other_config.file:
294 yield from (self, other_config)
299 key_union = self.rest.keys() & other_config.rest.keys()
300 for key
in key_union:
301 if self.rest[key] != other_config.rest[key]:
302 yield from (self, other_config)
304 self.rest.update(other_config.rest)
307 self_file_set = set(self.
file)
308 other_file_set = set(other_config.file)
309 self.
file = list(self_file_set.union(other_file_set))
314 if not isinstance(other, ConfigIR):
316 elif all(getattr(self, attr) == getattr(other, attr)
for attr
in
317 (
"python",
"dataId",
"file",
"rest")):
325 """Intermediate representation of tasks read from a pipeline yaml file.
328 """An identifier used to refer to a task.
331 """A string containing a fully qualified python class to be run in a
334 config: Union[List[ConfigIR],
None] =
None
335 """List of all configs overrides associated with this task, and may be
336 `None` if there are no config overrides.
340 """Convert to a representation used in yaml serialization
342 accumulate = {
'class': self.klass}
344 accumulate[
'config'] = [c.to_primitives()
for c
in self.
config]
348 """Adds a `ConfigIR` to this task if one is not present. Merges configs
349 if there is a `ConfigIR` present and the dataId keys of both configs
350 match, otherwise adds a new entry to the config list. The exception to
351 the above is that if either the last config or other_config has a
352 python block, then other_config is always added, as python blocks can
353 modify configs in ways that cannot be predicted.
357 other_config : `ConfigIR`
358 A `ConfigIR` instance to add or merge into the config attribute of
364 self.
config.extend(self.
config.pop().maybe_merge(other_config))
367 if not isinstance(other, TaskIR):
369 elif all(getattr(self, attr) == getattr(other, attr)
for attr
in
370 (
"label",
"klass",
"config")):
378 """An intermediate representation of imported pipelines
381 """This is the location of the pipeline to inherit. The path should be
382 specified as an absolute path. Environment variables may be used in the
383 path and should be specified as a python string template, with the name of
384 the environment variable inside braces.
386 include: Union[List[str],
None] =
None
387 """List of tasks that should be included when inheriting this pipeline.
388 Either the include or exclude attributes may be specified, but not both.
390 exclude: Union[List[str],
None] =
None
391 """List of tasks that should be excluded when inheriting this pipeline.
392 Either the include or exclude attributes may be specified, but not both.
394 importContracts: bool =
True
395 """Boolean attribute to dictate if contracts should be inherited with the
398 instrument: Union[Type[KeepInstrument], str,
None] = KeepInstrument
399 """Instrument to assign to the Pipeline at import. The default value of
400 KEEP_INSTRUMENT indicates that whatever instrument the pipeline is declared
401 with will not be modified. Setting this value to None will drop any
402 declared instrument prior to import.
406 """Load in the Pipeline specified by this object, and turn it into a
411 pipeline : `PipelineIR`
412 A pipeline generated from the imported pipeline file
414 if self.include
and self.exclude:
415 raise ValueError(
"Both an include and an exclude list cant be specified"
416 " when declaring a pipeline import")
417 tmp_pipeline = PipelineIR.from_file(os.path.expandvars(self.location))
418 if self.instrument
is not KeepInstrument:
419 tmp_pipeline.instrument = self.instrument
421 included_labels = set()
422 for label
in tmp_pipeline.tasks:
423 if (self.include
and label
in self.include)
or (self.exclude
and label
not in self.exclude)\
424 or (self.include
is None and self.exclude
is None):
425 included_labels.add(label)
429 if self.include
is not None:
430 subsets_in_include = tmp_pipeline.labeled_subsets.keys() & self.include
431 for label
in subsets_in_include:
432 included_labels.update(tmp_pipeline.labeled_subsets[label].subset)
434 elif self.exclude
is not None:
435 subsets_in_exclude = tmp_pipeline.labeled_subsets.keys() & self.exclude
436 for label
in subsets_in_exclude:
437 included_labels.difference_update(tmp_pipeline.labeled_subsets[label].subset)
439 tmp_pipeline = tmp_pipeline.subset_from_labels(included_labels)
441 if not self.importContracts:
442 tmp_pipeline.contracts = []
447 if not isinstance(other, ImportIR):
449 elif all(getattr(self, attr) == getattr(other, attr)
for attr
in
450 (
"location",
"include",
"exclude",
"importContracts")):
457 """Intermediate representation of a pipeline definition
462 A dictionary which matches the structure that would be produced by a
463 yaml reader which parses a pipeline definition document
468 - If a pipeline is declared without a description
469 - If no tasks are declared in a pipeline, and no pipelines are to be
471 - If more than one instrument is specified
472 - If more than one inherited pipeline share a label
476 if "description" not in loaded_yaml:
477 raise ValueError(
"A pipeline must be declared with a description")
478 if "tasks" not in loaded_yaml
and len({
"imports",
"inherits"} - loaded_yaml.keys()) == 2:
479 raise ValueError(
"A pipeline must be declared with one or more tasks")
490 inst = loaded_yaml.pop(
"instrument",
None)
491 if isinstance(inst, list):
492 raise ValueError(
"Only one top level instrument can be defined in a pipeline")
    def _read_contracts(self, loaded_yaml):
        """Process the contracts portion of the loaded yaml document.

        Parameters
        ----------
        loaded_yaml : `dict`
            A dictionary which matches the structure that would be produced by
            a yaml reader which parses a pipeline definition document.
        """
        loaded_contracts = loaded_yaml.pop("contracts", [])
        if isinstance(loaded_contracts, str):
            loaded_contracts = [loaded_contracts]
        self.contracts = []
        for contract in loaded_contracts:
            if isinstance(contract, dict):
                self.contracts.append(ContractIR(**contract))
            if isinstance(contract, str):
                self.contracts.append(ContractIR(contract=contract))
    def _read_parameters(self, loaded_yaml):
        """Process the parameters portion of the loaded yaml document.

        Parameters
        ----------
        loaded_yaml : `dict`
            A dictionary which matches the structure that would be produced by
            a yaml reader which parses a pipeline definition document.
        """
        loaded_parameters = loaded_yaml.pop("parameters", {})
        if not isinstance(loaded_parameters, dict):
            raise ValueError("The parameters section must be a yaml mapping")
        self.parameters = ParametersIR(loaded_parameters)
    def _read_labeled_subsets(self, loaded_yaml: dict):
        """Process the subsets portion of the loaded yaml document.

        Parameters
        ----------
        loaded_yaml : `MutableMapping`
            A dictionary which matches the structure that would be produced
            by a yaml reader which parses a pipeline definition document.
        """
        loaded_subsets = loaded_yaml.pop("subsets", {})
        self.labeled_subsets = {}
        if not loaded_subsets and "subset" in loaded_yaml:
            raise ValueError("Top level key should be subsets and not subset, add an s")
        for key, value in loaded_subsets.items():
            self.labeled_subsets[key] = LabeledSubset.from_primitives(key, value)
    def _verify_labeled_subsets(self):
        """Verifies that all the labels in each named subset exist within the
        pipeline.
        """
        # Verify that all labels defined in a labeled subset are in the
        # pipeline
        for labeled_subset in self.labeled_subsets.values():
            if not labeled_subset.subset.issubset(self.tasks.keys()):
                raise ValueError(f"Labels {labeled_subset.subset - self.tasks.keys()} were not found in the "
                                 "declared pipeline")
        # Verify that no subset label clashes with a task label
        label_intersection = self.labeled_subsets.keys() & self.tasks.keys()
        if label_intersection:
            raise ValueError(f"Labeled subsets can not use the same label as a task: {label_intersection}")
    def _read_imports(self, loaded_yaml):
        """Process the imports (formerly inherits) portion of the loaded yaml
        document.

        Parameters
        ----------
        loaded_yaml : `dict`
            A dictionary which matches the structure that would be produced by
            a yaml reader which parses a pipeline definition document.
        """
        def process_args(argument: Union[str, dict]) -> dict:
            if isinstance(argument, str):
                return {"location": argument}
            elif isinstance(argument, dict):
                if "exclude" in argument and isinstance(argument["exclude"], str):
                    argument["exclude"] = [argument["exclude"]]
                if "include" in argument and isinstance(argument["include"], str):
                    argument["include"] = [argument["include"]]
                if "instrument" in argument and argument["instrument"] == "None":
                    argument["instrument"] = None
                return argument

        if not {"inherits", "imports"} - loaded_yaml.keys():
            raise ValueError("Cannot define both inherits and imports sections, use imports")
        tmp_import = loaded_yaml.pop("inherits", None)
        if tmp_import is None:
            tmp_import = loaded_yaml.pop("imports", None)
        else:
            warnings.warn("The 'inherits' key is deprecated, and will be "
                          "removed around June 2021. Please use the key "
                          "'imports' instead")
        if tmp_import is None:
            self.imports = []
        elif isinstance(tmp_import, list):
            self.imports = [ImportIR(**process_args(args)) for args in tmp_import]
        else:
            self.imports = [ImportIR(**process_args(tmp_import))]

        # Accumulate the tasks, labeled subsets, and parameters from each
        # imported pipeline
        accumulate_tasks = {}
        accumulate_labeled_subsets = {}
        accumulated_parameters = ParametersIR({})
        for other_pipeline in self.imports:
            tmp_IR = other_pipeline.toPipelineIR()
            if self.instrument is None:
                self.instrument = tmp_IR.instrument
            elif self.instrument != tmp_IR.instrument and tmp_IR.instrument is not None:
                raise ValueError("Only one instrument can be declared in a pipeline or its imports")
            if accumulate_tasks.keys() & tmp_IR.tasks.keys():
                raise ValueError("Task labels in the imported pipelines must "
                                 "be unique")
            accumulate_tasks.update(tmp_IR.tasks)
            self.contracts.extend(tmp_IR.contracts)
            # Verify that tmp_IR's named subset labels are unique, both
            # amongst the accumulated labeled subsets and the accumulated
            # task labels
            overlapping_subsets = accumulate_labeled_subsets.keys() & tmp_IR.labeled_subsets.keys()
            task_subset_overlap = ((accumulate_labeled_subsets.keys() | tmp_IR.labeled_subsets.keys())
                                   & accumulate_tasks.keys())
            if overlapping_subsets or task_subset_overlap:
                raise ValueError("Labeled subset names must be unique amongst imports in both labels and "
                                 f"named Subsets. Duplicate: {overlapping_subsets | task_subset_overlap}")
            accumulate_labeled_subsets.update(tmp_IR.labeled_subsets)
            accumulated_parameters.update(tmp_IR.parameters)

        # Verify that any accumulated labeled subsets do not clash with a
        # task label from this pipeline
        if accumulate_labeled_subsets.keys() & self.tasks.keys():
            raise ValueError("Labeled subset names must be unique amongst imports in both labels and "
                             "named Subsets")
        # Merge in the named subsets from this document so that it can
        # override any that have been imported
        accumulate_labeled_subsets.update(self.labeled_subsets)
        self.labeled_subsets = accumulate_labeled_subsets

        # Merge the dict of label:TaskIR objects, preserving any configs in
        # the imported pipeline if the labels point to the same class
        for label, task in self.tasks.items():
            if label not in accumulate_tasks:
                accumulate_tasks[label] = task
            elif accumulate_tasks[label].klass == task.klass:
                if task.config is not None:
                    for config in task.config:
                        accumulate_tasks[label].add_or_update_config(config)
            else:
                accumulate_tasks[label] = task
        self.tasks = accumulate_tasks
        self.parameters.update(accumulated_parameters)
    def _read_tasks(self, loaded_yaml):
        """Process the tasks portion of the loaded yaml document.

        Parameters
        ----------
        loaded_yaml : `dict`
            A dictionary which matches the structure that would be produced by
            a yaml reader which parses a pipeline definition document.
        """
        self.tasks = {}
        tmp_tasks = loaded_yaml.pop("tasks", None)
        if tmp_tasks is None:
            tmp_tasks = {}

        if "parameters" in tmp_tasks:
            raise ValueError("parameters is a reserved word and cannot be used as a task label")

        for label, definition in tmp_tasks.items():
            if isinstance(definition, str):
                definition = {"class": definition}
            config = definition.get('config', None)
            if config is None:
                task_config_ir = None
            else:
                if isinstance(config, dict):
                    config = [config]
                task_config_ir = []
                for c in config:
                    file = c.pop("file", None)
                    if file is None:
                        file = []
                    elif not isinstance(file, list):
                        file = [file]
                    task_config_ir.append(ConfigIR(python=c.pop("python", None),
                                                   dataId=c.pop("dataId", None),
                                                   file=file,
                                                   rest=c))
            self.tasks[label] = TaskIR(label, definition["class"], task_config_ir)
    def _remove_contracts(self, label: str):
        """Remove any contracts that contain the given label.

        String comparison used in this way is not the most elegant and may
        have issues, but it is the only feasible way when users can specify
        contracts with generic strings.
        """
        new_contracts = []
        for contract in self.contracts:
            # Match a label that is not preceded by an alphanumeric character
            # or underscore, and that is followed by a dot
            if re.match(f".*([^A-Za-z0-9_]|^){label}[.]", contract.contract):
                continue
            new_contracts.append(contract)
        self.contracts = new_contracts
716 """Subset a pipelineIR to contain only labels specified in
721 labelSpecifier : `set` of `str`
722 Set containing labels that describes how to subset a pipeline.
726 pipeline : `PipelineIR`
727 A new pipelineIR object that is a subset of the old pipelineIR
732 Raised if there is an issue with specified labels
736 This method attempts to prune any contracts that contain labels which
737 are not in the declared subset of labels. This pruning is done using a
738 string based matching due to the nature of contracts and may prune more
739 than it should. Any labeled subsets defined that no longer have all
740 members of the subset present in the pipeline will be removed from the
744 pipeline = copy.deepcopy(self)
749 for label
in labelSpecifier:
750 if label
in pipeline.labeled_subsets:
752 toAdd.update(pipeline.labeled_subsets[label].subset)
753 labelSpecifier.difference_update(toRemove)
754 labelSpecifier.update(toAdd)
756 if not labelSpecifier.issubset(pipeline.tasks.keys()
757 | pipeline.labeled_subsets):
758 difference = labelSpecifier.difference(pipeline.tasks.keys())
759 raise ValueError(
"Not all supplied labels (specified or named subsets) are in the pipeline "
760 f
"definition, extra labels: {difference}")
762 pipeline_labels = set(pipeline.tasks.keys())
766 for label
in pipeline_labels:
767 if label
not in labelSpecifier:
768 pipeline.tasks.pop(label)
769 pipeline._remove_contracts(label)
772 labeled_subsets = copy.copy(pipeline.labeled_subsets)
774 for label, labeled_subset
in labeled_subsets.items():
775 if labeled_subset.subset - pipeline.tasks.keys():
776 pipeline.labeled_subsets.pop(label)
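
    # Illustrative sketch: subsetting by a named subset label expands to its
    # member tasks; contracts and subsets referencing dropped labels are
    # pruned. pipeline_ir here is a hypothetical PipelineIR instance.
    #
    #     subset_ir = pipeline_ir.subset_from_labels({"step1"})
    #     set(subset_ir.tasks) == pipeline_ir.labeled_subsets["step1"].subset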
782 """Create a `PipelineIR` object from a string formatted like a pipeline
787 pipeline_string : `str`
788 A string that is formatted according like a pipeline document
790 loaded_yaml = yaml.load(pipeline_string, Loader=PipelineYamlLoader)
791 return cls(loaded_yaml)
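
    # Illustrative sketch: building a PipelineIR from a minimal inline
    # document. The task label and class path are hypothetical.
    #
    #     pipeline_ir = PipelineIR.from_string(
    #         "description: demo\n"
    #         "tasks:\n"
    #         "  taskA: some.module.TaskA\n")
    #     list(pipeline_ir.tasks)  # ['taskA']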
795 """Create a `PipelineIR` object from the document specified by the
801 Location of document to use in creating a `PipelineIR` object.
803 with open(filename,
'r')
as f:
804 loaded_yaml = yaml.load(f, Loader=PipelineYamlLoader)
805 return cls(loaded_yaml)
808 """Serialize this `PipelineIR` object into a yaml formatted string and
809 write the output to a file at the specified path.
814 Location of document to write a `PipelineIR` object.
816 with open(filename,
'w')
as f:
820 """Convert to a representation used in yaml serialization
827 accumulate[
'tasks'] = {m: t.to_primitives()
for m, t
in self.
tasks.items()}
829 accumulate[
'contracts'] = [c.to_primitives()
for c
in self.
contracts]
831 accumulate[
'subsets'] = {k: v.to_primitives()
for k, v
in self.
labeled_subsets.items()}
835 """Instance formatting as how it would look in yaml representation
840 """Instance formatting as how it would look in yaml representation
845 if not isinstance(other, PipelineIR):
847 elif all(getattr(self, attr) == getattr(other, attr)
for attr
in
848 (
"contracts",
"tasks",
"instrument")):