from __future__ import annotations

__all__ = ("ConfigIR", "ContractError", "ContractIR", "InheritIR",
           "PipelineIR", "TaskIR", "LabeledSubset")

from collections import Counter
from collections.abc import Iterable as abcIterable
from dataclasses import dataclass, field
from typing import Any, List, Set, Union, Generator, MutableMapping, Optional, Dict

import copy
import os
import re
import warnings

import yaml
38 """This is a specialized version of yaml's SafeLoader. It checks and raises
39 an exception if it finds that there are multiple instances of the same key
40 found inside a pipeline file at a given scope.
51 all_keys = Counter(key_node.value
for key_node, _
in node.value)
52 duplicates = {k
for k, i
in all_keys.items()
if i != 1}
54 raise KeyError(
"Pipeline files must not have duplicated keys, "
55 f
"{duplicates} appeared multiple times")
60 """An exception that is raised when a pipeline contract is not satisfied
67 """Intermediate representation of contracts read from a pipeline yaml file.
70 """A string of python code representing one or more conditions on configs
71 in a pipeline. This code-as-string should, once evaluated, should be True
72 if the configs are fine, and False otherwise.
74 msg: Union[str,
None] =
None
75 """An optional message to be shown to the user if a contract fails
79 """Convert to a representation used in yaml serialization
81 accumulate = {
"contract": self.
contract}
82 if self.
msg is not None:
83 accumulate[
'msg'] = self.
msg

    def __eq__(self, other: "ContractIR"):
        if not isinstance(other, ContractIR):
            return False
        elif self.contract == other.contract and self.msg == other.msg:
            return True
        else:
            return False
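
# Illustrative yaml forms (labels and fields assumed) that map onto this IR;
# a contract may be a bare expression string or a mapping with a message:
#
#     contracts:
#         - "taskA.threshold < taskB.threshold"
#         - contract: "len(taskA.filters) == 3"
#           msg: "taskA must be configured with exactly three filters"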
97 """Intermediate representation of named subset of task labels read from
101 """The label used to identify the subset of task labels.
104 """A set of task labels contained in this subset.
106 description: Optional[str]
107 """A description of what this subset of tasks is intended to do
112 """Generate `LabeledSubset` objects given a properly formatted object
113 that as been created by a yaml loader.
118 The label that will be used to identify this labeled subset.
119 value : `list` of `str` or `dict`
120 Object returned from loading a labeled subset section from a yaml
125 labeledSubset : `LabeledSubset`
126 A `LabeledSubset` object build from the inputs.
131 Raised if the value input is not properly formatted for parsing
133 if isinstance(value, MutableMapping):
134 subset = value.pop(
"subset",
None)
136 raise ValueError(
"If a labeled subset is specified as a mapping, it must contain the key "
138 description = value.pop(
"description",
None)
139 elif isinstance(value, abcIterable):
143 raise ValueError(f
"There was a problem parsing the labeled subset {label}, make sure the "
144 "definition is either a valid yaml list, or a mapping with keys "
145 "(subset, description) where subset points to a yaml list, and description is "
146 "associated with a string")
150 """Convert to a representation used in yaml serialization
152 accumulate: Dict[str, Any] = {
"subset": list(self.subset)}
153 if self.description
is not None:
154 accumulate[
"description"] = self.description
160 """Intermediate representation of parameters that are global to a pipeline
162 These parameters are specified under a top level key named `parameters`
163 and are declared as a yaml mapping. These entries can then be used inside
164 task configuration blocks to specify configuration values. They may not be
165 used in the special ``file`` or ``python`` blocks.
174 field1: parameters.shared_value
178 field2: parameters.shared_value
    mapping: MutableMapping[str, str]
    """A mutable mapping of identifiers as keys, and shared configuration
    as values.
    """

    def update(self, other: Optional[ParametersIR]):
        if other is not None:
            self.mapping.update(other.mapping)
189 """Convert to a representation used in yaml serialization
194 return value
in self.mapping
197 return self.mapping[item]
200 return bool(self.mapping)
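
# A short usage sketch (values assumed, not from the original source):
#
#     params = ParametersIR({"shared_value": "14"})
#     "shared_value" in params        # True, via __contains__
#     params["shared_value"]          # "14", via __getitem__
#     params.update(ParametersIR({"other": "x"}))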
205 """Intermediate representation of configurations read from a pipeline yaml
208 python: Union[str,
None] =
None
209 """A string of python code that is used to modify a configuration. This can
210 also be None if there are no modifications to do.
    dataId: Union[dict, None] = None
    """A dataId that is used to constrain these config overrides to only quanta
    with matching dataIds. This field can be None if there is no constraint.
    This is currently an unimplemented feature, and is placed here for future
    use.
    """
    file: List[str] = field(default_factory=list)
    """A list of paths which point to files containing config overrides to be
    applied. This value may be an empty list if there are no overrides to
    apply.
    """
    rest: dict = field(default_factory=dict)
    """This is a dictionary of key value pairs, where the keys are strings
    corresponding to qualified fields on a config to override, and the values
    are strings representing the values to apply.
    """
230 """Convert to a representation used in yaml serialization
233 for name
in (
"python",
"dataId",
"file"):
236 if getattr(self, name):
237 accumulate[name] = getattr(self, name)
240 accumulate.update(self.rest)

    def formatted(self, parameters: ParametersIR) -> ConfigIR:
        """Returns a new ConfigIR object that is formatted according to the
        specified parameters

        Parameters
        ----------
        parameters : ParametersIR
            Object that contains variable mappings used in substitution.

        Returns
        -------
        new_config : ConfigIR
            A new ConfigIR object formatted with the input parameters
        """
        new_config = copy.deepcopy(self)
        for key, value in new_config.rest.items():
            if not isinstance(value, str):
                continue
            match = re.match("parameters[.](.*)", value)
            if match and match.group(1) in parameters:
                new_config.rest[key] = parameters[match.group(1)]
            if match and match.group(1) not in parameters:
                warnings.warn(f"config {key} contains value {match.group(0)} which is formatted like a "
                              "Pipeline parameter but was not found within the Pipeline; if this was not "
                              "intentional, check for a typo")
        return new_config

    def maybe_merge(self, other_config: "ConfigIR") -> Generator["ConfigIR", None, None]:
        """Merges another instance of a `ConfigIR` into this instance if
        possible. This function returns a generator that yields either self,
        if the configs were merged, or self and other_config, if they could
        not be merged.

        Parameters
        ----------
        other_config : `ConfigIR`
            An instance of `ConfigIR` to merge into this instance.

        Returns
        -------
        Generator : `ConfigIR`
            A generator containing either self, or self and other_config,
            depending on whether the configs could be merged.
        """
        # Configs can only be merged if they apply to the same dataId and
        # neither carries python blocks or override files.
        if self.dataId != other_config.dataId or self.python or other_config.python \
                or self.file or other_config.file:
            yield from (self, other_config)
            return

        # Find the keys common to both configs and verify that none of them
        # is assigned a different value in the two configs.
        key_union = self.rest.keys() & other_config.rest.keys()
        for key in key_union:
            if self.rest[key] != other_config.rest[key]:
                yield from (self, other_config)
                return
        self.rest.update(other_config.rest)

        # Combine the lists of override files to load.
        self_file_set = set(self.file)
        other_file_set = set(other_config.file)
        self.file = list(self_file_set.union(other_file_set))

        yield self

    def __eq__(self, other: "ConfigIR"):
        if not isinstance(other, ConfigIR):
            return False
        elif all(getattr(self, attr) == getattr(other, attr) for attr in
                 ("python", "dataId", "file", "rest")):
            return True
        else:
            return False
321 """Intermediate representation of tasks read from a pipeline yaml file.
324 """An identifier used to refer to a task.
327 """A string containing a fully qualified python class to be run in a
330 config: Union[List[ConfigIR],
None] =
None
331 """List of all configs overrides associated with this task, and may be
332 `None` if there are no config overrides.
336 """Convert to a representation used in yaml serialization
338 accumulate = {
'class': self.klass}
340 accumulate[
'config'] = [c.to_primitives()
for c
in self.
config]
344 """Adds a `ConfigIR` to this task if one is not present. Merges configs
345 if there is a `ConfigIR` present and the dataId keys of both configs
346 match, otherwise adds a new entry to the config list. The exception to
347 the above is that if either the last config or other_config has a
348 python block, then other_config is always added, as python blocks can
349 modify configs in ways that cannot be predicted.
353 other_config : `ConfigIR`
354 A `ConfigIR` instance to add or merge into the config attribute of
360 self.
config.extend(self.
config.pop().maybe_merge(other_config))

    def __eq__(self, other: "TaskIR"):
        if not isinstance(other, TaskIR):
            return False
        elif all(getattr(self, attr) == getattr(other, attr) for attr in
                 ("label", "klass", "config")):
            return True
        else:
            return False
374 """An intermediate representation of inherited pipelines
377 """This is the location of the pipeline to inherit. The path should be
378 specified as an absolute path. Environment variables may be used in the
379 path and should be specified as a python string template, with the name of
380 the environment variable inside braces.
    include: Union[List[str], None] = None
    """List of tasks that should be included when inheriting this pipeline.
    Either the include or exclude attributes may be specified, but not both.
    """
    exclude: Union[List[str], None] = None
    """List of tasks that should be excluded when inheriting this pipeline.
    Either the include or exclude attributes may be specified, but not both.
    """
    importContracts: bool = True
    """Boolean attribute to dictate if contracts should be inherited with the
    pipeline or not.
    """
396 """Load in the Pipeline specified by this object, and turn it into a
401 instrument : Optional `str`
402 A string giving the fully qualified path to an instrument object.
403 If a inherited pipeline defines the same instrument as defined in
404 this variable, an import warning message is skipped.
408 pipeline : `PipelineIR`
409 A pipeline generated from the imported pipeline file
411 if self.include
and self.exclude:
412 raise ValueError(
"Both an include and an exclude list cant be specified"
413 " when declaring a pipeline import")
414 tmp_pipeline = PipelineIR.from_file(os.path.expandvars(self.location))
415 if tmp_pipeline.instrument
is not None and tmp_pipeline.instrument != instrument:
416 warnings.warn(
"Any instrument definitions in imported pipelines are ignored. "
417 "if an instrument is desired please define it in the top most pipeline")
419 included_labels = set()
420 for label
in tmp_pipeline.tasks:
421 if (self.include
and label
in self.include)
or (self.exclude
and label
not in self.exclude)\
422 or (self.include
is None and self.exclude
is None):
423 included_labels.add(label)
427 if self.include
is not None:
428 subsets_in_include = tmp_pipeline.labeled_subsets.keys() & self.include
429 for label
in subsets_in_include:
430 included_labels.update(tmp_pipeline.labeled_subsets[label].subset)
432 elif self.exclude
is not None:
433 subsets_in_exclude = tmp_pipeline.labeled_subsets.keys() & self.exclude
434 for label
in subsets_in_exclude:
435 included_labels.difference_update(tmp_pipeline.labeled_subsets[label].subset)
437 tmp_pipeline = tmp_pipeline.subset_from_labels(included_labels)
439 if not self.importContracts:
440 tmp_pipeline.contracts = []

    def __eq__(self, other: "InheritIR"):
        if not isinstance(other, InheritIR):
            return False
        elif all(getattr(self, attr) == getattr(other, attr) for attr in
                 ("location", "include", "exclude", "importContracts")):
            return True
        else:
            return False
455 """Intermediate representation of a pipeline definition
460 A dictionary which matches the structure that would be produced by a
461 yaml reader which parses a pipeline definition document
466 - If a pipeline is declared without a description
467 - If no tasks are declared in a pipeline, and no pipelines are to be
469 - If more than one instrument is specified
470 - If more than one inherited pipeline share a label
474 if "description" not in loaded_yaml:
475 raise ValueError(
"A pipeline must be declared with a description")
476 if "tasks" not in loaded_yaml
and "inherits" not in loaded_yaml:
477 raise ValueError(
"A pipeline must be declared with one or more tasks")
        # Process instrument keys
        inst = loaded_yaml.pop("instrument", None)
        if isinstance(inst, list):
            raise ValueError("Only one top level instrument can be defined in a pipeline")
        self.instrument = inst
        # Process contracts, parameters, subsets, and inherited pipelines;
        # subset verification must run after inherits are processed.
        self._read_contracts(loaded_yaml)
        self._read_parameters(loaded_yaml)
        self._read_labeled_subsets(loaded_yaml)
        self._read_inherits(loaded_yaml)
        self._verify_labeled_subsets()

    def _read_contracts(self, loaded_yaml):
        """Process the contracts portion of the loaded yaml document

        Parameters
        ----------
        loaded_yaml : `dict`
            A dictionary which matches the structure that would be produced by
            a yaml reader which parses a pipeline definition document
        """
        loaded_contracts = loaded_yaml.pop("contracts", [])
        if isinstance(loaded_contracts, str):
            loaded_contracts = [loaded_contracts]
        self.contracts = []
        for contract in loaded_contracts:
            if isinstance(contract, dict):
                self.contracts.append(ContractIR(**contract))
            if isinstance(contract, str):
                self.contracts.append(ContractIR(contract=contract))

    def _read_parameters(self, loaded_yaml):
        """Process the parameters portion of the loaded yaml document

        Parameters
        ----------
        loaded_yaml : `dict`
            A dictionary which matches the structure that would be produced by
            a yaml reader which parses a pipeline definition document
        """
        loaded_parameters = loaded_yaml.pop("parameters", {})
        if not isinstance(loaded_parameters, dict):
            raise ValueError("The parameters section must be a yaml mapping")
        self.parameters = ParametersIR(loaded_parameters)

    def _read_labeled_subsets(self, loaded_yaml: dict):
        """Process the subsets portion of the loaded yaml document

        Parameters
        ----------
        loaded_yaml : `MutableMapping`
            A dictionary which matches the structure that would be produced
            by a yaml reader which parses a pipeline definition document
        """
        loaded_subsets = loaded_yaml.pop("subsets", {})
        self.labeled_subsets = {}
        if not loaded_subsets and "subset" in loaded_yaml:
            raise ValueError("Top level key should be subsets and not subset, add an s")
        for key, value in loaded_subsets.items():
            self.labeled_subsets[key] = LabeledSubset.from_primitives(key, value)

    def _verify_labeled_subsets(self):
        """Verifies that all the labels in each named subset exist within the
        pipeline.
        """
        # Verify that all labels defined in a labeled subset are in the
        # declared pipeline
        for labeled_subset in self.labeled_subsets.values():
            if not labeled_subset.subset.issubset(self.tasks.keys()):
                raise ValueError(f"Labels {labeled_subset.subset - self.tasks.keys()} were not found in the "
                                 "declared pipeline")
        # Verify that no labeled subset shares a label with a task
        label_intersection = self.labeled_subsets.keys() & self.tasks.keys()
        if label_intersection:
            raise ValueError(f"Labeled subsets can not use the same label as a task: {label_intersection}")

    def _read_inherits(self, loaded_yaml):
        """Process the inherits portion of the loaded yaml document

        Parameters
        ----------
        loaded_yaml : `dict`
            A dictionary which matches the structure that would be produced by
            a yaml reader which parses a pipeline definition document
        """
        def process_args(argument: Union[str, dict]) -> dict:
            if isinstance(argument, str):
                return {"location": argument}
            elif isinstance(argument, dict):
                if "exclude" in argument and isinstance(argument["exclude"], str):
                    argument["exclude"] = [argument["exclude"]]
                if "include" in argument and isinstance(argument["include"], str):
                    argument["include"] = [argument["include"]]
                return argument

        tmp_inherit = loaded_yaml.pop("inherits", None)
        if tmp_inherit is None:
            self.inherits = []
        elif isinstance(tmp_inherit, list):
            self.inherits = [InheritIR(**process_args(args)) for args in tmp_inherit]
        else:
            self.inherits = [InheritIR(**process_args(tmp_inherit))]

        accumulate_tasks = {}
        accumulate_labeled_subsets = {}
        accumulated_parameters = ParametersIR({})
        for other_pipeline in self.inherits:
            tmp_IR = other_pipeline.toPipelineIR(instrument=self.instrument)
            if accumulate_tasks.keys() & tmp_IR.tasks.keys():
                raise ValueError("Task labels in the imported pipelines must "
                                 "be unique")
            accumulate_tasks.update(tmp_IR.tasks)
            self.contracts.extend(tmp_IR.contracts)
            # Verify that the imported pipeline's labeled subsets are unique
            # amongst the accumulated subsets, and that none of them clashes
            # with an accumulated task label.
            overlapping_subsets = accumulate_labeled_subsets.keys() & tmp_IR.labeled_subsets.keys()
            task_subset_overlap = ((accumulate_labeled_subsets.keys() | tmp_IR.labeled_subsets.keys())
                                   & accumulate_tasks.keys())
            if overlapping_subsets or task_subset_overlap:
                raise ValueError("Labeled subset names must be unique amongst imports in both labels and "
                                 f"named Subsets. Duplicate: {overlapping_subsets | task_subset_overlap}")
            accumulate_labeled_subsets.update(tmp_IR.labeled_subsets)
            accumulated_parameters.update(tmp_IR.parameters)

        # Verify that any accumulated labeled subsets do not clash with a
        # task label from this pipeline.
        if accumulate_labeled_subsets.keys() & self.tasks.keys():
            raise ValueError("Labeled subset names must be unique amongst imports in both labels and "
                             "named Subsets")
        self.labeled_subsets.update(accumulate_labeled_subsets)

        # Merge the tasks declared in this pipeline into the accumulated
        # tasks: a matching class merges configs, a different class replaces
        # the inherited task outright.
        for label, task in self.tasks.items():
            if label not in accumulate_tasks:
                accumulate_tasks[label] = task
            elif accumulate_tasks[label].klass == task.klass:
                if task.config is not None:
                    for config in task.config:
                        accumulate_tasks[label].add_or_update_config(config)
            else:
                accumulate_tasks[label] = task
        self.tasks = accumulate_tasks
        self.parameters.update(accumulated_parameters)

    def _read_tasks(self, loaded_yaml):
        """Process the tasks portion of the loaded yaml document

        Parameters
        ----------
        loaded_yaml : `dict`
            A dictionary which matches the structure that would be produced by
            a yaml reader which parses a pipeline definition document
        """
        self.tasks = {}
        tmp_tasks = loaded_yaml.pop("tasks", None)
        if tmp_tasks is None:
            tmp_tasks = {}
658 if "parameters" in tmp_tasks:
659 raise ValueError(
"parameters is a reserved word and cannot be used as a task label")

        for label, definition in tmp_tasks.items():
            if isinstance(definition, str):
                definition = {"class": definition}
            config = definition.get('config', None)
            if config is None:
                task_config_ir = None
            else:
                if isinstance(config, dict):
                    config = [config]
                task_config_ir = []
                for c in config:
                    file = c.pop("file", None)
                    if file is None:
                        file = []
                    elif not isinstance(file, list):
                        file = [file]
                    task_config_ir.append(ConfigIR(python=c.pop("python", None),
                                                   dataId=c.pop("dataId", None),
                                                   file=file,
                                                   rest=c))
            self.tasks[label] = TaskIR(label, definition["class"], task_config_ir)

    def _remove_contracts(self, label: str):
        """Remove any contracts that contain the given label

        String comparison used in this way is not the most elegant and may
        have issues, but it is the only feasible way when users can specify
        contracts with generic strings.

        Parameters
        ----------
        label : `str`
            A label that should not appear in any of the retained contracts
        """
        new_contracts = []
        for contract in self.contracts:
            # Match the label as a standalone identifier followed by a dot,
            # i.e. not as a substring of a longer variable name.
            if re.match(f".*([^A-Za-z0-9_]|^){label}[.]", contract.contract):
                continue
            new_contracts.append(contract)
        self.contracts = new_contracts
700 """Subset a pipelineIR to contain only labels specified in
705 labelSpecifier : `set` of `str`
706 Set containing labels that describes how to subset a pipeline.
710 pipeline : `PipelineIR`
711 A new pipelineIR object that is a subset of the old pipelineIR
716 Raised if there is an issue with specified labels
720 This method attempts to prune any contracts that contain labels which
721 are not in the declared subset of labels. This pruning is done using a
722 string based matching due to the nature of contracts and may prune more
723 than it should. Any labeled subsets defined that no longer have all
724 members of the subset present in the pipeline will be removed from the
728 pipeline = copy.deepcopy(self)
733 for label
in labelSpecifier:
734 if label
in pipeline.labeled_subsets:
736 toAdd.update(pipeline.labeled_subsets[label].subset)
737 labelSpecifier.difference_update(toRemove)
738 labelSpecifier.update(toAdd)
740 if not labelSpecifier.issubset(pipeline.tasks.keys()
741 | pipeline.labeled_subsets):
742 difference = labelSpecifier.difference(pipeline.tasks.keys())
743 raise ValueError(
"Not all supplied labels (specified or named subsets) are in the pipeline "
744 f
"definition, extra labels: {difference}")
746 pipeline_labels = set(pipeline.tasks.keys())
750 for label
in pipeline_labels:
751 if label
not in labelSpecifier:
752 pipeline.tasks.pop(label)
753 pipeline._remove_contracts(label)
756 labeled_subsets = copy.copy(pipeline.labeled_subsets)
758 for label, labeled_subset
in labeled_subsets.items():
759 if labeled_subset.subset - pipeline.tasks.keys():
760 pipeline.labeled_subsets.pop(label)
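
    # Usage sketch (labels assumed): named subsets in the specifier expand to
    # their member labels; everything else is dropped, along with contracts
    # that mention a dropped label.
    #
    #     subset_ir = pipeline_ir.subset_from_labels({"taskA", "processCcd"})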
766 """Create a `PipelineIR` object from a string formatted like a pipeline
771 pipeline_string : `str`
772 A string that is formatted according like a pipeline document
774 loaded_yaml = yaml.load(pipeline_string, Loader=PipelineYamlLoader)
775 return cls(loaded_yaml)
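
    # Minimal usage sketch (document content assumed):
    #
    #     ir = PipelineIR.from_string(
    #         "description: demo\n"
    #         "tasks:\n"
    #         "    taskA: mypkg.tasks.TaskA\n"
    #     )
    #     ir.tasks["taskA"].klass   # "mypkg.tasks.TaskA"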
779 """Create a `PipelineIR` object from the document specified by the
785 Location of document to use in creating a `PipelineIR` object.
787 with open(filename,
'r')
as f:
788 loaded_yaml = yaml.load(f, Loader=PipelineYamlLoader)
789 return cls(loaded_yaml)
792 """Serialize this `PipelineIR` object into a yaml formatted string and
793 write the output to a file at the specified path.
798 Location of document to write a `PipelineIR` object.
800 with open(filename,
'w')
as f:
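
    # Round-trip sketch (paths assumed; the dump call above is a
    # reconstruction of the original serialization):
    #
    #     ir = PipelineIR.from_file("/tmp/pipeline.yaml")
    #     ir.to_file("/tmp/pipeline_copy.yaml")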
804 """Convert to a representation used in yaml serialization
811 accumulate[
'tasks'] = {m: t.to_primitives()
for m, t
in self.
tasks.items()}
813 accumulate[
'contracts'] = [c.to_primitives()
for c
in self.
contracts]
815 accumulate[
'subsets'] = {k: v.to_primitives()
for k, v
in self.
labeled_subsets.items()}
819 """Instance formatting as how it would look in yaml representation
824 """Instance formatting as how it would look in yaml representation

    def __eq__(self, other: "PipelineIR"):
        if not isinstance(other, PipelineIR):
            return False
        elif all(getattr(self, attr) == getattr(other, attr) for attr in
                 ("contracts", "tasks", "instrument")):
            return True
        else:
            return False