# Public names exported by this module (the pipeline intermediate
# representation classes and the contract-failure exception).
__all__ = (
    "ConfigIR",
    "ContractError",
    "ContractIR",
    "InheritIR",
    "PipelineIR",
    "TaskIR",
)
import os
import warnings
from collections import Counter
from dataclasses import dataclass, field
from typing import Generator, List, Union

import yaml
class PipelineYamlLoader(yaml.SafeLoader):
    """This is a specialized version of yaml's SafeLoader. It checks and raises
    an exception if it finds that there are multiple instances of the same key
    found inside a pipeline file at a given scope.
    """

    def construct_mapping(self, node, deep=False):
        # Delegate to the base loader first so that malformed nodes surface
        # through yaml's own error reporting before we inspect the keys.
        mapping = super().construct_mapping(node, deep)
        # Tally every key appearing at this scope of the document; any key
        # seen more than once is a duplicate the user must resolve.
        key_counts = Counter(key_node.value for key_node, _ in node.value)
        duplicates = {key for key, count in key_counts.items() if count != 1}
        if duplicates:
            raise KeyError("Pipeline files must not have duplicated keys, "
                           f"{duplicates} appeared multiple times")
        return mapping
class ContractError(Exception):
    """An exception that is raised when a pipeline contract is not satisfied
    """
    pass
@dataclass
class ContractIR:
    """Intermediate representation of contracts read from a pipeline yaml file.
    """

    contract: str
    """A string of python code representing one or more conditions on configs
    in a pipeline. This code-as-string, once evaluated, should be True if the
    configs are fine, and False otherwise.
    """
    msg: Union[str, None] = None
    """An optional message to be shown to the user if a contract fails
    """

    def to_primitives(self) -> dict:
        """Convert to a representation used in yaml serialization
        """
        primitives = {"contract": self.contract}
        # Only serialize the message when one was actually supplied.
        if self.msg is not None:
            primitives['msg'] = self.msg
        return primitives

    def __eq__(self, other: "ContractIR"):
        # Contracts compare equal when both the code string and the optional
        # message match; anything that is not a ContractIR is unequal.
        if not isinstance(other, ContractIR):
            return False
        return self.contract == other.contract and self.msg == other.msg
@dataclass
class ConfigIR:
    """Intermediate representation of configurations read from a pipeline yaml
    file.
    """

    python: Union[str, None] = None
    """A string of python code that is used to modify a configuration. This can
    also be None if there are no modifications to do.
    """
    dataId: Union[dict, None] = None
    """A dataId that is used to constrain these config overrides to only quanta
    with matching dataIds. This field can be None if there is no constraint.
    This is currently an unimplemented feature, and is placed here for future
    use.
    """
    file: List[str] = field(default_factory=list)
    """A list of paths which points to a file containing config overrides to be
    applied. This value may be an empty list if there are no overrides to apply.
    """
    rest: dict = field(default_factory=dict)
    """This is a dictionary of key value pairs, where the keys are strings
    corresponding to qualified fields on a config to override, and the values
    are strings representing the values to apply.
    """

    def to_primitives(self) -> dict:
        """Convert to a representation used in yaml serialization
        """
        primitives = {}
        # Only record attributes with a truthy value; empty entries would just
        # add noise to the serialized document.
        for attribute in ("python", "dataId", "file"):
            value = getattr(self, attribute)
            if value:
                primitives[attribute] = value
        # The free-form key/value overrides are stored inline at the same level.
        primitives.update(self.rest)
        return primitives

    def maybe_merge(self, other_config: "ConfigIR") -> Generator["ConfigIR", None, None]:
        """Merges another instance of a `ConfigIR` into this instance if
        possible. This function returns a generator that is either self
        if the configs were merged, or self, and other_config if that could
        not be done.

        Parameters
        ----------
        other_config : `ConfigIR`
            An instance of `ConfigIR` to merge into this instance.

        Returns
        -------
        Generator : `ConfigIR`
            A generator containing either self, or self and other_config if
            the configs could be merged or not respectively.
        """
        # Differing dataIds, any python block, or any override file make a
        # merge unsafe: hand both configs back unchanged.
        if self.dataId != other_config.dataId or self.python or other_config.python or\
                self.file or other_config.file:
            yield from (self, other_config)
            return

        # A key present in both configs with different values also blocks the
        # merge; check all shared keys before mutating anything.
        for key in self.rest.keys() & other_config.rest.keys():
            if self.rest[key] != other_config.rest[key]:
                yield from (self, other_config)
                return

        # Safe to merge: fold the other config's overrides into this one.
        self.rest.update(other_config.rest)

        # Union the override-file lists. (Given the guard above both lists are
        # empty here; kept for safety should the guard ever be relaxed.)
        self.file = list(set(self.file) | set(other_config.file))

        yield self

    def __eq__(self, other: "ConfigIR"):
        # Two configs are equal when every dataclass field matches.
        if not isinstance(other, ConfigIR):
            return False
        return all(getattr(self, attribute) == getattr(other, attribute)
                   for attribute in ("python", "dataId", "file", "rest"))
@dataclass
class TaskIR:
    """Intermediate representation of tasks read from a pipeline yaml file.
    """

    label: str
    """An identifier used to refer to a task.
    """
    klass: str
    """A string containing a fully qualified python class to be run in a
    pipeline.
    """
    config: Union[List[ConfigIR], None] = None
    """List of all configs overrides associated with this task, and may be
    `None` if there are no config overrides.
    """

    def to_primitives(self) -> dict:
        """Convert to a representation used in yaml serialization
        """
        primitives = {'class': self.klass}
        if self.config:
            primitives['config'] = [c.to_primitives() for c in self.config]
        return primitives

    def add_or_update_config(self, other_config: ConfigIR):
        """Adds a `ConfigIR` to this task if one is not present. Merges configs
        if there is a `ConfigIR` present and the dataId keys of both configs
        match, otherwise adds a new entry to the config list. The exception to
        the above is that if either the last config or other_config has a python
        block, then other_config is always added, as python blocks can modify
        configs in ways that cannot be predicted.

        Parameters
        ----------
        other_config : `ConfigIR`
            A `ConfigIR` instance to add or merge into the config attribute of
            this task.
        """
        if not self.config:
            self.config = [other_config]
            return
        # Try to fold the new config into the most recently added block;
        # maybe_merge yields back one or two configs as appropriate.
        self.config.extend(self.config.pop().maybe_merge(other_config))

    def __eq__(self, other: "TaskIR"):
        # Tasks are equal when label, class, and config overrides all match.
        if not isinstance(other, TaskIR):
            return False
        return all(getattr(self, attribute) == getattr(other, attribute)
                   for attribute in ("label", "klass", "config"))
@dataclass
class InheritIR:
    """An intermediate representation of inherited pipelines
    """

    location: str
    """This is the location of the pipeline to inherit. The path should be
    specified as an absolute path. Environment variables may be used in the path
    and should be specified as a python string template, with the name of the
    environment variable inside braces.
    """
    include: Union[List[str], None] = None
    """List of tasks that should be included when inheriting this pipeline.
    Either the include or exclude attributes may be specified, but not both.
    """
    exclude: Union[List[str], None] = None
    """List of tasks that should be excluded when inheriting this pipeline.
    Either the include or exclude attributes may be specified, but not both.
    """
    importContracts: bool = True
    """Boolean attribute to dictate if contracts should be inherited with the
    pipeline or not.
    """

    def toPipelineIR(self) -> "PipelineIR":
        """Load the pipeline specified by this object and return it as a
        `PipelineIR`, with this object's include/exclude task filters and
        contract-import setting applied.
        """
        if self.include and self.exclude:
            raise ValueError("Both an include and an exclude list cant be specified"
                             " when declaring a pipeline import")
        tmp_pipeline = PipelineIR.from_file(os.path.expandvars(self.location))
        # Only the top-level pipeline may declare an instrument.
        if tmp_pipeline.instrument is not None:
            warnings.warn("Any instrument definitions in imported pipelines are ignored. "
                          "if an instrument is desired please define it in the top most pipeline")

        # Keep a task when it passes whichever filter was supplied, or when no
        # filter was supplied at all.
        new_tasks = {}
        for label, task in tmp_pipeline.tasks.items():
            if (self.include and label in self.include) or (self.exclude and label not in self.exclude)\
                    or (self.include is None and self.exclude is None):
                new_tasks[label] = task
        tmp_pipeline.tasks = new_tasks

        if not self.importContracts:
            tmp_pipeline.contracts = []

        return tmp_pipeline

    def __eq__(self, other: "InheritIR"):
        # Two inherit declarations are equal when every field matches.
        if not isinstance(other, InheritIR):
            return False
        return all(getattr(self, attribute) == getattr(other, attribute)
                   for attribute in ("location", "include", "exclude", "importContracts"))
class PipelineIR:
    """Intermediate representation of a pipeline definition

    Parameters
    ----------
    loaded_yaml : `dict`
        A dictionary which matches the structure that would be produced by a
        yaml reader which parses a pipeline definition document

    Raises
    ------
    ValueError :
        - If a pipeline is declared without a description
        - If no tasks are declared in a pipeline, and no pipelines are to be
          inherited
        - If more than one instrument is specified
        - If more than one inherited pipeline share a label
    """

    def __init__(self, loaded_yaml):
        # Every pipeline document must carry a description.
        if "description" not in loaded_yaml:
            raise ValueError("A pipeline must be declared with a description")
        # A pipeline with neither tasks nor inherited pipelines is empty.
        if "tasks" not in loaded_yaml and "inherits" not in loaded_yaml:
            raise ValueError("A pipeline must be declared with one or more tasks")

        self.description = loaded_yaml.pop("description")

        # At most one top level instrument may be declared.
        inst = loaded_yaml.pop("instrument", None)
        if isinstance(inst, list):
            raise ValueError("Only one top level instrument can be defined in a pipeline")
        self.instrument = inst

        # Order matters: contracts and tasks must exist before inherited
        # pipelines are merged into them.
        self._read_contracts(loaded_yaml)
        self._read_tasks(loaded_yaml)
        self._read_inherits(loaded_yaml)

    def _read_contracts(self, loaded_yaml):
        """Process the contracts portion of the loaded yaml document

        Parameters
        ----------
        loaded_yaml : `dict`
            A dictionary which matches the structure that would be produced by
            a yaml reader which parses a pipeline definition document
        """
        loaded_contracts = loaded_yaml.pop("contracts", [])
        # A single bare string is treated as a one-element list of contracts.
        if isinstance(loaded_contracts, str):
            loaded_contracts = [loaded_contracts]
        self.contracts = []
        for contract in loaded_contracts:
            # A mapping supplies ContractIR's keyword arguments directly; a
            # string is just the contract expression itself.
            if isinstance(contract, dict):
                self.contracts.append(ContractIR(**contract))
            if isinstance(contract, str):
                self.contracts.append(ContractIR(contract=contract))

    def _read_inherits(self, loaded_yaml):
        """Process the inherits portion of the loaded yaml document

        Parameters
        ----------
        loaded_yaml : `dict`
            A dictionary which matches the structure that would be produced by
            a yaml reader which parses a pipeline definition document
        """
        def process_args(argument: Union[str, dict]) -> dict:
            # Normalize one inherit declaration into InheritIR keyword
            # arguments; bare strings become locations, and scalar
            # include/exclude entries become one-element lists.
            if isinstance(argument, str):
                return {"location": argument}
            elif isinstance(argument, dict):
                if "exclude" in argument and isinstance(argument["exclude"], str):
                    argument["exclude"] = [argument["exclude"]]
                if "include" in argument and isinstance(argument["include"], str):
                    argument["include"] = [argument["include"]]
                return argument

        tmp_inherit = loaded_yaml.pop("inherits", None)
        if tmp_inherit is None:
            self.inherits = []
        elif isinstance(tmp_inherit, list):
            self.inherits = [InheritIR(**process_args(args)) for args in tmp_inherit]
        else:
            self.inherits = [InheritIR(**process_args(tmp_inherit))]

        # Load every inherited pipeline, verifying that no two of them define
        # the same task label.
        accumulate_tasks = {}
        for other_pipeline in self.inherits:
            tmp_IR = other_pipeline.toPipelineIR()
            if accumulate_tasks.keys() & tmp_IR.tasks.keys():
                raise ValueError("Task labels in the imported pipelines must "
                                 "be unique")
            accumulate_tasks.update(tmp_IR.tasks)
            # toPipelineIR has already emptied contracts when importContracts
            # is False, so extending here is safe.
            self.contracts.extend(tmp_IR.contracts)

        # Tasks declared in this document take precedence over inherited ones;
        # when the classes agree the config blocks are merged instead.
        for label, task in self.tasks.items():
            if label not in accumulate_tasks:
                accumulate_tasks[label] = task
            elif accumulate_tasks[label].klass == task.klass:
                if task.config is not None:
                    for config in task.config:
                        accumulate_tasks[label].add_or_update_config(config)
            else:
                accumulate_tasks[label] = task
        self.tasks = accumulate_tasks

    def _read_tasks(self, loaded_yaml):
        """Process the tasks portion of the loaded yaml document

        Parameters
        ----------
        loaded_yaml : `dict`
            A dictionary which matches the structure that would be produced by
            a yaml reader which parses a pipeline definition document
        """
        self.tasks = {}
        tmp_tasks = loaded_yaml.pop("tasks", None)
        if tmp_tasks is None:
            return

        for label, definition in tmp_tasks.items():
            # A bare string is shorthand for {"class": <string>}.
            if isinstance(definition, str):
                definition = {"class": definition}
            config = definition.get('config', None)
            if config is None:
                task_config_ir = None
            else:
                # Accept either a single config mapping or a list of them.
                if isinstance(config, dict):
                    config = [config]
                task_config_ir = []
                for c in config:
                    # Normalize the file entry into a list of paths.
                    file = c.pop("file", None)
                    if file is None:
                        file = []
                    elif not isinstance(file, list):
                        file = [file]
                    # Whatever keys remain in c are field overrides (rest).
                    task_config_ir.append(ConfigIR(python=c.pop("python", None),
                                                   dataId=c.pop("dataId", None),
                                                   file=file,
                                                   rest=c))
            self.tasks[label] = TaskIR(label, definition["class"], task_config_ir)

    @classmethod
    def from_string(cls, pipeline_string: str):
        """Create a `PipelineIR` object from a string formatted like a pipeline
        document

        Parameters
        ----------
        pipeline_string : `str`
            A string that is formatted according like a pipeline document
        """
        loaded_yaml = yaml.load(pipeline_string, Loader=PipelineYamlLoader)
        return cls(loaded_yaml)

    @classmethod
    def from_file(cls, filename: str):
        """Create a `PipelineIR` object from the document specified by the
        input path.

        Parameters
        ----------
        filename : `str`
            Location of document to use in creating a `PipelineIR` object.
        """
        with open(filename, 'r') as f:
            loaded_yaml = yaml.load(f, Loader=PipelineYamlLoader)
        return cls(loaded_yaml)

    def to_file(self, filename: str):
        """Serialize this `PipelineIR` object into a yaml formatted string and
        write the output to a file at the specified path.

        Parameters
        ----------
        filename : `str`
            Location of document to write a `PipelineIR` object.
        """
        with open(filename, 'w') as f:
            yaml.dump(self.to_primitives(), f, sort_keys=False)

    def to_primitives(self):
        """Convert to a representation used in yaml serialization
        """
        accumulate = {"description": self.description}
        if self.instrument is not None:
            accumulate['instrument'] = self.instrument
        accumulate['tasks'] = {m: t.to_primitives() for m, t in self.tasks.items()}
        if len(self.contracts) > 0:
            accumulate['contracts'] = [c.to_primitives() for c in self.contracts]
        return accumulate

    def __str__(self) -> str:
        """Instance formatting as how it would look in yaml representation
        """
        return str(self.to_primitives())

    def __repr__(self) -> str:
        """Instance formatting as how it would look in yaml representation
        """
        return str(self)

    def __eq__(self, other: "PipelineIR"):
        # NOTE(review): description is deliberately excluded from the
        # comparison tuple; only the semantic content is compared.
        if not isinstance(other, PipelineIR):
            return False
        return all(getattr(self, attribute) == getattr(other, attribute)
                   for attribute in ("contracts", "tasks", "instrument"))