__all__ = ("ConfigIR", "ContractError", "ContractIR", "InheritIR", "PipelineIR", "TaskIR")

import os
import warnings

from collections import Counter
from dataclasses import dataclass, field
from typing import Generator, List, Union

import yaml
33 """This is a specialized version of yaml's SafeLoader. It checks and raises
34 an exception if it finds that there are multiple instances of the same key
35 found inside a pipeline file at a given scope.
46 all_keys = Counter(key_node.value
for key_node, _
in node.value)
47 duplicates = {k
for k, i
in all_keys.items()
if i != 1}
49 raise KeyError(
"Pipeline files must not have duplicated keys, "
50 f
"{duplicates} appeared multiple times")
55 """An exception that is raised when a pipeline contract is not satisfied
62 """Intermediate representation of contracts read from a pipeline yaml file.
65 """A string of python code representing one or more conditions on configs
66 in a pipeline. This code-as-string should, once evaluated, should be True
67 if the configs are fine, and False otherwise.
69 msg: Union[str,
None] =
None
70 """An optional message to be shown to the user if a contract fails
74 """Convert to a representation used in yaml serialization
76 accumulate = {
"contract": self.
contract}
77 if self.
msg is not None:
78 accumulate[
'msg'] = self.
msg

    def __eq__(self, other: "ContractIR"):
        if not isinstance(other, ContractIR):
            return False
        elif self.contract == other.contract and self.msg == other.msg:
            return True
        else:
            return False
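
    # Illustrative sketch; the expression and message below are hypothetical.
    #
    #     contract = ContractIR(contract="task1.threshold <= task2.threshold",
    #                           msg="task1 must not exceed task2's threshold")
    #     contract.to_primitives()
    #     # -> {'contract': "task1.threshold <= task2.threshold",
    #     #     'msg': "task1 must not exceed task2's threshold"}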
92 """Intermediate representation of configurations read from a pipeline yaml
95 python: Union[str,
None] =
None
96 """A string of python code that is used to modify a configuration. This can
97 also be None if there are no modifications to do.
99 dataId: Union[dict,
None] =
None
100 """A dataId that is used to constrain these config overrides to only quanta
101 with matching dataIds. This field can be None if there is no constraint.
102 This is currently an unimplemented feature, and is placed here for future
105 file: List[str] = field(default_factory=list)
106 """A list of paths which points to a file containing config overrides to be
107 applied. This value may be an empty list if there are no overrides to
110 rest: dict = field(default_factory=dict)
111 """This is a dictionary of key value pairs, where the keys are strings
112 corresponding to qualified fields on a config to override, and the values
113 are strings representing the values to apply.
117 """Convert to a representation used in yaml serialization
120 for name
in (
"python",
"dataId",
"file"):
123 if getattr(self, name):
124 accumulate[name] = getattr(self, name)
127 accumulate.update(self.rest)

    def maybe_merge(self, other_config: "ConfigIR") -> Generator["ConfigIR", None, None]:
        """Merges another instance of a `ConfigIR` into this instance if
        possible. This function returns a generator that is either self
        if the configs were merged, or self and other_config if that could
        not be done.

        Parameters
        ----------
        other_config : `ConfigIR`
            An instance of `ConfigIR` to merge into this instance.

        Returns
        -------
        Generator : `ConfigIR`
            A generator containing either self, or self and other_config if
            the configs could be merged or not respectively.
        """
        # Configs are never merged if their dataIds differ, or if either has
        # a python block or override files, as those may interact in ways
        # that cannot be predicted.
        if self.dataId != other_config.dataId or self.python or other_config.python \
                or self.file or other_config.file:
            yield from (self, other_config)
            return

        # If any shared keys have conflicting values, the configs cannot be
        # merged.
        key_union = self.rest.keys() & other_config.rest.keys()
        for key in key_union:
            if self.rest[key] != other_config.rest[key]:
                yield from (self, other_config)
                return
        self.rest.update(other_config.rest)

        # Combine the lists of override files to load
        self_file_set = set(self.file)
        other_file_set = set(other_config.file)
        self.file = list(self_file_set.union(other_file_set))

        yield self
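
    # Illustrative sketch of the merge rules above; the override keys are
    # hypothetical.
    #
    #     a = ConfigIR(rest={"alpha": "1"})
    #     b = ConfigIR(rest={"beta": "2"})
    #     list(a.maybe_merge(b))  # -> [a]; a.rest == {"alpha": "1", "beta": "2"}
    #
    #     c = ConfigIR(python="config.alpha = 1")
    #     list(a.maybe_merge(c))  # -> [a, c]; python blocks are never merged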

    def __eq__(self, other: "ConfigIR"):
        if not isinstance(other, ConfigIR):
            return False
        elif all(getattr(self, attr) == getattr(other, attr)
                 for attr in ("python", "dataId", "file", "rest")):
            return True
        else:
            return False
181 """Intermediate representation of tasks read from a pipeline yaml file.
184 """An identifier used to refer to a task.
187 """A string containing a fully qualified python class to be run in a
190 config: Union[List[ConfigIR],
None] =
None
191 """List of all configs overrides associated with this task, and may be
192 `None` if there are no config overrides.
196 """Convert to a representation used in yaml serialization
198 accumulate = {
'class': self.klass}
200 accumulate[
'config'] = [c.to_primitives()
for c
in self.
config]
204 """Adds a `ConfigIR` to this task if one is not present. Merges configs
205 if there is a `ConfigIR` present and the dataId keys of both configs
206 match, otherwise adds a new entry to the config list. The exception to
207 the above is that if either the last config or other_config has a
208 python block, then other_config is always added, as python blocks can
209 modify configs in ways that cannot be predicted.
213 other_config : `ConfigIR`
214 A `ConfigIR` instance to add or merge into the config attribute of
220 self.
config.extend(self.
config.pop().maybe_merge(other_config))
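
    # Illustrative sketch (class path and keys hypothetical): the first call
    # populates the config list, the second merges into the existing entry.
    #
    #     task = TaskIR("demo", "lsst.example.DemoTask", None)
    #     task.add_or_update_config(ConfigIR(rest={"a": "1"}))
    #     task.add_or_update_config(ConfigIR(rest={"b": "2"}))
    #     task.config  # -> [ConfigIR(rest={"a": "1", "b": "2"})]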

    def __eq__(self, other: "TaskIR"):
        if not isinstance(other, TaskIR):
            return False
        elif all(getattr(self, attr) == getattr(other, attr)
                 for attr in ("label", "klass", "config")):
            return True
        else:
            return False
234 """An intermediate representation of inherited pipelines
237 """This is the location of the pipeline to inherit. The path should be
238 specified as an absolute path. Environment variables may be used in the
239 path and should be specified as a python string template, with the name of
240 the environment variable inside braces.
242 include: Union[List[str],
None] =
None
243 """List of tasks that should be included when inheriting this pipeline.
244 Either the include or exclude attributes may be specified, but not both.
246 exclude: Union[List[str],
None] =
None
247 """List of tasks that should be excluded when inheriting this pipeline.
248 Either the include or exclude attributes may be specified, but not both.
250 importContracts: bool =
True
251 """Boolean attribute to dictate if contracts should be inherited with the
256 """Convert to a representation used in yaml serialization
258 if self.include
and self.exclude:
259 raise ValueError(
"Both an include and an exclude list cant be specified"
260 " when declaring a pipeline import")
261 tmp_pipeline = PipelineIR.from_file(os.path.expandvars(self.location))
262 if tmp_pipeline.instrument
is not None:
263 warnings.warn(
"Any instrument definitions in imported pipelines are ignored. "
264 "if an instrument is desired please define it in the top most pipeline")
267 for label, task
in tmp_pipeline.tasks.items():
268 if (self.include
and label
in self.include)
or (self.exclude
and label
not in self.exclude)\
269 or (self.include
is None and self.exclude
is None):
270 new_tasks[label] = task
271 tmp_pipeline.tasks = new_tasks
273 if not self.importContracts:
274 tmp_pipeline.contracts = []
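
    # Illustrative sketch of an inherits declaration that produces an
    # InheritIR processed by this method (location and label hypothetical):
    #
    #     inherits:
    #       - location: "${PIPELINE_DIR}/base_pipeline.yaml"
    #         exclude: "someTaskLabel"
    #         importContracts: false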

    def __eq__(self, other: "InheritIR"):
        if not isinstance(other, InheritIR):
            return False
        elif all(getattr(self, attr) == getattr(other, attr)
                 for attr in ("location", "include", "exclude", "importContracts")):
            return True
        else:
            return False
289 """Intermediate representation of a pipeline definition
294 A dictionary which matches the structure that would be produced by a
295 yaml reader which parses a pipeline definition document
300 - If a pipeline is declared without a description
301 - If no tasks are declared in a pipeline, and no pipelines are to be
303 - If more than one instrument is specified
304 - If more than one inherited pipeline share a label
308 if "description" not in loaded_yaml:
309 raise ValueError(
"A pipeline must be declared with a description")
310 if "tasks" not in loaded_yaml
and "inherits" not in loaded_yaml:
311 raise ValueError(
"A pipeline must be declared with one or more tasks")
320 inst = loaded_yaml.pop(
"instrument",
None)
321 if isinstance(inst, list):
322 raise ValueError(
"Only one top level instrument can be defined in a pipeline")

    def _read_contracts(self, loaded_yaml):
        """Process the contracts portion of the loaded yaml document.

        Parameters
        ----------
        loaded_yaml : `dict`
            A dictionary which matches the structure that would be produced
            by a yaml reader which parses a pipeline definition document.
        """
        loaded_contracts = loaded_yaml.pop("contracts", [])
        if isinstance(loaded_contracts, str):
            loaded_contracts = [loaded_contracts]
        self.contracts = []
        for contract in loaded_contracts:
            if isinstance(contract, dict):
                self.contracts.append(ContractIR(**contract))
            if isinstance(contract, str):
                self.contracts.append(ContractIR(contract=contract))
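
    # Illustrative sketch of the accepted contract spellings (expressions are
    # hypothetical):
    #
    #     contracts: "task1.field > 0"             # single bare string
    #     contracts:
    #       - "task1.field > 0"                    # list entry as a string
    #       - contract: "task1.field < task2.field"
    #         msg: "task1 must stay below task2"   # list entry as a mapping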

    def _read_inherits(self, loaded_yaml):
        """Process the inherits portion of the loaded yaml document.

        Parameters
        ----------
        loaded_yaml : `dict`
            A dictionary which matches the structure that would be produced
            by a yaml reader which parses a pipeline definition document.
        """
        def process_args(argument: Union[str, dict]) -> dict:
            # Normalize the argument into keyword arguments for InheritIR:
            # a bare string is a location, and scalar include/exclude entries
            # are promoted to one-element lists.
            if isinstance(argument, str):
                return {"location": argument}
            elif isinstance(argument, dict):
                if "exclude" in argument and isinstance(argument["exclude"], str):
                    argument["exclude"] = [argument["exclude"]]
                if "include" in argument and isinstance(argument["include"], str):
                    argument["include"] = [argument["include"]]
                return argument

        tmp_inherit = loaded_yaml.pop("inherits", None)
        if tmp_inherit is None:
            self.inherits = []
            return
        elif isinstance(tmp_inherit, list):
            self.inherits = [InheritIR(**process_args(args)) for args in tmp_inherit]
        else:
            self.inherits = [InheritIR(**process_args(tmp_inherit))]
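
        # Illustrative equivalence (path hypothetical): process_args turns
        # both of these spellings into the same keyword arguments for
        # InheritIR.
        #
        #     inherits: "${PIPELINE_DIR}/base.yaml"
        #     inherits:
        #       - location: "${PIPELINE_DIR}/base.yaml"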

        accumulate_tasks = {}
        for other_pipeline in self.inherits:
            tmp_IR = other_pipeline.toPipelineIR()
            if accumulate_tasks.keys() & tmp_IR.tasks.keys():
                raise ValueError("Task labels in the imported pipelines must "
                                 "be unique")
            accumulate_tasks.update(tmp_IR.tasks)
            self.contracts.extend(tmp_IR.contracts)

        # Merge the imported task definitions with those declared in this
        # pipeline. Matching labels with the same class merge their configs;
        # a different class replaces the imported definition outright.
        for label, task in self.tasks.items():
            if label not in accumulate_tasks:
                accumulate_tasks[label] = task
            elif accumulate_tasks[label].klass == task.klass:
                if task.config is not None:
                    for config in task.config:
                        accumulate_tasks[label].add_or_update_config(config)
            else:
                accumulate_tasks[label] = task
        self.tasks = accumulate_tasks

    def _read_tasks(self, loaded_yaml):
        """Process the tasks portion of the loaded yaml document.

        Parameters
        ----------
        loaded_yaml : `dict`
            A dictionary which matches the structure that would be produced
            by a yaml reader which parses a pipeline definition document.
        """
        self.tasks = {}
        tmp_tasks = loaded_yaml.pop("tasks", None)
        if tmp_tasks is None:
            return

        for label, definition in tmp_tasks.items():
            # A bare string is shorthand for a task with no config overrides
            if isinstance(definition, str):
                definition = {"class": definition}
            config = definition.get('config', None)
            if config is None:
                task_config_ir = None
            else:
                if isinstance(config, dict):
                    config = [config]
                task_config_ir = []
                for c in config:
                    # Promote a scalar file entry to a one-element list
                    file = c.pop("file", None)
                    if file is None:
                        file = []
                    elif not isinstance(file, list):
                        file = [file]
                    task_config_ir.append(ConfigIR(python=c.pop("python", None),
                                                   dataId=c.pop("dataId", None),
                                                   file=file,
                                                   rest=c))
            self.tasks[label] = TaskIR(label, definition["class"], task_config_ir)
437 """Create a `PipelineIR` object from a string formatted like a pipeline
442 pipeline_string : `str`
443 A string that is formatted according like a pipeline document
445 loaded_yaml = yaml.load(pipeline_string, Loader=PipelineYamlLoader)
446 return cls(loaded_yaml)
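
    # Minimal usage sketch (label and class are hypothetical):
    #
    #     ir = PipelineIR.from_string(
    #         "description: demo\n"
    #         "tasks:\n"
    #         "  demoTask: lsst.example.DemoTask\n")
    #     ir.tasks["demoTask"].klass  # -> "lsst.example.DemoTask"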
450 """Create a `PipelineIR` object from the document specified by the
456 Location of document to use in creating a `PipelineIR` object.
458 with open(filename,
'r')
as f:
459 loaded_yaml = yaml.load(f, Loader=PipelineYamlLoader)
460 return cls(loaded_yaml)
463 """Serialize this `PipelineIR` object into a yaml formatted string and
464 write the output to a file at the specified path.
469 Location of document to write a `PipelineIR` object.
471 with open(filename,
'w')
as f:
475 """Convert to a representation used in yaml serialization
480 accumulate[
'tasks'] = {m: t.to_primitives()
for m, t
in self.
tasks.items()}
482 accumulate[
'contracts'] = [c.to_primitives()
for c
in self.
contracts]
486 """Instance formatting as how it would look in yaml representation
491 """Instance formatting as how it would look in yaml representation

    def __eq__(self, other: "PipelineIR"):
        if not isinstance(other, PipelineIR):
            return False
        elif all(getattr(self, attr) == getattr(other, attr)
                 for attr in ("contracts", "tasks", "instrument")):
            return True
        else:
            return False