# This file is part of pipe_base.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = ("ConfigIR", "ContractError", "ContractIR", "InheritIR", "PipelineIR", "TaskIR", "LabeledSubset",
           "ParametersIR")

from collections import Counter
from collections.abc import Iterable as abcIterable
from dataclasses import dataclass, field
from typing import Any, List, Set, Union, Generator, MutableMapping, Optional, Dict

import copy
import re
import os
import yaml
import warnings


class PipelineYamlLoader(yaml.SafeLoader):
    """This is a specialized version of yaml's SafeLoader. It checks and
    raises an exception if it finds multiple instances of the same key at a
    given scope inside a pipeline file.
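
    Examples
    --------
    An illustrative sketch of the duplicate-key check (hypothetical document,
    not part of the original module):

    >>> yaml.load("{a: 1, a: 2}", Loader=PipelineYamlLoader)
    Traceback (most recent call last):
        ...
    KeyError: "Pipeline files must not have duplicated keys, {'a'} appeared multiple times"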
    """
    def construct_mapping(self, node, deep=False):
        # Call super() first so that it can do all the other forms of
        # checking on this node. Checking key uniqueness first would save
        # the work that super() does in the case of a failure, but that
        # check might itself fail if the node is malformed due to a parsing
        # error, and the resulting exception would be difficult to
        # understand.
        mapping = super().construct_mapping(node, deep)
        # Check if there are any duplicate keys
        all_keys = Counter(key_node.value for key_node, _ in node.value)
        duplicates = {k for k, i in all_keys.items() if i != 1}
        if duplicates:
            raise KeyError("Pipeline files must not have duplicated keys, "
                           f"{duplicates} appeared multiple times")
        return mapping


class ContractError(Exception):
    """An exception that is raised when a pipeline contract is not satisfied.
    """
    pass


@dataclass
class ContractIR:
    """Intermediate representation of contracts read from a pipeline yaml
    file.
    """
    contract: str
    """A string of python code representing one or more conditions on configs
    in a pipeline. This code-as-string should evaluate to `True` if the
    configs are fine, and `False` otherwise.
    """
    msg: Union[str, None] = None
    """An optional message to be shown to the user if a contract fails.
    """

    def to_primitives(self) -> dict:
        """Convert to a representation used in yaml serialization
        """
        accumulate = {"contract": self.contract}
        if self.msg is not None:
            accumulate['msg'] = self.msg
        return accumulate

    def __eq__(self, other: "ContractIR"):
        if not isinstance(other, ContractIR):
            return False
        elif self.contract == other.contract and self.msg == other.msg:
            return True
        else:
            return False


@dataclass
class LabeledSubset:
    """Intermediate representation of a named subset of task labels read from
    a pipeline yaml file.
    """
    label: str
    """The label used to identify the subset of task labels.
    """
    subset: Set[str]
    """A set of task labels contained in this subset.
    """
    description: Optional[str]
    """A description of what this subset of tasks is intended to do.
    """

    @staticmethod
    def from_primitives(label: str, value: Union[List[str], dict]) -> LabeledSubset:
        """Generate `LabeledSubset` objects given a properly formatted object
        that has been created by a yaml loader.

        Parameters
        ----------
        label : `str`
            The label that will be used to identify this labeled subset.
        value : `list` of `str` or `dict`
            Object returned from loading a labeled subset section from a yaml
            document.

        Returns
        -------
        labeledSubset : `LabeledSubset`
            A `LabeledSubset` object built from the inputs.

        Raises
        ------
        ValueError
            Raised if the value input is not properly formatted for parsing.
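
        Examples
        --------
        An illustrative sketch of the two accepted input forms (hypothetical
        task labels, for demonstration only):

        >>> LabeledSubset.from_primitives("subsetA", ["taskA", "taskB"]).subset == {"taskA", "taskB"}
        True
        >>> LabeledSubset.from_primitives(
        ...     "subsetA", {"subset": ["taskA"], "description": "demo"}).description
        'demo'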
        """
        if isinstance(value, MutableMapping):
            subset = value.pop("subset", None)
            if subset is None:
                raise ValueError("If a labeled subset is specified as a mapping, it must contain the key "
                                 "'subset'")
            description = value.pop("description", None)
        elif isinstance(value, abcIterable):
            subset = value
            description = None
        else:
            raise ValueError(f"There was a problem parsing the labeled subset {label}. Make sure the "
                             "definition is either a valid yaml list, or a mapping with keys "
                             "(subset, description), where subset points to a yaml list and description "
                             "is associated with a string")
        return LabeledSubset(label, set(subset), description)

    def to_primitives(self) -> dict:
        """Convert to a representation used in yaml serialization
        """
        accumulate: Dict[str, Any] = {"subset": list(self.subset)}
        if self.description is not None:
            accumulate["description"] = self.description
        return accumulate


@dataclass
class ParametersIR:
    """Intermediate representation of parameters that are global to a pipeline

    These parameters are specified under a top level key named `parameters`
    and are declared as a yaml mapping. These entries can then be used inside
    task configuration blocks to specify configuration values. They may not be
    used in the special ``file`` or ``python`` blocks.

    Example:
    parameters:
      shared_value: 14
    tasks:
      taskA:
        class: modA
        config:
          field1: parameters.shared_value
      taskB:
        class: modB
        config:
          field2: parameters.shared_value
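
    An illustrative doctest of the mapping behavior (not part of the original
    module):

    >>> params = ParametersIR({"shared_value": 14})
    >>> "shared_value" in params
    True
    >>> params["shared_value"]
    14
    >>> bool(ParametersIR({}))
    False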
    """
    mapping: MutableMapping[str, str]
    """A mutable mapping of identifiers as keys, and shared configuration
    as values.
    """
    def update(self, other: Optional[ParametersIR]):
        if other is not None:
            self.mapping.update(other.mapping)

    def to_primitives(self) -> MutableMapping[str, str]:
        """Convert to a representation used in yaml serialization
        """
        return self.mapping

    def __contains__(self, value: str) -> bool:
        return value in self.mapping

    def __getitem__(self, item: str) -> Any:
        return self.mapping[item]

    def __bool__(self) -> bool:
        return bool(self.mapping)


@dataclass
class ConfigIR:
    """Intermediate representation of configurations read from a pipeline yaml
    file.
    """
    python: Union[str, None] = None
    """A string of python code that is used to modify a configuration. This
    can also be None if there are no modifications to do.
    """
    dataId: Union[dict, None] = None
    """A dataId that is used to constrain these config overrides to only
    quanta with matching dataIds. This field can be None if there is no
    constraint. This is currently an unimplemented feature, and is placed here
    for future use.
    """
    file: List[str] = field(default_factory=list)
    """A list of paths which point to files containing config overrides to be
    applied. This value may be an empty list if there are no overrides to
    apply.
    """
    rest: dict = field(default_factory=dict)
    """This is a dictionary of key value pairs, where the keys are strings
    corresponding to qualified fields on a config to override, and the values
    are strings representing the values to apply.
    """

    def to_primitives(self) -> dict:
        """Convert to a representation used in yaml serialization
        """
        accumulate = {}
        for name in ("python", "dataId", "file"):
            # If this attribute is truthy, add it to the accumulation
            # dictionary
            if getattr(self, name):
                accumulate[name] = getattr(self, name)
        # Add the dictionary containing the rest of the config keys to the
        # accumulated dictionary
        accumulate.update(self.rest)
        return accumulate

    def formatted(self, parameters: ParametersIR) -> ConfigIR:
        """Return a new ConfigIR object that is formatted according to the
        specified parameters.

        Parameters
        ----------
        parameters : `ParametersIR`
            Object that contains variable mappings used in substitution.

        Returns
        -------
        config : `ConfigIR`
            A new ConfigIR object formatted with the input parameters.
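
        Examples
        --------
        An illustrative sketch of parameter substitution (hypothetical field
        and parameter names, for demonstration only):

        >>> cfg = ConfigIR(rest={"field1": "parameters.shared_value"})
        >>> cfg.formatted(ParametersIR({"shared_value": 14})).rest
        {'field1': 14}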
        """
        new_config = copy.deepcopy(self)
        for key, value in new_config.rest.items():
            if not isinstance(value, str):
                continue
            match = re.match("parameters[.](.*)", value)
            if match and match.group(1) in parameters:
                new_config.rest[key] = parameters[match.group(1)]
            if match and match.group(1) not in parameters:
                warnings.warn(f"config {key} contains value {match.group(0)} which is formatted like a "
                              "Pipeline parameter but was not found within the Pipeline; if this was not "
                              "intentional, check for a typo")
        return new_config

    def maybe_merge(self, other_config: "ConfigIR") -> Generator["ConfigIR", None, None]:
        """Merge another instance of a `ConfigIR` into this instance if
        possible. This function returns a generator that yields either just
        self, if the configs were merged, or self and other_config, if they
        could not be merged.

        Parameters
        ----------
        other_config : `ConfigIR`
            An instance of `ConfigIR` to merge into this instance.

        Returns
        -------
        Generator : `ConfigIR`
            A generator yielding either self, or self and other_config,
            depending on whether or not the configs could be merged.
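
        Examples
        --------
        An illustrative sketch of a successful merge (hypothetical keys, for
        demonstration only):

        >>> a = ConfigIR(rest={"x": 1})
        >>> merged = list(a.maybe_merge(ConfigIR(rest={"y": 2})))
        >>> len(merged)
        1
        >>> merged[0].rest == {"x": 1, "y": 2}
        True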
        """
        # Verify that the config blocks can be merged
        if self.dataId != other_config.dataId or self.python or other_config.python or\
                self.file or other_config.file:
            yield from (self, other_config)
            return

        # Take the intersection of the keys, and verify that no shared key
        # has a different value in the two configs
        key_union = self.rest.keys() & other_config.rest.keys()
        for key in key_union:
            if self.rest[key] != other_config.rest[key]:
                yield from (self, other_config)
                return
        self.rest.update(other_config.rest)

        # Combine the lists of override files to load
        self_file_set = set(self.file)
        other_file_set = set(other_config.file)
        self.file = list(self_file_set.union(other_file_set))

        yield self

    def __eq__(self, other: "ConfigIR"):
        if not isinstance(other, ConfigIR):
            return False
        elif all(getattr(self, attr) == getattr(other, attr) for attr in
                 ("python", "dataId", "file", "rest")):
            return True
        else:
            return False


@dataclass
class TaskIR:
    """Intermediate representation of tasks read from a pipeline yaml file.
    """
    label: str
    """An identifier used to refer to a task.
    """
    klass: str
    """A string containing a fully qualified python class to be run in a
    pipeline.
    """
    config: Union[List[ConfigIR], None] = None
    """List of all config overrides associated with this task; may be
    `None` if there are no config overrides.
    """

    def to_primitives(self) -> dict:
        """Convert to a representation used in yaml serialization
        """
        accumulate = {'class': self.klass}
        if self.config:
            accumulate['config'] = [c.to_primitives() for c in self.config]
        return accumulate

    def add_or_update_config(self, other_config: ConfigIR):
        """Adds a `ConfigIR` to this task if one is not present. Merges configs
        if there is a `ConfigIR` present and the dataId keys of both configs
        match, otherwise adds a new entry to the config list. The exception to
        the above is that if either the last config or other_config has a
        python block, then other_config is always added, as python blocks can
        modify configs in ways that cannot be predicted.

        Parameters
        ----------
        other_config : `ConfigIR`
            A `ConfigIR` instance to add or merge into the config attribute of
            this task.
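
        Examples
        --------
        An illustrative sketch showing two mergeable configs collapsing into
        one (hypothetical labels and keys, for demonstration only):

        >>> task = TaskIR("taskA", "mod.TaskA")
        >>> task.add_or_update_config(ConfigIR(rest={"a": 1}))
        >>> task.add_or_update_config(ConfigIR(rest={"b": 2}))
        >>> len(task.config)
        1
        >>> task.config[0].rest == {"a": 1, "b": 2}
        True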
        """
        if not self.config:
            self.config = [other_config]
            return
        self.config.extend(self.config.pop().maybe_merge(other_config))

    def __eq__(self, other: "TaskIR"):
        if not isinstance(other, TaskIR):
            return False
        elif all(getattr(self, attr) == getattr(other, attr) for attr in
                 ("label", "klass", "config")):
            return True
        else:
            return False


@dataclass
class InheritIR:
    """An intermediate representation of inherited pipelines.
    """
    location: str
    """This is the location of the pipeline to inherit. The path should be
    specified as an absolute path. Environment variables may be used in the
    path and should be specified as a python string template, with the name of
    the environment variable inside braces.
    """
    include: Union[List[str], None] = None
    """List of tasks that should be included when inheriting this pipeline.
    Either the include or exclude attributes may be specified, but not both.
    """
    exclude: Union[List[str], None] = None
    """List of tasks that should be excluded when inheriting this pipeline.
    Either the include or exclude attributes may be specified, but not both.
    """
    importContracts: bool = True
    """Boolean attribute to dictate if contracts should be inherited with the
    pipeline or not.
    """

    def toPipelineIR(self, instrument=None) -> "PipelineIR":
        """Load in the Pipeline specified by this object, and turn it into a
        PipelineIR instance.

        Parameters
        ----------
        instrument : `str`, optional
            A string giving the fully qualified path to an instrument object.
            If an inherited pipeline defines the same instrument as defined in
            this variable, an import warning message is skipped.

        Returns
        -------
        pipeline : `PipelineIR`
            A pipeline generated from the imported pipeline file.
        """
        if self.include and self.exclude:
            raise ValueError("An include list and an exclude list cannot both be specified"
                             " when declaring a pipeline import")
        tmp_pipeline = PipelineIR.from_file(os.path.expandvars(self.location))
        if tmp_pipeline.instrument is not None and tmp_pipeline.instrument != instrument:
            warnings.warn("Any instrument definitions in imported pipelines are ignored. "
                          "If an instrument is desired please define it in the top-most pipeline")

        included_labels = set()
        for label in tmp_pipeline.tasks:
            if (self.include and label in self.include) or (self.exclude and label not in self.exclude)\
                    or (self.include is None and self.exclude is None):
                included_labels.add(label)

        # Handle labeled subsets being specified in the include or exclude
        # list, adding or removing labels.
        if self.include is not None:
            subsets_in_include = tmp_pipeline.labeled_subsets.keys() & self.include
            for label in subsets_in_include:
                included_labels.update(tmp_pipeline.labeled_subsets[label].subset)

        elif self.exclude is not None:
            subsets_in_exclude = tmp_pipeline.labeled_subsets.keys() & self.exclude
            for label in subsets_in_exclude:
                included_labels.difference_update(tmp_pipeline.labeled_subsets[label].subset)

        tmp_pipeline = tmp_pipeline.subset_from_labels(included_labels)

        if not self.importContracts:
            tmp_pipeline.contracts = []

        return tmp_pipeline

    def __eq__(self, other: "InheritIR"):
        if not isinstance(other, InheritIR):
            return False
        elif all(getattr(self, attr) == getattr(other, attr) for attr in
                 ("location", "include", "exclude", "importContracts")):
            return True
        else:
            return False


class PipelineIR:
    """Intermediate representation of a pipeline definition

    Parameters
    ----------
    loaded_yaml : `dict`
        A dictionary which matches the structure that would be produced by a
        yaml reader which parses a pipeline definition document

    Raises
    ------
    ValueError
        Raised if:

        - a pipeline is declared without a description;
        - no tasks are declared in a pipeline, and no pipelines are to be
          inherited;
        - more than one instrument is specified;
        - more than one inherited pipeline shares a label.
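
    Examples
    --------
    An illustrative sketch of a minimal pipeline document that this class can
    represent (hypothetical labels and classes, for demonstration only):

    description: A demo pipeline
    tasks:
        taskA:
            class: mod.TaskA
            config:
                field1: 42
        taskB: mod.TaskB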
    """
    def __init__(self, loaded_yaml):
        # Check required fields are present
        if "description" not in loaded_yaml:
            raise ValueError("A pipeline must be declared with a description")
        if "tasks" not in loaded_yaml and "inherits" not in loaded_yaml:
            raise ValueError("A pipeline must be declared with one or more tasks")

        # These steps below must happen in this call order

        # Process pipeline description
        self.description = loaded_yaml.pop("description")

        # Process tasks
        self._read_tasks(loaded_yaml)

        # Process instrument keys
        inst = loaded_yaml.pop("instrument", None)
        if isinstance(inst, list):
            raise ValueError("Only one top level instrument can be defined in a pipeline")
        self.instrument = inst

        # Process any contracts
        self._read_contracts(loaded_yaml)

        # Process any defined parameters
        self._read_parameters(loaded_yaml)

        # Process any named label subsets
        self._read_labeled_subsets(loaded_yaml)

        # Process any inherited pipelines
        self._read_inherits(loaded_yaml)

        # Verify named subsets; this must be done after inheriting
        self._verify_labeled_subsets()

    def _read_contracts(self, loaded_yaml):
        """Process the contracts portion of the loaded yaml document

        Parameters
        ----------
        loaded_yaml : `dict`
            A dictionary which matches the structure that would be produced by
            a yaml reader which parses a pipeline definition document
        """
        loaded_contracts = loaded_yaml.pop("contracts", [])
        if isinstance(loaded_contracts, str):
            loaded_contracts = [loaded_contracts]
        self.contracts = []
        for contract in loaded_contracts:
            if isinstance(contract, dict):
                self.contracts.append(ContractIR(**contract))
            if isinstance(contract, str):
                self.contracts.append(ContractIR(contract=contract))

    def _read_parameters(self, loaded_yaml):
        """Process the parameters portion of the loaded yaml document

        Parameters
        ----------
        loaded_yaml : `dict`
            A dictionary which matches the structure that would be produced by
            a yaml reader which parses a pipeline definition document
        """
        loaded_parameters = loaded_yaml.pop("parameters", {})
        if not isinstance(loaded_parameters, dict):
            raise ValueError("The parameters section must be a yaml mapping")
        self.parameters = ParametersIR(loaded_parameters)

    def _read_labeled_subsets(self, loaded_yaml: dict):
        """Process the subsets portion of the loaded yaml document

        Parameters
        ----------
        loaded_yaml : `MutableMapping`
            A dictionary which matches the structure that would be produced
            by a yaml reader which parses a pipeline definition document
        """
        loaded_subsets = loaded_yaml.pop("subsets", {})
        self.labeled_subsets = {}
        if not loaded_subsets and "subset" in loaded_yaml:
            raise ValueError("The top level key should be 'subsets' and not 'subset'; add an 's'")
        for key, value in loaded_subsets.items():
            self.labeled_subsets[key] = LabeledSubset.from_primitives(key, value)

    def _verify_labeled_subsets(self):
        """Verifies that all the labels in each named subset exist within the
        pipeline.
        """
        # Verify that all labels defined in a labeled subset are in the
        # Pipeline
        for labeled_subset in self.labeled_subsets.values():
            if not labeled_subset.subset.issubset(self.tasks.keys()):
                raise ValueError(f"Labels {labeled_subset.subset - self.tasks.keys()} were not found in the "
                                 "declared pipeline")
        # Verify subset labels are not already task labels
        label_intersection = self.labeled_subsets.keys() & self.tasks.keys()
        if label_intersection:
            raise ValueError(f"Labeled subsets can not use the same label as a task: {label_intersection}")

    def _read_inherits(self, loaded_yaml):
        """Process the inherits portion of the loaded yaml document

        Parameters
        ----------
        loaded_yaml : `dict`
            A dictionary which matches the structure that would be produced by
            a yaml reader which parses a pipeline definition document
        """
        def process_args(argument: Union[str, dict]) -> dict:
            if isinstance(argument, str):
                return {"location": argument}
            elif isinstance(argument, dict):
                if "exclude" in argument and isinstance(argument["exclude"], str):
                    argument["exclude"] = [argument["exclude"]]
                if "include" in argument and isinstance(argument["include"], str):
                    argument["include"] = [argument["include"]]
                return argument
        tmp_inherit = loaded_yaml.pop("inherits", None)
        if tmp_inherit is None:
            self.inherits = []
        elif isinstance(tmp_inherit, list):
            self.inherits = [InheritIR(**process_args(args)) for args in tmp_inherit]
        else:
            self.inherits = [InheritIR(**process_args(tmp_inherit))]

        # Integrate any imported pipelines
        accumulate_tasks = {}
        accumulate_labeled_subsets = {}
        accumulated_parameters = ParametersIR({})
        for other_pipeline in self.inherits:
            tmp_IR = other_pipeline.toPipelineIR(instrument=self.instrument)
            if accumulate_tasks.keys() & tmp_IR.tasks.keys():
                raise ValueError("Task labels in the imported pipelines must "
                                 "be unique")
            accumulate_tasks.update(tmp_IR.tasks)
            self.contracts.extend(tmp_IR.contracts)
            # Verify that tmp_IR's named subset labels are unique among the
            # existing labeled subsets, and among the existing task labels.
            overlapping_subsets = accumulate_labeled_subsets.keys() & tmp_IR.labeled_subsets.keys()
            task_subset_overlap = ((accumulate_labeled_subsets.keys() | tmp_IR.labeled_subsets.keys())
                                   & accumulate_tasks.keys())
            if overlapping_subsets or task_subset_overlap:
                raise ValueError("Labeled subset names must be unique amongst imports in both labels and "
                                 f"named subsets. Duplicate: {overlapping_subsets | task_subset_overlap}")
            accumulate_labeled_subsets.update(tmp_IR.labeled_subsets)
            accumulated_parameters.update(tmp_IR.parameters)

        # Verify that any accumulated labeled subsets don't clash with a
        # label from this pipeline
        if accumulate_labeled_subsets.keys() & self.tasks.keys():
            raise ValueError("Labeled subset names must be unique amongst imports in both labels and "
                             "named subsets")
        # Merge in the named subsets for self so this document can override
        # any that have been declared
        accumulate_labeled_subsets.update(self.labeled_subsets)
        self.labeled_subsets = accumulate_labeled_subsets

        # Merge the dict of label:TaskIR objects, preserving any configs in
        # the imported pipeline if the labels point to the same class
        for label, task in self.tasks.items():
            if label not in accumulate_tasks:
                accumulate_tasks[label] = task
            elif accumulate_tasks[label].klass == task.klass:
                if task.config is not None:
                    for config in task.config:
                        accumulate_tasks[label].add_or_update_config(config)
            else:
                accumulate_tasks[label] = task
        self.tasks = accumulate_tasks
        self.parameters.update(accumulated_parameters)

    def _read_tasks(self, loaded_yaml):
        """Process the tasks portion of the loaded yaml document

        Parameters
        ----------
        loaded_yaml : `dict`
            A dictionary which matches the structure that would be produced by
            a yaml reader which parses a pipeline definition document
        """
        self.tasks = {}
        tmp_tasks = loaded_yaml.pop("tasks", None)
        if tmp_tasks is None:
            tmp_tasks = {}

        if "parameters" in tmp_tasks:
            raise ValueError("parameters is a reserved word and cannot be used as a task label")

        for label, definition in tmp_tasks.items():
            if isinstance(definition, str):
                definition = {"class": definition}
            config = definition.get('config', None)
            if config is None:
                task_config_ir = None
            else:
                if isinstance(config, dict):
                    config = [config]
                task_config_ir = []
                for c in config:
                    file = c.pop("file", None)
                    if file is None:
                        file = []
                    elif not isinstance(file, list):
                        file = [file]
                    task_config_ir.append(ConfigIR(python=c.pop("python", None),
                                                   dataId=c.pop("dataId", None),
                                                   file=file,
                                                   rest=c))
            self.tasks[label] = TaskIR(label, definition["class"], task_config_ir)

    def _remove_contracts(self, label: str):
        """Remove any contracts that contain the given label

        String comparison used in this way is not the most elegant and may
        have issues, but it is the only feasible way when users can specify
        contracts with generic strings.
        """
        new_contracts = []
        for contract in self.contracts:
            # Match a label that is preceded by a non-identifier ASCII
            # character or the start of a line, and that is followed by a
            # dot
            if re.match(f".*([^A-Za-z0-9_]|^){label}[.]", contract.contract):
                continue
            new_contracts.append(contract)
        self.contracts = new_contracts

    def subset_from_labels(self, labelSpecifier: Set[str]) -> PipelineIR:
        """Subset a pipelineIR to contain only labels specified in
        labelSpecifier.

        Parameters
        ----------
        labelSpecifier : `set` of `str`
            Set containing labels that describe how to subset a pipeline.

        Returns
        -------
        pipeline : `PipelineIR`
            A new pipelineIR object that is a subset of the old pipelineIR.

        Raises
        ------
        ValueError
            Raised if there is an issue with specified labels.

        Notes
        -----
        This method attempts to prune any contracts that contain labels which
        are not in the declared subset of labels. This pruning is done using
        string-based matching, due to the nature of contracts, and may prune
        more than it should. Any labeled subsets defined that no longer have
        all members of the subset present in the pipeline will be removed from
        the resulting pipeline.
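
        Examples
        --------
        An illustrative sketch (hypothetical labels and classes, for
        demonstration only):

        >>> full = PipelineIR.from_string(
        ...     "{description: demo, tasks: {taskA: mod.TaskA, taskB: mod.TaskB}}")
        >>> sub = full.subset_from_labels({"taskA"})
        >>> list(sub.tasks)
        ['taskA']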
        """

        pipeline = copy.deepcopy(self)

        # Update the label specifier to expand any named subsets
        toRemove = set()
        toAdd = set()
        for label in labelSpecifier:
            if label in pipeline.labeled_subsets:
                toRemove.add(label)
                toAdd.update(pipeline.labeled_subsets[label].subset)
        labelSpecifier.difference_update(toRemove)
        labelSpecifier.update(toAdd)
        # Verify all the labels are in the pipeline
        if not labelSpecifier.issubset(pipeline.tasks.keys()
                                       | pipeline.labeled_subsets):
            difference = labelSpecifier.difference(pipeline.tasks.keys())
            raise ValueError("Not all supplied labels (specified or named subsets) are in the pipeline "
                             f"definition, extra labels: {difference}")
        # A copy is needed so as to not modify while iterating
        pipeline_labels = set(pipeline.tasks.keys())
        # Remove the labels from the pipelineIR, and any contracts that
        # contain those labels (see the docstring on _remove_contracts for
        # why this may cause issues)
        for label in pipeline_labels:
            if label not in labelSpecifier:
                pipeline.tasks.pop(label)
                pipeline._remove_contracts(label)

        # Create a copy of the object to iterate over
        labeled_subsets = copy.copy(pipeline.labeled_subsets)
        # Remove any labeled subsets that no longer have a complete set of
        # task labels
        for label, labeled_subset in labeled_subsets.items():
            if labeled_subset.subset - pipeline.tasks.keys():
                pipeline.labeled_subsets.pop(label)

        return pipeline

    @classmethod
    def from_string(cls, pipeline_string: str):
        """Create a `PipelineIR` object from a string formatted like a
        pipeline document.

        Parameters
        ----------
        pipeline_string : `str`
            A string that is formatted like a pipeline document.
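
        Examples
        --------
        An illustrative sketch using yaml flow style so the document fits on
        one line (hypothetical labels and classes, for demonstration only):

        >>> pipeline = PipelineIR.from_string("{description: demo, tasks: {taskA: mod.TaskA}}")
        >>> pipeline.description
        'demo'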
        """
        loaded_yaml = yaml.load(pipeline_string, Loader=PipelineYamlLoader)
        return cls(loaded_yaml)

    @classmethod
    def from_file(cls, filename: str):
        """Create a `PipelineIR` object from the document specified by the
        input path.

        Parameters
        ----------
        filename : `str`
            Location of document to use in creating a `PipelineIR` object.
        """
        with open(filename, 'r') as f:
            loaded_yaml = yaml.load(f, Loader=PipelineYamlLoader)
        return cls(loaded_yaml)

    def to_file(self, filename: str):
        """Serialize this `PipelineIR` object into a yaml formatted string and
        write the output to a file at the specified path.

        Parameters
        ----------
        filename : `str`
            Location at which to write the document created from this
            `PipelineIR` object.
        """
        with open(filename, 'w') as f:
            yaml.dump(self.to_primitives(), f, sort_keys=False)

    def to_primitives(self):
        """Convert to a representation used in yaml serialization
        """
        accumulate = {"description": self.description}
        if self.instrument is not None:
            accumulate['instrument'] = self.instrument
        if self.parameters:
            accumulate['parameters'] = self.parameters.to_primitives()
        accumulate['tasks'] = {m: t.to_primitives() for m, t in self.tasks.items()}
        if len(self.contracts) > 0:
            accumulate['contracts'] = [c.to_primitives() for c in self.contracts]
        if self.labeled_subsets:
            accumulate['subsets'] = {k: v.to_primitives() for k, v in self.labeled_subsets.items()}
        return accumulate

    def __str__(self) -> str:
        """Format the instance as it would appear in a yaml representation
        """
        return yaml.dump(self.to_primitives(), sort_keys=False)

    def __repr__(self) -> str:
        """Format the instance as it would appear in a yaml representation
        """
        return str(self)

    def __eq__(self, other: "PipelineIR"):
        if not isinstance(other, PipelineIR):
            return False
        elif all(getattr(self, attr) == getattr(other, attr) for attr in
                 ("contracts", "tasks", "instrument")):
            return True
        else:
            return False