Coverage for python/lsst/pipe/base/pipeline.py: 20%

430 statements  

coverage.py v7.1.0, created at 2023-02-22 13:28 -0800

1# This file is part of pipe_base. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21from __future__ import annotations 

22 

23"""Module defining Pipeline class and related methods. 

24""" 

25 

26__all__ = ["Pipeline", "TaskDef", "TaskDatasetTypes", "PipelineDatasetTypes", "LabelSpecifier"] 

27 

28import copy 

29import logging 

30import os 

31import re 

32import urllib.parse 

33import warnings 

34 

35# ------------------------------- 

36# Imports of standard modules -- 

37# ------------------------------- 

38from dataclasses import dataclass 

39from types import MappingProxyType 

40from typing import ( 

41 TYPE_CHECKING, 

42 AbstractSet, 

43 Callable, 

44 ClassVar, 

45 Dict, 

46 Generator, 

47 Iterable, 

48 Iterator, 

49 Mapping, 

50 Optional, 

51 Set, 

52 Tuple, 

53 Type, 

54 Union, 

55 cast, 

56) 

57 

58# ----------------------------- 

59# Imports for other modules -- 

60from lsst.daf.butler import DatasetType, NamedValueSet, Registry, SkyPixDimension 

61from lsst.resources import ResourcePath, ResourcePathExpression 

62from lsst.utils import doImportType 

63from lsst.utils.introspection import get_full_type_name 

64 

65from . import pipelineIR, pipeTools 

66from ._task_metadata import TaskMetadata 

67from .config import PipelineTaskConfig 

68from .configOverrides import ConfigOverrides 

69from .connections import iterConnections 

70from .connectionTypes import Input 

71from .pipelineTask import PipelineTask 

72from .task import _TASK_METADATA_TYPE 

73 

74if TYPE_CHECKING: # Imports needed only for type annotations; may be circular. 

75 from lsst.obs.base import Instrument 

76 from lsst.pex.config import Config 

77 

78# ---------------------------------- 

79# Local non-exported definitions -- 

80# ---------------------------------- 

81 

82_LOG = logging.getLogger(__name__) 

83 

84# ------------------------ 

85# Exported definitions -- 

86# ------------------------ 

87 

88 

89@dataclass 

90class LabelSpecifier: 

91 """A structure to specify a subset of labels to load 

92 

93 This structure may contain a set of labels to be used in subsetting a 

94 pipeline, or a beginning and end point. Beginning or end may be empty, 

95 in which case the range will be a half open interval. Unlike python 

96 iteration bounds, end bounds are *INCLUDED*. Note that range based 

97 selection is not well defined for pipelines that are not linear in nature, 

98 and correct behavior is not guaranteed, or may vary from run to run. 

99 """ 

100 

101 labels: Optional[Set[str]] = None 

102 begin: Optional[str] = None 

103 end: Optional[str] = None 

104 

105 def __post_init__(self) -> None: 

106 if self.labels is not None and (self.begin or self.end): 

107 raise ValueError( 

108 "This struct can only be initialized with a labels set or a begin (and/or) end specifier" 

109 ) 

110 
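A minimal usage sketch (editorial addition, not part of the module; the task labels used are hypothetical):

    # An explicit set of labels to keep:
    explicit = LabelSpecifier(labels={"isr", "calibrate"})
    # An inclusive begin..end range:
    ranged = LabelSpecifier(begin="isr", end="calibrate")
    # Supplying both forms at once raises ValueError in __post_init__.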

111 

112class TaskDef: 

113 """TaskDef is a collection of information about task needed by Pipeline. 

114 

115 The information includes task name, configuration object and optional 

116 task class. This class is just a collection of attributes and it exposes 

117 all of them so that attributes could potentially be modified in place 

118 (e.g. if configuration needs extra overrides). 

119 

120 Attributes 

121 ---------- 

122 taskName : `str`, optional 

123 The fully-qualified `PipelineTask` class name. If not provided, 

124 ``taskClass`` must be. 

125 config : `lsst.pipe.base.config.PipelineTaskConfig`, optional 

126 Instance of the configuration class corresponding to this task class, 

127 usually with all overrides applied. This config will be frozen. If 

128 not provided, ``taskClass`` must be provided and 

129 ``taskClass.ConfigClass()`` will be used. 

130 taskClass : `type`, optional 

131 `PipelineTask` class object; if provided and ``taskName`` is as well, 

132 the caller guarantees that they are consistent. If not provided, 

133 ``taskName`` is used to import the type. 

134 label : `str`, optional 

135 Task label, usually a short string unique in a pipeline. If not 

136 provided, ``taskClass`` must be, and ``taskClass._DefaultName`` will 

137 be used. 

138 """ 

139 

140 def __init__( 

141 self, 

142 taskName: Optional[str] = None, 

143 config: Optional[PipelineTaskConfig] = None, 

144 taskClass: Optional[Type[PipelineTask]] = None, 

145 label: Optional[str] = None, 

146 ): 

147 if taskName is None: 

148 if taskClass is None: 

149 raise ValueError("At least one of `taskName` and `taskClass` must be provided.") 

150 taskName = get_full_type_name(taskClass) 

151 elif taskClass is None: 

152 taskClass = doImportType(taskName) 

153 if config is None: 

154 if taskClass is None: 

155 raise ValueError("`taskClass` must be provided if `config` is not.") 

156 config = taskClass.ConfigClass() 

157 if label is None: 

158 if taskClass is None: 

159 raise ValueError("`taskClass` must be provided if `label` is not.") 

160 label = taskClass._DefaultName 

161 self.taskName = taskName 

162 try: 

163 config.validate() 

164 except Exception: 

165 _LOG.error("Configuration validation failed for task %s (%s)", label, taskName) 

166 raise 

167 config.freeze() 

168 self.config = config 

169 self.taskClass = taskClass 

170 self.label = label 

171 self.connections = config.connections.ConnectionsClass(config=config) 

172 

173 @property 

174 def configDatasetName(self) -> str: 

175 """Name of a dataset type for configuration of this task (`str`)""" 

176 return self.label + "_config" 

177 

178 @property 

179 def metadataDatasetName(self) -> Optional[str]: 

180 """Name of a dataset type for metadata of this task, `None` if 

181 metadata is not to be saved (`str`) 

182 """ 

183 if self.config.saveMetadata: 

184 return self.makeMetadataDatasetName(self.label) 

185 else: 

186 return None 

187 

188 @classmethod 

189 def makeMetadataDatasetName(cls, label: str) -> str: 

190 """Construct the name of the dataset type for metadata for a task. 

191 

192 Parameters 

193 ---------- 

194 label : `str` 

195 Label for the task within its pipeline. 

196 

197 Returns 

198 ------- 

199 name : `str` 

200 Name of the task's metadata dataset type. 

201 """ 

202 return f"{label}_metadata" 

203 

204 @property 

205 def logOutputDatasetName(self) -> Optional[str]: 

206 """Name of a dataset type for log output from this task, `None` if 

207 logs are not to be saved (`str`) 

208 """ 

209 if cast(PipelineTaskConfig, self.config).saveLogOutput: 

210 return self.label + "_log" 

211 else: 

212 return None 

213 

214 def __str__(self) -> str: 

215 rep = "TaskDef(" + self.taskName 

216 if self.label: 

217 rep += ", label=" + self.label 

218 rep += ")" 

219 return rep 

220 

221 def __eq__(self, other: object) -> bool: 

222 if not isinstance(other, TaskDef): 

223 return False 

224 # This does not consider equality of configs when determining equality 

225 # as config equality is a difficult thing to define. Should be updated 

226 # after DM-27847 

227 return self.taskClass == other.taskClass and self.label == other.label 

228 

229 def __hash__(self) -> int: 

230 return hash((self.taskClass, self.label)) 

231 

232 @classmethod 

233 def _unreduce(cls, taskName: str, config: PipelineTaskConfig, label: str) -> TaskDef: 

234 """Custom callable for unpickling. 

235 

236 All arguments are forwarded directly to the constructor; this 

237 trampoline is only needed because ``__reduce__`` callables can't be 

238 called with keyword arguments. 

239 """ 

240 return cls(taskName=taskName, config=config, label=label) 

241 

242 def __reduce__(self) -> Tuple[Callable[[str, PipelineTaskConfig, str], TaskDef], Tuple[str, Config, str]]: 

243 return (self._unreduce, (self.taskName, self.config, self.label)) 

244 
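A minimal construction sketch (editorial addition; "mypkg.MyTask" is a hypothetical fully-qualified PipelineTask subclass):

    # Only one of taskName/taskClass is required; config and label are
    # derived in __init__ when omitted.
    task_def = TaskDef(taskName="mypkg.MyTask", label="myTask")
    assert task_def.configDatasetName == "myTask_config"
    assert TaskDef.makeMetadataDatasetName("myTask") == "myTask_metadata"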

245 

246class Pipeline: 

247 """A `Pipeline` is a representation of a series of tasks to run, and the 

248 configuration for those tasks. 

249 

250 Parameters 

251 ---------- 

252 description : `str` 

253 A description of what this pipeline does. 

254 """ 

255 

256 def __init__(self, description: str): 

257 pipeline_dict = {"description": description, "tasks": {}} 

258 self._pipelineIR = pipelineIR.PipelineIR(pipeline_dict) 

259 

260 @classmethod 

261 def fromFile(cls, filename: str) -> Pipeline: 

262 """Load a pipeline defined in a pipeline yaml file. 

263 

264 Parameters 

265 ---------- 

266 filename: `str` 

267 A path that points to a pipeline defined in yaml format. This 

268 filename may also supply additional labels to be used in 

269 subsetting the loaded Pipeline. These labels are separated from 

270 the path by a \\#, and may be specified as a comma separated 

271 list, or a range denoted as beginning..end. Beginning or end may 

272 be empty, in which case the range will be a half open interval. 

273 Unlike python iteration bounds, end bounds are *INCLUDED*. Note 

274 that range based selection is not well defined for pipelines that 

275 are not linear in nature, and correct behavior is not guaranteed, 

276 or may vary from run to run. 

277 

278 Returns 

279 ------- 

280 pipeline: `Pipeline` 

281 The pipeline loaded from specified location with appropriate (if 

282 any) subsetting 

283 

284 Notes 

285 ----- 

286 This method attempts to prune any contracts that contain labels which 

287 are not in the declared subset of labels. This pruning is done using 

288 string-based matching due to the nature of contracts and may prune more 

289 than it should. 

290 """ 

291 return cls.from_uri(filename) 

292 

293 @classmethod 

294 def from_uri(cls, uri: ResourcePathExpression) -> Pipeline: 

295 """Load a pipeline defined in a pipeline yaml file at a location 

296 specified by a URI. 

297 

298 Parameters 

299 ---------- 

300 uri: convertible to `ResourcePath` 

301 If a string is supplied this should be a URI path that points to a 

302 pipeline defined in yaml format, either as a direct path to the 

303 yaml file, or as a directory containing a "pipeline.yaml" file (the 

304 form used by `write_to_uri` with ``expand=True``). This uri may 

305 also supply additional labels to be used in subsetting the loaded 

306 Pipeline. These labels are separated from the path by a \\#, and 

307 may be specified as a comma separated list, or a range denoted as 

308 beginning..end. Beginning or end may be empty, in which case the 

309 range will be a half open interval. Unlike python iteration bounds, 

310 end bounds are *INCLUDED*. Note that range based selection is not 

311 well defined for pipelines that are not linear in nature, and 

312 correct behavior is not guaranteed, or may vary from run to run. 

313 The same specifiers can be used with a `ResourcePath` object, by 

314 being the sole contents of its fragment attribute. 

315 

316 Returns 

317 ------- 

318 pipeline: `Pipeline` 

319 The pipeline loaded from specified location with appropriate (if 

320 any) subsetting 

321 

322 Notes 

323 ----- 

324 This method attempts to prune any contracts that contain labels which 

325 are not in the declared subset of labels. This pruning is done using 

326 string-based matching due to the nature of contracts and may prune more 

327 than it should. 

328 """ 

329 # Split up the uri and any labels that were supplied 

330 uri, label_specifier = cls._parse_file_specifier(uri) 

331 pipeline: Pipeline = cls.fromIR(pipelineIR.PipelineIR.from_uri(uri)) 

332 

333 # If there are labels supplied, only keep those 

334 if label_specifier is not None: 

335 pipeline = pipeline.subsetFromLabels(label_specifier) 

336 return pipeline 

337 
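A usage sketch (editorial addition; the file name and labels are hypothetical):

    # Load a pipeline and keep only the inclusive label range isr..calibrate.
    pipeline = Pipeline.from_uri("my_pipeline.yaml#isr..calibrate")
    # Equivalent form using an explicit, comma-separated label list.
    pipeline = Pipeline.from_uri("my_pipeline.yaml#isr,characterizeImage,calibrate")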

338 def subsetFromLabels(self, labelSpecifier: LabelSpecifier) -> Pipeline: 

339 """Subset a pipeline to contain only labels specified in labelSpecifier 

340 

341 Parameters 

342 ---------- 

343 labelSpecifier : `LabelSpecifier` 

344 Object containing the labels that describe how to subset a pipeline. 

345 

346 Returns 

347 ------- 

348 pipeline : `Pipeline` 

349 A new pipeline object that is a subset of the old pipeline 

350 

351 Raises 

352 ------ 

353 ValueError 

354 Raised if there is an issue with specified labels 

355 

356 Notes 

357 ----- 

358 This method attempts to prune any contracts that contain labels which 

359 are not in the declared subset of labels. This pruning is done using 

360 string-based matching due to the nature of contracts and may prune more 

361 than it should. 

362 """ 

363 # Labels supplied as a set 

364 if labelSpecifier.labels: 

365 labelSet = labelSpecifier.labels 

366 # Labels supplied as a range, first create a list of all the labels 

367 # in the pipeline sorted according to task dependency. Then only 

368 # keep labels that lie between the supplied bounds 

369 else: 

370 # Create a copy of the pipeline to use when assessing the label 

371 # ordering. Use a dict for fast searching while preserving order. 

372 # Remove contracts so they do not fail in the expansion step. This 

373 # is needed because a user may only configure the tasks they intend 

374 # to run, which may cause some contracts to fail if they will later 

375 # be dropped 

376 pipeline = copy.deepcopy(self) 

377 pipeline._pipelineIR.contracts = [] 

378 labels = {taskdef.label: True for taskdef in pipeline.toExpandedPipeline()} 

379 

380 # Verify the bounds are in the labels 

381 if labelSpecifier.begin is not None: 

382 if labelSpecifier.begin not in labels: 

383 raise ValueError( 

384 f"Beginning of range subset, {labelSpecifier.begin}, not found in " 

385 "pipeline definition" 

386 ) 

387 if labelSpecifier.end is not None: 

388 if labelSpecifier.end not in labels: 

389 raise ValueError( 

390 f"End of range subset, {labelSpecifier.end}, not found in pipeline definition" 

391 ) 

392 

393 labelSet = set() 

394 for label in labels: 

395 if labelSpecifier.begin is not None: 

396 if label != labelSpecifier.begin: 

397 continue 

398 else: 

399 labelSpecifier.begin = None 

400 labelSet.add(label) 

401 if labelSpecifier.end is not None and label == labelSpecifier.end: 

402 break 

403 return Pipeline.fromIR(self._pipelineIR.subset_from_labels(labelSet)) 

404 
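A usage sketch (editorial addition; ``pipeline`` is an assumed already-loaded Pipeline and the labels are hypothetical):

    # Keep only the tasks between "isr" and "calibrate", inclusive.
    subset = pipeline.subsetFromLabels(LabelSpecifier(begin="isr", end="calibrate"))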

405 @staticmethod 

406 def _parse_file_specifier(uri: ResourcePathExpression) -> Tuple[ResourcePath, Optional[LabelSpecifier]]: 

407 """Split appart a uri and any possible label subsets""" 

408 if isinstance(uri, str): 

409 # This is to support legacy pipelines during transition 

410 uri, num_replace = re.subn("[:](?!\\/\\/)", "#", uri) 

411 if num_replace: 

412 warnings.warn( 

413 f"The pipeline file {uri} seems to use the legacy : to separate " 

414 "labels, this is deprecated and will be removed after June 2021, please use " 

415 "# instead.", 

416 category=FutureWarning, 

417 ) 

418 if uri.count("#") > 1: 

419 raise ValueError("Only one set of labels is allowed when specifying a pipeline to load") 

420 # Everything else can be converted directly to ResourcePath. 

421 uri = ResourcePath(uri) 

422 label_subset = uri.fragment or None 

423 

424 specifier: Optional[LabelSpecifier] 

425 if label_subset is not None: 

426 label_subset = urllib.parse.unquote(label_subset) 

427 args: Dict[str, Union[Set[str], str, None]] 

428 # labels supplied as a list 

429 if "," in label_subset: 

430 if ".." in label_subset: 

431 raise ValueError( 

432 "Can only specify a list of labels or a rangewhen loading a Pipline not both" 

433 ) 

434 args = {"labels": set(label_subset.split(","))} 

435 # labels supplied as a range 

436 elif ".." in label_subset: 

437 # Try to de-structure the labelSubset, this will fail if more 

438 # than one range is specified 

439 begin, end, *rest = label_subset.split("..") 

440 if rest: 

441 raise ValueError("Only one range can be specified when loading a pipeline") 

442 args = {"begin": begin if begin else None, "end": end if end else None} 

443 # Assume anything else is a single label 

444 else: 

445 args = {"labels": {label_subset}} 

446 

447 # MyPy doesn't like how cavalier kwarg construction is with types. 

448 specifier = LabelSpecifier(**args) # type: ignore 

449 else: 

450 specifier = None 

451 

452 return uri, specifier 

453 
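An illustration of how fragments map to LabelSpecifier (editorial addition; the path and labels are hypothetical):

    uri, spec = Pipeline._parse_file_specifier("pipe.yaml#isr..calibrate")
    assert spec == LabelSpecifier(begin="isr", end="calibrate")
    uri, spec = Pipeline._parse_file_specifier("pipe.yaml#isr,calibrate")
    assert spec == LabelSpecifier(labels={"isr", "calibrate"})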

454 @classmethod 

455 def fromString(cls, pipeline_string: str) -> Pipeline: 

456 """Create a pipeline from string formatted as a pipeline document. 

457 

458 Parameters 

459 ---------- 

460 pipeline_string : `str` 

461 A string that is formatted like a pipeline document. 

462 

463 Returns 

464 ------- 

465 pipeline: `Pipeline` 

466 """ 

467 pipeline = cls.fromIR(pipelineIR.PipelineIR.from_string(pipeline_string)) 

468 return pipeline 

469 
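A usage sketch (editorial addition; the task class path is hypothetical and the document follows the pipeline YAML schema expected by PipelineIR.from_string):

    pipeline = Pipeline.fromString(
        "description: demo pipeline\n"
        "tasks:\n"
        "  myTask:\n"
        "    class: mypkg.MyTask\n"
    )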

470 @classmethod 

471 def fromIR(cls, deserialized_pipeline: pipelineIR.PipelineIR) -> Pipeline: 

472 """Create a pipeline from an already created `PipelineIR` object. 

473 

474 Parameters 

475 ---------- 

476 deserialized_pipeline: `PipelineIR` 

477 An already created pipeline intermediate representation object 

478 

479 Returns 

480 ------- 

481 pipeline: `Pipeline` 

482 """ 

483 pipeline = cls.__new__(cls) 

484 pipeline._pipelineIR = deserialized_pipeline 

485 return pipeline 

486 

487 @classmethod 

488 def fromPipeline(cls, pipeline: Pipeline) -> Pipeline: 

489 """Create a new pipeline by copying an already existing `Pipeline`. 

490 

491 Parameters 

492 ---------- 

493 pipeline: `Pipeline` 

494 An already created `Pipeline` object to copy. 

495 

496 Returns 

497 ------- 

498 pipeline: `Pipeline` 

499 """ 

500 return cls.fromIR(copy.deepcopy(pipeline._pipelineIR)) 

501 

502 def __str__(self) -> str: 

503 return str(self._pipelineIR) 

504 

505 def addInstrument(self, instrument: Union[Instrument, str]) -> None: 

506 """Add an instrument to the pipeline, or replace an instrument that is 

507 already defined. 

508 

509 Parameters 

510 ---------- 

511 instrument : `~lsst.obs.base.Instrument` or `str` 

512 Either a derived class object of a `lsst.obs.base.Instrument` or 

513 a string corresponding to a fully qualified 

514 `lsst.obs.base.Instrument` name. 

515 """ 

516 if isinstance(instrument, str): 

517 pass 

518 else: 

519 # TODO: assume that this is a subclass of Instrument, no type 

520 # checking 

521 instrument = get_full_type_name(instrument) 

522 self._pipelineIR.instrument = instrument 

523 

524 def getInstrument(self) -> Optional[str]: 

525 """Get the instrument from the pipeline. 

526 

527 Returns 

528 ------- 

529 instrument : `str`, or None 

530 The fully qualified name of a `lsst.obs.base.Instrument` subclass, 

531 or None if the pipeline does not have an instrument. 

532 """ 

533 return self._pipelineIR.instrument 

534 

535 def addTask(self, task: Union[Type[PipelineTask], str], label: str) -> None: 

536 """Add a new task to the pipeline, or replace a task that is already 

537 associated with the supplied label. 

538 

539 Parameters 

540 ---------- 

541 task: `PipelineTask` or `str` 

542 Either a derived class object of a `PipelineTask` or a string 

543 corresponding to a fully qualified `PipelineTask` name. 

544 label: `str` 

545 A label that is used to identify the `PipelineTask` being added 

546 """ 

547 if isinstance(task, str): 

548 taskName = task 

549 elif issubclass(task, PipelineTask): 

550 taskName = get_full_type_name(task) 

551 else: 

552 raise ValueError( 

553 "task must be either a child class of PipelineTask or a string containing" 

554 " a fully qualified name to one" 

555 ) 

556 if not label: 

557 # in some cases (with command line-generated pipeline) tasks can 

558 # be defined without a label, which is not acceptable; use the 

559 # task's _DefaultName in that case 

560 if isinstance(task, str): 

561 task_class = doImportType(task) 

562 label = task_class._DefaultName 

563 self._pipelineIR.tasks[label] = pipelineIR.TaskIR(label, taskName) 

564 

565 def removeTask(self, label: str) -> None: 

566 """Remove a task from the pipeline. 

567 

568 Parameters 

569 ---------- 

570 label : `str` 

571 The label used to identify the task that is to be removed 

572 

573 Raises 

574 ------ 

575 KeyError 

576 If no task with that label exists in the pipeline 

577 

578 """ 

579 self._pipelineIR.tasks.pop(label) 

580 

581 def addConfigOverride(self, label: str, key: str, value: object) -> None: 

582 """Apply single config override. 

583 

584 Parameters 

585 ---------- 

586 label : `str` 

587 Label of the task. 

588 key: `str` 

589 Fully-qualified field name. 

590 value : object 

591 Value to be given to a field. 

592 """ 

593 self._addConfigImpl(label, pipelineIR.ConfigIR(rest={key: value})) 

594 

595 def addConfigFile(self, label: str, filename: str) -> None: 

596 """Add overrides from a specified file. 

597 

598 Parameters 

599 ---------- 

600 label : `str` 

601 The label used to identify the task associated with config to 

602 modify 

603 filename : `str` 

604 Path to the override file. 

605 """ 

606 self._addConfigImpl(label, pipelineIR.ConfigIR(file=[filename])) 

607 

608 def addConfigPython(self, label: str, pythonString: str) -> None: 

609 """Add Overrides by running a snippet of python code against a config. 

610 

611 Parameters 

612 ---------- 

613 label : `str` 

614 The label used to identify the task associated with config to 

615 modify. 

616 pythonString: `str` 

617 A string which is valid python code to be executed. This is done 

618 with config as the only local accessible value. 

619 """ 

620 self._addConfigImpl(label, pipelineIR.ConfigIR(python=pythonString)) 

621 
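A sketch of assembling a pipeline programmatically (editorial addition; the task path, label, field name, and file name are hypothetical):

    pipeline = Pipeline("A demonstration pipeline")
    pipeline.addTask("mypkg.MyTask", "myTask")
    pipeline.addConfigOverride("myTask", "someField", 42)
    pipeline.addConfigFile("myTask", "myTaskOverrides.py")
    pipeline.addConfigPython("myTask", "config.someField = 3")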

622 def _addConfigImpl(self, label: str, newConfig: pipelineIR.ConfigIR) -> None: 

623 if label == "parameters": 

624 if newConfig.rest.keys() - self._pipelineIR.parameters.mapping.keys(): 

625 raise ValueError("Cannot override parameters that are not defined in pipeline") 

626 self._pipelineIR.parameters.mapping.update(newConfig.rest) 

627 if newConfig.file: 

628 raise ValueError("Setting parameters section with config file is not supported") 

629 if newConfig.python: 

630 raise ValueError("Setting parameters section using python block in unsupported") 

631 return 

632 if label not in self._pipelineIR.tasks: 

633 raise LookupError(f"There are no tasks labeled '{label}' in the pipeline") 

634 self._pipelineIR.tasks[label].add_or_update_config(newConfig) 

635 

636 def write_to_uri(self, uri: ResourcePathExpression) -> None: 

637 """Write the pipeline to a file or directory. 

638 

639 Parameters 

640 ---------- 

641 uri : convertible to `ResourcePath` 

642 URI to write to; may have any scheme with `ResourcePath` write 

643 support or no scheme for a local file/directory. Should have a 

644 ``.yaml`` extension. 

645 """ 

646 self._pipelineIR.write_to_uri(uri) 

647 

648 def toExpandedPipeline(self) -> Generator[TaskDef, None, None]: 

649 """Returns a generator of TaskDefs which can be used to create quantum 

650 graphs. 

651 

652 Returns 

653 ------- 

654 generator : generator of `TaskDef` 

655 The generator returned will be the sorted iterator of tasks which 

656 are to be used in constructing a quantum graph. 

657 

658 Raises 

659 ------ 

660 NotImplementedError 

661 Raised if a dataId is supplied in a config block. This is in place 

662 for future use. 

663 """ 

664 taskDefs = [] 

665 for label in self._pipelineIR.tasks: 

666 taskDefs.append(self._buildTaskDef(label)) 

667 

668 # let's evaluate the contracts 

669 if self._pipelineIR.contracts is not None: 

670 label_to_config = {x.label: x.config for x in taskDefs} 

671 for contract in self._pipelineIR.contracts: 

672 # execute this in its own line so it can raise a good error 

673 # message if there were problems with the eval 

674 success = eval(contract.contract, None, label_to_config) 

675 if not success: 

676 extra_info = f": {contract.msg}" if contract.msg is not None else "" 

677 raise pipelineIR.ContractError( 

678 f"Contract(s) '{contract.contract}' were not satisfied{extra_info}" 

679 ) 

680 

681 taskDefs = sorted(taskDefs, key=lambda x: x.label) 

682 yield from pipeTools.orderPipeline(taskDefs) 

683 
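A usage sketch (editorial addition; ``pipeline`` is an assumed loaded Pipeline):

    # Iterating a Pipeline (directly, or via toExpandedPipeline) yields
    # TaskDef objects in the dependency-sorted order used to build a
    # quantum graph.
    for task_def in pipeline:
        print(task_def.label, task_def.taskName)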

684 def _buildTaskDef(self, label: str) -> TaskDef: 

685 if (taskIR := self._pipelineIR.tasks.get(label)) is None: 

686 raise NameError(f"Label {label} does not appear in this pipeline") 

687 taskClass: Type[PipelineTask] = doImportType(taskIR.klass) 

688 taskName = get_full_type_name(taskClass) 

689 config = taskClass.ConfigClass() 

690 overrides = ConfigOverrides() 

691 if self._pipelineIR.instrument is not None: 

692 overrides.addInstrumentOverride(self._pipelineIR.instrument, taskClass._DefaultName) 

693 if taskIR.config is not None: 

694 for configIR in (configIr.formatted(self._pipelineIR.parameters) for configIr in taskIR.config): 

695 if configIR.dataId is not None: 

696 raise NotImplementedError( 

697 "Specializing a config on a partial data id is not yet " 

698 "supported in Pipeline definition" 

699 ) 

700 # only apply override if it applies to everything 

701 if configIR.dataId is None: 

702 if configIR.file: 

703 for configFile in configIR.file: 

704 overrides.addFileOverride(os.path.expandvars(configFile)) 

705 if configIR.python is not None: 

706 overrides.addPythonOverride(configIR.python) 

707 for key, value in configIR.rest.items(): 

708 overrides.addValueOverride(key, value) 

709 overrides.applyTo(config) 

710 return TaskDef(taskName=taskName, config=config, taskClass=taskClass, label=label) 

711 

712 def __iter__(self) -> Generator[TaskDef, None, None]: 

713 return self.toExpandedPipeline() 

714 

715 def __getitem__(self, item: str) -> TaskDef: 

716 return self._buildTaskDef(item) 

717 

718 def __len__(self) -> int: 

719 return len(self._pipelineIR.tasks) 

720 

721 def __eq__(self, other: object) -> bool: 

722 if not isinstance(other, Pipeline): 

723 return False 

724 elif self._pipelineIR == other._pipelineIR: 

725 # Shortcut: if the IR is the same, the expanded pipeline must be 

726 # the same as well. But the converse is not true. 

727 return True 

728 else: 

729 self_expanded = {td.label: (td.taskClass,) for td in self} 

730 other_expanded = {td.label: (td.taskClass,) for td in other} 

731 if self_expanded != other_expanded: 

732 return False 

733 # After DM-27847, we should compare configuration here, or better, 

734 # delegated to TaskDef.__eq__ after making that compare configurations. 

735 raise NotImplementedError( 

736 "Pipelines cannot be compared because config instances cannot be compared; see DM-27847." 

737 ) 

738 

739 

740@dataclass(frozen=True) 

741class TaskDatasetTypes: 

742 """An immutable struct that extracts and classifies the dataset types used 

743 by a `PipelineTask` 

744 """ 

745 

746 initInputs: NamedValueSet[DatasetType] 

747 """Dataset types that are needed as inputs in order to construct this Task. 

748 

749 Task-level `initInputs` may be classified as either 

750 `~PipelineDatasetTypes.initInputs` or 

751 `~PipelineDatasetTypes.initIntermediates` at the Pipeline level. 

752 """ 

753 

754 initOutputs: NamedValueSet[DatasetType] 

755 """Dataset types that may be written after constructing this Task. 

756 

757 Task-level `initOutputs` may be classified as either 

758 `~PipelineDatasetTypes.initOutputs` or 

759 `~PipelineDatasetTypes.initIntermediates` at the Pipeline level. 

760 """ 

761 

762 inputs: NamedValueSet[DatasetType] 

763 """Dataset types that are regular inputs to this Task. 

764 

765 If an input dataset needed for a Quantum cannot be found in the input 

766 collection(s) or produced by another Task in the Pipeline, that Quantum 

767 (and all dependent Quanta) will not be produced. 

768 

769 Task-level `inputs` may be classified as either 

770 `~PipelineDatasetTypes.inputs` or `~PipelineDatasetTypes.intermediates` 

771 at the Pipeline level. 

772 """ 

773 

774 queryConstraints: NamedValueSet[DatasetType] 

775 """Regular inputs that should not be used as constraints on the initial 

776 QuantumGraph generation data ID query, according to their tasks 

777 (`NamedValueSet`). 

778 """ 

779 

780 prerequisites: NamedValueSet[DatasetType] 

781 """Dataset types that are prerequisite inputs to this Task. 

782 

783 Prerequisite inputs must exist in the input collection(s) before the 

784 pipeline is run, but do not constrain the graph - if a prerequisite is 

785 missing for a Quantum, `PrerequisiteMissingError` is raised. 

786 

787 Prerequisite inputs are not resolved until the second stage of 

788 QuantumGraph generation. 

789 """ 

790 

791 outputs: NamedValueSet[DatasetType] 

792 """Dataset types that are produced by this Task. 

793 

794 Task-level `outputs` may be classified as either 

795 `~PipelineDatasetTypes.outputs` or `~PipelineDatasetTypes.intermediates` 

796 at the Pipeline level. 

797 """ 

798 

799 @classmethod 

800 def fromTaskDef( 

801 cls, 

802 taskDef: TaskDef, 

803 *, 

804 registry: Registry, 

805 include_configs: bool = True, 

806 storage_class_mapping: Optional[Mapping[str, str]] = None, 

807 ) -> TaskDatasetTypes: 

808 """Extract and classify the dataset types from a single `PipelineTask`. 

809 

810 Parameters 

811 ---------- 

812 taskDef: `TaskDef` 

813 An instance of a `TaskDef` class for a particular `PipelineTask`. 

814 registry: `Registry` 

815 Registry used to construct normalized `DatasetType` objects and 

816 retrieve those that are incomplete. 

817 include_configs : `bool`, optional 

818 If `True` (default) include config dataset types as 

819 ``initOutputs``. 

820 storage_class_mapping : `Mapping` of `str` to `StorageClass`, optional 

821 If a taskdef contains a component dataset type that is unknown 

822 to the registry, its parent StorageClass will be looked up in this 

823 mapping if it is supplied. If the mapping does not contain the 

824 composite dataset type, or the mapping is not supplied, an 

825 exception will be raised. 

826 

827 Returns 

828 ------- 

829 types: `TaskDatasetTypes` 

830 The dataset types used by this task. 

831 

832 Raises 

833 ------ 

834 ValueError 

835 Raised if dataset type connection definition differs from 

836 registry definition. 

837 LookupError 

838 Raised if component parent StorageClass could not be determined 

839 and storage_class_mapping does not contain the composite type, or 

840 is set to None. 

841 """ 

842 

843 def makeDatasetTypesSet( 

844 connectionType: str, 

845 is_input: bool, 

846 freeze: bool = True, 

847 ) -> NamedValueSet[DatasetType]: 

848 """Constructs a set of true `DatasetType` objects 

849 

850 Parameters 

851 ---------- 

852 connectionType : `str` 

853 Name of the connection type to produce a set for, corresponds 

854 to an attribute of type `list` on the connection class instance 

855 is_input : `bool` 

856 If `True`, these are input dataset types, else they are output 

857 dataset types. 

858 freeze : `bool`, optional 

859 If `True`, call `NamedValueSet.freeze` on the object returned. 

860 

861 Returns 

862 ------- 

863 datasetTypes : `NamedValueSet` 

864 A set of all datasetTypes which correspond to the input 

865 connection type specified in the connection class of this 

866 `PipelineTask` 

867 

868 Raises 

869 ------ 

870 ValueError 

871 Raised if dataset type connection definition differs from 

872 registry definition. 

873 LookupError 

874 Raised if component parent StorageClass could not be determined 

875 and storage_class_mapping does not contain the composite type, 

876 or is set to None. 

877 

878 Notes 

879 ----- 

880 This function is a closure over the variables ``registry``, 

881 ``taskDef``, and ``storage_class_mapping``. 

882 """ 

883 datasetTypes = NamedValueSet[DatasetType]() 

884 for c in iterConnections(taskDef.connections, connectionType): 

885 dimensions = set(getattr(c, "dimensions", set())) 

886 if "skypix" in dimensions: 

887 try: 

888 datasetType = registry.getDatasetType(c.name) 

889 except LookupError as err: 

890 raise LookupError( 

891 f"DatasetType '{c.name}' referenced by " 

892 f"{type(taskDef.connections).__name__} uses 'skypix' as a dimension " 

893 f"placeholder, but does not already exist in the registry. " 

894 f"Note that reference catalog names are now used as the dataset " 

895 f"type name instead of 'ref_cat'." 

896 ) from err 

897 rest1 = set(registry.dimensions.extract(dimensions - set(["skypix"])).names) 

898 rest2 = set( 

899 dim.name for dim in datasetType.dimensions if not isinstance(dim, SkyPixDimension) 

900 ) 

901 if rest1 != rest2: 

902 raise ValueError( 

903 f"Non-skypix dimensions for dataset type {c.name} declared in " 

904 f"connections ({rest1}) are inconsistent with those in " 

905 f"registry's version of this dataset ({rest2})." 

906 ) 

907 else: 

908 # Component dataset types are not explicitly in the 

909 # registry. This complicates consistency checks with 

910 # registry and requires we work out the composite storage 

911 # class. 

912 registryDatasetType = None 

913 try: 

914 registryDatasetType = registry.getDatasetType(c.name) 

915 except KeyError: 

916 compositeName, componentName = DatasetType.splitDatasetTypeName(c.name) 

917 if componentName: 

918 if storage_class_mapping is None or compositeName not in storage_class_mapping: 

919 raise LookupError( 

920 "Component parent class cannot be determined, and " 

921 "composite name was not in storage class mapping, or no " 

922 "storage_class_mapping was supplied" 

923 ) 

924 else: 

925 parentStorageClass = storage_class_mapping[compositeName] 

926 else: 

927 parentStorageClass = None 

928 datasetType = c.makeDatasetType( 

929 registry.dimensions, parentStorageClass=parentStorageClass 

930 ) 

931 registryDatasetType = datasetType 

932 else: 

933 datasetType = c.makeDatasetType( 

934 registry.dimensions, parentStorageClass=registryDatasetType.parentStorageClass 

935 ) 

936 

937 if registryDatasetType and datasetType != registryDatasetType: 

938 # The dataset types differ but first check to see if 

939 # they are compatible before raising. 

940 if is_input: 

941 # This DatasetType must be compatible on get. 

942 is_compatible = datasetType.is_compatible_with(registryDatasetType) 

943 else: 

944 # Has to be able to be converted to the expected 

945 # type on put. 

946 is_compatible = registryDatasetType.is_compatible_with(datasetType) 

947 if is_compatible: 

948 # For inputs we want the pipeline to use the 

949 # pipeline definition, for outputs it should use 

950 # the registry definition. 

951 if not is_input: 

952 datasetType = registryDatasetType 

953 _LOG.debug( 

954 "Dataset types differ (task %s != registry %s) but are compatible" 

955 " for %s in %s.", 

956 datasetType, 

957 registryDatasetType, 

958 "input" if is_input else "output", 

959 taskDef.label, 

960 ) 

961 else: 

962 try: 

963 # Explicitly check for storage class just to 

964 # make more specific message. 

965 _ = datasetType.storageClass 

966 except KeyError: 

967 raise ValueError( 

968 "Storage class does not exist for supplied dataset type " 

969 f"{datasetType} for {taskDef.label}." 

970 ) from None 

971 raise ValueError( 

972 f"Supplied dataset type ({datasetType}) inconsistent with " 

973 f"registry definition ({registryDatasetType}) " 

974 f"for {taskDef.label}." 

975 ) 

976 datasetTypes.add(datasetType) 

977 if freeze: 

978 datasetTypes.freeze() 

979 return datasetTypes 

980 

981 # optionally add initOutput dataset for config 

982 initOutputs = makeDatasetTypesSet("initOutputs", is_input=False, freeze=False) 

983 if include_configs: 

984 initOutputs.add( 

985 DatasetType( 

986 taskDef.configDatasetName, 

987 registry.dimensions.empty, 

988 storageClass="Config", 

989 ) 

990 ) 

991 initOutputs.freeze() 

992 

993 # optionally add output dataset for metadata 

994 outputs = makeDatasetTypesSet("outputs", is_input=False, freeze=False) 

995 if taskDef.metadataDatasetName is not None: 

996 # Metadata is supposed to be of the TaskMetadata type, its 

997 # dimensions correspond to a task quantum. 

998 dimensions = registry.dimensions.extract(taskDef.connections.dimensions) 

999 

1000 # Allow the storage class definition to be read from the existing 

1001 # dataset type definition if present. 

1002 try: 

1003 current = registry.getDatasetType(taskDef.metadataDatasetName) 

1004 except KeyError: 

1005 # No previous definition so use the default. 

1006 storageClass = "TaskMetadata" if _TASK_METADATA_TYPE is TaskMetadata else "PropertySet" 

1007 else: 

1008 storageClass = current.storageClass.name 

1009 

1010 outputs.update({DatasetType(taskDef.metadataDatasetName, dimensions, storageClass)}) 

1011 if taskDef.logOutputDatasetName is not None: 

1012 # Log output dimensions correspond to a task quantum. 

1013 dimensions = registry.dimensions.extract(taskDef.connections.dimensions) 

1014 outputs.update({DatasetType(taskDef.logOutputDatasetName, dimensions, "ButlerLogRecords")}) 

1015 

1016 outputs.freeze() 

1017 

1018 inputs = makeDatasetTypesSet("inputs", is_input=True) 

1019 queryConstraints = NamedValueSet( 

1020 inputs[c.name] 

1021 for c in cast(Iterable[Input], iterConnections(taskDef.connections, "inputs")) 

1022 if not c.deferGraphConstraint 

1023 ) 

1024 

1025 return cls( 

1026 initInputs=makeDatasetTypesSet("initInputs", is_input=True), 

1027 initOutputs=initOutputs, 

1028 inputs=inputs, 

1029 queryConstraints=queryConstraints, 

1030 prerequisites=makeDatasetTypesSet("prerequisiteInputs", is_input=True), 

1031 outputs=outputs, 

1032 ) 

1033 
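A usage sketch (editorial addition; ``butler`` is an assumed existing lsst.daf.butler.Butler and ``task_def`` an assumed TaskDef):

    task_types = TaskDatasetTypes.fromTaskDef(task_def, registry=butler.registry)
    print(sorted(task_types.inputs.names))
    print(sorted(task_types.outputs.names))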

1034 

1035@dataclass(frozen=True) 

1036class PipelineDatasetTypes: 

1037 """An immutable struct that classifies the dataset types used in a 

1038 `Pipeline`. 

1039 """ 

1040 

1041 packagesDatasetName: ClassVar[str] = "packages" 

1042 """Name of a dataset type used to save package versions. 

1043 """ 

1044 

1045 initInputs: NamedValueSet[DatasetType] 

1046 """Dataset types that are needed as inputs in order to construct the Tasks 

1047 in this Pipeline. 

1048 

1049 This does not include dataset types that are produced when constructing 

1050 other Tasks in the Pipeline (these are classified as `initIntermediates`). 

1051 """ 

1052 

1053 initOutputs: NamedValueSet[DatasetType] 

1054 """Dataset types that may be written after constructing the Tasks in this 

1055 Pipeline. 

1056 

1057 This does not include dataset types that are also used as inputs when 

1058 constructing other Tasks in the Pipeline (these are classified as 

1059 `initIntermediates`). 

1060 """ 

1061 

1062 initIntermediates: NamedValueSet[DatasetType] 

1063 """Dataset types that are both used when constructing one or more Tasks 

1064 in the Pipeline and produced as a side-effect of constructing another 

1065 Task in the Pipeline. 

1066 """ 

1067 

1068 inputs: NamedValueSet[DatasetType] 

1069 """Dataset types that are regular inputs for the full pipeline. 

1070 

1071 If an input dataset needed for a Quantum cannot be found in the input 

1072 collection(s), that Quantum (and all dependent Quanta) will not be 

1073 produced. 

1074 """ 

1075 

1076 queryConstraints: NamedValueSet[DatasetType] 

1077 """Regular inputs that should be used as constraints on the initial 

1078 QuantumGraph generation data ID query, according to their tasks 

1079 (`NamedValueSet`). 

1080 """ 

1081 

1082 prerequisites: NamedValueSet[DatasetType] 

1083 """Dataset types that are prerequisite inputs for the full Pipeline. 

1084 

1085 Prerequisite inputs must exist in the input collection(s) before the 

1086 pipeline is run, but do not constrain the graph - if a prerequisite is 

1087 missing for a Quantum, `PrerequisiteMissingError` is raised. 

1088 

1089 Prerequisite inputs are not resolved until the second stage of 

1090 QuantumGraph generation. 

1091 """ 

1092 

1093 intermediates: NamedValueSet[DatasetType] 

1094 """Dataset types that are output by one Task in the Pipeline and consumed 

1095 as inputs by one or more other Tasks in the Pipeline. 

1096 """ 

1097 

1098 outputs: NamedValueSet[DatasetType] 

1099 """Dataset types that are output by a Task in the Pipeline and not consumed 

1100 by any other Task in the Pipeline. 

1101 """ 

1102 

1103 byTask: Mapping[str, TaskDatasetTypes] 

1104 """Per-Task dataset types, keyed by label in the `Pipeline`. 

1105 

1106 This is guaranteed to be zip-iterable with the `Pipeline` itself (assuming 

1107 neither has been modified since the dataset types were extracted, of 

1108 course). 

1109 """ 

1110 

1111 @classmethod 

1112 def fromPipeline( 

1113 cls, 

1114 pipeline: Union[Pipeline, Iterable[TaskDef]], 

1115 *, 

1116 registry: Registry, 

1117 include_configs: bool = True, 

1118 include_packages: bool = True, 

1119 ) -> PipelineDatasetTypes: 

1120 """Extract and classify the dataset types from all tasks in a 

1121 `Pipeline`. 

1122 

1123 Parameters 

1124 ---------- 

1125 pipeline: `Pipeline` or `Iterable` [ `TaskDef` ] 

1126 A collection of tasks that can be run together. 

1127 registry: `Registry` 

1128 Registry used to construct normalized `DatasetType` objects and 

1129 retrieve those that are incomplete. 

1130 include_configs : `bool`, optional 

1131 If `True` (default) include config dataset types as 

1132 ``initOutputs``. 

1133 include_packages : `bool`, optional 

1134 If `True` (default) include the dataset type for software package 

1135 versions in ``initOutputs``. 

1136 

1137 Returns 

1138 ------- 

1139 types: `PipelineDatasetTypes` 

1140 The dataset types used by this `Pipeline`. 

1141 

1142 Raises 

1143 ------ 

1144 ValueError 

1145 Raised if Tasks are inconsistent about which datasets are marked 

1146 prerequisite. This indicates that the Tasks cannot be run as part 

1147 of the same `Pipeline`. 

1148 """ 

1149 allInputs = NamedValueSet[DatasetType]() 

1150 allOutputs = NamedValueSet[DatasetType]() 

1151 allInitInputs = NamedValueSet[DatasetType]() 

1152 allInitOutputs = NamedValueSet[DatasetType]() 

1153 prerequisites = NamedValueSet[DatasetType]() 

1154 queryConstraints = NamedValueSet[DatasetType]() 

1155 byTask = dict() 

1156 if include_packages: 

1157 allInitOutputs.add( 

1158 DatasetType( 

1159 cls.packagesDatasetName, 

1160 registry.dimensions.empty, 

1161 storageClass="Packages", 

1162 ) 

1163 ) 

1164 # create a list of TaskDefs in case the input is a generator 

1165 pipeline = list(pipeline) 

1166 

1167 # collect all the output dataset types 

1168 typeStorageclassMap: Dict[str, str] = {} 

1169 for taskDef in pipeline: 

1170 for outConnection in iterConnections(taskDef.connections, "outputs"): 

1171 typeStorageclassMap[outConnection.name] = outConnection.storageClass 

1172 

1173 for taskDef in pipeline: 

1174 thisTask = TaskDatasetTypes.fromTaskDef( 

1175 taskDef, 

1176 registry=registry, 

1177 include_configs=include_configs, 

1178 storage_class_mapping=typeStorageclassMap, 

1179 ) 

1180 allInitInputs.update(thisTask.initInputs) 

1181 allInitOutputs.update(thisTask.initOutputs) 

1182 allInputs.update(thisTask.inputs) 

1183 # Inputs are query constraints if any task considers them a query 

1184 # constraint. 

1185 queryConstraints.update(thisTask.queryConstraints) 

1186 prerequisites.update(thisTask.prerequisites) 

1187 allOutputs.update(thisTask.outputs) 

1188 byTask[taskDef.label] = thisTask 

1189 if not prerequisites.isdisjoint(allInputs): 

1190 raise ValueError( 

1191 "{} marked as both prerequisites and regular inputs".format( 

1192 {dt.name for dt in allInputs & prerequisites} 

1193 ) 

1194 ) 

1195 if not prerequisites.isdisjoint(allOutputs): 

1196 raise ValueError( 

1197 "{} marked as both prerequisites and outputs".format( 

1198 {dt.name for dt in allOutputs & prerequisites} 

1199 ) 

1200 ) 

1201 # Make sure that components which are marked as inputs get treated as 

1202 # intermediates if there is an output which produces the composite 

1203 # containing the component 

1204 intermediateComponents = NamedValueSet[DatasetType]() 

1205 intermediateComposites = NamedValueSet[DatasetType]() 

1206 outputNameMapping = {dsType.name: dsType for dsType in allOutputs} 

1207 for dsType in allInputs: 

1208 # get the name of a possible component 

1209 name, component = dsType.nameAndComponent() 

1210 # if there is a component name, that means this is a component 

1211 # DatasetType, if there is an output which produces the parent of 

1212 # this component, treat this input as an intermediate 

1213 if component is not None: 

1214 # This needs to be in this if block, because someone might have 

1215 # a composite that is a pure input from existing data 

1216 if name in outputNameMapping: 

1217 intermediateComponents.add(dsType) 

1218 intermediateComposites.add(outputNameMapping[name]) 

1219 

1220 def checkConsistency(a: NamedValueSet, b: NamedValueSet) -> None: 

1221 common = a.names & b.names 

1222 for name in common: 

1223 # Any compatibility is allowed. This function does not know 

1224 # if a dataset type is to be used for input or output. 

1225 if not (a[name].is_compatible_with(b[name]) or b[name].is_compatible_with(a[name])): 

1226 raise ValueError(f"Conflicting definitions for dataset type: {a[name]} != {b[name]}.") 

1227 

1228 checkConsistency(allInitInputs, allInitOutputs) 

1229 checkConsistency(allInputs, allOutputs) 

1230 checkConsistency(allInputs, intermediateComposites) 

1231 checkConsistency(allOutputs, intermediateComposites) 

1232 

1233 def frozen(s: AbstractSet[DatasetType]) -> NamedValueSet[DatasetType]: 

1234 assert isinstance(s, NamedValueSet) 

1235 s.freeze() 

1236 return s 

1237 

1238 inputs = frozen(allInputs - allOutputs - intermediateComponents) 

1239 

1240 return cls( 

1241 initInputs=frozen(allInitInputs - allInitOutputs), 

1242 initIntermediates=frozen(allInitInputs & allInitOutputs), 

1243 initOutputs=frozen(allInitOutputs - allInitInputs), 

1244 inputs=inputs, 

1245 queryConstraints=frozen(queryConstraints & inputs), 

1246 # If there are storage class differences in inputs and outputs 

1247 # the intermediates have to choose priority. Here choose that 

1248 # inputs to tasks must match the requested storage class by 

1249 # applying the inputs over the top of the outputs. 

1250 intermediates=frozen(allOutputs & allInputs | intermediateComponents), 

1251 outputs=frozen(allOutputs - allInputs - intermediateComposites), 

1252 prerequisites=frozen(prerequisites), 

1253 byTask=MappingProxyType(byTask), # MappingProxyType -> frozen view of dict for immutability 

1254 ) 

1255 
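A usage sketch (editorial addition; ``butler`` is an assumed existing Butler and ``pipeline`` an assumed loaded Pipeline):

    dataset_types = PipelineDatasetTypes.fromPipeline(pipeline, registry=butler.registry)
    print(sorted(dataset_types.inputs.names))
    print(sorted(dataset_types.intermediates.names))
    print(sorted(dataset_types.outputs.names))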

1256 @classmethod 

1257 def initOutputNames( 

1258 cls, 

1259 pipeline: Union[Pipeline, Iterable[TaskDef]], 

1260 *, 

1261 include_configs: bool = True, 

1262 include_packages: bool = True, 

1263 ) -> Iterator[str]: 

1264 """Return the names of dataset types ot task initOutputs, Configs, 

1265 and package versions for a pipeline. 

1266 

1267 Parameters 

1268 ---------- 

1269 pipeline: `Pipeline` or `Iterable` [ `TaskDef` ] 

1270 A `Pipeline` instance or collection of `TaskDef` instances. 

1271 include_configs : `bool`, optional 

1272 If `True` (default) include config dataset types. 

1273 include_packages : `bool`, optional 

1274 If `True` (default) include the dataset type for package versions. 

1275 

1276 Yields 

1277 ------ 

1278 datasetTypeName : `str` 

1279 Name of the dataset type. 

1280 """ 

1281 if include_packages: 

1282 # Package versions dataset type 

1283 yield cls.packagesDatasetName 

1284 

1285 if isinstance(pipeline, Pipeline): 

1286 pipeline = pipeline.toExpandedPipeline() 

1287 

1288 for taskDef in pipeline: 

1289 

1290 # all task InitOutputs 

1291 for name in taskDef.connections.initOutputs: 

1292 attribute = getattr(taskDef.connections, name) 

1293 yield attribute.name 

1294 

1295 # config dataset name 

1296 if include_configs: 

1297 yield taskDef.configDatasetName
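A usage sketch (editorial addition; ``pipeline`` is an assumed loaded Pipeline):

    # List init-output dataset type names without needing a registry;
    # includes "packages" and each task's "<label>_config" by default.
    init_output_names = list(PipelineDatasetTypes.initOutputNames(pipeline))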