# This file is part of pipe_base.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

"""Module defining Pipeline class and related methods.
"""

__all__ = ["Pipeline", "TaskDef", "TaskDatasetTypes", "PipelineDatasetTypes"]

# -------------------------------
# Imports of standard modules --
# -------------------------------
from dataclasses import dataclass
from types import MappingProxyType
from typing import Mapping, Union, Generator, TYPE_CHECKING

import copy
import os

# -----------------------------
# Imports for other modules --
from lsst.daf.butler import DatasetType, NamedValueSet, Registry, SkyPixDimension
from lsst.utils import doImport
from .configOverrides import ConfigOverrides
from .connections import iterConnections
from .pipelineTask import PipelineTask

from . import pipelineIR
from . import pipeTools

if TYPE_CHECKING:  # Imports needed only for type annotations; may be circular.
    from lsst.obs.base.instrument import Instrument


# ----------------------------------
# Local non-exported definitions --
# ----------------------------------

# ------------------------
# Exported definitions --
# ------------------------

class TaskDef:
    """TaskDef is a collection of information about a task needed by a
    Pipeline.

    The information includes the task name, the configuration object, and
    optionally the task class. This class is just a collection of attributes,
    and it exposes all of them so that attributes can be modified in place
    (e.g. if the configuration needs extra overrides).

    Attributes
    ----------
    taskName : `str`
        `PipelineTask` class name; currently it is not specified whether this
        is a fully-qualified name or a partial name (e.g. ``module.TaskClass``).
        The framework should be prepared to handle all cases.
    config : `lsst.pex.config.Config`
        Instance of the configuration class corresponding to this task class,
        usually with all overrides applied.
    taskClass : `type` or ``None``
        `PipelineTask` class object; can be ``None``. If ``None`` then the
        framework will have to locate and load the class.
    label : `str`, optional
        Task label, usually a short string unique within a pipeline.
    """
    def __init__(self, taskName, config, taskClass=None, label=""):
        self.taskName = taskName
        self.config = config
        self.taskClass = taskClass
        self.label = label
        self.connections = config.connections.ConnectionsClass(config=config)

    @property
    def configDatasetName(self):
        """Name of a dataset type for configuration of this task (`str`)
        """
        return self.label + "_config"

    @property
    def metadataDatasetName(self):
        """Name of a dataset type for metadata of this task, `None` if
        metadata is not to be saved (`str`)
        """
        if self.config.saveMetadata:
            return self.label + "_metadata"
        else:
            return None

    def __str__(self):
        rep = "TaskDef(" + self.taskName
        if self.label:
            rep += ", label=" + self.label
        rep += ")"
        return rep

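# Illustrative sketch of the dataset-type naming above; the task class,
# module path, and label are hypothetical and not part of this package:
#
#   taskDef = TaskDef("mypackage.ExampleTask", config=ExampleTask.ConfigClass(),
#                     taskClass=ExampleTask, label="example")
#   taskDef.configDatasetName    # -> "example_config"
#   taskDef.metadataDatasetName  # -> "example_metadata", or None when
#                                #    config.saveMetadata is False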

class Pipeline:
    """A `Pipeline` is a representation of a series of tasks to run, and the
    configuration for those tasks.

    Parameters
    ----------
    description : `str`
        A description of what this pipeline does.
    """
    def __init__(self, description: str):
        pipeline_dict = {"description": description, "tasks": {}}
        self._pipelineIR = pipelineIR.PipelineIR(pipeline_dict)

    @classmethod
    def fromFile(cls, filename: str) -> Pipeline:
        """Load a pipeline defined in a pipeline yaml file.

        Parameters
        ----------
        filename : `str`
            A path that points to a pipeline defined in yaml format.

        Returns
        -------
        pipeline : `Pipeline`
        """
        pipeline = cls.fromIR(pipelineIR.PipelineIR.from_file(filename))
        return pipeline

    @classmethod
    def fromString(cls, pipeline_string: str) -> Pipeline:
        """Create a pipeline from a string formatted as a pipeline document.

        Parameters
        ----------
        pipeline_string : `str`
            A string formatted like a pipeline document.

        Returns
        -------
        pipeline : `Pipeline`
        """
        pipeline = cls.fromIR(pipelineIR.PipelineIR.from_string(pipeline_string))
        return pipeline
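    # Illustrative sketch (hypothetical task path and file name); either entry
    # point yields an equivalent Pipeline:
    #
    #   pipeline = Pipeline.fromString(
    #       "description: demo\n"
    #       "tasks:\n"
    #       "  example: mypackage.ExampleTask\n"
    #   )
    #   pipeline = Pipeline.fromFile("demo_pipeline.yaml")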

    @classmethod
    def fromIR(cls, deserialized_pipeline: pipelineIR.PipelineIR) -> Pipeline:
        """Create a pipeline from an already created `PipelineIR` object.

        Parameters
        ----------
        deserialized_pipeline : `PipelineIR`
            An already created pipeline intermediate representation object.

        Returns
        -------
        pipeline : `Pipeline`
        """
        pipeline = cls.__new__(cls)
        pipeline._pipelineIR = deserialized_pipeline
        return pipeline

    @classmethod
    def fromPipeline(cls, pipeline: Pipeline) -> Pipeline:
        """Create a new pipeline by copying an already existing `Pipeline`.

        Parameters
        ----------
        pipeline : `Pipeline`
            An already created pipeline to copy.

        Returns
        -------
        pipeline : `Pipeline`
        """
        return cls.fromIR(copy.deepcopy(pipeline._pipelineIR))

    def __str__(self) -> str:
        return str(self._pipelineIR)

    def addInstrument(self, instrument: Union[Instrument, str]):
        """Add an instrument to the pipeline, or replace an instrument that is
        already defined.

        Parameters
        ----------
        instrument : `~lsst.daf.butler.instrument.Instrument` or `str`
            Either a subclass of `~lsst.daf.butler.instrument.Instrument` or a
            string containing the fully-qualified name of such a class.
        """
        if isinstance(instrument, str):
            pass
        else:
            # TODO: assume that this is a subclass of Instrument, no type checking
            instrument = f"{instrument.__module__}.{instrument.__qualname__}"
        self._pipelineIR.instrument = instrument

    def getInstrument(self):
        """Get the instrument from the pipeline.

        Returns
        -------
        instrument : `~lsst.daf.butler.instrument.Instrument`, `str`, or `None`
            A subclass of `~lsst.daf.butler.instrument.Instrument`, a string
            containing the fully-qualified name of such a class, or `None` if
            the pipeline does not have an instrument.
        """
        return self._pipelineIR.instrument
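    # Illustrative sketch (hypothetical instrument class and module); both
    # forms below store the fully-qualified class name in the pipeline:
    #
    #   pipeline.addInstrument("lsst.obs.example.ExampleInstrument")
    #   pipeline.addInstrument(ExampleInstrument)   # converted to its dotted name
    #   pipeline.getInstrument()                    # -> "lsst.obs.example.ExampleInstrument"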

    def addTask(self, task: Union[PipelineTask, str], label: str):
        """Add a new task to the pipeline, or replace a task that is already
        associated with the supplied label.

        Parameters
        ----------
        task : `PipelineTask` or `str`
            Either a derived class object of a `PipelineTask` or a string
            corresponding to a fully qualified `PipelineTask` name.
        label : `str`
            A label that is used to identify the `PipelineTask` being added.
        """
        if isinstance(task, str):
            taskName = task
        elif issubclass(task, PipelineTask):
            taskName = f"{task.__module__}.{task.__qualname__}"
        else:
            raise ValueError("task must be either a child class of PipelineTask or a string containing"
                             " a fully qualified name to one")
        if not label:
            # In some cases (e.g. command-line-generated pipelines) tasks can
            # be defined without a label, which is not acceptable; use the
            # task's _DefaultName in that case.
            if isinstance(task, str):
                task = doImport(task)
            label = task._DefaultName
        self._pipelineIR.tasks[label] = pipelineIR.TaskIR(label, taskName)
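    # Illustrative sketch (hypothetical task class and label); a task may be
    # added either by class object or by fully-qualified name, and the label
    # falls back to the task's _DefaultName when omitted:
    #
    #   pipeline.addTask(ExampleTask, "example")
    #   pipeline.addTask("mypackage.ExampleTask", "example")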

    def removeTask(self, label: str):
        """Remove a task from the pipeline.

        Parameters
        ----------
        label : `str`
            The label used to identify the task that is to be removed.

        Raises
        ------
        KeyError
            If no task with that label exists in the pipeline.
        """
        self._pipelineIR.tasks.pop(label)

    def addConfigOverride(self, label: str, key: str, value: object):
        """Apply a single config override.

        Parameters
        ----------
        label : `str`
            Label of the task.
        key : `str`
            Fully-qualified field name.
        value : object
            Value to be given to a field.
        """
        self._addConfigImpl(label, pipelineIR.ConfigIR(rest={key: value}))

    def addConfigFile(self, label: str, filename: str):
        """Add overrides from a specified file.

        Parameters
        ----------
        label : `str`
            The label used to identify the task associated with the config to
            modify.
        filename : `str`
            Path to the override file.
        """
        self._addConfigImpl(label, pipelineIR.ConfigIR(file=[filename]))

    def addConfigPython(self, label: str, pythonString: str):
        """Add overrides by running a snippet of python code against a config.

        Parameters
        ----------
        label : `str`
            The label used to identify the task associated with the config to
            modify.
        pythonString : `str`
            A string which is valid python code to be executed. This is done
            with ``config`` as the only locally accessible value.
        """
        self._addConfigImpl(label, pipelineIR.ConfigIR(python=pythonString))
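    # Illustrative sketch (hypothetical label, field names, environment
    # variable, and file path) of the three override styles defined above:
    #
    #   pipeline.addConfigOverride("example", "doSomething", True)
    #   pipeline.addConfigFile("example", "$MY_PKG_DIR/config/example.py")
    #   pipeline.addConfigPython("example", "config.threshold = 5.0")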

    def _addConfigImpl(self, label: str, newConfig: pipelineIR.ConfigIR):
        if label not in self._pipelineIR.tasks:
            raise LookupError(f"There are no tasks labeled '{label}' in the pipeline")
        self._pipelineIR.tasks[label].add_or_update_config(newConfig)

    def toFile(self, filename: str):
        self._pipelineIR.to_file(filename)

    def toExpandedPipeline(self) -> Generator[TaskDef, None, None]:
        """Return a generator of TaskDefs which can be used to create quantum
        graphs.

        Returns
        -------
        generator : generator of `TaskDef`
            The generator returned will be the sorted iterator of tasks which
            are to be used in constructing a quantum graph.

        Raises
        ------
        NotImplementedError
            If a dataId is supplied in a config block. This is in place for
            future use.
        """
        taskDefs = []
        for label, taskIR in self._pipelineIR.tasks.items():
            taskClass = doImport(taskIR.klass)
            taskName = taskClass.__qualname__
            config = taskClass.ConfigClass()
            overrides = ConfigOverrides()
            if self._pipelineIR.instrument is not None:
                overrides.addInstrumentOverride(self._pipelineIR.instrument, taskClass._DefaultName)
            if taskIR.config is not None:
                for configIR in taskIR.config:
                    if configIR.dataId is not None:
                        raise NotImplementedError("Specializing a config on a partial data id is not yet "
                                                  "supported in Pipeline definition")
                    # Only apply the override if it applies to everything.
                    if configIR.dataId is None:
                        if configIR.file:
                            for configFile in configIR.file:
                                overrides.addFileOverride(os.path.expandvars(configFile))
                        if configIR.python is not None:
                            overrides.addPythonOverride(configIR.python)
                        for key, value in configIR.rest.items():
                            overrides.addValueOverride(key, value)
            overrides.applyTo(config)
            # This may need to be revisited.
            config.validate()
            taskDefs.append(TaskDef(taskName=taskName, config=config, taskClass=taskClass, label=label))

        # Evaluate the contracts.
        if self._pipelineIR.contracts is not None:
            label_to_config = {x.label: x.config for x in taskDefs}
            for contract in self._pipelineIR.contracts:
                # Execute this on its own line so it can raise a good error
                # message if there were problems with the eval.
                success = eval(contract.contract, None, label_to_config)
                if not success:
                    extra_info = f": {contract.msg}" if contract.msg is not None else ""
                    raise pipelineIR.ContractError(f"Contract(s) '{contract.contract}' were not "
                                                   f"satisfied{extra_info}")

        yield from pipeTools.orderPipeline(taskDefs)
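    # Illustrative sketch: iterating over the expanded pipeline yields TaskDefs
    # in an order in which the tasks can be run.
    #
    #   for taskDef in pipeline.toExpandedPipeline():
    #       print(taskDef.label, taskDef.taskName)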

    def __len__(self):
        return len(self._pipelineIR.tasks)

    def __eq__(self, other: "Pipeline"):
        if not isinstance(other, Pipeline):
            return False
        return self._pipelineIR == other._pipelineIR


@dataclass(frozen=True)
class TaskDatasetTypes:
    """An immutable struct that extracts and classifies the dataset types used
    by a `PipelineTask`.
    """

    initInputs: NamedValueSet[DatasetType]
    """Dataset types that are needed as inputs in order to construct this Task.

    Task-level `initInputs` may be classified as either
    `~PipelineDatasetTypes.initInputs` or
    `~PipelineDatasetTypes.initIntermediates` at the Pipeline level.
    """

    initOutputs: NamedValueSet[DatasetType]
    """Dataset types that may be written after constructing this Task.

    Task-level `initOutputs` may be classified as either
    `~PipelineDatasetTypes.initOutputs` or
    `~PipelineDatasetTypes.initIntermediates` at the Pipeline level.
    """

    inputs: NamedValueSet[DatasetType]
    """Dataset types that are regular inputs to this Task.

    If an input dataset needed for a Quantum cannot be found in the input
    collection(s) or produced by another Task in the Pipeline, that Quantum
    (and all dependent Quanta) will not be produced.

    Task-level `inputs` may be classified as either
    `~PipelineDatasetTypes.inputs` or `~PipelineDatasetTypes.intermediates`
    at the Pipeline level.
    """

    prerequisites: NamedValueSet[DatasetType]
    """Dataset types that are prerequisite inputs to this Task.

    Prerequisite inputs must exist in the input collection(s) before the
    pipeline is run, but do not constrain the graph - if a prerequisite is
    missing for a Quantum, `PrerequisiteMissingError` is raised.

    Prerequisite inputs are not resolved until the second stage of
    QuantumGraph generation.
    """

    outputs: NamedValueSet[DatasetType]
    """Dataset types that are produced by this Task.

    Task-level `outputs` may be classified as either
    `~PipelineDatasetTypes.outputs` or `~PipelineDatasetTypes.intermediates`
    at the Pipeline level.
    """

    @classmethod
    def fromTaskDef(cls, taskDef: TaskDef, *, registry: Registry) -> TaskDatasetTypes:
        """Extract and classify the dataset types from a single `PipelineTask`.

        Parameters
        ----------
        taskDef : `TaskDef`
            An instance of a `TaskDef` class for a particular `PipelineTask`.
        registry : `Registry`
            Registry used to construct normalized `DatasetType` objects and
            retrieve those that are incomplete.

        Returns
        -------
        types : `TaskDatasetTypes`
            The dataset types used by this task.
        """
        def makeDatasetTypesSet(connectionType, freeze=True):
            """Construct a set of true `DatasetType` objects.

            Parameters
            ----------
            connectionType : `str`
                Name of the connection type to produce a set for; corresponds
                to an attribute of type `list` on the connection class instance.
            freeze : `bool`, optional
                If `True`, call `NamedValueSet.freeze` on the object returned.

            Returns
            -------
            datasetTypes : `NamedValueSet`
                A set of all datasetTypes which correspond to the input
                connection type specified in the connection class of this
                `PipelineTask`.

            Notes
            -----
            This function is a closure over the variables ``registry`` and
            ``taskDef``.
            """
            datasetTypes = NamedValueSet()
            for c in iterConnections(taskDef.connections, connectionType):
                dimensions = set(getattr(c, 'dimensions', set()))
                if "skypix" in dimensions:
                    try:
                        datasetType = registry.getDatasetType(c.name)
                    except LookupError as err:
                        raise LookupError(
                            f"DatasetType '{c.name}' referenced by "
                            f"{type(taskDef.connections).__name__} uses 'skypix' as a dimension "
                            f"placeholder, but does not already exist in the registry. "
                            f"Note that reference catalog names are now used as the dataset "
                            f"type name instead of 'ref_cat'."
                        ) from err
                    rest1 = set(registry.dimensions.extract(dimensions - set(["skypix"])).names)
                    rest2 = set(dim.name for dim in datasetType.dimensions
                                if not isinstance(dim, SkyPixDimension))
                    if rest1 != rest2:
                        raise ValueError(f"Non-skypix dimensions for dataset type {c.name} declared in "
                                         f"connections ({rest1}) are inconsistent with those in "
                                         f"registry's version of this dataset ({rest2}).")
                else:
                    # Component dataset types are not explicitly in the
                    # registry. This complicates consistency checks with
                    # registry and requires we work out the composite storage
                    # class.
                    registryDatasetType = None
                    try:
                        registryDatasetType = registry.getDatasetType(c.name)
                    except KeyError:
                        compositeName, componentName = DatasetType.splitDatasetTypeName(c.name)
                        parentStorageClass = DatasetType.PlaceholderParentStorageClass \
                            if componentName else None
                        datasetType = DatasetType(c.name, registry.dimensions.extract(dimensions),
                                                  c.storageClass,
                                                  parentStorageClass=parentStorageClass)
                        registryDatasetType = datasetType
                    else:
                        datasetType = DatasetType(c.name, registry.dimensions.extract(dimensions),
                                                  c.storageClass,
                                                  parentStorageClass=registryDatasetType.parentStorageClass)

                    if registryDatasetType and datasetType != registryDatasetType:
                        raise ValueError(f"Supplied dataset type ({datasetType}) inconsistent with "
                                         f"registry definition ({registryDatasetType}) "
                                         f"for {taskDef.label}.")
                datasetTypes.add(datasetType)
            if freeze:
                datasetTypes.freeze()
            return datasetTypes

        # Optionally add an output dataset for metadata.
        outputs = makeDatasetTypesSet("outputs", freeze=False)
        if taskDef.metadataDatasetName is not None:
            # Metadata is supposed to be of the PropertySet type; its
            # dimensions correspond to a task quantum.
            dimensions = registry.dimensions.extract(taskDef.connections.dimensions)
            outputs |= {DatasetType(taskDef.metadataDatasetName, dimensions, "PropertySet")}
        outputs.freeze()

        return cls(
            initInputs=makeDatasetTypesSet("initInputs"),
            initOutputs=makeDatasetTypesSet("initOutputs"),
            inputs=makeDatasetTypesSet("inputs"),
            prerequisites=makeDatasetTypesSet("prerequisiteInputs"),
            outputs=outputs,
        )

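# Illustrative sketch (assuming ``butler`` is an instantiated
# lsst.daf.butler.Butler and ``taskDef`` is a TaskDef from an expanded
# pipeline):
#
#   taskTypes = TaskDatasetTypes.fromTaskDef(taskDef, registry=butler.registry)
#   taskTypes.inputs, taskTypes.outputs, taskTypes.prerequisites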

@dataclass(frozen=True)
class PipelineDatasetTypes:
    """An immutable struct that classifies the dataset types used in a
    `Pipeline`.
    """

    initInputs: NamedValueSet[DatasetType]
    """Dataset types that are needed as inputs in order to construct the Tasks
    in this Pipeline.

    This does not include dataset types that are produced when constructing
    other Tasks in the Pipeline (these are classified as `initIntermediates`).
    """

    initOutputs: NamedValueSet[DatasetType]
    """Dataset types that may be written after constructing the Tasks in this
    Pipeline.

    This does not include dataset types that are also used as inputs when
    constructing other Tasks in the Pipeline (these are classified as
    `initIntermediates`).
    """

    initIntermediates: NamedValueSet[DatasetType]
    """Dataset types that are both used when constructing one or more Tasks
    in the Pipeline and produced as a side-effect of constructing another
    Task in the Pipeline.
    """

    inputs: NamedValueSet[DatasetType]
    """Dataset types that are regular inputs for the full pipeline.

    If an input dataset needed for a Quantum cannot be found in the input
    collection(s), that Quantum (and all dependent Quanta) will not be
    produced.
    """

    prerequisites: NamedValueSet[DatasetType]
    """Dataset types that are prerequisite inputs for the full Pipeline.

    Prerequisite inputs must exist in the input collection(s) before the
    pipeline is run, but do not constrain the graph - if a prerequisite is
    missing for a Quantum, `PrerequisiteMissingError` is raised.

    Prerequisite inputs are not resolved until the second stage of
    QuantumGraph generation.
    """

    intermediates: NamedValueSet[DatasetType]
    """Dataset types that are output by one Task in the Pipeline and consumed
    as inputs by one or more other Tasks in the Pipeline.
    """

    outputs: NamedValueSet[DatasetType]
    """Dataset types that are output by a Task in the Pipeline and not consumed
    by any other Task in the Pipeline.
    """

    byTask: Mapping[str, TaskDatasetTypes]
    """Per-Task dataset types, keyed by label in the `Pipeline`.

    This is guaranteed to be zip-iterable with the `Pipeline` itself (assuming
    neither has been modified since the dataset types were extracted, of
    course).
    """

    @classmethod
    def fromPipeline(cls, pipeline, *, registry: Registry) -> PipelineDatasetTypes:
        """Extract and classify the dataset types from all tasks in a
        `Pipeline`.

        Parameters
        ----------
        pipeline : `Pipeline`
            An ordered collection of tasks that can be run together.
        registry : `Registry`
            Registry used to construct normalized `DatasetType` objects and
            retrieve those that are incomplete.

        Returns
        -------
        types : `PipelineDatasetTypes`
            The dataset types used by this `Pipeline`.

        Raises
        ------
        ValueError
            Raised if Tasks are inconsistent about which datasets are marked
            prerequisite. This indicates that the Tasks cannot be run as part
            of the same `Pipeline`.
        """
        allInputs = NamedValueSet()
        allOutputs = NamedValueSet()
        allInitInputs = NamedValueSet()
        allInitOutputs = NamedValueSet()
        prerequisites = NamedValueSet()
        byTask = dict()
        if isinstance(pipeline, Pipeline):
            pipeline = pipeline.toExpandedPipeline()
        for taskDef in pipeline:
            thisTask = TaskDatasetTypes.fromTaskDef(taskDef, registry=registry)
            allInitInputs |= thisTask.initInputs
            allInitOutputs |= thisTask.initOutputs
            allInputs |= thisTask.inputs
            prerequisites |= thisTask.prerequisites
            allOutputs |= thisTask.outputs
            byTask[taskDef.label] = thisTask
        if not prerequisites.isdisjoint(allInputs):
            raise ValueError("{} marked as both prerequisites and regular inputs".format(
                {dt.name for dt in allInputs & prerequisites}
            ))
        if not prerequisites.isdisjoint(allOutputs):
            raise ValueError("{} marked as both prerequisites and outputs".format(
                {dt.name for dt in allOutputs & prerequisites}
            ))
        # Make sure that components which are marked as inputs get treated as
        # intermediates if there is an output which produces the composite
        # containing the component.
        intermediateComponents = NamedValueSet()
        intermediateComposites = NamedValueSet()
        outputNameMapping = {dsType.name: dsType for dsType in allOutputs}
        for dsType in allInputs:
            # Get the name of a possible component.
            name, component = dsType.nameAndComponent()
            # If there is a component name, this is a component DatasetType;
            # if there is also an output which produces the parent of this
            # component, treat this input as an intermediate.
            if component is not None:
                if name in outputNameMapping:
                    if outputNameMapping[name].dimensions != dsType.dimensions:
                        raise ValueError(f"Component dataset type {dsType.name} has different "
                                         f"dimensions ({dsType.dimensions}) than its parent "
                                         f"({outputNameMapping[name].dimensions}).")
                    composite = DatasetType(name, dsType.dimensions, outputNameMapping[name].storageClass,
                                            universe=registry.dimensions)
                    intermediateComponents.add(dsType)
                    intermediateComposites.add(composite)

        def checkConsistency(a: NamedValueSet, b: NamedValueSet):
            common = a.names & b.names
            for name in common:
                if a[name] != b[name]:
                    raise ValueError(f"Conflicting definitions for dataset type: {a[name]} != {b[name]}.")

        checkConsistency(allInitInputs, allInitOutputs)
        checkConsistency(allInputs, allOutputs)
        checkConsistency(allInputs, intermediateComposites)
        checkConsistency(allOutputs, intermediateComposites)

        def frozen(s: NamedValueSet) -> NamedValueSet:
            s.freeze()
            return s

        return cls(
            initInputs=frozen(allInitInputs - allInitOutputs),
            initIntermediates=frozen(allInitInputs & allInitOutputs),
            initOutputs=frozen(allInitOutputs - allInitInputs),
            inputs=frozen(allInputs - allOutputs - intermediateComponents),
            intermediates=frozen(allInputs & allOutputs | intermediateComponents),
            outputs=frozen(allOutputs - allInputs - intermediateComposites),
            prerequisites=frozen(prerequisites),
            byTask=MappingProxyType(byTask),  # MappingProxyType -> frozen view of dict for immutability
        )
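

# Illustrative sketch (assuming ``pipeline`` is a Pipeline and ``butler`` is an
# instantiated lsst.daf.butler.Butler; the "example" label is hypothetical):
#
#   datasetTypes = PipelineDatasetTypes.fromPipeline(pipeline, registry=butler.registry)
#   datasetTypes.inputs          # overall inputs the pipeline must read
#   datasetTypes.intermediates   # produced and consumed within the pipeline
#   datasetTypes.outputs         # final outputs not consumed by any task
#   datasetTypes.byTask["example"].inputs   # per-task view, keyed by label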