# This file is part of pipe_base.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

"""Module defining the Pipeline class and related methods.
"""

__all__ = ["Pipeline", "TaskDef", "TaskDatasetTypes", "PipelineDatasetTypes"]

# -------------------------------
# Imports of standard modules --
# -------------------------------
from dataclasses import dataclass
from types import MappingProxyType
from typing import Mapping, Union, Generator, TYPE_CHECKING

import copy
import os

# -----------------------------
# Imports for other modules --
from lsst.daf.butler import DatasetType, NamedValueSet, Registry, SkyPixDimension
from lsst.utils import doImport
from .configOverrides import ConfigOverrides
from .connections import iterConnections
from .pipelineTask import PipelineTask

from . import pipelineIR
from . import pipeTools

if TYPE_CHECKING:  # Imports needed only for type annotations; may be circular.
    from lsst.obs.base.instrument import Instrument

# ----------------------------------
# Local non-exported definitions --
# ----------------------------------

# ------------------------
# Exported definitions --
# ------------------------


class TaskDef:
    """TaskDef is a collection of information about a task needed by a
    Pipeline.

    The information includes the task name, a configuration object, and an
    optional task class. This class is just a collection of attributes and it
    exposes all of them so that attributes can be modified in place (e.g. if
    the configuration needs extra overrides).

    Attributes
    ----------
    taskName : `str`
        `PipelineTask` class name; it is not currently specified whether this
        is a fully-qualified name or a partial name (e.g.
        ``module.TaskClass``). The framework should be prepared to handle all
        cases.
    config : `lsst.pex.config.Config`
        Instance of the configuration class corresponding to this task class,
        usually with all overrides applied. This config will be frozen.
    taskClass : `type` or ``None``
        `PipelineTask` class object; can be ``None``. If ``None`` then the
        framework will have to locate and load the class.
    label : `str`, optional
        Task label, usually a short string unique within a pipeline.
    """
    def __init__(self, taskName, config, taskClass=None, label=""):
        self.taskName = taskName
        config.freeze()
        self.config = config
        self.taskClass = taskClass
        self.label = label
        self.connections = config.connections.ConnectionsClass(config=config)

    @property
    def configDatasetName(self):
        """Name of a dataset type for configuration of this task (`str`)
        """
        return self.label + "_config"

    @property
    def metadataDatasetName(self):
        """Name of a dataset type for metadata of this task, `None` if
        metadata is not to be saved (`str`)
        """
        if self.config.saveMetadata:
            return self.label + "_metadata"
        else:
            return None

    def __str__(self):
        rep = "TaskDef(" + self.taskName
        if self.label:
            rep += ", label=" + self.label
        rep += ")"
        return rep

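# An illustrative sketch (kept as a comment, not executed here) of the dataset
# type names a TaskDef derives from its label; the label below is
# hypothetical, and metadataDatasetName assumes config.saveMetadata is True:
#
#     taskDef.configDatasetName    # -> "characterizeImage_config"
#     taskDef.metadataDatasetName  # -> "characterizeImage_metadata"
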

class Pipeline:
    """A `Pipeline` is a representation of a series of tasks to run, and the
    configuration for those tasks.

    Parameters
    ----------
    description : `str`
        A description of what this pipeline does.
    """
    def __init__(self, description: str):
        pipeline_dict = {"description": description, "tasks": {}}
        self._pipelineIR = pipelineIR.PipelineIR(pipeline_dict)
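    # An illustrative sketch (not executed here) of building a pipeline
    # programmatically; the task name, label, and field below are
    # hypothetical:
    #
    #     pipeline = Pipeline("A short demo pipeline")
    #     pipeline.addTask("my.package.HypotheticalTask", "demoTask")
    #     pipeline.addConfigOverride("demoTask", "someField", 42)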

    @classmethod
    def fromFile(cls, filename: str) -> Pipeline:
        """Load a pipeline defined in a pipeline yaml file.

        Parameters
        ----------
        filename: `str`
            A path that points to a pipeline defined in yaml format.

        Returns
        -------
        pipeline: `Pipeline`
        """
        pipeline = cls.fromIR(pipelineIR.PipelineIR.from_file(filename))
        return pipeline

    @classmethod
    def fromString(cls, pipeline_string: str) -> Pipeline:
        """Create a pipeline from a string formatted as a pipeline document.

        Parameters
        ----------
        pipeline_string : `str`
            A string formatted like a pipeline document.

        Returns
        -------
        pipeline: `Pipeline`
        """
        pipeline = cls.fromIR(pipelineIR.PipelineIR.from_string(pipeline_string))
        return pipeline
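    # An illustrative sketch of loading a pipeline definition; the path below
    # is hypothetical. Both fromFile and fromString delegate to fromIR, which
    # wraps an already parsed PipelineIR object:
    #
    #     pipeline = Pipeline.fromFile("/path/to/pipeline.yaml")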

    @classmethod
    def fromIR(cls, deserialized_pipeline: pipelineIR.PipelineIR) -> Pipeline:
        """Create a pipeline from an already created `PipelineIR` object.

        Parameters
        ----------
        deserialized_pipeline: `PipelineIR`
            An already created pipeline intermediate representation object.

        Returns
        -------
        pipeline: `Pipeline`
        """
        pipeline = cls.__new__(cls)
        pipeline._pipelineIR = deserialized_pipeline
        return pipeline

    @classmethod
    def fromPipeline(cls, pipeline: Pipeline) -> Pipeline:
        """Create a new pipeline by copying an already existing `Pipeline`.

        Parameters
        ----------
        pipeline: `Pipeline`
            An already created pipeline object to copy.

        Returns
        -------
        pipeline: `Pipeline`
        """
        return cls.fromIR(copy.deepcopy(pipeline._pipelineIR))

    def __str__(self) -> str:
        return str(self._pipelineIR)

    def addInstrument(self, instrument: Union[Instrument, str]):
        """Add an instrument to the pipeline, or replace an instrument that is
        already defined.

        Parameters
        ----------
        instrument : `~lsst.obs.base.instrument.Instrument` or `str`
            Either an `Instrument` subclass or a string corresponding to a
            fully qualified `Instrument` class name.
        """
        if isinstance(instrument, str):
            pass
        else:
            # TODO: assume that this is a subclass of Instrument, no type checking
            instrument = f"{instrument.__module__}.{instrument.__qualname__}"
        self._pipelineIR.instrument = instrument

    def getInstrument(self):
        """Get the instrument from the pipeline.

        Returns
        -------
        instrument : `~lsst.obs.base.instrument.Instrument`, `str`, or None
            An `Instrument` subclass, a string corresponding to a fully
            qualified `Instrument` class name, or None if the pipeline does
            not have an instrument.
        """
        return self._pipelineIR.instrument

    def addTask(self, task: Union[PipelineTask, str], label: str):
        """Add a new task to the pipeline, or replace a task that is already
        associated with the supplied label.

        Parameters
        ----------
        task: `PipelineTask` or `str`
            Either a derived class object of a `PipelineTask` or a string
            corresponding to a fully qualified `PipelineTask` name.
        label: `str`
            A label that is used to identify the `PipelineTask` being added.
        """
        if isinstance(task, str):
            taskName = task
        elif issubclass(task, PipelineTask):
            taskName = f"{task.__module__}.{task.__qualname__}"
        else:
            raise ValueError("task must be either a child class of PipelineTask or a string containing"
                             " a fully qualified name to one")
        if not label:
            # In some cases (with command line-generated pipelines) tasks can
            # be defined without a label, which is not acceptable; use the
            # task _DefaultName in that case.
            if isinstance(task, str):
                task = doImport(task)
            label = task._DefaultName
        self._pipelineIR.tasks[label] = pipelineIR.TaskIR(label, taskName)

    def removeTask(self, label: str):
        """Remove a task from the pipeline.

        Parameters
        ----------
        label : `str`
            The label used to identify the task that is to be removed.

        Raises
        ------
        KeyError
            If no task with that label exists in the pipeline.
        """
        self._pipelineIR.tasks.pop(label)

    def addConfigOverride(self, label: str, key: str, value: object):
        """Apply a single config override.

        Parameters
        ----------
        label : `str`
            Label of the task.
        key: `str`
            Fully-qualified field name.
        value : object
            Value to be given to a field.
        """
        self._addConfigImpl(label, pipelineIR.ConfigIR(rest={key: value}))

    def addConfigFile(self, label: str, filename: str):
        """Add overrides from a specified file.

        Parameters
        ----------
        label : `str`
            The label used to identify the task associated with the config to
            modify.
        filename : `str`
            Path to the override file.
        """
        self._addConfigImpl(label, pipelineIR.ConfigIR(file=[filename]))

    def addConfigPython(self, label: str, pythonString: str):
        """Add overrides by running a snippet of python code against a config.

        Parameters
        ----------
        label : `str`
            The label used to identify the task associated with the config to
            modify.
        pythonString: `str`
            A string which is valid python code to be executed. This is done
            with config as the only local accessible value.
        """
        self._addConfigImpl(label, pipelineIR.ConfigIR(python=pythonString))

    def _addConfigImpl(self, label: str, newConfig: pipelineIR.ConfigIR):
        if label not in self._pipelineIR.tasks:
            raise LookupError(f"There are no tasks labeled '{label}' in the pipeline")
        self._pipelineIR.tasks[label].add_or_update_config(newConfig)
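    # An illustrative sketch of the three override flavors (not executed
    # here); the label, field names, and file path are hypothetical:
    #
    #     pipeline.addConfigOverride("demoTask", "doWrite", False)
    #     pipeline.addConfigFile("demoTask", "/path/to/overrides.py")
    #     pipeline.addConfigPython("demoTask", "config.doWrite = False")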

    def toFile(self, filename: str):
        """Write the pipeline definition to a file."""
        self._pipelineIR.to_file(filename)

    def toExpandedPipeline(self) -> Generator[TaskDef, None, None]:
        """Return a generator of TaskDefs which can be used to create quantum
        graphs.

        Returns
        -------
        generator : generator of `TaskDef`
            The generator returned will be the sorted iterator of tasks which
            are to be used in constructing a quantum graph.

        Raises
        ------
        NotImplementedError
            If a dataId is supplied in a config block. This is in place for
            future use.
        """
        taskDefs = []
        for label, taskIR in self._pipelineIR.tasks.items():
            taskClass = doImport(taskIR.klass)
            taskName = taskClass.__qualname__
            config = taskClass.ConfigClass()
            overrides = ConfigOverrides()
            if self._pipelineIR.instrument is not None:
                overrides.addInstrumentOverride(self._pipelineIR.instrument, taskClass._DefaultName)
            if taskIR.config is not None:
                for configIR in taskIR.config:
                    if configIR.dataId is not None:
                        raise NotImplementedError("Specializing a config on a partial data id is not yet "
                                                  "supported in Pipeline definition")
                    # only apply override if it applies to everything
                    if configIR.dataId is None:
                        if configIR.file:
                            for configFile in configIR.file:
                                overrides.addFileOverride(os.path.expandvars(configFile))
                        if configIR.python is not None:
                            overrides.addPythonOverride(configIR.python)
                        for key, value in configIR.rest.items():
                            overrides.addValueOverride(key, value)
            overrides.applyTo(config)
            # This may need to be revisited
            config.validate()
            taskDefs.append(TaskDef(taskName=taskName, config=config, taskClass=taskClass, label=label))

        # Now evaluate the contracts.
        if self._pipelineIR.contracts is not None:
            label_to_config = {x.label: x.config for x in taskDefs}
            for contract in self._pipelineIR.contracts:
                # Evaluate this on its own line so it can raise a good error
                # message if there were problems with the eval.
                success = eval(contract.contract, None, label_to_config)
                if not success:
                    extra_info = f": {contract.msg}" if contract.msg is not None else ""
                    raise pipelineIR.ContractError(f"Contract(s) '{contract.contract}' were not "
                                                   f"satisfied{extra_info}")

        yield from pipeTools.orderPipeline(taskDefs)
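    # An illustrative sketch (not executed here) of consuming the expanded
    # pipeline; the labels printed are whatever the pipeline defines:
    #
    #     for taskDef in pipeline.toExpandedPipeline():
    #         print(taskDef.label, taskDef.taskName)
    #         # taskDef.config is the frozen, override-applied config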

    def __len__(self):
        return len(self._pipelineIR.tasks)

    def __eq__(self, other: "Pipeline"):
        if not isinstance(other, Pipeline):
            return False
        return self._pipelineIR == other._pipelineIR


@dataclass(frozen=True)
class TaskDatasetTypes:
    """An immutable struct that extracts and classifies the dataset types used
    by a `PipelineTask`.
    """

    initInputs: NamedValueSet[DatasetType]
    """Dataset types that are needed as inputs in order to construct this Task.

    Task-level `initInputs` may be classified as either
    `~PipelineDatasetTypes.initInputs` or
    `~PipelineDatasetTypes.initIntermediates` at the Pipeline level.
    """

    initOutputs: NamedValueSet[DatasetType]
    """Dataset types that may be written after constructing this Task.

    Task-level `initOutputs` may be classified as either
    `~PipelineDatasetTypes.initOutputs` or
    `~PipelineDatasetTypes.initIntermediates` at the Pipeline level.
    """

    inputs: NamedValueSet[DatasetType]
    """Dataset types that are regular inputs to this Task.

    If an input dataset needed for a Quantum cannot be found in the input
    collection(s) or produced by another Task in the Pipeline, that Quantum
    (and all dependent Quanta) will not be produced.

    Task-level `inputs` may be classified as either
    `~PipelineDatasetTypes.inputs` or `~PipelineDatasetTypes.intermediates`
    at the Pipeline level.
    """

    prerequisites: NamedValueSet[DatasetType]
    """Dataset types that are prerequisite inputs to this Task.

    Prerequisite inputs must exist in the input collection(s) before the
    pipeline is run, but do not constrain the graph - if a prerequisite is
    missing for a Quantum, `PrerequisiteMissingError` is raised.

    Prerequisite inputs are not resolved until the second stage of
    QuantumGraph generation.
    """

    outputs: NamedValueSet[DatasetType]
    """Dataset types that are produced by this Task.

    Task-level `outputs` may be classified as either
    `~PipelineDatasetTypes.outputs` or `~PipelineDatasetTypes.intermediates`
    at the Pipeline level.
    """

    @classmethod
    def fromTaskDef(cls, taskDef: TaskDef, *, registry: Registry) -> TaskDatasetTypes:
        """Extract and classify the dataset types from a single `PipelineTask`.

        Parameters
        ----------
        taskDef: `TaskDef`
            An instance of a `TaskDef` class for a particular `PipelineTask`.
        registry: `Registry`
            Registry used to construct normalized `DatasetType` objects and
            retrieve those that are incomplete.

        Returns
        -------
        types: `TaskDatasetTypes`
            The dataset types used by this task.
        """
        def makeDatasetTypesSet(connectionType, freeze=True):
            """Construct a set of true `DatasetType` objects.

            Parameters
            ----------
            connectionType : `str`
                Name of the connection type to produce a set for; corresponds
                to an attribute of type `list` on the connection class
                instance.
            freeze : `bool`, optional
                If `True`, call `NamedValueSet.freeze` on the object returned.

            Returns
            -------
            datasetTypes : `NamedValueSet`
                A set of all datasetTypes which correspond to the input
                connection type specified in the connection class of this
                `PipelineTask`.

            Notes
            -----
            This function is a closure over the variables ``registry`` and
            ``taskDef``.
            """
            datasetTypes = NamedValueSet()
            for c in iterConnections(taskDef.connections, connectionType):
                dimensions = set(getattr(c, 'dimensions', set()))
                if "skypix" in dimensions:
                    try:
                        datasetType = registry.getDatasetType(c.name)
                    except LookupError as err:
                        raise LookupError(
                            f"DatasetType '{c.name}' referenced by "
                            f"{type(taskDef.connections).__name__} uses 'skypix' as a dimension "
                            f"placeholder, but does not already exist in the registry. "
                            f"Note that reference catalog names are now used as the dataset "
                            f"type name instead of 'ref_cat'."
                        ) from err
                    rest1 = set(registry.dimensions.extract(dimensions - set(["skypix"])).names)
                    rest2 = set(dim.name for dim in datasetType.dimensions
                                if not isinstance(dim, SkyPixDimension))
                    if rest1 != rest2:
                        raise ValueError(f"Non-skypix dimensions for dataset type {c.name} declared in "
                                         f"connections ({rest1}) are inconsistent with those in "
                                         f"registry's version of this dataset ({rest2}).")
                else:
                    # Component dataset types are not explicitly in the
                    # registry. This complicates consistency checks with
                    # registry and requires we work out the composite storage
                    # class.
                    registryDatasetType = None
                    try:
                        registryDatasetType = registry.getDatasetType(c.name)
                    except KeyError:
                        compositeName, componentName = DatasetType.splitDatasetTypeName(c.name)
                        parentStorageClass = DatasetType.PlaceholderParentStorageClass \
                            if componentName else None
                        datasetType = c.makeDatasetType(
                            registry.dimensions,
                            parentStorageClass=parentStorageClass
                        )
                        registryDatasetType = datasetType
                    else:
                        datasetType = c.makeDatasetType(
                            registry.dimensions,
                            parentStorageClass=registryDatasetType.parentStorageClass
                        )

                    if registryDatasetType and datasetType != registryDatasetType:
                        raise ValueError(f"Supplied dataset type ({datasetType}) inconsistent with "
                                         f"registry definition ({registryDatasetType}) "
                                         f"for {taskDef.label}.")
                datasetTypes.add(datasetType)
            if freeze:
                datasetTypes.freeze()
            return datasetTypes

        # optionally add output dataset for metadata
        outputs = makeDatasetTypesSet("outputs", freeze=False)
        if taskDef.metadataDatasetName is not None:
            # Metadata is supposed to be of the PropertySet type; its
            # dimensions correspond to a task quantum.
            dimensions = registry.dimensions.extract(taskDef.connections.dimensions)
            outputs |= {DatasetType(taskDef.metadataDatasetName, dimensions, "PropertySet")}
        outputs.freeze()

        return cls(
            initInputs=makeDatasetTypesSet("initInputs"),
            initOutputs=makeDatasetTypesSet("initOutputs"),
            inputs=makeDatasetTypesSet("inputs"),
            prerequisites=makeDatasetTypesSet("prerequisiteInputs"),
            outputs=outputs,
        )

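# An illustrative sketch (not executed here) of classifying one task's dataset
# types; the repository path is hypothetical and assumes a Butler registry is
# available:
#
#     from lsst.daf.butler import Butler
#     registry = Butler("/path/to/repo").registry
#     taskTypes = TaskDatasetTypes.fromTaskDef(taskDef, registry=registry)
#     print(taskTypes.inputs.names, taskTypes.outputs.names)
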

@dataclass(frozen=True)
class PipelineDatasetTypes:
    """An immutable struct that classifies the dataset types used in a
    `Pipeline`.
    """

    initInputs: NamedValueSet[DatasetType]
    """Dataset types that are needed as inputs in order to construct the Tasks
    in this Pipeline.

    This does not include dataset types that are produced when constructing
    other Tasks in the Pipeline (these are classified as `initIntermediates`).
    """

    initOutputs: NamedValueSet[DatasetType]
    """Dataset types that may be written after constructing the Tasks in this
    Pipeline.

    This does not include dataset types that are also used as inputs when
    constructing other Tasks in the Pipeline (these are classified as
    `initIntermediates`).
    """

    initIntermediates: NamedValueSet[DatasetType]
    """Dataset types that are both used when constructing one or more Tasks
    in the Pipeline and produced as a side-effect of constructing another
    Task in the Pipeline.
    """

    inputs: NamedValueSet[DatasetType]
    """Dataset types that are regular inputs for the full pipeline.

    If an input dataset needed for a Quantum cannot be found in the input
    collection(s), that Quantum (and all dependent Quanta) will not be
    produced.
    """

    prerequisites: NamedValueSet[DatasetType]
    """Dataset types that are prerequisite inputs for the full Pipeline.

    Prerequisite inputs must exist in the input collection(s) before the
    pipeline is run, but do not constrain the graph - if a prerequisite is
    missing for a Quantum, `PrerequisiteMissingError` is raised.

    Prerequisite inputs are not resolved until the second stage of
    QuantumGraph generation.
    """

    intermediates: NamedValueSet[DatasetType]
    """Dataset types that are output by one Task in the Pipeline and consumed
    as inputs by one or more other Tasks in the Pipeline.
    """

    outputs: NamedValueSet[DatasetType]
    """Dataset types that are output by a Task in the Pipeline and not
    consumed by any other Task in the Pipeline.
    """

    byTask: Mapping[str, TaskDatasetTypes]
    """Per-Task dataset types, keyed by label in the `Pipeline`.

    This is guaranteed to be zip-iterable with the `Pipeline` itself (assuming
    neither has been modified since the dataset types were extracted, of
    course).
    """

    @classmethod
    def fromPipeline(cls, pipeline, *, registry: Registry) -> PipelineDatasetTypes:
        """Extract and classify the dataset types from all tasks in a
        `Pipeline`.

        Parameters
        ----------
        pipeline: `Pipeline`
            An ordered collection of tasks that can be run together.
        registry: `Registry`
            Registry used to construct normalized `DatasetType` objects and
            retrieve those that are incomplete.

        Returns
        -------
        types: `PipelineDatasetTypes`
            The dataset types used by this `Pipeline`.

        Raises
        ------
        ValueError
            Raised if Tasks are inconsistent about which datasets are marked
            prerequisite. This indicates that the Tasks cannot be run as part
            of the same `Pipeline`.
        """
        allInputs = NamedValueSet()
        allOutputs = NamedValueSet()
        allInitInputs = NamedValueSet()
        allInitOutputs = NamedValueSet()
        prerequisites = NamedValueSet()
        byTask = dict()
        if isinstance(pipeline, Pipeline):
            pipeline = pipeline.toExpandedPipeline()
        for taskDef in pipeline:
            thisTask = TaskDatasetTypes.fromTaskDef(taskDef, registry=registry)
            allInitInputs |= thisTask.initInputs
            allInitOutputs |= thisTask.initOutputs
            allInputs |= thisTask.inputs
            prerequisites |= thisTask.prerequisites
            allOutputs |= thisTask.outputs
            byTask[taskDef.label] = thisTask
        if not prerequisites.isdisjoint(allInputs):
            raise ValueError("{} marked as both prerequisites and regular inputs".format(
                {dt.name for dt in allInputs & prerequisites}
            ))
        if not prerequisites.isdisjoint(allOutputs):
            raise ValueError("{} marked as both prerequisites and outputs".format(
                {dt.name for dt in allOutputs & prerequisites}
            ))
        # Make sure that components which are marked as inputs get treated as
        # intermediates if there is an output which produces the composite
        # containing the component.
        intermediateComponents = NamedValueSet()
        intermediateComposites = NamedValueSet()
        outputNameMapping = {dsType.name: dsType for dsType in allOutputs}
        for dsType in allInputs:
            # Get the name of a possible component.
            name, component = dsType.nameAndComponent()
            # If there is a component name, this is a component DatasetType;
            # if there is an output which produces the parent of this
            # component, treat this input as an intermediate.
            if component is not None:
                if name in outputNameMapping:
                    if outputNameMapping[name].dimensions != dsType.dimensions:
                        raise ValueError(f"Component dataset type {dsType.name} has different "
                                         f"dimensions ({dsType.dimensions}) than its parent "
                                         f"({outputNameMapping[name].dimensions}).")
                    composite = DatasetType(name, dsType.dimensions, outputNameMapping[name].storageClass,
                                            universe=registry.dimensions)
                    intermediateComponents.add(dsType)
                    intermediateComposites.add(composite)

        def checkConsistency(a: NamedValueSet, b: NamedValueSet):
            common = a.names & b.names
            for name in common:
                if a[name] != b[name]:
                    raise ValueError(f"Conflicting definitions for dataset type: {a[name]} != {b[name]}.")

        checkConsistency(allInitInputs, allInitOutputs)
        checkConsistency(allInputs, allOutputs)
        checkConsistency(allInputs, intermediateComposites)
        checkConsistency(allOutputs, intermediateComposites)

        def frozen(s: NamedValueSet) -> NamedValueSet:
            s.freeze()
            return s

        return cls(
            initInputs=frozen(allInitInputs - allInitOutputs),
            initIntermediates=frozen(allInitInputs & allInitOutputs),
            initOutputs=frozen(allInitOutputs - allInitInputs),
            inputs=frozen(allInputs - allOutputs - intermediateComponents),
            intermediates=frozen(allInputs & allOutputs | intermediateComponents),
            outputs=frozen(allOutputs - allInputs - intermediateComposites),
            prerequisites=frozen(prerequisites),
            byTask=MappingProxyType(byTask),  # MappingProxyType -> frozen view of dict for immutability
        )
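
# An illustrative sketch (not executed here) of classifying every dataset type
# in a pipeline; the file path is hypothetical and assumes a Butler registry
# is available as ``registry``:
#
#     pipeline = Pipeline.fromFile("/path/to/pipeline.yaml")
#     datasetTypes = PipelineDatasetTypes.fromPipeline(pipeline, registry=registry)
#     for label, taskTypes in datasetTypes.byTask.items():
#         print(label, taskTypes.outputs.names)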