# This file is part of pipe_base.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

"""Module defining Pipeline class and related methods.
"""

__all__ = ["Pipeline", "TaskDef", "TaskDatasetTypes", "PipelineDatasetTypes"]

# -------------------------------
# Imports of standard modules --
# -------------------------------
from dataclasses import dataclass
from types import MappingProxyType
from typing import Mapping, Union, Generator, TYPE_CHECKING

import copy
import os

# -----------------------------
# Imports for other modules --
from lsst.daf.butler import DatasetType, NamedValueSet, Registry, SkyPixDimension
from lsst.utils import doImport
from .configOverrides import ConfigOverrides
from .connections import iterConnections
from .pipelineTask import PipelineTask

from . import pipelineIR
from . import pipeTools

if TYPE_CHECKING:  # Imports needed only for type annotations; may be circular.
    from lsst.obs.base.instrument import Instrument

# ----------------------------------
# Local non-exported definitions --
# ----------------------------------

# ------------------------
# Exported definitions --
# ------------------------

class TaskDef:
    """TaskDef is a collection of information about a task needed by a
    Pipeline.

    The information includes the task name, the configuration object, and
    optionally the task class. This class is just a collection of attributes,
    and it exposes all of them so that they can be modified in place (e.g. if
    the configuration needs extra overrides).

    Attributes
    ----------
    taskName : `str`
        `PipelineTask` class name; it is currently not specified whether this
        is a fully-qualified or partial name (e.g. ``module.TaskClass``), so
        the framework should be prepared to handle both.
    config : `lsst.pex.config.Config`
        Instance of the configuration class corresponding to this task class,
        usually with all overrides applied. This config will be frozen.
    taskClass : `type` or ``None``
        `PipelineTask` class object; can be ``None``. If ``None`` the
        framework will have to locate and load the class.
    label : `str`, optional
        Task label, usually a short string unique in a pipeline.
    """
    def __init__(self, taskName, config, taskClass=None, label=""):
        self.taskName = taskName
        config.freeze()
        self.config = config
        self.taskClass = taskClass
        self.label = label
        self.connections = config.connections.ConnectionsClass(config=config)

    @property
    def configDatasetName(self):
        """Name of a dataset type for the configuration of this task (`str`).
        """
        return self.label + "_config"

    @property
    def metadataDatasetName(self):
        """Name of a dataset type for the metadata of this task, `None` if
        metadata is not to be saved (`str`).
        """
        if self.config.saveMetadata:
            return self.label + "_metadata"
        else:
            return None

    def __str__(self):
        rep = "TaskDef(" + self.taskName
        if self.label:
            rep += ", label=" + self.label
        rep += ")"
        return rep
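# Illustrative sketch (not part of the module): how a TaskDef is typically
# built and inspected.  ``MyTask`` is a hypothetical `PipelineTask` subclass.
#
#     config = MyTask.ConfigClass()
#     taskDef = TaskDef(taskName="mypackage.MyTask", config=config,
#                       taskClass=MyTask, label="myLabel")
#     taskDef.configDatasetName    # -> "myLabel_config"
#     taskDef.metadataDatasetName  # -> "myLabel_metadata", or None if
#                                  #    config.saveMetadata is False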

class Pipeline:
    """A `Pipeline` is a representation of a series of tasks to run, and the
    configuration for those tasks.

    Parameters
    ----------
    description : `str`
        A description of what this pipeline does.
    """
    def __init__(self, description: str):
        pipeline_dict = {"description": description, "tasks": {}}
        self._pipelineIR = pipelineIR.PipelineIR(pipeline_dict)

    @classmethod
    def fromFile(cls, filename: str) -> Pipeline:
        """Load a pipeline defined in a pipeline yaml file.

        Parameters
        ----------
        filename : `str`
            A path that points to a pipeline defined in yaml format.

        Returns
        -------
        pipeline : `Pipeline`
        """
        pipeline = cls.fromIR(pipelineIR.PipelineIR.from_file(filename))
        return pipeline

    @classmethod
    def fromString(cls, pipeline_string: str) -> Pipeline:
        """Create a pipeline from a string formatted as a pipeline document.

        Parameters
        ----------
        pipeline_string : `str`
            A string that is formatted like a pipeline document.

        Returns
        -------
        pipeline : `Pipeline`
        """
        pipeline = cls.fromIR(pipelineIR.PipelineIR.from_string(pipeline_string))
        return pipeline
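    # Illustrative sketch (not part of the class): building a small pipeline
    # from an in-memory document.  The task class below is a hypothetical
    # placeholder; the exact document schema is defined by
    # `pipelineIR.PipelineIR`.
    #
    #     pipeline = Pipeline.fromString(
    #         "description: A one-task example\n"
    #         "tasks:\n"
    #         "  myLabel:\n"
    #         "    class: mypackage.MyTask\n"
    #     )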

    @classmethod
    def fromIR(cls, deserialized_pipeline: pipelineIR.PipelineIR) -> Pipeline:
        """Create a pipeline from an already created `PipelineIR` object.

        Parameters
        ----------
        deserialized_pipeline : `PipelineIR`
            An already created pipeline intermediate representation object.

        Returns
        -------
        pipeline : `Pipeline`
        """
        pipeline = cls.__new__(cls)
        pipeline._pipelineIR = deserialized_pipeline
        return pipeline

    @classmethod
    def fromPipeline(cls, pipeline: Pipeline) -> Pipeline:
        """Create a new pipeline by copying an already existing `Pipeline`.

        Parameters
        ----------
        pipeline : `Pipeline`
            An already created `Pipeline` object to copy.

        Returns
        -------
        pipeline : `Pipeline`
        """
        return cls.fromIR(copy.deepcopy(pipeline._pipelineIR))

    def __str__(self) -> str:
        return str(self._pipelineIR)

    def addInstrument(self, instrument: Union[Instrument, str]):
        """Add an instrument to the pipeline, or replace an instrument that is
        already defined.

        Parameters
        ----------
        instrument : `~lsst.obs.base.instrument.Instrument` or `str`
            Either a subclass of `~lsst.obs.base.instrument.Instrument` or a
            string corresponding to a fully qualified `Instrument` name.
        """
        if isinstance(instrument, str):
            pass
        else:
            # TODO: assume that this is a subclass of Instrument, no type
            # checking
            instrument = f"{instrument.__module__}.{instrument.__qualname__}"
        self._pipelineIR.instrument = instrument

    def getInstrument(self):
        """Get the instrument from the pipeline.

        Returns
        -------
        instrument : `~lsst.obs.base.instrument.Instrument`, `str`, or `None`
            A subclass of `~lsst.obs.base.instrument.Instrument`, a string
            corresponding to a fully qualified `Instrument` name, or `None`
            if the pipeline does not have an instrument.
        """
        return self._pipelineIR.instrument
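    # Illustrative sketch (not part of the class): attaching an instrument by
    # fully qualified name and reading it back.  The instrument name is a
    # hypothetical placeholder.
    #
    #     pipeline.addInstrument("lsst.obs.example.ExampleInstrument")
    #     pipeline.getInstrument()  # -> "lsst.obs.example.ExampleInstrument"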

    def addTask(self, task: Union[PipelineTask, str], label: str):
        """Add a new task to the pipeline, or replace a task that is already
        associated with the supplied label.

        Parameters
        ----------
        task : `PipelineTask` or `str`
            Either a subclass of `PipelineTask` or a string corresponding to
            a fully qualified `PipelineTask` name.
        label : `str`
            A label that is used to identify the `PipelineTask` being added.
        """
        if isinstance(task, str):
            taskName = task
        elif issubclass(task, PipelineTask):
            taskName = f"{task.__module__}.{task.__qualname__}"
        else:
            raise ValueError("task must be either a child class of PipelineTask or a string containing"
                             " a fully qualified name to one")
        if not label:
            # In some cases (e.g. a command-line-generated pipeline) tasks can
            # be defined without a label, which is not acceptable; use the
            # task _DefaultName in that case.
            if isinstance(task, str):
                task = doImport(task)
            label = task._DefaultName
        self._pipelineIR.tasks[label] = pipelineIR.TaskIR(label, taskName)
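    # Illustrative sketch (not part of the class): tasks can be added by class
    # or by fully qualified name; an empty label falls back to the task's
    # ``_DefaultName``.  ``mypackage.MyTask`` is a hypothetical placeholder.
    #
    #     pipeline = Pipeline("An example pipeline")
    #     pipeline.addTask("mypackage.MyTask", "myLabel")
    #     pipeline.removeTask("myLabel")  # raises KeyError if the label is absent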

    def removeTask(self, label: str):
        """Remove a task from the pipeline.

        Parameters
        ----------
        label : `str`
            The label used to identify the task that is to be removed.

        Raises
        ------
        KeyError
            If no task with that label exists in the pipeline.
        """
        self._pipelineIR.tasks.pop(label)

    def addConfigOverride(self, label: str, key: str, value: object):
        """Apply a single config override.

        Parameters
        ----------
        label : `str`
            Label of the task.
        key : `str`
            Fully-qualified field name.
        value : object
            Value to be given to a field.
        """
        self._addConfigImpl(label, pipelineIR.ConfigIR(rest={key: value}))

    def addConfigFile(self, label: str, filename: str):
        """Add overrides from a specified file.

        Parameters
        ----------
        label : `str`
            The label used to identify the task associated with the config to
            modify.
        filename : `str`
            Path to the override file.
        """
        self._addConfigImpl(label, pipelineIR.ConfigIR(file=[filename]))

    def addConfigPython(self, label: str, pythonString: str):
        """Add overrides by running a snippet of Python code against a config.

        Parameters
        ----------
        label : `str`
            The label used to identify the task associated with the config to
            modify.
        pythonString : `str`
            A string of valid Python code to be executed. This is done with
            ``config`` as the only locally accessible value.
        """
        self._addConfigImpl(label, pipelineIR.ConfigIR(python=pythonString))
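    # Illustrative sketch (not part of the class): the three flavours of
    # config override applied to a task labeled "myLabel".  The field name
    # and file path are hypothetical placeholders.
    #
    #     pipeline.addConfigOverride("myLabel", "someField", 42)
    #     pipeline.addConfigFile("myLabel", "/path/to/overrides.py")
    #     pipeline.addConfigPython("myLabel", "config.someField = 42")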

    def _addConfigImpl(self, label: str, newConfig: pipelineIR.ConfigIR):
        if label not in self._pipelineIR.tasks:
            raise LookupError(f"There are no tasks labeled '{label}' in the pipeline")
        self._pipelineIR.tasks[label].add_or_update_config(newConfig)

    def toFile(self, filename: str):
        """Write the pipeline document to the file specified by ``filename``."""
        self._pipelineIR.to_file(filename)

    def toExpandedPipeline(self) -> Generator[TaskDef, None, None]:
        """Returns a generator of TaskDefs which can be used to create quantum
        graphs.

        Returns
        -------
        generator : generator of `TaskDef`
            The generator returned will be the sorted iterator of tasks which
            are to be used in constructing a quantum graph.

        Raises
        ------
        NotImplementedError
            If a dataId is supplied in a config block. This is in place for
            future use.
        """
        taskDefs = []
        for label, taskIR in self._pipelineIR.tasks.items():
            taskClass = doImport(taskIR.klass)
            taskName = taskClass.__qualname__
            config = taskClass.ConfigClass()
            overrides = ConfigOverrides()
            if self._pipelineIR.instrument is not None:
                overrides.addInstrumentOverride(self._pipelineIR.instrument, taskClass._DefaultName)
            if taskIR.config is not None:
                for configIR in taskIR.config:
                    if configIR.dataId is not None:
                        raise NotImplementedError("Specializing a config on a partial data id is not yet "
                                                  "supported in Pipeline definition")
                    # Only apply an override if it applies to everything.
                    if configIR.dataId is None:
                        if configIR.file:
                            for configFile in configIR.file:
                                overrides.addFileOverride(os.path.expandvars(configFile))
                        if configIR.python is not None:
                            overrides.addPythonOverride(configIR.python)
                        for key, value in configIR.rest.items():
                            overrides.addValueOverride(key, value)
            overrides.applyTo(config)
            # This may need to be revisited
            config.validate()
            taskDefs.append(TaskDef(taskName=taskName, config=config, taskClass=taskClass, label=label))

        # Evaluate the contracts.
        if self._pipelineIR.contracts is not None:
            label_to_config = {x.label: x.config for x in taskDefs}
            for contract in self._pipelineIR.contracts:
                # Execute this on its own line so it can raise a good error
                # message if there were problems with the eval.
                success = eval(contract.contract, None, label_to_config)
                if not success:
                    extra_info = f": {contract.msg}" if contract.msg is not None else ""
                    raise pipelineIR.ContractError(f"Contract(s) '{contract.contract}' were not "
                                                   f"satisfied{extra_info}")

        yield from pipeTools.orderPipeline(taskDefs)
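    # Illustrative sketch (not part of the class): expanding a pipeline into
    # ordered TaskDefs, e.g. before building a quantum graph.
    #
    #     for taskDef in pipeline.toExpandedPipeline():
    #         print(taskDef.label, taskDef.taskName)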

    def __len__(self):
        return len(self._pipelineIR.tasks)

    def __eq__(self, other: "Pipeline"):
        if not isinstance(other, Pipeline):
            return False
        return self._pipelineIR == other._pipelineIR


@dataclass(frozen=True)
class TaskDatasetTypes:
    """An immutable struct that extracts and classifies the dataset types used
    by a `PipelineTask`.
    """

    initInputs: NamedValueSet[DatasetType]
    """Dataset types that are needed as inputs in order to construct this Task.

    Task-level `initInputs` may be classified as either
    `~PipelineDatasetTypes.initInputs` or
    `~PipelineDatasetTypes.initIntermediates` at the Pipeline level.
    """

    initOutputs: NamedValueSet[DatasetType]
    """Dataset types that may be written after constructing this Task.

    Task-level `initOutputs` may be classified as either
    `~PipelineDatasetTypes.initOutputs` or
    `~PipelineDatasetTypes.initIntermediates` at the Pipeline level.
    """

    inputs: NamedValueSet[DatasetType]
    """Dataset types that are regular inputs to this Task.

    If an input dataset needed for a Quantum cannot be found in the input
    collection(s) or produced by another Task in the Pipeline, that Quantum
    (and all dependent Quanta) will not be produced.

    Task-level `inputs` may be classified as either
    `~PipelineDatasetTypes.inputs` or `~PipelineDatasetTypes.intermediates`
    at the Pipeline level.
    """

    prerequisites: NamedValueSet[DatasetType]
    """Dataset types that are prerequisite inputs to this Task.

    Prerequisite inputs must exist in the input collection(s) before the
    pipeline is run, but do not constrain the graph - if a prerequisite is
    missing for a Quantum, `PrerequisiteMissingError` is raised.

    Prerequisite inputs are not resolved until the second stage of
    QuantumGraph generation.
    """

    outputs: NamedValueSet[DatasetType]
    """Dataset types that are produced by this Task.

    Task-level `outputs` may be classified as either
    `~PipelineDatasetTypes.outputs` or `~PipelineDatasetTypes.intermediates`
    at the Pipeline level.
    """

    @classmethod
    def fromTaskDef(cls, taskDef: TaskDef, *, registry: Registry) -> TaskDatasetTypes:
        """Extract and classify the dataset types from a single `PipelineTask`.

        Parameters
        ----------
        taskDef : `TaskDef`
            An instance of a `TaskDef` class for a particular `PipelineTask`.
        registry : `Registry`
            Registry used to construct normalized `DatasetType` objects and
            retrieve those that are incomplete.

        Returns
        -------
        types : `TaskDatasetTypes`
            The dataset types used by this task.
        """
        def makeDatasetTypesSet(connectionType, freeze=True):
            """Constructs a set of true `DatasetType` objects

            Parameters
            ----------
            connectionType : `str`
                Name of the connection type to produce a set for; corresponds
                to an attribute of type `list` on the connection class
                instance.
            freeze : `bool`, optional
                If `True`, call `NamedValueSet.freeze` on the object returned.

            Returns
            -------
            datasetTypes : `NamedValueSet`
                A set of all datasetTypes which correspond to the input
                connection type specified in the connection class of this
                `PipelineTask`.

            Notes
            -----
            This function is a closure over the variables ``registry`` and
            ``taskDef``.
            """
            datasetTypes = NamedValueSet()
            for c in iterConnections(taskDef.connections, connectionType):
                dimensions = set(getattr(c, 'dimensions', set()))
                if "skypix" in dimensions:
                    try:
                        datasetType = registry.getDatasetType(c.name)
                    except LookupError as err:
                        raise LookupError(
                            f"DatasetType '{c.name}' referenced by "
                            f"{type(taskDef.connections).__name__} uses 'skypix' as a dimension "
                            f"placeholder, but does not already exist in the registry. "
                            f"Note that reference catalog names are now used as the dataset "
                            f"type name instead of 'ref_cat'."
                        ) from err
                    rest1 = set(registry.dimensions.extract(dimensions - set(["skypix"])).names)
                    rest2 = set(dim.name for dim in datasetType.dimensions
                                if not isinstance(dim, SkyPixDimension))
                    if rest1 != rest2:
                        raise ValueError(f"Non-skypix dimensions for dataset type {c.name} declared in "
                                         f"connections ({rest1}) are inconsistent with those in "
                                         f"registry's version of this dataset ({rest2}).")
                else:
                    # Component dataset types are not explicitly in the
                    # registry.  This complicates consistency checks with
                    # registry and requires we work out the composite storage
                    # class.
                    registryDatasetType = None
                    try:
                        registryDatasetType = registry.getDatasetType(c.name)
                    except KeyError:
                        compositeName, componentName = DatasetType.splitDatasetTypeName(c.name)
                        parentStorageClass = DatasetType.PlaceholderParentStorageClass \
                            if componentName else None
                        datasetType = DatasetType(c.name, registry.dimensions.extract(dimensions),
                                                  c.storageClass,
                                                  parentStorageClass=parentStorageClass)
                        registryDatasetType = datasetType
                    else:
                        datasetType = DatasetType(c.name, registry.dimensions.extract(dimensions),
                                                  c.storageClass,
                                                  parentStorageClass=registryDatasetType.parentStorageClass)

                    if registryDatasetType and datasetType != registryDatasetType:
                        raise ValueError(f"Supplied dataset type ({datasetType}) inconsistent with "
                                         f"registry definition ({registryDatasetType}) "
                                         f"for {taskDef.label}.")
                datasetTypes.add(datasetType)
            if freeze:
                datasetTypes.freeze()
            return datasetTypes

        # Optionally add an output dataset for metadata.
        outputs = makeDatasetTypesSet("outputs", freeze=False)
        if taskDef.metadataDatasetName is not None:
            # Metadata is supposed to be of the PropertySet type; its
            # dimensions correspond to a task quantum.
            dimensions = registry.dimensions.extract(taskDef.connections.dimensions)
            outputs |= {DatasetType(taskDef.metadataDatasetName, dimensions, "PropertySet")}
        outputs.freeze()

        return cls(
            initInputs=makeDatasetTypesSet("initInputs"),
            initOutputs=makeDatasetTypesSet("initOutputs"),
            inputs=makeDatasetTypesSet("inputs"),
            prerequisites=makeDatasetTypesSet("prerequisiteInputs"),
            outputs=outputs,
        )
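# Illustrative sketch (not part of the module): classifying the dataset types
# of a single task.  The repository path is a hypothetical placeholder; the
# registry normally comes from an existing butler.
#
#     from lsst.daf.butler import Butler
#     registry = Butler("/path/to/repo").registry
#     datasetTypes = TaskDatasetTypes.fromTaskDef(taskDef, registry=registry)
#     datasetTypes.inputs, datasetTypes.outputs, datasetTypes.prerequisites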

@dataclass(frozen=True)
class PipelineDatasetTypes:
    """An immutable struct that classifies the dataset types used in a
    `Pipeline`.
    """

    initInputs: NamedValueSet[DatasetType]
    """Dataset types that are needed as inputs in order to construct the Tasks
    in this Pipeline.

    This does not include dataset types that are produced when constructing
    other Tasks in the Pipeline (these are classified as `initIntermediates`).
    """

    initOutputs: NamedValueSet[DatasetType]
    """Dataset types that may be written after constructing the Tasks in this
    Pipeline.

    This does not include dataset types that are also used as inputs when
    constructing other Tasks in the Pipeline (these are classified as
    `initIntermediates`).
    """

    initIntermediates: NamedValueSet[DatasetType]
    """Dataset types that are both used when constructing one or more Tasks
    in the Pipeline and produced as a side-effect of constructing another
    Task in the Pipeline.
    """

    inputs: NamedValueSet[DatasetType]
    """Dataset types that are regular inputs for the full pipeline.

    If an input dataset needed for a Quantum cannot be found in the input
    collection(s), that Quantum (and all dependent Quanta) will not be
    produced.
    """

    prerequisites: NamedValueSet[DatasetType]
    """Dataset types that are prerequisite inputs for the full Pipeline.

    Prerequisite inputs must exist in the input collection(s) before the
    pipeline is run, but do not constrain the graph - if a prerequisite is
    missing for a Quantum, `PrerequisiteMissingError` is raised.

    Prerequisite inputs are not resolved until the second stage of
    QuantumGraph generation.
    """

    intermediates: NamedValueSet[DatasetType]
    """Dataset types that are output by one Task in the Pipeline and consumed
    as inputs by one or more other Tasks in the Pipeline.
    """

    outputs: NamedValueSet[DatasetType]
    """Dataset types that are output by a Task in the Pipeline and not consumed
    by any other Task in the Pipeline.
    """

    byTask: Mapping[str, TaskDatasetTypes]
    """Per-Task dataset types, keyed by label in the `Pipeline`.

    This is guaranteed to be zip-iterable with the `Pipeline` itself (assuming
    neither has been modified since the dataset types were extracted, of
    course).
    """

    @classmethod
    def fromPipeline(cls, pipeline, *, registry: Registry) -> PipelineDatasetTypes:
        """Extract and classify the dataset types from all tasks in a
        `Pipeline`.

        Parameters
        ----------
        pipeline : `Pipeline`
            An ordered collection of tasks that can be run together.
        registry : `Registry`
            Registry used to construct normalized `DatasetType` objects and
            retrieve those that are incomplete.

        Returns
        -------
        types : `PipelineDatasetTypes`
            The dataset types used by this `Pipeline`.

        Raises
        ------
        ValueError
            Raised if Tasks are inconsistent about which datasets are marked
            prerequisite. This indicates that the Tasks cannot be run as part
            of the same `Pipeline`.
        """
        allInputs = NamedValueSet()
        allOutputs = NamedValueSet()
        allInitInputs = NamedValueSet()
        allInitOutputs = NamedValueSet()
        prerequisites = NamedValueSet()
        byTask = dict()
        if isinstance(pipeline, Pipeline):
            pipeline = pipeline.toExpandedPipeline()
        for taskDef in pipeline:
            thisTask = TaskDatasetTypes.fromTaskDef(taskDef, registry=registry)
            allInitInputs |= thisTask.initInputs
            allInitOutputs |= thisTask.initOutputs
            allInputs |= thisTask.inputs
            prerequisites |= thisTask.prerequisites
            allOutputs |= thisTask.outputs
            byTask[taskDef.label] = thisTask
        if not prerequisites.isdisjoint(allInputs):
            raise ValueError("{} marked as both prerequisites and regular inputs".format(
                {dt.name for dt in allInputs & prerequisites}
            ))
        if not prerequisites.isdisjoint(allOutputs):
            raise ValueError("{} marked as both prerequisites and outputs".format(
                {dt.name for dt in allOutputs & prerequisites}
            ))
        # Make sure that components which are marked as inputs get treated as
        # intermediates if there is an output which produces the composite
        # containing the component.
        intermediateComponents = NamedValueSet()
        intermediateComposites = NamedValueSet()
        outputNameMapping = {dsType.name: dsType for dsType in allOutputs}
        for dsType in allInputs:
            # Get the name of a possible component.
            name, component = dsType.nameAndComponent()
            # If there is a component name, this is a component DatasetType;
            # if there is an output which produces the parent of this
            # component, treat this input as an intermediate.
            if component is not None:
                if name in outputNameMapping:
                    if outputNameMapping[name].dimensions != dsType.dimensions:
                        raise ValueError(f"Component dataset type {dsType.name} has different "
                                         f"dimensions ({dsType.dimensions}) than its parent "
                                         f"({outputNameMapping[name].dimensions}).")
                    composite = DatasetType(name, dsType.dimensions, outputNameMapping[name].storageClass,
                                            universe=registry.dimensions)
                    intermediateComponents.add(dsType)
                    intermediateComposites.add(composite)

        def checkConsistency(a: NamedValueSet, b: NamedValueSet):
            common = a.names & b.names
            for name in common:
                if a[name] != b[name]:
                    raise ValueError(f"Conflicting definitions for dataset type: {a[name]} != {b[name]}.")

        checkConsistency(allInitInputs, allInitOutputs)
        checkConsistency(allInputs, allOutputs)
        checkConsistency(allInputs, intermediateComposites)
        checkConsistency(allOutputs, intermediateComposites)

        def frozen(s: NamedValueSet) -> NamedValueSet:
            s.freeze()
            return s

        return cls(
            initInputs=frozen(allInitInputs - allInitOutputs),
            initIntermediates=frozen(allInitInputs & allInitOutputs),
            initOutputs=frozen(allInitOutputs - allInitInputs),
            inputs=frozen(allInputs - allOutputs - intermediateComponents),
            intermediates=frozen(allInputs & allOutputs | intermediateComponents),
            outputs=frozen(allOutputs - allInputs - intermediateComposites),
            prerequisites=frozen(prerequisites),
            byTask=MappingProxyType(byTask),  # MappingProxyType -> frozen view of dict for immutability
        )
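# Illustrative sketch (not part of the module): classifying dataset types
# across a whole pipeline.  ``pipeline`` is a `Pipeline` (or an iterable of
# `TaskDef`) and ``registry`` a butler registry, as in the sketch above.
#
#     pipelineTypes = PipelineDatasetTypes.fromPipeline(pipeline, registry=registry)
#     pipelineTypes.inputs         # overall inputs the pipeline needs
#     pipelineTypes.intermediates  # produced and consumed internally
#     pipelineTypes.outputs        # final outputs not consumed by any task
#     pipelineTypes.byTask["myLabel"].outputs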