# This file is part of pipe_base.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

"""Module defining connection classes for PipelineTask.
"""

__all__ = ["PipelineTaskConnections", "InputQuantizedConnection", "OutputQuantizedConnection",
           "DeferredDatasetRef", "iterConnections"]

from collections import UserDict, namedtuple
from types import SimpleNamespace
import typing

import itertools
import string

from . import config as configMod
from .connectionTypes import (InitInput, InitOutput, Input, PrerequisiteInput,
                              Output, BaseConnection)
from lsst.daf.butler import DatasetRef, Quantum

if typing.TYPE_CHECKING:
    from .config import PipelineTaskConfig


class ScalarError(TypeError):
    """Exception raised when a dataset type is configured as scalar
    but there are multiple DataIds in a Quantum for that dataset.

    Parameters
    ----------
    key : `str`
        Name of the configuration field for the dataset type.
    numDataIds : `int`
        Actual number of DataIds in a Quantum for this dataset type.
    """
    def __init__(self, key, numDataIds):
        super().__init__(f"Expected scalar for output dataset field {key}, "
                         f"received {numDataIds} DataIds")


class PipelineTaskConnectionDict(UserDict):
    """A special dict class used by PipelineTaskConnectionsMetaclass.

    This dict is used in PipelineTaskConnections class creation, as the
    dictionary that is initially used as __dict__. It exists to intercept
    connection fields declared in a PipelineTaskConnections class, along
    with the names used to identify them. The names are then added to a
    class-level list according to the connection type of the class
    attribute. The names are also used as keys in a class-level dictionary
    associated with the corresponding class attribute. This information
    duplicates what exists in __dict__, but provides a simple place to look
    up and iterate over only these variables.
    """
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Initialize class-level variables used to track any declared
        # class-level variables that are instances of
        # connectionTypes.BaseConnection
        self.data['inputs'] = []
        self.data['prerequisiteInputs'] = []
        self.data['outputs'] = []
        self.data['initInputs'] = []
        self.data['initOutputs'] = []
        self.data['allConnections'] = {}

    def __setitem__(self, name, value):
        if isinstance(value, Input):
            self.data['inputs'].append(name)
        elif isinstance(value, PrerequisiteInput):
            self.data['prerequisiteInputs'].append(name)
        elif isinstance(value, Output):
            self.data['outputs'].append(name)
        elif isinstance(value, InitInput):
            self.data['initInputs'].append(name)
        elif isinstance(value, InitOutput):
            self.data['initOutputs'].append(name)
        # This must not be an elif, as it needs to be tested for
        # everything that inherits from BaseConnection
        if isinstance(value, BaseConnection):
            object.__setattr__(value, 'varName', name)
            self.data['allConnections'][name] = value
        # defer to the default behavior
        super().__setitem__(name, value)
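
# How the interception above works, in miniature: a metaclass can return a
# custom mapping from __prepare__, and every assignment made in a class body
# then flows through that mapping's __setitem__ before the class is created.
# A minimal, self-contained sketch (hypothetical names, not part of this
# module's API):
#
#     class _TrackingDict(dict):
#         def __setitem__(self, name, value):
#             if isinstance(value, int):  # stand-in for BaseConnection
#                 self.setdefault('tracked', []).append(name)
#             super().__setitem__(name, value)
#
#     class _TrackingMeta(type):
#         @classmethod
#         def __prepare__(mcs, name, bases, **kwargs):
#             return _TrackingDict()
#
#     class _Demo(metaclass=_TrackingMeta):
#         a = 1
#         b = 2
#
#     assert _Demo.tracked == ['a', 'b']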


class PipelineTaskConnectionsMetaclass(type):
    """Metaclass used in the declaration of PipelineTaskConnections classes.
    """
    def __prepare__(name, bases, **kwargs):  # noqa: N805
        # Create an instance of our special dict to catch and track all
        # variables that are instances of connectionTypes.BaseConnection.
        # Copy any existing connections from a parent class.
        dct = PipelineTaskConnectionDict()
        for base in bases:
            if isinstance(base, PipelineTaskConnectionsMetaclass):
                for name, value in base.allConnections.items():
                    dct[name] = value
        return dct

    def __new__(cls, name, bases, dct, **kwargs):
        dimensionsValueError = TypeError("PipelineTaskConnections class must be created with a dimensions "
                                         "attribute which is an iterable of dimension names")

        if name != 'PipelineTaskConnections':
            # Verify that dimensions are passed as a keyword in the class
            # declaration
            if 'dimensions' not in kwargs:
                for base in bases:
                    if hasattr(base, 'dimensions'):
                        kwargs['dimensions'] = base.dimensions
                        break
                if 'dimensions' not in kwargs:
                    raise dimensionsValueError
            try:
                dct['dimensions'] = set(kwargs['dimensions'])
            except TypeError as exc:
                raise dimensionsValueError from exc
            # Look up any Python string templates that may have been used in
            # the declaration of the name field of a class connection
            # attribute
            allTemplates = set()
            stringFormatter = string.Formatter()
            # Loop over all connections
            for obj in dct['allConnections'].values():
                nameValue = obj.name
                # add all the parameters to the set of templates
                for param in stringFormatter.parse(nameValue):
                    if param[1] is not None:
                        allTemplates.add(param[1])

            # Look up any templates from base classes and merge them all
            # together
            mergeDict = {}
            for base in bases[::-1]:
                if hasattr(base, 'defaultTemplates'):
                    mergeDict.update(base.defaultTemplates)
            if 'defaultTemplates' in kwargs:
                mergeDict.update(kwargs['defaultTemplates'])

            if len(mergeDict) > 0:
                kwargs['defaultTemplates'] = mergeDict

            # Verify that if templated strings were used, defaults were
            # supplied as an argument in the declaration of the connection
            # class
            if len(allTemplates) > 0 and 'defaultTemplates' not in kwargs:
                raise TypeError("PipelineTaskConnections class contains templated attribute names, but no "
                                "default templates were provided; add a dictionary attribute named "
                                "defaultTemplates which contains the mapping between template key and value")
            if len(allTemplates) > 0:
                # Verify all templates have a default, and throw if they
                # do not
                defaultTemplateKeys = set(kwargs['defaultTemplates'].keys())
                templateDifference = allTemplates.difference(defaultTemplateKeys)
                if templateDifference:
                    raise TypeError(f"Default template keys were not provided for {templateDifference}")
                # Verify that templates do not share names with variable names
                # used for a connection; this is needed because of how
                # templates are specified in an associated config class.
                nameTemplateIntersection = allTemplates.intersection(set(dct['allConnections'].keys()))
                if len(nameTemplateIntersection) > 0:
                    raise TypeError("Template parameters cannot share names with class attributes")
            dct['defaultTemplates'] = kwargs.get('defaultTemplates', {})

        # Convert all the connection containers into frozensets so they cannot
        # be modified at the class scope
        for connectionName in ("inputs", "prerequisiteInputs", "outputs", "initInputs", "initOutputs"):
            dct[connectionName] = frozenset(dct[connectionName])
        # Our custom dict type must be turned into an actual dict to be used
        # in type.__new__
        return super().__new__(cls, name, bases, dict(dct))

    def __init__(cls, name, bases, dct, **kwargs):
        # This overrides the default __init__ to drop the kwargs argument.
        # Python metaclasses will have this argument set if any kwargs are
        # passed at class construction time, but it should be consumed before
        # calling __init__ on the type metaclass. This is in accordance with
        # the Python documentation on metaclasses.
        super().__init__(name, bases, dct)
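
# A note on the template discovery in __new__ above: string.Formatter().parse
# yields (literal_text, field_name, format_spec, conversion) tuples, so
# param[1] is the replacement-field name, or None for a purely literal
# segment. For example:
#
#     >>> import string
#     >>> [p[1] for p in string.Formatter().parse("{input}Coadd_calexp")]
#     ['input', None]
#
# The None entry comes from the literal "Coadd_calexp" tail and is skipped by
# the "param[1] is not None" test, leaving {"input"} as the template set.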


class QuantizedConnection(SimpleNamespace):
    """A namespace mapping the defined variable names of connections to the
    associated `lsst.daf.butler.DatasetRef` objects.

    This class maps the names used to define a connection on a
    PipelineTaskConnections class to the corresponding
    `lsst.daf.butler.DatasetRef`s provided by a `lsst.daf.butler.Quantum`
    instance. This will be a quantum of execution based on the graph created
    by examining all the connections defined on the
    `PipelineTaskConnections` class.
    """
    def __init__(self, **kwargs):
        # Create a variable to track what attributes are added. This is used
        # later when iterating over this QuantizedConnection instance
        object.__setattr__(self, "_attributes", set())

    def __setattr__(self, name: str, value: typing.Union[DatasetRef, typing.List[DatasetRef]]):
        # Capture the attribute name as it is added to this object
        self._attributes.add(name)
        super().__setattr__(name, value)

    def __delattr__(self, name):
        object.__delattr__(self, name)
        self._attributes.remove(name)

    def __iter__(self) -> typing.Generator[typing.Tuple[str, typing.Union[DatasetRef,
                                           typing.List[DatasetRef]]], None, None]:
        """Make an iterator for this QuantizedConnection.

        Iterating over a QuantizedConnection will yield a tuple with the name
        of an attribute and the value associated with that name. This is
        similar to dict.items() but is on the namespace attributes rather
        than dict keys.
        """
        yield from ((name, getattr(self, name)) for name in self._attributes)

    def keys(self) -> typing.Generator[str, None, None]:
        """Return an iterator over all the attributes added to a
        QuantizedConnection class.
        """
        yield from self._attributes
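
# Usage sketch (hypothetical values): attributes set on a QuantizedConnection
# are recorded in _attributes, so iteration behaves like dict.items() over
# the namespace.
#
#     qc = QuantizedConnection()
#     qc.calexp = "someDatasetRef"  # stands in for a real DatasetRef
#     assert list(qc.keys()) == ["calexp"]
#     assert dict(iter(qc)) == {"calexp": "someDatasetRef"}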


class InputQuantizedConnection(QuantizedConnection):
    pass


class OutputQuantizedConnection(QuantizedConnection):
    pass


class DeferredDatasetRef(namedtuple("DeferredDatasetRefBase", "datasetRef")):
    """Class which denotes that a datasetRef should be treated as deferred
    when interacting with the butler.

    Parameters
    ----------
    datasetRef : `lsst.daf.butler.DatasetRef`
        The `lsst.daf.butler.DatasetRef` that will eventually be used to
        resolve a dataset.
    """
    __slots__ = ()


class PipelineTaskConnections(metaclass=PipelineTaskConnectionsMetaclass):
    """PipelineTaskConnections is a class used to declare desired IO when a
    PipelineTask is run by an activator.

    Parameters
    ----------
    config : `PipelineTaskConfig`
        A `PipelineTaskConfig` class instance whose class has been configured
        to use this `PipelineTaskConnections` class.

    Notes
    -----
    PipelineTaskConnections classes are created by declaring class attributes
    of types defined in lsst.pipe.base.connectionTypes, listed as follows:

    * InitInput - Defines connections in a quantum graph which are used as
      inputs to the __init__ function of the PipelineTask corresponding to
      this class.
    * InitOutput - Defines connections in a quantum graph which are to be
      persisted using a butler at the end of the __init__ function of the
      PipelineTask corresponding to this class. The variable name used to
      define this connection should be the same as an attribute name on the
      PipelineTask instance. E.g. if an InitOutput is declared with the name
      outputSchema in a PipelineTaskConnections class, then a PipelineTask
      instance should have an attribute self.outputSchema defined. Its value
      is what will be saved by the activator framework.
    * PrerequisiteInput - An input connection type that defines a
      `lsst.daf.butler.DatasetType` that must be present at execution time,
      but that will not be used during the course of creating the quantum
      graph to be executed. These most often are things produced outside the
      processing pipeline, such as reference catalogs.
    * Input - Input `lsst.daf.butler.DatasetType`s that will be used in the
      run method of a PipelineTask. The name used to declare the class
      attribute must match a function argument name in the run method of a
      PipelineTask. E.g. if the PipelineTaskConnections defines an Input with
      the name calexp, then the corresponding signature should be
      PipelineTask.run(calexp, ...).
    * Output - A `lsst.daf.butler.DatasetType` that will be produced by an
      execution of a PipelineTask. The name used to declare the connection
      must correspond to an attribute of a `Struct` that is returned by a
      `PipelineTask` run method. E.g. if an output connection is defined with
      the name measCat, then the corresponding PipelineTask.run method must
      return Struct(measCat=X, ...) where X matches the storageClass type
      defined on the output connection.

    The process of declaring a PipelineTaskConnections class involves
    parameters passed in the declaration statement.

    The first parameter is dimensions, an iterable of strings which defines
    the unit of processing the run method of a corresponding `PipelineTask`
    will operate on. These dimensions must match dimensions that exist in the
    butler registry which will be used in executing the corresponding
    `PipelineTask`.

    The second parameter is labeled ``defaultTemplates`` and is conditionally
    optional. The name attributes of connections can be specified as Python
    format strings, with named format arguments. If any of the name
    parameters on connections defined in a `PipelineTaskConnections` class
    contain a template, then a default template value must be specified in
    the ``defaultTemplates`` argument. This is done by passing a dictionary
    with keys corresponding to a template identifier, and values
    corresponding to the value to use as a default when formatting the
    string. For example, if ConnectionClass.calexp.name =
    '{input}Coadd_calexp' then ``defaultTemplates`` = {'input': 'deep'}.

    Once a `PipelineTaskConnections` class is created, it is used in the
    creation of a `PipelineTaskConfig`. This is further documented in the
    documentation of `PipelineTaskConfig`. For the purposes of this
    documentation, the relevant information is that the config class allows
    configuration of connection names by users when running a pipeline.

    Instances of a `PipelineTaskConnections` class are used by the pipeline
    task execution framework to introspect what a corresponding
    `PipelineTask` will require, and what it will produce.

    Examples
    --------
    >>> from lsst.pipe.base import connectionTypes as cT
    >>> from lsst.pipe.base import PipelineTaskConnections
    >>> from lsst.pipe.base import PipelineTaskConfig
    >>> class ExampleConnections(PipelineTaskConnections,
    ...                          dimensions=("A", "B"),
    ...                          defaultTemplates={"foo": "Example"}):
    ...     inputConnection = cT.Input(doc="Example input",
    ...                                dimensions=("A", "B"),
    ...                                storageClass="Exposure",
    ...                                name="{foo}Dataset")
    ...     outputConnection = cT.Output(doc="Example output",
    ...                                  dimensions=("A", "B"),
    ...                                  storageClass="Exposure",
    ...                                  name="{foo}output")
    >>> class ExampleConfig(PipelineTaskConfig,
    ...                     pipelineConnections=ExampleConnections):
    ...     pass
    >>> config = ExampleConfig()
    >>> config.connections.foo = "Modified"
    >>> config.connections.outputConnection = "TotallyDifferent"
    >>> connections = ExampleConnections(config=config)
    >>> assert connections.inputConnection.name == "ModifiedDataset"
    >>> assert connections.outputConnection.name == "TotallyDifferent"
    """

    def __init__(self, *, config: 'PipelineTaskConfig' = None):
        self.inputs = set(self.inputs)
        self.prerequisiteInputs = set(self.prerequisiteInputs)
        self.outputs = set(self.outputs)
        self.initInputs = set(self.initInputs)
        self.initOutputs = set(self.initOutputs)

        if config is None or not isinstance(config, configMod.PipelineTaskConfig):
            raise ValueError("PipelineTaskConnections must be instantiated with"
                             " a PipelineTaskConfig instance")
        self.config = config
        # Extract the template names that were defined in the config instance
        # by looping over the keys of the defaultTemplates dict specified at
        # class declaration time
        templateValues = {name: getattr(config.connections, name)
                          for name in getattr(self, 'defaultTemplates').keys()}
        # Extract the configured value corresponding to each connection
        # variable. I.e. for each connection identifier, populate an override
        # for the connection.name attribute
        self._nameOverrides = {name: getattr(config.connections, name).format(**templateValues)
                               for name in self.allConnections.keys()}

        # connections.name corresponds to a dataset type name; create a
        # reverse mapping that goes from dataset type name to the attribute
        # identifier name (variable name) on the connection class
        self._typeNameToVarName = {v: k for k, v in self._nameOverrides.items()}
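
    # Worked example of the override resolution above (values taken from the
    # class docstring, otherwise hypothetical): with defaultTemplates =
    # {"input": "deep"} left unconfigured and a connection declared with
    # name="{input}Coadd_calexp",
    #
    #     templateValues = {"input": "deep"}
    #     "{input}Coadd_calexp".format(**templateValues)  # -> "deepCoadd_calexp"
    #
    # so self._nameOverrides maps each connection variable name to the fully
    # resolved dataset type name, and _typeNameToVarName inverts that mapping.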

    def buildDatasetRefs(self, quantum: Quantum) -> typing.Tuple[InputQuantizedConnection,
                                                                 OutputQuantizedConnection]:
        """Build QuantizedConnections corresponding to an input Quantum.

        Parameters
        ----------
        quantum : `lsst.daf.butler.Quantum`
            Quantum object which defines the inputs and outputs for a given
            unit of processing.

        Returns
        -------
        retVal : `tuple` of (`InputQuantizedConnection`,
            `OutputQuantizedConnection`)
            Namespaces mapping attribute names (identifiers of connections)
            to butler references defined in the input
            `lsst.daf.butler.Quantum`.
        """
        inputDatasetRefs = InputQuantizedConnection()
        outputDatasetRefs = OutputQuantizedConnection()
        # operate on a reference object and an iterable of names of class
        # connection attributes
        for refs, names in zip((inputDatasetRefs, outputDatasetRefs),
                               (itertools.chain(self.inputs, self.prerequisiteInputs), self.outputs)):
            # get a name of a class connection attribute
            for attributeName in names:
                # get the attribute identified by name
                attribute = getattr(self, attributeName)
                # Branch if the attribute dataset type is an input
                if attribute.name in quantum.predictedInputs:
                    # Get the DatasetRefs
                    quantumInputRefs = quantum.predictedInputs[attribute.name]
                    # if the dataset is marked to load deferred, wrap it in a
                    # DeferredDatasetRef
                    if attribute.deferLoad:
                        quantumInputRefs = [DeferredDatasetRef(datasetRef=ref) for ref in quantumInputRefs]
                    # Unpack arguments that are not marked multiples (list of
                    # length one)
                    if not attribute.multiple:
                        if len(quantumInputRefs) != 1:
                            raise ScalarError(attributeName, len(quantumInputRefs))
                        quantumInputRefs = quantumInputRefs[0]
                    # Add to the QuantizedConnection identifier
                    setattr(refs, attributeName, quantumInputRefs)
                # Branch if the attribute dataset type is an output
                elif attribute.name in quantum.outputs:
                    value = quantum.outputs[attribute.name]
                    # Unpack arguments that are not marked multiples (list of
                    # length one)
                    if not attribute.multiple:
                        value = value[0]
                    # Add to the QuantizedConnection identifier
                    setattr(refs, attributeName, value)
                # Specified attribute is not in inputs or outputs; we don't
                # know how to handle it, so throw
                else:
                    raise ValueError(f"Attribute with name {attributeName} has no counterpart "
                                     "in input quantum")
        return inputDatasetRefs, outputDatasetRefs
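
    # Usage sketch (hypothetical names): given a Quantum built during graph
    # generation, an activator would typically do something like
    #
    #     inputRefs, outputRefs = connections.buildDatasetRefs(quantum)
    #     inputs = {name: butler.get(ref) for name, ref in inputRefs}
    #
    # where `connections`, `quantum`, and `butler` come from the execution
    # framework; scalar connections yield a single ref, while connections
    # marked `multiple` yield a list of refs.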

    def adjustQuantum(self, datasetRefMap: InputQuantizedConnection):
        """Override to make adjustments to `lsst.daf.butler.DatasetRef`s in
        the `lsst.daf.butler.core.Quantum` during the graph generation stage
        of the activator.

        Parameters
        ----------
        datasetRefMap : `dict`
            Mapping with keys of dataset type name to `list` of
            `lsst.daf.butler.DatasetRef`s.

        Returns
        -------
        datasetRefMap : `dict`
            Modified mapping of the input with possibly adjusted
            `lsst.daf.butler.DatasetRef`s.

        Raises
        ------
        Exception
            Overrides of this function have the option of raising an
            Exception if a field in the input does not satisfy a need for a
            corresponding pipelineTask, e.g. no reference catalogs are found.
        """
        return datasetRefMap
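
# Subclasses may override adjustQuantum to veto or trim inputs during graph
# generation. An illustrative sketch (hypothetical task and dataset names,
# treating datasetRefMap as the dict described in the docstring):
#
#     class MyConnections(PipelineTaskConnections, dimensions=("A",)):
#         ...
#
#         def adjustQuantum(self, datasetRefMap):
#             if not datasetRefMap.get("refCat"):
#                 raise ValueError("No reference catalogs for this quantum")
#             return datasetRefMap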


def iterConnections(connections: PipelineTaskConnections, connectionType: str) -> typing.Generator:
    """Create an iterator over the selected connection type which yields
    all the defined connections of that type.

    Parameters
    ----------
    connections : `PipelineTaskConnections`
        An instance of a `PipelineTaskConnections` object that will be
        iterated over.
    connectionType : `str`
        The type of connections to iterate over; valid values are inputs,
        outputs, prerequisiteInputs, initInputs, and initOutputs.

    Yields
    ------
    connection : `BaseConnection`
        A connection defined on the input connections object of the type
        supplied. The yielded value will be a type derived from
        `BaseConnection`.
    """
    for name in getattr(connections, connectionType):
        yield getattr(connections, name)
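
# Usage sketch: iterate over all input connections of a connections instance
# (``connections`` here is any configured PipelineTaskConnections object):
#
#     for connection in iterConnections(connections, "inputs"):
#         print(connection.name)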