22 """Module defining connection classes for PipelineTask. 25 __all__ = [
"PipelineTaskConnections",
"InputQuantizedConnection",
"OutputQuantizedConnection",
26 "DeferredDatasetRef",
"iterConnections"]
import itertools
import string
import typing

from collections import UserDict, namedtuple
from types import SimpleNamespace

from lsst.daf.butler import DatasetRef, Quantum

from . import config as configMod
from .connectionTypes import (InitInput, InitOutput, Input, PrerequisiteInput,
                              Output, BaseConnection)

if typing.TYPE_CHECKING:
    from .config import PipelineTaskConfig
45 """Exception raised when dataset type is configured as scalar 46 but there are multiple data IDs in a Quantum for that dataset. 51 """This is a special dict class used by PipelineTaskConnectionMetaclass 53 This dict is used in PipelineTaskConnection class creation, as the 54 dictionary that is initially used as __dict__. It exists to 55 intercept connection fields declared in a PipelineTaskConnection, and 56 what name is used to identify them. The names are then added to class 57 level list according to the connection type of the class attribute. The 58 names are also used as keys in a class level dictionary associated with 59 the corresponding class attribute. This information is a duplicate of 60 what exists in __dict__, but provides a simple place to lookup and 61 iterate on only these variables. 68 self.data[
'inputs'] = []
69 self.data[
'prerequisiteInputs'] = []
70 self.data[
'outputs'] = []
71 self.data[
'initInputs'] = []
72 self.data[
'initOutputs'] = []
73 self.data[
'allConnections'] = {}
76 if isinstance(value, Input):
77 self.data[
'inputs'].append(name)
78 elif isinstance(value, PrerequisiteInput):
79 self.data[
'prerequisiteInputs'].append(name)
80 elif isinstance(value, Output):
81 self.data[
'outputs'].append(name)
82 elif isinstance(value, InitInput):
83 self.data[
'initInputs'].append(name)
84 elif isinstance(value, InitOutput):
85 self.data[
'initOutputs'].append(name)
88 if isinstance(value, BaseConnection):
89 object.__setattr__(value,
'varName', name)
90 self.data[
'allConnections'][name] = value
96 """Metaclass used in the declaration of PipelineTaskConnections classes 104 if isinstance(base, PipelineTaskConnectionsMetaclass):
105 for name, value
in base.allConnections.items():
110 dimensionsValueError = TypeError(
"PipelineTaskConnections class must be created with a dimensions " 111 "attribute which is an iterable of dimension names")
113 if name !=
'PipelineTaskConnections':
116 if 'dimensions' not in kwargs:
118 if hasattr(base,
'dimensions'):
119 kwargs[
'dimensions'] = base.dimensions
121 if 'dimensions' not in kwargs:
122 raise dimensionsValueError
124 dct[
'dimensions'] = set(kwargs[
'dimensions'])
125 except TypeError
as exc:
126 raise dimensionsValueError
from exc
130 stringFormatter = string.Formatter()
132 for obj
in dct[
'allConnections'].values():
135 for param
in stringFormatter.parse(nameValue):
136 if param[1]
is not None:
137 allTemplates.add(param[1])
142 for base
in bases[::-1]:
143 if hasattr(base,
'defaultTemplates'):
144 mergeDict.update(base.defaultTemplates)
145 if 'defaultTemplates' in kwargs:
146 mergeDict.update(kwargs[
'defaultTemplates'])
148 if len(mergeDict) > 0:
149 kwargs[
'defaultTemplates'] = mergeDict
154 if len(allTemplates) > 0
and 'defaultTemplates' not in kwargs:
155 raise TypeError(
"PipelineTaskConnection class contains templated attribute names, but no " 156 "defaut templates were provided, add a dictionary attribute named " 157 "defaultTemplates which contains the mapping between template key and value")
158 if len(allTemplates) > 0:
160 defaultTemplateKeys = set(kwargs[
'defaultTemplates'].keys())
161 templateDifference = allTemplates.difference(defaultTemplateKeys)
162 if templateDifference:
163 raise TypeError(f
"Default template keys were not provided for {templateDifference}")
167 nameTemplateIntersection = allTemplates.intersection(set(dct[
'allConnections'].keys()))
168 if len(nameTemplateIntersection) > 0:
169 raise TypeError(f
"Template parameters cannot share names with Class attributes")
170 dct[
'defaultTemplates'] = kwargs.get(
'defaultTemplates', {})
174 for connectionName
in (
"inputs",
"prerequisiteInputs",
"outputs",
"initInputs",
"initOutputs"):
175 dct[connectionName] = frozenset(dct[connectionName])
178 return super().
__new__(cls, name, bases, dict(dct))
190 """A Namespace to map defined variable names of connections to their 191 `lsst.daf.buter.DatasetRef`s 193 This class maps the names used to define a connection on a 194 PipelineTaskConnectionsClass to the corresponding 195 `lsst.daf.butler.DatasetRef`s provided by a `lsst.daf.butler.Quantum` 196 instance. This will be a quantum of execution based on the graph created 197 by examining all the connections defined on the 198 `PipelineTaskConnectionsClass`. 203 object.__setattr__(self,
"_attributes", set())
205 def __setattr__(self, name: str, value: typing.Union[DatasetRef, typing.List[DatasetRef]]):
207 self._attributes.add(name)
211 object.__delattr__(self, name)
212 self._attributes.remove(name)
214 def __iter__(self) -> typing.Generator[typing.Tuple[str, typing.Union[DatasetRef,
215 typing.List[DatasetRef]]], None, None]:
216 """Make an Iterator for this QuantizedConnection 218 Iterating over a QuantizedConnection will yield a tuple with the name 219 of an attribute and the value associated with that name. This is 220 similar to dict.items() but is on the namespace attributes rather than 223 yield from ((name, getattr(self, name))
for name
in self._attributes)
225 def keys(self) -> typing.Generator[str, None, None]:
226 """Returns an iterator over all the attributes added to a 227 QuantizedConnection class 229 yield from self._attributes
class InputQuantizedConnection(QuantizedConnection):
    # Namespace of input connection attribute names; behavior is inherited
    # unchanged from QuantizedConnection. Restored here because it is
    # exported in __all__ and referenced by buildDatasetRefs.
    pass


class OutputQuantizedConnection(QuantizedConnection):
    # Namespace of output connection attribute names; behavior is inherited
    # unchanged from QuantizedConnection.
    pass
241 """Class which denotes that a datasetRef should be treated as deferred when 242 interacting with the butler 246 datasetRef : `lsst.daf.butler.DatasetRef` 247 The `lsst.daf.butler.DatasetRef` that will be eventually used to 254 """PipelineTaskConnections is a class used to declare desired IO when a 255 PipelineTask is run by an activator 259 config : `PipelineTaskConfig` 260 A `PipelineTaskConfig` class instance whose class has been configured 261 to use this `PipelineTaskConnectionsClass` 265 ``PipelineTaskConnection`` classes are created by declaring class 266 attributes of types defined in `lsst.pipe.base.connectionTypes` and are 269 * ``InitInput`` - Defines connections in a quantum graph which are used as 270 inputs to the ``__init__`` function of the `PipelineTask` corresponding 272 * ``InitOuput`` - Defines connections in a quantum graph which are to be 273 persisted using a butler at the end of the ``__init__`` function of the 274 `PipelineTask` corresponding to this class. The variable name used to 275 define this connection should be the same as an attribute name on the 276 `PipelineTask` instance. E.g. if an ``InitOutput`` is declared with 277 the name ``outputSchema`` in a ``PipelineTaskConnections`` class, then 278 a `PipelineTask` instance should have an attribute 279 ``self.outputSchema`` defined. Its value is what will be saved by the 281 * ``PrerequisiteInput`` - An input connection type that defines a 282 `lsst.daf.butler.DatasetType` that must be present at execution time, 283 but that will not be used during the course of creating the quantum 284 graph to be executed. These most often are things produced outside the 285 processing pipeline, such as reference catalogs. 286 * ``Input`` - Input `lsst.daf.butler.DatasetType` objects that will be used 287 in the ``run`` method of a `PipelineTask`. The name used to declare 288 class attribute must match a function argument name in the ``run`` 289 method of a `PipelineTask`. E.g. 
If the ``PipelineTaskConnections`` 290 defines an ``Input`` with the name ``calexp``, then the corresponding 291 signature should be ``PipelineTask.run(calexp, ...)`` 292 * ``Output`` - A `lsst.daf.butler.DatasetType` that will be produced by an 293 execution of a `PipelineTask`. The name used to declare the connection 294 must correspond to an attribute of a `Struct` that is returned by a 295 `PipelineTask` ``run`` method. E.g. if an output connection is 296 defined with the name ``measCat``, then the corresponding 297 ``PipelineTask.run`` method must return ``Struct(measCat=X,..)`` where 298 X matches the ``storageClass`` type defined on the output connection. 300 The process of declaring a ``PipelineTaskConnection`` class involves 301 parameters passed in the declaration statement. 303 The first parameter is ``dimensions`` which is an iterable of strings which 304 defines the unit of processing the run method of a corresponding 305 `PipelineTask` will operate on. These dimensions must match dimensions that 306 exist in the butler registry which will be used in executing the 307 corresponding `PipelineTask`. 309 The second parameter is labeled ``defaultTemplates`` and is conditionally 310 optional. The name attributes of connections can be specified as python 311 format strings, with named format arguments. If any of the name parameters 312 on connections defined in a `PipelineTaskConnections` class contain a 313 template, then a default template value must be specified in the 314 ``defaultTemplates`` argument. This is done by passing a dictionary with 315 keys corresponding to a template identifier, and values corresponding to 316 the value to use as a default when formatting the string. For example if 317 ``ConnectionClass.calexp.name = '{input}Coadd_calexp'`` then 318 ``defaultTemplates`` = {'input': 'deep'}. 320 Once a `PipelineTaskConnections` class is created, it is used in the 321 creation of a `PipelineTaskConfig`. 
This is further documented in the 322 documentation of `PipelineTaskConfig`. For the purposes of this 323 documentation, the relevant information is that the config class allows 324 configuration of connection names by users when running a pipeline. 326 Instances of a `PipelineTaskConnections` class are used by the pipeline 327 task execution framework to introspect what a corresponding `PipelineTask` 328 will require, and what it will produce. 332 >>> from lsst.pipe.base import connectionTypes as cT 333 >>> from lsst.pipe.base import PipelineTaskConnections 334 >>> from lsst.pipe.base import PipelineTaskConfig 335 >>> class ExampleConnections(PipelineTaskConnections, 336 ... dimensions=("A", "B"), 337 ... defaultTemplates={"foo": "Example"}): 338 ... inputConnection = cT.Input(doc="Example input", 339 ... dimensions=("A", "B"), 340 ... storageClass=Exposure, 341 ... name="{foo}Dataset") 342 ... outputConnection = cT.Output(doc="Example output", 343 ... dimensions=("A", "B"), 344 ... storageClass=Exposure, 345 ... name="{foo}output") 346 >>> class ExampleConfig(PipelineTaskConfig, 347 ... pipelineConnections=ExampleConnections): 349 >>> config = ExampleConfig() 350 >>> config.connections.foo = Modified 351 >>> config.connections.outputConnection = "TotallyDifferent" 352 >>> connections = ExampleConnections(config=config) 353 >>> assert(connections.inputConnection.name == "ModifiedDataset") 354 >>> assert(connections.outputConnection.name == "TotallyDifferent") 357 def __init__(self, *, config:
'PipelineTaskConfig' =
None):
364 if config
is None or not isinstance(config, configMod.PipelineTaskConfig):
365 raise ValueError(
"PipelineTaskConnections must be instantiated with" 366 " a PipelineTaskConfig instance")
371 templateValues = {name: getattr(config.connections, name)
for name
in getattr(self,
372 'defaultTemplates').keys()}
376 self.
_nameOverrides = {name: getattr(config.connections, name).format(**templateValues)
377 for name
in self.allConnections.keys()}
385 OutputQuantizedConnection]:
386 """Builds QuantizedConnections corresponding to input Quantum 390 quantum : `lsst.daf.butler.Quantum` 391 Quantum object which defines the inputs and outputs for a given 396 retVal : `tuple` of (`InputQuantizedConnection`, 397 `OutputQuantizedConnection`) Namespaces mapping attribute names 398 (identifiers of connections) to butler references defined in the 399 input `lsst.daf.butler.Quantum` 405 for refs, names
in zip((inputDatasetRefs, outputDatasetRefs),
408 for attributeName
in names:
410 attribute = getattr(self, attributeName)
412 if attribute.name
in quantum.predictedInputs:
414 quantumInputRefs = quantum.predictedInputs[attribute.name]
417 if attribute.deferLoad:
421 if not attribute.multiple:
422 if len(quantumInputRefs) > 1:
424 f
"Received multiple datasets " 425 f
"{', '.join(str(r.dataId) for r in quantumInputRefs)} " 426 f
"for scalar connection {attributeName} " 427 f
"({quantumInputRefs[0].datasetType.name}) " 428 f
"of quantum for {quantum.taskName} with data ID {quantum.dataId}." 430 if len(quantumInputRefs) == 0:
432 quantumInputRefs = quantumInputRefs[0]
434 setattr(refs, attributeName, quantumInputRefs)
436 elif attribute.name
in quantum.outputs:
437 value = quantum.outputs[attribute.name]
440 if not attribute.multiple:
443 setattr(refs, attributeName, value)
447 raise ValueError(f
"Attribute with name {attributeName} has no counterpoint " 449 return inputDatasetRefs, outputDatasetRefs
452 """Override to make adjustments to `lsst.daf.butler.DatasetRef` objects 453 in the `lsst.daf.butler.core.Quantum` during the graph generation stage 456 The base class implementation simply checks that input connections with 457 ``multiple`` set to `False` have no more than one dataset. 461 datasetRefMap : `dict` 462 Mapping from dataset type name to `list` of 463 `lsst.daf.butler.DatasetRef` objects 467 datasetRefMap : `dict` 468 Modified mapping of input with possible adjusted 469 `lsst.daf.butler.DatasetRef` objects. 474 Raised if any `Input` or `PrerequisiteInput` connection has 475 ``multiple`` set to `False`, but multiple datasets. 477 Overrides of this function have the option of raising an Exception 478 if a field in the input does not satisfy a need for a corresponding 479 pipelineTask, i.e. no reference catalogs are found. 483 refs = datasetRefMap[connection.name]
484 if not connection.multiple
and len(refs) > 1:
486 f
"Found multiple datasets {', '.join(str(r.dataId) for r in refs)} " 487 f
"for scalar connection {connection.name} ({refs[0].datasetType.name})." 492 def iterConnections(connections: PipelineTaskConnections, connectionType: str) -> typing.Generator:
493 """Creates an iterator over the selected connections type which yields 494 all the defined connections of that type. 498 connections: `PipelineTaskConnections` 499 An instance of a `PipelineTaskConnections` object that will be iterated 501 connectionType: `str` 502 The type of connections to iterate over, valid values are inputs, 503 outputs, prerequisiteInputs, initInputs, initOutputs. 507 connection: `BaseConnection` 508 A connection defined on the input connections object of the type 509 supplied. The yielded value Will be an derived type of 512 for name
in getattr(connections, connectionType):
513 yield getattr(connections, name)
def __init__(self, kwargs)
def __setitem__(self, name, value)
def __delattr__(self, name)
def __init__(self, args, kwargs)