Coverage for python/lsst/pipe/base/connections.py : 49%

# This file is part of pipe_base.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
"""
"DeferredDatasetRef", "iterConnections"]
Output, BaseConnection)
from .config import PipelineTaskConfig
"""Exception raised when dataset type is configured as scalar but there are multiple DataIds in a Quantum for that dataset.
Parameters ---------- key : `str` Name of the configuration field for dataset type. numDataIds : `int` Actual number of DataIds in a Quantum for this dataset type. """ super().__init__((f"Expected scalar for output dataset field {key}, " f"received {numDataIds} DataIds"))
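
# A minimal sanity-check sketch of the message the exception above produces;
# the field name "calexp" and the count are hypothetical values.
assert str(ScalarError("calexp", 3)) == ("Expected scalar for output dataset field calexp, "
                                         "received 3 DataIds")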
"""This is a special dict class used by PipelineTaskConnectionMetaclass
This dict is used in PipelineTaskConnection class creation, as the dictionary that is initially used as __dict__. It exists to intercept connection fields declared in a PipelineTaskConnection, and what name is used to identify them. The names are then added to class level list according to the connection type of the class attribute. The names are also used as keys in a class level dictionary associated with the corresponding class attribute. This information is a duplicate of what exists in __dict__, but provides a simple place to lookup and iterate on only these variables. """ # Initialize class level variables used to track any declared # class level variables that are instances of # connectionTypes.BaseConnection
self.data['prerequisiteInputs'].append(name) self.data['initInputs'].append(name) self.data['initOutputs'].append(name) # This should not be an elif, as it needs tested for # everything that inherits from BaseConnection # defer to the default behavior
"""Metaclass used in the declaration of PipelineTaskConnections classes """ # Create an instance of our special dict to catch and track all # variables that are instances of connectionTypes.BaseConnection # Copy any existing connections from a parent class dct[name] = value
"attribute which is an iterable of dimension names")
# Verify that dimensions are passed as a keyword in class # declaration for base in bases: if hasattr(base, 'dimensions'): kwargs['dimensions'] = base.dimensions break if 'dimensions' not in kwargs: raise dimensionsValueError except TypeError as exc: raise dimensionsValueError from exc # Lookup any python string templates that may have been used in the # declaration of the name field of a class connection attribute # Loop over all connections # add all the parameters to the set of templates allTemplates.add(param[1])
            # look up any template from base classes and merge them all
            # together
            mergeDict = {}
            for base in bases[::-1]:
                if hasattr(base, 'defaultTemplates'):
                    mergeDict.update(base.defaultTemplates)
            if 'defaultTemplates' in kwargs:
                mergeDict.update(kwargs['defaultTemplates'])
            if len(mergeDict) > 0:
                kwargs['defaultTemplates'] = mergeDict
            # Verify that if templated strings were used, defaults were
            # supplied as an argument in the declaration of the connection
            # class
            if len(allTemplates) > 0 and 'defaultTemplates' not in kwargs:
                raise TypeError("PipelineTaskConnection class contains templated attribute names, but no "
                                "default templates were provided, add a dictionary attribute named "
                                "defaultTemplates which contains the mapping between template key and value")
            if 'defaultTemplates' in kwargs:
                # Verify all templates have a default, and throw if they do
                # not
                defaultTemplateKeys = set(kwargs['defaultTemplates'].keys())
                templateDifference = allTemplates.difference(defaultTemplateKeys)
                if templateDifference:
                    raise TypeError(f"Default template keys were not provided for {templateDifference}")
                # Verify that templates do not share names with variable names
                # used for a connection, this is needed because of how
                # templates are specified in an associated config class.
                nameTemplateIntersection = allTemplates.intersection(set(dct['allConnections'].keys()))
                if len(nameTemplateIntersection) > 0:
                    raise TypeError("Template parameters cannot share names with class attributes")
            # Stash the merged default templates on the class so instances
            # can look them up later
            dct['defaultTemplates'] = kwargs.get('defaultTemplates', {})

        # Convert all the connection containers into frozensets so they cannot
        # be modified at the class scope
        for connectionName in ("inputs", "prerequisiteInputs",
                               "outputs", "initInputs", "initOutputs"):
            dct[connectionName] = frozenset(dct[connectionName])
        # our custom dict type must be turned into an actual dict to be used
        # in type.__new__
        return super().__new__(cls, name, bases, dict(dct))
    def __init__(cls, name, bases, dct, **kwargs):
        # This overrides the default init to drop the kwargs argument. Python
        # metaclasses will have this argument set if any kwargs are passed at
        # class construction time, but should be consumed before calling
        # __init__ on the type metaclass. This is in accordance with python
        # documentation on metaclasses
        super().__init__(name, bases, dct)
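
# A self-contained sketch of the __prepare__ pattern the metaclass above
# relies on: returning a custom mapping from __prepare__ lets the metaclass
# intercept every assignment made in a class body and record it before
# type.__new__ runs. All names below are hypothetical and independent of the
# real connection machinery.
class _RecordingDict(dict):
    def __init__(self):
        super().__init__()
        self.declared = []

    def __setitem__(self, key, value):
        # Record declaration order for ordinary (non-dunder) attributes
        if not key.startswith('__'):
            self.declared.append(key)
        super().__setitem__(key, value)


class _RecordingMeta(type):
    @classmethod
    def __prepare__(mcs, name, bases, **kwargs):
        return _RecordingDict()

    def __new__(mcs, name, bases, dct, **kwargs):
        # The custom dict must be converted to a plain dict for type.__new__
        cls = super().__new__(mcs, name, bases, dict(dct))
        cls.declared = tuple(dct.declared)
        return cls


class _Demo(metaclass=_RecordingMeta):
    a = 1
    b = 2


assert _Demo.declared == ('a', 'b')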
"""A Namespace to map defined variable names of connections to their `lsst.daf.buter.DatasetRef`s
This class maps the names used to define a connection on a PipelineTaskConnectionsClass to the corresponding `lsst.daf.butler.DatasetRef`s provided by a `lsst.daf.butler.Quantum` instance. This will be a quantum of execution based on the graph created by examining all the connections defined on the `PipelineTaskConnectionsClass`. """ # Create a variable to track what attributes are added. This is used # later when iterating over this QuantizedConnection instance object.__setattr__(self, "_attributes", set())
    def __setattr__(self, name: str, value: typing.Union[DatasetRef, typing.List[DatasetRef]]):
        # Capture the attribute name as it is added to this object
        self._attributes.add(name)
        super().__setattr__(name, value)
    def __delattr__(self, name):
        object.__delattr__(self, name)
        self._attributes.remove(name)
    def __iter__(self) -> typing.Generator[typing.Tuple[str, typing.Union[DatasetRef,
                                           typing.List[DatasetRef]]], None, None]:
        """Make an Iterator for this QuantizedConnection.

        Iterating over a QuantizedConnection will yield a tuple with the name
        of an attribute and the value associated with that name. This is
        similar to dict.items() but is on the namespace attributes rather
        than dict keys.
        """
        yield from ((name, getattr(self, name)) for name in self._attributes)
"""Returns an iterator over all the attributes added to a QuantizedConnection class """ yield from self._attributes
"""Class which denotes that a datasetRef should be treated as deferred when interacting with the butler
Parameters ---------- datasetRef : `lsst.daf.butler.DatasetRef` The `lsst.daf.butler.DatasetRef` that will be eventually used to resolve a dataset """
"""PipelineTaskConnections is a class used to declare desired IO when a PipelineTask is run by an activator
Parameters ---------- config : `PipelineTaskConfig` A `PipelineTaskConfig` class instance who's class has been configured to use this `PipelineTaskConnectionsClass`
    Notes
    -----
    PipelineTaskConnection classes are created by declaring class attributes
    of types defined in lsst.pipe.base.connectionTypes and are listed as
    follows:

    * InitInput - Defines connections in a quantum graph which are used as
      inputs to the __init__ function of the PipelineTask corresponding to
      this class
    * InitOutput - Defines connections in a quantum graph which are to be
      persisted using a butler at the end of the __init__ function of the
      PipelineTask corresponding to this class. The variable name used to
      define this connection should be the same as an attribute name on the
      PipelineTask instance. E.g. if an InitOutput is declared with the name
      outputSchema in a PipelineTaskConnections class, then a PipelineTask
      instance should have an attribute self.outputSchema defined. Its value
      is what will be saved by the activator framework.
    * PrerequisiteInput - An input connection type that defines a
      `lsst.daf.butler.DatasetType` that must be present at execution time,
      but that will not be used during the course of creating the quantum
      graph to be executed. These most often are things produced outside the
      processing pipeline, such as reference catalogs.
    * Input - Input `lsst.daf.butler.DatasetType` objects that will be used
      in the run method of a PipelineTask. The name used to declare the class
      attribute must match a function argument name in the run method of a
      PipelineTask. E.g. if the PipelineTaskConnections defines an Input with
      the name calexp, then the corresponding signature should be
      PipelineTask.run(calexp, ...). See also the example following this
      list.
    * Output - A `lsst.daf.butler.DatasetType` that will be produced by an
      execution of a PipelineTask. The name used to declare the connection
      must correspond to an attribute of a `Struct` that is returned by a
      `PipelineTask` run method. E.g. if an output connection is defined with
      the name measCat, then the corresponding PipelineTask.run method must
      return Struct(measCat=X, ...) where X matches the storageClass type
      defined on the output connection.
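
    For example, a connections class that declares an Input named ``calexp``
    and an Output named ``measCat`` implies a run method shaped like the
    following sketch, where the measurement step itself is hypothetical::

        def run(self, calexp):
            measCat = doMeasurement(calexp)
            return Struct(measCat=measCat)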
    Declaring a PipelineTaskConnections class also involves parameters passed
    in the class declaration statement itself.

    The first parameter is ``dimensions``, an iterable of strings which
    defines the unit of processing the run method of a corresponding
    `PipelineTask` will operate on. These dimensions must match dimensions
    that exist in the butler registry which will be used in executing the
    corresponding `PipelineTask`.

    The second parameter is labeled ``defaultTemplates`` and is conditionally
    optional. The name attributes of connections can be specified as python
    format strings, with named format arguments. If any of the name
    parameters on connections defined in a `PipelineTaskConnections` class
    contain a template, then a default template value must be specified in
    the ``defaultTemplates`` argument. This is done by passing a dictionary
    with keys corresponding to a template identifier, and values
    corresponding to the value to use as a default when formatting the
    string. For example if ConnectionClass.calexp.name =
    '{input}Coadd_calexp' then ``defaultTemplates`` = {'input': 'deep'}.
    Once a `PipelineTaskConnections` class is created, it is used in the
    creation of a `PipelineTaskConfig`. This is further documented in the
    documentation of `PipelineTaskConfig`. For the purposes of this
    documentation, the relevant information is that the config class allows
    configuration of connection names by users when running a pipeline.

    Instances of a `PipelineTaskConnections` class are used by the pipeline
    task execution framework to introspect what a corresponding
    `PipelineTask` will require, and what it will produce.
    Examples
    --------
    >>> from lsst.pipe.base import connectionTypes as cT
    >>> from lsst.pipe.base import PipelineTaskConnections
    >>> from lsst.pipe.base import PipelineTaskConfig
    >>> class ExampleConnections(PipelineTaskConnections,
    ...                          dimensions=("A", "B"),
    ...                          defaultTemplates={"foo": "Example"}):
    ...     inputConnection = cT.Input(doc="Example input",
    ...                                dimensions=("A", "B"),
    ...                                storageClass="Exposure",
    ...                                name="{foo}Dataset")
    ...     outputConnection = cT.Output(doc="Example output",
    ...                                  dimensions=("A", "B"),
    ...                                  storageClass="Exposure",
    ...                                  name="{foo}output")
    >>> class ExampleConfig(PipelineTaskConfig,
    ...                     pipelineConnections=ExampleConnections):
    ...     pass
    >>> config = ExampleConfig()
    >>> config.connections.foo = "Modified"
    >>> config.connections.outputConnection = "TotallyDifferent"
    >>> connections = ExampleConnections(config=config)
    >>> assert connections.inputConnection.name == "ModifiedDataset"
    >>> assert connections.outputConnection.name == "TotallyDifferent"
    """
    def __init__(self, *, config: 'PipelineTaskConfig' = None):
        self.inputs = set(self.inputs)
        self.prerequisiteInputs = set(self.prerequisiteInputs)
        self.outputs = set(self.outputs)
        self.initInputs = set(self.initInputs)
        self.initOutputs = set(self.initOutputs)
        if config is None or not isinstance(config, configMod.PipelineTaskConfig):
            raise ValueError("PipelineTaskConnections must be instantiated with"
                             " a PipelineTaskConfig instance")
        self.config = config
        # Extract the template names that were defined in the config instance
        # by looping over the keys of the defaultTemplates dict specified at
        # class declaration time
        templateValues = {name: getattr(config.connections, name)
                          for name in getattr(self, 'defaultTemplates').keys()}
        # Extract the configured value corresponding to each connection
        # variable, i.e. for each connection identifier, populate an override
        # for the connection.name attribute
        self._nameOverrides = {name: getattr(config.connections, name).format(**templateValues)
                               for name in self.allConnections.keys()}
        # connections.name corresponds to a dataset type name; create a
        # reverse mapping that goes from dataset type name to attribute
        # identifier name (variable name) on the connection class
        self._typeNameToVarName = {v: k for k, v in self._nameOverrides.items()}
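
    # Illustrative note on the override computation above, using the
    # hypothetical values from the class docstring: with templateValues of
    # {'input': 'deep'}, a configured connection name of
    # '{input}Coadd_calexp' formats to 'deepCoadd_calexp', exactly as plain
    # str.format produces:
    #
    #     '{input}Coadd_calexp'.format(input='deep') == 'deepCoadd_calexp'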
    def buildDatasetRefs(self, quantum: Quantum) -> typing.Tuple[InputQuantizedConnection,
                                                                 OutputQuantizedConnection]:
        """Builds QuantizedConnections corresponding to input Quantum.
        Parameters
        ----------
        quantum : `lsst.daf.butler.Quantum`
            Quantum object which defines the inputs and outputs for a given
            unit of processing
        Returns
        -------
        retVal : `tuple` of (`InputQuantizedConnection`,
            `OutputQuantizedConnection`)
            Namespaces mapping attribute names (identifiers of connections)
            to butler references defined in the input
            `lsst.daf.butler.Quantum`
        """
        inputDatasetRefs = InputQuantizedConnection()
        outputDatasetRefs = OutputQuantizedConnection()
        # operate on a reference object and an iterable of names of class
        # connection attributes
        for refs, names in zip((inputDatasetRefs, outputDatasetRefs),
                               (itertools.chain(self.inputs, self.prerequisiteInputs), self.outputs)):
            # get a name of a class connection attribute
            for attributeName in names:
                # get the attribute identified by name
                attribute = getattr(self, attributeName)
                # Branch if the attribute dataset type is an input
                if attribute.name in quantum.predictedInputs:
                    # Get the DatasetRefs
                    quantumInputRefs = quantum.predictedInputs[attribute.name]
                    # if the dataset is marked to load deferred, wrap it in a
                    # DeferredDatasetRef
                    if attribute.deferLoad:
                        quantumInputRefs = [DeferredDatasetRef(datasetRef=ref)
                                            for ref in quantumInputRefs]
                    # Unpack arguments that are not marked multiples (list of
                    # length one)
                    if not attribute.multiple:
                        if len(quantumInputRefs) != 1:
                            raise ScalarError(attributeName, len(quantumInputRefs))
                        quantumInputRefs = quantumInputRefs[0]
                    # Add to the QuantizedConnection identifier
                    setattr(refs, attributeName, quantumInputRefs)
                # Branch if the attribute dataset type is an output
                elif attribute.name in quantum.outputs:
                    value = quantum.outputs[attribute.name]
                    # Unpack arguments that are not marked multiples (list of
                    # length one)
                    if not attribute.multiple:
                        value = value[0]
                    # Add to the QuantizedConnection identifier
                    setattr(refs, attributeName, value)
                # Specified attribute is not in inputs or outputs; don't know
                # how to handle, so throw
                else:
                    raise ValueError(f"Attribute with name {attributeName} has no counterpart "
                                     "in input quantum")
        return inputDatasetRefs, outputDatasetRefs
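
    # Self-contained sketch of the scalar-vs-multiple unpacking rule applied
    # above, with plain lists standing in for DatasetRefs (all values
    # hypothetical):
    #
    #     refs = ["ref1"]
    #     if not attribute.multiple:
    #         if len(refs) != 1:
    #             raise ScalarError(attributeName, len(refs))
    #         refs = refs[0]       # a scalar connection yields a single ref
    #     # a connection with multiple=True keeps the full list instead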
"""Override to make adjustments to `lsst.daf.butler.DatasetRef`s in the `lsst.daf.butler.core.Quantum` during the graph generation stage of the activator.
        Parameters
        ----------
        datasetRefMap : `dict`
            Mapping with keys of dataset type name to `list` of
            `lsst.daf.butler.DatasetRef` objects
        Returns
        -------
        datasetRefMap : `dict`
            Modified mapping of input with possibly adjusted
            `lsst.daf.butler.DatasetRef` objects
        Raises
        ------
        Exception
            Overrides of this function have the option of raising an
            Exception if a field in the input does not satisfy a need for a
            corresponding pipelineTask, e.g. no reference catalogs are found.
        """
        return datasetRefMap
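
# A hedged sketch of an adjustQuantum override; the "refcat" dataset type
# name and the empty dimensions are hypothetical, and datasetRefMap is
# assumed to be the mapping of dataset type name to lists of DatasetRefs
# documented above.
class _RefcatRequiredConnections(PipelineTaskConnections, dimensions=()):
    def adjustQuantum(self, datasetRefMap):
        # Fail quantum generation early if no reference catalogs overlap
        # this quantum
        if not datasetRefMap.get("refcat"):
            raise FileNotFoundError("No reference catalogs cover this quantum")
        return datasetRefMap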
"""Creates an iterator over the selected connections type which yields all the defined connections of that type.
    Parameters
    ----------
    connections : `PipelineTaskConnections`
        An instance of a `PipelineTaskConnections` object that will be
        iterated over.
    connectionType : `str`
        The type of connections to iterate over; valid values are inputs,
        outputs, prerequisiteInputs, initInputs, initOutputs.
    Yields
    ------
    connection : `BaseConnection`
        A connection defined on the input connections object of the type
        supplied. The yielded value will be a derived type of
        `BaseConnection`.
    """
    for name in getattr(connections, connectionType):
        yield getattr(connections, name)
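
# A usage sketch for iterConnections, reusing the hypothetical
# ExampleConnections and ExampleConfig classes from the
# PipelineTaskConnections docstring:
#
#     connections = ExampleConnections(config=ExampleConfig())
#     for connection in iterConnections(connections, "inputs"):
#         print(connection.name)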