Coverage for python/lsst/pipe/base/connectionTypes.py: 79%
75 statements
coverage.py v7.5.1, created at 2024-05-07 02:48 -0700
# This file is part of pipe_base.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""Module defining connection types to be used within a
`PipelineTaskConnections` class.
"""
__all__ = ["InitInput", "InitOutput", "Input", "PrerequisiteInput", "Output", "BaseConnection"]

import dataclasses
from collections.abc import Callable, Iterable, Sequence
from typing import ClassVar

from deprecated.sphinx import deprecated as deprecated_sphinx  # avoid clash with BaseConnection.deprecated
from lsst.daf.butler import DataCoordinate, DatasetRef, DatasetType, DimensionUniverse, Registry, StorageClass
from lsst.utils.introspection import find_outside_stacklevel


@dataclasses.dataclass(frozen=True)
class BaseConnection:
    """Base class used for declaring `PipelineTask` connections.

    Attributes
    ----------
    name : `str`
        The name used to identify the dataset type.
    storageClass : `str`
        The storage class used when (un)persisting the dataset type.
    doc : `str`
        Documentation for this connection.
    multiple : `bool`
        Indicates if this connection should expect to contain multiple objects
        of the given dataset type. Tasks with more than one connection with
        ``multiple=True`` with the same dimensions may want to implement
        `PipelineTaskConnections.adjustQuantum` to ensure those datasets are
        consistent (i.e. zip-iterable) in `PipelineTask.runQuantum()` and
        notify the execution system as early as possible of outputs that will
        not be produced because the corresponding input is missing.
    deprecated : `str`, optional
        A description of why this connection is deprecated, including the
        version after which it may be removed.

        If not `None`, the string is appended to the docstring for this
        connection and the corresponding config Field.
    """

    name: str
    storageClass: str
    doc: str = ""
    multiple: bool = False
    deprecated: str | None = dataclasses.field(default=None, kw_only=True)

    _connection_type_set: ClassVar[str]
    _deprecation_context: str = ""

    def __post_init__(self):
        if self.deprecated and not self._deprecation_context:
            info = {}
            _ = find_outside_stacklevel("lsst.pipe.base", "dataclasses", stack_info=info)
            object.__setattr__(self, "_deprecation_context", f"{info['filename']}:{info['lineno']}")
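
    # Illustrative sketch (hypothetical names, not part of the original
    # module): declaring a deprecated connection records where the user's
    # declaration lives (e.g. ``"my_connections.py:42"``) in
    # ``_deprecation_context``, so warnings can point at the declaring code
    # rather than at this dataclass machinery.
    #
    #     oldCamera = InitInput(
    #         name="camera",
    #         storageClass="Camera",
    #         doc="Deprecated camera connection.",
    #         deprecated="Deprecated in favor of newCamera; will be removed after v27.",
    #     )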

    def __get__(self, inst, klass):
        """Descriptor access method.

        This method turns a connection into a descriptor.
        When a connection is added to a connection class, it is a class level
        variable. This method makes accessing this connection, on the
        instance of the connection class owning this connection, return a
        result specialized for that instance. In the case of connections
        this specifically means names specified in a config instance will
        be visible instead of the default names for the connection, and that
        removed connections will not be accessible on the instance.
        """
        # If inst is None, this is being accessed by the class and not an
        # instance, return this connection itself
        if inst is None:
            return self
        # Attempt to return the configured connection object from the
        # connections instance allConnections mapping.
        try:
            return inst.allConnections[self.varName]
        except KeyError:
            raise AttributeError(
                f"Connection {self.varName!r} of {klass.__name__} has been removed."
            ) from None
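
    # Illustrative sketch of the descriptor behavior above (hypothetical
    # names; overriding ``config.connections.exposure`` is the usual way a
    # connection name is reconfigured):
    #
    #     MyConnections.exposure.name    # class access -> default name, e.g. "calexp"
    #     connections = MyConnections(config=config)
    #     connections.exposure.name      # instance access -> configured name
    #     connections.removedConnection  # raises AttributeError if removed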

    # TODO: remove on DM-40443.
    @deprecated_sphinx(
        reason="Deprecated in favor of PipelineGraph, and will be removed after v27.",
        version="27.0",
        category=FutureWarning,
    )
    def makeDatasetType(
        self, universe: DimensionUniverse, parentStorageClass: StorageClass | str | None = None
    ) -> DatasetType:
        """Construct a true `~lsst.daf.butler.DatasetType` instance with
        normalized dimensions.

        Parameters
        ----------
        universe : `lsst.daf.butler.DimensionUniverse`
            Set of all known dimensions to be used to normalize the dimension
            names specified in config.
        parentStorageClass : `lsst.daf.butler.StorageClass` or `str`, optional
            Parent storage class for component datasets; `None` otherwise.

        Returns
        -------
        datasetType : `~lsst.daf.butler.DatasetType`
            The `~lsst.daf.butler.DatasetType` defined by this connection.
        """
        return DatasetType(
            self.name, universe.empty, self.storageClass, parentStorageClass=parentStorageClass
        )
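
# Illustrative sketch (hypothetical connection; ``makeDatasetType`` is
# deprecated in favor of PipelineGraph): for a plain ``BaseConnection``
# subclass such as ``InitInput`` (defined below), the resulting dataset type
# has empty dimensions, unlike the dimensioned subclasses that follow.
#
#     conn = InitInput(name="camera", storageClass="Camera", doc="...")
#     dstype = conn.makeDatasetType(butler.dimensions)
#     dstype.dimensions  # == universe.empty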


@dataclasses.dataclass(frozen=True)
class DimensionedConnection(BaseConnection):
    """Class used for declaring PipelineTask connections that include
    dimensions.

    Attributes
    ----------
    name : `str`
        The name used to identify the dataset type.
    storageClass : `str`
        The storage class used when (un)persisting the dataset type.
    multiple : `bool`
        Indicates if this connection should expect to contain multiple objects
        of the given dataset type. Tasks with more than one connection with
        ``multiple=True`` with the same dimensions may want to implement
        `PipelineTaskConnections.adjustQuantum` to ensure those datasets are
        consistent (i.e. zip-iterable) in `PipelineTask.runQuantum` and notify
        the execution system as early as possible of outputs that will not be
        produced because the corresponding input is missing.
    dimensions : iterable of `str`
        The `lsst.daf.butler.Registry` dimensions used to identify the dataset
        type with the specified name.
    isCalibration : `bool`, optional
        `True` if this dataset type may be included in CALIBRATION-type
        collections to associate it with a validity range, `False` (default)
        otherwise.
    """

    dimensions: Iterable[str] = ()
    isCalibration: bool = False

    def __post_init__(self):
        super().__post_init__()
        if isinstance(self.dimensions, str):
            raise TypeError(
                "Dimensions must be iterable of dimensions, got str, possibly omitted trailing comma"
            )
        if not isinstance(self.dimensions, Iterable):
            raise TypeError("Dimensions must be iterable of dimensions")
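
    # Illustrative sketch of the check above (hypothetical values): a
    # one-element tuple needs a trailing comma; without it ``dimensions`` is a
    # plain string and construction raises TypeError.
    #
    #     DimensionedConnection(name="x", storageClass="Catalog", dimensions=("visit"))   # TypeError
    #     DimensionedConnection(name="x", storageClass="Catalog", dimensions=("visit",))  # OK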

    # TODO: remove on DM-40443.
    @deprecated_sphinx(
        reason="Deprecated in favor of PipelineGraph, and will be removed after v27.",
        version="27.0",
        category=FutureWarning,
    )
    def makeDatasetType(
        self, universe: DimensionUniverse, parentStorageClass: StorageClass | str | None = None
    ) -> DatasetType:
        """Construct a true `~lsst.daf.butler.DatasetType` instance with
        normalized dimensions.

        Parameters
        ----------
        universe : `lsst.daf.butler.DimensionUniverse`
            Set of all known dimensions to be used to normalize the dimension
            names specified in config.
        parentStorageClass : `lsst.daf.butler.StorageClass` or `str`, optional
            Parent storage class for component datasets; `None` otherwise.

        Returns
        -------
        datasetType : `~lsst.daf.butler.DatasetType`
            The `~lsst.daf.butler.DatasetType` defined by this connection.
        """
        return DatasetType(
            self.name,
            universe.conform(self.dimensions),
            self.storageClass,
            isCalibration=self.isCalibration,
            parentStorageClass=parentStorageClass,
        )
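
# Illustrative sketch (hypothetical names; ``makeDatasetType`` is deprecated):
# here the declared dimension names are normalized through
# ``universe.conform`` rather than kept as the raw iterable the connection
# was declared with.
#
#     conn = Output(name="deepCoadd", storageClass="ExposureF", dimensions=("tract", "patch", "band"))
#     dstype = conn.makeDatasetType(butler.dimensions)
#     dstype.dimensions  # conformed dimension group for ("tract", "patch", "band")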


@dataclasses.dataclass(frozen=True)
class BaseInput(DimensionedConnection):
    """Class used for declaring PipelineTask input connections.

    Attributes
    ----------
    name : `str`
        The default name used to identify the dataset type.
    storageClass : `str`
        The storage class used when (un)persisting the dataset type.
    multiple : `bool`
        Indicates if this connection should expect to contain multiple objects
        of the given dataset type. Tasks with more than one connection with
        ``multiple=True`` with the same dimensions may want to implement
        `PipelineTaskConnections.adjustQuantum` to ensure those datasets are
        consistent (i.e. zip-iterable) in `PipelineTask.runQuantum` and notify
        the execution system as early as possible of outputs that will not be
        produced because the corresponding input is missing.
    dimensions : iterable of `str`
        The `lsst.daf.butler.Registry` dimensions used to identify the dataset
        type with the specified name.
    deferLoad : `bool`
        Indicates that this dataset type will be loaded as a
        `lsst.daf.butler.DeferredDatasetHandle`. PipelineTasks can use this
        object to load the object at a later time.
    minimum : `int`
        Minimum number of datasets required for this connection, per quantum.
        This is checked in the base implementation of
        `PipelineTaskConnections.adjustQuantum`, which raises `NoWorkFound` if
        the minimum is not met for `Input` connections (causing the quantum to
        be pruned, skipped, or never created, depending on the context), and
        `FileNotFoundError` for `PrerequisiteInput` connections (causing
        QuantumGraph generation to fail). `PipelineTask` implementations may
        provide custom `~PipelineTaskConnections.adjustQuantum` implementations
        for more fine-grained or configuration-driven constraints, as long as
        they are compatible with this minimum.

    Raises
    ------
    TypeError
        Raised if ``minimum`` is greater than one but ``multiple=False``, or
        if ``minimum`` is zero for a regular `Input` connection; the latter is
        not currently supported by our QuantumGraph generation algorithm.
    """

    deferLoad: bool = False
    minimum: int = 1

    def __post_init__(self) -> None:
        super().__post_init__()
        if self.minimum > 1 and not self.multiple:
            raise TypeError(f"Cannot set minimum={self.minimum} if multiple=False.")
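
# Illustrative sketch of the constraint above (hypothetical values):
# requiring more than one dataset per quantum only makes sense for a
# ``multiple`` connection.
#
#     BaseInput(name="x", storageClass="Catalog", minimum=2, multiple=True)  # OK
#     BaseInput(name="x", storageClass="Catalog", minimum=2)                 # TypeError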


@dataclasses.dataclass(frozen=True)
class Input(BaseInput):
    """Class used for declaring PipelineTask input connections.

    Attributes
    ----------
    name : `str`
        The default name used to identify the dataset type.
    storageClass : `str`
        The storage class used when (un)persisting the dataset type.
    multiple : `bool`
        Indicates if this connection should expect to contain multiple objects
        of the given dataset type. Tasks with more than one connection with
        ``multiple=True`` with the same dimensions may want to implement
        `PipelineTaskConnections.adjustQuantum` to ensure those datasets are
        consistent (i.e. zip-iterable) in `PipelineTask.runQuantum` and notify
        the execution system as early as possible of outputs that will not be
        produced because the corresponding input is missing.
    dimensions : iterable of `str`
        The `lsst.daf.butler.Registry` dimensions used to identify the dataset
        type with the specified name.
    deferLoad : `bool`
        Indicates that this dataset type will be loaded as a
        `lsst.daf.butler.DeferredDatasetHandle`. PipelineTasks can use this
        object to load the object at a later time.
    minimum : `int`
        Minimum number of datasets required for this connection, per quantum.
        This is checked in the base implementation of
        `PipelineTaskConnections.adjustQuantum`, which raises `NoWorkFound` if
        the minimum is not met for `Input` connections (causing the quantum to
        be pruned, skipped, or never created, depending on the context), and
        `FileNotFoundError` for `PrerequisiteInput` connections (causing
        QuantumGraph generation to fail). `PipelineTask` implementations may
        provide custom `~PipelineTaskConnections.adjustQuantum` implementations
        for more fine-grained or configuration-driven constraints, as long as
        they are compatible with this minimum.
    deferGraphConstraint : `bool`, optional
        If `True`, do not include this dataset type's existence in the initial
        query that starts the QuantumGraph generation process. This can be
        used to make QuantumGraph generation faster by avoiding redundant
        datasets, and in certain cases it can (along with careful attention to
        which tasks are included in the same QuantumGraph) be used to work
        around the QuantumGraph generation algorithm's inflexible handling of
        spatial overlaps. This option has no effect when the connection is not
        an overall input of the pipeline (or subset thereof) for which a graph
        is being created, and it never affects the ordering of quanta.
    deferBinding : `bool`, optional
        If `True`, the dataset will not be automatically included in the
        pipeline graph; ``deferGraphConstraint`` is implied. A custom
        QuantumGraphBuilder is required to bind it and add a corresponding
        edge to the pipeline graph. This option allows the same dataset type
        to be used as both an input and an output of a quantum.

    Raises
    ------
    TypeError
        Raised if ``minimum`` is greater than one but ``multiple=False``, or
        if ``minimum`` is zero for a regular `Input` connection; the latter is
        not currently supported by our QuantumGraph generation algorithm.
    """

    deferGraphConstraint: bool = False

    deferBinding: bool = False

    _connection_type_set: ClassVar[str] = "inputs"

    def __post_init__(self) -> None:
        super().__post_init__()
        if self.minimum == 0:
            raise TypeError(f"Cannot set minimum={self.minimum} for regular input.")
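
# Illustrative sketch (hypothetical names): a typical ``Input`` declaration
# inside a ``PipelineTaskConnections`` subclass.
#
#     class MyConnections(PipelineTaskConnections, dimensions=("visit", "detector")):
#         exposure = Input(
#             name="calexp",
#             storageClass="ExposureF",
#             doc="Calibrated exposure to process.",
#             dimensions=("visit", "detector"),
#             deferLoad=False,
#         )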


@dataclasses.dataclass(frozen=True)
class PrerequisiteInput(BaseInput):
    """Class used for declaring PipelineTask prerequisite connections.

    Attributes
    ----------
    name : `str`
        The default name used to identify the dataset type.
    storageClass : `str`
        The storage class used when (un)persisting the dataset type.
    multiple : `bool`
        Indicates if this connection should expect to contain multiple objects
        of the given dataset type. Tasks with more than one connection with
        ``multiple=True`` with the same dimensions may want to implement
        `PipelineTaskConnections.adjustQuantum` to ensure those datasets are
        consistent (i.e. zip-iterable) in `PipelineTask.runQuantum` and notify
        the execution system as early as possible of outputs that will not be
        produced because the corresponding input is missing.
    dimensions : iterable of `str`
        The `lsst.daf.butler.Registry` dimensions used to identify the dataset
        type with the specified name.
    minimum : `int`
        Minimum number of datasets required for this connection, per quantum.
        This is checked in the base implementation of
        `PipelineTaskConnections.adjustQuantum`, which raises
        `FileNotFoundError` (causing QuantumGraph generation to fail).
        `PipelineTask` implementations may provide custom
        `~PipelineTaskConnections.adjustQuantum` implementations for more
        fine-grained or configuration-driven constraints, as long as they are
        compatible with this minimum.
    lookupFunction : `typing.Callable`, optional
        An optional callable that will look up prerequisite inputs using the
        dataset type, registry, quantum data ID, and input collections passed
        to it. If no function is specified, the default temporal and spatial
        lookup will be used.

    Raises
    ------
    TypeError
        Raised if ``minimum`` is greater than one but ``multiple=False``.

    Notes
    -----
    Prerequisite inputs are used for datasets that must exist in the data
    repository before a pipeline including this task is run; they cannot be
    produced by another task in the same pipeline.

    In exchange for this limitation, they have a number of advantages relative
    to regular `Input` connections:

    - The query used to find them during `QuantumGraph` generation can be
      fully customized by providing a ``lookupFunction``.
    - Failed searches for prerequisites during `QuantumGraph` generation will
      usually generate more helpful diagnostics than those for regular `Input`
      connections.
    - The default query for prerequisite inputs relates the quantum dimensions
      directly to the dimensions of its dataset type, without being
      constrained by any of the other dimensions in the pipeline. This allows
      them to be used for temporal calibration lookups (which regular `Input`
      connections cannot do at present) and to work around `QuantumGraph`
      generation limitations involving cases where naive spatial overlap
      relationships between dimensions are not desired (e.g. a task that wants
      all detectors in each visit for which the visit overlaps a tract, not
      just those where that detector+visit combination overlaps the tract).
    - Prerequisite inputs may be optional (regular inputs are never optional).
    """

    lookupFunction: (
        Callable[[DatasetType, Registry, DataCoordinate, Sequence[str]], Iterable[DatasetRef]] | None
    ) = None

    _connection_type_set: ClassVar[str] = "prerequisiteInputs"
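
# Illustrative sketch (hypothetical function and dataset type): a custom
# ``lookupFunction`` matching the signature above. This one ignores the
# quantum data ID and returns every matching dataset in the input
# collections.
#
#     def lookupRawSeeds(
#         datasetType: DatasetType,
#         registry: Registry,
#         dataId: DataCoordinate,
#         collections: Sequence[str],
#     ) -> Iterable[DatasetRef]:
#         return registry.queryDatasets(datasetType, collections=collections)
#
#     seeds = PrerequisiteInput(
#         name="rawSeeds",
#         storageClass="StructuredDataDict",
#         doc="Random-number seeds (hypothetical dataset type).",
#         lookupFunction=lookupRawSeeds,
#     )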


@dataclasses.dataclass(frozen=True)
class Output(DimensionedConnection):
    """Connection for output dataset."""

    _connection_type_set: ClassVar[str] = "outputs"


@dataclasses.dataclass(frozen=True)
class InitInput(BaseConnection):
    """Connection for initInput dataset."""

    _connection_type_set: ClassVar[str] = "initInputs"


@dataclasses.dataclass(frozen=True)
class InitOutput(BaseConnection):
    """Connection for initOutput dataset."""

    _connection_type_set: ClassVar[str] = "initOutputs"
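
# Illustrative sketch (hypothetical names) pulling the pieces together: a
# connections class mixing the connection types defined above. ``Output``
# carries dimensions like the inputs; ``InitOutput`` does not, because init
# datasets are written once per task rather than once per quantum.
#
#     class MyConnections(PipelineTaskConnections, dimensions=("visit", "detector")):
#         camera = PrerequisiteInput(
#             name="camera",
#             storageClass="Camera",
#             doc="Camera geometry.",
#             dimensions=("instrument",),
#             isCalibration=True,
#         )
#         exposure = Input(
#             name="calexp",
#             storageClass="ExposureF",
#             doc="Calibrated exposure.",
#             dimensions=("visit", "detector"),
#         )
#         catalog = Output(
#             name="myMeasurements",
#             storageClass="SourceCatalog",
#             doc="Output measurement catalog.",
#             dimensions=("visit", "detector"),
#         )
#         schema = InitOutput(
#             name="myMeasurements_schema",
#             storageClass="SourceCatalog",
#             doc="Schema of the output catalog.",
#         )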