Coverage for python/lsst/pipe/base/tests/simpleQGraph.py: 30%
154 statements
# This file is part of pipe_base.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
22"""Bunch of common classes and methods for use in unit tests.
23"""
24from __future__ import annotations
26__all__ = ["AddTaskConfig", "AddTask", "AddTaskFactoryMock"]

import itertools
import logging
from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Tuple, Type, Union, cast

import lsst.daf.butler.tests as butlerTests
import lsst.pex.config as pexConfig
import numpy
from lsst.daf.butler import Butler, Config, DataId, DatasetType, Formatter
from lsst.daf.butler.core.logging import ButlerLogRecords
from lsst.resources import ResourcePath
from lsst.utils import doImportType

from .. import connectionTypes as cT
from .._instrument import Instrument
from ..config import PipelineTaskConfig
from ..connections import PipelineTaskConnections
from ..graph import QuantumGraph
from ..graphBuilder import DatasetQueryConstraintVariant as DSQVariant
from ..graphBuilder import GraphBuilder
from ..pipeline import Pipeline, TaskDatasetTypes, TaskDef
from ..pipelineTask import PipelineTask
from ..struct import Struct
from ..task import _TASK_FULL_METADATA_TYPE
from ..taskFactory import TaskFactory

if TYPE_CHECKING:
    from lsst.daf.butler import Registry

    from ..configOverrides import ConfigOverrides

_LOG = logging.getLogger(__name__)


class SimpleInstrument(Instrument):
    def __init__(self, *args: Any, **kwargs: Any):
        pass

    @staticmethod
    def getName() -> str:
        return "INSTRU"

    def getRawFormatter(self, dataId: DataId) -> Type[Formatter]:
        return Formatter

    def register(self, registry: Registry, *, update: bool = False) -> None:
        pass


class AddTaskConnections(
    PipelineTaskConnections,
    dimensions=("instrument", "detector"),
    defaultTemplates={"in_tmpl": "_in", "out_tmpl": "_out"},
):
    """Connections for AddTask; it has one input, two outputs,
    and one init output.
    """

    input = cT.Input(
        name="add_dataset{in_tmpl}",
        dimensions=["instrument", "detector"],
        storageClass="NumpyArray",
        doc="Input dataset type for this task",
    )
    output = cT.Output(
        name="add_dataset{out_tmpl}",
        dimensions=["instrument", "detector"],
        storageClass="NumpyArray",
        doc="Output dataset type for this task",
    )
    output2 = cT.Output(
        name="add2_dataset{out_tmpl}",
        dimensions=["instrument", "detector"],
        storageClass="NumpyArray",
        doc="Output dataset type for this task",
    )
    initout = cT.InitOutput(
        name="add_init_output{out_tmpl}",
        storageClass="NumpyArray",
        doc="Init Output dataset type for this task",
    )
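

# Note (an added sketch, not part of the upstream module): with the default
# templates above, the connection dataset types resolve to "add_dataset_in",
# "add_dataset_out", "add2_dataset_out" and "add_init_output_out".
# makeSimplePipeline() below overrides the templates with integers so that
# task N reads "add_dataset{N}" and writes "add_dataset{N+1}", chaining the
# tasks together through their data dependencies.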


class AddTaskConfig(PipelineTaskConfig, pipelineConnections=AddTaskConnections):
    """Config for AddTask."""

    addend = pexConfig.Field[int](doc="amount to add", default=3)


class AddTask(PipelineTask):
    """Trivial PipelineTask for testing; it has some extras useful for
    specific unit tests.
    """

    ConfigClass = AddTaskConfig
    _DefaultName = "add_task"

    initout = numpy.array([999])
    """InitOutputs for this task"""

    taskFactory: Optional[AddTaskFactoryMock] = None
    """Factory that makes instances"""

    def run(self, input: int) -> Struct:  # type: ignore

        if self.taskFactory:
            # do some bookkeeping
            if self.taskFactory.stopAt == self.taskFactory.countExec:
                raise RuntimeError("pretend something bad happened")
            self.taskFactory.countExec += 1

        self.config = cast(AddTaskConfig, self.config)
        self.metadata.add("add", self.config.addend)
        output = input + self.config.addend
        output2 = output + self.config.addend
        _LOG.info("input = %s, output = %s, output2 = %s", input, output, output2)
        return Struct(output=output, output2=output2)
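

# A worked example of what AddTask.run computes with the default config
# (addend=3); this is an illustrative sketch that bypasses the butler and
# quantum machinery entirely:
#
#     task = AddTask(config=AddTaskConfig())
#     result = task.run(input=4)
#     # result.output == 7, result.output2 == 10, and the task metadata
#     # records the addend under the "add" key.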


class AddTaskFactoryMock(TaskFactory):
    """Special task factory that instantiates AddTask.

    It also defines some bookkeeping variables used by AddTask to report
    progress to unit tests.
    """

    def __init__(self, stopAt: int = -1):
        self.countExec = 0  # incremented by AddTask
        self.stopAt = stopAt  # AddTask raises exception at this call to run()

    def makeTask(
        self,
        taskClass: Type[PipelineTask],
        name: Optional[str],
        config: Optional[PipelineTaskConfig],
        overrides: Optional[ConfigOverrides],
        butler: Optional[Butler],
    ) -> PipelineTask:
        if config is None:
            config = taskClass.ConfigClass()
            if overrides:
                overrides.applyTo(config)
        task = taskClass(config=config, initInputs=None, name=name)
        task.taskFactory = self  # type: ignore
        return task


def registerDatasetTypes(registry: Registry, pipeline: Union[Pipeline, Iterable[TaskDef]]) -> None:
    """Register all dataset types used by tasks in a registry.

    Copied and modified from `PreExecInit.initializeDatasetTypes`.

    Parameters
    ----------
    registry : `~lsst.daf.butler.Registry`
        Registry instance.
    pipeline : `~lsst.pipe.base.Pipeline` or `typing.Iterable` of `TaskDef`
        A pipeline, or an iterable of TaskDef instances, such as the output
        of `~lsst.pipe.base.Pipeline.toExpandedPipeline`.
    """
    for taskDef in pipeline:
        configDatasetType = DatasetType(
            taskDef.configDatasetName, {}, storageClass="Config", universe=registry.dimensions
        )
        storageClass = "Packages"
        packagesDatasetType = DatasetType(
            "packages", {}, storageClass=storageClass, universe=registry.dimensions
        )
        datasetTypes = TaskDatasetTypes.fromTaskDef(taskDef, registry=registry)
        for datasetType in itertools.chain(
            datasetTypes.initInputs,
            datasetTypes.initOutputs,
            datasetTypes.inputs,
            datasetTypes.outputs,
            datasetTypes.prerequisites,
            [configDatasetType, packagesDatasetType],
        ):
            _LOG.info("Registering %s with registry", datasetType)
            # Registration is a no-op if the dataset type already exists and
            # is consistent, and it raises if it is inconsistent. Components
            # must be skipped.
            if not datasetType.isComponent():
                registry.registerDatasetType(datasetType)
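

# A minimal usage sketch for registerDatasetTypes (illustrative; assumes a
# butler created with makeSimpleButler below):
#
#     pipeline = makeSimplePipeline(nQuanta=2)
#     registerDatasetTypes(butler.registry, pipeline.toExpandedPipeline())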


def makeSimplePipeline(nQuanta: int, instrument: Optional[str] = None) -> Pipeline:
    """Make a simple Pipeline for tests.

    This is called by ``makeSimpleQGraph`` if no pipeline is passed to that
    function. It can also be used to customize the pipeline used by the
    ``makeSimpleQGraph`` function by calling this first and passing the
    result to it.

    Parameters
    ----------
    nQuanta : `int`
        The number of quanta to add to the pipeline.
    instrument : `str` or `None`, optional
        The importable name of an instrument to be added to the pipeline,
        or an empty string or `None` (the default) if no instrument should
        be added.

    Returns
    -------
    pipeline : `~lsst.pipe.base.Pipeline`
        The created pipeline object.
    """
    pipeline = Pipeline("test pipeline")
    # Make a bunch of tasks that execute in well defined order (via data
    # dependencies).
    for lvl in range(nQuanta):
        pipeline.addTask(AddTask, f"task{lvl}")
        pipeline.addConfigOverride(f"task{lvl}", "connections.in_tmpl", lvl)
        pipeline.addConfigOverride(f"task{lvl}", "connections.out_tmpl", lvl + 1)
    if instrument:
        pipeline.addInstrument(instrument)
    return pipeline
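

# For example, makeSimplePipeline(3) yields tasks "task0", "task1" and "task2"
# chained through datasets "add_dataset0" ... "add_dataset3", so the quanta
# have a well defined execution order.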


def makeSimpleButler(root: str, run: str = "test", inMemory: bool = True) -> Butler:
    """Create a new data butler instance.

    Parameters
    ----------
    root : `str`
        Path or URI to the root location of the new repository.
    run : `str`, optional
        Run collection name.
    inMemory : `bool`, optional
        If `True` make an in-memory repository.

    Returns
    -------
    butler : `~lsst.daf.butler.Butler`
        Data butler instance.
    """
    root_path = ResourcePath(root, forceDirectory=True)
    if not root_path.isLocal:
        raise ValueError(f"Only works with local root not {root_path}")
    config = Config()
    if not inMemory:
        config["registry", "db"] = f"sqlite:///{root_path.ospath}/gen3.sqlite"
        config["datastore", "cls"] = "lsst.daf.butler.datastores.fileDatastore.FileDatastore"
    repo = butlerTests.makeTestRepo(str(root_path), {}, config=config)
    butler = Butler(butler=repo, run=run)
    return butler
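

# A minimal usage sketch, assuming a writable temporary directory (the
# `tmpdir` name is illustrative):
#
#     import tempfile
#
#     with tempfile.TemporaryDirectory() as tmpdir:
#         butler = makeSimpleButler(tmpdir, run="test", inMemory=True)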


def populateButler(
    pipeline: Pipeline, butler: Butler, datasetTypes: Optional[Dict[Optional[str], List[str]]] = None
) -> None:
    """Populate a data butler with data needed for tests.

    Initializes the data butler with a bunch of items:
    - registers dataset types which are defined by the pipeline
    - creates dimension data for (instrument, detector)
    - adds datasets based on the ``datasetTypes`` dictionary; if the
      dictionary is missing then a single dataset with type "add_dataset0"
      is added

    All datasets added to the butler have ``dataId={instrument=instrument,
    detector=0}``, where ``instrument`` is extracted from the pipeline;
    "INSTR" is used if the pipeline has no instrument definition. The type
    of each dataset is guessed from its dataset type name (this assumes the
    pipeline is made of `AddTask` tasks).

    Parameters
    ----------
    pipeline : `~lsst.pipe.base.Pipeline`
        Pipeline instance.
    butler : `~lsst.daf.butler.Butler`
        Data butler instance.
    datasetTypes : `dict` [ `str` or `None`, `list` [ `str` ] ], optional
        Dictionary whose keys are collection names and values are lists of
        dataset type names. By default a single dataset of type
        "add_dataset0" is added to the ``butler.run`` collection.
    """

    # Add dataset types to registry.
    taskDefs = list(pipeline.toExpandedPipeline())
    registerDatasetTypes(butler.registry, taskDefs)

    instrument = pipeline.getInstrument()
    if instrument is not None:
        instrument_class = doImportType(instrument)
        instrumentName = instrument_class.getName()
    else:
        instrumentName = "INSTR"

    # Add all needed dimensions to registry.
    butler.registry.insertDimensionData("instrument", dict(name=instrumentName))
    butler.registry.insertDimensionData("detector", dict(instrument=instrumentName, id=0, full_name="det0"))

    taskDefMap = dict((taskDef.label, taskDef) for taskDef in taskDefs)
    # Add inputs to butler.
    if not datasetTypes:
        datasetTypes = {None: ["add_dataset0"]}
    for run, dsTypes in datasetTypes.items():
        if run is not None:
            butler.registry.registerRun(run)
        for dsType in dsTypes:
            if dsType == "packages":
                # Version is intentionally inconsistent.
                # Dict is convertible to Packages if Packages is installed.
                data: Any = {"python": "9.9.99"}
                butler.put(data, dsType, run=run)
            else:
                if dsType.endswith("_config"):
                    # Find a config from a matching task name or make a new
                    # one.
                    taskLabel, _, _ = dsType.rpartition("_")
                    taskDef = taskDefMap.get(taskLabel)
                    if taskDef is not None:
                        data = taskDef.config
                    else:
                        data = AddTaskConfig()
                elif dsType.endswith("_metadata"):
                    data = _TASK_FULL_METADATA_TYPE()
                elif dsType.endswith("_log"):
                    data = ButlerLogRecords.from_records([])
                else:
                    data = numpy.array([0.0, 1.0, 2.0, 5.0])
                butler.put(data, dsType, run=run, instrument=instrumentName, detector=0)
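

# A minimal usage sketch (illustrative): with no ``datasetTypes`` argument,
# this registers the pipeline's dataset types and puts one "add_dataset0"
# input into the butler's default run collection:
#
#     pipeline = makeSimplePipeline(nQuanta=3)
#     populateButler(pipeline, butler)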


def makeSimpleQGraph(
    nQuanta: int = 5,
    pipeline: Optional[Pipeline] = None,
    butler: Optional[Butler] = None,
    root: Optional[str] = None,
    callPopulateButler: bool = True,
    run: str = "test",
    skipExistingIn: Any = None,
    inMemory: bool = True,
    userQuery: str = "",
    datasetTypes: Optional[Dict[Optional[str], List[str]]] = None,
    datasetQueryConstraint: DSQVariant = DSQVariant.ALL,
    makeDatastoreRecords: bool = False,
    resolveRefs: bool = False,
) -> Tuple[Butler, QuantumGraph]:
    """Make a simple QuantumGraph for tests.

    Makes a simple pipeline of `AddTask` tasks, sets up an in-memory
    registry and butler, fills them with minimal data, and generates a
    QuantumGraph from all of that.

    Parameters
    ----------
    nQuanta : `int`
        Number of quanta in a graph, only used if ``pipeline`` is `None`.
    pipeline : `~lsst.pipe.base.Pipeline`, optional
        If `None` then a pipeline is made with `AddTask` and default
        `AddTaskConfig`.
    butler : `~lsst.daf.butler.Butler`, optional
        Data butler instance; if `None` then a new data butler is created
        by calling `makeSimpleButler`.
    root : `str`, optional
        Path or URI to the root location of the new repository. Only used
        if ``butler`` is `None`.
    callPopulateButler : `bool`, optional
        If `True` insert datasets into the butler prior to building a
        graph. If `False` the ``butler`` argument must not be `None`, and
        must be pre-populated. Defaults to `True`.
    run : `str`, optional
        Name of the RUN collection to add to butler, only used if
        ``butler`` is `None`.
    skipExistingIn
        Expressions representing the collections to search for existing
        output datasets that should be skipped. May be any of the types
        accepted by `lsst.daf.butler.CollectionSearch.fromExpression`.
    inMemory : `bool`, optional
        If `True` make an in-memory repository, only used if ``butler`` is
        `None`.
    userQuery : `str`, optional
        The user query to pass to ``makeGraph``, by default an empty
        string.
    datasetTypes : `dict` [ `str` or `None`, `list` [ `str` ] ], optional
        Dictionary whose keys are collection names and values are lists of
        dataset type names. By default a single dataset of type
        "add_dataset0" is added to the ``butler.run`` collection.
    datasetQueryConstraint : `DatasetQueryConstraintVariant`, optional
        The query constraint variant that should be used to constrain the
        query based on dataset existence; defaults to
        `DatasetQueryConstraintVariant.ALL`.
    makeDatastoreRecords : `bool`, optional
        If `True` then add datastore records to generated quanta.
    resolveRefs : `bool`, optional
        If `True` then resolve all input references and generate random
        dataset IDs for all output and intermediate datasets.

    Returns
    -------
    butler : `~lsst.daf.butler.Butler`
        Butler instance.
    qgraph : `~lsst.pipe.base.QuantumGraph`
        Quantum graph instance.
    """

    if pipeline is None:
        pipeline = makeSimplePipeline(nQuanta=nQuanta)

    if butler is None:
        if root is None:
            raise ValueError("Must provide `root` when `butler` is None")
        if callPopulateButler is False:
            raise ValueError("callPopulateButler can only be False when butler is supplied as an argument")
        butler = makeSimpleButler(root, run=run, inMemory=inMemory)

    if callPopulateButler:
        populateButler(pipeline, butler, datasetTypes=datasetTypes)

    # Make the graph.
    _LOG.debug("Instantiating GraphBuilder, skipExistingIn=%s", skipExistingIn)
    builder = GraphBuilder(
        registry=butler.registry,
        skipExistingIn=skipExistingIn,
        datastore=butler.datastore if makeDatastoreRecords else None,
    )
    _LOG.debug(
        "Calling GraphBuilder.makeGraph, collections=%r, run=%r, userQuery=%r",
        butler.collections,
        run or butler.run,
        userQuery,
    )
    qgraph = builder.makeGraph(
        pipeline,
        collections=butler.collections,
        run=run or butler.run,
        userQuery=userQuery,
        datasetQueryConstraint=datasetQueryConstraint,
        resolveRefs=resolveRefs,
    )

    return butler, qgraph
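

# A minimal end-to-end sketch, assuming a writable temporary directory (the
# `tmpdir` name is illustrative); with the defaults this builds a pipeline of
# five AddTask instances, one quantum each:
#
#     import tempfile
#
#     with tempfile.TemporaryDirectory() as tmpdir:
#         butler, qgraph = makeSimpleQGraph(root=tmpdir)
#         # qgraph contains five quanta, one per AddTask instance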