Coverage for python/lsst/pipe/base/tests/simpleQGraph.py: 28% (152 statements)
# This file is part of pipe_base.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
22"""Bunch of common classes and methods for use in unit tests.
23"""
from __future__ import annotations

__all__ = ["AddTaskConfig", "AddTask", "AddTaskFactoryMock"]

import itertools
import logging
from collections.abc import Iterable, Mapping
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Type, Union, cast

import lsst.daf.butler.tests as butlerTests
import lsst.pex.config as pexConfig
import numpy
from lsst.daf.butler import Butler, Config, DataId, DatasetRef, DatasetType, Formatter, LimitedButler
from lsst.daf.butler.core.logging import ButlerLogRecords
from lsst.resources import ResourcePath
from lsst.utils import doImportType

from .. import connectionTypes as cT
from .._instrument import Instrument
from ..config import PipelineTaskConfig
from ..connections import PipelineTaskConnections
from ..graph import QuantumGraph
from ..graphBuilder import DatasetQueryConstraintVariant as DSQVariant
from ..graphBuilder import GraphBuilder
from ..pipeline import Pipeline, TaskDatasetTypes, TaskDef
from ..pipelineTask import PipelineTask
from ..struct import Struct
from ..task import _TASK_FULL_METADATA_TYPE
from ..taskFactory import TaskFactory

if TYPE_CHECKING:
    from lsst.daf.butler import Registry

_LOG = logging.getLogger(__name__)


class SimpleInstrument(Instrument):
    def __init__(self, *args: Any, **kwargs: Any):
        pass

    @staticmethod
    def getName() -> str:
        return "INSTRU"

    def getRawFormatter(self, dataId: DataId) -> Type[Formatter]:
        return Formatter

    def register(self, registry: Registry, *, update: bool = False) -> None:
        pass


class AddTaskConnections(
    PipelineTaskConnections,
    dimensions=("instrument", "detector"),
    defaultTemplates={"in_tmpl": "_in", "out_tmpl": "_out"},
):
    """Connections for AddTask; it has one input and two outputs,
    plus one init output.
    """

    input = cT.Input(
        name="add_dataset{in_tmpl}",
        dimensions=["instrument", "detector"],
        storageClass="NumpyArray",
        doc="Input dataset type for this task",
    )
    output = cT.Output(
        name="add_dataset{out_tmpl}",
        dimensions=["instrument", "detector"],
        storageClass="NumpyArray",
        doc="Output dataset type for this task",
    )
    output2 = cT.Output(
        name="add2_dataset{out_tmpl}",
        dimensions=["instrument", "detector"],
        storageClass="NumpyArray",
        doc="Output dataset type for this task",
    )
    initout = cT.InitOutput(
        name="add_init_output{out_tmpl}",
        storageClass="NumpyArray",
        doc="Init Output dataset type for this task",
    )
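
# Note (added for documentation): the "{in_tmpl}"/"{out_tmpl}" placeholders
# in the connection names above are filled from each task's config, so with
# in_tmpl=0 and out_tmpl=1 (as set by makeSimplePipeline below) the task
# reads "add_dataset0" and writes "add_dataset1" and "add2_dataset1".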


class AddTaskConfig(PipelineTaskConfig, pipelineConnections=AddTaskConnections):
    """Config for AddTask."""

    addend = pexConfig.Field[int](doc="amount to add", default=3)


class AddTask(PipelineTask):
    """Trivial PipelineTask for testing, with some extras useful for
    specific unit tests.
    """

    ConfigClass = AddTaskConfig
    _DefaultName = "add_task"

    initout = numpy.array([999])
    """InitOutputs for this task"""

    taskFactory: Optional[AddTaskFactoryMock] = None
    """Factory that makes instances"""

    def run(self, input: int) -> Struct:  # type: ignore
        if self.taskFactory:
            # do some bookkeeping
            if self.taskFactory.stopAt == self.taskFactory.countExec:
                raise RuntimeError("pretend something bad happened")
            self.taskFactory.countExec += 1

        self.config = cast(AddTaskConfig, self.config)
        self.metadata.add("add", self.config.addend)
        output = input + self.config.addend
        output2 = output + self.config.addend
        _LOG.info("input = %s, output = %s, output2 = %s", input, output, output2)
        return Struct(output=output, output2=output2)
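

# Note (added for documentation): with the default addend of 3, run() maps
# input -> Struct(output=input + 3, output2=input + 6), records the addend
# in the task metadata under "add", and, when a taskFactory is attached,
# raises RuntimeError once countExec reaches stopAt.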


class AddTaskFactoryMock(TaskFactory):
    """Special task factory that instantiates AddTask.

    It also defines some bookkeeping variables used by AddTask to report
    progress to unit tests.
    """

    def __init__(self, stopAt: int = -1):
        self.countExec = 0  # incremented by AddTask
        self.stopAt = stopAt  # AddTask raises an exception on this call to run()

    def makeTask(
        self, taskDef: TaskDef, butler: LimitedButler, initInputRefs: Iterable[DatasetRef] | None
    ) -> PipelineTask:
        taskClass = taskDef.taskClass
        assert taskClass is not None
        task = taskClass(config=taskDef.config, initInputs=None, name=taskDef.label)
        task.taskFactory = self  # type: ignore
        return task
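

# Illustrative sketch (added for documentation; the helper name is
# hypothetical, not part of the original API): the factory's bookkeeping
# lets a test force a failure partway through execution.
def _example_failing_factory() -> AddTaskFactoryMock:
    # All tasks made by this factory share its counters, so the fourth
    # run() call overall (countExec == 3) raises RuntimeError.
    return AddTaskFactoryMock(stopAt=3)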


def registerDatasetTypes(registry: Registry, pipeline: Union[Pipeline, Iterable[TaskDef]]) -> None:
    """Register all dataset types used by tasks in a registry.

    Copied and modified from `PreExecInit.initializeDatasetTypes`.

    Parameters
    ----------
    registry : `~lsst.daf.butler.Registry`
        Registry instance.
    pipeline : `~lsst.pipe.base.Pipeline` or `typing.Iterable` of `TaskDef`
        A pipeline or an iterable of TaskDef instances, e.g. the output of
        `~lsst.pipe.base.Pipeline.toExpandedPipeline`.
    """
    for taskDef in pipeline:
        configDatasetType = DatasetType(
            taskDef.configDatasetName, {}, storageClass="Config", universe=registry.dimensions
        )
        storageClass = "Packages"
        packagesDatasetType = DatasetType(
            "packages", {}, storageClass=storageClass, universe=registry.dimensions
        )
        datasetTypes = TaskDatasetTypes.fromTaskDef(taskDef, registry=registry)
        for datasetType in itertools.chain(
            datasetTypes.initInputs,
            datasetTypes.initOutputs,
            datasetTypes.inputs,
            datasetTypes.outputs,
            datasetTypes.prerequisites,
            [configDatasetType, packagesDatasetType],
        ):
            _LOG.info("Registering %s with registry", datasetType)
            # Registration is a no-op if the dataset type already exists
            # and is consistent, and raises if it is inconsistent; but
            # components must be skipped.
            if not datasetType.isComponent():
                registry.registerDatasetType(datasetType)
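

# Illustrative sketch (added for documentation; the helper name is
# hypothetical): registering everything a one-task pipeline needs before
# any data is inserted.
def _example_register_types(butler: Butler) -> None:
    pipeline = makeSimplePipeline(1)
    # Registers the task's connections plus its per-task config dataset
    # type and the "packages" dataset type.
    registerDatasetTypes(butler.registry, pipeline.toExpandedPipeline())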


def makeSimplePipeline(nQuanta: int, instrument: Optional[str] = None) -> Pipeline:
    """Make a simple Pipeline for tests.

    This is called by ``makeSimpleQGraph`` if no pipeline is passed to that
    function. It can also be used to customize the pipeline used by
    ``makeSimpleQGraph`` by calling this first and passing the result to it.

    Parameters
    ----------
    nQuanta : `int`
        The number of quanta to add to the pipeline.
    instrument : `str` or `None`, optional
        The importable name of an instrument to be added to the pipeline.
        If no instrument should be added, pass an empty string or `None`
        (the default).

    Returns
    -------
    pipeline : `~lsst.pipe.base.Pipeline`
        The created pipeline object.
    """
    pipeline = Pipeline("test pipeline")
    # Make a chain of tasks that execute in a well-defined order (via data
    # dependencies).
    for lvl in range(nQuanta):
        pipeline.addTask(AddTask, f"task{lvl}")
        pipeline.addConfigOverride(f"task{lvl}", "connections.in_tmpl", lvl)
        pipeline.addConfigOverride(f"task{lvl}", "connections.out_tmpl", lvl + 1)
    if instrument:
        pipeline.addInstrument(instrument)
    return pipeline
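

# Illustrative sketch (added for documentation; the helper name is
# hypothetical): with nQuanta=3 the pipeline contains tasks "task0",
# "task1", and "task2", chained via dataset types "add_dataset0" through
# "add_dataset3".
def _example_simple_pipeline() -> Pipeline:
    pipeline = makeSimplePipeline(3, instrument=None)
    for taskDef in pipeline.toExpandedPipeline():
        _LOG.info("pipeline task: %s", taskDef.label)
    return pipeline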


def makeSimpleButler(root: str, run: str = "test", inMemory: bool = True) -> Butler:
    """Create new data butler instance.

    Parameters
    ----------
    root : `str`
        Path or URI to the root location of the new repository.
    run : `str`, optional
        Run collection name.
    inMemory : `bool`, optional
        If `True`, make an in-memory repository.

    Returns
    -------
    butler : `~lsst.daf.butler.Butler`
        Data butler instance.
    """
    root_path = ResourcePath(root, forceDirectory=True)
    if not root_path.isLocal:
        raise ValueError(f"Only works with local root, not {root_path}")
    config = Config()
    if not inMemory:
        config["registry", "db"] = f"sqlite:///{root_path.ospath}/gen3.sqlite"
        config["datastore", "cls"] = "lsst.daf.butler.datastores.fileDatastore.FileDatastore"
    repo = butlerTests.makeTestRepo(str(root_path), {}, config=config)
    butler = Butler(butler=repo, run=run)
    return butler


def populateButler(
    pipeline: Pipeline, butler: Butler, datasetTypes: Dict[Optional[str], List[str]] | None = None
) -> None:
    """Populate data butler with data needed for test.

    Initializes the data butler with a number of items:
    - registers the dataset types defined by the pipeline
    - creates dimension records for (instrument, detector)
    - adds datasets based on the ``datasetTypes`` dictionary; if the
      dictionary is missing then a single dataset with type "add_dataset0"
      is added

    All datasets added to the butler have ``dataId={instrument=instrument,
    detector=0}``, where ``instrument`` is extracted from the pipeline;
    "INSTR" is used if the pipeline has no instrument definition. The
    in-memory type of each dataset is guessed from the dataset type name
    (this assumes that the pipeline is made of `AddTask` tasks).

    Parameters
    ----------
    pipeline : `~lsst.pipe.base.Pipeline`
        Pipeline instance.
    butler : `~lsst.daf.butler.Butler`
        Data butler instance.
    datasetTypes : `dict` [ `str` or `None`, `list` [ `str` ] ], optional
        Dictionary whose keys are collection names (`None` means
        ``butler.run``) and values are lists of dataset type names. By
        default a single dataset of type "add_dataset0" is added to the
        ``butler.run`` collection.
    """
    # Add dataset types to registry
    taskDefs = list(pipeline.toExpandedPipeline())
    registerDatasetTypes(butler.registry, taskDefs)

    instrument = pipeline.getInstrument()
    if instrument is not None:
        instrument_class = doImportType(instrument)
        instrumentName = instrument_class.getName()
    else:
        instrumentName = "INSTR"

    # Add all needed dimensions to registry
    butler.registry.insertDimensionData("instrument", dict(name=instrumentName))
    butler.registry.insertDimensionData("detector", dict(instrument=instrumentName, id=0, full_name="det0"))

    taskDefMap = dict((taskDef.label, taskDef) for taskDef in taskDefs)
    # Add inputs to butler
    if not datasetTypes:
        datasetTypes = {None: ["add_dataset0"]}
    for run, dsTypes in datasetTypes.items():
        if run is not None:
            butler.registry.registerRun(run)
        for dsType in dsTypes:
            if dsType == "packages":
                # Version is intentionally inconsistent.
                # Dict is convertible to Packages if Packages is installed.
                data: Any = {"python": "9.9.99"}
                butler.put(data, dsType, run=run)
            else:
                if dsType.endswith("_config"):
                    # Find a config from a matching task name or make a
                    # new one.
                    taskLabel, _, _ = dsType.rpartition("_")
                    taskDef = taskDefMap.get(taskLabel)
                    if taskDef is not None:
                        data = taskDef.config
                    else:
                        data = AddTaskConfig()
                elif dsType.endswith("_metadata"):
                    data = _TASK_FULL_METADATA_TYPE()
                elif dsType.endswith("_log"):
                    data = ButlerLogRecords.from_records([])
                else:
                    data = numpy.array([0.0, 1.0, 2.0, 5.0])
                butler.put(data, dsType, run=run, instrument=instrumentName, detector=0)
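

# Illustrative sketch (added for documentation; the helper name is
# hypothetical and ``root`` is assumed to be a writable local directory):
# creating a repository and filling it with the single "add_dataset0"
# input that the default pipeline needs.
def _example_populated_repo(root: str) -> Butler:
    pipeline = makeSimplePipeline(2)
    butler = makeSimpleButler(root, run="test", inMemory=True)
    # Registers all dataset types, inserts the instrument/detector
    # dimension records, and puts "add_dataset0" into the "test" run.
    populateButler(pipeline, butler)
    return butler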


def makeSimpleQGraph(
    nQuanta: int = 5,
    pipeline: Optional[Pipeline] = None,
    butler: Optional[Butler] = None,
    root: Optional[str] = None,
    callPopulateButler: bool = True,
    run: str = "test",
    skipExistingIn: Any = None,
    inMemory: bool = True,
    userQuery: str = "",
    datasetTypes: Optional[Dict[Optional[str], List[str]]] = None,
    datasetQueryConstraint: DSQVariant = DSQVariant.ALL,
    makeDatastoreRecords: bool = False,
    resolveRefs: bool = False,
    bind: Optional[Mapping[str, Any]] = None,
) -> Tuple[Butler, QuantumGraph]:
    """Make simple QuantumGraph for tests.

    Makes a simple pipeline of `AddTask` tasks, sets up an in-memory
    registry and butler, fills them with minimal data, and generates a
    QuantumGraph from all of that.

    Parameters
    ----------
    nQuanta : `int`
        Number of quanta in a graph, only used if ``pipeline`` is `None`.
    pipeline : `~lsst.pipe.base.Pipeline`, optional
        If `None` then a pipeline is made with `AddTask` and default
        `AddTaskConfig`.
    butler : `~lsst.daf.butler.Butler`, optional
        Data butler instance; if `None` then a new data butler is created
        by calling `makeSimpleButler`.
    root : `str`, optional
        Path or URI to the root location of the new repository. Only used
        if ``butler`` is `None`.
    callPopulateButler : `bool`, optional
        If `True`, insert datasets into the butler prior to building a
        graph. If `False`, the ``butler`` argument must not be `None` and
        must be pre-populated. Defaults to `True`.
    run : `str`, optional
        Name of the RUN collection to add to butler, only used if
        ``butler`` is `None`.
    skipExistingIn
        Expressions representing the collections to search for existing
        output datasets that should be skipped. See
        :ref:`daf_butler_ordered_collection_searches`.
    inMemory : `bool`, optional
        If `True`, make an in-memory repository, only used if ``butler``
        is `None`.
    userQuery : `str`, optional
        The user query to pass to ``makeGraph``, by default an empty
        string.
    datasetTypes : `dict` [ `str` or `None`, `list` [ `str` ] ], optional
        Dictionary whose keys are collection names and values are lists of
        dataset type names. By default a single dataset of type
        "add_dataset0" is added to a ``butler.run`` collection.
    datasetQueryConstraint : `DatasetQueryConstraintVariant`, optional
        The query constraint variant that should be used to constrain the
        query based on dataset existence, defaults to
        `DatasetQueryConstraintVariant.ALL`.
    makeDatastoreRecords : `bool`, optional
        If `True` then add datastore records to generated quanta.
    resolveRefs : `bool`, optional
        If `True` then resolve all input references and generate random
        dataset IDs for all output and intermediate datasets.
    bind : `Mapping`, optional
        Mapping containing literal values that should be injected into the
        ``userQuery`` expression, keyed by the identifiers they replace.

    Returns
    -------
    butler : `~lsst.daf.butler.Butler`
        Butler instance.
    qgraph : `~lsst.pipe.base.QuantumGraph`
        Quantum graph instance.
    """

    if pipeline is None:
        pipeline = makeSimplePipeline(nQuanta=nQuanta)

    if butler is None:
        if root is None:
            raise ValueError("Must provide `root` when `butler` is None")
        if callPopulateButler is False:
            raise ValueError("callPopulateButler can only be False when butler is supplied as an argument")
        butler = makeSimpleButler(root, run=run, inMemory=inMemory)

    if callPopulateButler:
        populateButler(pipeline, butler, datasetTypes=datasetTypes)

    # Make the graph
    _LOG.debug("Instantiating GraphBuilder, skipExistingIn=%s", skipExistingIn)
    builder = GraphBuilder(
        registry=butler.registry,
        skipExistingIn=skipExistingIn,
        datastore=butler.datastore if makeDatastoreRecords else None,
    )
    _LOG.debug(
        "Calling GraphBuilder.makeGraph, collections=%r, run=%r, userQuery=%r bind=%s",
        butler.collections,
        run or butler.run,
        userQuery,
        bind,
    )
    qgraph = builder.makeGraph(
        pipeline,
        collections=butler.collections,
        run=run or butler.run,
        userQuery=userQuery,
        datasetQueryConstraint=datasetQueryConstraint,
        resolveRefs=resolveRefs,
        bind=bind,
    )

    return butler, qgraph
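

# Illustrative sketch (added for documentation; the helper name is
# hypothetical and ``root`` is assumed to be a writable local directory):
# end-to-end graph construction with the defaults, which yields one
# quantum per task.
def _example_end_to_end(root: str) -> QuantumGraph:
    _, qgraph = makeSimpleQGraph(nQuanta=5, root=root)
    # len() of a QuantumGraph is assumed here to count its quanta.
    _LOG.info("graph has %d quanta", len(qgraph))
    return qgraph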