# This file is part of pipe_base.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
22"""Bunch of common classes and methods for use in unit tests.
23"""

from __future__ import annotations

__all__ = ["AddTaskConfig", "AddTask", "AddTaskFactoryMock"]

import itertools
import logging
from collections.abc import Iterable, Mapping
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Type, Union, cast

import lsst.daf.butler.tests as butlerTests
import lsst.pex.config as pexConfig
import numpy
from lsst.daf.butler import Butler, Config, DataId, DatasetRef, DatasetType, Formatter, LimitedButler
from lsst.daf.butler.core.logging import ButlerLogRecords
from lsst.resources import ResourcePath
from lsst.utils import doImportType

from .. import connectionTypes as cT
from .._instrument import Instrument
from ..config import PipelineTaskConfig
from ..connections import PipelineTaskConnections
from ..graph import QuantumGraph
from ..graphBuilder import DatasetQueryConstraintVariant as DSQVariant
from ..graphBuilder import GraphBuilder
from ..pipeline import Pipeline, TaskDatasetTypes, TaskDef
from ..pipelineTask import PipelineTask
from ..struct import Struct
from ..task import _TASK_FULL_METADATA_TYPE
from ..taskFactory import TaskFactory

if TYPE_CHECKING:
    from lsst.daf.butler import Registry

_LOG = logging.getLogger(__name__)


class SimpleInstrument(Instrument):
    def __init__(self, *args: Any, **kwargs: Any):
        pass

    @staticmethod
    def getName() -> str:
        return "INSTRU"

    def getRawFormatter(self, dataId: DataId) -> Type[Formatter]:
        return Formatter

    def register(self, registry: Registry, *, update: bool = False) -> None:
        pass


class AddTaskConnections(
    PipelineTaskConnections,
    dimensions=("instrument", "detector"),
    defaultTemplates={"in_tmpl": "_in", "out_tmpl": "_out"},
):
    """Connections for AddTask: one input and two outputs,
    plus one init output.
    """

    input = cT.Input(
        name="add_dataset{in_tmpl}",
        dimensions=["instrument", "detector"],
        storageClass="NumpyArray",
        doc="Input dataset type for this task",
    )
    output = cT.Output(
        name="add_dataset{out_tmpl}",
        dimensions=["instrument", "detector"],
        storageClass="NumpyArray",
        doc="Output dataset type for this task",
    )
    output2 = cT.Output(
        name="add2_dataset{out_tmpl}",
        dimensions=["instrument", "detector"],
        storageClass="NumpyArray",
        doc="Output dataset type for this task",
    )
    initout = cT.InitOutput(
        name="add_init_output{out_tmpl}",
        storageClass="NumpyArray",
        doc="Init Output dataset type for this task",
    )


class AddTaskConfig(PipelineTaskConfig, pipelineConnections=AddTaskConnections):
    """Config for AddTask."""

    addend = pexConfig.Field[int](doc="amount to add", default=3)


class AddTask(PipelineTask):
    """Trivial PipelineTask for testing, with some extras useful for
    specific unit tests.
    """

    ConfigClass = AddTaskConfig
    _DefaultName = "add_task"

    initout = numpy.array([999])
    """InitOutputs for this task"""

    taskFactory: Optional[AddTaskFactoryMock] = None
    """Factory that makes instances"""

    def run(self, input: int) -> Struct:  # type: ignore
        if self.taskFactory:
            # do some bookkeeping
            if self.taskFactory.stopAt == self.taskFactory.countExec:
                raise RuntimeError("pretend something bad happened")
            self.taskFactory.countExec += 1

        self.config = cast(AddTaskConfig, self.config)
        self.metadata.add("add", self.config.addend)
        output = input + self.config.addend
        output2 = output + self.config.addend
        _LOG.info("input = %s, output = %s, output2 = %s", input, output, output2)
        return Struct(output=output, output2=output2)
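
# A minimal usage sketch (illustrative, not executed at import time): with
# the default AddTaskConfig (addend=3), run() adds the addend twice.
#
#     task = AddTask(config=AddTaskConfig(), name="add_task")
#     result = task.run(input=0)
#     # result.output == 3, result.output2 == 6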


class AddTaskFactoryMock(TaskFactory):
    """Special task factory that instantiates AddTask.

    It also defines some bookkeeping variables used by AddTask to report
    progress to unit tests.
    """

    def __init__(self, stopAt: int = -1):
        self.countExec = 0  # incremented by AddTask
        self.stopAt = stopAt  # AddTask raises exception at this call to run()

    def makeTask(
        self, taskDef: TaskDef, butler: LimitedButler, initInputRefs: Iterable[DatasetRef] | None
    ) -> PipelineTask:
        taskClass = taskDef.taskClass
        assert taskClass is not None
        task = taskClass(config=taskDef.config, initInputs=None, name=taskDef.label)
        task.taskFactory = self  # type: ignore
        return task
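
# A sketch of the bookkeeping contract (illustrative): a factory constructed
# with stopAt=2 lets the first two run() calls succeed and makes the third
# raise RuntimeError, because countExec is compared before being incremented.
#
#     factory = AddTaskFactoryMock(stopAt=2)
#     # Tasks created via factory.makeTask(...) share this factory instance
#     # and consult factory.stopAt / factory.countExec on every run() call.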


def registerDatasetTypes(registry: Registry, pipeline: Union[Pipeline, Iterable[TaskDef]]) -> None:
    """Register all dataset types used by tasks in a registry.

    Copied and modified from `PreExecInit.initializeDatasetTypes`.

    Parameters
    ----------
    registry : `~lsst.daf.butler.Registry`
        Registry instance.
    pipeline : `Pipeline` or `typing.Iterable` of `TaskDef`
        Expanded pipeline or iterable of `TaskDef` instances, likely the
        output of `~lsst.pipe.base.Pipeline.toExpandedPipeline`.
    """
    for taskDef in pipeline:
        configDatasetType = DatasetType(
            taskDef.configDatasetName, {}, storageClass="Config", universe=registry.dimensions
        )
        storageClass = "Packages"
        packagesDatasetType = DatasetType(
            "packages", {}, storageClass=storageClass, universe=registry.dimensions
        )
        datasetTypes = TaskDatasetTypes.fromTaskDef(taskDef, registry=registry)
        for datasetType in itertools.chain(
            datasetTypes.initInputs,
            datasetTypes.initOutputs,
            datasetTypes.inputs,
            datasetTypes.outputs,
            datasetTypes.prerequisites,
            [configDatasetType, packagesDatasetType],
        ):
            _LOG.info("Registering %s with registry", datasetType)
            # Registration is a no-op if the dataset type already exists and
            # is consistent, and raises if it is inconsistent; components
            # must be skipped.
            if not datasetType.isComponent():
                registry.registerDatasetType(datasetType)
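
# A minimal usage sketch, assuming a butler registry is already available
# (for example from makeSimpleButler below):
#
#     pipeline = makeSimplePipeline(nQuanta=3)
#     registerDatasetTypes(butler.registry, pipeline.toExpandedPipeline())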


def makeSimplePipeline(nQuanta: int, instrument: Optional[str] = None) -> Pipeline:
    """Make a simple Pipeline for tests.

    This is called by `makeSimpleQGraph` if no pipeline is passed to that
    function. It can also be used to customize the pipeline that
    `makeSimpleQGraph` uses: call this first and pass the result to it.

    Parameters
    ----------
    nQuanta : `int`
        The number of quanta to add to the pipeline.
    instrument : `str` or `None`, optional
        The importable name of an instrument to add to the pipeline, or an
        empty string or `None` (the default) if no instrument should be
        added.

    Returns
    -------
    pipeline : `~lsst.pipe.base.Pipeline`
        The created pipeline object.
    """
    pipeline = Pipeline("test pipeline")
    # Make a chain of tasks that execute in a well-defined order (via data
    # dependencies).
    for lvl in range(nQuanta):
        pipeline.addTask(AddTask, f"task{lvl}")
        pipeline.addConfigOverride(f"task{lvl}", "connections.in_tmpl", lvl)
        pipeline.addConfigOverride(f"task{lvl}", "connections.out_tmpl", lvl + 1)
    if instrument:
        pipeline.addInstrument(instrument)
    return pipeline
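
# A minimal sketch: a three-task pipeline whose tasks are chained through
# dataset names ("add_dataset0" -> "add_dataset1" -> ...) by the template
# overrides above. SimpleInstrument from this module is importable by its
# dotted path:
#
#     pipeline = makeSimplePipeline(
#         nQuanta=3, instrument="lsst.pipe.base.tests.simpleQGraph.SimpleInstrument"
#     )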


def makeSimpleButler(root: str, run: str = "test", inMemory: bool = True) -> Butler:
    """Create a new data butler instance.

    Parameters
    ----------
    root : `str`
        Path or URI to the root location of the new repository.
    run : `str`, optional
        Run collection name.
    inMemory : `bool`, optional
        If `True`, make an in-memory repository.

    Returns
    -------
    butler : `~lsst.daf.butler.Butler`
        Data butler instance.
    """
    root_path = ResourcePath(root, forceDirectory=True)
    if not root_path.isLocal:
        raise ValueError(f"Only works with local root not {root_path}")
    config = Config()
    if not inMemory:
        config["registry", "db"] = f"sqlite:///{root_path.ospath}/gen3.sqlite"
        config["datastore", "cls"] = "lsst.daf.butler.datastores.fileDatastore.FileDatastore"
    repo = butlerTests.makeTestRepo(str(root_path), {}, config=config)
    butler = Butler(butler=repo, run=run)
    return butler
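
# A minimal usage sketch, assuming a temporary directory for the repository
# root:
#
#     import tempfile
#
#     with tempfile.TemporaryDirectory() as root:
#         butler = makeSimpleButler(root, run="test", inMemory=True)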


def populateButler(
    pipeline: Pipeline, butler: Butler, datasetTypes: Dict[Optional[str], List[str]] | None = None
) -> None:
    """Populate data butler with data needed for test.

    Initializes data butler with a bunch of items:
    - registers dataset types defined by the pipeline
    - creates dimension records for (instrument, detector)
    - adds datasets based on the ``datasetTypes`` dictionary; if the
      dictionary is missing, a single dataset with type "add_dataset0" is
      added

    All datasets added to butler have ``dataId={instrument=instrument,
    detector=0}``, where ``instrument`` is extracted from the pipeline;
    "INSTR" is used if the pipeline has no instrument definition. The type
    of each dataset is guessed from the dataset type name (assuming the
    pipeline is made of `AddTask` tasks).

    Parameters
    ----------
    pipeline : `~lsst.pipe.base.Pipeline`
        Pipeline instance.
    butler : `~lsst.daf.butler.Butler`
        Data butler instance.
    datasetTypes : `dict` [ `str`, `list` ], optional
        Dictionary whose keys are collection names (`None` means the
        ``butler.run`` collection) and values are lists of dataset type
        names. By default a single dataset of type "add_dataset0" is added
        to the ``butler.run`` collection.
    """

    # Add dataset types to registry
    taskDefs = list(pipeline.toExpandedPipeline())
    registerDatasetTypes(butler.registry, taskDefs)

    instrument = pipeline.getInstrument()
    if instrument is not None:
        instrument_class = doImportType(instrument)
        instrumentName = instrument_class.getName()
    else:
        instrumentName = "INSTR"

    # Add all needed dimensions to registry
    butler.registry.insertDimensionData("instrument", dict(name=instrumentName))
    butler.registry.insertDimensionData("detector", dict(instrument=instrumentName, id=0, full_name="det0"))

    taskDefMap = dict((taskDef.label, taskDef) for taskDef in taskDefs)
    # Add inputs to butler
    if not datasetTypes:
        datasetTypes = {None: ["add_dataset0"]}
    for run, dsTypes in datasetTypes.items():
        if run is not None:
            butler.registry.registerRun(run)
        for dsType in dsTypes:
            if dsType == "packages":
                # Version is intentionally inconsistent.
                # Dict is convertible to Packages if Packages is installed.
                data: Any = {"python": "9.9.99"}
                butler.put(data, dsType, run=run)
            else:
                if dsType.endswith("_config"):
                    # find a config from matching task name or make a new one
                    taskLabel, _, _ = dsType.rpartition("_")
                    taskDef = taskDefMap.get(taskLabel)
                    if taskDef is not None:
                        data = taskDef.config
                    else:
                        data = AddTaskConfig()
                elif dsType.endswith("_metadata"):
                    data = _TASK_FULL_METADATA_TYPE()
                elif dsType.endswith("_log"):
                    data = ButlerLogRecords.from_records([])
                else:
                    data = numpy.array([0.0, 1.0, 2.0, 5.0])
                butler.put(data, dsType, run=run, instrument=instrumentName, detector=0)
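
# A minimal usage sketch, assuming ``butler`` comes from makeSimpleButler
# above; with no ``datasetTypes`` argument this adds a single "add_dataset0"
# dataset to the ``butler.run`` collection:
#
#     pipeline = makeSimplePipeline(nQuanta=3)
#     populateButler(pipeline, butler)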


def makeSimpleQGraph(
    nQuanta: int = 5,
    pipeline: Optional[Pipeline] = None,
    butler: Optional[Butler] = None,
    root: Optional[str] = None,
    callPopulateButler: bool = True,
    run: str = "test",
    skipExistingIn: Any = None,
    inMemory: bool = True,
    userQuery: str = "",
    datasetTypes: Optional[Dict[Optional[str], List[str]]] = None,
    datasetQueryConstraint: DSQVariant = DSQVariant.ALL,
    makeDatastoreRecords: bool = False,
    resolveRefs: bool = False,
    bind: Optional[Mapping[str, Any]] = None,
) -> Tuple[Butler, QuantumGraph]:
358 """Make simple QuantumGraph for tests.
360 Makes simple one-task pipeline with AddTask, sets up in-memory registry
361 and butler, fills them with minimal data, and generates QuantumGraph with
362 all of that.
364 Parameters
365 ----------
366 nQuanta : `int`
367 Number of quanta in a graph, only used if ``pipeline`` is None.
368 pipeline : `~lsst.pipe.base.Pipeline`
369 If `None` then a pipeline is made with `AddTask` and default
370 `AddTaskConfig`.
371 butler : `~lsst.daf.butler.Butler`, optional
372 Data butler instance, if None then new data butler is created by
373 calling `makeSimpleButler`.
374 callPopulateButler : `bool`, optional
375 If True insert datasets into the butler prior to building a graph.
376 If False butler argument must not be None, and must be pre-populated.
377 Defaults to True.
378 root : `str`
379 Path or URI to the root location of the new repository. Only used if
380 ``butler`` is None.
381 run : `str`, optional
382 Name of the RUN collection to add to butler, only used if ``butler``
383 is None.
384 skipExistingIn
385 Expressions representing the collections to search for existing
386 output datasets that should be skipped. See
387 :ref:`daf_butler_ordered_collection_searches`.
388 inMemory : `bool`, optional
389 If true make in-memory repository, only used if ``butler`` is `None`.
390 userQuery : `str`, optional
391 The user query to pass to ``makeGraph``, by default an empty string.
392 datasetTypes : `dict` [ `str`, `list` ], optional
393 Dictionary whose keys are collection names and values are lists of
394 dataset type names. By default a single dataset of type "add_dataset0"
395 is added to a ``butler.run`` collection.
396 datasetQueryQConstraint : `DatasetQueryConstraintVariant`
397 The query constraint variant that should be used to constrain the
398 query based on dataset existence, defaults to
399 `DatasetQueryConstraintVariant.ALL`.
400 makeDatastoreRecords : `bool`, optional
401 If `True` then add datstore records to generated quanta.
402 resolveRefs : `bool`, optional
403 If `True` then resolve all input references and generate random dataset
404 IDs for all output and intermediate datasets.
405 bind : `Mapping`, optional
406 Mapping containing literal values that should be injected into the
407 ``userQuery`` expression, keyed by the identifiers they replace.
409 Returns
410 -------
411 butler : `~lsst.daf.butler.Butler`
412 Butler instance
413 qgraph : `~lsst.pipe.base.QuantumGraph`
414 Quantum graph instance
415 """

    if pipeline is None:
        pipeline = makeSimplePipeline(nQuanta=nQuanta)

    if butler is None:
        if root is None:
            raise ValueError("Must provide `root` when `butler` is None")
        if callPopulateButler is False:
            raise ValueError("populateButler can only be False when butler is supplied as an argument")
        butler = makeSimpleButler(root, run=run, inMemory=inMemory)

    if callPopulateButler:
        populateButler(pipeline, butler, datasetTypes=datasetTypes)

    # Make the graph
    _LOG.debug("Instantiating GraphBuilder, skipExistingIn=%s", skipExistingIn)
    builder = GraphBuilder(
        registry=butler.registry,
        skipExistingIn=skipExistingIn,
        datastore=butler.datastore if makeDatastoreRecords else None,
    )
    _LOG.debug(
        "Calling GraphBuilder.makeGraph, collections=%r, run=%r, userQuery=%r bind=%s",
        butler.collections,
        run or butler.run,
        userQuery,
        bind,
    )
    qgraph = builder.makeGraph(
        pipeline,
        collections=butler.collections,
        run=run or butler.run,
        userQuery=userQuery,
        datasetQueryConstraint=datasetQueryConstraint,
        resolveRefs=resolveRefs,
        bind=bind,
    )

    return butler, qgraph
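
# A minimal end-to-end sketch, assuming a temporary directory for the
# repository root; this exercises makeSimplePipeline, makeSimpleButler,
# populateButler, and GraphBuilder in a single call, producing one quantum
# per task:
#
#     import tempfile
#
#     with tempfile.TemporaryDirectory() as root:
#         butler, qgraph = makeSimpleQGraph(nQuanta=3, root=root)
#         assert len(qgraph) == 3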