# This file is part of pipe_base.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

22"""Bunch of common classes and methods for use in unit tests. 

23""" 

24from __future__ import annotations 

25 

26__all__ = ["AddTaskConfig", "AddTask", "AddTaskFactoryMock"] 

27 

import itertools
import logging
from collections.abc import Iterable, Mapping
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Type, Union, cast

import lsst.daf.butler.tests as butlerTests
import lsst.pex.config as pexConfig
import numpy
from lsst.daf.butler import Butler, Config, DataId, DatasetRef, DatasetType, Formatter, LimitedButler
from lsst.daf.butler.core.logging import ButlerLogRecords
from lsst.resources import ResourcePath
from lsst.utils import doImportType

from .. import connectionTypes as cT
from .._instrument import Instrument
from ..config import PipelineTaskConfig
from ..connections import PipelineTaskConnections
from ..graph import QuantumGraph
from ..graphBuilder import DatasetQueryConstraintVariant as DSQVariant
from ..graphBuilder import GraphBuilder
from ..pipeline import Pipeline, TaskDatasetTypes, TaskDef
from ..pipelineTask import PipelineTask
from ..struct import Struct
from ..task import _TASK_FULL_METADATA_TYPE
from ..taskFactory import TaskFactory

if TYPE_CHECKING:
    from lsst.daf.butler import Registry

_LOG = logging.getLogger(__name__)


class SimpleInstrument(Instrument):
    """Trivial instrument stub for use in tests."""

    def __init__(self, *args: Any, **kwargs: Any):
        pass

    @staticmethod
    def getName() -> str:
        return "INSTRU"

    def getRawFormatter(self, dataId: DataId) -> Type[Formatter]:
        return Formatter

    def register(self, registry: Registry, *, update: bool = False) -> None:
        pass


class AddTaskConnections(
    PipelineTaskConnections,
    dimensions=("instrument", "detector"),
    defaultTemplates={"in_tmpl": "_in", "out_tmpl": "_out"},
):
    """Connections for AddTask: one input, two outputs, and one init
    output.
    """

    input = cT.Input(
        name="add_dataset{in_tmpl}",
        dimensions=["instrument", "detector"],
        storageClass="NumpyArray",
        doc="Input dataset type for this task",
    )
    output = cT.Output(
        name="add_dataset{out_tmpl}",
        dimensions=["instrument", "detector"],
        storageClass="NumpyArray",
        doc="Output dataset type for this task",
    )
    output2 = cT.Output(
        name="add2_dataset{out_tmpl}",
        dimensions=["instrument", "detector"],
        storageClass="NumpyArray",
        doc="Output dataset type for this task",
    )
    initout = cT.InitOutput(
        name="add_init_output{out_tmpl}",
        storageClass="NumpyArray",
        doc="Init Output dataset type for this task",
    )


class AddTaskConfig(PipelineTaskConfig, pipelineConnections=AddTaskConnections):
    """Config for AddTask."""

    addend = pexConfig.Field[int](doc="amount to add", default=3)


class AddTask(PipelineTask):
    """Trivial PipelineTask for testing, with some extras useful for
    specific unit tests.
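
    With the default ``addend`` of 3, ``run`` adds it twice; a minimal
    sketch::

        task = AddTask(config=AddTaskConfig())
        result = task.run(input=1)
        # result.output == 4, result.output2 == 7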

    """

    ConfigClass = AddTaskConfig
    _DefaultName = "add_task"

    initout = numpy.array([999])
    """InitOutputs for this task"""

    taskFactory: Optional[AddTaskFactoryMock] = None
    """Factory that makes instances"""

    def run(self, input: int) -> Struct:  # type: ignore
        if self.taskFactory:
            # Do some bookkeeping.
            if self.taskFactory.stopAt == self.taskFactory.countExec:
                raise RuntimeError("pretend something bad happened")
            self.taskFactory.countExec += 1

        self.config = cast(AddTaskConfig, self.config)
        self.metadata.add("add", self.config.addend)
        output = input + self.config.addend
        output2 = output + self.config.addend
        _LOG.info("input = %s, output = %s, output2 = %s", input, output, output2)
        return Struct(output=output, output2=output2)


class AddTaskFactoryMock(TaskFactory):
    """Special task factory that instantiates AddTask.

    It also defines some bookkeeping variables used by AddTask to report
    progress to unit tests.
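
    For example (a sketch; ``stopAt=2`` makes the third call to ``run``
    raise, since ``countExec`` starts at 0)::

        taskFactory = AddTaskFactoryMock(stopAt=2)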

    """

    def __init__(self, stopAt: int = -1):
        self.countExec = 0  # incremented by AddTask
        self.stopAt = stopAt  # AddTask raises an exception at this call to run()

    def makeTask(
        self, taskDef: TaskDef, butler: LimitedButler, initInputRefs: Iterable[DatasetRef] | None
    ) -> PipelineTask:
        taskClass = taskDef.taskClass
        assert taskClass is not None
        task = taskClass(config=taskDef.config, initInputs=None, name=taskDef.label)
        task.taskFactory = self  # type: ignore
        return task


def registerDatasetTypes(registry: Registry, pipeline: Union[Pipeline, Iterable[TaskDef]]) -> None:
    """Register all dataset types used by tasks in a registry.

    Copied and modified from `PreExecInit.initializeDatasetTypes`.

    Parameters
    ----------
    registry : `~lsst.daf.butler.Registry`
        Registry instance.
    pipeline : `~lsst.pipe.base.Pipeline` or `typing.Iterable` of `TaskDef`
        A pipeline, or an iterable of `TaskDef` instances such as the
        output of `~lsst.pipe.base.Pipeline.toExpandedPipeline`.
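
    Examples
    --------
    A minimal sketch, assuming ``butler`` and ``pipeline`` objects already
    exist::

        registerDatasetTypes(butler.registry, pipeline.toExpandedPipeline())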

    """
    for taskDef in pipeline:
        configDatasetType = DatasetType(
            taskDef.configDatasetName, {}, storageClass="Config", universe=registry.dimensions
        )
        storageClass = "Packages"
        packagesDatasetType = DatasetType(
            "packages", {}, storageClass=storageClass, universe=registry.dimensions
        )
        datasetTypes = TaskDatasetTypes.fromTaskDef(taskDef, registry=registry)
        for datasetType in itertools.chain(
            datasetTypes.initInputs,
            datasetTypes.initOutputs,
            datasetTypes.inputs,
            datasetTypes.outputs,
            datasetTypes.prerequisites,
            [configDatasetType, packagesDatasetType],
        ):
            _LOG.info("Registering %s with registry", datasetType)
            # Registration is a no-op if the dataset type already exists and
            # is consistent, and it raises if it is inconsistent. Components
            # must be skipped.
            if not datasetType.isComponent():
                registry.registerDatasetType(datasetType)


def makeSimplePipeline(nQuanta: int, instrument: Optional[str] = None) -> Pipeline:
    """Make a simple Pipeline for tests.

    This is called by ``makeSimpleQGraph`` if no pipeline is passed to that
    function. It can also be used to customize the pipeline used by
    ``makeSimpleQGraph`` by calling this first and passing the result to it.

    Parameters
    ----------
    nQuanta : `int`
        The number of quanta to add to the pipeline.
    instrument : `str` or `None`, optional
        The importable name of an instrument to add to the pipeline, or
        `None` (the default) or an empty string if no instrument should be
        added.

    Returns
    -------
    pipeline : `~lsst.pipe.base.Pipeline`
        The created pipeline object.
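
    Examples
    --------
    A minimal sketch; the resulting tasks are labeled ``task0`` through
    ``task{nQuanta-1}`` and are chained via ``add_dataset{N}`` datasets::

        pipeline = makeSimplePipeline(3)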

    """
    pipeline = Pipeline("test pipeline")
    # Make a bunch of tasks that execute in a well-defined order (via data
    # dependencies).
    for lvl in range(nQuanta):
        pipeline.addTask(AddTask, f"task{lvl}")
        pipeline.addConfigOverride(f"task{lvl}", "connections.in_tmpl", lvl)
        pipeline.addConfigOverride(f"task{lvl}", "connections.out_tmpl", lvl + 1)
    if instrument:
        pipeline.addInstrument(instrument)
    return pipeline


def makeSimpleButler(root: str, run: str = "test", inMemory: bool = True) -> Butler:
    """Create new data butler instance.

    Parameters
    ----------
    root : `str`
        Path or URI to the root location of the new repository.
    run : `str`, optional
        Run collection name.
    inMemory : `bool`, optional
        If `True`, make an in-memory repository.

    Returns
    -------
    butler : `~lsst.daf.butler.Butler`
        Data butler instance.
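
    Examples
    --------
    A minimal sketch, assuming ``tmpdir`` is an existing local directory::

        butler = makeSimpleButler(tmpdir, run="test", inMemory=True)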

    """
    root_path = ResourcePath(root, forceDirectory=True)
    if not root_path.isLocal:
        raise ValueError(f"Only works with local root, not {root_path}")
    config = Config()
    if not inMemory:
        config["registry", "db"] = f"sqlite:///{root_path.ospath}/gen3.sqlite"
        config["datastore", "cls"] = "lsst.daf.butler.datastores.fileDatastore.FileDatastore"
    repo = butlerTests.makeTestRepo(str(root_path), {}, config=config)
    butler = Butler(butler=repo, run=run)
    return butler


def populateButler(
    pipeline: Pipeline, butler: Butler, datasetTypes: Dict[Optional[str], List[str]] | None = None
) -> None:
    """Populate data butler with data needed for test.

    Initializes the data butler with a bunch of items:

    - registers the dataset types defined by the pipeline
    - creates dimension records for (instrument, detector)
    - adds datasets based on the ``datasetTypes`` dictionary; if the
      dictionary is missing, a single dataset with type "add_dataset0" is
      added

    All datasets added to the butler have ``dataId={instrument=instrument,
    detector=0}``, where ``instrument`` is extracted from the pipeline;
    "INSTR" is used if the pipeline has no instrument definition. The type
    of each dataset is guessed from the dataset type name (assuming the
    pipeline is made of `AddTask` tasks).

    Parameters
    ----------
    pipeline : `~lsst.pipe.base.Pipeline`
        Pipeline instance.
    butler : `~lsst.daf.butler.Butler`
        Data butler instance.
    datasetTypes : `dict` [ `str`, `list` ], optional
        Dictionary whose keys are collection names and values are lists of
        dataset type names; a `None` key stands for the default
        ``butler.run`` collection. By default a single dataset of type
        "add_dataset0" is added to the ``butler.run`` collection.
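
    Examples
    --------
    A sketch showing how to pre-populate an extra input collection; the
    collection name ``"other_run"`` is illustrative and assumes the
    pipeline defines the corresponding dataset types::

        populateButler(
            pipeline,
            butler,
            datasetTypes={None: ["add_dataset0"], "other_run": ["add_dataset1"]},
        )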

    """
    # Add dataset types to registry
    taskDefs = list(pipeline.toExpandedPipeline())
    registerDatasetTypes(butler.registry, taskDefs)

    instrument = pipeline.getInstrument()
    if instrument is not None:
        instrument_class = doImportType(instrument)
        instrumentName = instrument_class.getName()
    else:
        instrumentName = "INSTR"

    # Add all needed dimensions to registry
    butler.registry.insertDimensionData("instrument", dict(name=instrumentName))
    butler.registry.insertDimensionData("detector", dict(instrument=instrumentName, id=0, full_name="det0"))

    taskDefMap = {taskDef.label: taskDef for taskDef in taskDefs}
    # Add inputs to butler
    if not datasetTypes:
        datasetTypes = {None: ["add_dataset0"]}
    for run, dsTypes in datasetTypes.items():
        if run is not None:
            butler.registry.registerRun(run)
        for dsType in dsTypes:
            if dsType == "packages":
                # Version is intentionally inconsistent.
                # Dict is convertible to Packages if Packages is installed.
                data: Any = {"python": "9.9.99"}
                butler.put(data, dsType, run=run)
            else:
                if dsType.endswith("_config"):
                    # Find a config from the matching task name or make a new
                    # one.
                    taskLabel, _, _ = dsType.rpartition("_")
                    taskDef = taskDefMap.get(taskLabel)
                    if taskDef is not None:
                        data = taskDef.config
                    else:
                        data = AddTaskConfig()
                elif dsType.endswith("_metadata"):
                    data = _TASK_FULL_METADATA_TYPE()
                elif dsType.endswith("_log"):
                    data = ButlerLogRecords.from_records([])
                else:
                    data = numpy.array([0.0, 1.0, 2.0, 5.0])
                butler.put(data, dsType, run=run, instrument=instrumentName, detector=0)


def makeSimpleQGraph(
    nQuanta: int = 5,
    pipeline: Optional[Pipeline] = None,
    butler: Optional[Butler] = None,
    root: Optional[str] = None,
    callPopulateButler: bool = True,
    run: str = "test",
    skipExistingIn: Any = None,
    inMemory: bool = True,
    userQuery: str = "",
    datasetTypes: Optional[Dict[Optional[str], List[str]]] = None,
    datasetQueryConstraint: DSQVariant = DSQVariant.ALL,
    makeDatastoreRecords: bool = False,
    resolveRefs: bool = False,
    bind: Optional[Mapping[str, Any]] = None,
) -> Tuple[Butler, QuantumGraph]:
    """Make simple QuantumGraph for tests.

    Makes a simple pipeline of `AddTask` tasks, sets up an in-memory
    registry and butler, fills them with minimal data, and generates a
    QuantumGraph from all of that.

    Parameters
    ----------
    nQuanta : `int`
        Number of quanta in a graph, only used if ``pipeline`` is `None`.
    pipeline : `~lsst.pipe.base.Pipeline`, optional
        If `None` then a pipeline is made with `AddTask` and default
        `AddTaskConfig`.
    butler : `~lsst.daf.butler.Butler`, optional
        Data butler instance, if `None` then a new data butler is created
        by calling `makeSimpleButler`.
    root : `str`, optional
        Path or URI to the root location of the new repository. Only used
        if ``butler`` is `None`.
    callPopulateButler : `bool`, optional
        If `True`, insert datasets into the butler prior to building a
        graph. If `False`, the ``butler`` argument must not be `None` and
        must be pre-populated. Defaults to `True`.
    run : `str`, optional
        Name of the RUN collection to add to butler, only used if
        ``butler`` is `None`.
    skipExistingIn
        Expressions representing the collections to search for existing
        output datasets that should be skipped. See
        :ref:`daf_butler_ordered_collection_searches`.
    inMemory : `bool`, optional
        If `True` make an in-memory repository, only used if ``butler`` is
        `None`.
    userQuery : `str`, optional
        The user query to pass to ``makeGraph``, by default an empty
        string.
    datasetTypes : `dict` [ `str`, `list` ], optional
        Dictionary whose keys are collection names and values are lists of
        dataset type names. By default a single dataset of type
        "add_dataset0" is added to the ``butler.run`` collection.
    datasetQueryConstraint : `DatasetQueryConstraintVariant`, optional
        The query constraint variant that should be used to constrain the
        query based on dataset existence, defaults to
        `DatasetQueryConstraintVariant.ALL`.
    makeDatastoreRecords : `bool`, optional
        If `True` then add datastore records to generated quanta.
    resolveRefs : `bool`, optional
        If `True` then resolve all input references and generate random
        dataset IDs for all output and intermediate datasets.
    bind : `~collections.abc.Mapping`, optional
        Mapping containing literal values that should be injected into the
        ``userQuery`` expression, keyed by the identifiers they replace.

    Returns
    -------
    butler : `~lsst.daf.butler.Butler`
        Butler instance.
    qgraph : `~lsst.pipe.base.QuantumGraph`
        Quantum graph instance.
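
    Examples
    --------
    A minimal sketch, assuming ``tmpdir`` is an existing writable local
    directory::

        butler, qgraph = makeSimpleQGraph(nQuanta=3, root=tmpdir)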

    """
    if pipeline is None:
        pipeline = makeSimplePipeline(nQuanta=nQuanta)

    if butler is None:
        if root is None:
            raise ValueError("Must provide `root` when `butler` is None")
        if callPopulateButler is False:
            raise ValueError("callPopulateButler can only be False when butler is supplied as an argument")
        butler = makeSimpleButler(root, run=run, inMemory=inMemory)

    if callPopulateButler:
        populateButler(pipeline, butler, datasetTypes=datasetTypes)

    # Make the graph
    _LOG.debug("Instantiating GraphBuilder, skipExistingIn=%s", skipExistingIn)
    builder = GraphBuilder(
        registry=butler.registry,
        skipExistingIn=skipExistingIn,
        datastore=butler.datastore if makeDatastoreRecords else None,
    )
    _LOG.debug(
        "Calling GraphBuilder.makeGraph, collections=%r, run=%r, userQuery=%r bind=%s",
        butler.collections,
        run or butler.run,
        userQuery,
        bind,
    )
    qgraph = builder.makeGraph(
        pipeline,
        collections=butler.collections,
        run=run or butler.run,
        userQuery=userQuery,
        datasetQueryConstraint=datasetQueryConstraint,
        resolveRefs=resolveRefs,
        bind=bind,
    )

    return butler, qgraph