Coverage for python/lsst/pipe/base/tests/simpleQGraph.py: 28% (152 statements)


# This file is part of pipe_base.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

22"""Bunch of common classes and methods for use in unit tests. 

23""" 

24from __future__ import annotations 

25 

26__all__ = ["AddTaskConfig", "AddTask", "AddTaskFactoryMock"] 

27 

import itertools
import logging
from collections.abc import Iterable, Mapping
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Type, Union, cast

import lsst.daf.butler.tests as butlerTests
import lsst.pex.config as pexConfig
import numpy
from lsst.daf.butler import Butler, Config, DataId, DatasetRef, DatasetType, Formatter, LimitedButler
from lsst.daf.butler.core.logging import ButlerLogRecords
from lsst.resources import ResourcePath
from lsst.utils import doImportType

from .. import connectionTypes as cT
from .._instrument import Instrument
from ..config import PipelineTaskConfig
from ..connections import PipelineTaskConnections
from ..graph import QuantumGraph
from ..graphBuilder import DatasetQueryConstraintVariant as DSQVariant
from ..graphBuilder import GraphBuilder
from ..pipeline import Pipeline, TaskDatasetTypes, TaskDef
from ..pipelineTask import PipelineTask
from ..struct import Struct
from ..task import _TASK_FULL_METADATA_TYPE
from ..taskFactory import TaskFactory

if TYPE_CHECKING:
    from lsst.daf.butler import Registry

_LOG = logging.getLogger(__name__)


class SimpleInstrument(Instrument):
    def __init__(self, *args: Any, **kwargs: Any):
        pass

    @staticmethod
    def getName() -> str:
        return "INSTRU"

    def getRawFormatter(self, dataId: DataId) -> Type[Formatter]:
        return Formatter

    def register(self, registry: Registry, *, update: bool = False) -> None:
        pass

class AddTaskConnections(
    PipelineTaskConnections,
    dimensions=("instrument", "detector"),
    defaultTemplates={"in_tmpl": "_in", "out_tmpl": "_out"},
):
    """Connections for AddTask: one input and two outputs,
    plus one init output.
    """

    input = cT.Input(
        name="add_dataset{in_tmpl}",
        dimensions=["instrument", "detector"],
        storageClass="NumpyArray",
        doc="Input dataset type for this task",
    )
    output = cT.Output(
        name="add_dataset{out_tmpl}",
        dimensions=["instrument", "detector"],
        storageClass="NumpyArray",
        doc="Output dataset type for this task",
    )
    output2 = cT.Output(
        name="add2_dataset{out_tmpl}",
        dimensions=["instrument", "detector"],
        storageClass="NumpyArray",
        doc="Output dataset type for this task",
    )
    initout = cT.InitOutput(
        name="add_init_output{out_tmpl}",
        storageClass="NumpyArray",
        doc="Init Output dataset type for this task",
    )
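
# Illustrative sketch (kept as a comment so nothing runs on import): the
# "{in_tmpl}"/"{out_tmpl}" placeholders in the connection names above are
# filled in from the connections config, so overriding the templates changes
# the dataset type names a task reads and writes. This mirrors what
# makeSimplePipeline() below does with integer template values:
#
#     config = AddTaskConfig()
#     config.connections.in_tmpl = "0"
#     config.connections.out_tmpl = "1"
#     connections = AddTaskConnections(config=config)
#     # connections.input.name == "add_dataset0"
#     # connections.output.name == "add_dataset1"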

class AddTaskConfig(PipelineTaskConfig, pipelineConnections=AddTaskConnections):
    """Config for AddTask."""

    addend = pexConfig.Field[int](doc="amount to add", default=3)


class AddTask(PipelineTask):
    """Trivial PipelineTask for testing, with some extras useful for
    specific unit tests.
    """

    ConfigClass = AddTaskConfig
    _DefaultName = "add_task"

    initout = numpy.array([999])
    """InitOutputs for this task"""

    taskFactory: Optional[AddTaskFactoryMock] = None
    """Factory that makes instances"""

    def run(self, input: int) -> Struct:  # type: ignore
        if self.taskFactory:
            # Do some bookkeeping.
            if self.taskFactory.stopAt == self.taskFactory.countExec:
                raise RuntimeError("pretend something bad happened")
            self.taskFactory.countExec += 1

        self.config = cast(AddTaskConfig, self.config)
        self.metadata.add("add", self.config.addend)
        output = input + self.config.addend
        output2 = output + self.config.addend
        _LOG.info("input = %s, output = %s, output2 = %s", input, output, output2)
        return Struct(output=output, output2=output2)


class AddTaskFactoryMock(TaskFactory):
    """Special task factory that instantiates AddTask.

    It also defines some bookkeeping variables used by AddTask to report
    progress to unit tests.
    """

    def __init__(self, stopAt: int = -1):
        self.countExec = 0  # incremented by AddTask
        self.stopAt = stopAt  # AddTask raises an exception at this call to run()

    def makeTask(
        self, taskDef: TaskDef, butler: LimitedButler, initInputRefs: Iterable[DatasetRef] | None
    ) -> PipelineTask:
        taskClass = taskDef.taskClass
        assert taskClass is not None
        task = taskClass(config=taskDef.config, initInputs=None, name=taskDef.label)
        task.taskFactory = self  # type: ignore
        return task
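
# Illustrative sketch (comment only): ``stopAt`` lets a test force a failure
# partway through execution. ``AddTask.run`` compares ``stopAt`` against
# ``countExec`` before incrementing it, so ``stopAt=3`` makes the fourth call
# to ``run`` raise:
#
#     factory = AddTaskFactoryMock(stopAt=3)
#     # Calls 1-3 to AddTask.run() succeed (countExec becomes 1, 2, 3); the
#     # fourth call sees countExec == stopAt and raises RuntimeError.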

def registerDatasetTypes(registry: Registry, pipeline: Union[Pipeline, Iterable[TaskDef]]) -> None:
    """Register all dataset types used by tasks in a registry.

    Copied and modified from `PreExecInit.initializeDatasetTypes`.

    Parameters
    ----------
    registry : `~lsst.daf.butler.Registry`
        Registry instance.
    pipeline : `~lsst.pipe.base.Pipeline` or `typing.Iterable` of `TaskDef`
        A pipeline, or an iterable of TaskDef instances such as the output
        of the ``toExpandedPipeline`` method on a `~lsst.pipe.base.Pipeline`
        object.
    """
    for taskDef in pipeline:
        configDatasetType = DatasetType(
            taskDef.configDatasetName, {}, storageClass="Config", universe=registry.dimensions
        )
        storageClass = "Packages"
        packagesDatasetType = DatasetType(
            "packages", {}, storageClass=storageClass, universe=registry.dimensions
        )
        datasetTypes = TaskDatasetTypes.fromTaskDef(taskDef, registry=registry)
        for datasetType in itertools.chain(
            datasetTypes.initInputs,
            datasetTypes.initOutputs,
            datasetTypes.inputs,
            datasetTypes.outputs,
            datasetTypes.prerequisites,
            [configDatasetType, packagesDatasetType],
        ):
            _LOG.info("Registering %s with registry", datasetType)
            # Registration is a no-op if the dataset type already exists and
            # is consistent, and it raises if it is inconsistent; components
            # must be skipped, however.
            if not datasetType.isComponent():
                registry.registerDatasetType(datasetType)


def makeSimplePipeline(nQuanta: int, instrument: Optional[str] = None) -> Pipeline:
    """Make a simple Pipeline for tests.

    This is called by ``makeSimpleQGraph`` if no pipeline is passed to that
    function. It can also be used to customize the pipeline used by
    ``makeSimpleQGraph`` by calling this first and passing the result to it.

    Parameters
    ----------
    nQuanta : `int`
        The number of quanta to add to the pipeline.
    instrument : `str` or `None`, optional
        The importable name of an instrument to be added to the pipeline.
        If no instrument should be added, pass an empty string or `None`
        (the default).

    Returns
    -------
    pipeline : `~lsst.pipe.base.Pipeline`
        The created pipeline object.
    """
    pipeline = Pipeline("test pipeline")
    # Make a bunch of tasks that execute in a well-defined order (via data
    # dependencies).
    for lvl in range(nQuanta):
        pipeline.addTask(AddTask, f"task{lvl}")
        pipeline.addConfigOverride(f"task{lvl}", "connections.in_tmpl", lvl)
        pipeline.addConfigOverride(f"task{lvl}", "connections.out_tmpl", lvl + 1)
    if instrument:
        pipeline.addInstrument(instrument)
    return pipeline
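
# Illustrative sketch (comment only): makeSimplePipeline(3) produces tasks
# labeled "task0", "task1", "task2", where taskN reads "add_datasetN" and
# writes "add_dataset{N+1}", so the tasks form a linear chain:
#
#     pipeline = makeSimplePipeline(3)
#     # task0: add_dataset0 -> add_dataset1 (and add2_dataset1)
#     # task1: add_dataset1 -> add_dataset2 (and add2_dataset2)
#     # task2: add_dataset2 -> add_dataset3 (and add2_dataset3)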

def makeSimpleButler(root: str, run: str = "test", inMemory: bool = True) -> Butler:
    """Create a new data butler instance.

    Parameters
    ----------
    root : `str`
        Path or URI to the root location of the new repository.
    run : `str`, optional
        Run collection name.
    inMemory : `bool`, optional
        If true, make an in-memory repository.

    Returns
    -------
    butler : `~lsst.daf.butler.Butler`
        Data butler instance.
    """
    root_path = ResourcePath(root, forceDirectory=True)
    if not root_path.isLocal:
        raise ValueError(f"Only works with local root not {root_path}")
    config = Config()
    if not inMemory:
        config["registry", "db"] = f"sqlite:///{root_path.ospath}/gen3.sqlite"
        config["datastore", "cls"] = "lsst.daf.butler.datastores.fileDatastore.FileDatastore"
    repo = butlerTests.makeTestRepo(str(root_path), {}, config=config)
    butler = Butler(butler=repo, run=run)
    return butler
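
# Illustrative sketch (comment only; assumes ``tmpdir`` names a writeable
# local directory, e.g. one made by tempfile.mkdtemp()):
#
#     butler = makeSimpleButler(tmpdir, run="test")
#     # With the default inMemory=True the registry lives in an in-memory
#     # SQLite database; pass inMemory=False to persist gen3.sqlite and a
#     # file datastore under ``tmpdir``.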

def populateButler(
    pipeline: Pipeline, butler: Butler, datasetTypes: Dict[Optional[str], List[str]] | None = None
) -> None:
    """Populate a data butler with the data needed for tests.

    Initializes the data butler with several items:

    - registers the dataset types defined by the pipeline
    - creates dimension records for (instrument, detector)
    - adds datasets based on the ``datasetTypes`` dictionary; if the
      dictionary is missing, a single dataset with type "add_dataset0"
      is added

    All datasets added to the butler have ``dataId={instrument=instrument,
    detector=0}``, where ``instrument`` is extracted from the pipeline;
    "INSTR" is used if the pipeline has no instrument definition. The type
    of each dataset is guessed from the dataset type name (this assumes the
    pipeline is made of `AddTask` tasks).

    Parameters
    ----------
    pipeline : `~lsst.pipe.base.Pipeline`
        Pipeline instance.
    butler : `~lsst.daf.butler.Butler`
        Data butler instance.
    datasetTypes : `dict` [ `str`, `list` ], optional
        Dictionary whose keys are collection names (`None` stands for the
        ``butler.run`` collection) and values are lists of dataset type
        names. By default a single dataset of type "add_dataset0" is added
        to the ``butler.run`` collection.
    """

    # Add dataset types to registry.
    taskDefs = list(pipeline.toExpandedPipeline())
    registerDatasetTypes(butler.registry, taskDefs)

    instrument = pipeline.getInstrument()
    if instrument is not None:
        instrument_class = doImportType(instrument)
        instrumentName = instrument_class.getName()
    else:
        instrumentName = "INSTR"

    # Add all needed dimensions to registry.
    butler.registry.insertDimensionData("instrument", dict(name=instrumentName))
    butler.registry.insertDimensionData("detector", dict(instrument=instrumentName, id=0, full_name="det0"))

    taskDefMap = dict((taskDef.label, taskDef) for taskDef in taskDefs)
    # Add inputs to butler.
    if not datasetTypes:
        datasetTypes = {None: ["add_dataset0"]}
    for run, dsTypes in datasetTypes.items():
        if run is not None:
            butler.registry.registerRun(run)
        for dsType in dsTypes:
            if dsType == "packages":
                # Version is intentionally inconsistent.
                # Dict is convertible to Packages if Packages is installed.
                data: Any = {"python": "9.9.99"}
                butler.put(data, dsType, run=run)
            else:
                if dsType.endswith("_config"):
                    # Find a config from the matching task name or make a
                    # new one.
                    taskLabel, _, _ = dsType.rpartition("_")
                    taskDef = taskDefMap.get(taskLabel)
                    if taskDef is not None:
                        data = taskDef.config
                    else:
                        data = AddTaskConfig()
                elif dsType.endswith("_metadata"):
                    data = _TASK_FULL_METADATA_TYPE()
                elif dsType.endswith("_log"):
                    data = ButlerLogRecords.from_records([])
                else:
                    data = numpy.array([0.0, 1.0, 2.0, 5.0])
                butler.put(data, dsType, run=run, instrument=instrumentName, detector=0)
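
# Illustrative sketch (comment only; assumes ``butler`` was made by
# makeSimpleButler): the ``datasetTypes`` mapping can spread inputs over
# several collections, with `None` standing for ``butler.run``:
#
#     populateButler(
#         makeSimplePipeline(2),
#         butler,
#         datasetTypes={None: ["add_dataset0"], "other_run": ["add_dataset1"]},
#     )
#     # "other_run" is registered as a RUN collection and receives
#     # "add_dataset1"; "add_dataset0" goes to butler.run.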

def makeSimpleQGraph(
    nQuanta: int = 5,
    pipeline: Optional[Pipeline] = None,
    butler: Optional[Butler] = None,
    root: Optional[str] = None,
    callPopulateButler: bool = True,
    run: str = "test",
    skipExistingIn: Any = None,
    inMemory: bool = True,
    userQuery: str = "",
    datasetTypes: Optional[Dict[Optional[str], List[str]]] = None,
    datasetQueryConstraint: DSQVariant = DSQVariant.ALL,
    makeDatastoreRecords: bool = False,
    resolveRefs: bool = False,
    bind: Optional[Mapping[str, Any]] = None,
) -> Tuple[Butler, QuantumGraph]:
    """Make a simple QuantumGraph for tests.

    Makes a simple pipeline of `AddTask` tasks, sets up an in-memory
    registry and butler, fills them with minimal data, and generates a
    QuantumGraph from all of that.

    Parameters
    ----------
    nQuanta : `int`
        Number of quanta in a graph; only used if ``pipeline`` is None.
    pipeline : `~lsst.pipe.base.Pipeline`
        If `None` then a pipeline is made with `AddTask` and default
        `AddTaskConfig`.
    butler : `~lsst.daf.butler.Butler`, optional
        Data butler instance; if None then a new data butler is created by
        calling `makeSimpleButler`.
    root : `str`
        Path or URI to the root location of the new repository. Only used
        if ``butler`` is None.
    callPopulateButler : `bool`, optional
        If True, insert datasets into the butler prior to building a graph.
        If False, the ``butler`` argument must not be None and must be
        pre-populated. Defaults to True.
    run : `str`, optional
        Name of the RUN collection to add to the butler; also passed as the
        output run to ``makeGraph``.
    skipExistingIn
        Expressions representing the collections to search for existing
        output datasets that should be skipped. See
        :ref:`daf_butler_ordered_collection_searches`.
    inMemory : `bool`, optional
        If true, make an in-memory repository; only used if ``butler`` is
        `None`.
    userQuery : `str`, optional
        The user query to pass to ``makeGraph``; by default an empty string.
    datasetTypes : `dict` [ `str`, `list` ], optional
        Dictionary whose keys are collection names and values are lists of
        dataset type names. By default a single dataset of type
        "add_dataset0" is added to the ``butler.run`` collection.
    datasetQueryConstraint : `DatasetQueryConstraintVariant`
        The query constraint variant that should be used to constrain the
        query based on dataset existence; defaults to
        `DatasetQueryConstraintVariant.ALL`.
    makeDatastoreRecords : `bool`, optional
        If `True` then add datastore records to generated quanta.
    resolveRefs : `bool`, optional
        If `True` then resolve all input references and generate random
        dataset IDs for all output and intermediate datasets.
    bind : `Mapping`, optional
        Mapping containing literal values that should be injected into the
        ``userQuery`` expression, keyed by the identifiers they replace.

    Returns
    -------
    butler : `~lsst.daf.butler.Butler`
        Butler instance.
    qgraph : `~lsst.pipe.base.QuantumGraph`
        Quantum graph instance.
    """

    if pipeline is None:
        pipeline = makeSimplePipeline(nQuanta=nQuanta)

    if butler is None:
        if root is None:
            raise ValueError("Must provide `root` when `butler` is None")
        if callPopulateButler is False:
            raise ValueError("callPopulateButler can only be False when butler is supplied as an argument")
        butler = makeSimpleButler(root, run=run, inMemory=inMemory)

    if callPopulateButler:
        populateButler(pipeline, butler, datasetTypes=datasetTypes)

    # Make the graph.
    _LOG.debug("Instantiating GraphBuilder, skipExistingIn=%s", skipExistingIn)
    builder = GraphBuilder(
        registry=butler.registry,
        skipExistingIn=skipExistingIn,
        datastore=butler.datastore if makeDatastoreRecords else None,
    )
    _LOG.debug(
        "Calling GraphBuilder.makeGraph, collections=%r, run=%r, userQuery=%r bind=%s",
        butler.collections,
        run or butler.run,
        userQuery,
        bind,
    )
    qgraph = builder.makeGraph(
        pipeline,
        collections=butler.collections,
        run=run or butler.run,
        userQuery=userQuery,
        datasetQueryConstraint=datasetQueryConstraint,
        resolveRefs=resolveRefs,
        bind=bind,
    )

    return butler, qgraph
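
# Illustrative sketch (comment only; assumes ``tmpdir`` names a writeable
# local directory): a typical end-to-end use in a unit test:
#
#     butler, qgraph = makeSimpleQGraph(nQuanta=3, root=tmpdir)
#     # The returned graph has one quantum per AddTask in the pipeline
#     # produced by makeSimplePipeline(3).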