# Coverage for tests/test_quantumBackedButler.py: 7%
# 232 statements
# coverage.py v7.2.7, created at 2023-06-08 05:05 -0700

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

21 

# Module imports and constants, reconstructed from a coverage-report dump in
# which each statement had its source line number fused onto it (e.g.
# "22import json"), making the file unparseable.  Statement content is
# otherwise unchanged.
import json
import os
import unittest
import unittest.mock
from typing import cast

from lsst.daf.butler import (
    Butler,
    Config,
    DatasetRef,
    DatasetType,
    DimensionUniverse,
    Quantum,
    QuantumBackedButler,
    QuantumProvenanceData,
    Registry,
    RegistryConfig,
    StorageClass,
)
from lsst.daf.butler.tests.utils import makeTestTempDir, removeTestTempDir
from lsst.resources import ResourcePath

# Directory containing this test module; used to locate test data files.
TESTDIR = os.path.abspath(os.path.dirname(__file__))

class QuantumBackedButlerTestCase(unittest.TestCase):
    """Test case for QuantumBackedButler.

    Each test builds a temporary full Butler repository in ``setUp`` to
    produce real input datasets and datastore records, then exercises a
    registry-less ``QuantumBackedButler`` constructed from a ``Quantum``.
    """

    def setUp(self) -> None:
        # Fresh temporary repo root per test; removed in tearDown.
        self.root = makeTestTempDir(TESTDIR)
        self.config = Config()
        self.config["root"] = self.root
        self.universe = DimensionUniverse()

        # Make a butler and import dimension definitions.
        registryConfig = RegistryConfig(self.config.get("registry"))
        Registry.createFromConfig(registryConfig, butlerRoot=self.root)
        self.butler = Butler(self.config, writeable=True, run="RUN")
        self.butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))

        # Make all dataset types used by the tests.
        graph = self.universe.extract(("instrument", "detector"))
        storageClass = StorageClass("StructuredDataDict")
        self.datasetTypeInit = DatasetType("test_ds_init", graph, storageClass)
        self.datasetTypeInput = DatasetType("test_ds_input", graph, storageClass)
        self.datasetTypeOutput = DatasetType("test_ds_output", graph, storageClass)
        self.datasetTypeOutput2 = DatasetType("test_ds_output2", graph, storageClass)
        self.datasetTypeExtra = DatasetType("test_ds_extra", graph, storageClass)

        self.dataset_types: dict[str, DatasetType] = {}
        dataset_types = (
            self.datasetTypeInit,
            self.datasetTypeInput,
            self.datasetTypeOutput,
            self.datasetTypeOutput2,
            self.datasetTypeExtra,
        )
        for dataset_type in dataset_types:
            self.butler.registry.registerDatasetType(dataset_type)
            self.dataset_types[dataset_type.name] = dataset_type

        dataIds = [
            self.butler.registry.expandDataId(dict(instrument="Cam1", detector=detector_id))
            for detector_id in (1, 2, 3, 4)
        ]

        # Make actual input datasets.
        self.input_refs = [
            self.butler.put({"data": dataId["detector"]}, self.datasetTypeInput, dataId) for dataId in dataIds
        ]
        self.init_inputs_refs = [self.butler.put({"data": -1}, self.datasetTypeInit, dataIds[0])]
        self.all_input_refs = self.input_refs + self.init_inputs_refs

        # Generate dataset refs for outputs (not stored yet).
        self.output_refs = [DatasetRef(self.datasetTypeOutput, dataId, run="RUN") for dataId in dataIds]
        self.output_refs2 = [DatasetRef(self.datasetTypeOutput2, dataId, run="RUN") for dataId in dataIds]

        # Refs that are never stored anywhere; used to test missing-dataset paths.
        self.missing_refs = [DatasetRef(self.datasetTypeExtra, dataId, run="RUN") for dataId in dataIds]

    def tearDown(self) -> None:
        removeTestTempDir(self.root)

    def make_quantum(self, step: int = 1) -> Quantum:
        """Make a Quantum which includes datastore records.

        Parameters
        ----------
        step : `int`
            ``1`` builds a quantum whose inputs are the actually-stored
            datasets; ``2`` builds one whose "inputs" are the (unstored)
            outputs of step 1, so its exported records are empty.

        Raises
        ------
        ValueError
            Raised for any ``step`` other than 1 or 2.
        """
        if step == 1:
            datastore_records = self.butler.datastore.export_records(self.all_input_refs)
            predictedInputs = {self.datasetTypeInput: self.input_refs}
            outputs = {self.datasetTypeOutput: self.output_refs}
            initInputs = {self.datasetTypeInit: self.init_inputs_refs[0]}
        elif step == 2:
            # The result should be empty, this is just to test that it works.
            datastore_records = self.butler.datastore.export_records(self.output_refs)
            predictedInputs = {self.datasetTypeInput: self.output_refs}
            outputs = {self.datasetTypeOutput2: self.output_refs2}
            initInputs = {}
        else:
            raise ValueError(f"unexpected {step} value")

        return Quantum(
            taskName="some.task.name",
            inputs=predictedInputs,
            outputs=outputs,
            initInputs=initInputs,
            datastore_records=datastore_records,
        )

    def test_initialize(self) -> None:
        """Test for initialize factory method"""
        quantum = self.make_quantum()
        qbb = QuantumBackedButler.initialize(
            config=self.config, quantum=quantum, dimensions=self.universe, dataset_types=self.dataset_types
        )
        self._test_factory(qbb)

    def test_initialize_repo_index(self) -> None:
        """Test for initialize using config file and repo index."""
        # Store config to a file.
        self.config.dumpToUri(self.root)

        butler_index = Config()
        butler_index["label"] = self.root
        with ResourcePath.temporary_uri(suffix=".yaml") as index_path:
            butler_index.dumpToUri(index_path)

            # Point the repository-index env var at our temporary index so
            # the "label" string resolves to the repo created in setUp.
            with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": str(index_path)}):
                quantum = self.make_quantum()
                qbb = QuantumBackedButler.initialize(
                    config="label",
                    quantum=quantum,
                    dimensions=self.universe,
                    dataset_types=self.dataset_types,
                )
                self._test_factory(qbb)

    def test_from_predicted(self) -> None:
        """Test for from_predicted factory method"""
        datastore_records = self.butler.datastore.export_records(self.all_input_refs)
        qbb = QuantumBackedButler.from_predicted(
            config=self.config,
            predicted_inputs=[ref.id for ref in self.all_input_refs],
            predicted_outputs=[ref.id for ref in self.output_refs],
            dimensions=self.universe,
            datastore_records=datastore_records,
            dataset_types=self.dataset_types,
        )
        self._test_factory(qbb)

    def _test_factory(self, qbb: QuantumBackedButler) -> None:
        """Test state immediately after construction."""
        self.assertTrue(qbb.isWriteable())
        self.assertEqual(qbb._predicted_inputs, set(ref.id for ref in self.all_input_refs))
        self.assertEqual(qbb._predicted_outputs, set(ref.id for ref in self.output_refs))
        self.assertEqual(qbb._available_inputs, set())
        self.assertEqual(qbb._unavailable_inputs, set())
        self.assertEqual(qbb._actual_inputs, set())
        self.assertEqual(qbb._actual_output_refs, set())

    def test_getput(self) -> None:
        """Test for getDirect/putDirect methods"""
        quantum = self.make_quantum()
        qbb = QuantumBackedButler.initialize(
            config=self.config, quantum=quantum, dimensions=self.universe, dataset_types=self.dataset_types
        )

        # Verify all input data are readable.
        for ref in self.input_refs:
            data = qbb.get(ref)
            self.assertEqual(data, {"data": ref.dataId["detector"]})
        for ref in self.init_inputs_refs:
            data = qbb.get(ref)
            self.assertEqual(data, {"data": -1})
        for ref in self.missing_refs:
            with self.assertRaises(FileNotFoundError):
                data = qbb.get(ref)

        # Reads update the provenance bookkeeping sets.
        self.assertEqual(qbb._available_inputs, qbb._predicted_inputs)
        self.assertEqual(qbb._actual_inputs, qbb._predicted_inputs)
        self.assertEqual(qbb._unavailable_inputs, set(ref.id for ref in self.missing_refs))

        # Write all expected outputs.
        for ref in self.output_refs:
            qbb.put({"data": cast(int, ref.dataId["detector"]) ** 2}, ref)

        # Must be able to read them back.
        for ref in self.output_refs:
            data = qbb.get(ref)
            self.assertEqual(data, {"data": cast(int, ref.dataId["detector"]) ** 2})

        self.assertEqual(qbb._actual_output_refs, set(self.output_refs))

    def test_getDeferred(self) -> None:
        """Test for getDirectDeferred method"""
        quantum = self.make_quantum()
        qbb = QuantumBackedButler.initialize(
            config=self.config, quantum=quantum, dimensions=self.universe, dataset_types=self.dataset_types
        )

        # Get some input data (only a subset of the predicted inputs).
        input_refs = self.input_refs[:2]
        for ref in input_refs:
            data = qbb.getDeferred(ref)
            self.assertEqual(data.get(), {"data": ref.dataId["detector"]})
        for ref in self.init_inputs_refs:
            data = qbb.getDeferred(ref)
            self.assertEqual(data.get(), {"data": -1})
        for ref in self.missing_refs:
            data = qbb.getDeferred(ref)
            with self.assertRaises(FileNotFoundError):
                data.get()

        # Only the refs that were actually fetched should appear in the
        # available/actual sets, not all predicted inputs.
        self.assertEqual(qbb._available_inputs, set(ref.id for ref in input_refs + self.init_inputs_refs))
        self.assertEqual(qbb._actual_inputs, set(ref.id for ref in input_refs + self.init_inputs_refs))
        self.assertEqual(qbb._unavailable_inputs, set(ref.id for ref in self.missing_refs))

    def test_datasetExistsDirect(self) -> None:
        """Test for dataset existence method"""
        quantum = self.make_quantum()
        qbb = QuantumBackedButler.initialize(
            config=self.config, quantum=quantum, dimensions=self.universe, dataset_types=self.dataset_types
        )

        # Check existence for a subset of inputs plus the missing refs.
        input_refs = self.input_refs[:2]
        for ref in input_refs:
            exists = qbb.stored(ref)
            self.assertTrue(exists)
        for ref in self.init_inputs_refs:
            exists = qbb.stored(ref)
            self.assertTrue(exists)
        for ref in self.missing_refs:
            exists = qbb.stored(ref)
            self.assertFalse(exists)

        # Now do the same checks in bulk.
        missing_set = set(self.missing_refs)
        refs = input_refs + self.init_inputs_refs + self.missing_refs
        stored_many = qbb.stored_many(refs)
        for ref, stored in stored_many.items():
            if ref in missing_set:
                self.assertFalse(stored)
            else:
                self.assertTrue(stored)

        # Existence checks mark datasets available but not actually used.
        self.assertEqual(qbb._available_inputs, set(ref.id for ref in input_refs + self.init_inputs_refs))
        self.assertEqual(qbb._actual_inputs, set())
        self.assertEqual(qbb._unavailable_inputs, set())  # this is not consistent with getDirect?

    def test_markInputUnused(self) -> None:
        """Test for markInputUnused method"""
        quantum = self.make_quantum()
        qbb = QuantumBackedButler.initialize(
            config=self.config, quantum=quantum, dimensions=self.universe, dataset_types=self.dataset_types
        )

        # Read all input data so everything starts as an actual input.
        for ref in self.input_refs:
            data = qbb.get(ref)
            self.assertEqual(data, {"data": ref.dataId["detector"]})
        for ref in self.init_inputs_refs:
            data = qbb.get(ref)
            self.assertEqual(data, {"data": -1})

        self.assertEqual(qbb._available_inputs, qbb._predicted_inputs)
        self.assertEqual(qbb._actual_inputs, qbb._predicted_inputs)

        # Marking one ref unused removes only that ref from actual inputs.
        qbb.markInputUnused(self.input_refs[0])
        self.assertEqual(
            qbb._actual_inputs, set(ref.id for ref in self.input_refs[1:] + self.init_inputs_refs)
        )

    def test_pruneDatasets(self) -> None:
        """Test for pruneDatasets methods"""
        quantum = self.make_quantum()
        qbb = QuantumBackedButler.initialize(
            config=self.config, quantum=quantum, dimensions=self.universe, dataset_types=self.dataset_types
        )

        # Write all expected outputs.
        for ref in self.output_refs:
            qbb.put({"data": cast(int, ref.dataId["detector"]) ** 2}, ref)

        # Must be able to read them back.
        for ref in self.output_refs:
            data = qbb.get(ref)
            self.assertEqual(data, {"data": cast(int, ref.dataId["detector"]) ** 2})

        # Check for invalid arguments.
        with self.assertRaisesRegex(TypeError, "Cannot pass purge=True without disassociate=True"):
            qbb.pruneDatasets(self.output_refs, disassociate=False, unstore=True, purge=True)
        with self.assertRaisesRegex(TypeError, "Cannot pass purge=True without unstore=True"):
            qbb.pruneDatasets(self.output_refs, disassociate=True, unstore=False, purge=True)
        with self.assertRaisesRegex(TypeError, "Cannot pass disassociate=True without purge=True"):
            qbb.pruneDatasets(self.output_refs, disassociate=True, unstore=True, purge=False)

        # Unstore only (no disassociate, no purge).
        ref = self.output_refs[0]
        qbb.pruneDatasets([ref], disassociate=False, unstore=True, purge=False)
        self.assertFalse(qbb.stored(ref))
        with self.assertRaises(FileNotFoundError):
            data = qbb.get(ref)

        # Can store it again.
        qbb.put({"data": cast(int, ref.dataId["detector"]) ** 2}, ref)
        self.assertTrue(qbb.stored(ref))

        # Purge completely.
        ref = self.output_refs[1]
        qbb.pruneDatasets([ref], disassociate=True, unstore=True, purge=True)
        self.assertFalse(qbb.stored(ref))
        with self.assertRaises(FileNotFoundError):
            data = qbb.get(ref)
        qbb.put({"data": cast(int, ref.dataId["detector"]) ** 2}, ref)
        self.assertTrue(qbb.stored(ref))

    def test_extract_provenance_data(self) -> None:
        """Test for extract_provenance_data method"""
        quantum = self.make_quantum()
        qbb = QuantumBackedButler.initialize(
            config=self.config, quantum=quantum, dimensions=self.universe, dataset_types=self.dataset_types
        )

        # Read every input and store every output.
        for ref in self.input_refs:
            qbb.get(ref)
        for ref in self.init_inputs_refs:
            qbb.get(ref)
        for ref in self.output_refs:
            qbb.put({"data": cast(int, ref.dataId["detector"]) ** 2}, ref)

        # Check provenance both directly and after a JSON round-trip.
        provenance1 = qbb.extract_provenance_data()
        prov_json = provenance1.json()
        provenance2 = QuantumProvenanceData.direct(**json.loads(prov_json))
        for provenance in (provenance1, provenance2):
            input_ids = set(ref.id for ref in self.input_refs + self.init_inputs_refs)
            self.assertEqual(provenance.predicted_inputs, input_ids)
            self.assertEqual(provenance.available_inputs, input_ids)
            self.assertEqual(provenance.actual_inputs, input_ids)
            output_ids = set(ref.id for ref in self.output_refs)
            self.assertEqual(provenance.predicted_outputs, output_ids)
            self.assertEqual(provenance.actual_outputs, output_ids)
            datastore_name = "FileDatastore@<butlerRoot>/datastore"
            self.assertEqual(set(provenance.datastore_records.keys()), {datastore_name})
            datastore_records = provenance.datastore_records[datastore_name]
            self.assertEqual(set(datastore_records.dataset_ids), output_ids)
            class_name = "lsst.daf.butler.core.storedFileInfo.StoredFileInfo"
            table_name = "file_datastore_records"
            self.assertEqual(set(datastore_records.records.keys()), {class_name})
            self.assertEqual(set(datastore_records.records[class_name].keys()), {table_name})
            self.assertEqual(
                set(record["dataset_id"] for record in datastore_records.records[class_name][table_name]),
                output_ids,
            )

    def test_collect_and_transfer(self) -> None:
        """Test for collect_and_transfer method"""
        quantum1 = self.make_quantum(1)
        qbb1 = QuantumBackedButler.initialize(
            config=self.config, quantum=quantum1, dimensions=self.universe, dataset_types=self.dataset_types
        )

        quantum2 = self.make_quantum(2)
        qbb2 = QuantumBackedButler.initialize(
            config=self.config, quantum=quantum2, dimensions=self.universe, dataset_types=self.dataset_types
        )

        # Run "quantum 1": read inputs, write first-stage outputs.
        for ref in self.input_refs:
            qbb1.get(ref)
        for ref in self.init_inputs_refs:
            qbb1.get(ref)
        for ref in self.output_refs:
            qbb1.put({"data": cast(int, ref.dataId["detector"]) ** 2}, ref)

        # Run "quantum 2": consume quantum 1's outputs, write second-stage outputs.
        for ref in self.output_refs:
            qbb2.get(ref)
        for ref in self.output_refs2:
            qbb2.put({"data": cast(int, ref.dataId["detector"]) ** 3}, ref)

        QuantumProvenanceData.collect_and_transfer(
            self.butler,
            [quantum1, quantum2],
            [qbb1.extract_provenance_data(), qbb2.extract_provenance_data()],
        )

        # All outputs from both quanta must now be readable via the full butler.
        for ref in self.output_refs:
            data = self.butler.get(ref)
            self.assertEqual(data, {"data": cast(int, ref.dataId["detector"]) ** 2})

        for ref in self.output_refs2:
            data = self.butler.get(ref)
            self.assertEqual(data, {"data": cast(int, ref.dataId["detector"]) ** 3})

# Allow running this test module directly with `python test_quantumBackedButler.py`.
if __name__ == "__main__":
    unittest.main()