Coverage for tests/test_quantumBackedButler.py: 7% (233 statements)
coverage.py v7.3.1, created at 2023-10-02 08:00 +0000

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

import json
import os
import unittest
import unittest.mock
from typing import cast

from lsst.daf.butler import (
    Butler,
    Config,
    DatasetRef,
    DatasetType,
    DimensionUniverse,
    Quantum,
    QuantumBackedButler,
    QuantumProvenanceData,
    RegistryConfig,
    StorageClass,
)
from lsst.daf.butler.registry import _RegistryFactory
from lsst.daf.butler.tests.utils import makeTestTempDir, removeTestTempDir
from lsst.resources import ResourcePath

TESTDIR = os.path.abspath(os.path.dirname(__file__))

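# Background for these tests (a brief summary, not authoritative documentation):
# QuantumBackedButler is a limited Butler variant used during QuantumGraph
# execution. Instead of talking to a registry database it is seeded with the
# datastore records attached to a Quantum, so it can get() predicted inputs
# and put() predicted outputs while tracking which datasets were actually
# read and written.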
class QuantumBackedButlerTestCase(unittest.TestCase):
    """Test case for QuantumBackedButler."""

    def setUp(self) -> None:
        self.root = makeTestTempDir(TESTDIR)
        self.config = Config()
        self.config["root"] = self.root
        self.universe = DimensionUniverse()

        # Make a butler and import dimension definitions.
        registryConfig = RegistryConfig(self.config.get("registry"))
        _RegistryFactory(registryConfig).create_from_config(butlerRoot=self.root)
        self.butler = Butler(self.config, writeable=True, run="RUN")
        self.butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))

        # make all dataset types
        graph = self.universe.extract(("instrument", "detector"))
        storageClass = StorageClass("StructuredDataDict")
        self.datasetTypeInit = DatasetType("test_ds_init", graph, storageClass)
        self.datasetTypeInput = DatasetType("test_ds_input", graph, storageClass)
        self.datasetTypeOutput = DatasetType("test_ds_output", graph, storageClass)
        self.datasetTypeOutput2 = DatasetType("test_ds_output2", graph, storageClass)
        self.datasetTypeExtra = DatasetType("test_ds_extra", graph, storageClass)

        self.dataset_types: dict[str, DatasetType] = {}
        dataset_types = (
            self.datasetTypeInit,
            self.datasetTypeInput,
            self.datasetTypeOutput,
            self.datasetTypeOutput2,
            self.datasetTypeExtra,
        )
        for dataset_type in dataset_types:
            self.butler.registry.registerDatasetType(dataset_type)
            self.dataset_types[dataset_type.name] = dataset_type

        dataIds = [
            self.butler.registry.expandDataId(dict(instrument="Cam1", detector=detector_id))
            for detector_id in (1, 2, 3, 4)
        ]

        # make actual input datasets
        self.input_refs = [
            self.butler.put({"data": dataId["detector"]}, self.datasetTypeInput, dataId) for dataId in dataIds
        ]
        self.init_inputs_refs = [self.butler.put({"data": -1}, self.datasetTypeInit, dataIds[0])]
        self.all_input_refs = self.input_refs + self.init_inputs_refs

        # generate dataset refs for outputs
        self.output_refs = [DatasetRef(self.datasetTypeOutput, dataId, run="RUN") for dataId in dataIds]
        self.output_refs2 = [DatasetRef(self.datasetTypeOutput2, dataId, run="RUN") for dataId in dataIds]

        self.missing_refs = [DatasetRef(self.datasetTypeExtra, dataId, run="RUN") for dataId in dataIds]

    def tearDown(self) -> None:
        removeTestTempDir(self.root)

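    # make_quantum builds the Quantum that drives each test. The key detail is
    # that it carries datastore records exported from the full Butler; that is
    # what later allows a QuantumBackedButler to locate the input files
    # without a registry connection.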
    def make_quantum(self, step: int = 1) -> Quantum:
        """Make a Quantum which includes datastore records."""
        if step == 1:
            datastore_records = self.butler._datastore.export_records(self.all_input_refs)
            predictedInputs = {self.datasetTypeInput: self.input_refs}
            outputs = {self.datasetTypeOutput: self.output_refs}
            initInputs = {self.datasetTypeInit: self.init_inputs_refs[0]}
        elif step == 2:
            # The result should be empty, this is just to test that it works.
            datastore_records = self.butler._datastore.export_records(self.output_refs)
            predictedInputs = {self.datasetTypeInput: self.output_refs}
            outputs = {self.datasetTypeOutput2: self.output_refs2}
            initInputs = {}
        else:
            raise ValueError(f"unexpected {step} value")

        return Quantum(
            taskName="some.task.name",
            inputs=predictedInputs,
            outputs=outputs,
            initInputs=initInputs,
            datastore_records=datastore_records,
        )

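    # QuantumBackedButler.initialize() derives the predicted input and output
    # dataset IDs from the quantum itself; the tests below only hand it a
    # config, the quantum, the dimension universe, and the dataset types.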
    def test_initialize(self) -> None:
        """Test for initialize factory method"""
        quantum = self.make_quantum()
        qbb = QuantumBackedButler.initialize(
            config=self.config, quantum=quantum, dimensions=self.universe, dataset_types=self.dataset_types
        )
        self._test_factory(qbb)

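    # The repository index used below is a small YAML file mapping labels to
    # butler repository URIs, e.g. (hypothetical contents):
    #
    #     label: /path/to/butler/repo
    #
    # Pointing the DAF_BUTLER_REPOSITORY_INDEX environment variable at such a
    # file is what lets initialize(config="label", ...) resolve a bare label
    # to a concrete repository configuration.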
    def test_initialize_repo_index(self) -> None:
        """Test for initialize using config file and repo index."""
        # Store config to a file.
        self.config.dumpToUri(self.root)

        butler_index = Config()
        butler_index["label"] = self.root
        with ResourcePath.temporary_uri(suffix=".yaml") as index_path:
            butler_index.dumpToUri(index_path)

            with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": str(index_path)}):
                quantum = self.make_quantum()
                qbb = QuantumBackedButler.initialize(
                    config="label",
                    quantum=quantum,
                    dimensions=self.universe,
                    dataset_types=self.dataset_types,
                )
                self._test_factory(qbb)

    def test_from_predicted(self) -> None:
        """Test for from_predicted factory method"""
        datastore_records = self.butler._datastore.export_records(self.all_input_refs)
        qbb = QuantumBackedButler.from_predicted(
            config=self.config,
            predicted_inputs=[ref.id for ref in self.all_input_refs],
            predicted_outputs=[ref.id for ref in self.output_refs],
            dimensions=self.universe,
            datastore_records=datastore_records,
            dataset_types=self.dataset_types,
        )
        self._test_factory(qbb)

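    # The private attributes checked below are the provenance-tracking sets a
    # QuantumBackedButler maintains: predicted inputs/outputs come from the
    # factory arguments, while the available, unavailable, and actual input
    # sets and the actual output refs start empty and are filled in by
    # get()/put() calls in the tests that follow.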
    def _test_factory(self, qbb: QuantumBackedButler) -> None:
        """Test state immediately after construction."""
        self.assertTrue(qbb.isWriteable())
        self.assertEqual(qbb._predicted_inputs, {ref.id for ref in self.all_input_refs})
        self.assertEqual(qbb._predicted_outputs, {ref.id for ref in self.output_refs})
        self.assertEqual(qbb._available_inputs, set())
        self.assertEqual(qbb._unavailable_inputs, set())
        self.assertEqual(qbb._actual_inputs, set())
        self.assertEqual(qbb._actual_output_refs, set())

    def test_getput(self) -> None:
        """Test get and put methods."""
        quantum = self.make_quantum()
        qbb = QuantumBackedButler.initialize(
            config=self.config, quantum=quantum, dimensions=self.universe, dataset_types=self.dataset_types
        )

        # Verify all input data are readable.
        for ref in self.input_refs:
            data = qbb.get(ref)
            self.assertEqual(data, {"data": ref.dataId["detector"]})
        for ref in self.init_inputs_refs:
            data = qbb.get(ref)
            self.assertEqual(data, {"data": -1})
        for ref in self.missing_refs:
            with self.assertRaises(FileNotFoundError):
                data = qbb.get(ref)

        self.assertEqual(qbb._available_inputs, qbb._predicted_inputs)
        self.assertEqual(qbb._actual_inputs, qbb._predicted_inputs)
        self.assertEqual(qbb._unavailable_inputs, {ref.id for ref in self.missing_refs})

        # Write all expected outputs.
        for ref in self.output_refs:
            qbb.put({"data": cast(int, ref.dataId["detector"]) ** 2}, ref)

        # Must be able to read them back.
        for ref in self.output_refs:
            data = qbb.get(ref)
            self.assertEqual(data, {"data": cast(int, ref.dataId["detector"]) ** 2})

        self.assertEqual(qbb._actual_output_refs, set(self.output_refs))

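    # getDeferred() returns a handle rather than the dataset itself; the read
    # (and therefore any FileNotFoundError for a missing dataset) only happens
    # when .get() is called on the handle.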
    def test_getDeferred(self) -> None:
        """Test the getDeferred method."""
        quantum = self.make_quantum()
        qbb = QuantumBackedButler.initialize(
            config=self.config, quantum=quantum, dimensions=self.universe, dataset_types=self.dataset_types
        )

        # Get some of the input data.
        input_refs = self.input_refs[:2]
        for ref in input_refs:
            data = qbb.getDeferred(ref)
            self.assertEqual(data.get(), {"data": ref.dataId["detector"]})
        for ref in self.init_inputs_refs:
            data = qbb.getDeferred(ref)
            self.assertEqual(data.get(), {"data": -1})
        for ref in self.missing_refs:
            data = qbb.getDeferred(ref)
            with self.assertRaises(FileNotFoundError):
                data.get()

        # Only the datasets actually read are recorded as available/actual inputs.
        self.assertEqual(qbb._available_inputs, {ref.id for ref in input_refs + self.init_inputs_refs})
        self.assertEqual(qbb._actual_inputs, {ref.id for ref in input_refs + self.init_inputs_refs})
        self.assertEqual(qbb._unavailable_inputs, {ref.id for ref in self.missing_refs})

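    # stored() and stored_many() only check datastore existence; unlike get(),
    # they do not mark anything as an actual input, which is why
    # _actual_inputs stays empty in the test below.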
    def test_datasetExistsDirect(self) -> None:
        """Test dataset existence checks via stored and stored_many."""
        quantum = self.make_quantum()
        qbb = QuantumBackedButler.initialize(
            config=self.config, quantum=quantum, dimensions=self.universe, dataset_types=self.dataset_types
        )

        # Check existence of some input data.
        input_refs = self.input_refs[:2]
        for ref in input_refs:
            exists = qbb.stored(ref)
            self.assertTrue(exists)
        for ref in self.init_inputs_refs:
            exists = qbb.stored(ref)
            self.assertTrue(exists)
        for ref in self.missing_refs:
            exists = qbb.stored(ref)
            self.assertFalse(exists)

        # Now do the same checks in bulk.
        missing_set = set(self.missing_refs)
        refs = input_refs + self.init_inputs_refs + self.missing_refs
        stored_many = qbb.stored_many(refs)
        for ref, stored in stored_many.items():
            if ref in missing_set:
                self.assertFalse(stored)
            else:
                self.assertTrue(stored)

        # Existence checks record availability but not actual use.
        self.assertEqual(qbb._available_inputs, {ref.id for ref in input_refs + self.init_inputs_refs})
        self.assertEqual(qbb._actual_inputs, set())
        self.assertEqual(qbb._unavailable_inputs, set())  # this is not consistent with getDirect?

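    # markInputUnused() lets a task declare that a predicted (and already
    # read) input did not actually contribute to its outputs, removing it
    # from the actual-input set reported in provenance.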
    def test_markInputUnused(self) -> None:
        """Test for markInputUnused method"""
        quantum = self.make_quantum()
        qbb = QuantumBackedButler.initialize(
            config=self.config, quantum=quantum, dimensions=self.universe, dataset_types=self.dataset_types
        )

        # Read all input data.
        for ref in self.input_refs:
            data = qbb.get(ref)
            self.assertEqual(data, {"data": ref.dataId["detector"]})
        for ref in self.init_inputs_refs:
            data = qbb.get(ref)
            self.assertEqual(data, {"data": -1})

        self.assertEqual(qbb._available_inputs, qbb._predicted_inputs)
        self.assertEqual(qbb._actual_inputs, qbb._predicted_inputs)

        qbb.markInputUnused(self.input_refs[0])
        self.assertEqual(qbb._actual_inputs, {ref.id for ref in self.input_refs[1:] + self.init_inputs_refs})

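    # pruneDatasets() argument rules, as exercised below: purge=True requires
    # both disassociate=True and unstore=True, and disassociate=True in turn
    # requires purge=True; unstore=True on its own just removes the stored
    # files from the datastore.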
    def test_pruneDatasets(self) -> None:
        """Test for pruneDatasets methods"""
        quantum = self.make_quantum()
        qbb = QuantumBackedButler.initialize(
            config=self.config, quantum=quantum, dimensions=self.universe, dataset_types=self.dataset_types
        )

        # Write all expected outputs.
        for ref in self.output_refs:
            qbb.put({"data": cast(int, ref.dataId["detector"]) ** 2}, ref)

        # Must be able to read them back.
        for ref in self.output_refs:
            data = qbb.get(ref)
            self.assertEqual(data, {"data": cast(int, ref.dataId["detector"]) ** 2})

        # Check for invalid arguments.
        with self.assertRaisesRegex(TypeError, "Cannot pass purge=True without disassociate=True"):
            qbb.pruneDatasets(self.output_refs, disassociate=False, unstore=True, purge=True)
        with self.assertRaisesRegex(TypeError, "Cannot pass purge=True without unstore=True"):
            qbb.pruneDatasets(self.output_refs, disassociate=True, unstore=False, purge=True)
        with self.assertRaisesRegex(TypeError, "Cannot pass disassociate=True without purge=True"):
            qbb.pruneDatasets(self.output_refs, disassociate=True, unstore=True, purge=False)

        # Unstore only.
        ref = self.output_refs[0]
        qbb.pruneDatasets([ref], disassociate=False, unstore=True, purge=False)
        self.assertFalse(qbb.stored(ref))
        with self.assertRaises(FileNotFoundError):
            data = qbb.get(ref)

        # Can store it again.
        qbb.put({"data": cast(int, ref.dataId["detector"]) ** 2}, ref)
        self.assertTrue(qbb.stored(ref))

        # Purge completely.
        ref = self.output_refs[1]
        qbb.pruneDatasets([ref], disassociate=True, unstore=True, purge=True)
        self.assertFalse(qbb.stored(ref))
        with self.assertRaises(FileNotFoundError):
            data = qbb.get(ref)
        qbb.put({"data": cast(int, ref.dataId["detector"]) ** 2}, ref)
        self.assertTrue(qbb.stored(ref))

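    # extract_provenance_data() packages the tracking sets plus the datastore
    # records for the actual outputs into a QuantumProvenanceData model; the
    # test also round-trips it through JSON to check that serialization is
    # lossless.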
    def test_extract_provenance_data(self) -> None:
        """Test for extract_provenance_data method"""
        quantum = self.make_quantum()
        qbb = QuantumBackedButler.initialize(
            config=self.config, quantum=quantum, dimensions=self.universe, dataset_types=self.dataset_types
        )

        # read/store everything
        for ref in self.input_refs:
            qbb.get(ref)
        for ref in self.init_inputs_refs:
            qbb.get(ref)
        for ref in self.output_refs:
            qbb.put({"data": cast(int, ref.dataId["detector"]) ** 2}, ref)

        provenance1 = qbb.extract_provenance_data()
        prov_json = provenance1.json()
        provenance2 = QuantumProvenanceData.direct(**json.loads(prov_json))
        for provenance in (provenance1, provenance2):
            input_ids = {ref.id for ref in self.input_refs + self.init_inputs_refs}
            self.assertEqual(provenance.predicted_inputs, input_ids)
            self.assertEqual(provenance.available_inputs, input_ids)
            self.assertEqual(provenance.actual_inputs, input_ids)
            output_ids = {ref.id for ref in self.output_refs}
            self.assertEqual(provenance.predicted_outputs, output_ids)
            self.assertEqual(provenance.actual_outputs, output_ids)
            datastore_name = "FileDatastore@<butlerRoot>/datastore"
            self.assertEqual(set(provenance.datastore_records.keys()), {datastore_name})
            datastore_records = provenance.datastore_records[datastore_name]
            self.assertEqual(set(datastore_records.dataset_ids), output_ids)
            class_name = "lsst.daf.butler.core.storedFileInfo.StoredFileInfo"
            table_name = "file_datastore_records"
            self.assertEqual(set(datastore_records.records.keys()), {class_name})
            self.assertEqual(set(datastore_records.records[class_name].keys()), {table_name})
            self.assertEqual(
                {record["dataset_id"] for record in datastore_records.records[class_name][table_name]},
                output_ids,
            )

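    # collect_and_transfer() takes the quanta plus their extracted provenance
    # and ingests the outputs recorded there into the full Butler, so the
    # datasets written through the two QuantumBackedButlers become readable
    # via self.butler.get() at the end of the test.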
    def test_collect_and_transfer(self) -> None:
        """Test for collect_and_transfer method"""
        quantum1 = self.make_quantum(1)
        qbb1 = QuantumBackedButler.initialize(
            config=self.config, quantum=quantum1, dimensions=self.universe, dataset_types=self.dataset_types
        )

        quantum2 = self.make_quantum(2)
        qbb2 = QuantumBackedButler.initialize(
            config=self.config, quantum=quantum2, dimensions=self.universe, dataset_types=self.dataset_types
        )

        # read/store everything
        for ref in self.input_refs:
            qbb1.get(ref)
        for ref in self.init_inputs_refs:
            qbb1.get(ref)
        for ref in self.output_refs:
            qbb1.put({"data": cast(int, ref.dataId["detector"]) ** 2}, ref)

        for ref in self.output_refs:
            qbb2.get(ref)
        for ref in self.output_refs2:
            qbb2.put({"data": cast(int, ref.dataId["detector"]) ** 3}, ref)

        QuantumProvenanceData.collect_and_transfer(
            self.butler,
            [quantum1, quantum2],
            [qbb1.extract_provenance_data(), qbb2.extract_provenance_data()],
        )

        for ref in self.output_refs:
            data = self.butler.get(ref)
            self.assertEqual(data, {"data": cast(int, ref.dataId["detector"]) ** 2})

        for ref in self.output_refs2:
            data = self.butler.get(ref)
            self.assertEqual(data, {"data": cast(int, ref.dataId["detector"]) ** 3})

if __name__ == "__main__":
    unittest.main()