# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import json
import os
import unittest
from typing import cast

from lsst.daf.butler import (
    Butler,
    Config,
    DatasetRef,
    DatasetType,
    DimensionUniverse,
    Quantum,
    QuantumBackedButler,
    QuantumProvenanceData,
    Registry,
    RegistryConfig,
    StorageClass,
)
from lsst.daf.butler.tests.utils import makeTestTempDir, removeTestTempDir

TESTDIR = os.path.abspath(os.path.dirname(__file__))

class QuantumBackedButlerTestCase(unittest.TestCase):
    """Test case for QuantumBackedButler."""

    def setUp(self) -> None:
        self.root = makeTestTempDir(TESTDIR)
        self.config = Config()
        self.config["root"] = self.root
        self.universe = DimensionUniverse()

        # Make a butler and import dimension definitions.
        registryConfig = RegistryConfig(self.config.get("registry"))
        Registry.createFromConfig(registryConfig, butlerRoot=self.root)
        self.butler = Butler(self.config, writeable=True, run="RUN")
        self.butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))

        # make all dataset types
        graph = self.universe.extract(("instrument", "detector"))
        storageClass = StorageClass("StructuredDataDict")
        self.datasetTypeInit = DatasetType("test_ds_init", graph, storageClass)
        self.datasetTypeInput = DatasetType("test_ds_input", graph, storageClass)
        self.datasetTypeOutput = DatasetType("test_ds_output", graph, storageClass)
        self.datasetTypeOutput2 = DatasetType("test_ds_output2", graph, storageClass)
        self.datasetTypeExtra = DatasetType("test_ds_extra", graph, storageClass)

        self.dataset_types: dict[str, DatasetType] = {}
        dataset_types = (
            self.datasetTypeInit,
            self.datasetTypeInput,
            self.datasetTypeOutput,
            self.datasetTypeOutput2,
            self.datasetTypeExtra,
        )
        for dataset_type in dataset_types:
            self.butler.registry.registerDatasetType(dataset_type)
            self.dataset_types[dataset_type.name] = dataset_type

        dataIds = [
            self.butler.registry.expandDataId(dict(instrument="Cam1", detector=detector_id))
            for detector_id in (1, 2, 3, 4)
        ]

        # make actual input datasets
        self.input_refs = [
            self.butler.put({"data": dataId["detector"]}, self.datasetTypeInput, dataId) for dataId in dataIds
        ]
        self.init_inputs_refs = [self.butler.put({"data": -1}, self.datasetTypeInit, dataIds[0])]
        self.all_input_refs = self.input_refs + self.init_inputs_refs

        # generate dataset refs for outputs
        self.output_refs = [DatasetRef(self.datasetTypeOutput, dataId, run="RUN") for dataId in dataIds]
        self.output_refs2 = [DatasetRef(self.datasetTypeOutput2, dataId, run="RUN") for dataId in dataIds]

        self.missing_refs = [DatasetRef(self.datasetTypeExtra, dataId, run="RUN") for dataId in dataIds]
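        # These "extra" refs are never stored anywhere; the tests below use
        # them to exercise missing-dataset handling (FileNotFoundError and the
        # _unavailable_inputs bookkeeping).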

    def tearDown(self) -> None:
        removeTestTempDir(self.root)

    def make_quantum(self, step: int = 1) -> Quantum:
        """Make a Quantum which includes datastore records."""
        if step == 1:
            datastore_records = self.butler.datastore.export_records(self.all_input_refs)
            predictedInputs = {self.datasetTypeInput: self.input_refs}
            outputs = {self.datasetTypeOutput: self.output_refs}
            initInputs = {self.datasetTypeInit: self.init_inputs_refs[0]}
        elif step == 2:
            # The result should be empty; this just checks that exporting
            # records for datasets that are not stored yet works.
            datastore_records = self.butler.datastore.export_records(self.output_refs)
            predictedInputs = {self.datasetTypeInput: self.output_refs}
            outputs = {self.datasetTypeOutput2: self.output_refs2}
            initInputs = {}
        else:
            raise ValueError(f"unexpected {step} value")

        return Quantum(
            taskName="some.task.name",
            inputs=predictedInputs,
            outputs=outputs,
            initInputs=initInputs,
            datastore_records=datastore_records,
        )

    def test_initialize(self) -> None:
        """Test for initialize factory method"""
        quantum = self.make_quantum()
        qbb = QuantumBackedButler.initialize(
            config=self.config, quantum=quantum, dimensions=self.universe, dataset_types=self.dataset_types
        )
        self._test_factory(qbb)

    def test_from_predicted(self) -> None:
        """Test for from_predicted factory method"""
        datastore_records = self.butler.datastore.export_records(self.all_input_refs)
        qbb = QuantumBackedButler.from_predicted(
            config=self.config,
            predicted_inputs=[ref.getCheckedId() for ref in self.all_input_refs],
            predicted_outputs=[ref.getCheckedId() for ref in self.output_refs],
            dimensions=self.universe,
            datastore_records=datastore_records,
            dataset_types=self.dataset_types,
        )
        self._test_factory(qbb)

    def _test_factory(self, qbb: QuantumBackedButler) -> None:
        """Test state immediately after construction."""
        self.assertTrue(qbb.isWriteable())
        self.assertEqual(qbb._predicted_inputs, set(ref.id for ref in self.all_input_refs))
        self.assertEqual(qbb._predicted_outputs, set(ref.id for ref in self.output_refs))
        self.assertEqual(qbb._available_inputs, set())
        self.assertEqual(qbb._unavailable_inputs, set())
        self.assertEqual(qbb._actual_inputs, set())
        self.assertEqual(qbb._actual_output_refs, set())

    def test_getput(self) -> None:
162 """Test for getDirect/putDirect methods"""
        quantum = self.make_quantum()
        qbb = QuantumBackedButler.initialize(
            config=self.config, quantum=quantum, dimensions=self.universe, dataset_types=self.dataset_types
        )

        # Verify all input data are readable.
        for ref in self.input_refs:
            data = qbb.get(ref)
            self.assertEqual(data, {"data": ref.dataId["detector"]})
        for ref in self.init_inputs_refs:
            data = qbb.get(ref)
            self.assertEqual(data, {"data": -1})
        for ref in self.missing_refs:
            with self.assertRaises(FileNotFoundError):
                data = qbb.get(ref)

        self.assertEqual(qbb._available_inputs, qbb._predicted_inputs)
        self.assertEqual(qbb._actual_inputs, qbb._predicted_inputs)
        self.assertEqual(qbb._unavailable_inputs, set(ref.id for ref in self.missing_refs))

        # Write all expected outputs.
        for ref in self.output_refs:
            qbb.put({"data": cast(int, ref.dataId["detector"]) ** 2}, ref)

        # Must be able to read them back
        for ref in self.output_refs:
            data = qbb.get(ref)
            self.assertEqual(data, {"data": cast(int, ref.dataId["detector"]) ** 2})

        self.assertEqual(qbb._actual_output_refs, set(self.output_refs))

    def test_getDeferred(self) -> None:
        """Test for getDeferred method"""
        quantum = self.make_quantum()
        qbb = QuantumBackedButler.initialize(
            config=self.config, quantum=quantum, dimensions=self.universe, dataset_types=self.dataset_types
        )

        # get some input data
        input_refs = self.input_refs[:2]
        for ref in input_refs:
            data = qbb.getDeferred(ref)
            self.assertEqual(data.get(), {"data": ref.dataId["detector"]})
        for ref in self.init_inputs_refs:
            data = qbb.getDeferred(ref)
            self.assertEqual(data.get(), {"data": -1})
        for ref in self.missing_refs:
            data = qbb.getDeferred(ref)
            with self.assertRaises(FileNotFoundError):
                data.get()
        # Only the refs that were actually read show up in _available_inputs
        # and _actual_inputs; the missing refs are recorded as unavailable.
        self.assertEqual(qbb._available_inputs, set(ref.id for ref in input_refs + self.init_inputs_refs))
        self.assertEqual(qbb._actual_inputs, set(ref.id for ref in input_refs + self.init_inputs_refs))
        self.assertEqual(qbb._unavailable_inputs, set(ref.id for ref in self.missing_refs))

    def test_datasetExistsDirect(self) -> None:
        """Test for datasetExistsDirect method"""
        quantum = self.make_quantum()
        qbb = QuantumBackedButler.initialize(
            config=self.config, quantum=quantum, dimensions=self.universe, dataset_types=self.dataset_types
        )

        # get some input data
        input_refs = self.input_refs[:2]
        for ref in input_refs:
            exists = qbb.datasetExistsDirect(ref)
            self.assertTrue(exists)
        for ref in self.init_inputs_refs:
            exists = qbb.datasetExistsDirect(ref)
            self.assertTrue(exists)
        for ref in self.missing_refs:
            exists = qbb.datasetExistsDirect(ref)
            self.assertFalse(exists)
        # Existence checks update _available_inputs but leave _actual_inputs
        # empty, and missing refs are not recorded as unavailable.
        self.assertEqual(qbb._available_inputs, set(ref.id for ref in input_refs + self.init_inputs_refs))
        self.assertEqual(qbb._actual_inputs, set())
        self.assertEqual(qbb._unavailable_inputs, set())  # this is not consistent with getDirect?

    def test_markInputUnused(self) -> None:
        """Test for markInputUnused method"""
        quantum = self.make_quantum()
        qbb = QuantumBackedButler.initialize(
            config=self.config, quantum=quantum, dimensions=self.universe, dataset_types=self.dataset_types
        )

        # get some input data
        for ref in self.input_refs:
            data = qbb.get(ref)
            self.assertEqual(data, {"data": ref.dataId["detector"]})
        for ref in self.init_inputs_refs:
            data = qbb.get(ref)
            self.assertEqual(data, {"data": -1})

        self.assertEqual(qbb._available_inputs, qbb._predicted_inputs)
        self.assertEqual(qbb._actual_inputs, qbb._predicted_inputs)

        qbb.markInputUnused(self.input_refs[0])
        self.assertEqual(
            qbb._actual_inputs, set(ref.id for ref in self.input_refs[1:] + self.init_inputs_refs)
        )

    def test_pruneDatasets(self) -> None:
        """Test for pruneDatasets methods"""
        quantum = self.make_quantum()
        qbb = QuantumBackedButler.initialize(
            config=self.config, quantum=quantum, dimensions=self.universe, dataset_types=self.dataset_types
        )

        # Write all expected outputs.
        for ref in self.output_refs:
            qbb.put({"data": cast(int, ref.dataId["detector"]) ** 2}, ref)

        # Must be able to read them back
        for ref in self.output_refs:
            data = qbb.get(ref)
            self.assertEqual(data, {"data": cast(int, ref.dataId["detector"]) ** 2})

        # Check for invalid arguments.
        with self.assertRaisesRegex(TypeError, "Cannot pass purge=True without disassociate=True"):
            qbb.pruneDatasets(self.output_refs, disassociate=False, unstore=True, purge=True)
        with self.assertRaisesRegex(TypeError, "Cannot pass purge=True without unstore=True"):
            qbb.pruneDatasets(self.output_refs, disassociate=True, unstore=False, purge=True)
        with self.assertRaisesRegex(TypeError, "Cannot pass disassociate=True without purge=True"):
            qbb.pruneDatasets(self.output_refs, disassociate=True, unstore=True, purge=False)
        # Unstore only.
        ref = self.output_refs[0]
        qbb.pruneDatasets([ref], disassociate=False, unstore=True, purge=False)
        self.assertFalse(qbb.datasetExistsDirect(ref))
        with self.assertRaises(FileNotFoundError):
            data = qbb.get(ref)

        # can store it again
        qbb.put({"data": cast(int, ref.dataId["detector"]) ** 2}, ref)
        self.assertTrue(qbb.datasetExistsDirect(ref))

        # Purge completely.
        ref = self.output_refs[1]
        qbb.pruneDatasets([ref], disassociate=True, unstore=True, purge=True)
        self.assertFalse(qbb.datasetExistsDirect(ref))
        with self.assertRaises(FileNotFoundError):
            data = qbb.get(ref)
        qbb.put({"data": cast(int, ref.dataId["detector"]) ** 2}, ref)
        self.assertTrue(qbb.datasetExistsDirect(ref))

    def test_extract_provenance_data(self) -> None:
        """Test for extract_provenance_data method"""
        quantum = self.make_quantum()
        qbb = QuantumBackedButler.initialize(
            config=self.config, quantum=quantum, dimensions=self.universe, dataset_types=self.dataset_types
        )

        # read/store everything
        for ref in self.input_refs:
            qbb.get(ref)
        for ref in self.init_inputs_refs:
            qbb.get(ref)
        for ref in self.output_refs:
            qbb.put({"data": cast(int, ref.dataId["detector"]) ** 2}, ref)

        provenance1 = qbb.extract_provenance_data()
        prov_json = provenance1.json()
        provenance2 = QuantumProvenanceData.direct(**json.loads(prov_json))
        for provenance in (provenance1, provenance2):
            input_ids = set(ref.id for ref in self.input_refs + self.init_inputs_refs)
            self.assertEqual(provenance.predicted_inputs, input_ids)
            self.assertEqual(provenance.available_inputs, input_ids)
            self.assertEqual(provenance.actual_inputs, input_ids)
            output_ids = set(ref.id for ref in self.output_refs)
            self.assertEqual(provenance.predicted_outputs, output_ids)
            self.assertEqual(provenance.actual_outputs, output_ids)
            datastore_name = "FileDatastore@<butlerRoot>/datastore"
            self.assertEqual(set(provenance.datastore_records.keys()), {datastore_name})
            datastore_records = provenance.datastore_records[datastore_name]
            self.assertEqual(set(datastore_records.dataset_ids), output_ids)
            class_name = "lsst.daf.butler.core.storedFileInfo.StoredFileInfo"
            table_name = "file_datastore_records"
            self.assertEqual(set(datastore_records.records.keys()), {class_name})
            self.assertEqual(set(datastore_records.records[class_name].keys()), {table_name})
            self.assertEqual(
                set(record["dataset_id"] for record in datastore_records.records[class_name][table_name]),
                output_ids,
            )

    def test_collect_and_transfer(self) -> None:
        """Test for collect_and_transfer method"""
        quantum1 = self.make_quantum(1)
        qbb1 = QuantumBackedButler.initialize(
            config=self.config, quantum=quantum1, dimensions=self.universe, dataset_types=self.dataset_types
        )

        quantum2 = self.make_quantum(2)
        qbb2 = QuantumBackedButler.initialize(
            config=self.config, quantum=quantum2, dimensions=self.universe, dataset_types=self.dataset_types
        )

        # read/store everything
        for ref in self.input_refs:
            qbb1.get(ref)
        for ref in self.init_inputs_refs:
            qbb1.get(ref)
        for ref in self.output_refs:
            qbb1.put({"data": cast(int, ref.dataId["detector"]) ** 2}, ref)

        for ref in self.output_refs:
            qbb2.get(ref)
        for ref in self.output_refs2:
            qbb2.put({"data": cast(int, ref.dataId["detector"]) ** 3}, ref)

        QuantumProvenanceData.collect_and_transfer(
            self.butler,
            [quantum1, quantum2],
            [qbb1.extract_provenance_data(), qbb2.extract_provenance_data()],
        )

        for ref in self.output_refs:
            data = self.butler.get(ref)
            self.assertEqual(data, {"data": cast(int, ref.dataId["detector"]) ** 2})

        for ref in self.output_refs2:
            data = self.butler.get(ref)
            self.assertEqual(data, {"data": cast(int, ref.dataId["detector"]) ** 3})

if __name__ == "__main__":
    unittest.main()