Coverage for tests/test_quantumBackedButler.py: 7%
225 statements
« prev ^ index » next coverage.py v6.5.0, created at 2023-06-06 09:38 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22import json
23import os
24import unittest
25import unittest.mock
26from typing import cast
28from lsst.daf.butler import (
29 Butler,
30 Config,
31 DatasetRef,
32 DatasetType,
33 DimensionUniverse,
34 Quantum,
35 QuantumBackedButler,
36 QuantumProvenanceData,
37 Registry,
38 RegistryConfig,
39 StorageClass,
40)
41from lsst.daf.butler.tests.utils import makeTestTempDir, removeTestTempDir
42from lsst.resources import ResourcePath
# Absolute directory containing this test file; used to locate test data.
TESTDIR = os.path.abspath(os.path.dirname(__file__))
class QuantumBackedButlerTestCase(unittest.TestCase):
    """Test case for QuantumBackedButler."""

    def setUp(self) -> None:
        """Create a temporary butler repo populated with dimension records,
        registered dataset types, and the input datasets used by every test.
        """
        self.root = makeTestTempDir(TESTDIR)
        self.config = Config()
        self.config["root"] = self.root
        self.universe = DimensionUniverse()

        # Make a full butler and import dimension definitions into it.
        registryConfig = RegistryConfig(self.config.get("registry"))
        Registry.createFromConfig(registryConfig, butlerRoot=self.root)
        self.butler = Butler(self.config, writeable=True, run="RUN")
        self.butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))

        # Define all dataset types used by the tests.
        graph = self.universe.extract(("instrument", "detector"))
        storageClass = StorageClass("StructuredDataDict")
        self.datasetTypeInit = DatasetType("test_ds_init", graph, storageClass)
        self.datasetTypeInput = DatasetType("test_ds_input", graph, storageClass)
        self.datasetTypeOutput = DatasetType("test_ds_output", graph, storageClass)
        self.datasetTypeOutput2 = DatasetType("test_ds_output2", graph, storageClass)
        self.datasetTypeExtra = DatasetType("test_ds_extra", graph, storageClass)

        # Register each type and keep a by-name mapping for the QBB factories.
        self.dataset_types: dict[str, DatasetType] = {}
        for dataset_type in (
            self.datasetTypeInit,
            self.datasetTypeInput,
            self.datasetTypeOutput,
            self.datasetTypeOutput2,
            self.datasetTypeExtra,
        ):
            self.butler.registry.registerDatasetType(dataset_type)
            self.dataset_types[dataset_type.name] = dataset_type

        dataIds = [
            self.butler.registry.expandDataId(dict(instrument="Cam1", detector=detector_id))
            for detector_id in (1, 2, 3, 4)
        ]

        # Make actual input datasets.
        self.input_refs = [
            self.butler.put({"data": dataId["detector"]}, self.datasetTypeInput, dataId) for dataId in dataIds
        ]
        self.init_inputs_refs = [self.butler.put({"data": -1}, self.datasetTypeInit, dataIds[0])]
        self.all_input_refs = self.input_refs + self.init_inputs_refs

        # Generate dataset refs for predicted outputs, plus refs for datasets
        # that will never be stored anywhere.
        self.output_refs = [DatasetRef(self.datasetTypeOutput, dataId, run="RUN") for dataId in dataIds]
        self.output_refs2 = [DatasetRef(self.datasetTypeOutput2, dataId, run="RUN") for dataId in dataIds]
        self.missing_refs = [DatasetRef(self.datasetTypeExtra, dataId, run="RUN") for dataId in dataIds]
101 def tearDown(self) -> None:
102 removeTestTempDir(self.root)
104 def make_quantum(self, step: int = 1) -> Quantum:
105 """Make a Quantum which includes datastore records."""
107 if step == 1:
108 datastore_records = self.butler.datastore.export_records(self.all_input_refs)
109 predictedInputs = {self.datasetTypeInput: self.input_refs}
110 outputs = {self.datasetTypeOutput: self.output_refs}
111 initInputs = {self.datasetTypeInit: self.init_inputs_refs[0]}
112 elif step == 2:
113 # The result should be empty, this is just to test that it works.
114 datastore_records = self.butler.datastore.export_records(self.output_refs)
115 predictedInputs = {self.datasetTypeInput: self.output_refs}
116 outputs = {self.datasetTypeOutput2: self.output_refs2}
117 initInputs = {}
118 else:
119 raise ValueError(f"unexpected {step} value")
121 return Quantum(
122 taskName="some.task.name",
123 inputs=predictedInputs,
124 outputs=outputs,
125 initInputs=initInputs,
126 datastore_records=datastore_records,
127 )
129 def test_initialize(self) -> None:
130 """Test for initialize factory method"""
132 quantum = self.make_quantum()
133 qbb = QuantumBackedButler.initialize(
134 config=self.config, quantum=quantum, dimensions=self.universe, dataset_types=self.dataset_types
135 )
136 self._test_factory(qbb)
138 def test_initialize_repo_index(self) -> None:
139 """Test for initialize using config file and repo index."""
141 # Store config to a file.
142 self.config.dumpToUri(self.root)
144 butler_index = Config()
145 butler_index["label"] = self.root
146 with ResourcePath.temporary_uri(suffix=".yaml") as index_path:
147 butler_index.dumpToUri(index_path)
149 with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": str(index_path)}):
150 quantum = self.make_quantum()
151 qbb = QuantumBackedButler.initialize(
152 config="label",
153 quantum=quantum,
154 dimensions=self.universe,
155 dataset_types=self.dataset_types,
156 )
157 self._test_factory(qbb)
159 def test_from_predicted(self) -> None:
160 """Test for from_predicted factory method"""
162 datastore_records = self.butler.datastore.export_records(self.all_input_refs)
163 qbb = QuantumBackedButler.from_predicted(
164 config=self.config,
165 predicted_inputs=[ref.id for ref in self.all_input_refs],
166 predicted_outputs=[ref.id for ref in self.output_refs],
167 dimensions=self.universe,
168 datastore_records=datastore_records,
169 dataset_types=self.dataset_types,
170 )
171 self._test_factory(qbb)
173 def _test_factory(self, qbb: QuantumBackedButler) -> None:
174 """Test state immediately after construction."""
176 self.assertTrue(qbb.isWriteable())
177 self.assertEqual(qbb._predicted_inputs, set(ref.id for ref in self.all_input_refs))
178 self.assertEqual(qbb._predicted_outputs, set(ref.id for ref in self.output_refs))
179 self.assertEqual(qbb._available_inputs, set())
180 self.assertEqual(qbb._unavailable_inputs, set())
181 self.assertEqual(qbb._actual_inputs, set())
182 self.assertEqual(qbb._actual_output_refs, set())
184 def test_getput(self) -> None:
185 """Test for getDirect/putDirect methods"""
187 quantum = self.make_quantum()
188 qbb = QuantumBackedButler.initialize(
189 config=self.config, quantum=quantum, dimensions=self.universe, dataset_types=self.dataset_types
190 )
192 # Verify all input data are readable.
193 for ref in self.input_refs:
194 data = qbb.get(ref)
195 self.assertEqual(data, {"data": ref.dataId["detector"]})
196 for ref in self.init_inputs_refs:
197 data = qbb.get(ref)
198 self.assertEqual(data, {"data": -1})
199 for ref in self.missing_refs:
200 with self.assertRaises(FileNotFoundError):
201 data = qbb.get(ref)
203 self.assertEqual(qbb._available_inputs, qbb._predicted_inputs)
204 self.assertEqual(qbb._actual_inputs, qbb._predicted_inputs)
205 self.assertEqual(qbb._unavailable_inputs, set(ref.id for ref in self.missing_refs))
207 # Write all expected outputs.
208 for ref in self.output_refs:
209 qbb.put({"data": cast(int, ref.dataId["detector"]) ** 2}, ref)
211 # Must be able to read them back
212 for ref in self.output_refs:
213 data = qbb.get(ref)
214 self.assertEqual(data, {"data": cast(int, ref.dataId["detector"]) ** 2})
216 self.assertEqual(qbb._actual_output_refs, set(self.output_refs))
218 def test_getDeferred(self) -> None:
219 """Test for getDirectDeferred method"""
221 quantum = self.make_quantum()
222 qbb = QuantumBackedButler.initialize(
223 config=self.config, quantum=quantum, dimensions=self.universe, dataset_types=self.dataset_types
224 )
226 # get some input data
227 input_refs = self.input_refs[:2]
228 for ref in input_refs:
229 data = qbb.getDeferred(ref)
230 self.assertEqual(data.get(), {"data": ref.dataId["detector"]})
231 for ref in self.init_inputs_refs:
232 data = qbb.getDeferred(ref)
233 self.assertEqual(data.get(), {"data": -1})
234 for ref in self.missing_refs:
235 data = qbb.getDeferred(ref)
236 with self.assertRaises(FileNotFoundError):
237 data.get()
239 # _avalable_inputs is not
240 self.assertEqual(qbb._available_inputs, set(ref.id for ref in input_refs + self.init_inputs_refs))
241 self.assertEqual(qbb._actual_inputs, set(ref.id for ref in input_refs + self.init_inputs_refs))
242 self.assertEqual(qbb._unavailable_inputs, set(ref.id for ref in self.missing_refs))
244 def test_datasetExistsDirect(self) -> None:
245 """Test for datasetExistsDirect method"""
247 quantum = self.make_quantum()
248 qbb = QuantumBackedButler.initialize(
249 config=self.config, quantum=quantum, dimensions=self.universe, dataset_types=self.dataset_types
250 )
252 # get some input data
253 input_refs = self.input_refs[:2]
254 for ref in input_refs:
255 exists = qbb.datasetExistsDirect(ref)
256 self.assertTrue(exists)
257 for ref in self.init_inputs_refs:
258 exists = qbb.datasetExistsDirect(ref)
259 self.assertTrue(exists)
260 for ref in self.missing_refs:
261 exists = qbb.datasetExistsDirect(ref)
262 self.assertFalse(exists)
264 # _available_inputs is not
265 self.assertEqual(qbb._available_inputs, set(ref.id for ref in input_refs + self.init_inputs_refs))
266 self.assertEqual(qbb._actual_inputs, set())
267 self.assertEqual(qbb._unavailable_inputs, set()) # this is not consistent with getDirect?
269 def test_markInputUnused(self) -> None:
270 """Test for markInputUnused method"""
272 quantum = self.make_quantum()
273 qbb = QuantumBackedButler.initialize(
274 config=self.config, quantum=quantum, dimensions=self.universe, dataset_types=self.dataset_types
275 )
277 # get some input data
278 for ref in self.input_refs:
279 data = qbb.get(ref)
280 self.assertEqual(data, {"data": ref.dataId["detector"]})
281 for ref in self.init_inputs_refs:
282 data = qbb.get(ref)
283 self.assertEqual(data, {"data": -1})
285 self.assertEqual(qbb._available_inputs, qbb._predicted_inputs)
286 self.assertEqual(qbb._actual_inputs, qbb._predicted_inputs)
288 qbb.markInputUnused(self.input_refs[0])
289 self.assertEqual(
290 qbb._actual_inputs, set(ref.id for ref in self.input_refs[1:] + self.init_inputs_refs)
291 )
293 def test_pruneDatasets(self) -> None:
294 """Test for pruneDatasets methods"""
296 quantum = self.make_quantum()
297 qbb = QuantumBackedButler.initialize(
298 config=self.config, quantum=quantum, dimensions=self.universe, dataset_types=self.dataset_types
299 )
301 # Write all expected outputs.
302 for ref in self.output_refs:
303 qbb.put({"data": cast(int, ref.dataId["detector"]) ** 2}, ref)
305 # Must be able to read them back
306 for ref in self.output_refs:
307 data = qbb.get(ref)
308 self.assertEqual(data, {"data": cast(int, ref.dataId["detector"]) ** 2})
310 # Check for invalid arguments.
311 with self.assertRaisesRegex(TypeError, "Cannot pass purge=True without disassociate=True"):
312 qbb.pruneDatasets(self.output_refs, disassociate=False, unstore=True, purge=True)
313 with self.assertRaisesRegex(TypeError, "Cannot pass purge=True without unstore=True"):
314 qbb.pruneDatasets(self.output_refs, disassociate=True, unstore=False, purge=True)
315 with self.assertRaisesRegex(TypeError, "Cannot pass disassociate=True without purge=True"):
316 qbb.pruneDatasets(self.output_refs, disassociate=True, unstore=True, purge=False)
318 # Disassociate only.
319 ref = self.output_refs[0]
320 qbb.pruneDatasets([ref], disassociate=False, unstore=True, purge=False)
321 self.assertFalse(qbb.datasetExistsDirect(ref))
322 with self.assertRaises(FileNotFoundError):
323 data = qbb.get(ref)
325 # can store it again
326 qbb.put({"data": cast(int, ref.dataId["detector"]) ** 2}, ref)
327 self.assertTrue(qbb.datasetExistsDirect(ref))
329 # Purge completely.
330 ref = self.output_refs[1]
331 qbb.pruneDatasets([ref], disassociate=True, unstore=True, purge=True)
332 self.assertFalse(qbb.datasetExistsDirect(ref))
333 with self.assertRaises(FileNotFoundError):
334 data = qbb.get(ref)
335 qbb.put({"data": cast(int, ref.dataId["detector"]) ** 2}, ref)
336 self.assertTrue(qbb.datasetExistsDirect(ref))
338 def test_extract_provenance_data(self) -> None:
339 """Test for extract_provenance_data method"""
341 quantum = self.make_quantum()
342 qbb = QuantumBackedButler.initialize(
343 config=self.config, quantum=quantum, dimensions=self.universe, dataset_types=self.dataset_types
344 )
346 # read/store everything
347 for ref in self.input_refs:
348 qbb.get(ref)
349 for ref in self.init_inputs_refs:
350 qbb.get(ref)
351 for ref in self.output_refs:
352 qbb.put({"data": cast(int, ref.dataId["detector"]) ** 2}, ref)
354 provenance1 = qbb.extract_provenance_data()
355 prov_json = provenance1.json()
356 provenance2 = QuantumProvenanceData.direct(**json.loads(prov_json))
357 for provenance in (provenance1, provenance2):
358 input_ids = set(ref.id for ref in self.input_refs + self.init_inputs_refs)
359 self.assertEqual(provenance.predicted_inputs, input_ids)
360 self.assertEqual(provenance.available_inputs, input_ids)
361 self.assertEqual(provenance.actual_inputs, input_ids)
362 output_ids = set(ref.id for ref in self.output_refs)
363 self.assertEqual(provenance.predicted_outputs, output_ids)
364 self.assertEqual(provenance.actual_outputs, output_ids)
365 datastore_name = "FileDatastore@<butlerRoot>/datastore"
366 self.assertEqual(set(provenance.datastore_records.keys()), {datastore_name})
367 datastore_records = provenance.datastore_records[datastore_name]
368 self.assertEqual(set(datastore_records.dataset_ids), output_ids)
369 class_name = "lsst.daf.butler.core.storedFileInfo.StoredFileInfo"
370 table_name = "file_datastore_records"
371 self.assertEqual(set(datastore_records.records.keys()), {class_name})
372 self.assertEqual(set(datastore_records.records[class_name].keys()), {table_name})
373 self.assertEqual(
374 set(record["dataset_id"] for record in datastore_records.records[class_name][table_name]),
375 output_ids,
376 )
378 def test_collect_and_transfer(self) -> None:
379 """Test for collect_and_transfer method"""
381 quantum1 = self.make_quantum(1)
382 qbb1 = QuantumBackedButler.initialize(
383 config=self.config, quantum=quantum1, dimensions=self.universe, dataset_types=self.dataset_types
384 )
386 quantum2 = self.make_quantum(2)
387 qbb2 = QuantumBackedButler.initialize(
388 config=self.config, quantum=quantum2, dimensions=self.universe, dataset_types=self.dataset_types
389 )
391 # read/store everything
392 for ref in self.input_refs:
393 qbb1.get(ref)
394 for ref in self.init_inputs_refs:
395 qbb1.get(ref)
396 for ref in self.output_refs:
397 qbb1.put({"data": cast(int, ref.dataId["detector"]) ** 2}, ref)
399 for ref in self.output_refs:
400 qbb2.get(ref)
401 for ref in self.output_refs2:
402 qbb2.put({"data": cast(int, ref.dataId["detector"]) ** 3}, ref)
404 QuantumProvenanceData.collect_and_transfer(
405 self.butler,
406 [quantum1, quantum2],
407 [qbb1.extract_provenance_data(), qbb2.extract_provenance_data()],
408 )
410 for ref in self.output_refs:
411 data = self.butler.get(ref)
412 self.assertEqual(data, {"data": cast(int, ref.dataId["detector"]) ** 2})
414 for ref in self.output_refs2:
415 data = self.butler.get(ref)
416 self.assertEqual(data, {"data": cast(int, ref.dataId["detector"]) ** 3})
# Allow running this test module directly as a script.
if __name__ == "__main__":
    unittest.main()