Coverage for tests/test_quantumBackedButler.py: 7%
232 statements
Report generated by coverage.py v7.2.7, created at 2023-06-15 09:13 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22import json
23import os
24import unittest
25import unittest.mock
26from typing import cast
28from lsst.daf.butler import (
29 Butler,
30 Config,
31 DatasetRef,
32 DatasetType,
33 DimensionUniverse,
34 Quantum,
35 QuantumBackedButler,
36 QuantumProvenanceData,
37 Registry,
38 RegistryConfig,
39 StorageClass,
40)
41from lsst.daf.butler.tests.utils import makeTestTempDir, removeTestTempDir
42from lsst.resources import ResourcePath
44TESTDIR = os.path.abspath(os.path.dirname(__file__))
class QuantumBackedButlerTestCase(unittest.TestCase):
    """Test case for QuantumBackedButler.

    Builds a small temporary butler repository with five dataset types and
    four detector data IDs, then exercises the quantum-backed butler factory
    methods, get/put, deferred gets, existence checks, input-usage tracking,
    dataset pruning, and provenance extraction/transfer.
    """

    def setUp(self) -> None:
        """Create a temporary repo and populate it with input datasets."""
        self.root = makeTestTempDir(TESTDIR)
        self.config = Config()
        self.config["root"] = self.root
        self.universe = DimensionUniverse()

        # Make a butler and import dimension definitions.
        registryConfig = RegistryConfig(self.config.get("registry"))
        Registry.createFromConfig(registryConfig, butlerRoot=self.root)
        self.butler = Butler(self.config, writeable=True, run="RUN")
        self.butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))

        # Make all dataset types used by the tests; they share one storage
        # class and the (instrument, detector) dimension graph.
        graph = self.universe.extract(("instrument", "detector"))
        storageClass = StorageClass("StructuredDataDict")
        self.datasetTypeInit = DatasetType("test_ds_init", graph, storageClass)
        self.datasetTypeInput = DatasetType("test_ds_input", graph, storageClass)
        self.datasetTypeOutput = DatasetType("test_ds_output", graph, storageClass)
        self.datasetTypeOutput2 = DatasetType("test_ds_output2", graph, storageClass)
        self.datasetTypeExtra = DatasetType("test_ds_extra", graph, storageClass)

        self.dataset_types: dict[str, DatasetType] = {}
        dataset_types = (
            self.datasetTypeInit,
            self.datasetTypeInput,
            self.datasetTypeOutput,
            self.datasetTypeOutput2,
            self.datasetTypeExtra,
        )
        for dataset_type in dataset_types:
            self.butler.registry.registerDatasetType(dataset_type)
            self.dataset_types[dataset_type.name] = dataset_type

        dataIds = [
            self.butler.registry.expandDataId(dict(instrument="Cam1", detector=detector_id))
            for detector_id in (1, 2, 3, 4)
        ]

        # Make actual input datasets; payloads encode the detector ID so
        # tests can check round-trips against the data ID.
        self.input_refs = [
            self.butler.put({"data": dataId["detector"]}, self.datasetTypeInput, dataId) for dataId in dataIds
        ]
        self.init_inputs_refs = [self.butler.put({"data": -1}, self.datasetTypeInit, dataIds[0])]
        self.all_input_refs = self.input_refs + self.init_inputs_refs

        # Generate dataset refs for outputs (not stored yet).
        self.output_refs = [DatasetRef(self.datasetTypeOutput, dataId, run="RUN") for dataId in dataIds]
        self.output_refs2 = [DatasetRef(self.datasetTypeOutput2, dataId, run="RUN") for dataId in dataIds]

        # Refs that are never stored anywhere; used to exercise the
        # missing-dataset code paths.
        self.missing_refs = [DatasetRef(self.datasetTypeExtra, dataId, run="RUN") for dataId in dataIds]

    def tearDown(self) -> None:
        removeTestTempDir(self.root)

    def make_quantum(self, step: int = 1) -> Quantum:
        """Make a Quantum which includes datastore records.

        Parameters
        ----------
        step : `int`, optional
            Which quantum to build: 1 consumes the actual stored inputs and
            predicts ``test_ds_output``; 2 consumes step 1's (not yet stored)
            outputs and predicts ``test_ds_output2``.

        Raises
        ------
        ValueError
            Raised if ``step`` is not 1 or 2.
        """
        if step == 1:
            datastore_records = self.butler.datastore.export_records(self.all_input_refs)
            predictedInputs = {self.datasetTypeInput: self.input_refs}
            outputs = {self.datasetTypeOutput: self.output_refs}
            initInputs = {self.datasetTypeInit: self.init_inputs_refs[0]}
        elif step == 2:
            # The result should be empty, this is just to test that it works.
            datastore_records = self.butler.datastore.export_records(self.output_refs)
            predictedInputs = {self.datasetTypeInput: self.output_refs}
            outputs = {self.datasetTypeOutput2: self.output_refs2}
            initInputs = {}
        else:
            raise ValueError(f"unexpected {step} value")

        return Quantum(
            taskName="some.task.name",
            inputs=predictedInputs,
            outputs=outputs,
            initInputs=initInputs,
            datastore_records=datastore_records,
        )

    def test_initialize(self) -> None:
        """Test for initialize factory method"""
        quantum = self.make_quantum()
        qbb = QuantumBackedButler.initialize(
            config=self.config, quantum=quantum, dimensions=self.universe, dataset_types=self.dataset_types
        )
        self._test_factory(qbb)

    def test_initialize_repo_index(self) -> None:
        """Test for initialize using config file and repo index."""
        # Store config to a file.
        self.config.dumpToUri(self.root)

        butler_index = Config()
        butler_index["label"] = self.root
        with ResourcePath.temporary_uri(suffix=".yaml") as index_path:
            butler_index.dumpToUri(index_path)

            # The repository index is discovered via this environment
            # variable; patch it only for the duration of the test.
            with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": str(index_path)}):
                quantum = self.make_quantum()
                qbb = QuantumBackedButler.initialize(
                    config="label",
                    quantum=quantum,
                    dimensions=self.universe,
                    dataset_types=self.dataset_types,
                )
                self._test_factory(qbb)

    def test_from_predicted(self) -> None:
        """Test for from_predicted factory method"""
        datastore_records = self.butler.datastore.export_records(self.all_input_refs)
        qbb = QuantumBackedButler.from_predicted(
            config=self.config,
            predicted_inputs=[ref.id for ref in self.all_input_refs],
            predicted_outputs=[ref.id for ref in self.output_refs],
            dimensions=self.universe,
            datastore_records=datastore_records,
            dataset_types=self.dataset_types,
        )
        self._test_factory(qbb)

    def _test_factory(self, qbb: QuantumBackedButler) -> None:
        """Test state immediately after construction."""
        self.assertTrue(qbb.isWriteable())
        self.assertEqual(qbb._predicted_inputs, {ref.id for ref in self.all_input_refs})
        self.assertEqual(qbb._predicted_outputs, {ref.id for ref in self.output_refs})
        # Nothing has been read or written yet, so all usage-tracking sets
        # start out empty.
        self.assertEqual(qbb._available_inputs, set())
        self.assertEqual(qbb._unavailable_inputs, set())
        self.assertEqual(qbb._actual_inputs, set())
        self.assertEqual(qbb._actual_output_refs, set())

    def test_getput(self) -> None:
        """Test for getDirect/putDirect methods"""
        quantum = self.make_quantum()
        qbb = QuantumBackedButler.initialize(
            config=self.config, quantum=quantum, dimensions=self.universe, dataset_types=self.dataset_types
        )

        # Verify all input data are readable.
        for ref in self.input_refs:
            data = qbb.get(ref)
            self.assertEqual(data, {"data": ref.dataId["detector"]})
        for ref in self.init_inputs_refs:
            data = qbb.get(ref)
            self.assertEqual(data, {"data": -1})
        for ref in self.missing_refs:
            with self.assertRaises(FileNotFoundError):
                data = qbb.get(ref)

        self.assertEqual(qbb._available_inputs, qbb._predicted_inputs)
        self.assertEqual(qbb._actual_inputs, qbb._predicted_inputs)
        self.assertEqual(qbb._unavailable_inputs, {ref.id for ref in self.missing_refs})

        # Write all expected outputs.
        for ref in self.output_refs:
            qbb.put({"data": cast(int, ref.dataId["detector"]) ** 2}, ref)

        # Must be able to read them back
        for ref in self.output_refs:
            data = qbb.get(ref)
            self.assertEqual(data, {"data": cast(int, ref.dataId["detector"]) ** 2})

        self.assertEqual(qbb._actual_output_refs, set(self.output_refs))

    def test_getDeferred(self) -> None:
        """Test for getDirectDeferred method"""
        quantum = self.make_quantum()
        qbb = QuantumBackedButler.initialize(
            config=self.config, quantum=quantum, dimensions=self.universe, dataset_types=self.dataset_types
        )

        # get some input data
        input_refs = self.input_refs[:2]
        for ref in input_refs:
            data = qbb.getDeferred(ref)
            self.assertEqual(data.get(), {"data": ref.dataId["detector"]})
        for ref in self.init_inputs_refs:
            data = qbb.getDeferred(ref)
            self.assertEqual(data.get(), {"data": -1})
        for ref in self.missing_refs:
            data = qbb.getDeferred(ref)
            with self.assertRaises(FileNotFoundError):
                data.get()

        # _available_inputs tracks only the refs actually fetched above, not
        # everything that was predicted.
        self.assertEqual(qbb._available_inputs, {ref.id for ref in input_refs + self.init_inputs_refs})
        self.assertEqual(qbb._actual_inputs, {ref.id for ref in input_refs + self.init_inputs_refs})
        self.assertEqual(qbb._unavailable_inputs, {ref.id for ref in self.missing_refs})

    def test_datasetExistsDirect(self) -> None:
        """Test for dataset existence method"""
        quantum = self.make_quantum()
        qbb = QuantumBackedButler.initialize(
            config=self.config, quantum=quantum, dimensions=self.universe, dataset_types=self.dataset_types
        )

        # get some input data
        input_refs = self.input_refs[:2]
        for ref in input_refs:
            exists = qbb.stored(ref)
            self.assertTrue(exists)
        for ref in self.init_inputs_refs:
            exists = qbb.stored(ref)
            self.assertTrue(exists)
        for ref in self.missing_refs:
            exists = qbb.stored(ref)
            self.assertFalse(exists)

        # Now do the same checks in bulk.
        missing_set = set(self.missing_refs)
        refs = input_refs + self.init_inputs_refs + self.missing_refs
        stored_many = qbb.stored_many(refs)
        for ref, stored in stored_many.items():
            if ref in missing_set:
                self.assertFalse(stored)
            else:
                self.assertTrue(stored)

        # Existence checks mark refs as available but never as actually used.
        self.assertEqual(qbb._available_inputs, {ref.id for ref in input_refs + self.init_inputs_refs})
        self.assertEqual(qbb._actual_inputs, set())
        self.assertEqual(qbb._unavailable_inputs, set())  # this is not consistent with getDirect?

    def test_markInputUnused(self) -> None:
        """Test for markInputUnused method"""
        quantum = self.make_quantum()
        qbb = QuantumBackedButler.initialize(
            config=self.config, quantum=quantum, dimensions=self.universe, dataset_types=self.dataset_types
        )

        # get some input data
        for ref in self.input_refs:
            data = qbb.get(ref)
            self.assertEqual(data, {"data": ref.dataId["detector"]})
        for ref in self.init_inputs_refs:
            data = qbb.get(ref)
            self.assertEqual(data, {"data": -1})

        self.assertEqual(qbb._available_inputs, qbb._predicted_inputs)
        self.assertEqual(qbb._actual_inputs, qbb._predicted_inputs)

        # Marking a read input as unused removes it from _actual_inputs only.
        qbb.markInputUnused(self.input_refs[0])
        self.assertEqual(qbb._actual_inputs, {ref.id for ref in self.input_refs[1:] + self.init_inputs_refs})

    def test_pruneDatasets(self) -> None:
        """Test for pruneDatasets methods"""
        quantum = self.make_quantum()
        qbb = QuantumBackedButler.initialize(
            config=self.config, quantum=quantum, dimensions=self.universe, dataset_types=self.dataset_types
        )

        # Write all expected outputs.
        for ref in self.output_refs:
            qbb.put({"data": cast(int, ref.dataId["detector"]) ** 2}, ref)

        # Must be able to read them back
        for ref in self.output_refs:
            data = qbb.get(ref)
            self.assertEqual(data, {"data": cast(int, ref.dataId["detector"]) ** 2})

        # Check for invalid arguments.
        with self.assertRaisesRegex(TypeError, "Cannot pass purge=True without disassociate=True"):
            qbb.pruneDatasets(self.output_refs, disassociate=False, unstore=True, purge=True)
        with self.assertRaisesRegex(TypeError, "Cannot pass purge=True without unstore=True"):
            qbb.pruneDatasets(self.output_refs, disassociate=True, unstore=False, purge=True)
        with self.assertRaisesRegex(TypeError, "Cannot pass disassociate=True without purge=True"):
            qbb.pruneDatasets(self.output_refs, disassociate=True, unstore=True, purge=False)

        # Disassociate only.
        ref = self.output_refs[0]
        qbb.pruneDatasets([ref], disassociate=False, unstore=True, purge=False)
        self.assertFalse(qbb.stored(ref))
        with self.assertRaises(FileNotFoundError):
            data = qbb.get(ref)

        # can store it again
        qbb.put({"data": cast(int, ref.dataId["detector"]) ** 2}, ref)
        self.assertTrue(qbb.stored(ref))

        # Purge completely.
        ref = self.output_refs[1]
        qbb.pruneDatasets([ref], disassociate=True, unstore=True, purge=True)
        self.assertFalse(qbb.stored(ref))
        with self.assertRaises(FileNotFoundError):
            data = qbb.get(ref)
        qbb.put({"data": cast(int, ref.dataId["detector"]) ** 2}, ref)
        self.assertTrue(qbb.stored(ref))

    def test_extract_provenance_data(self) -> None:
        """Test for extract_provenance_data method"""
        quantum = self.make_quantum()
        qbb = QuantumBackedButler.initialize(
            config=self.config, quantum=quantum, dimensions=self.universe, dataset_types=self.dataset_types
        )

        # read/store everything
        for ref in self.input_refs:
            qbb.get(ref)
        for ref in self.init_inputs_refs:
            qbb.get(ref)
        for ref in self.output_refs:
            qbb.put({"data": cast(int, ref.dataId["detector"]) ** 2}, ref)

        # Provenance must survive a JSON round trip unchanged.
        provenance1 = qbb.extract_provenance_data()
        prov_json = provenance1.json()
        provenance2 = QuantumProvenanceData.direct(**json.loads(prov_json))
        for provenance in (provenance1, provenance2):
            input_ids = {ref.id for ref in self.input_refs + self.init_inputs_refs}
            self.assertEqual(provenance.predicted_inputs, input_ids)
            self.assertEqual(provenance.available_inputs, input_ids)
            self.assertEqual(provenance.actual_inputs, input_ids)
            output_ids = {ref.id for ref in self.output_refs}
            self.assertEqual(provenance.predicted_outputs, output_ids)
            self.assertEqual(provenance.actual_outputs, output_ids)
            datastore_name = "FileDatastore@<butlerRoot>/datastore"
            self.assertEqual(set(provenance.datastore_records.keys()), {datastore_name})
            datastore_records = provenance.datastore_records[datastore_name]
            self.assertEqual(set(datastore_records.dataset_ids), output_ids)
            class_name = "lsst.daf.butler.core.storedFileInfo.StoredFileInfo"
            table_name = "file_datastore_records"
            self.assertEqual(set(datastore_records.records.keys()), {class_name})
            self.assertEqual(set(datastore_records.records[class_name].keys()), {table_name})
            self.assertEqual(
                {record["dataset_id"] for record in datastore_records.records[class_name][table_name]},
                output_ids,
            )

    def test_collect_and_transfer(self) -> None:
        """Test for collect_and_transfer method"""
        quantum1 = self.make_quantum(1)
        qbb1 = QuantumBackedButler.initialize(
            config=self.config, quantum=quantum1, dimensions=self.universe, dataset_types=self.dataset_types
        )

        quantum2 = self.make_quantum(2)
        qbb2 = QuantumBackedButler.initialize(
            config=self.config, quantum=quantum2, dimensions=self.universe, dataset_types=self.dataset_types
        )

        # read/store everything
        for ref in self.input_refs:
            qbb1.get(ref)
        for ref in self.init_inputs_refs:
            qbb1.get(ref)
        for ref in self.output_refs:
            qbb1.put({"data": cast(int, ref.dataId["detector"]) ** 2}, ref)

        for ref in self.output_refs:
            qbb2.get(ref)
        for ref in self.output_refs2:
            qbb2.put({"data": cast(int, ref.dataId["detector"]) ** 3}, ref)

        QuantumProvenanceData.collect_and_transfer(
            self.butler,
            [quantum1, quantum2],
            [qbb1.extract_provenance_data(), qbb2.extract_provenance_data()],
        )

        # After the transfer both quanta's outputs must be readable through
        # the full butler.
        for ref in self.output_refs:
            data = self.butler.get(ref)
            self.assertEqual(data, {"data": cast(int, ref.dataId["detector"]) ** 2})

        for ref in self.output_refs2:
            data = self.butler.get(ref)
            self.assertEqual(data, {"data": cast(int, ref.dataId["detector"]) ** 3})
if __name__ == "__main__":
    # Allow running this test module directly with the unittest runner.
    unittest.main()