Coverage for tests/test_quantumBackedButler.py: 7% (233 statements)
coverage.py v7.2.7, created at 2023-07-21 09:55 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22import json
23import os
24import unittest
25import unittest.mock
26from typing import cast
28from lsst.daf.butler import (
29 Butler,
30 Config,
31 DatasetRef,
32 DatasetType,
33 DimensionUniverse,
34 Quantum,
35 QuantumBackedButler,
36 QuantumProvenanceData,
37 RegistryConfig,
38 StorageClass,
39)
40from lsst.daf.butler.registry import _RegistryFactory
41from lsst.daf.butler.tests.utils import makeTestTempDir, removeTestTempDir
42from lsst.resources import ResourcePath
44TESTDIR = os.path.abspath(os.path.dirname(__file__))
class QuantumBackedButlerTestCase(unittest.TestCase):
    """Test case for QuantumBackedButler.

    Builds a temporary full Butler repository in ``setUp`` and uses it both
    to create the datastore records that seed each ``QuantumBackedButler``
    and as the transfer destination in ``test_collect_and_transfer``.
    """

    def setUp(self) -> None:
        """Create a temporary repo, register dataset types, and store the
        input datasets; pre-build output/missing DatasetRefs for the tests.
        """
        self.root = makeTestTempDir(TESTDIR)
        self.config = Config()
        self.config["root"] = self.root
        self.universe = DimensionUniverse()

        # Make a butler and import dimension definitions.
        registryConfig = RegistryConfig(self.config.get("registry"))
        _RegistryFactory(registryConfig).create_from_config(butlerRoot=self.root)
        self.butler = Butler(self.config, writeable=True, run="RUN")
        self.butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))

        # Make all dataset types used by the tests; they share one dimension
        # graph (instrument+detector) and one storage class.
        graph = self.universe.extract(("instrument", "detector"))
        storageClass = StorageClass("StructuredDataDict")
        self.datasetTypeInit = DatasetType("test_ds_init", graph, storageClass)
        self.datasetTypeInput = DatasetType("test_ds_input", graph, storageClass)
        self.datasetTypeOutput = DatasetType("test_ds_output", graph, storageClass)
        self.datasetTypeOutput2 = DatasetType("test_ds_output2", graph, storageClass)
        self.datasetTypeExtra = DatasetType("test_ds_extra", graph, storageClass)

        # Mapping of dataset type name -> DatasetType, as required by the
        # QuantumBackedButler factory methods.
        self.dataset_types: dict[str, DatasetType] = {}
        dataset_types = (
            self.datasetTypeInit,
            self.datasetTypeInput,
            self.datasetTypeOutput,
            self.datasetTypeOutput2,
            self.datasetTypeExtra,
        )
        for dataset_type in dataset_types:
            self.butler.registry.registerDatasetType(dataset_type)
            self.dataset_types[dataset_type.name] = dataset_type

        dataIds = [
            self.butler.registry.expandDataId(dict(instrument="Cam1", detector=detector_id))
            for detector_id in (1, 2, 3, 4)
        ]

        # Make actual input datasets; each payload records its detector id so
        # tests can assert the round-tripped value.
        self.input_refs = [
            self.butler.put({"data": dataId["detector"]}, self.datasetTypeInput, dataId) for dataId in dataIds
        ]
        self.init_inputs_refs = [self.butler.put({"data": -1}, self.datasetTypeInit, dataIds[0])]
        self.all_input_refs = self.input_refs + self.init_inputs_refs

        # Generate dataset refs for outputs; these are not stored yet.
        self.output_refs = [DatasetRef(self.datasetTypeOutput, dataId, run="RUN") for dataId in dataIds]
        self.output_refs2 = [DatasetRef(self.datasetTypeOutput2, dataId, run="RUN") for dataId in dataIds]

        # Refs that are never stored anywhere -- used to exercise the
        # missing/unavailable code paths.
        self.missing_refs = [DatasetRef(self.datasetTypeExtra, dataId, run="RUN") for dataId in dataIds]

    def tearDown(self) -> None:
        removeTestTempDir(self.root)

    def make_quantum(self, step: int = 1) -> Quantum:
        """Make a Quantum which includes datastore records.

        Parameters
        ----------
        step : `int`
            ``1`` builds a quantum whose inputs are the datasets actually
            stored by ``setUp``; ``2`` builds a follow-on quantum that
            consumes step 1's (not-yet-stored) outputs, so its exported
            datastore records are empty.

        Raises
        ------
        ValueError
            Raised if ``step`` is not 1 or 2.
        """
        if step == 1:
            datastore_records = self.butler._datastore.export_records(self.all_input_refs)
            predictedInputs = {self.datasetTypeInput: self.input_refs}
            outputs = {self.datasetTypeOutput: self.output_refs}
            initInputs = {self.datasetTypeInit: self.init_inputs_refs[0]}
        elif step == 2:
            # The result should be empty, this is just to test that it works.
            datastore_records = self.butler._datastore.export_records(self.output_refs)
            predictedInputs = {self.datasetTypeInput: self.output_refs}
            outputs = {self.datasetTypeOutput2: self.output_refs2}
            initInputs = {}
        else:
            raise ValueError(f"unexpected {step} value")

        return Quantum(
            taskName="some.task.name",
            inputs=predictedInputs,
            outputs=outputs,
            initInputs=initInputs,
            datastore_records=datastore_records,
        )

    def test_initialize(self) -> None:
        """Test for initialize factory method."""
        quantum = self.make_quantum()
        qbb = QuantumBackedButler.initialize(
            config=self.config, quantum=quantum, dimensions=self.universe, dataset_types=self.dataset_types
        )
        self._test_factory(qbb)

    def test_initialize_repo_index(self) -> None:
        """Test for initialize using config file and repo index."""
        # Store config to a file.
        self.config.dumpToUri(self.root)

        # Build a repository index mapping the label "label" to the repo
        # root, and point the butler at it via the environment variable.
        butler_index = Config()
        butler_index["label"] = self.root
        with ResourcePath.temporary_uri(suffix=".yaml") as index_path:
            butler_index.dumpToUri(index_path)

            with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": str(index_path)}):
                quantum = self.make_quantum()
                qbb = QuantumBackedButler.initialize(
                    config="label",
                    quantum=quantum,
                    dimensions=self.universe,
                    dataset_types=self.dataset_types,
                )
                self._test_factory(qbb)

    def test_from_predicted(self) -> None:
        """Test for from_predicted factory method."""
        datastore_records = self.butler._datastore.export_records(self.all_input_refs)
        qbb = QuantumBackedButler.from_predicted(
            config=self.config,
            predicted_inputs=[ref.id for ref in self.all_input_refs],
            predicted_outputs=[ref.id for ref in self.output_refs],
            dimensions=self.universe,
            datastore_records=datastore_records,
            dataset_types=self.dataset_types,
        )
        self._test_factory(qbb)

    def _test_factory(self, qbb: QuantumBackedButler) -> None:
        """Test state immediately after construction: predicted ids are
        populated, all usage-tracking sets start out empty.
        """
        self.assertTrue(qbb.isWriteable())
        self.assertEqual(qbb._predicted_inputs, {ref.id for ref in self.all_input_refs})
        self.assertEqual(qbb._predicted_outputs, {ref.id for ref in self.output_refs})
        self.assertEqual(qbb._available_inputs, set())
        self.assertEqual(qbb._unavailable_inputs, set())
        self.assertEqual(qbb._actual_inputs, set())
        self.assertEqual(qbb._actual_output_refs, set())

    def test_getput(self) -> None:
        """Test for get/put methods."""
        quantum = self.make_quantum()
        qbb = QuantumBackedButler.initialize(
            config=self.config, quantum=quantum, dimensions=self.universe, dataset_types=self.dataset_types
        )

        # Verify all input data are readable.
        for ref in self.input_refs:
            data = qbb.get(ref)
            self.assertEqual(data, {"data": ref.dataId["detector"]})
        for ref in self.init_inputs_refs:
            data = qbb.get(ref)
            self.assertEqual(data, {"data": -1})
        for ref in self.missing_refs:
            with self.assertRaises(FileNotFoundError):
                data = qbb.get(ref)

        # Successful gets mark refs both available and actually used;
        # failed gets land in _unavailable_inputs.
        self.assertEqual(qbb._available_inputs, qbb._predicted_inputs)
        self.assertEqual(qbb._actual_inputs, qbb._predicted_inputs)
        self.assertEqual(qbb._unavailable_inputs, {ref.id for ref in self.missing_refs})

        # Write all expected outputs.
        for ref in self.output_refs:
            qbb.put({"data": cast(int, ref.dataId["detector"]) ** 2}, ref)

        # Must be able to read them back.
        for ref in self.output_refs:
            data = qbb.get(ref)
            self.assertEqual(data, {"data": cast(int, ref.dataId["detector"]) ** 2})

        self.assertEqual(qbb._actual_output_refs, set(self.output_refs))

    def test_getDeferred(self) -> None:
        """Test for getDeferred method."""
        quantum = self.make_quantum()
        qbb = QuantumBackedButler.initialize(
            config=self.config, quantum=quantum, dimensions=self.universe, dataset_types=self.dataset_types
        )

        # Get some (not all) input data through deferred handles.
        input_refs = self.input_refs[:2]
        for ref in input_refs:
            data = qbb.getDeferred(ref)
            self.assertEqual(data.get(), {"data": ref.dataId["detector"]})
        for ref in self.init_inputs_refs:
            data = qbb.getDeferred(ref)
            self.assertEqual(data.get(), {"data": -1})
        for ref in self.missing_refs:
            data = qbb.getDeferred(ref)
            with self.assertRaises(FileNotFoundError):
                data.get()

        # Only the refs actually read (plus the init-inputs) are recorded as
        # available/actual -- not the full predicted set; the missing refs
        # end up in _unavailable_inputs.
        self.assertEqual(qbb._available_inputs, {ref.id for ref in input_refs + self.init_inputs_refs})
        self.assertEqual(qbb._actual_inputs, {ref.id for ref in input_refs + self.init_inputs_refs})
        self.assertEqual(qbb._unavailable_inputs, {ref.id for ref in self.missing_refs})

    def test_datasetExistsDirect(self) -> None:
        """Test for dataset existence method."""
        quantum = self.make_quantum()
        qbb = QuantumBackedButler.initialize(
            config=self.config, quantum=quantum, dimensions=self.universe, dataset_types=self.dataset_types
        )

        # Check existence of some input data.
        input_refs = self.input_refs[:2]
        for ref in input_refs:
            exists = qbb.stored(ref)
            self.assertTrue(exists)
        for ref in self.init_inputs_refs:
            exists = qbb.stored(ref)
            self.assertTrue(exists)
        for ref in self.missing_refs:
            exists = qbb.stored(ref)
            self.assertFalse(exists)

        # Now do the same checks in bulk.
        missing_set = set(self.missing_refs)
        refs = input_refs + self.init_inputs_refs + self.missing_refs
        stored_many = qbb.stored_many(refs)
        for ref, stored in stored_many.items():
            if ref in missing_set:
                self.assertFalse(stored)
            else:
                self.assertTrue(stored)

        # stored()/stored_many() update _available_inputs for refs found,
        # but do not mark anything as actually used, and -- unlike get() --
        # do not record missing refs in _unavailable_inputs.
        self.assertEqual(qbb._available_inputs, {ref.id for ref in input_refs + self.init_inputs_refs})
        self.assertEqual(qbb._actual_inputs, set())
        self.assertEqual(qbb._unavailable_inputs, set())  # this is not consistent with getDirect?

    def test_markInputUnused(self) -> None:
        """Test for markInputUnused method."""
        quantum = self.make_quantum()
        qbb = QuantumBackedButler.initialize(
            config=self.config, quantum=quantum, dimensions=self.universe, dataset_types=self.dataset_types
        )

        # Read all input data so everything is marked as actually used.
        for ref in self.input_refs:
            data = qbb.get(ref)
            self.assertEqual(data, {"data": ref.dataId["detector"]})
        for ref in self.init_inputs_refs:
            data = qbb.get(ref)
            self.assertEqual(data, {"data": -1})

        self.assertEqual(qbb._available_inputs, qbb._predicted_inputs)
        self.assertEqual(qbb._actual_inputs, qbb._predicted_inputs)

        # Marking an input unused removes it from _actual_inputs only.
        qbb.markInputUnused(self.input_refs[0])
        self.assertEqual(qbb._actual_inputs, {ref.id for ref in self.input_refs[1:] + self.init_inputs_refs})

    def test_pruneDatasets(self) -> None:
        """Test for pruneDatasets methods."""
        quantum = self.make_quantum()
        qbb = QuantumBackedButler.initialize(
            config=self.config, quantum=quantum, dimensions=self.universe, dataset_types=self.dataset_types
        )

        # Write all expected outputs.
        for ref in self.output_refs:
            qbb.put({"data": cast(int, ref.dataId["detector"]) ** 2}, ref)

        # Must be able to read them back.
        for ref in self.output_refs:
            data = qbb.get(ref)
            self.assertEqual(data, {"data": cast(int, ref.dataId["detector"]) ** 2})

        # Check for invalid arguments.
        with self.assertRaisesRegex(TypeError, "Cannot pass purge=True without disassociate=True"):
            qbb.pruneDatasets(self.output_refs, disassociate=False, unstore=True, purge=True)
        with self.assertRaisesRegex(TypeError, "Cannot pass purge=True without unstore=True"):
            qbb.pruneDatasets(self.output_refs, disassociate=True, unstore=False, purge=True)
        with self.assertRaisesRegex(TypeError, "Cannot pass disassociate=True without purge=True"):
            qbb.pruneDatasets(self.output_refs, disassociate=True, unstore=True, purge=False)

        # Unstore only.
        ref = self.output_refs[0]
        qbb.pruneDatasets([ref], disassociate=False, unstore=True, purge=False)
        self.assertFalse(qbb.stored(ref))
        with self.assertRaises(FileNotFoundError):
            data = qbb.get(ref)

        # Can store it again after pruning.
        qbb.put({"data": cast(int, ref.dataId["detector"]) ** 2}, ref)
        self.assertTrue(qbb.stored(ref))

        # Purge completely.
        ref = self.output_refs[1]
        qbb.pruneDatasets([ref], disassociate=True, unstore=True, purge=True)
        self.assertFalse(qbb.stored(ref))
        with self.assertRaises(FileNotFoundError):
            data = qbb.get(ref)
        qbb.put({"data": cast(int, ref.dataId["detector"]) ** 2}, ref)
        self.assertTrue(qbb.stored(ref))

    def test_extract_provenance_data(self) -> None:
        """Test for extract_provenance_data method."""
        quantum = self.make_quantum()
        qbb = QuantumBackedButler.initialize(
            config=self.config, quantum=quantum, dimensions=self.universe, dataset_types=self.dataset_types
        )

        # Read every input and store every output so the provenance covers
        # the full predicted sets.
        for ref in self.input_refs:
            qbb.get(ref)
        for ref in self.init_inputs_refs:
            qbb.get(ref)
        for ref in self.output_refs:
            qbb.put({"data": cast(int, ref.dataId["detector"]) ** 2}, ref)

        # Provenance must survive a JSON round trip unchanged; check both
        # the original and the rebuilt copy.
        provenance1 = qbb.extract_provenance_data()
        prov_json = provenance1.json()
        provenance2 = QuantumProvenanceData.direct(**json.loads(prov_json))
        for provenance in (provenance1, provenance2):
            input_ids = {ref.id for ref in self.input_refs + self.init_inputs_refs}
            self.assertEqual(provenance.predicted_inputs, input_ids)
            self.assertEqual(provenance.available_inputs, input_ids)
            self.assertEqual(provenance.actual_inputs, input_ids)
            output_ids = {ref.id for ref in self.output_refs}
            self.assertEqual(provenance.predicted_outputs, output_ids)
            self.assertEqual(provenance.actual_outputs, output_ids)
            # Datastore records are keyed by datastore name, then by the
            # record class, then by table name.
            datastore_name = "FileDatastore@<butlerRoot>/datastore"
            self.assertEqual(set(provenance.datastore_records.keys()), {datastore_name})
            datastore_records = provenance.datastore_records[datastore_name]
            self.assertEqual(set(datastore_records.dataset_ids), output_ids)
            class_name = "lsst.daf.butler.core.storedFileInfo.StoredFileInfo"
            table_name = "file_datastore_records"
            self.assertEqual(set(datastore_records.records.keys()), {class_name})
            self.assertEqual(set(datastore_records.records[class_name].keys()), {table_name})
            self.assertEqual(
                {record["dataset_id"] for record in datastore_records.records[class_name][table_name]},
                output_ids,
            )

    def test_collect_and_transfer(self) -> None:
        """Test for collect_and_transfer method."""
        # Quantum 2 consumes the outputs of quantum 1.
        quantum1 = self.make_quantum(1)
        qbb1 = QuantumBackedButler.initialize(
            config=self.config, quantum=quantum1, dimensions=self.universe, dataset_types=self.dataset_types
        )

        quantum2 = self.make_quantum(2)
        qbb2 = QuantumBackedButler.initialize(
            config=self.config, quantum=quantum2, dimensions=self.universe, dataset_types=self.dataset_types
        )

        # Run the two-step "pipeline": read/store everything.
        for ref in self.input_refs:
            qbb1.get(ref)
        for ref in self.init_inputs_refs:
            qbb1.get(ref)
        for ref in self.output_refs:
            qbb1.put({"data": cast(int, ref.dataId["detector"]) ** 2}, ref)

        for ref in self.output_refs:
            qbb2.get(ref)
        for ref in self.output_refs2:
            qbb2.put({"data": cast(int, ref.dataId["detector"]) ** 3}, ref)

        # Transfer everything produced by both quanta into the full butler.
        QuantumProvenanceData.collect_and_transfer(
            self.butler,
            [quantum1, quantum2],
            [qbb1.extract_provenance_data(), qbb2.extract_provenance_data()],
        )

        # Outputs of both steps must now be readable from the full butler.
        for ref in self.output_refs:
            data = self.butler.get(ref)
            self.assertEqual(data, {"data": cast(int, ref.dataId["detector"]) ** 2})

        for ref in self.output_refs2:
            data = self.butler.get(ref)
            self.assertEqual(data, {"data": cast(int, ref.dataId["detector"]) ** 3})
# Allow the tests to be run directly as a script.
if __name__ == "__main__":
    unittest.main()