# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

import os
import pickle
import shutil
import tempfile
import time
import unittest
import unittest.mock
import uuid
from collections.abc import Callable
from typing import Any, cast

import lsst.utils.tests
import yaml
from lsst.daf.butler import (
    Config,
    DataCoordinate,
    DatasetIdGenEnum,
    DatasetRef,
    DatasetType,
    DatasetTypeNotSupportedError,
    Datastore,
    DimensionUniverse,
    FileDataset,
    StorageClass,
    StorageClassFactory,
)
from lsst.daf.butler.datastore import DatasetRefURIs, DatastoreConfig, DatastoreValidationError, NullDatastore
from lsst.daf.butler.datastore.cache_manager import (
    DatastoreCacheManager,
    DatastoreCacheManagerConfig,
    DatastoreDisabledCacheManager,
)
from lsst.daf.butler.datastore.stored_file_info import StoredFileInfo
from lsst.daf.butler.formatters.yaml import YamlFormatter
from lsst.daf.butler.tests import (
    BadNoWriteFormatter,
    BadWriteFormatter,
    DatasetTestHelper,
    DatastoreTestHelper,
    DummyRegistry,
    MetricsExample,
    MetricsExampleDataclass,
    MetricsExampleModel,
)
from lsst.daf.butler.tests.dict_convertible_model import DictConvertibleModel
from lsst.daf.butler.tests.utils import TestCaseMixin
from lsst.resources import ResourcePath
from lsst.utils import doImport

TESTDIR = os.path.dirname(__file__)


def makeExampleMetrics(use_none: bool = False) -> MetricsExample:
    """Make example dataset that can be stored in butler."""
    if use_none:
        array = None
    else:
        array = [563, 234, 456.7, 105, 2054, -1045]
    return MetricsExample(
        {"AM1": 5.2, "AM2": 30.6},
        {"a": [1, 2, 3], "b": {"blue": 5, "red": "green"}},
        array,
    )
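
# A usage sketch for the helper above: the positional arguments appear to map
# onto the summary, output, and data components of MetricsExample, matching
# the keyword form MetricsExample(summary=..., data=..., output=...) used
# later in this file.
#
#     metrics = makeExampleMetrics()
#     metrics.data[:4]  # -> [563, 234, 456.7, 105], as sliced in testDisassembly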


class TransactionTestError(Exception):
    """Specific error for transactions, to prevent the misdiagnosis
    that might otherwise occur when a standard exception is used.
    """

    pass


class DatastoreTestsBase(DatasetTestHelper, DatastoreTestHelper, TestCaseMixin):
    """Support routines for datastore testing"""

    root: str | None = None
    universe: DimensionUniverse
    storageClassFactory: StorageClassFactory

    @classmethod
    def setUpClass(cls) -> None:
        # Storage Classes are fixed for all datastores in these tests
        scConfigFile = os.path.join(TESTDIR, "config/basic/storageClasses.yaml")
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(scConfigFile)

        # Read the Datastore config so we can get the class
        # information (since we should not assume the constructor
        # name here, but rely on the configuration file itself)
        datastoreConfig = DatastoreConfig(cls.configFile)
        cls.datastoreType = cast(type[Datastore], doImport(datastoreConfig["cls"]))
        cls.universe = DimensionUniverse()

    def setUp(self) -> None:
        self.setUpDatastoreTests(DummyRegistry, DatastoreConfig)

    def tearDown(self) -> None:
        if self.root is not None and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)


class DatastoreTests(DatastoreTestsBase):
    """Some basic tests of a simple datastore."""

    hasUnsupportedPut = True
    rootKeys: tuple[str, ...] | None = None
    isEphemeral: bool = False
    validationCanFail: bool = False

    def testConfigRoot(self) -> None:
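        """Check that setConfigRoot() injects a new root into the expected
        configuration keys.
        """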
        full = DatastoreConfig(self.configFile)
        config = DatastoreConfig(self.configFile, mergeDefaults=False)
        newroot = "/random/location"
        self.datastoreType.setConfigRoot(newroot, config, full)
        if self.rootKeys:
            for k in self.rootKeys:
                self.assertIn(newroot, config[k])

    def testConstructor(self) -> None:
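        """Check that a datastore can be constructed and reports the expected
        ephemeral status.
        """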
        datastore = self.makeDatastore()
        self.assertIsNotNone(datastore)
        self.assertIs(datastore.isEphemeral, self.isEphemeral)

    def testConfigurationValidation(self) -> None:
        datastore = self.makeDatastore()
        sc = self.storageClassFactory.getStorageClass("ThingOne")
        datastore.validateConfiguration([sc])

        sc2 = self.storageClassFactory.getStorageClass("ThingTwo")
        if self.validationCanFail:
            with self.assertRaises(DatastoreValidationError):
                datastore.validateConfiguration([sc2], logFailures=True)

        dimensions = self.universe.conform(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V", "band": "v"}
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId)
        datastore.validateConfiguration([ref])

    def testParameterValidation(self) -> None:
        """Check that parameters are validated"""
        sc = self.storageClassFactory.getStorageClass("ThingOne")
        dimensions = self.universe.conform(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V", "band": "v"}
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId)
        datastore = self.makeDatastore()
        data = {1: 2, 3: 4}
        datastore.put(data, ref)
        newdata = datastore.get(ref)
        self.assertEqual(data, newdata)
        with self.assertRaises(KeyError):
            newdata = datastore.get(ref, parameters={"missing": 5})

    def testBasicPutGet(self) -> None:
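        """Put and get datasets through several storage classes, checking
        existence, URI handling, and component access along the way.
        """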
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        # Create multiple storage classes for testing different formulations
        storageClasses = [
            self.storageClassFactory.getStorageClass(sc)
            for sc in ("StructuredData", "StructuredDataJson", "StructuredDataPickle")
        ]

        dimensions = self.universe.conform(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V", "band": "v"}
        dataId2 = {"instrument": "dummy", "visit": 53, "physical_filter": "V", "band": "v"}

        for sc in storageClasses:
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId)
            ref2 = self.makeDatasetRef("metric", dimensions, sc, dataId2)

            # Make sure that using getManyURIs without predicting before the
            # dataset has been put raises.
            with self.assertRaises(FileNotFoundError):
                datastore.getManyURIs([ref], predict=False)

            # Make sure that using getManyURIs with predicting before the
            # dataset has been put predicts the URI.
            uris = datastore.getManyURIs([ref, ref2], predict=True)
            self.assertIn("52", uris[ref].primaryURI.geturl())
            self.assertIn("#predicted", uris[ref].primaryURI.geturl())
            self.assertIn("53", uris[ref2].primaryURI.geturl())
            self.assertIn("#predicted", uris[ref2].primaryURI.geturl())

            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))
            self.assertTrue(datastore.knows(ref))
            multi = datastore.knows_these([ref])
            self.assertTrue(multi[ref])
            multi = datastore.mexists([ref, ref2])
            self.assertTrue(multi[ref])
            self.assertFalse(multi[ref2])

            # Get
            metricsOut = datastore.get(ref, parameters=None)
            self.assertEqual(metrics, metricsOut)

            uri = datastore.getURI(ref)
            self.assertEqual(uri.scheme, self.uriScheme)

            uris = datastore.getManyURIs([ref])
            self.assertEqual(len(uris), 1)
            ref, uri = uris.popitem()
            self.assertTrue(uri.primaryURI.exists())
            self.assertFalse(uri.componentURIs)

            # Get a component -- we need to construct new refs for them
            # with derived storage classes but with parent ID
            for comp in ("data", "output"):
                compRef = ref.makeComponentRef(comp)
                output = datastore.get(compRef)
                self.assertEqual(output, getattr(metricsOut, comp))

                uri = datastore.getURI(compRef)
                self.assertEqual(uri.scheme, self.uriScheme)

                uris = datastore.getManyURIs([compRef])
                self.assertEqual(len(uris), 1)

        storageClass = sc

        # Check that we can put a metric with None in a component and
        # get it back as None
        metricsNone = makeExampleMetrics(use_none=True)
        dataIdNone = {"instrument": "dummy", "visit": 54, "physical_filter": "V", "band": "v"}
        refNone = self.makeDatasetRef("metric", dimensions, sc, dataIdNone)
        datastore.put(metricsNone, refNone)

        for comp in ("data", "output"):
            compRef = refNone.makeComponentRef(comp)
            output = datastore.get(compRef)
            self.assertEqual(output, getattr(metricsNone, comp))

        # Check that a put fails if the dataset type is not supported
        if self.hasUnsupportedPut:
            sc = StorageClass("UnsupportedSC", pytype=type(metrics))
            ref = self.makeDatasetRef("unsupportedType", dimensions, sc, dataId)
            with self.assertRaises(DatasetTypeNotSupportedError):
                datastore.put(metrics, ref)

        # These should raise
        ref = self.makeDatasetRef("metrics", dimensions, storageClass, dataId)
        with self.assertRaises(FileNotFoundError):
            # non-existing file
            datastore.get(ref)

        # Get a URI from it
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.scheme, self.uriScheme)

        with self.assertRaises(FileNotFoundError):
            datastore.getURI(ref)

    def testTrustGetRequest(self) -> None:
        """Check that we can get datasets that registry knows nothing about."""
        datastore = self.makeDatastore()

        # Skip test if the attribute is not defined
        if not hasattr(datastore, "trustGetRequest"):
            return

        metrics = makeExampleMetrics()

        i = 0
        for sc_name in ("StructuredDataNoComponents", "StructuredData", "StructuredComposite"):
            i += 1
            datasetTypeName = f"test_metric{i}"  # Different dataset type name each time.

            if sc_name == "StructuredComposite":
                disassembled = True
            else:
                disassembled = False

            # Start datastore in default configuration of using registry
            datastore.trustGetRequest = False

            # Create multiple storage classes for testing with or without
            # disassembly
            sc = self.storageClassFactory.getStorageClass(sc_name)
            dimensions = self.universe.conform(("visit", "physical_filter"))

            dataId = {"instrument": "dummy", "visit": 52 + i, "physical_filter": "V", "band": "v"}

            ref = self.makeDatasetRef(datasetTypeName, dimensions, sc, dataId)
            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))
            self.assertTrue(datastore.knows(ref))
            multi = datastore.knows_these([ref])
            self.assertTrue(multi[ref])
            multi = datastore.mexists([ref])
            self.assertTrue(multi[ref])

            # Get
            metricsOut = datastore.get(ref)
            self.assertEqual(metrics, metricsOut)

            # Get the URI(s)
            primaryURI, componentURIs = datastore.getURIs(ref)
            if disassembled:
                self.assertIsNone(primaryURI)
                self.assertEqual(len(componentURIs), 3)
            else:
                self.assertIn(datasetTypeName, primaryURI.path)
                self.assertFalse(componentURIs)

            # Delete the registry entry so that now we are trusting
            datastore.removeStoredItemInfo(ref)

            # Now stop trusting and check that things break
            datastore.trustGetRequest = False

            # Does it exist?
            self.assertFalse(datastore.exists(ref))
            self.assertFalse(datastore.knows(ref))
            multi = datastore.knows_these([ref])
            self.assertFalse(multi[ref])
            multi = datastore.mexists([ref])
            self.assertFalse(multi[ref])

            with self.assertRaises(FileNotFoundError):
                datastore.get(ref)

            if sc_name != "StructuredDataNoComponents":
                with self.assertRaises(FileNotFoundError):
                    datastore.get(ref.makeComponentRef("data"))

            # URI should fail unless we ask for prediction
            with self.assertRaises(FileNotFoundError):
                datastore.getURIs(ref)

            predicted_primary, predicted_disassembled = datastore.getURIs(ref, predict=True)
            if disassembled:
                self.assertIsNone(predicted_primary)
                self.assertEqual(len(predicted_disassembled), 3)
                for uri in predicted_disassembled.values():
                    self.assertEqual(uri.fragment, "predicted")
                    self.assertIn(datasetTypeName, uri.path)
            else:
                self.assertIn(datasetTypeName, predicted_primary.path)
                self.assertFalse(predicted_disassembled)
                self.assertEqual(predicted_primary.fragment, "predicted")

            # Now enable registry-free trusting mode
            datastore.trustGetRequest = True

            # Try again to get it
            metricsOut = datastore.get(ref)
            self.assertEqual(metricsOut, metrics)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))

            # Get a component
            if sc_name != "StructuredDataNoComponents":
                comp = "data"
                compRef = ref.makeComponentRef(comp)
                output = datastore.get(compRef)
                self.assertEqual(output, getattr(metrics, comp))

            # Get the URI -- if we trust this should work even without
            # enabling prediction.
            primaryURI2, componentURIs2 = datastore.getURIs(ref)
            self.assertEqual(primaryURI2, primaryURI)
            self.assertEqual(componentURIs2, componentURIs)

            # Check for compatible storage class.
            if sc_name in ("StructuredDataNoComponents", "StructuredData"):
                # Make a new dataset ref with a compatible storage class.
                ref_comp = ref.overrideStorageClass("StructuredDataDictJson")

                # Without `set_retrieve_dataset_type_method` it will fail to
                # find the correct file.
                self.assertFalse(datastore.exists(ref_comp))
                with self.assertRaises(FileNotFoundError):
                    datastore.get(ref_comp)
                with self.assertRaises(FileNotFoundError):
                    datastore.get(ref, storageClass="StructuredDataDictJson")

                # Need a special method to generate the stored dataset type.
                def _stored_dataset_type(name: str, ref: DatasetRef = ref) -> DatasetType:
                    if name == ref.datasetType.name:
                        return ref.datasetType
                    raise ValueError(f"Unexpected dataset type name {name}")

                datastore.set_retrieve_dataset_type_method(_stored_dataset_type)

                # Storage class override with original dataset ref.
                metrics_as_dict = datastore.get(ref, storageClass="StructuredDataDictJson")
                self.assertIsInstance(metrics_as_dict, dict)

                # get() should return a dict now.
                metrics_as_dict = datastore.get(ref_comp)
                self.assertIsInstance(metrics_as_dict, dict)

                # exists() should work as well.
                self.assertTrue(datastore.exists(ref_comp))

                datastore.set_retrieve_dataset_type_method(None)

    def testDisassembly(self) -> None:
        """Test disassembly within datastore."""
        metrics = makeExampleMetrics()
        if self.isEphemeral:
            # in-memory datastore does not disassemble
            return

        # Create multiple storage classes for testing different formulations
        # of composites. One of these will not disassemble to provide
        # a reference.
        storageClasses = [
            self.storageClassFactory.getStorageClass(sc)
            for sc in (
                "StructuredComposite",
                "StructuredCompositeTestA",
                "StructuredCompositeTestB",
                "StructuredCompositeReadComp",
                "StructuredData",  # No disassembly
                "StructuredCompositeReadCompNoDisassembly",
            )
        ]

        # Create the test datastore
        datastore = self.makeDatastore()

        # Dummy dataId
        dimensions = self.universe.conform(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 428, "physical_filter": "R"}

        for i, sc in enumerate(storageClasses):
            with self.subTest(storageClass=sc.name):
                # Create a different dataset type each time round
                # so that a test failure in this subtest does not trigger
                # a cascade of failures because of file clashes
                ref = self.makeDatasetRef(f"metric_comp_{i}", dimensions, sc, dataId)

                disassembled = sc.name not in {"StructuredData", "StructuredCompositeReadCompNoDisassembly"}

                datastore.put(metrics, ref)

                baseURI, compURIs = datastore.getURIs(ref)
                if disassembled:
                    self.assertIsNone(baseURI)
                    self.assertEqual(set(compURIs), {"data", "output", "summary"})
                else:
                    self.assertIsNotNone(baseURI)
                    self.assertEqual(compURIs, {})

                metrics_get = datastore.get(ref)
                self.assertEqual(metrics_get, metrics)

                # Retrieve the composite with read parameter
                stop = 4
                metrics_get = datastore.get(ref, parameters={"slice": slice(stop)})
                self.assertEqual(metrics_get.summary, metrics.summary)
                self.assertEqual(metrics_get.output, metrics.output)
                self.assertEqual(metrics_get.data, metrics.data[:stop])

                # Retrieve a component
                data = datastore.get(ref.makeComponentRef("data"))
                self.assertEqual(data, metrics.data)

                # On supported storage classes attempt to access a
                # read-only component
                if "ReadComp" in sc.name:
                    cRef = ref.makeComponentRef("counter")
                    counter = datastore.get(cRef)
                    self.assertEqual(counter, len(metrics.data))

                    counter = datastore.get(cRef, parameters={"slice": slice(stop)})
                    self.assertEqual(counter, stop)

                datastore.remove(ref)

    def prepDeleteTest(self, n_refs: int = 1) -> tuple[Datastore, *tuple[DatasetRef, ...]]:
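        """Put ``n_refs`` example datasets and return the datastore followed
        by the refs, flattened into one tuple so that callers can unpack with
        ``datastore, ref = ...`` or ``datastore, *refs = ...``.
        """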
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()
        # Put
        dimensions = self.universe.conform(("visit", "physical_filter"))
        sc = self.storageClassFactory.getStorageClass("StructuredData")
        refs = []
        for i in range(n_refs):
            dataId = {"instrument": "dummy", "visit": 638 + i, "physical_filter": "U", "band": "u"}
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId)
            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))

            # Get
            metricsOut = datastore.get(ref)
            self.assertEqual(metrics, metricsOut)
            refs.append(ref)

        return datastore, *refs

    def testRemove(self) -> None:
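        """Remove a stored dataset and verify that it is really gone."""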
        datastore, ref = self.prepDeleteTest()

        # Remove
        datastore.remove(ref)

        # Does it exist?
        self.assertFalse(datastore.exists(ref))

        # Do we now get a predicted URI?
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.fragment, "predicted")

        # Get should now fail
        with self.assertRaises(FileNotFoundError):
            datastore.get(ref)
        # Can only delete once
        with self.assertRaises(FileNotFoundError):
            datastore.remove(ref)

    def testForget(self) -> None:
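        """Forget a dataset: the datastore record disappears but the
        underlying file is left in place.
        """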
        datastore, ref = self.prepDeleteTest()

        # Remove
        datastore.forget([ref])

        # Does it exist (as far as we know)?
        self.assertFalse(datastore.exists(ref))

        # Do we now get a predicted URI?
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.fragment, "predicted")

        # Get should now fail
        with self.assertRaises(FileNotFoundError):
            datastore.get(ref)

        # Forgetting again is a silent no-op
        datastore.forget([ref])

        # Predicted URI should still point to the file.
        self.assertTrue(uri.exists())

    def testTransfer(self) -> None:
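        """Transfer a dataset from one datastore to another."""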
        metrics = makeExampleMetrics()

        dimensions = self.universe.conform(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 2048, "physical_filter": "Uprime", "band": "u"}

        sc = self.storageClassFactory.getStorageClass("StructuredData")
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId)

        inputDatastore = self.makeDatastore("test_input_datastore")
        outputDatastore = self.makeDatastore("test_output_datastore")

        inputDatastore.put(metrics, ref)
        outputDatastore.transfer(inputDatastore, ref)

        metricsOut = outputDatastore.get(ref)
        self.assertEqual(metrics, metricsOut)

    def testBasicTransaction(self) -> None:
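        """Datasets put inside a successful transaction persist; datasets put
        inside a transaction that raises are rolled back.
        """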
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.conform(("visit", "physical_filter"))
        nDatasets = 6
        dataIds = [
            {"instrument": "dummy", "visit": i, "physical_filter": "V", "band": "v"} for i in range(nDatasets)
        ]
        data = [
            (
                self.makeDatasetRef("metric", dimensions, storageClass, dataId),
                makeExampleMetrics(),
            )
            for dataId in dataIds
        ]
        succeed = data[: nDatasets // 2]
        fail = data[nDatasets // 2 :]
        # All datasets added in this transaction should continue to exist
        with datastore.transaction():
            for ref, metrics in succeed:
                datastore.put(metrics, ref)
        # Whereas datasets added in this transaction should not
        with self.assertRaises(TransactionTestError):
            with datastore.transaction():
                for ref, metrics in fail:
                    datastore.put(metrics, ref)
                raise TransactionTestError("This should propagate out of the context manager")
        # Check for datasets that should exist
        for ref, metrics in succeed:
            # Does it exist?
            self.assertTrue(datastore.exists(ref))
            # Get
            metricsOut = datastore.get(ref, parameters=None)
            self.assertEqual(metrics, metricsOut)
            # URI
            uri = datastore.getURI(ref)
            self.assertEqual(uri.scheme, self.uriScheme)
        # Check for datasets that should not exist
        for ref, _ in fail:
            # These should raise
            with self.assertRaises(FileNotFoundError):
                # non-existing file
                datastore.get(ref)
            with self.assertRaises(FileNotFoundError):
                datastore.getURI(ref)

    def testNestedTransaction(self) -> None:
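        """An error raised in an outer transaction also rolls back datasets
        put inside nested inner transactions.
        """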
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.conform(("visit", "physical_filter"))
        metrics = makeExampleMetrics()

        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V", "band": "v"}
        refBefore = self.makeDatasetRef("metric", dimensions, storageClass, dataId)
        datastore.put(metrics, refBefore)
        with self.assertRaises(TransactionTestError):
            with datastore.transaction():
                dataId = {"instrument": "dummy", "visit": 1, "physical_filter": "V", "band": "v"}
                refOuter = self.makeDatasetRef("metric", dimensions, storageClass, dataId)
                datastore.put(metrics, refOuter)
                with datastore.transaction():
                    dataId = {"instrument": "dummy", "visit": 2, "physical_filter": "V", "band": "v"}
                    refInner = self.makeDatasetRef("metric", dimensions, storageClass, dataId)
                    datastore.put(metrics, refInner)
                # All datasets should exist
                for ref in (refBefore, refOuter, refInner):
                    metricsOut = datastore.get(ref, parameters=None)
                    self.assertEqual(metrics, metricsOut)
                raise TransactionTestError("This should roll back the transaction")
        # Dataset(s) inserted before the transaction should still exist
        metricsOut = datastore.get(refBefore, parameters=None)
        self.assertEqual(metrics, metricsOut)
        # But all datasets inserted during the (rolled back) transaction
        # should be gone
        with self.assertRaises(FileNotFoundError):
            datastore.get(refOuter)
        with self.assertRaises(FileNotFoundError):
            datastore.get(refInner)

    def _prepareIngestTest(self) -> tuple[MetricsExample, DatasetRef]:
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.conform(("visit", "physical_filter"))
        metrics = makeExampleMetrics()
        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V", "band": "v"}
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId)
        return metrics, ref

    def runIngestTest(
        self, func: Callable[[MetricsExample, str, DatasetRef], None], expectOutput: bool = True
    ) -> None:
        metrics, ref = self._prepareIngestTest()
        # The file will be deleted after the test.
        # For symlink tests this leads to a situation where the datastore
        # points to a file that does not exist. This will make os.path.exists
        # return False, but the new symlink will then fail with
        # FileExistsError later in the code, so the test still passes.
        with lsst.utils.tests.getTempFilePath(".yaml", expectOutput=expectOutput) as path:
            with open(path, "w") as fd:
                yaml.dump(metrics._asdict(), stream=fd)
            func(metrics, path, ref)

    def testIngestNoTransfer(self) -> None:
        """Test ingesting existing files with no transfer."""
        for mode in (None, "auto"):
            # Some datastores have auto but can't do in place transfer
            if mode == "auto" and "auto" in self.ingestTransferModes and not self.canIngestNoTransferAuto:
                continue

            with self.subTest(mode=mode):
                datastore = self.makeDatastore()

                def succeed(
                    obj: MetricsExample,
                    path: str,
                    ref: DatasetRef,
                    mode: str | None = mode,
                    datastore: Datastore = datastore,
                ) -> None:
                    """Ingest a file already in the datastore root."""
                    # first move it into the root, and adjust the path
                    # accordingly
                    path = shutil.copy(path, datastore.root.ospath)
                    path = os.path.relpath(path, start=datastore.root.ospath)
                    datastore.ingest(FileDataset(path=path, refs=ref), transfer=mode)
                    self.assertEqual(obj, datastore.get(ref))

                def failInputDoesNotExist(
                    obj: MetricsExample,
                    path: str,
                    ref: DatasetRef,
                    mode: str | None = mode,
                    datastore: Datastore = datastore,
                ) -> None:
                    """Can't ingest files if we're given a bad path."""
                    with self.assertRaises(FileNotFoundError):
                        datastore.ingest(
                            FileDataset(path="this-file-does-not-exist.yaml", refs=ref), transfer=mode
                        )
                    self.assertFalse(datastore.exists(ref))

                def failOutsideRoot(
                    obj: MetricsExample,
                    path: str,
                    ref: DatasetRef,
                    mode: str | None = mode,
                    datastore: Datastore = datastore,
                ) -> None:
                    """Can't ingest files outside of datastore root unless
                    auto.
                    """
                    if mode == "auto":
                        datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                        self.assertTrue(datastore.exists(ref))
                    else:
                        with self.assertRaises(RuntimeError):
                            datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                        self.assertFalse(datastore.exists(ref))

                def failNotImplemented(
                    obj: MetricsExample,
                    path: str,
                    ref: DatasetRef,
                    mode: str | None = mode,
                    datastore: Datastore = datastore,
                ) -> None:
                    with self.assertRaises(NotImplementedError):
                        datastore.ingest(FileDataset(path=path, refs=ref), transfer=mode)

                if mode in self.ingestTransferModes:
                    self.runIngestTest(failOutsideRoot)
                    self.runIngestTest(failInputDoesNotExist)
                    self.runIngestTest(succeed)
                else:
                    self.runIngestTest(failNotImplemented)

    def testIngestTransfer(self) -> None:
        """Test ingesting existing files after transferring them."""
        for mode in ("copy", "move", "link", "hardlink", "symlink", "relsymlink", "auto"):
            with self.subTest(mode=mode):
                datastore = self.makeDatastore(mode)

                def succeed(
                    obj: MetricsExample,
                    path: str,
                    ref: DatasetRef,
                    mode: str | None = mode,
                    datastore: Datastore = datastore,
                ) -> None:
                    """Ingest a file by transferring it to the template
                    location.
                    """
                    datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                    self.assertEqual(obj, datastore.get(ref))

                def failInputDoesNotExist(
                    obj: MetricsExample,
                    path: str,
                    ref: DatasetRef,
                    mode: str | None = mode,
                    datastore: Datastore = datastore,
                ) -> None:
                    """Can't ingest files if we're given a bad path."""
                    with self.assertRaises(FileNotFoundError):
                        # Ensure the file does not look like it is in
                        # datastore for auto mode
                        datastore.ingest(
                            FileDataset(path="../this-file-does-not-exist.yaml", refs=ref), transfer=mode
                        )
                    self.assertFalse(datastore.exists(ref), f"Checking not in datastore using mode {mode}")

                def failNotImplemented(
                    obj: MetricsExample,
                    path: str,
                    ref: DatasetRef,
                    mode: str | None = mode,
                    datastore: Datastore = datastore,
                ) -> None:
                    with self.assertRaises(NotImplementedError):
                        datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)

                if mode in self.ingestTransferModes:
                    self.runIngestTest(failInputDoesNotExist)
                    self.runIngestTest(succeed, expectOutput=(mode != "move"))
                else:
                    self.runIngestTest(failNotImplemented)

    def testIngestSymlinkOfSymlink(self) -> None:
        """Special test for symlink to a symlink ingest"""
        metrics, ref = self._prepareIngestTest()
        # The aim of this test is to create a dataset on disk, then
        # create a symlink to it and finally ingest the symlink such that
        # the symlink in the datastore points to the original dataset.
        for mode in ("symlink", "relsymlink"):
            if mode not in self.ingestTransferModes:
                continue

            print(f"Trying mode {mode}")
            with lsst.utils.tests.getTempFilePath(".yaml") as realpath:
                with open(realpath, "w") as fd:
                    yaml.dump(metrics._asdict(), stream=fd)
                with lsst.utils.tests.getTempFilePath(".yaml") as sympath:
                    os.symlink(os.path.abspath(realpath), sympath)

                    datastore = self.makeDatastore()
                    datastore.ingest(FileDataset(path=os.path.abspath(sympath), refs=ref), transfer=mode)

                    uri = datastore.getURI(ref)
                    self.assertTrue(uri.isLocal, f"Check {uri.scheme}")
                    self.assertTrue(os.path.islink(uri.ospath), f"Check {uri} is a symlink")

                    linkTarget = os.readlink(uri.ospath)
                    if mode == "relsymlink":
                        self.assertFalse(os.path.isabs(linkTarget))
                    else:
                        self.assertEqual(linkTarget, os.path.abspath(realpath))

                    # Check that we can get the dataset back regardless of mode
                    metric2 = datastore.get(ref)
                    self.assertEqual(metric2, metrics)

                    # Cleanup the file for next time round loop
                    # since it will get the same file name in store
                    datastore.remove(ref)

    def _populate_export_datastore(self, name: str) -> tuple[Datastore, list[DatasetRef]]:
        datastore = self.makeDatastore(name)

        # For now only the FileDatastore can be used for this test.
        # A ChainedDatastore that only includes InMemoryDatastores has to be
        # skipped as well.
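        # The loop below uses Python's for/else idiom: the else clause runs
        # only if the loop finishes without hitting ``break``, i.e. when every
        # child datastore name starts with "InMemoryDatastore".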
        for name in datastore.names:
            if not name.startswith("InMemoryDatastore"):
                break
        else:
            raise unittest.SkipTest("in-memory datastore does not support record export/import")

        metrics = makeExampleMetrics()
        dimensions = self.universe.conform(("visit", "physical_filter"))
        sc = self.storageClassFactory.getStorageClass("StructuredData")

        refs = []
        for visit in (2048, 2049, 2050):
            dataId = {"instrument": "dummy", "visit": visit, "physical_filter": "Uprime", "band": "u"}
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId)
            datastore.put(metrics, ref)
            refs.append(ref)
        return datastore, refs

    def testExportImportRecords(self) -> None:
        """Test for export_records and import_records methods."""
        datastore, refs = self._populate_export_datastore("test_datastore")
        for exported_refs in (refs, refs[1:]):
            n_refs = len(exported_refs)
            records = datastore.export_records(exported_refs)
            self.assertGreater(len(records), 0)
            self.assertTrue(set(records.keys()) <= set(datastore.names))
            # In a ChainedDatastore each FileDatastore will have a complete set
            for datastore_name in records:
                record_data = records[datastore_name]
                self.assertEqual(len(record_data.records), n_refs)

                # Check that subsetting works, including a non-existing
                # dataset ID.
                dataset_ids = {exported_refs[0].id, uuid.uuid4()}
                subset = record_data.subset(dataset_ids)
                assert subset is not None
                self.assertEqual(len(subset.records), 1)
                subset = record_data.subset({uuid.uuid4()})
                self.assertIsNone(subset)

        # Use the same datastore name to import relative path.
        datastore2 = self.makeDatastore("test_datastore")

        records = datastore.export_records(refs[1:])
        datastore2.import_records(records)

        with self.assertRaises(FileNotFoundError):
            data = datastore2.get(refs[0])
        data = datastore2.get(refs[1])
        self.assertIsNotNone(data)
        data = datastore2.get(refs[2])
        self.assertIsNotNone(data)

    def testExport(self) -> None:
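        """Test dataset export, including rejection of unsupported transfer
        modes and of refs unknown to the datastore.
        """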
        datastore, refs = self._populate_export_datastore("test_datastore")

        datasets = list(datastore.export(refs))
        self.assertEqual(len(datasets), 3)

        for transfer in (None, "auto"):
            # Both will default to None
            datasets = list(datastore.export(refs, transfer=transfer))
            self.assertEqual(len(datasets), 3)

        with self.assertRaises(TypeError):
            list(datastore.export(refs, transfer="copy"))

        with self.assertRaises(TypeError):
            list(datastore.export(refs, directory="exportDir", transfer="move"))

        # Create a new ref that is not known to the datastore and try to
        # export it.
        sc = self.storageClassFactory.getStorageClass("ThingOne")
        dimensions = self.universe.conform(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V", "band": "v"}
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId)
        with self.assertRaises(FileNotFoundError):
            list(datastore.export(refs + [ref], transfer=None))

    def test_pydantic_dict_storage_class_conversions(self) -> None:
        """Test converting a dataset stored as a pydantic model into a dict on
        read.
        """
        datastore = self.makeDatastore()
        store_as_model = self.makeDatasetRef(
            "store_as_model",
            dimensions=self.universe.empty,
            storageClass="DictConvertibleModel",
            dataId=DataCoordinate.make_empty(self.universe),
        )
        content = {"a": "one", "b": "two"}
        model = DictConvertibleModel.from_dict(content, extra="original content")
        datastore.put(model, store_as_model)
        retrieved_model = datastore.get(store_as_model)
        self.assertEqual(retrieved_model, model)
        loaded = datastore.get(store_as_model.overrideStorageClass("NativeDictForConvertibleModel"))
        self.assertEqual(type(loaded), dict)
        self.assertEqual(loaded, content)

    def test_simple_class_put_get(self) -> None:
        """Test that we can put and get a simple class with a dict()
        constructor.
        """
        datastore = self.makeDatastore()
        data = MetricsExample(summary={"a": 1}, data=[1, 2, 3], output={"b": 2})
        self._assert_different_puts(datastore, "MetricsExample", data)

    def test_dataclass_put_get(self) -> None:
        """Test that we can put and get a simple dataclass."""
        datastore = self.makeDatastore()
        data = MetricsExampleDataclass(summary={"a": 1}, data=[1, 2, 3], output={"b": 2})
        self._assert_different_puts(datastore, "MetricsExampleDataclass", data)

    def test_pydantic_put_get(self) -> None:
        """Test that we can put and get a simple Pydantic model."""
        datastore = self.makeDatastore()
        data = MetricsExampleModel(summary={"a": 1}, data=[1, 2, 3], output={"b": 2})
        self._assert_different_puts(datastore, "MetricsExampleModel", data)

    def test_tuple_put_get(self) -> None:
        """Test that we can put and get a tuple."""
        datastore = self.makeDatastore()
        data = ("a", "b", 1)
        self._assert_different_puts(datastore, "TupleExample", data)

    def _assert_different_puts(self, datastore: Datastore, storageClass_root: str, data: Any) -> None:
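        """Put the same object under the "A" and "B" variants of the given
        storage class root and check that both read back equal.
        """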
        refs = {
            x: self.makeDatasetRef(
                f"store_as_{x}",
                dimensions=self.universe.empty,
                storageClass=f"{storageClass_root}{x}",
                dataId=DataCoordinate.make_empty(self.universe),
            )
            for x in ["A", "B"]
        }

        for ref in refs.values():
            datastore.put(data, ref)

        self.assertEqual(datastore.get(refs["A"]), datastore.get(refs["B"]))


class PosixDatastoreTestCase(DatastoreTests, unittest.TestCase):
    """PosixDatastore specialization"""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
    uriScheme = "file"
    canIngestNoTransferAuto = True
    ingestTransferModes = (None, "copy", "move", "link", "hardlink", "symlink", "relsymlink", "auto")
    isEphemeral = False
    rootKeys = ("root",)
    validationCanFail = True

    def setUp(self) -> None:
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()

    def testAtomicWrite(self) -> None:
        """Test that we write to a temporary and then rename"""
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.conform(("visit", "physical_filter"))
        metrics = makeExampleMetrics()

        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V", "band": "v"}
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId)

        with self.assertLogs("lsst.resources", "DEBUG") as cm:
            datastore.put(metrics, ref)
        move_logs = [ll for ll in cm.output if "transfer=" in ll]
        self.assertIn("transfer=move", move_logs[0])

        # And the transfer should be file to file.
        self.assertEqual(move_logs[0].count("file://"), 2)

    def testCanNotDeterminePutFormatterLocation(self) -> None:
        """Verify that the expected exception is raised if the FileDatastore
        cannot determine the put formatter location.
        """
        _ = makeExampleMetrics()
        datastore = self.makeDatastore()

        # Create storage classes for a simple dataset and a composite.
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")

        sccomp = StorageClass("Dummy")
        compositeStorageClass = StorageClass(
            "StructuredComposite", components={"dummy": sccomp, "dummy2": sccomp}
        )

        dimensions = self.universe.conform(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V", "band": "v"}

        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId)
        compRef = self.makeDatasetRef("metric", dimensions, compositeStorageClass, dataId)

        def raiser(ref: DatasetRef) -> None:
            raise DatasetTypeNotSupportedError()

        with unittest.mock.patch.object(
            lsst.daf.butler.datastores.fileDatastore.FileDatastore,
            "_determine_put_formatter_location",
            side_effect=raiser,
        ):
            # verify the non-composite ref execution path:
            with self.assertRaises(DatasetTypeNotSupportedError):
                datastore.getURIs(ref, predict=True)

            # verify the composite-ref execution path:
            with self.assertRaises(DatasetTypeNotSupportedError):
                datastore.getURIs(compRef, predict=True)

    def test_roots(self) -> None:
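        """Check that every datastore name has a roots entry and that any
        non-None root exists.
        """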
        datastore = self.makeDatastore()

        self.assertEqual(set(datastore.names), set(datastore.roots.keys()))
        for root in datastore.roots.values():
            if root is not None:
                self.assertTrue(root.exists())


class PosixDatastoreNoChecksumsTestCase(PosixDatastoreTestCase):
    """Posix datastore tests but with checksums disabled."""

    configFile = os.path.join(TESTDIR, "config/basic/posixDatastoreNoChecksums.yaml")

    def testChecksum(self) -> None:
        """Ensure that checksums have not been calculated."""
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.conform(("visit", "physical_filter"))
        metrics = makeExampleMetrics()

        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V", "band": "v"}
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId)

        # Configuration should have disabled checksum calculation
        datastore.put(metrics, ref)
        infos = datastore.getStoredItemsInfo(ref)
        self.assertIsNone(infos[0].checksum)

        # Remove and put back, but with checksums enabled explicitly
        datastore.remove(ref)
        datastore.useChecksum = True
        datastore.put(metrics, ref)

        infos = datastore.getStoredItemsInfo(ref)
        self.assertIsNotNone(infos[0].checksum)

    def test_repeat_ingest(self) -> None:
        """Test that repeatedly ingesting the same file in direct mode
        is allowed.

        Test can only run with FileDatastore since that is the only one
        supporting "direct" ingest.
        """
        metrics, v4ref = self._prepareIngestTest()
        datastore = self.makeDatastore()
        v5ref = DatasetRef(
            v4ref.datasetType, v4ref.dataId, v4ref.run, id_generation_mode=DatasetIdGenEnum.DATAID_TYPE_RUN
        )

        with lsst.utils.tests.getTempFilePath(".yaml", expectOutput=True) as path:
            with open(path, "w") as fd:
                yaml.dump(metrics._asdict(), stream=fd)

            datastore.ingest(FileDataset(path=path, refs=v4ref), transfer="direct")

            # This will fail because the ref is using UUIDv4.
            with self.assertRaises(RuntimeError):
                datastore.ingest(FileDataset(path=path, refs=v4ref), transfer="direct")

            # UUIDv5 can be repeatedly ingested in direct mode.
            datastore.ingest(FileDataset(path=path, refs=v5ref), transfer="direct")
            datastore.ingest(FileDataset(path=path, refs=v5ref), transfer="direct")

            with self.assertRaises(RuntimeError):
                datastore.ingest(FileDataset(path=path, refs=v5ref), transfer="copy")


class TrashDatastoreTestCase(PosixDatastoreTestCase):
    """Restrict trash test to FileDatastore."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def testTrash(self) -> None:
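        """Trash datasets and empty the trash, including trust-mode removal
        when the registry record has already been deleted.
        """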
        datastore, *refs = self.prepDeleteTest(n_refs=10)

        # Trash one of them.
        ref = refs.pop()
        uri = datastore.getURI(ref)
        datastore.trash(ref)
        self.assertTrue(uri.exists(), uri)  # Not deleted yet
        datastore.emptyTrash()
        self.assertFalse(uri.exists(), uri)

        # Trashing it again should be fine.
        datastore.trash(ref)

        # Trash multiple items at once.
        subset = [refs.pop(), refs.pop()]
        datastore.trash(subset)
        datastore.emptyTrash()

        # Remove a record; trash should then do nothing.
        # This is the execution butler scenario.
        ref = refs.pop()
        uri = datastore.getURI(ref)
        datastore._table.delete(["dataset_id"], {"dataset_id": ref.id})
        self.assertTrue(uri.exists())
        datastore.trash(ref)
        datastore.emptyTrash()
        self.assertTrue(uri.exists())

        # Switch on trust and it should delete the file.
        datastore.trustGetRequest = True
        datastore.trash([ref])
        self.assertFalse(uri.exists())

        # Remove multiples at once in trust mode.
        subset = [refs.pop() for i in range(3)]
        datastore.trash(subset)
        datastore.trash(refs.pop())  # Check that a single ref can be trashed


class CleanupPosixDatastoreTestCase(DatastoreTestsBase, unittest.TestCase):
    """Test that the datastore cleans up on failure."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self) -> None:
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()

    def testCleanup(self) -> None:
        """Test that a failed formatter write does clean up a partial file."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        storageClass = self.storageClassFactory.getStorageClass("StructuredData")

        dimensions = self.universe.conform(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V", "band": "v"}

        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId)

        # Determine where the file will end up (we assume Formatters use
        # the same file extension)
        expectedUri = datastore.getURI(ref, predict=True)
        self.assertEqual(expectedUri.fragment, "predicted")

        self.assertEqual(expectedUri.getExtension(), ".yaml", f"Is there a file extension in {expectedUri}")

        # Try a formatter that fails without writing and one that fails
        # after leaving a file behind
        for formatter in (BadWriteFormatter, BadNoWriteFormatter):
            with self.subTest(formatter=formatter):
                # Monkey patch the formatter
                datastore.formatterFactory.registerFormatter(ref.datasetType, formatter, overwrite=True)

                # Try to put the dataset, it should fail
                with self.assertRaises(RuntimeError):
                    datastore.put(metrics, ref)

                # Check that there is no file on disk
                self.assertFalse(expectedUri.exists(), f"Check for existence of {expectedUri}")

                # Check that there is a directory
                dir = expectedUri.dirname()
                self.assertTrue(dir.exists(), f"Check for existence of directory {dir}")

        # Force YamlFormatter and check that this time a file is written
        datastore.formatterFactory.registerFormatter(ref.datasetType, YamlFormatter, overwrite=True)
        datastore.put(metrics, ref)
        self.assertTrue(expectedUri.exists(), f"Check for existence of {expectedUri}")
        datastore.remove(ref)
        self.assertFalse(expectedUri.exists(), f"Check for existence of now removed {expectedUri}")


class InMemoryDatastoreTestCase(DatastoreTests, unittest.TestCase):
    """InMemoryDatastore specialization"""

    configFile = os.path.join(TESTDIR, "config/basic/inMemoryDatastore.yaml")
    uriScheme = "mem"
    hasUnsupportedPut = False
    ingestTransferModes = ()
    isEphemeral = True
    rootKeys = None
    validationCanFail = False


class ChainedDatastoreTestCase(PosixDatastoreTestCase):
    """ChainedDatastore specialization using a POSIXDatastore"""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore.yaml")
    hasUnsupportedPut = False
    canIngestNoTransferAuto = False
    ingestTransferModes = ("copy", "move", "hardlink", "symlink", "relsymlink", "link", "auto")
    isEphemeral = False
    rootKeys = (".datastores.1.root", ".datastores.2.root")
    validationCanFail = True


class ChainedDatastoreMemoryTestCase(InMemoryDatastoreTestCase):
    """ChainedDatastore specialization using all InMemoryDatastore"""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore2.yaml")
    validationCanFail = False


class DatastoreConstraintsTests(DatastoreTestsBase):
    """Basic tests of the constraints model of Datastores."""

    def testConstraints(self) -> None:
        """Test constraints model. Assumes that each test class has the
        same constraints.
        """
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        sc1 = self.storageClassFactory.getStorageClass("StructuredData")
        sc2 = self.storageClassFactory.getStorageClass("StructuredDataJson")
        dimensions = self.universe.conform(("visit", "physical_filter", "instrument"))
        dataId = {"visit": 52, "physical_filter": "V", "band": "v", "instrument": "DummyCamComp"}

        # Write empty file suitable for ingest check (JSON and YAML variants)
        testfile_y = tempfile.NamedTemporaryFile(suffix=".yaml")
        testfile_j = tempfile.NamedTemporaryFile(suffix=".json")
        for datasetTypeName, sc, accepted in (
            ("metric", sc1, True),
            ("metric5", sc1, False),
            ("metric33", sc1, True),
            ("metric5", sc2, True),
        ):
            # Choose different temp file depending on StorageClass
            testfile = testfile_j if sc.name.endswith("Json") else testfile_y

            with self.subTest(datasetTypeName=datasetTypeName, storageClass=sc.name, file=testfile.name):
                ref = self.makeDatasetRef(datasetTypeName, dimensions, sc, dataId)
                if accepted:
                    datastore.put(metrics, ref)
                    self.assertTrue(datastore.exists(ref))
                    datastore.remove(ref)

                    # Try ingest
                    if self.canIngest:
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertTrue(datastore.exists(ref))
                        datastore.remove(ref)
                else:
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.put(metrics, ref)
                    self.assertFalse(datastore.exists(ref))

                    # Again with ingest
                    if self.canIngest:
                        with self.assertRaises(DatasetTypeNotSupportedError):
                            datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertFalse(datastore.exists(ref))


class PosixDatastoreConstraintsTestCase(DatastoreConstraintsTests, unittest.TestCase):
    """PosixDatastore specialization"""

    configFile = os.path.join(TESTDIR, "config/basic/posixDatastoreP.yaml")
    canIngest = True

    def setUp(self) -> None:
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()


class InMemoryDatastoreConstraintsTestCase(DatastoreConstraintsTests, unittest.TestCase):
    """InMemoryDatastore specialization."""

    configFile = os.path.join(TESTDIR, "config/basic/inMemoryDatastoreP.yaml")
    canIngest = False


class ChainedDatastoreConstraintsNativeTestCase(PosixDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using a POSIXDatastore and constraints
    at the ChainedDatastore.
    """

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastorePa.yaml")


class ChainedDatastoreConstraintsTestCase(PosixDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using a POSIXDatastore."""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastoreP.yaml")


class ChainedDatastoreMemoryConstraintsTestCase(InMemoryDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using all InMemoryDatastore."""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore2P.yaml")
    canIngest = False


class ChainedDatastorePerStoreConstraintsTests(DatastoreTestsBase, unittest.TestCase):
    """Test that a chained datastore can control constraints per-datastore
    even if a child datastore would accept the dataset.
    """

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastorePb.yaml")

    def setUp(self) -> None:
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()

    def testConstraints(self) -> None:
        """Test chained datastore constraints model."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        sc1 = self.storageClassFactory.getStorageClass("StructuredData")
        sc2 = self.storageClassFactory.getStorageClass("StructuredDataJson")
        dimensions = self.universe.conform(("visit", "physical_filter", "instrument"))
        dataId1 = {"visit": 52, "physical_filter": "V", "band": "v", "instrument": "DummyCamComp"}
        dataId2 = {"visit": 52, "physical_filter": "V", "band": "v", "instrument": "HSC"}

        # Write empty file suitable for ingest check (JSON and YAML variants)
        testfile_y = tempfile.NamedTemporaryFile(suffix=".yaml")
        testfile_j = tempfile.NamedTemporaryFile(suffix=".json")

        for typeName, dataId, sc, accept, ingest in (
            ("metric", dataId1, sc1, (False, True, False), True),
            ("metric5", dataId1, sc1, (False, False, False), False),
            ("metric5", dataId2, sc1, (True, False, False), False),
            ("metric33", dataId2, sc2, (True, True, False), True),
            ("metric5", dataId1, sc2, (False, True, False), True),
        ):
            # Choose different temp file depending on StorageClass
            testfile = testfile_j if sc.name.endswith("Json") else testfile_y

            with self.subTest(datasetTypeName=typeName, dataId=dataId, sc=sc.name):
                ref = self.makeDatasetRef(typeName, dimensions, sc, dataId)
                if any(accept):
                    datastore.put(metrics, ref)
                    self.assertTrue(datastore.exists(ref))

                    # Check each datastore inside the chained datastore
                    for childDatastore, expected in zip(datastore.datastores, accept, strict=True):
                        self.assertEqual(
                            childDatastore.exists(ref),
                            expected,
                            f"Testing presence of {ref} in datastore {childDatastore.name}",
                        )

                    datastore.remove(ref)

                    # Check that ingest works
                    if ingest:
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertTrue(datastore.exists(ref))

                        # Check each datastore inside the chained datastore
                        for childDatastore, expected in zip(datastore.datastores, accept, strict=True):
                            # Ephemeral datastores mean InMemory at the moment
                            # and that does not accept ingest of files.
                            if childDatastore.isEphemeral:
                                expected = False
                            self.assertEqual(
                                childDatastore.exists(ref),
                                expected,
                                f"Testing presence of ingested {ref} in datastore {childDatastore.name}",
                            )

                        datastore.remove(ref)
                    else:
                        with self.assertRaises(DatasetTypeNotSupportedError):
                            datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")

                else:
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.put(metrics, ref)
                    self.assertFalse(datastore.exists(ref))

                    # Again with ingest
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                    self.assertFalse(datastore.exists(ref))


class DatastoreCacheTestCase(DatasetTestHelper, unittest.TestCase):
    """Tests for datastore caching infrastructure."""

    @classmethod
    def setUpClass(cls) -> None:
        cls.storageClassFactory = StorageClassFactory()
        cls.universe = DimensionUniverse()

        # Ensure that we load the test storage class definitions.
        scConfigFile = os.path.join(TESTDIR, "config/basic/storageClasses.yaml")
        cls.storageClassFactory.addFromConfig(scConfigFile)

    def setUp(self) -> None:
        self.id = 0

        # Create a root that we can use for caching tests.
        self.root = tempfile.mkdtemp(dir=TESTDIR)

        # Create some test dataset refs and associated test files
        sc = self.storageClassFactory.getStorageClass("StructuredDataDict")
        dimensions = self.universe.conform(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V", "band": "v"}

        # Create list of refs and list of temporary files
        n_datasets = 10
        self.refs = [self.makeDatasetRef(f"metric{n}", dimensions, sc, dataId) for n in range(n_datasets)]

        root_uri = ResourcePath(self.root, forceDirectory=True)
        self.files = [root_uri.join(f"file{n}.txt") for n in range(n_datasets)]

        # Create test files.
        for uri in self.files:
            uri.write(b"0123456789")

        # Create some composite refs with component files.
        sc = self.storageClassFactory.getStorageClass("StructuredData")
        self.composite_refs = [self.makeDatasetRef(f"composite{n}", dimensions, sc, dataId) for n in range(3)]
        self.comp_files = []
        self.comp_refs = []
        for n, ref in enumerate(self.composite_refs):
            component_refs = []
            component_files = []
            for component in sc.components:
                component_ref = ref.makeComponentRef(component)
                file = root_uri.join(f"composite_file-{n}-{component}.txt")
                component_refs.append(component_ref)
                component_files.append(file)
                file.write(b"9876543210")

            self.comp_files.append(component_files)
            self.comp_refs.append(component_refs)

    def tearDown(self) -> None:
        if self.root is not None and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)

    def _make_cache_manager(self, config_str: str) -> DatastoreCacheManager:
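        """Construct a cache manager from a YAML configuration string."""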
        config = Config.fromYaml(config_str)
        return DatastoreCacheManager(DatastoreCacheManagerConfig(config), universe=self.universe)

    def testNoCacheDir(self) -> None:
        config_str = """
cached:
  root: null
  cacheable:
    metric0: true
"""
        cache_manager = self._make_cache_manager(config_str)

        # Look inside to check we don't have a cache directory
        self.assertIsNone(cache_manager._cache_directory)

        self.assertCache(cache_manager)

        # Test that the cache directory is marked temporary
        self.assertTrue(cache_manager.cache_directory.isTemporary)

    def testNoCacheDirReversed(self) -> None:
        """Use default caching status and set metric1 to false."""
        config_str = """
cached:
  root: null
  default: true
  cacheable:
    metric1: false
"""
        cache_manager = self._make_cache_manager(config_str)

        self.assertCache(cache_manager)

    def testEnvvarCacheDir(self) -> None:
        config_str = f"""
cached:
  root: '{self.root}'
  cacheable:
    metric0: true
"""

        root = ResourcePath(self.root, forceDirectory=True)
        env_dir = root.join("somewhere", forceDirectory=True)
        elsewhere = root.join("elsewhere", forceDirectory=True)

        # Environment variable should override the config value.
        with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_CACHE_DIRECTORY": env_dir.ospath}):
            cache_manager = self._make_cache_manager(config_str)
            self.assertEqual(cache_manager.cache_directory, env_dir)

        # This environment variable should not override the config value.
        with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": env_dir.ospath}):
            cache_manager = self._make_cache_manager(config_str)
            self.assertEqual(cache_manager.cache_directory, root)

        # Now use a config with no explicit cache directory set.
        config_str = """
cached:
  root: null
  default: true
  cacheable:
    metric1: false
"""
        cache_manager = self._make_cache_manager(config_str)

        # This environment variable should override the config value.
        with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": env_dir.ospath}):
            cache_manager = self._make_cache_manager(config_str)
            self.assertEqual(cache_manager.cache_directory, env_dir)

        # If both environment variables are set the main (not IF_UNSET)
        # variable should win.
        with unittest.mock.patch.dict(
            os.environ,
            {
                "DAF_BUTLER_CACHE_DIRECTORY": env_dir.ospath,
                "DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": elsewhere.ospath,
            },
        ):
            cache_manager = self._make_cache_manager(config_str)
            self.assertEqual(cache_manager.cache_directory, env_dir)

        # Use the API to set the environment variable, making sure that the
        # variable is reset on exit.
        with unittest.mock.patch.dict(
            os.environ,
            {"DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": ""},
        ):
            defined, cache_dir = DatastoreCacheManager.set_fallback_cache_directory_if_unset()
            self.assertTrue(defined)
            cache_manager = self._make_cache_manager(config_str)
            self.assertEqual(cache_manager.cache_directory, ResourcePath(cache_dir, forceDirectory=True))

        # Now create the cache manager ahead of time and set the fallback
        # later.
        cache_manager = self._make_cache_manager(config_str)
        self.assertIsNone(cache_manager._cache_directory)
        with unittest.mock.patch.dict(
            os.environ,
            {"DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": ""},
        ):
            defined, cache_dir = DatastoreCacheManager.set_fallback_cache_directory_if_unset()
            self.assertTrue(defined)
            self.assertEqual(cache_manager.cache_directory, ResourcePath(cache_dir, forceDirectory=True))

    def testExplicitCacheDir(self) -> None:
        config_str = f"""
cached:
  root: '{self.root}'
  cacheable:
    metric0: true
        """
        cache_manager = self._make_cache_manager(config_str)

        # Look inside to check we do have a cache directory.
        self.assertEqual(cache_manager.cache_directory, ResourcePath(self.root, forceDirectory=True))

        self.assertCache(cache_manager)

        # Test that the cache directory is not marked temporary.
        self.assertFalse(cache_manager.cache_directory.isTemporary)

    def assertCache(self, cache_manager: DatastoreCacheManager) -> None:
        self.assertTrue(cache_manager.should_be_cached(self.refs[0]))
        self.assertFalse(cache_manager.should_be_cached(self.refs[1]))

        uri = cache_manager.move_to_cache(self.files[0], self.refs[0])
        self.assertIsInstance(uri, ResourcePath)
        self.assertIsNone(cache_manager.move_to_cache(self.files[1], self.refs[1]))

        # Check presence in cache using ref and then using file extension.
        self.assertFalse(cache_manager.known_to_cache(self.refs[1]))
        self.assertTrue(cache_manager.known_to_cache(self.refs[0]))
        self.assertFalse(cache_manager.known_to_cache(self.refs[1], self.files[1].getExtension()))
        self.assertTrue(cache_manager.known_to_cache(self.refs[0], self.files[0].getExtension()))

        # Cached file should no longer exist but uncached file should be
        # unaffected.
        self.assertFalse(self.files[0].exists())
        self.assertTrue(self.files[1].exists())

        # Should find this file and it should be within the cache directory.
        with cache_manager.find_in_cache(self.refs[0], ".txt") as found:
            self.assertTrue(found.exists())
            self.assertIsNotNone(found.relative_to(cache_manager.cache_directory))

        # Should not be able to find these in cache.
        with cache_manager.find_in_cache(self.refs[0], ".fits") as found:
            self.assertIsNone(found)
        with cache_manager.find_in_cache(self.refs[1], ".fits") as found:
            self.assertIsNone(found)

    def testNoCache(self) -> None:
        cache_manager = DatastoreDisabledCacheManager("", universe=self.universe)
        for uri, ref in zip(self.files, self.refs, strict=True):
            self.assertFalse(cache_manager.should_be_cached(ref))
            self.assertIsNone(cache_manager.move_to_cache(uri, ref))
            self.assertFalse(cache_manager.known_to_cache(ref))
            with cache_manager.find_in_cache(ref, ".txt") as found:
                self.assertIsNone(found, msg=f"{cache_manager}")

    def _expiration_config(self, mode: str, threshold: int) -> str:
        return f"""
cached:
  default: true
  expiry:
    mode: {mode}
    threshold: {threshold}
  cacheable:
    unused: true
        """

    def testCacheExpiryFiles(self) -> None:
        threshold = 2  # Keep at least 2 files.
        mode = "files"
        config_str = self._expiration_config(mode, threshold)

        cache_manager = self._make_cache_manager(config_str)

        # Check that an empty cache returns unknown for arbitrary ref.
        self.assertFalse(cache_manager.known_to_cache(self.refs[0]))

        # Should end with datasets: 2, 3, 4.
        self.assertExpiration(cache_manager, 5, threshold + 1)
        self.assertIn(f"{mode}={threshold}", str(cache_manager))

        # Check that we will not expire a file that is actively in use.
        with cache_manager.find_in_cache(self.refs[2], ".txt") as found:
            self.assertIsNotNone(found)

            # Trigger cache expiration that should remove the file
            # we just retrieved. Should now have: 3, 4, 5.
            cached = cache_manager.move_to_cache(self.files[5], self.refs[5])
            self.assertIsNotNone(cached)

            # Cache should still report the standard file count.
            self.assertEqual(cache_manager.file_count, threshold + 1)

            # Add additional entry to cache.
            # Should now have: 4, 5, 6.
            cached = cache_manager.move_to_cache(self.files[6], self.refs[6])
            self.assertIsNotNone(cached)

            # Is the file still there?
            self.assertTrue(found.exists())

            # Can we read it?
            data = found.read()
            self.assertGreater(len(data), 0)

        # Outside the context the file should no longer exist.
        self.assertFalse(found.exists())

        # File count should not have changed.
        self.assertEqual(cache_manager.file_count, threshold + 1)

        # Dataset 2 was in the exempt directory but because hardlinks
        # are used it was deleted from the main cache during cache expiry
        # above and so should no longer be found.
        with cache_manager.find_in_cache(self.refs[2], ".txt") as found:
            self.assertIsNone(found)

        # And the one stored after it is also gone.
        with cache_manager.find_in_cache(self.refs[3], ".txt") as found:
            self.assertIsNone(found)

        # But dataset 4 is present.
        with cache_manager.find_in_cache(self.refs[4], ".txt") as found:
            self.assertIsNotNone(found)

        # Adding a new dataset to the cache should now delete it.
        cache_manager.move_to_cache(self.files[7], self.refs[7])

        with cache_manager.find_in_cache(self.refs[2], ".txt") as found:
            self.assertIsNone(found)

    def testCacheExpiryDatasets(self) -> None:
        threshold = 2  # Keep 2 datasets.
        mode = "datasets"
        config_str = self._expiration_config(mode, threshold)

        cache_manager = self._make_cache_manager(config_str)
        self.assertExpiration(cache_manager, 5, threshold + 1)
        self.assertIn(f"{mode}={threshold}", str(cache_manager))

    def testCacheExpiryDatasetsComposite(self) -> None:
        threshold = 2  # Keep 2 datasets.
        mode = "datasets"
        config_str = self._expiration_config(mode, threshold)

        cache_manager = self._make_cache_manager(config_str)

        n_datasets = 3
        for i in range(n_datasets):
            for component_file, component_ref in zip(self.comp_files[i], self.comp_refs[i], strict=True):
                cached = cache_manager.move_to_cache(component_file, component_ref)
                self.assertIsNotNone(cached)
                self.assertTrue(cache_manager.known_to_cache(component_ref))
                self.assertTrue(cache_manager.known_to_cache(component_ref.makeCompositeRef()))
                self.assertTrue(cache_manager.known_to_cache(component_ref, component_file.getExtension()))

        self.assertEqual(cache_manager.file_count, 6)  # 2 datasets each of 3 files.

        # Write two new non-composite datasets; the number of files
        # should drop.
        self.assertExpiration(cache_manager, 2, 5)

    def testCacheExpirySize(self) -> None:
        threshold = 55  # Each file is 10 bytes.
        mode = "size"
        config_str = self._expiration_config(mode, threshold)

        cache_manager = self._make_cache_manager(config_str)
        self.assertExpiration(cache_manager, 10, 6)
        self.assertIn(f"{mode}={threshold}", str(cache_manager))

    def assertExpiration(
        self, cache_manager: DatastoreCacheManager, n_datasets: int, n_retained: int
    ) -> None:
        """Insert the datasets and then check the number retained."""
        for i in range(n_datasets):
            cached = cache_manager.move_to_cache(self.files[i], self.refs[i])
            self.assertIsNotNone(cached)

        self.assertEqual(cache_manager.file_count, n_retained)

        # The oldest files should no longer be in the cache.
        for i in range(n_datasets):
            with cache_manager.find_in_cache(self.refs[i], ".txt") as found:
                if i >= n_datasets - n_retained:
                    self.assertIsInstance(found, ResourcePath)
                else:
                    self.assertIsNone(found)
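
    # A worked example of the retention arithmetic: testCacheExpirySize
    # moves 10 files of 10 bytes each into a cache with a size threshold
    # of 55 bytes and expects 6 survivors (datasets 4-9). This is
    # consistent with expiry trimming the cache down to the threshold
    # (at most five 10-byte files) before each insert, so the cache can
    # sit one entry above the nominal limit after the final
    # move_to_cache -- the same pattern as the "files" mode tests,
    # which retain threshold + 1 files.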

    def testCacheExpiryAge(self) -> None:
        threshold = 1  # Expire files older than 1 second.
        mode = "age"
        config_str = self._expiration_config(mode, threshold)

        cache_manager = self._make_cache_manager(config_str)
        self.assertIn(f"{mode}={threshold}", str(cache_manager))

        # Insert 2 files, then sleep past the threshold, then insert 4 more.
        for i in range(2):
            cached = cache_manager.move_to_cache(self.files[i], self.refs[i])
            self.assertIsNotNone(cached)
        time.sleep(2.0)
        for j in range(4):
            i = 2 + j  # Continue the counting.
            cached = cache_manager.move_to_cache(self.files[i], self.refs[i])
            self.assertIsNotNone(cached)

        # Only the files written after the sleep should exist.
        self.assertEqual(cache_manager.file_count, 4)
        with cache_manager.find_in_cache(self.refs[1], ".txt") as found:
            self.assertIsNone(found)
        with cache_manager.find_in_cache(self.refs[2], ".txt") as found:
            self.assertIsInstance(found, ResourcePath)


class NullDatastoreTestCase(DatasetTestHelper, unittest.TestCase):
    """Test the null datastore."""

    storageClassFactory = StorageClassFactory()

    def test_basics(self) -> None:
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDict")
        ref = self.makeDatasetRef("metric", DimensionUniverse().empty, storageClass, {})

        null = NullDatastore(None, None)

        self.assertFalse(null.exists(ref))
        self.assertFalse(null.knows(ref))
        knows = null.knows_these([ref])
        self.assertFalse(knows[ref])
        null.validateConfiguration(ref)

        with self.assertRaises(FileNotFoundError):
            null.get(ref)
        with self.assertRaises(NotImplementedError):
            null.put("", ref)
        with self.assertRaises(FileNotFoundError):
            null.getURI(ref)
        with self.assertRaises(FileNotFoundError):
            null.getURIs(ref)
        with self.assertRaises(FileNotFoundError):
            null.getManyURIs([ref])
        with self.assertRaises(NotImplementedError):
            null.getLookupKeys()
        with self.assertRaises(NotImplementedError):
            null.import_records({})
        with self.assertRaises(NotImplementedError):
            null.export_records([])
        with self.assertRaises(NotImplementedError):
            null.export([ref])
        with self.assertRaises(NotImplementedError):
            null.transfer(null, ref)
        with self.assertRaises(NotImplementedError):
            null.emptyTrash()
        with self.assertRaises(NotImplementedError):
            null.trash(ref)
        with self.assertRaises(NotImplementedError):
            null.forget([ref])
        with self.assertRaises(NotImplementedError):
            null.remove(ref)
        with self.assertRaises(NotImplementedError):
            null.retrieveArtifacts([ref], ResourcePath("."))
        with self.assertRaises(NotImplementedError):
            null.transfer_from(null, [ref])
        with self.assertRaises(NotImplementedError):
            null.ingest()


class DatasetRefURIsTestCase(unittest.TestCase):
    """Tests for DatasetRefURIs."""

    def testSequenceAccess(self) -> None:
        """Verify that DatasetRefURIs can be treated like a two-item tuple."""
        uris = DatasetRefURIs()

        self.assertEqual(len(uris), 2)
        self.assertEqual(uris[0], None)
        self.assertEqual(uris[1], {})

        primaryURI = ResourcePath("1/2/3")
        componentURI = ResourcePath("a/b/c")

        # Affirm that DatasetRefURIs does not support MutableSequence
        # functions.
        with self.assertRaises(TypeError):
            uris[0] = primaryURI
        with self.assertRaises(TypeError):
            uris[1] = {"foo": componentURI}

        # But DatasetRefURIs can be set by property name:
        uris.primaryURI = primaryURI
        uris.componentURIs = {"foo": componentURI}
        self.assertEqual(uris.primaryURI, primaryURI)
        self.assertEqual(uris[0], primaryURI)

        primary, components = uris
        self.assertEqual(primary, primaryURI)
        self.assertEqual(components, {"foo": componentURI})

    def testRepr(self) -> None:
        """Verify __repr__ output."""
        uris = DatasetRefURIs(ResourcePath("/1/2/3"), {"comp": ResourcePath("/a/b/c")})
        self.assertEqual(
            repr(uris),
            'DatasetRefURIs(ResourcePath("file:///1/2/3"), {\'comp\': ResourcePath("file:///a/b/c")})',
        )


class StoredFileInfoTestCase(DatasetTestHelper, unittest.TestCase):
    """Test the StoredFileInfo class."""

    storageClassFactory = StorageClassFactory()

    def test_StoredFileInfo(self) -> None:
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDict")
        ref = self.makeDatasetRef("metric", DimensionUniverse().empty, storageClass, {})

        record = dict(
            storage_class="StructuredDataDict",
            formatter="lsst.daf.butler.Formatter",
            path="a/b/c.txt",
            component="component",
            checksum=None,
            file_size=5,
        )
        info = StoredFileInfo.from_record(record)

        self.assertEqual(info.to_record(), record)

        ref2 = self.makeDatasetRef("metric", DimensionUniverse().empty, storageClass, {})
        rebased = info.rebase(ref2)
        self.assertEqual(rebased.rebase(ref), info)

        with self.assertRaises(TypeError):
            rebased.update(formatter=42)

        with self.assertRaises(ValueError):
            rebased.update(something=42, new="42")

        # Check that pickle works on StoredFileInfo.
        pickled_info = pickle.dumps(info)
        unpickled_info = pickle.loads(pickled_info)
        self.assertEqual(unpickled_info, info)


if __name__ == "__main__":
    unittest.main()