# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

import os
import pickle
import shutil
import tempfile
import time
import unittest
import unittest.mock
import uuid
from collections.abc import Callable
from typing import Any, cast

import lsst.utils.tests
import yaml
from lsst.daf.butler import (
    Config,
    DataCoordinate,
    DatasetRef,
    DatasetRefURIs,
    DatasetType,
    DatasetTypeNotSupportedError,
    Datastore,
    DatastoreCacheManager,
    DatastoreCacheManagerConfig,
    DatastoreConfig,
    DatastoreDisabledCacheManager,
    DatastoreValidationError,
    DimensionUniverse,
    FileDataset,
    NullDatastore,
    StorageClass,
    StorageClassFactory,
    StoredFileInfo,
)
from lsst.daf.butler.formatters.yaml import YamlFormatter
from lsst.daf.butler.tests import (
    BadNoWriteFormatter,
    BadWriteFormatter,
    DatasetTestHelper,
    DatastoreTestHelper,
    DummyRegistry,
    MetricsExample,
    MetricsExampleDataclass,
    MetricsExampleModel,
)
from lsst.daf.butler.tests.dict_convertible_model import DictConvertibleModel
from lsst.daf.butler.tests.utils import TestCaseMixin
from lsst.resources import ResourcePath
from lsst.utils import doImport

TESTDIR = os.path.dirname(__file__)


def makeExampleMetrics(use_none: bool = False) -> MetricsExample:
    """Make example dataset that can be stored in butler."""
    if use_none:
        array = None
    else:
        array = [563, 234, 456.7, 105, 2054, -1045]
    return MetricsExample(
        {"AM1": 5.2, "AM2": 30.6},
        {"a": [1, 2, 3], "b": {"blue": 5, "red": "green"}},
        array,
    )
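
# For reference: the three positional arguments above appear to map to the
# ``summary``, ``output`` and ``data`` attributes exercised by the tests
# below (inferred from the keyword construction in test_simple_class_put_get).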


class TransactionTestError(Exception):
    """Specific error for transactions, to prevent misdiagnosis that
    might otherwise occur when a standard exception is used.
    """

    pass


class DatastoreTestsBase(DatasetTestHelper, DatastoreTestHelper, TestCaseMixin):
    """Support routines for datastore testing"""

    root: str | None = None
    universe: DimensionUniverse
    storageClassFactory: StorageClassFactory

    @classmethod
    def setUpClass(cls) -> None:
        # Storage Classes are fixed for all datastores in these tests
        scConfigFile = os.path.join(TESTDIR, "config/basic/storageClasses.yaml")
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(scConfigFile)

        # Read the Datastore config so we can get the class
        # information (since we should not assume the constructor
        # name here, but rely on the configuration file itself)
        datastoreConfig = DatastoreConfig(cls.configFile)
        cls.datastoreType = cast(type[Datastore], doImport(datastoreConfig["cls"]))
        cls.universe = DimensionUniverse()

    def setUp(self) -> None:
        self.setUpDatastoreTests(DummyRegistry, DatastoreConfig)

    def tearDown(self) -> None:
        if self.root is not None and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)


class DatastoreTests(DatastoreTestsBase):
    """Some basic tests of a simple datastore."""

    hasUnsupportedPut = True
    rootKeys: tuple[str, ...] | None = None
    isEphemeral: bool = False
    validationCanFail: bool = False

    def testConfigRoot(self) -> None:
        full = DatastoreConfig(self.configFile)
        config = DatastoreConfig(self.configFile, mergeDefaults=False)
        newroot = "/random/location"
        self.datastoreType.setConfigRoot(newroot, config, full)
        if self.rootKeys:
            for k in self.rootKeys:
                self.assertIn(newroot, config[k])

    def testConstructor(self) -> None:
        datastore = self.makeDatastore()
        self.assertIsNotNone(datastore)
        self.assertIs(datastore.isEphemeral, self.isEphemeral)

    def testConfigurationValidation(self) -> None:
        datastore = self.makeDatastore()
        sc = self.storageClassFactory.getStorageClass("ThingOne")
        datastore.validateConfiguration([sc])

        sc2 = self.storageClassFactory.getStorageClass("ThingTwo")
        if self.validationCanFail:
            with self.assertRaises(DatastoreValidationError):
                datastore.validateConfiguration([sc2], logFailures=True)

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V", "band": "v"}
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId)
        datastore.validateConfiguration([ref])

    def testParameterValidation(self) -> None:
        """Check that parameters are validated"""
        sc = self.storageClassFactory.getStorageClass("ThingOne")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V", "band": "v"}
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId)
        datastore = self.makeDatastore()
        data = {1: 2, 3: 4}
        datastore.put(data, ref)
        newdata = datastore.get(ref)
        self.assertEqual(data, newdata)
        with self.assertRaises(KeyError):
            newdata = datastore.get(ref, parameters={"missing": 5})

    def testBasicPutGet(self) -> None:
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        # Create multiple storage classes for testing different formulations
        storageClasses = [
            self.storageClassFactory.getStorageClass(sc)
            for sc in ("StructuredData", "StructuredDataJson", "StructuredDataPickle")
        ]

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V", "band": "v"}
        dataId2 = {"instrument": "dummy", "visit": 53, "physical_filter": "V", "band": "v"}

        for sc in storageClasses:
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId)
            ref2 = self.makeDatasetRef("metric", dimensions, sc, dataId2)

            # getManyURIs should raise when the dataset has not been put
            # and prediction is disabled.
            with self.assertRaises(FileNotFoundError):
                datastore.getManyURIs([ref], predict=False)

            # With predict=True, getManyURIs should predict the URIs
            # instead of raising.
            uris = datastore.getManyURIs([ref, ref2], predict=True)
            self.assertIn("52", uris[ref].primaryURI.geturl())
            self.assertIn("#predicted", uris[ref].primaryURI.geturl())
            self.assertIn("53", uris[ref2].primaryURI.geturl())
            self.assertIn("#predicted", uris[ref2].primaryURI.geturl())

            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))
            self.assertTrue(datastore.knows(ref))
            multi = datastore.knows_these([ref])
            self.assertTrue(multi[ref])
            multi = datastore.mexists([ref, ref2])
            self.assertTrue(multi[ref])
            self.assertFalse(multi[ref2])

            # Get
            metricsOut = datastore.get(ref, parameters=None)
            self.assertEqual(metrics, metricsOut)

            uri = datastore.getURI(ref)
            self.assertEqual(uri.scheme, self.uriScheme)

            uris = datastore.getManyURIs([ref])
            self.assertEqual(len(uris), 1)
            ref, uri = uris.popitem()
            self.assertTrue(uri.primaryURI.exists())
            self.assertFalse(uri.componentURIs)

            # Get a component -- we need to construct new refs for them
            # with derived storage classes but the parent's dataset ID.
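            # (A component ref shares the parent's dataset ID; only the
            # dataset type and storage class differ.)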
234 for comp in ("data", "output"):
235 compRef = ref.makeComponentRef(comp)
236 output = datastore.get(compRef)
237 self.assertEqual(output, getattr(metricsOut, comp))
239 uri = datastore.getURI(compRef)
240 self.assertEqual(uri.scheme, self.uriScheme)
242 uris = datastore.getManyURIs([compRef])
243 self.assertEqual(len(uris), 1)
245 storageClass = sc
247 # Check that we can put a metric with None in a component and
248 # get it back as None
249 metricsNone = makeExampleMetrics(use_none=True)
250 dataIdNone = {"instrument": "dummy", "visit": 54, "physical_filter": "V", "band": "v"}
251 refNone = self.makeDatasetRef("metric", dimensions, sc, dataIdNone)
252 datastore.put(metricsNone, refNone)
254 comp = "data"
255 for comp in ("data", "output"):
256 compRef = refNone.makeComponentRef(comp)
257 output = datastore.get(compRef)
258 self.assertEqual(output, getattr(metricsNone, comp))
260 # Check that a put fails if the dataset type is not supported
261 if self.hasUnsupportedPut:
262 sc = StorageClass("UnsupportedSC", pytype=type(metrics))
263 ref = self.makeDatasetRef("unsupportedType", dimensions, sc, dataId)
264 with self.assertRaises(DatasetTypeNotSupportedError):
265 datastore.put(metrics, ref)
267 # These should raise
268 ref = self.makeDatasetRef("metrics", dimensions, storageClass, dataId)
269 with self.assertRaises(FileNotFoundError):
270 # non-existing file
271 datastore.get(ref)
273 # Get a URI from it
274 uri = datastore.getURI(ref, predict=True)
275 self.assertEqual(uri.scheme, self.uriScheme)
277 with self.assertRaises(FileNotFoundError):
278 datastore.getURI(ref)

    def testTrustGetRequest(self) -> None:
        """Check that we can get datasets that the registry knows nothing
        about.
        """
        datastore = self.makeDatastore()

        # Skip the test if this datastore does not support trust mode.
        if not hasattr(datastore, "trustGetRequest"):
            self.skipTest("datastore does not support trustGetRequest")

        metrics = makeExampleMetrics()

        for i, sc_name in enumerate(
            ("StructuredDataNoComponents", "StructuredData", "StructuredComposite"), start=1
        ):
            datasetTypeName = f"test_metric{i}"  # Different dataset type name each time.

            disassembled = sc_name == "StructuredComposite"

            # Start datastore in default configuration of using registry
            datastore.trustGetRequest = False

            # Create multiple storage classes for testing with or without
            # disassembly
            sc = self.storageClassFactory.getStorageClass(sc_name)
            dimensions = self.universe.extract(("visit", "physical_filter"))

            dataId = {"instrument": "dummy", "visit": 52 + i, "physical_filter": "V", "band": "v"}

            ref = self.makeDatasetRef(datasetTypeName, dimensions, sc, dataId)
            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))
            self.assertTrue(datastore.knows(ref))
            multi = datastore.knows_these([ref])
            self.assertTrue(multi[ref])
            multi = datastore.mexists([ref])
            self.assertTrue(multi[ref])

            # Get
            metricsOut = datastore.get(ref)
            self.assertEqual(metrics, metricsOut)

            # Get the URI(s)
            primaryURI, componentURIs = datastore.getURIs(ref)
            if disassembled:
                self.assertIsNone(primaryURI)
                self.assertEqual(len(componentURIs), 3)
            else:
                self.assertIn(datasetTypeName, primaryURI.path)
                self.assertFalse(componentURIs)

            # Delete the registry entry, so the dataset can now only be
            # found by trusting.
            datastore.removeStoredItemInfo(ref)

            # With trusting still disabled, check that things break
            datastore.trustGetRequest = False

            # Does it exist?
            self.assertFalse(datastore.exists(ref))
            self.assertFalse(datastore.knows(ref))
            multi = datastore.knows_these([ref])
            self.assertFalse(multi[ref])
            multi = datastore.mexists([ref])
            self.assertFalse(multi[ref])

            with self.assertRaises(FileNotFoundError):
                datastore.get(ref)

            if sc_name != "StructuredDataNoComponents":
                with self.assertRaises(FileNotFoundError):
                    datastore.get(ref.makeComponentRef("data"))

            # URI should fail unless we ask for prediction
            with self.assertRaises(FileNotFoundError):
                datastore.getURIs(ref)

            predicted_primary, predicted_disassembled = datastore.getURIs(ref, predict=True)
            if disassembled:
                self.assertIsNone(predicted_primary)
                self.assertEqual(len(predicted_disassembled), 3)
                for uri in predicted_disassembled.values():
                    self.assertEqual(uri.fragment, "predicted")
                    self.assertIn(datasetTypeName, uri.path)
            else:
                self.assertIn(datasetTypeName, predicted_primary.path)
                self.assertFalse(predicted_disassembled)
                self.assertEqual(predicted_primary.fragment, "predicted")

            # Now enable registry-free trusting mode
            datastore.trustGetRequest = True

            # Try again to get it
            metricsOut = datastore.get(ref)
            self.assertEqual(metricsOut, metrics)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))

            # Get a component
            if sc_name != "StructuredDataNoComponents":
                comp = "data"
                compRef = ref.makeComponentRef(comp)
                output = datastore.get(compRef)
                self.assertEqual(output, getattr(metrics, comp))

            # Get the URI -- if we trust, this should work even without
            # enabling prediction.
            primaryURI2, componentURIs2 = datastore.getURIs(ref)
            self.assertEqual(primaryURI2, primaryURI)
            self.assertEqual(componentURIs2, componentURIs)

            # Check for compatible storage class.
            if sc_name in ("StructuredDataNoComponents", "StructuredData"):
                # Make a new dataset ref with a compatible storage class.
                ref_comp = ref.overrideStorageClass("StructuredDataDictJson")

                # Without `set_retrieve_dataset_type_method` it will fail to
                # find the correct file.
                self.assertFalse(datastore.exists(ref_comp))
                with self.assertRaises(FileNotFoundError):
                    datastore.get(ref_comp)
                with self.assertRaises(FileNotFoundError):
                    datastore.get(ref, storageClass="StructuredDataDictJson")

                # Need a special method to generate the stored dataset type.
                def _stored_dataset_type(name: str, ref: DatasetRef = ref) -> DatasetType:
                    if name == ref.datasetType.name:
                        return ref.datasetType
                    raise ValueError(f"Unexpected dataset type name {name}")

                datastore.set_retrieve_dataset_type_method(_stored_dataset_type)

                # Storage class override with the original dataset ref.
                metrics_as_dict = datastore.get(ref, storageClass="StructuredDataDictJson")
                self.assertIsInstance(metrics_as_dict, dict)

                # get() should return a dict now.
                metrics_as_dict = datastore.get(ref_comp)
                self.assertIsInstance(metrics_as_dict, dict)

                # exists() should work as well.
                self.assertTrue(datastore.exists(ref_comp))

                datastore.set_retrieve_dataset_type_method(None)

    def testDisassembly(self) -> None:
        """Test disassembly within datastore."""
        metrics = makeExampleMetrics()
        if self.isEphemeral:
            # in-memory datastore does not disassemble
            return

        # Create multiple storage classes for testing different formulations
        # of composites. One of these will not disassemble to provide
        # a reference.
        storageClasses = [
            self.storageClassFactory.getStorageClass(sc)
            for sc in (
                "StructuredComposite",
                "StructuredCompositeTestA",
                "StructuredCompositeTestB",
                "StructuredCompositeReadComp",
                "StructuredData",  # No disassembly
                "StructuredCompositeReadCompNoDisassembly",
            )
        ]

        # Create the test datastore
        datastore = self.makeDatastore()

        # Dummy dataId
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 428, "physical_filter": "R"}

        for i, sc in enumerate(storageClasses):
            with self.subTest(storageClass=sc.name):
                # Create a different dataset type each time round
                # so that a test failure in this subtest does not trigger
                # a cascade of failures because of file clashes
                ref = self.makeDatasetRef(f"metric_comp_{i}", dimensions, sc, dataId)

                disassembled = sc.name not in {"StructuredData", "StructuredCompositeReadCompNoDisassembly"}

                datastore.put(metrics, ref)

                baseURI, compURIs = datastore.getURIs(ref)
                if disassembled:
                    self.assertIsNone(baseURI)
                    self.assertEqual(set(compURIs), {"data", "output", "summary"})
                else:
                    self.assertIsNotNone(baseURI)
                    self.assertEqual(compURIs, {})
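
                # A disassembled composite is stored as one file per
                # component with no single primary file; an assembled
                # composite is the opposite.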

                metrics_get = datastore.get(ref)
                self.assertEqual(metrics_get, metrics)

                # Retrieve the composite with a read parameter
                stop = 4
                metrics_get = datastore.get(ref, parameters={"slice": slice(stop)})
                self.assertEqual(metrics_get.summary, metrics.summary)
                self.assertEqual(metrics_get.output, metrics.output)
                self.assertEqual(metrics_get.data, metrics.data[:stop])

                # Retrieve a component
                data = datastore.get(ref.makeComponentRef("data"))
                self.assertEqual(data, metrics.data)

                # On supported storage classes attempt to access a
                # read-only component
                if "ReadComp" in sc.name:
                    cRef = ref.makeComponentRef("counter")
                    counter = datastore.get(cRef)
                    self.assertEqual(counter, len(metrics.data))

                    counter = datastore.get(cRef, parameters={"slice": slice(stop)})
                    self.assertEqual(counter, stop)

                datastore.remove(ref)

    def prepDeleteTest(self, n_refs: int = 1) -> tuple[Datastore | DatasetRef, ...]:
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()
        # Put
        dimensions = self.universe.extract(("visit", "physical_filter"))
        sc = self.storageClassFactory.getStorageClass("StructuredData")
        refs = []
        for i in range(n_refs):
            dataId = {"instrument": "dummy", "visit": 638 + i, "physical_filter": "U", "band": "u"}
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId)
            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))

            # Get
            metricsOut = datastore.get(ref)
            self.assertEqual(metrics, metricsOut)
            refs.append(ref)

        return datastore, *refs
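
    # prepDeleteTest returns a flattened tuple so that callers can unpack it
    # as ``datastore, ref = self.prepDeleteTest()`` or
    # ``datastore, *refs = self.prepDeleteTest(n_refs=10)``.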

    def testRemove(self) -> None:
        datastore, ref = self.prepDeleteTest()

        # Remove
        datastore.remove(ref)

        # Does it exist?
        self.assertFalse(datastore.exists(ref))

        # Do we now get a predicted URI?
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.fragment, "predicted")

        # Get should now fail
        with self.assertRaises(FileNotFoundError):
            datastore.get(ref)
        # Can only delete once
        with self.assertRaises(FileNotFoundError):
            datastore.remove(ref)

    def testForget(self) -> None:
        datastore, ref = self.prepDeleteTest()

        # Remove
        datastore.forget([ref])

        # Does it exist (as far as we know)?
        self.assertFalse(datastore.exists(ref))

        # Do we now get a predicted URI?
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.fragment, "predicted")

        # Get should now fail
        with self.assertRaises(FileNotFoundError):
            datastore.get(ref)

        # Forgetting again is a silent no-op
        datastore.forget([ref])

        # The predicted URI should still point to the file: forget() only
        # drops the record, it does not delete the artifact.
        self.assertTrue(uri.exists())

    def testTransfer(self) -> None:
        metrics = makeExampleMetrics()

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 2048, "physical_filter": "Uprime", "band": "u"}

        sc = self.storageClassFactory.getStorageClass("StructuredData")
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId)

        inputDatastore = self.makeDatastore("test_input_datastore")
        outputDatastore = self.makeDatastore("test_output_datastore")

        inputDatastore.put(metrics, ref)
        outputDatastore.transfer(inputDatastore, ref)

        metricsOut = outputDatastore.get(ref)
        self.assertEqual(metrics, metricsOut)

    def testBasicTransaction(self) -> None:
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        nDatasets = 6
        dataIds = [
            {"instrument": "dummy", "visit": i, "physical_filter": "V", "band": "v"}
            for i in range(nDatasets)
        ]
        data = [
            (
                self.makeDatasetRef("metric", dimensions, storageClass, dataId),
                makeExampleMetrics(),
            )
            for dataId in dataIds
        ]
        succeed = data[: nDatasets // 2]
        fail = data[nDatasets // 2 :]
        # All datasets added in this transaction should continue to exist
        with datastore.transaction():
            for ref, metrics in succeed:
                datastore.put(metrics, ref)
        # Whereas datasets added in this transaction should not
        with self.assertRaises(TransactionTestError):
            with datastore.transaction():
                for ref, metrics in fail:
                    datastore.put(metrics, ref)
                raise TransactionTestError("This should propagate out of the context manager")
        # Check for datasets that should exist
        for ref, metrics in succeed:
            # Does it exist?
            self.assertTrue(datastore.exists(ref))
            # Get
            metricsOut = datastore.get(ref, parameters=None)
            self.assertEqual(metrics, metricsOut)
            # URI
            uri = datastore.getURI(ref)
            self.assertEqual(uri.scheme, self.uriScheme)
        # Check for datasets that should not exist
        for ref, _ in fail:
            # These should raise
            with self.assertRaises(FileNotFoundError):
                # non-existing file
                datastore.get(ref)
            with self.assertRaises(FileNotFoundError):
                datastore.getURI(ref)

    def testNestedTransaction(self) -> None:
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()

        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V", "band": "v"}
        refBefore = self.makeDatasetRef("metric", dimensions, storageClass, dataId)
        datastore.put(metrics, refBefore)
        with self.assertRaises(TransactionTestError):
            with datastore.transaction():
                dataId = {"instrument": "dummy", "visit": 1, "physical_filter": "V", "band": "v"}
                refOuter = self.makeDatasetRef("metric", dimensions, storageClass, dataId)
                datastore.put(metrics, refOuter)
                with datastore.transaction():
                    dataId = {"instrument": "dummy", "visit": 2, "physical_filter": "V", "band": "v"}
                    refInner = self.makeDatasetRef("metric", dimensions, storageClass, dataId)
                    datastore.put(metrics, refInner)
                # All datasets should exist
                for ref in (refBefore, refOuter, refInner):
                    metricsOut = datastore.get(ref, parameters=None)
                    self.assertEqual(metrics, metricsOut)
                raise TransactionTestError("This should roll back the transaction")
        # Dataset(s) inserted before the transaction should still exist
        metricsOut = datastore.get(refBefore, parameters=None)
        self.assertEqual(metrics, metricsOut)
        # But all datasets inserted during the (rolled back) transaction
        # should be gone
        with self.assertRaises(FileNotFoundError):
            datastore.get(refOuter)
        with self.assertRaises(FileNotFoundError):
            datastore.get(refInner)

    def _prepareIngestTest(self) -> tuple[MetricsExample, DatasetRef]:
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()
        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V", "band": "v"}
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId)
        return metrics, ref

    def runIngestTest(
        self, func: Callable[[MetricsExample, str, DatasetRef], None], expectOutput: bool = True
    ) -> None:
        metrics, ref = self._prepareIngestTest()
        # The file will be deleted after the test.
        # For symlink tests this leads to a situation where the datastore
        # points to a file that does not exist. This will make os.path.exists
        # return False, but the new symlink will then fail with
        # FileExistsError later in the code, so the test still passes.
        with lsst.utils.tests.getTempFilePath(".yaml", expectOutput=expectOutput) as path:
            with open(path, "w") as fd:
                yaml.dump(metrics._asdict(), stream=fd)
            func(metrics, path, ref)

    def testIngestNoTransfer(self) -> None:
        """Test ingesting existing files with no transfer."""
        for mode in (None, "auto"):
            # Some datastores have auto but can't do in place transfer
            if mode == "auto" and "auto" in self.ingestTransferModes and not self.canIngestNoTransferAuto:
                continue

            with self.subTest(mode=mode):
                datastore = self.makeDatastore()

                def succeed(
                    obj: MetricsExample,
                    path: str,
                    ref: DatasetRef,
                    mode: str | None = mode,
                    datastore: Datastore = datastore,
                ) -> None:
                    """Ingest a file already in the datastore root."""
                    # First move it into the root, and adjust the path
                    # accordingly.
                    path = shutil.copy(path, datastore.root.ospath)
                    path = os.path.relpath(path, start=datastore.root.ospath)
                    datastore.ingest(FileDataset(path=path, refs=ref), transfer=mode)
                    self.assertEqual(obj, datastore.get(ref))

                def failInputDoesNotExist(
                    obj: MetricsExample,
                    path: str,
                    ref: DatasetRef,
                    mode: str | None = mode,
                    datastore: Datastore = datastore,
                ) -> None:
                    """Can't ingest files if we're given a bad path."""
                    with self.assertRaises(FileNotFoundError):
                        datastore.ingest(
                            FileDataset(path="this-file-does-not-exist.yaml", refs=ref), transfer=mode
                        )
                    self.assertFalse(datastore.exists(ref))

                def failOutsideRoot(
                    obj: MetricsExample,
                    path: str,
                    ref: DatasetRef,
                    mode: str | None = mode,
                    datastore: Datastore = datastore,
                ) -> None:
                    """Can't ingest files outside of datastore root unless
                    auto.
                    """
                    if mode == "auto":
                        datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                        self.assertTrue(datastore.exists(ref))
                    else:
                        with self.assertRaises(RuntimeError):
                            datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                        self.assertFalse(datastore.exists(ref))

                def failNotImplemented(
                    obj: MetricsExample,
                    path: str,
                    ref: DatasetRef,
                    mode: str | None = mode,
                    datastore: Datastore = datastore,
                ) -> None:
                    with self.assertRaises(NotImplementedError):
                        datastore.ingest(FileDataset(path=path, refs=ref), transfer=mode)

                if mode in self.ingestTransferModes:
                    self.runIngestTest(failOutsideRoot)
                    self.runIngestTest(failInputDoesNotExist)
                    self.runIngestTest(succeed)
                else:
                    self.runIngestTest(failNotImplemented)

    def testIngestTransfer(self) -> None:
        """Test ingesting existing files after transferring them."""
        for mode in ("copy", "move", "link", "hardlink", "symlink", "relsymlink", "auto"):
            with self.subTest(mode=mode):
                datastore = self.makeDatastore(mode)

                def succeed(
                    obj: MetricsExample,
                    path: str,
                    ref: DatasetRef,
                    mode: str | None = mode,
                    datastore: Datastore = datastore,
                ) -> None:
                    """Ingest a file by transferring it to the template
                    location.
                    """
                    datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                    self.assertEqual(obj, datastore.get(ref))

                def failInputDoesNotExist(
                    obj: MetricsExample,
                    path: str,
                    ref: DatasetRef,
                    mode: str | None = mode,
                    datastore: Datastore = datastore,
                ) -> None:
                    """Can't ingest files if we're given a bad path."""
                    with self.assertRaises(FileNotFoundError):
                        # Ensure the file does not look like it is in
                        # datastore for auto mode
                        datastore.ingest(
                            FileDataset(path="../this-file-does-not-exist.yaml", refs=ref), transfer=mode
                        )
                    self.assertFalse(datastore.exists(ref), f"Checking not in datastore using mode {mode}")

                def failNotImplemented(
                    obj: MetricsExample,
                    path: str,
                    ref: DatasetRef,
                    mode: str | None = mode,
                    datastore: Datastore = datastore,
                ) -> None:
                    with self.assertRaises(NotImplementedError):
                        datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)

                if mode in self.ingestTransferModes:
                    self.runIngestTest(failInputDoesNotExist)
                    self.runIngestTest(succeed, expectOutput=(mode != "move"))
                else:
                    self.runIngestTest(failNotImplemented)

    def testIngestSymlinkOfSymlink(self) -> None:
        """Special test for symlink to a symlink ingest"""
        metrics, ref = self._prepareIngestTest()
        # The aim of this test is to create a dataset on disk, then
        # create a symlink to it and finally ingest the symlink such that
        # the symlink in the datastore points to the original dataset.
        for mode in ("symlink", "relsymlink"):
            if mode not in self.ingestTransferModes:
                continue

            print(f"Trying mode {mode}")
            with lsst.utils.tests.getTempFilePath(".yaml") as realpath:
                with open(realpath, "w") as fd:
                    yaml.dump(metrics._asdict(), stream=fd)
                with lsst.utils.tests.getTempFilePath(".yaml") as sympath:
                    os.symlink(os.path.abspath(realpath), sympath)

                    datastore = self.makeDatastore()
                    datastore.ingest(FileDataset(path=os.path.abspath(sympath), refs=ref), transfer=mode)

                    uri = datastore.getURI(ref)
                    self.assertTrue(uri.isLocal, f"Check {uri.scheme}")
                    self.assertTrue(os.path.islink(uri.ospath), f"Check {uri} is a symlink")

                    linkTarget = os.readlink(uri.ospath)
                    if mode == "relsymlink":
                        self.assertFalse(os.path.isabs(linkTarget))
                    else:
                        self.assertEqual(linkTarget, os.path.abspath(realpath))

                    # Check that we can get the dataset back regardless of mode
                    metric2 = datastore.get(ref)
                    self.assertEqual(metric2, metrics)

                    # Clean up the file for the next time round the loop,
                    # since it will get the same file name in the store.
                    datastore.remove(ref)

    def _populate_export_datastore(self, name: str) -> tuple[Datastore, list[DatasetRef]]:
        datastore = self.makeDatastore(name)

        # For now only the FileDatastore can be used for this test.
        # A ChainedDatastore that only includes InMemoryDatastores has to be
        # skipped as well.
        for name in datastore.names:
            if not name.startswith("InMemoryDatastore"):
                break
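        # (This for/else relies on the else clause running only when the loop
        # completes without a break, i.e. when every child datastore is
        # in-memory.)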
        else:
            raise unittest.SkipTest("in-memory datastore does not support record export/import")

        metrics = makeExampleMetrics()
        dimensions = self.universe.extract(("visit", "physical_filter"))
        sc = self.storageClassFactory.getStorageClass("StructuredData")

        refs = []
        for visit in (2048, 2049, 2050):
            dataId = {"instrument": "dummy", "visit": visit, "physical_filter": "Uprime", "band": "u"}
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId)
            datastore.put(metrics, ref)
            refs.append(ref)
        return datastore, refs

    def testExportImportRecords(self) -> None:
        """Test for export_records and import_records methods."""
        datastore, refs = self._populate_export_datastore("test_datastore")
        for exported_refs in (refs, refs[1:]):
            n_refs = len(exported_refs)
            records = datastore.export_records(exported_refs)
            self.assertGreater(len(records), 0)
            self.assertTrue(set(records.keys()) <= set(datastore.names))
            # In a ChainedDatastore each FileDatastore will have a complete set
            for datastore_name in records:
                record_data = records[datastore_name]
                self.assertEqual(len(record_data.records), n_refs)

                # Check that subsetting works; include a non-existing dataset ID.
                dataset_ids = {exported_refs[0].id, uuid.uuid4()}
                subset = record_data.subset(dataset_ids)
                assert subset is not None
                self.assertEqual(len(subset.records), 1)
                subset = record_data.subset({uuid.uuid4()})
                self.assertIsNone(subset)

        # Use the same datastore name to import a relative path.
        datastore2 = self.makeDatastore("test_datastore")

        records = datastore.export_records(refs[1:])
        datastore2.import_records(records)

        with self.assertRaises(FileNotFoundError):
            data = datastore2.get(refs[0])
        data = datastore2.get(refs[1])
        self.assertIsNotNone(data)
        data = datastore2.get(refs[2])
        self.assertIsNotNone(data)

    def testExport(self) -> None:
        datastore, refs = self._populate_export_datastore("test_datastore")

        datasets = list(datastore.export(refs))
        self.assertEqual(len(datasets), 3)

        for transfer in (None, "auto"):
            # Both will default to None
            datasets = list(datastore.export(refs, transfer=transfer))
            self.assertEqual(len(datasets), 3)

        with self.assertRaises(TypeError):
            list(datastore.export(refs, transfer="copy"))

        with self.assertRaises(TypeError):
            list(datastore.export(refs, directory="exportDir", transfer="move"))

        # Create a new ref that is not known to the datastore and try to
        # export it.
        sc = self.storageClassFactory.getStorageClass("ThingOne")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V", "band": "v"}
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId)
        with self.assertRaises(FileNotFoundError):
            list(datastore.export(refs + [ref], transfer=None))

    def test_pydantic_dict_storage_class_conversions(self) -> None:
        """Test converting a dataset stored as a pydantic model into a dict on
        read.
        """
        datastore = self.makeDatastore()
        store_as_model = self.makeDatasetRef(
            "store_as_model",
            dimensions=self.universe.empty,
            storageClass="DictConvertibleModel",
            dataId=DataCoordinate.makeEmpty(self.universe),
        )
        content = {"a": "one", "b": "two"}
        model = DictConvertibleModel.from_dict(content, extra="original content")
        datastore.put(model, store_as_model)
        retrieved_model = datastore.get(store_as_model)
        self.assertEqual(retrieved_model, model)
        loaded = datastore.get(store_as_model.overrideStorageClass("NativeDictForConvertibleModel"))
        self.assertEqual(type(loaded), dict)
        self.assertEqual(loaded, content)

    def test_simple_class_put_get(self) -> None:
        """Test that we can put and get a simple class with dict()
        constructor.
        """
        datastore = self.makeDatastore()
        data = MetricsExample(summary={"a": 1}, data=[1, 2, 3], output={"b": 2})
        self._assert_different_puts(datastore, "MetricsExample", data)

    def test_dataclass_put_get(self) -> None:
        """Test that we can put and get a simple dataclass."""
        datastore = self.makeDatastore()
        data = MetricsExampleDataclass(summary={"a": 1}, data=[1, 2, 3], output={"b": 2})
        self._assert_different_puts(datastore, "MetricsExampleDataclass", data)

    def test_pydantic_put_get(self) -> None:
        """Test that we can put and get a simple Pydantic model."""
        datastore = self.makeDatastore()
        data = MetricsExampleModel(summary={"a": 1}, data=[1, 2, 3], output={"b": 2})
        self._assert_different_puts(datastore, "MetricsExampleModel", data)

    def test_tuple_put_get(self) -> None:
        """Test that we can put and get a tuple."""
        datastore = self.makeDatastore()
        data = ("a", "b", 1)
        self._assert_different_puts(datastore, "TupleExample", data)

    def _assert_different_puts(self, datastore: Datastore, storageClass_root: str, data: Any) -> None:
        refs = {
            x: self.makeDatasetRef(
                f"store_as_{x}",
                dimensions=self.universe.empty,
                storageClass=f"{storageClass_root}{x}",
                dataId=DataCoordinate.makeEmpty(self.universe),
            )
            for x in ["A", "B"]
        }

        for ref in refs.values():
            datastore.put(data, ref)

        self.assertEqual(datastore.get(refs["A"]), datastore.get(refs["B"]))


class PosixDatastoreTestCase(DatastoreTests, unittest.TestCase):
    """PosixDatastore specialization"""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
    uriScheme = "file"
    canIngestNoTransferAuto = True
    ingestTransferModes = (None, "copy", "move", "link", "hardlink", "symlink", "relsymlink", "auto")
    isEphemeral = False
    rootKeys = ("root",)
    validationCanFail = True

    def setUp(self) -> None:
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()

    def testAtomicWrite(self) -> None:
        """Test that we write to a temporary and then rename"""
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()

        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V", "band": "v"}
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId)

        with self.assertLogs("lsst.resources", "DEBUG") as cm:
            datastore.put(metrics, ref)
        move_logs = [ll for ll in cm.output if "transfer=" in ll]
        self.assertIn("transfer=move", move_logs[0])

        # And the transfer should be file to file.
        self.assertEqual(move_logs[0].count("file://"), 2)

    def testCanNotDeterminePutFormatterLocation(self) -> None:
        """Verify that the expected exception is raised if the FileDatastore
        can not determine the put formatter location.
        """
        _ = makeExampleMetrics()
        datastore = self.makeDatastore()

        # Create multiple storage classes for testing different formulations
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")

        sccomp = StorageClass("Dummy")
        compositeStorageClass = StorageClass(
            "StructuredComposite", components={"dummy": sccomp, "dummy2": sccomp}
        )

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V", "band": "v"}

        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId)
        compRef = self.makeDatasetRef("metric", dimensions, compositeStorageClass, dataId)

        def raiser(ref: DatasetRef) -> None:
            raise DatasetTypeNotSupportedError()

        with unittest.mock.patch.object(
            lsst.daf.butler.datastores.fileDatastore.FileDatastore,
            "_determine_put_formatter_location",
            side_effect=raiser,
        ):
            # Verify the non-composite ref execution path:
            with self.assertRaises(DatasetTypeNotSupportedError):
                datastore.getURIs(ref, predict=True)

            # Verify the composite-ref execution path:
            with self.assertRaises(DatasetTypeNotSupportedError):
                datastore.getURIs(compRef, predict=True)

    def test_roots(self) -> None:
        datastore = self.makeDatastore()

        self.assertEqual(set(datastore.names), set(datastore.roots.keys()))
        for root in datastore.roots.values():
            if root is not None:
                self.assertTrue(root.exists())


class PosixDatastoreNoChecksumsTestCase(PosixDatastoreTestCase):
    """Posix datastore tests but with checksums disabled."""

    configFile = os.path.join(TESTDIR, "config/basic/posixDatastoreNoChecksums.yaml")

    def testChecksum(self) -> None:
        """Ensure that checksums have not been calculated."""
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()

        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V", "band": "v"}
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId)

        # Configuration should have disabled checksum calculation
        datastore.put(metrics, ref)
        infos = datastore.getStoredItemsInfo(ref)
        self.assertIsNone(infos[0].checksum)

        # Remove and put back, but with checksums enabled explicitly
        datastore.remove(ref)
        datastore.useChecksum = True
        datastore.put(metrics, ref)

        infos = datastore.getStoredItemsInfo(ref)
        self.assertIsNotNone(infos[0].checksum)


class TrashDatastoreTestCase(PosixDatastoreTestCase):
    """Restrict trash test to FileDatastore."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def testTrash(self) -> None:
        datastore, *refs = self.prepDeleteTest(n_refs=10)

        # Trash one of them.
        ref = refs.pop()
        uri = datastore.getURI(ref)
        datastore.trash(ref)
        self.assertTrue(uri.exists(), uri)  # Not deleted yet
        datastore.emptyTrash()
        self.assertFalse(uri.exists(), uri)

        # Trashing it again should be fine.
        datastore.trash(ref)

        # Trash multiple items at once.
        subset = [refs.pop(), refs.pop()]
        datastore.trash(subset)
        datastore.emptyTrash()

        # Remove a record, after which trash should do nothing.
        # This is the execution butler scenario.
        ref = refs.pop()
        uri = datastore.getURI(ref)
        datastore._table.delete(["dataset_id"], {"dataset_id": ref.id})
        self.assertTrue(uri.exists())
        datastore.trash(ref)
        datastore.emptyTrash()
        self.assertTrue(uri.exists())

        # Switch on trust and it should delete the file.
        datastore.trustGetRequest = True
        datastore.trash([ref])
        self.assertFalse(uri.exists())

        # Remove multiples at once in trust mode.
        subset = [refs.pop() for _ in range(3)]
        datastore.trash(subset)
        datastore.trash(refs.pop())  # Check that a single ref can be trashed.


class CleanupPosixDatastoreTestCase(DatastoreTestsBase, unittest.TestCase):
    """Test that the datastore cleans up on failure."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self) -> None:
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()

    def testCleanup(self) -> None:
        """Test that a failed formatter write does clean up a partial file."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        storageClass = self.storageClassFactory.getStorageClass("StructuredData")

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V", "band": "v"}

        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId)

        # Determine where the file will end up (we assume Formatters use
        # the same file extension)
        expectedUri = datastore.getURI(ref, predict=True)
        self.assertEqual(expectedUri.fragment, "predicted")

        self.assertEqual(expectedUri.getExtension(), ".yaml", f"Is there a file extension in {expectedUri}")

        # Try a formatter that fails and a formatter that fails but leaves
        # a file behind.
        for formatter in (BadWriteFormatter, BadNoWriteFormatter):
            with self.subTest(formatter=formatter):
                # Monkey patch the formatter
                datastore.formatterFactory.registerFormatter(ref.datasetType, formatter, overwrite=True)

                # Try to put the dataset, it should fail
                with self.assertRaises(RuntimeError):
                    datastore.put(metrics, ref)

                # Check that there is no file on disk
                self.assertFalse(expectedUri.exists(), f"Check for existence of {expectedUri}")

                # Check that there is a directory
                dir = expectedUri.dirname()
                self.assertTrue(dir.exists(), f"Check for existence of directory {dir}")

        # Force YamlFormatter and check that this time a file is written
        datastore.formatterFactory.registerFormatter(ref.datasetType, YamlFormatter, overwrite=True)
        datastore.put(metrics, ref)
        self.assertTrue(expectedUri.exists(), f"Check for existence of {expectedUri}")
        datastore.remove(ref)
        self.assertFalse(expectedUri.exists(), f"Check for existence of now removed {expectedUri}")


class InMemoryDatastoreTestCase(DatastoreTests, unittest.TestCase):
    """InMemoryDatastore specialization."""

    configFile = os.path.join(TESTDIR, "config/basic/inMemoryDatastore.yaml")
    uriScheme = "mem"
    hasUnsupportedPut = False
    ingestTransferModes = ()
    isEphemeral = True
    rootKeys = None
    validationCanFail = False
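

# Each concrete test case below mostly just swaps in a different datastore
# config file and adjusts the capability flags; the shared test logic lives
# in the base classes above.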


class ChainedDatastoreTestCase(PosixDatastoreTestCase):
    """ChainedDatastore specialization using a PosixDatastore"""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore.yaml")
    hasUnsupportedPut = False
    canIngestNoTransferAuto = False
    ingestTransferModes = ("copy", "move", "hardlink", "symlink", "relsymlink", "link", "auto")
    isEphemeral = False
    rootKeys = (".datastores.1.root", ".datastores.2.root")
    validationCanFail = True


class ChainedDatastoreMemoryTestCase(InMemoryDatastoreTestCase):
    """ChainedDatastore specialization using all InMemoryDatastores"""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore2.yaml")
    validationCanFail = False


class DatastoreConstraintsTests(DatastoreTestsBase):
    """Basic tests of the constraints model of Datastores."""

    def testConstraints(self) -> None:
        """Test constraints model. Assumes that each test class has the
        same constraints.
        """
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        sc1 = self.storageClassFactory.getStorageClass("StructuredData")
        sc2 = self.storageClassFactory.getStorageClass("StructuredDataJson")
        dimensions = self.universe.extract(("visit", "physical_filter", "instrument"))
        dataId = {"visit": 52, "physical_filter": "V", "band": "v", "instrument": "DummyCamComp"}

        # Write empty files suitable for the ingest check (JSON and YAML
        # variants).
        testfile_y = tempfile.NamedTemporaryFile(suffix=".yaml")
        testfile_j = tempfile.NamedTemporaryFile(suffix=".json")
        for datasetTypeName, sc, accepted in (
            ("metric", sc1, True),
            ("metric5", sc1, False),
            ("metric33", sc1, True),
            ("metric5", sc2, True),
        ):
            # Choose a different temp file depending on StorageClass
            testfile = testfile_j if sc.name.endswith("Json") else testfile_y

            with self.subTest(datasetTypeName=datasetTypeName, storageClass=sc.name, file=testfile.name):
                ref = self.makeDatasetRef(datasetTypeName, dimensions, sc, dataId)
                if accepted:
                    datastore.put(metrics, ref)
                    self.assertTrue(datastore.exists(ref))
                    datastore.remove(ref)

                    # Try ingest
                    if self.canIngest:
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertTrue(datastore.exists(ref))
                        datastore.remove(ref)
                else:
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.put(metrics, ref)
                    self.assertFalse(datastore.exists(ref))

                    # Again with ingest
                    if self.canIngest:
                        with self.assertRaises(DatasetTypeNotSupportedError):
                            datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertFalse(datastore.exists(ref))


class PosixDatastoreConstraintsTestCase(DatastoreConstraintsTests, unittest.TestCase):
    """PosixDatastore specialization"""

    configFile = os.path.join(TESTDIR, "config/basic/posixDatastoreP.yaml")
    canIngest = True

    def setUp(self) -> None:
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()


class InMemoryDatastoreConstraintsTestCase(DatastoreConstraintsTests, unittest.TestCase):
    """InMemoryDatastore specialization."""

    configFile = os.path.join(TESTDIR, "config/basic/inMemoryDatastoreP.yaml")
    canIngest = False


class ChainedDatastoreConstraintsNativeTestCase(PosixDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using a PosixDatastore and constraints
    at the ChainedDatastore.
    """

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastorePa.yaml")


class ChainedDatastoreConstraintsTestCase(PosixDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using a PosixDatastore."""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastoreP.yaml")


class ChainedDatastoreMemoryConstraintsTestCase(InMemoryDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using all InMemoryDatastores."""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore2P.yaml")
    canIngest = False


class ChainedDatastorePerStoreConstraintsTests(DatastoreTestsBase, unittest.TestCase):
    """Test that a chained datastore can control constraints per-datastore
    even if the child datastore would accept.
    """

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastorePb.yaml")

    def setUp(self) -> None:
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()

    def testConstraints(self) -> None:
        """Test chained datastore constraints model."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        sc1 = self.storageClassFactory.getStorageClass("StructuredData")
        sc2 = self.storageClassFactory.getStorageClass("StructuredDataJson")
        dimensions = self.universe.extract(("visit", "physical_filter", "instrument"))
        dataId1 = {"visit": 52, "physical_filter": "V", "band": "v", "instrument": "DummyCamComp"}
        dataId2 = {"visit": 52, "physical_filter": "V", "band": "v", "instrument": "HSC"}

        # Write empty files suitable for the ingest check (JSON and YAML
        # variants).
        testfile_y = tempfile.NamedTemporaryFile(suffix=".yaml")
        testfile_j = tempfile.NamedTemporaryFile(suffix=".json")

        for typeName, dataId, sc, accept, ingest in (
            ("metric", dataId1, sc1, (False, True, False), True),
            ("metric5", dataId1, sc1, (False, False, False), False),
            ("metric5", dataId2, sc1, (True, False, False), False),
            ("metric33", dataId2, sc2, (True, True, False), True),
            ("metric5", dataId1, sc2, (False, True, False), True),
        ):
            # Choose a different temp file depending on StorageClass
            testfile = testfile_j if sc.name.endswith("Json") else testfile_y

            with self.subTest(datasetTypeName=typeName, dataId=dataId, sc=sc.name):
                ref = self.makeDatasetRef(typeName, dimensions, sc, dataId)
                if any(accept):
                    datastore.put(metrics, ref)
                    self.assertTrue(datastore.exists(ref))

                    # Check each datastore inside the chained datastore
                    for childDatastore, expected in zip(datastore.datastores, accept, strict=True):
                        self.assertEqual(
                            childDatastore.exists(ref),
                            expected,
                            f"Testing presence of {ref} in datastore {childDatastore.name}",
                        )

                    datastore.remove(ref)

                    # Check that ingest works
                    if ingest:
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertTrue(datastore.exists(ref))

                        # Check each datastore inside the chained datastore
                        for childDatastore, expected in zip(datastore.datastores, accept, strict=True):
                            # Ephemeral currently means InMemoryDatastore,
                            # which does not accept ingest of files.
                            if childDatastore.isEphemeral:
                                expected = False
                            self.assertEqual(
                                childDatastore.exists(ref),
                                expected,
                                f"Testing presence of ingested {ref} in datastore {childDatastore.name}",
                            )

                        datastore.remove(ref)
                    else:
                        with self.assertRaises(DatasetTypeNotSupportedError):
                            datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                else:
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.put(metrics, ref)
                    self.assertFalse(datastore.exists(ref))

                    # Again with ingest
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                    self.assertFalse(datastore.exists(ref))


class DatastoreCacheTestCase(DatasetTestHelper, unittest.TestCase):
    """Tests for datastore caching infrastructure."""

    @classmethod
    def setUpClass(cls) -> None:
        cls.storageClassFactory = StorageClassFactory()
        cls.universe = DimensionUniverse()

        # Ensure that we load the test storage class definitions.
        scConfigFile = os.path.join(TESTDIR, "config/basic/storageClasses.yaml")
        cls.storageClassFactory.addFromConfig(scConfigFile)

    def setUp(self) -> None:
        self.id = 0

        # Create a root that we can use for caching tests.
        self.root = tempfile.mkdtemp(dir=TESTDIR)

        # Create some test dataset refs and associated test files
        sc = self.storageClassFactory.getStorageClass("StructuredDataDict")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V", "band": "v"}

        # Create a list of refs and a list of temporary files
        n_datasets = 10
        self.refs = [self.makeDatasetRef(f"metric{n}", dimensions, sc, dataId) for n in range(n_datasets)]

        root_uri = ResourcePath(self.root, forceDirectory=True)
        self.files = [root_uri.join(f"file{n}.txt") for n in range(n_datasets)]

        # Create test files.
        for uri in self.files:
            uri.write(b"0123456789")

        # Create some composite refs with component files.
        sc = self.storageClassFactory.getStorageClass("StructuredData")
        self.composite_refs = [self.makeDatasetRef(f"composite{n}", dimensions, sc, dataId) for n in range(3)]
        self.comp_files = []
        self.comp_refs = []
        for n, ref in enumerate(self.composite_refs):
            component_refs = []
            component_files = []
            for component in sc.components:
                component_ref = ref.makeComponentRef(component)
                file = root_uri.join(f"composite_file-{n}-{component}.txt")
                component_refs.append(component_ref)
                component_files.append(file)
                file.write(b"9876543210")

            self.comp_files.append(component_files)
            self.comp_refs.append(component_refs)

    def tearDown(self) -> None:
        if self.root is not None and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)

    def _make_cache_manager(self, config_str: str) -> DatastoreCacheManager:
        config = Config.fromYaml(config_str)
        return DatastoreCacheManager(DatastoreCacheManagerConfig(config), universe=self.universe)

    def testNoCacheDir(self) -> None:
        config_str = """
cached:
  root: null
  cacheable:
    metric0: true
  """
        cache_manager = self._make_cache_manager(config_str)

        # Look inside to check we don't have a cache directory
        self.assertIsNone(cache_manager._cache_directory)

        self.assertCache(cache_manager)

        # Test that the cache directory is marked temporary
        self.assertTrue(cache_manager.cache_directory.isTemporary)

    def testNoCacheDirReversed(self) -> None:
        """Use the default caching status and set metric1 to false."""
        config_str = """
cached:
  root: null
  default: true
  cacheable:
    metric1: false
  """
        cache_manager = self._make_cache_manager(config_str)

        self.assertCache(cache_manager)

    def testEnvvarCacheDir(self) -> None:
        config_str = f"""
cached:
  root: '{self.root}'
  cacheable:
    metric0: true
  """

        root = ResourcePath(self.root, forceDirectory=True)
        env_dir = root.join("somewhere", forceDirectory=True)
        elsewhere = root.join("elsewhere", forceDirectory=True)

        # The environment variable should override the config value.
        with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_CACHE_DIRECTORY": env_dir.ospath}):
            cache_manager = self._make_cache_manager(config_str)
            self.assertEqual(cache_manager.cache_directory, env_dir)

        # This environment variable should not override the config value.
        with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": env_dir.ospath}):
            cache_manager = self._make_cache_manager(config_str)
            self.assertEqual(cache_manager.cache_directory, root)

        # No default setting.
        config_str = """
cached:
  root: null
  default: true
  cacheable:
    metric1: false
  """
        cache_manager = self._make_cache_manager(config_str)

        # This environment variable should override the config value.
        with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": env_dir.ospath}):
            cache_manager = self._make_cache_manager(config_str)
            self.assertEqual(cache_manager.cache_directory, env_dir)

        # If both environment variables are set, the main (not IF_UNSET)
        # variable should win.
        with unittest.mock.patch.dict(
            os.environ,
            {
                "DAF_BUTLER_CACHE_DIRECTORY": env_dir.ospath,
                "DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": elsewhere.ospath,
            },
        ):
            cache_manager = self._make_cache_manager(config_str)
            self.assertEqual(cache_manager.cache_directory, env_dir)

        # Use the API to set the environment variable, making sure that the
        # variable is reset on exit.
        with unittest.mock.patch.dict(
            os.environ,
            {"DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": ""},
        ):
            defined, cache_dir = DatastoreCacheManager.set_fallback_cache_directory_if_unset()
            self.assertTrue(defined)
            cache_manager = self._make_cache_manager(config_str)
            self.assertEqual(cache_manager.cache_directory, ResourcePath(cache_dir, forceDirectory=True))

        # Now create the cache manager ahead of time and set the fallback
        # later.
        cache_manager = self._make_cache_manager(config_str)
        self.assertIsNone(cache_manager._cache_directory)
        with unittest.mock.patch.dict(
            os.environ,
            {"DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": ""},
        ):
            defined, cache_dir = DatastoreCacheManager.set_fallback_cache_directory_if_unset()
            self.assertTrue(defined)
            self.assertEqual(cache_manager.cache_directory, ResourcePath(cache_dir, forceDirectory=True))

    def testExplicitCacheDir(self) -> None:
        config_str = f"""
cached:
  root: '{self.root}'
  cacheable:
    metric0: true
        """
        cache_manager = self._make_cache_manager(config_str)

        # Look inside to check we do have a cache directory.
        self.assertEqual(cache_manager.cache_directory, ResourcePath(self.root, forceDirectory=True))

        self.assertCache(cache_manager)

        # Test that the cache directory is not marked temporary
        self.assertFalse(cache_manager.cache_directory.isTemporary)

    def assertCache(self, cache_manager: DatastoreCacheManager) -> None:
        self.assertTrue(cache_manager.should_be_cached(self.refs[0]))
        self.assertFalse(cache_manager.should_be_cached(self.refs[1]))

        uri = cache_manager.move_to_cache(self.files[0], self.refs[0])
        self.assertIsInstance(uri, ResourcePath)
        self.assertIsNone(cache_manager.move_to_cache(self.files[1], self.refs[1]))

        # Check presence in cache using ref and then using file extension.
        self.assertFalse(cache_manager.known_to_cache(self.refs[1]))
        self.assertTrue(cache_manager.known_to_cache(self.refs[0]))
        self.assertFalse(cache_manager.known_to_cache(self.refs[1], self.files[1].getExtension()))
        self.assertTrue(cache_manager.known_to_cache(self.refs[0], self.files[0].getExtension()))

        # Cached file should no longer exist but uncached file should be
        # unaffected.
        self.assertFalse(self.files[0].exists())
        self.assertTrue(self.files[1].exists())

        # Should find this file and it should be within the cache directory.
        with cache_manager.find_in_cache(self.refs[0], ".txt") as found:
            self.assertTrue(found.exists())
            self.assertIsNotNone(found.relative_to(cache_manager.cache_directory))

        # Should not be able to find these in cache
        with cache_manager.find_in_cache(self.refs[0], ".fits") as found:
            self.assertIsNone(found)
        with cache_manager.find_in_cache(self.refs[1], ".fits") as found:
            self.assertIsNone(found)

    def testNoCache(self) -> None:
        cache_manager = DatastoreDisabledCacheManager("", universe=self.universe)
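        # A disabled cache manager is deliberately inert: it declines every
        # dataset and all lookups miss, as the loop below verifies.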
        for uri, ref in zip(self.files, self.refs, strict=True):
            self.assertFalse(cache_manager.should_be_cached(ref))
            self.assertIsNone(cache_manager.move_to_cache(uri, ref))
            self.assertFalse(cache_manager.known_to_cache(ref))
            with cache_manager.find_in_cache(ref, ".txt") as found:
                self.assertIsNone(found, msg=f"{cache_manager}")

    def _expiration_config(self, mode: str, threshold: int) -> str:
        return f"""
cached:
  default: true
  expiry:
    mode: {mode}
    threshold: {threshold}
  cacheable:
    unused: true
        """
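
    # For reference, a sketch of the YAML the helper above renders for
    # mode="files", threshold=2 (values illustrative):
    #
    #   cached:
    #     default: true
    #     expiry:
    #       mode: files
    #       threshold: 2
    #     cacheable:
    #       unused: true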

    def testCacheExpiryFiles(self) -> None:
        threshold = 2  # Keep at least 2 files.
        mode = "files"
        config_str = self._expiration_config(mode, threshold)

        cache_manager = self._make_cache_manager(config_str)

        # Check that an empty cache returns unknown for arbitrary ref
        self.assertFalse(cache_manager.known_to_cache(self.refs[0]))

        # Should end with datasets: 2, 3, 4
        self.assertExpiration(cache_manager, 5, threshold + 1)
        self.assertIn(f"{mode}={threshold}", str(cache_manager))

        # Check that we will not expire a file that is actively in use.
        with cache_manager.find_in_cache(self.refs[2], ".txt") as found:
            self.assertIsNotNone(found)

            # Trigger cache expiration that should remove the file
            # we just retrieved. Should now have: 3, 4, 5
            cached = cache_manager.move_to_cache(self.files[5], self.refs[5])
            self.assertIsNotNone(cached)

            # Cache should still report the standard file count.
            self.assertEqual(cache_manager.file_count, threshold + 1)

            # Add additional entry to cache.
            # Should now have 4, 5, 6
            cached = cache_manager.move_to_cache(self.files[6], self.refs[6])
            self.assertIsNotNone(cached)

            # Is the file still there?
            self.assertTrue(found.exists())

            # Can we read it?
            data = found.read()
            self.assertGreater(len(data), 0)

        # Outside context the file should no longer exist.
        self.assertFalse(found.exists())

        # File count should not have changed.
        self.assertEqual(cache_manager.file_count, threshold + 1)

        # Dataset 2 was in the exempt directory but because hardlinks
        # are used it was deleted from the main cache during cache expiry
        # above and so should no longer be found.
        with cache_manager.find_in_cache(self.refs[2], ".txt") as found:
            self.assertIsNone(found)

        # And the one stored after it is also gone.
        with cache_manager.find_in_cache(self.refs[3], ".txt") as found:
            self.assertIsNone(found)

        # But dataset 4 is present.
        with cache_manager.find_in_cache(self.refs[4], ".txt") as found:
            self.assertIsNotNone(found)

        # Adding a new dataset to the cache should now delete it.
        cache_manager.move_to_cache(self.files[7], self.refs[7])

        with cache_manager.find_in_cache(self.refs[2], ".txt") as found:
            self.assertIsNone(found)

    def testCacheExpiryDatasets(self) -> None:
        threshold = 2  # Keep 2 datasets.
        mode = "datasets"
        config_str = self._expiration_config(mode, threshold)

        cache_manager = self._make_cache_manager(config_str)
        self.assertExpiration(cache_manager, 5, threshold + 1)
        self.assertIn(f"{mode}={threshold}", str(cache_manager))

    def testCacheExpiryDatasetsComposite(self) -> None:
        threshold = 2  # Keep 2 datasets.
        mode = "datasets"
        config_str = self._expiration_config(mode, threshold)

        cache_manager = self._make_cache_manager(config_str)

        n_datasets = 3
        for i in range(n_datasets):
            for component_file, component_ref in zip(self.comp_files[i], self.comp_refs[i], strict=True):
                cached = cache_manager.move_to_cache(component_file, component_ref)
                self.assertIsNotNone(cached)
                self.assertTrue(cache_manager.known_to_cache(component_ref))
                self.assertTrue(cache_manager.known_to_cache(component_ref.makeCompositeRef()))
                self.assertTrue(cache_manager.known_to_cache(component_ref, component_file.getExtension()))

        self.assertEqual(cache_manager.file_count, 6)  # 2 datasets each of 3 files

        # Write two new non-composite datasets; the number of cached files
        # should drop.
        self.assertExpiration(cache_manager, 2, 5)

    def testCacheExpirySize(self) -> None:
        threshold = 55  # Each file is 10 bytes.
        mode = "size"
        config_str = self._expiration_config(mode, threshold)

        cache_manager = self._make_cache_manager(config_str)
        self.assertExpiration(cache_manager, 10, 6)
        self.assertIn(f"{mode}={threshold}", str(cache_manager))
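
        # Size arithmetic: with 10-byte files, a 55-byte threshold settles
        # at six resident files after the ten inserts above, which is the
        # retained count asserted via assertExpiration.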

    def assertExpiration(
        self, cache_manager: DatastoreCacheManager, n_datasets: int, n_retained: int
    ) -> None:
        """Insert the datasets and then check the number retained."""
        for i in range(n_datasets):
            cached = cache_manager.move_to_cache(self.files[i], self.refs[i])
            self.assertIsNotNone(cached)

        self.assertEqual(cache_manager.file_count, n_retained)

        # The oldest files should no longer be in the cache.
        for i in range(n_datasets):
            with cache_manager.find_in_cache(self.refs[i], ".txt") as found:
                if i >= n_datasets - n_retained:
                    self.assertIsInstance(found, ResourcePath)
                else:
                    self.assertIsNone(found)
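
    # Worked example: assertExpiration(cache_manager, 5, 3) moves refs 0-4
    # into the cache and expects only the newest three (refs 2, 3 and 4) to
    # remain findable, matching the "files" expiry test above.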

    def testCacheExpiryAge(self) -> None:
        threshold = 1  # Expire files older than 1 second.
        mode = "age"
        config_str = self._expiration_config(mode, threshold)

        cache_manager = self._make_cache_manager(config_str)
        self.assertIn(f"{mode}={threshold}", str(cache_manager))

        # Insert 2 files, then sleep, then insert 4 more.
        for i in range(2):
            cached = cache_manager.move_to_cache(self.files[i], self.refs[i])
            self.assertIsNotNone(cached)
        time.sleep(2.0)
        for j in range(4):
            i = 2 + j  # Continue the counting.
            cached = cache_manager.move_to_cache(self.files[i], self.refs[i])
            self.assertIsNotNone(cached)

        # Only the files written after the sleep should exist.
        self.assertEqual(cache_manager.file_count, 4)
        with cache_manager.find_in_cache(self.refs[1], ".txt") as found:
            self.assertIsNone(found)
        with cache_manager.find_in_cache(self.refs[2], ".txt") as found:
            self.assertIsInstance(found, ResourcePath)


class NullDatastoreTestCase(DatasetTestHelper, unittest.TestCase):
    """Test the null datastore."""

    storageClassFactory = StorageClassFactory()

    def test_basics(self) -> None:
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDict")
        ref = self.makeDatasetRef("metric", DimensionUniverse().extract(()), storageClass, {})

        null = NullDatastore(None, None)

        self.assertFalse(null.exists(ref))
        self.assertFalse(null.knows(ref))
        knows = null.knows_these([ref])
        self.assertFalse(knows[ref])
        null.validateConfiguration(ref)

        with self.assertRaises(FileNotFoundError):
            null.get(ref)
        with self.assertRaises(NotImplementedError):
            null.put("", ref)
        with self.assertRaises(FileNotFoundError):
            null.getURI(ref)
        with self.assertRaises(FileNotFoundError):
            null.getURIs(ref)
        with self.assertRaises(FileNotFoundError):
            null.getManyURIs([ref])
        with self.assertRaises(NotImplementedError):
            null.getLookupKeys()
        with self.assertRaises(NotImplementedError):
            null.import_records({})
        with self.assertRaises(NotImplementedError):
            null.export_records([])
        with self.assertRaises(NotImplementedError):
            null.export([ref])
        with self.assertRaises(NotImplementedError):
            null.transfer(null, ref)
        with self.assertRaises(NotImplementedError):
            null.emptyTrash()
        with self.assertRaises(NotImplementedError):
            null.trash(ref)
        with self.assertRaises(NotImplementedError):
            null.forget([ref])
        with self.assertRaises(NotImplementedError):
            null.remove(ref)
        with self.assertRaises(NotImplementedError):
            null.retrieveArtifacts([ref], ResourcePath("."))
        with self.assertRaises(NotImplementedError):
            null.transfer_from(null, [ref])
        with self.assertRaises(NotImplementedError):
            null.ingest()


class DatasetRefURIsTestCase(unittest.TestCase):
    """Tests for DatasetRefURIs."""

    def testSequenceAccess(self) -> None:
        """Verify that DatasetRefURIs can be treated like a two-item tuple."""
        uris = DatasetRefURIs()

        self.assertEqual(len(uris), 2)
        self.assertEqual(uris[0], None)
        self.assertEqual(uris[1], {})

        primaryURI = ResourcePath("1/2/3")
        componentURI = ResourcePath("a/b/c")

        # Affirm that DatasetRefURIs does not support MutableSequence
        # functions.
        with self.assertRaises(TypeError):
            uris[0] = primaryURI
        with self.assertRaises(TypeError):
            uris[1] = {"foo": componentURI}

        # But DatasetRefURIs can be set by property name:
        uris.primaryURI = primaryURI
        uris.componentURIs = {"foo": componentURI}
        self.assertEqual(uris.primaryURI, primaryURI)
        self.assertEqual(uris[0], primaryURI)

        primary, components = uris
        self.assertEqual(primary, primaryURI)
        self.assertEqual(components, {"foo": componentURI})

    def testRepr(self) -> None:
        """Verify __repr__ output."""
        uris = DatasetRefURIs(ResourcePath("/1/2/3"), {"comp": ResourcePath("/a/b/c")})
        self.assertEqual(
            repr(uris),
            'DatasetRefURIs(ResourcePath("file:///1/2/3"), {\'comp\': ResourcePath("file:///a/b/c")})',
        )


class StoredFileInfoTestCase(DatasetTestHelper, unittest.TestCase):
    """Test the StoredFileInfo class."""

    storageClassFactory = StorageClassFactory()

    def test_StoredFileInfo(self) -> None:
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDict")
        ref = self.makeDatasetRef("metric", DimensionUniverse().extract(()), storageClass, {})

        record = dict(
            storage_class="StructuredDataDict",
            formatter="lsst.daf.butler.Formatter",
            path="a/b/c.txt",
            component="component",
            dataset_id=ref.id,
            checksum=None,
            file_size=5,
        )
        info = StoredFileInfo.from_record(record)

        self.assertEqual(info.dataset_id, ref.id)
        self.assertEqual(info.to_record(), record)

        ref2 = self.makeDatasetRef("metric", DimensionUniverse().extract(()), storageClass, {})
        rebased = info.rebase(ref2)
        self.assertEqual(rebased.dataset_id, ref2.id)
        self.assertEqual(rebased.rebase(ref), info)
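
        # rebase() swaps the dataset ID while leaving the rest of the record
        # alone, so rebasing back onto the original ref round-trips to a
        # record equal to the one we started with.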

        with self.assertRaises(TypeError):
            rebased.update(formatter=42)

        with self.assertRaises(ValueError):
            rebased.update(something=42, new="42")

        # Check that pickle works on StoredFileInfo.
        pickled_info = pickle.dumps(info)
        unpickled_info = pickle.loads(pickled_info)
        self.assertEqual(unpickled_info, info)


if __name__ == "__main__":
    unittest.main()