Coverage for tests/test_datastore.py: 11% (1084 statements; coverage.py v7.4.0, created at 2024-01-25 10:50 +0000)

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

import contextlib
import os
import pickle
import shutil
import tempfile
import time
import unittest
import unittest.mock
import uuid
from collections.abc import Callable, Iterator
from typing import Any, cast

# Imported so that the mock.patch target in
# testCanNotDeterminePutFormatterLocation resolves.
import lsst.daf.butler.datastores.fileDatastore
import lsst.utils.tests
import yaml
from lsst.daf.butler import (
    Config,
    DataCoordinate,
    DatasetIdGenEnum,
    DatasetRef,
    DatasetType,
    DatasetTypeNotSupportedError,
    Datastore,
    DimensionUniverse,
    FileDataset,
    StorageClass,
    StorageClassFactory,
)
from lsst.daf.butler.datastore import DatasetRefURIs, DatastoreConfig, DatastoreValidationError, NullDatastore
from lsst.daf.butler.datastore.cache_manager import (
    DatastoreCacheManager,
    DatastoreCacheManagerConfig,
    DatastoreDisabledCacheManager,
)
from lsst.daf.butler.datastore.stored_file_info import StoredFileInfo
from lsst.daf.butler.formatters.yaml import YamlFormatter
from lsst.daf.butler.tests import (
    BadNoWriteFormatter,
    BadWriteFormatter,
    DatasetTestHelper,
    DatastoreTestHelper,
    DummyRegistry,
    MetricsExample,
    MetricsExampleDataclass,
    MetricsExampleModel,
)
from lsst.daf.butler.tests.dict_convertible_model import DictConvertibleModel
from lsst.daf.butler.tests.utils import TestCaseMixin
from lsst.resources import ResourcePath
from lsst.utils import doImport

TESTDIR = os.path.dirname(__file__)


def makeExampleMetrics(use_none: bool = False) -> MetricsExample:
    """Make example dataset that can be stored in butler."""
    if use_none:
        array = None
    else:
        array = [563, 234, 456.7, 105, 2054, -1045]
    return MetricsExample(
        {"AM1": 5.2, "AM2": 30.6},
        {"a": [1, 2, 3], "b": {"blue": 5, "red": "green"}},
        array,
    )
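
# Note: judging from the getattr() calls in the component tests below, the
# three positional arguments above populate the ``summary``, ``output``, and
# ``data`` attributes of MetricsExample; the authoritative definition lives
# in lsst.daf.butler.tests.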


class TransactionTestError(Exception):
    """Specific error for transactions, to prevent misdiagnosis
    that might otherwise occur when a standard exception is used.
    """


class DatastoreTestsBase(DatasetTestHelper, DatastoreTestHelper, TestCaseMixin):
    """Support routines for datastore testing."""

    root: str | None = None
    universe: DimensionUniverse
    storageClassFactory: StorageClassFactory

    @classmethod
    def setUpClass(cls) -> None:
        # Storage Classes are fixed for all datastores in these tests.
        scConfigFile = os.path.join(TESTDIR, "config/basic/storageClasses.yaml")
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(scConfigFile)

        # Read the Datastore config so we can get the class
        # information (since we should not assume the constructor
        # name here, but rely on the configuration file itself).
        datastoreConfig = DatastoreConfig(cls.configFile)
        cls.datastoreType = cast(type[Datastore], doImport(datastoreConfig["cls"]))
        cls.universe = DimensionUniverse()

    def setUp(self) -> None:
        self.setUpDatastoreTests(DummyRegistry, DatastoreConfig)

    def tearDown(self) -> None:
        if self.root is not None and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)


class DatastoreTests(DatastoreTestsBase):
    """Some basic tests of a simple datastore."""

    hasUnsupportedPut = True
    rootKeys: tuple[str, ...] | None = None
    isEphemeral: bool = False
    validationCanFail: bool = False
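
    # Concrete subclasses (e.g. PosixDatastoreTestCase and
    # InMemoryDatastoreTestCase below) override these flags and must also
    # define configFile, uriScheme, ingestTransferModes and
    # canIngestNoTransferAuto, all of which the tests here rely on.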

    def testConfigRoot(self) -> None:
        full = DatastoreConfig(self.configFile)
        config = DatastoreConfig(self.configFile, mergeDefaults=False)
        newroot = "/random/location"
        self.datastoreType.setConfigRoot(newroot, config, full)
        if self.rootKeys:
            for k in self.rootKeys:
                self.assertIn(newroot, config[k])

    def testConstructor(self) -> None:
        datastore = self.makeDatastore()
        self.assertIsNotNone(datastore)
        self.assertIs(datastore.isEphemeral, self.isEphemeral)

    def testConfigurationValidation(self) -> None:
        datastore = self.makeDatastore()
        sc = self.storageClassFactory.getStorageClass("ThingOne")
        datastore.validateConfiguration([sc])

        sc2 = self.storageClassFactory.getStorageClass("ThingTwo")
        if self.validationCanFail:
            with self.assertRaises(DatastoreValidationError):
                datastore.validateConfiguration([sc2], logFailures=True)

        dimensions = self.universe.conform(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V", "band": "v"}
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId)
        datastore.validateConfiguration([ref])

    def testParameterValidation(self) -> None:
        """Check that parameters are validated."""
        sc = self.storageClassFactory.getStorageClass("ThingOne")
        dimensions = self.universe.conform(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V", "band": "v"}
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId)
        datastore = self.makeDatastore()
        data = {1: 2, 3: 4}
        datastore.put(data, ref)
        newdata = datastore.get(ref)
        self.assertEqual(data, newdata)
        with self.assertRaises(KeyError):
            newdata = datastore.get(ref, parameters={"missing": 5})

    def testBasicPutGet(self) -> None:
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        # Create multiple storage classes for testing different formulations.
        storageClasses = [
            self.storageClassFactory.getStorageClass(sc)
            for sc in ("StructuredData", "StructuredDataJson", "StructuredDataPickle")
        ]

        dimensions = self.universe.conform(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V", "band": "v"}
        dataId2 = {"instrument": "dummy", "visit": 53, "physical_filter": "V", "band": "v"}

        for sc in storageClasses:
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId)
            ref2 = self.makeDatasetRef("metric", dimensions, sc, dataId2)

            # Make sure that using getManyURIs without predicting before the
            # dataset has been put raises.
            with self.assertRaises(FileNotFoundError):
                datastore.getManyURIs([ref], predict=False)

            # Make sure that using getManyURIs with predicting before the
            # dataset has been put predicts the URI.
            uris = datastore.getManyURIs([ref, ref2], predict=True)
            self.assertIn("52", uris[ref].primaryURI.geturl())
            self.assertIn("#predicted", uris[ref].primaryURI.geturl())
            self.assertIn("53", uris[ref2].primaryURI.geturl())
            self.assertIn("#predicted", uris[ref2].primaryURI.geturl())

            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))
            self.assertTrue(datastore.knows(ref))
            multi = datastore.knows_these([ref])
            self.assertTrue(multi[ref])
            multi = datastore.mexists([ref, ref2])
            self.assertTrue(multi[ref])
            self.assertFalse(multi[ref2])

            # Get
            metricsOut = datastore.get(ref, parameters=None)
            self.assertEqual(metrics, metricsOut)

            uri = datastore.getURI(ref)
            self.assertEqual(uri.scheme, self.uriScheme)

            uris = datastore.getManyURIs([ref])
            self.assertEqual(len(uris), 1)
            ref, uri = uris.popitem()
            self.assertTrue(uri.primaryURI.exists())
            self.assertFalse(uri.componentURIs)

            # Get a component -- we need to construct new refs for them
            # with derived storage classes but with the parent ID.
            for comp in ("data", "output"):
                compRef = ref.makeComponentRef(comp)
                output = datastore.get(compRef)
                self.assertEqual(output, getattr(metricsOut, comp))

                uri = datastore.getURI(compRef)
                self.assertEqual(uri.scheme, self.uriScheme)

                uris = datastore.getManyURIs([compRef])
                self.assertEqual(len(uris), 1)

        storageClass = sc

        # Check that we can put a metric with None in a component and
        # get it back as None.
        metricsNone = makeExampleMetrics(use_none=True)
        dataIdNone = {"instrument": "dummy", "visit": 54, "physical_filter": "V", "band": "v"}
        refNone = self.makeDatasetRef("metric", dimensions, sc, dataIdNone)
        datastore.put(metricsNone, refNone)

        for comp in ("data", "output"):
            compRef = refNone.makeComponentRef(comp)
            output = datastore.get(compRef)
            self.assertEqual(output, getattr(metricsNone, comp))

        # Check that a put fails if the dataset type is not supported.
        if self.hasUnsupportedPut:
            sc = StorageClass("UnsupportedSC", pytype=type(metrics))
            ref = self.makeDatasetRef("unsupportedType", dimensions, sc, dataId)
            with self.assertRaises(DatasetTypeNotSupportedError):
                datastore.put(metrics, ref)

        # These should raise.
        ref = self.makeDatasetRef("metrics", dimensions, storageClass, dataId)
        with self.assertRaises(FileNotFoundError):
            # non-existing file
            datastore.get(ref)

        # Get a URI from it.
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.scheme, self.uriScheme)

        with self.assertRaises(FileNotFoundError):
            datastore.getURI(ref)

    def testTrustGetRequest(self) -> None:
        """Check that we can get datasets that registry knows nothing about."""
        datastore = self.makeDatastore()

        # Skip test if the attribute is not defined.
        if not hasattr(datastore, "trustGetRequest"):
            return
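
        # For context: a FileDatastore with trustGetRequest=True will fall
        # back to looking for the artifact itself (predicting its location)
        # even when its internal record table has no entry for the ref; the
        # loop below exercises both the trusting and non-trusting paths.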

        metrics = makeExampleMetrics()

        i = 0
        for sc_name in ("StructuredDataNoComponents", "StructuredData", "StructuredComposite"):
            i += 1
            datasetTypeName = f"test_metric{i}"  # Different dataset type name each time.

            if sc_name == "StructuredComposite":
                disassembled = True
            else:
                disassembled = False

            # Start datastore in the default configuration of using registry.
            datastore.trustGetRequest = False

            # Create multiple storage classes for testing with or without
            # disassembly.
            sc = self.storageClassFactory.getStorageClass(sc_name)
            dimensions = self.universe.conform(("visit", "physical_filter"))

            dataId = {"instrument": "dummy", "visit": 52 + i, "physical_filter": "V", "band": "v"}

            ref = self.makeDatasetRef(datasetTypeName, dimensions, sc, dataId)
            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))
            self.assertTrue(datastore.knows(ref))
            multi = datastore.knows_these([ref])
            self.assertTrue(multi[ref])
            multi = datastore.mexists([ref])
            self.assertTrue(multi[ref])

            # Get
            metricsOut = datastore.get(ref)
            self.assertEqual(metrics, metricsOut)

            # Get the URI(s)
            primaryURI, componentURIs = datastore.getURIs(ref)
            if disassembled:
                self.assertIsNone(primaryURI)
                self.assertEqual(len(componentURIs), 3)
            else:
                self.assertIn(datasetTypeName, primaryURI.path)
                self.assertFalse(componentURIs)

            # Delete the registry entry so that from now on we have to trust.
            datastore.removeStoredItemInfo(ref)

            # Now stop trusting and check that things break.
            datastore.trustGetRequest = False

            # Does it exist?
            self.assertFalse(datastore.exists(ref))
            self.assertFalse(datastore.knows(ref))
            multi = datastore.knows_these([ref])
            self.assertFalse(multi[ref])
            multi = datastore.mexists([ref])
            self.assertFalse(multi[ref])

            with self.assertRaises(FileNotFoundError):
                datastore.get(ref)

            if sc_name != "StructuredDataNoComponents":
                with self.assertRaises(FileNotFoundError):
                    datastore.get(ref.makeComponentRef("data"))

            # URI should fail unless we ask for prediction.
            with self.assertRaises(FileNotFoundError):
                datastore.getURIs(ref)

            predicted_primary, predicted_disassembled = datastore.getURIs(ref, predict=True)
            if disassembled:
                self.assertIsNone(predicted_primary)
                self.assertEqual(len(predicted_disassembled), 3)
                for uri in predicted_disassembled.values():
                    self.assertEqual(uri.fragment, "predicted")
                    self.assertIn(datasetTypeName, uri.path)
            else:
                self.assertIn(datasetTypeName, predicted_primary.path)
                self.assertFalse(predicted_disassembled)
                self.assertEqual(predicted_primary.fragment, "predicted")

            # Now enable registry-free trusting mode.
            datastore.trustGetRequest = True

            # Try again to get it.
            metricsOut = datastore.get(ref)
            self.assertEqual(metricsOut, metrics)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))

            # Get a component.
            if sc_name != "StructuredDataNoComponents":
                comp = "data"
                compRef = ref.makeComponentRef(comp)
                output = datastore.get(compRef)
                self.assertEqual(output, getattr(metrics, comp))

            # Get the URI -- if we trust, this should work even without
            # enabling prediction.
            primaryURI2, componentURIs2 = datastore.getURIs(ref)
            self.assertEqual(primaryURI2, primaryURI)
            self.assertEqual(componentURIs2, componentURIs)

            # Check for compatible storage class.
            if sc_name in ("StructuredDataNoComponents", "StructuredData"):
                # Make a new dataset ref with a compatible storage class.
                ref_comp = ref.overrideStorageClass("StructuredDataDictJson")

                # Without `set_retrieve_dataset_type_method` it will fail to
                # find the correct file.
                self.assertFalse(datastore.exists(ref_comp))
                with self.assertRaises(FileNotFoundError):
                    datastore.get(ref_comp)
                with self.assertRaises(FileNotFoundError):
                    datastore.get(ref, storageClass="StructuredDataDictJson")

                # Need a special method to generate the stored dataset type.
                def _stored_dataset_type(name: str, ref: DatasetRef = ref) -> DatasetType:
                    if name == ref.datasetType.name:
                        return ref.datasetType
                    raise ValueError(f"Unexpected dataset type name {name}")

                datastore.set_retrieve_dataset_type_method(_stored_dataset_type)

                # Storage class override with the original dataset ref.
                metrics_as_dict = datastore.get(ref, storageClass="StructuredDataDictJson")
                self.assertIsInstance(metrics_as_dict, dict)

                # get() should return a dict now.
                metrics_as_dict = datastore.get(ref_comp)
                self.assertIsInstance(metrics_as_dict, dict)

                # exists() should work as well.
                self.assertTrue(datastore.exists(ref_comp))

                datastore.set_retrieve_dataset_type_method(None)

    def testDisassembly(self) -> None:
        """Test disassembly within datastore."""
        metrics = makeExampleMetrics()
        if self.isEphemeral:
            # The in-memory datastore does not disassemble.
            return

        # Create multiple storage classes for testing different formulations
        # of composites. One of these will not disassemble, to provide
        # a reference.
        storageClasses = [
            self.storageClassFactory.getStorageClass(sc)
            for sc in (
                "StructuredComposite",
                "StructuredCompositeTestA",
                "StructuredCompositeTestB",
                "StructuredCompositeReadComp",
                "StructuredData",  # No disassembly
                "StructuredCompositeReadCompNoDisassembly",
            )
        ]

        # Create the test datastore.
        datastore = self.makeDatastore()

        # Dummy dataId
        dimensions = self.universe.conform(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 428, "physical_filter": "R"}

        for i, sc in enumerate(storageClasses):
            with self.subTest(storageClass=sc.name):
                # Create a different dataset type each time round
                # so that a test failure in this subtest does not trigger
                # a cascade of failures because of file clashes.
                ref = self.makeDatasetRef(f"metric_comp_{i}", dimensions, sc, dataId)

                disassembled = sc.name not in {"StructuredData", "StructuredCompositeReadCompNoDisassembly"}

                datastore.put(metrics, ref)

                baseURI, compURIs = datastore.getURIs(ref)
                if disassembled:
                    self.assertIsNone(baseURI)
                    self.assertEqual(set(compURIs), {"data", "output", "summary"})
                else:
                    self.assertIsNotNone(baseURI)
                    self.assertEqual(compURIs, {})

                metrics_get = datastore.get(ref)
                self.assertEqual(metrics_get, metrics)

                # Retrieve the composite with a read parameter.
                stop = 4
                metrics_get = datastore.get(ref, parameters={"slice": slice(stop)})
                self.assertEqual(metrics_get.summary, metrics.summary)
                self.assertEqual(metrics_get.output, metrics.output)
                self.assertEqual(metrics_get.data, metrics.data[:stop])

                # Retrieve a component.
                data = datastore.get(ref.makeComponentRef("data"))
                self.assertEqual(data, metrics.data)

                # On supported storage classes attempt to access a read-only
                # component.
                if "ReadComp" in sc.name:
                    cRef = ref.makeComponentRef("counter")
                    counter = datastore.get(cRef)
                    self.assertEqual(counter, len(metrics.data))

                    counter = datastore.get(cRef, parameters={"slice": slice(stop)})
                    self.assertEqual(counter, stop)

                datastore.remove(ref)

    def prepDeleteTest(self, n_refs: int = 1) -> tuple[Datastore | DatasetRef, ...]:
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()
        # Put
        dimensions = self.universe.conform(("visit", "physical_filter"))
        sc = self.storageClassFactory.getStorageClass("StructuredData")
        refs = []
        for i in range(n_refs):
            dataId = {"instrument": "dummy", "visit": 638 + i, "physical_filter": "U", "band": "u"}
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId)
            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))

            # Get
            metricsOut = datastore.get(ref)
            self.assertEqual(metrics, metricsOut)
            refs.append(ref)

        return datastore, *refs
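
    # Callers unpack the flattened tuple directly, e.g.:
    #
    #     datastore, ref = self.prepDeleteTest()
    #     datastore, *refs = self.prepDeleteTest(n_refs=10)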

    def testRemove(self) -> None:
        datastore, ref = self.prepDeleteTest()

        # Remove
        datastore.remove(ref)

        # Does it exist?
        self.assertFalse(datastore.exists(ref))

        # Do we now get a predicted URI?
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.fragment, "predicted")

        # Get should now fail.
        with self.assertRaises(FileNotFoundError):
            datastore.get(ref)
        # Can only delete once.
        with self.assertRaises(FileNotFoundError):
            datastore.remove(ref)

    def testForget(self) -> None:
        datastore, ref = self.prepDeleteTest()

        # Forget
        datastore.forget([ref])

        # Does it exist (as far as we know)?
        self.assertFalse(datastore.exists(ref))

        # Do we now get a predicted URI?
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.fragment, "predicted")

        # Get should now fail.
        with self.assertRaises(FileNotFoundError):
            datastore.get(ref)

        # Forgetting again is a silent no-op.
        datastore.forget([ref])

        # Predicted URI should still point to the file.
        self.assertTrue(uri.exists())

    def testTransfer(self) -> None:
        metrics = makeExampleMetrics()

        dimensions = self.universe.conform(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 2048, "physical_filter": "Uprime", "band": "u"}

        sc = self.storageClassFactory.getStorageClass("StructuredData")
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId)

        inputDatastore = self.makeDatastore("test_input_datastore")
        outputDatastore = self.makeDatastore("test_output_datastore")

        inputDatastore.put(metrics, ref)
        outputDatastore.transfer(inputDatastore, ref)

        metricsOut = outputDatastore.get(ref)
        self.assertEqual(metrics, metricsOut)

    def testBasicTransaction(self) -> None:
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.conform(("visit", "physical_filter"))
        nDatasets = 6
        dataIds = [
            {"instrument": "dummy", "visit": i, "physical_filter": "V", "band": "v"} for i in range(nDatasets)
        ]
        data = [
            (
                self.makeDatasetRef("metric", dimensions, storageClass, dataId),
                makeExampleMetrics(),
            )
            for dataId in dataIds
        ]
        succeed = data[: nDatasets // 2]
        fail = data[nDatasets // 2 :]
        # All datasets added in this transaction should continue to exist.
        with datastore.transaction():
            for ref, metrics in succeed:
                datastore.put(metrics, ref)
        # Whereas datasets added in this transaction should not.
        with self.assertRaises(TransactionTestError):
            with datastore.transaction():
                for ref, metrics in fail:
                    datastore.put(metrics, ref)
                raise TransactionTestError("This should propagate out of the context manager")
        # Check for datasets that should exist.
        for ref, metrics in succeed:
            # Does it exist?
            self.assertTrue(datastore.exists(ref))
            # Get
            metricsOut = datastore.get(ref, parameters=None)
            self.assertEqual(metrics, metricsOut)
            # URI
            uri = datastore.getURI(ref)
            self.assertEqual(uri.scheme, self.uriScheme)
        # Check for datasets that should not exist.
        for ref, _ in fail:
            # These should raise.
            with self.assertRaises(FileNotFoundError):
                # non-existing file
                datastore.get(ref)
            with self.assertRaises(FileNotFoundError):
                datastore.getURI(ref)

    def testNestedTransaction(self) -> None:
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.conform(("visit", "physical_filter"))
        metrics = makeExampleMetrics()

        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V", "band": "v"}
        refBefore = self.makeDatasetRef("metric", dimensions, storageClass, dataId)
        datastore.put(metrics, refBefore)
        with self.assertRaises(TransactionTestError):
            with datastore.transaction():
                dataId = {"instrument": "dummy", "visit": 1, "physical_filter": "V", "band": "v"}
                refOuter = self.makeDatasetRef("metric", dimensions, storageClass, dataId)
                datastore.put(metrics, refOuter)
                with datastore.transaction():
                    dataId = {"instrument": "dummy", "visit": 2, "physical_filter": "V", "band": "v"}
                    refInner = self.makeDatasetRef("metric", dimensions, storageClass, dataId)
                    datastore.put(metrics, refInner)
                # All datasets should exist.
                for ref in (refBefore, refOuter, refInner):
                    metricsOut = datastore.get(ref, parameters=None)
                    self.assertEqual(metrics, metricsOut)
                raise TransactionTestError("This should roll back the transaction")
        # Dataset(s) inserted before the transaction should still exist.
        metricsOut = datastore.get(refBefore, parameters=None)
        self.assertEqual(metrics, metricsOut)
        # But all datasets inserted during the (rolled back) transaction
        # should be gone.
        with self.assertRaises(FileNotFoundError):
            datastore.get(refOuter)
        with self.assertRaises(FileNotFoundError):
            datastore.get(refInner)

    def _prepareIngestTest(self) -> tuple[MetricsExample, DatasetRef]:
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.conform(("visit", "physical_filter"))
        metrics = makeExampleMetrics()
        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V", "band": "v"}
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId)
        return metrics, ref

    def runIngestTest(self, func: Callable[[MetricsExample, str, DatasetRef], None]) -> None:
        metrics, ref = self._prepareIngestTest()
        # The file will be deleted after the test.
        # For symlink tests this leads to a situation where the datastore
        # points to a file that does not exist. This will make os.path.exists
        # return False, but then the new symlink will fail with
        # FileExistsError later in the code, so the test still passes.
        with _temp_yaml_file(metrics._asdict()) as path:
            func(metrics, path, ref)
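
    # Note: _temp_yaml_file, defined elsewhere in this module, is a context
    # manager yielding the path of a temporary YAML file that holds the
    # serialized metrics dict.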

    def testIngestNoTransfer(self) -> None:
        """Test ingesting existing files with no transfer."""
        for mode in (None, "auto"):
            # Some datastores have auto but can't do in-place transfer.
            if mode == "auto" and "auto" in self.ingestTransferModes and not self.canIngestNoTransferAuto:
                continue

            with self.subTest(mode=mode):
                datastore = self.makeDatastore()

                def succeed(
                    obj: MetricsExample,
                    path: str,
                    ref: DatasetRef,
                    mode: str | None = mode,
                    datastore: Datastore = datastore,
                ) -> None:
                    """Ingest a file already in the datastore root."""
                    # First move it into the root, and adjust the path
                    # accordingly.
                    path = shutil.copy(path, datastore.root.ospath)
                    path = os.path.relpath(path, start=datastore.root.ospath)
                    datastore.ingest(FileDataset(path=path, refs=ref), transfer=mode)
                    self.assertEqual(obj, datastore.get(ref))

                def failInputDoesNotExist(
                    obj: MetricsExample,
                    path: str,
                    ref: DatasetRef,
                    mode: str | None = mode,
                    datastore: Datastore = datastore,
                ) -> None:
                    """Can't ingest files if we're given a bad path."""
                    with self.assertRaises(FileNotFoundError):
                        datastore.ingest(
                            FileDataset(path="this-file-does-not-exist.yaml", refs=ref), transfer=mode
                        )
                    self.assertFalse(datastore.exists(ref))

                def failOutsideRoot(
                    obj: MetricsExample,
                    path: str,
                    ref: DatasetRef,
                    mode: str | None = mode,
                    datastore: Datastore = datastore,
                ) -> None:
                    """Can't ingest files outside of datastore root unless
                    auto.
                    """
                    if mode == "auto":
                        datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                        self.assertTrue(datastore.exists(ref))
                    else:
                        with self.assertRaises(RuntimeError):
                            datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                        self.assertFalse(datastore.exists(ref))

                def failNotImplemented(
                    obj: MetricsExample,
                    path: str,
                    ref: DatasetRef,
                    mode: str | None = mode,
                    datastore: Datastore = datastore,
                ) -> None:
                    with self.assertRaises(NotImplementedError):
                        datastore.ingest(FileDataset(path=path, refs=ref), transfer=mode)

                if mode in self.ingestTransferModes:
                    self.runIngestTest(failOutsideRoot)
                    self.runIngestTest(failInputDoesNotExist)
                    self.runIngestTest(succeed)
                else:
                    self.runIngestTest(failNotImplemented)

    def testIngestTransfer(self) -> None:
        """Test ingesting existing files after transferring them."""
        for mode in ("copy", "move", "link", "hardlink", "symlink", "relsymlink", "auto"):
            with self.subTest(mode=mode):
                datastore = self.makeDatastore(mode)

                def succeed(
                    obj: MetricsExample,
                    path: str,
                    ref: DatasetRef,
                    mode: str | None = mode,
                    datastore: Datastore = datastore,
                ) -> None:
                    """Ingest a file by transferring it to the template
                    location.
                    """
                    datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                    self.assertEqual(obj, datastore.get(ref))
                    file_exists = os.path.exists(path)
                    if mode == "move":
                        self.assertFalse(file_exists)
                    else:
                        self.assertTrue(file_exists)

                def failInputDoesNotExist(
                    obj: MetricsExample,
                    path: str,
                    ref: DatasetRef,
                    mode: str | None = mode,
                    datastore: Datastore = datastore,
                ) -> None:
                    """Can't ingest files if we're given a bad path."""
                    with self.assertRaises(FileNotFoundError):
                        # Ensure the file does not look like it is in
                        # datastore for auto mode.
                        datastore.ingest(
                            FileDataset(path="../this-file-does-not-exist.yaml", refs=ref), transfer=mode
                        )
                    self.assertFalse(datastore.exists(ref), f"Checking not in datastore using mode {mode}")

                def failNotImplemented(
                    obj: MetricsExample,
                    path: str,
                    ref: DatasetRef,
                    mode: str | None = mode,
                    datastore: Datastore = datastore,
                ) -> None:
                    with self.assertRaises(NotImplementedError):
                        datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)

                if mode in self.ingestTransferModes:
                    self.runIngestTest(failInputDoesNotExist)
                    self.runIngestTest(succeed)
                else:
                    self.runIngestTest(failNotImplemented)

    def testIngestSymlinkOfSymlink(self) -> None:
        """Special test for ingest of a symlink to a symlink."""
        metrics, ref = self._prepareIngestTest()
        # The aim of this test is to create a dataset on disk, then
        # create a symlink to it and finally ingest the symlink such that
        # the symlink in the datastore points to the original dataset.
        for mode in ("symlink", "relsymlink"):
            if mode not in self.ingestTransferModes:
                continue

            print(f"Trying mode {mode}")
            with _temp_yaml_file(metrics._asdict()) as realpath:
                with tempfile.TemporaryDirectory() as tmpdir:
                    sympath = os.path.join(tmpdir, "symlink.yaml")
                    os.symlink(os.path.realpath(realpath), sympath)

                    datastore = self.makeDatastore()
                    datastore.ingest(FileDataset(path=os.path.abspath(sympath), refs=ref), transfer=mode)

                    uri = datastore.getURI(ref)
                    self.assertTrue(uri.isLocal, f"Check {uri.scheme}")
                    self.assertTrue(os.path.islink(uri.ospath), f"Check {uri} is a symlink")

                    linkTarget = os.readlink(uri.ospath)
                    if mode == "relsymlink":
                        self.assertFalse(os.path.isabs(linkTarget))
                    else:
                        self.assertTrue(os.path.samefile(linkTarget, realpath))

                    # Check that we can get the dataset back regardless of
                    # mode.
                    metric2 = datastore.get(ref)
                    self.assertEqual(metric2, metrics)

                    # Clean up the file for the next time round the loop,
                    # since it will get the same file name in the store.
                    datastore.remove(ref)

    def _populate_export_datastore(self, name: str) -> tuple[Datastore, list[DatasetRef]]:
        datastore = self.makeDatastore(name)

        # For now only the FileDatastore can be used for this test.
        # ChainedDatastores that only include InMemoryDatastores have to be
        # skipped as well.
        for datastore_name in datastore.names:
            if not datastore_name.startswith("InMemoryDatastore"):
                break
        else:
            raise unittest.SkipTest("in-memory datastore does not support record export/import")
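
        # (The SkipTest above sits in the loop's ``else`` clause, so it is
        # raised only when the loop completes without a break, i.e. when
        # every child datastore is an in-memory one.)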

        metrics = makeExampleMetrics()
        dimensions = self.universe.conform(("visit", "physical_filter"))
        sc = self.storageClassFactory.getStorageClass("StructuredData")

        refs = []
        for visit in (2048, 2049, 2050):
            dataId = {"instrument": "dummy", "visit": visit, "physical_filter": "Uprime", "band": "u"}
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId)
            datastore.put(metrics, ref)
            refs.append(ref)
        return datastore, refs

    def testExportImportRecords(self) -> None:
        """Test for export_records and import_records methods."""
        datastore, refs = self._populate_export_datastore("test_datastore")
        for exported_refs in (refs, refs[1:]):
            n_refs = len(exported_refs)
            records = datastore.export_records(exported_refs)
            self.assertGreater(len(records), 0)
            self.assertTrue(set(records.keys()) <= set(datastore.names))
            # In a ChainedDatastore each FileDatastore will have a complete
            # set.
            for datastore_name in records:
                record_data = records[datastore_name]
                self.assertEqual(len(record_data.records), n_refs)

                # Check that subsetting works; include a non-existing dataset
                # ID.
                dataset_ids = {exported_refs[0].id, uuid.uuid4()}
                subset = record_data.subset(dataset_ids)
                assert subset is not None
                self.assertEqual(len(subset.records), 1)
                subset = record_data.subset({uuid.uuid4()})
                self.assertIsNone(subset)

        # Use the same datastore name to import a relative path.
        datastore2 = self.makeDatastore("test_datastore")

        records = datastore.export_records(refs[1:])
        datastore2.import_records(records)

        with self.assertRaises(FileNotFoundError):
            data = datastore2.get(refs[0])
        data = datastore2.get(refs[1])
        self.assertIsNotNone(data)
        data = datastore2.get(refs[2])
        self.assertIsNotNone(data)

    def testExport(self) -> None:
        datastore, refs = self._populate_export_datastore("test_datastore")

        datasets = list(datastore.export(refs))
        self.assertEqual(len(datasets), 3)

        for transfer in (None, "auto"):
            # Both will default to None.
            datasets = list(datastore.export(refs, transfer=transfer))
            self.assertEqual(len(datasets), 3)

        with self.assertRaises(TypeError):
            list(datastore.export(refs, transfer="copy"))

        with self.assertRaises(TypeError):
            list(datastore.export(refs, directory="exportDir", transfer="move"))

        # Create a new ref that is not known to the datastore and try to
        # export it.
        sc = self.storageClassFactory.getStorageClass("ThingOne")
        dimensions = self.universe.conform(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V", "band": "v"}
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId)
        with self.assertRaises(FileNotFoundError):
            list(datastore.export(refs + [ref], transfer=None))

    def test_pydantic_dict_storage_class_conversions(self) -> None:
        """Test converting a dataset stored as a pydantic model into a dict
        on read.
        """
        datastore = self.makeDatastore()
        store_as_model = self.makeDatasetRef(
            "store_as_model",
            dimensions=self.universe.empty,
            storageClass="DictConvertibleModel",
            dataId=DataCoordinate.make_empty(self.universe),
        )
        content = {"a": "one", "b": "two"}
        model = DictConvertibleModel.from_dict(content, extra="original content")
        datastore.put(model, store_as_model)
        retrieved_model = datastore.get(store_as_model)
        self.assertEqual(retrieved_model, model)
        loaded = datastore.get(store_as_model.overrideStorageClass("NativeDictForConvertibleModel"))
        self.assertEqual(type(loaded), dict)
        self.assertEqual(loaded, content)

    def test_simple_class_put_get(self) -> None:
        """Test that we can put and get a simple class with a dict()
        constructor.
        """
        datastore = self.makeDatastore()
        data = MetricsExample(summary={"a": 1}, data=[1, 2, 3], output={"b": 2})
        self._assert_different_puts(datastore, "MetricsExample", data)

    def test_dataclass_put_get(self) -> None:
        """Test that we can put and get a simple dataclass."""
        datastore = self.makeDatastore()
        data = MetricsExampleDataclass(summary={"a": 1}, data=[1, 2, 3], output={"b": 2})
        self._assert_different_puts(datastore, "MetricsExampleDataclass", data)

    def test_pydantic_put_get(self) -> None:
        """Test that we can put and get a simple Pydantic model."""
        datastore = self.makeDatastore()
        data = MetricsExampleModel(summary={"a": 1}, data=[1, 2, 3], output={"b": 2})
        self._assert_different_puts(datastore, "MetricsExampleModel", data)

    def test_tuple_put_get(self) -> None:
        """Test that we can put and get a tuple."""
        datastore = self.makeDatastore()
        data = ("a", "b", 1)
        self._assert_different_puts(datastore, "TupleExample", data)

    def _assert_different_puts(self, datastore: Datastore, storageClass_root: str, data: Any) -> None:
        refs = {
            x: self.makeDatasetRef(
                f"store_as_{x}",
                dimensions=self.universe.empty,
                storageClass=f"{storageClass_root}{x}",
                dataId=DataCoordinate.make_empty(self.universe),
            )
            for x in ["A", "B"]
        }

        for ref in refs.values():
            datastore.put(data, ref)

        self.assertEqual(datastore.get(refs["A"]), datastore.get(refs["B"]))


class PosixDatastoreTestCase(DatastoreTests, unittest.TestCase):
    """PosixDatastore specialization."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
    uriScheme = "file"
    canIngestNoTransferAuto = True
    ingestTransferModes = (None, "copy", "move", "link", "hardlink", "symlink", "relsymlink", "auto")
    isEphemeral = False
    rootKeys = ("root",)
    validationCanFail = True

    def setUp(self) -> None:
        # The call to os.path.realpath is necessary because Mac temporary
        # files can end up in either /private/var/folders or /var/folders,
        # which refer to the same location but don't appear to.
        # This matters for "relsymlink" transfer mode, because it needs to be
        # able to read the file through a relative symlink, but some of the
        # intermediate directories are not traversable if you try to get from
        # a tempfile in /var/folders to one in /private/var/folders via a
        # relative path.
        self.root = os.path.realpath(self.enterContext(tempfile.TemporaryDirectory()))
        super().setUp()

    def testAtomicWrite(self) -> None:
        """Test that we write to a temporary and then rename."""
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.conform(("visit", "physical_filter"))
        metrics = makeExampleMetrics()

        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V", "band": "v"}
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId)

        with self.assertLogs("lsst.resources", "DEBUG") as cm:
            datastore.put(metrics, ref)
        move_logs = [ll for ll in cm.output if "transfer=" in ll]
        self.assertIn("transfer=move", move_logs[0])

        # And the transfer should be file to file.
        self.assertEqual(move_logs[0].count("file://"), 2)

    def testCanNotDeterminePutFormatterLocation(self) -> None:
        """Verify that the expected exception is raised if the FileDatastore
        can not determine the put formatter location.
        """
        _ = makeExampleMetrics()
        datastore = self.makeDatastore()

        # Create multiple storage classes for testing different formulations.
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")

        sccomp = StorageClass("Dummy")
        compositeStorageClass = StorageClass(
            "StructuredComposite", components={"dummy": sccomp, "dummy2": sccomp}
        )

        dimensions = self.universe.conform(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V", "band": "v"}

        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId)
        compRef = self.makeDatasetRef("metric", dimensions, compositeStorageClass, dataId)

        def raiser(ref: DatasetRef) -> None:
            raise DatasetTypeNotSupportedError()

        with unittest.mock.patch.object(
            lsst.daf.butler.datastores.fileDatastore.FileDatastore,
            "_determine_put_formatter_location",
            side_effect=raiser,
        ):
            # Verify the non-composite ref execution path:
            with self.assertRaises(DatasetTypeNotSupportedError):
                datastore.getURIs(ref, predict=True)

            # Verify the composite-ref execution path:
            with self.assertRaises(DatasetTypeNotSupportedError):
                datastore.getURIs(compRef, predict=True)

    def test_roots(self) -> None:
        datastore = self.makeDatastore()

        self.assertEqual(set(datastore.names), set(datastore.roots.keys()))
        for root in datastore.roots.values():
            if root is not None:
                self.assertTrue(root.exists())


class PosixDatastoreNoChecksumsTestCase(PosixDatastoreTestCase):
    """Posix datastore tests but with checksums disabled."""

    configFile = os.path.join(TESTDIR, "config/basic/posixDatastoreNoChecksums.yaml")

    def testChecksum(self) -> None:
        """Ensure that checksums have not been calculated."""
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.conform(("visit", "physical_filter"))
        metrics = makeExampleMetrics()

        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V", "band": "v"}
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId)

        # Configuration should have disabled checksum calculation.
        datastore.put(metrics, ref)
        infos = datastore.getStoredItemsInfo(ref)
        self.assertIsNone(infos[0].checksum)

        # Remove and put back, but with checksums enabled explicitly.
        datastore.remove(ref)
        datastore.useChecksum = True
        datastore.put(metrics, ref)

        infos = datastore.getStoredItemsInfo(ref)
        self.assertIsNotNone(infos[0].checksum)

    def test_repeat_ingest(self) -> None:
        """Test that repeatedly ingesting the same file in direct mode
        is allowed.

        This test can only run with FileDatastore since that is the only
        datastore supporting "direct" ingest.
        """
        metrics, v4ref = self._prepareIngestTest()
        datastore = self.makeDatastore()
        v5ref = DatasetRef(
            v4ref.datasetType, v4ref.dataId, v4ref.run, id_generation_mode=DatasetIdGenEnum.DATAID_TYPE_RUN
        )
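
        # DATAID_TYPE_RUN makes the dataset ID a deterministic (UUIDv5-style)
        # function of the dataId, dataset type and run, so re-ingesting the
        # same file reproduces the same ID; the default mode yields a random
        # UUIDv4 that can never match an existing entry.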
        with _temp_yaml_file(metrics._asdict()) as path:
            datastore.ingest(FileDataset(path=path, refs=v4ref), transfer="direct")

            # This will fail because the ref is using UUIDv4.
            with self.assertRaises(RuntimeError):
                datastore.ingest(FileDataset(path=path, refs=v4ref), transfer="direct")

            # UUIDv5 can be repeatedly ingested in direct mode.
            datastore.ingest(FileDataset(path=path, refs=v5ref), transfer="direct")
            datastore.ingest(FileDataset(path=path, refs=v5ref), transfer="direct")

            with self.assertRaises(RuntimeError):
                datastore.ingest(FileDataset(path=path, refs=v5ref), transfer="copy")


class TrashDatastoreTestCase(PosixDatastoreTestCase):
    """Restrict trash test to FileDatastore."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def testTrash(self) -> None:
        datastore, *refs = self.prepDeleteTest(n_refs=10)

        # Trash one of them.
        ref = refs.pop()
        uri = datastore.getURI(ref)
        datastore.trash(ref)
        self.assertTrue(uri.exists(), uri)  # Not deleted yet.
        datastore.emptyTrash()
        self.assertFalse(uri.exists(), uri)

        # Trashing it again should be fine.
        datastore.trash(ref)

        # Trash multiple items at once.
        subset = [refs.pop(), refs.pop()]
        datastore.trash(subset)
        datastore.emptyTrash()

        # Remove a record, after which trash should do nothing.
        # This is the execution butler scenario.
        ref = refs.pop()
        uri = datastore.getURI(ref)
        datastore._table.delete(["dataset_id"], {"dataset_id": ref.id})
        self.assertTrue(uri.exists())
        datastore.trash(ref)
        datastore.emptyTrash()
        self.assertTrue(uri.exists())

        # Switch on trust and it should delete the file.
        datastore.trustGetRequest = True
        datastore.trash([ref])
        self.assertFalse(uri.exists())

        # Remove multiples at once in trust mode.
        subset = [refs.pop() for _ in range(3)]
        datastore.trash(subset)
        datastore.trash(refs.pop())  # Check that a single ref can be trashed.


class CleanupPosixDatastoreTestCase(DatastoreTestsBase, unittest.TestCase):
    """Test that the datastore cleans up on failure."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self) -> None:
        # Override the working directory before calling the base class.
        self.root = tempfile.mkdtemp()
        super().setUp()

    def testCleanup(self) -> None:
        """Test that a failed formatter write does clean up a partial file."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        storageClass = self.storageClassFactory.getStorageClass("StructuredData")

        dimensions = self.universe.conform(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V", "band": "v"}

        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId)

        # Determine where the file will end up (we assume Formatters use
        # the same file extension).
        expectedUri = datastore.getURI(ref, predict=True)
        self.assertEqual(expectedUri.fragment, "predicted")

        self.assertEqual(expectedUri.getExtension(), ".yaml", f"Is there a file extension in {expectedUri}")

        # Try a formatter that fails, and a formatter that fails and leaves
        # a file behind.
        for formatter in (BadWriteFormatter, BadNoWriteFormatter):
            with self.subTest(formatter=formatter):
                # Monkey patch the formatter.
                datastore.formatterFactory.registerFormatter(ref.datasetType, formatter, overwrite=True)

                # Try to put the dataset; it should fail.
                with self.assertRaises(RuntimeError):
                    datastore.put(metrics, ref)

                # Check that there is no file on disk.
                self.assertFalse(expectedUri.exists(), f"Check for existence of {expectedUri}")

                # Check that there is a directory.
                dir = expectedUri.dirname()
                self.assertTrue(dir.exists(), f"Check for existence of directory {dir}")

        # Force YamlFormatter and check that this time a file is written.
        datastore.formatterFactory.registerFormatter(ref.datasetType, YamlFormatter, overwrite=True)
        datastore.put(metrics, ref)
        self.assertTrue(expectedUri.exists(), f"Check for existence of {expectedUri}")
        datastore.remove(ref)
        self.assertFalse(expectedUri.exists(), f"Check for existence of now removed {expectedUri}")


class InMemoryDatastoreTestCase(DatastoreTests, unittest.TestCase):
    """InMemoryDatastore specialization."""

    configFile = os.path.join(TESTDIR, "config/basic/inMemoryDatastore.yaml")
    uriScheme = "mem"
    hasUnsupportedPut = False
    ingestTransferModes = ()
    isEphemeral = True
    rootKeys = None
    validationCanFail = False


class ChainedDatastoreTestCase(PosixDatastoreTestCase):
    """ChainedDatastore specialization using a PosixDatastore."""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore.yaml")
    hasUnsupportedPut = False
    canIngestNoTransferAuto = False
    ingestTransferModes = ("copy", "move", "hardlink", "symlink", "relsymlink", "link", "auto")
    isEphemeral = False
    rootKeys = (".datastores.1.root", ".datastores.2.root")
    validationCanFail = True


class ChainedDatastoreMemoryTestCase(InMemoryDatastoreTestCase):
    """ChainedDatastore specialization using all InMemoryDatastores."""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore2.yaml")
    validationCanFail = False


class DatastoreConstraintsTests(DatastoreTestsBase):
    """Basic tests of the constraints model of Datastores."""

    def testConstraints(self) -> None:
        """Test constraints model. Assumes that each test class has the
        same constraints.
        """
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        sc1 = self.storageClassFactory.getStorageClass("StructuredData")
        sc2 = self.storageClassFactory.getStorageClass("StructuredDataJson")
        dimensions = self.universe.conform(("visit", "physical_filter", "instrument"))
        dataId = {"visit": 52, "physical_filter": "V", "band": "v", "instrument": "DummyCamComp"}

        # Write empty files suitable for ingest check (JSON and YAML
        # variants).
        testfile_y = tempfile.NamedTemporaryFile(suffix=".yaml")
        testfile_j = tempfile.NamedTemporaryFile(suffix=".json")
        for datasetTypeName, sc, accepted in (
            ("metric", sc1, True),
            ("metric5", sc1, False),
            ("metric33", sc1, True),
            ("metric5", sc2, True),
        ):
            # Choose a different temp file depending on StorageClass.
            testfile = testfile_j if sc.name.endswith("Json") else testfile_y

            with self.subTest(datasetTypeName=datasetTypeName, storageClass=sc.name, file=testfile.name):
                ref = self.makeDatasetRef(datasetTypeName, dimensions, sc, dataId)
                if accepted:
                    datastore.put(metrics, ref)
                    self.assertTrue(datastore.exists(ref))
                    datastore.remove(ref)

                    # Try ingest.
                    if self.canIngest:
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertTrue(datastore.exists(ref))
                        datastore.remove(ref)
                else:
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.put(metrics, ref)
                    self.assertFalse(datastore.exists(ref))

                    # Again with ingest.
                    if self.canIngest:
                        with self.assertRaises(DatasetTypeNotSupportedError):
                            datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertFalse(datastore.exists(ref))


class PosixDatastoreConstraintsTestCase(DatastoreConstraintsTests, unittest.TestCase):
    """PosixDatastore specialization."""

    configFile = os.path.join(TESTDIR, "config/basic/posixDatastoreP.yaml")
    canIngest = True

    def setUp(self) -> None:
        # Override the working directory before calling the base class.
        self.root = tempfile.mkdtemp()
        super().setUp()


class InMemoryDatastoreConstraintsTestCase(DatastoreConstraintsTests, unittest.TestCase):
    """InMemoryDatastore specialization."""

    configFile = os.path.join(TESTDIR, "config/basic/inMemoryDatastoreP.yaml")
    canIngest = False


class ChainedDatastoreConstraintsNativeTestCase(PosixDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using a PosixDatastore and constraints
    at the ChainedDatastore.
    """

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastorePa.yaml")


class ChainedDatastoreConstraintsTestCase(PosixDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using a PosixDatastore."""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastoreP.yaml")


class ChainedDatastoreMemoryConstraintsTestCase(InMemoryDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using all InMemoryDatastores."""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore2P.yaml")
    canIngest = False


class ChainedDatastorePerStoreConstraintsTests(DatastoreTestsBase, unittest.TestCase):
    """Test that a chained datastore can control constraints per-datastore
    even if a child datastore would accept.
    """

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastorePb.yaml")

    def setUp(self) -> None:
        # Override the working directory before calling the base class.
        self.root = tempfile.mkdtemp()
        super().setUp()

    def testConstraints(self) -> None:
        """Test chained datastore constraints model."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        sc1 = self.storageClassFactory.getStorageClass("StructuredData")
        sc2 = self.storageClassFactory.getStorageClass("StructuredDataJson")
        dimensions = self.universe.conform(("visit", "physical_filter", "instrument"))
        dataId1 = {"visit": 52, "physical_filter": "V", "band": "v", "instrument": "DummyCamComp"}
        dataId2 = {"visit": 52, "physical_filter": "V", "band": "v", "instrument": "HSC"}

        # Write empty files suitable for ingest check (JSON and YAML
        # variants).
        testfile_y = tempfile.NamedTemporaryFile(suffix=".yaml")
        testfile_j = tempfile.NamedTemporaryFile(suffix=".json")

        for typeName, dataId, sc, accept, ingest in (
            ("metric", dataId1, sc1, (False, True, False), True),
            ("metric5", dataId1, sc1, (False, False, False), False),
            ("metric5", dataId2, sc1, (True, False, False), False),
            ("metric33", dataId2, sc2, (True, True, False), True),
            ("metric5", dataId1, sc2, (False, True, False), True),
        ):
            # Choose a different temp file depending on StorageClass.
            testfile = testfile_j if sc.name.endswith("Json") else testfile_y

            with self.subTest(datasetTypeName=typeName, dataId=dataId, sc=sc.name):
                ref = self.makeDatasetRef(typeName, dimensions, sc, dataId)
                if any(accept):
                    datastore.put(metrics, ref)
                    self.assertTrue(datastore.exists(ref))

                    # Check each datastore inside the chained datastore.
                    for childDatastore, expected in zip(datastore.datastores, accept, strict=True):
                        self.assertEqual(
                            childDatastore.exists(ref),
                            expected,
                            f"Testing presence of {ref} in datastore {childDatastore.name}",
                        )

                    datastore.remove(ref)

                    # Check that ingest works.
                    if ingest:
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertTrue(datastore.exists(ref))

                        # Check each datastore inside the chained datastore.
                        for childDatastore, expected in zip(datastore.datastores, accept, strict=True):
                            # Ephemeral datastores mean InMemory at the
                            # moment, and those do not accept ingest of
                            # files.
                            if childDatastore.isEphemeral:
                                expected = False
                            self.assertEqual(
                                childDatastore.exists(ref),
                                expected,
                                f"Testing presence of ingested {ref} in datastore {childDatastore.name}",
                            )

                        datastore.remove(ref)
                    else:
                        with self.assertRaises(DatasetTypeNotSupportedError):
                            datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")

                else:
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.put(metrics, ref)
                    self.assertFalse(datastore.exists(ref))

                    # Again with ingest.
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                    self.assertFalse(datastore.exists(ref))


class DatastoreCacheTestCase(DatasetTestHelper, unittest.TestCase):
    """Tests for datastore caching infrastructure."""

    @classmethod
    def setUpClass(cls) -> None:
        cls.storageClassFactory = StorageClassFactory()
        cls.universe = DimensionUniverse()

        # Ensure that we load the test storage class definitions.
        scConfigFile = os.path.join(TESTDIR, "config/basic/storageClasses.yaml")
        cls.storageClassFactory.addFromConfig(scConfigFile)

    def setUp(self) -> None:
        self.id = 0

        # Create a root that we can use for caching tests.
        self.root = tempfile.mkdtemp()

        # Create some test dataset refs and associated test files.
        sc = self.storageClassFactory.getStorageClass("StructuredDataDict")
        dimensions = self.universe.conform(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V", "band": "v"}

        # Create a list of refs and a list of temporary files.
        n_datasets = 10
        self.refs = [self.makeDatasetRef(f"metric{n}", dimensions, sc, dataId) for n in range(n_datasets)]

        root_uri = ResourcePath(self.root, forceDirectory=True)
        self.files = [root_uri.join(f"file{n}.txt") for n in range(n_datasets)]

        # Create test files.
        for uri in self.files:
            uri.write(b"0123456789")

        # Create some composite refs with component files.
        sc = self.storageClassFactory.getStorageClass("StructuredData")
        self.composite_refs = [self.makeDatasetRef(f"composite{n}", dimensions, sc, dataId) for n in range(3)]
        self.comp_files = []
        self.comp_refs = []
        for n, ref in enumerate(self.composite_refs):
            component_refs = []
            component_files = []
            for component in sc.components:
                component_ref = ref.makeComponentRef(component)
                file = root_uri.join(f"composite_file-{n}-{component}.txt")
                component_refs.append(component_ref)
                component_files.append(file)
                file.write(b"9876543210")

            self.comp_files.append(component_files)
            self.comp_refs.append(component_refs)

    def tearDown(self) -> None:
        if self.root is not None and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)

    def _make_cache_manager(self, config_str: str) -> DatastoreCacheManager:
        config = Config.fromYaml(config_str)
        return DatastoreCacheManager(DatastoreCacheManagerConfig(config), universe=self.universe)
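
    # The config strings fed to _make_cache_manager below follow this shape
    # (as exercised in these tests; see DatastoreCacheManagerConfig for the
    # authoritative schema):
    #
    #   cached:
    #     root: <directory or null>
    #     default: <bool, applied to dataset types not listed>
    #     cacheable:
    #       <datasetTypeName>: <bool>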

    def testNoCacheDir(self) -> None:
        config_str = """
cached:
  root: null
  cacheable:
    metric0: true
"""
        cache_manager = self._make_cache_manager(config_str)

        # Look inside to check we don't have a cache directory.
        self.assertIsNone(cache_manager._cache_directory)

        self.assertCache(cache_manager)

        # Test that the cache directory is marked temporary.
        self.assertTrue(cache_manager.cache_directory.isTemporary)

    def testNoCacheDirReversed(self) -> None:
        """Use default caching status and set metric1 to false."""
        config_str = """
cached:
  root: null
  default: true
  cacheable:
    metric1: false
"""
        cache_manager = self._make_cache_manager(config_str)

        self.assertCache(cache_manager)

    def testEnvvarCacheDir(self) -> None:
        config_str = f"""
cached:
  root: '{self.root}'
  cacheable:
    metric0: true
        """

        root = ResourcePath(self.root, forceDirectory=True)
        env_dir = root.join("somewhere", forceDirectory=True)
        elsewhere = root.join("elsewhere", forceDirectory=True)

        # Environment variable should override the config value.
        with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_CACHE_DIRECTORY": env_dir.ospath}):
            cache_manager = self._make_cache_manager(config_str)
            self.assertEqual(cache_manager.cache_directory, env_dir)

        # This environment variable should not override the config value.
        with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": env_dir.ospath}):
            cache_manager = self._make_cache_manager(config_str)
            self.assertEqual(cache_manager.cache_directory, root)

        # Now use a config with no cache directory set.
        config_str = """
cached:
  root: null
  default: true
  cacheable:
    metric1: false
        """
        cache_manager = self._make_cache_manager(config_str)

        # This environment variable should override the config value.
        with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": env_dir.ospath}):
            cache_manager = self._make_cache_manager(config_str)
            self.assertEqual(cache_manager.cache_directory, env_dir)

        # If both environment variables are set the main (not IF_UNSET)
        # variable should win.
        with unittest.mock.patch.dict(
            os.environ,
            {
                "DAF_BUTLER_CACHE_DIRECTORY": env_dir.ospath,
                "DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": elsewhere.ospath,
            },
        ):
            cache_manager = self._make_cache_manager(config_str)
            self.assertEqual(cache_manager.cache_directory, env_dir)

        # Use the API to set the environment variable, making sure that the
        # variable is reset on exit.
        with unittest.mock.patch.dict(
            os.environ,
            {"DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": ""},
        ):
            defined, cache_dir = DatastoreCacheManager.set_fallback_cache_directory_if_unset()
            self.assertTrue(defined)
            cache_manager = self._make_cache_manager(config_str)
            self.assertEqual(cache_manager.cache_directory, ResourcePath(cache_dir, forceDirectory=True))

        # Now create the cache manager ahead of time and set the fallback
        # later.
        cache_manager = self._make_cache_manager(config_str)
        self.assertIsNone(cache_manager._cache_directory)
        with unittest.mock.patch.dict(
            os.environ,
            {"DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": ""},
        ):
            defined, cache_dir = DatastoreCacheManager.set_fallback_cache_directory_if_unset()
            self.assertTrue(defined)
            self.assertEqual(cache_manager.cache_directory, ResourcePath(cache_dir, forceDirectory=True))
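
    # Taken together, the precedence these assertions establish is:
    # DAF_BUTLER_CACHE_DIRECTORY first, then an explicit config root, then
    # DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET (or the equivalent
    # set_fallback_cache_directory_if_unset() API), with a temporary
    # directory created only as a last resort.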

    def testExplicitCacheDir(self) -> None:
        config_str = f"""
cached:
  root: '{self.root}'
  cacheable:
    metric0: true
        """
        cache_manager = self._make_cache_manager(config_str)

        # Look inside to check we do have a cache directory.
        self.assertEqual(cache_manager.cache_directory, ResourcePath(self.root, forceDirectory=True))

        self.assertCache(cache_manager)

        # Test that the cache directory is not marked temporary
        self.assertFalse(cache_manager.cache_directory.isTemporary)

    def assertCache(self, cache_manager: DatastoreCacheManager) -> None:
        """Check basic caching behavior for a cacheable ref (refs[0]) and a
        non-cacheable ref (refs[1]).
        """
        self.assertTrue(cache_manager.should_be_cached(self.refs[0]))
        self.assertFalse(cache_manager.should_be_cached(self.refs[1]))

        uri = cache_manager.move_to_cache(self.files[0], self.refs[0])
        self.assertIsInstance(uri, ResourcePath)
        self.assertIsNone(cache_manager.move_to_cache(self.files[1], self.refs[1]))

        # Check presence in cache using ref and then using file extension.
        self.assertFalse(cache_manager.known_to_cache(self.refs[1]))
        self.assertTrue(cache_manager.known_to_cache(self.refs[0]))
        self.assertFalse(cache_manager.known_to_cache(self.refs[1], self.files[1].getExtension()))
        self.assertTrue(cache_manager.known_to_cache(self.refs[0], self.files[0].getExtension()))

        # Cached file should no longer exist but uncached file should be
        # unaffected.
        self.assertFalse(self.files[0].exists())
        self.assertTrue(self.files[1].exists())

        # Should find this file and it should be within the cache directory.
        with cache_manager.find_in_cache(self.refs[0], ".txt") as found:
            self.assertTrue(found.exists())
            self.assertIsNotNone(found.relative_to(cache_manager.cache_directory))

        # Should not be able to find these in cache
        with cache_manager.find_in_cache(self.refs[0], ".fits") as found:
            self.assertIsNone(found)
        with cache_manager.find_in_cache(self.refs[1], ".fits") as found:
            self.assertIsNone(found)
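
    # A minimal standalone sketch of the move_to_cache() contract that
    # assertCache relies on (hypothetical usage, not executed here):
    #
    #   cached_uri = cache_manager.move_to_cache(local_uri, ref)
    #   if cached_uri is not None:
    #       # The file was moved, so local_uri is gone; reads must go
    #       # through find_in_cache() so the entry is protected from
    #       # expiry while the context is held.
    #       with cache_manager.find_in_cache(ref, local_uri.getExtension()) as hit:
    #           data = hit.read()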

    def testNoCache(self) -> None:
        cache_manager = DatastoreDisabledCacheManager("", universe=self.universe)
        for uri, ref in zip(self.files, self.refs, strict=True):
            self.assertFalse(cache_manager.should_be_cached(ref))
            self.assertIsNone(cache_manager.move_to_cache(uri, ref))
            self.assertFalse(cache_manager.known_to_cache(ref))
            with cache_manager.find_in_cache(ref, ".txt") as found:
                self.assertIsNone(found, msg=f"{cache_manager}")

    def _expiration_config(self, mode: str, threshold: int) -> str:
        """Return a YAML expiry configuration for the given mode and
        threshold.
        """
        return f"""
cached:
  default: true
  expiry:
    mode: {mode}
    threshold: {threshold}
  cacheable:
    unused: true
        """

    def testCacheExpiryFiles(self) -> None:
        threshold = 2  # Keep at least 2 files.
        mode = "files"
        config_str = self._expiration_config(mode, threshold)

        cache_manager = self._make_cache_manager(config_str)

        # Check that an empty cache returns unknown for arbitrary ref
        self.assertFalse(cache_manager.known_to_cache(self.refs[0]))

        # Should end with datasets: 2, 3, 4
        self.assertExpiration(cache_manager, 5, threshold + 1)
        self.assertIn(f"{mode}={threshold}", str(cache_manager))

        # Check that we will not expire a file that is actively in use.
        with cache_manager.find_in_cache(self.refs[2], ".txt") as found:
            self.assertIsNotNone(found)

            # Trigger cache expiration that should remove the file
            # we just retrieved. Should now have: 3, 4, 5
            cached = cache_manager.move_to_cache(self.files[5], self.refs[5])
            self.assertIsNotNone(cached)

            # Cache should still report the standard file count.
            self.assertEqual(cache_manager.file_count, threshold + 1)

            # Add additional entry to cache.
            # Should now have 4, 5, 6
            cached = cache_manager.move_to_cache(self.files[6], self.refs[6])
            self.assertIsNotNone(cached)

            # Is the file still there?
            self.assertTrue(found.exists())

            # Can we read it?
            data = found.read()
            self.assertGreater(len(data), 0)

        # Outside context the file should no longer exist.
        self.assertFalse(found.exists())

        # File count should not have changed.
        self.assertEqual(cache_manager.file_count, threshold + 1)

        # Dataset 2 was in the exempt directory but because hardlinks
        # are used it was deleted from the main cache during cache expiry
        # above and so should no longer be found.
        with cache_manager.find_in_cache(self.refs[2], ".txt") as found:
            self.assertIsNone(found)

        # And the one stored after it is also gone.
        with cache_manager.find_in_cache(self.refs[3], ".txt") as found:
            self.assertIsNone(found)

        # But dataset 4 is present.
        with cache_manager.find_in_cache(self.refs[4], ".txt") as found:
            self.assertIsNotNone(found)

        # Adding a new dataset to the cache should now expire dataset 4.
        cache_manager.move_to_cache(self.files[7], self.refs[7])

        with cache_manager.find_in_cache(self.refs[4], ".txt") as found:
            self.assertIsNone(found)

    def testCacheExpiryDatasets(self) -> None:
        threshold = 2  # Keep 2 datasets.
        mode = "datasets"
        config_str = self._expiration_config(mode, threshold)

        cache_manager = self._make_cache_manager(config_str)
        self.assertExpiration(cache_manager, 5, threshold + 1)
        self.assertIn(f"{mode}={threshold}", str(cache_manager))

    def testCacheExpiryDatasetsComposite(self) -> None:
        threshold = 2  # Keep 2 datasets.
        mode = "datasets"
        config_str = self._expiration_config(mode, threshold)

        cache_manager = self._make_cache_manager(config_str)

        n_datasets = 3
        for i in range(n_datasets):
            for component_file, component_ref in zip(self.comp_files[i], self.comp_refs[i], strict=True):
                cached = cache_manager.move_to_cache(component_file, component_ref)
                self.assertIsNotNone(cached)
                self.assertTrue(cache_manager.known_to_cache(component_ref))
                self.assertTrue(cache_manager.known_to_cache(component_ref.makeCompositeRef()))
                self.assertTrue(cache_manager.known_to_cache(component_ref, component_file.getExtension()))

        self.assertEqual(cache_manager.file_count, 6)  # 2 datasets each of 3 files

        # Write two new non-composite datasets; the number of files should
        # drop.
        self.assertExpiration(cache_manager, 2, 5)
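
    # Arithmetic check on the final count: one composite (3 files) plus the
    # two new single-file datasets gives 5 files, consistent with expiry
    # trimming to the 2-dataset threshold before each new dataset arrives.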

    def testCacheExpirySize(self) -> None:
        threshold = 55  # Threshold in bytes; each test file is 10 bytes.
        mode = "size"
        config_str = self._expiration_config(mode, threshold)

        cache_manager = self._make_cache_manager(config_str)
        self.assertExpiration(cache_manager, 10, 6)
        self.assertIn(f"{mode}={threshold}", str(cache_manager))
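
    # Size arithmetic: ten 10-byte files against a 55-byte threshold leaves
    # six files, which is consistent with expiry trimming the cache to the
    # threshold (5 files) before each new file is added.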

    def assertExpiration(
        self, cache_manager: DatastoreCacheManager, n_datasets: int, n_retained: int
    ) -> None:
        """Insert the datasets and then check the number retained."""
        for i in range(n_datasets):
            cached = cache_manager.move_to_cache(self.files[i], self.refs[i])
            self.assertIsNotNone(cached)

        self.assertEqual(cache_manager.file_count, n_retained)

        # The oldest files should no longer be in the cache.
        for i in range(n_datasets):
            with cache_manager.find_in_cache(self.refs[i], ".txt") as found:
                if i >= n_datasets - n_retained:
                    self.assertIsInstance(found, ResourcePath)
                else:
                    self.assertIsNone(found)
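
    # Worked example: assertExpiration(cache_manager, 5, 3) inserts refs
    # 0-4 and expects only the three newest (refs 2, 3 and 4) to survive,
    # since i >= n_datasets - n_retained selects the last n_retained
    # insertions.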

    def testCacheExpiryAge(self) -> None:
        threshold = 1  # Expire files older than 1 second.
        mode = "age"
        config_str = self._expiration_config(mode, threshold)

        cache_manager = self._make_cache_manager(config_str)
        self.assertIn(f"{mode}={threshold}", str(cache_manager))

        # Insert 2 files, then sleep past the expiry age, then insert 4 more.
        for i in range(2):
            cached = cache_manager.move_to_cache(self.files[i], self.refs[i])
            self.assertIsNotNone(cached)
        time.sleep(2.0)
        for j in range(4):
            i = 2 + j  # Continue the counting.
            cached = cache_manager.move_to_cache(self.files[i], self.refs[i])
            self.assertIsNotNone(cached)

        # Only the files written after the sleep should exist.
        self.assertEqual(cache_manager.file_count, 4)
        with cache_manager.find_in_cache(self.refs[1], ".txt") as found:
            self.assertIsNone(found)
        with cache_manager.find_in_cache(self.refs[2], ".txt") as found:
            self.assertIsInstance(found, ResourcePath)


class NullDatastoreTestCase(DatasetTestHelper, unittest.TestCase):
    """Test the null datastore."""

    storageClassFactory = StorageClassFactory()

    def test_basics(self) -> None:
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDict")
        ref = self.makeDatasetRef("metric", DimensionUniverse().empty, storageClass, {})

        null = NullDatastore(None, None)

        self.assertFalse(null.exists(ref))
        self.assertFalse(null.knows(ref))
        knows = null.knows_these([ref])
        self.assertFalse(knows[ref])
        null.validateConfiguration(ref)

        with self.assertRaises(FileNotFoundError):
            null.get(ref)
        with self.assertRaises(NotImplementedError):
            null.put("", ref)
        with self.assertRaises(FileNotFoundError):
            null.getURI(ref)
        with self.assertRaises(FileNotFoundError):
            null.getURIs(ref)
        with self.assertRaises(FileNotFoundError):
            null.getManyURIs([ref])
        with self.assertRaises(NotImplementedError):
            null.getLookupKeys()
        with self.assertRaises(NotImplementedError):
            null.import_records({})
        with self.assertRaises(NotImplementedError):
            null.export_records([])
        with self.assertRaises(NotImplementedError):
            null.export([ref])
        with self.assertRaises(NotImplementedError):
            null.transfer(null, ref)
        with self.assertRaises(NotImplementedError):
            null.emptyTrash()
        with self.assertRaises(NotImplementedError):
            null.trash(ref)
        with self.assertRaises(NotImplementedError):
            null.forget([ref])
        with self.assertRaises(NotImplementedError):
            null.remove(ref)
        with self.assertRaises(NotImplementedError):
            null.retrieveArtifacts([ref], ResourcePath("."))
        with self.assertRaises(NotImplementedError):
            null.transfer_from(null, [ref])
        with self.assertRaises(NotImplementedError):
            null.ingest()
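
    # The split exercised above: existence checks on a NullDatastore return
    # False and retrieval APIs raise FileNotFoundError, while every other
    # operation raises NotImplementedError.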


class DatasetRefURIsTestCase(unittest.TestCase):
    """Tests for DatasetRefURIs."""

    def testSequenceAccess(self) -> None:
        """Verify that DatasetRefURIs can be treated like a two-item tuple."""
        uris = DatasetRefURIs()

        self.assertEqual(len(uris), 2)
        self.assertIsNone(uris[0])
        self.assertEqual(uris[1], {})

        primaryURI = ResourcePath("1/2/3")
        componentURI = ResourcePath("a/b/c")

        # Affirm that DatasetRefURIs does not support MutableSequence
        # assignment.
        with self.assertRaises(TypeError):
            uris[0] = primaryURI
        with self.assertRaises(TypeError):
            uris[1] = {"foo": componentURI}

        # But DatasetRefURIs can be set by property name:
        uris.primaryURI = primaryURI
        uris.componentURIs = {"foo": componentURI}
        self.assertEqual(uris.primaryURI, primaryURI)
        self.assertEqual(uris[0], primaryURI)

        primary, components = uris
        self.assertEqual(primary, primaryURI)
        self.assertEqual(components, {"foo": componentURI})

    def testRepr(self) -> None:
        """Verify __repr__ output."""
        uris = DatasetRefURIs(ResourcePath("/1/2/3"), {"comp": ResourcePath("/a/b/c")})
        self.assertEqual(
            repr(uris),
            'DatasetRefURIs(ResourcePath("file:///1/2/3"), {\'comp\': ResourcePath("file:///a/b/c")})',
        )


class StoredFileInfoTestCase(DatasetTestHelper, unittest.TestCase):
    """Test the StoredFileInfo class."""

    storageClassFactory = StorageClassFactory()

    def test_StoredFileInfo(self) -> None:
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDict")
        ref = self.makeDatasetRef("metric", DimensionUniverse().empty, storageClass, {})

        record = dict(
            storage_class="StructuredDataDict",
            formatter="lsst.daf.butler.Formatter",
            path="a/b/c.txt",
            component="component",
            checksum=None,
            file_size=5,
        )
        info = StoredFileInfo.from_record(record)

        self.assertEqual(info.to_record(), record)

        ref2 = self.makeDatasetRef("metric", DimensionUniverse().empty, storageClass, {})
        rebased = info.rebase(ref2)
        self.assertEqual(rebased.rebase(ref), info)

        with self.assertRaises(TypeError):
            rebased.update(formatter=42)

        with self.assertRaises(ValueError):
            rebased.update(something=42, new="42")

        # Check that pickle works on StoredFileInfo.
        pickled_info = pickle.dumps(info)
        unpickled_info = pickle.loads(pickled_info)
        self.assertEqual(unpickled_info, info)
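
    # Note that the record dict round-trips unchanged through
    # from_record()/to_record(), and rebase() attaches the same stored-file
    # record to a different ref, which is why rebasing back to the original
    # ref compares equal to the starting info.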


@contextlib.contextmanager
def _temp_yaml_file(data: Any) -> Iterator[str]:
    """Write the given data to a temporary YAML file and yield its name."""
    fh = tempfile.NamedTemporaryFile(mode="w", suffix=".yaml")
    try:
        yaml.dump(data, stream=fh)
        fh.flush()
        yield fh.name
    finally:
        # Some tests delete the file.
        with contextlib.suppress(FileNotFoundError):
            fh.close()
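

# A sketch of the intended usage of _temp_yaml_file (the config content here
# is purely illustrative):
#
#   with _temp_yaml_file({"datastore": {"root": "/tmp/example"}}) as path:
#       config = Config(path)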


if __name__ == "__main__":
    unittest.main()